From 3f803a9421fddf10a30745fc145d565d9737bd40 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Fri, 28 Sep 2018 17:18:01 -0700
Subject: [PATCH 001/873] Make add_n() handle a single IndexedSlices argument
 properly

---
 tensorflow/python/ops/math_ops.py      |  4 +++-
 tensorflow/python/ops/math_ops_test.py | 11 +++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index f57abf6704..ebdfa592d3 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -2135,6 +2135,8 @@ def _as_indexed_slices_list(inputs, optimize=True):
 def add_n(inputs, name=None):
   """Adds all input tensors element-wise.
 
+  Converts `IndexedSlices` objects into dense tensors prior to adding.
+
   Args:
     inputs: A list of `Tensor` or `IndexedSlices` objects, each with same shape
       and type.
@@ -2157,7 +2159,7 @@ def add_n(inputs, name=None):
 
   if len(inputs) == 1:
     if isinstance(inputs[0], ops.IndexedSlices):
-      values = inputs[0].values
+      values = ops.convert_to_tensor(inputs[0])
     else:
       values = inputs[0]
     if name:
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index f051850d92..cd9c89e519 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -359,6 +359,17 @@ class AddNTest(test_util.TensorFlowTestCase):
                             [g.eval() for g in add_n_grad])
 
 
+  def testIndexedSlices(self):
+    slc = tf.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), 
+        array_ops.constant([2]), array_ops.constant([2,2])
+    slc_as_dense = np.array([[0, 0], [1, 2]])
+    with self.test_session(use_gpu=True):
+      # add_n currently always converts IndexedSlices to dense
+      self.assertAllEqual(slc_as_dense, math_ops.add_n([slc]).eval())
+      self.assertAllEqual(2 * slc_as_dense, math_ops.add_n([slc, slc]).eval())
+
+
+
 class DivAndModTest(test_util.TensorFlowTestCase):
   # TODO(aselle): Test more types before exposing new division operators.
 
-- 
GitLab


From ca7105c42182f6ef562d18a7843090a2ef458b83 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Mon, 1 Oct 2018 17:25:33 -0700
Subject: [PATCH 002/873] Oops, missing paren

---
 tensorflow/python/ops/math_ops_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index cd9c89e519..fbae792cd0 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -360,8 +360,8 @@ class AddNTest(test_util.TensorFlowTestCase):
 
 
   def testIndexedSlices(self):
-    slc = tf.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), 
-        array_ops.constant([2]), array_ops.constant([2,2])
+    slc = ops.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), 
+        array_ops.constant([1]), array_ops.constant([2,2]))
     slc_as_dense = np.array([[0, 0], [1, 2]])
     with self.test_session(use_gpu=True):
       # add_n currently always converts IndexedSlices to dense
-- 
GitLab


From 2918d022954d4ce75e2b2ce4cd30c7f06d820444 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Mon, 1 Oct 2018 18:27:06 -0700
Subject: [PATCH 003/873] Remove extra blank line

---
 tensorflow/python/ops/math_ops_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index fbae792cd0..06abdcfc54 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -369,7 +369,6 @@ class AddNTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(2 * slc_as_dense, math_ops.add_n([slc, slc]).eval())
 
 
-
 class DivAndModTest(test_util.TensorFlowTestCase):
   # TODO(aselle): Test more types before exposing new division operators.
 
-- 
GitLab


From aa9bb45cc8d534e5b1cec8613bea4b4e30f622de Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Fri, 12 Oct 2018 17:55:19 -0700
Subject: [PATCH 004/873] Explicitly set jdk8 in ci_parameterized_build.sh
 (#22956)

PiperOrigin-RevId: 216946217
---
 tensorflow/tools/ci_build/ci_parameterized_build.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index 489722c0e9..bc9cb4e9a1 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -149,8 +149,12 @@ BAZEL_TEST_FLAGS=""\
 "--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB}"
 BAZEL_BUILD_FLAGS="--keep_going"
 
-BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS}"
-BAZEL_BUILD_ONLY_CMD="bazel build ${BAZEL_BUILD_FLAGS}"
+# Explicitly set jdk8 since that's what's installed in our images. Note that
+# bazel 0.16 and higher defaults to jdk9, which causes failures. See b/117634064
+BAZEL_JAVA_FLAGS="--java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8"
+
+BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS} ${BAZEL_JAVA_FLAGS}"
+BAZEL_BUILD_ONLY_CMD="bazel build ${BAZEL_BUILD_FLAGS} ${BAZEL_JAVA_FLAGS}"
 BAZEL_CLEAN_CMD="bazel clean"
 
 PIP_CMD="${CI_BUILD_DIR}/builds/pip.sh"
-- 
GitLab


From 5ffddda5b707099fb62097aae00ba9403adedd13 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Mon, 15 Oct 2018 15:05:58 -0700
Subject: [PATCH 005/873] lint issues

---
 tensorflow/python/ops/math_ops_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index 06abdcfc54..0973e707a7 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -361,7 +361,7 @@ class AddNTest(test_util.TensorFlowTestCase):
 
   def testIndexedSlices(self):
     slc = ops.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), 
-        array_ops.constant([1]), array_ops.constant([2,2]))
+                            array_ops.constant([1]), array_ops.constant([2, 2]))
     slc_as_dense = np.array([[0, 0], [1, 2]])
     with self.test_session(use_gpu=True):
       # add_n currently always converts IndexedSlices to dense
-- 
GitLab


From 7b081981131bf6da32065b8ecc3b8c5bd1280c4a Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Tue, 16 Oct 2018 10:14:23 -0700
Subject: [PATCH 006/873] Update version information in preparation for
 1.12.0-rc1 (#23028)

---
 tensorflow/core/public/version.h      | 2 +-
 tensorflow/tools/pip_package/setup.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 07eeeb4f03..592dd5da16 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc0"
+#define TF_VERSION_SUFFIX "-rc1"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 3632ee2076..7593cfb58b 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.12.0-rc0'
+_VERSION = '1.12.0-rc1'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From b379cecbdc4a9e6a0f8e468e0877888956e35dd5 Mon Sep 17 00:00:00 2001
From: annarev <annarev@google.com>
Date: Thu, 18 Oct 2018 14:04:48 -0700
Subject: [PATCH 007/873] Include .inc files for absl headers (#23081)

---
 tensorflow/tools/pip_package/setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 7593cfb58b..8c3bd4ac70 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -232,6 +232,8 @@ headers = (list(find_files('*.h', 'tensorflow/core')) +
            list(find_files('*', 'third_party/eigen3')) +
            list(find_files('*.h',
                            'tensorflow/include/external/com_google_absl')) +
+           list(find_files('*.inc',
+                           'tensorflow/include/external/com_google_absl')) +
            list(find_files('*', 'tensorflow/include/external/eigen_archive')))
 
 setup(
-- 
GitLab


From 2aaf639173420403b804a7216f8f1c51027b6240 Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Fri, 19 Oct 2018 09:00:12 -0700
Subject: [PATCH 008/873] Update relnotes with Ignite information

---
 RELEASE.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/RELEASE.md b/RELEASE.md
index 58d918895c..dbe34db0bb 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -4,6 +4,7 @@
 * Keras models can now be directly exported to the SavedModel format(`tf.contrib.saved_model.save_keras_model()`) and used with Tensorflow Serving.
 * Keras models now support evaluating with a `tf.data.Dataset`.
 * TensorFlow binaries are built with XLA support linked in by default.
+* Ignite Dataset added to contrib/ignite that allows to work with Apache Ignite.
 
 ## Bug Fixes and Other Changes
 
-- 
GitLab


From 878e98c1abd6cbd5bd044ddf8660c55e0c2a1634 Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Fri, 19 Oct 2018 13:52:44 -0700
Subject: [PATCH 009/873] Update TF 1.12 version to 1.12-rc2

---
 tensorflow/core/public/version.h      | 2 +-
 tensorflow/tools/pip_package/setup.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 592dd5da16..500ec8f97b 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc1"
+#define TF_VERSION_SUFFIX "-rc2"
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 8c3bd4ac70..b7eed56695 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.12.0-rc1'
+_VERSION = '1.12.0-rc2'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From 4b4052c90e17c2c5bed45dc47c2d59d22f341b48 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Sat, 20 Oct 2018 16:17:55 -0700
Subject: [PATCH 010/873] Check for the presence of a Worker machine when
 reassigning hooks in distributed (#23116)

training jobs.

PiperOrigin-RevId: 217407558
---
 tensorflow/python/estimator/estimator.py      |  6 ++
 tensorflow/python/estimator/estimator_test.py | 61 +++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index e6d82f0db7..8b957288c3 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -1424,7 +1424,13 @@ class Estimator(object):
     # evaluations.
     save_summary_steps = self._config.save_summary_steps
     log_step_count_steps = self._config.log_step_count_steps
+
+    # Check existence of appropriate cluster spec fields, as well as master and
+    # worker nodes. As master also performs evaluation, summary writing must
+    # occur on a different node. The presence of a worker is also checked to
+    # prevent reassigning hooks for single-replica jobs with just a master node.
     if (self._config.cluster_spec and self._config.cluster_spec.jobs and
+        (run_config.TaskType.WORKER in self._config.cluster_spec.jobs) and
         (run_config.TaskType.MASTER in self._config.cluster_spec.jobs)):
       # Update config values to prevent the default hooks from being created on
       # the master or other workers.
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py
index 246dfb1a4b..c26b3e6509 100644
--- a/tensorflow/python/estimator/estimator_test.py
+++ b/tensorflow/python/estimator/estimator_test.py
@@ -1063,6 +1063,67 @@ class EstimatorTrainTest(test.TestCase):
       self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps'])
       self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps'])
 
+  def test_master_hooks_single_replica(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.MASTER: ['localhost:1234']
+        },
+        'task': {
+            'type': run_config.TaskType.MASTER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig(
+              save_summary_steps=100, log_step_count_steps=200))
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(100, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertEqual(200, mock_sess.call_args[1]['log_step_count_steps'])
+
+  def test_master_hooks_single_replica_with_ps(self):
+    tf_config = json.dumps({
+        'cluster': {
+            run_config.TaskType.MASTER: ['localhost:1234'],
+            run_config.TaskType.PS: ['localhost: 1235'],
+        },
+        'task': {
+            'type': run_config.TaskType.MASTER,
+            'index': 0
+        }
+    })
+    with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
+      est = estimator.Estimator(
+          model_fn=model_fn_global_step_incrementer,
+          config=run_config.RunConfig(
+              save_summary_steps=100, log_step_count_steps=200))
+
+    with test.mock.patch.object(training,
+                                'MonitoredTrainingSession') as mock_sess:
+      est.train(dummy_input_fn, steps=1)
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.SummarySaverHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertFalse(
+          any(
+              isinstance(hook, basic_session_run_hooks.StepCounterHook)
+              for hook in mock_sess.call_args[1]['hooks']))
+      self.assertEqual(100, mock_sess.call_args[1]['save_summaries_steps'])
+      self.assertEqual(200, mock_sess.call_args[1]['log_step_count_steps'])
+
 
 def _model_fn_with_eval_metric_ops(features, labels, mode, params):
   _, _ = features, labels
-- 
GitLab


From 20b53f7fe512a022ccbf97c71da4bd49f2fd5a04 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Sat, 20 Oct 2018 18:02:12 -0700
Subject: [PATCH 011/873] Fix triggering of asynchronous checkpoints. (#23138)

PiperOrigin-RevId: 217570792
---
 .../contrib/tpu/python/tpu/async_checkpoint.py    | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
index 20b7ba0997..700598d2f4 100644
--- a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
+++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
@@ -114,15 +114,12 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook):
     return SessionRunArgs(self._global_step_tensor)
 
   def after_run(self, run_context, run_values):
-    stale_global_step = run_values.results
-    if self._timer.should_trigger_for_step(stale_global_step +
-                                           self._steps_per_run):
-      # get the real value after train op.
-      global_step = run_context.session.run(self._global_step_tensor)
-      if self._timer.should_trigger_for_step(global_step):
-        self._timer.update_last_triggered_step(global_step)
-        if self._save(run_context.session, global_step):
-          run_context.request_stop()
+    global_step = run_context.session.run(self._global_step_tensor)
+    if self._timer.should_trigger_for_step(global_step):
+      self._timer.update_last_triggered_step(global_step)
+      logging.info("Triggering checkpoint. %s", global_step)
+      if self._save(run_context.session, global_step):
+        run_context.request_stop()
 
   def end(self, session):
     if self._save_thread:
-- 
GitLab


From 238bf3f5a503227befb15ba3dd8a861eb30c6f5c Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Sat, 20 Oct 2018 18:22:35 -0700
Subject: [PATCH 012/873] Async checkpointing: Save the graph in a background
 thread. (#23139)

PiperOrigin-RevId: 217747382
---
 .../contrib/tpu/python/tpu/async_checkpoint.py     | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
index 700598d2f4..78253d83fc 100644
--- a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
+++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
@@ -69,6 +69,7 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook):
       raise ValueError("You cannot provide both saver and scaffold.")
     self._saver = saver
     self._save_thread = None
+    self._write_graph_thread = None
     self._checkpoint_dir = checkpoint_dir
     self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
     self._scaffold = scaffold
@@ -97,9 +98,13 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook):
     # We do write graph and saver_def at the first call of before_run.
     # We cannot do this in begin, since we let other hooks to change graph and
     # add variables in begin. Graph is finalized after all begin calls.
-    training_util.write_graph(
-        ops.get_default_graph().as_graph_def(add_shapes=True),
-        self._checkpoint_dir, "graph.pbtxt")
+    def _write_graph_fn(self):
+      training_util.write_graph(
+          ops.get_default_graph().as_graph_def(add_shapes=True),
+          self._checkpoint_dir, "graph.pbtxt")
+    self._write_graph_thread = threading.Thread(target=_write_graph_fn)
+    self._write_graph_thread.start()
+
     saver_def = self._get_saver().saver_def if self._get_saver() else None
     graph = ops.get_default_graph()
     meta_graph_def = meta_graph.create_meta_graph_def(
@@ -125,6 +130,9 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook):
     if self._save_thread:
       logging.info("Waiting for any pending checkpoints to finish.")
       self._save_thread.join()
+    if self._write_graph_thread:
+      logging.info("Waiting for any pending write_graph to finish.")
+      self._write_graph_thread.join()
 
     last_step = session.run(self._global_step_tensor)
 
-- 
GitLab


From e40642fb03f96881c6e046e8b84606f29ab5d2b1 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Sat, 20 Oct 2018 18:43:02 -0700
Subject: [PATCH 013/873] Support fp16 types in ScatterNd GPU version (#23141)

PiperOrigin-RevId: 217749577
---
 tensorflow/core/kernels/scatter_nd_op.cc      |   4 +-
 .../kernel_tests/scatter_nd_ops_test.py       | 176 +++++++++---------
 2 files changed, 88 insertions(+), 92 deletions(-)

diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 2f8aede427..fd54c6d6d7 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -297,8 +297,7 @@ TF_CALL_bool(REGISTER_SCATTER_ND_CPU);
   REGISTER_SCATTER_ND_GPU(type);
 
 TF_CALL_int32(REGISTER_SCATTER_ND_ALL_GPU);
-// TODO(b/66916790): Support half types in ScatterNd.
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ALL_GPU);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_ND_ALL_GPU);
 TF_CALL_complex64(REGISTER_SCATTER_ND_ALL_GPU);
 TF_CALL_complex128(REGISTER_SCATTER_ND_ALL_GPU);
 
@@ -587,7 +586,6 @@ namespace functor {
   DECLARE_GPU_SPECS_INDEX(T, int64)
 
 TF_CALL_int32(DECLARE_GPU_SPECS);
-// TODO(b/66916790): Support half types in ScatterNd.
 TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS);
 TF_CALL_complex64(DECLARE_GPU_SPECS);
 TF_CALL_complex128(DECLARE_GPU_SPECS);
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index 4b92309e4d..49d83fb1d5 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -36,6 +36,9 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
+GRADIENT_TESTS_DTYPES = (dtypes.float16, dtypes.float32, dtypes.float64)
+
+
 def _AsType(v, vtype):
   return v.astype(vtype) if isinstance(v, np.ndarray) else vtype(v)
 
@@ -144,9 +147,8 @@ class StatefulScatterNdTest(test.TestCase):
         self.assertAllClose(new, ref_var.eval())
 
   def _VariableRankTests(self, np_scatter, tf_scatter):
-    for vtype in (np.int32,
-                  np.float32, np.float64,
-                  np.complex64, np.complex128):
+    for vtype in (np.int32, np.float16, np.float32, np.float64, np.complex64,
+                  np.complex128):
       for itype in (np.int32, np.int64):
         self._VariableRankTest(np_scatter, tf_scatter, vtype, itype)
 
@@ -223,7 +225,7 @@ class StatefulScatterNdTest(test.TestCase):
   #   self._VariableRankTests(_NumpyDiv, state_ops.scatter_nd_div)
 
   def _ScatterRepeatIndicesTest(self, np_scatter, tf_scatter):
-    for vtype in (np.int32, np.float32, np.float64):
+    for vtype in (np.int32, np.float16, np.float32, np.float64):
       for itype in (np.int32, np.int64):
         self._VariableRankTest(
             np_scatter, tf_scatter, vtype, itype, repeat_indices=True)
@@ -520,97 +522,93 @@ class ScatterNdTest(test.TestCase):
       self.scatter_nd(indices, updates, shape)
 
   def testGradientsRank2ElementUpdate(self):
-    indices = constant_op.constant([[0, 0], [1, 1]], dtype=dtypes.int32)
-    updates = constant_op.constant([1, 4], dtype=dtypes.float64)
-    shape = constant_op.constant([2, 2], dtype=dtypes.int32)
-    input_ = array_ops.zeros(shape, dtype=dtypes.float64)
-    outputs = self.scatter_nd(indices, updates, shape, input_)
-
-    grad_vals = constant_op.constant([[1, 2], [3, 4]], dtype=dtypes.float64)
-    updates_grad, input_grad = gradients_impl.gradients(
-        [outputs], [updates, input_], [grad_vals])
-    expected_updates_grad = np.array([1, 4], dtype=np.float64)
-    expected_input_grad = np.array([[1, 2], [3, 4]], dtype=np.float64)
-    with self.cached_session():
-      self.assertAllEqual(expected_updates_grad, updates_grad.eval())
-      if self.non_aliasing_add_test:
-        self.assertAllEqual(expected_input_grad, input_grad.eval())
+    for dtype in GRADIENT_TESTS_DTYPES:
+      indices = constant_op.constant([[0, 0], [1, 1]], dtype=dtypes.int32)
+      updates = constant_op.constant([1, 4], dtype=dtype)
+      shape = constant_op.constant([2, 2], dtype=dtypes.int32)
+      input_ = array_ops.zeros(shape, dtype=dtype)
+      outputs = self.scatter_nd(indices, updates, shape, input_)
+
+      grad_vals = constant_op.constant([[1, 2], [3, 4]], dtype=dtype)
+      updates_grad, input_grad = gradients_impl.gradients(
+          [outputs], [updates, input_], [grad_vals])
+      expected_updates_grad = np.array([1, 4], dtype=dtype.as_numpy_dtype())
+      expected_input_grad = np.array([[1, 2], [3, 4]],
+                                     dtype=dtype.as_numpy_dtype())
+      with self.cached_session():
+        self.assertAllEqual(expected_updates_grad, updates_grad.eval())
+        if self.non_aliasing_add_test:
+          self.assertAllEqual(expected_input_grad, input_grad.eval())
 
   def testGradientsRank2SliceUpdate(self):
-    indices = constant_op.constant([[1], [0]], dtype=dtypes.int32)
-    updates = constant_op.constant([[3, 4], [1, 2]], dtype=dtypes.float64)
-    shape = constant_op.constant([2, 2], dtype=dtypes.int32)
-    input_ = array_ops.zeros(shape, dtype=dtypes.float64)
-    outputs = self.scatter_nd(indices, updates, shape, input_)
-
-    grad_vals = constant_op.constant([[3, 4], [1, 2]], dtype=dtypes.float64)
-    updates_grad, input_grad = gradients_impl.gradients(
-        [outputs], [updates, input_], [grad_vals])
-    expected_updates_grad = np.array([[1, 2], [3, 4]], dtype=np.float64)
-    expected_input_grad = np.array([[3, 4], [1, 2]], dtype=np.float64)
-    with self.cached_session():
-      self.assertAllEqual(expected_updates_grad, updates_grad.eval())
-      if self.non_aliasing_add_test:
-        self.assertAllEqual(expected_input_grad, input_grad.eval())
+    for dtype in GRADIENT_TESTS_DTYPES:
+      indices = constant_op.constant([[1], [0]], dtype=dtypes.int32)
+      updates = constant_op.constant([[3, 4], [1, 2]], dtype=dtype)
+      shape = constant_op.constant([2, 2], dtype=dtypes.int32)
+      input_ = array_ops.zeros(shape, dtype=dtype)
+      outputs = self.scatter_nd(indices, updates, shape, input_)
+
+      grad_vals = constant_op.constant([[3, 4], [1, 2]], dtype=dtype)
+      updates_grad, input_grad = gradients_impl.gradients(
+          [outputs], [updates, input_], [grad_vals])
+      expected_updates_grad = np.array([[1, 2], [3, 4]],
+                                       dtype=dtype.as_numpy_dtype())
+      expected_input_grad = np.array([[3, 4], [1, 2]],
+                                     dtype=dtype.as_numpy_dtype())
+      with self.cached_session():
+        self.assertAllEqual(expected_updates_grad, updates_grad.eval())
+        if self.non_aliasing_add_test:
+          self.assertAllEqual(expected_input_grad, input_grad.eval())
 
   def testGradientsRank3SliceUpdate(self):
-    indices = constant_op.constant(
-        [[[0, 1], [1, 0]], [[0, 0], [1, 1]]], dtype=dtypes.int32)
-    updates = constant_op.constant(
-        [[[5, 7], [2, 4]], [[1, 3], [6, 8]]], dtype=dtypes.float64)
-    shape = constant_op.constant([2, 2, 2], dtype=dtypes.int32)
-    input_ = array_ops.zeros(shape, dtype=dtypes.float64)
-    outputs = self.scatter_nd(indices, updates, shape, input_)
-
-    grad_vals = constant_op.constant(
-        [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=dtypes.float64)
-    updates_grad, input_grad = gradients_impl.gradients(
-        [outputs], [updates, input_], [grad_vals])
-    expected_updates_grad = np.array(
-        [[[3, 4], [5, 6]], [[1, 2], [7, 8]]], dtype=np.float64)
-    expected_input_grad = np.array(
-        [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.float64)
-    with self.cached_session():
-      self.assertAllEqual(expected_updates_grad, updates_grad.eval())
-      if self.non_aliasing_add_test:
-        self.assertAllEqual(expected_input_grad, input_grad.eval())
+    for dtype in GRADIENT_TESTS_DTYPES:
+      indices = constant_op.constant([[[0, 1], [1, 0]], [[0, 0], [1, 1]]],
+                                     dtype=dtypes.int32)
+      updates = constant_op.constant([[[5, 7], [2, 4]], [[1, 3], [6, 8]]],
+                                     dtype=dtype)
+      shape = constant_op.constant([2, 2, 2], dtype=dtypes.int32)
+      input_ = array_ops.zeros(shape, dtype=dtype)
+      outputs = self.scatter_nd(indices, updates, shape, input_)
+
+      grad_vals = constant_op.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
+                                       dtype=dtype)
+      updates_grad, input_grad = gradients_impl.gradients(
+          [outputs], [updates, input_], [grad_vals])
+      expected_updates_grad = np.array([[[3, 4], [5, 6]], [[1, 2], [7, 8]]],
+                                       dtype=dtype.as_numpy_dtype())
+      expected_input_grad = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
+                                     dtype=dtype.as_numpy_dtype())
+      with self.cached_session():
+        self.assertAllEqual(expected_updates_grad, updates_grad.eval())
+        if self.non_aliasing_add_test:
+          self.assertAllEqual(expected_input_grad, input_grad.eval())
 
   def testGradientsRank7SliceUpdate(self):
-    indices = constant_op.constant(
-        [[[
-            [[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]],
-            [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]
-        ]]], dtype=dtypes.int32)
-    updates = constant_op.constant(
-        [[[
-            [[[[5, 6], [2, 4]]]],
-            [[[[1, 3], [6, 8]]]]
-        ]]], dtype=dtypes.float64)
-    shape = constant_op.constant([1, 1, 2, 1, 1, 2, 2], dtype=dtypes.int32)
-    input_ = array_ops.zeros(shape, dtype=dtypes.float64)
-    outputs = self.scatter_nd(indices, updates, shape, input_)
-
-    grad_vals = constant_op.constant(
-        [[[
-            [[[[1, 2], [3, 4]]]],
-            [[[[5, 6], [7, 8]]]]
-        ]]], dtype=dtypes.float64)
-    updates_grad, input_grad = gradients_impl.gradients(
-        [outputs], [updates, input_], [grad_vals])
-    expected_updates_grad = np.array(
-        [[[
-            [[[[3, 4], [5, 6]]]],
-            [[[[1, 2], [7, 8]]]]
-        ]]], dtype=np.float64)
-    expected_input_grad = np.array(
-        [[[
-            [[[[1, 2], [3, 4]]]],
-            [[[[5, 6], [7, 8]]]]
-        ]]], dtype=np.float64)
-    with self.cached_session():
-      self.assertAllEqual(expected_updates_grad, updates_grad.eval())
-      if self.non_aliasing_add_test:
-        self.assertAllEqual(expected_input_grad, input_grad.eval())
+    for dtype in GRADIENT_TESTS_DTYPES:
+      indices = constant_op.constant(
+          [[[[[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]],
+             [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]]]],
+          dtype=dtypes.int32)
+      updates = constant_op.constant(
+          [[[[[[[5, 6], [2, 4]]]], [[[[1, 3], [6, 8]]]]]]], dtype=dtype)
+      shape = constant_op.constant([1, 1, 2, 1, 1, 2, 2], dtype=dtypes.int32)
+      input_ = array_ops.zeros(shape, dtype=dtype)
+      outputs = self.scatter_nd(indices, updates, shape, input_)
+
+      grad_vals = constant_op.constant(
+          [[[[[[[1, 2], [3, 4]]]], [[[[5, 6], [7, 8]]]]]]], dtype=dtype)
+      updates_grad, input_grad = gradients_impl.gradients(
+          [outputs], [updates, input_], [grad_vals])
+      expected_updates_grad = np.array(
+          [[[[[[[3, 4], [5, 6]]]], [[[[1, 2], [7, 8]]]]]]],
+          dtype=dtype.as_numpy_dtype())
+      expected_input_grad = np.array(
+          [[[[[[[1, 2], [3, 4]]]], [[[[5, 6], [7, 8]]]]]]],
+          dtype=dtype.as_numpy_dtype())
+      with self.cached_session():
+        self.assertAllEqual(expected_updates_grad, updates_grad.eval())
+        if self.non_aliasing_add_test:
+          self.assertAllEqual(expected_input_grad, input_grad.eval())
 
   def testScatterNdRepatedIndicesAdd(self):
     indices = array_ops.zeros([100000, 1], dtypes.int32)
-- 
GitLab


From 3f9564a8b901c94eab2a21a764d8e177a45af12f Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 22 Oct 2018 14:12:18 -0700
Subject: [PATCH 014/873] Merging confusion_matrix naming

---
 tensorflow/python/ops/confusion_matrix.py             | 3 +--
 tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt | 4 ----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py
index 8259142456..3c55ae68ac 100644
--- a/tensorflow/python/ops/confusion_matrix.py
+++ b/tensorflow/python/ops/confusion_matrix.py
@@ -90,8 +90,7 @@ def remove_squeezable_dimensions(
     return labels, predictions
 
 
-@tf_export('train.confusion_matrix', 'confusion_matrix')
-@deprecation.deprecated_endpoints('confusion_matrix')
+@tf_export('confusion_matrix')
 def confusion_matrix(labels, predictions, num_classes=None, dtype=dtypes.int32,
                      name=None, weights=None):
   """Computes the confusion matrix from predictions and labels.
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
index 45c81fdd3b..9f35395284 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt
@@ -272,10 +272,6 @@ tf_module {
     name: "checkpoint_exists"
     argspec: "args=[\'checkpoint_prefix\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "confusion_matrix"
-    argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'int32\'>\", \'None\', \'None\'], "
-  }
   member_method {
     name: "cosine_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
-- 
GitLab


From 9fa2e774d3aa3f53592cf5e0d3fe26cb40e3d6a1 Mon Sep 17 00:00:00 2001
From: Anna Revinskaya <annarev@google.com>
Date: Mon, 22 Oct 2018 18:47:25 -0700
Subject: [PATCH 015/873] Removed unused import

---
 tensorflow/python/ops/confusion_matrix.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py
index 3c55ae68ac..c09154129f 100644
--- a/tensorflow/python/ops/confusion_matrix.py
+++ b/tensorflow/python/ops/confusion_matrix.py
@@ -26,7 +26,6 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
-from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-- 
GitLab


From 185ae29da792ee8d42fa153e819c75787717174e Mon Sep 17 00:00:00 2001
From: Anna Revinskaya <annarev@google.com>
Date: Mon, 22 Oct 2018 19:06:15 -0700
Subject: [PATCH 016/873] Update V2 golden as well

---
 tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
index 7e980fe44d..cb6da5088b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
@@ -252,10 +252,6 @@ tf_module {
     name: "checkpoint_exists"
     argspec: "args=[\'checkpoint_prefix\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "confusion_matrix"
-    argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'int32\'>\", \'None\', \'None\'], "
-  }
   member_method {
     name: "cosine_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
-- 
GitLab


From da1b48ddd04875995098f3c5c3fe0740b72518b8 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Wed, 24 Oct 2018 16:39:49 -0700
Subject: [PATCH 017/873] Declare that stateless random ops are not
 differentiable in C++ code. (#23227)

PiperOrigin-RevId: 215935319
---
 tensorflow/core/BUILD                        |  1 +
 tensorflow/core/ops/stateless_random_grad.cc | 23 ++++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 tensorflow/core/ops/stateless_random_grad.cc

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 6a3ee3c1cb..900a0e11c4 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1242,6 +1242,7 @@ cc_library(
     srcs = [
         "ops/math_grad.cc",
         "ops/random_grad.cc",
+        "ops/stateless_random_grad.cc",
     ],
     linkstatic = 1,  # Needed since alwayslink is broken in bazel b/27630669
     visibility = ["//visibility:public"],
diff --git a/tensorflow/core/ops/stateless_random_grad.cc b/tensorflow/core/ops/stateless_random_grad.cc
new file mode 100644
index 0000000000..331e1d0152
--- /dev/null
+++ b/tensorflow/core/ops/stateless_random_grad.cc
@@ -0,0 +1,23 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/function.h"
+
+namespace tensorflow {
+REGISTER_OP_NO_GRADIENT("StatelessRandomUniform");
+REGISTER_OP_NO_GRADIENT("StatelessRandomNormal");
+REGISTER_OP_NO_GRADIENT("StatelessTruncatedNormal");
+REGISTER_OP_NO_GRADIENT("StatelessMultinomial");
+}  // end namespace tensorflow
-- 
GitLab


From e72c9ebe78a119715541f40ea99b1a8c89639968 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Wed, 24 Oct 2018 17:46:03 -0700
Subject: [PATCH 018/873] 1.12.0-rc2 cherry-pick request: Various XLA scatter
 improvements. (#23235)

* [XLA] Update Tf2Xla bridge to use Scatter HLO.

PiperOrigin-RevId: 215687800

* [XLA:GPU] Add an implementation of scatter for GPU

This simple has a kernel that runs on every element of the updates tensor,
figure out the right indices to perform the update, and applies it with an
atomic operation.

Currently we emit a CAS for plain (i.e. non-add) updates, which is inefficient.
Also TuplePointsToAnalysis doesn't know that it should alias the operand and
output buffers of a scatter, which would avoid a copy.

PiperOrigin-RevId: 216412467

* [XLA] Allow scatter to share the operand buffer with the output

This avoids a copy.

PiperOrigin-RevId: 216437329

* [XLA:GPU] Elide the SequentialThunk when emitting scatter with no copy

We have a 1-element thunk sequence if we're not copying. That's still two
thunks and hlo profiling gets confused if it sees two thunks for the same
instruction and one of them claims to be the whole instruction.

PiperOrigin-RevId: 216448063

* [XLA:GPU] Allow input fusion into scatter

We fuse everything into the scatter now, and emit two kernels. The first kernel
fills the output buffer with the computation fused into the scatter operand.
The second kernel is a regular scatter, which also contains the fused
operations from the updates and scatter_indices inputs.

PiperOrigin-RevId: 216624225

* [XLA:GPU] Adding a test case for Scatter where GPU implementation fails.

PiperOrigin-RevId: 216798034

* [XLA:GPU] Fix scatter oob check computation

This was comparing the index after adding it to the window, and then comparing
against the window dimension. This means that the bounds check was only correct
for the first element of a window. Instead compare the scatter index, which is
the same for all elements of a window.

PiperOrigin-RevId: 216921512

* [XLA:GPU] Elide tuple roots of the entry computation

The tuple buffer is never read, so stop emitting code to fill it. A typical
root tuple consists of a H2D memcpy and a host callback, both of which are
somewhat slow.

This helps tiny models and inference benchmarks, where the host/device syncs
can be a significant part of the runtime of the entire computation.

PiperOrigin-RevId: 216968475
---
 tensorflow/compiler/tf2xla/lib/scatter.cc     | 213 +++++++++-------
 tensorflow/compiler/tf2xla/lib/scatter.h      |   6 +-
 tensorflow/compiler/xla/client/xla_builder.cc |   3 +
 tensorflow/compiler/xla/service/gpu/BUILD     |   1 -
 .../xla/service/gpu/instruction_fusion.cc     |   9 +-
 .../service/gpu/instruction_fusion_test.cc    |  39 +++
 .../xla/service/gpu/ir_emitter_unnested.cc    | 241 +++++++++++++++++-
 .../xla/service/gpu/ir_emitter_unnested.h     |   9 +
 .../xla/service/gpu/nvptx_compiler.cc         |   3 -
 .../xla/service/hlo_dataflow_analysis.cc      |   1 +
 .../xla/service/hlo_dataflow_analysis_test.cc |  38 +++
 .../compiler/xla/service/hlo_matchers.h       |   1 +
 tensorflow/compiler/xla/service/hlo_module.cc |   3 +-
 tensorflow/compiler/xla/service/inliner.cc    |  32 ++-
 .../compiler/xla/service/inliner_test.cc      |  30 +++
 .../compiler/xla/service/layout_assignment.cc |   2 +-
 .../xla/service/tuple_points_to_analysis.cc   |   1 +
 .../service/tuple_points_to_analysis_test.cc  |  38 +++
 tensorflow/compiler/xla/tests/scatter_test.cc |  62 +++++
 19 files changed, 613 insertions(+), 119 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc b/tensorflow/compiler/tf2xla/lib/scatter.cc
index 38dfde165d..2b1c2ced92 100644
--- a/tensorflow/compiler/tf2xla/lib/scatter.cc
+++ b/tensorflow/compiler/tf2xla/lib/scatter.cc
@@ -38,12 +38,10 @@ xla::StatusOr<xla::XlaOp> XlaScatter(
         combiner,
     xla::XlaBuilder* builder) {
   TF_ASSIGN_OR_RETURN(xla::Shape buffer_shape, builder->GetShape(buffer));
-  TF_RETURN_IF_ERROR(builder->GetShape(updates).status());
+  TF_ASSIGN_OR_RETURN(xla::Shape updates_shape, builder->GetShape(updates));
   TF_ASSIGN_OR_RETURN(xla::Shape indices_shape, builder->GetShape(indices));
   absl::Span<const int64> indices_dims =
       xla::AsInt64Slice(indices_shape.dimensions());
-  absl::Span<const int64> buffer_dims =
-      xla::AsInt64Slice(buffer_shape.dimensions());
 
   // If the indices are N-dimensional, the minor dimension of indices contains
   // the indices to update. Otherwise the indices are all scalars.
@@ -81,104 +79,129 @@ xla::StatusOr<xla::XlaOp> XlaScatter(
     }
   }
 
-  // Shape of the non-indexed dimensions of the buffer.
-  std::vector<int64> buffer_shape_post_axes(
-      buffer_dims.begin() + num_index_dims, buffer_dims.end());
-
-  // Flatten the major dimensions of indices and updates into a single dimension
-  // for ease of iteration.
-  std::vector<int64> flat_indices_shape({num_indices});
-  if (indices_are_vectors) {
-    flat_indices_shape.push_back(num_index_dims);
+  // Example of a 1-D scatter that updates two [3,1] tensors in a tensor of
+  // shape [3,3]:
+  // NOTE: ***This case will not be generated by any of the tf.scatter ops.***
+  //
+  //   operand = s32[3,3] parameter(0)
+  //   indices = s32[2] parameter(1)
+  //   updates = s32[3,2] parameter(2)
+  //   scatter = s32[3,3] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={0},
+  //       inserted_window_dims={1},
+  //       scatter_dims_to_operand_dims={1},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of a 1-D scatter that updates two [1,3] tensors in a tensor of
+  // shape [3,3]:
+  //
+  //   operand = s32[3,3] parameter(0)
+  //   indices = s32[2] parameter(1)
+  //   updates = s32[2,3] parameter(2)
+  //   scatter = s32[3,3] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={1},
+  //       inserted_window_dims={0},
+  //       scatter_dims_to_operand_dims={0},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of an N-D scatter updating slices of shape [1,1,2] in a tensor of
+  // shape [3,3,2]
+  //
+  //   operand = s32[3,3,2] parameter(0)
+  //   indices = s32[2,2] parameter(1)
+  //   updates = s32[2,2] parameter(2)
+  //   scatter = s32[3,3,2] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={1},
+  //       inserted_window_dims={0,1},
+  //       scatter_dims_to_operand_dims={0,1},
+  //       index_vector_dim=1
+  //
+  //
+  // Example of a scatter updating slices of shape [] in a tensor of shape [1,1]
+  //
+  //   operand = s32[1,1] parameter(0)
+  //   indices = s32[1] parameter(1)
+  //   updates = s32[1] parameter(2)
+  //   scatter = s32[1,1] scatter(operand, indices, updates),
+  //       to_apply=update_computation,
+  //       update_window_dims={},
+  //       inserted_window_dims={0,1},
+  //       scatter_dims_to_operand_dims={0},
+  //       index_vector_dim=1
+  // Note that updates operand would be broadcasted into [1] in this case.
+  //
+
+  xla::ScatterDimensionNumbers dim_numbers;
+  dim_numbers.set_index_vector_dim(indices_are_vectors
+                                       ? indices_shape.dimensions_size() - 1
+                                       : indices_shape.dimensions_size());
+
+  int64 updates_rank = xla::ShapeUtil::Rank(updates_shape);
+  int64 buffer_rank = xla::ShapeUtil::Rank(buffer_shape);
+  int64 num_window_dims_in_updates = buffer_rank - num_index_dims;
+
+  // If the rank of `updates` is 0 and does not match the expected rank of
+  // updates, broadcast `updates` to the expected shape of updates.
+  auto new_updates = updates;
+  std::vector<int64> expected_updates_dims(indices_dims.begin(),
+                                           indices_dims.end());
+  for (int64 dim = num_index_dims; dim < buffer_rank; ++dim) {
+    expected_updates_dims.push_back(buffer_shape.dimensions(dim));
+  }
+  int64 expected_updates_rank = expected_updates_dims.size();
+  if (updates_rank == 0 && expected_updates_rank != 0) {
+    new_updates = xla::Broadcast(updates, expected_updates_dims);
+    TF_ASSIGN_OR_RETURN(updates_shape, builder->GetShape(new_updates));
+    updates_rank = xla::ShapeUtil::Rank(updates_shape);
   }
 
-  std::vector<int64> flat_updates_shape({num_indices});
-  flat_updates_shape.insert(flat_updates_shape.end(),
-                            buffer_shape_post_axes.begin(),
-                            buffer_shape_post_axes.end());
-
-  // Construct the initial values of the loop-carried Tensors.
-  auto flat_indices = xla::Reshape(indices, flat_indices_shape);
-  auto flat_updates = xla::Reshape(updates, flat_updates_shape);
-  auto init = {flat_indices, flat_updates, buffer};
-
-  // Constructs the loop body. The implementation of scatter is essentially:
-  // for i in range(num_indices):
-  //   index = dynamic-slice(indices, i)
-  //   update = dynamic-slice(updates, i)
-  //   buffer = dynamic-update-slice(buffer, update, index)
-  auto body_fn = [&](xla::XlaOp i, absl::Span<const xla::XlaOp> loop_vars,
-                     xla::XlaBuilder* body_builder) {
-    auto indices = loop_vars[0];
-    auto updates = loop_vars[1];
-    auto buffer = loop_vars[2];
-
-    auto zero_index = xla::ConstantLiteral(
-        body_builder, xla::LiteralUtil::Zero(indices_shape.element_type()));
-
-    // Slice the i-th index from the indices array.
-    xla::XlaOp index;
-    auto indices_offset = xla::Reshape(i, {1});
-    if (indices_are_vectors) {
-      indices_offset = xla::Pad(indices_offset, zero_index,
-                                xla::MakeEdgePaddingConfig({{0, 1}}));
-
-      index = xla::DynamicSlice(indices, indices_offset, {1, num_index_dims});
-      index = xla::Collapse(index, {0, 1});
-    } else {
-      index = xla::DynamicSlice(indices, indices_offset, {1});
+  if (updates_rank > 0) {
+    for (int64 i = (updates_rank - num_window_dims_in_updates);
+         i < updates_rank; ++i) {
+      dim_numbers.add_update_window_dims(i);
     }
+  }
 
-    // Discard updates with negative indices, since some users expect this.
-    auto index_in_range = xla::ReduceAll(
-        xla::Le(zero_index, index), xla::ConstantR0<bool>(body_builder, true),
-        xla::CreateScalarAndComputation(xla::PRED, body_builder));
-
-    // Make the index in bounds to prevent implementation defined behavior.
-    index = xla::Max(index, zero_index);
-    index = xla::Pad(
-        index, zero_index,
-        xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}}));
-
-    // Slice the i-th index from the updates array.
-    auto updates_offset = xla::Reshape(i, {1});
-    updates_offset = xla::Pad(
-        updates_offset, zero_index,
-        xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}}));
-    std::vector<int64> flat_updates_slice_shape({1});
-    flat_updates_slice_shape.insert(flat_updates_slice_shape.end(),
-                                    buffer_shape_post_axes.begin(),
-                                    buffer_shape_post_axes.end());
-    auto update =
-        xla::DynamicSlice(updates, updates_offset, flat_updates_slice_shape);
-
-    // Unflatten the major (iteration) dimensions of the slice to their
-    // original shape.
-    std::vector<int64> updates_slice_shape(num_index_dims, 1);
-    updates_slice_shape.insert(updates_slice_shape.end(),
-                               buffer_shape_post_axes.begin(),
-                               buffer_shape_post_axes.end());
-    update = xla::Reshape(update, updates_slice_shape);
-
-    // Apply the update to the buffer. If there is a combiner, use it to merge
-    // the current values with the update.
-    auto current_value = xla::DynamicSlice(buffer, index, updates_slice_shape);
+  for (int64 i = 0; i < num_index_dims; ++i) {
+    dim_numbers.add_inserted_window_dims(i);
+    dim_numbers.add_scatter_dims_to_operand_dims(i);
+  }
+
+  // Build the combiner computation.
+  xla::XlaComputation combiner_computation;
+  {
+    xla::XlaBuilder cb("scatter-combiner");
+    auto xla_scalar_shape =
+        xla::ShapeUtil::MakeShape(buffer_shape.element_type(), {});
+    auto p0 = xla::Parameter(&cb, 0, xla_scalar_shape, "p0");
+    auto p1 = xla::Parameter(&cb, 1, xla_scalar_shape, "p1");
     if (combiner) {
-      update = combiner(current_value, update, body_builder);
+      combiner(p0, p1, &cb);
     }
-    // Use the current value instead of the update if the index is out of
-    // bounds.
-    update = xla::Select(index_in_range, update, current_value);
-    // Apply the update.
-    buffer = xla::DynamicUpdateSlice(buffer, update, index);
-
-    return std::vector<xla::XlaOp>{indices, updates, buffer};
-  };
-
-  TF_ASSIGN_OR_RETURN(auto outputs,
-                      XlaForEachIndex(num_indices, indices_shape.element_type(),
-                                      body_fn, init, "scatter", builder));
-  return outputs[2];
+    combiner_computation = cb.Build().ConsumeValueOrDie();
+  }
+
+  VLOG(3) << "Scatter op:";
+  VLOG(3) << "  Input: " << xla::ShapeUtil::HumanString(buffer_shape);
+  VLOG(3) << "  Indices: " << xla::ShapeUtil::HumanString(indices_shape);
+  VLOG(3) << "  Updates: " << xla::ShapeUtil::HumanString(updates_shape);
+  VLOG(3) << "  Scatter Dimension Numbers: ";
+  VLOG(3) << "    index_vector_dim: " << dim_numbers.index_vector_dim();
+  VLOG(3) << "    update_window_dims: ["
+          << absl::StrJoin(dim_numbers.update_window_dims(), ",") << "]";
+  VLOG(3) << "    inserted_window_dims: ["
+          << absl::StrJoin(dim_numbers.inserted_window_dims(), ",") << "]";
+  VLOG(3) << "    scatter_dims_to_operand_dims: ["
+          << absl::StrJoin(dim_numbers.scatter_dims_to_operand_dims(), ",")
+          << "]";
+
+  return xla::Scatter(buffer, indices, new_updates, combiner_computation,
+                      dim_numbers);
 }
 
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/lib/scatter.h b/tensorflow/compiler/tf2xla/lib/scatter.h
index 13a5f1b850..4cf478c4b9 100644
--- a/tensorflow/compiler/tf2xla/lib/scatter.h
+++ b/tensorflow/compiler/tf2xla/lib/scatter.h
@@ -34,7 +34,11 @@ namespace tensorflow {
 // Otherwise, `indices_are_vectors`, then indices are multidimensional and the
 // minor dimension of `indices` represents a vector of indices.
 //
-// If any indices are negative, the corresponding update is discarded.
+// If `updates` is a scalar, then it will be broadcasted into the expected shape
+// of updates.
+//
+// If any part of the update region is out-of-bounds, the corresponding update
+// is discarded.
 //
 // If a `combiner` is provided, updates are combined with the existing values in
 // the buffer using the combiner function. Otherwise, the updates replace the
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index e0ec91dba1..d196252db1 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -208,6 +208,9 @@ void XlaBuilder::IsConstantVisitor(const int64 op_handle,
     case HloOpcode::kWhile:
       // TODO(b/32495713): We aren't checking the condition and body
       // computations themselves.
+    case HloOpcode::kScatter:
+      // TODO(b/32495713): We aren't checking the embedded computation in
+      // Scatter.
     case HloOpcode::kSend:
     case HloOpcode::kRecv:
     case HloOpcode::kParameter:
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index a838464cae..dde0cc7459 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -704,7 +704,6 @@ cc_library(
         "//tensorflow/compiler/xla/service:llvm_compiler",
         "//tensorflow/compiler/xla/service:reduce_precision_insertion",
         "//tensorflow/compiler/xla/service:reshape_mover",
-        "//tensorflow/compiler/xla/service:scatter_expander",
         "//tensorflow/compiler/xla/service:transpose_folding",
         "//tensorflow/compiler/xla/service:tuple_simplifier",
         "//tensorflow/compiler/xla/service:while_loop_constant_sinking",
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
index b61f038739..1d66787d89 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
@@ -47,6 +47,7 @@ bool IsFusible(const HloInstruction& hlo) {
          hlo.opcode() == HloOpcode::kReduce ||
          hlo.opcode() == HloOpcode::kReduceWindow ||
          hlo.opcode() == HloOpcode::kReshape ||
+         hlo.opcode() == HloOpcode::kScatter ||
          hlo.opcode() == HloOpcode::kSlice ||
          hlo.opcode() == HloOpcode::kTranspose;
 }
@@ -223,6 +224,11 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
     return false;
   }
 
+  // Scatter is only supported at the root of a kInput fusion.
+  if (producer->opcode() == HloOpcode::kScatter) {
+    return false;
+  }
+
   // Do not fuse into reduce input fusions if the resulting kernel would suffer
   // from poor data locality (due to unfriendly input layouts).
   if (IsInputFusibleReduction(*consumer) &&
@@ -285,7 +291,8 @@ bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer,
 
 HloInstruction::FusionKind GpuInstructionFusion::ChooseKind(
     const HloInstruction* producer, const HloInstruction* consumer) {
-  if (IsReductionToVector(*consumer)) {
+  if (IsReductionToVector(*consumer) ||
+      consumer->opcode() == HloOpcode::kScatter) {
     return HloInstruction::FusionKind::kInput;
   }
   if (producer->opcode() == HloOpcode::kDot ||
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
index 96bfe0c12e..fd9b7cee80 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc
@@ -709,5 +709,44 @@ TEST_F(InstructionFusionTest, AvoidsLargeFusion) {
   }
 }
 
+TEST_F(InstructionFusionTest, FuseIntoScatter) {
+  auto module = ParseHloString(R"(
+    HloModule test_module
+
+    add {
+      lhs = f32[] parameter(0)
+      rhs = f32[] parameter(1)
+      ROOT add = f32[] add(lhs, rhs)
+    }
+
+    ENTRY FuseIntoScatter {
+      p0 = s32[3,3] parameter(0)
+      operand = s32[3,3] add(p0, p0)
+      p1 = s32[2] parameter(1)
+      indices = s32[2] add(p1, p1)
+      p2 = s32[2,3] parameter(2)
+      updates = s32[2,3] add(p2, p2)
+      scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=add,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+      ROOT add = s32[3,3] add(scatter, scatter)
+    })")
+                    .ValueOrDie();
+
+  EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true)
+                  .Run(module.get())
+                  .ValueOrDie());
+
+  HloInstruction* root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, op::Add(op::Fusion(), op::Fusion()));
+  EXPECT_EQ(root->operand(0)->fusion_kind(),
+            HloInstruction::FusionKind::kInput);
+  EXPECT_THAT(root->operand(0)->fused_expression_root(),
+              op::Scatter(op::Add(), op::Add(), op::Add()));
+}
+
 }  // namespace gpu
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index c792dd2ddb..2951f7a65f 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -493,13 +493,68 @@ Status IrEmitterUnnested::HandleFft(HloInstruction* fft) {
 
 Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
   HloInstruction* root = fusion->fused_expression_root();
-  // HandleFusion specializes reduction from a multi-dimensional array to a 1D
-  // array. The specialized version requires a initializer thunk that
-  // initializes the output array to the initial value of the reduce.
   if (HloInstruction::FusionKind::kInput == fusion->fusion_kind()) {
     switch (root->opcode()) {
+      case HloOpcode::kScatter: {
+        std::vector<std::unique_ptr<Thunk>> thunks;
+        // The initialization from 'operand' is using different loop bounds, so
+        // emit it in a separate kernel. Treat it like a loop fusion, writing to
+        // the output buffer.
+        {
+          int unroll_factor = ComputeMaxUnrollFactor(fusion);
+          thunks.push_back(BuildKernelThunk(
+              fusion, /*implements_whole_instruction=*/false, unroll_factor));
+
+          std::vector<IrArray> operand_parameter_arrays;
+          for (HloInstruction* operand : fusion->operands()) {
+            operand_parameter_arrays.push_back(GetIrArray(*operand, *fusion));
+          }
+          GpuElementalIrEmitter operand_elemental_emitter(
+              hlo_module_config_, ir_emitter_context_->llvm_module(), &b_,
+              GetNestedComputer());
+          FusedIrEmitter operand_fused_emitter(operand_parameter_arrays,
+                                               &operand_elemental_emitter);
+          TF_RETURN_IF_ERROR(
+              root->mutable_operand(0)->Accept(&operand_fused_emitter));
+
+          TF_RETURN_IF_ERROR(EmitTargetElementLoopInThunk(
+              *fusion, operand_fused_emitter.GetGenerator(root->operand(0)),
+              static_cast<KernelThunk*>(thunks.back().get())));
+        }
+
+        // Now build the actual scatter, reading and writing to the freshly
+        // filled output buffer.
+        {
+          thunks.push_back(
+              BuildKernelThunk(fusion,
+                               /*implements_whole_instruction=*/false));
+          // Spin up a new fused emitter for the scatter kernel and emit it.
+          std::vector<IrArray> scatter_parameter_arrays;
+          for (HloInstruction* operand : fusion->operands()) {
+            scatter_parameter_arrays.push_back(GetIrArray(*operand, *fusion));
+          }
+          GpuElementalIrEmitter scatter_elemental_emitter(
+              hlo_module_config_, ir_emitter_context_->llvm_module(), &b_,
+              GetNestedComputer());
+          FusedIrEmitter scatter_fused_emitter(scatter_parameter_arrays,
+                                               &scatter_elemental_emitter);
+          TF_RETURN_IF_ERROR(root->Accept(&scatter_fused_emitter));
+          TF_RETURN_IF_ERROR(EmitScatter(
+              thunks.back().get(), root,
+              /*scatter_indices_gen=*/
+              scatter_fused_emitter.GetGenerator(root->operand(1)),
+              /*updates_gen=*/
+              scatter_fused_emitter.GetGenerator(root->operand(2))));
+        }
+        thunk_sequence_->emplace_back(
+            absl::make_unique<SequentialThunk>(std::move(thunks), fusion));
+        return Status::OK();
+      }
       case HloOpcode::kTuple:
       case HloOpcode::kReduce: {
+        // HandleFusion specializes reduction from a multi-dimensional array to
+        // a 1D array. The specialized version requires a initializer thunk that
+        // initializes the output array to the initial value of the reduce.
         if (root->opcode() == HloOpcode::kReduce &&
             ShapeUtil::IsTuple(root->shape())) {
           // TODO(b/112040122): Support variadic reduce.
@@ -1672,6 +1727,14 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) {
 }
 
 Status IrEmitterUnnested::HandleTuple(HloInstruction* tuple) {
+  // For the root node of the entry computation we can elide writing the tuple
+  // buffer. We can always figure out the contents of the tuples from buffer
+  // assignment because we insert copies to ensure non-ambiguous output buffers.
+  // GpuExecutable never reads the tuple buffer.
+  if (tuple ==
+      tuple->parent()->parent()->entry_computation()->root_instruction()) {
+    return Status::OK();
+  }
   bool all_tuple_elements_have_buffer =
       absl::c_all_of(tuple->operands(), [&](HloInstruction* tuple_element) {
         return ir_emitter_context_->buffer_assignment()
@@ -1958,6 +2021,178 @@ Status IrEmitterUnnested::HandleRng(HloInstruction* rng) {
   return Status::OK();
 }
 
+Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) {
+  const HloInstruction* operand = scatter->operand(0);
+  const HloInstruction* scatter_indices = scatter->operand(1);
+  const HloInstruction* updates = scatter->operand(2);
+
+  std::vector<std::unique_ptr<Thunk>> thunks;
+
+  // Copy the operand into the output if it's not the same buffer already.
+  auto operand_buffer = GetAllocationSlice(*operand);
+  auto destination_buffer = GetAllocationSlice(*scatter);
+  if (operand_buffer != destination_buffer) {
+    thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
+        /*source_address=*/operand_buffer,
+        /*destination_buffer=*/destination_buffer,
+        /*mem_size=*/ShapeUtil::ByteSizeOf(operand->shape()), scatter));
+  }
+
+  thunks.push_back(
+      BuildKernelThunk(scatter,
+                       /*implements_whole_instruction=*/thunks.empty()));
+
+  TF_RETURN_IF_ERROR(
+      EmitScatter(thunks.back().get(), scatter,
+                  /*scatter_indices_gen=*/
+                  [=](const IrArray::Index& index) {
+                    return GetIrArray(*scatter_indices, *scatter)
+                        .EmitReadArrayElement(index, &b_, "scatter_index");
+                  },
+                  /*updates_gen=*/
+                  [=](const IrArray::Index& index) {
+                    return GetIrArray(*updates, *scatter)
+                        .EmitReadArrayElement(index, &b_, "update");
+                  }));
+
+  // Elide the sequential thunk if there's no copy.
+  if (thunks.size() == 1) {
+    thunk_sequence_->push_back(std::move(thunks[0]));
+  } else {
+    thunk_sequence_->emplace_back(
+        absl::make_unique<SequentialThunk>(std::move(thunks), scatter));
+  }
+  return Status::OK();
+}
+
+Status IrEmitterUnnested::EmitScatter(
+    Thunk* thunk, HloInstruction* scatter,
+    const llvm_ir::ElementGenerator& scatter_indices_gen,
+    const llvm_ir::ElementGenerator& updates_gen) {
+  const HloInstruction* operand = scatter->operand(0);
+  const HloInstruction* scatter_indices = scatter->operand(1);
+  const HloInstruction* updates = scatter->operand(2);
+  const ScatterDimensionNumbers& dim_numbers =
+      scatter->scatter_dimension_numbers();
+  CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape()));
+
+  auto loop_body_emitter = [&](const IrArray::Index& index) -> Status {
+    std::vector<llvm::Value*> raw_window_multidim;
+    std::vector<llvm::Value*> input_scatter_multidim;
+    std::vector<int64> raw_window_bounds;
+
+    // Partition the index into window indices and scatter indices.
+    for (int64 i = 0, e = index.size(); i != e; ++i) {
+      // For window indices also remember the window size, this comes in handy
+      // later.
+      if (absl::c_binary_search(dim_numbers.update_window_dims(), i)) {
+        raw_window_multidim.push_back(index[i]);
+        raw_window_bounds.push_back(updates->shape().dimensions(i));
+      } else {
+        input_scatter_multidim.push_back(index[i]);
+      }
+    }
+    DCHECK_EQ(raw_window_multidim.size(),
+              dim_numbers.update_window_dims_size());
+
+    // Apply inserted_window_dims to the window dimensions.
+    int64 raw_window_multidim_idx = 0;
+    std::vector<llvm::Value*> input_window_multidim;
+    std::vector<int64> input_window_bounds;
+    for (int64 i = 0, e = ShapeUtil::Rank(operand->shape()); i != e; ++i) {
+      if (absl::c_binary_search(dim_numbers.inserted_window_dims(), i)) {
+        input_window_bounds.push_back(1);  // Trivial dimension.
+        input_window_multidim.push_back(index.GetConstantWithIndexType(0));
+      } else {
+        input_window_bounds.push_back(
+            raw_window_bounds[raw_window_multidim_idx]);
+        input_window_multidim.push_back(
+            raw_window_multidim[raw_window_multidim_idx]);
+        ++raw_window_multidim_idx;
+      }
+    }
+    DCHECK_EQ(input_window_multidim.size(), ShapeUtil::Rank(operand->shape()));
+
+    // Insert a 1 dimension at the end if index_vector_dim requests one.
+    Shape scatter_indices_shape = scatter_indices->shape();
+    if (dim_numbers.index_vector_dim() ==
+        ShapeUtil::Rank(scatter_indices_shape)) {
+      scatter_indices_shape.add_dimensions(1);
+      scatter_indices_shape.mutable_layout()->add_minor_to_major(
+          dim_numbers.index_vector_dim());
+    }
+
+    // Now load the indices corresponding to the current window from
+    // scatter_indices.
+    llvm_ir::IrArray::Index raw_scatter_index_index(input_scatter_multidim,
+                                                    index.GetType());
+    raw_scatter_index_index.InsertAt(dim_numbers.index_vector_dim(), nullptr);
+    llvm::Value* is_in_bounds = b_.getTrue();
+    for (int64 i = 0, e = dim_numbers.scatter_dims_to_operand_dims_size();
+         i != e; ++i) {
+      // Our index is stored along index_vector_dim, insert that into the lookup
+      // index into scatter_indices.
+      raw_scatter_index_index[dim_numbers.index_vector_dim()] =
+          raw_scatter_index_index.GetConstantWithIndexType(i);
+
+      int64 operand_dim = dim_numbers.scatter_dims_to_operand_dims(i);
+      TF_ASSIGN_OR_RETURN(
+          llvm::Value* const loaded_scatter_index,
+          scatter_indices_gen(raw_scatter_index_index.SourceIndexOfReshape(
+              scatter_indices_shape, scatter_indices->shape(), &b_)));
+      // And add the index to our window index. This yields the output index.
+      llvm::Value* casted_scatter_index =
+          IntCast(loaded_scatter_index, index.GetType(),
+                  /*isSigned=*/true);
+      llvm::Value* dim_offset =
+          Add(input_window_multidim[operand_dim], casted_scatter_index);
+      input_window_multidim[operand_dim] = dim_offset;
+
+      // Also do the bounds check now.
+      int64 max_index = operand->shape().dimensions(operand_dim) -
+                        input_window_bounds[operand_dim] + 1;
+      // is_in_bounds = index >= 0 && index < dim_size-window_size+1
+      //   --> index u< dim_size-window_size+1
+      is_in_bounds =
+          And(is_in_bounds, ICmpULT(casted_scatter_index,
+                                    index.GetConstantWithIndexType(max_index)));
+    }
+
+    llvm_ir::LlvmIfData if_window_in_bounds_data = llvm_ir::EmitIfThenElse(
+        is_in_bounds, "scatter.in_bounds", &b_, /*emit_else=*/false);
+    llvm_ir::SetToFirstInsertPoint(if_window_in_bounds_data.true_block, &b_);
+    // All done, now just read from the calculated input from the window, and do
+    // an atomic store to the calculated location in the output.
+    llvm_ir::IrArray::Index input_window_index(input_window_multidim,
+                                               index.GetType());
+    HloInstruction* output_hlo =
+        scatter->IsFused() ? scatter->parent()->FusionInstruction() : scatter;
+    llvm::Value* output_address =
+        GetIrArray(*output_hlo, *output_hlo)
+            .EmitArrayElementAddress(input_window_index, &b_);
+    llvm::Value* input_address = Alloca(llvm_ir::PrimitiveTypeToIrType(
+        updates->shape().element_type(), module_));
+    TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, updates_gen(index));
+    Store(input_ir_value, input_address);
+    return EmitAtomicOperationForNestedComputation(
+        *scatter->to_apply(), output_address, input_address);
+  };
+
+  // Launch a kernel that reads every element in the updates tensor. We could
+  // also do one kernel per window instead if bounds checks turn out to be a
+  // bottleneck.
+  LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
+      updates->shape(), ir_emitter_context_->device_description());
+  UpdateLaunchDimensions(launch_dimensions, thunk,
+                         ir_emitter_context_->llvm_module());
+
+  return ParallelLoopEmitter(loop_body_emitter, updates->shape(),
+                             launch_dimensions, &b_)
+      .EmitLoop(IrName(scatter),
+                GetIndexTypeForKernel(scatter, launch_dimensions.launch_bound(),
+                                      &b_));
+}
+
 Status IrEmitterUnnested::HandleSelect(HloInstruction* select) {
   thunk_sequence_->push_back(
       BuildKernelThunk(select, /*implements_whole_instruction=*/true));
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index bd5db72051..93f11c069a 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -76,6 +76,7 @@ class IrEmitterUnnested : public IrEmitter {
   Status HandleInfeed(HloInstruction* xla_infeed) override;
   Status HandleOutfeed(HloInstruction* outfeed) override;
   Status HandleRng(HloInstruction* random) override;
+  Status HandleScatter(HloInstruction* scatter) override;
   Status HandleSelect(HloInstruction* select) override;
   Status HandleSort(HloInstruction* sort) override;
   Status HandleTupleSelect(HloInstruction* tuple_select) override;
@@ -184,6 +185,14 @@ class IrEmitterUnnested : public IrEmitter {
       absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
           extra_output_gens);
 
+  // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in
+  // the process. `scatter` may be fused, scatter indices are taken from
+  // `scatter_indices_gen`, updates from`updates_gen`. The output buffer is
+  // expected to have the operand values in it already.
+  Status EmitScatter(Thunk* thunk, HloInstruction* scatter,
+                     const llvm_ir::ElementGenerator& scatter_indices_gen,
+                     const llvm_ir::ElementGenerator& updates_gen);
+
   // Returns true if a 0-2-1 tiling algorithm is already used to emit the kernel
   // for the hlo instruction.
   bool CheckAndEmitHloWithTile021(HloInstruction* hlo);
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index b4ae2e42c7..89c5f2b128 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -75,7 +75,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h"
 #include "tensorflow/compiler/xla/service/reshape_mover.h"
-#include "tensorflow/compiler/xla/service/scatter_expander.h"
 #include "tensorflow/compiler/xla/service/transpose_folding.h"
 #include "tensorflow/compiler/xla/service/tuple_simplifier.h"
 #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h"
@@ -176,8 +175,6 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
       // elimination has to come after that pass.
       pipeline.AddPass<ZeroSizedHloElimination>();
 
-      pipeline.AddPass<ScatterExpander>();
-
       pass.AddPass<AlgebraicSimplifier>(
           /*is_layout_sensitive=*/false,
           [](const Shape&, const Shape&) { return false; });
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
index 44cde4a3d2..1f7d4205ab 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc
@@ -1072,6 +1072,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser(
   }
 
   if (user->opcode() == HloOpcode::kDynamicUpdateSlice ||
+      user->opcode() == HloOpcode::kScatter ||
       user->opcode() == HloOpcode::kWhile) {
     // We eliminated other users in BufferLiveness::live_range_strictly_before,
     // so here we just need to check that the use is at operand index 0.
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index 510d6360a1..d27786d160 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -2283,6 +2283,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) {
       dataflow_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {}));
 }
 
+TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) {
+  const char* hlo_text = R"(
+    HloModule TensorFlowScatterV1
+
+    update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+      lhs = s32[] parameter(0)
+      ROOT rhs = s32[] parameter(1)
+    }
+
+    ENTRY main {
+      operand = s32[3,3] parameter(0)
+      indices = s32[2] parameter(1)
+      updates = s32[2,3] parameter(2)
+      ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=update_s32,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text));
+  computation_ = module_->entry_computation();
+  RunAnalysis();
+
+  HloInstruction* operand_param = computation_->parameter_instruction(0);
+  HloInstruction* indices_param = computation_->parameter_instruction(1);
+  HloInstruction* updates_param = computation_->parameter_instruction(2);
+  HloInstruction* scatter = computation_->root_instruction();
+
+  EXPECT_TRUE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      operand_param, {}, scatter, {}));
+  EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      indices_param, {}, scatter, {}));
+  EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser(
+      updates_param, {}, scatter, {}));
+}
+
 TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) {
   auto builder = HloComputation::Builder(TestName());
 
diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h
index 5502e565b6..ab901b435a 100644
--- a/tensorflow/compiler/xla/service/hlo_matchers.h
+++ b/tensorflow/compiler/xla/service/hlo_matchers.h
@@ -216,6 +216,7 @@ HLO_MATCHER(Remainder);
 HLO_MATCHER(Reshape);
 HLO_MATCHER(Reverse);
 HLO_MATCHER(Rng);
+HLO_MATCHER(Scatter);
 HLO_MATCHER(Select);
 HLO_MATCHER(SelectAndScatter);
 HLO_MATCHER(Send);
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc
index 7527e35c95..93e04eb3db 100644
--- a/tensorflow/compiler/xla/service/hlo_module.cc
+++ b/tensorflow/compiler/xla/service/hlo_module.cc
@@ -146,7 +146,8 @@ void HloModule::ReplaceComputations(
         case HloOpcode::kCall:
         case HloOpcode::kMap:
         case HloOpcode::kReduce:
-        case HloOpcode::kReduceWindow: {
+        case HloOpcode::kReduceWindow:
+        case HloOpcode::kScatter: {
           HloComputation* new_arg = tensorflow::gtl::FindWithDefault(
               replacements, instruction->to_apply(), nullptr);
           if (new_arg != nullptr) {
diff --git a/tensorflow/compiler/xla/service/inliner.cc b/tensorflow/compiler/xla/service/inliner.cc
index 5fd779ebf9..50c408f5bb 100644
--- a/tensorflow/compiler/xla/service/inliner.cc
+++ b/tensorflow/compiler/xla/service/inliner.cc
@@ -71,26 +71,23 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) {
   // profitability model for inlining is defined.
   if (hlo_query::AllOperandsAreParameters(root)) {
     if (root.opcode() == HloOpcode::kFusion ||
-        root.opcode() == HloOpcode::kParameter ||
         root.opcode() == HloOpcode::kTrace) {
       // Cloning not supported for these instructions.
       return Status::OK();
     }
     VLOG(10) << "inlining map({X ... Y}, op) => : op(X ... Y) with function "
              << root.ToShortString();
-    // If the input is a constant then the shape of the constant could be
-    // different than the map shape. Hence, a broadcast is needed, else the
-    // cloned operand with new shape and operands work.
-    if (root.opcode() != HloOpcode::kConstant) {
-      std::vector<HloInstruction*> params;
-      for (int64 o = 0; o < root.operands().size(); o++) {
-        params.push_back(map->operands()[root.operand(o)->parameter_number()]);
-      }
-      HloInstruction* placed_instruction = computation_->AddInstruction(
-          root.CloneWithNewOperands(map->shape(), params));
+    if (root.opcode() == HloOpcode::kParameter) {
+      // If the root is a parameter, then use the corresponding operand as the
+      // result of the computation.
       TF_RETURN_IF_ERROR(
-          computation_->ReplaceInstruction(map, placed_instruction));
-    } else {
+          map->ReplaceAllUsesWith(map->operands()[root.parameter_number()]));
+      TF_RETURN_IF_ERROR(computation_->RemoveInstruction(map));
+    } else if (root.opcode() == HloOpcode::kConstant) {
+      // If the input is a constant then the shape of the constant could be
+      // different than the map shape. Hence, a broadcast is needed, else the
+      // cloned operand with new shape and operands work.
+      //
       // The constant is in an embedded computation and needs to be recreated
       // as part of the computation that the broadcast is inserted into.
       HloInstruction* constant = computation_->AddInstruction(root.Clone());
@@ -98,6 +95,15 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) {
           HloInstruction::CreateBroadcast(map->shape(), constant, {}));
       TF_RETURN_IF_ERROR(
           computation_->ReplaceInstruction(map, placed_instruction));
+    } else {
+      std::vector<HloInstruction*> params;
+      for (int64 o = 0; o < root.operands().size(); o++) {
+        params.push_back(map->operands()[root.operand(o)->parameter_number()]);
+      }
+      HloInstruction* placed_instruction = computation_->AddInstruction(
+          root.CloneWithNewOperands(map->shape(), params));
+      TF_RETURN_IF_ERROR(
+          computation_->ReplaceInstruction(map, placed_instruction));
     }
     changed_ = true;
     return Status::OK();
diff --git a/tensorflow/compiler/xla/service/inliner_test.cc b/tensorflow/compiler/xla/service/inliner_test.cc
index 7e967f035c..98e0f2cfd7 100644
--- a/tensorflow/compiler/xla/service/inliner_test.cc
+++ b/tensorflow/compiler/xla/service/inliner_test.cc
@@ -146,6 +146,36 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) {
   EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
 }
 
+TEST_F(InlinerTest, MapParameter) {
+  Shape r0f32 = ShapeUtil::MakeShape(F32, {});
+
+  auto param_builder = HloComputation::Builder(TestName());
+  param_builder.AddInstruction(HloInstruction::CreateParameter(0, r0f32, "p0"));
+  param_builder.AddInstruction(HloInstruction::CreateParameter(1, r0f32, "p1"));
+  auto param_f32 = param_builder.Build();
+
+  auto builder = HloComputation::Builder("MapParamFunction");
+  auto lhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(1)));
+  auto rhs = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(4)));
+  builder.AddInstruction(
+      HloInstruction::CreateMap(lhs->shape(), {lhs, rhs}, param_f32.get()));
+
+  auto computation = builder.Build();
+  auto hlo_module = CreateNewVerifiedModule();
+  hlo_module->AddEmbeddedComputation(std::move(param_f32));
+  hlo_module->AddEntryComputation(std::move(computation));
+
+  Inliner inliner;
+  EXPECT_TRUE(inliner.Run(hlo_module.get()).ValueOrDie());
+  EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), rhs);
+
+  // Verify execution on CPU.
+  auto result = ExecuteAndTransfer(hlo_module->Clone(), {});
+  auto expected = LiteralUtil::CreateR0<float>(4);
+  EXPECT_TRUE(LiteralTestUtil::Equal(result, expected));
+}
 
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index 395e01fb59..9ebb603ca5 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1862,6 +1862,7 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kRemainder:
     case HloOpcode::kReverse:
     case HloOpcode::kRoundNearestAfz:
+    case HloOpcode::kScatter:
     case HloOpcode::kSelect:
     case HloOpcode::kSelectAndScatter:
     case HloOpcode::kShiftLeft:
@@ -1899,7 +1900,6 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kReduce:
     case HloOpcode::kReshape:
     case HloOpcode::kRng:
-    case HloOpcode::kScatter:
     case HloOpcode::kSend:
     case HloOpcode::kSendDone:
     case HloOpcode::kAfterAll:
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index 6fed7c76d0..6ef6b58e50 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -771,6 +771,7 @@ bool TuplePointsToAnalysis::CanShareOperandBufferWithUser(
     }
   }
   if (user->opcode() == HloOpcode::kDynamicUpdateSlice ||
+      user->opcode() == HloOpcode::kScatter ||
       user->opcode() == HloOpcode::kWhile) {
     // We eliminated other users in BufferLiveness::live_range_strictly_before,
     // so here we just need to check that the use is at operand index 0.
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
index e9a07b14ed..a571bd571b 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
@@ -1010,6 +1010,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) {
       points_to_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {}));
 }
 
+TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) {
+  const char* hlo_text = R"(
+    HloModule TensorFlowScatterV1
+
+    update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+      lhs = s32[] parameter(0)
+      ROOT rhs = s32[] parameter(1)
+    }
+
+    ENTRY main {
+      operand = s32[3,3] parameter(0)
+      indices = s32[2] parameter(1)
+      updates = s32[2,3] parameter(2)
+      ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+          to_apply=update_s32,
+          update_window_dims={1},
+          inserted_window_dims={0},
+          scatter_dims_to_operand_dims={0},
+          index_vector_dim=1
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text));
+  computation_ = module_->entry_computation();
+  RunAnalysis();
+
+  HloInstruction* operand_param = computation_->parameter_instruction(0);
+  HloInstruction* indices_param = computation_->parameter_instruction(1);
+  HloInstruction* updates_param = computation_->parameter_instruction(2);
+  HloInstruction* scatter = computation_->root_instruction();
+
+  EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser(
+      operand_param, {}, scatter, {}));
+  EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(
+      indices_param, {}, scatter, {}));
+  EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser(
+      updates_param, {}, scatter, {}));
+}
+
 TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) {
   auto builder = HloComputation::Builder(TestName());
 
diff --git a/tensorflow/compiler/xla/tests/scatter_test.cc b/tensorflow/compiler/xla/tests/scatter_test.cc
index b21dd56045..7e1f4aa0eb 100644
--- a/tensorflow/compiler/xla/tests/scatter_test.cc
+++ b/tensorflow/compiler/xla/tests/scatter_test.cc
@@ -69,6 +69,37 @@ ENTRY main {
   RunTest(hlo_text, &operand, &scatter_indices, &updates);
 }
 
+XLA_TEST_F(ScatterTest, TensorFlowScatterV1_WithFusedAdds) {
+  const string hlo_text = R"(
+HloModule TensorFlowScatterV1
+
+update_s32 (lhs: s32[], rhs: s32[]) -> s32[] {
+  lhs = s32[] parameter(0)
+  ROOT rhs = s32[] parameter(1)
+}
+
+ENTRY main {
+  p0 = s32[3,3] parameter(0)
+  operand = s32[3,3] add(p0, p0)
+  p1 = s32[2] parameter(1)
+  indices = s32[2] add(p1, p1)
+  p2 = s32[2,3] parameter(2)
+  updates = s32[2,3] add(p2, p2)
+  ROOT scatter = s32[3,3] scatter(operand, indices, updates),
+      to_apply=update_s32,
+      update_window_dims={1},
+      inserted_window_dims={0},
+      scatter_dims_to_operand_dims={0},
+      index_vector_dim=1
+}
+)";
+  Literal operand =
+      LiteralUtil::CreateR2<int32>({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}});
+  Literal scatter_indices = LiteralUtil::CreateR1<int32>({0, 1});
+  Literal updates = LiteralUtil::CreateR2<int32>({{10, 20, 30}, {70, 80, 90}});
+  RunTest(hlo_text, &operand, &scatter_indices, &updates);
+}
+
 XLA_TEST_F(ScatterTest, TensorFlowScatterV2_Update) {
   const char* hlo_text = R"(
 HloModule TensorFlowScatterV2
@@ -98,6 +129,37 @@ ENTRY main {
   RunTest(hlo_text, &operand, &scatter_indices, &updates);
 }
 
+XLA_TEST_F(ScatterTest, SimpleR4) {
+  const char* hlo_text = R"(
+HloModule SimpleR4
+
+add_f32 (lhs: f32[], rhs: f32[]) -> f32[] {
+  lhs = f32[] parameter(0)
+  rhs = f32[] parameter(1)
+  ROOT add = f32[] add(f32[] lhs, f32[] rhs)
+}
+
+ENTRY main {
+  operand = f32[1,2,2,1] parameter(0)
+  indices = s32[1,3] parameter(1)
+  updates = f32[1,2,2,1] parameter(2)
+  ROOT scatter = f32[1,2,2,1] scatter(operand, indices, updates),
+      to_apply=add_f32,
+      update_window_dims={1,2,3},
+      inserted_window_dims={0},
+      scatter_dims_to_operand_dims={0, 2, 1},
+      index_vector_dim=1
+}
+)";
+
+  Literal operand =
+      LiteralUtil::CreateR4<float>({{{{0.f}, {0.f}}, {{0.f}, {0.f}}}});
+  Literal updates =
+      LiteralUtil::CreateR4<float>({{{{0.12}, {0.28}}, {{0.018}, {0.42}}}});
+  Literal scatter_indices = LiteralUtil::CreateR2<int32>({{0, 0, 0}});
+  RunTest(hlo_text, &operand, &scatter_indices, &updates);
+}
+
 XLA_TEST_F(ScatterTest, TensorFlowScatter_Add) {
   const string hlo_text = R"(
 HloModule TensorFlowScatter_Add
-- 
GitLab


From e3f4d32490e9a28cba0bfa5614255dc5d517ca91 Mon Sep 17 00:00:00 2001
From: Nick Felt <nfelt@users.noreply.github.com>
Date: Wed, 24 Oct 2018 18:01:36 -0700
Subject: [PATCH 019/873] Update tensorboard dependency to 1.12.x (#23230)

Also updated tb-nightly to +1 minor version, 1.13.x.

PiperOrigin-RevId: 218582588
---
 tensorflow/tools/pip_package/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index b7eed56695..ceaa96b690 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -56,7 +56,7 @@ REQUIRED_PACKAGES = [
     'numpy >= 1.13.3',
     'six >= 1.10.0',
     'protobuf >= 3.6.1',
-    'tensorboard >= 1.11.0, < 1.12.0',
+    'tensorboard >= 1.12.0, < 1.13.0',
     'termcolor >= 1.1.0',
 ]
 
@@ -85,7 +85,7 @@ else:
 if 'tf_nightly' in project_name:
   for i, pkg in enumerate(REQUIRED_PACKAGES):
     if 'tensorboard' in pkg:
-      REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.12.0a0, < 1.13.0a0'
+      REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.13.0a0, < 1.14.0a0'
       break
 
 # weakref.finalize and enum were introduced in Python 3.4
-- 
GitLab


From 43ec5a3d6ee49eadc98835d1ab18c62cafa5043d Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Wed, 24 Oct 2018 18:29:31 -0700
Subject: [PATCH 020/873] Fix string comparison (#23237)

PiperOrigin-RevId: 218607372
---
 tensorflow/tools/ci_build/builds/configured | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/builds/configured b/tensorflow/tools/ci_build/builds/configured
index 3eee11fd7e..f8a9311918 100755
--- a/tensorflow/tools/ci_build/builds/configured
+++ b/tensorflow/tools/ci_build/builds/configured
@@ -33,7 +33,7 @@ COMMAND=("$@")
 export CI_BUILD_PYTHON="${CI_BUILD_PYTHON:-python}"
 export PYTHON_BIN_PATH="${PYTHON_BIN_PATH:-$(which ${CI_BUILD_PYTHON})}"
 # XLA currently does not build under Android, so disable it for now.
-if [[ "${CONTAINER_TYPE}" -eq 'android' ]]; then
+if [[ "${CONTAINER_TYPE}" == 'android' ]]; then
   export TF_ENABLE_XLA=0
 fi
 
-- 
GitLab


From dd9ebe12df7906a3211b8db2d21fa73c4504d118 Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Thu, 25 Oct 2018 11:03:59 +0800
Subject: [PATCH 021/873] fix softmax dims error

Change-Id: I3303f368053a691787a0922098ee75e3b0c26219

Conflicts:
	tensorflow/core/kernels/mkl_softmax_op.cc
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index cfab529662..92167e06d5 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -50,8 +50,8 @@ class MklSoftmaxOp : public OpKernel {
       // src_tensor now points to the 0-th input of global data struct "context"
       size_t src_idx = 0;
       const Tensor& src_tensor = MklGetInput(context, src_idx);
-      const int input_dims = src_tensor.dims();
-
+      //const int input_dims = src_tensor.dims();
+      //  printf("input_dims = %d\n", input_dims);
       // Add: get MklShape
       MklDnnShape src_mkl_shape;
       GetMklShape(context, src_idx, &src_mkl_shape);
@@ -61,6 +61,7 @@ class MklSoftmaxOp : public OpKernel {
       auto src_tf_shape = src_mkl_shape.IsMklTensor()
                               ? src_mkl_shape.GetTfShape()
                               : src_tensor.shape();
+      const int input_dims = src_tf_shape.dims();
       auto src_dims = TFShapeToMklDnnDims(src_tf_shape);
       auto output_dims = src_dims;
       memory::format layout_type;
-- 
GitLab


From ec31b13690118d1998824ba4d350fcbc22fbfb60 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 25 Oct 2018 09:22:14 -0700
Subject: [PATCH 022/873] Explicitly quote every command piece. (#23259)

PiperOrigin-RevId: 218399942
---
 third_party/repo.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/repo.bzl b/third_party/repo.bzl
index 6e30618d39..391622e237 100644
--- a/third_party/repo.bzl
+++ b/third_party/repo.bzl
@@ -26,7 +26,7 @@ def _wrap_bash_cmd(ctx, cmd):
         bazel_sh = _get_env_var(ctx, "BAZEL_SH")
         if not bazel_sh:
             fail("BAZEL_SH environment variable is not set")
-        cmd = [bazel_sh, "-l", "-c", " ".join(cmd)]
+        cmd = [bazel_sh, "-l", "-c", " ".join(["\"%s\"" % s for s in cmd])]
     return cmd
 
 def _get_env_var(ctx, name):
-- 
GitLab


From a315296d577b09eca88fe1a6cd36a13502d72067 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 25 Oct 2018 10:44:18 -0700
Subject: [PATCH 023/873] Don't set TF_PER_DEVICE_MEMORY_LIMIT_MB as a
 --test_env if it isn't specified. (#23258)

PiperOrigin-RevId: 218634344
---
 .../tools/ci_build/ci_parameterized_build.sh  | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh
index bc9cb4e9a1..435ec7ca68 100755
--- a/tensorflow/tools/ci_build/ci_parameterized_build.sh
+++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh
@@ -128,8 +128,9 @@ NO_DOCKER_OPT_FLAG="--genrule_strategy=standalone"
 
 DO_DOCKER=1
 
-# Bazel uses defaults for all test sizes when given `-1`.
-TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1}
+# Default values for various settings.
+TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1}  # Use bazel defaults
+TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 
 # Helpful flags:
 # --test_summary=detailed: Tell us more about which targets are being built
@@ -144,9 +145,20 @@ TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1}
 BAZEL_TEST_FLAGS=""\
 "--test_summary=detailed --build_tests_only --keep_going "\
 "--test_timeout=${TF_BUILD_TEST_TIMEOUT} "\
-"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\
-"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\
+"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT}"
+
+# Only set these environment variables if they're specified, to avoid causing
+# problems like b/118404869, where an envvar set to the empty string has
+# different semantics from an unset envvar.
+if [ -n "${TF_TESTS_PER_GPU}" ]; then
+  BAZEL_TEST_FLAGS="${BAZEL_TEST_FLAGS} "\
+"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU}"
+fi
+if [ -n "${TF_PER_DEVICE_MEMORY_LIMIT_MB}" ]; then
+  BAZEL_TEST_FLAGS="${BAZEL_TEST_FLAGS} "\
 "--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB}"
+fi
+
 BAZEL_BUILD_FLAGS="--keep_going"
 
 # Explicitly set jdk8 since that's what's installed in our images. Note that
@@ -163,7 +175,6 @@ PIP_INTEGRATION_TESTS_FLAG="--integration_tests"
 ANDROID_CMD="${CI_BUILD_DIR}/builds/android.sh"
 ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh"
 
-TF_GPU_COUNT=${TF_GPU_COUNT:-4}
 PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute'
 
 BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh"
-- 
GitLab


From 37a2e36733b0f12102133e8ff5fb516573bdf7ec Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 25 Oct 2018 12:49:30 -0700
Subject: [PATCH 024/873] Upgrade setuptools before installing absl-py in
 remaining scripts. (#23264)

PiperOrigin-RevId: 218730741
---
 .../ci_build/install/install_python3.5_pip_packages.sh | 10 ++++------
 .../ci_build/install/install_python3.6_pip_packages.sh |  6 ++++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 61d4fe3fe8..62e04df717 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -41,6 +41,10 @@ fi
 set -e
 pip3.5 install --upgrade pip
 
+# Install last working version of setuptools. This must happen before we install
+# absl-py, which uses install_requires notation introduced in setuptools 20.5.
+pip3.5 install --upgrade setuptools==39.1.0
+
 pip3.5 install --upgrade virtualenv
 
 # Install six.
@@ -81,15 +85,9 @@ pip3.5 install --upgrade astor
 pip3.5 install --upgrade gast
 pip3.5 install --upgrade termcolor
 
-# Install last working version of setuptools.
-pip3.5 install --upgrade setuptools==39.1.0
-
 # Keras
 pip3.5 install keras_applications==1.0.6
 pip3.5 install keras_preprocessing==1.0.5
 pip3.5 install --upgrade h5py==2.8.0
 
-# Install last working version of setuptools.
-pip3.5 install --upgrade setuptools==39.1.0
-
 # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh)
diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
index 8949af8a88..48d556b1dd 100755
--- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh
@@ -51,6 +51,10 @@ ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3
 
 pip3 install --upgrade pip
 
+# Install last working version of setuptools. This must happen before we install
+# absl-py, which uses install_requires notation introduced in setuptools 20.5.
+pip3 install --upgrade setuptools==39.1.0
+
 pip3 install --upgrade virtualenv
 
 set -e
@@ -97,8 +101,6 @@ pip3 install --upgrade astor
 pip3 install --upgrade gast
 pip3 install --upgrade termcolor
 
-# Install last working version of setuptools.
-pip3 install --upgrade setuptools==39.1.0
 pip3 install --upgrade h5py==2.8.0
 
 # Keras
-- 
GitLab


From 405b34608005bc17c50dbbe915e4d68a694274ca Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 25 Oct 2018 13:56:08 -0700
Subject: [PATCH 025/873] Fp16 LSTMBlocKCell and LSTMBlockFusedCell (#23267)

PiperOrigin-RevId: 216632480
---
 tensorflow/contrib/rnn/kernels/blas_gemm.cc   |   7 +-
 tensorflow/contrib/rnn/kernels/blas_gemm.h    |   9 +-
 tensorflow/contrib/rnn/kernels/lstm_ops.cc    | 163 +++++++++---------
 tensorflow/contrib/rnn/kernels/lstm_ops.h     |  34 ++--
 .../contrib/rnn/kernels/lstm_ops_gpu.cu.cc    |  80 +++++++--
 tensorflow/contrib/rnn/ops/lstm_ops.cc        |   8 +-
 tensorflow/contrib/rnn/python/ops/lstm_ops.py |   5 +-
 7 files changed, 185 insertions(+), 121 deletions(-)

diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.cc b/tensorflow/contrib/rnn/kernels/blas_gemm.cc
index 45d22b739b..56ec86418d 100644
--- a/tensorflow/contrib/rnn/kernels/blas_gemm.cc
+++ b/tensorflow/contrib/rnn/kernels/blas_gemm.cc
@@ -38,8 +38,9 @@ namespace functor {
 template <typename T>
 void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx, bool transa,
                                      bool transb, uint64 m, uint64 n, uint64 k,
-                                     T alpha, const T* a, int lda, const T* b,
-                                     int ldb, T beta, T* c, int ldc) {
+                                     float alpha, const T* a, int lda,
+                                     const T* b, int ldb, float beta, T* c,
+                                     int ldc) {
 #if GOOGLE_CUDA
   se::blas::Transpose trans[] = {se::blas::Transpose::kNoTranspose,
                                  se::blas::Transpose::kTranspose};
@@ -60,8 +61,8 @@ void TensorCuBlasGemm<T>::operator()(OpKernelContext* ctx, bool transa,
 #endif
 }
 
+template struct TensorCuBlasGemm<Eigen::half>;
 template struct TensorCuBlasGemm<float>;
-template struct TensorCuBlasGemm<double>;
 
 }  // end namespace functor
 }  // end namespace tensorflow
diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.h b/tensorflow/contrib/rnn/kernels/blas_gemm.h
index a52c934233..9535a76566 100644
--- a/tensorflow/contrib/rnn/kernels/blas_gemm.h
+++ b/tensorflow/contrib/rnn/kernels/blas_gemm.h
@@ -28,8 +28,8 @@ namespace functor {
 template <typename T>
 struct TensorCuBlasGemm {
   void operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m,
-                  uint64 n, uint64 k, T alpha, const T* a, int lda, const T* b,
-                  int ldb, T beta, T* c, int ldc);
+                  uint64 n, uint64 k, float alpha, const T* a, int lda,
+                  const T* b, int ldb, float beta, T* c, int ldc);
 };
 
 template <typename Device, typename T, bool USE_CUBLAS>
@@ -38,8 +38,9 @@ struct TensorBlasGemm;
 template <typename Device, typename T>
 struct TensorBlasGemm<Device, T, true /* USE_CUBLAS */> {
   static void compute(OpKernelContext* ctx, const Device& d, bool transa,
-                      bool transb, T alpha, typename TTypes<T>::ConstMatrix a,
-                      typename TTypes<T>::ConstMatrix b, T beta,
+                      bool transb, float alpha,
+                      typename TTypes<T>::ConstMatrix a,
+                      typename TTypes<T>::ConstMatrix b, float beta,
                       typename TTypes<T>::Matrix c) {
     int64 m = c.dimensions()[0];
     int64 n = c.dimensions()[1];
diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.cc b/tensorflow/contrib/rnn/kernels/lstm_ops.cc
index 5e7cf0ce84..ee08d306f8 100644
--- a/tensorflow/contrib/rnn/kernels/lstm_ops.cc
+++ b/tensorflow/contrib/rnn/kernels/lstm_ops.cc
@@ -44,7 +44,7 @@ namespace functor {
 template <typename T>
 void LSTMBlockCellFpropWithEigen(
     const LSTMBlockCell& cell, OpKernelContext* ctx, const CPUDevice& d,
-    const T forget_bias, const T cell_clip, bool use_peephole,
+    const float forget_bias, const float cell_clip, bool use_peephole,
     typename TTypes<T>::ConstMatrix x, typename TTypes<T>::ConstMatrix cs_prev,
     typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
     typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
@@ -177,50 +177,51 @@ void LSTMBlockCellBpropWithEigen(
   }
 }
 
-#define DEFINE_CPU_SPECS(T)                                                    \
-  template <>                                                                  \
-  void LSTMBlockCellFprop<CPUDevice, T, false /* USE_CUBLAS */>::operator()(   \
-      OpKernelContext* ctx, const CPUDevice& d, const T forget_bias,           \
-      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x, \
-      typename TTypes<T>::ConstMatrix cs_prev,                                 \
-      typename TTypes<T>::ConstMatrix h_prev,                                  \
-      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
-      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,      \
-      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,           \
-      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,             \
-      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,              \
-      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,            \
-      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h) {         \
-    LSTMBlockCellFpropWithEigen<T>(                                            \
-        *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev,       \
-        h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h);        \
-  }                                                                            \
-  template <>                                                                  \
-  void LSTMBlockCellBprop<CPUDevice, T, false /* USE_CUBLAS */>::operator()(   \
-      OpKernelContext* ctx, const CPUDevice& d, bool use_peephole,             \
-      typename TTypes<T>::ConstMatrix x,                                       \
-      typename TTypes<T>::ConstMatrix cs_prev,                                 \
-      typename TTypes<T>::ConstMatrix h_prev,                                  \
-      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
-      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,      \
-      typename TTypes<T>::ConstVec b, typename TTypes<T>::ConstMatrix i,       \
-      typename TTypes<T>::ConstMatrix cs, typename TTypes<T>::ConstMatrix f,   \
-      typename TTypes<T>::ConstMatrix o, typename TTypes<T>::ConstMatrix ci,   \
-      typename TTypes<T>::ConstMatrix co,                                      \
-      typename TTypes<T>::ConstMatrix cs_grad,                                 \
-      typename TTypes<T>::ConstMatrix h_grad, typename TTypes<T>::Matrix do_,  \
-      typename TTypes<T>::Matrix dcs, typename TTypes<T>::Matrix dci,          \
-      typename TTypes<T>::Matrix df, typename TTypes<T>::Matrix di,            \
-      typename TTypes<T>::Matrix dicfo,                                        \
-      typename TTypes<T>::Matrix cs_prev_grad,                                 \
-      typename TTypes<T>::Vec wci_grad, typename TTypes<T>::Vec wcf_grad,      \
-      typename TTypes<T>::Vec wco_grad) {                                      \
-    LSTMBlockCellBpropWithEigen<CPUDevice, T, false /* USE_CUBLAS */>(         \
-        *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b,  \
-        i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo,    \
-        cs_prev_grad, wci_grad, wcf_grad, wco_grad);                           \
-  }                                                                            \
-  template struct LSTMBlockCellFprop<CPUDevice, T, false /* USE_CUBLAS */>;    \
+#define DEFINE_CPU_SPECS(T)                                                   \
+  template <>                                                                 \
+  void LSTMBlockCellFprop<CPUDevice, T, false /* USE_CUBLAS */>::operator()(  \
+      OpKernelContext* ctx, const CPUDevice& d, const float forget_bias,      \
+      const float cell_clip, bool use_peephole,                               \
+      typename TTypes<T>::ConstMatrix x,                                      \
+      typename TTypes<T>::ConstMatrix cs_prev,                                \
+      typename TTypes<T>::ConstMatrix h_prev,                                 \
+      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,    \
+      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,     \
+      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,          \
+      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,            \
+      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,             \
+      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,           \
+      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h) {        \
+    LSTMBlockCellFpropWithEigen<T>(                                           \
+        *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev,      \
+        h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h);       \
+  }                                                                           \
+  template <>                                                                 \
+  void LSTMBlockCellBprop<CPUDevice, T, false /* USE_CUBLAS */>::operator()(  \
+      OpKernelContext* ctx, const CPUDevice& d, bool use_peephole,            \
+      typename TTypes<T>::ConstMatrix x,                                      \
+      typename TTypes<T>::ConstMatrix cs_prev,                                \
+      typename TTypes<T>::ConstMatrix h_prev,                                 \
+      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,    \
+      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,     \
+      typename TTypes<T>::ConstVec b, typename TTypes<T>::ConstMatrix i,      \
+      typename TTypes<T>::ConstMatrix cs, typename TTypes<T>::ConstMatrix f,  \
+      typename TTypes<T>::ConstMatrix o, typename TTypes<T>::ConstMatrix ci,  \
+      typename TTypes<T>::ConstMatrix co,                                     \
+      typename TTypes<T>::ConstMatrix cs_grad,                                \
+      typename TTypes<T>::ConstMatrix h_grad, typename TTypes<T>::Matrix do_, \
+      typename TTypes<T>::Matrix dcs, typename TTypes<T>::Matrix dci,         \
+      typename TTypes<T>::Matrix df, typename TTypes<T>::Matrix di,           \
+      typename TTypes<T>::Matrix dicfo,                                       \
+      typename TTypes<T>::Matrix cs_prev_grad,                                \
+      typename TTypes<T>::Vec wci_grad, typename TTypes<T>::Vec wcf_grad,     \
+      typename TTypes<T>::Vec wco_grad) {                                     \
+    LSTMBlockCellBpropWithEigen<CPUDevice, T, false /* USE_CUBLAS */>(        \
+        *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b, \
+        i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo,   \
+        cs_prev_grad, wci_grad, wcf_grad, wco_grad);                          \
+  }                                                                           \
+  template struct LSTMBlockCellFprop<CPUDevice, T, false /* USE_CUBLAS */>;   \
   template struct LSTMBlockCellBprop<CPUDevice, T, false /* USE_CUBLAS */>;
 
 DEFINE_CPU_SPECS(float);
@@ -377,24 +378,26 @@ REGISTER_KERNEL(float);
 
 #if GOOGLE_CUDA
 namespace functor {
-#define DECLARE_GPU_SPEC(T)                                                    \
-  template <>                                                                  \
-  void LSTMBlockCellFprop<GPUDevice, T, true>::operator()(                     \
-      OpKernelContext* ctx, const GPUDevice& d, const T forget_bias,           \
-      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x, \
-      typename TTypes<T>::ConstMatrix cs_prev,                                 \
-      typename TTypes<T>::ConstMatrix h_prev,                                  \
-      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
-      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,      \
-      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,           \
-      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,             \
-      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,              \
-      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,            \
-      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h);          \
-                                                                               \
+#define DECLARE_GPU_SPEC(T)                                                \
+  template <>                                                              \
+  void LSTMBlockCellFprop<GPUDevice, T, true>::operator()(                 \
+      OpKernelContext* ctx, const GPUDevice& d, const float forget_bias,   \
+      const float cell_clip, bool use_peephole,                            \
+      typename TTypes<T>::ConstMatrix x,                                   \
+      typename TTypes<T>::ConstMatrix cs_prev,                             \
+      typename TTypes<T>::ConstMatrix h_prev,                              \
+      typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci, \
+      typename TTypes<T>::ConstVec wcf, typename TTypes<T>::ConstVec wco,  \
+      typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,       \
+      typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,         \
+      typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,          \
+      typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,        \
+      typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h);      \
+                                                                           \
   extern template struct LSTMBlockCellFprop<GPUDevice, T, true>;
 
 DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
 // DECLARE_GPU_SPEC(double);
 #undef DECLARE_GPU_SPEC
 }  // end namespace functor
@@ -405,6 +408,7 @@ DECLARE_GPU_SPEC(float);
       LSTMBlockCellOp<GPUDevice, T, true>);
 
 REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(Eigen::half);
 // REGISTER_GPU_KERNEL(double);
 #undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
@@ -629,9 +633,9 @@ class LSTMBlockCellGradOp : public OpKernel {
 
     const Device& device = ctx->eigen_device<Device>();
 
-    functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wco_grad_tensor->flat<float>());
+    functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wco_grad_tensor->flat<T>());
 
     functor::LSTMBlockCellBprop<Device, T, USE_CUBLAS>(batch_size, input_size,
                                                        cell_size)(
@@ -688,6 +692,7 @@ namespace functor {
                                             true /* USE_CUBLAS */>;
 
 DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
 // DECLARE_GPU_SPEC(double);
 #undef DECLARE_GPU_SPEC
 }  // namespace functor
@@ -698,6 +703,7 @@ DECLARE_GPU_SPEC(float);
       LSTMBlockCellGradOp<GPUDevice, T, true>);
 
 REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(Eigen::half);
 // REGISTER_GPU_KERNEL(double);
 #undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
@@ -984,10 +990,10 @@ class BlockLSTMOp : public OpKernel {
       Tensor cs_tensor = cs_out->Slice(seq_len_max, timelen);
       Tensor h_tensor = h_out->Slice(seq_len_max, timelen);
 
-      functor::TensorUnalignedZero<Device, T>()(
-          device, cs_tensor.unaligned_flat<float>());
-      functor::TensorUnalignedZero<Device, T>()(
-          device, h_tensor.unaligned_flat<float>());
+      functor::TensorUnalignedZero<Device, T>()(device,
+                                                cs_tensor.unaligned_flat<T>());
+      functor::TensorUnalignedZero<Device, T>()(device,
+                                                h_tensor.unaligned_flat<T>());
     }
   }
 
@@ -1021,6 +1027,7 @@ namespace functor {
   extern template struct TensorUnalignedZero<GPUDevice, T>;
 
 DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
 // DECLARE_GPU_SPEC(double);
 #undef DECLARE_GPU_SPEC
 }  // end namespace functor
@@ -1033,6 +1040,7 @@ DECLARE_GPU_SPEC(float);
                           BlockLSTMOp<GPUDevice, T, true>);
 
 REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(Eigen::half);
 // REGISTER_GPU_KERNEL(double);
 #undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
@@ -1195,16 +1203,15 @@ class BlockLSTMGradOp : public OpKernel {
 
     const Device& device = ctx->eigen_device<Device>();
 
-    functor::TensorZero<Device, T>()(device, cs_grad_tensor.flat<float>());
-    functor::TensorZero<Device, T>()(device,
-                                     cs_prev_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, h_grad_tensor.flat<float>());
-    functor::TensorZero<Device, T>()(device, h_prev_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, w_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, wco_grad_tensor->flat<float>());
-    functor::TensorZero<Device, T>()(device, b_grad_tensor->flat<float>());
+    functor::TensorZero<Device, T>()(device, cs_grad_tensor.flat<T>());
+    functor::TensorZero<Device, T>()(device, cs_prev_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, h_grad_tensor.flat<T>());
+    functor::TensorZero<Device, T>()(device, h_prev_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, w_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wci_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wcf_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, wco_grad_tensor->flat<T>());
+    functor::TensorZero<Device, T>()(device, b_grad_tensor->flat<T>());
 
     const int64 seq_len_max = seq_len_max_tensor->scalar<int64>()();
     SliceHelper<Device, T> slicer(ctx);
@@ -1331,6 +1338,7 @@ namespace functor {
   extern template struct BlockLSTMBprop<GPUDevice, T, true>;
 
 DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
 // DECLARE_GPU_SPEC(double);
 #undef DECLARE_GPU_SPEC
 }  // end namespace functor
@@ -1343,6 +1351,7 @@ DECLARE_GPU_SPEC(float);
                           BlockLSTMGradOp<GPUDevice, T, true>);
 
 REGISTER_GPU_KERNEL(float);
+REGISTER_GPU_KERNEL(Eigen::half);
 // REGISTER_GPU_KERNEL(double);
 #undef REGISTER_GPU_KERNEL
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.h b/tensorflow/contrib/rnn/kernels/lstm_ops.h
index d23cedc234..5ca1dad655 100644
--- a/tensorflow/contrib/rnn/kernels/lstm_ops.h
+++ b/tensorflow/contrib/rnn/kernels/lstm_ops.h
@@ -77,8 +77,7 @@ template <typename Device, typename T>
 struct TensorZeroPadding {
   void operator()(const Device& d, const int64 time_idx,
                   typename TTypes<int64>::ConstVec seq_len,
-                  typename TTypes<float>::Vec mask,
-                  typename TTypes<float>::Matrix m) {
+                  typename TTypes<T>::Vec mask, typename TTypes<T>::Matrix m) {
     // mask is shape [batch_size].
     mask.device(d) = seq_len.constant(time_idx) < seq_len;
 
@@ -154,18 +153,21 @@ struct LSTMBlockCellFprop : public LSTMBlockCell {
                      const int cell_size)
       : LSTMBlockCell(batch_size, input_size, cell_size) {}
 
-  void operator()(
-      OpKernelContext* ctx, const Device& d, const T forget_bias,
-      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x,
-      typename TTypes<T>::ConstMatrix cs_prev,
-      typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
-      typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
-      typename TTypes<T>::ConstVec wco, typename TTypes<T>::ConstVec b,
-      typename TTypes<T>::Matrix xh, typename TTypes<T>::Matrix i,
-      typename TTypes<T>::Matrix cs, typename TTypes<T>::Matrix f,
-      typename TTypes<T>::Matrix o, typename TTypes<T>::Matrix ci,
-      typename TTypes<T>::Matrix co, typename TTypes<T>::Matrix icfo,
-      typename TTypes<T>::Matrix h);
+  void operator()(OpKernelContext* ctx, const Device& d,
+                  const float forget_bias, const float cell_clip,
+                  bool use_peephole, typename TTypes<T>::ConstMatrix x,
+                  typename TTypes<T>::ConstMatrix cs_prev,
+                  typename TTypes<T>::ConstMatrix h_prev,
+                  typename TTypes<T>::ConstMatrix w,
+                  typename TTypes<T>::ConstVec wci,
+                  typename TTypes<T>::ConstVec wcf,
+                  typename TTypes<T>::ConstVec wco,
+                  typename TTypes<T>::ConstVec b, typename TTypes<T>::Matrix xh,
+                  typename TTypes<T>::Matrix i, typename TTypes<T>::Matrix cs,
+                  typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,
+                  typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,
+                  typename TTypes<T>::Matrix icfo,
+                  typename TTypes<T>::Matrix h);
 };
 
 // See lstm_ops.cc for CPUDevice implementation and lstm_ops_gpu.cu.cc for
@@ -261,7 +263,7 @@ struct BlockLSTMBprop : public LSTMBlockCell {
     typename TTypes<T>::ConstMatrix const_dicfo(dicfo.data(),
                                                 dicfo.dimensions());
     TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, d, false, true, T(1), const_dicfo, w, T(0), xh_grad);
+        ctx, d, false, true, 1.f, const_dicfo, w, 0.f, xh_grad);
 
     // xh.
     xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x;
@@ -274,7 +276,7 @@ struct BlockLSTMBprop : public LSTMBlockCell {
 
     // w_grad.
     TensorBlasGemm<Device, T, USE_CUBLAS>::compute(
-        ctx, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad);
+        ctx, d, true, false, 1.f, const_xh, const_dicfo, 1.f, w_grad);
 
     // b_grad.
     b_grad.device(d) += dicfo.sum(Eigen::array<int, 1>({0}));
diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc
index 6d3758fef1..b664b0f45e 100644
--- a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc
+++ b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc
@@ -31,6 +31,49 @@ typedef Eigen::GpuDevice GPUDevice;
 
 namespace {
 
+struct FloatToHalf {
+  __host__ __device__ EIGEN_STRONG_INLINE Eigen::half operator()(
+      const float& x) const {
+    return Eigen::half_impl::float_to_half_rtne(x);
+  }
+};
+
+template <typename U, typename T>
+__host__ __device__ EIGEN_STRONG_INLINE
+    typename std::enable_if<!std::is_same<T, U>::value, U>::type
+    strict_cast(T t);
+
+template <typename U, typename T>
+__host__ __device__ EIGEN_STRONG_INLINE
+    typename std::enable_if<std::is_same<T, U>::value, U>::type
+    strict_cast(T t) {
+  return t;
+}
+
+template <>
+__host__ __device__ EIGEN_STRONG_INLINE Eigen::half
+strict_cast<Eigen::half, float>(float t) {
+  return FloatToHalf()(t);
+}
+
+}  // namespace
+
+template <typename T>
+struct TensorZero<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::Flat t) {
+    t.device(d) = t.constant(strict_cast<T>(0.f));
+  }
+};
+
+template <typename T>
+struct TensorUnalignedZero<GPUDevice, T> {
+  void operator()(const GPUDevice& d, typename TTypes<T>::UnalignedFlat t) {
+    t.device(d) = t.constant(strict_cast<T>(0.f));
+  }
+};
+
+namespace {
+
 // Adds bias, applies non-linearities and gates.
 //
 // Launch with a 2D setup such that there is one thread per (example,
@@ -42,12 +85,15 @@ namespace {
 template <typename T, bool use_peephole>
 __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev,
                            const T* wci, const T* wcf, const T* wco, T* o, T* h,
-                           T* ci, T* cs, T* co, T* i, T* f, const T forget_bias,
-                           const T cell_clip, const int batch_size,
-                           const int cell_size) {
+                           T* ci, T* cs, T* co, T* i, T* f,
+                           const float forget_bias, const float cell_clip,
+                           const int batch_size, const int cell_size) {
   const int batch_id = blockIdx.x * blockDim.x + threadIdx.x;
   const int act_id = blockIdx.y * blockDim.y + threadIdx.y;
 
+  T forget_bias_t = strict_cast<T>(forget_bias);
+  T cell_clip_t = strict_cast<T>(cell_clip);
+
   if (batch_id >= batch_size || act_id >= cell_size) return;
 
   // The following code assumes the input arrays are of the following
@@ -115,16 +161,16 @@ __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev,
   T f_local;
   if (use_peephole) {
     f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] +
-                         forget_bias + cs_prev[cid] * wcf[act_id]);
+                         forget_bias_t + cs_prev[cid] * wcf[act_id]);
   } else {
     f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] +
-                         forget_bias);
+                         forget_bias_t);
   }
   f[cid] = f_local;
 
   T cs_local = i_local * ci_local + f_local * cs_prev[cid];
-  if (cell_clip > 0.0) {
-    cs_local = clip_op(cs_local, cell_clip);
+  if (cell_clip_t > strict_cast<T>(0.0f)) {
+    cs_local = clip_op(cs_local, cell_clip_t);
   }
   cs[cid] = cs_local;
 
@@ -174,8 +220,8 @@ __global__ void concat_xh(T* xh, const T* x, const T* h_prev,
 
 template <typename T>
 void LSTMBlockCellFpropWithCUDA(
-    OpKernelContext* ctx, const GPUDevice& d, const T forget_bias,
-    const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x,
+    OpKernelContext* ctx, const GPUDevice& d, const float forget_bias,
+    const float cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x,
     typename TTypes<T>::ConstMatrix cs_prev,
     typename TTypes<T>::ConstMatrix h_prev, typename TTypes<T>::ConstMatrix w,
     typename TTypes<T>::ConstVec wci, typename TTypes<T>::ConstVec wcf,
@@ -202,7 +248,7 @@ void LSTMBlockCellFpropWithCUDA(
   // states1 = xh * w
   typename TTypes<T>::ConstMatrix const_xh(xh.data(), xh.dimensions());
   TensorBlasGemm<GPUDevice, T, true /* USE_CUBLAS */>::compute(
-      ctx, d, false, false, T(1), const_xh, w, T(0), icfo);
+      ctx, d, false, false, 1.f, const_xh, w, 0.f, icfo);
 
   // Add bias, apply non-linearities and gating.
   //
@@ -357,8 +403,9 @@ void LSTMBlockCellBpropWithCUDA(
   template struct TensorAdd<GPUDevice, T>;                                     \
   template <>                                                                  \
   void LSTMBlockCellFprop<GPUDevice, T, true /* USE_CUBLAS */>::operator()(    \
-      OpKernelContext* ctx, const GPUDevice& d, const T forget_bias,           \
-      const T cell_clip, bool use_peephole, typename TTypes<T>::ConstMatrix x, \
+      OpKernelContext* ctx, const GPUDevice& d, const float forget_bias,       \
+      const float cell_clip, bool use_peephole,                                \
+      typename TTypes<T>::ConstMatrix x,                                       \
       typename TTypes<T>::ConstMatrix cs_prev,                                 \
       typename TTypes<T>::ConstMatrix h_prev,                                  \
       typename TTypes<T>::ConstMatrix w, typename TTypes<T>::ConstVec wci,     \
@@ -368,10 +415,10 @@ void LSTMBlockCellBpropWithCUDA(
       typename TTypes<T>::Matrix f, typename TTypes<T>::Matrix o,              \
       typename TTypes<T>::Matrix ci, typename TTypes<T>::Matrix co,            \
       typename TTypes<T>::Matrix icfo, typename TTypes<T>::Matrix h) {         \
-    LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, use_peephole,   \
-                               x, cs_prev, h_prev, w, wci, wcf, wco, b, xh, i, \
-                               cs, f, o, ci, co, icfo, h, batch_size_,         \
-                               cell_size_, input_size_);                       \
+    LSTMBlockCellFpropWithCUDA<T>(ctx, d, forget_bias, cell_clip,              \
+                                  use_peephole, x, cs_prev, h_prev, w, wci,    \
+                                  wcf, wco, b, xh, i, cs, f, o, ci, co, icfo,  \
+                                  h, batch_size_, cell_size_, input_size_);    \
   }                                                                            \
   template <>                                                                  \
   void LSTMBlockCellBprop<GPUDevice, T, true /* USE_CUBLAS */>::operator()(    \
@@ -403,6 +450,7 @@ void LSTMBlockCellBpropWithCUDA(
   template struct BlockLSTMBprop<GPUDevice, T, true /* USE_CUBLAS */>;
 
 DEFINE_GPU_SPECS(float);
+DEFINE_GPU_SPECS(Eigen::half);
 // DEFINE_GPU_SPECS(double);
 #undef DEFINE_GPU_SPECS
 
diff --git a/tensorflow/contrib/rnn/ops/lstm_ops.cc b/tensorflow/contrib/rnn/ops/lstm_ops.cc
index 699cc6c88a..1679e35518 100644
--- a/tensorflow/contrib/rnn/ops/lstm_ops.cc
+++ b/tensorflow/contrib/rnn/ops/lstm_ops.cc
@@ -41,7 +41,7 @@ REGISTER_OP("LSTMBlockCell")
     .Attr("forget_bias: float = 1.0")
     .Attr("cell_clip: float = 3.0")
     .Attr("use_peephole: bool = false")
-    .Attr("T: {float}")
+    .Attr("T: {half, float}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle x, cs_prev;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x));
@@ -128,7 +128,7 @@ REGISTER_OP("LSTMBlockCellGrad")
     .Output("wcf_grad: T")
     .Output("wco_grad: T")
     .Attr("use_peephole: bool")
-    .Attr("T: {float}")
+    .Attr("T: {half, float}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle x, cs_prev;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x));
@@ -196,7 +196,7 @@ REGISTER_OP("BlockLSTM")
     .Attr("forget_bias: float = 1.0")
     .Attr("cell_clip: float = 3.0")
     .Attr("use_peephole: bool = false")
-    .Attr("T: {float}")
+    .Attr("T: {half, float}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle x, b;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x));
@@ -288,7 +288,7 @@ REGISTER_OP("BlockLSTMGrad")
     .Output("wco_grad: T")
     .Output("b_grad: T")
     .Attr("use_peephole: bool")
-    .Attr("T: {float}")
+    .Attr("T: {half, float}")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle x, cs_prev, h_prev, w, wci, wco, wcf, b;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x));
diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
index 9e61fc54d1..f645165efe 100644
--- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py
+++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py
@@ -596,6 +596,7 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
                cell_clip=None,
                use_peephole=False,
                reuse=None,
+               dtype=None,
                name="lstm_fused_cell"):
     """Initialize the LSTM cell.
 
@@ -607,12 +608,14 @@ class LSTMBlockFusedCell(LSTMBlockWrapper):
       reuse: (optional) boolean describing whether to reuse variables in an
         existing scope.  If not `True`, and the existing scope already has the
         given variables, an error is raised.
+      dtype: the dtype of variables of this layer.
       name: String, the name of the layer. Layers with the same name will
         share weights, but to avoid mistakes we require reuse=True in such
         cases.  By default this is "lstm_cell", for variable-name compatibility
         with `tf.nn.rnn_cell.LSTMCell`.
     """
-    super(LSTMBlockFusedCell, self).__init__(_reuse=reuse, name=name)
+    super(LSTMBlockFusedCell, self).__init__(
+        _reuse=reuse, name=name, dtype=dtype)
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._cell_clip = cell_clip if cell_clip is not None else -1
-- 
GitLab


From 40dd7b0096f3e344444766169617a57ce410fd17 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 25 Oct 2018 14:06:44 -0700
Subject: [PATCH 026/873] Upgrade setuptools before installing absl-py.
 (#23266)

PiperOrigin-RevId: 218471042
---
 .../tools/ci_build/install/install_pip_packages.sh  | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 7f293e8604..2c142041f3 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -29,6 +29,11 @@ easy_install3 -U pip==9.0.3
 pip2 install wheel==0.31.1
 pip3 install wheel==0.31.1
 
+# Install last working version of setuptools. This must happen before we install
+# absl-py, which uses install_requires notation introduced in setuptools 20.5.
+pip2 install --upgrade setuptools==39.1.0
+pip3 install --upgrade setuptools==39.1.0
+
 pip2 install virtualenv
 pip3 install virtualenv
 
@@ -112,10 +117,6 @@ pip3 install --upgrade gast
 pip2 install --upgrade termcolor
 pip3 install --upgrade termcolor
 
-# Install last working version of setuptools.
-pip2 install --upgrade setuptools==39.1.0
-pip3 install --upgrade setuptools==39.1.0
-
 # Keras
 pip2 install keras_applications==1.0.6 --no-deps
 pip3 install keras_applications==1.0.6 --no-deps
@@ -123,7 +124,3 @@ pip2 install keras_preprocessing==1.0.5 --no-deps
 pip3 install keras_preprocessing==1.0.5 --no-deps
 pip2 install --upgrade h5py==2.8.0
 pip3 install --upgrade h5py==2.8.0
-
-# Install last working version of setuptools.
-pip2 install --upgrade setuptools==39.1.0
-pip3 install --upgrade setuptools==39.1.0
-- 
GitLab


From b58290fc603760724dc4fb55585ad81094204f56 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 25 Oct 2018 18:11:56 -0700
Subject: [PATCH 027/873] Allow empty GCS tokens to be cached. (#23275)

PiperOrigin-RevId: 217159671
---
 tensorflow/core/platform/cloud/google_auth_provider.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc
index 6ffe51e897..e15400780a 100644
--- a/tensorflow/core/platform/cloud/google_auth_provider.cc
+++ b/tensorflow/core/platform/cloud/google_auth_provider.cc
@@ -135,8 +135,7 @@ Status GoogleAuthProvider::GetToken(string* t) {
   mutex_lock lock(mu_);
   const uint64 now_sec = env_->NowSeconds();
 
-  if (!current_token_.empty() &&
-      now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) {
+  if (now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) {
     *t = current_token_;
     return Status::OK();
   }
-- 
GitLab


From f90c2141ce5417e26bbf3dbcae426a8987cb60f1 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 25 Oct 2018 18:36:15 -0700
Subject: [PATCH 028/873] Upgrade setuptools before clean pip install pulls in
 absl-py. (#23276)

absl-py recently added a version dependency to the package, causing
install to fail on the old setuptools

PiperOrigin-RevId: 218783878
---
 tensorflow/tools/ci_build/builds/pip.sh | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh
index 6543779022..d1fad98ed7 100755
--- a/tensorflow/tools/ci_build/builds/pip.sh
+++ b/tensorflow/tools/ci_build/builds/pip.sh
@@ -321,6 +321,12 @@ create_activate_virtualenv_and_install_tensorflow() {
   # some versions in python
   curl https://bootstrap.pypa.io/get-pip.py | python
 
+  # Force upgrade of setuptools. This must happen before the pip install of the
+  # WHL_PATH, which pulls in absl-py, which uses install_requires notation
+  # introduced in setuptools >=20.5. The default version of setuptools is 5.5.1,
+  # which is too old for absl-py.
+  pip install --upgrade setuptools==39.1.0
+
   # Force tensorflow reinstallation. Otherwise it may not get installed from
   # last build if it had the same version number as previous build.
   PIP_FLAGS="--upgrade --force-reinstall"
@@ -328,9 +334,11 @@ create_activate_virtualenv_and_install_tensorflow() {
     die "pip install (forcing to reinstall tensorflow) FAILED"
   echo "Successfully installed pip package ${TF_WHEEL_PATH}"
 
-  # Force downgrade setuptools.
+  # Force downgrade of setuptools. This must happen after the pip install of the
+  # WHL_PATH, which ends up upgrading to the latest version of setuptools.
+  # Versions of setuptools >= 39.1.0 will cause tests to fail like this:
+  #   ImportError: cannot import name py31compat
   pip install --upgrade setuptools==39.1.0
-
 }
 
 ################################################################################
-- 
GitLab


From 748435b8ef55a554e011e97a9f893304e737775a Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 25 Oct 2018 20:26:00 -0700
Subject: [PATCH 029/873] Fixed the issue that each invocation of
 model.fit/evaluate/predict modifies the (#23280)

graph.

PiperOrigin-RevId: 218793646
---
 .../contrib/tpu/python/tpu/keras_support.py     | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index d628258b9d..a8eb3aa892 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -97,14 +97,25 @@ from tensorflow.python.platform import tf_logging as logging
 
 # TODO(b/114775106): temporary shim to optionally initialize the TPU
 # This increases the odds our session is initialized, but shouldn't be needed.
+_TEST_REWRITE_OP = None
+
+
 def _maybe_initialize_tpu(session):
   """Initialize the TPU if it has not already been initialized."""
+  global _TEST_REWRITE_OP
   try:
+    # Try to use cached version to avoid another ground of graph optimization.
+    test_rewrite_op = _TEST_REWRITE_OP
+    if (test_rewrite_op is None or
+        test_rewrite_op[0].graph != ops.get_default_graph()):
+
+      def test_op():
+        return constant_op.constant(1) + constant_op.constant(1)
 
-    def test_op():
-      return constant_op.constant(1) + constant_op.constant(1)
+      test_rewrite_op = tpu.rewrite(test_op)
+      _TEST_REWRITE_OP = test_rewrite_op
 
-    session.run(tpu.rewrite(test_op))
+    session.run(test_rewrite_op)
   except errors.FailedPreconditionError as _:
     session.run(tpu.initialize_system())
 
-- 
GitLab


From 0fb33d8f232eff875aac4379a2bd347fbd0ef8e1 Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Thu, 1 Nov 2018 23:00:07 +0800
Subject: [PATCH 030/873] fix softmax

Change-Id: Ic882c0c071c650400a3aadb9025b37381c762262
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index 92167e06d5..6ff27b1957 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -63,7 +63,13 @@ class MklSoftmaxOp : public OpKernel {
                               : src_tensor.shape();
       const int input_dims = src_tf_shape.dims();
       auto src_dims = TFShapeToMklDnnDims(src_tf_shape);
-      auto output_dims = src_dims;
+      memory::dims output_dims;
+      if(src_mkl_shape.IsMklTensor()) {
+        output_dims = src_mkl_shape.GetSizesAsMklDnnDims();
+      }
+      else {
+        output_dims = src_dims; //nhwc
+      }
       memory::format layout_type;
       // In MKL, data format passed to mkl softmax op depends on dimension of the input tensor.
       // Here "x" data format in MKL is used for 1 dim tensor, "nc" for 2 dim tensor, 
@@ -82,10 +88,10 @@ class MklSoftmaxOp : public OpKernel {
           layout_type = memory::format::tnc;
           break;
         case 4:
-          layout_type = memory::format::nchw;
+          layout_type = memory::format::nhwc;
           break;
         case 5:
-          layout_type = memory::format::ncdhw;
+          layout_type = memory::format::ndhwc;
           break;
         default:
           OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1"));
-- 
GitLab


From 4cdcadc62394e3f07520e0a04208a6916f178f42 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 1 Nov 2018 16:56:56 -0700
Subject: [PATCH 031/873] AsyncCheckpoints: Add missing 'self' arg to
 write_graph_fn. (#23439)

PiperOrigin-RevId: 219365527
---
 tensorflow/contrib/tpu/python/tpu/async_checkpoint.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
index 78253d83fc..c32bd5997c 100644
--- a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
+++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py
@@ -102,7 +102,8 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook):
       training_util.write_graph(
           ops.get_default_graph().as_graph_def(add_shapes=True),
           self._checkpoint_dir, "graph.pbtxt")
-    self._write_graph_thread = threading.Thread(target=_write_graph_fn)
+    self._write_graph_thread = threading.Thread(target=_write_graph_fn,
+                                                args=[self])
     self._write_graph_thread.start()
 
     saver_def = self._get_saver().saver_def if self._get_saver() else None
-- 
GitLab


From 8ce231a8ebc73be5be53ccd90387fc68b187bcec Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 1 Nov 2018 18:12:13 -0700
Subject: [PATCH 032/873] Update version to 1.12.0 final (#23444)

---
 tensorflow/core/public/version.h      | 2 +-
 tensorflow/tools/pip_package/setup.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 500ec8f97b..a55fe17dd5 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
-#define TF_VERSION_SUFFIX "-rc2"
+#define TF_VERSION_SUFFIX ""
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index ceaa96b690..036830dd22 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.12.0-rc2'
+_VERSION = '1.12.0'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From a6d8ffae097d0132989ae4688d224121ec6d8f35 Mon Sep 17 00:00:00 2001
From: Todd Wang <toddwang@gmail.com>
Date: Thu, 1 Nov 2018 18:35:10 -0700
Subject: [PATCH 033/873] Fix a bug in tpu.py and xla.py that while creating an
 identity node for control input edges under rewrite context, the parent
 control flow context is lost. (#23446)

PiperOrigin-RevId: 219724472
---
 tensorflow/contrib/compiler/xla.py       | 13 +++++--------
 tensorflow/contrib/tpu/python/tpu/tpu.py | 13 +++++--------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py
index 873b03580d..83d9d8c54a 100644
--- a/tensorflow/contrib/compiler/xla.py
+++ b/tensorflow/contrib/compiler/xla.py
@@ -179,14 +179,11 @@ class XLACompileContext(control_flow_ops.XLAControlFlowContext):
     if external_control_inputs:
       # Use an identity to pull control inputs as data inputs. Note that we
       # ignore ops which don't have outputs. TODO(phawkins): fix that.
-      with ops.control_dependencies(None):
-        self.Enter()
-        external_control_inputs = [
-            array_ops.identity(x.outputs[0]).op
-            for x in external_control_inputs
-            if x.outputs
-        ]
-        self.Exit()
+      external_control_inputs = [
+          array_ops.identity(x.outputs[0]).op
+          for x in external_control_inputs
+          if x.outputs
+      ]
       # pylint: disable=protected-access
       op._add_control_inputs(external_control_inputs)
       # pylint: enable=protected-access
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 11aaa1c66a..a5ccaa071b 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -371,14 +371,11 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext):
     if external_control_inputs:
       # Use an identity to pull control inputs as data inputs. Note that we
       # ignore ops which don't have outputs. TODO(phawkins): fix that.
-      with ops.control_dependencies(None):
-        self.Enter()
-        external_control_inputs = [
-            array_ops.identity(x.outputs[0]).op
-            for x in external_control_inputs
-            if x.outputs
-        ]
-        self.Exit()
+      external_control_inputs = [
+          array_ops.identity(x.outputs[0]).op
+          for x in external_control_inputs
+          if x.outputs
+      ]
       # pylint: disable=protected-access
       op._add_control_inputs(external_control_inputs)
       # pylint: enable=protected-access
-- 
GitLab


From c475ede7a02ff9a3e919ecbb9545be9377013bf1 Mon Sep 17 00:00:00 2001
From: George Sterpu <george.sterpu@gmail.com>
Date: Fri, 2 Nov 2018 17:11:03 +0000
Subject: [PATCH 034/873] Update beam_search_decoder.py

#22172
probably not the neatest way to update my previous pull request...
---
 tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
index ab36848f13..8f8f057702 100644
--- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
+++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py
@@ -921,6 +921,7 @@ def _get_scores(log_probs, sequence_lengths, length_penalty_weight,
   """
   length_penalty_ = _length_penalty(
       sequence_lengths=sequence_lengths, penalty_factor=length_penalty_weight)
+  length_penalty_ = math_ops.cast(length_penalty_, dtype=log_probs.dtype)
   scores = log_probs / length_penalty_
 
   coverage_penalty_weight = ops.convert_to_tensor(
-- 
GitLab


From 090cb450e25f14942e70c53d0d82ea8f9d164d57 Mon Sep 17 00:00:00 2001
From: Bhavani Subramanian <bhavani1.subramanian@intel.com>
Date: Mon, 5 Nov 2018 15:30:04 -0800
Subject: [PATCH 035/873] Fix for build failure (#424)

Temporarily merging fix into our master so testing can progress
---
 tensorflow/core/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index afe4c46c8e..26dd295d0c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -383,6 +383,7 @@ cc_library(
         ":lib_platform",
         ":platform_base",
         "//tensorflow/core/platform/default/build_config:port",
+        "@com_google_absl//absl/base",
         "@snappy",
     ],
 )
-- 
GitLab


From 88026690778a4960c23019d13572f0f346f19916 Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Tue, 6 Nov 2018 21:13:00 +0800
Subject: [PATCH 036/873] update mkl_softmax comments

Change-Id: I95428c0e1d4df73f984b3b1f0e9770ec14688dd1
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index 6ff27b1957..c8b78f6187 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -50,8 +50,6 @@ class MklSoftmaxOp : public OpKernel {
       // src_tensor now points to the 0-th input of global data struct "context"
       size_t src_idx = 0;
       const Tensor& src_tensor = MklGetInput(context, src_idx);
-      //const int input_dims = src_tensor.dims();
-      //  printf("input_dims = %d\n", input_dims);
       // Add: get MklShape
       MklDnnShape src_mkl_shape;
       GetMklShape(context, src_idx, &src_mkl_shape);
@@ -122,6 +120,8 @@ class MklSoftmaxOp : public OpKernel {
 
       // creating a memory descriptor
       // passing outermost dim as default axis, where the softmax is applied
+      // If axis is not the last dimension, python op will do a transpose so that we can
+      // still perform softmax on its last dimension.
       int axis = input_dims - 1;
       auto softmax_fwd_desc = softmax_forward::desc(prop_kind::forward_scoring,
                                                     src.GetOpMemDesc(), axis);
-- 
GitLab


From 7f642e5afd7ddaad5215958ce3f22523ccb08a9c Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 27 Nov 2017 11:28:59 -0800
Subject: [PATCH 037/873] Fix issue in tf.nn.softmax where negative dims could
 only be -1

This fix tries to address the issue raised in 14916 where
negative dims could only be -1 in tf.nn.softmax.
The issue was that dims=-1 was handled as a case of "last dim"
with `is_last_dim = (dim is -1) or (dim == shape.ndims - 1)`
but the generic negative dims were never processed.

This fix adds `dim += shape.ndims` for generic negative dims.

This fix fixes 14916.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/ops/nn_ops.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index bc195993c2..0b6d8e836f 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1698,6 +1698,10 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   # If dim is not the last dimension, we have to do a transpose so that we can
   # still perform softmax on its last dimension.
 
+  # In case dim is negative (and is not last dimension -1), add shape.ndims
+  if dim < 0:
+    dim += shape.ndims
+
   # Swap logits' dimension of dim and its last dimension.
   input_rank = array_ops.rank(logits)
   dim_axis = dim % shape.ndims
-- 
GitLab


From e459d7ed9e843d2e6cad5cee2cfd0cbeb9d0c462 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 27 Nov 2017 11:35:21 -0800
Subject: [PATCH 038/873] Add test case for negative dims (other than -1) for
 tf.nn.softmax

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/softmax_op_test.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py
index ef9301d4e3..c87b6728aa 100644
--- a/tensorflow/python/kernel_tests/softmax_op_test.py
+++ b/tensorflow/python/kernel_tests/softmax_op_test.py
@@ -200,6 +200,15 @@ class SoftmaxTest(test.TestCase):
         use_gpu=False)
     self._testOverflow(use_gpu=False)
 
+  def testAlongNegativeDimension(self):
+    self._testSoftmax(
+        np.array([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
+                  [[2., 3., 4., 5.], [6., 7., 8., 9.]],
+                  [[5., 4., 3., 2.], [1., 2., 3., 4.]]]).astype(np.float32),
+        dim=-2,
+        use_gpu=False)
+    self._testOverflow(use_gpu=False)
+
   def testShapeInference(self):
     op = nn_ops.softmax([[[1., 1., 1., 1.], [1., 2., 3., 4.]],
                          [[2., 3., 4., 5.], [6., 7., 8., 9.]],
-- 
GitLab


From 2ec6dcb7fe33ffac1dc55b9d7f6f23c417cb3dc1 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Tue, 6 Nov 2018 23:12:59 +0000
Subject: [PATCH 039/873] Fix broken test

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/ops/nn_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 0b6d8e836f..a2305cefba 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1699,7 +1699,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   # still perform softmax on its last dimension.
 
   # In case dim is negative (and is not last dimension -1), add shape.ndims
-  if dim < 0:
+  if not isinstance(dim, ops.Tensor) and dim < 0:
     dim += shape.ndims
 
   # Swap logits' dimension of dim and its last dimension.
-- 
GitLab


From 8e4ec9ae62135adbc523470af1546c178a7f97c5 Mon Sep 17 00:00:00 2001
From: frreiss <frreiss@us.ibm.com>
Date: Tue, 6 Nov 2018 12:55:29 -0800
Subject: [PATCH 040/873] Add missing random seed field to OrderedEnqueuer

Fix whitespace

Simplify changes

Simplify changeset
---
 tensorflow/python/keras/utils/data_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/utils/data_utils.py b/tensorflow/python/keras/utils/data_utils.py
index 01a9d61a84..8e36d4dea7 100644
--- a/tensorflow/python/keras/utils/data_utils.py
+++ b/tensorflow/python/keras/utils/data_utils.py
@@ -598,7 +598,7 @@ class OrderedEnqueuer(SequenceEnqueuer):
     def pool_fn(seqs):
       return multiprocessing.Pool(workers,
                                   initializer=init_pool_generator,
-                                  initargs=(seqs, self.random_seed))
+                                  initargs=(seqs, None))
     return pool_fn
 
   def _wait_queue(self):
-- 
GitLab


From 117d30b9e313f93a39f17883e2e64960b4015c15 Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Thu, 8 Nov 2018 22:33:42 +0800
Subject: [PATCH 041/873] use different layout for mkl and tf

Change-Id: Id148c006fa74ca0382af8e67c6437f551fbba1b7
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index c8b78f6187..ca78164ac9 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -62,7 +62,7 @@ class MklSoftmaxOp : public OpKernel {
       const int input_dims = src_tf_shape.dims();
       auto src_dims = TFShapeToMklDnnDims(src_tf_shape);
       memory::dims output_dims;
-      if(src_mkl_shape.IsMklTensor()) {
+      if (src_mkl_shape.IsMklTensor()) {
         output_dims = src_mkl_shape.GetSizesAsMklDnnDims();
       }
       else {
@@ -75,6 +75,7 @@ class MklSoftmaxOp : public OpKernel {
       // Each of the simbols has the following meaning:
       // n = batch, c = channels, t = sequence lenght, h = height,
       // w = width, d = depth 
+      
       switch (input_dims) {
         case 1:
           layout_type = memory::format::x;
@@ -86,10 +87,20 @@ class MklSoftmaxOp : public OpKernel {
           layout_type = memory::format::tnc;
           break;
         case 4:
-          layout_type = memory::format::nhwc;
+          if (src_mkl_shape.IsMklTensor()) {
+            layout_type = memory::format::nhwc;
+          }
+          else {
+            layout_type = memory::format::nchw;
+          }
           break;
         case 5:
-          layout_type = memory::format::ndhwc;
+          if (src_mkl_shape.IsMklTensor()) {
+            layout_type = memory::format::ndhwc;
+          }
+          else {
+            layout_type = memory::format::ncdhw;
+          }
           break;
         default:
           OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1"));
-- 
GitLab


From 902b080a85fc78816f0ca0c8b66d80411b372579 Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Mon, 12 Nov 2018 00:48:03 +0800
Subject: [PATCH 042/873] fix layout error

Change-Id: I24d66af494a9e96cfa13c885b3765f3f74dc2976
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index ca78164ac9..6d644fba69 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -62,10 +62,13 @@ class MklSoftmaxOp : public OpKernel {
       const int input_dims = src_tf_shape.dims();
       auto src_dims = TFShapeToMklDnnDims(src_tf_shape);
       memory::dims output_dims;
+      int axis;
       if (src_mkl_shape.IsMklTensor()) {
+        axis = 1;
         output_dims = src_mkl_shape.GetSizesAsMklDnnDims();
       }
       else {
+        axis = input_dims - 1;
         output_dims = src_dims; //nhwc
       }
       memory::format layout_type;
@@ -87,20 +90,10 @@ class MklSoftmaxOp : public OpKernel {
           layout_type = memory::format::tnc;
           break;
         case 4:
-          if (src_mkl_shape.IsMklTensor()) {
-            layout_type = memory::format::nhwc;
-          }
-          else {
-            layout_type = memory::format::nchw;
-          }
+          layout_type = memory::format::nchw;
           break;
         case 5:
-          if (src_mkl_shape.IsMklTensor()) {
-            layout_type = memory::format::ndhwc;
-          }
-          else {
-            layout_type = memory::format::ncdhw;
-          }
+          layout_type = memory::format::ncdhw;
           break;
         default:
           OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1"));
@@ -127,15 +120,13 @@ class MklSoftmaxOp : public OpKernel {
       // data format is "nc" for src and dst; since the src and dst buffer is
       // always in 2D shape
       src.SetUsrMem(src_md, &src_tensor);
-      src.SetOpMemDesc(src_dims, layout_type);
 
       // creating a memory descriptor
       // passing outermost dim as default axis, where the softmax is applied
       // If axis is not the last dimension, python op will do a transpose so that we can
       // still perform softmax on its last dimension.
-      int axis = input_dims - 1;
       auto softmax_fwd_desc = softmax_forward::desc(prop_kind::forward_scoring,
-                                                    src.GetOpMemDesc(), axis);
+                                                    src.GetUsrMemDesc(), axis);
       auto softmax_fwd_pd =
           softmax_forward::primitive_desc(softmax_fwd_desc, cpu_engine);
 
-- 
GitLab


From 669698caf6e886c27d4a9494760078ef3f4f1d40 Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Wed, 14 Nov 2018 09:26:32 +0800
Subject: [PATCH 043/873] update comments

Change-Id: Ie781dba3b07cee43bf1864ab5155a710d322aa19
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index 6d644fba69..4e093cbf4b 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -69,7 +69,7 @@ class MklSoftmaxOp : public OpKernel {
       }
       else {
         axis = input_dims - 1;
-        output_dims = src_dims; //nhwc
+        output_dims = src_dims;
       }
       memory::format layout_type;
       // In MKL, data format passed to mkl softmax op depends on dimension of the input tensor.
@@ -113,18 +113,11 @@ class MklSoftmaxOp : public OpKernel {
               ? src_mkl_shape.GetMklLayout()
               : memory::desc(src_dims, MklDnnType<T>(), layout_type);
 
-      // src: setting memory descriptor and op memory descriptor
-      // Basically following two functions maps the TF "src_tensor" to mkl
-      // tensor object "src"
+      // src: setting memory descriptor
       // following functions are in mkl_util.h
-      // data format is "nc" for src and dst; since the src and dst buffer is
-      // always in 2D shape
       src.SetUsrMem(src_md, &src_tensor);
 
       // creating a memory descriptor
-      // passing outermost dim as default axis, where the softmax is applied
-      // If axis is not the last dimension, python op will do a transpose so that we can
-      // still perform softmax on its last dimension.
       auto softmax_fwd_desc = softmax_forward::desc(prop_kind::forward_scoring,
                                                     src.GetUsrMemDesc(), axis);
       auto softmax_fwd_pd =
-- 
GitLab


From 489e181be77b83b2b631f48968aaf40897001838 Mon Sep 17 00:00:00 2001
From: Siju <siju.samuel@huawei.com>
Date: Fri, 16 Nov 2018 11:56:50 +0530
Subject: [PATCH 044/873] Update graph_transformations.h

---
 .../lite/toco/graph_transformations/graph_transformations.h     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/lite/toco/graph_transformations/graph_transformations.h
index 73a90c8239..187b584b69 100644
--- a/tensorflow/lite/toco/graph_transformations/graph_transformations.h
+++ b/tensorflow/lite/toco/graph_transformations/graph_transformations.h
@@ -139,7 +139,7 @@ DECLARE_GRAPH_TRANSFORMATION(MakeInitialDequantizeOperator)
 DECLARE_GRAPH_TRANSFORMATION(MoveBinaryOperatorBeforeReshape)
 DECLARE_GRAPH_TRANSFORMATION(PropagateActivationFunctionIntoConstants)
 DECLARE_GRAPH_TRANSFORMATION(PropagateArrayDataTypes)
-DECLARE_GRAPH_TRANSFORMATION(PropagateFakeQuantNumBits);
+DECLARE_GRAPH_TRANSFORMATION(PropagateFakeQuantNumBits)
 DECLARE_GRAPH_TRANSFORMATION(PropagateFixedSizes)
 DECLARE_GRAPH_TRANSFORMATION(HardcodeMinMax)
 DECLARE_GRAPH_TRANSFORMATION(Quantize)
-- 
GitLab


From 2427ff8fe9a24f4d9581716af46ef07f99408e0f Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Wed, 24 Oct 2018 15:49:46 +0800
Subject: [PATCH 045/873] fix layout error when src tensor is mkl

Change-Id: I6bcfc8981867f1b60591c65fde77c92cff298694
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index 4e093cbf4b..25c0c7b078 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -90,10 +90,20 @@ class MklSoftmaxOp : public OpKernel {
           layout_type = memory::format::tnc;
           break;
         case 4:
-          layout_type = memory::format::nchw;
+          if (src_mkl_shape.IsMklTensor()) {
+            layout_type = memory::format::nhwc;
+          } 
+          else {
+            layout_type = memory::format::nchw;
+          }
           break;
         case 5:
-          layout_type = memory::format::ncdhw;
+          if (src_mkl_shape.IsMklTensor()) {
+            layout_type = memory::format::ndhwc;
+          } 
+          else {
+            layout_type = memory::format::ncdhw;
+          }
           break;
         default:
           OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1"));
-- 
GitLab


From 437aeb55cc89fade6e386205b30148bc21471bb1 Mon Sep 17 00:00:00 2001
From: Castiel <castielwongsysu@gmail.com>
Date: Fri, 23 Nov 2018 07:31:57 +1030
Subject: [PATCH 046/873] Minor change in word2vec_basic tutorial

---
 tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index b09ee99768..bbcfc32098 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -91,7 +91,7 @@ vocabulary_size = 50000
 
 def build_dataset(words, n_words):
   """Process raw inputs into a dataset."""
-  count = [['UNK', -1]]
+  count = [('UNK', -1)]
   count.extend(collections.Counter(words).most_common(n_words - 1))
   dictionary = dict()
   for word, _ in count:
@@ -125,6 +125,7 @@ data_index = 0
 
 # Step 3: Function to generate a training batch for the skip-gram model.
 def generate_batch(batch_size, num_skips, skip_window):
+  global data
   global data_index
   assert batch_size % num_skips == 0
   assert num_skips <= 2 * skip_window
-- 
GitLab


From 8c9dc06e8e2566de4e3c2d6067d8f1b8bc077307 Mon Sep 17 00:00:00 2001
From: Karl Lessard <karl@kubx.ca>
Date: Mon, 26 Nov 2018 09:34:16 -0500
Subject: [PATCH 047/873] Define API defs for Java

---
 tensorflow/core/api_def/java_api/api_def_Abort.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_Abs.pbtxt          | 6 ++++++
 .../core/api_def/java_api/api_def_AccumulateNV2.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_AccumulatorApplyGradient.pbtxt | 6 ++++++
 .../java_api/api_def_AccumulatorNumAccumulated.pbtxt        | 6 ++++++
 .../api_def/java_api/api_def_AccumulatorSetGlobalStep.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_AccumulatorTakeGradient.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Acos.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Acosh.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Add.pbtxt          | 6 ++++++
 .../java_api/api_def_AddManySparseToTensorsMap.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_AddN.pbtxt         | 6 ++++++
 .../api_def/java_api/api_def_AddSparseToTensorsMap.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_AdjustContrast.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_AdjustContrastv2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_AdjustHue.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_AdjustSaturation.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_All.pbtxt          | 3 +++
 .../core/api_def/java_api/api_def_AllCandidateSampler.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Angle.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_AnonymousIterator.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Any.pbtxt          | 3 +++
 tensorflow/core/api_def/java_api/api_def_ApplyAdaMax.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_ApplyAdadelta.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ApplyAdagrad.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ApplyAdam.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ApplyAddSign.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_ApplyGradientDescent.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_ApplyMomentum.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_ApplyPowerSign.pbtxt      | 6 ++++++
 .../api_def/java_api/api_def_ApplyProximalAdagrad.pbtxt     | 6 ++++++
 .../java_api/api_def_ApplyProximalGradientDescent.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_ApproximateEqual.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ArgMax.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ArgMin.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_AsString.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Asin.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Asinh.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Assert.pbtxt       | 6 ++++--
 tensorflow/core/api_def/java_api/api_def_Assign.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_AssignAdd.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_AssignAddVariableOp.pbtxt | 3 +++
 tensorflow/core/api_def/java_api/api_def_AssignSub.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_AssignSubVariableOp.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_AssignVariableOp.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_Atan.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Atan2.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Atanh.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_AudioSpectrogram.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_AudioSummaryV2.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_AvgPool.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_AvgPool3D.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_AvgPoolGrad.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Barrier.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_BarrierClose.pbtxt | 3 +++
 .../api_def/java_api/api_def_BarrierIncompleteSize.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_BarrierInsertMany.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_BarrierReadySize.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_BarrierTakeMany.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_Batch.pbtxt        | 3 +++
 .../core/api_def/java_api/api_def_BatchCholesky.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_BatchCholeskyGrad.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_BatchDatasetV2.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_BatchFunction.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BatchMatMul.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_BatchMatrixBandPart.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_BatchMatrixDeterminant.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_BatchMatrixDiag.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_BatchMatrixDiagPart.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_BatchMatrixInverse.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_BatchMatrixSetDiag.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_BatchMatrixSolve.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_BatchMatrixSolveLs.pbtxt  | 6 ++++++
 .../java_api/api_def_BatchMatrixTriangularSolve.pbtxt       | 6 ++++++
 .../java_api/api_def_BatchNormWithGlobalNormalization.pbtxt | 6 ++++++
 .../api_def_BatchNormWithGlobalNormalizationGrad.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BatchSvd.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BatchToSpace.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_BatchToSpaceND.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BesselI0e.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BesselI1e.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Betainc.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BiasAdd.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BiasAddGrad.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_BigQueryReader.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Bincount.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Bitcast.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_BitwiseAnd.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BitwiseOr.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_BitwiseXor.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_BoostedTreesBucketize.pbtxt    | 3 +++
 .../api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt  | 3 +++
 .../api_def/java_api/api_def_BoostedTreesCenterBias.pbtxt   | 3 +++
 .../java_api/api_def_BoostedTreesCreateEnsemble.pbtxt       | 3 +++
 .../api_def_BoostedTreesCreateQuantileStreamResource.pbtxt  | 3 +++
 .../java_api/api_def_BoostedTreesDeserializeEnsemble.pbtxt  | 3 +++
 .../api_def_BoostedTreesEnsembleResourceHandleOp.pbtxt      | 3 +++
 .../java_api/api_def_BoostedTreesExampleDebugOutputs.pbtxt  | 3 +++
 .../java_api/api_def_BoostedTreesGetEnsembleStates.pbtxt    | 3 +++
 .../api_def_BoostedTreesMakeQuantileSummaries.pbtxt         | 3 +++
 .../java_api/api_def_BoostedTreesMakeStatsSummary.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_BoostedTreesPredict.pbtxt | 3 +++
 ...def_BoostedTreesQuantileStreamResourceAddSummaries.pbtxt | 3 +++
 ..._def_BoostedTreesQuantileStreamResourceDeserialize.pbtxt | 3 +++
 .../api_def_BoostedTreesQuantileStreamResourceFlush.pbtxt   | 3 +++
 ...stedTreesQuantileStreamResourceGetBucketBoundaries.pbtxt | 3 +++
 ...api_def_BoostedTreesQuantileStreamResourceHandleOp.pbtxt | 3 +++
 .../java_api/api_def_BoostedTreesSerializeEnsemble.pbtxt    | 3 +++
 .../java_api/api_def_BoostedTreesTrainingPredict.pbtxt      | 3 +++
 .../java_api/api_def_BoostedTreesUpdateEnsemble.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_BroadcastArgs.pbtxt       | 3 +++
 .../api_def/java_api/api_def_BroadcastGradientArgs.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_BroadcastTo.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_Bucketize.pbtxt    | 3 +++
 .../java_api/api_def_BytesProducedStatsDataset.pbtxt        | 6 ++++++
 .../api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_CacheDataset.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Cast.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Ceil.pbtxt         | 6 ++++++
 .../core/api_def/java_api/api_def_CheckNumerics.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Cholesky.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_CholeskyGrad.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ClipByValue.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_CloseSummaryWriter.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_CollectiveBcastRecv.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_CollectiveBcastSend.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_CollectiveReduce.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_CompareAndBitpack.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Complex.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ComplexAbs.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_ComputeAccidentalHits.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Concat.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_ConcatOffset.pbtxt | 3 +++
 tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_ConcatenateDataset.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_ConditionalAccumulator.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Conj.pbtxt         | 6 ++++++
 .../core/api_def/java_api/api_def_ConjugateTranspose.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Const.pbtxt        | 4 ++--
 .../core/api_def/java_api/api_def_ConsumeMutexLock.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_ControlTrigger.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Cos.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Cosh.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_CountUpTo.pbtxt    | 3 +++
 .../api_def/java_api/api_def_CreateSummaryDbWriter.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_CreateSummaryFileWriter.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_CropAndResize.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_CropAndResizeGradBoxes.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_CropAndResizeGradImage.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Cross.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt  | 6 ++++++
 .../java_api/api_def_CudnnRNNCanonicalToParams.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt  | 6 ++++++
 .../java_api/api_def_CudnnRNNParamsToCanonical.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Cumprod.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Cumsum.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_DataFormatDimMap.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_DataFormatVecPermute.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_DatasetToGraph.pbtxt      | 6 ++++++
 .../api_def/java_api/api_def_DatasetToSingleElement.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_DebugGradientIdentity.pbtxt    | 4 ++++
 .../api_def/java_api/api_def_DebugGradientRefIdentity.pbtxt | 4 ++++
 .../core/api_def/java_api/api_def_DecodeAndCropJpeg.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DecodeBase64.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DecodeBmp.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_DecodeCompressed.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DecodeGif.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_DecodeJSONExample.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DecodeJpeg.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DecodePng.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_DecodeProtoV2.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_DecodeRaw.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DecodeWav.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DeepCopy.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_DeleteSessionTensor.pbtxt | 3 +++
 .../api_def/java_api/api_def_DenseToDenseSetOperation.pbtxt | 6 ++++++
 .../java_api/api_def_DenseToSparseBatchDataset.pbtxt        | 6 ++++++
 .../java_api/api_def_DenseToSparseSetOperation.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DepthToSpace.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_DepthwiseConv2dNative.pbtxt    | 6 ++++++
 .../api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt       | 6 ++++++
 .../api_def_DepthwiseConv2dNativeBackpropInput.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Dequantize.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_DeserializeIterator.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_DeserializeManySparse.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_DeserializeSparse.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_DestroyResourceOp.pbtxt   | 3 +++
 .../api_def/java_api/api_def_DestroyTemporaryVariable.pbtxt | 3 +++
 tensorflow/core/api_def/java_api/api_def_Diag.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DiagPart.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Digamma.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Dilation2D.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Div.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_DivNoNan.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_DrawBoundingBoxes.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_DynamicPartition.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_DynamicStitch.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_EagerPyFunc.pbtxt  | 4 ++++
 tensorflow/core/api_def/java_api/api_def_EditDistance.pbtxt | 3 +++
 tensorflow/core/api_def/java_api/api_def_Elu.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_EluGrad.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Empty.pbtxt        | 3 +++
 .../core/api_def/java_api/api_def_EmptyTensorList.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_EncodeBase64.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_EncodeJpeg.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_EncodePng.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_EncodeProto.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_EncodeWav.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_EnqueueInQueueDataset.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_EnsureShape.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_Enter.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_Equal.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Erf.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Erfc.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Exit.pbtxt         | 3 +++
 tensorflow/core/api_def/java_api/api_def_Exp.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ExpandDims.pbtxt   | 3 +++
 .../java_api/api_def_ExperimentalAssertNextDataset.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt   | 6 ++++++
 .../api_def_ExperimentalDirectedInterleaveDataset.pbtxt     | 6 ++++++
 .../api_def_ExperimentalFunctionBufferingResource.pbtxt     | 6 ++++++
 ...i_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt | 6 ++++++
 ...api_def_ExperimentalFunctionBufferingResourceReset.pbtxt | 6 ++++++
 .../api_def_ExperimentalIdentityIndexedDataset.pbtxt        | 6 ++++++
 .../java_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt  | 6 ++++++
 .../java_api/api_def_ExperimentalIndexedDatasetGet.pbtxt    | 6 ++++++
 .../api_def_ExperimentalIndexedDatasetMaterialize.pbtxt     | 6 ++++++
 .../java_api/api_def_ExperimentalIteratorGetDevice.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_ExperimentalMapDataset.pbtxt   | 6 ++++++
 ...api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt | 6 ++++++
 .../api_def_ExperimentalNonSerializableDataset.pbtxt        | 6 ++++++
 .../api_def_ExperimentalNumaMapAndBatchDataset.pbtxt        | 6 ++++++
 .../api_def/java_api/api_def_ExperimentalSleepDataset.pbtxt | 6 ++++++
 .../java_api/api_def_ExperimentalThreadPoolDataset.pbtxt    | 6 ++++++
 .../java_api/api_def_ExperimentalThreadPoolHandle.pbtxt     | 6 ++++++
 .../java_api/api_def_ExperimentalUniqueDataset.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Expm1.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_ExtractGlimpse.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_ExtractImagePatches.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_ExtractJpegShape.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_ExtractVolumePatches.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_FFT.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_FFT2D.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_FFT3D.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Fact.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_FakeParam.pbtxt    | 3 +++
 .../api_def/java_api/api_def_FakeQuantWithMinMaxArgs.pbtxt  | 6 ++++++
 .../java_api/api_def_FakeQuantWithMinMaxArgsGradient.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_FakeQuantWithMinMaxVars.pbtxt  | 6 ++++++
 .../java_api/api_def_FakeQuantWithMinMaxVarsGradient.pbtxt  | 6 ++++++
 .../api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt         | 6 ++++++
 .../api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_FakeQueue.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Fill.pbtxt         | 3 +++
 .../java_api/api_def_FilterByLastComponentDataset.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_FilterDataset.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt | 6 ++++++
 .../java_api/api_def_FixedLengthRecordDatasetV2.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_FixedLengthRecordReader.pbtxt  | 6 ++++++
 .../java_api/api_def_FixedLengthRecordReaderV2.pbtxt        | 6 ++++++
 .../java_api/api_def_FixedUnigramCandidateSampler.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_FlatMapDataset.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Floor.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_FloorDiv.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_FloorMod.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_FlushSummaryWriter.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_For.pbtxt          | 3 +++
 .../core/api_def/java_api/api_def_FractionalAvgPool.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_FractionalAvgPoolGrad.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_FractionalMaxPool.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_FractionalMaxPoolGrad.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_FusedBatchNorm.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_FusedPadConv2D.pbtxt      | 6 ++++++
 .../api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Gather.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_GatherNd.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt     | 3 +++
 .../api_def/java_api/api_def_GcsConfigureBlockCache.pbtxt   | 3 +++
 .../api_def/java_api/api_def_GcsConfigureCredentials.pbtxt  | 3 +++
 .../java_api/api_def_GenerateBigQueryReaderPartitions.pbtxt | 3 +++
 .../api_def/java_api/api_def_GenerateVocabRemapping.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_GeneratorDataset.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_GetSessionHandle.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_GetSessionTensor.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_Greater.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_GreaterEqual.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_GroupByReducerDataset.pbtxt    | 3 +++
 .../api_def/java_api/api_def_GroupByWindowDataset.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_GuaranteeConst.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_HSVToRGB.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_HistogramFixedWidth.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_HistogramSummary.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_HostConst.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_IFFT.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IFFT2D.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IFFT3D.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Identity.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_IdentityN.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_IdentityReader.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_IdentityReaderV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_If.pbtxt           | 3 +++
 tensorflow/core/api_def/java_api/api_def_Igamma.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IgammaGradA.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Igammac.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Imag.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ImageSummary.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_ImmutableConst.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_ImportEvent.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_InitializeTable.pbtxt     | 3 +++
 .../java_api/api_def_InitializeTableFromTextFile.pbtxt      | 3 +++
 .../java_api/api_def_InitializeTableFromTextFileV2.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_InitializeTableV2.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_InplaceAdd.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_InplaceSub.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_InplaceUpdate.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_InterleaveDataset.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Inv.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_InvGrad.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Invert.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_InvertPermutation.pbtxt   | 6 ++++++
 .../api_def_IsBoostedTreesEnsembleInitialized.pbtxt         | 3 +++
 ...ef_IsBoostedTreesQuantileStreamResourceInitialized.pbtxt | 3 +++
 tensorflow/core/api_def/java_api/api_def_IsFinite.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IsInf.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IsNan.pbtxt        | 6 ++++++
 .../api_def/java_api/api_def_IsVariableInitialized.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_IteratorFromStringHandle.pbtxt | 6 ++++++
 .../java_api/api_def_IteratorFromStringHandleV2.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_IteratorGetNext.pbtxt     | 6 ++++++
 .../java_api/api_def_IteratorGetNextAsOptional.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_IteratorGetNextSync.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_IteratorToStringHandle.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_L2Loss.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LRN.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LRNGrad.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_LatencyStatsDataset.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LeakyRelu.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_LeakyReluGrad.pbtxt       | 6 ++++++
 .../java_api/api_def_LearnedUnigramCandidateSampler.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LeftShift.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Less.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LessEqual.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Lgamma.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LinSpace.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_LoadAndRemapMatrix.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Log.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Log1p.pbtxt        | 6 ++++++
 .../api_def/java_api/api_def_LogMatrixDeterminant.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LogSoftmax.pbtxt   | 6 ++++++
 .../java_api/api_def_LogUniformCandidateSampler.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LogicalAnd.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LogicalNot.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_LogicalOr.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_LookupTableExport.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_LookupTableExportV2.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_LookupTableFind.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_LookupTableFindV2.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_LookupTableImport.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_LookupTableImportV2.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_LookupTableInsert.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_LookupTableSize.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_LoopCond.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_LowerBound.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_MakeIterator.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_MapClear.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_MapDataset.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_MapDefun.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_MapIncompleteSize.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_MapPeek.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_MapSize.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_MapStage.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_MapUnstage.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_MapUnstageNoKey.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_MatMul.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_MatchingFiles.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_MatchingFilesDataset.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixBandPart.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixDeterminant.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_MatrixDiag.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixDiagPart.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixExponential.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixInverse.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixLogarithm.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixSetDiag.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_MatrixSolve.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixSolveLs.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_MatrixSquareRoot.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_MatrixTriangularSolve.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Max.pbtxt          | 3 +++
 tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_MaxPool3D.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt   | 6 ++++++
 .../java_api/api_def_MaxPoolGradGradWithArgmax.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_MaxPoolGradWithArgmax.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_MaxPoolWithArgmax.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Maximum.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Mean.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Merge.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_MergeSummary.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_MergeV2Checkpoints.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Mfcc.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Min.pbtxt          | 3 +++
 tensorflow/core/api_def/java_api/api_def_Minimum.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_MirrorPad.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_MirrorPadGrad.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_Mod.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ModelDataset.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Mul.pbtxt          | 6 ++++++
 .../core/api_def/java_api/api_def_MultiDeviceIterator.pbtxt | 6 ++++++
 .../api_def_MultiDeviceIteratorFromStringHandle.pbtxt       | 6 ++++++
 .../api_def_MultiDeviceIteratorGetNextFromShard.pbtxt       | 6 ++++++
 .../api_def/java_api/api_def_MultiDeviceIteratorInit.pbtxt  | 6 ++++++
 .../api_def_MultiDeviceIteratorToStringHandle.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Multinomial.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_MutableDenseHashTable.pbtxt    | 3 +++
 .../api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_MutableHashTable.pbtxt    | 3 +++
 .../java_api/api_def_MutableHashTableOfTensors.pbtxt        | 3 +++
 .../java_api/api_def_MutableHashTableOfTensorsV2.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_MutableHashTableV2.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_MutexLock.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_NcclAllReduce.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_NcclBroadcast.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_NcclReduce.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_Neg.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_NegTrain.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_NextIteration.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_NoOp.pbtxt         | 3 +++
 .../core/api_def/java_api/api_def_NonMaxSuppression.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt | 6 ++++++
 .../java_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_NotEqual.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_NthElement.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_OneHot.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_OneShotIterator.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_OnesLike.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_OptimizeDataset.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_OptionalFromValue.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_OptionalGetValue.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_OptionalHasValue.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_OptionalNone.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_OrderedMapClear.pbtxt     | 3 +++
 .../api_def/java_api/api_def_OrderedMapIncompleteSize.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_OrderedMapPeek.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_OrderedMapSize.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_OrderedMapStage.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_OrderedMapUnstage.pbtxt   | 3 +++
 .../api_def/java_api/api_def_OrderedMapUnstageNoKey.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_Pack.pbtxt         | 3 +++
 tensorflow/core/api_def/java_api/api_def_Pad.pbtxt          | 3 +++
 tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt        | 3 +++
 .../core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_ParallelConcat.pbtxt      | 3 +++
 .../api_def/java_api/api_def_ParallelDynamicStitch.pbtxt    | 3 +++
 .../java_api/api_def_ParallelInterleaveDataset.pbtxt        | 6 ++++++
 .../java_api/api_def_ParallelInterleaveDatasetV2.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_ParallelMapDataset.pbtxt  | 6 ++++++
 .../java_api/api_def_ParameterizedTruncatedNormal.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ParseExample.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_ParseExampleDataset.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_ParseSequenceExample.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_ParseSingleExample.pbtxt  | 6 ++++++
 .../java_api/api_def_ParseSingleSequenceExample.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ParseTensor.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_PartitionedCall.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_PlaceholderV2.pbtxt       | 3 +++
 .../api_def/java_api/api_def_PlaceholderWithDefault.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_Polygamma.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_PopulationCount.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Pow.pbtxt          | 6 ++++++
 .../core/api_def/java_api/api_def_PrefetchDataset.pbtxt     | 6 ++++++
 .../api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_PreventGradient.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Print.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_PriorityQueue.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_PriorityQueueV2.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Prod.pbtxt         | 3 +++
 tensorflow/core/api_def/java_api/api_def_PyFunc.pbtxt       | 4 ++++
 .../core/api_def/java_api/api_def_PyFuncStateless.pbtxt     | 4 ++++
 tensorflow/core/api_def/java_api/api_def_Qr.pbtxt           | 6 ++++++
 .../api_def/java_api/api_def_QuantizeAndDequantize.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt  | 6 ++++++
 .../java_api/api_def_QuantizeDownAndShrinkRange.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QuantizedAdd.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedAvgPool.pbtxt    | 6 ++++++
 .../api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedBiasAdd.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedConcat.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_QuantizedConv2D.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_QuantizedInstanceNorm.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedMatMul.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedMaxPool.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QuantizedMul.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedRelu.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedRelu6.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedReluX.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_QuantizedReshape.pbtxt    | 3 +++
 .../api_def/java_api/api_def_QuantizedResizeBilinear.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_QueueDequeueMany.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_QueueDequeueV2.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_QueueIsClosed.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RGBToHSV.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RaggedGather.pbtxt | 3 +++
 tensorflow/core/api_def/java_api/api_def_RaggedRange.pbtxt  | 3 +++
 .../api_def/java_api/api_def_RaggedTensorToSparse.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_RandomCrop.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_RandomDataset.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RandomGamma.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_RandomGammaGrad.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_RandomPoisson.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_RandomPoissonV2.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_RandomShuffle.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_RandomStandardNormal.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_RandomUniform.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_RandomUniformInt.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Range.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_RangeDataset.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Rank.pbtxt         | 3 +++
 tensorflow/core/api_def/java_api/api_def_ReadFile.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_ReadVariableOp.pbtxt      | 3 +++
 .../api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt | 6 ++++++
 .../java_api/api_def_ReaderNumRecordsProducedV2.pbtxt       | 6 ++++++
 .../java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt      | 6 ++++++
 .../java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_ReaderResetV2.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_ReaderRestoreState.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_ReaderSerializeState.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Real.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RealDiv.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Reciprocal.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_ReciprocalGrad.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RecordInput.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_ReduceDataset.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_ReduceJoin.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RefEnter.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_RefExit.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_RefIdentity.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_RefMerge.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_RefNextIteration.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_RefSelect.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_RefSwitch.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_RegexFullMatch.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RegexReplace.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Relu.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Relu6.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Relu6Grad.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ReluGrad.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RemoteCall.pbtxt   | 3 +++
 .../api_def/java_api/api_def_RemoteFusedGraphExecute.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_RepeatDataset.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_RequantizationRange.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Requantize.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Reshape.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ResizeArea.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_ResizeBicubic.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_ResizeBicubicGrad.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_ResizeBilinear.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_ResizeBilinearGrad.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_ResizeNearestNeighbor.pbtxt    | 6 ++++++
 .../java_api/api_def_ResizeNearestNeighborGrad.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_ResourceApplyAdaMax.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_ResourceApplyAdadelta.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_ResourceApplyAdagrad.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_ResourceApplyAdam.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_ResourceApplyAddSign.pbtxt     | 6 ++++++
 .../java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt | 6 ++++++
 .../java_api/api_def_ResourceApplyGradientDescent.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_ResourceApplyMomentum.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_ResourceApplyPowerSign.pbtxt   | 6 ++++++
 .../java_api/api_def_ResourceApplyProximalAdagrad.pbtxt     | 6 ++++++
 .../api_def_ResourceApplyProximalGradientDescent.pbtxt      | 6 ++++++
 .../api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_ResourceCountUpTo.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_ResourceGather.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_ResourceScatterAdd.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_ResourceScatterDiv.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_ResourceScatterMax.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_ResourceScatterMin.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_ResourceScatterMul.pbtxt  | 3 +++
 .../api_def/java_api/api_def_ResourceScatterNdAdd.pbtxt     | 3 +++
 .../api_def/java_api/api_def_ResourceScatterNdUpdate.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_ResourceScatterSub.pbtxt  | 3 +++
 .../api_def/java_api/api_def_ResourceScatterUpdate.pbtxt    | 3 +++
 .../java_api/api_def_ResourceSparseApplyAdadelta.pbtxt      | 6 ++++++
 .../java_api/api_def_ResourceSparseApplyAdagrad.pbtxt       | 6 ++++++
 .../java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt     | 6 ++++++
 .../api_def_ResourceSparseApplyCenteredRMSProp.pbtxt        | 6 ++++++
 .../api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt  | 6 ++++++
 .../java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt        | 6 ++++++
 .../java_api/api_def_ResourceSparseApplyMomentum.pbtxt      | 6 ++++++
 .../api_def_ResourceSparseApplyProximalAdagrad.pbtxt        | 6 ++++++
 ...api_def_ResourceSparseApplyProximalGradientDescent.pbtxt | 6 ++++++
 .../java_api/api_def_ResourceSparseApplyRMSProp.pbtxt       | 6 ++++++
 .../java_api/api_def_ResourceStridedSliceAssign.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_Restore.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RestoreSlice.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_ReverseSequence.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_ReverseV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RightShift.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Rint.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Roll.pbtxt         | 3 +++
 tensorflow/core/api_def/java_api/api_def_Round.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Rpc.pbtxt          | 3 +++
 tensorflow/core/api_def/java_api/api_def_Rsqrt.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_RsqrtGrad.pbtxt    | 6 ++++++
 .../java_api/api_def_SampleDistortedBoundingBox.pbtxt       | 6 ++++++
 .../java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Save.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SaveSlices.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_ScalarSummary.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ScanDataset.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ScatterAdd.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_ScatterDiv.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_ScatterMax.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_ScatterMin.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_ScatterMul.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_ScatterNd.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ScatterNdAdd.pbtxt | 3 +++
 .../api_def/java_api/api_def_ScatterNdNonAliasingAdd.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_ScatterNdSub.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_ScatterNdUpdate.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_ScatterSub.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_ScatterUpdate.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_SdcaFprint.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_SdcaOptimizer.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SdcaShrinkL1.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SegmentMax.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SegmentMean.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SegmentMin.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SegmentProd.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SegmentSum.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Select.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_SelfAdjointEig.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Selu.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SeluGrad.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_SerializeIterator.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_SerializeManySparse.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SerializeSparse.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_SerializeTensor.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SetSize.pbtxt      | 3 +++
 .../java_api/api_def_SetStatsAggregatorDataset.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Shape.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_ShapeN.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_ShardedFilename.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_ShardedFilespec.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_ShuffleAndRepeatDataset.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_ShuffleDataset.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Sigmoid.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SigmoidGrad.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Sign.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Sin.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Sinh.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Size.pbtxt         | 3 +++
 tensorflow/core/api_def/java_api/api_def_SkipDataset.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Skipgram.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_Slice.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_SlideDataset.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Snapshot.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_Softmax.pbtxt      | 6 ++++++
 .../java_api/api_def_SoftmaxCrossEntropyWithLogits.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Softplus.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SoftplusGrad.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Softsign.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SoftsignGrad.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SpaceToBatch.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SpaceToBatchND.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SpaceToDepth.pbtxt | 6 ++++++
 .../java_api/api_def_SparseAccumulatorApplyGradient.pbtxt   | 6 ++++++
 .../java_api/api_def_SparseAccumulatorTakeGradient.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SparseAdd.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_SparseAddGrad.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_SparseApplyAdadelta.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SparseApplyAdagrad.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt     | 6 ++++++
 .../java_api/api_def_SparseApplyCenteredRMSProp.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_SparseApplyMomentum.pbtxt | 6 ++++++
 .../java_api/api_def_SparseApplyProximalAdagrad.pbtxt       | 6 ++++++
 .../api_def_SparseApplyProximalGradientDescent.pbtxt        | 6 ++++++
 .../core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SparseConcat.pbtxt | 6 ++++++
 .../java_api/api_def_SparseConditionalAccumulator.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SparseCross.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_SparseDenseCwiseAdd.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SparseDenseCwiseDiv.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SparseDenseCwiseMul.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SparseFillEmptyRows.pbtxt | 6 ++++++
 .../api_def/java_api/api_def_SparseFillEmptyRowsGrad.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SparseMatMul.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SparseReduceMax.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_SparseReduceMaxSparse.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_SparseReduceSum.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_SparseReduceSumSparse.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_SparseReorder.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_SparseReshape.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_SparseSegmentMean.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_SparseSegmentMeanGrad.pbtxt    | 6 ++++++
 .../java_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SparseSegmentSqrtN.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_SparseSegmentSqrtNGrad.pbtxt   | 6 ++++++
 .../api_def_SparseSegmentSqrtNWithNumSegments.pbtxt         | 6 ++++++
 .../core/api_def/java_api/api_def_SparseSegmentSum.pbtxt    | 6 ++++++
 .../java_api/api_def_SparseSegmentSumWithNumSegments.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SparseSlice.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_SparseSliceGrad.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_SparseSoftmax.pbtxt       | 6 ++++++
 .../api_def_SparseSoftmaxCrossEntropyWithLogits.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_SparseSparseMaximum.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SparseSparseMinimum.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SparseSplit.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_SparseTensorDenseAdd.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_SparseTensorDenseMatMul.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_SparseTensorSliceDataset.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_SparseToDense.pbtxt       | 6 ++++++
 .../java_api/api_def_SparseToSparseSetOperation.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Split.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_SplitV.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_SqlDataset.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Sqrt.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_SqrtGrad.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Square.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_SquaredDifference.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Squeeze.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_Stack.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_Stage.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_StageClear.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_StagePeek.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_StageSize.pbtxt    | 3 +++
 .../api_def/java_api/api_def_StatefulPartitionedCall.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_StatelessIf.pbtxt  | 3 +++
 .../api_def/java_api/api_def_StatelessMultinomial.pbtxt     | 6 ++++++
 .../api_def/java_api/api_def_StatelessRandomNormal.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_StatelessRandomUniform.pbtxt   | 6 ++++++
 .../java_api/api_def_StatelessRandomUniformInt.pbtxt        | 6 ++++++
 .../api_def/java_api/api_def_StatelessTruncatedNormal.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_StatelessWhile.pbtxt      | 3 +++
 .../api_def/java_api/api_def_StaticRegexFullMatch.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_StaticRegexReplace.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_StatsAggregatorHandle.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_StatsAggregatorSummary.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_StopGradient.pbtxt | 3 +++
 tensorflow/core/api_def/java_api/api_def_StridedSlice.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_StridedSliceAssign.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_StridedSliceGrad.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_StringFormat.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_StringJoin.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_StringLength.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_StringSplitV2.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_StringStrip.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_StringToHashBucket.pbtxt  | 6 ++++++
 .../api_def/java_api/api_def_StringToHashBucketFast.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_StringToHashBucketStrong.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_StringToNumber.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Sub.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Substr.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Sum.pbtxt          | 3 +++
 .../core/api_def/java_api/api_def_SummaryWriter.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Svd.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Switch.pbtxt       | 6 ++++--
 .../core/api_def/java_api/api_def_SymbolicGradient.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_TFRecordDataset.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_TFRecordReader.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_TakeDataset.pbtxt  | 6 ++++++
 .../java_api/api_def_TakeManySparseFromTensorsMap.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Tan.pbtxt          | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Tanh.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_TanhGrad.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_TemporaryVariable.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayClose.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayConcat.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayGather.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayGrad.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt   | 3 +++
 .../api_def/java_api/api_def_TensorArrayGradWithShape.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayPack.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayRead.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayScatter.pbtxt  | 3 +++
 .../api_def/java_api/api_def_TensorArrayScatterV2.pbtxt     | 3 +++
 .../api_def/java_api/api_def_TensorArrayScatterV3.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorArraySize.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArraySplit.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayUnpack.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayV2.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayV3.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayWrite.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorDataset.pbtxt       | 6 ++++++
 .../java_api/api_def_TensorForestCreateTreeVariable.pbtxt   | 3 +++
 .../java_api/api_def_TensorForestTreeDeserialize.pbtxt      | 3 +++
 .../java_api/api_def_TensorForestTreeIsInitializedOp.pbtxt  | 3 +++
 .../api_def/java_api/api_def_TensorForestTreePredict.pbtxt  | 3 +++
 .../java_api/api_def_TensorForestTreeResourceHandleOp.pbtxt | 3 +++
 .../java_api/api_def_TensorForestTreeSerialize.pbtxt        | 3 +++
 .../api_def/java_api/api_def_TensorForestTreeSize.pbtxt     | 3 +++
 .../api_def/java_api/api_def_TensorListConcatLists.pbtxt    | 3 +++
 .../api_def/java_api/api_def_TensorListElementShape.pbtxt   | 3 +++
 .../api_def/java_api/api_def_TensorListFromTensor.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorListGather.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_TensorListGetItem.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorListLength.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_TensorListPopBack.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorListPushBack.pbtxt  | 3 +++
 .../api_def/java_api/api_def_TensorListPushBackBatch.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorListReserve.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorListScatter.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorListSetItem.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorListStack.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorSliceDataset.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_TensorSummary.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_TensorSummaryV2.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_TextLineDataset.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_TextLineReader.pbtxt      | 6 ++++++
 .../core/api_def/java_api/api_def_TextLineReaderV2.pbtxt    | 6 ++++++
 .../api_def_ThreadUnsafeUnigramCandidateSampler.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Tile.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_TileGrad.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Timestamp.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_TopK.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt       | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Transpose.pbtxt    | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_TruncateDiv.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_TruncateMod.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_TruncatedNormal.pbtxt     | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_TryRpc.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_Unbatch.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_UnbatchDataset.pbtxt      | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_UnbatchGrad.pbtxt  | 3 +++
 .../api_def/java_api/api_def_UnicodeDecodeWithOffsets.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_UnicodeScript.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_UnicodeTranscode.pbtxt    | 6 ++++++
 .../api_def/java_api/api_def_UniformCandidateSampler.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Unique.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_UniqueWithCounts.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_Unpack.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_UnravelIndex.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_UnsortedSegmentMax.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_UnsortedSegmentMin.pbtxt  | 6 ++++++
 .../core/api_def/java_api/api_def_UnsortedSegmentProd.pbtxt | 6 ++++++
 .../core/api_def/java_api/api_def_UnsortedSegmentSum.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Unstage.pbtxt      | 3 +++
 tensorflow/core/api_def/java_api/api_def_UpperBound.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_VarHandleOp.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_VarIsInitializedOp.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_Variable.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_VariableShape.pbtxt       | 3 +++
 tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_Where.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_While.pbtxt        | 3 +++
 .../core/api_def/java_api/api_def_WholeFileReader.pbtxt     | 6 ++++++
 .../core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_WindowDataset.pbtxt       | 6 ++++++
 .../core/api_def/java_api/api_def_WriteAudioSummary.pbtxt   | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_WriteFile.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_WriteGraphSummary.pbtxt   | 6 ++++++
 .../api_def/java_api/api_def_WriteHistogramSummary.pbtxt    | 6 ++++++
 .../core/api_def/java_api/api_def_WriteImageSummary.pbtxt   | 6 ++++++
 .../core/api_def/java_api/api_def_WriteScalarSummary.pbtxt  | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_WriteSummary.pbtxt | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Xdivy.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_Xlogy.pbtxt        | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ZerosLike.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_Zeta.pbtxt         | 6 ++++++
 tensorflow/core/api_def/java_api/api_def_ZipDataset.pbtxt   | 6 ++++++
 997 files changed, 5031 insertions(+), 6 deletions(-)
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Abort.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Abs.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AccumulatorApplyGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AccumulatorNumAccumulated.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AccumulatorSetGlobalStep.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AccumulatorTakeGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Acos.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Acosh.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Add.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AddManySparseToTensorsMap.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AddN.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AddSparseToTensorsMap.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AdjustContrastv2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AdjustHue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AdjustSaturation.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_All.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AllCandidateSampler.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Angle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AnonymousIterator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Any.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyAdaMax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyAdadelta.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyAdagrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyAdam.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyAddSign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyGradientDescent.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyMomentum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyPowerSign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyProximalAdagrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyProximalGradientDescent.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ApproximateEqual.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ArgMax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ArgMin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AsString.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Asin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Asinh.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Assign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AssignAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AssignAddVariableOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AssignSub.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AssignSubVariableOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AssignVariableOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Atan.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Atan2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Atanh.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AudioSpectrogram.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AvgPool.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AvgPool3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_AvgPoolGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Barrier.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BarrierClose.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BarrierIncompleteSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BarrierInsertMany.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BarrierReadySize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BarrierTakeMany.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Batch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchCholesky.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchCholeskyGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchFunction.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixBandPart.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixDeterminant.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixDiag.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixDiagPart.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixInverse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixSetDiag.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixSolve.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixSolveLs.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchMatrixTriangularSolve.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchNormWithGlobalNormalization.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchNormWithGlobalNormalizationGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchSvd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchToSpace.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BatchToSpaceND.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BesselI0e.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BesselI1e.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Betainc.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BiasAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BiasAddGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BigQueryReader.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Bincount.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Bitcast.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BitwiseAnd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BitwiseOr.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BitwiseXor.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesBucketize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesCenterBias.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesCreateEnsemble.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesCreateQuantileStreamResource.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesDeserializeEnsemble.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesEnsembleResourceHandleOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesExampleDebugOutputs.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesGetEnsembleStates.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesMakeQuantileSummaries.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesMakeStatsSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesPredict.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceAddSummaries.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceDeserialize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceFlush.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceGetBucketBoundaries.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceHandleOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesSerializeEnsemble.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesTrainingPredict.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BoostedTreesUpdateEnsemble.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BroadcastArgs.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BroadcastGradientArgs.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BroadcastTo.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Bucketize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_BytesProducedStatsDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CacheDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Cast.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Ceil.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CheckNumerics.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Cholesky.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CholeskyGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ClipByValue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CloseSummaryWriter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CollectiveBcastRecv.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CollectiveBcastSend.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CollectiveReduce.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CompareAndBitpack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Complex.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ComplexAbs.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ComputeAccidentalHits.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Concat.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ConcatOffset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ConcatenateDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ConditionalAccumulator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conj.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ConjugateTranspose.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ConsumeMutexLock.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ControlTrigger.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Cos.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Cosh.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CountUpTo.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CreateSummaryDbWriter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CreateSummaryFileWriter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CropAndResize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CropAndResizeGradBoxes.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CropAndResizeGradImage.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Cross.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Cumprod.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Cumsum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DataFormatDimMap.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DataFormatVecPermute.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DatasetToGraph.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DatasetToSingleElement.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DebugGradientIdentity.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DebugGradientRefIdentity.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeAndCropJpeg.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeBase64.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeBmp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeCompressed.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeGif.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeJSONExample.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeJpeg.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodePng.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeRaw.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DecodeWav.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DeepCopy.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DeleteSessionTensor.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DenseToDenseSetOperation.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DenseToSparseBatchDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DenseToSparseSetOperation.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DepthToSpace.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNative.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Dequantize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DeserializeIterator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DeserializeManySparse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DeserializeSparse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DestroyResourceOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DestroyTemporaryVariable.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Diag.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DiagPart.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Digamma.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Dilation2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Div.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DivNoNan.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DrawBoundingBoxes.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DynamicPartition.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_DynamicStitch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EagerPyFunc.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EditDistance.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Elu.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EluGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Empty.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EmptyTensorList.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EncodeBase64.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EncodeJpeg.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EncodePng.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EncodeProto.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EncodeWav.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EnqueueInQueueDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_EnsureShape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Enter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Equal.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Erf.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Erfc.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Exit.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Exp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExpandDims.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalAssertNextDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResource.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalIndexedDatasetGet.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalIteratorGetDevice.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalMapDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalNonSerializableDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalSleepDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalThreadPoolDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalThreadPoolHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExperimentalUniqueDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Expm1.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExtractGlimpse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExtractImagePatches.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExtractJpegShape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ExtractVolumePatches.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FFT.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FFT2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FFT3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Fact.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FakeParam.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxArgs.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxArgsGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVars.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FakeQueue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Fill.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FilterByLastComponentDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FilterDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReader.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FixedUnigramCandidateSampler.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FlatMapDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Floor.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FloorDiv.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FloorMod.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FlushSummaryWriter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_For.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FractionalAvgPool.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FractionalAvgPoolGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FractionalMaxPool.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FractionalMaxPoolGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Gather.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GatherNd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GcsConfigureBlockCache.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GcsConfigureCredentials.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GenerateBigQueryReaderPartitions.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GenerateVocabRemapping.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GeneratorDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GetSessionHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GetSessionTensor.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Greater.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GreaterEqual.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GroupByReducerDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GroupByWindowDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_GuaranteeConst.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_HSVToRGB.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_HistogramFixedWidth.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_HistogramSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_HostConst.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IFFT.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IFFT2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IFFT3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Identity.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IdentityN.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IdentityReaderV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_If.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Igamma.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IgammaGradA.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Igammac.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Imag.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ImageSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ImmutableConst.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ImportEvent.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InitializeTable.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFile.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFileV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InitializeTableV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InplaceAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InplaceSub.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InplaceUpdate.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InterleaveDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Inv.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InvGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Invert.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_InvertPermutation.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IsBoostedTreesEnsembleInitialized.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IsBoostedTreesQuantileStreamResourceInitialized.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IsFinite.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IsInf.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IsNan.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IsVariableInitialized.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IteratorGetNext.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IteratorGetNextAsOptional.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IteratorGetNextSync.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IteratorToStringHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_L2Loss.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LRN.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LRNGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LatencyStatsDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LeakyRelu.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LeakyReluGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LearnedUnigramCandidateSampler.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LeftShift.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Less.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LessEqual.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Lgamma.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LinSpace.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LoadAndRemapMatrix.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Log.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Log1p.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LogMatrixDeterminant.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LogSoftmax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LogUniformCandidateSampler.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LogicalAnd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LogicalNot.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LogicalOr.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableExport.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableExportV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableFind.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableFindV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableImport.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableImportV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableInsert.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LoopCond.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_LowerBound.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MakeIterator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapClear.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapDefun.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapIncompleteSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapPeek.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapStage.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapUnstage.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MapUnstageNoKey.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatchingFiles.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatchingFilesDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixBandPart.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixDeterminant.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixDiag.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixDiagPart.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixExponential.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixInverse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixLogarithm.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixSetDiag.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixSolve.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixSolveLs.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixSquareRoot.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MatrixTriangularSolve.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Max.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPool3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradWithArgmax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPoolGradWithArgmax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MaxPoolWithArgmax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Maximum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Mean.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Merge.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MergeSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MergeV2Checkpoints.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Mfcc.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Min.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Minimum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MirrorPad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MirrorPadGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Mod.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ModelDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Mul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MultiDeviceIterator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorFromStringHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorGetNextFromShard.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorInit.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorToStringHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Multinomial.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MutableHashTable.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensorsV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MutableHashTableV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MutexLock.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NcclAllReduce.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NcclBroadcast.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NcclReduce.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Neg.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NegTrain.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NextIteration.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NoOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NonMaxSuppression.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NotEqual.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_NthElement.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OneHot.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OneShotIterator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OnesLike.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OptimizeDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OptionalFromValue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OptionalGetValue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OptionalHasValue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OptionalNone.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OrderedMapClear.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OrderedMapIncompleteSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OrderedMapPeek.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OrderedMapSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OrderedMapStage.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OrderedMapUnstage.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_OrderedMapUnstageNoKey.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Pack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Pad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParallelConcat.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParallelDynamicStitch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDatasetV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParallelMapDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParameterizedTruncatedNormal.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParseExample.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParseExampleDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParseSequenceExample.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParseSingleExample.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParseSingleSequenceExample.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ParseTensor.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PartitionedCall.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PlaceholderWithDefault.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Polygamma.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PopulationCount.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Pow.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PrefetchDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PreventGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Print.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PriorityQueueV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Prod.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PyFunc.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_PyFuncStateless.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Qr.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeDownAndShrinkRange.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedAvgPool.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedBiasAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedConcat.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedInstanceNorm.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedMatMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedMaxPool.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedRelu.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedRelu6.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedReluX.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedReshape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QuantizedResizeBilinear.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueDequeueMany.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RGBToHSV.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RaggedGather.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RaggedRange.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RaggedTensorToSparse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomCrop.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomGamma.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomGammaGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomPoissonV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomShuffle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomStandardNormal.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomUniform.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RandomUniformInt.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Range.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RangeDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Rank.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReadFile.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReadVariableOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderRestoreState.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderSerializeState.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Real.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RealDiv.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Reciprocal.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReciprocalGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RecordInput.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReduceDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReduceJoin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RefEnter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RefExit.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RefIdentity.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RefMerge.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RefNextIteration.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RefSelect.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RefSwitch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RegexFullMatch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RegexReplace.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Relu.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Relu6.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Relu6Grad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReluGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RemoteCall.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RemoteFusedGraphExecute.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RepeatDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RequantizationRange.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Requantize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Reshape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResizeArea.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResizeBicubic.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResizeBicubicGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResizeBilinear.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResizeBilinearGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResizeNearestNeighbor.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResizeNearestNeighborGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyAdaMax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyAdadelta.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyAdam.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyAddSign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyGradientDescent.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyMomentum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyPowerSign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyProximalAdagrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyProximalGradientDescent.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceCountUpTo.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceGather.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterDiv.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterMax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterMin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterNdAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterNdUpdate.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterSub.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceScatterUpdate.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdadelta.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyMomentum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ResourceStridedSliceAssign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Restore.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RestoreSlice.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReverseSequence.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ReverseV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RightShift.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Rint.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Roll.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Round.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Rpc.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Rsqrt.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_RsqrtGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Save.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SaveSlices.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScalarSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScanDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterDiv.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterMax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterMin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterNd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterNdAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterNdNonAliasingAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterNdSub.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterNdUpdate.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterSub.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ScatterUpdate.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SdcaFprint.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SdcaShrinkL1.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SegmentMax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SegmentMean.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SegmentMin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SegmentProd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SegmentSum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Select.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Selu.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SeluGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SerializeIterator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SerializeManySparse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SerializeSparse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SerializeTensor.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SetSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SetStatsAggregatorDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Shape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ShapeN.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ShardedFilename.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ShardedFilespec.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ShuffleAndRepeatDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ShuffleDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Sigmoid.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SigmoidGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Sign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Sin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Sinh.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Size.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SkipDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Skipgram.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Slice.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SlideDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Snapshot.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Softmax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SoftmaxCrossEntropyWithLogits.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Softplus.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SoftplusGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Softsign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SoftsignGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SpaceToBatch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SpaceToBatchND.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SpaceToDepth.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseAccumulatorApplyGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseAccumulatorTakeGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseAddGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyAdadelta.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyAdagrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyMomentum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyProximalAdagrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyProximalGradientDescent.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseConcat.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseConditionalAccumulator.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseCross.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseDiv.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseFillEmptyRows.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseFillEmptyRowsGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseMatMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseReduceMax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseReduceMaxSparse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseReduceSum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseReduceSumSparse.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseReorder.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseReshape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSegmentMean.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSegmentMeanGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtN.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtNGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSegmentSum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSegmentSumWithNumSegments.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSlice.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSliceGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSoftmax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSoftmaxCrossEntropyWithLogits.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSparseMaximum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSparseMinimum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseSplit.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseTensorDenseAdd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseTensorDenseMatMul.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseTensorSliceDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseToDense.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SparseToSparseSetOperation.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Split.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SplitV.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SqlDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Sqrt.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SqrtGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Square.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SquaredDifference.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Squeeze.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Stack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Stage.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StageClear.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StagePeek.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StageSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatefulPartitionedCall.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatelessIf.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatelessMultinomial.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatelessRandomNormal.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatelessRandomUniform.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatelessRandomUniformInt.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatelessTruncatedNormal.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatelessWhile.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StaticRegexFullMatch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StaticRegexReplace.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatsAggregatorHandle.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StatsAggregatorSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StopGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StridedSlice.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StridedSliceAssign.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StridedSliceGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringFormat.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringJoin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringLength.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringStrip.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringToHashBucket.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringToHashBucketFast.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringToHashBucketStrong.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_StringToNumber.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Sub.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Substr.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Sum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SummaryWriter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Svd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_SymbolicGradient.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TakeDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TakeManySparseFromTensorsMap.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Tan.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Tanh.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TanhGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TemporaryVariable.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayClose.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayConcat.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayGather.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayGradWithShape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayPack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayRead.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayScatter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArraySize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArraySplit.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayUnpack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayWrite.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorForestCreateTreeVariable.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorForestTreeDeserialize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorForestTreeIsInitializedOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorForestTreePredict.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorForestTreeResourceHandleOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorForestTreeSerialize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorForestTreeSize.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListConcatLists.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListElementShape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListFromTensor.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListGather.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListGetItem.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListLength.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListPopBack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListPushBack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListPushBackBatch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListReserve.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListScatter.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListSetItem.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorListStack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorSliceDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TensorSummaryV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TextLineDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TextLineReaderV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ThreadUnsafeUnigramCandidateSampler.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Tile.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TileGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Timestamp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TopK.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Transpose.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TruncateDiv.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TruncateMod.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TruncatedNormal.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_TryRpc.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Unbatch.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnbatchDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnbatchGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnicodeDecodeWithOffsets.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnicodeScript.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnicodeTranscode.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UniformCandidateSampler.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Unique.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UniqueWithCounts.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Unpack.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnravelIndex.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnsortedSegmentMax.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnsortedSegmentMin.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnsortedSegmentProd.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UnsortedSegmentSum.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Unstage.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_UpperBound.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_VarHandleOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_VarIsInitializedOp.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Variable.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_VariableShape.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Where.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_While.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WholeFileReader.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WindowDataset.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WriteAudioSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WriteFile.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WriteGraphSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WriteHistogramSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WriteImageSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WriteScalarSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_WriteSummary.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Xdivy.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Xlogy.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ZerosLike.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_Zeta.pbtxt
 create mode 100644 tensorflow/core/api_def/java_api/api_def_ZipDataset.pbtxt

diff --git a/tensorflow/core/api_def/java_api/api_def_Abort.pbtxt b/tensorflow/core/api_def/java_api/api_def_Abort.pbtxt
new file mode 100644
index 0000000000..58448c2d17
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Abort.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Abort"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Abs.pbtxt b/tensorflow/core/api_def/java_api/api_def_Abs.pbtxt
new file mode 100644
index 0000000000..ece45cf73f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Abs.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Abs"
+  endpoint {
+    name: "math.Abs"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt
new file mode 100644
index 0000000000..09e7a155e0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AccumulateNV2"
+  endpoint {
+    name: "math.AccumulateNV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AccumulatorApplyGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_AccumulatorApplyGradient.pbtxt
new file mode 100644
index 0000000000..49b7acad7d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AccumulatorApplyGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AccumulatorApplyGradient"
+  endpoint {
+    name: "train.AccumulatorApplyGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AccumulatorNumAccumulated.pbtxt b/tensorflow/core/api_def/java_api/api_def_AccumulatorNumAccumulated.pbtxt
new file mode 100644
index 0000000000..1c42e819bf
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AccumulatorNumAccumulated.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AccumulatorNumAccumulated"
+  endpoint {
+    name: "train.AccumulatorNumAccumulated"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AccumulatorSetGlobalStep.pbtxt b/tensorflow/core/api_def/java_api/api_def_AccumulatorSetGlobalStep.pbtxt
new file mode 100644
index 0000000000..ca85302cdb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AccumulatorSetGlobalStep.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AccumulatorSetGlobalStep"
+  endpoint {
+    name: "train.AccumulatorSetGlobalStep"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AccumulatorTakeGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_AccumulatorTakeGradient.pbtxt
new file mode 100644
index 0000000000..4883802c63
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AccumulatorTakeGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AccumulatorTakeGradient"
+  endpoint {
+    name: "train.AccumulatorTakeGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Acos.pbtxt b/tensorflow/core/api_def/java_api/api_def_Acos.pbtxt
new file mode 100644
index 0000000000..847986b429
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Acos.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Acos"
+  endpoint {
+    name: "math.Acos"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Acosh.pbtxt b/tensorflow/core/api_def/java_api/api_def_Acosh.pbtxt
new file mode 100644
index 0000000000..76d8f5fad0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Acosh.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Acosh"
+  endpoint {
+    name: "math.Acosh"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Add.pbtxt b/tensorflow/core/api_def/java_api/api_def_Add.pbtxt
new file mode 100644
index 0000000000..4f78ccc9ea
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Add.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Add"
+  endpoint {
+    name: "math.Add"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AddManySparseToTensorsMap.pbtxt b/tensorflow/core/api_def/java_api/api_def_AddManySparseToTensorsMap.pbtxt
new file mode 100644
index 0000000000..e009ba19d3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AddManySparseToTensorsMap.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AddManySparseToTensorsMap"
+  endpoint {
+    name: "sparse.AddManySparseToTensorsMap"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AddN.pbtxt b/tensorflow/core/api_def/java_api/api_def_AddN.pbtxt
new file mode 100644
index 0000000000..20d469ae73
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AddN.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AddN"
+  endpoint {
+    name: "math.AddN"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AddSparseToTensorsMap.pbtxt b/tensorflow/core/api_def/java_api/api_def_AddSparseToTensorsMap.pbtxt
new file mode 100644
index 0000000000..0bb20186de
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AddSparseToTensorsMap.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AddSparseToTensorsMap"
+  endpoint {
+    name: "sparse.AddSparseToTensorsMap"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
new file mode 100644
index 0000000000..f42a2add07
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AddV2"
+  endpoint {
+    name: "math.AddV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt b/tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt
new file mode 100644
index 0000000000..ff49aec2db
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AdjustContrast"
+  endpoint {
+    name: "image.AdjustContrast"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AdjustContrastv2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AdjustContrastv2.pbtxt
new file mode 100644
index 0000000000..dd6647b137
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AdjustContrastv2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AdjustContrastv2"
+  endpoint {
+    name: "image.AdjustContrastv2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AdjustHue.pbtxt b/tensorflow/core/api_def/java_api/api_def_AdjustHue.pbtxt
new file mode 100644
index 0000000000..0847cad403
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AdjustHue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AdjustHue"
+  endpoint {
+    name: "image.AdjustHue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AdjustSaturation.pbtxt b/tensorflow/core/api_def/java_api/api_def_AdjustSaturation.pbtxt
new file mode 100644
index 0000000000..d685636eb1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AdjustSaturation.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AdjustSaturation"
+  endpoint {
+    name: "image.AdjustSaturation"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_All.pbtxt b/tensorflow/core/api_def/java_api/api_def_All.pbtxt
new file mode 100644
index 0000000000..a6459c56b7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_All.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "All"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AllCandidateSampler.pbtxt b/tensorflow/core/api_def/java_api/api_def_AllCandidateSampler.pbtxt
new file mode 100644
index 0000000000..607c208a46
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AllCandidateSampler.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AllCandidateSampler"
+  endpoint {
+    name: "random.AllCandidateSampler"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Angle.pbtxt b/tensorflow/core/api_def/java_api/api_def_Angle.pbtxt
new file mode 100644
index 0000000000..a92ccf357d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Angle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Angle"
+  endpoint {
+    name: "math.Angle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AnonymousIterator.pbtxt b/tensorflow/core/api_def/java_api/api_def_AnonymousIterator.pbtxt
new file mode 100644
index 0000000000..894f85ae88
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AnonymousIterator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AnonymousIterator"
+  endpoint {
+    name: "data.AnonymousIterator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Any.pbtxt b/tensorflow/core/api_def/java_api/api_def_Any.pbtxt
new file mode 100644
index 0000000000..20b36eda3f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Any.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Any"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyAdaMax.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyAdaMax.pbtxt
new file mode 100644
index 0000000000..583f164e06
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyAdaMax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyAdaMax"
+  endpoint {
+    name: "train.ApplyAdaMax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyAdadelta.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyAdadelta.pbtxt
new file mode 100644
index 0000000000..e672a8ef03
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyAdadelta.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyAdadelta"
+  endpoint {
+    name: "train.ApplyAdadelta"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyAdagrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyAdagrad.pbtxt
new file mode 100644
index 0000000000..980c57c5fe
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyAdagrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyAdagrad"
+  endpoint {
+    name: "train.ApplyAdagrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt
new file mode 100644
index 0000000000..d2b38707fb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyAdagradDA"
+  endpoint {
+    name: "train.ApplyAdagradDA"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyAdam.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyAdam.pbtxt
new file mode 100644
index 0000000000..56461b1d3d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyAdam.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyAdam"
+  endpoint {
+    name: "train.ApplyAdam"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyAddSign.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyAddSign.pbtxt
new file mode 100644
index 0000000000..b54ff6eca4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyAddSign.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyAddSign"
+  endpoint {
+    name: "train.ApplyAddSign"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt
new file mode 100644
index 0000000000..6ed0660165
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyCenteredRMSProp"
+  endpoint {
+    name: "train.ApplyCenteredRMSProp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt
new file mode 100644
index 0000000000..388e39c6ba
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyFtrl"
+  endpoint {
+    name: "train.ApplyFtrl"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt
new file mode 100644
index 0000000000..8463ad163b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyFtrlV2"
+  endpoint {
+    name: "train.ApplyFtrlV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyGradientDescent.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyGradientDescent.pbtxt
new file mode 100644
index 0000000000..1fa569ed32
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyGradientDescent.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyGradientDescent"
+  endpoint {
+    name: "train.ApplyGradientDescent"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyMomentum.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyMomentum.pbtxt
new file mode 100644
index 0000000000..96c21199f0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyMomentum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyMomentum"
+  endpoint {
+    name: "train.ApplyMomentum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyPowerSign.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyPowerSign.pbtxt
new file mode 100644
index 0000000000..e5c2234755
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyPowerSign.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyPowerSign"
+  endpoint {
+    name: "train.ApplyPowerSign"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyProximalAdagrad.pbtxt
new file mode 100644
index 0000000000..a52d8c3591
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyProximalAdagrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyProximalAdagrad"
+  endpoint {
+    name: "train.ApplyProximalAdagrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyProximalGradientDescent.pbtxt
new file mode 100644
index 0000000000..74ea29cf88
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyProximalGradientDescent.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyProximalGradientDescent"
+  endpoint {
+    name: "train.ApplyProximalGradientDescent"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt
new file mode 100644
index 0000000000..a1397b16fc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApplyRMSProp"
+  endpoint {
+    name: "train.ApplyRMSProp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ApproximateEqual.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApproximateEqual.pbtxt
new file mode 100644
index 0000000000..029dc6d293
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ApproximateEqual.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ApproximateEqual"
+  endpoint {
+    name: "math.ApproximateEqual"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ArgMax.pbtxt b/tensorflow/core/api_def/java_api/api_def_ArgMax.pbtxt
new file mode 100644
index 0000000000..f9effd49c4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ArgMax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ArgMax"
+  endpoint {
+    name: "math.ArgMax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ArgMin.pbtxt b/tensorflow/core/api_def/java_api/api_def_ArgMin.pbtxt
new file mode 100644
index 0000000000..5ff04c0d1a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ArgMin.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ArgMin"
+  endpoint {
+    name: "math.ArgMin"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AsString.pbtxt b/tensorflow/core/api_def/java_api/api_def_AsString.pbtxt
new file mode 100644
index 0000000000..e8c875ea81
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AsString.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AsString"
+  endpoint {
+    name: "dtypes.AsString"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Asin.pbtxt b/tensorflow/core/api_def/java_api/api_def_Asin.pbtxt
new file mode 100644
index 0000000000..8ffc8e3e57
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Asin.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Asin"
+  endpoint {
+    name: "math.Asin"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Asinh.pbtxt b/tensorflow/core/api_def/java_api/api_def_Asinh.pbtxt
new file mode 100644
index 0000000000..e3b30dd512
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Asinh.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Asinh"
+  endpoint {
+    name: "math.Asinh"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Assert.pbtxt b/tensorflow/core/api_def/java_api/api_def_Assert.pbtxt
index b1f868897d..a9e107b478 100644
--- a/tensorflow/core/api_def/java_api/api_def_Assert.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Assert.pbtxt
@@ -1,4 +1,6 @@
 op {
-  graph_op_name: "Assert" #TODO(karllessard) escape that reserved name
-  visibility: HIDDEN
+  graph_op_name: "Assert"
+  endpoint {
+    name: "AssertThat"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Assign.pbtxt b/tensorflow/core/api_def/java_api/api_def_Assign.pbtxt
new file mode 100644
index 0000000000..15d778f61e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Assign.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Assign"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AssignAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_AssignAdd.pbtxt
new file mode 100644
index 0000000000..a4118b64af
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AssignAdd.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "AssignAdd"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AssignAddVariableOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_AssignAddVariableOp.pbtxt
new file mode 100644
index 0000000000..05fecb191b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AssignAddVariableOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "AssignAddVariableOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AssignSub.pbtxt b/tensorflow/core/api_def/java_api/api_def_AssignSub.pbtxt
new file mode 100644
index 0000000000..aaf9246a6a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AssignSub.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "AssignSub"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AssignSubVariableOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_AssignSubVariableOp.pbtxt
new file mode 100644
index 0000000000..6e8791aed2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AssignSubVariableOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "AssignSubVariableOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AssignVariableOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_AssignVariableOp.pbtxt
new file mode 100644
index 0000000000..980e696826
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AssignVariableOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "AssignVariableOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Atan.pbtxt b/tensorflow/core/api_def/java_api/api_def_Atan.pbtxt
new file mode 100644
index 0000000000..e51aee9abc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Atan.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Atan"
+  endpoint {
+    name: "math.Atan"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Atan2.pbtxt b/tensorflow/core/api_def/java_api/api_def_Atan2.pbtxt
new file mode 100644
index 0000000000..302b05f9dc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Atan2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Atan2"
+  endpoint {
+    name: "math.Atan2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Atanh.pbtxt b/tensorflow/core/api_def/java_api/api_def_Atanh.pbtxt
new file mode 100644
index 0000000000..b9c4a41154
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Atanh.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Atanh"
+  endpoint {
+    name: "math.Atanh"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AudioSpectrogram.pbtxt b/tensorflow/core/api_def/java_api/api_def_AudioSpectrogram.pbtxt
new file mode 100644
index 0000000000..bd8f3a5e33
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AudioSpectrogram.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AudioSpectrogram"
+  endpoint {
+    name: "audio.AudioSpectrogram"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt
new file mode 100644
index 0000000000..026ec79dd9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AudioSummary"
+  endpoint {
+    name: "summary.AudioSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt
new file mode 100644
index 0000000000..07d24ec3a3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AudioSummaryV2"
+  endpoint {
+    name: "summary.AudioSummaryV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AvgPool.pbtxt b/tensorflow/core/api_def/java_api/api_def_AvgPool.pbtxt
new file mode 100644
index 0000000000..10d87802f0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AvgPool.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AvgPool"
+  endpoint {
+    name: "nn.AvgPool"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AvgPool3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_AvgPool3D.pbtxt
new file mode 100644
index 0000000000..1ae2794f48
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AvgPool3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AvgPool3D"
+  endpoint {
+    name: "nn.AvgPool3d"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt
new file mode 100644
index 0000000000..ac51715095
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AvgPool3DGrad"
+  endpoint {
+    name: "nn.AvgPool3DGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_AvgPoolGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_AvgPoolGrad.pbtxt
new file mode 100644
index 0000000000..fc8fec89b9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_AvgPoolGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "AvgPoolGrad"
+  endpoint {
+    name: "nn.AvgPoolGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Barrier.pbtxt b/tensorflow/core/api_def/java_api/api_def_Barrier.pbtxt
new file mode 100644
index 0000000000..6e282ca7b3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Barrier.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Barrier"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BarrierClose.pbtxt b/tensorflow/core/api_def/java_api/api_def_BarrierClose.pbtxt
new file mode 100644
index 0000000000..0307318763
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BarrierClose.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BarrierClose"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BarrierIncompleteSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_BarrierIncompleteSize.pbtxt
new file mode 100644
index 0000000000..fb11b18e95
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BarrierIncompleteSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BarrierIncompleteSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BarrierInsertMany.pbtxt b/tensorflow/core/api_def/java_api/api_def_BarrierInsertMany.pbtxt
new file mode 100644
index 0000000000..32e29f0015
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BarrierInsertMany.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BarrierInsertMany"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BarrierReadySize.pbtxt b/tensorflow/core/api_def/java_api/api_def_BarrierReadySize.pbtxt
new file mode 100644
index 0000000000..0ed50b2579
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BarrierReadySize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BarrierReadySize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BarrierTakeMany.pbtxt b/tensorflow/core/api_def/java_api/api_def_BarrierTakeMany.pbtxt
new file mode 100644
index 0000000000..21f08878c6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BarrierTakeMany.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BarrierTakeMany"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Batch.pbtxt b/tensorflow/core/api_def/java_api/api_def_Batch.pbtxt
new file mode 100644
index 0000000000..2c21faf72d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Batch.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Batch"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchCholesky.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchCholesky.pbtxt
new file mode 100644
index 0000000000..15048109fd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchCholesky.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchCholesky"
+  endpoint {
+    name: "linalg.BatchCholesky"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchCholeskyGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchCholeskyGrad.pbtxt
new file mode 100644
index 0000000000..eb0e2c6bc8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchCholeskyGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchCholeskyGrad"
+  endpoint {
+    name: "linalg.BatchCholeskyGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt
new file mode 100644
index 0000000000..fe47605f37
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchDataset"
+  endpoint {
+    name: "data.BatchDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt
new file mode 100644
index 0000000000..079efe146f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BatchDatasetV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt
new file mode 100644
index 0000000000..62bc804c7e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchFFT"
+  endpoint {
+    name: "signal.BatchFFT"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt
new file mode 100644
index 0000000000..89479537a2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchFFT2D"
+  endpoint {
+    name: "signal.BatchFFT2D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt
new file mode 100644
index 0000000000..e9cff897bb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchFFT3D"
+  endpoint {
+    name: "signal.BatchFFT3D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchFunction.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchFunction.pbtxt
new file mode 100644
index 0000000000..8789dc6acb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchFunction.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BatchFunction"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt
new file mode 100644
index 0000000000..f156a6df39
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchIFFT"
+  endpoint {
+    name: "signal.BatchIFFT"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt
new file mode 100644
index 0000000000..7cce88f8f7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchIFFT2D"
+  endpoint {
+    name: "signal.BatchIFFT2D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt
new file mode 100644
index 0000000000..b30e596d75
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchIFFT3D"
+  endpoint {
+    name: "signal.BatchIFFT3D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatMul.pbtxt
new file mode 100644
index 0000000000..95aa644615
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatMul.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatMul"
+  endpoint {
+    name: "linalg.BatchMatMul"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixBandPart.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixBandPart.pbtxt
new file mode 100644
index 0000000000..de989c6d52
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixBandPart.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixBandPart"
+  endpoint {
+    name: "linalg.BatchMatrixBandPart"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixDeterminant.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixDeterminant.pbtxt
new file mode 100644
index 0000000000..a45fe25d10
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixDeterminant.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixDeterminant"
+  endpoint {
+    name: "linalg.BatchMatrixDeterminant"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixDiag.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixDiag.pbtxt
new file mode 100644
index 0000000000..d85d76f02f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixDiag.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixDiag"
+  endpoint {
+    name: "linalg.BatchMatrixDiag"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixDiagPart.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixDiagPart.pbtxt
new file mode 100644
index 0000000000..4b5350b11e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixDiagPart.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixDiagPart"
+  endpoint {
+    name: "linalg.BatchMatrixDiagPart"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixInverse.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixInverse.pbtxt
new file mode 100644
index 0000000000..f40ea50d4b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixInverse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixInverse"
+  endpoint {
+    name: "linalg.BatchMatrixInverse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixSetDiag.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixSetDiag.pbtxt
new file mode 100644
index 0000000000..ac4cd6889b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixSetDiag.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixSetDiag"
+  endpoint {
+    name: "linalg.BatchMatrixSetDiag"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixSolve.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixSolve.pbtxt
new file mode 100644
index 0000000000..97435acb4e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixSolve.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixSolve"
+  endpoint {
+    name: "linalg.BatchMatrixSolve"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixSolveLs.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixSolveLs.pbtxt
new file mode 100644
index 0000000000..aee0b4add3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixSolveLs.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixSolveLs"
+  endpoint {
+    name: "linalg.BatchMatrixSolveLs"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchMatrixTriangularSolve.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchMatrixTriangularSolve.pbtxt
new file mode 100644
index 0000000000..554eff1574
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchMatrixTriangularSolve.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchMatrixTriangularSolve"
+  endpoint {
+    name: "linalg.BatchMatrixTriangularSolve"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchNormWithGlobalNormalization.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchNormWithGlobalNormalization.pbtxt
new file mode 100644
index 0000000000..8285ac284d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchNormWithGlobalNormalization.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchNormWithGlobalNormalization"
+  endpoint {
+    name: "nn.BatchNormWithGlobalNormalization"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchNormWithGlobalNormalizationGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchNormWithGlobalNormalizationGrad.pbtxt
new file mode 100644
index 0000000000..7b18bf52ac
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchNormWithGlobalNormalizationGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchNormWithGlobalNormalizationGrad"
+  endpoint {
+    name: "nn.BatchNormWithGlobalNormalizationGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt
new file mode 100644
index 0000000000..58d60fa962
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchSelfAdjointEig"
+  endpoint {
+    name: "linalg.BatchSelfAdjointEig"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt
new file mode 100644
index 0000000000..fb18b2a000
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchSelfAdjointEigV2"
+  endpoint {
+    name: "linalg.BatchSelfAdjointEigV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchSvd.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchSvd.pbtxt
new file mode 100644
index 0000000000..8696359df8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchSvd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchSvd"
+  endpoint {
+    name: "linalg.BatchSvd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchToSpace.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchToSpace.pbtxt
new file mode 100644
index 0000000000..affbc519e5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchToSpace.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BatchToSpace"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchToSpaceND.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchToSpaceND.pbtxt
new file mode 100644
index 0000000000..6c7d2fbdb9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BatchToSpaceND.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BatchToSpaceND"
+  endpoint {
+    name: "BatchToSpaceNd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BesselI0e.pbtxt b/tensorflow/core/api_def/java_api/api_def_BesselI0e.pbtxt
new file mode 100644
index 0000000000..84eb3b5e71
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BesselI0e.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BesselI0e"
+  endpoint {
+    name: "math.BesselI0e"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BesselI1e.pbtxt b/tensorflow/core/api_def/java_api/api_def_BesselI1e.pbtxt
new file mode 100644
index 0000000000..43f9113b0b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BesselI1e.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BesselI1e"
+  endpoint {
+    name: "math.BesselI1e"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Betainc.pbtxt b/tensorflow/core/api_def/java_api/api_def_Betainc.pbtxt
new file mode 100644
index 0000000000..6e9956d9ec
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Betainc.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Betainc"
+  endpoint {
+    name: "math.Betainc"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BiasAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_BiasAdd.pbtxt
new file mode 100644
index 0000000000..eb3be23bd9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BiasAdd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BiasAdd"
+  endpoint {
+    name: "nn.BiasAdd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BiasAddGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_BiasAddGrad.pbtxt
new file mode 100644
index 0000000000..4e040bf6df
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BiasAddGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BiasAddGrad"
+  endpoint {
+    name: "nn.BiasAddGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt b/tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt
new file mode 100644
index 0000000000..76ae8ec8ff
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BiasAddV1"
+  endpoint {
+    name: "nn.BiasAddV1"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BigQueryReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_BigQueryReader.pbtxt
new file mode 100644
index 0000000000..5b6e11687a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BigQueryReader.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BigQueryReader"
+  endpoint {
+    name: "io.BigQueryReader"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Bincount.pbtxt b/tensorflow/core/api_def/java_api/api_def_Bincount.pbtxt
new file mode 100644
index 0000000000..b894fd6ec5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Bincount.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Bincount"
+  endpoint {
+    name: "math.Bincount"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Bitcast.pbtxt b/tensorflow/core/api_def/java_api/api_def_Bitcast.pbtxt
new file mode 100644
index 0000000000..9d2db26851
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Bitcast.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Bitcast"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BitwiseAnd.pbtxt b/tensorflow/core/api_def/java_api/api_def_BitwiseAnd.pbtxt
new file mode 100644
index 0000000000..db5fada246
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BitwiseAnd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BitwiseAnd"
+  endpoint {
+    name: "bitwise.BitwiseAnd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BitwiseOr.pbtxt b/tensorflow/core/api_def/java_api/api_def_BitwiseOr.pbtxt
new file mode 100644
index 0000000000..8f9d1bc2fe
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BitwiseOr.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BitwiseOr"
+  endpoint {
+    name: "bitwise.BitwiseOr"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BitwiseXor.pbtxt b/tensorflow/core/api_def/java_api/api_def_BitwiseXor.pbtxt
new file mode 100644
index 0000000000..28f405b8ad
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BitwiseXor.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BitwiseXor"
+  endpoint {
+    name: "bitwise.BitwiseXor"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesBucketize.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesBucketize.pbtxt
new file mode 100644
index 0000000000..acec845ca4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesBucketize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesBucketize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt
new file mode 100644
index 0000000000..fa22216ed5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesCalculateBestGainsPerFeature.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesCalculateBestGainsPerFeature"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesCenterBias.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesCenterBias.pbtxt
new file mode 100644
index 0000000000..1b65775a12
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesCenterBias.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesCenterBias"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesCreateEnsemble.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesCreateEnsemble.pbtxt
new file mode 100644
index 0000000000..194251d433
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesCreateEnsemble.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesCreateEnsemble"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesCreateQuantileStreamResource.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesCreateQuantileStreamResource.pbtxt
new file mode 100644
index 0000000000..7d9c8c9229
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesCreateQuantileStreamResource.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesCreateQuantileStreamResource"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesDeserializeEnsemble.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesDeserializeEnsemble.pbtxt
new file mode 100644
index 0000000000..2ed17ca30f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesDeserializeEnsemble.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesDeserializeEnsemble"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesEnsembleResourceHandleOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesEnsembleResourceHandleOp.pbtxt
new file mode 100644
index 0000000000..a44c86614a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesEnsembleResourceHandleOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesEnsembleResourceHandleOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesExampleDebugOutputs.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesExampleDebugOutputs.pbtxt
new file mode 100644
index 0000000000..4d6f276911
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesExampleDebugOutputs.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesExampleDebugOutputs"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesGetEnsembleStates.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesGetEnsembleStates.pbtxt
new file mode 100644
index 0000000000..a591013fb7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesGetEnsembleStates.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesGetEnsembleStates"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesMakeQuantileSummaries.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesMakeQuantileSummaries.pbtxt
new file mode 100644
index 0000000000..4e147765a7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesMakeQuantileSummaries.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesMakeQuantileSummaries"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesMakeStatsSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesMakeStatsSummary.pbtxt
new file mode 100644
index 0000000000..bbee8bb47c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesMakeStatsSummary.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesMakeStatsSummary"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesPredict.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesPredict.pbtxt
new file mode 100644
index 0000000000..e25d43a18f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesPredict.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesPredict"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceAddSummaries.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceAddSummaries.pbtxt
new file mode 100644
index 0000000000..d9fe96d3cd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceAddSummaries.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesQuantileStreamResourceAddSummaries"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceDeserialize.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceDeserialize.pbtxt
new file mode 100644
index 0000000000..86e8090241
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceDeserialize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesQuantileStreamResourceDeserialize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceFlush.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceFlush.pbtxt
new file mode 100644
index 0000000000..c98375bb24
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceFlush.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesQuantileStreamResourceFlush"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceGetBucketBoundaries.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceGetBucketBoundaries.pbtxt
new file mode 100644
index 0000000000..e0421be40a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceGetBucketBoundaries.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesQuantileStreamResourceGetBucketBoundaries"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceHandleOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceHandleOp.pbtxt
new file mode 100644
index 0000000000..b7f5e8aa65
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesQuantileStreamResourceHandleOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesQuantileStreamResourceHandleOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesSerializeEnsemble.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesSerializeEnsemble.pbtxt
new file mode 100644
index 0000000000..db5807344b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesSerializeEnsemble.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesSerializeEnsemble"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesTrainingPredict.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesTrainingPredict.pbtxt
new file mode 100644
index 0000000000..b722233953
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesTrainingPredict.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesTrainingPredict"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BoostedTreesUpdateEnsemble.pbtxt b/tensorflow/core/api_def/java_api/api_def_BoostedTreesUpdateEnsemble.pbtxt
new file mode 100644
index 0000000000..fb642dd430
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BoostedTreesUpdateEnsemble.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BoostedTreesUpdateEnsemble"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BroadcastArgs.pbtxt b/tensorflow/core/api_def/java_api/api_def_BroadcastArgs.pbtxt
new file mode 100644
index 0000000000..484742a2d0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BroadcastArgs.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BroadcastArgs"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BroadcastGradientArgs.pbtxt b/tensorflow/core/api_def/java_api/api_def_BroadcastGradientArgs.pbtxt
new file mode 100644
index 0000000000..50f631b2a6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BroadcastGradientArgs.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BroadcastGradientArgs"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BroadcastTo.pbtxt b/tensorflow/core/api_def/java_api/api_def_BroadcastTo.pbtxt
new file mode 100644
index 0000000000..127458816c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BroadcastTo.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "BroadcastTo"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Bucketize.pbtxt b/tensorflow/core/api_def/java_api/api_def_Bucketize.pbtxt
new file mode 100644
index 0000000000..5a99712fd6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Bucketize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Bucketize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_BytesProducedStatsDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_BytesProducedStatsDataset.pbtxt
new file mode 100644
index 0000000000..cd7f24d961
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_BytesProducedStatsDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "BytesProducedStatsDataset"
+  endpoint {
+    name: "data.BytesProducedStatsDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt b/tensorflow/core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt
new file mode 100644
index 0000000000..79dae81df9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CTCBeamSearchDecoder"
+  endpoint {
+    name: "nn.CTCBeamSearchDecoder"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt b/tensorflow/core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt
new file mode 100644
index 0000000000..6a58e628bd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CTCGreedyDecoder"
+  endpoint {
+    name: "nn.CTCGreedyDecoder"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt b/tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt
new file mode 100644
index 0000000000..361270e1f7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CTCLoss"
+  endpoint {
+    name: "nn.CTCLoss"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CacheDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_CacheDataset.pbtxt
new file mode 100644
index 0000000000..11c26c1dfc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CacheDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CacheDataset"
+  endpoint {
+    name: "data.CacheDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Cast.pbtxt b/tensorflow/core/api_def/java_api/api_def_Cast.pbtxt
new file mode 100644
index 0000000000..ea9f812e2a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Cast.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Cast"
+  endpoint {
+    name: "dtypes.Cast"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Ceil.pbtxt b/tensorflow/core/api_def/java_api/api_def_Ceil.pbtxt
new file mode 100644
index 0000000000..d1a75f27d9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Ceil.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Ceil"
+  endpoint {
+    name: "math.Ceil"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CheckNumerics.pbtxt b/tensorflow/core/api_def/java_api/api_def_CheckNumerics.pbtxt
new file mode 100644
index 0000000000..731e9030a0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CheckNumerics.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CheckNumerics"
+  endpoint {
+    name: "math.CheckNumerics"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Cholesky.pbtxt b/tensorflow/core/api_def/java_api/api_def_Cholesky.pbtxt
new file mode 100644
index 0000000000..a60c4e3663
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Cholesky.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Cholesky"
+  endpoint {
+    name: "linalg.Cholesky"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CholeskyGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_CholeskyGrad.pbtxt
new file mode 100644
index 0000000000..2601d41554
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CholeskyGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CholeskyGrad"
+  endpoint {
+    name: "linalg.CholeskyGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ClipByValue.pbtxt b/tensorflow/core/api_def/java_api/api_def_ClipByValue.pbtxt
new file mode 100644
index 0000000000..c4e055c117
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ClipByValue.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ClipByValue"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CloseSummaryWriter.pbtxt b/tensorflow/core/api_def/java_api/api_def_CloseSummaryWriter.pbtxt
new file mode 100644
index 0000000000..d5fbe557db
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CloseSummaryWriter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CloseSummaryWriter"
+  endpoint {
+    name: "summary.CloseSummaryWriter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CollectiveBcastRecv.pbtxt b/tensorflow/core/api_def/java_api/api_def_CollectiveBcastRecv.pbtxt
new file mode 100644
index 0000000000..8ada333e44
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CollectiveBcastRecv.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CollectiveBcastRecv"
+  endpoint {
+    name: "collective.BroadcastRecv"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CollectiveBcastSend.pbtxt b/tensorflow/core/api_def/java_api/api_def_CollectiveBcastSend.pbtxt
new file mode 100644
index 0000000000..18b4bef345
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CollectiveBcastSend.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CollectiveBcastSend"
+  endpoint {
+    name: "collective.BroadcastSend"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CollectiveReduce.pbtxt b/tensorflow/core/api_def/java_api/api_def_CollectiveReduce.pbtxt
new file mode 100644
index 0000000000..6226cc05ec
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CollectiveReduce.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CollectiveReduce"
+  endpoint {
+    name: "collective.AllReduce"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CompareAndBitpack.pbtxt b/tensorflow/core/api_def/java_api/api_def_CompareAndBitpack.pbtxt
new file mode 100644
index 0000000000..d744fbbc90
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CompareAndBitpack.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CompareAndBitpack"
+  endpoint {
+    name: "math.CompareAndBitpack"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Complex.pbtxt b/tensorflow/core/api_def/java_api/api_def_Complex.pbtxt
new file mode 100644
index 0000000000..4889360a96
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Complex.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Complex"
+  endpoint {
+    name: "dtypes.Complex"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ComplexAbs.pbtxt b/tensorflow/core/api_def/java_api/api_def_ComplexAbs.pbtxt
new file mode 100644
index 0000000000..42a6a3c6a1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ComplexAbs.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ComplexAbs"
+  endpoint {
+    name: "math.ComplexAbs"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ComputeAccidentalHits.pbtxt b/tensorflow/core/api_def/java_api/api_def_ComputeAccidentalHits.pbtxt
new file mode 100644
index 0000000000..ca9e590fbc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ComputeAccidentalHits.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ComputeAccidentalHits"
+  endpoint {
+    name: "nn.ComputeAccidentalHits"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Concat.pbtxt b/tensorflow/core/api_def/java_api/api_def_Concat.pbtxt
new file mode 100644
index 0000000000..b13eb820ec
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Concat.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Concat"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ConcatOffset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ConcatOffset.pbtxt
new file mode 100644
index 0000000000..e8e23cf559
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ConcatOffset.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ConcatOffset"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt
new file mode 100644
index 0000000000..a600c31ed9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ConcatV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ConcatenateDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ConcatenateDataset.pbtxt
new file mode 100644
index 0000000000..ec8253e9b5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ConcatenateDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ConcatenateDataset"
+  endpoint {
+    name: "data.ConcatenateDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ConditionalAccumulator.pbtxt b/tensorflow/core/api_def/java_api/api_def_ConditionalAccumulator.pbtxt
new file mode 100644
index 0000000000..08431982da
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ConditionalAccumulator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ConditionalAccumulator"
+  endpoint {
+    name: "train.ConditionalAccumulator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conj.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conj.pbtxt
new file mode 100644
index 0000000000..7de199b55f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conj.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conj"
+  endpoint {
+    name: "math.Conj"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ConjugateTranspose.pbtxt b/tensorflow/core/api_def/java_api/api_def_ConjugateTranspose.pbtxt
new file mode 100644
index 0000000000..42173088ae
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ConjugateTranspose.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ConjugateTranspose"
+  endpoint {
+    name: "linalg.ConjugateTranspose"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Const.pbtxt b/tensorflow/core/api_def/java_api/api_def_Const.pbtxt
index 2dbdca34e0..a73f1e6c3a 100644
--- a/tensorflow/core/api_def/java_api/api_def_Const.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Const.pbtxt
@@ -1,4 +1,4 @@
 op {
-  graph_op_name: "Const" #TODO(karllessard) escape that reserved name
-  visibility: HIDDEN
+  graph_op_name: "Const"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ConsumeMutexLock.pbtxt b/tensorflow/core/api_def/java_api/api_def_ConsumeMutexLock.pbtxt
new file mode 100644
index 0000000000..1e0d136bc2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ConsumeMutexLock.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ConsumeMutexLock"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ControlTrigger.pbtxt b/tensorflow/core/api_def/java_api/api_def_ControlTrigger.pbtxt
new file mode 100644
index 0000000000..4517b4373f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ControlTrigger.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ControlTrigger"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt
new file mode 100644
index 0000000000..04c4fcec16
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conv2D"
+  endpoint {
+    name: "nn.Conv2D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt
new file mode 100644
index 0000000000..990a54262e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conv2DBackpropFilter"
+  endpoint {
+    name: "nn.Conv2DBackpropFilter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt
new file mode 100644
index 0000000000..11bbaa455c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conv2DBackpropInput"
+  endpoint {
+    name: "nn.Conv2DBackpropInput"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt
new file mode 100644
index 0000000000..0d97065704
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conv3D"
+  endpoint {
+    name: "nn.Conv3D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
new file mode 100644
index 0000000000..279cd5867e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conv3DBackpropFilter"
+  endpoint {
+    name: "nn.Conv3DBackpropFilter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
new file mode 100644
index 0000000000..0643cc14a9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conv3DBackpropFilterV2"
+  endpoint {
+    name: "nn.Conv3dBackpropFilter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
new file mode 100644
index 0000000000..5be32ebe3c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conv3DBackpropInput"
+  endpoint {
+    name: "nn.Conv3DBackpropInput"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
new file mode 100644
index 0000000000..2ce9cf134b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Conv3DBackpropInputV2"
+  endpoint {
+    name: "nn.Conv3DBackpropInputV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Cos.pbtxt b/tensorflow/core/api_def/java_api/api_def_Cos.pbtxt
new file mode 100644
index 0000000000..db1f62806e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Cos.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Cos"
+  endpoint {
+    name: "math.Cos"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Cosh.pbtxt b/tensorflow/core/api_def/java_api/api_def_Cosh.pbtxt
new file mode 100644
index 0000000000..a4b5e752bf
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Cosh.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Cosh"
+  endpoint {
+    name: "math.Cosh"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CountUpTo.pbtxt b/tensorflow/core/api_def/java_api/api_def_CountUpTo.pbtxt
new file mode 100644
index 0000000000..eb9f328ce0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CountUpTo.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "CountUpTo"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CreateSummaryDbWriter.pbtxt b/tensorflow/core/api_def/java_api/api_def_CreateSummaryDbWriter.pbtxt
new file mode 100644
index 0000000000..299f881dd4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CreateSummaryDbWriter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CreateSummaryDbWriter"
+  endpoint {
+    name: "summary.CreateSummaryDbWriter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CreateSummaryFileWriter.pbtxt b/tensorflow/core/api_def/java_api/api_def_CreateSummaryFileWriter.pbtxt
new file mode 100644
index 0000000000..26c7941ce5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CreateSummaryFileWriter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CreateSummaryFileWriter"
+  endpoint {
+    name: "summary.CreateSummaryFileWriter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CropAndResize.pbtxt b/tensorflow/core/api_def/java_api/api_def_CropAndResize.pbtxt
new file mode 100644
index 0000000000..cbf9aa8f99
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CropAndResize.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CropAndResize"
+  endpoint {
+    name: "image.CropAndResize"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CropAndResizeGradBoxes.pbtxt b/tensorflow/core/api_def/java_api/api_def_CropAndResizeGradBoxes.pbtxt
new file mode 100644
index 0000000000..44354bdfa0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CropAndResizeGradBoxes.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CropAndResizeGradBoxes"
+  endpoint {
+    name: "image.CropAndResizeGradBoxes"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CropAndResizeGradImage.pbtxt b/tensorflow/core/api_def/java_api/api_def_CropAndResizeGradImage.pbtxt
new file mode 100644
index 0000000000..0618db9a8d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CropAndResizeGradImage.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CropAndResizeGradImage"
+  endpoint {
+    name: "image.CropAndResizeGradImage"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Cross.pbtxt b/tensorflow/core/api_def/java_api/api_def_Cross.pbtxt
new file mode 100644
index 0000000000..c027884250
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Cross.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Cross"
+  endpoint {
+    name: "linalg.Cross"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
new file mode 100644
index 0000000000..c3d7f17596
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CudnnRNN"
+  endpoint {
+    name: "nn.CudnnRNN"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
new file mode 100644
index 0000000000..371b6a18db
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CudnnRNNBackprop"
+  endpoint {
+    name: "nn.CudnnRNNBackprop"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
new file mode 100644
index 0000000000..756abdb71f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CudnnRNNBackpropV2"
+  endpoint {
+    name: "nn.CudnnRNNBackpropV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt
new file mode 100644
index 0000000000..86d7045db7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CudnnRNNCanonicalToParams"
+  endpoint {
+    name: "nn.CudnnRNNCanonicalToParams"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt
new file mode 100644
index 0000000000..73922b0343
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CudnnRNNParamsSize"
+  endpoint {
+    name: "nn.CudnnRNNParamsSize"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt
new file mode 100644
index 0000000000..e1c567163a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CudnnRNNParamsToCanonical"
+  endpoint {
+    name: "nn.CudnnRNNParamsToCanonical"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
new file mode 100644
index 0000000000..d535f35459
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "CudnnRNNV2"
+  endpoint {
+    name: "nn.CudnnRNNV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Cumprod.pbtxt b/tensorflow/core/api_def/java_api/api_def_Cumprod.pbtxt
new file mode 100644
index 0000000000..0cb7862413
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Cumprod.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Cumprod"
+  endpoint {
+    name: "math.Cumprod"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Cumsum.pbtxt b/tensorflow/core/api_def/java_api/api_def_Cumsum.pbtxt
new file mode 100644
index 0000000000..e7d9076532
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Cumsum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Cumsum"
+  endpoint {
+    name: "math.Cumsum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DataFormatDimMap.pbtxt b/tensorflow/core/api_def/java_api/api_def_DataFormatDimMap.pbtxt
new file mode 100644
index 0000000000..36ea17793f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DataFormatDimMap.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DataFormatDimMap"
+  endpoint {
+    name: "nn.DataFormatDimMap"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DataFormatVecPermute.pbtxt b/tensorflow/core/api_def/java_api/api_def_DataFormatVecPermute.pbtxt
new file mode 100644
index 0000000000..b6b7e2dc76
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DataFormatVecPermute.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DataFormatVecPermute"
+  endpoint {
+    name: "nn.DataFormatVecPermute"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DatasetToGraph.pbtxt b/tensorflow/core/api_def/java_api/api_def_DatasetToGraph.pbtxt
new file mode 100644
index 0000000000..2e7d48961d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DatasetToGraph.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DatasetToGraph"
+  endpoint {
+    name: "data.DatasetToGraph"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DatasetToSingleElement.pbtxt b/tensorflow/core/api_def/java_api/api_def_DatasetToSingleElement.pbtxt
new file mode 100644
index 0000000000..0ac42e0e93
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DatasetToSingleElement.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DatasetToSingleElement"
+  endpoint {
+    name: "data.DatasetToSingleElement"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt b/tensorflow/core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt
new file mode 100644
index 0000000000..5884fe960d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DatasetToTFRecord"
+  endpoint {
+    name: "data.DatasetToTFRecord"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DebugGradientIdentity.pbtxt b/tensorflow/core/api_def/java_api/api_def_DebugGradientIdentity.pbtxt
new file mode 100644
index 0000000000..7d50c5c868
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DebugGradientIdentity.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "DebugGradientIdentity"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DebugGradientRefIdentity.pbtxt b/tensorflow/core/api_def/java_api/api_def_DebugGradientRefIdentity.pbtxt
new file mode 100644
index 0000000000..5e14e5fffd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DebugGradientRefIdentity.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "DebugGradientRefIdentity"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeAndCropJpeg.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeAndCropJpeg.pbtxt
new file mode 100644
index 0000000000..c07bb7a1bd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeAndCropJpeg.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeAndCropJpeg"
+  endpoint {
+    name: "image.DecodeAndCropJpeg"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeBase64.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeBase64.pbtxt
new file mode 100644
index 0000000000..49c93453f7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeBase64.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeBase64"
+  endpoint {
+    name: "io.DecodeBase64"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeBmp.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeBmp.pbtxt
new file mode 100644
index 0000000000..049cfa153d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeBmp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeBmp"
+  endpoint {
+    name: "image.DecodeBmp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt
new file mode 100644
index 0000000000..2c738cf4dd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeCSV"
+  endpoint {
+    name: "io.DecodeCSV"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeCompressed.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeCompressed.pbtxt
new file mode 100644
index 0000000000..91327a92ec
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeCompressed.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeCompressed"
+  endpoint {
+    name: "io.DecodeCompressed"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeGif.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeGif.pbtxt
new file mode 100644
index 0000000000..355643ff77
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeGif.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeGif"
+  endpoint {
+    name: "image.DecodeGif"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeJSONExample.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeJSONExample.pbtxt
new file mode 100644
index 0000000000..6ecba5ab05
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeJSONExample.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeJSONExample"
+  endpoint {
+    name: "io.DecodeJsonExample"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeJpeg.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeJpeg.pbtxt
new file mode 100644
index 0000000000..c0ebf2e315
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeJpeg.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeJpeg"
+  endpoint {
+    name: "image.DecodeJpeg"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodePng.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodePng.pbtxt
new file mode 100644
index 0000000000..d94537dc92
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodePng.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodePng"
+  endpoint {
+    name: "image.DecodePng"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt
new file mode 100644
index 0000000000..ff6531a3ac
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "DecodeProtoV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeRaw.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeRaw.pbtxt
new file mode 100644
index 0000000000..73067173ed
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeRaw.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeRaw"
+  endpoint {
+    name: "io.DecodeRaw"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeWav.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeWav.pbtxt
new file mode 100644
index 0000000000..9b249cc6e9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeWav.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DecodeWav"
+  endpoint {
+    name: "audio.DecodeWav"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DeepCopy.pbtxt b/tensorflow/core/api_def/java_api/api_def_DeepCopy.pbtxt
new file mode 100644
index 0000000000..88a87c9291
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DeepCopy.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "DeepCopy"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DeleteSessionTensor.pbtxt b/tensorflow/core/api_def/java_api/api_def_DeleteSessionTensor.pbtxt
new file mode 100644
index 0000000000..1865b461de
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DeleteSessionTensor.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "DeleteSessionTensor"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DenseToDenseSetOperation.pbtxt b/tensorflow/core/api_def/java_api/api_def_DenseToDenseSetOperation.pbtxt
new file mode 100644
index 0000000000..f85def92ee
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DenseToDenseSetOperation.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DenseToDenseSetOperation"
+  endpoint {
+    name: "sparse.DenseToDenseSetOperation"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DenseToSparseBatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_DenseToSparseBatchDataset.pbtxt
new file mode 100644
index 0000000000..76f6ba0b8a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DenseToSparseBatchDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DenseToSparseBatchDataset"
+  endpoint {
+    name: "data.DenseToSparseBatchDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DenseToSparseSetOperation.pbtxt b/tensorflow/core/api_def/java_api/api_def_DenseToSparseSetOperation.pbtxt
new file mode 100644
index 0000000000..11fbef8ff1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DenseToSparseSetOperation.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DenseToSparseSetOperation"
+  endpoint {
+    name: "sparse.DenseToSparseSetOperation"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DepthToSpace.pbtxt b/tensorflow/core/api_def/java_api/api_def_DepthToSpace.pbtxt
new file mode 100644
index 0000000000..0d2cbd2b90
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DepthToSpace.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DepthToSpace"
+  endpoint {
+    name: "nn.DepthToSpace"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNative.pbtxt b/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNative.pbtxt
new file mode 100644
index 0000000000..1aaa480fef
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNative.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DepthwiseConv2dNative"
+  endpoint {
+    name: "nn.DepthwiseConv2dNative"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt b/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
new file mode 100644
index 0000000000..1a62d8cf63
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNativeBackpropFilter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DepthwiseConv2dNativeBackpropFilter"
+  endpoint {
+    name: "nn.DepthwiseConv2dNativeBackpropFilter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
new file mode 100644
index 0000000000..9106dd2f8f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DepthwiseConv2dNativeBackpropInput.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DepthwiseConv2dNativeBackpropInput"
+  endpoint {
+    name: "nn.DepthwiseConv2dNativeBackpropInput"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Dequantize.pbtxt b/tensorflow/core/api_def/java_api/api_def_Dequantize.pbtxt
new file mode 100644
index 0000000000..8ee4daa2f7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Dequantize.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Dequantize"
+  endpoint {
+    name: "quantization.Dequantize"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DeserializeIterator.pbtxt b/tensorflow/core/api_def/java_api/api_def_DeserializeIterator.pbtxt
new file mode 100644
index 0000000000..bdd03f5dc6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DeserializeIterator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DeserializeIterator"
+  endpoint {
+    name: "data.DeserializeIterator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DeserializeManySparse.pbtxt b/tensorflow/core/api_def/java_api/api_def_DeserializeManySparse.pbtxt
new file mode 100644
index 0000000000..826d49f546
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DeserializeManySparse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DeserializeManySparse"
+  endpoint {
+    name: "io.DeserializeManySparse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DeserializeSparse.pbtxt b/tensorflow/core/api_def/java_api/api_def_DeserializeSparse.pbtxt
new file mode 100644
index 0000000000..e6f24bb625
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DeserializeSparse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DeserializeSparse"
+  endpoint {
+    name: "sparse.DeserializeSparse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DestroyResourceOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_DestroyResourceOp.pbtxt
new file mode 100644
index 0000000000..733e5e5029
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DestroyResourceOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "DestroyResourceOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DestroyTemporaryVariable.pbtxt b/tensorflow/core/api_def/java_api/api_def_DestroyTemporaryVariable.pbtxt
new file mode 100644
index 0000000000..bd416eb68f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DestroyTemporaryVariable.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "DestroyTemporaryVariable"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Diag.pbtxt b/tensorflow/core/api_def/java_api/api_def_Diag.pbtxt
new file mode 100644
index 0000000000..374b3c97e1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Diag.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Diag"
+  endpoint {
+    name: "linalg.TensorDiag"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DiagPart.pbtxt b/tensorflow/core/api_def/java_api/api_def_DiagPart.pbtxt
new file mode 100644
index 0000000000..70db2357d0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DiagPart.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DiagPart"
+  endpoint {
+    name: "linalg.TensorDiagPart"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Digamma.pbtxt b/tensorflow/core/api_def/java_api/api_def_Digamma.pbtxt
new file mode 100644
index 0000000000..68dc74c64e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Digamma.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Digamma"
+  endpoint {
+    name: "math.Digamma"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Dilation2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_Dilation2D.pbtxt
new file mode 100644
index 0000000000..914ea29812
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Dilation2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Dilation2D"
+  endpoint {
+    name: "nn.Dilation2d"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt b/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt
new file mode 100644
index 0000000000..ab8b79d681
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Dilation2DBackpropFilter"
+  endpoint {
+    name: "nn.Dilation2DBackpropFilter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt
new file mode 100644
index 0000000000..9dece4569c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Dilation2DBackpropInput"
+  endpoint {
+    name: "nn.Dilation2DBackpropInput"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Div.pbtxt b/tensorflow/core/api_def/java_api/api_def_Div.pbtxt
new file mode 100644
index 0000000000..2abba7f05f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Div.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Div"
+  endpoint {
+    name: "math.Div"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DivNoNan.pbtxt b/tensorflow/core/api_def/java_api/api_def_DivNoNan.pbtxt
new file mode 100644
index 0000000000..c124044604
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DivNoNan.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DivNoNan"
+  endpoint {
+    name: "math.DivNoNan"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DrawBoundingBoxes.pbtxt b/tensorflow/core/api_def/java_api/api_def_DrawBoundingBoxes.pbtxt
new file mode 100644
index 0000000000..2e7954e2b7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DrawBoundingBoxes.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "DrawBoundingBoxes"
+  endpoint {
+    name: "image.DrawBoundingBoxes"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DynamicPartition.pbtxt b/tensorflow/core/api_def/java_api/api_def_DynamicPartition.pbtxt
new file mode 100644
index 0000000000..cc585676e4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DynamicPartition.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "DynamicPartition"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_DynamicStitch.pbtxt b/tensorflow/core/api_def/java_api/api_def_DynamicStitch.pbtxt
new file mode 100644
index 0000000000..ac1fef4b6a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_DynamicStitch.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "DynamicStitch"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EagerPyFunc.pbtxt b/tensorflow/core/api_def/java_api/api_def_EagerPyFunc.pbtxt
new file mode 100644
index 0000000000..e097041d73
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EagerPyFunc.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "EagerPyFunc"
+  visibility: SKIP
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EditDistance.pbtxt b/tensorflow/core/api_def/java_api/api_def_EditDistance.pbtxt
new file mode 100644
index 0000000000..ca65c2c6e5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EditDistance.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "EditDistance"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Elu.pbtxt b/tensorflow/core/api_def/java_api/api_def_Elu.pbtxt
new file mode 100644
index 0000000000..bfe8d972cf
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Elu.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Elu"
+  endpoint {
+    name: "nn.Elu"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EluGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_EluGrad.pbtxt
new file mode 100644
index 0000000000..3757357c00
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EluGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "EluGrad"
+  endpoint {
+    name: "nn.EluGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Empty.pbtxt b/tensorflow/core/api_def/java_api/api_def_Empty.pbtxt
new file mode 100644
index 0000000000..6522f51d9d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Empty.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Empty"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EmptyTensorList.pbtxt b/tensorflow/core/api_def/java_api/api_def_EmptyTensorList.pbtxt
new file mode 100644
index 0000000000..ef3f533964
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EmptyTensorList.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "EmptyTensorList"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EncodeBase64.pbtxt b/tensorflow/core/api_def/java_api/api_def_EncodeBase64.pbtxt
new file mode 100644
index 0000000000..66f19def9a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EncodeBase64.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "EncodeBase64"
+  endpoint {
+    name: "io.EncodeBase64"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EncodeJpeg.pbtxt b/tensorflow/core/api_def/java_api/api_def_EncodeJpeg.pbtxt
new file mode 100644
index 0000000000..1e151665f8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EncodeJpeg.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "EncodeJpeg"
+  endpoint {
+    name: "image.EncodeJpeg"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EncodePng.pbtxt b/tensorflow/core/api_def/java_api/api_def_EncodePng.pbtxt
new file mode 100644
index 0000000000..7a8d713c86
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EncodePng.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "EncodePng"
+  endpoint {
+    name: "image.EncodePng"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EncodeProto.pbtxt b/tensorflow/core/api_def/java_api/api_def_EncodeProto.pbtxt
new file mode 100644
index 0000000000..ac6a04b4bc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EncodeProto.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "EncodeProto"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EncodeWav.pbtxt b/tensorflow/core/api_def/java_api/api_def_EncodeWav.pbtxt
new file mode 100644
index 0000000000..f3b22fde66
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EncodeWav.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "EncodeWav"
+  endpoint {
+    name: "audio.EncodeWav"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EnqueueInQueueDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_EnqueueInQueueDataset.pbtxt
new file mode 100644
index 0000000000..26051ab446
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EnqueueInQueueDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "EnqueueInQueueDataset"
+  endpoint {
+    name: "data.EnqueueInQueueDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_EnsureShape.pbtxt b/tensorflow/core/api_def/java_api/api_def_EnsureShape.pbtxt
new file mode 100644
index 0000000000..6238947598
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_EnsureShape.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "EnsureShape"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Enter.pbtxt b/tensorflow/core/api_def/java_api/api_def_Enter.pbtxt
new file mode 100644
index 0000000000..ffc10c91be
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Enter.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Enter"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Equal.pbtxt b/tensorflow/core/api_def/java_api/api_def_Equal.pbtxt
new file mode 100644
index 0000000000..c2256c2433
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Equal.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Equal"
+  endpoint {
+    name: "math.Equal"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Erf.pbtxt b/tensorflow/core/api_def/java_api/api_def_Erf.pbtxt
new file mode 100644
index 0000000000..9efcc3983c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Erf.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Erf"
+  endpoint {
+    name: "math.Erf"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Erfc.pbtxt b/tensorflow/core/api_def/java_api/api_def_Erfc.pbtxt
new file mode 100644
index 0000000000..c0f4db61ff
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Erfc.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Erfc"
+  endpoint {
+    name: "math.Erfc"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Exit.pbtxt b/tensorflow/core/api_def/java_api/api_def_Exit.pbtxt
new file mode 100644
index 0000000000..6215cd2229
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Exit.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Exit"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Exp.pbtxt b/tensorflow/core/api_def/java_api/api_def_Exp.pbtxt
new file mode 100644
index 0000000000..b2790c8306
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Exp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Exp"
+  endpoint {
+    name: "math.Exp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExpandDims.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExpandDims.pbtxt
new file mode 100644
index 0000000000..66902ccb5b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExpandDims.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ExpandDims"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalAssertNextDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalAssertNextDataset.pbtxt
new file mode 100644
index 0000000000..cec4c229e4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalAssertNextDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalAssertNextDataset"
+  endpoint {
+    name: "data.ExperimentalAssertNextDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt
new file mode 100644
index 0000000000..2c555c3f1a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalCSVDataset"
+  endpoint {
+    name:  "data.ExperimentalCSVDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt
new file mode 100644
index 0000000000..77fe42fd94
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalDirectedInterleaveDataset"
+  endpoint {
+    name: "data.ExperimentalDirectedInterleaveDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResource.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResource.pbtxt
new file mode 100644
index 0000000000..320f4affb5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResource.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalFunctionBufferingResource"
+  endpoint {
+    name: "data.ExperimentalFunctionBufferingResource"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt
new file mode 100644
index 0000000000..d3dbe7600a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalFunctionBufferingResourceGetNext"
+  endpoint {
+    name: "data.ExperimentalFunctionBufferingResourceGetNext"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt
new file mode 100644
index 0000000000..6909e8678a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalFunctionBufferingResourceReset"
+  endpoint {
+    name: "data.ExperimentalFunctionBufferingResourceReset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt
new file mode 100644
index 0000000000..c49c6de217
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalIdentityIndexedDataset"
+  endpoint {
+    name: "data.ExperimentalIdentityIndexedDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt
new file mode 100644
index 0000000000..7750a43de2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalIgnoreErrorsDataset"
+  endpoint {
+    name: "data.ExperimentalIgnoreErrorsDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalIndexedDatasetGet.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalIndexedDatasetGet.pbtxt
new file mode 100644
index 0000000000..96a3befe8b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalIndexedDatasetGet.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalIndexedDatasetGet"
+  endpoint {
+    name: "data.ExperimentalIndexedDatasetGet"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt
new file mode 100644
index 0000000000..731309d329
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalIndexedDatasetMaterialize"
+  endpoint {
+    name: "data.ExperimentalIndexedDatasetMaterialize"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalIteratorGetDevice.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalIteratorGetDevice.pbtxt
new file mode 100644
index 0000000000..838d579ef7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalIteratorGetDevice.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalIteratorGetDevice"
+  endpoint {
+    name: "data.ExperimentalIteratorGetDevice"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt
new file mode 100644
index 0000000000..dd24ff544f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalLMDBDataset"
+  endpoint {
+    name: "data.ExperimentalLMDBDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalMapDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalMapDataset.pbtxt
new file mode 100644
index 0000000000..bea6dffd9c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalMapDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalMapDataset"
+  endpoint {
+    name: "data.ExperimentalMapDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt
new file mode 100644
index 0000000000..06632e9041
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalMaterializedIndexDatasetHandle"
+  endpoint {
+    name: "data.ExperimentalMaterializedIndexDatasetHandle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalNonSerializableDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalNonSerializableDataset.pbtxt
new file mode 100644
index 0000000000..7b26cf129c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalNonSerializableDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalNonSerializableDataset"
+  endpoint {
+    name: "data.ExperimentalNonSerializableDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
new file mode 100644
index 0000000000..a2c0d92f89
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalNumaMapAndBatchDataset"
+  endpoint {
+    name: "data.ExperimentalNumaMapAndBatchDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalSleepDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalSleepDataset.pbtxt
new file mode 100644
index 0000000000..9e95c55ac2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalSleepDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalSleepDataset"
+  endpoint {
+    name: "data.ExperimentalSleepDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalThreadPoolDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalThreadPoolDataset.pbtxt
new file mode 100644
index 0000000000..3e4aef1c68
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalThreadPoolDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalThreadPoolDataset"
+  endpoint {
+    name: "data.ExperimentalThreadPoolDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalThreadPoolHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalThreadPoolHandle.pbtxt
new file mode 100644
index 0000000000..73cf6767b3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalThreadPoolHandle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalThreadPoolHandle"
+  endpoint {
+    name: "data.ExperimentalThreadPoolHandle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalUniqueDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalUniqueDataset.pbtxt
new file mode 100644
index 0000000000..0d834193dd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalUniqueDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExperimentalUniqueDataset"
+  endpoint {
+    name: "data.ExperimentalUniqueDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Expm1.pbtxt b/tensorflow/core/api_def/java_api/api_def_Expm1.pbtxt
new file mode 100644
index 0000000000..71a8fcf022
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Expm1.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Expm1"
+  endpoint {
+    name: "math.Expm1"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExtractGlimpse.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExtractGlimpse.pbtxt
new file mode 100644
index 0000000000..3591f93f71
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExtractGlimpse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExtractGlimpse"
+  endpoint {
+    name: "image.ExtractGlimpse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExtractImagePatches.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExtractImagePatches.pbtxt
new file mode 100644
index 0000000000..7cdcfd7528
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExtractImagePatches.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExtractImagePatches"
+  endpoint {
+    name: "image.ExtractImagePatches"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExtractJpegShape.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExtractJpegShape.pbtxt
new file mode 100644
index 0000000000..c95fcc9cef
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExtractJpegShape.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ExtractJpegShape"
+  endpoint {
+    name: "image.ExtractJpegShape"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ExtractVolumePatches.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExtractVolumePatches.pbtxt
new file mode 100644
index 0000000000..6f61c83210
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ExtractVolumePatches.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ExtractVolumePatches"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_FFT.pbtxt
new file mode 100644
index 0000000000..d965590308
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FFT.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FFT"
+  endpoint {
+    name: "signal.Fft"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_FFT2D.pbtxt
new file mode 100644
index 0000000000..474103076b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FFT2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FFT2D"
+  endpoint {
+    name: "signal.Fft2d"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_FFT3D.pbtxt
new file mode 100644
index 0000000000..8e1606b8f9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FFT3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FFT3D"
+  endpoint {
+    name: "signal.Fft3d"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
new file mode 100644
index 0000000000..e1760b685b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FIFOQueue"
+  endpoint {
+    name: "io.FIFOQueue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
new file mode 100644
index 0000000000..fe260ca2be
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FIFOQueueV2"
+  endpoint {
+    name: "io.FIFOQueueV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Fact.pbtxt b/tensorflow/core/api_def/java_api/api_def_Fact.pbtxt
new file mode 100644
index 0000000000..436664e554
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Fact.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Fact"
+  endpoint {
+    name: "math.Fact"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FakeParam.pbtxt b/tensorflow/core/api_def/java_api/api_def_FakeParam.pbtxt
new file mode 100644
index 0000000000..ac8f751442
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FakeParam.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "FakeParam"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxArgs.pbtxt b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxArgs.pbtxt
new file mode 100644
index 0000000000..809d231a55
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxArgs.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FakeQuantWithMinMaxArgs"
+  endpoint {
+    name: "quantization.FakeQuantWithMinMaxArgs"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxArgsGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxArgsGradient.pbtxt
new file mode 100644
index 0000000000..50d0f51a14
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxArgsGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FakeQuantWithMinMaxArgsGradient"
+  endpoint {
+    name: "quantization.FakeQuantWithMinMaxArgsGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVars.pbtxt b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVars.pbtxt
new file mode 100644
index 0000000000..b86258aab2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVars.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FakeQuantWithMinMaxVars"
+  endpoint {
+    name: "quantization.FakeQuantWithMinMaxVars"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsGradient.pbtxt
new file mode 100644
index 0000000000..3c1343423c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FakeQuantWithMinMaxVarsGradient"
+  endpoint {
+    name: "quantization.FakeQuantWithMinMaxVarsGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt
new file mode 100644
index 0000000000..afe45a290d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsPerChannel.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FakeQuantWithMinMaxVarsPerChannel"
+  endpoint {
+    name: "quantization.FakeQuantWithMinMaxVarsPerChannel"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt
new file mode 100644
index 0000000000..9dd62fdffd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FakeQuantWithMinMaxVarsPerChannelGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FakeQuantWithMinMaxVarsPerChannelGradient"
+  endpoint {
+    name: "quantization.FakeQuantWithMinMaxVarsPerChannelGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FakeQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_FakeQueue.pbtxt
new file mode 100644
index 0000000000..8960966f08
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FakeQueue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FakeQueue"
+  endpoint {
+    name: "io.FakeQueue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Fill.pbtxt b/tensorflow/core/api_def/java_api/api_def_Fill.pbtxt
new file mode 100644
index 0000000000..3997328ed3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Fill.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Fill"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FilterByLastComponentDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_FilterByLastComponentDataset.pbtxt
new file mode 100644
index 0000000000..b7111f48fa
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FilterByLastComponentDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FilterByLastComponentDataset"
+  endpoint {
+    name: "data.FilterByLastComponentDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FilterDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_FilterDataset.pbtxt
new file mode 100644
index 0000000000..930fff4191
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FilterDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FilterDataset"
+  endpoint {
+    name: "data.FilterDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt
new file mode 100644
index 0000000000..8f38364f85
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FixedLengthRecordDataset"
+  endpoint {
+    name: "data.FixedLengthRecordDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt
new file mode 100644
index 0000000000..723248019d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FixedLengthRecordDatasetV2"
+  endpoint {
+    name: "data.FixedLengthRecordDatasetV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReader.pbtxt
new file mode 100644
index 0000000000..295526d074
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReader.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FixedLengthRecordReader"
+  endpoint {
+    name: "io.FixedLengthRecordReader"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt
new file mode 100644
index 0000000000..0cfefe2075
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FixedLengthRecordReaderV2"
+  endpoint {
+    name: "io.FixedLengthRecordReaderV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedUnigramCandidateSampler.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedUnigramCandidateSampler.pbtxt
new file mode 100644
index 0000000000..eb9c68d4db
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FixedUnigramCandidateSampler.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FixedUnigramCandidateSampler"
+  endpoint {
+    name: "nn.FixedUnigramCandidateSampler"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FlatMapDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_FlatMapDataset.pbtxt
new file mode 100644
index 0000000000..d6e96cb4e0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FlatMapDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FlatMapDataset"
+  endpoint {
+    name: "data.FlatMapDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Floor.pbtxt b/tensorflow/core/api_def/java_api/api_def_Floor.pbtxt
new file mode 100644
index 0000000000..a2b80f97e0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Floor.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Floor"
+  endpoint {
+    name: "math.Floor"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FloorDiv.pbtxt b/tensorflow/core/api_def/java_api/api_def_FloorDiv.pbtxt
new file mode 100644
index 0000000000..054d85f55c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FloorDiv.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FloorDiv"
+  endpoint {
+    name: "math.FloorDiv"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FloorMod.pbtxt b/tensorflow/core/api_def/java_api/api_def_FloorMod.pbtxt
new file mode 100644
index 0000000000..ff2216a935
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FloorMod.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FloorMod"
+  endpoint {
+    name: "math.FloorMod"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FlushSummaryWriter.pbtxt b/tensorflow/core/api_def/java_api/api_def_FlushSummaryWriter.pbtxt
new file mode 100644
index 0000000000..feaa3a6dc2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FlushSummaryWriter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FlushSummaryWriter"
+  endpoint {
+    name: "summary.FlushSummaryWriter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_For.pbtxt b/tensorflow/core/api_def/java_api/api_def_For.pbtxt
new file mode 100644
index 0000000000..30363d1e96
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_For.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "For"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FractionalAvgPool.pbtxt b/tensorflow/core/api_def/java_api/api_def_FractionalAvgPool.pbtxt
new file mode 100644
index 0000000000..fc2e6ca54b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FractionalAvgPool.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FractionalAvgPool"
+  endpoint {
+    name: "nn.FractionalAvgPool"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FractionalAvgPoolGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_FractionalAvgPoolGrad.pbtxt
new file mode 100644
index 0000000000..4e11d5e395
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FractionalAvgPoolGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FractionalAvgPoolGrad"
+  endpoint {
+    name: "nn.FractionalAvgPoolGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FractionalMaxPool.pbtxt b/tensorflow/core/api_def/java_api/api_def_FractionalMaxPool.pbtxt
new file mode 100644
index 0000000000..061b358ec2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FractionalMaxPool.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FractionalMaxPool"
+  endpoint {
+    name: "nn.FractionalMaxPool"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FractionalMaxPoolGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_FractionalMaxPoolGrad.pbtxt
new file mode 100644
index 0000000000..c70e6d721e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FractionalMaxPoolGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FractionalMaxPoolGrad"
+  endpoint {
+    name: "nn.FractionalMaxPoolGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt
new file mode 100644
index 0000000000..058c82c177
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FusedBatchNorm"
+  endpoint {
+    name: "nn.FusedBatchNorm"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt
new file mode 100644
index 0000000000..69baf2a8e3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FusedBatchNormGrad"
+  endpoint {
+    name: "nn.FusedBatchNormGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt
new file mode 100644
index 0000000000..81da6f4bb2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FusedBatchNormGradV2"
+  endpoint {
+    name: "nn.FusedBatchNormGradV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt
new file mode 100644
index 0000000000..e81d78ef5c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FusedBatchNormV2"
+  endpoint {
+    name: "nn.FusedBatchNormV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt
new file mode 100644
index 0000000000..e4836df290
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FusedPadConv2D"
+  endpoint {
+    name: "nn.FusedPadConv2D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt
new file mode 100644
index 0000000000..6cd0caa088
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "FusedResizeAndPadConv2D"
+  endpoint {
+    name: "nn.FusedResizeAndPadConv2D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Gather.pbtxt b/tensorflow/core/api_def/java_api/api_def_Gather.pbtxt
new file mode 100644
index 0000000000..6848641714
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Gather.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Gather"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/java_api/api_def_GatherNd.pbtxt
new file mode 100644
index 0000000000..257c0316ea
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GatherNd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "GatherNd"
+  endpoint {
+    name: "GatherNd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt
new file mode 100644
index 0000000000..f5b2cb92f6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GatherV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GcsConfigureBlockCache.pbtxt b/tensorflow/core/api_def/java_api/api_def_GcsConfigureBlockCache.pbtxt
new file mode 100644
index 0000000000..1ba3044d4c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GcsConfigureBlockCache.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GcsConfigureBlockCache"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GcsConfigureCredentials.pbtxt b/tensorflow/core/api_def/java_api/api_def_GcsConfigureCredentials.pbtxt
new file mode 100644
index 0000000000..98bd555fb8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GcsConfigureCredentials.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GcsConfigureCredentials"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GenerateBigQueryReaderPartitions.pbtxt b/tensorflow/core/api_def/java_api/api_def_GenerateBigQueryReaderPartitions.pbtxt
new file mode 100644
index 0000000000..956f40762d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GenerateBigQueryReaderPartitions.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GenerateBigQueryReaderPartitions"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GenerateVocabRemapping.pbtxt b/tensorflow/core/api_def/java_api/api_def_GenerateVocabRemapping.pbtxt
new file mode 100644
index 0000000000..9aac3b17f3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GenerateVocabRemapping.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "GenerateVocabRemapping"
+  endpoint {
+    name: "train.GenerateVocabRemapping"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GeneratorDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_GeneratorDataset.pbtxt
new file mode 100644
index 0000000000..b1719005e9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GeneratorDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "GeneratorDataset"
+  endpoint {
+    name: "data.GeneratorDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GetSessionHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_GetSessionHandle.pbtxt
new file mode 100644
index 0000000000..84b14a3335
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GetSessionHandle.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GetSessionHandle"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt
new file mode 100644
index 0000000000..28488ac79a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GetSessionHandleV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GetSessionTensor.pbtxt b/tensorflow/core/api_def/java_api/api_def_GetSessionTensor.pbtxt
new file mode 100644
index 0000000000..34b6e627cd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GetSessionTensor.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GetSessionTensor"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Greater.pbtxt b/tensorflow/core/api_def/java_api/api_def_Greater.pbtxt
new file mode 100644
index 0000000000..594f9276be
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Greater.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Greater"
+  endpoint {
+    name: "math.Greater"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GreaterEqual.pbtxt b/tensorflow/core/api_def/java_api/api_def_GreaterEqual.pbtxt
new file mode 100644
index 0000000000..17ea8696b0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GreaterEqual.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "GreaterEqual"
+  endpoint {
+    name: "math.GreaterEqual"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GroupByReducerDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_GroupByReducerDataset.pbtxt
new file mode 100644
index 0000000000..1bd2c8f531
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GroupByReducerDataset.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GroupByReducerDataset"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GroupByWindowDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_GroupByWindowDataset.pbtxt
new file mode 100644
index 0000000000..9e4c4cd4ff
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GroupByWindowDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "GroupByWindowDataset"
+  endpoint {
+    name: "data.GroupByWindowDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_GuaranteeConst.pbtxt b/tensorflow/core/api_def/java_api/api_def_GuaranteeConst.pbtxt
new file mode 100644
index 0000000000..8cac25787d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_GuaranteeConst.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GuaranteeConst"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_HSVToRGB.pbtxt b/tensorflow/core/api_def/java_api/api_def_HSVToRGB.pbtxt
new file mode 100644
index 0000000000..95b042d5d6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_HSVToRGB.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "HSVToRGB"
+  endpoint {
+    name: "image.HsvToRgb"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt b/tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt
new file mode 100644
index 0000000000..f733b277ea
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "HashTable"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt
new file mode 100644
index 0000000000..74fda0380a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "HashTableV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_HistogramFixedWidth.pbtxt b/tensorflow/core/api_def/java_api/api_def_HistogramFixedWidth.pbtxt
new file mode 100644
index 0000000000..f64d9ae1d2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_HistogramFixedWidth.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "HistogramFixedWidth"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_HistogramSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_HistogramSummary.pbtxt
new file mode 100644
index 0000000000..97f28335bb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_HistogramSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "HistogramSummary"
+  endpoint {
+    name: "summary.HistogramSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_HostConst.pbtxt b/tensorflow/core/api_def/java_api/api_def_HostConst.pbtxt
new file mode 100644
index 0000000000..ba589e73e7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_HostConst.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "HostConst"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_IFFT.pbtxt
new file mode 100644
index 0000000000..4a15ebec7f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IFFT.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IFFT"
+  endpoint {
+    name: "signal.Ifft"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_IFFT2D.pbtxt
new file mode 100644
index 0000000000..35d696ee73
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IFFT2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IFFT2D"
+  endpoint {
+    name: "signal.Ifft2d"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_IFFT3D.pbtxt
new file mode 100644
index 0000000000..76a3164e6a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IFFT3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IFFT3D"
+  endpoint {
+    name: "signal.Ifft3d"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt
new file mode 100644
index 0000000000..d2ade5b16e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IRFFT"
+  endpoint {
+    name: "signal.IRFFT"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt
new file mode 100644
index 0000000000..3a5a5b0c71
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IRFFT2D"
+  endpoint {
+    name: "signal.IRFFT2D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt
new file mode 100644
index 0000000000..fc5a5451b4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IRFFT3D"
+  endpoint {
+    name: "signal.IRFFT3D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Identity.pbtxt b/tensorflow/core/api_def/java_api/api_def_Identity.pbtxt
new file mode 100644
index 0000000000..b6df3c6cfe
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Identity.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Identity"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IdentityN.pbtxt b/tensorflow/core/api_def/java_api/api_def_IdentityN.pbtxt
new file mode 100644
index 0000000000..827df10c65
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IdentityN.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "IdentityN"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt
new file mode 100644
index 0000000000..1e1314213d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IdentityReader"
+  endpoint {
+    name: "io.IdentityReader"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IdentityReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_IdentityReaderV2.pbtxt
new file mode 100644
index 0000000000..2973807a9c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IdentityReaderV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IdentityReaderV2"
+  endpoint {
+    name: "io.IdentityReaderV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_If.pbtxt b/tensorflow/core/api_def/java_api/api_def_If.pbtxt
new file mode 100644
index 0000000000..a3bc33ac2c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_If.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "If"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Igamma.pbtxt b/tensorflow/core/api_def/java_api/api_def_Igamma.pbtxt
new file mode 100644
index 0000000000..cbdd8b984c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Igamma.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Igamma"
+  endpoint {
+    name: "math.Igamma"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IgammaGradA.pbtxt b/tensorflow/core/api_def/java_api/api_def_IgammaGradA.pbtxt
new file mode 100644
index 0000000000..0659c80c39
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IgammaGradA.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IgammaGradA"
+  endpoint {
+    name: "math.IgammaGradA"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Igammac.pbtxt b/tensorflow/core/api_def/java_api/api_def_Igammac.pbtxt
new file mode 100644
index 0000000000..94f6085e1a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Igammac.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Igammac"
+  endpoint {
+    name: "math.Igammac"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Imag.pbtxt b/tensorflow/core/api_def/java_api/api_def_Imag.pbtxt
new file mode 100644
index 0000000000..4227c7078f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Imag.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Imag"
+  endpoint {
+    name: "math.Imag"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ImageSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_ImageSummary.pbtxt
new file mode 100644
index 0000000000..1871e6b655
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ImageSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ImageSummary"
+  endpoint {
+    name: "summary.ImageSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ImmutableConst.pbtxt b/tensorflow/core/api_def/java_api/api_def_ImmutableConst.pbtxt
new file mode 100644
index 0000000000..fd0384dc45
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ImmutableConst.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ImmutableConst"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ImportEvent.pbtxt b/tensorflow/core/api_def/java_api/api_def_ImportEvent.pbtxt
new file mode 100644
index 0000000000..c2d8d2eba3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ImportEvent.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ImportEvent"
+  endpoint {
+    name: "summary.ImportEvent"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt b/tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt
new file mode 100644
index 0000000000..1cca92e0ea
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "InTopK"
+  endpoint {
+    name: "nn.InTopK"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt
new file mode 100644
index 0000000000..3dc9bff289
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "InTopKV2"
+  endpoint {
+    name: "nn.InTopKV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InitializeTable.pbtxt b/tensorflow/core/api_def/java_api/api_def_InitializeTable.pbtxt
new file mode 100644
index 0000000000..49496f29a0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InitializeTable.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "InitializeTable"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFile.pbtxt b/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFile.pbtxt
new file mode 100644
index 0000000000..8cc206b0f0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFile.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "InitializeTableFromTextFile"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFileV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFileV2.pbtxt
new file mode 100644
index 0000000000..2a665348a7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFileV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "InitializeTableFromTextFileV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InitializeTableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_InitializeTableV2.pbtxt
new file mode 100644
index 0000000000..8d9bec2f2f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InitializeTableV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "InitializeTableV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InplaceAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_InplaceAdd.pbtxt
new file mode 100644
index 0000000000..3d157ab7f8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InplaceAdd.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "InplaceAdd"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InplaceSub.pbtxt b/tensorflow/core/api_def/java_api/api_def_InplaceSub.pbtxt
new file mode 100644
index 0000000000..b2ed549643
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InplaceSub.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "InplaceSub"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InplaceUpdate.pbtxt b/tensorflow/core/api_def/java_api/api_def_InplaceUpdate.pbtxt
new file mode 100644
index 0000000000..91041b43ab
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InplaceUpdate.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "InplaceUpdate"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InterleaveDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_InterleaveDataset.pbtxt
new file mode 100644
index 0000000000..25e40ac2db
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InterleaveDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "InterleaveDataset"
+  endpoint {
+    name: "data.InterleaveDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Inv.pbtxt b/tensorflow/core/api_def/java_api/api_def_Inv.pbtxt
new file mode 100644
index 0000000000..49f3e6c042
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Inv.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Inv"
+  endpoint {
+    name: "linalg.Inv"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InvGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_InvGrad.pbtxt
new file mode 100644
index 0000000000..d3bfa78e99
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InvGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "InvGrad"
+  endpoint {
+    name: "nn.InvGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Invert.pbtxt b/tensorflow/core/api_def/java_api/api_def_Invert.pbtxt
new file mode 100644
index 0000000000..9898bfa003
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Invert.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Invert"
+  endpoint {
+    name: "bitwise.Invert"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_InvertPermutation.pbtxt b/tensorflow/core/api_def/java_api/api_def_InvertPermutation.pbtxt
new file mode 100644
index 0000000000..9ee103f554
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_InvertPermutation.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "InvertPermutation"
+  endpoint {
+    name: "math.InvertPermutation"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IsBoostedTreesEnsembleInitialized.pbtxt b/tensorflow/core/api_def/java_api/api_def_IsBoostedTreesEnsembleInitialized.pbtxt
new file mode 100644
index 0000000000..35f17b7924
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IsBoostedTreesEnsembleInitialized.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "IsBoostedTreesEnsembleInitialized"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IsBoostedTreesQuantileStreamResourceInitialized.pbtxt b/tensorflow/core/api_def/java_api/api_def_IsBoostedTreesQuantileStreamResourceInitialized.pbtxt
new file mode 100644
index 0000000000..d6cd2dcc45
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IsBoostedTreesQuantileStreamResourceInitialized.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "IsBoostedTreesQuantileStreamResourceInitialized"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IsFinite.pbtxt b/tensorflow/core/api_def/java_api/api_def_IsFinite.pbtxt
new file mode 100644
index 0000000000..fce5890399
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IsFinite.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IsFinite"
+  endpoint {
+    name: "math.IsFinite"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IsInf.pbtxt b/tensorflow/core/api_def/java_api/api_def_IsInf.pbtxt
new file mode 100644
index 0000000000..823c1d7281
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IsInf.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IsInf"
+  endpoint {
+    name: "math.IsInf"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IsNan.pbtxt b/tensorflow/core/api_def/java_api/api_def_IsNan.pbtxt
new file mode 100644
index 0000000000..58805bf99f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IsNan.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IsNan"
+  endpoint {
+    name: "math.IsNan"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IsVariableInitialized.pbtxt b/tensorflow/core/api_def/java_api/api_def_IsVariableInitialized.pbtxt
new file mode 100644
index 0000000000..7bf51da2da
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IsVariableInitialized.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "IsVariableInitialized"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt b/tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt
new file mode 100644
index 0000000000..7d4b5e6328
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Iterator"
+  endpoint {
+    name: "data.Iterator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandle.pbtxt
new file mode 100644
index 0000000000..b2fe71a0c1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IteratorFromStringHandle"
+  endpoint {
+    name: "data.IteratorFromStringHandle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt
new file mode 100644
index 0000000000..c77959f34c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IteratorFromStringHandleV2"
+  endpoint {
+    name: "data.IteratorFromStringHandleV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorGetNext.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorGetNext.pbtxt
new file mode 100644
index 0000000000..2248ff9f5f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorGetNext.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IteratorGetNext"
+  endpoint {
+    name: "data.IteratorGetNext"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorGetNextAsOptional.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorGetNextAsOptional.pbtxt
new file mode 100644
index 0000000000..ae02a0e017
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorGetNextAsOptional.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IteratorGetNextAsOptional"
+  endpoint {
+    name: "data.IteratorGetNextAsOptional"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorGetNextSync.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorGetNextSync.pbtxt
new file mode 100644
index 0000000000..4aa7c07a77
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorGetNextSync.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IteratorGetNextSync"
+  endpoint {
+    name: "data.IteratorGetNextSync"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorToStringHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorToStringHandle.pbtxt
new file mode 100644
index 0000000000..7413ec846e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorToStringHandle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IteratorToStringHandle"
+  endpoint {
+    name: "data.IteratorToStringHandle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt
new file mode 100644
index 0000000000..7892b096fe
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "IteratorV2"
+  endpoint {
+    name: "data.IteratorV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_L2Loss.pbtxt b/tensorflow/core/api_def/java_api/api_def_L2Loss.pbtxt
new file mode 100644
index 0000000000..c348e0f0e0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_L2Loss.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "L2Loss"
+  endpoint {
+    name: "nn.L2Loss"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt
new file mode 100644
index 0000000000..226fa21953
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LMDBReader"
+  endpoint {
+    name: "io.LMDBReader"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LRN.pbtxt b/tensorflow/core/api_def/java_api/api_def_LRN.pbtxt
new file mode 100644
index 0000000000..d16fea3184
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LRN.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LRN"
+  endpoint {
+    name: "nn.LocalResponseNormalization"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LRNGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_LRNGrad.pbtxt
new file mode 100644
index 0000000000..a50e738d78
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LRNGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LRNGrad"
+  endpoint {
+    name: "nn.LocalResponseNormalizationGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LatencyStatsDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_LatencyStatsDataset.pbtxt
new file mode 100644
index 0000000000..bf0bf2a5ed
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LatencyStatsDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LatencyStatsDataset"
+  endpoint {
+    name: "data.LatencyStatsDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/java_api/api_def_LeakyRelu.pbtxt
new file mode 100644
index 0000000000..31a4f01167
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LeakyRelu.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LeakyRelu"
+  endpoint {
+    name: "nn.LeakyRelu"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LeakyReluGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_LeakyReluGrad.pbtxt
new file mode 100644
index 0000000000..9899c64c13
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LeakyReluGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LeakyReluGrad"
+  endpoint {
+    name: "data.LeakyReluGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LearnedUnigramCandidateSampler.pbtxt b/tensorflow/core/api_def/java_api/api_def_LearnedUnigramCandidateSampler.pbtxt
new file mode 100644
index 0000000000..5f193da1be
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LearnedUnigramCandidateSampler.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LearnedUnigramCandidateSampler"
+  endpoint {
+    name: "nn.LearnedUnigramCandidateSampler"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LeftShift.pbtxt b/tensorflow/core/api_def/java_api/api_def_LeftShift.pbtxt
new file mode 100644
index 0000000000..44a8727e40
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LeftShift.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LeftShift"
+  endpoint {
+    name: "bitwise.LeftShift"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Less.pbtxt b/tensorflow/core/api_def/java_api/api_def_Less.pbtxt
new file mode 100644
index 0000000000..577d2556b8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Less.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Less"
+  endpoint {
+    name: "math.Less"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LessEqual.pbtxt b/tensorflow/core/api_def/java_api/api_def_LessEqual.pbtxt
new file mode 100644
index 0000000000..6cad35c622
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LessEqual.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LessEqual"
+  endpoint {
+    name: "math.LessEqual"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Lgamma.pbtxt b/tensorflow/core/api_def/java_api/api_def_Lgamma.pbtxt
new file mode 100644
index 0000000000..eb7bc9660c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Lgamma.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Lgamma"
+  endpoint {
+    name: "math.Lgamma"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LinSpace.pbtxt b/tensorflow/core/api_def/java_api/api_def_LinSpace.pbtxt
new file mode 100644
index 0000000000..599c310021
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LinSpace.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LinSpace"
+  endpoint {
+    name: "LinSpace"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt b/tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt
new file mode 100644
index 0000000000..bbbd3bb3ec
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ListDiff"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LoadAndRemapMatrix.pbtxt b/tensorflow/core/api_def/java_api/api_def_LoadAndRemapMatrix.pbtxt
new file mode 100644
index 0000000000..54ee68fde4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LoadAndRemapMatrix.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LoadAndRemapMatrix"
+  endpoint {
+    name: "linalg.LoadAndRemapMatrix"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Log.pbtxt b/tensorflow/core/api_def/java_api/api_def_Log.pbtxt
new file mode 100644
index 0000000000..9d11c26c71
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Log.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Log"
+  endpoint {
+    name: "math.Log"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Log1p.pbtxt b/tensorflow/core/api_def/java_api/api_def_Log1p.pbtxt
new file mode 100644
index 0000000000..6cc1d6e6c8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Log1p.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Log1p"
+  endpoint {
+    name: "math.Log1p"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LogMatrixDeterminant.pbtxt b/tensorflow/core/api_def/java_api/api_def_LogMatrixDeterminant.pbtxt
new file mode 100644
index 0000000000..5e52d9eced
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LogMatrixDeterminant.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LogMatrixDeterminant"
+  endpoint {
+    name: "linalg.LogMatrixDeterminant"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LogSoftmax.pbtxt b/tensorflow/core/api_def/java_api/api_def_LogSoftmax.pbtxt
new file mode 100644
index 0000000000..19518a71ea
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LogSoftmax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LogSoftmax"
+  endpoint {
+    name: "nn.LogSoftmax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LogUniformCandidateSampler.pbtxt b/tensorflow/core/api_def/java_api/api_def_LogUniformCandidateSampler.pbtxt
new file mode 100644
index 0000000000..bdcf01c20f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LogUniformCandidateSampler.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LogUniformCandidateSampler"
+  endpoint {
+    name: "random.LogUniformCandidateSampler"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LogicalAnd.pbtxt b/tensorflow/core/api_def/java_api/api_def_LogicalAnd.pbtxt
new file mode 100644
index 0000000000..12921dd932
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LogicalAnd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LogicalAnd"
+  endpoint {
+    name: "math.LogicalAnd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LogicalNot.pbtxt b/tensorflow/core/api_def/java_api/api_def_LogicalNot.pbtxt
new file mode 100644
index 0000000000..9e0960958e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LogicalNot.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LogicalNot"
+  endpoint {
+    name: "math.LogicalNot"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LogicalOr.pbtxt b/tensorflow/core/api_def/java_api/api_def_LogicalOr.pbtxt
new file mode 100644
index 0000000000..6c834e4641
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LogicalOr.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "LogicalOr"
+  endpoint {
+    name: "math.LogicalOr"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableExport.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableExport.pbtxt
new file mode 100644
index 0000000000..49637da997
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableExport.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableExport"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableExportV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableExportV2.pbtxt
new file mode 100644
index 0000000000..fce8a6e3cc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableExportV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableExportV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableFind.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableFind.pbtxt
new file mode 100644
index 0000000000..f8637744ae
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableFind.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableFind"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableFindV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableFindV2.pbtxt
new file mode 100644
index 0000000000..cb78732e9c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableFindV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableFindV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableImport.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableImport.pbtxt
new file mode 100644
index 0000000000..4cf7971f56
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableImport.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableImport"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableImportV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableImportV2.pbtxt
new file mode 100644
index 0000000000..3216346961
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableImportV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableImportV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableInsert.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableInsert.pbtxt
new file mode 100644
index 0000000000..51fe22ba60
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableInsert.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableInsert"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt
new file mode 100644
index 0000000000..429bf25b0a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableInsertV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt
new file mode 100644
index 0000000000..274dd236e3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableRemoveV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableSize.pbtxt
new file mode 100644
index 0000000000..70329e9e90
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt
new file mode 100644
index 0000000000..d5681c3fa0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LookupTableSizeV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LoopCond.pbtxt b/tensorflow/core/api_def/java_api/api_def_LoopCond.pbtxt
new file mode 100644
index 0000000000..492f78f62e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LoopCond.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LoopCond"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_LowerBound.pbtxt b/tensorflow/core/api_def/java_api/api_def_LowerBound.pbtxt
new file mode 100644
index 0000000000..31f1d3038c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_LowerBound.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "LowerBound"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MakeIterator.pbtxt b/tensorflow/core/api_def/java_api/api_def_MakeIterator.pbtxt
new file mode 100644
index 0000000000..9dfa761370
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MakeIterator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MakeIterator"
+  endpoint {
+    name: "data.MakeIterator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt
new file mode 100644
index 0000000000..b8f23009d7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MapAndBatchDataset"
+  endpoint {
+    name: "data.MapAndBatchDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt
new file mode 100644
index 0000000000..a0bc306c52
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapAndBatchDatasetV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapClear.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapClear.pbtxt
new file mode 100644
index 0000000000..3ed9bf8a5d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapClear.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapClear"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapDataset.pbtxt
new file mode 100644
index 0000000000..fdec9eb857
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MapDataset"
+  endpoint {
+    name: "data.MapDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapDefun.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapDefun.pbtxt
new file mode 100644
index 0000000000..43b1dc722c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapDefun.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapDefun"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapIncompleteSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapIncompleteSize.pbtxt
new file mode 100644
index 0000000000..659993e42b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapIncompleteSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapIncompleteSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapPeek.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapPeek.pbtxt
new file mode 100644
index 0000000000..eb1bd158f0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapPeek.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapPeek"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapSize.pbtxt
new file mode 100644
index 0000000000..4da151152c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapStage.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapStage.pbtxt
new file mode 100644
index 0000000000..6d9f66cfc4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapStage.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapStage"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapUnstage.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapUnstage.pbtxt
new file mode 100644
index 0000000000..bb118f0fcb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapUnstage.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapUnstage"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MapUnstageNoKey.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapUnstageNoKey.pbtxt
new file mode 100644
index 0000000000..1004e96482
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MapUnstageNoKey.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MapUnstageNoKey"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatMul.pbtxt
new file mode 100644
index 0000000000..fe4b8405b9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatMul.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatMul"
+  endpoint {
+    name: "linalg.MatMul"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatchingFiles.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatchingFiles.pbtxt
new file mode 100644
index 0000000000..bb7b096895
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatchingFiles.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatchingFiles"
+  endpoint {
+    name: "io.MatchingFiles"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatchingFilesDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatchingFilesDataset.pbtxt
new file mode 100644
index 0000000000..749257c37b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatchingFilesDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatchingFilesDataset"
+  endpoint {
+    name: "data.MatchingFilesDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixBandPart.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixBandPart.pbtxt
new file mode 100644
index 0000000000..eaf426c00e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixBandPart.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixBandPart"
+  endpoint {
+    name: "linalg.BandPart"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixDeterminant.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixDeterminant.pbtxt
new file mode 100644
index 0000000000..b56d2dad3a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixDeterminant.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixDeterminant"
+  endpoint {
+    name: "linalg.Det"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixDiag.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixDiag.pbtxt
new file mode 100644
index 0000000000..839cd82b89
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixDiag.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixDiag"
+  endpoint {
+    name: "linalg.Diag"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixDiagPart.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixDiagPart.pbtxt
new file mode 100644
index 0000000000..008f75c1e9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixDiagPart.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixDiagPart"
+  endpoint {
+    name: "linalg.DiagPart"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixExponential.pbtxt
new file mode 100644
index 0000000000..fb232dab98
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixExponential.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixExponential"
+  endpoint {
+    name: "linalg.MatrixExponential"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixInverse.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixInverse.pbtxt
new file mode 100644
index 0000000000..68721fc78d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixInverse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixInverse"
+  endpoint {
+    name: "linalg.Inv"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixLogarithm.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixLogarithm.pbtxt
new file mode 100644
index 0000000000..04137ffae7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixLogarithm.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixLogarithm"
+  endpoint {
+    name: "linalg.MatrixLogarithm"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixSetDiag.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixSetDiag.pbtxt
new file mode 100644
index 0000000000..61001fa38c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixSetDiag.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixSetDiag"
+  endpoint {
+    name: "linalg.SetDiag"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixSolve.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixSolve.pbtxt
new file mode 100644
index 0000000000..02c21448bb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixSolve.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixSolve"
+  endpoint {
+    name: "linalg.Solve"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixSolveLs.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixSolveLs.pbtxt
new file mode 100644
index 0000000000..9cee578ec2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixSolveLs.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixSolveLs"
+  endpoint {
+    name: "linalg.MatrixSolveLs"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixSquareRoot.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixSquareRoot.pbtxt
new file mode 100644
index 0000000000..14c7624fe3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixSquareRoot.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixSquareRoot"
+  endpoint {
+    name: "linalg.Sqrtm"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MatrixTriangularSolve.pbtxt b/tensorflow/core/api_def/java_api/api_def_MatrixTriangularSolve.pbtxt
new file mode 100644
index 0000000000..1f61e99efe
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MatrixTriangularSolve.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MatrixTriangularSolve"
+  endpoint {
+    name: "linalg.TriangularSolve"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Max.pbtxt b/tensorflow/core/api_def/java_api/api_def_Max.pbtxt
new file mode 100644
index 0000000000..03868720ed
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Max.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Max"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt
new file mode 100644
index 0000000000..17c17b5699
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPool"
+  endpoint {
+    name: "nn.MaxPool"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPool3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPool3D.pbtxt
new file mode 100644
index 0000000000..17aeb6a8c9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPool3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPool3D"
+  endpoint {
+    name: "nn.MaxPool3d"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt
new file mode 100644
index 0000000000..7f5ab71845
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPool3DGrad"
+  endpoint {
+    name: "nn.MaxPool3DGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt
new file mode 100644
index 0000000000..4d7211add2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPool3DGradGrad"
+  endpoint {
+    name: "nn.MaxPool3DGradGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt
new file mode 100644
index 0000000000..c8b783ee7a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPoolGrad"
+  endpoint {
+    name: "nn.MaxPoolGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt
new file mode 100644
index 0000000000..2dd7fdf229
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPoolGradGrad"
+  endpoint {
+    name: "nn.MaxPoolGradGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt
new file mode 100644
index 0000000000..e2cf7927ca
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPoolGradGradV2"
+  endpoint {
+    name: "nn.MaxPoolGradGradV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradWithArgmax.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradWithArgmax.pbtxt
new file mode 100644
index 0000000000..d43cf7447c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradWithArgmax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPoolGradGradWithArgmax"
+  endpoint {
+    name: "nn.MaxPoolGradGradWithArgmax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt
new file mode 100644
index 0000000000..1e47cd13e3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPoolGradV2"
+  endpoint {
+    name: "nn.MaxPoolGradV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradWithArgmax.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradWithArgmax.pbtxt
new file mode 100644
index 0000000000..c10701f555
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradWithArgmax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPoolGradWithArgmax"
+  endpoint {
+    name: "nn.MaxPoolGradWithArgmax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt
new file mode 100644
index 0000000000..bd885135d5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPoolV2"
+  endpoint {
+    name: "nn.MaxPoolV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolWithArgmax.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolWithArgmax.pbtxt
new file mode 100644
index 0000000000..43630534cb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolWithArgmax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MaxPoolWithArgmax"
+  endpoint {
+    name: "nn.MaxPoolWithArgmax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Maximum.pbtxt b/tensorflow/core/api_def/java_api/api_def_Maximum.pbtxt
new file mode 100644
index 0000000000..1df9c60530
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Maximum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Maximum"
+  endpoint {
+    name: "math.Maximum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Mean.pbtxt b/tensorflow/core/api_def/java_api/api_def_Mean.pbtxt
new file mode 100644
index 0000000000..7bdcdc3d74
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Mean.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Mean"
+  endpoint {
+    name: "math.Mean"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Merge.pbtxt b/tensorflow/core/api_def/java_api/api_def_Merge.pbtxt
new file mode 100644
index 0000000000..954d5085ad
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Merge.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Merge"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MergeSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_MergeSummary.pbtxt
new file mode 100644
index 0000000000..f52c7c0996
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MergeSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MergeSummary"
+  endpoint {
+    name: "summary.MergeSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MergeV2Checkpoints.pbtxt b/tensorflow/core/api_def/java_api/api_def_MergeV2Checkpoints.pbtxt
new file mode 100644
index 0000000000..8899c8c4ed
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MergeV2Checkpoints.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MergeV2Checkpoints"
+  endpoint {
+    name: "train.MergeV2Checkpoints"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Mfcc.pbtxt b/tensorflow/core/api_def/java_api/api_def_Mfcc.pbtxt
new file mode 100644
index 0000000000..6cb04e73ff
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Mfcc.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Mfcc"
+  endpoint {
+    name: "audio.Mfcc"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Min.pbtxt b/tensorflow/core/api_def/java_api/api_def_Min.pbtxt
new file mode 100644
index 0000000000..72894c1ffd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Min.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Min"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Minimum.pbtxt b/tensorflow/core/api_def/java_api/api_def_Minimum.pbtxt
new file mode 100644
index 0000000000..69f76a9829
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Minimum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Minimum"
+  endpoint {
+    name: "math.Minimum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MirrorPad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MirrorPad.pbtxt
new file mode 100644
index 0000000000..e1cb766f8f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MirrorPad.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MirrorPad"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MirrorPadGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MirrorPadGrad.pbtxt
new file mode 100644
index 0000000000..ddd8ab3ba1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MirrorPadGrad.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MirrorPadGrad"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Mod.pbtxt b/tensorflow/core/api_def/java_api/api_def_Mod.pbtxt
new file mode 100644
index 0000000000..76fbbe97a8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Mod.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Mod"
+  endpoint {
+    name: "math.Mod"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ModelDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ModelDataset.pbtxt
new file mode 100644
index 0000000000..143c7afd72
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ModelDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ModelDataset"
+  endpoint {
+    name: "data.ModelDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Mul.pbtxt b/tensorflow/core/api_def/java_api/api_def_Mul.pbtxt
new file mode 100644
index 0000000000..605e110931
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Mul.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Mul"
+  endpoint {
+    name: "math.Mul"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MultiDeviceIterator.pbtxt b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIterator.pbtxt
new file mode 100644
index 0000000000..81eabf9bdc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIterator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MultiDeviceIterator"
+  endpoint {
+    name: "data.MultiDeviceIterator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorFromStringHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorFromStringHandle.pbtxt
new file mode 100644
index 0000000000..4006f72d52
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorFromStringHandle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MultiDeviceIteratorFromStringHandle"
+  endpoint {
+    name: "data.MultiDeviceIteratorFromStringHandle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorGetNextFromShard.pbtxt b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorGetNextFromShard.pbtxt
new file mode 100644
index 0000000000..a7e6fc1508
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorGetNextFromShard.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MultiDeviceIteratorGetNextFromShard"
+  endpoint {
+    name: "data.MultiDeviceIteratorGetNextFromShard"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorInit.pbtxt b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorInit.pbtxt
new file mode 100644
index 0000000000..1663bc5c22
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorInit.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MultiDeviceIteratorInit"
+  endpoint {
+    name: "data.MultiDeviceIteratorInit"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorToStringHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorToStringHandle.pbtxt
new file mode 100644
index 0000000000..ff061da390
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MultiDeviceIteratorToStringHandle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "MultiDeviceIteratorToStringHandle"
+  endpoint {
+    name: "data.MultiDeviceIteratorToStringHandle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Multinomial.pbtxt b/tensorflow/core/api_def/java_api/api_def_Multinomial.pbtxt
new file mode 100644
index 0000000000..bd98bb10b4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Multinomial.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Multinomial"
+  endpoint {
+    name: "random.Multinomial"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt
new file mode 100644
index 0000000000..15803468ff
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MutableDenseHashTable"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt
new file mode 100644
index 0000000000..03fdeddb95
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MutableDenseHashTableV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableHashTable.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableHashTable.pbtxt
new file mode 100644
index 0000000000..1a80874495
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MutableHashTable.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MutableHashTable"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt
new file mode 100644
index 0000000000..bf2fa065dc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MutableHashTableOfTensors"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensorsV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensorsV2.pbtxt
new file mode 100644
index 0000000000..53d780e925
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensorsV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MutableHashTableOfTensorsV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableHashTableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableHashTableV2.pbtxt
new file mode 100644
index 0000000000..99007df253
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MutableHashTableV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MutableHashTableV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MutexLock.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutexLock.pbtxt
new file mode 100644
index 0000000000..75c7be5286
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MutexLock.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MutexLock"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt
new file mode 100644
index 0000000000..93988914aa
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "MutexV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NcclAllReduce.pbtxt b/tensorflow/core/api_def/java_api/api_def_NcclAllReduce.pbtxt
new file mode 100644
index 0000000000..c7133d4a4a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NcclAllReduce.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "NcclAllReduce"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NcclBroadcast.pbtxt b/tensorflow/core/api_def/java_api/api_def_NcclBroadcast.pbtxt
new file mode 100644
index 0000000000..b9b1a345c9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NcclBroadcast.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "NcclBroadcast"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NcclReduce.pbtxt b/tensorflow/core/api_def/java_api/api_def_NcclReduce.pbtxt
new file mode 100644
index 0000000000..18dc890177
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NcclReduce.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "NcclReduce"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Neg.pbtxt b/tensorflow/core/api_def/java_api/api_def_Neg.pbtxt
new file mode 100644
index 0000000000..c7e9ede2a5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Neg.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Neg"
+  endpoint {
+    name: "math.Neg"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NegTrain.pbtxt b/tensorflow/core/api_def/java_api/api_def_NegTrain.pbtxt
new file mode 100644
index 0000000000..eb62186362
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NegTrain.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "NegTrain"
+  endpoint {
+    name: "train.NegTrain"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NextIteration.pbtxt b/tensorflow/core/api_def/java_api/api_def_NextIteration.pbtxt
new file mode 100644
index 0000000000..bc63f6ada1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NextIteration.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "NextIteration"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NoOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_NoOp.pbtxt
new file mode 100644
index 0000000000..337fb5da14
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NoOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "NoOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppression.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppression.pbtxt
new file mode 100644
index 0000000000..d7156b0a3a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppression.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "NonMaxSuppression"
+  endpoint {
+    name: "image.NonMaxSuppression"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt
new file mode 100644
index 0000000000..d78eb9745d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "NonMaxSuppressionV2"
+  endpoint {
+    name: "image.NonMaxSuppressionV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt
new file mode 100644
index 0000000000..e3fde0a566
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "NonMaxSuppressionV3"
+  endpoint {
+    name: "image.NonMaxSuppressionV3"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt
new file mode 100644
index 0000000000..98776e8e7f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "NonMaxSuppressionV4"
+  endpoint {
+    name: "image.NonMaxSuppressionV4"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt
new file mode 100644
index 0000000000..06fa52920d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "NonMaxSuppressionWithOverlaps"
+  endpoint {
+    name: "image.NonMaxSuppressionWithOverlaps"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NotEqual.pbtxt b/tensorflow/core/api_def/java_api/api_def_NotEqual.pbtxt
new file mode 100644
index 0000000000..0d99af40b5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NotEqual.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "NotEqual"
+  endpoint {
+    name: "math.NotEqual"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_NthElement.pbtxt b/tensorflow/core/api_def/java_api/api_def_NthElement.pbtxt
new file mode 100644
index 0000000000..57097e634a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_NthElement.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "NthElement"
+  endpoint {
+    name: "nn.NthElement"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OneHot.pbtxt b/tensorflow/core/api_def/java_api/api_def_OneHot.pbtxt
new file mode 100644
index 0000000000..66872d5eb8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OneHot.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OneHot"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OneShotIterator.pbtxt b/tensorflow/core/api_def/java_api/api_def_OneShotIterator.pbtxt
new file mode 100644
index 0000000000..39af8cefde
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OneShotIterator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "OneShotIterator"
+  endpoint {
+    name: "data.OneShotIterator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OnesLike.pbtxt b/tensorflow/core/api_def/java_api/api_def_OnesLike.pbtxt
new file mode 100644
index 0000000000..97abe0814a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OnesLike.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OnesLike"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OptimizeDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_OptimizeDataset.pbtxt
new file mode 100644
index 0000000000..e7ddf97d1a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OptimizeDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "OptimizeDataset"
+  endpoint {
+    name: "data.OptimizeDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OptionalFromValue.pbtxt b/tensorflow/core/api_def/java_api/api_def_OptionalFromValue.pbtxt
new file mode 100644
index 0000000000..d251fd5d94
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OptionalFromValue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "OptionalFromValue"
+  endpoint {
+    name: "data.OptionalFromValue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OptionalGetValue.pbtxt b/tensorflow/core/api_def/java_api/api_def_OptionalGetValue.pbtxt
new file mode 100644
index 0000000000..7fcdb5ac69
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OptionalGetValue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "OptionalGetValue"
+  endpoint {
+    name: "data.OptionalGetValue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OptionalHasValue.pbtxt b/tensorflow/core/api_def/java_api/api_def_OptionalHasValue.pbtxt
new file mode 100644
index 0000000000..4ffa15b564
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OptionalHasValue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "OptionalHasValue"
+  endpoint {
+    name: "data.OptionalHasValue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OptionalNone.pbtxt b/tensorflow/core/api_def/java_api/api_def_OptionalNone.pbtxt
new file mode 100644
index 0000000000..cec29a42ae
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OptionalNone.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "OptionalNone"
+  endpoint {
+    name: "data.OptionalNone"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OrderedMapClear.pbtxt b/tensorflow/core/api_def/java_api/api_def_OrderedMapClear.pbtxt
new file mode 100644
index 0000000000..e36b2aa3e4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OrderedMapClear.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OrderedMapClear"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OrderedMapIncompleteSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_OrderedMapIncompleteSize.pbtxt
new file mode 100644
index 0000000000..c609e9e50a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OrderedMapIncompleteSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OrderedMapIncompleteSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OrderedMapPeek.pbtxt b/tensorflow/core/api_def/java_api/api_def_OrderedMapPeek.pbtxt
new file mode 100644
index 0000000000..06fc218277
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OrderedMapPeek.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OrderedMapPeek"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OrderedMapSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_OrderedMapSize.pbtxt
new file mode 100644
index 0000000000..7beef3f376
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OrderedMapSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OrderedMapSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OrderedMapStage.pbtxt b/tensorflow/core/api_def/java_api/api_def_OrderedMapStage.pbtxt
new file mode 100644
index 0000000000..8b579d21a0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OrderedMapStage.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OrderedMapStage"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OrderedMapUnstage.pbtxt b/tensorflow/core/api_def/java_api/api_def_OrderedMapUnstage.pbtxt
new file mode 100644
index 0000000000..d3d6862fbe
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OrderedMapUnstage.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OrderedMapUnstage"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_OrderedMapUnstageNoKey.pbtxt b/tensorflow/core/api_def/java_api/api_def_OrderedMapUnstageNoKey.pbtxt
new file mode 100644
index 0000000000..3d275c85d9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_OrderedMapUnstageNoKey.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "OrderedMapUnstageNoKey"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Pack.pbtxt b/tensorflow/core/api_def/java_api/api_def_Pack.pbtxt
new file mode 100644
index 0000000000..d9e9897d77
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Pack.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Pack"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Pad.pbtxt b/tensorflow/core/api_def/java_api/api_def_Pad.pbtxt
new file mode 100644
index 0000000000..a9de5541ac
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Pad.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Pad"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt
new file mode 100644
index 0000000000..1554cd0b64
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "PadV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt
new file mode 100644
index 0000000000..2d734539da
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PaddedBatchDataset"
+  endpoint {
+    name: "data.PaddedBatchDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt
new file mode 100644
index 0000000000..d4b85550e9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PaddedBatchDatasetV2"
+  endpoint {
+    name: "data.PaddedBatchDatasetV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
new file mode 100644
index 0000000000..151ab0f872
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PaddingFIFOQueue"
+  endpoint {
+    name: "io.PaddingFIFOQueue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
new file mode 100644
index 0000000000..4b589a9afb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PaddingFIFOQueueV2"
+  endpoint {
+    name: "io.PaddingFIFOQueueV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParallelConcat.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParallelConcat.pbtxt
new file mode 100644
index 0000000000..0b17c7d256
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParallelConcat.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ParallelConcat"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParallelDynamicStitch.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParallelDynamicStitch.pbtxt
new file mode 100644
index 0000000000..79a55b763f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParallelDynamicStitch.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ParallelDynamicStitch"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt
new file mode 100644
index 0000000000..3ccc2a6bf7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParallelInterleaveDataset"
+  endpoint {
+    name: "data.ParallelInterleaveDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDatasetV2.pbtxt
new file mode 100644
index 0000000000..56b05cc2f6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDatasetV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ParallelInterleaveDatasetV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParallelMapDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParallelMapDataset.pbtxt
new file mode 100644
index 0000000000..5091bb9cec
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParallelMapDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParallelMapDataset"
+  endpoint {
+    name: "data.ParallelMapDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParameterizedTruncatedNormal.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParameterizedTruncatedNormal.pbtxt
new file mode 100644
index 0000000000..26ca2fc86f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParameterizedTruncatedNormal.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParameterizedTruncatedNormal"
+  endpoint {
+    name: "random.ParameterizedTruncatedNormal"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParseExample.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParseExample.pbtxt
new file mode 100644
index 0000000000..6e7384f2e3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParseExample.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParseExample"
+  endpoint {
+    name: "io.ParseExample"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParseExampleDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParseExampleDataset.pbtxt
new file mode 100644
index 0000000000..4309645093
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParseExampleDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParseExampleDataset"
+  endpoint {
+    name: "data.ParseExampleDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParseSequenceExample.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParseSequenceExample.pbtxt
new file mode 100644
index 0000000000..09ee715ac7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParseSequenceExample.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParseSequenceExample"
+  endpoint {
+    name: "io.ParseSequenceExample"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParseSingleExample.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParseSingleExample.pbtxt
new file mode 100644
index 0000000000..7559957b35
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParseSingleExample.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParseSingleExample"
+  endpoint {
+    name: "io.ParseSingleExample"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParseSingleSequenceExample.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParseSingleSequenceExample.pbtxt
new file mode 100644
index 0000000000..00eb325b2a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParseSingleSequenceExample.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParseSingleSequenceExample"
+  endpoint {
+    name: "io.ParseSingleSequenceExample"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ParseTensor.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParseTensor.pbtxt
new file mode 100644
index 0000000000..a78cdc7f5c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ParseTensor.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ParseTensor"
+  endpoint {
+    name: "io.ParseTensor"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PartitionedCall.pbtxt b/tensorflow/core/api_def/java_api/api_def_PartitionedCall.pbtxt
new file mode 100644
index 0000000000..1ac10b6028
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PartitionedCall.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "PartitionedCall"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt b/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
new file mode 100644
index 0000000000..5e6daa2ae4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Placeholder"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
new file mode 100644
index 0000000000..39012d4aa2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "PlaceholderV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PlaceholderWithDefault.pbtxt b/tensorflow/core/api_def/java_api/api_def_PlaceholderWithDefault.pbtxt
new file mode 100644
index 0000000000..59067a9c68
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PlaceholderWithDefault.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "PlaceholderWithDefault"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Polygamma.pbtxt b/tensorflow/core/api_def/java_api/api_def_Polygamma.pbtxt
new file mode 100644
index 0000000000..746b3375a0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Polygamma.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Polygamma"
+  endpoint {
+    name: "math.Polygamma"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PopulationCount.pbtxt b/tensorflow/core/api_def/java_api/api_def_PopulationCount.pbtxt
new file mode 100644
index 0000000000..6aacdf4d12
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PopulationCount.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PopulationCount"
+  endpoint {
+    name: "math.PopulationCount"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Pow.pbtxt b/tensorflow/core/api_def/java_api/api_def_Pow.pbtxt
new file mode 100644
index 0000000000..e7eaaed695
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Pow.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Pow"
+  endpoint {
+    name: "math.Pow"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PrefetchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_PrefetchDataset.pbtxt
new file mode 100644
index 0000000000..beaad84d15
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PrefetchDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PrefetchDataset"
+  endpoint {
+    name: "data.PrefetchDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt
new file mode 100644
index 0000000000..7c9d509b16
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PrependFromQueueAndPaddedBatchDataset"
+  endpoint {
+    name: "data.PrependFromQueueAndPaddedBatchDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PreventGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_PreventGradient.pbtxt
new file mode 100644
index 0000000000..4731f21af4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PreventGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PreventGradient"
+  endpoint {
+    name: "train.PreventGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Print.pbtxt b/tensorflow/core/api_def/java_api/api_def_Print.pbtxt
new file mode 100644
index 0000000000..5b837135fd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Print.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Print"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt
new file mode 100644
index 0000000000..c6e406d08f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "PrintV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt
new file mode 100644
index 0000000000..cee973139d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PriorityQueue"
+  endpoint {
+    name: "io.PriorityQueue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PriorityQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PriorityQueueV2.pbtxt
new file mode 100644
index 0000000000..27e89f92be
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PriorityQueueV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "PriorityQueueV2"
+  endpoint {
+    name: "io.PriorityQueueV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Prod.pbtxt b/tensorflow/core/api_def/java_api/api_def_Prod.pbtxt
new file mode 100644
index 0000000000..7a98972a87
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Prod.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Prod"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PyFunc.pbtxt b/tensorflow/core/api_def/java_api/api_def_PyFunc.pbtxt
new file mode 100644
index 0000000000..5f1f7c47ca
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PyFunc.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "PyFunc"
+  visibility: SKIP
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_PyFuncStateless.pbtxt b/tensorflow/core/api_def/java_api/api_def_PyFuncStateless.pbtxt
new file mode 100644
index 0000000000..684ef58d1b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_PyFuncStateless.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "PyFuncStateless"
+  visibility: SKIP
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Qr.pbtxt b/tensorflow/core/api_def/java_api/api_def_Qr.pbtxt
new file mode 100644
index 0000000000..c0d31e7977
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Qr.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Qr"
+  endpoint {
+    name: "linalg.Qr"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt
new file mode 100644
index 0000000000..fe8401d8f9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizeAndDequantize"
+  endpoint {
+    name: "quantization.QuantizeAndDequantize"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt
new file mode 100644
index 0000000000..0535993d40
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizeAndDequantizeV2"
+  endpoint {
+    name: "quantization.QuantizeAndDequantize"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt
new file mode 100644
index 0000000000..d056b320c8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizeAndDequantizeV3"
+  endpoint {
+    name: "quantization.QuantizeAndDequantizeV3"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeDownAndShrinkRange.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeDownAndShrinkRange.pbtxt
new file mode 100644
index 0000000000..7119f53cb2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeDownAndShrinkRange.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizeDownAndShrinkRange"
+  endpoint {
+    name: "quantization.QuantizeDownAndShrinkRange"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt
new file mode 100644
index 0000000000..d04a01726f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizeV2"
+  endpoint {
+    name: "quantization.QuantizeV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedAdd.pbtxt
new file mode 100644
index 0000000000..1a2bfa36ed
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedAdd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedAdd"
+  endpoint {
+    name: "math.QuantizedAdd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedAvgPool.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedAvgPool.pbtxt
new file mode 100644
index 0000000000..7f16fb046d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedAvgPool.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedAvgPool"
+  endpoint {
+    name: "nn.QuantizedAvgPool"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt
new file mode 100644
index 0000000000..2101ce5d69
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedBatchNormWithGlobalNormalization"
+  endpoint {
+    name: "nn.QuantizedBatchNormWithGlobalNormalization"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedBiasAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedBiasAdd.pbtxt
new file mode 100644
index 0000000000..c18fb1c574
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedBiasAdd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedBiasAdd"
+  endpoint {
+    name: "nn.QuantizedBiasAdd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedConcat.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedConcat.pbtxt
new file mode 100644
index 0000000000..cb5d0c0fda
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedConcat.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "QuantizedConcat"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt
new file mode 100644
index 0000000000..8345892762
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedConv2D"
+  endpoint {
+    name: "nn.QuantizedConv2D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedInstanceNorm.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedInstanceNorm.pbtxt
new file mode 100644
index 0000000000..bbd2e7fc5e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedInstanceNorm.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedInstanceNorm"
+  endpoint {
+    name: "nn.QuantizedInstanceNorm"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedMatMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedMatMul.pbtxt
new file mode 100644
index 0000000000..7962cbade6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedMatMul.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedMatMul"
+  endpoint {
+    name: "linalg.QuantizedMatMul"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedMaxPool.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedMaxPool.pbtxt
new file mode 100644
index 0000000000..57e900494e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedMaxPool.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedMaxPool"
+  endpoint {
+    name: "nn.QuantizedMaxPool"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedMul.pbtxt
new file mode 100644
index 0000000000..be23ef706e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedMul.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedMul"
+  endpoint {
+    name: "math.QuantizedMul"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedRelu.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedRelu.pbtxt
new file mode 100644
index 0000000000..7b9a11640b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedRelu.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedRelu"
+  endpoint {
+    name: "nn.QuantizedRelu"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedRelu6.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedRelu6.pbtxt
new file mode 100644
index 0000000000..6a60e2112e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedRelu6.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedRelu6"
+  endpoint {
+    name: "nn.QuantizedRelu6"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedReluX.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedReluX.pbtxt
new file mode 100644
index 0000000000..cc47d322b2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedReluX.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedReluX"
+  endpoint {
+    name: "nn.QuantizedReluX"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedReshape.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedReshape.pbtxt
new file mode 100644
index 0000000000..4557853d94
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedReshape.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "QuantizedReshape"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedResizeBilinear.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedResizeBilinear.pbtxt
new file mode 100644
index 0000000000..81dca49094
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedResizeBilinear.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QuantizedResizeBilinear"
+  endpoint {
+    name: "image.QuantizedResizeBilinear"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt
new file mode 100644
index 0000000000..70e559c773
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueClose"
+  endpoint {
+    name: "io.QueueClose"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt
new file mode 100644
index 0000000000..01460f27e2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueCloseV2"
+  endpoint {
+    name: "io.QueueCloseV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt
new file mode 100644
index 0000000000..9cd77b4ca3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueDequeue"
+  endpoint {
+    name: "io.QueueDequeue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueMany.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueMany.pbtxt
new file mode 100644
index 0000000000..ceb2e82394
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueMany.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueDequeueMany"
+  endpoint {
+    name: "io.QueueDequeueMany"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt
new file mode 100644
index 0000000000..ac27fcb620
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueDequeueManyV2"
+  endpoint {
+    name: "io.QueueDequeueManyV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt
new file mode 100644
index 0000000000..657c63363f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueDequeueUpTo"
+  endpoint {
+    name: "io.QueueDequeueUpTo"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt
new file mode 100644
index 0000000000..19b3fff653
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueDequeueUpToV2"
+  endpoint {
+    name: "io.QueueDequeueUpToV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt
new file mode 100644
index 0000000000..8aca207816
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueDequeueV2"
+  endpoint {
+    name: "io.QueueDequeueV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt
new file mode 100644
index 0000000000..e516dab297
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueEnqueue"
+  endpoint {
+    name: "io.QueueEnqueue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt
new file mode 100644
index 0000000000..b9c0e4fd84
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueEnqueueMany"
+  endpoint {
+    name: "io.QueueEnqueueMany"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt
new file mode 100644
index 0000000000..153700a646
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueEnqueueManyV2"
+  endpoint {
+    name: "io.QueueEnqueueManyV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt
new file mode 100644
index 0000000000..68bacd7b5b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueEnqueueV2"
+  endpoint {
+    name: "io.QueueEnqueueV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt
new file mode 100644
index 0000000000..86914c7124
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueIsClosed"
+  endpoint {
+    name: "io.QueueIsClosed"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt
new file mode 100644
index 0000000000..ce33b2498b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueIsClosedV2"
+  endpoint {
+    name: "io.QueueIsClosedV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt
new file mode 100644
index 0000000000..5592d58f9f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueSize"
+  endpoint {
+    name: "io.QueueSize"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt
new file mode 100644
index 0000000000..68364aa605
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "QueueSizeV2"
+  endpoint {
+    name: "io.QueueSizeV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt
new file mode 100644
index 0000000000..e56a0ba30a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RFFT"
+  endpoint {
+    name: "signal.RFFT"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt
new file mode 100644
index 0000000000..eb193e638d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RFFT2D"
+  endpoint {
+    name: "signal.RFFT2D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt
new file mode 100644
index 0000000000..292b957f3e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RFFT3D"
+  endpoint {
+    name: "signal.RFFT3D"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RGBToHSV.pbtxt b/tensorflow/core/api_def/java_api/api_def_RGBToHSV.pbtxt
new file mode 100644
index 0000000000..1b35891ae2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RGBToHSV.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RGBToHSV"
+  endpoint {
+    name: "image.RgbToHsv"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RaggedGather.pbtxt b/tensorflow/core/api_def/java_api/api_def_RaggedGather.pbtxt
new file mode 100644
index 0000000000..f060daeb65
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RaggedGather.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RaggedGather"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RaggedRange.pbtxt b/tensorflow/core/api_def/java_api/api_def_RaggedRange.pbtxt
new file mode 100644
index 0000000000..b1a5bab0ac
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RaggedRange.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RaggedRange"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RaggedTensorToSparse.pbtxt b/tensorflow/core/api_def/java_api/api_def_RaggedTensorToSparse.pbtxt
new file mode 100644
index 0000000000..f049f47b46
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RaggedTensorToSparse.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RaggedTensorToSparse"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomCrop.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomCrop.pbtxt
new file mode 100644
index 0000000000..a3b8a3cecd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomCrop.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomCrop"
+  endpoint {
+    name: "image.RandomCrop"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomDataset.pbtxt
new file mode 100644
index 0000000000..43921e6eaf
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomDataset"
+  endpoint {
+    name: "data.RandomDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomGamma.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomGamma.pbtxt
new file mode 100644
index 0000000000..927f2c5693
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomGamma.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomGamma"
+  endpoint {
+    name: "random.RandomGamma"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomGammaGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomGammaGrad.pbtxt
new file mode 100644
index 0000000000..9257495c9b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomGammaGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomGammaGrad"
+  endpoint {
+    name: "random.RandomGammaGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt
new file mode 100644
index 0000000000..42ce1a5fb1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomPoisson"
+  endpoint {
+    name: "random.RandomPoisson"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomPoissonV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomPoissonV2.pbtxt
new file mode 100644
index 0000000000..adc5441abc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomPoissonV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomPoissonV2"
+  endpoint {
+    name: "random.RandomPoissonV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomShuffle.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomShuffle.pbtxt
new file mode 100644
index 0000000000..6dcd12fd37
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomShuffle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomShuffle"
+  endpoint {
+    name: "random.RandomShuffle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt
new file mode 100644
index 0000000000..f622eb4e0d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomShuffleQueue"
+  endpoint {
+    name: "io.RandomShuffleQueue"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt
new file mode 100644
index 0000000000..c88c2a4631
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomShuffleQueueV2"
+  endpoint {
+    name: "io.RandomShuffleQueueV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomStandardNormal.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomStandardNormal.pbtxt
new file mode 100644
index 0000000000..413fc87bdf
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomStandardNormal.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomStandardNormal"
+  endpoint {
+    name: "random.RandomStandardNormal"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomUniform.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomUniform.pbtxt
new file mode 100644
index 0000000000..2a93df83df
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomUniform.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomUniform"
+  endpoint {
+    name: "random.RandomUniform"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomUniformInt.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomUniformInt.pbtxt
new file mode 100644
index 0000000000..a1383f406a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RandomUniformInt.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RandomUniformInt"
+  endpoint {
+    name: "random.RandomUniformInt"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Range.pbtxt b/tensorflow/core/api_def/java_api/api_def_Range.pbtxt
new file mode 100644
index 0000000000..24f3787a8e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Range.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Range"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RangeDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_RangeDataset.pbtxt
new file mode 100644
index 0000000000..b6180261b0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RangeDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RangeDataset"
+  endpoint {
+    name: "data.RangeDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Rank.pbtxt b/tensorflow/core/api_def/java_api/api_def_Rank.pbtxt
new file mode 100644
index 0000000000..baa84aab10
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Rank.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Rank"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReadFile.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReadFile.pbtxt
new file mode 100644
index 0000000000..f74250d42f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReadFile.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReadFile"
+  endpoint {
+    name: "io.ReadFile"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReadVariableOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReadVariableOp.pbtxt
new file mode 100644
index 0000000000..018886d5b8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReadVariableOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ReadVariableOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt
new file mode 100644
index 0000000000..67dbe1035e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderNumRecordsProduced"
+  endpoint {
+    name: "io.ReaderNumRecordsProduced"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt
new file mode 100644
index 0000000000..5063706eb0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderNumRecordsProducedV2"
+  endpoint {
+    name: "io.ReaderNumRecordsProducedV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt
new file mode 100644
index 0000000000..63bb2d4108
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderNumWorkUnitsCompleted"
+  endpoint {
+    name: "io.ReaderNumWorkUnitsCompleted"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt
new file mode 100644
index 0000000000..cf72ecc562
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderNumWorkUnitsCompletedV2"
+  endpoint {
+    name: "io.ReaderNumWorkUnitsCompletedV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt
new file mode 100644
index 0000000000..47e9679f80
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderRead"
+  endpoint {
+    name: "io.ReaderRead"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt
new file mode 100644
index 0000000000..985d0035b0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderReadUpTo"
+  endpoint {
+    name: "io.ReaderReadUpTo"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt
new file mode 100644
index 0000000000..a001349e2d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderReadUpToV2"
+  endpoint {
+    name: "io.ReaderReadUpToV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt
new file mode 100644
index 0000000000..a64349d15e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderReadV2"
+  endpoint {
+    name: "io.ReaderReadV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt
new file mode 100644
index 0000000000..9a387753f5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderReset"
+  endpoint {
+    name: "io.ReaderReset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt
new file mode 100644
index 0000000000..6e31d5a8ff
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderResetV2"
+  endpoint {
+    name: "io.ReaderResetV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderRestoreState.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderRestoreState.pbtxt
new file mode 100644
index 0000000000..d148adde6a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderRestoreState.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderRestoreState"
+  endpoint {
+    name: "io.ReaderRestoreState"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt
new file mode 100644
index 0000000000..3d78d8eb00
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderRestoreStateV2"
+  endpoint {
+    name: "io.ReaderRestoreStateV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderSerializeState.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderSerializeState.pbtxt
new file mode 100644
index 0000000000..fd12a4c784
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderSerializeState.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderSerializeState"
+  endpoint {
+    name: "io.ReaderSerializeState"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt
new file mode 100644
index 0000000000..28c5048fc9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReaderSerializeStateV2"
+  endpoint {
+    name: "io.ReaderSerializeStateV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Real.pbtxt b/tensorflow/core/api_def/java_api/api_def_Real.pbtxt
new file mode 100644
index 0000000000..3aaea928de
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Real.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Real"
+  endpoint {
+    name: "math.Real"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RealDiv.pbtxt b/tensorflow/core/api_def/java_api/api_def_RealDiv.pbtxt
new file mode 100644
index 0000000000..415bd29da0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RealDiv.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RealDiv"
+  endpoint {
+    name: "math.RealDiv"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Reciprocal.pbtxt b/tensorflow/core/api_def/java_api/api_def_Reciprocal.pbtxt
new file mode 100644
index 0000000000..1c0d787c24
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Reciprocal.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Reciprocal"
+  endpoint {
+    name: "math.Reciprocal"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReciprocalGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReciprocalGrad.pbtxt
new file mode 100644
index 0000000000..68879669b5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReciprocalGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReciprocalGrad"
+  endpoint {
+    name: "math.ReciprocalGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RecordInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_RecordInput.pbtxt
new file mode 100644
index 0000000000..c4807c68de
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RecordInput.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RecordInput"
+  endpoint {
+    name: "random.RecordInput"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReduceDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReduceDataset.pbtxt
new file mode 100644
index 0000000000..b16c5dbb96
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReduceDataset.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ReduceDataset"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReduceJoin.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReduceJoin.pbtxt
new file mode 100644
index 0000000000..7919322201
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReduceJoin.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReduceJoin"
+  endpoint {
+    name: "strings.ReduceJoin"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RefEnter.pbtxt b/tensorflow/core/api_def/java_api/api_def_RefEnter.pbtxt
new file mode 100644
index 0000000000..9cd2281bc6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RefEnter.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RefEnter"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RefExit.pbtxt b/tensorflow/core/api_def/java_api/api_def_RefExit.pbtxt
new file mode 100644
index 0000000000..67e8d39c9a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RefExit.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RefExit"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RefIdentity.pbtxt b/tensorflow/core/api_def/java_api/api_def_RefIdentity.pbtxt
new file mode 100644
index 0000000000..53483bd1bb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RefIdentity.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RefIdentity"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RefMerge.pbtxt b/tensorflow/core/api_def/java_api/api_def_RefMerge.pbtxt
new file mode 100644
index 0000000000..6ea3145841
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RefMerge.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RefMerge"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RefNextIteration.pbtxt b/tensorflow/core/api_def/java_api/api_def_RefNextIteration.pbtxt
new file mode 100644
index 0000000000..5d008204b7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RefNextIteration.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RefNextIteration"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RefSelect.pbtxt b/tensorflow/core/api_def/java_api/api_def_RefSelect.pbtxt
new file mode 100644
index 0000000000..d7cda2d5b3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RefSelect.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RefSelect"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RefSwitch.pbtxt b/tensorflow/core/api_def/java_api/api_def_RefSwitch.pbtxt
new file mode 100644
index 0000000000..78261d8b7e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RefSwitch.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RefSwitch"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RegexFullMatch.pbtxt b/tensorflow/core/api_def/java_api/api_def_RegexFullMatch.pbtxt
new file mode 100644
index 0000000000..7f88e24eac
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RegexFullMatch.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RegexFullMatch"
+  endpoint {
+    name: "strings.RegexFullMatch"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RegexReplace.pbtxt b/tensorflow/core/api_def/java_api/api_def_RegexReplace.pbtxt
new file mode 100644
index 0000000000..01c9e93cab
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RegexReplace.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RegexReplace"
+  endpoint {
+    name: "strings.RegexReplace"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Relu.pbtxt b/tensorflow/core/api_def/java_api/api_def_Relu.pbtxt
new file mode 100644
index 0000000000..39d7fec452
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Relu.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Relu"
+  endpoint {
+    name: "nn.Relu"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Relu6.pbtxt b/tensorflow/core/api_def/java_api/api_def_Relu6.pbtxt
new file mode 100644
index 0000000000..fcc012b503
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Relu6.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Relu6"
+  endpoint {
+    name: "nn.Relu6"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Relu6Grad.pbtxt b/tensorflow/core/api_def/java_api/api_def_Relu6Grad.pbtxt
new file mode 100644
index 0000000000..33e959cc7b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Relu6Grad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Relu6Grad"
+  endpoint {
+    name: "nn.Relu6Grad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReluGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReluGrad.pbtxt
new file mode 100644
index 0000000000..ec4a8b5f97
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReluGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReluGrad"
+  endpoint {
+    name: "nn.ReluGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RemoteCall.pbtxt b/tensorflow/core/api_def/java_api/api_def_RemoteCall.pbtxt
new file mode 100644
index 0000000000..a0ddb01784
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RemoteCall.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RemoteCall"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RemoteFusedGraphExecute.pbtxt b/tensorflow/core/api_def/java_api/api_def_RemoteFusedGraphExecute.pbtxt
new file mode 100644
index 0000000000..b73e633ef2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RemoteFusedGraphExecute.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "RemoteFusedGraphExecute"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RepeatDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_RepeatDataset.pbtxt
new file mode 100644
index 0000000000..871824f886
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RepeatDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RepeatDataset"
+  endpoint {
+    name: "data.RepeatDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RequantizationRange.pbtxt b/tensorflow/core/api_def/java_api/api_def_RequantizationRange.pbtxt
new file mode 100644
index 0000000000..d365e89925
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RequantizationRange.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RequantizationRange"
+  endpoint {
+    name: "quantization.RequantizationRange"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Requantize.pbtxt b/tensorflow/core/api_def/java_api/api_def_Requantize.pbtxt
new file mode 100644
index 0000000000..d397cde4db
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Requantize.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Requantize"
+  endpoint {
+    name: "quantization.Requantize"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Reshape.pbtxt b/tensorflow/core/api_def/java_api/api_def_Reshape.pbtxt
new file mode 100644
index 0000000000..4bf3a409d1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Reshape.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Reshape"
+  endpoint {
+    name: "Reshape"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResizeArea.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResizeArea.pbtxt
new file mode 100644
index 0000000000..5358c18d4b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResizeArea.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResizeArea"
+  endpoint {
+    name: "image.ResizeArea"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResizeBicubic.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResizeBicubic.pbtxt
new file mode 100644
index 0000000000..0d0942e266
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResizeBicubic.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResizeBicubic"
+  endpoint {
+    name: "image.ResizeBicubic"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResizeBicubicGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResizeBicubicGrad.pbtxt
new file mode 100644
index 0000000000..12e61dc823
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResizeBicubicGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResizeBicubicGrad"
+  endpoint {
+    name: "image.ResizeBicubicGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResizeBilinear.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResizeBilinear.pbtxt
new file mode 100644
index 0000000000..ad123744a9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResizeBilinear.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResizeBilinear"
+  endpoint {
+    name: "image.ResizeBilinear"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResizeBilinearGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResizeBilinearGrad.pbtxt
new file mode 100644
index 0000000000..04f3e9f19e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResizeBilinearGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResizeBilinearGrad"
+  endpoint {
+    name: "image.ResizeBilinearGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResizeNearestNeighbor.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResizeNearestNeighbor.pbtxt
new file mode 100644
index 0000000000..86ad39a517
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResizeNearestNeighbor.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResizeNearestNeighbor"
+  endpoint {
+    name: "image.ResizeNearestNeighbor"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResizeNearestNeighborGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResizeNearestNeighborGrad.pbtxt
new file mode 100644
index 0000000000..70eeb906fa
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResizeNearestNeighborGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResizeNearestNeighborGrad"
+  endpoint {
+    name: "image.ResizeNearestNeighborGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdaMax.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdaMax.pbtxt
new file mode 100644
index 0000000000..ff57bd5849
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdaMax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyAdaMax"
+  endpoint {
+    name: "train.ResourceApplyAdaMax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdadelta.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdadelta.pbtxt
new file mode 100644
index 0000000000..d4369f0ead
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdadelta.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyAdadelta"
+  endpoint {
+    name: "train.ResourceApplyAdadelta"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagrad.pbtxt
new file mode 100644
index 0000000000..9bf7c20ee7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyAdagrad"
+  endpoint {
+    name: "train.ResourceApplyAdagrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt
new file mode 100644
index 0000000000..904e434b8a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyAdagradDA"
+  endpoint {
+    name: "train.ResourceApplyAdagradDA"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdam.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdam.pbtxt
new file mode 100644
index 0000000000..390bd999c4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdam.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyAdam"
+  endpoint {
+    name: "train.ResourceApplyAdam"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyAddSign.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAddSign.pbtxt
new file mode 100644
index 0000000000..bf944477be
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAddSign.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyAddSign"
+  endpoint {
+    name: "train.ResourceApplyAddSign"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt
new file mode 100644
index 0000000000..58027823e0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyCenteredRMSProp"
+  endpoint {
+    name: "train.ResourceApplyCenteredRMSProp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt
new file mode 100644
index 0000000000..db64e86fd4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyFtrl"
+  endpoint {
+    name: "train.ResourceApplyFtrl"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt
new file mode 100644
index 0000000000..547041e649
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyFtrlV2"
+  endpoint {
+    name: "train.ResourceApplyFtrlV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyGradientDescent.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyGradientDescent.pbtxt
new file mode 100644
index 0000000000..a54fed14d1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyGradientDescent.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyGradientDescent"
+  endpoint {
+    name: "train.ResourceApplyGradientDescent"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyMomentum.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyMomentum.pbtxt
new file mode 100644
index 0000000000..debb0a8131
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyMomentum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyMomentum"
+  endpoint {
+    name: "train.ResourceApplyMomentum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyPowerSign.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyPowerSign.pbtxt
new file mode 100644
index 0000000000..96df22c81f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyPowerSign.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyPowerSign"
+  endpoint {
+    name: "train.ResourceApplyPowerSign"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyProximalAdagrad.pbtxt
new file mode 100644
index 0000000000..809b8b3af3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyProximalAdagrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyProximalAdagrad"
+  endpoint {
+    name: "train.ResourceApplyProximalAdagrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyProximalGradientDescent.pbtxt
new file mode 100644
index 0000000000..c9ff5a499d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyProximalGradientDescent.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyProximalGradientDescent"
+  endpoint {
+    name: "train.ResourceApplyProximalGradientDescent"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt
new file mode 100644
index 0000000000..74a3d77531
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceApplyRMSProp"
+  endpoint {
+    name: "train.ResourceApplyRMSProp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceCountUpTo.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceCountUpTo.pbtxt
new file mode 100644
index 0000000000..439c1f1755
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceCountUpTo.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceCountUpTo"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceGather.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceGather.pbtxt
new file mode 100644
index 0000000000..79c6e8abbc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceGather.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceGather"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterAdd.pbtxt
new file mode 100644
index 0000000000..e4184e33bf
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterAdd.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterAdd"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterDiv.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterDiv.pbtxt
new file mode 100644
index 0000000000..3e21c24a58
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterDiv.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterDiv"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterMax.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterMax.pbtxt
new file mode 100644
index 0000000000..d25b14272d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterMax.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterMax"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterMin.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterMin.pbtxt
new file mode 100644
index 0000000000..6243cc1ae3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterMin.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterMin"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterMul.pbtxt
new file mode 100644
index 0000000000..393e5556c0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterMul.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterMul"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterNdAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterNdAdd.pbtxt
new file mode 100644
index 0000000000..2fd38f7be8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterNdAdd.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterNdAdd"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterNdUpdate.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterNdUpdate.pbtxt
new file mode 100644
index 0000000000..732de5f1cc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterNdUpdate.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterNdUpdate"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterSub.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterSub.pbtxt
new file mode 100644
index 0000000000..77081dda4d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterSub.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterSub"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceScatterUpdate.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceScatterUpdate.pbtxt
new file mode 100644
index 0000000000..9c2cc0ec21
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceScatterUpdate.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceScatterUpdate"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdadelta.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdadelta.pbtxt
new file mode 100644
index 0000000000..c6e3ae2219
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdadelta.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyAdadelta"
+  endpoint {
+    name: "train.ResourceSparseApplyAdadelta"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagrad.pbtxt
new file mode 100644
index 0000000000..5be4d6199b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyAdagrad"
+  endpoint {
+    name: "train.ResourceSparseApplyAdagrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt
new file mode 100644
index 0000000000..bd66d64160
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyAdagradDA"
+  endpoint {
+    name: "train.ResourceSparseApplyAdagradDA"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt
new file mode 100644
index 0000000000..afda4adb0d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyCenteredRMSProp"
+  endpoint {
+    name: "train.ResourceSparseApplyCenteredRMSProp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt
new file mode 100644
index 0000000000..195198fc9a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyFtrl"
+  endpoint {
+    name: "train.ResourceSparseApplyFtrl"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt
new file mode 100644
index 0000000000..49c6d34dcc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyFtrlV2"
+  endpoint {
+    name: "train.ResourceSparseApplyFtrlV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyMomentum.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyMomentum.pbtxt
new file mode 100644
index 0000000000..7e00039e01
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyMomentum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyMomentum"
+  endpoint {
+    name: "train.ResourceSparseApplyMomentum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt
new file mode 100644
index 0000000000..04fe8504e5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyProximalAdagrad"
+  endpoint {
+    name: "train.ResourceSparseApplyProximalAdagrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt
new file mode 100644
index 0000000000..27df43c9c5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyProximalGradientDescent"
+  endpoint {
+    name: "train.ResourceSparseApplyProximalGradientDescent"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt
new file mode 100644
index 0000000000..3e78f6f06d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ResourceSparseApplyRMSProp"
+  endpoint {
+    name: "train.ResourceSparseApplyRMSProp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceStridedSliceAssign.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceStridedSliceAssign.pbtxt
new file mode 100644
index 0000000000..83805389b9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceStridedSliceAssign.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ResourceStridedSliceAssign"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Restore.pbtxt b/tensorflow/core/api_def/java_api/api_def_Restore.pbtxt
new file mode 100644
index 0000000000..ca79fc10db
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Restore.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Restore"
+  endpoint {
+    name: "train.Restore"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RestoreSlice.pbtxt b/tensorflow/core/api_def/java_api/api_def_RestoreSlice.pbtxt
new file mode 100644
index 0000000000..0728f5908b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RestoreSlice.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RestoreSlice"
+  endpoint {
+    name: "train.RestoreSlice"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt
new file mode 100644
index 0000000000..e877ff9869
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RestoreV2"
+  endpoint {
+    name: "train.RestoreV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt b/tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt
new file mode 100644
index 0000000000..2bd25417aa
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Reverse"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReverseSequence.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReverseSequence.pbtxt
new file mode 100644
index 0000000000..87638c0dcc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReverseSequence.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ReverseSequence"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ReverseV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReverseV2.pbtxt
new file mode 100644
index 0000000000..71efbe1892
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ReverseV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ReverseV2"
+  endpoint {
+    name: "Reverse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RightShift.pbtxt b/tensorflow/core/api_def/java_api/api_def_RightShift.pbtxt
new file mode 100644
index 0000000000..68fab3e8cf
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RightShift.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RightShift"
+  endpoint {
+    name: "bitwise.RightShift"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Rint.pbtxt b/tensorflow/core/api_def/java_api/api_def_Rint.pbtxt
new file mode 100644
index 0000000000..48fbcc7c34
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Rint.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Rint"
+  endpoint {
+    name: "math.Rint"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Roll.pbtxt b/tensorflow/core/api_def/java_api/api_def_Roll.pbtxt
new file mode 100644
index 0000000000..50f7915a65
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Roll.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Roll"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Round.pbtxt b/tensorflow/core/api_def/java_api/api_def_Round.pbtxt
new file mode 100644
index 0000000000..dd612a33d6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Round.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Round"
+  endpoint {
+    name: "math.Round"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Rpc.pbtxt b/tensorflow/core/api_def/java_api/api_def_Rpc.pbtxt
new file mode 100644
index 0000000000..0d1e2b90e6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Rpc.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Rpc"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Rsqrt.pbtxt b/tensorflow/core/api_def/java_api/api_def_Rsqrt.pbtxt
new file mode 100644
index 0000000000..06b1b81ecd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Rsqrt.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Rsqrt"
+  endpoint {
+    name: "math.Rsqrt"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_RsqrtGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_RsqrtGrad.pbtxt
new file mode 100644
index 0000000000..88073b6f25
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_RsqrtGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "RsqrtGrad"
+  endpoint {
+    name: "math.RsqrtGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt b/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt
new file mode 100644
index 0000000000..e7c1c90ea6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SampleDistortedBoundingBox"
+  endpoint {
+    name: "image.SampleDistortedBoundingBox"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
new file mode 100644
index 0000000000..8656977bf6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SampleDistortedBoundingBoxV2"
+  endpoint {
+    name: "image.SampleDistortedBoundingBoxV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Save.pbtxt b/tensorflow/core/api_def/java_api/api_def_Save.pbtxt
new file mode 100644
index 0000000000..87dab6dc1f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Save.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Save"
+  endpoint {
+    name: "train.Save"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SaveSlices.pbtxt b/tensorflow/core/api_def/java_api/api_def_SaveSlices.pbtxt
new file mode 100644
index 0000000000..b336035680
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SaveSlices.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SaveSlices"
+  endpoint {
+    name: "train.SaveSlices"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt
new file mode 100644
index 0000000000..6417252f45
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SaveV2"
+  endpoint {
+    name: "train.SaveV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScalarSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScalarSummary.pbtxt
new file mode 100644
index 0000000000..c339ce0a7a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScalarSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ScalarSummary"
+  endpoint {
+    name: "summary.ScalarSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScanDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScanDataset.pbtxt
new file mode 100644
index 0000000000..89b63c53f7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScanDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ScanDataset"
+  endpoint {
+    name: "data.ScanDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterAdd.pbtxt
new file mode 100644
index 0000000000..41c63dc0a4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterAdd.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterAdd"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterDiv.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterDiv.pbtxt
new file mode 100644
index 0000000000..5754249eaf
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterDiv.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterDiv"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterMax.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterMax.pbtxt
new file mode 100644
index 0000000000..aa6375cbd7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterMax.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterMax"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterMin.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterMin.pbtxt
new file mode 100644
index 0000000000..ea007120c3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterMin.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterMin"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterMul.pbtxt
new file mode 100644
index 0000000000..f1d91258e4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterMul.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterMul"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterNd.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterNd.pbtxt
new file mode 100644
index 0000000000..8ef01b2fca
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterNd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ScatterNd"
+  endpoint {
+    name: "ScatterNd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterNdAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterNdAdd.pbtxt
new file mode 100644
index 0000000000..bea152a9da
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterNdAdd.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterNdAdd"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterNdNonAliasingAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterNdNonAliasingAdd.pbtxt
new file mode 100644
index 0000000000..4dd756bfc0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterNdNonAliasingAdd.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterNdNonAliasingAdd"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterNdSub.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterNdSub.pbtxt
new file mode 100644
index 0000000000..384e79d64e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterNdSub.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterNdSub"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterNdUpdate.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterNdUpdate.pbtxt
new file mode 100644
index 0000000000..92fce7f0ac
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterNdUpdate.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterNdUpdate"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterSub.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterSub.pbtxt
new file mode 100644
index 0000000000..5baaa4f604
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterSub.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterSub"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ScatterUpdate.pbtxt b/tensorflow/core/api_def/java_api/api_def_ScatterUpdate.pbtxt
new file mode 100644
index 0000000000..83ac128ed6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ScatterUpdate.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScatterUpdate"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SdcaFprint.pbtxt b/tensorflow/core/api_def/java_api/api_def_SdcaFprint.pbtxt
new file mode 100644
index 0000000000..ce179918cd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SdcaFprint.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SdcaFprint"
+  endpoint {
+    name: "train.SdcaFprint"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt b/tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt
new file mode 100644
index 0000000000..68eb8cf1f0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SdcaOptimizer"
+  endpoint {
+    name: "train.SdcaOptimizer"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt
new file mode 100644
index 0000000000..4e90531060
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SdcaOptimizerV2"
+  endpoint {
+    name: "train.SdcaOptimizerV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SdcaShrinkL1.pbtxt b/tensorflow/core/api_def/java_api/api_def_SdcaShrinkL1.pbtxt
new file mode 100644
index 0000000000..83993bcf14
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SdcaShrinkL1.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SdcaShrinkL1"
+  endpoint {
+    name: "train.SdcaShrinkL1"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SegmentMax.pbtxt b/tensorflow/core/api_def/java_api/api_def_SegmentMax.pbtxt
new file mode 100644
index 0000000000..6ac26c9e9e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SegmentMax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SegmentMax"
+  endpoint {
+    name: "math.SegmentMax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SegmentMean.pbtxt b/tensorflow/core/api_def/java_api/api_def_SegmentMean.pbtxt
new file mode 100644
index 0000000000..982db87bf0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SegmentMean.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SegmentMean"
+  endpoint {
+    name: "math.SegmentMean"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SegmentMin.pbtxt b/tensorflow/core/api_def/java_api/api_def_SegmentMin.pbtxt
new file mode 100644
index 0000000000..7a403b6c63
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SegmentMin.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SegmentMin"
+  endpoint {
+    name: "math.SegmentMin"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SegmentProd.pbtxt b/tensorflow/core/api_def/java_api/api_def_SegmentProd.pbtxt
new file mode 100644
index 0000000000..1bf280edc4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SegmentProd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SegmentProd"
+  endpoint {
+    name: "math.SegmentProd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SegmentSum.pbtxt b/tensorflow/core/api_def/java_api/api_def_SegmentSum.pbtxt
new file mode 100644
index 0000000000..3dcbc35225
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SegmentSum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SegmentSum"
+  endpoint {
+    name: "math.SegmentSum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Select.pbtxt b/tensorflow/core/api_def/java_api/api_def_Select.pbtxt
new file mode 100644
index 0000000000..eeff79284c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Select.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Select"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt b/tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt
new file mode 100644
index 0000000000..552d397ad8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SelfAdjointEig"
+  endpoint {
+    name: "linalg.SelfAdjointEig"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt
new file mode 100644
index 0000000000..4b610f437c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SelfAdjointEigV2"
+  endpoint {
+    name: "linalg.SelfAdjointEigV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Selu.pbtxt b/tensorflow/core/api_def/java_api/api_def_Selu.pbtxt
new file mode 100644
index 0000000000..7002d5be12
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Selu.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Selu"
+  endpoint {
+    name: "nn.Selu"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SeluGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SeluGrad.pbtxt
new file mode 100644
index 0000000000..b34e2f2239
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SeluGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SeluGrad"
+  endpoint {
+    name: "nn.SeluGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SerializeIterator.pbtxt b/tensorflow/core/api_def/java_api/api_def_SerializeIterator.pbtxt
new file mode 100644
index 0000000000..37789c753b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SerializeIterator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SerializeIterator"
+  endpoint {
+    name: "data.SerializeIterator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SerializeManySparse.pbtxt b/tensorflow/core/api_def/java_api/api_def_SerializeManySparse.pbtxt
new file mode 100644
index 0000000000..567a8e4b7f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SerializeManySparse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SerializeManySparse"
+  endpoint {
+    name: "io.SerializeManySparse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SerializeSparse.pbtxt b/tensorflow/core/api_def/java_api/api_def_SerializeSparse.pbtxt
new file mode 100644
index 0000000000..ad86c7095c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SerializeSparse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SerializeSparse"
+  endpoint {
+    name: "io.SerializeSparse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SerializeTensor.pbtxt b/tensorflow/core/api_def/java_api/api_def_SerializeTensor.pbtxt
new file mode 100644
index 0000000000..169120a063
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SerializeTensor.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SerializeTensor"
+  endpoint {
+    name: "io.SerializeTensor"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SetSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_SetSize.pbtxt
new file mode 100644
index 0000000000..1c000e9c8a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SetSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "SetSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SetStatsAggregatorDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_SetStatsAggregatorDataset.pbtxt
new file mode 100644
index 0000000000..f57abe5a66
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SetStatsAggregatorDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SetStatsAggregatorDataset"
+  endpoint {
+    name: "data.SetStatsAggregatorDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Shape.pbtxt b/tensorflow/core/api_def/java_api/api_def_Shape.pbtxt
new file mode 100644
index 0000000000..5074000b53
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Shape.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Shape"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ShapeN.pbtxt b/tensorflow/core/api_def/java_api/api_def_ShapeN.pbtxt
new file mode 100644
index 0000000000..b4bfb91118
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ShapeN.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ShapeN"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ShardedFilename.pbtxt b/tensorflow/core/api_def/java_api/api_def_ShardedFilename.pbtxt
new file mode 100644
index 0000000000..8efd0afb8b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ShardedFilename.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ShardedFilename"
+  endpoint {
+    name: "io.ShardedFilename"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ShardedFilespec.pbtxt b/tensorflow/core/api_def/java_api/api_def_ShardedFilespec.pbtxt
new file mode 100644
index 0000000000..e31cac1040
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ShardedFilespec.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ShardedFilespec"
+  endpoint {
+    name: "io.ShardedFilespec"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ShuffleAndRepeatDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ShuffleAndRepeatDataset.pbtxt
new file mode 100644
index 0000000000..543d5a109c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ShuffleAndRepeatDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ShuffleAndRepeatDataset"
+  endpoint {
+    name: "data.ShuffleAndRepeatDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ShuffleDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ShuffleDataset.pbtxt
new file mode 100644
index 0000000000..36f4979c96
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ShuffleDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ShuffleDataset"
+  endpoint {
+    name: "data.ShuffleDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Sigmoid.pbtxt b/tensorflow/core/api_def/java_api/api_def_Sigmoid.pbtxt
new file mode 100644
index 0000000000..b6e16a4110
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Sigmoid.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Sigmoid"
+  endpoint {
+    name: "math.Sigmoid"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SigmoidGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SigmoidGrad.pbtxt
new file mode 100644
index 0000000000..bb141abe3b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SigmoidGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SigmoidGrad"
+  endpoint {
+    name: "math.SigmoidGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Sign.pbtxt b/tensorflow/core/api_def/java_api/api_def_Sign.pbtxt
new file mode 100644
index 0000000000..435fb9e825
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Sign.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Sign"
+  endpoint {
+    name: "math.Sign"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Sin.pbtxt b/tensorflow/core/api_def/java_api/api_def_Sin.pbtxt
new file mode 100644
index 0000000000..2fc95755ba
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Sin.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Sin"
+  endpoint {
+    name: "math.Sin"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Sinh.pbtxt b/tensorflow/core/api_def/java_api/api_def_Sinh.pbtxt
new file mode 100644
index 0000000000..f343685e80
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Sinh.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Sinh"
+  endpoint {
+    name: "math.Sinh"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Size.pbtxt b/tensorflow/core/api_def/java_api/api_def_Size.pbtxt
new file mode 100644
index 0000000000..a41cddd8ac
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Size.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Size"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SkipDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_SkipDataset.pbtxt
new file mode 100644
index 0000000000..39bce67a3e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SkipDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SkipDataset"
+  endpoint {
+    name: "data.SkipDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Skipgram.pbtxt b/tensorflow/core/api_def/java_api/api_def_Skipgram.pbtxt
new file mode 100644
index 0000000000..d095c7b61b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Skipgram.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Skipgram"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Slice.pbtxt b/tensorflow/core/api_def/java_api/api_def_Slice.pbtxt
new file mode 100644
index 0000000000..adfe6fa4fd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Slice.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Slice"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SlideDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_SlideDataset.pbtxt
new file mode 100644
index 0000000000..bc284c2833
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SlideDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SlideDataset"
+  endpoint {
+    name: "data.SlideDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Snapshot.pbtxt b/tensorflow/core/api_def/java_api/api_def_Snapshot.pbtxt
new file mode 100644
index 0000000000..6e49c1a543
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Snapshot.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Snapshot"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Softmax.pbtxt b/tensorflow/core/api_def/java_api/api_def_Softmax.pbtxt
new file mode 100644
index 0000000000..cb27a04d21
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Softmax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Softmax"
+  endpoint {
+    name: "nn.Softmax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SoftmaxCrossEntropyWithLogits.pbtxt b/tensorflow/core/api_def/java_api/api_def_SoftmaxCrossEntropyWithLogits.pbtxt
new file mode 100644
index 0000000000..e064562c0f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SoftmaxCrossEntropyWithLogits.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SoftmaxCrossEntropyWithLogits"
+  endpoint {
+    name: "nn.SoftmaxCrossEntropyWithLogits"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Softplus.pbtxt b/tensorflow/core/api_def/java_api/api_def_Softplus.pbtxt
new file mode 100644
index 0000000000..671656c287
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Softplus.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Softplus"
+  endpoint {
+    name: "math.Softplus"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SoftplusGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SoftplusGrad.pbtxt
new file mode 100644
index 0000000000..d1b074a9b7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SoftplusGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SoftplusGrad"
+  endpoint {
+    name: "math.SoftplusGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Softsign.pbtxt b/tensorflow/core/api_def/java_api/api_def_Softsign.pbtxt
new file mode 100644
index 0000000000..23aa1e3c58
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Softsign.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Softsign"
+  endpoint {
+    name: "nn.Softsign"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SoftsignGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SoftsignGrad.pbtxt
new file mode 100644
index 0000000000..73faf74511
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SoftsignGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SoftsignGrad"
+  endpoint {
+    name: "nn.SoftsignGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SpaceToBatch.pbtxt b/tensorflow/core/api_def/java_api/api_def_SpaceToBatch.pbtxt
new file mode 100644
index 0000000000..4fe7232e54
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SpaceToBatch.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SpaceToBatch"
+  endpoint {
+    name: "nn.SpaceToBatch"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SpaceToBatchND.pbtxt b/tensorflow/core/api_def/java_api/api_def_SpaceToBatchND.pbtxt
new file mode 100644
index 0000000000..6612b48286
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SpaceToBatchND.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SpaceToBatchND"
+  endpoint {
+    name: "SpaceToBatchNd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SpaceToDepth.pbtxt b/tensorflow/core/api_def/java_api/api_def_SpaceToDepth.pbtxt
new file mode 100644
index 0000000000..cb421c75db
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SpaceToDepth.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SpaceToDepth"
+  endpoint {
+    name: "nn.SpaceToDepth"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseAccumulatorApplyGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseAccumulatorApplyGradient.pbtxt
new file mode 100644
index 0000000000..cdb0b14b67
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseAccumulatorApplyGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseAccumulatorApplyGradient"
+  endpoint {
+    name: "sparse.SparseAccumulatorApplyGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseAccumulatorTakeGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseAccumulatorTakeGradient.pbtxt
new file mode 100644
index 0000000000..8fc1e70959
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseAccumulatorTakeGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseAccumulatorTakeGradient"
+  endpoint {
+    name: "sparse.SparseAccumulatorTakeGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseAdd.pbtxt
new file mode 100644
index 0000000000..0761f2ed16
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseAdd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseAdd"
+  endpoint {
+    name: "sparse.SparseAdd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseAddGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseAddGrad.pbtxt
new file mode 100644
index 0000000000..6529c46a17
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseAddGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseAddGrad"
+  endpoint {
+    name: "sparse.SparseAddGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyAdadelta.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyAdadelta.pbtxt
new file mode 100644
index 0000000000..7122f210a4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyAdadelta.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyAdadelta"
+  endpoint {
+    name: "train.SparseApplyAdadelta"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagrad.pbtxt
new file mode 100644
index 0000000000..184a8cfb2f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyAdagrad"
+  endpoint {
+    name: "train.SparseApplyAdagrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt
new file mode 100644
index 0000000000..56d9b1c49b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyAdagradDA"
+  endpoint {
+    name: "train.SparseApplyAdagradDA"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt
new file mode 100644
index 0000000000..fa35db29b0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyCenteredRMSProp"
+  endpoint {
+    name: "train.SparseApplyCenteredRMSProp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt
new file mode 100644
index 0000000000..bb669aa773
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyFtrl"
+  endpoint {
+    name: "train.SparseApplyFtrl"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt
new file mode 100644
index 0000000000..673f71c34b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyFtrlV2"
+  endpoint {
+    name: "train.SparseApplyFtrlV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyMomentum.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyMomentum.pbtxt
new file mode 100644
index 0000000000..f7e79c5e7e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyMomentum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyMomentum"
+  endpoint {
+    name: "train.SparseApplyMomentum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyProximalAdagrad.pbtxt
new file mode 100644
index 0000000000..8ac6cf771c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyProximalAdagrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyProximalAdagrad"
+  endpoint {
+    name: "train.SparseApplyProximalAdagrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyProximalGradientDescent.pbtxt
new file mode 100644
index 0000000000..926ed2c1d4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyProximalGradientDescent.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyProximalGradientDescent"
+  endpoint {
+    name: "train.SparseApplyProximalGradientDescent"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt
new file mode 100644
index 0000000000..94646093af
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseApplyRMSProp"
+  endpoint {
+    name: "train.SparseApplyRMSProp"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseConcat.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseConcat.pbtxt
new file mode 100644
index 0000000000..8ceb600a42
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseConcat.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseConcat"
+  endpoint {
+    name: "sparse.SparseConcat"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseConditionalAccumulator.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseConditionalAccumulator.pbtxt
new file mode 100644
index 0000000000..3dc2c1ea8a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseConditionalAccumulator.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseConditionalAccumulator"
+  endpoint {
+    name: "sparse.SparseConditionalAccumulator"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseCross.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseCross.pbtxt
new file mode 100644
index 0000000000..130f333d35
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseCross.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseCross"
+  endpoint {
+    name: "sparse.SparseCross"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseAdd.pbtxt
new file mode 100644
index 0000000000..013b7eede9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseAdd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseDenseCwiseAdd"
+  endpoint {
+    name: "sparse.SparseDenseCwiseAdd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseDiv.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseDiv.pbtxt
new file mode 100644
index 0000000000..8cf56d7b41
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseDiv.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseDenseCwiseDiv"
+  endpoint {
+    name: "sparse.SparseDenseCwiseDiv"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseMul.pbtxt
new file mode 100644
index 0000000000..37c50f2327
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseDenseCwiseMul.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseDenseCwiseMul"
+  endpoint {
+    name: "sparse.SparseDenseCwiseMul"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseFillEmptyRows.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseFillEmptyRows.pbtxt
new file mode 100644
index 0000000000..3adddbd34b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseFillEmptyRows.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseFillEmptyRows"
+  endpoint {
+    name: "sparse.SparseFillEmptyRows"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseFillEmptyRowsGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseFillEmptyRowsGrad.pbtxt
new file mode 100644
index 0000000000..708069d028
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseFillEmptyRowsGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseFillEmptyRowsGrad"
+  endpoint {
+    name: "sparse.SparseFillEmptyRowsGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseMatMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseMatMul.pbtxt
new file mode 100644
index 0000000000..445d53b023
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseMatMul.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseMatMul"
+  endpoint {
+    name: "sparse.SparseMatMul"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseReduceMax.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseReduceMax.pbtxt
new file mode 100644
index 0000000000..a7467b9b47
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseReduceMax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseReduceMax"
+  endpoint {
+    name: "sparse.SparseReduceMax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseReduceMaxSparse.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseReduceMaxSparse.pbtxt
new file mode 100644
index 0000000000..987e98467e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseReduceMaxSparse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseReduceMaxSparse"
+  endpoint {
+    name: "sparse.SparseReduceMaxSparse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseReduceSum.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseReduceSum.pbtxt
new file mode 100644
index 0000000000..739fb5fb95
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseReduceSum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseReduceSum"
+  endpoint {
+    name: "sparse.SparseReduceSum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseReduceSumSparse.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseReduceSumSparse.pbtxt
new file mode 100644
index 0000000000..64d6d45f1a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseReduceSumSparse.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseReduceSumSparse"
+  endpoint {
+    name: "sparse.SparseReduceSumSparse"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseReorder.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseReorder.pbtxt
new file mode 100644
index 0000000000..202066e76f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseReorder.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseReorder"
+  endpoint {
+    name: "sparse.SparseReorder"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseReshape.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseReshape.pbtxt
new file mode 100644
index 0000000000..0a393a6105
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseReshape.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseReshape"
+  endpoint {
+    name: "sparse.SparseReshape"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSegmentMean.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSegmentMean.pbtxt
new file mode 100644
index 0000000000..9187dbed67
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSegmentMean.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSegmentMean"
+  endpoint {
+    name: "sparse.SparseSegmentMean"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSegmentMeanGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSegmentMeanGrad.pbtxt
new file mode 100644
index 0000000000..1b7d5bbcf0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSegmentMeanGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSegmentMeanGrad"
+  endpoint {
+    name: "sparse.SparseSegmentMeanGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt
new file mode 100644
index 0000000000..bb3ac2256d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSegmentMeanWithNumSegments"
+  endpoint {
+    name: "sparse.SparseSegmentMeanWithNumSegments"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtN.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtN.pbtxt
new file mode 100644
index 0000000000..2cecff503f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtN.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSegmentSqrtN"
+  endpoint {
+    name: "sparse.SparseSegmentSqrtN"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtNGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtNGrad.pbtxt
new file mode 100644
index 0000000000..e6973eb773
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtNGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSegmentSqrtNGrad"
+  endpoint {
+    name: "sparse.SparseSegmentSqrtNGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt
new file mode 100644
index 0000000000..78aed85f0a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSegmentSqrtNWithNumSegments"
+  endpoint {
+    name: "sparse.SparseSegmentSqrtNWithNumSegments"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSegmentSum.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSum.pbtxt
new file mode 100644
index 0000000000..f18f351667
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSegmentSum"
+  endpoint {
+    name: "sparse.SparseSegmentSum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSegmentSumWithNumSegments.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSumWithNumSegments.pbtxt
new file mode 100644
index 0000000000..8e384857cc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSegmentSumWithNumSegments.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSegmentSumWithNumSegments"
+  endpoint {
+    name: "sparse.SparseSegmentSumWithNumSegments"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSlice.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSlice.pbtxt
new file mode 100644
index 0000000000..1d794df925
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSlice.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSlice"
+  endpoint {
+    name: "sparse.SparseSlice"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSliceGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSliceGrad.pbtxt
new file mode 100644
index 0000000000..979326c0fc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSliceGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSliceGrad"
+  endpoint {
+    name: "sparse.SparseSliceGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSoftmax.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSoftmax.pbtxt
new file mode 100644
index 0000000000..56c96640cb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSoftmax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSoftmax"
+  endpoint {
+    name: "sparse.SparseSoftmax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSoftmaxCrossEntropyWithLogits.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSoftmaxCrossEntropyWithLogits.pbtxt
new file mode 100644
index 0000000000..7627d5f607
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSoftmaxCrossEntropyWithLogits.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSoftmaxCrossEntropyWithLogits"
+  endpoint {
+    name: "nn.SparseSoftmaxCrossEntropyWithLogits"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSparseMaximum.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSparseMaximum.pbtxt
new file mode 100644
index 0000000000..3143096213
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSparseMaximum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSparseMaximum"
+  endpoint {
+    name: "sparse.SparseSparseMaximum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSparseMinimum.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSparseMinimum.pbtxt
new file mode 100644
index 0000000000..fc04bb4fed
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSparseMinimum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSparseMinimum"
+  endpoint {
+    name: "sparse.SparseSparseMinimum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseSplit.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseSplit.pbtxt
new file mode 100644
index 0000000000..0afc95199a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseSplit.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseSplit"
+  endpoint {
+    name: "sparse.SparseSplit"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseTensorDenseAdd.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseTensorDenseAdd.pbtxt
new file mode 100644
index 0000000000..fc5c882f83
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseTensorDenseAdd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseTensorDenseAdd"
+  endpoint {
+    name: "sparse.SparseTensorDenseAdd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseTensorDenseMatMul.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseTensorDenseMatMul.pbtxt
new file mode 100644
index 0000000000..cd1f1e09e9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseTensorDenseMatMul.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseTensorDenseMatMul"
+  endpoint {
+    name: "sparse.SparseTensorDenseMatMul"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseTensorSliceDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseTensorSliceDataset.pbtxt
new file mode 100644
index 0000000000..bb0d1d7a94
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseTensorSliceDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseTensorSliceDataset"
+  endpoint {
+    name: "data.SparseTensorSliceDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseToDense.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseToDense.pbtxt
new file mode 100644
index 0000000000..68df155e46
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseToDense.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseToDense"
+  endpoint {
+    name: "sparse.SparseToDense"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseToSparseSetOperation.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseToSparseSetOperation.pbtxt
new file mode 100644
index 0000000000..fb04366fea
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SparseToSparseSetOperation.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SparseToSparseSetOperation"
+  endpoint {
+    name: "sparse.SparseToSparseSetOperation"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Split.pbtxt b/tensorflow/core/api_def/java_api/api_def_Split.pbtxt
new file mode 100644
index 0000000000..ffb7b52e09
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Split.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Split"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SplitV.pbtxt b/tensorflow/core/api_def/java_api/api_def_SplitV.pbtxt
new file mode 100644
index 0000000000..94f4a08d70
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SplitV.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "SplitV"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SqlDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_SqlDataset.pbtxt
new file mode 100644
index 0000000000..8764e81af2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SqlDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SqlDataset"
+  endpoint {
+    name: "data.SqlDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Sqrt.pbtxt b/tensorflow/core/api_def/java_api/api_def_Sqrt.pbtxt
new file mode 100644
index 0000000000..daa2a1ee86
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Sqrt.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Sqrt"
+  endpoint {
+    name: "math.Sqrt"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SqrtGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_SqrtGrad.pbtxt
new file mode 100644
index 0000000000..8eca1b89b3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SqrtGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SqrtGrad"
+  endpoint {
+    name: "math.SqrtGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Square.pbtxt b/tensorflow/core/api_def/java_api/api_def_Square.pbtxt
new file mode 100644
index 0000000000..561e10e020
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Square.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Square"
+  endpoint {
+    name: "math.Square"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SquaredDifference.pbtxt b/tensorflow/core/api_def/java_api/api_def_SquaredDifference.pbtxt
new file mode 100644
index 0000000000..752dbcce7b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SquaredDifference.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SquaredDifference"
+  endpoint {
+    name: "math.SquaredDifference"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Squeeze.pbtxt b/tensorflow/core/api_def/java_api/api_def_Squeeze.pbtxt
new file mode 100644
index 0000000000..264c384603
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Squeeze.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Squeeze"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Stack.pbtxt b/tensorflow/core/api_def/java_api/api_def_Stack.pbtxt
new file mode 100644
index 0000000000..522e5efec5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Stack.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Stack"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt
new file mode 100644
index 0000000000..1cba682de9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StackClose"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt
new file mode 100644
index 0000000000..be5a0f535a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StackCloseV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt
new file mode 100644
index 0000000000..4439e73843
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StackPop"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt
new file mode 100644
index 0000000000..b8babb4c56
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StackPopV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt
new file mode 100644
index 0000000000..41792e327f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StackPush"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt
new file mode 100644
index 0000000000..4920152259
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StackPushV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt
new file mode 100644
index 0000000000..5232e0425e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StackV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Stage.pbtxt b/tensorflow/core/api_def/java_api/api_def_Stage.pbtxt
new file mode 100644
index 0000000000..87e0c7d981
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Stage.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Stage"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StageClear.pbtxt b/tensorflow/core/api_def/java_api/api_def_StageClear.pbtxt
new file mode 100644
index 0000000000..26890a55b3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StageClear.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StageClear"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StagePeek.pbtxt b/tensorflow/core/api_def/java_api/api_def_StagePeek.pbtxt
new file mode 100644
index 0000000000..7c3ed3dc91
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StagePeek.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StagePeek"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StageSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_StageSize.pbtxt
new file mode 100644
index 0000000000..d8188c3e0b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StageSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StageSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatefulPartitionedCall.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatefulPartitionedCall.pbtxt
new file mode 100644
index 0000000000..2e6decf19a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatefulPartitionedCall.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StatefulPartitionedCall"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatelessIf.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatelessIf.pbtxt
new file mode 100644
index 0000000000..37c7b9a962
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatelessIf.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StatelessIf"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatelessMultinomial.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatelessMultinomial.pbtxt
new file mode 100644
index 0000000000..8cfbbfb2c2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatelessMultinomial.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StatelessMultinomial"
+  endpoint {
+    name: "random.StatelessMultinomial"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatelessRandomNormal.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatelessRandomNormal.pbtxt
new file mode 100644
index 0000000000..4e648cfa69
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatelessRandomNormal.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StatelessRandomNormal"
+  endpoint {
+    name: "random.StatelessRandomNormal"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatelessRandomUniform.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatelessRandomUniform.pbtxt
new file mode 100644
index 0000000000..6f8f328b41
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatelessRandomUniform.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StatelessRandomUniform"
+  endpoint {
+    name: "random.StatelessRandomUniform"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatelessRandomUniformInt.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatelessRandomUniformInt.pbtxt
new file mode 100644
index 0000000000..2fec4a7cc6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatelessRandomUniformInt.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StatelessRandomUniformInt"
+  endpoint {
+    name: "random.StatelessRandomUniformInt"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatelessTruncatedNormal.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatelessTruncatedNormal.pbtxt
new file mode 100644
index 0000000000..c05071dd73
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatelessTruncatedNormal.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StatelessTruncatedNormal"
+  endpoint {
+    name: "random.StatelessTruncatedNormal"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatelessWhile.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatelessWhile.pbtxt
new file mode 100644
index 0000000000..1a4252c90a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatelessWhile.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StatelessWhile"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StaticRegexFullMatch.pbtxt b/tensorflow/core/api_def/java_api/api_def_StaticRegexFullMatch.pbtxt
new file mode 100644
index 0000000000..66b841ad74
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StaticRegexFullMatch.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StaticRegexFullMatch"
+  endpoint {
+    name: "strings.StaticRegexFullMatch"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StaticRegexReplace.pbtxt b/tensorflow/core/api_def/java_api/api_def_StaticRegexReplace.pbtxt
new file mode 100644
index 0000000000..305bc8a3ca
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StaticRegexReplace.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StaticRegexReplace"
+  endpoint {
+    name: "strings.StaticRegexReplace"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatsAggregatorHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatsAggregatorHandle.pbtxt
new file mode 100644
index 0000000000..301dc982e3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatsAggregatorHandle.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StatsAggregatorHandle"
+  endpoint {
+    name: "data.StatsAggregatorHandle"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StatsAggregatorSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_StatsAggregatorSummary.pbtxt
new file mode 100644
index 0000000000..f83c8f5a4f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StatsAggregatorSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StatsAggregatorSummary"
+  endpoint {
+    name: "summary.StatsAggregatorSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StopGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_StopGradient.pbtxt
new file mode 100644
index 0000000000..ad8473e816
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StopGradient.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StopGradient"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StridedSlice.pbtxt b/tensorflow/core/api_def/java_api/api_def_StridedSlice.pbtxt
new file mode 100644
index 0000000000..b0c301dab1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StridedSlice.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StridedSlice"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StridedSliceAssign.pbtxt b/tensorflow/core/api_def/java_api/api_def_StridedSliceAssign.pbtxt
new file mode 100644
index 0000000000..6850dc2d1b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StridedSliceAssign.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StridedSliceAssign"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StridedSliceGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_StridedSliceGrad.pbtxt
new file mode 100644
index 0000000000..b03204bcc4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StridedSliceGrad.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "StridedSliceGrad"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringFormat.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringFormat.pbtxt
new file mode 100644
index 0000000000..cdd0313996
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringFormat.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringFormat"
+  endpoint {
+    name: "strings.StringFormat"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringJoin.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringJoin.pbtxt
new file mode 100644
index 0000000000..b43ff157cd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringJoin.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringJoin"
+  endpoint {
+    name: "strings.Join"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringLength.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringLength.pbtxt
new file mode 100644
index 0000000000..c8eb48cc3c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringLength.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringLength"
+  endpoint {
+    name: "strings.StringLength"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt
new file mode 100644
index 0000000000..f16e196a6f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringSplit"
+  endpoint {
+    name: "strings.StringSplit"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt
new file mode 100644
index 0000000000..0779eb7f71
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringSplitV2"
+  endpoint {
+    name: "strings.StringSplitV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringStrip.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringStrip.pbtxt
new file mode 100644
index 0000000000..01691211ff
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringStrip.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringStrip"
+  endpoint {
+    name: "strings.Strip"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringToHashBucket.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringToHashBucket.pbtxt
new file mode 100644
index 0000000000..765517578d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringToHashBucket.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringToHashBucket"
+  endpoint {
+    name: "strings.ToHashBucket"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringToHashBucketFast.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringToHashBucketFast.pbtxt
new file mode 100644
index 0000000000..de08bc2d36
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringToHashBucketFast.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringToHashBucketFast"
+  endpoint {
+    name: "strings.ToHashBucketFast"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringToHashBucketStrong.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringToHashBucketStrong.pbtxt
new file mode 100644
index 0000000000..15b9138238
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringToHashBucketStrong.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringToHashBucketStrong"
+  endpoint {
+    name: "strings.ToHashBucketStrong"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_StringToNumber.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringToNumber.pbtxt
new file mode 100644
index 0000000000..196f694da5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_StringToNumber.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "StringToNumber"
+  endpoint {
+    name: "strings.ToNumber"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Sub.pbtxt b/tensorflow/core/api_def/java_api/api_def_Sub.pbtxt
new file mode 100644
index 0000000000..e79c6a0036
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Sub.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Sub"
+  endpoint {
+    name: "math.Sub"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Substr.pbtxt b/tensorflow/core/api_def/java_api/api_def_Substr.pbtxt
new file mode 100644
index 0000000000..78f34550a5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Substr.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Substr"
+  endpoint {
+    name: "strings.Substr"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Sum.pbtxt b/tensorflow/core/api_def/java_api/api_def_Sum.pbtxt
new file mode 100644
index 0000000000..3999fa6ed1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Sum.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Sum"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_SummaryWriter.pbtxt b/tensorflow/core/api_def/java_api/api_def_SummaryWriter.pbtxt
new file mode 100644
index 0000000000..8338c0fa18
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SummaryWriter.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SummaryWriter"
+  endpoint {
+    name: "summary.SummaryWriter"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Svd.pbtxt b/tensorflow/core/api_def/java_api/api_def_Svd.pbtxt
new file mode 100644
index 0000000000..889d416281
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Svd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Svd"
+  endpoint {
+    name: "linalg.Svd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Switch.pbtxt b/tensorflow/core/api_def/java_api/api_def_Switch.pbtxt
index 0d3362a91e..edd9255452 100644
--- a/tensorflow/core/api_def/java_api/api_def_Switch.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Switch.pbtxt
@@ -1,4 +1,6 @@
 op {
-  graph_op_name: "Switch" #TODO(karllessard) escape that reserved name
-  visibility: HIDDEN
+  graph_op_name: "Switch"
+  endpoint {
+    name: "SwitchCond"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SymbolicGradient.pbtxt b/tensorflow/core/api_def/java_api/api_def_SymbolicGradient.pbtxt
new file mode 100644
index 0000000000..6c6e68ae74
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_SymbolicGradient.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "SymbolicGradient"
+  endpoint {
+    name: "train.SymbolicGradient"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt
new file mode 100644
index 0000000000..76c077f452
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TFRecordDataset"
+  endpoint {
+    name: "data.TFRecordDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
new file mode 100644
index 0000000000..72d02db279
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TFRecordReader"
+  endpoint {
+    name: "io.TFRecordReader"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
new file mode 100644
index 0000000000..beb7f58862
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TFRecordReaderV2"
+  endpoint {
+    name: "io.TFRecordReaderV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TakeDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_TakeDataset.pbtxt
new file mode 100644
index 0000000000..2b0c0544fb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TakeDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TakeDataset"
+  endpoint {
+    name: "data.TakeDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TakeManySparseFromTensorsMap.pbtxt b/tensorflow/core/api_def/java_api/api_def_TakeManySparseFromTensorsMap.pbtxt
new file mode 100644
index 0000000000..620e9fb012
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TakeManySparseFromTensorsMap.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TakeManySparseFromTensorsMap"
+  endpoint {
+    name: "sparse.TakeManySparseFromTensorsMap"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Tan.pbtxt b/tensorflow/core/api_def/java_api/api_def_Tan.pbtxt
new file mode 100644
index 0000000000..560ca546b7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Tan.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Tan"
+  endpoint {
+    name: "math.Tan"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Tanh.pbtxt b/tensorflow/core/api_def/java_api/api_def_Tanh.pbtxt
new file mode 100644
index 0000000000..90e441808f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Tanh.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Tanh"
+  endpoint {
+    name: "math.Tanh"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TanhGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_TanhGrad.pbtxt
new file mode 100644
index 0000000000..c0837d0487
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TanhGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TanhGrad"
+  endpoint {
+    name: "math.TanhGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TemporaryVariable.pbtxt b/tensorflow/core/api_def/java_api/api_def_TemporaryVariable.pbtxt
new file mode 100644
index 0000000000..8ce760f9c9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TemporaryVariable.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TemporaryVariable"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt
new file mode 100644
index 0000000000..7eaa468130
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArray"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayClose.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayClose.pbtxt
new file mode 100644
index 0000000000..e866250d3a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayClose.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayClose"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt
new file mode 100644
index 0000000000..5add953e66
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayCloseV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt
new file mode 100644
index 0000000000..b881089718
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayCloseV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcat.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcat.pbtxt
new file mode 100644
index 0000000000..e72b58de1c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcat.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayConcat"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt
new file mode 100644
index 0000000000..abc3e60f0c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayConcatV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt
new file mode 100644
index 0000000000..d1a91e90d7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayConcatV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGather.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGather.pbtxt
new file mode 100644
index 0000000000..d4d179874f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGather.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayGather"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt
new file mode 100644
index 0000000000..dd94fc4ef5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayGatherV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt
new file mode 100644
index 0000000000..b8f01e6ac1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayGatherV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGrad.pbtxt
new file mode 100644
index 0000000000..517461edba
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGrad.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayGrad"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt
new file mode 100644
index 0000000000..b7278b5ffa
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayGradV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt
new file mode 100644
index 0000000000..b7c3f143ef
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayGradV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGradWithShape.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradWithShape.pbtxt
new file mode 100644
index 0000000000..d6804fb697
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradWithShape.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayGradWithShape"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayPack.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayPack.pbtxt
new file mode 100644
index 0000000000..030950b06f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayPack.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayPack"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayRead.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayRead.pbtxt
new file mode 100644
index 0000000000..1b62f7fac7
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayRead.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayRead"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt
new file mode 100644
index 0000000000..ba02f61028
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayReadV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt
new file mode 100644
index 0000000000..c30428c23a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayReadV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatter.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatter.pbtxt
new file mode 100644
index 0000000000..a3e8d1625e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatter.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayScatter"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt
new file mode 100644
index 0000000000..38080410e6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayScatterV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt
new file mode 100644
index 0000000000..6f42524af8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayScatterV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySize.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySize.pbtxt
new file mode 100644
index 0000000000..fb3a6fae1c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArraySize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt
new file mode 100644
index 0000000000..03bf061f8b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArraySizeV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt
new file mode 100644
index 0000000000..0c14360641
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArraySizeV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySplit.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySplit.pbtxt
new file mode 100644
index 0000000000..3eb8d6c7ff
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySplit.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArraySplit"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt
new file mode 100644
index 0000000000..34740aa2ef
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArraySplitV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt
new file mode 100644
index 0000000000..4b22f2bdf6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArraySplitV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayUnpack.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayUnpack.pbtxt
new file mode 100644
index 0000000000..a9011de23e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayUnpack.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayUnpack"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt
new file mode 100644
index 0000000000..d18517725e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt
new file mode 100644
index 0000000000..06e65ef93c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayWrite.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayWrite.pbtxt
new file mode 100644
index 0000000000..92ab1764ec
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayWrite.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayWrite"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt
new file mode 100644
index 0000000000..10c505cff4
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayWriteV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt
new file mode 100644
index 0000000000..2fe2d02127
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorArrayWriteV3"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorDataset.pbtxt
new file mode 100644
index 0000000000..ed0ead6e7a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TensorDataset"
+  endpoint {
+    name: "data.TensorDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorForestCreateTreeVariable.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorForestCreateTreeVariable.pbtxt
new file mode 100644
index 0000000000..8e2410a0dd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorForestCreateTreeVariable.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorForestCreateTreeVariable"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorForestTreeDeserialize.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeDeserialize.pbtxt
new file mode 100644
index 0000000000..724bdb282d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeDeserialize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorForestTreeDeserialize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorForestTreeIsInitializedOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeIsInitializedOp.pbtxt
new file mode 100644
index 0000000000..7e93af8508
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeIsInitializedOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorForestTreeIsInitializedOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorForestTreePredict.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorForestTreePredict.pbtxt
new file mode 100644
index 0000000000..73770fa291
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorForestTreePredict.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorForestTreePredict"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorForestTreeResourceHandleOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeResourceHandleOp.pbtxt
new file mode 100644
index 0000000000..c2ef0ee5dd
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeResourceHandleOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorForestTreeResourceHandleOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorForestTreeSerialize.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeSerialize.pbtxt
new file mode 100644
index 0000000000..d10f9e632b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeSerialize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorForestTreeSerialize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorForestTreeSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeSize.pbtxt
new file mode 100644
index 0000000000..9d81f1ea8c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorForestTreeSize.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorForestTreeSize"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListConcatLists.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListConcatLists.pbtxt
new file mode 100644
index 0000000000..3fa6265e10
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListConcatLists.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListConcatLists"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListElementShape.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListElementShape.pbtxt
new file mode 100644
index 0000000000..396a0cfa8f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListElementShape.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListElementShape"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListFromTensor.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListFromTensor.pbtxt
new file mode 100644
index 0000000000..3179feddd6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListFromTensor.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListFromTensor"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListGather.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListGather.pbtxt
new file mode 100644
index 0000000000..580d34b68f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListGather.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListGather"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListGetItem.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListGetItem.pbtxt
new file mode 100644
index 0000000000..2c47208fa0
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListGetItem.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListGetItem"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListLength.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListLength.pbtxt
new file mode 100644
index 0000000000..2ec689d71c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListLength.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListLength"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListPopBack.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListPopBack.pbtxt
new file mode 100644
index 0000000000..8d0d6ed55b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListPopBack.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListPopBack"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListPushBack.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListPushBack.pbtxt
new file mode 100644
index 0000000000..285351cf4f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListPushBack.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListPushBack"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListPushBackBatch.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListPushBackBatch.pbtxt
new file mode 100644
index 0000000000..1f33d49260
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListPushBackBatch.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListPushBackBatch"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListReserve.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListReserve.pbtxt
new file mode 100644
index 0000000000..5336600688
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListReserve.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListReserve"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListScatter.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListScatter.pbtxt
new file mode 100644
index 0000000000..f3a56f1292
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListScatter.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListScatter"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListSetItem.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListSetItem.pbtxt
new file mode 100644
index 0000000000..002e2a9bd3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListSetItem.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListSetItem"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorListStack.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorListStack.pbtxt
new file mode 100644
index 0000000000..b4b03c86e5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorListStack.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TensorListStack"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorSliceDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorSliceDataset.pbtxt
new file mode 100644
index 0000000000..3729a025e6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorSliceDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TensorSliceDataset"
+  endpoint {
+    name: "data.TensorSliceDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt
new file mode 100644
index 0000000000..a720ca9076
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TensorSummary"
+  endpoint {
+    name: "summary.TensorSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorSummaryV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorSummaryV2.pbtxt
new file mode 100644
index 0000000000..ba5131c85e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TensorSummaryV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TensorSummaryV2"
+  endpoint {
+    name: "summary.TensorSummaryV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TextLineDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_TextLineDataset.pbtxt
new file mode 100644
index 0000000000..c3c75d5703
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TextLineDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TextLineDataset"
+  endpoint {
+    name: "data.TextLineDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt
new file mode 100644
index 0000000000..17eb20cffe
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TextLineReader"
+  endpoint {
+    name: "io.TextLineReader"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TextLineReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TextLineReaderV2.pbtxt
new file mode 100644
index 0000000000..745c85fac9
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TextLineReaderV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TextLineReaderV2"
+  endpoint {
+    name: "io.TextLineReaderV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ThreadUnsafeUnigramCandidateSampler.pbtxt b/tensorflow/core/api_def/java_api/api_def_ThreadUnsafeUnigramCandidateSampler.pbtxt
new file mode 100644
index 0000000000..2ef4a83478
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ThreadUnsafeUnigramCandidateSampler.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ThreadUnsafeUnigramCandidateSampler"
+  endpoint {
+    name: "random.ThreadUnsafeUnigramCandidateSampler"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Tile.pbtxt b/tensorflow/core/api_def/java_api/api_def_Tile.pbtxt
new file mode 100644
index 0000000000..8e4e63a222
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Tile.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Tile"
+  endpoint {
+    name: "Tile"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TileGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_TileGrad.pbtxt
new file mode 100644
index 0000000000..7cd975c02f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TileGrad.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TileGrad"
+  endpoint {
+    name: "train.TileGrad"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Timestamp.pbtxt b/tensorflow/core/api_def/java_api/api_def_Timestamp.pbtxt
new file mode 100644
index 0000000000..9ebc664ae1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Timestamp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Timestamp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TopK.pbtxt b/tensorflow/core/api_def/java_api/api_def_TopK.pbtxt
new file mode 100644
index 0000000000..108701adf1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TopK.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TopK"
+  endpoint {
+    name: "nn.TopK"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt
new file mode 100644
index 0000000000..2aa89adcce
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TopKV2"
+  endpoint {
+    name: "nn.TopKV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Transpose.pbtxt b/tensorflow/core/api_def/java_api/api_def_Transpose.pbtxt
new file mode 100644
index 0000000000..ad7900c00b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Transpose.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Transpose"
+  endpoint {
+    name: "linalg.Transpose"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TruncateDiv.pbtxt b/tensorflow/core/api_def/java_api/api_def_TruncateDiv.pbtxt
new file mode 100644
index 0000000000..4fbd682340
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TruncateDiv.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TruncateDiv"
+  endpoint {
+    name: "math.TruncateDiv"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TruncateMod.pbtxt b/tensorflow/core/api_def/java_api/api_def_TruncateMod.pbtxt
new file mode 100644
index 0000000000..7d1ae9a14f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TruncateMod.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TruncateMod"
+  endpoint {
+    name: "math.TruncateMod"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TruncatedNormal.pbtxt b/tensorflow/core/api_def/java_api/api_def_TruncatedNormal.pbtxt
new file mode 100644
index 0000000000..b2dd52c955
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TruncatedNormal.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "TruncatedNormal"
+  endpoint {
+    name: "random.TruncatedNormal"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_TryRpc.pbtxt b/tensorflow/core/api_def/java_api/api_def_TryRpc.pbtxt
new file mode 100644
index 0000000000..7ca476086a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_TryRpc.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "TryRpc"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Unbatch.pbtxt b/tensorflow/core/api_def/java_api/api_def_Unbatch.pbtxt
new file mode 100644
index 0000000000..513b05593a
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Unbatch.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Unbatch"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnbatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnbatchDataset.pbtxt
new file mode 100644
index 0000000000..24907c804b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnbatchDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnbatchDataset"
+  endpoint {
+    name: "data.UnbatchDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnbatchGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnbatchGrad.pbtxt
new file mode 100644
index 0000000000..ce612f84f6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnbatchGrad.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "UnbatchGrad"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnicodeDecodeWithOffsets.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnicodeDecodeWithOffsets.pbtxt
new file mode 100644
index 0000000000..16cc033140
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnicodeDecodeWithOffsets.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnicodeDecodeWithOffsets"
+  endpoint {
+    name: "strings.UnicodeDecodeWithOffsets"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnicodeScript.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnicodeScript.pbtxt
new file mode 100644
index 0000000000..a873151d5f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnicodeScript.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnicodeScript"
+  endpoint {
+    name: "strings.UnicodeScript"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnicodeTranscode.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnicodeTranscode.pbtxt
new file mode 100644
index 0000000000..131cc6169c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnicodeTranscode.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnicodeTranscode"
+  endpoint {
+    name: "strings.UnicodeTranscode"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UniformCandidateSampler.pbtxt b/tensorflow/core/api_def/java_api/api_def_UniformCandidateSampler.pbtxt
new file mode 100644
index 0000000000..d7a56c2a6e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UniformCandidateSampler.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UniformCandidateSampler"
+  endpoint {
+    name: "random.UniformCandidateSampler"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Unique.pbtxt b/tensorflow/core/api_def/java_api/api_def_Unique.pbtxt
new file mode 100644
index 0000000000..5c4262b5fb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Unique.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Unique"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt
new file mode 100644
index 0000000000..4d43500a0d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "UniqueV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UniqueWithCounts.pbtxt b/tensorflow/core/api_def/java_api/api_def_UniqueWithCounts.pbtxt
new file mode 100644
index 0000000000..1ce5314157
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UniqueWithCounts.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "UniqueWithCounts"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt
new file mode 100644
index 0000000000..abfd496b8e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "UniqueWithCountsV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Unpack.pbtxt b/tensorflow/core/api_def/java_api/api_def_Unpack.pbtxt
new file mode 100644
index 0000000000..c3ad63e8f8
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Unpack.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Unpack"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnravelIndex.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnravelIndex.pbtxt
new file mode 100644
index 0000000000..1a08dc8f1e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnravelIndex.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "UnravelIndex"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentMax.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentMax.pbtxt
new file mode 100644
index 0000000000..a58d8740d5
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentMax.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnsortedSegmentMax"
+  endpoint {
+    name: "math.UnsortedSegmentMax"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentMin.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentMin.pbtxt
new file mode 100644
index 0000000000..2f17ab4624
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentMin.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnsortedSegmentMin"
+  endpoint {
+    name: "math.UnsortedSegmentMin"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentProd.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentProd.pbtxt
new file mode 100644
index 0000000000..c594941bcc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentProd.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnsortedSegmentProd"
+  endpoint {
+    name: "math.UnsortedSegmentProd"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentSum.pbtxt
new file mode 100644
index 0000000000..e10cf8a6c2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UnsortedSegmentSum.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "UnsortedSegmentSum"
+  endpoint {
+    name: "math.UnsortedSegmentSum"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Unstage.pbtxt b/tensorflow/core/api_def/java_api/api_def_Unstage.pbtxt
new file mode 100644
index 0000000000..31a97cf84d
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Unstage.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Unstage"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_UpperBound.pbtxt b/tensorflow/core/api_def/java_api/api_def_UpperBound.pbtxt
new file mode 100644
index 0000000000..229a6ddfc3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_UpperBound.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "UpperBound"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_VarHandleOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_VarHandleOp.pbtxt
new file mode 100644
index 0000000000..30bdace0e6
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_VarHandleOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "VarHandleOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_VarIsInitializedOp.pbtxt b/tensorflow/core/api_def/java_api/api_def_VarIsInitializedOp.pbtxt
new file mode 100644
index 0000000000..a3992019bc
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_VarIsInitializedOp.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "VarIsInitializedOp"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Variable.pbtxt b/tensorflow/core/api_def/java_api/api_def_Variable.pbtxt
new file mode 100644
index 0000000000..abc2f9c954
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Variable.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Variable"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_VariableShape.pbtxt b/tensorflow/core/api_def/java_api/api_def_VariableShape.pbtxt
new file mode 100644
index 0000000000..38c63b5b70
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_VariableShape.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "VariableShape"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt
new file mode 100644
index 0000000000..221a80f760
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "VariableV2"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Where.pbtxt b/tensorflow/core/api_def/java_api/api_def_Where.pbtxt
new file mode 100644
index 0000000000..f130181a6e
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Where.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Where"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_While.pbtxt b/tensorflow/core/api_def/java_api/api_def_While.pbtxt
new file mode 100644
index 0000000000..9d0f3b0763
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_While.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "While"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WholeFileReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_WholeFileReader.pbtxt
new file mode 100644
index 0000000000..67a8933efb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WholeFileReader.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WholeFileReader"
+  endpoint {
+    name: "io.WholeFileReader"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt
new file mode 100644
index 0000000000..172268a122
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WholeFileReaderV2"
+  endpoint {
+    name: "io.WholeFileReaderV2"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WindowDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_WindowDataset.pbtxt
new file mode 100644
index 0000000000..69f12c55e1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WindowDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WindowDataset"
+  endpoint {
+    name: "data.WindowDataset"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WriteAudioSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_WriteAudioSummary.pbtxt
new file mode 100644
index 0000000000..fcd0df85c2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WriteAudioSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WriteAudioSummary"
+  endpoint {
+    name: "summary.WriteAudioSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WriteFile.pbtxt b/tensorflow/core/api_def/java_api/api_def_WriteFile.pbtxt
new file mode 100644
index 0000000000..a2d6a5bace
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WriteFile.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WriteFile"
+  endpoint {
+    name: "io.WriteFile"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WriteGraphSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_WriteGraphSummary.pbtxt
new file mode 100644
index 0000000000..8e461bbba3
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WriteGraphSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WriteGraphSummary"
+  endpoint {
+    name: "summary.WriteGraphSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WriteHistogramSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_WriteHistogramSummary.pbtxt
new file mode 100644
index 0000000000..c991079032
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WriteHistogramSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WriteHistogramSummary"
+  endpoint {
+    name: "summary.WriteHistogramSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WriteImageSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_WriteImageSummary.pbtxt
new file mode 100644
index 0000000000..08bf0adb2f
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WriteImageSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WriteImageSummary"
+  endpoint {
+    name: "summary.WriteImageSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WriteScalarSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_WriteScalarSummary.pbtxt
new file mode 100644
index 0000000000..7bc7c16a0c
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WriteScalarSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WriteScalarSummary"
+  endpoint {
+    name: "summary.WriteScalarSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_WriteSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_WriteSummary.pbtxt
new file mode 100644
index 0000000000..1141cb6dbb
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_WriteSummary.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "WriteSummary"
+  endpoint {
+    name: "summary.WriteSummary"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Xdivy.pbtxt b/tensorflow/core/api_def/java_api/api_def_Xdivy.pbtxt
new file mode 100644
index 0000000000..13a94b8a56
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Xdivy.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Xdivy"
+  endpoint {
+    name: "math.Xdivy"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Xlogy.pbtxt b/tensorflow/core/api_def/java_api/api_def_Xlogy.pbtxt
new file mode 100644
index 0000000000..52f457d645
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Xlogy.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Xlogy"
+  endpoint {
+    name: "math.Xlogy"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ZerosLike.pbtxt b/tensorflow/core/api_def/java_api/api_def_ZerosLike.pbtxt
new file mode 100644
index 0000000000..8ef17aba9b
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ZerosLike.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ZerosLike"
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_Zeta.pbtxt b/tensorflow/core/api_def/java_api/api_def_Zeta.pbtxt
new file mode 100644
index 0000000000..b602bbc7e1
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_Zeta.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "Zeta"
+  endpoint {
+    name: "math.Zeta"
+  }
+}
diff --git a/tensorflow/core/api_def/java_api/api_def_ZipDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ZipDataset.pbtxt
new file mode 100644
index 0000000000..e32362bef2
--- /dev/null
+++ b/tensorflow/core/api_def/java_api/api_def_ZipDataset.pbtxt
@@ -0,0 +1,6 @@
+op {
+  graph_op_name: "ZipDataset"
+  endpoint {
+    name: "data.ZipDataset"
+  }
+}
-- 
GitLab


From b381d3c10bef3d3bd87cf691a40502986b3f35dc Mon Sep 17 00:00:00 2001
From: Karl Lessard <karl@kubx.ca>
Date: Mon, 26 Nov 2018 22:45:29 -0500
Subject: [PATCH 048/873] Use lower-camelcase in ops names and fix uniqueness

---
 tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt  | 2 +-
 .../core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt   | 2 +-
 tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt    | 2 +-
 tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt   | 2 +-
 tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt        | 2 +-
 tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt       | 2 +-
 tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt     | 2 +-
 .../core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt   | 2 +-
 .../core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt       | 2 +-
 tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt         | 2 +-
 tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt          | 2 +-
 .../core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt   | 2 +-
 .../core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt    | 2 +-
 tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt          | 2 +-
 .../core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt   | 2 +-
 .../core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt | 2 +-
 .../core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt    | 2 +-
 .../core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt  | 2 +-
 tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt        | 2 +-
 .../core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt       | 2 +-
 .../core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt     | 2 +-
 .../api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt   | 2 +-
 .../core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt     | 2 +-
 .../api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt   | 2 +-
 tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt      | 2 +-
 .../core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt       | 2 +-
 .../api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt    | 2 +-
 .../api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt     | 2 +-
 .../core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt | 2 +-
 .../api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt       | 2 +-
 tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt  | 2 +-
 .../api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt           | 2 +-
 tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt         | 2 +-
 tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt         | 2 +-
 tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt   | 2 +-
 .../core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt      | 2 +-
 .../core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt       | 2 +-
 .../core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt | 2 +-
 tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt            | 2 +-
 tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt          | 2 +-
 tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt          | 2 +-
 .../core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt | 2 +-
 .../java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt        | 2 +-
 .../core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt   | 2 +-
 .../java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt        | 2 +-
 .../java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt  | 2 +-
 .../api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt  | 2 +-
 .../core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt   | 2 +-
 .../api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt  | 2 +-
 .../core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt | 2 +-
 tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt  | 2 +-
 .../core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt       | 2 +-
 63 files changed, 65 insertions(+), 62 deletions(-)

diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt
index d2b38707fb..815df985ef 100644
--- a/tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyAdagradDA.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ApplyAdagradDA"
   endpoint {
-    name: "train.ApplyAdagradDA"
+    name: "train.ApplyAdagradDa"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt
index 6ed0660165..1b831bca43 100644
--- a/tensorflow/core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyCenteredRMSProp.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ApplyCenteredRMSProp"
   endpoint {
-    name: "train.ApplyCenteredRMSProp"
+    name: "train.ApplyCenteredRmsProp"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt
index a1397b16fc..90171ccc75 100644
--- a/tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyRMSProp.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ApplyRMSProp"
   endpoint {
-    name: "train.ApplyRMSProp"
+    name: "train.ApplyRmsProp"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt
index ac51715095..09aba78ca2 100644
--- a/tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_AvgPool3DGrad.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "AvgPool3DGrad"
   endpoint {
-    name: "nn.AvgPool3DGrad"
+    name: "nn.AvgPool3dGrad"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt
index 62bc804c7e..4dda7c1fb6 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchFFT.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "BatchFFT"
   endpoint {
-    name: "signal.BatchFFT"
+    name: "signal.BatchFft"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt
index 89479537a2..e11860138a 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchFFT2D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "BatchFFT2D"
   endpoint {
-    name: "signal.BatchFFT2D"
+    name: "signal.BatchFft2d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt
index e9cff897bb..3be0b516d0 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchFFT3D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "BatchFFT3D"
   endpoint {
-    name: "signal.BatchFFT3D"
+    name: "signal.BatchFft3d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt
index f156a6df39..de37ada148 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchIFFT.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "BatchIFFT"
   endpoint {
-    name: "signal.BatchIFFT"
+    name: "signal.BatchIfft"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt
index 7cce88f8f7..4ae7fb4cb0 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchIFFT2D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "BatchIFFT2D"
   endpoint {
-    name: "signal.BatchIFFT2D"
+    name: "signal.BatchIfft2d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt
index b30e596d75..0ecb52714b 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchIFFT3D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "BatchIFFT3D"
   endpoint {
-    name: "signal.BatchIFFT3D"
+    name: "signal.BatchIfft3d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt b/tensorflow/core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt
index 79dae81df9..39739f03a3 100644
--- a/tensorflow/core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CTCBeamSearchDecoder.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CTCBeamSearchDecoder"
   endpoint {
-    name: "nn.CTCBeamSearchDecoder"
+    name: "nn.CtcBeamSearchDecoder"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt b/tensorflow/core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt
index 6a58e628bd..009742f097 100644
--- a/tensorflow/core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CTCGreedyDecoder.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CTCGreedyDecoder"
   endpoint {
-    name: "nn.CTCGreedyDecoder"
+    name: "nn.CtcGreedyDecoder"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt b/tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt
index 361270e1f7..dbeefa4017 100644
--- a/tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CTCLoss.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CTCLoss"
   endpoint {
-    name: "nn.CTCLoss"
+    name: "nn.CtcLoss"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt
index 04c4fcec16..21d1398e09 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv2D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv2D"
   endpoint {
-    name: "nn.Conv2D"
+    name: "nn.Conv2d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt
index 990a54262e..30eb55c6f2 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropFilter.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv2DBackpropFilter"
   endpoint {
-    name: "nn.Conv2DBackpropFilter"
+    name: "nn.Conv2dBackpropFilter"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt
index 11bbaa455c..7c98646c13 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv2DBackpropInput.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv2DBackpropInput"
   endpoint {
-    name: "nn.Conv2DBackpropInput"
+    name: "nn.Conv2dBackpropInput"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt
index 0d97065704..6ee1befcff 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv3D"
   endpoint {
-    name: "nn.Conv3D"
+    name: "nn.Conv3d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
index 279cd5867e..f66034aec1 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv3DBackpropFilter"
   endpoint {
-    name: "nn.Conv3DBackpropFilter"
+    name: "nn.Conv3dBackpropFilter"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
index 0643cc14a9..13db85238f 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv3DBackpropFilterV2"
   endpoint {
-    name: "nn.Conv3dBackpropFilter"
+    name: "nn.Conv3dBackpropFilterV2"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
index 5be32ebe3c..fdb2a9d66c 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv3DBackpropInput"
   endpoint {
-    name: "nn.Conv3DBackpropInput"
+    name: "nn.Conv3dBackpropInput"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
index 2ce9cf134b..7f25b7b9af 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv3DBackpropInputV2"
   endpoint {
-    name: "nn.Conv3DBackpropInputV2"
+    name: "nn.Conv3dBackpropInputV2"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
index c3d7f17596..4cd8e4fa0f 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNN"
   endpoint {
-    name: "nn.CudnnRNN"
+    name: "nn.CudnnRnn"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
index 371b6a18db..b2a222e03a 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNNBackprop"
   endpoint {
-    name: "nn.CudnnRNNBackprop"
+    name: "nn.CudnnRnnBackprop"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
index 756abdb71f..880a3b2bc4 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNNBackpropV2"
   endpoint {
-    name: "nn.CudnnRNNBackpropV2"
+    name: "nn.CudnnRnnBackpropV2"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt
index 86d7045db7..8d351fa860 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNCanonicalToParams.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNNCanonicalToParams"
   endpoint {
-    name: "nn.CudnnRNNCanonicalToParams"
+    name: "nn.CudnnRnnCanonicalToParams"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt
index 73922b0343..3f1193fe60 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsSize.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNNParamsSize"
   endpoint {
-    name: "nn.CudnnRNNParamsSize"
+    name: "nn.CudnnRnnParamsSize"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt
index e1c567163a..d2e4c6201e 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNParamsToCanonical.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNNParamsToCanonical"
   endpoint {
-    name: "nn.CudnnRNNParamsToCanonical"
+    name: "nn.CudnnRnnParamsToCanonical"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
index d535f35459..7086d3c9d2 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNNV2"
   endpoint {
-    name: "nn.CudnnRNNV2"
+    name: "nn.CudnnRnnV2"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt b/tensorflow/core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt
index 5884fe960d..3d38857063 100644
--- a/tensorflow/core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_DatasetToTFRecord.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "DatasetToTFRecord"
   endpoint {
-    name: "data.DatasetToTFRecord"
+    name: "data.DatasetToTfRecord"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt
index 2c738cf4dd..1d60107ada 100644
--- a/tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeCSV.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "DecodeCSV"
   endpoint {
-    name: "io.DecodeCSV"
+    name: "io.DecodeCsv"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt b/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt
index ab8b79d681..db3c68e088 100644
--- a/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropFilter.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Dilation2DBackpropFilter"
   endpoint {
-    name: "nn.Dilation2DBackpropFilter"
+    name: "nn.Dilation2dBackpropFilter"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt
index 9dece4569c..c935144f7a 100644
--- a/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Dilation2DBackpropInput.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Dilation2DBackpropInput"
   endpoint {
-    name: "nn.Dilation2DBackpropInput"
+    name: "nn.Dilation2dBackpropInput"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt
index 2c555c3f1a..51fdd9f0b0 100644
--- a/tensorflow/core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalCSVDataset.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ExperimentalCSVDataset"
   endpoint {
-    name:  "data.ExperimentalCSVDataset"
+    name:  "data.ExperimentalCsvDataset"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt
index dd24ff544f..a092b1cf39 100644
--- a/tensorflow/core/api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ExperimentalLMDBDataset.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ExperimentalLMDBDataset"
   endpoint {
-    name: "data.ExperimentalLMDBDataset"
+    name: "data.ExperimentalLmdbDataset"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
index e1760b685b..18c3ed4979 100644
--- a/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FIFOQueue"
   endpoint {
-    name: "io.FIFOQueue"
+    name: "io.FifoQueue"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
index fe260ca2be..f892d7291d 100644
--- a/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FIFOQueueV2"
   endpoint {
-    name: "io.FIFOQueueV2"
+    name: "io.FifoQueueV2"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt
index e4836df290..e7ee10e0c5 100644
--- a/tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FusedPadConv2D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FusedPadConv2D"
   endpoint {
-    name: "nn.FusedPadConv2D"
+    name: "nn.FusedPadConv2d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt
index 6cd0caa088..6948fc1b87 100644
--- a/tensorflow/core/api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FusedResizeAndPadConv2D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FusedResizeAndPadConv2D"
   endpoint {
-    name: "nn.FusedResizeAndPadConv2D"
+    name: "nn.FusedResizeAndPadConv2d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt
index d2ade5b16e..7a68b01524 100644
--- a/tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_IRFFT.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "IRFFT"
   endpoint {
-    name: "signal.IRFFT"
+    name: "signal.Irfft"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt
index 3a5a5b0c71..239ec445d0 100644
--- a/tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_IRFFT2D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "IRFFT2D"
   endpoint {
-    name: "signal.IRFFT2D"
+    name: "signal.Irfft2d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt
index fc5a5451b4..87969436b7 100644
--- a/tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_IRFFT3D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "IRFFT3D"
   endpoint {
-    name: "signal.IRFFT3D"
+    name: "signal.Irfft3d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt
index 226fa21953..b6cd7bdbb7 100644
--- a/tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LMDBReader.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "LMDBReader"
   endpoint {
-    name: "io.LMDBReader"
+    name: "io.LmdbReader"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt b/tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt
index bbbd3bb3ec..aa94c958f1 100644
--- a/tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ListDiff.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "ListDiff"
+  endpoint {
+    name: "SetDiff1d"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt
index 7f5ab71845..ca7a7be835 100644
--- a/tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPool3DGrad.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "MaxPool3DGrad"
   endpoint {
-    name: "nn.MaxPool3DGrad"
+    name: "nn.MaxPool3dGrad"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt
index 4d7211add2..c70aa3fe30 100644
--- a/tensorflow/core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPool3DGradGrad.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "MaxPool3DGradGrad"
   endpoint {
-    name: "nn.MaxPool3DGradGrad"
+    name: "nn.MaxPool3dGradGrad"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
index 151ab0f872..952516cfff 100644
--- a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "PaddingFIFOQueue"
   endpoint {
-    name: "io.PaddingFIFOQueue"
+    name: "io.PaddingFifoQueue"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
index 4b589a9afb..80c53e7ee9 100644
--- a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "PaddingFIFOQueueV2"
   endpoint {
-    name: "io.PaddingFIFOQueueV2"
+    name: "io.PaddingFifoQueueV2"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt
index 8345892762..9909157603 100644
--- a/tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizedConv2D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QuantizedConv2D"
   endpoint {
-    name: "nn.QuantizedConv2D"
+    name: "nn.QuantizedConv2d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt b/tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt
index e56a0ba30a..9576600e75 100644
--- a/tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_RFFT.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "RFFT"
   endpoint {
-    name: "signal.RFFT"
+    name: "signal.Rfft"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt b/tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt
index eb193e638d..41d638b26a 100644
--- a/tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_RFFT2D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "RFFT2D"
   endpoint {
-    name: "signal.RFFT2D"
+    name: "signal.Rfft2d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt b/tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt
index 292b957f3e..7a762d22e5 100644
--- a/tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_RFFT3D.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "RFFT3D"
   endpoint {
-    name: "signal.RFFT3D"
+    name: "signal.Rfft3d"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt
index 904e434b8a..afddaaff57 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyAdagradDA.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ResourceApplyAdagradDA"
   endpoint {
-    name: "train.ResourceApplyAdagradDA"
+    name: "train.ResourceApplyAdagradDa"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt
index 58027823e0..85c97b430a 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyCenteredRMSProp.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ResourceApplyCenteredRMSProp"
   endpoint {
-    name: "train.ResourceApplyCenteredRMSProp"
+    name: "train.ResourceApplyCenteredRmsProp"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt
index 74a3d77531..fa3adf759e 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyRMSProp.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ResourceApplyRMSProp"
   endpoint {
-    name: "train.ResourceApplyRMSProp"
+    name: "train.ResourceApplyRmsProp"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt
index bd66d64160..0547687d64 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyAdagradDA.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ResourceSparseApplyAdagradDA"
   endpoint {
-    name: "train.ResourceSparseApplyAdagradDA"
+    name: "train.ResourceSparseApplyAdagradDa"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt
index afda4adb0d..632b0ab4c2 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ResourceSparseApplyCenteredRMSProp"
   endpoint {
-    name: "train.ResourceSparseApplyCenteredRMSProp"
+    name: "train.ResourceSparseApplyCenteredRmsProp"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt
index 3e78f6f06d..ec8910a88a 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyRMSProp.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ResourceSparseApplyRMSProp"
   endpoint {
-    name: "train.ResourceSparseApplyRMSProp"
+    name: "train.ResourceSparseApplyRmsProp"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt
index 56d9b1c49b..950dc00dd3 100644
--- a/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyAdagradDA.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "SparseApplyAdagradDA"
   endpoint {
-    name: "train.SparseApplyAdagradDA"
+    name: "train.SparseApplyAdagradDa"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt
index fa35db29b0..090536f5eb 100644
--- a/tensorflow/core/api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyCenteredRMSProp.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "SparseApplyCenteredRMSProp"
   endpoint {
-    name: "train.SparseApplyCenteredRMSProp"
+    name: "train.SparseApplyCenteredRmsProp"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt
index 94646093af..3e39f4ffa5 100644
--- a/tensorflow/core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyRMSProp.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "SparseApplyRMSProp"
   endpoint {
-    name: "train.SparseApplyRMSProp"
+    name: "train.SparseApplyRmsProp"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt
index 76c077f452..f1d42edd63 100644
--- a/tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TFRecordDataset.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "TFRecordDataset"
   endpoint {
-    name: "data.TFRecordDataset"
+    name: "data.TfRecordDataset"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
index 72d02db279..c93d53fa1a 100644
--- a/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "TFRecordReader"
   endpoint {
-    name: "io.TFRecordReader"
+    name: "io.TfRecordReader"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
index beb7f58862..84df23f6e2 100644
--- a/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "TFRecordReaderV2"
   endpoint {
-    name: "io.TFRecordReaderV2"
+    name: "io.TfRecordReaderV2"
   }
 }
-- 
GitLab


From ce619f2697afd683813264ae2d068a1038acab77 Mon Sep 17 00:00:00 2001
From: Clayne Robison <clayne.b.robison@intel.com>
Date: Wed, 28 Nov 2018 08:43:33 -0700
Subject: [PATCH 049/873] [Intel MKL] Updating README.md with new links to
 Intel(R) Optimized Tensorflow

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8af5370bef..02a40c49b0 100644
--- a/README.md
+++ b/README.md
@@ -115,7 +115,7 @@ Build Type
 **IBM ppc64le GPU** Nightly                                                                                                                                                                     | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/)            | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/)
 **IBM ppc64le GPU** Stable Release                                                                                                                                                              | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/)                  | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/)
 **Linux CPU with Intel® MKL-DNN** Nightly                                                                                                                                                       | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/)                                | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/)
-**Linux CPU with Intel® MKL-DNN** Python 2.7<br> **Linux CPU with Intel® MKL-DNN** Python 3.4<br> **Linux CPU with Intel® MKL-DNN** Python 3.5<br> **Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.11.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp27-cp27mu-linux_x86_64.whl)<br>[1.11.0 py3.4](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp34-cp34m-linux_x86_64.whl)<br>[1.11.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp35-cp35m-linux_x86_64.whl)<br>[1.11.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp36-cp36m-linux_x86_64.whl)
+**Linux CPU with Intel® MKL-DNN** Python 2.7<br> **Linux CPU with Intel® MKL-DNN** Python 3.4<br> **Linux CPU with Intel® MKL-DNN** Python 3.5<br> **Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.12.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.12.0-cp27-cp27mu-linux_x86_64.whl)<br>[1.12.0 py3.4](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.12.0-cp34-cp34m-linux_x86_64.whl)<br>[1.12.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.12.0-cp35-cp35m-linux_x86_64.whl)<br>[1.12.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.12.0-cp36-cp36m-linux_x86_64.whl)
 
 ## For more information
 * [TensorFlow Website](https://www.tensorflow.org)
-- 
GitLab


From 046d08eff5cad0231c738dd362c85abf0877aa1d Mon Sep 17 00:00:00 2001
From: Karl Lessard <karl@kubx.ca>
Date: Wed, 28 Nov 2018 23:02:18 -0500
Subject: [PATCH 050/873] Only keep latest version available for each ops,
 skipping others.

---
 tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt  | 2 +-
 tensorflow/core/api_def/java_api/api_def_Add.pbtxt            | 4 +---
 tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt          | 2 +-
 tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt | 4 +---
 .../core/api_def/java_api/api_def_AdjustContrastv2.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt      | 4 +---
 tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt    | 2 +-
 tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt   | 4 +---
 tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt | 2 +-
 tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt   | 4 +---
 tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt | 3 +++
 .../core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt   | 4 +---
 .../core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt | 2 +-
 tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt      | 4 +---
 tensorflow/core/api_def/java_api/api_def_Concat.pbtxt         | 1 +
 tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt  | 4 +---
 .../api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt     | 2 +-
 .../core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt   | 4 +---
 .../core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt | 2 +-
 tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt       | 4 +---
 .../core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt      | 4 +---
 .../core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt    | 2 +-
 tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt      | 4 +---
 tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt    | 2 +-
 .../api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt   | 4 +---
 .../api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt | 2 +-
 .../api_def/java_api/api_def_FixedLengthRecordReader.pbtxt    | 4 +---
 .../api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt  | 2 +-
 tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt | 4 +---
 .../core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt    | 4 +---
 .../core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt  | 2 +-
 .../core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_Gather.pbtxt         | 1 +
 tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_GetSessionHandle.pbtxt      | 1 +
 .../core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt      | 1 +
 tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt | 4 +---
 .../core/api_def/java_api/api_def_IdentityReaderV2.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt         | 4 +---
 tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt       | 2 +-
 .../core/api_def/java_api/api_def_InitializeTable.pbtxt       | 1 +
 .../java_api/api_def_InitializeTableFromTextFile.pbtxt        | 1 +
 .../java_api/api_def_InitializeTableFromTextFileV2.pbtxt      | 3 +++
 .../core/api_def/java_api/api_def_InitializeTableV2.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt       | 4 +---
 .../api_def/java_api/api_def_IteratorFromStringHandle.pbtxt   | 4 +---
 .../api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt | 2 +-
 tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt     | 2 +-
 .../core/api_def/java_api/api_def_LookupTableExport.pbtxt     | 1 +
 .../core/api_def/java_api/api_def_LookupTableExportV2.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_LookupTableFind.pbtxt       | 1 +
 .../core/api_def/java_api/api_def_LookupTableFindV2.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_LookupTableImport.pbtxt     | 1 +
 .../core/api_def/java_api/api_def_LookupTableImportV2.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_LookupTableInsert.pbtxt     | 1 +
 .../core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_LookupTableSize.pbtxt       | 1 +
 .../core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt    | 4 +---
 .../core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt        | 4 +---
 tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt    | 4 +---
 .../core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt       | 4 +---
 .../core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt  | 2 +-
 tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt      | 2 +-
 .../core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt | 1 +
 .../api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_MutableHashTable.pbtxt      | 1 +
 .../api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt  | 1 +
 .../java_api/api_def_MutableHashTableOfTensorsV2.pbtxt        | 3 +++
 .../core/api_def/java_api/api_def_MutableHashTableV2.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt        | 3 +++
 .../core/api_def/java_api/api_def_NonMaxSuppression.pbtxt     | 4 +---
 .../core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt   | 4 +---
 .../core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt   | 4 +---
 .../core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt   | 2 +-
 tensorflow/core/api_def/java_api/api_def_Pad.pbtxt            | 1 +
 tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt          | 3 +++
 .../core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt    | 4 +---
 .../core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt  | 2 +-
 .../core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt      | 4 +---
 .../core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt    | 2 +-
 .../api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt  | 4 +---
 .../java_api/api_def_ParallelInterleaveDatasetV2.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt    | 1 +
 tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt  | 3 +++
 tensorflow/core/api_def/java_api/api_def_Print.pbtxt          | 1 +
 tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt  | 4 +---
 .../core/api_def/java_api/api_def_PriorityQueueV2.pbtxt       | 2 +-
 .../core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt | 4 +---
 .../api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt    | 4 +---
 .../api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt    | 2 +-
 tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt     | 4 +---
 tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt   | 2 +-
 tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt   | 4 +---
 .../core/api_def/java_api/api_def_QueueDequeueMany.pbtxt      | 4 +---
 .../core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt    | 2 +-
 .../core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt      | 4 +---
 .../core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt    | 2 +-
 tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt | 2 +-
 tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt   | 4 +---
 .../core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt      | 4 +---
 .../core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt    | 2 +-
 tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt | 2 +-
 tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt  | 4 +---
 .../core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt       | 2 +-
 tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt      | 4 +---
 tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt    | 2 +-
 tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt  | 4 +---
 .../core/api_def/java_api/api_def_RandomPoissonV2.pbtxt       | 2 +-
 .../core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt    | 4 +---
 .../core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt  | 2 +-
 .../api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt   | 4 +---
 .../api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt | 2 +-
 .../java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt        | 4 +---
 .../java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt     | 4 +---
 tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt | 4 +---
 .../core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt   | 2 +-
 tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt    | 4 +---
 tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt  | 2 +-
 .../core/api_def/java_api/api_def_ReaderRestoreState.pbtxt    | 4 +---
 .../core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt  | 2 +-
 .../core/api_def/java_api/api_def_ReaderSerializeState.pbtxt  | 4 +---
 .../api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt     | 2 +-
 .../core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt     | 4 +---
 .../core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt   | 2 +-
 .../api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt    | 4 +---
 .../api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt  | 2 +-
 tensorflow/core/api_def/java_api/api_def_Restore.pbtxt        | 4 +---
 tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt        | 1 +
 .../api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt | 4 +---
 .../java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt       | 2 +-
 tensorflow/core/api_def/java_api/api_def_Save.pbtxt           | 4 +---
 tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt         | 2 +-
 tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt  | 4 +---
 .../core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt       | 2 +-
 tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt | 4 +---
 .../core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt      | 2 +-
 .../core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt       | 4 +---
 .../core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt     | 2 +-
 tensorflow/core/api_def/java_api/api_def_Stack.pbtxt          | 1 +
 tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt     | 1 +
 tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt   | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt       | 1 +
 tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt     | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt      | 1 +
 tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt        | 3 +++
 tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt    | 4 +---
 tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt  | 2 +-
 tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt | 4 +---
 .../core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt    | 1 +
 .../core/api_def/java_api/api_def_TensorArrayClose.pbtxt      | 1 +
 .../core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt    | 1 +
 .../core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt    | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayConcat.pbtxt     | 1 +
 .../core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt   | 1 +
 .../core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayGather.pbtxt     | 1 +
 .../core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt   | 1 +
 .../core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt   | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayGrad.pbtxt       | 1 +
 .../core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt     | 1 +
 .../core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayRead.pbtxt       | 1 +
 .../core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt     | 1 +
 .../core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayScatter.pbtxt    | 1 +
 .../core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt  | 1 +
 .../core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorArraySize.pbtxt       | 1 +
 .../core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt     | 1 +
 .../core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_TensorArraySplit.pbtxt      | 1 +
 .../core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt    | 1 +
 .../core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt  | 1 +
 tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt  | 3 +++
 .../core/api_def/java_api/api_def_TensorArrayWrite.pbtxt      | 1 +
 .../core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt    | 1 +
 .../core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt  | 4 +---
 .../core/api_def/java_api/api_def_TensorSummaryV2.pbtxt       | 2 +-
 tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt | 4 +---
 .../core/api_def/java_api/api_def_TextLineReaderV2.pbtxt      | 2 +-
 tensorflow/core/api_def/java_api/api_def_TopK.pbtxt           | 4 +---
 tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt         | 2 +-
 tensorflow/core/api_def/java_api/api_def_Unique.pbtxt         | 1 +
 tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt       | 3 +++
 .../core/api_def/java_api/api_def_UniqueWithCounts.pbtxt      | 1 +
 .../core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt    | 3 +++
 tensorflow/core/api_def/java_api/api_def_Variable.pbtxt       | 1 +
 tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt     | 3 +++
 .../core/api_def/java_api/api_def_WholeFileReader.pbtxt       | 4 +---
 .../core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt     | 2 +-
 208 files changed, 288 insertions(+), 251 deletions(-)

diff --git a/tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt
index 09e7a155e0..0c7a080c11 100644
--- a/tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_AccumulateNV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "AccumulateNV2"
   endpoint {
-    name: "math.AccumulateNV2"
+    name: "math.AccumulateN"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Add.pbtxt b/tensorflow/core/api_def/java_api/api_def_Add.pbtxt
index 4f78ccc9ea..b082b055bf 100644
--- a/tensorflow/core/api_def/java_api/api_def_Add.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Add.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "Add"
-  endpoint {
-    name: "math.Add"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
index f42a2add07..863ec4f33c 100644
--- a/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "AddV2"
   endpoint {
-    name: "math.AddV2"
+    name: "math.Add"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt b/tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt
index ff49aec2db..daad141027 100644
--- a/tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_AdjustContrast.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "AdjustContrast"
-  endpoint {
-    name: "image.AdjustContrast"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_AdjustContrastv2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AdjustContrastv2.pbtxt
index dd6647b137..81f565c1d5 100644
--- a/tensorflow/core/api_def/java_api/api_def_AdjustContrastv2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_AdjustContrastv2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "AdjustContrastv2"
   endpoint {
-    name: "image.AdjustContrastv2"
+    name: "image.AdjustContrast"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt
index 388e39c6ba..176de19a9a 100644
--- a/tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyFtrl.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ApplyFtrl"
-  endpoint {
-    name: "train.ApplyFtrl"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt
index 8463ad163b..da0fc8fcbf 100644
--- a/tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ApplyFtrlV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ApplyFtrlV2"
   endpoint {
-    name: "train.ApplyFtrlV2"
+    name: "train.ApplyFtrl"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt
index 026ec79dd9..13d30de29d 100644
--- a/tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_AudioSummary.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "AudioSummary"
-  endpoint {
-    name: "summary.AudioSummary"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt
index 07d24ec3a3..e4eda8b09a 100644
--- a/tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_AudioSummaryV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "AudioSummaryV2"
   endpoint {
-    name: "summary.AudioSummaryV2"
+    name: "summary.AudioSummary"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt
index fe47605f37..0bb7298ba9 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchDataset.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "BatchDataset"
-  endpoint {
-    name: "data.BatchDataset"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt
index 079efe146f..cd81b0b1cf 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchDatasetV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "BatchDatasetV2"
+  endpoint {
+    name: "data.BatchDataset"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt
index 58d60fa962..517030fd69 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEig.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "BatchSelfAdjointEig"
-  endpoint {
-    name: "linalg.BatchSelfAdjointEig"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt
index fb18b2a000..9c97344390 100644
--- a/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BatchSelfAdjointEigV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "BatchSelfAdjointEigV2"
   endpoint {
-    name: "linalg.BatchSelfAdjointEigV2"
+    name: "linalg.BatchSelfAdjointEig"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt b/tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt
index 76ae8ec8ff..651c434e64 100644
--- a/tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_BiasAddV1.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "BiasAddV1"
-  endpoint {
-    name: "nn.BiasAddV1"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Concat.pbtxt b/tensorflow/core/api_def/java_api/api_def_Concat.pbtxt
index b13eb820ec..e2fc7eef88 100644
--- a/tensorflow/core/api_def/java_api/api_def_Concat.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Concat.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Concat"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt
index a600c31ed9..7035796981 100644
--- a/tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ConcatV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "ConcatV2"
+  endpoint {
+    name: "Concat"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
index f66034aec1..2e5f6c99d5 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilter.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "Conv3DBackpropFilter"
-  endpoint {
-    name: "nn.Conv3dBackpropFilter"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
index 13db85238f..0643cc14a9 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropFilterV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv3DBackpropFilterV2"
   endpoint {
-    name: "nn.Conv3dBackpropFilterV2"
+    name: "nn.Conv3dBackpropFilter"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
index fdb2a9d66c..cbb2c9f136 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInput.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "Conv3DBackpropInput"
-  endpoint {
-    name: "nn.Conv3dBackpropInput"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
index 7f25b7b9af..33c8f5a3ce 100644
--- a/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Conv3DBackpropInputV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "Conv3DBackpropInputV2"
   endpoint {
-    name: "nn.Conv3dBackpropInputV2"
+    name: "nn.Conv3dBackpropInput"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
index 4cd8e4fa0f..7e88d20713 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNN.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "CudnnRNN"
-  endpoint {
-    name: "nn.CudnnRnn"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
index b2a222e03a..9c9fc5f029 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackprop.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "CudnnRNNBackprop"
-  endpoint {
-    name: "nn.CudnnRnnBackprop"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
index 880a3b2bc4..c2e7ebc27d 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNBackpropV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNNBackpropV2"
   endpoint {
-    name: "nn.CudnnRnnBackpropV2"
+    name: "nn.CudnnRnnBackprop"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
index 7086d3c9d2..e6dd5f42fc 100644
--- a/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_CudnnRNNV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "CudnnRNNV2"
   endpoint {
-    name: "nn.CudnnRnnV2"
+    name: "nn.CudnnRnn"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt
index ff6531a3ac..4ba118cb0e 100644
--- a/tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_DecodeProtoV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "DecodeProtoV2"
+  endpoint {
+    name: "DecodeProto"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
index 18c3ed4979..e5b2f73c55 100644
--- a/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FIFOQueue.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "FIFOQueue"
-  endpoint {
-    name: "io.FifoQueue"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
index f892d7291d..c0861a6e8d 100644
--- a/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FIFOQueueV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FIFOQueueV2"
   endpoint {
-    name: "io.FifoQueueV2"
+    name: "io.FifoQueue"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt
index 8f38364f85..d4f23d94c0 100644
--- a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDataset.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "FixedLengthRecordDataset"
-  endpoint {
-    name: "data.FixedLengthRecordDataset"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt
index 723248019d..b8012bbe16 100644
--- a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordDatasetV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FixedLengthRecordDatasetV2"
   endpoint {
-    name: "data.FixedLengthRecordDatasetV2"
+    name: "data.FixedLengthRecordDataset"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReader.pbtxt
index 295526d074..f76cd49456 100644
--- a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReader.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReader.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "FixedLengthRecordReader"
-  endpoint {
-    name: "io.FixedLengthRecordReader"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt
index 0cfefe2075..f897c21365 100644
--- a/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FixedLengthRecordReaderV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FixedLengthRecordReaderV2"
   endpoint {
-    name: "io.FixedLengthRecordReaderV2"
+    name: "io.FixedLengthRecordReader"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt
index 058c82c177..9d6166fe81 100644
--- a/tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FusedBatchNorm.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "FusedBatchNorm"
-  endpoint {
-    name: "nn.FusedBatchNorm"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt
index 69baf2a8e3..5e1d066d8d 100644
--- a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGrad.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "FusedBatchNormGrad"
-  endpoint {
-    name: "nn.FusedBatchNormGrad"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt
index 81da6f4bb2..8f333c91f4 100644
--- a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormGradV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FusedBatchNormGradV2"
   endpoint {
-    name: "nn.FusedBatchNormGradV2"
+    name: "nn.FusedBatchNormGrad"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt
index e81d78ef5c..8a4e76c949 100644
--- a/tensorflow/core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_FusedBatchNormV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "FusedBatchNormV2"
   endpoint {
-    name: "nn.FusedBatchNormV2"
+    name: "nn.FusedBatchNorm"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Gather.pbtxt b/tensorflow/core/api_def/java_api/api_def_Gather.pbtxt
index 6848641714..5c4ccda48b 100644
--- a/tensorflow/core/api_def/java_api/api_def_Gather.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Gather.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Gather"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt
index f5b2cb92f6..0927e77a96 100644
--- a/tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_GatherV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "GatherV2"
+  endpoint {
+    name: "Gather"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_GetSessionHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_GetSessionHandle.pbtxt
index 84b14a3335..0ee6fe18a2 100644
--- a/tensorflow/core/api_def/java_api/api_def_GetSessionHandle.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_GetSessionHandle.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "GetSessionHandle"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt
index 28488ac79a..ba89942d77 100644
--- a/tensorflow/core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_GetSessionHandleV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "GetSessionHandleV2"
+  endpoint {
+    name: "GetSessionHandle"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt b/tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt
index f733b277ea..d48c2224f6 100644
--- a/tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_HashTable.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "HashTable"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt
index 74fda0380a..38cc5818d3 100644
--- a/tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_HashTableV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "HashTableV2"
+  endpoint {
+    name: "HashTable"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt
index 1e1314213d..42fe85a567 100644
--- a/tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_IdentityReader.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "IdentityReader"
-  endpoint {
-    name: "io.IdentityReader"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_IdentityReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_IdentityReaderV2.pbtxt
index 2973807a9c..8081ac26b3 100644
--- a/tensorflow/core/api_def/java_api/api_def_IdentityReaderV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_IdentityReaderV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "IdentityReaderV2"
   endpoint {
-    name: "io.IdentityReaderV2"
+    name: "io.IdentityReader"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt b/tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt
index 1cca92e0ea..bf90fd0f81 100644
--- a/tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_InTopK.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "InTopK"
-  endpoint {
-    name: "nn.InTopK"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt
index 3dc9bff289..400ee71462 100644
--- a/tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_InTopKV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "InTopKV2"
   endpoint {
-    name: "nn.InTopKV2"
+    name: "nn.InTopK"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_InitializeTable.pbtxt b/tensorflow/core/api_def/java_api/api_def_InitializeTable.pbtxt
index 49496f29a0..30e3d66bfe 100644
--- a/tensorflow/core/api_def/java_api/api_def_InitializeTable.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_InitializeTable.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "InitializeTable"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFile.pbtxt b/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFile.pbtxt
index 8cc206b0f0..786e22cd47 100644
--- a/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFile.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFile.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "InitializeTableFromTextFile"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFileV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFileV2.pbtxt
index 2a665348a7..9a4f702201 100644
--- a/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFileV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_InitializeTableFromTextFileV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "InitializeTableFromTextFileV2"
+  endpoint {
+    name: "InitializeTableFromTextFile"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_InitializeTableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_InitializeTableV2.pbtxt
index 8d9bec2f2f..d7a9a813d0 100644
--- a/tensorflow/core/api_def/java_api/api_def_InitializeTableV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_InitializeTableV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "InitializeTableV2"
+  endpoint {
+    name: "InitializeTable"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt b/tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt
index 7d4b5e6328..0f4d9967c3 100644
--- a/tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Iterator.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "Iterator"
-  endpoint {
-    name: "data.Iterator"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandle.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandle.pbtxt
index b2fe71a0c1..0a4e443cde 100644
--- a/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandle.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandle.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "IteratorFromStringHandle"
-  endpoint {
-    name: "data.IteratorFromStringHandle"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt
index c77959f34c..86745a3a56 100644
--- a/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorFromStringHandleV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "IteratorFromStringHandleV2"
   endpoint {
-    name: "data.IteratorFromStringHandleV2"
+    name: "data.IteratorFromStringHandle"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt
index 7892b096fe..87720d441b 100644
--- a/tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_IteratorV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "IteratorV2"
   endpoint {
-    name: "data.IteratorV2"
+    name: "data.Iterator"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableExport.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableExport.pbtxt
index 49637da997..29885222a4 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableExport.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableExport.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "LookupTableExport"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableExportV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableExportV2.pbtxt
index fce8a6e3cc..d780f2a21d 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableExportV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableExportV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "LookupTableExportV2"
+  endpoint {
+    name: "LookupTableExport"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableFind.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableFind.pbtxt
index f8637744ae..23f7facaa2 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableFind.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableFind.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "LookupTableFind"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableFindV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableFindV2.pbtxt
index cb78732e9c..2247547b62 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableFindV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableFindV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "LookupTableFindV2"
+  endpoint {
+    name: "LookupTableFind"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableImport.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableImport.pbtxt
index 4cf7971f56..f87ea9c073 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableImport.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableImport.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "LookupTableImport"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableImportV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableImportV2.pbtxt
index 3216346961..a39cffa12d 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableImportV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableImportV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "LookupTableImportV2"
+  endpoint {
+    name: "LookupTableImport"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableInsert.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableInsert.pbtxt
index 51fe22ba60..a45b3f52a5 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableInsert.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableInsert.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "LookupTableInsert"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt
index 429bf25b0a..037b743b6b 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableInsertV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "LookupTableInsertV2"
+  endpoint {
+    name: "LookupTableInsert"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt
index 274dd236e3..61f6d8db36 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableRemoveV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "LookupTableRemoveV2"
+  endpoint {
+    name: "LookupTableRemove"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableSize.pbtxt
index 70329e9e90..391dc5dfad 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableSize.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableSize.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "LookupTableSize"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt
index d5681c3fa0..ad646e25a6 100644
--- a/tensorflow/core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_LookupTableSizeV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "LookupTableSizeV2"
+  endpoint {
+    name: "LookupTableSize"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt
index b8f23009d7..cb96bf63d8 100644
--- a/tensorflow/core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MapAndBatchDataset.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "MapAndBatchDataset"
-  endpoint {
-    name: "data.MapAndBatchDataset"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt
index a0bc306c52..b29c21888f 100644
--- a/tensorflow/core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MapAndBatchDatasetV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "MapAndBatchDatasetV2"
+  endpoint {
+    name: "data.MapAndBatchDataset"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt
index 17c17b5699..5ebc9e6a6f 100644
--- a/tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPool.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "MaxPool"
-  endpoint {
-    name: "nn.MaxPool"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt
index c8b783ee7a..9ad85fa08e 100644
--- a/tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGrad.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "MaxPoolGrad"
-  endpoint {
-    name: "nn.MaxPoolGrad"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt
index 2dd7fdf229..3375ebc77d 100644
--- a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGrad.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "MaxPoolGradGrad"
-  endpoint {
-    name: "nn.MaxPoolGradGrad"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt
index e2cf7927ca..2ca8a7b022 100644
--- a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradGradV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "MaxPoolGradGradV2"
   endpoint {
-    name: "nn.MaxPoolGradGradV2"
+    name: "nn.MaxPoolGradGrad"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt
index 1e47cd13e3..556dd0be50 100644
--- a/tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolGradV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "MaxPoolGradV2"
   endpoint {
-    name: "nn.MaxPoolGradV2"
+    name: "nn.MaxPoolGrad"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt
index bd885135d5..8463494355 100644
--- a/tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MaxPoolV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "MaxPoolV2"
   endpoint {
-    name: "nn.MaxPoolV2"
+    name: "nn.MaxPool"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt
index 15803468ff..d7494815d8 100644
--- a/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTable.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "MutableDenseHashTable"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt
index 03fdeddb95..d1f7f26848 100644
--- a/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MutableDenseHashTableV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "MutableDenseHashTableV2"
+  endpoint {
+    name: "MutableDenseHashTable"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableHashTable.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableHashTable.pbtxt
index 1a80874495..c446ff8b27 100644
--- a/tensorflow/core/api_def/java_api/api_def_MutableHashTable.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MutableHashTable.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "MutableHashTable"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt
index bf2fa065dc..76df883d7d 100644
--- a/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensors.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "MutableHashTableOfTensors"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensorsV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensorsV2.pbtxt
index 53d780e925..f6d7451267 100644
--- a/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensorsV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MutableHashTableOfTensorsV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "MutableHashTableOfTensorsV2"
+  endpoint {
+    name: "MutableHashTableOfTensors"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MutableHashTableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutableHashTableV2.pbtxt
index 99007df253..45d619d674 100644
--- a/tensorflow/core/api_def/java_api/api_def_MutableHashTableV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MutableHashTableV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "MutableHashTableV2"
+  endpoint {
+    name: "MutableHashTable"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt
index 93988914aa..f89cd10643 100644
--- a/tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_MutexV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "MutexV2"
+  endpoint {
+    name: "Mutex"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppression.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppression.pbtxt
index d7156b0a3a..49ac0de4ce 100644
--- a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppression.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppression.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "NonMaxSuppression"
-  endpoint {
-    name: "image.NonMaxSuppression"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt
index d78eb9745d..fabf5c6215 100644
--- a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV2.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "NonMaxSuppressionV2"
-  endpoint {
-    name: "image.NonMaxSuppressionV2"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt
index e3fde0a566..0aefcb5509 100644
--- a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV3.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "NonMaxSuppressionV3"
-  endpoint {
-    name: "image.NonMaxSuppressionV3"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt
index 98776e8e7f..e71de7f4a6 100644
--- a/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_NonMaxSuppressionV4.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "NonMaxSuppressionV4"
   endpoint {
-    name: "image.NonMaxSuppressionV4"
+    name: "image.NonMaxSuppression"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Pad.pbtxt b/tensorflow/core/api_def/java_api/api_def_Pad.pbtxt
index a9de5541ac..f83f451552 100644
--- a/tensorflow/core/api_def/java_api/api_def_Pad.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Pad.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Pad"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt
index 1554cd0b64..ffc1216456 100644
--- a/tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PadV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "PadV2"
+  endpoint {
+    name: "Pad"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt
index 2d734539da..7cec77427c 100644
--- a/tensorflow/core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PaddedBatchDataset.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "PaddedBatchDataset"
-  endpoint {
-    name: "data.PaddedBatchDataset"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt
index d4b85550e9..22dfe84f0c 100644
--- a/tensorflow/core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PaddedBatchDatasetV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "PaddedBatchDatasetV2"
   endpoint {
-    name: "data.PaddedBatchDatasetV2"
+    name: "data.PaddedBatchDataset"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
index 952516cfff..03db4bf185 100644
--- a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueue.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "PaddingFIFOQueue"
-  endpoint {
-    name: "io.PaddingFifoQueue"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
index 80c53e7ee9..605025be79 100644
--- a/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PaddingFIFOQueueV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "PaddingFIFOQueueV2"
   endpoint {
-    name: "io.PaddingFifoQueueV2"
+    name: "io.PaddingFifoQueue"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt
index 3ccc2a6bf7..6a985d24fa 100644
--- a/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDataset.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ParallelInterleaveDataset"
-  endpoint {
-    name: "data.ParallelInterleaveDataset"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDatasetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDatasetV2.pbtxt
index 56b05cc2f6..6c74149a6c 100644
--- a/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDatasetV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ParallelInterleaveDatasetV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "ParallelInterleaveDatasetV2"
+  endpoint {
+    name: "ParallelInterleaveDataset"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt b/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
index 5e6daa2ae4..efea7bd2b1 100644
--- a/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Placeholder"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
index 39012d4aa2..a61f6f4d1e 100644
--- a/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "PlaceholderV2"
+  endpoint {
+    name: "Placeholder"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Print.pbtxt b/tensorflow/core/api_def/java_api/api_def_Print.pbtxt
index 5b837135fd..21a085a1c2 100644
--- a/tensorflow/core/api_def/java_api/api_def_Print.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Print.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Print"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt
index c6e406d08f..d1e4d74b1e 100644
--- a/tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PrintV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "PrintV2"
+  endpoint {
+    name: "Print"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt
index cee973139d..0a9909d122 100644
--- a/tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PriorityQueue.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "PriorityQueue"
-  endpoint {
-    name: "io.PriorityQueue"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PriorityQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PriorityQueueV2.pbtxt
index 27e89f92be..1f6a6f2906 100644
--- a/tensorflow/core/api_def/java_api/api_def_PriorityQueueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PriorityQueueV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "PriorityQueueV2"
   endpoint {
-    name: "io.PriorityQueueV2"
+    name: "io.PriorityQueue"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt
index fe8401d8f9..e6ba0ce8b8 100644
--- a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantize.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QuantizeAndDequantize"
-  endpoint {
-    name: "quantization.QuantizeAndDequantize"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt
index 0535993d40..678a77113c 100644
--- a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV2.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QuantizeAndDequantizeV2"
-  endpoint {
-    name: "quantization.QuantizeAndDequantize"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt
index d056b320c8..c9e52e770a 100644
--- a/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeAndDequantizeV3.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QuantizeAndDequantizeV3"
   endpoint {
-    name: "quantization.QuantizeAndDequantizeV3"
+    name: "quantization.QuantizeAndDequantize"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt
index d04a01726f..25c9c3bdce 100644
--- a/tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QuantizeV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QuantizeV2"
   endpoint {
-    name: "quantization.QuantizeV2"
+    name: "quantization.Quantize"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt
index 70e559c773..4a6bada741 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueClose.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QueueClose"
-  endpoint {
-    name: "io.QueueClose"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt
index 01460f27e2..ce779650e5 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueCloseV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QueueCloseV2"
   endpoint {
-    name: "io.QueueCloseV2"
+    name: "io.QueueClose"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt
index 9cd77b4ca3..45c811a6b4 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeue.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QueueDequeue"
-  endpoint {
-    name: "io.QueueDequeue"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueMany.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueMany.pbtxt
index ceb2e82394..9e088ef258 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueDequeueMany.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueMany.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QueueDequeueMany"
-  endpoint {
-    name: "io.QueueDequeueMany"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt
index ac27fcb620..10fe198ff2 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueManyV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QueueDequeueManyV2"
   endpoint {
-    name: "io.QueueDequeueManyV2"
+    name: "io.QueueDequeueMany"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt
index 657c63363f..b96e568c41 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpTo.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QueueDequeueUpTo"
-  endpoint {
-    name: "io.QueueDequeueUpTo"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt
index 19b3fff653..fadea0926b 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueUpToV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QueueDequeueUpToV2"
   endpoint {
-    name: "io.QueueDequeueUpToV2"
+    name: "io.QueueDequeueUpTo"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt
index 8aca207816..7ba03afbfe 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueDequeueV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QueueDequeueV2"
   endpoint {
-    name: "io.QueueDequeueV2"
+    name: "io.QueueDequeue"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt
index e516dab297..2945c46d6e 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueEnqueue.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QueueEnqueue"
-  endpoint {
-    name: "io.QueueEnqueue"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt
index b9c0e4fd84..442ddcbc03 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueMany.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QueueEnqueueMany"
-  endpoint {
-    name: "io.QueueEnqueueMany"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt
index 153700a646..be3fed4789 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueManyV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QueueEnqueueManyV2"
   endpoint {
-    name: "io.QueueEnqueueManyV2"
+    name: "io.QueueEnqueueMany"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt
index 68bacd7b5b..e71a2211e1 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueEnqueueV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QueueEnqueueV2"
   endpoint {
-    name: "io.QueueEnqueueV2"
+    name: "io.QueueEnqueue"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt
index 86914c7124..0b51b208b7 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueIsClosed.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QueueIsClosed"
-  endpoint {
-    name: "io.QueueIsClosed"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt
index ce33b2498b..148d313a6d 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueIsClosedV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QueueIsClosedV2"
   endpoint {
-    name: "io.QueueIsClosedV2"
+    name: "io.QueueIsClosed"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt
index 5592d58f9f..9bd7244d68 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueSize.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "QueueSize"
-  endpoint {
-    name: "io.QueueSize"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt
index 68364aa605..e93e07a2b3 100644
--- a/tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_QueueSizeV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "QueueSizeV2"
   endpoint {
-    name: "io.QueueSizeV2"
+    name: "io.QueueSize"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt
index 42ce1a5fb1..d1ea795024 100644
--- a/tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_RandomPoisson.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "RandomPoisson"
-  endpoint {
-    name: "random.RandomPoisson"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomPoissonV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomPoissonV2.pbtxt
index adc5441abc..5efe01bf40 100644
--- a/tensorflow/core/api_def/java_api/api_def_RandomPoissonV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_RandomPoissonV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "RandomPoissonV2"
   endpoint {
-    name: "random.RandomPoissonV2"
+    name: "random.RandomPoisson"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt
index f622eb4e0d..9660121a07 100644
--- a/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueue.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "RandomShuffleQueue"
-  endpoint {
-    name: "io.RandomShuffleQueue"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt
index c88c2a4631..779363303c 100644
--- a/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_RandomShuffleQueueV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "RandomShuffleQueueV2"
   endpoint {
-    name: "io.RandomShuffleQueueV2"
+    name: "io.RandomShuffleQueue"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt
index 67dbe1035e..b087d11182 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProduced.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ReaderNumRecordsProduced"
-  endpoint {
-    name: "io.ReaderNumRecordsProduced"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt
index 5063706eb0..54a30abe18 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderNumRecordsProducedV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ReaderNumRecordsProducedV2"
   endpoint {
-    name: "io.ReaderNumRecordsProducedV2"
+    name: "io.ReaderNumRecordsProduced"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt
index 63bb2d4108..e30e97fd08 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompleted.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ReaderNumWorkUnitsCompleted"
-  endpoint {
-    name: "io.ReaderNumWorkUnitsCompleted"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt
index cf72ecc562..0904ba19e5 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderNumWorkUnitsCompletedV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ReaderNumWorkUnitsCompletedV2"
   endpoint {
-    name: "io.ReaderNumWorkUnitsCompletedV2"
+    name: "io.ReaderNumWorkUnitsCompleted"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt
index 47e9679f80..8f98d88bda 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderRead.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ReaderRead"
-  endpoint {
-    name: "io.ReaderRead"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt
index 985d0035b0..d418b00b27 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderReadUpTo.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ReaderReadUpTo"
-  endpoint {
-    name: "io.ReaderReadUpTo"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt
index a001349e2d..777d09fa2c 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderReadUpToV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ReaderReadUpToV2"
   endpoint {
-    name: "io.ReaderReadUpToV2"
+    name: "io.ReaderReadUpTo"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt
index a64349d15e..a5d45bd1db 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderReadV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ReaderReadV2"
   endpoint {
-    name: "io.ReaderReadV2"
+    name: "io.ReaderRead"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt
index 9a387753f5..e6041caabd 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderReset.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ReaderReset"
-  endpoint {
-    name: "io.ReaderReset"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt
index 6e31d5a8ff..265a3442f5 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderResetV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ReaderResetV2"
   endpoint {
-    name: "io.ReaderResetV2"
+    name: "io.ReaderReset"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderRestoreState.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderRestoreState.pbtxt
index d148adde6a..0aa0ec595d 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderRestoreState.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderRestoreState.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ReaderRestoreState"
-  endpoint {
-    name: "io.ReaderRestoreState"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt
index 3d78d8eb00..4728ce7796 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderRestoreStateV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ReaderRestoreStateV2"
   endpoint {
-    name: "io.ReaderRestoreStateV2"
+    name: "io.ReaderRestoreState"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderSerializeState.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderSerializeState.pbtxt
index fd12a4c784..5e23e285fb 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderSerializeState.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderSerializeState.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ReaderSerializeState"
-  endpoint {
-    name: "io.ReaderSerializeState"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt
index 28c5048fc9..aa396095b1 100644
--- a/tensorflow/core/api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ReaderSerializeStateV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ReaderSerializeStateV2"
   endpoint {
-    name: "io.ReaderSerializeStateV2"
+    name: "io.ReaderSerializeState"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt
index db64e86fd4..61bec5bb10 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrl.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ResourceApplyFtrl"
-  endpoint {
-    name: "train.ResourceApplyFtrl"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt
index 547041e649..8209fd607e 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceApplyFtrlV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ResourceApplyFtrlV2"
   endpoint {
-    name: "train.ResourceApplyFtrlV2"
+    name: "train.ResourceApplyFtrl"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt
index 195198fc9a..2e6fed9469 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrl.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "ResourceSparseApplyFtrl"
-  endpoint {
-    name: "train.ResourceSparseApplyFtrl"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt
index 49c6d34dcc..cd126d78ab 100644
--- a/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_ResourceSparseApplyFtrlV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "ResourceSparseApplyFtrlV2"
   endpoint {
-    name: "train.ResourceSparseApplyFtrlV2"
+    name: "train.ResourceSparseApplyFtrl"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Restore.pbtxt b/tensorflow/core/api_def/java_api/api_def_Restore.pbtxt
index ca79fc10db..5e5b021b08 100644
--- a/tensorflow/core/api_def/java_api/api_def_Restore.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Restore.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "Restore"
-  endpoint {
-    name: "train.Restore"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt
index e877ff9869..909968873f 100644
--- a/tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_RestoreV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "RestoreV2"
   endpoint {
-    name: "train.RestoreV2"
+    name: "train.Restore"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt b/tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt
index 2bd25417aa..d2a199d2fc 100644
--- a/tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Reverse.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Reverse"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt b/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt
index e7c1c90ea6..3dffd53b05 100644
--- a/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBox.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "SampleDistortedBoundingBox"
-  endpoint {
-    name: "image.SampleDistortedBoundingBox"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
index 8656977bf6..6557314137 100644
--- a/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SampleDistortedBoundingBoxV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "SampleDistortedBoundingBoxV2"
   endpoint {
-    name: "image.SampleDistortedBoundingBoxV2"
+    name: "image.SampleDistortedBoundingBox"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Save.pbtxt b/tensorflow/core/api_def/java_api/api_def_Save.pbtxt
index 87dab6dc1f..36d44001d5 100644
--- a/tensorflow/core/api_def/java_api/api_def_Save.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Save.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "Save"
-  endpoint {
-    name: "train.Save"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt
index 6417252f45..644d1824aa 100644
--- a/tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SaveV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "SaveV2"
   endpoint {
-    name: "train.SaveV2"
+    name: "train.Save"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt b/tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt
index 68eb8cf1f0..fab6393f60 100644
--- a/tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SdcaOptimizer.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "SdcaOptimizer"
-  endpoint {
-    name: "train.SdcaOptimizer"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt
index 4e90531060..b72ee64e50 100644
--- a/tensorflow/core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SdcaOptimizerV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "SdcaOptimizerV2"
   endpoint {
-    name: "train.SdcaOptimizerV2"
+    name: "train.SdcaOptimizer"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt b/tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt
index 552d397ad8..dc25ae9de2 100644
--- a/tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SelfAdjointEig.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "SelfAdjointEig"
-  endpoint {
-    name: "linalg.SelfAdjointEig"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt
index 4b610f437c..c79f08ac32 100644
--- a/tensorflow/core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SelfAdjointEigV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "SelfAdjointEigV2"
   endpoint {
-    name: "linalg.SelfAdjointEigV2"
+    name: "linalg.SelfAdjointEig"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt
index bb669aa773..e961fb7f6b 100644
--- a/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrl.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "SparseApplyFtrl"
-  endpoint {
-    name: "train.SparseApplyFtrl"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt
index 673f71c34b..43b9833451 100644
--- a/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_SparseApplyFtrlV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "SparseApplyFtrlV2"
   endpoint {
-    name: "train.SparseApplyFtrlV2"
+    name: "train.SparseApplyFtrl"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Stack.pbtxt b/tensorflow/core/api_def/java_api/api_def_Stack.pbtxt
index 522e5efec5..8370beee63 100644
--- a/tensorflow/core/api_def/java_api/api_def_Stack.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Stack.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Stack"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt
index 1cba682de9..ac3c410c1c 100644
--- a/tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StackClose.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "StackClose"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt
index be5a0f535a..28aff9e191 100644
--- a/tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StackCloseV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "StackCloseV2"
+  endpoint {
+    name: "StackClose"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt
index 4439e73843..b8658ecbad 100644
--- a/tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StackPop.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "StackPop"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt
index b8babb4c56..d2ecf4e5a8 100644
--- a/tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StackPopV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "StackPopV2"
+  endpoint {
+    name: "StackPop"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt
index 41792e327f..d08fa27b21 100644
--- a/tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StackPush.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "StackPush"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt
index 4920152259..519fd6c6b2 100644
--- a/tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StackPushV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "StackPushV2"
+  endpoint {
+    name: "StackPush"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt
index 5232e0425e..725e469a03 100644
--- a/tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StackV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "StackV2"
+  endpoint {
+    name: "Stack"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt
index f16e196a6f..0e6d1851df 100644
--- a/tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StringSplit.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "StringSplit"
-  endpoint {
-    name: "strings.StringSplit"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt
index 0779eb7f71..18c71d6bd7 100644
--- a/tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_StringSplitV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "StringSplitV2"
   endpoint {
-    name: "strings.StringSplitV2"
+    name: "strings.StringSplit"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
index c93d53fa1a..9ffbeba0ec 100644
--- a/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TFRecordReader.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "TFRecordReader"
-  endpoint {
-    name: "io.TfRecordReader"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
index 84df23f6e2..7d252e4942 100644
--- a/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TFRecordReaderV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "TFRecordReaderV2"
   endpoint {
-    name: "io.TfRecordReaderV2"
+    name: "io.TfRecordReader"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt
index 7eaa468130..e315486af2 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArray.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArray"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayClose.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayClose.pbtxt
index e866250d3a..951ace8005 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayClose.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayClose.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayClose"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt
index 5add953e66..6fd2d02592 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayCloseV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt
index b881089718..4a06577979 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayCloseV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArrayCloseV3"
+  endpoint {
+    name: "TensorArrayClose"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcat.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcat.pbtxt
index e72b58de1c..f507111093 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcat.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcat.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayConcat"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt
index abc3e60f0c..e92cebf2a7 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayConcatV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt
index d1a91e90d7..34d09c901a 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayConcatV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArrayConcatV3"
+  endpoint {
+    name: "TensorArrayConcat"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGather.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGather.pbtxt
index d4d179874f..95866b9778 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayGather.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGather.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayGather"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt
index dd94fc4ef5..f75b50c667 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayGatherV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt
index b8f01e6ac1..b792ee9882 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGatherV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArrayGatherV3"
+  endpoint {
+    name: "TensorArrayGather"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGrad.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGrad.pbtxt
index 517461edba..beb9b5ca12 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayGrad.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGrad.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayGrad"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt
index b7278b5ffa..41d25a4910 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayGradV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt
index b7c3f143ef..2c9adebd04 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayGradV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArrayGradV3"
+  endpoint {
+    name: "TensorArrayGrad"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayRead.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayRead.pbtxt
index 1b62f7fac7..72704746a5 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayRead.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayRead.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayRead"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt
index ba02f61028..43cd0a2b78 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayReadV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt
index c30428c23a..e6d38d64df 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayReadV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArrayReadV3"
+  endpoint {
+    name: "TensorArrayRead"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatter.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatter.pbtxt
index a3e8d1625e..76092a45ed 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatter.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatter.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayScatter"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt
index 38080410e6..7dba0fab4c 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayScatterV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt
index 6f42524af8..179c9611f5 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayScatterV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArrayScatterV3"
+  endpoint {
+    name: "TensorArrayScatter"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySize.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySize.pbtxt
index fb3a6fae1c..fb2be098c6 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArraySize.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySize.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArraySize"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt
index 03bf061f8b..8e8e44cfe2 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArraySizeV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt
index 0c14360641..2df9a2d3f1 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySizeV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArraySizeV3"
+  endpoint {
+    name: "TensorArraySize"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySplit.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySplit.pbtxt
index 3eb8d6c7ff..105031eb98 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArraySplit.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySplit.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArraySplit"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt
index 34740aa2ef..ef5d88832a 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArraySplitV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt
index 4b22f2bdf6..721af074d0 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArraySplitV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArraySplitV3"
+  endpoint {
+    name: "TensorArraySplit"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt
index d18517725e..43a441a071 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt
index 06e65ef93c..2b87617a1f 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArrayV3"
+  endpoint {
+    name: "TensorArray"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayWrite.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayWrite.pbtxt
index 92ab1764ec..2462dae80d 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayWrite.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayWrite.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayWrite"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt
index 10c505cff4..9f670ae181 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV2.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "TensorArrayWriteV2"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt
index 2fe2d02127..7321057b2f 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorArrayWriteV3.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "TensorArrayWriteV3"
+  endpoint {
+    name: "TensorArrayWrite"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt
index a720ca9076..165478d3a0 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorSummary.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "TensorSummary"
-  endpoint {
-    name: "summary.TensorSummary"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TensorSummaryV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TensorSummaryV2.pbtxt
index ba5131c85e..c285ada012 100644
--- a/tensorflow/core/api_def/java_api/api_def_TensorSummaryV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TensorSummaryV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "TensorSummaryV2"
   endpoint {
-    name: "summary.TensorSummaryV2"
+    name: "summary.TensorSummary"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt
index 17eb20cffe..f86b15cf86 100644
--- a/tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TextLineReader.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "TextLineReader"
-  endpoint {
-    name: "io.TextLineReader"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TextLineReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TextLineReaderV2.pbtxt
index 745c85fac9..ee57dd8408 100644
--- a/tensorflow/core/api_def/java_api/api_def_TextLineReaderV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TextLineReaderV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "TextLineReaderV2"
   endpoint {
-    name: "io.TextLineReaderV2"
+    name: "io.TextLineReader"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TopK.pbtxt b/tensorflow/core/api_def/java_api/api_def_TopK.pbtxt
index 108701adf1..bb090aa6f1 100644
--- a/tensorflow/core/api_def/java_api/api_def_TopK.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TopK.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "TopK"
-  endpoint {
-    name: "nn.TopK"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt
index 2aa89adcce..2b0dcf7c2a 100644
--- a/tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_TopKV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "TopKV2"
   endpoint {
-    name: "nn.TopKV2"
+    name: "nn.TopK"
   }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Unique.pbtxt b/tensorflow/core/api_def/java_api/api_def_Unique.pbtxt
index 5c4262b5fb..8cc8ec0fed 100644
--- a/tensorflow/core/api_def/java_api/api_def_Unique.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Unique.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Unique"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt
index 4d43500a0d..6fe22cb102 100644
--- a/tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_UniqueV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "UniqueV2"
+  endpoint {
+    name: "Unique"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_UniqueWithCounts.pbtxt b/tensorflow/core/api_def/java_api/api_def_UniqueWithCounts.pbtxt
index 1ce5314157..0248fab17e 100644
--- a/tensorflow/core/api_def/java_api/api_def_UniqueWithCounts.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_UniqueWithCounts.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "UniqueWithCounts"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt
index abfd496b8e..eb15745114 100644
--- a/tensorflow/core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_UniqueWithCountsV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "UniqueWithCountsV2"
+  endpoint {
+    name: "UniqueWithCounts"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_Variable.pbtxt b/tensorflow/core/api_def/java_api/api_def_Variable.pbtxt
index abc2f9c954..0978e61451 100644
--- a/tensorflow/core/api_def/java_api/api_def_Variable.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Variable.pbtxt
@@ -1,3 +1,4 @@
 op {
   graph_op_name: "Variable"
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt
index 221a80f760..c566dd1e79 100644
--- a/tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_VariableV2.pbtxt
@@ -1,3 +1,6 @@
 op {
   graph_op_name: "VariableV2"
+  endpoint {
+    name: "Variable"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_WholeFileReader.pbtxt b/tensorflow/core/api_def/java_api/api_def_WholeFileReader.pbtxt
index 67a8933efb..aa839ed380 100644
--- a/tensorflow/core/api_def/java_api/api_def_WholeFileReader.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_WholeFileReader.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "WholeFileReader"
-  endpoint {
-    name: "io.WholeFileReader"
-  }
+  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt
index 172268a122..e031d705fb 100644
--- a/tensorflow/core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_WholeFileReaderV2.pbtxt
@@ -1,6 +1,6 @@
 op {
   graph_op_name: "WholeFileReaderV2"
   endpoint {
-    name: "io.WholeFileReaderV2"
+    name: "io.WholeFileReader"
   }
 }
-- 
GitLab


From 937ec3a54b5a6d35a61e690b24c040bb52fc9a9d Mon Sep 17 00:00:00 2001
From: Karl Lessard <karl@kubx.ca>
Date: Thu, 29 Nov 2018 00:10:31 -0500
Subject: [PATCH 051/873] Replace PlaceholderV2 by Placeholder, since V2 is not
 supported anymore.

---
 tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt   | 1 -
 tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt b/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
index efea7bd2b1..5e6daa2ae4 100644
--- a/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Placeholder.pbtxt
@@ -1,4 +1,3 @@
 op {
   graph_op_name: "Placeholder"
-  visibility: SKIP
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
index a61f6f4d1e..419bdf10f7 100644
--- a/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_PlaceholderV2.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "PlaceholderV2"
-  endpoint {
-    name: "Placeholder"
-  }
+  visibility: SKIP
 }
-- 
GitLab


From 797c781499552cdd6790be0e73e06811fce78be7 Mon Sep 17 00:00:00 2001
From: Karl Lessard <karl@kubx.ca>
Date: Thu, 29 Nov 2018 00:15:55 -0500
Subject: [PATCH 052/873] Restore Add over AddV2, since gradients are not
 computed for the later.

---
 tensorflow/core/api_def/java_api/api_def_Add.pbtxt   | 4 +++-
 tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt | 4 +---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/api_def/java_api/api_def_Add.pbtxt b/tensorflow/core/api_def/java_api/api_def_Add.pbtxt
index b082b055bf..4f78ccc9ea 100644
--- a/tensorflow/core/api_def/java_api/api_def_Add.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_Add.pbtxt
@@ -1,4 +1,6 @@
 op {
   graph_op_name: "Add"
-  visibility: SKIP
+  endpoint {
+    name: "math.Add"
+  }
 }
diff --git a/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt b/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
index 863ec4f33c..a070c6a519 100644
--- a/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
+++ b/tensorflow/core/api_def/java_api/api_def_AddV2.pbtxt
@@ -1,6 +1,4 @@
 op {
   graph_op_name: "AddV2"
-  endpoint {
-    name: "math.Add"
-  }
+  visibility: SKIP
 }
-- 
GitLab


From d00013e72cd3a1a4805395eb8e66748dcf387295 Mon Sep 17 00:00:00 2001
From: Wen yun <shiqing.fsq@alibaba-inc.com>
Date: Wed, 31 Oct 2018 15:22:14 +0800
Subject: [PATCH 053/873] fix the case when input value are MirroredVariable
 for assign_moving_average

---
 .../distribute/python/moving_averages_test.py | 20 +++++++++++++++++++
 .../python/distribute/mirrored_strategy.py    |  3 +++
 2 files changed, 23 insertions(+)

diff --git a/tensorflow/contrib/distribute/python/moving_averages_test.py b/tensorflow/contrib/distribute/python/moving_averages_test.py
index c492d8bafc..da3353b2d5 100644
--- a/tensorflow/contrib/distribute/python/moving_averages_test.py
+++ b/tensorflow/contrib/distribute/python/moving_averages_test.py
@@ -139,6 +139,26 @@ class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase):
            (2.0 * 0.25 + 0.0) / (1.0 * 0.25 + 1.0)],
           var.eval())
 
+  @combinations.generate(all_combinations)
+  def testAssignVariable(self, distribution):
+    def replica_fn():
+      var = variables.Variable([10.0, 11.0])
+      # Here we expect to check the case when input value are variable.
+      val = variables.Variable([1., 2.])
+      decay = 0.25
+      assign = moving_averages.assign_moving_average(
+          var, val, decay, zero_debias=False)
+      return var, assign
+
+    with distribution.scope(), self.cached_session() as sess:
+      var, assign = distribution.call_for_each_replica(replica_fn)
+      variables.global_variables_initializer().run()
+      self.assertAllClose([10.0, 11.0], var.eval())
+      sess.run(distribution.unwrap(assign))
+      self.assertAllClose(
+          [10 * 0.25 + 1. * (1 - 0.25),
+           11 * 0.25 + 2. * (1 - 0.25)],
+          var.eval())
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index d6d40df5ce..3cd5cf09c0 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -598,6 +598,9 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     return self._cross_device_ops
 
   def _reduce_to(self, reduce_op, value, destinations):
+    if (isinstance(value, values.Mirrored) and
+        reduce_op == reduce_util.ReduceOp.MEAN):
+      return value
     assert not isinstance(value, values.Mirrored)
     if not isinstance(value, values.DistributedValues):
       # This function handles reducing values that are not PerReplica or
-- 
GitLab


From 33f3b46e1b209be6a64f53562fd4456352c878ee Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Fri, 30 Nov 2018 10:55:15 +0800
Subject: [PATCH 054/873] [Intel MKL] Enable MKL LeakyRelu OP

---
 tensorflow/core/graph/mkl_layout_pass.cc      |  48 ++++++
 tensorflow/core/graph/mkl_layout_pass_test.cc |  79 +++++++++
 tensorflow/core/kernels/mkl_relu_op.cc        | 150 ++++++++++++++----
 tensorflow/core/ops/nn_ops.cc                 |  35 +++-
 4 files changed, 282 insertions(+), 30 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 69735aac02..8d7ddbd0c3 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -258,6 +258,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     csinfo_.fused_batch_norm = "FusedBatchNorm";
     csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
     csinfo_.identity = "Identity";
+    csinfo_.leakyrelu = "LeakyRelu";
+    csinfo_.leakyrelu_grad = "LeakyReluGrad";
     csinfo_.lrn = "LRN";
     csinfo_.lrn_grad = "LRNGrad";
     csinfo_.matmul = "MatMul";
@@ -381,6 +383,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     rinfo_.push_back({csinfo_.lrn_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
                       CopyAttrsLRN, LrnGradRewrite});
+    rinfo_.push_back({csinfo_.leakyrelu,
+                      mkl_op_registry::GetMklOpName(csinfo_.leakyrelu),
+                      CopyAttrsLeakyRelu, LeakyReluRewrite});
+    rinfo_.push_back({csinfo_.leakyrelu_grad,
+                      mkl_op_registry::GetMklOpName(csinfo_.leakyrelu_grad),
+                      CopyAttrsLeakyRelu, LeakyReluRewrite});
     rinfo_.push_back({csinfo_.max_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool),
                       CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
@@ -584,6 +592,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     string fused_batch_norm;
     string fused_batch_norm_grad;
     string identity;
+    string leakyrelu;
+    string leakyrelu_grad;
     string lrn;
     string lrn_grad;
     string matmul;
@@ -891,6 +901,29 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     return do_rewrite;
   }
 
+  // To compute LeakyRelu MKL DNN uses (feature), if feature > 0
+  // otherwise it uses (feature * alpha)
+  // while Tensorflow uses max(feature, feature * alpha) to compute LeakyRelu.
+  // These two algorithm are not consistent when alpha > 1
+  // so only LeakyRelu is written to MKL OP when alpha < 1
+  static bool LeakyReluRewrite(const Node* n) {
+    CHECK_NOTNULL(n);
+
+    float alpha;
+    CHECK_EQ(GetNodeAttr(n->def(), "alpha", &alpha).ok(), true);
+
+    // If the alpha of LeakyRelu is less than 1, rewrite the node.
+    // Otherwise eigen node is used instead.
+    if (alpha < 1) {
+      return true;
+    }
+    VLOG(1) << "LeakyReluRewrite: The model sets alpha is not less than 1 "
+            << "which case is not optimized by Intel MKL, thus using Eigen op"
+            << "for LeakyRelu ";
+
+    return false;
+  }
+
   static bool MaxpoolGradRewrite(const Node* n) {
     CHECK_NOTNULL(n);
     bool do_rewrite = false;
@@ -1078,6 +1111,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsLeakyRelu(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsQuantizedPooling(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsQuantizedConv2D(const Node* orig_node, NodeBuilder* nb);
@@ -1663,6 +1697,20 @@ void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node,
   nb->Attr("beta", beta);
 }
 
+void MklLayoutRewritePass::CopyAttrsLeakyRelu(const Node* orig_node,
+                                              NodeBuilder* nb) {
+  DataType T;
+  float alpha;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("alpha", alpha);
+}
+
 void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node,
                                             NodeBuilder* nb) {
   DataType T;
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 7e2d1f7878..f815838a89 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -960,6 +960,85 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Relu6Relu6Grad_Positive) {
             "DMT/_1->C:2");
 }
 
+TEST_F(MklLayoutPassTest, NodeRewrite_LeakyRelu_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LeakyRelu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'            value { f: 0.1 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(_MklLeakyRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;"
+            "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_LeakyRelu_Negative) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LeakyRelu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'            value { f: 2.0 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'B'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(LeakyRelu);C(Zeta)|A->B;A->C;B->C:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'LeakyReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'            value { f: 0.1 } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(_MklLeakyReluGrad);D(Zeta);DMT/_0(Const);"
+            "DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;"
+            "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluGrad_Negative) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'LeakyReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'            value { f: 2.0 } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(
+      DoMklLayoutOptimizationPass(),
+      "A(Input);B(Input);C(LeakyReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1");
+}
+
+TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluLeakyReluGrad_Positive) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'LeakyRelu'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'            value { f: 0.1 } }"
+      " input: ['A'] }"
+      "node { name: 'C' op: 'LeakyReluGrad'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'alpha'            value { f: 0.1 } }"
+      " input: ['A', 'B'] }"
+      "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['A', 'C'] }");
+  EXPECT_EQ(
+      DoMklLayoutOptimizationPass(),
+      "A(Input);B(_MklLeakyRelu);C(_MklLeakyReluGrad);D(Zeta);DMT/_0(Const);"
+      "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;"
+      "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;"
+      "DMT/_1->C:2");
+}
+
 TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 708213648b..2e29eae41b 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -16,12 +16,12 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc.
 #ifdef INTEL_MKL
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #ifndef INTEL_MKL_ML_ONLY
 #include "mkldnn.hpp"
@@ -204,7 +204,7 @@ class MklEltwiseFwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   ~MklEltwiseFwdPrimitiveFactory() {}
 
   static string CreateKey(const MklEltwiseFwdParams<T>& fwdParams,
-                               memory::format src_fmt) {
+                          memory::format src_fmt) {
     string prefix = "eltwise_fwd";
     FactoryKeyCreator key_creator;
     key_creator.AddAsKey(prefix);
@@ -422,8 +422,8 @@ class MklEltwiseBwdPrimitiveFactory : public MklPrimitiveFactory<T> {
 
  private:
   static string CreateKey(const MklEltwiseBwdParams<T>& bwdParams,
-                               const memory::format& src_fmt,
-                               const memory::format& diff_dst_fmt) {
+                          const memory::format& src_fmt,
+                          const memory::format& diff_dst_fmt) {
     string prefix = "eltwise_bwd";
     FactoryKeyCreator key_creator;
     key_creator.AddAsKey(prefix);
@@ -856,9 +856,9 @@ class MklReluOpBase : public OpKernel {
 
       Tensor* dst_tensor = nullptr;
       OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
-                                      {static_cast<const int>(src_index)},
-                                      static_cast<const int>(dst_index),
-                                      tf_shape_dst, &dst_tensor));
+                                  {static_cast<const int>(src_index)},
+                                  static_cast<const int>(dst_index),
+                                  tf_shape_dst, &dst_tensor));
       AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst);
 
       T* dst_data = dst_tensor->flat<T>().data();
@@ -866,19 +866,20 @@ class MklReluOpBase : public OpKernel {
       // execute eltwise
       eltwise_fwd->Execute(src_data, dst_data);
     } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) +
-                         ", in file " + string(__FILE__) + ":" +
-                         std::to_string(__LINE__);
-      OP_REQUIRES_OK(context,
-                     errors::Aborted("Operation received an exception:",
-                        error_msg));
+      string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
+                         string(e.message) + ", in file " + string(__FILE__) +
+                         ":" + std::to_string(__LINE__);
+      OP_REQUIRES_OK(
+          context,
+          errors::Aborted("Operation received an exception:", error_msg));
     }
   }
 
  private:
   engine cpu_engine = engine(engine::cpu, 0);
   std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+
+ protected:
   float alpha_;
   float beta_;
 };
@@ -947,11 +948,11 @@ class MklReluGradOpBase : public OpKernel {
         auto diff_dst_tf_data_format =
             MklDnnDataFormatToTFDataFormat(diff_dst_mkl_data_format);
 
-        src_dims = (src_tensor.dims() == 4) 
-                 ? TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
-                                             diff_dst_tf_data_format)
-                 : TFShapeToMklDnnDimsInNCDHW(src_tensor.shape(),
-                                              diff_dst_tf_data_format);
+        src_dims = (src_tensor.dims() == 4)
+                       ? TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
+                                                   diff_dst_tf_data_format)
+                       : TFShapeToMklDnnDimsInNCDHW(src_tensor.shape(),
+                                                    diff_dst_tf_data_format);
         src_md =
             memory::desc(src_dims, MklDnnType<T>(), diff_dst_mkl_data_format);
       } else {
@@ -1001,8 +1002,7 @@ class MklReluGradOpBase : public OpKernel {
       // allocate diff_src tensor
       MklDnnShape dnn_shape_diff_src;
       TensorShape tf_shape_diff_src;
-      if (dnn_shape_src.IsMklTensor() ||
-              dnn_shape_diff_dst.IsMklTensor()) {
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
         auto diff_src_pd = eltwise_bwd_pd->diff_src_primitive_desc();
         dnn_shape_diff_src.SetMklTensor(true);
         dnn_shape_diff_src.SetMklLayout(&diff_src_pd);
@@ -1012,9 +1012,10 @@ class MklReluGradOpBase : public OpKernel {
                                          dnn_shape_src.GetSizesAsMklDnnDims(),
                                          dnn_shape_src.GetTfDataFormat());
         } else {
-          dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(),
-                                 dnn_shape_diff_dst.GetSizesAsMklDnnDims(),
-                                 dnn_shape_diff_dst.GetTfDataFormat());
+          dnn_shape_diff_src.SetTfLayout(
+              dnn_shape_diff_dst.GetDimension(),
+              dnn_shape_diff_dst.GetSizesAsMklDnnDims(),
+              dnn_shape_diff_dst.GetTfDataFormat());
         }
         tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T));
       } else {
@@ -1033,9 +1034,9 @@ class MklReluGradOpBase : public OpKernel {
       // execute eltwise bwd
       eltwise_bwd->Execute(src_data, diff_dst_data, diff_src_data);
     } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) + ", in file " +
-                         string(__FILE__) + ":" + std::to_string(__LINE__);
+      string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
+                         string(e.message) + ", in file " + string(__FILE__) +
+                         ":" + std::to_string(__LINE__);
       OP_REQUIRES_OK(
           context,
           errors::Aborted("Operation received an exception:", error_msg));
@@ -1045,6 +1046,8 @@ class MklReluGradOpBase : public OpKernel {
  private:
   engine cpu_engine = engine(engine::cpu, 0);
   std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
+
+ protected:
   float alpha_;
   float beta_;
 };
@@ -1312,8 +1315,84 @@ class MklRelu6GradOp
     T* out_o = diff_src_tensor->flat<T>().data();
     T* user_i = const_cast<T*>(src_tensor.flat<T>().data());
     T* user_g = const_cast<T*>(diff_dst_tensor.flat<T>().data());
-    out_o[0] = user_g[0] * user_i[0] > 0 &&
-               (user_i[0] < static_cast<T>(RELU6_UPPER_BOUND));
+    out_o[0] = user_g[0] * (user_i[0] > 0 &&
+                            (user_i[0] < static_cast<T>(RELU6_UPPER_BOUND)));
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklLeakyReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklLeakyReluOp() {}
+
+  explicit MklLeakyReluOp(OpKernelConstruction* context)
+      : MklReluOpBase<Device, T, eltwise_relu>(context, 0.0f, 0.0f) {
+    float alpha;
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha));
+    OP_REQUIRES(context, alpha < 1,
+                errors::InvalidArgument("MKL LeakyRelu only support alpha < 1. "
+                                        "alpha is: ",
+                                        alpha));
+
+    this->alpha_ = alpha;
+  }
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t src_index = 0;  // index of src input tensor
+    const size_t dst_index = 0;  // index of dst output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    MklDnnShape dnn_shape_src;
+    GetMklShape(context, src_index, &dnn_shape_src);
+
+    Tensor* dst_tensor = nullptr;
+    T* user_i = const_cast<T*>(src_tensor.flat<T>().data());
+    MklDnnShape dnn_shape_dst;
+    dnn_shape_dst.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
+                              src_tensor.shape(), dnn_shape_dst);
+    T* out_o = dst_tensor->flat<T>().data();
+    out_o[0] = std::max(user_i[0], user_i[0] * this->alpha_);
+    return;
+  }
+};
+
+template <typename Device, typename T>
+class MklLeakyReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
+ public:
+  ~MklLeakyReluGradOp() {}
+
+  explicit MklLeakyReluGradOp(OpKernelConstruction* context)
+      : MklReluGradOpBase<Device, T, eltwise_relu>(context, 0.0f, 0.0f) {
+    float alpha;
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha));
+    OP_REQUIRES(context, alpha < 1,
+                errors::InvalidArgument("MKL LeakyRelu only support alpha < 1. "
+                                        "alpha is: ",
+                                        alpha));
+
+    this->alpha_ = alpha;
+  }
+
+  virtual void Compute_Scalar(OpKernelContext* context) {
+    const size_t diff_dst_index = 0;  // index of diff_dst input tensor
+    const size_t src_index = 1;       // index of src input tensor
+    const size_t diff_src_index = 0;  // index of diff_src output tensor
+    const Tensor& src_tensor = MklGetInput(context, src_index);
+    const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index);
+    Tensor* diff_src_tensor = nullptr;
+
+    MklDnnShape dnn_shape_diff_dst;
+    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);
+
+    MklDnnShape dnn_shape_diff_src;
+    dnn_shape_diff_src.SetMklTensor(false);
+    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
+                              diff_dst_tensor.shape(), dnn_shape_diff_src);
+    T* out_o = diff_src_tensor->flat<T>().data();
+    T* user_i = const_cast<T*>(src_tensor.flat<T>().data());
+    T* user_g = const_cast<T*>(diff_dst_tensor.flat<T>().data());
+    out_o[0] = user_i[0] > 0 ? user_g[0] : user_g[0] * this->alpha_;
     return;
   }
 };
@@ -1376,6 +1455,19 @@ TF_CALL_float(REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES);
                           MklRelu6GradOp<CPUDevice, type>);
 TF_CALL_float(REGISTER_RELU6_MKL_SUPPORTED_KERNELS_TYPES);
 
+#define REGISTER_LeakyRelu_MKL_SUPPORTED_KERNELS_TYPES(type)        \
+  REGISTER_KERNEL_BUILDER(Name("_MklLeakyRelu")                     \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklLeakyReluOp<CPUDevice, type>);         \
+  REGISTER_KERNEL_BUILDER(Name("_MklLeakyReluGrad")                 \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklLeakyReluGradOp<CPUDevice, type>);
+TF_CALL_float(REGISTER_LeakyRelu_MKL_SUPPORTED_KERNELS_TYPES);
+
 #endif
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index efa84d6c22..ea26f7d2be 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -1915,6 +1915,40 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
+REGISTER_OP("_MklLeakyRelu")
+    .Input("features: T")
+    .Input("mkl_features: uint8")
+    .Output("activations: T")
+    .Output("mkl_activations: uint8")
+    .Attr("T: {half, float, double} = DT_FLOAT")
+    .Attr("alpha: float = 0.2")
+    .SetShapeFn(shape_inference::UnchangedShape)
+    .Doc(R"doc(
+MKL version of LeakyRelu operator. Uses MKL DNN APIs to implement
+LeakyRelu operator.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
+REGISTER_OP("_MklLeakyReluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Input("mkl_gradients: uint8")
+    .Input("mkl_features: uint8")
+    .Output("backprops: T")
+    .Output("mkl_backprops: uint8")
+    .Attr("T: {half, float, double} = DT_FLOAT")
+    .Attr("alpha: float = 0.2")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn)
+    .Doc(R"doc(
+MKL version of LeakyReluGrad operator. Uses MKL DNN APIs to compute rectified
+linear gradients for LeakyReluGrad operation.
+
+NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
+expected to invoke these operators.
+)doc");
+
 REGISTER_OP("_MklElu")
     .Input("features: T")
     .Input("mkl_features: uint8")
@@ -2110,7 +2144,6 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is
 expected to invoke these operators.
 )doc");
 
-
 REGISTER_OP("_MklAvgPool3DGrad")
     .Input("orig_input_shape: int32")
     .Input("grad: T")
-- 
GitLab


From f32d071589507c755f524f9d94ea4ee4174c9498 Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Fri, 30 Nov 2018 11:09:48 +0800
Subject: [PATCH 055/873] Enable reorder cache for MklSlice.

---
 tensorflow/core/kernels/mkl_slice_op.cc | 201 +++++++++++++++++++-----
 1 file changed, 165 insertions(+), 36 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index 85cabeb92b..f32a6003af 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -60,8 +60,10 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 
 // A version of SharedValidation (slice_op.h) written for input that is in
 // either Mkl layout or Tensorflow layout.
-// A shared code to validate input shapes and check for identity, which is not dependent on the type of T.
-// We do this to reduce code size by not duplicating all this for all T (float, double, int32, etc.)
+// A shared code to validate input shapes and check for identity, which is not
+// dependent on the type of T.
+// We do this to reduce code size by not duplicating all this for all T (float,
+// double, int32, etc.)
 static void ValidateMklInputs(OpKernelContext* context, bool* is_identity,
                               gtl::InlinedVector<int64, 4>* begin,
                               gtl::InlinedVector<int64, 4>* size) {
@@ -157,13 +159,149 @@ static void CheckCommonCasesForMklInputs(OpKernelContext* context,
   }
 }
 
+// This structure aggregates multiple inputs to Slice methods.
+// Parameters from & to represents memory pointing to reorder.
+// Parameters begin_dims & size_dims represents offset and length
+// passed to view primitive.
+struct MklSliceParams {
+  const memory* from;
+  const memory* to;
+  memory::dims begin_dims;
+  memory::dims size_dims;
+
+  MklSliceParams(const memory* from, const memory* to, memory::dims begin_dims,
+                 memory::dims size_dims)
+      : from(from), to(to), begin_dims(begin_dims), size_dims(size_dims) {}
+};
+
+// This implements the reuse interface of Slice reorders.
+template <typename T>
+class MklSlicePrimitive : public MklPrimitive {
+ public:
+  explicit MklSlicePrimitive(const MklSliceParams& sliceParams) {
+    context_.slice_stream.reset(new stream(stream::kind::eager));
+    Setup(sliceParams);
+  }
+
+  ~MklSlicePrimitive() {}
+
+  void Execute(const MklSliceParams& sliceParams) {
+    context_.src_mem->set_data_handle(sliceParams.from->get_data_handle());
+    context_.dst_mem->set_data_handle(sliceParams.to->get_data_handle());
+    context_.slice_stream->submit(context_.slice_primitives);
+
+    context_.src_mem->set_data_handle(DummyData);
+    context_.dst_mem->set_data_handle(DummyData);
+    return;
+  }
+
+  std::shared_ptr<primitive> GetPrimitive() { return context_.reorder_prim; }
+
+ private:
+  struct SliceContext {
+    std::shared_ptr<mkldnn::memory> src_mem;
+    std::shared_ptr<mkldnn::memory> dst_mem;
+    std::shared_ptr<primitive> reorder_prim;
+    std::shared_ptr<reorder::primitive_desc> reorder_pd;
+    std::shared_ptr<view::primitive_desc> view_pd;
+    std::shared_ptr<mkldnn::stream> slice_stream;
+    std::vector<mkldnn::primitive> slice_primitives;
+    SliceContext()
+        : src_mem(nullptr), dst_mem(nullptr), reorder_prim(nullptr) {}
+  } context_;
+
+  engine cpu_engine_ = engine(engine::cpu, 0);
+
+  void Setup(const MklSliceParams& sliceParams) {
+    context_.src_mem.reset(
+        new memory({sliceParams.from->get_primitive_desc().desc(), cpu_engine_},
+                   DummyData));
+    context_.dst_mem.reset(new memory(
+        {sliceParams.to->get_primitive_desc().desc(), cpu_engine_}, DummyData));
+    auto src_pd = context_.src_mem->get_primitive_desc();
+    auto dst_pd = context_.dst_mem->get_primitive_desc();
+    context_.view_pd =
+        std::make_shared<view::primitive_desc>(view::primitive_desc(
+            src_pd, sliceParams.size_dims, sliceParams.begin_dims));
+    context_.reorder_pd =
+        std::make_shared<reorder::primitive_desc>(reorder::primitive_desc(
+            context_.view_pd->dst_primitive_desc(), dst_pd));
+    context_.reorder_prim = std::make_shared<mkldnn::reorder>(
+        reorder(*context_.reorder_pd, *context_.src_mem, *context_.dst_mem));
+    context_.slice_primitives.push_back(*context_.reorder_prim);
+  }
+};
+
+template <typename T>
+class MklSlicePrimitiveFactory : public MklPrimitiveFactory<T> {
+ public:
+  static MklSlicePrimitive<T>* Get(const MklSliceParams& sliceParams) {
+    auto reorderPrim = static_cast<MklSlicePrimitive<T>*>(
+        MklSlicePrimitiveFactory<T>::GetInstance().GetReorder(sliceParams));
+    if (reorderPrim == nullptr) {
+      reorderPrim = new MklSlicePrimitive<T>(sliceParams);
+      MklSlicePrimitiveFactory<T>::GetInstance().SetReorder(sliceParams,
+                                                            reorderPrim);
+    }
+    return reorderPrim;
+  }
+
+  static MklSlicePrimitiveFactory& GetInstance() {
+    static MklSlicePrimitiveFactory instance_;
+    return instance_;
+  }
+
+ private:
+  MklSlicePrimitiveFactory() {}
+  ~MklSlicePrimitiveFactory() {}
+
+  static string CreateKey(const MklSliceParams& sliceParams) {
+    string prefix = "reorder";
+    FactoryKeyCreator key_creator;
+    auto const& from_desc = sliceParams.from->get_primitive_desc().desc().data;
+    auto const& to_desc = sliceParams.to->get_primitive_desc().desc().data;
+    const int KIdxFirstStride = 0;
+    memory::dims from_dims(from_desc.dims, &from_desc.dims[from_desc.ndims]);
+    memory::dims to_dims(to_desc.dims, &to_desc.dims[to_desc.ndims]);
+    memory::dims from_strides(
+        from_desc.layout_desc.blocking.strides[KIdxFirstStride],
+        &from_desc.layout_desc.blocking.strides[KIdxFirstStride]
+                                               [from_desc.ndims]);
+    memory::dims to_strides(
+        to_desc.layout_desc.blocking.strides[KIdxFirstStride],
+        &to_desc.layout_desc.blocking.strides[KIdxFirstStride][to_desc.ndims]);
+    key_creator.AddAsKey(prefix);
+    key_creator.AddAsKey(static_cast<int>(from_desc.format));
+    key_creator.AddAsKey(static_cast<int>(from_desc.data_type));
+    key_creator.AddAsKey(from_dims);
+    key_creator.AddAsKey(from_strides);
+    key_creator.AddAsKey(static_cast<int>(to_desc.format));
+    key_creator.AddAsKey(static_cast<int>(to_desc.data_type));
+    key_creator.AddAsKey(to_dims);
+    key_creator.AddAsKey(to_strides);
+    key_creator.AddAsKey(sliceParams.begin_dims);
+    key_creator.AddAsKey(sliceParams.size_dims);
+    return key_creator.GetKey();
+  }
+
+  MklPrimitive* GetReorder(const MklSliceParams& sliceParams) {
+    string key = CreateKey(sliceParams);
+    return this->GetOp(key);
+  }
+
+  void SetReorder(const MklSliceParams& sliceParams, MklPrimitive* op) {
+    string key = CreateKey(sliceParams);
+    this->SetOp(key, op);
+  }
+};
+
 // MKL-DNN implementation of Slice
 template <typename Device, typename T>
-class MklDnnSliceOp : public OpKernel {
+class MklSliceOp : public OpKernel {
  public:
-  explicit MklDnnSliceOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit MklSliceOp(OpKernelConstruction* context) : OpKernel(context) {}
 
-  ~MklDnnSliceOp() {}
+  ~MklSliceOp() {}
 
   void Compute(OpKernelContext* context) override {
     gtl::InlinedVector<int64, 4> begin;
@@ -179,17 +317,17 @@ class MklDnnSliceOp : public OpKernel {
     if (begin.size() >= 8) {
       OP_REQUIRES(
           context, false,
-          errors::Unimplemented("MklDnnSliceOp : Unhandled input dimensions"));
+          errors::Unimplemented("MklSliceOp : Unhandled input dimensions"));
     }
 
-    ComputeMklDnnSlice(context, begin, size);
+    ComputeMklSlice(context, begin, size);
   }
 
  private:
   // Slice op implemented using MKL-DNN APIs.
-  void ComputeMklDnnSlice(OpKernelContext* context,
-                          const gtl::InlinedVector<int64, 4>& begin,
-                          const gtl::InlinedVector<int64, 4>& size) {
+  void ComputeMklSlice(OpKernelContext* context,
+                       const gtl::InlinedVector<int64, 4>& begin,
+                       const gtl::InlinedVector<int64, 4>& size) {
     try {
       // MKL-DNN API usage below is guided by description at:
       //  https://github.com/01org/mkl-dnn/issues/69
@@ -200,16 +338,15 @@ class MklDnnSliceOp : public OpKernel {
       // probably change the format). Then your steps are:
       //
       // 1. create memory primitive descriptor in_mem_pd and memory primitive
-      //    in_mem_p for the entire source data.
-      // 2. create view primitive descriptor in_submem_pd based on in_mem_pd,
-      //    initial offsets, and sub-sizes
-      // 3. create memory primitive descriptor out_mem_pd and memory primitive
+      //    in_mem_p for the entire source data. create view primitive
+      //    descriptor
+      //    in_submem_pd based on in_mem_pd, initial offsets, and sub-sizes
+      // 2. create memory primitive descriptor out_mem_pd and memory primitive
       //    out_mem_p for the output (the logical sizes should match sub-sizes
-      //    used in step 2, but the format might be arbitrary)
-      // 4. create reorder primitive descriptor reorder_pd based on in_submem_pd
-      //    and out_mem_pd
-      // 5. create reorder primitive itself based on reorder_pd, in_mem_p, and
-      //    out_mem_p.
+      //    used in step 1, but the format might be arbitrary)
+      // 3. create reorder primitive descriptor reorder_pd based on in_submem_pd
+      //    and out_mem_pd. create reorder primitive itself based on reorder_pd,
+      //    in_mem_p, and out_mem_p.
       //
       // Please notice that there is no view primitive. There is only view
       // primitive descriptor. And the reorder uses source memory as input but
@@ -268,32 +405,24 @@ class MklDnnSliceOp : public OpKernel {
         src.SetUsrMem(input_md, &input_tensor);
       }
 
-      // Step 2 - create view primitive descriptor
-      auto view_pd =
-          view::primitive_desc(src.GetUsrMemPrimDesc(), size_dims, begin_dims)
-              .dst_primitive_desc();
+      // Step 2 - Create memory for output.
       auto output_strides = CalculateTFStrides(size_dims);
       auto output_md =
           MklDnnData<T>::CreateBlockedMemDesc(size_dims, output_strides);
       auto output_pd = memory::primitive_desc(output_md, cpu_engine);
-
-      // Step 3 - Create memory for output. If input is in MklDnn layout, then
-      // output is also in MklDnn layout. Otherwise, output is in Tensorflow
-      // layout.
       AllocateOutputTensor(context, input_mkl_shape, &output_pd, size_dims,
                            &output_tensor, &output_mkl_shape);
       DCHECK(output_tensor);
       DCHECK_EQ(input_mkl_shape.IsMklTensor(), output_mkl_shape.IsMklTensor());
       output.SetUsrMem(output_md, output_tensor);
 
-      std::vector<primitive> net;
-      // Step 4 - create reorder primitive desc between view_pd and output_pd.
-      auto reorder_pd =
-          reorder::primitive_desc(view_pd, output.GetUsrMemPrimDesc());
-      // Step 5 - create reorder primitive itself.
-      net.push_back(reorder(reorder_pd, *src.GetUsrMem(), *output.GetUsrMem()));
-      // Execute the reorder primitive.
-      stream(stream::kind::eager).submit(net).wait();
+      // Step 3 - create reorder primitive.
+      MklSliceParams sliceParams(src.GetUsrMem(), output.GetUsrMem(),
+                                 begin_dims, size_dims);
+      MklSlicePrimitive<T>* reorder_prim =
+          MklSlicePrimitiveFactory<T>::Get(sliceParams);
+      // Execute slice reorder.
+      reorder_prim->Execute(sliceParams);
     } catch (mkldnn::error& e) {
       string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
                          string(e.message) + ", in file " + string(__FILE__) +
@@ -347,7 +476,7 @@ class MklDnnSliceOp : public OpKernel {
                               .HostMemory("begin")                  \
                               .HostMemory("size")                   \
                               .Label(mkl_op_registry::kMklOpLabel), \
-                          MklDnnSliceOp<CPUDevice, type>);
+                          MklSliceOp<CPUDevice, type>);
 
 TF_CALL_float(REGISTER_MKL_SLICE);
 #undef REGISTER_MKL_SLICE
-- 
GitLab


From 3b57fb8afe285c9261d6c8c5fbc0900e58dfc15d Mon Sep 17 00:00:00 2001
From: Younes Khoudli <younes.khoudli@epita.fr>
Date: Fri, 30 Nov 2018 11:40:11 +0100
Subject: [PATCH 056/873] devices: inform user that tensorflow wasn't compiled
 with CUDA support

The current behaviour can be confusing when no GPU are found on a
computer that does have GPUs, so add a warning.
---
 tensorflow/core/grappler/devices.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/devices.cc b/tensorflow/core/grappler/devices.cc
index 3268697671..ddde6a504e 100644
--- a/tensorflow/core/grappler/devices.cc
+++ b/tensorflow/core/grappler/devices.cc
@@ -47,9 +47,13 @@ int GetNumAvailableGPUs() {
       }
     }
   }
-#endif  // GOOGLE_CUDA
   LOG(INFO) << "Number of eligible GPUs (core count >= 8): "
             << num_eligible_gpus;
+#else
+  LOG(INFO) << "Number of eligible GPUs (core count >= 8): "
+            << num_eligible_gpus
+            << " (Note: TensorFlow was not compiled with CUDA support)";
+#endif  // GOOGLE_CUDA
   return num_eligible_gpus;
 }
 
-- 
GitLab


From cc518eea2df346f061a7753efc6d5430d939548e Mon Sep 17 00:00:00 2001
From: "William D. Irons" <wdirons@us.ibm.com>
Date: Fri, 30 Nov 2018 15:28:08 -0600
Subject: [PATCH 057/873] Add link to CPU Artifacts to README.md for ppc64le

adds links to cpu artifiacts for nightly and release builds
Replaces "IBM ppc64le" with "Linux ppc64le"
Uses the build of every commit for build status and not the nightly artifact build.
  - This last change is also made to the GPU build in this commit
---
 README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 044174947a..68d7e180d1 100644
--- a/README.md
+++ b/README.md
@@ -113,9 +113,10 @@ The TensorFlow project strives to abide by generally accepted best practices in
 Build Type                                                                                                                                                                                      | Status                                                                                                                                                                                   | Artifacts
 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------
 **IBM s390x**                                                                                                                                                                                   | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/)                                                        | TBA
-**IBM ppc64le CPU**                                                                                                                                                                             | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/)                                    | TBA
-**IBM ppc64le GPU** Nightly                                                                                                                                                                     | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/)            | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/)
-**IBM ppc64le GPU** Stable Release                                                                                                                                                              | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/)                  | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/)
+**Linux ppc64le CPU** Nightly                                                                                                                                                                   | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/)                                  | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/)
+**Linux ppc64le CPU** Stable Release                                                                                                                                                            | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/)                  | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/)
+**Linux ppc64le GPU** Nightly                                                                                                                                                                   | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Build/)                                  | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/)
+**Linux ppc64le GPU** Stable Release                                                                                                                                                            | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/)                  | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/)
 **Linux CPU with Intel® MKL-DNN** Nightly                                                                                                                                                       | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/)                                | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/)
 **Linux CPU with Intel® MKL-DNN** Python 2.7<br> **Linux CPU with Intel® MKL-DNN** Python 3.4<br> **Linux CPU with Intel® MKL-DNN** Python 3.5<br> **Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.11.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp27-cp27mu-linux_x86_64.whl)<br>[1.11.0 py3.4](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp34-cp34m-linux_x86_64.whl)<br>[1.11.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp35-cp35m-linux_x86_64.whl)<br>[1.11.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp36-cp36m-linux_x86_64.whl)
 
-- 
GitLab


From 97eedeb115372c4a5f9ce77c851b68c211ca36d5 Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Fri, 30 Nov 2018 16:12:27 -0800
Subject: [PATCH 058/873] [Intel MKL] Adding support to handle FusedConv2D

This commit adds support to handle Grappler-fused Conv2D operators
in MKL layout pass.

Some changes are from clang format check, and not related to handling
of fusion.
---
 tensorflow/core/graph/mkl_layout_pass.cc      |  59 +++-
 tensorflow/core/graph/mkl_layout_pass_test.cc | 104 +++++-
 tensorflow/core/kernels/BUILD                 |  25 ++
 tensorflow/core/kernels/mkl_conv_ops.cc       | 132 ++++++--
 tensorflow/core/kernels/mkl_fused_ops_test.cc | 306 ++++++++++++++++++
 tensorflow/core/ops/mkl_nn_ops.cc             |  27 ++
 6 files changed, 616 insertions(+), 37 deletions(-)
 create mode 100644 tensorflow/core/kernels/mkl_fused_ops_test.cc

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 69735aac02..4a53b7edc5 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -257,6 +257,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     csinfo_.conv3d_grad_filter = "Conv3DBackpropFilterV2";
     csinfo_.fused_batch_norm = "FusedBatchNorm";
     csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad";
+    csinfo_.fused_conv2d = "_FusedConv2D";
     csinfo_.identity = "Identity";
     csinfo_.lrn = "LRN";
     csinfo_.lrn_grad = "LRNGrad";
@@ -271,6 +272,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias";
     csinfo_.mkl_conv2d_grad_filter_with_bias =
         "_MklConv2DBackpropFilterWithBias";
+    csinfo_.mkl_fused_conv2d = "_MklFusedConv2D";
 // Temporarily don't convert quantized operators into MKL versions for now.
 // TODO(Intel-tf) Once all the relevant PRs have been merged then remove
 // the ifdef.
@@ -373,6 +375,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
         {csinfo_.fused_batch_norm_grad,
          mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm_grad),
          CopyAttrsFusedBatchNorm, AlwaysRewrite});
+    rinfo_.push_back({csinfo_.fused_conv2d, csinfo_.mkl_fused_conv2d,
+                      CopyAttrsFusedConv2D, FusedConv2DRewrite});
     rinfo_.push_back({csinfo_.identity,
                       mkl_op_registry::GetMklOpName(csinfo_.identity),
                       CopyAttrsDataType, AlwaysRewrite});
@@ -583,6 +587,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     string conv3d_grad_filter;
     string fused_batch_norm;
     string fused_batch_norm_grad;
+    string fused_conv2d;
     string identity;
     string lrn;
     string lrn_grad;
@@ -597,6 +602,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     string mkl_conv2d_grad_filter;
     string mkl_conv2d_grad_filter_with_bias;
     string mkl_conv2d_with_bias;
+    string mkl_fused_conv2d;
     string mul;
     string quantized_avg_pool;
     string quantized_conv2d;
@@ -923,6 +929,19 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     return false;
   }
 
+  static bool FusedConv2DRewrite(const Node* n) {
+    // MKL DNN currently doesn't support all fusions that grappler fuses
+    // together with
+    // Conv2D (ex. batchnorm). We rewrite _FusedConv2D only if it includes those
+    // we
+    // support.
+
+    std::vector<string> fused_ops;
+    CHECK_EQ(GetNodeAttr(n->def(), "fused_ops", &fused_ops).ok(), true);
+    return (fused_ops == {"BiasAdd"} || fused_ops == {"Relu"} ||
+            fused_ops == {"BiasAdd", "Relu"});
+  }
+
   // Rewrites input node to a new node specified by its matching rewrite info.
   //
   // Method first searches matching rewrite info for input node and then
@@ -1077,6 +1096,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   static void CopyAttrsConv(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb);
+  static void CopyAttrsFusedConv2D(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb);
   static void CopyAttrsQuantizedPooling(const Node* orig_node, NodeBuilder* nb);
@@ -1282,10 +1302,12 @@ int MklLayoutRewritePass::SetUpContiguousInputs(
     CHECK_NOTNULL(filter_node);
 
     // Now check which nodes receive from filter_node. Filter feeds as
-    // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias.
+    // 2nd input (slot 1) of _MklConv2D, _MklConv2DWithBias, and
+    // _MklFusedConv2D.
     for (const Edge* e : filter_node->out_edges()) {
       if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
-           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) &&
+           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias ||
+           e->dst()->type_string() == csinfo_.mkl_fused_conv2d) &&
           e->dst_input() == kConv2DFilterInputSlotIdx
           /* filter is 2nd input of Conv2D and _MklConv2D. */) {
         if (conv2d_node != nullptr) {
@@ -1853,6 +1875,38 @@ void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node,
   nb->Attr("is_training", is_training);
 }
 
+void MklLayoutRewritePass::CopyAttrsFusedConv2D(const Node* orig_node,
+                                                NodeBuilder* nb) {
+  DataType T;
+  int num_args;
+  float epsilon;
+  string data_format;
+  string padding;
+  std::vector<int32> strides;
+  std::vector<int32> dilations;
+  std::vector<string> fused_ops;
+
+  // Get all attributes from old node.
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "num_args", &num_args));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "fused_ops", &fused_ops));
+  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "epsilon", &epsilon));
+
+  // Add attributes to new node.
+  nb->Attr("T", T);
+  nb->Attr("num_args", num_args);
+  nb->Attr("strides", strides);
+  nb->Attr("padding", padding);
+  nb->Attr("data_format", data_format);
+  nb->Attr("dilations", dilations);
+  nb->Attr("fused_ops", fused_ops);
+  nb->Attr("epsilon", epsilon);
+}
+
 //////////////////////////////////////////////////////////////////////////
 //           Helper functions related to node merge pass
 //////////////////////////////////////////////////////////////////////////
@@ -2333,6 +2387,7 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const {
   // names.
   if (n->type_string() != csinfo_.conv2d_with_bias &&
       n->type_string() != csinfo_.conv2d_grad_filter_with_bias &&
+      n->type_string() != csinfo_.fused_conv2d &&
       !mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(n->type_string()),
                                 T)) {
     return nullptr;
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 7e2d1f7878..af27bc4ca8 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -133,7 +133,7 @@ REGISTER_OP("_MklInput2")
     .SetIsStateful();
 
 /////////////////////////////////////////////////////////////////////
-//  Unit tests related to node merge optiimization
+//  Unit tests related to node merge optimization
 /////////////////////////////////////////////////////////////////////
 
 TEST_F(MklLayoutPassTest, Basic) {
@@ -534,6 +534,108 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Conv2D_Negative_UnsupportedType) {
             "A->C;B->C:1;B->D;C->D:1");
 }
 
+// Rewrite test for _FusedConv2D Op with BiasAdd fusion
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedConv2D_Positive1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: '_FusedConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'num_args'         value { i: 1 } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'fused_ops'        value { list: {s: 'BiasAdd'} } }"
+      " attr { key: 'epsilon'          value { f: 0.001 }}"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['D', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklFusedConv2D);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->D:1;C->D:2;C->E:1;D->E;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Rewrite test for _FusedConv2D Op with Relu fusion
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedConv2D_Positive2) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: '_FusedConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'num_args'         value { i: 1 } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'fused_ops'        value { list: {s: 'Relu'} } }"
+      " attr { key: 'epsilon'          value { f: 0.001 }}"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['D', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklFusedConv2D);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->D:1;C->D:2;C->E:1;D->E;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Rewrite test for _FusedConv2D Op with BiasAdd+Relu fusion
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedConv2D_Positive3) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: '_FusedConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'num_args'         value { i: 1 } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'fused_ops'"
+      "             value { list: {s: 'BiasAdd', s: 'Relu'} } }"
+      " attr { key: 'epsilon'          value { f: 0.001 }}"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['D', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_MklFusedConv2D);DMT/_0(Const);"
+            "DMT/_1(Const);DMT/_2(Const);E(Zeta)|A->D;"
+            "A:control->DMT/_0:control;A:control->DMT/_1:control;"
+            "A:control->DMT/_2:control;B->D:1;C->D:2;C->E:1;D->E;"
+            "DMT/_0->D:3;DMT/_1->D:4;DMT/_2->D:5");
+}
+
+// Rewrite test for _FusedConv2D Op with unsupported fusion
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedConv2D_Negative1) {
+  InitGraph(
+      "node { name: 'A' op: 'Input'}"
+      "node { name: 'B' op: 'Input'}"
+      "node { name: 'C' op: 'Input'}"
+      "node { name: 'D' op: '_FusedConv2D'"
+      " attr { key: 'T'                value { type: DT_FLOAT } }"
+      " attr { key: 'num_args'         value { i: 1 } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'fused_ops'        value { list: {s: 'Unsupported'} } }"
+      " attr { key: 'epsilon'          value { f: 0.001 }}"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }"
+      " input: ['D', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(Input);B(Input);C(Input);D(_FusedConv2D);E(Zeta)|A->D;"
+            "B->D:1;C->D:2;C->E:1;D->E");
+}
+
 TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 60accc0f9b..61128abc7b 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -6741,6 +6741,31 @@ tf_mkl_kernel_library(
     deps = NN_DEPS + mkl_deps() + [":cwise_op"],
 )
 
+tf_cc_test_mkl(
+    name = "mkl_fused_ops_test",
+    size = "small",
+    srcs = ["mkl_fused_ops_test.cc"],
+    linkstatic = 1,
+    deps = [
+        ":conv_ops",
+        ":image",
+        ":mkl_conv_op",
+        ":mkl_tfconv_op",
+        ":ops_testutil",
+        ":ops_util",
+        "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core:tensorflow",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+    ],
+)
+
 tf_mkl_kernel_library(
     name = "mkl_transpose_op",
     srcs = [
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 4b0ced3340..db07bc5d58 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -465,19 +465,18 @@ class MklConvOp : public OpKernel {
                                         filter.shape().DebugString()));
 
     for (int i = 0; i < 3; i++) {
-      OP_REQUIRES(
-          context,
-          FastBoundsCheck(filter.dim_size(i), std::numeric_limits<int>::max()),
-          errors::InvalidArgument("filter too large"));
+      OP_REQUIRES(context, FastBoundsCheck(filter.dim_size(i),
+                                           std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("filter too large"));
     }
 
     const int64 input_depth =
         input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'C')
                             : GetTensorDim(input, data_format_, 'C');
-    OP_REQUIRES(context, input_depth == filter.dim_size(2),
-                errors::InvalidArgument(
-                    "input and filter must have the same depth: ", input_depth,
-                    " vs ", filter.dim_size(2)));
+    OP_REQUIRES(
+        context, input_depth == filter.dim_size(2),
+        errors::InvalidArgument("input and filter must have the same depth: ",
+                                input_depth, " vs ", filter.dim_size(2)));
     // The last dimension for filter is out_depth.
     const int out_depth = static_cast<int>(filter.dim_size(3));
 
@@ -486,10 +485,9 @@ class MklConvOp : public OpKernel {
     const int64 input_rows_raw =
         input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'H')
                             : GetTensorDim(input, data_format_, 'H');
-    OP_REQUIRES(
-        context,
-        FastBoundsCheck(input_rows_raw, std::numeric_limits<int>::max()),
-        errors::InvalidArgument("Input rows too large"));
+    OP_REQUIRES(context, FastBoundsCheck(input_rows_raw,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("Input rows too large"));
     const int input_rows = static_cast<int>(input_rows_raw);
     const int filter_rows = static_cast<int>(filter.dim_size(0));
 
@@ -498,10 +496,9 @@ class MklConvOp : public OpKernel {
     const int64 input_cols_raw =
         input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'W')
                             : GetTensorDim(input, data_format_, 'W');
-    OP_REQUIRES(
-        context,
-        FastBoundsCheck(input_cols_raw, std::numeric_limits<int>::max()),
-        errors::InvalidArgument("Input cols too large"));
+    OP_REQUIRES(context, FastBoundsCheck(input_cols_raw,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("Input cols too large"));
     const int input_cols = static_cast<int>(input_cols_raw);
     const int filter_cols = static_cast<int>(filter.dim_size(1));
 
@@ -509,10 +506,9 @@ class MklConvOp : public OpKernel {
     const int64 input_batch_raw =
         input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'N')
                             : GetTensorDim(input, data_format_, 'N');
-    OP_REQUIRES(
-        context,
-        FastBoundsCheck(input_batch_raw, std::numeric_limits<int>::max()),
-        errors::InvalidArgument("batch is too large"));
+    OP_REQUIRES(context, FastBoundsCheck(input_batch_raw,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("batch is too large"));
     const int batch = static_cast<int>(input_batch_raw);
 
     // For now we take the stride from the second and third dimensions only (we
@@ -893,17 +889,15 @@ class MklConvOp : public OpKernel {
       OP_REQUIRES(context, dilations_.size() == 5,
                   errors::InvalidArgument("Dilation rates field must "
                                           "specify 5 dimensions"));
-      OP_REQUIRES(context,
-                  (GetTensorDim(dilations_, data_format_, 'N') == 1 &&
-                   GetTensorDim(dilations_, data_format_, 'C') == 1),
+      OP_REQUIRES(context, (GetTensorDim(dilations_, data_format_, 'N') == 1 &&
+                            GetTensorDim(dilations_, data_format_, 'C') == 1),
                   errors::InvalidArgument(
                       "Current implementation does not yet support "
                       "dilations rates in the batch and depth dimensions."));
       OP_REQUIRES(
-          context,
-          (GetTensorDim(dilations_, data_format_, '0') > 0 &&
-           GetTensorDim(dilations_, data_format_, '1') > 0 &&
-           GetTensorDim(dilations_, data_format_, '2') > 0),
+          context, (GetTensorDim(dilations_, data_format_, '0') > 0 &&
+                    GetTensorDim(dilations_, data_format_, '1') > 0 &&
+                    GetTensorDim(dilations_, data_format_, '2') > 0),
           errors::InvalidArgument("Dilated rates should be larger than 0."));
     }
   }
@@ -1011,7 +1005,7 @@ class MklConvOp : public OpKernel {
       // get a conv2d fwd from primitive pool
       MklConvFwdPrimitive<float, Tinput, Tfilter, Tbias, Ttemp_output>*
           conv_fwd = nullptr;
-      if (biasEnabled) {
+      if (fuse_biasadd_) {
         memory::dims bias_dims = {};
         conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims);
         MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims,
@@ -1083,7 +1077,7 @@ class MklConvOp : public OpKernel {
       }
 
       // execute convolution
-      if (biasEnabled) {
+      if (fuse_biasadd_) {
         const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
         Tbias* bias_data =
             this->GetBiasHandle(context, conv_fwd_pd, bias_tensor);
@@ -1105,6 +1099,12 @@ class MklConvOp : public OpKernel {
   }
 
  protected:
+  void FuseBiasAdd(bool fuse_bias_add) { fuse_biasadd_ = fuse_bias_add; }
+  void FuseRelu(bool fuse_relu) { fuse_relu_ = fuse_relu; }
+
+  // This method is called for the base class MklConvOp, which handles the
+  // floating point implementation of Conv. The quantized conv implementations
+  // will use overiddern versions of this method.
   virtual void ExtendConvFwdParams(OpKernelContext* context,
                                    MklConvFwdParams& params) {
     // Create a string from data types of input, filter, bias, and output.
@@ -1112,6 +1112,11 @@ class MklConvOp : public OpKernel {
     params.dtypes.append(typeid(Tfilter).name());
     params.dtypes.append(typeid(Tbias).name());
     params.dtypes.append(typeid(Toutput).name());
+
+    // Add fusions as post ops
+    if (fuse_relu_) {
+      params.post_op_params.push_back({"relu", {1.0, 0.0, 0.0}});
+    }
   }
 
   virtual Tbias* GetBiasHandle(
@@ -1119,7 +1124,7 @@ class MklConvOp : public OpKernel {
       std::shared_ptr<mkldnn::convolution_forward::primitive_desc>&
           conv2d_fwd_pd,
       const Tensor& bias_tensor) {
-    if (biasEnabled) {
+    if (fuse_biasadd_) {
       return static_cast<Tbias*>(
           const_cast<Tbias*>(bias_tensor.flat<Tbias>().data()));
     } else {
@@ -1165,6 +1170,11 @@ class MklConvOp : public OpKernel {
   std::vector<int32> dilations_;
   Padding padding_;
   TensorFormat data_format_;
+
+  // Initialize to value the template is instantiated with
+  bool fuse_biasadd_ = biasEnabled;
+  bool fuse_relu_ = false;
+
   const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2;
   const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
   const int kDilationH = 0, kDilationW = 1;
@@ -1217,12 +1227,12 @@ class MklConvOp : public OpKernel {
     // Create convolution primitive and add it to net.
     std::vector<primitive> net;
     if (bias) {
-      DCHECK(biasEnabled);
+      DCHECK(fuse_biasadd_);
       net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(),
                                         filter->GetOpMem(), bias->GetOpMem(),
                                         output->GetOpMem()));
     } else {
-      DCHECK(!biasEnabled);
+      DCHECK(!fuse_biasadd_);
       net.push_back(convolution_forward(conv_prim_desc, src->GetOpMem(),
                                         filter->GetOpMem(),
                                         output->GetOpMem()));
@@ -1232,6 +1242,49 @@ class MklConvOp : public OpKernel {
   }
 };
 
+// Base class for fused convolution forward operations
+template <typename Device, typename Tinput, typename Tfilter, typename Tbias,
+          typename Toutput, typename Ttemp_output>
+class MklFusedConvOp : public MklConvOp<Device, Tinput, Tfilter, Tbias, Toutput,
+                                        Ttemp_output, false> {
+ public:
+  explicit MklFusedConvOp(OpKernelConstruction* context)
+      : MklConvOp<Device, Tinput, Tfilter, Tbias, Toutput, Ttemp_output, false>(
+            context) {
+    // Since we came here through the registration of _MklFusedConv2D then get
+    // all information from 'fused_ops' and 'num_args'
+    std::vector<string> fused_ops;
+    OP_REQUIRES_OK(context, context->GetAttr("fused_ops", &fused_ops));
+
+    int num_args;
+    OP_REQUIRES_OK(context, context->GetAttr("num_args", &num_args));
+    OP_REQUIRES(context, (num_args == 0 || !fused_ops.empty()),
+                errors::InvalidArgument(
+                    "Fused Conv2D must have at least one fused op."));
+
+    if (fused_ops == {"BiasAdd"}) {
+      this->FuseBiasAdd(true);
+      OP_REQUIRES(context, num_args == 1,
+                  errors::InvalidArgument(
+                      "Fused Conv2D must have one extra argument: bias."));
+    } else if (fused_ops == {"Relu"}) {
+      this->FuseRelu(true);
+    } else if (fused_ops == {"BiasAdd", "Relu"}) {
+      this->FuseBiasAdd(true);
+      this->FuseRelu(true);
+      OP_REQUIRES(context, num_args == 1,
+                  errors::InvalidArgument(
+                      "Fused Conv2D must have one extra argument: bias."));
+    } else {
+      OP_REQUIRES(context, false,
+                  errors::Unimplemented("Fusion is not implemented: [",
+                                        str_util::Join(fused_ops, ","), "]"));
+    }
+  }
+
+  virtual ~MklFusedConvOp() {}
+};
+
 // We create new class for each verison of Quantized Convolution and inherit
 // from the FP32 version of the base class
 template <typename Device, typename Tbias, typename Toutput,
@@ -1539,8 +1592,8 @@ class MklQuantizedConv2DSumReluOp
     const float max_filter =
         context->input(5 + bias_index_offset).flat<float>()(0);
 
-    reorder_sum_scale = 255.0 * 127.0 /
-                        (std::max(std::abs(max_input), std::abs(min_input)) *
+    reorder_sum_scale =
+        255.0 * 127.0 / (std::max(std::abs(max_input), std::abs(min_input)) *
                          std::max(std::abs(max_filter), std::abs(min_filter)));
     std::vector<float> scales;
     scales.push_back(reorder_sum_scale);
@@ -1811,6 +1864,17 @@ REGISTER_KERNEL_BUILDER(
 
 TF_CALL_float(REGISTER_MKL_CPU_2D);
 
+#define REGISTER_MKL_CPU_2D_FUSED(T)                                \
+  REGISTER_KERNEL_BUILDER(Name("_MklFusedConv2D")                   \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<T>("T")               \
+                              .Label(mkl_op_registry::kMklOpLabel), \
+                          MklFusedConvOp<CPUDevice, T, T, T, T, T>);
+// Note we are registering _MklFusedConv2D.
+// We check the fused_ops attributes to decide if bias is enabled or not.
+
+TF_CALL_float(REGISTER_MKL_CPU_2D_FUSED);
+
 // Register 3D operations
 #define REGISTER_MKL_CPU_3D(T)                                      \
   REGISTER_KERNEL_BUILDER(Name("_MklConv3D")                        \
diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc
new file mode 100644
index 0000000000..7f1965de85
--- /dev/null
+++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc
@@ -0,0 +1,306 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifdef INTEL_MKL
+#include "tensorflow/cc/ops/const_op.h"
+#include "tensorflow/cc/ops/image_ops.h"
+#include "tensorflow/cc/ops/nn_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/conv_ops_gpu.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/public/session.h"
+
+namespace tensorflow {
+
+// Helper class for converting MKL tensors to TF tensors and comparing to
+// expected values
+
+static const uint8 dummy_tensor[] = {0, 0, 0, 0, 0, 0, 0, 0};
+static const TensorShape dummy_shape({8});
+
+template <typename T>
+class ConvMklToTF : public OpsTestBase {
+ public:
+  void PerformConversion(DataType dtype, const Tensor& tensor,
+                         const Tensor& mkl_meta_tensor, Tensor* output) {
+    // Create an MKL to TF conversion node and execute it
+    TF_EXPECT_OK(NodeDefBuilder("mkl_to_tf_op", "_MklToTf")
+                     .Input(FakeInput(dtype))     // Input
+                     .Input(FakeInput(DT_UINT8))  // Mkl second tensor
+                     .Attr("T", dtype)
+                     .Attr("_kernel", "MklOp")
+                     .Finalize(node_def()));
+    TF_EXPECT_OK(InitOp());
+    AddInputFromArray<T>(tensor.shape(), tensor.flat<T>());
+    AddInputFromArray<uint8>(mkl_meta_tensor.shape(),
+                             mkl_meta_tensor.flat<uint8>());
+    TF_ASSERT_OK(RunOpKernel());
+
+    *output = *GetOutput(0);
+  }
+
+  void ConvertAndCompare(DataType dtype, const Tensor& tensor,
+                         const Tensor& mkl_meta_tensor,
+                         const Tensor& expected) {
+    Tensor output;
+    PerformConversion(dtype, tensor, mkl_meta_tensor, &output);
+    test::ExpectTensorNear<T>(expected, output, 1e-5);
+  }
+  void TestBody(){};
+};
+
+// Testing MKL's fused convolution ops
+
+template <typename T>
+class MklFusedConv2DOpTest : public OpsTestBase {
+ protected:
+  static constexpr int kDepth = 3;
+  static constexpr int kImageWidth = 32;
+  static constexpr int kImageHeight = 32;
+  static constexpr int kImageBatchCount = 8;
+
+  using BiasAddGraphRunner =
+      std::function<void(const Tensor& input_data, const Tensor& filter_data,
+                         const Tensor& bias_data, Tensor* out)>;
+
+  // Runs a Tensorflow graph defined by the root scope, and fetches the result
+  // of 'fetch' node into the output Tensor.
+  void RunAndFetch(const tensorflow::Scope& root, const string& fetch,
+                   Tensor* output) {
+    tensorflow::GraphDef graph;
+    TF_ASSERT_OK(root.ToGraphDef(&graph));
+
+    std::unique_ptr<tensorflow::Session> session(
+        tensorflow::NewSession(tensorflow::SessionOptions()));
+    TF_ASSERT_OK(session->Create(graph));
+
+    std::vector<Tensor> unfused_tensors;
+    TF_ASSERT_OK(session->Run({}, {fetch}, {}, &unfused_tensors));
+
+    *output = unfused_tensors[0];
+  }
+
+  void RunConv2DWithBias(const Tensor& input_data, const Tensor& filter_data,
+                         const Tensor& bias_data, Tensor* output,
+                         int stride = 1) {
+    auto root = tensorflow::Scope::NewRootScope();
+
+    auto conv = ops::Conv2D(
+        root.WithOpName("conv"),
+        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
+        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
+        {1, stride, stride, 1}, "SAME");
+
+    auto with_bias = ops::BiasAdd(
+        root.WithOpName("with_bias"), conv,
+        ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));
+
+    RunAndFetch(root, "with_bias", output);
+  }
+
+  void RunConv2DWithBiasAndRelu(const Tensor& input_data,
+                                const Tensor& filter_data,
+                                const Tensor& bias_data, Tensor* output,
+                                int stride = 1) {
+    auto root = tensorflow::Scope::NewRootScope();
+
+    auto conv = ops::Conv2D(
+        root.WithOpName("conv"),
+        ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
+        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
+        {1, stride, stride, 1}, "SAME");
+
+    auto with_bias = ops::BiasAdd(
+        root.WithOpName("with_bias"), conv,
+        ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));
+
+    auto with_relu = ops::Relu(root.WithOpName("with_relu"), with_bias);
+
+    RunAndFetch(root, "with_relu", output);
+  }
+
+  void RunMklFusedConv2DOp(const Tensor& image, const Tensor& filter,
+                           const std::vector<Tensor>& args,
+                           const std::vector<string>& fused_ops, Tensor* output,
+                           int stride = 1) {
+    DataType dtype = DataTypeToEnum<T>::v();
+    int num_args = static_cast<int>(args.size());
+
+    TF_EXPECT_OK(NodeDefBuilder("fused_conv_op", "_MklFusedConv2D")
+                     .Input(FakeInput(dtype))
+                     .Input(FakeInput(dtype))
+                     .Attr("num_args", num_args)
+                     .Input(FakeInput(num_args, dtype))
+                     .Input(FakeInput(DT_UINT8))
+                     .Input(FakeInput(DT_UINT8))
+                     .Input(FakeInput(num_args, DT_UINT8))
+                     .Attr("T", dtype)
+                     .Attr("strides", {1, stride, stride, 1})
+                     .Attr("padding", "SAME")
+                     .Attr("fused_ops", fused_ops)
+                     .Attr("_kernel", "MklOp")
+                     .Finalize(node_def()));
+
+    TF_EXPECT_OK(InitOp());
+
+    AddInputFromArray<T>(image.shape(), image.flat<T>());
+    AddInputFromArray<T>(filter.shape(), filter.flat<T>());
+    for (const Tensor& arg : args)
+      AddInputFromArray<T>(arg.shape(), arg.flat<T>());
+    AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+    AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+    for (const Tensor& arg : args)
+      AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+    TF_ASSERT_OK(RunOpKernel());
+
+    // Compare output to expected results
+    const Tensor& output_tensor = *GetOutput(0);
+    const Tensor& output_meta_tensor = *GetOutput(2);
+    ConvMklToTF<T> conv_comp;
+    conv_comp.PerformConversion(dtype, output_tensor, output_meta_tensor,
+                                output);
+  }
+
+  void VerifyBiasAddTensorsNear(int depth, int image_width, int image_height,
+                                int image_batch_count, int filter_size,
+                                int filter_count,
+                                const BiasAddGraphRunner& run_default,
+                                const BiasAddGraphRunner& run_fused) {
+    DataType dtype = DataTypeToEnum<T>::v();
+
+    Tensor image(dtype, {image_batch_count, image_height, image_width, depth});
+    image.flat<T>() = image.flat<T>().setRandom();
+
+    Tensor filter(dtype, {filter_size, filter_size, depth, filter_count});
+    filter.flat<T>() = filter.flat<T>().setRandom();
+
+    const int bias_size = filter_count;
+    Tensor bias(dtype, {bias_size});
+    bias.flat<T>() = bias.flat<T>().setRandom();
+
+    Tensor conv_2d;
+    Tensor fused_conv_2d;
+
+    run_default(image, filter, bias, &conv_2d);
+    run_fused(image, filter, bias, &fused_conv_2d);
+
+    ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
+    ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());
+
+    test::ExpectTensorNear<T>(conv_2d, fused_conv_2d, 1e-5);
+  }
+
+  // Verifies that computing Conv2D+BiasAdd in a graph is identical to
+  // FusedConv2D.
+  void VerifyConv2DWithBias(int filter_size, int filter_count,
+                            int depth = kDepth, int image_width = kImageWidth,
+                            int image_height = kImageHeight,
+                            int image_batch_count = kImageBatchCount) {
+    const BiasAddGraphRunner run_default = [this](
+        const Tensor& input_data, const Tensor& filter_data,
+        const Tensor& bias_data, Tensor* out) {
+      RunConv2DWithBias(input_data, filter_data, bias_data, out);
+    };
+
+    const BiasAddGraphRunner run_fused = [this](
+        const Tensor& input_data, const Tensor& filter_data,
+        const Tensor& bias_data, Tensor* out) {
+      RunMklFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"},
+                          out);
+    };
+
+    VerifyBiasAddTensorsNear(depth, image_width, image_height,
+                             image_batch_count, filter_size, filter_count,
+                             run_default, run_fused);
+  }
+
+  // Verifies that computing Conv2D+BiasAdd+Relu in a graph is identical to
+  // FusedConv2D.
+  void VerifyConv2DWithBiasAndRelu(int filter_size, int filter_count,
+                                   int depth = kDepth,
+                                   int image_width = kImageWidth,
+                                   int image_height = kImageHeight,
+                                   int image_batch_count = kImageBatchCount) {
+    const BiasAddGraphRunner run_default = [this](
+        const Tensor& input_data, const Tensor& filter_data,
+        const Tensor& bias_data, Tensor* out) {
+      RunConv2DWithBiasAndRelu(input_data, filter_data, bias_data, out);
+    };
+
+    const BiasAddGraphRunner run_fused = [this](
+        const Tensor& input_data, const Tensor& filter_data,
+        const Tensor& bias_data, Tensor* out) {
+      RunMklFusedConv2DOp(input_data, filter_data, {bias_data},
+                          {"BiasAdd", "Relu"}, out);
+    };
+
+    VerifyBiasAddTensorsNear(depth, image_width, image_height,
+                             image_batch_count, filter_size, filter_count,
+                             run_default, run_fused);
+  }
+};
+
+template <typename T>
+class MklFusedConv2DWithBiasOpTest : public MklFusedConv2DOpTest<T> {};
+
+TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest);
+
+// -------------------------------------------------------------------------- //
+// Conv2D + BiasAdd + {Relu}                                                  //
+// -------------------------------------------------------------------------- //
+
+TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, OneByOneConvolution) {
+  const int filter_size = 1;
+  const int filter_count = 12;
+  this->VerifyConv2DWithBias(filter_size, filter_count);
+}
+
+TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolution) {
+  const int filter_size = 3;
+  const int filter_count = 12;
+  this->VerifyConv2DWithBias(filter_size, filter_count);
+}
+
+TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, OneByOneConvolutionAndRelu) {
+  const int filter_size = 1;
+  const int filter_count = 12;
+  this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
+}
+
+TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
+  const int filter_size = 3;
+  const int filter_count = 12;
+  this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
+}
+
+REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest,
+                           OneByOneConvolution,         //
+                           SpatialConvolution,          //
+                           OneByOneConvolutionAndRelu,  //
+                           SpatialConvolutionAndRelu);
+
+using MklFusedBiasAddDataTypes = ::testing::Types<float>;
+INSTANTIATE_TYPED_TEST_CASE_P(Test, MklFusedConv2DWithBiasOpTest,
+                              MklFusedBiasAddDataTypes);
+}  // namespace tensorflow
+#endif  // INTEL_MKL
diff --git a/tensorflow/core/ops/mkl_nn_ops.cc b/tensorflow/core/ops/mkl_nn_ops.cc
index 9be3470820..658afd9901 100644
--- a/tensorflow/core/ops/mkl_nn_ops.cc
+++ b/tensorflow/core/ops/mkl_nn_ops.cc
@@ -32,6 +32,33 @@ using shape_inference::DimensionHandle;
 using shape_inference::InferenceContext;
 using shape_inference::ShapeHandle;
 
+REGISTER_OP("_MklFusedConv2D")
+    .Input("input: T")
+    .Input("filter: T")
+    .Input("args: num_args * T")
+    .Input("mkl_input: uint8")
+    .Input("mkl_filter: uint8")
+    .Input("mkl_args: num_args * uint8")
+    .Output("output: T")
+    .Output("filter_output: T")
+    .Output("mkl_output: uint8")
+    .Output("mkl_filter_output: uint8")
+    .Attr("T: {float}")
+    .Attr("num_args: int >= 0")
+    .Attr("strides: list(int)")
+    .Attr(GetPaddingAttrString())
+    .Attr(GetConvnetDataFormatAttrString())
+    .Attr("dilations: list(int) = [1, 1, 1, 1]")
+    .Attr("fused_ops: list(string) = []")
+    // Attributes for the FusedBatchNorm ------------------------------------ //
+    .Attr("epsilon: float = 0.0001")
+    // ---------------------------------------------------------------------- //
+    .SetShapeFn(shape_inference::Conv2DShape)
+    .Doc(R"doc(
+*NOTE*: Do not invoke this operator directly in Python. MKL DNN graph transformer
+ is expected to create these operators.
+)doc");
+
 REGISTER_OP("_MklQuantizedMaxPool")
     .Input("input:         T")
     .Input("min_input:     float")
-- 
GitLab


From bac37febab0c6f5fe008484c6fc255f6c2346775 Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Fri, 30 Nov 2018 16:56:37 -0800
Subject: [PATCH 059/873] Add benchmarks for list_files dataset

---
 tensorflow/python/data/benchmarks/BUILD       | 14 +++
 .../data/benchmarks/list_files_benchmark.py   | 95 +++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 tensorflow/python/data/benchmarks/list_files_benchmark.py

diff --git a/tensorflow/python/data/benchmarks/BUILD b/tensorflow/python/data/benchmarks/BUILD
index 5b0500eae1..fd0eca9dd7 100644
--- a/tensorflow/python/data/benchmarks/BUILD
+++ b/tensorflow/python/data/benchmarks/BUILD
@@ -48,6 +48,20 @@ py_test(
     ],
 )
 
+py_test(
+    name = "list_files_benchmark",
+    srcs = ["list_files_benchmark.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "map_benchmark",
     srcs = ["map_benchmark.py"],
diff --git a/tensorflow/python/data/benchmarks/list_files_benchmark.py b/tensorflow/python/data/benchmarks/list_files_benchmark.py
new file mode 100644
index 0000000000..3ad141fb54
--- /dev/null
+++ b/tensorflow/python/data/benchmarks/list_files_benchmark.py
@@ -0,0 +1,95 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Benchmarks for `tf.data.Dataset.batch()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from os import path
+from os import makedirs
+import shutil
+import time
+import tempfile
+
+import numpy as np
+
+from tensorflow.python.client import session
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import test
+
+class ListFilesBenchmark(test.Benchmark):
+  """Benchmarks for `tf.data.Dataset.list_files()`."""
+
+  def benchmarkNestedDirectories(self):
+    tmp_dir = tempfile.mkdtemp()
+    width = 1024
+    depth = 16
+    for i in range(width):
+      for j in range(depth):
+        new_base = path.join(tmp_dir, str(i),
+                             *[str(dir_name) for dir_name in range(j)])
+        makedirs(new_base)
+        child_files = ['a.py', 'b.pyc'] if j < depth - 1 else ['c.txt', 'd.log']
+        for f in child_files:
+          filename = path.join(new_base, f)
+          open(filename, 'w').close()
+    patterns = [
+        path.join(tmp_dir, path.join(*['**' for _ in range(depth)]), suffix)
+        for suffix in ['*.txt', '*.log']
+    ]
+    deltas = []
+    iters = 3
+    for _ in range(iters):
+      with ops.Graph().as_default():
+        dataset = dataset_ops.Dataset.list_files(patterns)
+        next_element = dataset.make_one_shot_iterator().get_next()
+        with session.Session() as sess:
+          sub_deltas = []
+          while True:
+            try:
+              start = time.time()
+              sess.run(next_element)
+              end = time.time()
+              sub_deltas.append(end - start)
+            except errors.OutOfRangeError:
+              break
+          deltas.append(sub_deltas)
+    median_deltas = np.median(deltas, axis=0)
+    print('Nested directory size (width*depth): %d*%d Median wall time: '
+          '%fs (read first filename), %fs (read second filename), avg %fs'
+          ' (read %d more filenames)' %
+          (width, depth, median_deltas[0], median_deltas[1],
+           np.average(median_deltas[2:]), len(median_deltas) - 2))
+    self.report_benchmark(
+        iters=iters,
+        wall_time=np.sum(median_deltas),
+        extras={
+            'read first file:':
+                median_deltas[0],
+            'read second file:':
+                median_deltas[1],
+            'avg time for reading %d more filenames:' %
+            (len(median_deltas) - 2):
+                np.average(median_deltas[2:])
+        },
+        name='benchmark_list_files_dataset_nesteddirectory(%d*%d)' %
+        (width, depth))
+    shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From 8584f21392772170b007ee3b3fbfed17fe19e32f Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 1 Dec 2018 20:58:09 +0000
Subject: [PATCH 060/873] Fix SparseDenseCwise's broadcasting issue

This fix tries to address the issue raised in 24072.
In `sparse_dense_cwise_mul/add` operations the broadcasting
only support dense to sparse, though the validation was not
captured.

This fix fixes the validation in SparseDenseBinaryOpShared
so that error could be thrown correctly.

This fix fixes 24072.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/sparse_dense_binary_op_shared.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
index ac48202ada..3a6b66302f 100644
--- a/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
+++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
@@ -88,12 +88,11 @@ class SparseDenseBinaryOpShared : public OpKernel {
     const auto rhs_dims = BCast::FromShape(dense_t->shape());
     BCast b(lhs_dims, rhs_dims, false);  // false for keeping the same num dims.
 
-    // True iff (size(lhs) > size(rhs)), or (sizes equal, lhs cwise rhs).
+    // True iff (size(lhs) >= size(rhs)) and all dims in lhs is smaller or equal to dims in rhs (from right to left).
     auto VecGreaterEq = [](ArraySlice<int64> lhs, ArraySlice<int64> rhs) {
-      if (lhs.size() > rhs.size()) return true;
       if (lhs.size() < rhs.size()) return false;
-      for (size_t i = 0; i < lhs.size(); ++i) {
-        if (lhs[i] < rhs[i]) return false;
+      for (size_t i = 0; i < rhs.size(); ++i) {
+        if (lhs[lhs.size() - 1 - i] < rhs[rhs.size() - 1 - i]) return false;
       }
       return true;
     };
-- 
GitLab


From a7cd4dbea9f276160ebadf82178e77ae5c8d557e Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 1 Dec 2018 21:02:04 +0000
Subject: [PATCH 061/873] Add test case for sparse_dense_cwise shape
 validation.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/sparse_ops_test.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py
index 75f65e6251..f58832a89e 100644
--- a/tensorflow/python/kernel_tests/sparse_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_ops_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
@@ -798,6 +799,17 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase):
                                                result_tensor.values).eval()
     self.assertAllEqual(result_np, res_densified)
 
+  @test_util.run_deprecated_v1
+  def testCwiseShapeValidation(self):
+    # Test case for GitHub 24072.
+    with self.session(use_gpu=False):
+      a = array_ops.ones([3, 4, 1], dtype=dtypes.int32)
+      b = sparse_tensor.SparseTensor([[0, 0, 1, 0], [0, 0, 3, 0]], [10, 20], [1, 1, 4, 2])
+      c = a * b
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   "broadcasts dense to sparse only; got incompatible shapes"):
+        c.eval()
+
   @test_util.run_deprecated_v1
   def testCwiseDivAndMul(self):
     np.random.seed(1618)
-- 
GitLab


From 676c6ea316f4dda962d8b2e29855c040020533f5 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 1 Dec 2018 21:04:41 +0000
Subject: [PATCH 062/873] Pylint fix

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/sparse_ops_test.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/kernel_tests/sparse_ops_test.py b/tensorflow/python/kernel_tests/sparse_ops_test.py
index f58832a89e..50b6239185 100644
--- a/tensorflow/python/kernel_tests/sparse_ops_test.py
+++ b/tensorflow/python/kernel_tests/sparse_ops_test.py
@@ -804,10 +804,12 @@ class SparseMathOpsTest(test_util.TensorFlowTestCase):
     # Test case for GitHub 24072.
     with self.session(use_gpu=False):
       a = array_ops.ones([3, 4, 1], dtype=dtypes.int32)
-      b = sparse_tensor.SparseTensor([[0, 0, 1, 0], [0, 0, 3, 0]], [10, 20], [1, 1, 4, 2])
+      b = sparse_tensor.SparseTensor(
+          [[0, 0, 1, 0], [0, 0, 3, 0]], [10, 20], [1, 1, 4, 2])
       c = a * b
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "broadcasts dense to sparse only; got incompatible shapes"):
+      with self.assertRaisesRegexp(
+          errors.InvalidArgumentError,
+          "broadcasts dense to sparse only; got incompatible shapes"):
         c.eval()
 
   @test_util.run_deprecated_v1
-- 
GitLab


From 28e034bd293db09a0ecc707f71f65fe6c5dd2943 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 1 Dec 2018 21:28:19 +0000
Subject: [PATCH 063/873] Fix `Experimental clang-format Check`

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/sparse_dense_binary_op_shared.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
index 3a6b66302f..d7460363fc 100644
--- a/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
+++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
@@ -88,7 +88,8 @@ class SparseDenseBinaryOpShared : public OpKernel {
     const auto rhs_dims = BCast::FromShape(dense_t->shape());
     BCast b(lhs_dims, rhs_dims, false);  // false for keeping the same num dims.
 
-    // True iff (size(lhs) >= size(rhs)) and all dims in lhs is smaller or equal to dims in rhs (from right to left).
+    // True iff (size(lhs) >= size(rhs)) and all dims in lhs is smaller or equal
+    // to dims in rhs (from right to left).
     auto VecGreaterEq = [](ArraySlice<int64> lhs, ArraySlice<int64> rhs) {
       if (lhs.size() < rhs.size()) return false;
       for (size_t i = 0; i < rhs.size(); ++i) {
-- 
GitLab


From fa9371a2f2095adc9f2b7a2b8700b9e6f0f31c2d Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Sat, 1 Dec 2018 15:27:49 -0800
Subject: [PATCH 064/873] Fix clang format errors

---
 tensorflow/core/kernels/mkl_conv_ops.cc       | 15 ++++++++------
 tensorflow/core/kernels/mkl_fused_ops_test.cc | 20 ++++++++++++++++---
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index db07bc5d58..c354390c69 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -1114,9 +1114,7 @@ class MklConvOp : public OpKernel {
     params.dtypes.append(typeid(Toutput).name());
 
     // Add fusions as post ops
-    if (fuse_relu_) {
-      params.post_op_params.push_back({"relu", {1.0, 0.0, 0.0}});
-    }
+    if (fuse_relu_) params.post_op_params.push_back({"relu", {1.0, 0.0, 0.0}});
   }
 
   virtual Tbias* GetBiasHandle(
@@ -1179,6 +1177,11 @@ class MklConvOp : public OpKernel {
   const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
   const int kDilationH = 0, kDilationW = 1;
 
+  // Helper function to compare fused_ops attributes strings
+  bool CompareFusedOps(const std::vector<string>& fused_ops,
+                       const std::vector<string>& expected) {
+    return fused_ops == expected;
+  }
   // Allocate filter output tensor.
   void AllocateFilterOutputTensor(
       OpKernelContext* context,
@@ -1262,14 +1265,14 @@ class MklFusedConvOp : public MklConvOp<Device, Tinput, Tfilter, Tbias, Toutput,
                 errors::InvalidArgument(
                     "Fused Conv2D must have at least one fused op."));
 
-    if (fused_ops == {"BiasAdd"}) {
+    if (CompareFusedOps(fused_ops, {"BiasAdd"})) {
       this->FuseBiasAdd(true);
       OP_REQUIRES(context, num_args == 1,
                   errors::InvalidArgument(
                       "Fused Conv2D must have one extra argument: bias."));
-    } else if (fused_ops == {"Relu"}) {
+    } else if (CompareFusedOps(fused_ops, {"Relu"})) {
       this->FuseRelu(true);
-    } else if (fused_ops == {"BiasAdd", "Relu"}) {
+    } else if (CompareFusedOps(fused_ops, {"BiasAdd", "Relu"})) {
       this->FuseBiasAdd(true);
       this->FuseRelu(true);
       OP_REQUIRES(context, num_args == 1,
diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc
index 7f1965de85..eb456ce7a3 100644
--- a/tensorflow/core/kernels/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc
@@ -275,6 +275,12 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, OneByOneConvolution) {
   this->VerifyConv2DWithBias(filter_size, filter_count);
 }
 
+TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, ImageSizeConvolution) {
+  const int filter_size = TestFixture::kImageWidth;
+  const int filter_count = 12;
+  this->VerifyConv2DWithBias(filter_size, filter_count);
+}
+
 TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolution) {
   const int filter_size = 3;
   const int filter_count = 12;
@@ -287,6 +293,12 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, OneByOneConvolutionAndRelu) {
   this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
 }
 
+TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, ImageSizeConvolutionAndRelu) {
+  const int filter_size = TestFixture::kImageWidth;
+  const int filter_count = 12;
+  this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
+}
+
 TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
   const int filter_size = 3;
   const int filter_count = 12;
@@ -294,9 +306,11 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
 }
 
 REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest,
-                           OneByOneConvolution,         //
-                           SpatialConvolution,          //
-                           OneByOneConvolutionAndRelu,  //
+                           OneByOneConvolution,          //
+                           ImageSizeConvolution,         //
+                           SpatialConvolution,           //
+                           OneByOneConvolutionAndRelu,   //
+                           ImageSizeConvolutionAndRelu,  //
                            SpatialConvolutionAndRelu);
 
 using MklFusedBiasAddDataTypes = ::testing::Types<float>;
-- 
GitLab


From e25e93b15d372d1036961cb1d55e29edcc588f29 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Sat, 1 Dec 2018 17:17:02 -0800
Subject: [PATCH 065/873] Added missing file

---
 tensorflow/core/graph/mkl_graph_util.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index 990b2fe9b0..7435f4e8c1 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -72,6 +72,14 @@ int inline GetTensorMetaDataIndex(int n, int total_tensors) {
   return DataIndexToMetaDataIndex(tidx, total_tensors);
 }
 
+// Helper function to compare fused_ops attributes strings
+// TODO(Intel-tf) this code is also in mkl_conv_ops.h, we need to move to
+// mkl_util.h
+inline bool CompareFusedOps(const std::vector<string>& fused_ops,
+                            const std::vector<string>& expected) {
+  return fused_ops == expected;
+}
+
 namespace mkl_op_registry {
 static const char* kMklOpLabel = "MklOp";
 static const char* kMklOpLabelPattern = "label='MklOp'";
-- 
GitLab


From cc8e28b72b52718db5f22830e1d529d8e077b537 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sun, 2 Dec 2018 01:55:11 +0000
Subject: [PATCH 066/873] Fix broken link in lite api docs

This fix fixes broken link in lite apis docs

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/lite/g3doc/apis.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/lite/g3doc/apis.md b/tensorflow/lite/g3doc/apis.md
index e9fa24bff1..60ce11d685 100644
--- a/tensorflow/lite/g3doc/apis.md
+++ b/tensorflow/lite/g3doc/apis.md
@@ -347,7 +347,7 @@ interpreter.runForMultipleInputsOutputs(inputs, map_of_indices_to_outputs);
 where each entry in `inputs` corresponds to an input tensor and
 `map_of_indices_to_outputs` maps indices of output tensors to the
 corresponding output data. In both cases the tensor indices should correspond to
-the values given to the [TensorFlow Lite Optimized Converter](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/toco/g3doc/cmdline_examples.md)
+the values given to the [TensorFlow Lite Optimized Converter](convert/cmdline_examples.md)
 when the model was created. Be aware that the order of tensors in `input` must
 match the order given to the `TensorFlow Lite Optimized Converter`.
 
-- 
GitLab


From 5a253d2f476ac5a722fcb67c2c9fa1aaf87ab4db Mon Sep 17 00:00:00 2001
From: Fei Hu <hufei68@gmail.com>
Date: Sun, 2 Dec 2018 12:24:13 -0800
Subject: [PATCH 067/873] Change the function and variable names

---
 tensorflow/python/data/benchmarks/list_files_benchmark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/data/benchmarks/list_files_benchmark.py b/tensorflow/python/data/benchmarks/list_files_benchmark.py
index 3ad141fb54..5880b28ad8 100644
--- a/tensorflow/python/data/benchmarks/list_files_benchmark.py
+++ b/tensorflow/python/data/benchmarks/list_files_benchmark.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Benchmarks for `tf.data.Dataset.batch()`."""
+"""Benchmarks for `tf.data.Dataset.list_files()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -86,7 +86,7 @@ class ListFilesBenchmark(test.Benchmark):
             (len(median_deltas) - 2):
                 np.average(median_deltas[2:])
         },
-        name='benchmark_list_files_dataset_nesteddirectory(%d*%d)' %
+        name='nested_directory(%d*%d)' %
         (width, depth))
     shutil.rmtree(tmp_dir, ignore_errors=True)
 
-- 
GitLab


From 694e4da7adaaf0db07deddbfdf4d79d5f3053f42 Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Mon, 3 Dec 2018 09:52:16 +0800
Subject: [PATCH 068/873] update CHECK_NOTNULL and CHECK_EQ to DCHECK

---
 tensorflow/core/graph/mkl_layout_pass.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index cd93514bb2..da966483ff 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1097,10 +1097,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   // These two algorithm are not consistent when alpha > 1
   // so only LeakyRelu is written to MKL OP when alpha < 1
   static bool LeakyReluRewrite(const Node* n) {
-    CHECK_NOTNULL(n);
+    DCHECK(n);
 
     float alpha;
-    CHECK_EQ(GetNodeAttr(n->def(), "alpha", &alpha).ok(), true);
+    DCHECK(GetNodeAttr(n->def(), "alpha", &alpha).ok());
 
     // If the alpha of LeakyRelu is less than 1, rewrite the node.
     // Otherwise eigen node is used instead.
-- 
GitLab


From ec803e981cde50dc127f655339215892e5422d3d Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Mon, 3 Dec 2018 09:57:29 +0800
Subject: [PATCH 069/873] Change clang format.

---
 tensorflow/core/kernels/mkl_slice_op.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index f32a6003af..233f33e1cb 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -265,8 +265,8 @@ class MklSlicePrimitiveFactory : public MklPrimitiveFactory<T> {
     memory::dims to_dims(to_desc.dims, &to_desc.dims[to_desc.ndims]);
     memory::dims from_strides(
         from_desc.layout_desc.blocking.strides[KIdxFirstStride],
-        &from_desc.layout_desc.blocking.strides[KIdxFirstStride]
-                                               [from_desc.ndims]);
+        &from_desc.layout_desc.blocking
+             .strides[KIdxFirstStride][from_desc.ndims]);
     memory::dims to_strides(
         to_desc.layout_desc.blocking.strides[KIdxFirstStride],
         &to_desc.layout_desc.blocking.strides[KIdxFirstStride][to_desc.ndims]);
-- 
GitLab


From a02c524cb328dea373b77a03ec4f448f6f386674 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Mon, 3 Dec 2018 16:57:57 +0000
Subject: [PATCH 070/873] Update comment based on feedback

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/sparse_dense_binary_op_shared.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc b/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
index d7460363fc..a4e89f439e 100644
--- a/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
+++ b/tensorflow/core/kernels/sparse_dense_binary_op_shared.cc
@@ -88,7 +88,7 @@ class SparseDenseBinaryOpShared : public OpKernel {
     const auto rhs_dims = BCast::FromShape(dense_t->shape());
     BCast b(lhs_dims, rhs_dims, false);  // false for keeping the same num dims.
 
-    // True iff (size(lhs) >= size(rhs)) and all dims in lhs is smaller or equal
+    // True iff (size(lhs) >= size(rhs)) and all dims in lhs is greater or equal
     // to dims in rhs (from right to left).
     auto VecGreaterEq = [](ArraySlice<int64> lhs, ArraySlice<int64> rhs) {
       if (lhs.size() < rhs.size()) return false;
-- 
GitLab


From 8b9636d1d7201f369fbfdb3e079d595888143be6 Mon Sep 17 00:00:00 2001
From: Pooya Davoodi <pdavoodi@nvidia.com>
Date: Mon, 3 Dec 2018 11:04:40 -0800
Subject: [PATCH 071/873] Update README.md

---
 tensorflow/contrib/tensorrt/README.md | 57 ++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md
index caf8b6db0d..09ef7f459f 100644
--- a/tensorflow/contrib/tensorrt/README.md
+++ b/tensorflow/contrib/tensorrt/README.md
@@ -1,8 +1,47 @@
-# Using TensorRT in TensorFlow
+# Using TensorRT in TensorFlow (TF-TRT)
 
-This module provides necessary bindings and introduces TRT_engine_op operator
-that wraps a subgraph in TensorRT. This is still a work in progress but should
-be useable with most common graphs.
+This module provides necessary bindings and introduces
+`TRTEngineOp` operator that wraps a subgraph in TensorRT.
+This is still a work in progress but should be useable
+with most common graphs.
+
+## Installing TF-TRT
+
+Currently Tensorflow nightly builds include TF-TRT by default,
+which means you don't need to install TF-TRT separately.
+You can pull the latest TF containers from docker hub or
+install the latest TF pip package to get access to the latest TF-TRT.
+
+If you want to use TF-TRT on NVIDIA Jetson platform, you can find
+the download links for the relevant Tensorflow pip packages here:
+https://docs.nvidia.com/deeplearning/dgx/index.html#installing-frameworks-for-jetson
+
+## Installing TensorRT
+
+In order to make use of TF-TRT, you will need a local installation
+of TensorRT from the
+[NVIDIA Developer website](https://developer.nvidia.com/tensorrt).
+Installation instructions for compatibility with TensorFlow are provided on the
+[TensorFlow GPU support](https://www.tensorflow.org/install/gpu) guide.
+
+## Tests
+
+TF-TRT includes both Python tests and C++ unit tests.
+Most of Python tests are located in the test directory
+and they can be executed uring `bazel test` or directly
+with the Python command. Most of the C++ unit tests are
+used to test the conversion functions that convert each TF op to
+a number of TensorRT layers.
+
+## Examples
+
+You can find example scripts for running inference on deep learning models
+in this repository: https://github.com/tensorflow/tensorrt
+
+## Documentation
+
+You can find documentation for TF-TRT here:
+https://docs.nvidia.com/deeplearning/dgx/integrate-tf-trt/index.html
 
 ## Compilation
 
@@ -17,13 +56,3 @@ has to set path to location where the library is installed during configuration.
 bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package
 bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/
 ```
-
-After the installation of tensorflow package, TensorRT transformation will be
-available. An example use can be found in test/test_tftrt.py script
-
-## Installing TensorRT 3.0.4
-
-In order to make use of TensorRT integration, you will need a local installation
-of TensorRT 3.0.4 from the [NVIDIA Developer website](https://developer.nvidia.com/tensorrt).
-Installation instructions for compatibility with TensorFlow are provided on the
-[TensorFlow GPU support](https://www.tensorflow.org/install/gpu) guide.
-- 
GitLab


From 215beaca8c28f22a2d6b66b16b4770851491a792 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Mon, 3 Dec 2018 11:26:56 -0800
Subject: [PATCH 072/873] Prevent segments with no inputs

---
 tensorflow/contrib/tensorrt/convert/convert_graph.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 3b32f72bc1..3e599b9174 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -585,6 +585,13 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos,
       }
     }
   }
+  // We don't support segments with no inputs. Fall back to native TF here to
+  // avoid crash later. Constant folding should've folded the ops that make up
+  // these segments.
+  if (inputs.size() == 0) {
+    return tensorflow::errors::Internal("Segment has no inputs (possible "
+                                        "constfold failure)");
+  }
 
   const bool calibrate_int8 =
       (info.precision_mode == INT8MODE && info.use_calibration);
-- 
GitLab


From 0df72280ac450481f101ac237a18e6a6dc637d01 Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Mon, 3 Dec 2018 13:49:03 -0800
Subject: [PATCH 073/873] Changing DCHECK_EQ to TF_CHECK_OK

---
 tensorflow/core/graph/mkl_layout_pass.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 32ac1084d1..177d6becf2 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1125,7 +1125,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // it includes those we support.
 
     std::vector<string> fused_ops;
-    DCHECK_EQ(GetNodeAttr(n->def(), "fused_ops", &fused_ops).ok(), true);
+    TF_CHECK_OK(GetNodeAttr(n->def(), "fused_ops", &fused_ops));
     return (CompareFusedOps(fused_ops, {"BiasAdd"}) ||
             CompareFusedOps(fused_ops, {"Relu"}) ||
             CompareFusedOps(fused_ops, {"BiasAdd", "Relu"}));
-- 
GitLab


From 27d598cee798cc62434fb0d08abb45e20d650dda Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Tue, 4 Dec 2018 13:40:37 +0800
Subject: [PATCH 074/873] update DCHECK

---
 tensorflow/core/graph/mkl_layout_pass.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index da966483ff..e283d00045 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1100,7 +1100,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     DCHECK(n);
 
     float alpha;
-    DCHECK(GetNodeAttr(n->def(), "alpha", &alpha).ok());
+    bool has_attr = GetNodeAttr(n->def(), "alpha", &alpha).ok();
+    DCHECK(has_attr);
 
     // If the alpha of LeakyRelu is less than 1, rewrite the node.
     // Otherwise eigen node is used instead.
-- 
GitLab


From 900762cd4bca45fd8382778bd65e17f2fe13bf2b Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Wed, 28 Nov 2018 15:35:29 +0800
Subject: [PATCH 075/873] systemlibs: unbundle keras_applications

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 third_party/keras_applications_archive/BUILD.system | 13 +++++++++++++
 .../keras_applications_archive/workspace.bzl        |  1 +
 third_party/systemlibs/syslibs_configure.bzl        |  1 +
 3 files changed, 15 insertions(+)
 create mode 100644 third_party/keras_applications_archive/BUILD.system

diff --git a/third_party/keras_applications_archive/BUILD.system b/third_party/keras_applications_archive/BUILD.system
new file mode 100644
index 0000000000..a3b58f1503
--- /dev/null
+++ b/third_party/keras_applications_archive/BUILD.system
@@ -0,0 +1,13 @@
+# Description: Keras Applications: set of pre-trained deep learning models.
+
+licenses(["notice"])  # MIT
+
+filegroup(
+    name = "LICENSE",
+    visibility = ["//visibility:public"],
+)
+
+py_library(
+    name = "keras_applications",
+    visibility = ["//visibility:public"],
+)
diff --git a/third_party/keras_applications_archive/workspace.bzl b/third_party/keras_applications_archive/workspace.bzl
index e90630fa97..cf9d15ca28 100644
--- a/third_party/keras_applications_archive/workspace.bzl
+++ b/third_party/keras_applications_archive/workspace.bzl
@@ -12,4 +12,5 @@ def repo():
             "https://github.com/keras-team/keras-applications/archive/1.0.6.tar.gz",
         ],
         build_file = "//third_party/keras_applications_archive:BUILD.bazel",
+        system_build_file = "//third_party/keras_applications_archive:BUILD.system",
     )
diff --git a/third_party/systemlibs/syslibs_configure.bzl b/third_party/systemlibs/syslibs_configure.bzl
index dbf4fd6e32..85187587c9 100644
--- a/third_party/systemlibs/syslibs_configure.bzl
+++ b/third_party/systemlibs/syslibs_configure.bzl
@@ -26,6 +26,7 @@ VALID_LIBS = [
     "icu",
     "jpeg",
     "jsoncpp_git",
+    "keras_applications_archive",
     "lmdb",
     "nasm",
     "nsync",
-- 
GitLab


From 813af36087a44f2a5670625408b076f531ea805b Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Wed, 28 Nov 2018 15:35:50 +0800
Subject: [PATCH 076/873] systemlibs: icu: update unbundle

//third_party/icu/data was added which depends on a new icu target that
was missing in the unbundled BUILD file.

Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 third_party/icu/BUILD.system | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/third_party/icu/BUILD.system b/third_party/icu/BUILD.system
index 328e412a8c..8a88a6ef7e 100644
--- a/third_party/icu/BUILD.system
+++ b/third_party/icu/BUILD.system
@@ -1,13 +1,19 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
 licenses(["notice"])  # Apache 2.0
 
 filegroup(
     name = "icu4c/LICENSE",
-    visibility = ["//visibility:public"],
 )
 
 filegroup(
     name = "icu4j/main/shared/licenses/LICENSE",
-    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "headers",
 )
 
 cc_library(
@@ -15,7 +21,6 @@ cc_library(
     deps = [
         ":icuuc",
     ],
-    visibility = ["//visibility:public"],
 )
 
 cc_library(
-- 
GitLab


From 7b1169cd951730e89ec4b019dd1c135e22eed29f Mon Sep 17 00:00:00 2001
From: Chris Antaki <ChrisAntaki@gmail.com>
Date: Tue, 4 Dec 2018 08:57:57 -0800
Subject: [PATCH 077/873] Removes line from README

As @terrytangyuan pointed out in #23647, the line being removed makes a suggestion that can't be followed
---
 tensorflow/contrib/tfprof/README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md
index b29d1acacf..f40e76f554 100644
--- a/tensorflow/contrib/tfprof/README.md
+++ b/tensorflow/contrib/tfprof/README.md
@@ -1,7 +1,5 @@
 # tfprof: TensorFlow Profiler and Beyond
 
-<h1>Please use `tf.profiler.xxx` instead of `tf.contrib.tfprof.xxx`</h1>
-
 <h1>Full Document in
 <a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/profiler/README.md">tensorflow/core/profiler/README.md</a><h1>
 
-- 
GitLab


From 30d6a001371a9cef5ab085980356365d4861b8ee Mon Sep 17 00:00:00 2001
From: Pooya Davoodi <pdavoodi@nvidia.com>
Date: Tue, 4 Dec 2018 09:03:15 -0800
Subject: [PATCH 078/873] Update README.md

---
 tensorflow/contrib/tensorrt/README.md | 29 ++++++++++++++++-----------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md
index 09ef7f459f..dedac2c748 100644
--- a/tensorflow/contrib/tensorrt/README.md
+++ b/tensorflow/contrib/tensorrt/README.md
@@ -2,8 +2,7 @@
 
 This module provides necessary bindings and introduces
 `TRTEngineOp` operator that wraps a subgraph in TensorRT.
-This is still a work in progress but should be useable
-with most common graphs.
+This module is under active development.
 
 ## Installing TF-TRT
 
@@ -24,6 +23,21 @@ of TensorRT from the
 Installation instructions for compatibility with TensorFlow are provided on the
 [TensorFlow GPU support](https://www.tensorflow.org/install/gpu) guide.
 
+## Examples
+
+You can find example scripts for running inference on deep learning
+models in this repository: https://github.com/tensorflow/tensorrt
+
+We have used these examples to verify the accuracy and
+performance of TF-TRT. For more information see
+[Verified Models](https://docs.nvidia.com/deeplearning/dgx/integrate-tf-trt/index.html#verified-models).
+
+## Documentation
+
+[TF-TRT documentaion](https://docs.nvidia.com/deeplearning/dgx/integrate-tf-trt/index.html)
+gives an overview of the supported functionalities, provides tutorials
+and verified models, explains best practices with troubleshooting guides.
+
 ## Tests
 
 TF-TRT includes both Python tests and C++ unit tests.
@@ -33,16 +47,6 @@ with the Python command. Most of the C++ unit tests are
 used to test the conversion functions that convert each TF op to
 a number of TensorRT layers.
 
-## Examples
-
-You can find example scripts for running inference on deep learning models
-in this repository: https://github.com/tensorflow/tensorrt
-
-## Documentation
-
-You can find documentation for TF-TRT here:
-https://docs.nvidia.com/deeplearning/dgx/integrate-tf-trt/index.html
-
 ## Compilation
 
 In order to compile the module, you need to have a local TensorRT installation
@@ -56,3 +60,4 @@ has to set path to location where the library is installed during configuration.
 bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package
 bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/
 ```
+
-- 
GitLab


From 12a38bdc89520b86acfd3d6451545bbc0fa407bd Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Tue, 4 Dec 2018 15:52:23 -0800
Subject: [PATCH 079/873] Removing unit tests which are producing slightly
 different results with MKL

---
 tensorflow/core/kernels/mkl_fused_ops_test.cc | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc
index eb456ce7a3..6095a26f62 100644
--- a/tensorflow/core/kernels/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc
@@ -275,12 +275,6 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, OneByOneConvolution) {
   this->VerifyConv2DWithBias(filter_size, filter_count);
 }
 
-TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, ImageSizeConvolution) {
-  const int filter_size = TestFixture::kImageWidth;
-  const int filter_count = 12;
-  this->VerifyConv2DWithBias(filter_size, filter_count);
-}
-
 TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolution) {
   const int filter_size = 3;
   const int filter_count = 12;
@@ -293,12 +287,6 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, OneByOneConvolutionAndRelu) {
   this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
 }
 
-TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, ImageSizeConvolutionAndRelu) {
-  const int filter_size = TestFixture::kImageWidth;
-  const int filter_count = 12;
-  this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
-}
-
 TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
   const int filter_size = 3;
   const int filter_count = 12;
@@ -307,10 +295,8 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
 
 REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest,
                            OneByOneConvolution,          //
-                           ImageSizeConvolution,         //
                            SpatialConvolution,           //
                            OneByOneConvolutionAndRelu,   //
-                           ImageSizeConvolutionAndRelu,  //
                            SpatialConvolutionAndRelu);
 
 using MklFusedBiasAddDataTypes = ::testing::Types<float>;
-- 
GitLab


From b41761c499a5081870b9da4a8ae73adc45df269a Mon Sep 17 00:00:00 2001
From: Jason Zaman <jason@perfinion.com>
Date: Sun, 14 Oct 2018 11:28:53 +0800
Subject: [PATCH 080/873] Update to bazel-0.18.0 and use try-import

Bazel-0.18.0 adds a try-import option that will non-fatally try and
import a file. Use this for the configure options so that .bazelrc does
not need to change. ./configure rewriting .bazelrc makes using the git
repo annoying because the file is changed.

The allowed bazel range is now 0.18.0-0.20.0 inclusive. The env var
TF_IGNORE_MAX_BAZEL_VERSION can be set to skip the max bazel version
check.

Also optionally import a /.bazelrc.user file that is gitignored so
user-specific options can go in there.

Fixes: https://github.com/tensorflow/tensorflow/issues/22762
Fixes: https://github.com/tensorflow/tensorflow/pull/22906
Signed-off-by: Jason Zaman <jason@perfinion.com>
---
 tools/bazel.rc => .bazelrc                    |  8 +++++++
 .gitignore                                    |  2 +-
 WORKSPACE                                     |  2 +-
 configure.py                                  | 22 ++++---------------
 .../tools/ci_build/install/install_bazel.sh   |  2 +-
 .../install/install_bazel_from_source.sh      |  2 +-
 tensorflow/tools/docker/Dockerfile.devel      |  2 +-
 tensorflow/tools/docker/Dockerfile.devel-gpu  |  2 +-
 tensorflow/tools/docker/Dockerfile.devel-mkl  |  2 +-
 .../tools/docker/Dockerfile.devel-mkl-horovod |  2 +-
 10 files changed, 20 insertions(+), 26 deletions(-)
 rename tools/bazel.rc => .bazelrc (95%)

diff --git a/tools/bazel.rc b/.bazelrc
similarity index 95%
rename from tools/bazel.rc
rename to .bazelrc
index 1fdf51f53e..8d9834f59a 100644
--- a/tools/bazel.rc
+++ b/.bazelrc
@@ -93,3 +93,11 @@ build:dynamic_kernels --copt=-DAUTOLOAD_DYNAMIC_KERNELS
 build --define=PREFIX=/usr
 build --define=LIBDIR=$(PREFIX)/lib
 build --define=INCLUDEDIR=$(PREFIX)/include
+
+# Default options should come above this line
+
+# Options from ./configure
+try-import %workspace%/.tf_configure.bazelrc
+
+# Put user-specific options in .bazelrc.user
+try-import %workspace%/.bazelrc.user
diff --git a/.gitignore b/.gitignore
index 9032405860..e1d352c238 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 .DS_Store
 .ipynb_checkpoints
 node_modules
-/.bazelrc
+/.bazelrc.user
 /.tf_configure.bazelrc
 /bazel-*
 /bazel_pip
diff --git a/WORKSPACE b/WORKSPACE
index 7cc08e0164..0f59c44e39 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -47,7 +47,7 @@ remote_config_workspace()
 # files, in case the parsing of those build files depends on the bazel
 # version we require here.
 load("//tensorflow:version_check.bzl", "check_bazel_version_at_least")
-check_bazel_version_at_least("0.15.0")
+check_bazel_version_at_least("0.18.0")
 
 load("//tensorflow:workspace.bzl", "tf_workspace")
 
diff --git a/configure.py b/configure.py
index 6c905a0be3..d19607af6c 100644
--- a/configure.py
+++ b/configure.py
@@ -255,18 +255,6 @@ def setup_python(environ_cp):
 def reset_tf_configure_bazelrc():
   """Reset file that contains customized config settings."""
   open(_TF_BAZELRC, 'w').close()
-  bazelrc_path = os.path.join(_TF_WORKSPACE_ROOT, '.bazelrc')
-
-  data = []
-  if os.path.exists(bazelrc_path):
-    with open(bazelrc_path, 'r') as f:
-      data = f.read().splitlines()
-  with open(bazelrc_path, 'w') as f:
-    for l in data:
-      if _TF_BAZELRC_FILENAME in l:
-        continue
-      f.write('%s\n' % l)
-    f.write('import %%workspace%%/%s\n' % _TF_BAZELRC_FILENAME)
 
 def cleanup_makefile():
   """Delete any leftover BUILD files from the Makefile build.
@@ -488,11 +476,11 @@ def check_bazel_version(min_version, max_version):
   if curr_version_int < min_version_int:
     print('Please upgrade your bazel installation to version %s or higher to '
           'build TensorFlow!' % min_version)
-    sys.exit(0)
-  if curr_version_int > max_version_int:
+    sys.exit(1)
+  if curr_version_int > max_version_int and not 'TF_IGNORE_MAX_BAZEL_VERSION' in os.environ:
     print('Please downgrade your bazel installation to version %s or lower to '
           'build TensorFlow!' % max_version)
-    sys.exit(0)
+    sys.exit(1)
   return curr_version
 
 
@@ -1565,11 +1553,9 @@ def main():
   # environment variables.
   environ_cp = dict(os.environ)
 
-  check_bazel_version('0.15.0', '0.20.0')
+  check_bazel_version('0.18.0', '0.20.0')
 
   reset_tf_configure_bazelrc()
-  # Explicitly import tools/bazel.rc, this is needed for Bazel 0.19.0 or later
-  write_to_bazelrc('import %workspace%/tools/bazel.rc')
 
   cleanup_makefile()
   setup_python(environ_cp)
diff --git a/tensorflow/tools/ci_build/install/install_bazel.sh b/tensorflow/tools/ci_build/install/install_bazel.sh
index e284401b8a..7472053209 100755
--- a/tensorflow/tools/ci_build/install/install_bazel.sh
+++ b/tensorflow/tools/ci_build/install/install_bazel.sh
@@ -15,7 +15,7 @@
 # ==============================================================================
 
 # Select bazel version.
-BAZEL_VERSION="0.15.0"
+BAZEL_VERSION="0.18.0"
 
 set +e
 local_bazel_ver=$(bazel version 2>&1 | grep -i label | awk '{print $3}')
diff --git a/tensorflow/tools/ci_build/install/install_bazel_from_source.sh b/tensorflow/tools/ci_build/install/install_bazel_from_source.sh
index 87be81577d..4f83815d77 100755
--- a/tensorflow/tools/ci_build/install/install_bazel_from_source.sh
+++ b/tensorflow/tools/ci_build/install/install_bazel_from_source.sh
@@ -18,7 +18,7 @@
 # It will compile bazel from source and install it in /usr/local/bin
 
 # Select bazel version.
-BAZEL_VERSION="0.15.0"
+BAZEL_VERSION="0.18.0"
 
 set +e
 local_bazel_ver=$(bazel version 2>&1 | grep -i label | awk '{print $3}')
diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index c256dd364e..5ddcd3a2fd 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -65,7 +65,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.15.0
+ENV BAZEL_VERSION 0.18.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 7f9b55b455..767e5f4a4f 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -87,7 +87,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.15.0
+ENV BAZEL_VERSION 0.18.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index 2341c0e8cc..0980502bcc 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -88,7 +88,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.15.0
+ENV BAZEL_VERSION 0.18.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
index 5e24617b21..90db249e3d 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
@@ -79,7 +79,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.15.0
+ENV BAZEL_VERSION 0.18.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
-- 
GitLab


From 83e50f1d48a1b1746a55322a57b55a13aa6aca89 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Wed, 5 Dec 2018 09:02:27 +0800
Subject: [PATCH 081/873] Update tensorflow/core/graph/mkl_layout_pass.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>
---
 tensorflow/core/graph/mkl_layout_pass.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index e283d00045..1acbf0dc60 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1094,7 +1094,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   // To compute LeakyRelu MKL DNN uses (feature), if feature > 0
   // otherwise it uses (feature * alpha)
   // while Tensorflow uses max(feature, feature * alpha) to compute LeakyRelu.
-  // These two algorithm are not consistent when alpha > 1
+  // These two algorithms are not consistent when alpha > 1,
   // so only LeakyRelu is written to MKL OP when alpha < 1
   static bool LeakyReluRewrite(const Node* n) {
     DCHECK(n);
-- 
GitLab


From 602d56c362689d316410e8fe3d476d380ac25742 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Wed, 5 Dec 2018 09:03:03 +0800
Subject: [PATCH 082/873] Update tensorflow/core/graph/mkl_layout_pass.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>
---
 tensorflow/core/graph/mkl_layout_pass.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 1acbf0dc60..b99ec1dcf9 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1095,7 +1095,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
   // otherwise it uses (feature * alpha)
   // while Tensorflow uses max(feature, feature * alpha) to compute LeakyRelu.
   // These two algorithms are not consistent when alpha > 1,
-  // so only LeakyRelu is written to MKL OP when alpha < 1
+  // so we only rewrite LeakyRelu to MKL OP when alpha <= 1.
   static bool LeakyReluRewrite(const Node* n) {
     DCHECK(n);
 
-- 
GitLab


From 0628f12e26c61f2aab628d1979ea9bb79119e1a4 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Wed, 5 Dec 2018 09:04:00 +0800
Subject: [PATCH 083/873] Update tensorflow/core/kernels/mkl_relu_op.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>
---
 tensorflow/core/kernels/mkl_relu_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 2e29eae41b..e061b4103e 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -1352,7 +1352,7 @@ class MklLeakyReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
     AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
                               src_tensor.shape(), dnn_shape_dst);
     T* out_o = dst_tensor->flat<T>().data();
-    out_o[0] = std::max(user_i[0], user_i[0] * this->alpha_);
+    out_o[0] = user_i[0] >= 0 ? user_g[0] : user_g[0] * this->alpha_;
     return;
   }
 };
-- 
GitLab


From 5c2d58b0828d19d96b7d61de620cf81b88f3aa23 Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Wed, 5 Dec 2018 09:16:35 +0800
Subject: [PATCH 084/873] update comments and rewrite mkl leakyrelu when alpha
 <=1

---
 tensorflow/core/graph/mkl_layout_pass.cc | 8 ++++----
 tensorflow/core/kernels/mkl_relu_op.cc   | 9 +++++----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index b99ec1dcf9..b639c5ea7b 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1091,9 +1091,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     return do_rewrite;
   }
 
-  // To compute LeakyRelu MKL DNN uses (feature), if feature > 0
-  // otherwise it uses (feature * alpha)
-  // while Tensorflow uses max(feature, feature * alpha) to compute LeakyRelu.
+  // MKL-DNN's LeakyRelu(feature) = feature          (if feature > 0), or
+  //                                feature * alpha  (otherwise),
+  // while TensorFlow's LeakyRelu(feature) = max(feature, feature * alpha).
   // These two algorithms are not consistent when alpha > 1,
   // so we only rewrite LeakyRelu to MKL OP when alpha <= 1.
   static bool LeakyReluRewrite(const Node* n) {
@@ -1105,7 +1105,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
 
     // If the alpha of LeakyRelu is less than 1, rewrite the node.
     // Otherwise eigen node is used instead.
-    if (alpha < 1) {
+    if (alpha <= 1) {
       return true;
     }
     VLOG(1) << "LeakyReluRewrite: The model sets alpha is not less than 1 "
diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index e061b4103e..e2ce08feec 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -1366,10 +1366,11 @@ class MklLeakyReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
       : MklReluGradOpBase<Device, T, eltwise_relu>(context, 0.0f, 0.0f) {
     float alpha;
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha));
-    OP_REQUIRES(context, alpha < 1,
-                errors::InvalidArgument("MKL LeakyRelu only support alpha < 1. "
-                                        "alpha is: ",
-                                        alpha));
+    OP_REQUIRES(
+        context, alpha <= 1,
+        errors::InvalidArgument("MKL LeakyRelu only support alpha <= 1. "
+                                "alpha is: ",
+                                alpha));
 
     this->alpha_ = alpha;
   }
-- 
GitLab


From 11a420e09a753b2064c9b4a69419f2c78a4f19e0 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Tue, 4 Dec 2018 17:34:37 -0800
Subject: [PATCH 085/873] Fixed clang format error

---
 tensorflow/core/kernels/mkl_fused_ops_test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc
index 6095a26f62..7f1965de85 100644
--- a/tensorflow/core/kernels/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc
@@ -294,9 +294,9 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
 }
 
 REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest,
-                           OneByOneConvolution,          //
-                           SpatialConvolution,           //
-                           OneByOneConvolutionAndRelu,   //
+                           OneByOneConvolution,         //
+                           SpatialConvolution,          //
+                           OneByOneConvolutionAndRelu,  //
                            SpatialConvolutionAndRelu);
 
 using MklFusedBiasAddDataTypes = ::testing::Types<float>;
-- 
GitLab


From 88de1cc935a311a1fe1412e8a821afc7c88ed6a4 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 16 Dec 2017 02:47:30 +0000
Subject: [PATCH 086/873] Add complex64 and complex128 support for `tf.angle`
 on GPU

In PR 10643, complex64 and complex128 support have been added
for `tf.angle` on CPU. However, because of the compilation
errors, the complex support on GPU is not enabled yet.

The issue was that, std::arg is not available on nvidia device
for GPU. This fix changes to used atan2 instead, which is
available in CUDA.

The relevant test cases have bee enabled.

This fix is related to 10643.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/cwise_ops.h | 35 +++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index 313def9a75..a10051e811 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -1008,9 +1008,40 @@ template <typename T>
 struct get_imag
     : base<T, Eigen::internal::scalar_imag_op<T>, typename T::value_type> {};
 
+template <typename Scalar>
+struct scalar_get_angle_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_get_angle_op)
+  typedef typename Eigen::NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type
+  operator()(const Scalar& a) const {
+    return Eigen::numext::arg(a);
+  }
+};
+
+#if GOOGLE_CUDA
+template <>
+struct scalar_get_angle_op<complex64> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_get_angle_op)
+  typedef typename Eigen::NumTraits<complex64>::Real result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float
+  operator()(const complex64& a) const {
+    return ::atan2f(a.imag(), a.real());
+  }
+};
+
+template <>
+struct scalar_get_angle_op<complex128> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_get_angle_op)
+  typedef typename Eigen::NumTraits<complex128>::Real result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const double
+  operator()(const complex128& a) const {
+    return ::atan2(a.imag(), a.real());
+  }
+};
+#endif
+
 template <typename T>
-struct get_angle
-    : base<T, Eigen::internal::scalar_arg_op<T>, typename T::value_type> {};
+struct get_angle : base<T, scalar_get_angle_op<T>, typename scalar_get_angle_op<T>::result_type> {};
 
 template <typename T>
 struct conj : base<T, Eigen::internal::scalar_conjugate_op<T>> {};
-- 
GitLab


From 892bd9ef4ab7619c56dc61aa674f0cc70c766ebe Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 16 Dec 2017 02:53:19 +0000
Subject: [PATCH 087/873] Enable complex for tf.angle on GPU.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/cwise_op_arg.cc        | 4 +---
 tensorflow/core/kernels/cwise_op_gpu_arg.cu.cc | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/kernels/cwise_op_arg.cc b/tensorflow/core/kernels/cwise_op_arg.cc
index 62ffa0718f..ea659facdc 100644
--- a/tensorflow/core/kernels/cwise_op_arg.cc
+++ b/tensorflow/core/kernels/cwise_op_arg.cc
@@ -26,9 +26,7 @@ namespace tensorflow {
 REGISTER_COMPLEX(CPU, float, complex64);
 REGISTER_COMPLEX(CPU, double, complex128);
 
-// TODO: Enable GPU support for angle op after resolving
-// build failures on GPU (See #10643 for context).
-#if 0 && GOOGLE_CUDA
+#if GOOGLE_CUDA
 REGISTER_COMPLEX(GPU, float, complex64);
 REGISTER_COMPLEX(GPU, double, complex128);
 #endif
diff --git a/tensorflow/core/kernels/cwise_op_gpu_arg.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_arg.cu.cc
index 9b3f8200bd..34028e936e 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_arg.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_arg.cu.cc
@@ -13,9 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// TODO: Enable GPU support for angle op after resolving
-// build failures on GPU (See #10643 for context).
-#if 0 && GOOGLE_CUDA
+#if GOOGLE_CUDA
 
 #include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h"
 
-- 
GitLab


From c47fba6210fa8ce9f663f6d3c4ac1382db8ca89e Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 16 Dec 2017 02:53:53 +0000
Subject: [PATCH 088/873] Enable test case for complex support of `tf.angle` on
 GPU.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/cwise_ops_test.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py
index 9bb7d8b8b1..70f19f9d2f 100644
--- a/tensorflow/python/kernel_tests/cwise_ops_test.py
+++ b/tensorflow/python/kernel_tests/cwise_ops_test.py
@@ -887,7 +887,7 @@ class ComplexMakeRealImagTest(test.TestCase):
       tf_angle = math_ops.angle(inx)
       tf_angle_val = self.evaluate(tf_angle)
 
-    self.assertAllEqual(np_angle, tf_angle_val)
+    self.assertAllClose(np_angle, tf_angle_val)
     self.assertShapeEqual(np_angle, tf_angle)
 
   def testAngle64(self):
@@ -895,18 +895,14 @@ class ComplexMakeRealImagTest(test.TestCase):
     imag = (np.arange(-3, 3) / 5.).reshape([1, 3, 2]).astype(np.float32)
     cplx = real + 1j * imag
     self._compareAngle(cplx, use_gpu=False)
-    # TODO: Enable GPU tests for angle op after resolving
-    # build failures on GPU (See #10643 for context).
-    # self._compareAngle(cplx, use_gpu=True)
+    self._compareAngle(cplx, use_gpu=True)
 
   def testAngle(self):
     real = (np.arange(-3, 3) / 4.).reshape([1, 3, 2]).astype(np.float64)
     imag = (np.arange(-3, 3) / 5.).reshape([1, 3, 2]).astype(np.float64)
     cplx = real + 1j * imag
     self._compareAngle(cplx, use_gpu=False)
-    # TODO: Enable GPU tests for angle op after resolving
-    # build failures on GPU (See #10643 for context).
-    # self._compareAngle(cplx, use_gpu=True)
+    self._compareAngle(cplx, use_gpu=True)
 
   @test_util.run_deprecated_v1
   def testRealReal(self):
-- 
GitLab


From d285d0cb7a0e4f2ee1f8c99e2f062d63a9d1521e Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 16 Dec 2017 02:57:17 +0000
Subject: [PATCH 089/873] Sanitize with clang-format -i

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/cwise_ops.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index a10051e811..9dcad7e9a4 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -1023,8 +1023,8 @@ template <>
 struct scalar_get_angle_op<complex64> {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_get_angle_op)
   typedef typename Eigen::NumTraits<complex64>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float
-  operator()(const complex64& a) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator()(
+      const complex64& a) const {
     return ::atan2f(a.imag(), a.real());
   }
 };
@@ -1033,15 +1033,16 @@ template <>
 struct scalar_get_angle_op<complex128> {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_get_angle_op)
   typedef typename Eigen::NumTraits<complex128>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const double
-  operator()(const complex128& a) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const double operator()(
+      const complex128& a) const {
     return ::atan2(a.imag(), a.real());
   }
 };
 #endif
 
 template <typename T>
-struct get_angle : base<T, scalar_get_angle_op<T>, typename scalar_get_angle_op<T>::result_type> {};
+struct get_angle : base<T, scalar_get_angle_op<T>,
+                        typename scalar_get_angle_op<T>::result_type> {};
 
 template <typename T>
 struct conj : base<T, Eigen::internal::scalar_conjugate_op<T>> {};
-- 
GitLab


From 153e636227c8bd78ba3725942652c0768f369af9 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Sat, 16 Dec 2017 04:20:23 +0000
Subject: [PATCH 090/873] Specialize scalar_arg_op instead.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/cwise_ops.h | 75 ++++++++++++++++-------------
 1 file changed, 41 insertions(+), 34 deletions(-)

diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index 9dcad7e9a4..bfa7cf9e06 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -29,6 +29,45 @@ limitations under the License.
 namespace Eigen {
 namespace internal {
 
+#if GOOGLE_CUDA
+template <>
+struct scalar_arg_op<std::complex<float>> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
+  typedef typename Eigen::NumTraits<std::complex<float>>::Real result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator()(
+      const std::complex<float>& a) const {
+    return ::atan2f(a.imag(), a.real());
+  }
+};
+
+template <>
+struct scalar_arg_op<std::complex<double>> {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_arg_op)
+  typedef typename Eigen::NumTraits<std::complex<double>>::Real result_type;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const double operator()(
+      const std::complex<double>& a) const {
+    return ::atan2(a.imag(), a.real());
+  }
+};
+#endif
+
+// TODO(rmlarsen): Get rid of fmod2 once fmod is upstreamed to Eigen.
+template <typename T>
+struct scalar_fmod2_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod2_op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a,
+                                                           const T& b) const {
+    return std::fmod(a, b);
+  }
+};
+template <typename T>
+struct functor_traits<scalar_fmod2_op<T>> {
+  enum {
+    Cost = 13,  // Reciprocal throughput of FPREM on Haswell.
+    PacketAccess = false,
+  };
+};
+
 template <typename T>
 struct scalar_asinh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op)
@@ -1008,41 +1047,9 @@ template <typename T>
 struct get_imag
     : base<T, Eigen::internal::scalar_imag_op<T>, typename T::value_type> {};
 
-template <typename Scalar>
-struct scalar_get_angle_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_get_angle_op)
-  typedef typename Eigen::NumTraits<Scalar>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type
-  operator()(const Scalar& a) const {
-    return Eigen::numext::arg(a);
-  }
-};
-
-#if GOOGLE_CUDA
-template <>
-struct scalar_get_angle_op<complex64> {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_get_angle_op)
-  typedef typename Eigen::NumTraits<complex64>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const float operator()(
-      const complex64& a) const {
-    return ::atan2f(a.imag(), a.real());
-  }
-};
-
-template <>
-struct scalar_get_angle_op<complex128> {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_get_angle_op)
-  typedef typename Eigen::NumTraits<complex128>::Real result_type;
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const double operator()(
-      const complex128& a) const {
-    return ::atan2(a.imag(), a.real());
-  }
-};
-#endif
-
 template <typename T>
-struct get_angle : base<T, scalar_get_angle_op<T>,
-                        typename scalar_get_angle_op<T>::result_type> {};
+struct get_angle
+    : base<T, Eigen::internal::scalar_arg_op<T>, typename T::value_type> {};
 
 template <typename T>
 struct conj : base<T, Eigen::internal::scalar_conjugate_op<T>> {};
-- 
GitLab


From 3fea0332bb3c49f70ea42b4d0ecdfe16092ef7fe Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Wed, 5 Dec 2018 01:52:50 +0000
Subject: [PATCH 091/873] Fix merge conflict

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/cwise_ops.h | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h
index bfa7cf9e06..be75d0b4b0 100644
--- a/tensorflow/core/kernels/cwise_ops.h
+++ b/tensorflow/core/kernels/cwise_ops.h
@@ -51,23 +51,6 @@ struct scalar_arg_op<std::complex<double>> {
 };
 #endif
 
-// TODO(rmlarsen): Get rid of fmod2 once fmod is upstreamed to Eigen.
-template <typename T>
-struct scalar_fmod2_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod2_op)
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& a,
-                                                           const T& b) const {
-    return std::fmod(a, b);
-  }
-};
-template <typename T>
-struct functor_traits<scalar_fmod2_op<T>> {
-  enum {
-    Cost = 13,  // Reciprocal throughput of FPREM on Haswell.
-    PacketAccess = false,
-  };
-};
-
 template <typename T>
 struct scalar_asinh_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_asinh_op)
-- 
GitLab


From e621ab3c0042a7c3600820ed00396ba4c0023b6e Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Wed, 5 Dec 2018 10:04:28 +0800
Subject: [PATCH 092/873] Update tensorflow/core/kernels/mkl_relu_op.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>
---
 tensorflow/core/kernels/mkl_relu_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index e2ce08feec..3656b77032 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -1330,7 +1330,7 @@ class MklLeakyReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
       : MklReluOpBase<Device, T, eltwise_relu>(context, 0.0f, 0.0f) {
     float alpha;
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha));
-    OP_REQUIRES(context, alpha < 1,
+    OP_REQUIRES(context, alpha <= 1,
                 errors::InvalidArgument("MKL LeakyRelu only support alpha < 1. "
                                         "alpha is: ",
                                         alpha));
-- 
GitLab


From b5d71d5c61c757fcfea5a113c19049e77e2cae88 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Wed, 5 Dec 2018 10:04:37 +0800
Subject: [PATCH 093/873] Update tensorflow/core/kernels/mkl_relu_op.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>
---
 tensorflow/core/kernels/mkl_relu_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 3656b77032..7bec78a062 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -1393,7 +1393,7 @@ class MklLeakyReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
     T* out_o = diff_src_tensor->flat<T>().data();
     T* user_i = const_cast<T*>(src_tensor.flat<T>().data());
     T* user_g = const_cast<T*>(diff_dst_tensor.flat<T>().data());
-    out_o[0] = user_i[0] > 0 ? user_g[0] : user_g[0] * this->alpha_;
+    out_o[0] = user_i[0] >= 0 ? user_g[0] : user_g[0] * this->alpha_;
     return;
   }
 };
-- 
GitLab


From 056bd409eb33486e24bf2d6aa8f86a79befeebee Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Wed, 5 Dec 2018 10:04:46 +0800
Subject: [PATCH 094/873] Update tensorflow/core/kernels/mkl_relu_op.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>
---
 tensorflow/core/kernels/mkl_relu_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 7bec78a062..7605bfad15 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -1331,7 +1331,7 @@ class MklLeakyReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
     float alpha;
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha));
     OP_REQUIRES(context, alpha <= 1,
-                errors::InvalidArgument("MKL LeakyRelu only support alpha < 1. "
+                errors::InvalidArgument("MKL LeakyRelu only supports alpha <= 1. "
                                         "alpha is: ",
                                         alpha));
 
-- 
GitLab


From 2794f95710e18f985bec6f0092f4d328347e238d Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Wed, 5 Dec 2018 10:04:54 +0800
Subject: [PATCH 095/873] Update tensorflow/core/graph/mkl_layout_pass.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>
---
 tensorflow/core/graph/mkl_layout_pass.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index b639c5ea7b..adeb33c1b9 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1108,7 +1108,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     if (alpha <= 1) {
       return true;
     }
-    VLOG(1) << "LeakyReluRewrite: The model sets alpha is not less than 1 "
+    VLOG(1) << "LeakyReluRewrite: The model sets alpha is greater than 1 "
             << "which case is not optimized by Intel MKL, thus using Eigen op"
             << "for LeakyRelu ";
 
-- 
GitLab


From 20bab61688b60300eafb2c7cc48b9ad542bcb1a4 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com>
Date: Wed, 5 Dec 2018 10:05:02 +0800
Subject: [PATCH 096/873] Update tensorflow/core/kernels/mkl_relu_op.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>
---
 tensorflow/core/kernels/mkl_relu_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 7605bfad15..03867517ef 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -1368,7 +1368,7 @@ class MklLeakyReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha));
     OP_REQUIRES(
         context, alpha <= 1,
-        errors::InvalidArgument("MKL LeakyRelu only support alpha <= 1. "
+        errors::InvalidArgument("MKL LeakyRelu only supports alpha <= 1. "
                                 "alpha is: ",
                                 alpha));
 
-- 
GitLab


From 6633267b3f06d8f1b074bfd8a1807b031bfc80de Mon Sep 17 00:00:00 2001
From: "Li, Guizi" <guizi.li@intel.com>
Date: Wed, 5 Dec 2018 11:48:45 +0800
Subject: [PATCH 097/873] fix clang format

---
 tensorflow/core/kernels/mkl_relu_op.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 03867517ef..43f8a88e66 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -1330,10 +1330,11 @@ class MklLeakyReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
       : MklReluOpBase<Device, T, eltwise_relu>(context, 0.0f, 0.0f) {
     float alpha;
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha));
-    OP_REQUIRES(context, alpha <= 1,
-                errors::InvalidArgument("MKL LeakyRelu only supports alpha <= 1. "
-                                        "alpha is: ",
-                                        alpha));
+    OP_REQUIRES(
+        context, alpha <= 1,
+        errors::InvalidArgument("MKL LeakyRelu only supports alpha <= 1. "
+                                "alpha is: ",
+                                alpha));
 
     this->alpha_ = alpha;
   }
-- 
GitLab


From 5d21cfbed9ed34d6ba90aa6ebbc93c0dd0fe19d2 Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Wed, 5 Dec 2018 11:54:03 +0800
Subject: [PATCH 098/873] fix clang format

Change-Id: I89ea6cea2a55c65f9de588c106ee10945d6efa62
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 121 +++++++++++-----------
 1 file changed, 60 insertions(+), 61 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index 25c0c7b078..f81521f4be 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -17,13 +17,13 @@ limitations under the License.
 #ifdef INTEL_MKL
 #ifndef INTEL_MKL_ML_ONLY
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #include "tensorflow/core/util/mkl_util.h"
 
@@ -36,20 +36,19 @@ namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-template <typename Device, typename T>
-class MklSoftmaxOp : public OpKernel {
- public:
+template <typename Device, typename T> class MklSoftmaxOp : public OpKernel {
+public:
   ~MklSoftmaxOp() {}
 
-  explicit MklSoftmaxOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit MklSoftmaxOp(OpKernelConstruction *context) : OpKernel(context) {}
 
-  void Compute(OpKernelContext* context) override {
+  void Compute(OpKernelContext *context) override {
     try {
       auto cpu_engine = engine(engine::cpu, 0);
 
       // src_tensor now points to the 0-th input of global data struct "context"
       size_t src_idx = 0;
-      const Tensor& src_tensor = MklGetInput(context, src_idx);
+      const Tensor &src_tensor = MklGetInput(context, src_idx);
       // Add: get MklShape
       MklDnnShape src_mkl_shape;
       GetMklShape(context, src_idx, &src_mkl_shape);
@@ -66,48 +65,49 @@ class MklSoftmaxOp : public OpKernel {
       if (src_mkl_shape.IsMklTensor()) {
         axis = 1;
         output_dims = src_mkl_shape.GetSizesAsMklDnnDims();
-      }
-      else {
+      } else {
         axis = input_dims - 1;
         output_dims = src_dims;
       }
       memory::format layout_type;
-      // In MKL, data format passed to mkl softmax op depends on dimension of the input tensor.
-      // Here "x" data format in MKL is used for 1 dim tensor, "nc" for 2 dim tensor, 
-      // "tnc" for 3 dim tensor, "nchw" for 4 dim tensor, and "ncdhw" for 5 dim tensor.
+      // In MKL, data format passed to mkl softmax op depends on dimension of
+      // the input tensor.
+      // Here "x" data format in MKL is used for 1 dim tensor, "nc" for 2 dim
+      // tensor,
+      // "tnc" for 3 dim tensor, "nchw" for 4 dim tensor, and "ncdhw" for 5 dim
+      // tensor.
       // Each of the simbols has the following meaning:
       // n = batch, c = channels, t = sequence lenght, h = height,
-      // w = width, d = depth 
-      
+      // w = width, d = depth
+
       switch (input_dims) {
-        case 1:
-          layout_type = memory::format::x;
-          break;
-        case 2:
-          layout_type = memory::format::nc;
-          break;
-        case 3:
-          layout_type = memory::format::tnc;
-          break;
-        case 4:
-          if (src_mkl_shape.IsMklTensor()) {
-            layout_type = memory::format::nhwc;
-          } 
-          else {
-            layout_type = memory::format::nchw;
-          }
-          break;
-        case 5:
-          if (src_mkl_shape.IsMklTensor()) {
-            layout_type = memory::format::ndhwc;
-          } 
-          else {
-            layout_type = memory::format::ncdhw;
-          }
-          break;
-        default:
-          OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1"));
-          return;
+      case 1:
+        layout_type = memory::format::x;
+        break;
+      case 2:
+        layout_type = memory::format::nc;
+        break;
+      case 3:
+        layout_type = memory::format::tnc;
+        break;
+      case 4:
+        if (src_mkl_shape.IsMklTensor()) {
+          layout_type = memory::format::nhwc;
+        } else {
+          layout_type = memory::format::nchw;
+        }
+        break;
+      case 5:
+        if (src_mkl_shape.IsMklTensor()) {
+          layout_type = memory::format::ndhwc;
+        } else {
+          layout_type = memory::format::ncdhw;
+        }
+        break;
+      default:
+        OP_REQUIRES_OK(context,
+                       errors::Aborted("Input dims must be <= 5 and >=1"));
+        return;
       }
       // Create softmax memory for src, dst: both are defined in mkl_util.h,
       // they are wrapper
@@ -118,10 +118,9 @@ class MklSoftmaxOp : public OpKernel {
       // construct input Tf layout. For TF layout, although input shape
       // (src_dims) required is in MKL-DNN order, the layout is Tensorflow's
       // layout
-      auto src_md =
-          src_mkl_shape.IsMklTensor()
-              ? src_mkl_shape.GetMklLayout()
-              : memory::desc(src_dims, MklDnnType<T>(), layout_type);
+      auto src_md = src_mkl_shape.IsMklTensor()
+                        ? src_mkl_shape.GetMklLayout()
+                        : memory::desc(src_dims, MklDnnType<T>(), layout_type);
 
       // src: setting memory descriptor
       // following functions are in mkl_util.h
@@ -134,9 +133,9 @@ class MklSoftmaxOp : public OpKernel {
           softmax_forward::primitive_desc(softmax_fwd_desc, cpu_engine);
 
       // add: output
-      Tensor* output_tensor = nullptr;
+      Tensor *output_tensor = nullptr;
       MklDnnShape output_mkl_shape;
-      TensorShape output_tf_shape;  // shape of output TF tensor.
+      TensorShape output_tf_shape; // shape of output TF tensor.
       // Softmax MklDnn output layout is same as input layout.
       auto dst_pd = src.GetUsrMemPrimDesc();
 
@@ -149,7 +148,7 @@ class MklSoftmaxOp : public OpKernel {
         output_mkl_shape.SetTfLayout(output_dims.size(), output_dims,
                                      layout_type);
         output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));
-      } else {  // then output is also TF shape
+      } else { // then output is also TF shape
         output_mkl_shape.SetMklTensor(false);
         output_tf_shape = MklDnnDimsToTFShape(output_dims);
       }
@@ -170,10 +169,10 @@ class MklSoftmaxOp : public OpKernel {
       std::vector<primitive> net;
       net.push_back(softmax_fwd);
       stream(stream::kind::eager).submit(net).wait();
-    } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) + ", in file " +
-                         string(__FILE__) + ":" + std::to_string(__LINE__);
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
+                         string(e.message) + ", in file " + string(__FILE__) +
+                         ":" + std::to_string(__LINE__);
       OP_REQUIRES_OK(
           context,
           errors::Aborted("Operation received an exception:", error_msg));
@@ -183,15 +182,15 @@ class MklSoftmaxOp : public OpKernel {
 
 /* Register DNN kernels for supported operations and supported types - right now
  * it is only Softmax and f32 */
-#define REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES(type)          \
-  REGISTER_KERNEL_BUILDER(Name("_MklSoftmax")                       \
-                              .Device(DEVICE_CPU)                   \
-                              .TypeConstraint<type>("T")            \
-                              .Label(mkl_op_registry::kMklOpLabel), \
+#define REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES(type)                     \
+  REGISTER_KERNEL_BUILDER(Name("_MklSoftmax")                                  \
+                              .Device(DEVICE_CPU)                              \
+                              .TypeConstraint<type>("T")                       \
+                              .Label(mkl_op_registry::kMklOpLabel),            \
                           MklSoftmaxOp<CPUDevice, type>);
 TF_CALL_float(REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES);
 
-}  // namespace tensorflow
+} // namespace tensorflow
 
-#endif  // INTEL_MKL_ML_ONLY
-#endif  // INTEL_MKL
+#endif // INTEL_MKL_ML_ONLY
+#endif // INTEL_MKL
-- 
GitLab


From b3687d2d6e6488ac1c90ed6c21ae5eff77f96b98 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Tue, 4 Dec 2018 19:56:50 -0800
Subject: [PATCH 099/873] Corrected typos

---
 tensorflow/core/graph/mkl_graph_util.h  | 6 +++---
 tensorflow/core/kernels/mkl_conv_ops.cc | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index 7435f4e8c1..a599ce3620 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -72,9 +72,9 @@ int inline GetTensorMetaDataIndex(int n, int total_tensors) {
   return DataIndexToMetaDataIndex(tidx, total_tensors);
 }
 
-// Helper function to compare fused_ops attributes strings
-// TODO(Intel-tf) this code is also in mkl_conv_ops.h, we need to move to
-// mkl_util.h
+// Helper function to compare fused_ops attribute strings
+// TODO(Intel) this code is also defined in mkl_conv_ops.h, we need to move to
+// mkl_util.h so we have only one version.
 inline bool CompareFusedOps(const std::vector<string>& fused_ops,
                             const std::vector<string>& expected) {
   return fused_ops == expected;
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index c354390c69..4a4aaffead 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -1177,7 +1177,7 @@ class MklConvOp : public OpKernel {
   const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
   const int kDilationH = 0, kDilationW = 1;
 
-  // Helper function to compare fused_ops attributes strings
+  // Helper function to compare fused_ops attribute strings
   bool CompareFusedOps(const std::vector<string>& fused_ops,
                        const std::vector<string>& expected) {
     return fused_ops == expected;
-- 
GitLab


From 2c382f53d4e63646b4ff0e1d83067e594c2ab51f Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Wed, 5 Dec 2018 17:54:07 +0800
Subject: [PATCH 100/873] fix clang format

Change-Id: Iabc5524dc0858611d4a43b2f8992ec2f397d386e
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 89 ++++++++++++-----------
 1 file changed, 45 insertions(+), 44 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index f81521f4be..4067fbb013 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -36,19 +36,20 @@ namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
-template <typename Device, typename T> class MklSoftmaxOp : public OpKernel {
-public:
+template <typename Device, typename T>
+class MklSoftmaxOp : public OpKernel {
+ public:
   ~MklSoftmaxOp() {}
 
-  explicit MklSoftmaxOp(OpKernelConstruction *context) : OpKernel(context) {}
+  explicit MklSoftmaxOp(OpKernelConstruction* context) : OpKernel(context) {}
 
-  void Compute(OpKernelContext *context) override {
+  void Compute(OpKernelContext* context) override {
     try {
       auto cpu_engine = engine(engine::cpu, 0);
 
       // src_tensor now points to the 0-th input of global data struct "context"
       size_t src_idx = 0;
-      const Tensor &src_tensor = MklGetInput(context, src_idx);
+      const Tensor& src_tensor = MklGetInput(context, src_idx);
       // Add: get MklShape
       MklDnnShape src_mkl_shape;
       GetMklShape(context, src_idx, &src_mkl_shape);
@@ -81,33 +82,33 @@ public:
       // w = width, d = depth
 
       switch (input_dims) {
-      case 1:
-        layout_type = memory::format::x;
-        break;
-      case 2:
-        layout_type = memory::format::nc;
-        break;
-      case 3:
-        layout_type = memory::format::tnc;
-        break;
-      case 4:
-        if (src_mkl_shape.IsMklTensor()) {
-          layout_type = memory::format::nhwc;
-        } else {
-          layout_type = memory::format::nchw;
-        }
-        break;
-      case 5:
-        if (src_mkl_shape.IsMklTensor()) {
-          layout_type = memory::format::ndhwc;
-        } else {
-          layout_type = memory::format::ncdhw;
-        }
-        break;
-      default:
-        OP_REQUIRES_OK(context,
-                       errors::Aborted("Input dims must be <= 5 and >=1"));
-        return;
+        case 1:
+          layout_type = memory::format::x;
+          break;
+        case 2:
+          layout_type = memory::format::nc;
+          break;
+        case 3:
+          layout_type = memory::format::tnc;
+          break;
+        case 4:
+          if (src_mkl_shape.IsMklTensor()) {
+            layout_type = memory::format::nhwc;
+          } else {
+            layout_type = memory::format::nchw;
+          }
+          break;
+        case 5:
+          if (src_mkl_shape.IsMklTensor()) {
+            layout_type = memory::format::ndhwc;
+          } else {
+            layout_type = memory::format::ncdhw;
+          }
+          break;
+        default:
+          OP_REQUIRES_OK(context,
+                         errors::Aborted("Input dims must be <= 5 and >=1"));
+          return;
       }
       // Create softmax memory for src, dst: both are defined in mkl_util.h,
       // they are wrapper
@@ -133,9 +134,9 @@ public:
           softmax_forward::primitive_desc(softmax_fwd_desc, cpu_engine);
 
       // add: output
-      Tensor *output_tensor = nullptr;
+      Tensor* output_tensor = nullptr;
       MklDnnShape output_mkl_shape;
-      TensorShape output_tf_shape; // shape of output TF tensor.
+      TensorShape output_tf_shape;  // shape of output TF tensor.
       // Softmax MklDnn output layout is same as input layout.
       auto dst_pd = src.GetUsrMemPrimDesc();
 
@@ -148,7 +149,7 @@ public:
         output_mkl_shape.SetTfLayout(output_dims.size(), output_dims,
                                      layout_type);
         output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));
-      } else { // then output is also TF shape
+      } else {  // then output is also TF shape
         output_mkl_shape.SetMklTensor(false);
         output_tf_shape = MklDnnDimsToTFShape(output_dims);
       }
@@ -169,7 +170,7 @@ public:
       std::vector<primitive> net;
       net.push_back(softmax_fwd);
       stream(stream::kind::eager).submit(net).wait();
-    } catch (mkldnn::error &e) {
+    } catch (mkldnn::error& e) {
       string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
                          string(e.message) + ", in file " + string(__FILE__) +
                          ":" + std::to_string(__LINE__);
@@ -182,15 +183,15 @@ public:
 
 /* Register DNN kernels for supported operations and supported types - right now
  * it is only Softmax and f32 */
-#define REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES(type)                     \
-  REGISTER_KERNEL_BUILDER(Name("_MklSoftmax")                                  \
-                              .Device(DEVICE_CPU)                              \
-                              .TypeConstraint<type>("T")                       \
-                              .Label(mkl_op_registry::kMklOpLabel),            \
+#define REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES(type)          \
+  REGISTER_KERNEL_BUILDER(Name("_MklSoftmax")                       \
+                              .Device(DEVICE_CPU)                   \
+                              .TypeConstraint<type>("T")            \
+                              .Label(mkl_op_registry::kMklOpLabel), \
                           MklSoftmaxOp<CPUDevice, type>);
 TF_CALL_float(REGISTER_SOFTMAX_MKL_SUPPORTED_KERNELS_TYPES);
 
-} // namespace tensorflow
+}  // namespace tensorflow
 
-#endif // INTEL_MKL_ML_ONLY
-#endif // INTEL_MKL
+#endif  // INTEL_MKL_ML_ONLY
+#endif  // INTEL_MKL
-- 
GitLab


From 1f05b7eaf0220bc720cd3d0a8fcf241458c3f74d Mon Sep 17 00:00:00 2001
From: Vidak Kazic <vidak.kazic@hilda.awesome.ness>
Date: Wed, 5 Dec 2018 14:44:33 +0100
Subject: [PATCH 101/873] Add example to import_meta_graph docstring

---
 tensorflow/python/training/saver.py | 31 +++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index a29926a57d..5352c998ea 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -1656,6 +1656,37 @@ def import_meta_graph(meta_graph_or_file, clear_devices=False,
   NOTE: Restarting training from saved `meta_graph` only works if the
   device assignments have not changed.
 
+  Example 2:
+  Variables, placeholders, and independent operations can also be stored, as shown
+  in the following example.
+
+  ```Python
+  # Saving contents and operations.
+  v1 = tf.placeholder(tf.float32, name="v1")
+  v2 = tf.placeholder(tf.float32, name="v2")
+  v3 = tf.mul(v1, v2)
+  vx = tf.Variable(10.0, name="vx")
+  v4 = tf.add(v3, vx, name="v4")
+  saver = tf.train.Saver([vx])
+  sess = tf.Session()
+  sess.run(tf.initialize_all_variables())
+  sess.run(vx.assign(tf.add(vx, vx)))
+  result = sess.run(v4, feed_dict={v1:12.0, v2:3.3})
+  print(result)
+  saver.save(sess, "./model_ex1")
+  ```
+
+  Later this model can be restored and contents loaded.
+
+  ```Python
+  # Restoring variables and running operations.
+  saver = tf.train.import_meta_graph("./model_ex1.meta")
+  sess = tf.Session()
+  saver.restore(sess, "./model_ex1")
+  result = sess.run("v4:0", feed_dict={"v1:0": 12.0, "v2:0": 3.3})
+  print(result)
+  ```
+
   Args:
     meta_graph_or_file: `MetaGraphDef` protocol buffer or filename (including
       the path) containing a `MetaGraphDef`.
-- 
GitLab


From b6a6296de5b873107bae5abd30dff8897fa53b54 Mon Sep 17 00:00:00 2001
From: Vidak Kazic <vidakdk@gmail.com>
Date: Wed, 5 Dec 2018 15:01:56 +0100
Subject: [PATCH 102/873] Add example to import_meta_graph docstring

---
 tensorflow/python/training/saver.py | 31 +++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index a29926a57d..5352c998ea 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -1656,6 +1656,37 @@ def import_meta_graph(meta_graph_or_file, clear_devices=False,
   NOTE: Restarting training from saved `meta_graph` only works if the
   device assignments have not changed.
 
+  Example 2:
+  Variables, placeholders, and independent operations can also be stored, as shown
+  in the following example.
+
+  ```Python
+  # Saving contents and operations.
+  v1 = tf.placeholder(tf.float32, name="v1")
+  v2 = tf.placeholder(tf.float32, name="v2")
+  v3 = tf.mul(v1, v2)
+  vx = tf.Variable(10.0, name="vx")
+  v4 = tf.add(v3, vx, name="v4")
+  saver = tf.train.Saver([vx])
+  sess = tf.Session()
+  sess.run(tf.initialize_all_variables())
+  sess.run(vx.assign(tf.add(vx, vx)))
+  result = sess.run(v4, feed_dict={v1:12.0, v2:3.3})
+  print(result)
+  saver.save(sess, "./model_ex1")
+  ```
+
+  Later this model can be restored and contents loaded.
+
+  ```Python
+  # Restoring variables and running operations.
+  saver = tf.train.import_meta_graph("./model_ex1.meta")
+  sess = tf.Session()
+  saver.restore(sess, "./model_ex1")
+  result = sess.run("v4:0", feed_dict={"v1:0": 12.0, "v2:0": 3.3})
+  print(result)
+  ```
+
   Args:
     meta_graph_or_file: `MetaGraphDef` protocol buffer or filename (including
       the path) containing a `MetaGraphDef`.
-- 
GitLab


From 2c17ecb324044638e5ff4df836c1621bc0774328 Mon Sep 17 00:00:00 2001
From: Karl Lessard <karl@kubx.ca>
Date: Wed, 5 Dec 2018 09:21:41 -0500
Subject: [PATCH 103/873] Expose underlying operation in op wrappers

---
 .../java/src/main/java/org/tensorflow/Session.java    | 10 ++++++++++
 .../src/main/java/org/tensorflow/op/PrimitiveOp.java  | 11 +++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/tensorflow/java/src/main/java/org/tensorflow/Session.java b/tensorflow/java/src/main/java/org/tensorflow/Session.java
index a660d25f98..c49e98b20e 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Session.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Session.java
@@ -157,6 +157,16 @@ public final class Session implements AutoCloseable {
       return this;
     }
 
+    /**
+     * Use {@code t} instead of the Tensor referred to by executing the operation referred to by
+     * {@code operand}.
+     */
+    public <T> Runner feed(Operand<T> operand, Tensor<T> t) {
+      inputs.add(operand.asOutput());
+      inputTensors.add(t);
+      return this;
+    }
+
     /**
      * Make {@link #run()} return the output of {@code operation}.
      *
diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/PrimitiveOp.java b/tensorflow/java/src/main/java/org/tensorflow/op/PrimitiveOp.java
index 8e56f97041..5c47611d09 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/op/PrimitiveOp.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/op/PrimitiveOp.java
@@ -24,6 +24,13 @@ import org.tensorflow.Operation;
  * PrimitiveOp}. Custom operations working with only one primitive may also derive from this class.
  */
 public abstract class PrimitiveOp implements Op {
+  
+  /**
+   * Returns the underlying {@link Operation}
+   */
+  public Operation op() {
+    return operation;
+  }
 
   @Override
   public final int hashCode() {
@@ -48,10 +55,6 @@ public abstract class PrimitiveOp implements Op {
     return String.format("<%s '%s'>", operation.type(), operation.name());
   }
 
-  /**
-   * Underlying operation. It is deliberately not exposed by a getter method to avoid any name
-   * conflict with generated methods of the subclasses.
-   */
   protected final Operation operation;
 
   /**
-- 
GitLab


From 1ec527767e83424b17d9c2e708b218a5db2738a7 Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Wed, 5 Dec 2018 10:25:20 -0800
Subject: [PATCH 104/873] Add a rule to disallow rewrite for double type

---
 tensorflow/core/graph/mkl_layout_pass.cc      |  5 ++++
 tensorflow/core/graph/mkl_layout_pass_test.cc | 24 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 177d6becf2..42e5411c5a 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1123,6 +1123,11 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     // MKL DNN currently doesn't support all fusions that grappler fuses
     // together with Conv2D (ex. batchnorm). We rewrite _FusedConv2D only if
     // it includes those we support.
+    DataType T;
+    if (!GetNodeAttr(n->def(), "T", &T).ok() ||
+        !mkl_op_registry::IsMklOp(csinfo_.mkl_fused_conv2d, T)) {
+      return false;
+    }
 
     std::vector<string> fused_ops;
     TF_CHECK_OK(GetNodeAttr(n->def(), "fused_ops", &fused_ops));
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc
index 65b6ed6745..43521c847c 100644
--- a/tensorflow/core/graph/mkl_layout_pass_test.cc
+++ b/tensorflow/core/graph/mkl_layout_pass_test.cc
@@ -126,6 +126,7 @@ REGISTER_OP("Input").Output("o: float").SetIsStateful();
 REGISTER_OP("InputList").Output("o: N * float").Attr("N: int").SetIsStateful();
 REGISTER_OP("HalfInput").Output("o: half").SetIsStateful();
 REGISTER_OP("Int32Input").Output("o: int32").SetIsStateful();
+REGISTER_OP("DoubleInput").Output("o: double").SetIsStateful();
 REGISTER_OP("_MklInput").Output("o: uint8").SetIsStateful();
 REGISTER_OP("_MklInput2")
     .Output("o: uint8")
@@ -945,6 +946,29 @@ TEST_F(MklLayoutPassTest, NodeRewrite_FusedConv2D_Negative1) {
             "B->D:1;C->D:2;C->E:1;D->E");
 }
 
+// Rewrite test for _FusedConv2D Op with unsupported type
+TEST_F(MklLayoutPassTest, NodeRewrite_FusedConv2D_Negative2) {
+  InitGraph(
+      "node { name: 'A' op: 'DoubleInput'}"
+      "node { name: 'B' op: 'DoubleInput'}"
+      "node { name: 'C' op: 'DoubleInput'}"
+      "node { name: 'D' op: '_FusedConv2D'"
+      " attr { key: 'T'                value { type: DT_DOUBLE } }"
+      " attr { key: 'num_args'         value { i: 1 } }"
+      " attr { key: 'data_format'      value { s: 'NCHW' } }"
+      " attr { key: 'strides'          value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'padding'          value { s: 'SAME' } }"
+      " attr { key: 'dilations'        value { list: {i: 1, i:1, i:1, i:1} } }"
+      " attr { key: 'fused_ops'        value { list: {s: 'BiasAdd'} } }"
+      " attr { key: 'epsilon'          value { f: 0.001 }}"
+      " input: ['A', 'B', 'C']}"
+      "node { name: 'E' op: 'Zeta' attr { key: 'T' value { type: DT_DOUBLE } }"
+      " input: ['D', 'C'] }");
+  EXPECT_EQ(DoMklLayoutOptimizationPass(),
+            "A(DoubleInput);B(DoubleInput);C(DoubleInput);"
+            "D(_FusedConv2D);E(Zeta)|A->D;B->D:1;C->D:2;C->E:1;D->E");
+}
+
 TEST_F(MklLayoutPassTest, NodeRewrite_Conv2DGradFilter_Positive) {
   InitGraph(
       "node { name: 'A' op: 'Input'}"
-- 
GitLab


From 91fbda2d2cf0d48e9cf5d659b6fbe6d59b3606d9 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 21 Nov 2018 13:44:30 -0800
Subject: [PATCH 105/873] Add ExpandDims, Squeeze ops and unit tests.

---
 .../contrib/tensorrt/convert/convert_graph.cc |   2 +
 .../contrib/tensorrt/convert/convert_nodes.cc | 122 +++++++++
 .../tensorrt/convert/convert_nodes_test.cc    | 247 ++++++++++++++++++
 3 files changed, 371 insertions(+)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 3b32f72bc1..560e565267 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -132,6 +132,8 @@ Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) {
       "Min",
       "Relu6",
       "Square",
+      "ExpandDims",
+      "Squeeze",
   };
   bool is_supported_op_type =
       (candidate_ops.count(node->type_string()) ||
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index fee095668e..49df1daae9 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1880,6 +1880,126 @@ tensorflow::Status ConvertReshape(OpConverterParams* params) {
   return tensorflow::Status::OK();
 }
 
+tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
+  const auto& inputs = params->inputs;
+  const auto& node_def = params->node_def;
+  if (inputs.size() != 2) {
+    return tensorflow::errors::InvalidArgument(
+        "Two inputs expected for ExpandDims, at ", node_def.name());
+  }
+  if (!inputs.at(1).is_weights() ) {
+    return tensorflow::errors::InvalidArgument(
+        "ExpandDims expects weights for axis, at ", node_def.name());
+  }
+  // Get input shape as vector.
+  TRT_TensorOrWeights input_tensor = inputs.at(0);
+  const nvinfer1::Dims dims = input_tensor.GetTrtDims();
+  std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
+  // Add batch dim back for tensors.
+  if (input_tensor.is_tensor()) {
+    input_dims.insert(input_dims.begin(), -1);
+  }
+  const int input_rank = input_dims.size();
+  // Get axis to expand on.
+  TRT_ShapedWeights weights = inputs.at(1).weights();
+  const int* weights_ptr =
+      static_cast<int*>(const_cast<void*>(weights.GetValues()));
+  int axis = weights_ptr[0];
+  // Make sure axis is valid.
+  if ((axis < (-input_rank - 1)) || (axis > input_rank)) {
+    return tensorflow::errors::InvalidArgument(
+        "Axis for ExpandDims is invalid, must be in the range "
+        "[-rank(input) - 1, rank(input)], at ",
+        node_def.name());
+  }
+  // Convert negative axis to corresponding positive axis.
+  if (axis < 0) axis += input_rank + 1;
+  if (input_tensor.is_tensor() && axis == 0) {
+    return tensorflow::errors::Unimplemented(
+        "Modifying batch dimension is not supported for ExpandDims, at ",
+        node_def.name());
+  }
+  if (params->validation_only) return Status::OK();
+
+  // ExpandDims: Insert new dim of size 1.
+  input_dims.insert(input_dims.begin()+axis, 1);
+  // Convert input_dims vector into nvinfer1::Dims.
+  const bool ignore_first_dim = input_tensor.is_tensor();
+  nvinfer1::Dims new_dims = VectorToTrtDims(input_dims, ignore_first_dim);
+  // Reshape tensor.
+  const nvinfer1::ITensor* output_tensor = nullptr;
+  TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
+      input_tensor, new_dims, &output_tensor));
+  params->outputs->push_back(
+      TRT_TensorOrWeights(const_cast<nvinfer1::ITensor*>(output_tensor)));
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
+  const auto& inputs = params->inputs;
+  const auto& node_def = params->node_def;
+  if (inputs.size() != 1) {
+    return tensorflow::errors::InvalidArgument(
+        "One input expected for Squeeze, at ", node_def.name());
+  }
+  // Get input shape.
+  TRT_TensorOrWeights input_tensor = inputs.at(0);
+  const nvinfer1::Dims dims = input_tensor.GetTrtDims();
+  std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
+  // Add batch dim back temporarily.
+  if (input_tensor.is_tensor()) {
+    input_dims.insert(input_dims.begin(), -1);
+  }
+  const int input_rank = input_dims.size();
+  // Mark axes to remove by setting them to 0.
+  TFAttrs attrs(node_def);
+  auto squeeze_dims = attrs.get<std::vector<int>>("squeeze_dims");
+  if (squeeze_dims.size() == 0) {
+    return tensorflow::errors::Unimplemented(
+        "Squeeze is only implemented for explicit dims, at ", node_def.name());
+  }
+  for (int axis : squeeze_dims) {
+    // Make sure axis is valid.
+    if ((axis < -input_rank) || (axis >= input_rank)) {
+      return tensorflow::errors::InvalidArgument(
+          "Axis for Squeeze is invalid, must be in the range "
+          "[-rank(input), rank(input)), at ",
+          node_def.name());
+    }
+    // Convert negative axis to corresponding positive axis.
+    if (axis < 0) axis += input_rank;
+    LOG(INFO) << axis;
+    // Don't squeeze batch dim.
+    if (axis == 0) {
+      return tensorflow::errors::Unimplemented(
+          "Cannot squeeze batch dimension, at ", node_def.name());
+    }
+    // Make sure target dimension is size 1.
+    if (input_dims[axis] != 1) {
+      return tensorflow::errors::InvalidArgument(
+          "Cannot squeeze a dimension which isn't size 1, at ",
+          node_def.name());
+    }
+    // Mark dim for removal by setting to 0.
+    input_dims[axis] = 0;
+  }
+  if (params->validation_only) return Status::OK();
+  
+  // Remove all dims which are equal to 0.
+  input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0),
+                   input_dims.end());
+  // Convert input_dims vector into nvinfer1::Dims.
+  const bool ignore_first_dim = input_tensor.is_tensor();
+  nvinfer1::Dims new_dims = VectorToTrtDims(input_dims, ignore_first_dim);
+  // Reshape tensor.
+  const nvinfer1::ITensor* output_tensor = nullptr;
+  TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
+      input_tensor, new_dims, &output_tensor));
+  params->outputs->push_back(
+      TRT_TensorOrWeights(const_cast<nvinfer1::ITensor*>(output_tensor)));
+  return tensorflow::Status::OK();
+}
+
 tensorflow::Status ConvertConv2D(OpConverterParams* params) {
   return ConvertConv2DHelper(params, ConvolutionType::DEFAULT);
 }
@@ -3156,6 +3276,8 @@ static void RegisterValidatableOpConverters(
   (*registration)["MatMul"] = ConvertMatMul;
   (*registration)["Relu6"] = ConvertRelu6;
   (*registration)["Square"] = ConvertSquare;
+  (*registration)["ExpandDims"] = ConvertExpandDims;
+  (*registration)["Squeeze"] = ConvertSqueeze;
 
   for (auto quantization_op_type :
        {"QuantizeAndDequantizeV2", "QuantizeAndDequantizeV3",
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index 443033379f..c37406901a 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2113,6 +2113,253 @@ TEST_F(OpConverterTest, ConvertActivation) {
   }
 }
 
+TEST_F(OpConverterTest, ConvertExpandDims) {
+  {
+    // Input list is empty, should fail.
+    NodeDef node_def = MakeNodeDef("my_expanddims", "ExpandDims", {});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "Two inputs expected for ExpandDims, at my_expanddims");
+  }
+
+  // Get the NodeDef for ExpandDims.
+  Scope s = Scope::NewRootScope();
+  auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
+  auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
+  auto expanddims = ops::ExpandDims(s.WithOpName("my_expanddims"), input, weights);
+  const NodeDef& node_def = expanddims.operation.node()->def();
+
+  {
+    // Axis is a tensor, should fail.
+    Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestTensor("weights", {3});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "ExpandDims expects weights for axis, at my_expanddims");
+  }
+  {
+    // Add dim at batch dimension, should fail.
+    Reset();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("weights", {1}, {0});
+    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+        "Modifying batch dimension is not supported for ExpandDims, at my_expanddims");
+  }
+  {
+    // Add dim at batch dimension via negative axis, should fail.
+    Reset();
+    AddTestTensor("input", {1, 2, 3});
+    // Input is rank 4 (batch dim included)
+    AddTestWeights<int32>("weights", {1}, {-5});
+    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+        "Modifying batch dimension is not supported for ExpandDims, at my_expanddims");
+  }
+  {
+    // Axis > rank(input), should fail.
+    Reset();
+    AddTestTensor("input", {1, 2, 3});
+    // Input is rank 4 (batch dim included)
+    AddTestWeights<int32>("weights", {1}, {5});
+    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
+        "Axis for ExpandDims is invalid, must be in the range "
+        "[-rank(input) - 1, rank(input)], at my_expanddims");
+  }
+  {
+    // Axis < -rank(input)-1, should fail.
+    Reset();
+    AddTestTensor("input", {1, 2, 3});
+    // Input is rank 4 (batch dim included)
+    AddTestWeights<int32>("weights", {1}, {-6});
+    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
+        "Axis for ExpandDims is invalid, must be in the range "
+        "[-rank(input) - 1, rank(input)], at my_expanddims");
+  }
+  {
+    // Add axis before, Ok.
+    Reset();
+    AddTestTensor("input", {2, 3});
+    AddTestWeights<int32>("weights", {1}, {1});
+    RunValidationAndConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(TrtDimsEqualsArray({1, 2, 3}, output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_expanddims", &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
+  }
+  {
+    // Add axis after, Ok.
+    Reset();
+    AddTestTensor("input", {2, 3});
+    AddTestWeights<int32>("weights", {1}, {3});
+    RunValidationAndConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(TrtDimsEqualsArray({2, 3, 1}, output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_expanddims", &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
+  }
+  {
+    // Add negative axis, Ok.
+    Reset();
+    AddTestTensor("input", {2, 3});
+    AddTestWeights<int32>("weights", {1}, {-1});
+    RunValidationAndConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(TrtDimsEqualsArray({2, 3, 1}, output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_expanddims", &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
+  }
+}
+
+TEST_F(OpConverterTest, ConvertSqueeze) {
+  {
+    // Input list is empty, should fail.
+    NodeDef node_def = MakeNodeDef("my_squeeze", "Squeeze", {});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "One input expected for Squeeze, at my_squeeze");
+  }
+  {
+    // No attrs, should fail.
+    Reset();
+    Scope s = Scope::NewRootScope();
+    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
+    auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input);
+    const NodeDef& node_def = squeeze.operation.node()->def();
+    AddTestTensor("input", {1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "Squeeze is only implemented for explicit dims, at my_squeeze");
+  }
+
+  // Get the NodeDef for Squeeze.
+  auto get_squeeze_nodedef = [](std::vector<int> axis) -> NodeDef {
+    Scope s = Scope::NewRootScope();
+    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
+    ops::Squeeze::Attrs squeeze_attrs;
+    squeeze_attrs.axis_ = gtl::ArraySlice<int>(axis);
+    auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs);
+    return squeeze.operation.node()->def();
+  };
+
+  {
+    // Squeeze batch dim, should fail.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({0});
+    AddTestTensor("input", {1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "Cannot squeeze batch dimension, at my_squeeze");
+  }
+  {
+    // Squeeze batch dim via negative axis, should fail.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({-4});
+    AddTestTensor("input", {1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "Cannot squeeze batch dimension, at my_squeeze");
+  }
+  {
+    // Squeeze >= rank(input), should fail.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({4});
+    AddTestTensor("input", {1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "Axis for Squeeze is invalid, must be in the range "
+        "[-rank(input), rank(input)), at my_squeeze");
+  }
+  {
+    // Squeeze < -rank(input), should fail.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({-5});
+    AddTestTensor("input", {1, 2, 3});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "Axis for Squeeze is invalid, must be in the range "
+        "[-rank(input), rank(input)), at my_squeeze");
+  }
+  {
+    // Squeeze axis before, Ok.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({1});
+    AddTestTensor("input", {1, 2, 3});
+    RunConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(TrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_squeeze", &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
+  }
+  {
+    // Squeeze axis after, Ok.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({3});
+    AddTestTensor("input", {2, 3, 1});
+    RunConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(TrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_squeeze", &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
+  }
+  {
+    // Squeeze multiple axis, Ok.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({1, 3, 5});
+    AddTestTensor("input", {1, 2, 1, 3, 1});
+    RunConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(TrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_squeeze", &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
+  }
+  {
+    // Squeeze multiple axis negative, Ok.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({-1, -3, -5});
+    AddTestTensor("input", {1, 2, 1, 3, 1});
+    RunConversion(node_def);
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
+    EXPECT_TRUE(output.is_tensor());
+    EXPECT_TRUE(TrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions()))
+        << output.DebugString();
+
+    std::vector<float> output_data(6);
+    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_squeeze", &output_data);
+    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
+  }
+}
+
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
-- 
GitLab


From 9d3b1765b20660ba8f166a44bdc9f80bc3987ced Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Mon, 26 Nov 2018 09:33:24 -0800
Subject: [PATCH 106/873] Add missing VectorToTrtDims function

---
 .../contrib/tensorrt/convert/convert_nodes.cc   | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 49df1daae9..1116a039ce 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -120,6 +120,17 @@ inline nvinfer1::Dims TensorShapeToTrtDims(const TensorShapeType& shape,
   return trt_dims;
 }
 
+inline nvinfer1::Dims VectorToTrtDims(const std::vector<int>& shape,
+                                      bool ignore_first_dim = false) {
+  nvinfer1::Dims trt_dims;
+  const int offset = (ignore_first_dim ? 1 : 0);
+  for (int i = offset; i < shape.size(); i++) {
+    trt_dims.d[i - offset] = shape[i];
+  }
+  trt_dims.nbDims = shape.size() - offset;
+  return trt_dims;
+}
+
 void GetOutputProperties(const grappler::GraphProperties& graph_properties,
                          const Node* node, const int out_port,
                          PartialTensorShape* shape,
@@ -1923,10 +1934,9 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
 
   // ExpandDims: Insert new dim of size 1.
   input_dims.insert(input_dims.begin()+axis, 1);
-  // Convert input_dims vector into nvinfer1::Dims.
+  // Reshape tensor.
   const bool ignore_first_dim = input_tensor.is_tensor();
   nvinfer1::Dims new_dims = VectorToTrtDims(input_dims, ignore_first_dim);
-  // Reshape tensor.
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
@@ -1988,10 +1998,9 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
   // Remove all dims which are equal to 0.
   input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0),
                    input_dims.end());
-  // Convert input_dims vector into nvinfer1::Dims.
+    // Reshape tensor.
   const bool ignore_first_dim = input_tensor.is_tensor();
   nvinfer1::Dims new_dims = VectorToTrtDims(input_dims, ignore_first_dim);
-  // Reshape tensor.
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
-- 
GitLab


From dbe1a6bd852b8d03fccc6b6df7ee0b6f65ad7ca8 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Tue, 27 Nov 2018 10:20:57 -0800
Subject: [PATCH 107/873] Fix compilation error with BuildAndRun, Use
 ExpectTrtDimsEqualsArray, Fix formatting

---
 .../tensorrt/convert/convert_nodes_test.cc    | 45 ++++++++++---------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index c37406901a..1e4ee1708f 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2184,11 +2184,11 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
     EXPECT_TRUE(output.is_tensor());
-    EXPECT_TRUE(TrtDimsEqualsArray({1, 2, 3}, output.tensor()->getDimensions()))
-        << output.DebugString();
+    ExpectTrtDimsEqualsArray({1, 2, 3}, output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
-    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_expanddims", &output_data);
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_expanddims",
+                       &output_data);
     EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
   }
   {
@@ -2200,11 +2200,11 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
     EXPECT_TRUE(output.is_tensor());
-    EXPECT_TRUE(TrtDimsEqualsArray({2, 3, 1}, output.tensor()->getDimensions()))
-        << output.DebugString();
+    ExpectTrtDimsEqualsArray({2, 3, 1}, output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
-    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_expanddims", &output_data);
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_expanddims",
+                       &output_data);
     EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
   }
   {
@@ -2216,11 +2216,11 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
     EXPECT_TRUE(output.is_tensor());
-    EXPECT_TRUE(TrtDimsEqualsArray({2, 3, 1}, output.tensor()->getDimensions()))
-        << output.DebugString();
+    ExpectTrtDimsEqualsArray({2, 3, 1}, output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
-    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_expanddims", &output_data);
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_expanddims",
+                       &output_data);
     EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
   }
 }
@@ -2252,7 +2252,8 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
     ops::Squeeze::Attrs squeeze_attrs;
     squeeze_attrs.axis_ = gtl::ArraySlice<int>(axis);
-    auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs);
+    auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input,
+                                squeeze_attrs);
     return squeeze.operation.node()->def();
   };
 
@@ -2303,11 +2304,11 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
     EXPECT_TRUE(output.is_tensor());
-    EXPECT_TRUE(TrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions()))
-        << output.DebugString();
+    ExpectTrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
-    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_squeeze", &output_data);
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_squeeze",
+                       &output_data);
     EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
   }
   {
@@ -2319,11 +2320,11 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
     EXPECT_TRUE(output.is_tensor());
-    EXPECT_TRUE(TrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions()))
-        << output.DebugString();
+    ExpectTrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
-    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_squeeze", &output_data);
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_squeeze",
+                       &output_data);
     EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
   }
   {
@@ -2335,11 +2336,11 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
     EXPECT_TRUE(output.is_tensor());
-    EXPECT_TRUE(TrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions()))
-        << output.DebugString();
+    ExpectTrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
-    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_squeeze", &output_data);
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_squeeze",
+                       &output_data);
     EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
   }
   {
@@ -2351,11 +2352,11 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
     EXPECT_TRUE(output.is_tensor());
-    EXPECT_TRUE(TrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions()))
-        << output.DebugString();
+    ExpectTrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
-    BuildAndRun("input", {1, 2, 3, 4, 5, 6}, "my_squeeze", &output_data);
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_squeeze",
+                       &output_data);
     EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
   }
 }
-- 
GitLab


From 23a1bf779c82e363820ea8a1217038d971903ea5 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 28 Nov 2018 11:19:02 -0800
Subject: [PATCH 108/873] Remove debug LOG statement. Use TestParams array for
 Ok tests

---
 .../contrib/tensorrt/convert/convert_nodes.cc |   1 -
 .../tensorrt/convert/convert_nodes_test.cc    | 146 +++++++-----------
 2 files changed, 59 insertions(+), 88 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 1116a039ce..135e188502 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1978,7 +1978,6 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
     }
     // Convert negative axis to corresponding positive axis.
     if (axis < 0) axis += input_rank;
-    LOG(INFO) << axis;
     // Don't squeeze batch dim.
     if (axis == 0) {
       return tensorflow::errors::Unimplemented(
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index 1e4ee1708f..74ca982f3f 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2175,48 +2175,41 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
         "Axis for ExpandDims is invalid, must be in the range "
         "[-rank(input) - 1, rank(input)], at my_expanddims");
   }
-  {
-    // Add axis before, Ok.
-    Reset();
-    AddTestTensor("input", {2, 3});
-    AddTestWeights<int32>("weights", {1}, {1});
-    RunValidationAndConversion(node_def);
-    TRT_TensorOrWeights output;
-    TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
-    EXPECT_TRUE(output.is_tensor());
-    ExpectTrtDimsEqualsArray({1, 2, 3}, output.tensor()->getDimensions());
 
-    std::vector<float> output_data(6);
-    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_expanddims",
-                       &output_data);
-    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
-  }
-  {
-    // Add axis after, Ok.
-    Reset();
-    AddTestTensor("input", {2, 3});
-    AddTestWeights<int32>("weights", {1}, {3});
-    RunValidationAndConversion(node_def);
-    TRT_TensorOrWeights output;
-    TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
-    EXPECT_TRUE(output.is_tensor());
-    ExpectTrtDimsEqualsArray({2, 3, 1}, output.tensor()->getDimensions());
+  struct TestParams {
+    TestParams(const std::vector<int>& input_dims,
+               int axis,
+               const std::vector<int>& expected_output_dims)
+        : input_dims(input_dims),
+          axis(axis),
+          expected_output_dims(expected_output_dims) {}
+    std::vector<int> input_dims;
+    int axis;
+    std::vector<int> expected_output_dims;
+  };
 
-    std::vector<float> output_data(6);
-    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_expanddims",
-                       &output_data);
-    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
-  }
-  {
-    // Add negative axis, Ok.
+  // Ok.
+  const int kExpandDimsOKCases = 8;
+  TestParams ok_params[kExpandDimsOKCases] = {
+      TestParams{{2, 3}, 1, {1, 2, 3}},
+      TestParams{{2, 3}, -3, {1, 2, 3}},
+      TestParams{{2, 3}, 3, {2, 3, 1}},
+      TestParams{{2, 3}, -1, {2, 3, 1}},
+      TestParams{{2, 3}, 2, {2, 1, 3}},
+      TestParams{{2, 3}, -2, {2, 1, 3}},
+      TestParams{{6}, 1, {1, 6}},
+      TestParams{{6}, -1, {6, 1}},
+  };
+  for (int i = 0; i < kExpandDimsOKCases; ++i) {
     Reset();
-    AddTestTensor("input", {2, 3});
-    AddTestWeights<int32>("weights", {1}, {-1});
+    AddTestTensor("input", ok_params[i].input_dims);
+    AddTestWeights<int32>("weights", {1}, {ok_params[i].axis});
     RunValidationAndConversion(node_def);
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_expanddims", &output));
     EXPECT_TRUE(output.is_tensor());
-    ExpectTrtDimsEqualsArray({2, 3, 1}, output.tensor()->getDimensions());
+    ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
+                             output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
     BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_expanddims",
@@ -2295,64 +2288,43 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
         "Axis for Squeeze is invalid, must be in the range "
         "[-rank(input), rank(input)), at my_squeeze");
   }
-  {
-    // Squeeze axis before, Ok.
-    Reset();
-    NodeDef node_def = get_squeeze_nodedef({1});
-    AddTestTensor("input", {1, 2, 3});
-    RunConversion(node_def);
-    TRT_TensorOrWeights output;
-    TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
-    EXPECT_TRUE(output.is_tensor());
-    ExpectTrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions());
-
-    std::vector<float> output_data(6);
-    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_squeeze",
-                       &output_data);
-    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
-  }
-  {
-    // Squeeze axis after, Ok.
-    Reset();
-    NodeDef node_def = get_squeeze_nodedef({3});
-    AddTestTensor("input", {2, 3, 1});
-    RunConversion(node_def);
-    TRT_TensorOrWeights output;
-    TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
-    EXPECT_TRUE(output.is_tensor());
-    ExpectTrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions());
 
-    std::vector<float> output_data(6);
-    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_squeeze",
-                       &output_data);
-    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
-  }
-  {
-    // Squeeze multiple axis, Ok.
-    Reset();
-    NodeDef node_def = get_squeeze_nodedef({1, 3, 5});
-    AddTestTensor("input", {1, 2, 1, 3, 1});
-    RunConversion(node_def);
-    TRT_TensorOrWeights output;
-    TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
-    EXPECT_TRUE(output.is_tensor());
-    ExpectTrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions());
+  struct TestParams {
+    TestParams(const std::vector<int>& input_dims,
+               const std::vector<int>& axis,
+               const std::vector<int>& expected_output_dims)
+        : input_dims(input_dims),
+          axis(axis),
+          expected_output_dims(expected_output_dims) {}
+    std::vector<int> input_dims;
+    std::vector<int> axis;
+    std::vector<int> expected_output_dims;
+  };
 
-    std::vector<float> output_data(6);
-    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_squeeze",
-                       &output_data);
-    EXPECT_THAT(output_data, ElementsAre(1, 2, 3, 4, 5, 6));
-  }
-  {
-    // Squeeze multiple axis negative, Ok.
+  // Ok.
+  const int kSqueezeOKCases = 10;
+  TestParams ok_params[kSqueezeOKCases] = {
+      TestParams{{1, 2, 3}, {1}, {2, 3}},
+      TestParams{{1, 2, 3}, {-3}, {2, 3}},
+      TestParams{{2, 3, 1}, {3}, {2, 3}},
+      TestParams{{2, 3, 1}, {-1}, {2, 3}},
+      TestParams{{1, 2, 1, 3, 1}, {1, 3, 5}, {2, 3}},
+      TestParams{{1, 2, 1, 3, 1}, {3, 1, 5}, {2, 3}},
+      TestParams{{1, 2, 1, 3, 1}, {-1, -3, -5}, {2, 3}},
+      TestParams{{1, 2, 1, 3, 1}, {1, -3, 5}, {2, 3}},
+      TestParams{{1, 6}, {1}, {6}},
+      TestParams{{6, 1}, {2}, {6}},
+  };
+  for (int i = 0; i < kSqueezeOKCases; ++i) {
     Reset();
-    NodeDef node_def = get_squeeze_nodedef({-1, -3, -5});
-    AddTestTensor("input", {1, 2, 1, 3, 1});
-    RunConversion(node_def);
+    NodeDef node_def = get_squeeze_nodedef(ok_params[i].axis);
+    AddTestTensor("input", ok_params[i].input_dims);
+    RunValidationAndConversion(node_def);
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_squeeze", &output));
     EXPECT_TRUE(output.is_tensor());
-    ExpectTrtDimsEqualsArray({2, 3}, output.tensor()->getDimensions());
+    ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims,
+                             output.tensor()->getDimensions());
 
     std::vector<float> output_data(6);
     BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_squeeze",
-- 
GitLab


From 293bc93045d1eddce9040de36918270ed5e65b6c Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Thu, 29 Nov 2018 16:38:36 -0800
Subject: [PATCH 109/873] Restrict ops to tensors only. Renamed
 VectorToTrtDims.

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 30 +++++++++++--------
 .../tensorrt/convert/convert_nodes_test.cc    | 27 +++++++++++++++--
 2 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 135e188502..a83f2c7c48 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -120,15 +120,11 @@ inline nvinfer1::Dims TensorShapeToTrtDims(const TensorShapeType& shape,
   return trt_dims;
 }
 
-inline nvinfer1::Dims VectorToTrtDims(const std::vector<int>& shape,
-                                      bool ignore_first_dim = false) {
-  nvinfer1::Dims trt_dims;
-  const int offset = (ignore_first_dim ? 1 : 0);
-  for (int i = offset; i < shape.size(); i++) {
-    trt_dims.d[i - offset] = shape[i];
-  }
-  trt_dims.nbDims = shape.size() - offset;
-  return trt_dims;
+inline nvinfer1::Dims TensorShapeArrayToTrtDims(const std::vector<int>& shape,
+                                                bool ignore_first_dim = false) {
+  PartialTensorShape tensor_shape;
+  TensorShapeUtils::MakeShape(shape, &tensor_shape);
+  return TensorShapeToTrtDims(tensor_shape, ignore_first_dim);
 }
 
 void GetOutputProperties(const grappler::GraphProperties& graph_properties,
@@ -1898,6 +1894,10 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
     return tensorflow::errors::InvalidArgument(
         "Two inputs expected for ExpandDims, at ", node_def.name());
   }
+  if (inputs.at(0).is_weights() ) {
+    return tensorflow::errors::Unimplemented(
+        "ExpandDims expects tensor for input, at ", node_def.name());
+  }
   if (!inputs.at(1).is_weights() ) {
     return tensorflow::errors::InvalidArgument(
         "ExpandDims expects weights for axis, at ", node_def.name());
@@ -1936,7 +1936,8 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
   input_dims.insert(input_dims.begin()+axis, 1);
   // Reshape tensor.
   const bool ignore_first_dim = input_tensor.is_tensor();
-  nvinfer1::Dims new_dims = VectorToTrtDims(input_dims, ignore_first_dim);
+  nvinfer1::Dims new_dims = TensorShapeArrayToTrtDims(input_dims,
+                                                      ignore_first_dim);
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
@@ -1952,6 +1953,10 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
     return tensorflow::errors::InvalidArgument(
         "One input expected for Squeeze, at ", node_def.name());
   }
+  if (inputs.at(0).is_weights() ) {
+    return tensorflow::errors::Unimplemented(
+        "Squeeze expects tensor for input, at ", node_def.name());
+  }
   // Get input shape.
   TRT_TensorOrWeights input_tensor = inputs.at(0);
   const nvinfer1::Dims dims = input_tensor.GetTrtDims();
@@ -1979,7 +1984,7 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
     // Convert negative axis to corresponding positive axis.
     if (axis < 0) axis += input_rank;
     // Don't squeeze batch dim.
-    if (axis == 0) {
+    if (input_tensor.is_tensor() && axis == 0) {
       return tensorflow::errors::Unimplemented(
           "Cannot squeeze batch dimension, at ", node_def.name());
     }
@@ -1999,7 +2004,8 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
                    input_dims.end());
     // Reshape tensor.
   const bool ignore_first_dim = input_tensor.is_tensor();
-  nvinfer1::Dims new_dims = VectorToTrtDims(input_dims, ignore_first_dim);
+  nvinfer1::Dims new_dims = TensorShapeArrayToTrtDims(input_dims,
+                                                      ignore_first_dim);
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index 74ca982f3f..27d6098b49 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2126,9 +2126,19 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
   Scope s = Scope::NewRootScope();
   auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
   auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
-  auto expanddims = ops::ExpandDims(s.WithOpName("my_expanddims"), input, weights);
+  auto expanddims = ops::ExpandDims(s.WithOpName("my_expanddims"), input, 
+                                    weights);
   const NodeDef& node_def = expanddims.operation.node()->def();
 
+  {
+    // Input is weights, should fail.
+    Reset();
+    AddTestWeights<int32>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
+    AddTestWeights<int32>("weights", {1}, {1});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "ExpandDims expects tensor for input, at my_expanddims");
+  }
   {
     // Axis is a tensor, should fail.
     Reset();
@@ -2144,7 +2154,8 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("weights", {1}, {0});
     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
-        "Modifying batch dimension is not supported for ExpandDims, at my_expanddims");
+        "Modifying batch dimension is not supported for ExpandDims, at "
+        "my_expanddims");
   }
   {
     // Add dim at batch dimension via negative axis, should fail.
@@ -2153,7 +2164,8 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     // Input is rank 4 (batch dim included)
     AddTestWeights<int32>("weights", {1}, {-5});
     RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
-        "Modifying batch dimension is not supported for ExpandDims, at my_expanddims");
+        "Modifying batch dimension is not supported for ExpandDims, at "
+        "my_expanddims");
   }
   {
     // Axis > rank(input), should fail.
@@ -2250,6 +2262,15 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
     return squeeze.operation.node()->def();
   };
 
+  {
+    // Input is weights, should fail.
+    Reset();
+    NodeDef node_def = get_squeeze_nodedef({0});
+    AddTestWeights<float>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "Squeeze expects tensor for input, at my_squeeze");
+  }
   {
     // Squeeze batch dim, should fail.
     Reset();
-- 
GitLab


From 06fe3c8ce5b2db058b633a3d93dedcb91a51b203 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 5 Dec 2018 11:19:33 -0800
Subject: [PATCH 110/873] Check that axis is a scalar

---
 tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index a83f2c7c48..3392e527c2 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1913,6 +1913,10 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
   const int input_rank = input_dims.size();
   // Get axis to expand on.
   TRT_ShapedWeights weights = inputs.at(1).weights();
+  if (weights.count() != 1) {
+    return tensorflow::errors::InvalidArgument(
+        "ExpandDims axis must be a scalar, at ", node_def.name());
+  }
   const int* weights_ptr =
       static_cast<int*>(const_cast<void*>(weights.GetValues()));
   int axis = weights_ptr[0];
-- 
GitLab


From 1680520c643b4c3ea6c41ce2ee3cc000a125a88e Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 5 Dec 2018 14:28:04 -0800
Subject: [PATCH 111/873] Fix clang-format

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 20 +++----
 .../tensorrt/convert/convert_nodes_test.cc    | 53 +++++++++----------
 2 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 3392e527c2..9f6ec75c5b 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1894,11 +1894,11 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
     return tensorflow::errors::InvalidArgument(
         "Two inputs expected for ExpandDims, at ", node_def.name());
   }
-  if (inputs.at(0).is_weights() ) {
+  if (inputs.at(0).is_weights()) {
     return tensorflow::errors::Unimplemented(
         "ExpandDims expects tensor for input, at ", node_def.name());
   }
-  if (!inputs.at(1).is_weights() ) {
+  if (!inputs.at(1).is_weights()) {
     return tensorflow::errors::InvalidArgument(
         "ExpandDims expects weights for axis, at ", node_def.name());
   }
@@ -1937,11 +1937,11 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
   if (params->validation_only) return Status::OK();
 
   // ExpandDims: Insert new dim of size 1.
-  input_dims.insert(input_dims.begin()+axis, 1);
+  input_dims.insert(input_dims.begin() + axis, 1);
   // Reshape tensor.
   const bool ignore_first_dim = input_tensor.is_tensor();
-  nvinfer1::Dims new_dims = TensorShapeArrayToTrtDims(input_dims,
-                                                      ignore_first_dim);
+  nvinfer1::Dims new_dims =
+      TensorShapeArrayToTrtDims(input_dims, ignore_first_dim);
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
@@ -1957,7 +1957,7 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
     return tensorflow::errors::InvalidArgument(
         "One input expected for Squeeze, at ", node_def.name());
   }
-  if (inputs.at(0).is_weights() ) {
+  if (inputs.at(0).is_weights()) {
     return tensorflow::errors::Unimplemented(
         "Squeeze expects tensor for input, at ", node_def.name());
   }
@@ -2002,14 +2002,14 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
     input_dims[axis] = 0;
   }
   if (params->validation_only) return Status::OK();
-  
+
   // Remove all dims which are equal to 0.
   input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0),
                    input_dims.end());
-    // Reshape tensor.
+  // Reshape tensor.
   const bool ignore_first_dim = input_tensor.is_tensor();
-  nvinfer1::Dims new_dims = TensorShapeArrayToTrtDims(input_dims,
-                                                      ignore_first_dim);
+  nvinfer1::Dims new_dims =
+      TensorShapeArrayToTrtDims(input_dims, ignore_first_dim);
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index 27d6098b49..c37a43dd5d 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2126,8 +2126,8 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
   Scope s = Scope::NewRootScope();
   auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
   auto weights = ops::Placeholder(s.WithOpName("weights"), DT_INT32);
-  auto expanddims = ops::ExpandDims(s.WithOpName("my_expanddims"), input, 
-                                    weights);
+  auto expanddims =
+      ops::ExpandDims(s.WithOpName("my_expanddims"), input, weights);
   const NodeDef& node_def = expanddims.operation.node()->def();
 
   {
@@ -2153,7 +2153,8 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     Reset();
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("weights", {1}, {0});
-    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
         "Modifying batch dimension is not supported for ExpandDims, at "
         "my_expanddims");
   }
@@ -2163,7 +2164,8 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     AddTestTensor("input", {1, 2, 3});
     // Input is rank 4 (batch dim included)
     AddTestWeights<int32>("weights", {1}, {-5});
-    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
         "Modifying batch dimension is not supported for ExpandDims, at "
         "my_expanddims");
   }
@@ -2173,7 +2175,8 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     AddTestTensor("input", {1, 2, 3});
     // Input is rank 4 (batch dim included)
     AddTestWeights<int32>("weights", {1}, {5});
-    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
         "Axis for ExpandDims is invalid, must be in the range "
         "[-rank(input) - 1, rank(input)], at my_expanddims");
   }
@@ -2183,14 +2186,14 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
     AddTestTensor("input", {1, 2, 3});
     // Input is rank 4 (batch dim included)
     AddTestWeights<int32>("weights", {1}, {-6});
-    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
         "Axis for ExpandDims is invalid, must be in the range "
         "[-rank(input) - 1, rank(input)], at my_expanddims");
   }
 
   struct TestParams {
-    TestParams(const std::vector<int>& input_dims,
-               int axis,
+    TestParams(const std::vector<int>& input_dims, int axis,
                const std::vector<int>& expected_output_dims)
         : input_dims(input_dims),
           axis(axis),
@@ -2203,14 +2206,10 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
   // Ok.
   const int kExpandDimsOKCases = 8;
   TestParams ok_params[kExpandDimsOKCases] = {
-      TestParams{{2, 3}, 1, {1, 2, 3}},
-      TestParams{{2, 3}, -3, {1, 2, 3}},
-      TestParams{{2, 3}, 3, {2, 3, 1}},
-      TestParams{{2, 3}, -1, {2, 3, 1}},
-      TestParams{{2, 3}, 2, {2, 1, 3}},
-      TestParams{{2, 3}, -2, {2, 1, 3}},
-      TestParams{{6}, 1, {1, 6}},
-      TestParams{{6}, -1, {6, 1}},
+      TestParams{{2, 3}, 1, {1, 2, 3}}, TestParams{{2, 3}, -3, {1, 2, 3}},
+      TestParams{{2, 3}, 3, {2, 3, 1}}, TestParams{{2, 3}, -1, {2, 3, 1}},
+      TestParams{{2, 3}, 2, {2, 1, 3}}, TestParams{{2, 3}, -2, {2, 1, 3}},
+      TestParams{{6}, 1, {1, 6}},       TestParams{{6}, -1, {6, 1}},
   };
   for (int i = 0; i < kExpandDimsOKCases; ++i) {
     Reset();
@@ -2234,9 +2233,8 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
   {
     // Input list is empty, should fail.
     NodeDef node_def = MakeNodeDef("my_squeeze", "Squeeze", {});
-    RunValidationAndConversion(
-        node_def, error::INVALID_ARGUMENT,
-        "One input expected for Squeeze, at my_squeeze");
+    RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
+                               "One input expected for Squeeze, at my_squeeze");
   }
   {
     // No attrs, should fail.
@@ -2257,8 +2255,8 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
     ops::Squeeze::Attrs squeeze_attrs;
     squeeze_attrs.axis_ = gtl::ArraySlice<int>(axis);
-    auto squeeze = ops::Squeeze(s.WithOpName("my_squeeze"), input,
-                                squeeze_attrs);
+    auto squeeze =
+        ops::Squeeze(s.WithOpName("my_squeeze"), input, squeeze_attrs);
     return squeeze.operation.node()->def();
   };
 
@@ -2276,18 +2274,16 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
     Reset();
     NodeDef node_def = get_squeeze_nodedef({0});
     AddTestTensor("input", {1, 2, 3});
-    RunValidationAndConversion(
-        node_def, error::UNIMPLEMENTED,
-        "Cannot squeeze batch dimension, at my_squeeze");
+    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+                               "Cannot squeeze batch dimension, at my_squeeze");
   }
   {
     // Squeeze batch dim via negative axis, should fail.
     Reset();
     NodeDef node_def = get_squeeze_nodedef({-4});
     AddTestTensor("input", {1, 2, 3});
-    RunValidationAndConversion(
-        node_def, error::UNIMPLEMENTED,
-        "Cannot squeeze batch dimension, at my_squeeze");
+    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+                               "Cannot squeeze batch dimension, at my_squeeze");
   }
   {
     // Squeeze >= rank(input), should fail.
@@ -2311,8 +2307,7 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
   }
 
   struct TestParams {
-    TestParams(const std::vector<int>& input_dims,
-               const std::vector<int>& axis,
+    TestParams(const std::vector<int>& input_dims, const std::vector<int>& axis,
                const std::vector<int>& expected_output_dims)
         : input_dims(input_dims),
           axis(axis),
-- 
GitLab


From 9546b5259410f1aad4206b396558238abd1c7501 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 5 Dec 2018 14:31:46 -0800
Subject: [PATCH 112/873] Remove is_tensor() since inputs are required to be
 tensors

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 22 +++++++------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 9f6ec75c5b..ac0e2f684c 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1906,10 +1906,8 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
   TRT_TensorOrWeights input_tensor = inputs.at(0);
   const nvinfer1::Dims dims = input_tensor.GetTrtDims();
   std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
-  // Add batch dim back for tensors.
-  if (input_tensor.is_tensor()) {
-    input_dims.insert(input_dims.begin(), -1);
-  }
+  // Add batch dim back.
+  input_dims.insert(input_dims.begin(), -1);
   const int input_rank = input_dims.size();
   // Get axis to expand on.
   TRT_ShapedWeights weights = inputs.at(1).weights();
@@ -1929,7 +1927,7 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
   }
   // Convert negative axis to corresponding positive axis.
   if (axis < 0) axis += input_rank + 1;
-  if (input_tensor.is_tensor() && axis == 0) {
+  if (axis == 0) {
     return tensorflow::errors::Unimplemented(
         "Modifying batch dimension is not supported for ExpandDims, at ",
         node_def.name());
@@ -1939,9 +1937,8 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
   // ExpandDims: Insert new dim of size 1.
   input_dims.insert(input_dims.begin() + axis, 1);
   // Reshape tensor.
-  const bool ignore_first_dim = input_tensor.is_tensor();
   nvinfer1::Dims new_dims =
-      TensorShapeArrayToTrtDims(input_dims, ignore_first_dim);
+      TensorShapeArrayToTrtDims(input_dims, /*ignore_first_dim=*/true);
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
@@ -1965,10 +1962,8 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
   TRT_TensorOrWeights input_tensor = inputs.at(0);
   const nvinfer1::Dims dims = input_tensor.GetTrtDims();
   std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
-  // Add batch dim back temporarily.
-  if (input_tensor.is_tensor()) {
-    input_dims.insert(input_dims.begin(), -1);
-  }
+  // Add batch dim back.
+  input_dims.insert(input_dims.begin(), -1);
   const int input_rank = input_dims.size();
   // Mark axes to remove by setting them to 0.
   TFAttrs attrs(node_def);
@@ -1988,7 +1983,7 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
     // Convert negative axis to corresponding positive axis.
     if (axis < 0) axis += input_rank;
     // Don't squeeze batch dim.
-    if (input_tensor.is_tensor() && axis == 0) {
+    if (axis == 0) {
       return tensorflow::errors::Unimplemented(
           "Cannot squeeze batch dimension, at ", node_def.name());
     }
@@ -2007,9 +2002,8 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
   input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0),
                    input_dims.end());
   // Reshape tensor.
-  const bool ignore_first_dim = input_tensor.is_tensor();
   nvinfer1::Dims new_dims =
-      TensorShapeArrayToTrtDims(input_dims, ignore_first_dim);
+      TensorShapeArrayToTrtDims(input_dims, /*ignore_first_dim=*/true);
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
-- 
GitLab


From 293b0783fdf635d5e337d3c71ae4cadee8770322 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Dec 2018 02:02:57 +0000
Subject: [PATCH 113/873] Add processing in case dim is unknown in advance.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/ops/nn_ops.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index a2305cefba..4a36aa1550 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1699,8 +1699,11 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   # still perform softmax on its last dimension.
 
   # In case dim is negative (and is not last dimension -1), add shape.ndims
-  if not isinstance(dim, ops.Tensor) and dim < 0:
-    dim += shape.ndims
+  if not isinstance(dim, ops.Tensor):
+    if dim < 0:
+      dim += shape.ndims
+  else:
+    dim = array_ops.where(math_ops.less(dim, 0), dim + shape.ndims, dim)
 
   # Swap logits' dimension of dim and its last dimension.
   input_rank = array_ops.rank(logits)
-- 
GitLab


From e8e80850b1bcc14e3e20c1aa9af517a76d607beb Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Thu, 6 Dec 2018 11:37:46 +0800
Subject: [PATCH 114/873] Modify some comments.

---
 tensorflow/core/kernels/mkl_slice_op.cc | 29 ++++++++++++++-----------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index 233f33e1cb..a85d80f9b3 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -62,8 +62,8 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 // either Mkl layout or Tensorflow layout.
 // A shared code to validate input shapes and check for identity, which is not
 // dependent on the type of T.
-// We do this to reduce code size by not duplicating all this for all T (float,
-// double, int32, etc.)
+// We do this to reduce code size by not duplicating
+// all this for all T (float, double, int32, etc.)
 static void ValidateMklInputs(OpKernelContext* context, bool* is_identity,
                               gtl::InlinedVector<int64, 4>* begin,
                               gtl::InlinedVector<int64, 4>* size) {
@@ -160,12 +160,13 @@ static void CheckCommonCasesForMklInputs(OpKernelContext* context,
 }
 
 // This structure aggregates multiple inputs to Slice methods.
-// Parameters from & to represents memory pointing to reorder.
-// Parameters begin_dims & size_dims represents offset and length
-// passed to view primitive.
 struct MklSliceParams {
+  // Parameters from & to represents memory pointing to reorder.
   const memory* from;
   const memory* to;
+
+  // Parameters begin_dims & size_dims represents offset and length
+  // passed to view primitive.
   memory::dims begin_dims;
   memory::dims size_dims;
 
@@ -174,7 +175,7 @@ struct MklSliceParams {
       : from(from), to(to), begin_dims(begin_dims), size_dims(size_dims) {}
 };
 
-// This implements the reuse interface of Slice reorders.
+// This implements the shared interface of Slice reorders.
 template <typename T>
 class MklSlicePrimitive : public MklPrimitive {
  public:
@@ -190,6 +191,7 @@ class MklSlicePrimitive : public MklPrimitive {
     context_.dst_mem->set_data_handle(sliceParams.to->get_data_handle());
     context_.slice_stream->submit(context_.slice_primitives);
 
+    // For safety guard, so that data_handle wouldn't be rewritten.
     context_.src_mem->set_data_handle(DummyData);
     context_.dst_mem->set_data_handle(DummyData);
     return;
@@ -213,6 +215,7 @@ class MklSlicePrimitive : public MklPrimitive {
   engine cpu_engine_ = engine(engine::cpu, 0);
 
   void Setup(const MklSliceParams& sliceParams) {
+    // Just create the memory primitive, fill with dummy.
     context_.src_mem.reset(
         new memory({sliceParams.from->get_primitive_desc().desc(), cpu_engine_},
                    DummyData));
@@ -260,16 +263,16 @@ class MklSlicePrimitiveFactory : public MklPrimitiveFactory<T> {
     FactoryKeyCreator key_creator;
     auto const& from_desc = sliceParams.from->get_primitive_desc().desc().data;
     auto const& to_desc = sliceParams.to->get_primitive_desc().desc().data;
-    const int KIdxFirstStride = 0;
+    const int kIdxFirstStride = 0;
     memory::dims from_dims(from_desc.dims, &from_desc.dims[from_desc.ndims]);
     memory::dims to_dims(to_desc.dims, &to_desc.dims[to_desc.ndims]);
     memory::dims from_strides(
-        from_desc.layout_desc.blocking.strides[KIdxFirstStride],
+        from_desc.layout_desc.blocking.strides[kIdxFirstStride],
         &from_desc.layout_desc.blocking
-             .strides[KIdxFirstStride][from_desc.ndims]);
+             .strides[kIdxFirstStride][from_desc.ndims]);
     memory::dims to_strides(
-        to_desc.layout_desc.blocking.strides[KIdxFirstStride],
-        &to_desc.layout_desc.blocking.strides[KIdxFirstStride][to_desc.ndims]);
+        to_desc.layout_desc.blocking.strides[kIdxFirstStride],
+        &to_desc.layout_desc.blocking.strides[kIdxFirstStride][to_desc.ndims]);
     key_creator.AddAsKey(prefix);
     key_creator.AddAsKey(static_cast<int>(from_desc.format));
     key_creator.AddAsKey(static_cast<int>(from_desc.data_type));
@@ -339,8 +342,8 @@ class MklSliceOp : public OpKernel {
       //
       // 1. create memory primitive descriptor in_mem_pd and memory primitive
       //    in_mem_p for the entire source data. create view primitive
-      //    descriptor
-      //    in_submem_pd based on in_mem_pd, initial offsets, and sub-sizes
+      //    descriptor in_submem_pd based on in_mem_pd, initial offsets,
+      //    and sub-sizes
       // 2. create memory primitive descriptor out_mem_pd and memory primitive
       //    out_mem_p for the output (the logical sizes should match sub-sizes
       //    used in step 1, but the format might be arbitrary)
-- 
GitLab


From 790390598cad7c4e456b60400a0d0d5454e75716 Mon Sep 17 00:00:00 2001
From: Bairen Yi <byronyi@clustar.ai>
Date: Fri, 30 Nov 2018 03:34:29 +0000
Subject: [PATCH 115/873] Implement async TensorFromTransportOptions for GDR

Instead of blocking on completion of an RDMA op, RecvTensor client will
now post a work request to the NIC send queue and return immediately.
The GDR background polling thread will handle the callback after the
corresponding RDMA op is completed, i.e. polled from the completion
queue on NIC. The old epoll based mechanism is removed to trade higher
CPU usage for improved throughput and lower latencies for RDMA ops.

The maximum numbers of work request (WR) in the send/recv queues on
NIC are increased to entertain the increased number of concurrent
RDMA ops. The threshold of tensor size below which we pass the tensor
content in metadata is also increased to reduce the pressure to send/recv
queues on NIC.

This fixes #23933.

Signed-off-by: Bairen Yi <byronyi@clustar.ai>
---
 tensorflow/contrib/gdr/BUILD                 |   2 +-
 tensorflow/contrib/gdr/gdr.proto             |   1 -
 tensorflow/contrib/gdr/gdr_memory_manager.cc | 520 ++++++++-----------
 tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc |   9 +-
 tensorflow/contrib/gdr/gdr_server_lib.cc     |   3 +-
 tensorflow/contrib/gdr/gdr_worker.cc         |  24 +-
 6 files changed, 216 insertions(+), 343 deletions(-)

diff --git a/tensorflow/contrib/gdr/BUILD b/tensorflow/contrib/gdr/BUILD
index e534fdc177..7ec3c5ff5d 100644
--- a/tensorflow/contrib/gdr/BUILD
+++ b/tensorflow/contrib/gdr/BUILD
@@ -58,7 +58,7 @@ tf_cuda_library(
     ],
 )
 
-tf_cuda_library(
+cc_library(
     name = "gdr_worker",
     srcs = ["gdr_worker.cc"],
     hdrs = ["gdr_worker.h"],
diff --git a/tensorflow/contrib/gdr/gdr.proto b/tensorflow/contrib/gdr/gdr.proto
index c0b89245b1..bd438787c3 100644
--- a/tensorflow/contrib/gdr/gdr.proto
+++ b/tensorflow/contrib/gdr/gdr.proto
@@ -9,5 +9,4 @@ message RemoteMemoryRegion {
   uint64 addr = 3;
   uint32 rkey = 4;
   uint32 tensor_key = 5;
-  uint64 checksum = 6;
 }
diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc
index 53587fcf30..69bbab1c39 100644
--- a/tensorflow/contrib/gdr/gdr_memory_manager.cc
+++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc
@@ -26,15 +26,14 @@ limitations under the License.
 #include <fcntl.h>
 #include <rdma/rdma_cma.h>
 #include <rdma/rdma_verbs.h>
-#include <sys/epoll.h>
 
 #include "tensorflow/contrib/gdr/gdr.pb.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
 #include "tensorflow/core/common_runtime/process_state.h"
+#include "tensorflow/core/lib/random/random.h"
 #if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
-#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #endif  // GOOGLE_CUDA
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/macros.h"
@@ -81,10 +80,6 @@ int TryToReadNumaNode(ibv_device* device) {
   int32 value;
   if (strings::safe_strto32(content, &value)) {
     if (value < 0) {
-      LOG(INFO) << "Successful NUMA node read from SysFS had negative value ("
-                << value
-                << "), but there must be at least one NUMA node"
-                   ", so returning NUMA node zero";
       return port::kNUMANoAffinity;
     }
     LOG(INFO) << "NUMA node for device: " << device->name << " is " << value;
@@ -114,7 +109,7 @@ class GdrMemoryManager : public RemoteMemoryManager {
  public:
   GdrMemoryManager(const string& host, const string& port);
 
-  virtual ~GdrMemoryManager();
+  virtual ~GdrMemoryManager() {}
 
   virtual Status Init() override;
 
@@ -140,7 +135,7 @@ class GdrMemoryManager : public RemoteMemoryManager {
     return ptr < reinterpret_cast<char*>(other->addr) + other->length;
   }
 
-  ibv_mr* FindMemoryRegion(void* addr, size_t length);
+  ibv_mr* FindMemoryRegion(const Tensor* tensor);
 
   void InsertMemoryRegion(void* addr, size_t length,
                           const std::string& allocator_name);
@@ -152,7 +147,6 @@ class GdrMemoryManager : public RemoteMemoryManager {
   const string port_;
   RdmaEndpointPtr listening_;
   std::atomic<bool> stopped_;
-  int epfd_;
   int numa_node_;
 
   // Server side endpoints
@@ -163,15 +157,19 @@ class GdrMemoryManager : public RemoteMemoryManager {
   std::atomic<TensorKey> next_key_;
 
   // Server side on-the-fly tensor buffers
-  mutex server_mu_;
-  std::map<TensorKey, const TensorBuffer*> tensor_buffers_
-      GUARDED_BY(server_mu_);
+  mutex buf_mu_;
+  std::map<TensorKey, const TensorBuffer*> tensor_buffers_ GUARDED_BY(buf_mu_);
 
   // Client side endpoints
   mutex client_mu_;
   std::map<std::pair<string, string>, RdmaEndpointPtr> clients_
       GUARDED_BY(client_mu_);
 
+  // Client side callbacks
+  mutex callback_mu_;
+  std::map<TensorKey, StatusCallback> tensor_callbacks_
+      GUARDED_BY(callback_mu_);
+
   // Managed memory regions
   mutex alloc_mu_;
   std::vector<MemoryRegionPtr> mrs_ GUARDED_BY(alloc_mu_);
@@ -184,16 +182,9 @@ GdrMemoryManager::GdrMemoryManager(const string& host, const string& port)
       port_(port),
       listening_(nullptr, EndpointDeleter),
       stopped_(true),
-      next_key_(0) {}
-
-GdrMemoryManager::~GdrMemoryManager() { close(epfd_); }
+      next_key_(static_cast<uint32_t>(random::New64())) {}
 
 Status GdrMemoryManager::Init() {
-  epfd_ = epoll_create1(0);
-  if (epfd_ == -1) {
-    return errors::Unavailable(strerror(errno), ": ", "epoll_create");
-  }
-
   rdma_addrinfo* addrinfo;
   rdma_addrinfo hints = {};
   hints.ai_port_space = RDMA_PS_TCP;
@@ -206,7 +197,7 @@ Status GdrMemoryManager::Init() {
 
   ibv_qp_init_attr init_attr = {};
   init_attr.qp_type = IBV_QPT_RC;
-  init_attr.cap.max_recv_wr = 32;
+  init_attr.cap.max_recv_wr = 1024;
   init_attr.cap.max_send_wr = 1;
   init_attr.cap.max_recv_sge = 1;
   init_attr.cap.max_send_sge = 1;
@@ -239,14 +230,6 @@ Status GdrMemoryManager::Init() {
                                "cannot set server to non-blocking mode");
   }
 
-  epoll_event event = {};
-  event.events = EPOLLIN | EPOLLPRI;
-  event.data.ptr = listening_.get();
-  if (epoll_ctl(epfd_, EPOLL_CTL_ADD, listening_->channel->fd, &event)) {
-    return errors::Unavailable(strerror(errno), ": ",
-                               "cannot add server to epoll");
-  }
-
   numa_node_ = TryToReadNumaNode(listening_->verbs->device);
 
   SubAllocator::Visitor alloc_visitor = [this](void* ptr, int numa_node,
@@ -278,11 +261,9 @@ Status GdrMemoryManager::Init() {
       VLOG(2) << "Registering RDMA capable memory region on GPU " << gpu_id;
       InsertMemoryRegion(ptr, num_bytes, strings::StrCat("GPU:", gpu_id));
     };
-    for (int numa_idx = 0; numa_idx < port::NUMANumNodes(); ++numa_idx) {
-      GPUProcessState::singleton()->AddGPUAllocVisitor(numa_idx,
-                                                       cuda_alloc_visitor);
-    }
-    VLOG(1) << "Instrumenting GPU allocator(s) for all Numas";
+    GPUProcessState::singleton()->AddGPUAllocVisitor(numa_node_,
+                                                     cuda_alloc_visitor);
+    LOG(INFO) << "Instrumenting GPU allocator for NUMA " << numa_node_;
   }
 #endif  // GOOGLE_CUDA
   return Status::OK();
@@ -291,95 +272,90 @@ Status GdrMemoryManager::Init() {
 void GdrMemoryManager::Run() {
   stopped_ = false;
   while (!stopped_) {
-    epoll_event events[32];
-    int ret = epoll_wait(epfd_, events, 32, 1);
-    if (ret == -1) {
-      LOG(ERROR) << "epoll_wait: " << strerror(errno);
-      return;
-    }
-    for (int i = 0; i < ret; i++) {
-      rdma_cm_id* id = static_cast<rdma_cm_id*>(events[i].data.ptr);
-      if (id == listening_.get()) {
-        // Accept incoming connections
-        if (!rdma_get_request(listening_.get(), &id)) {
-          if (!rdma_accept(id, nullptr)) {
-            LOG(INFO) << "Accepted new RDMA connection";
-            if (ibv_req_notify_cq(id->recv_cq, 0)) {
-              LOG(ERROR) << strerror(errno) << ": ibv_req_notify_cq failed";
-              EndpointDeleter(id);
-              continue;
-            }
-            for (int i = 0; i < 32; i++) {
-              if (rdma_post_recvv(id, nullptr, nullptr, 0)) {
-                LOG(ERROR) << strerror(errno) << ": rdma_post_recvv failed";
-                EndpointDeleter(id);
-                continue;
-              }
-            }
-            int flags = fcntl(id->recv_cq_channel->fd, F_GETFL, 0);
-            if (fcntl(id->recv_cq_channel->fd, F_SETFL, flags | O_NONBLOCK)) {
-              LOG(ERROR) << strerror(errno)
-                         << ": cannot set server_client to non-blocking mode";
-              EndpointDeleter(id);
-              continue;
-            }
-            epoll_event event = {};
-            event.events = EPOLLIN | EPOLLPRI;
-            event.data.ptr = id;
-            if (epoll_ctl(epfd_, EPOLL_CTL_ADD, id->recv_cq_channel->fd,
-                          &event)) {
-              LOG(ERROR) << strerror(errno)
-                         << ": cannot add server client to epoll";
-              EndpointDeleter(id);
-              continue;
-            }
-            server_clients_.push_back({id, EndpointDeleter});
+    rdma_cm_id* id = nullptr;
+    // Accept incoming connections
+    if (!rdma_get_request(listening_.get(), &id)) {
+      if (!rdma_accept(id, nullptr)) {
+        LOG(INFO) << "Accepted new RDMA connection";
+        for (int i = 0; i < 1024; i++) {
+          if (rdma_post_recvv(id, nullptr, nullptr, 0)) {
+            LOG(ERROR) << strerror(errno) << ": rdma_post_recvv failed";
+            EndpointDeleter(id);
+            continue;
           }
         }
-      } else {
-        // Polling work completions
-        ibv_cq* cq;
-        void* context;
-        if (!ibv_get_cq_event(id->recv_cq_channel, &cq, &context)) {
-          ibv_ack_cq_events(id->recv_cq, 1);
-          if (ibv_req_notify_cq(id->recv_cq, 0)) {
-            LOG(ERROR) << strerror(errno) << ": ibv_req_notify_cq failed";
-            continue;
+        server_clients_.push_back({id, EndpointDeleter});
+      }
+    }
+    // Polling server side work completions
+    for (const auto& client : server_clients_) {
+      ibv_wc wc[32];
+      int ret = ibv_poll_cq(client->recv_cq, 32, wc);
+      if (ret < 0) {
+        LOG(ERROR) << "ibv_poll_cq failed";
+        continue;
+      }
+      for (int i = 0; i < ret; i++) {
+        if (wc[i].opcode != IBV_WC_RECV_RDMA_WITH_IMM) {
+          LOG(ERROR) << "Received unknown operation " << wc[i].opcode;
+        }
+        if (wc[i].status != 0) {
+          LOG(ERROR) << ibv_wc_status_str(wc[i].status);
+        }
+        TensorKey tensor_key = ntohl(wc[i].imm_data);
+
+        if (rdma_post_recvv(client.get(), nullptr, nullptr, 0)) {
+          perror("rdma_post_recvv");
+          LOG(ERROR) << "rdma_post_recvv failed";
+        }
+
+        mutex_lock l(buf_mu_);
+        auto iter = tensor_buffers_.find(tensor_key);
+        if (iter == std::end(tensor_buffers_)) {
+          LOG(ERROR) << "Cannot find tensor buffer for tensor key "
+                     << tensor_key;
+        } else {
+          const TensorBuffer* buffer = iter->second;
+          buffer->Unref();
+          tensor_buffers_.erase(iter);
+        }
+      }
+    }
+    // Polling client side work completions
+    if (client_mu_.try_lock()) {
+      for (const auto& client : clients_) {
+        ibv_wc wc[32];
+        int ret = ibv_poll_cq(client.second->send_cq, 32, wc);
+        for (int i = 0; i < ret; i++) {
+          Status s;
+          if (wc[i].status) {
+            s = errors::Unavailable(ibv_wc_status_str(wc[i].status));
+          } else {
+            s = Status::OK();
           }
-          ibv_wc wc[32];
-          int ret = ibv_poll_cq(id->recv_cq, 32, wc);
-          if (ret < 0) {
-            LOG(ERROR) << "ibv_poll_cq failed";
-            continue;
+          TensorKey key = wc[i].wr_id;
+
+          ibv_send_wr wr = {};
+          wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
+          wr.imm_data = htonl(key);
+          ibv_send_wr* bad_wr;
+          if (ibv_post_send(client.second->qp, &wr, &bad_wr)) {
+            LOG(ERROR) << strerror(errno)
+                       << ": ibv_post_send failed for tensor_key " << key;
           }
-          for (int i = 0; i < ret; i++) {
-            if (wc[i].opcode != IBV_WC_RECV_RDMA_WITH_IMM) {
-              LOG(ERROR) << "Received unknown operation " << wc[i].opcode;
-            }
-            if (wc[i].status != 0) {
-              LOG(ERROR) << ibv_wc_status_str(wc[i].status);
-            }
-            TensorKey tensor_key = ntohl(wc[i].imm_data);
-            {
-              mutex_lock l(server_mu_);
-              auto iter = tensor_buffers_.find(tensor_key);
-              if (iter == std::end(tensor_buffers_)) {
-                LOG(ERROR) << "Cannot find tensor buffer for tensor key "
-                           << tensor_key;
-              } else {
-                const TensorBuffer* buffer = iter->second;
-                buffer->Unref();
-                tensor_buffers_.erase(iter);
-              }
-            }
-            if (rdma_post_recvv(id, nullptr, nullptr, 0)) {
-              perror("rdma_post_recvv");
-              LOG(ERROR) << "rdma_post_recvv failed";
-              continue;
-            }
+
+          mutex_lock l(callback_mu_);
+          auto iter = tensor_callbacks_.find(key);
+          if (iter != std::end(tensor_callbacks_)) {
+            iter->second(s);
+            tensor_callbacks_.erase(iter);
+          } else {
+            LOG(WARNING) << "Cannot find client callback with tensor key "
+                         << key;
           }
         }
       }
+      client_mu_.unlock();
     }
   }
 }
@@ -390,116 +366,58 @@ void GdrMemoryManager::TransportOptionsFromTensor(
     ::google::protobuf::Any* mutable_transport_options, const Tensor& tensor,
     Device* device, DeviceContext* device_context, bool on_host,
     StatusCallback done) {
-  auto buffer = DMAHelper::buffer(&tensor);
-  void* addr = buffer->data();
-  size_t length = buffer->size();
-  if (length == 0) {
-    done(errors::Unavailable("Cannot register tensor buffer of size 0"));
-    return;
-  }
-
-  ibv_mr* mr = FindMemoryRegion(addr, length);
+  ibv_mr* mr = FindMemoryRegion(&tensor);
+  const TensorBuffer* buffer = DMAHelper::buffer(&tensor);
 
-#if GOOGLE_CUDA
-  if (device->tensorflow_gpu_device_info() && !on_host) {
-    Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0);
-    Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape());
-    GPUUtil::CopyGPUTensorToCPU(
-        device, device_context, &tensor, host_copy,
-        [done, host_copy, mutable_transport_options, this](const Status& s) {
-          if (!s.ok()) {
-            done(s);
-            delete host_copy;
-            return;
-          }
-          auto buffer = DMAHelper::buffer(host_copy);
-          void* addr = buffer->data();
-          size_t length = buffer->size();
-          ibv_mr* mr = FindMemoryRegion(addr, length);
-
-          if (mr == nullptr) {
-            done(errors::Unavailable("Cannot find pinned memory region"));
-            delete host_copy;
-            return;
-          }
-
-          buffer->Ref();
-          TensorKey tensor_key = next_key_++;
-          {
-            mutex_lock l(server_mu_);
-            tensor_buffers_.insert(std::make_pair(tensor_key, buffer));
-          }
-
-          uint64_t checksum = 0;
-          if (VLOG_IS_ON(2)) {
-            checksum = GPUUtil::Checksum(*host_copy);
-          }
-
-          RemoteMemoryRegion remote_mr;
-          remote_mr.set_host(host_);
-          remote_mr.set_port(port_);
-          remote_mr.set_addr(reinterpret_cast<uint64_t>(addr));
-          remote_mr.set_rkey(mr->rkey);
-          remote_mr.set_tensor_key(tensor_key);
-          remote_mr.set_checksum(checksum);
-          mutable_transport_options->PackFrom(remote_mr);
-
-          done(Status::OK());
-          delete host_copy;
-        });
-    return;
-  }
-#endif
+  Tensor* copy = nullptr;
 
   if (mr == nullptr) {
-    Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_);
-    Tensor host_copy(alloc, tensor.dtype(), tensor.shape());
-
-    std::memcpy(DMAHelper::buffer(&host_copy)->data(), buffer->data(), length);
-    VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer";
-
-    buffer = DMAHelper::buffer(&host_copy);
-    addr = buffer->data();
-    length = buffer->size();
-
-    mr = FindMemoryRegion(addr, length);
+    AllocatorAttributes alloc_attrs;
+    alloc_attrs.set_gpu_compatible(true);
+    alloc_attrs.set_nic_compatible(true);
+    alloc_attrs.set_on_host(true);
+    Allocator* alloc = device->GetAllocator(alloc_attrs);
+    copy = new Tensor(alloc, tensor.dtype(), tensor.shape());
+
+    mr = FindMemoryRegion(copy);
+    buffer = DMAHelper::buffer(copy);
     if (mr == nullptr) {
       done(errors::Unavailable("Cannot find pinned memory region"));
+      delete copy;
       return;
     }
-
-    buffer->Ref();
-  } else {
-    buffer->Ref();
   }
 
   TensorKey tensor_key = next_key_++;
+  buffer->Ref();
   {
-    mutex_lock l(server_mu_);
+    mutex_lock l(buf_mu_);
     tensor_buffers_.insert(std::make_pair(tensor_key, buffer));
   }
 
-  uint64_t checksum = 0;
-  if (VLOG_IS_ON(2)) {
-#ifdef GOOGLE_CUDA
-    if (device->tensorflow_gpu_device_info() && !on_host) {
-      checksum = GPUUtil::Checksum(device, device_context, tensor);
-    } else {
-      checksum = GPUUtil::Checksum(tensor);
-    }
-#endif
-  }
-
   RemoteMemoryRegion remote_mr;
   remote_mr.set_host(host_);
   remote_mr.set_port(port_);
-  remote_mr.set_addr(reinterpret_cast<uint64_t>(addr));
+  remote_mr.set_addr(reinterpret_cast<uint64_t>(buffer->data()));
   remote_mr.set_rkey(mr->rkey);
   remote_mr.set_tensor_key(tensor_key);
-  remote_mr.set_checksum(checksum);
   mutable_transport_options->PackFrom(remote_mr);
 
-  done(Status::OK());
+  if (copy && device->tensorflow_gpu_device_info() && !on_host) {
+    device_context->CopyDeviceTensorToCPU(&tensor, "" /* tensor_name */, device,
+                                          copy, [done, copy](const Status& s) {
+                                            done(s);
+                                            delete copy;
+                                          });
+    return;
+  } else if (copy) {
+    std::memcpy(buffer->data(), DMAHelper::buffer(&tensor)->data(),
+                buffer->size());
+    done(Status::OK());
+    delete copy;  // OK to delete; we have reffed the underlying TensorBuffer
+  } else {
+    done(Status::OK());
+  }
 }
 
 void GdrMemoryManager::TensorFromTransportOptions(
@@ -512,42 +430,10 @@ void GdrMemoryManager::TensorFromTransportOptions(
     return;
   }
 
-  auto buffer = DMAHelper::buffer(tensor);
-  void* addr = buffer->data();
-  size_t length = buffer->size();
-  ibv_mr* mr = FindMemoryRegion(addr, length);
-
-  Tensor host_copy;
-#if GOOGLE_CUDA
-  if (mr == nullptr && !on_host) {
-    Allocator* alloc =
-        GPUProcessState::singleton()->GetCUDAHostAllocator(numa_node_);
-    host_copy = Tensor(alloc, tensor->dtype(), tensor->shape());
-    buffer = DMAHelper::buffer(&host_copy);
-    addr = buffer->data();
-    length = buffer->size();
-    mr = FindMemoryRegion(addr, length);
-  }
-#endif  // GOOGLE_CUDA
-
-  if (mr == nullptr) {
-    Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_);
-    host_copy = Tensor(alloc, tensor->dtype(), tensor->shape());
-
-    buffer = DMAHelper::buffer(&host_copy);
-    addr = buffer->data();
-    length = buffer->size();
-
-    mr = FindMemoryRegion(addr, length);
-    if (mr == nullptr) {
-      done(errors::Unavailable("Cannot find pinned memory region"));
-      return;
-    }
-  }
-
-  decltype(clients_)::iterator iter;
-  bool success;
+  rdma_cm_id* id = nullptr;
   {
+    decltype(clients_)::iterator iter;
+    bool success;
     mutex_lock l(client_mu_);
     std::tie(iter, success) = clients_.insert(
         std::make_pair(std::make_pair(remote_mr.host(), remote_mr.port()),
@@ -560,93 +446,95 @@ void GdrMemoryManager::TensorFromTransportOptions(
         return;
       }
     }
+    id = iter->second.get();
   }
-  rdma_cm_id* id = iter->second.get();
 
-  uint64_t start = Env::Default()->NowMicros();
+  ibv_mr* mr = FindMemoryRegion(tensor);
+  const TensorBuffer* buffer = DMAHelper::buffer(tensor);
 
-  if (rdma_post_read(id, nullptr, buffer->data(), buffer->size(), mr, 0,
-                     remote_mr.addr(), remote_mr.rkey())) {
-    done(errors::Unavailable(strerror(errno), ": ", "rdma_post_read failed"));
-    return;
-  }
+  const Tensor* copy = nullptr;
 
-  ibv_send_wr wr = {};
-  wr.opcode = IBV_WR_RDMA_WRITE_WITH_IMM;
-  wr.imm_data = htonl(remote_mr.tensor_key());
-  wr.send_flags = IBV_SEND_SIGNALED;
-  ibv_send_wr* bad_wr;
-  if (ibv_post_send(id->qp, &wr, &bad_wr)) {
-    done(errors::Unavailable(strerror(errno), ": ", "ibv_post_send failed"));
-    return;
+  if (mr == nullptr) {
+    AllocatorAttributes alloc_attrs;
+    alloc_attrs.set_gpu_compatible(true);
+    alloc_attrs.set_nic_compatible(true);
+    alloc_attrs.set_on_host(true);
+    Allocator* alloc = device->GetAllocator(alloc_attrs);
+    copy = new Tensor(alloc, tensor->dtype(), tensor->shape());
+
+    mr = FindMemoryRegion(copy);
+    buffer = DMAHelper::buffer(copy);
+    if (mr == nullptr) {
+      done(errors::Unavailable("Cannot find pinned memory region"));
+      delete copy;
+      return;
+    }
   }
 
-  ibv_wc wc = {};
-  int ret;
-  while ((ret = ibv_poll_cq(id->send_cq, 1, &wc)) == 0)
-    ;
-  if (ret < 0 || wc.status) {
-    done(errors::Unavailable(ibv_wc_status_str(wc.status)));
-    return;
-  }
+  uint64_t start = Env::Default()->NowMicros();
 
-#if GOOGLE_CUDA
-  if (device->tensorflow_gpu_device_info() && !on_host &&
-      host_copy.NumElements() > 0) {
-    uint64_t checksum = 0;
-    if (VLOG_IS_ON(2)) {
-      checksum = GPUUtil::Checksum(host_copy);
-      CHECK(checksum == remote_mr.checksum())
-          << "Checksum mismatch: " << checksum << "!=" << remote_mr.checksum();
-    }
-    Tensor* ref = new Tensor;
-    std::swap(host_copy, *ref);
-    GPUUtil::CopyCPUTensorToGPU(
-        ref, device_context, device, tensor,
-        [ref, done, buffer, remote_mr, start](const Status& s) {
-          if (!s.ok()) {
-            done(s);
-            delete ref;
-            return;
-          }
-          uint64_t end = Env::Default()->NowMicros();
-
-          VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey()
-                  << " of size " << buffer->size() << " with tensor key "
-                  << remote_mr.tensor_key() << " took " << (end - start)
-                  << " micros";
-          done(Status::OK());
-          delete ref;
-        });
-    return;
-  }
-#endif  // GOOGLE_CUDA
+  TensorKey tensor_key = remote_mr.tensor_key();
 
-  if ((on_host || !device->tensorflow_gpu_device_info()) &&
-      host_copy.NumElements() > 0) {
-    std::memcpy(DMAHelper::buffer(tensor)->data(), addr, length);
-    VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer";
-  }
+  StatusCallback callback = [done, copy, device, device_context, on_host,
+                             tensor, start, tensor_key](const Status& s) {
 
-  uint64_t end = Env::Default()->NowMicros();
+    if (!s.ok()) {
+      done(s);
+      if (copy) {
+        delete copy;
+      }
+      return;
+    }
 
-  VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey()
-          << " of size " << buffer->size() << " with tensor key "
-          << remote_mr.tensor_key() << " took " << (end - start) << " micros";
+    VLOG(2) << "RDMA of tensor " << tensor_key << " of size "
+            << DMAHelper::buffer(tensor)->size() << " took "
+            << (Env::Default()->NowMicros() - start) << " micros";
+
+    if (copy && device->tensorflow_gpu_device_info() && !on_host) {
+      device_context->CopyCPUTensorToDevice(copy, device, tensor,
+                                            [done, copy](const Status& s) {
+                                              done(s);
+                                              delete copy;
+                                            });
+    } else if (copy) {
+      std::memcpy(DMAHelper::buffer(tensor)->data(),
+                  DMAHelper::buffer(copy)->data(),
+                  DMAHelper::buffer(copy)->size());
+      done(s);
+      delete copy;
+    } else {
+      done(s);
+    }
+  };
 
-  uint64_t checksum = 0;
-  if (VLOG_IS_ON(2)) {
-#ifdef GOOGLE_CUDA
-    if (device->tensorflow_gpu_device_info() && !on_host) {
-      checksum = GPUUtil::Checksum(device, device_context, *tensor);
+  {
+    mutex_lock l(callback_mu_);
+    if (tensor_callbacks_.find(tensor_key) == std::end(tensor_callbacks_)) {
+      tensor_callbacks_.insert(std::make_pair(tensor_key, std::move(callback)));
     } else {
-      checksum = GPUUtil::Checksum(*tensor);
+      done(errors::Unavailable("Received duplicated tensor key"));
+      if (copy) {
+        delete copy;
+      }
+      return;
+    }
+  }
+
+  if (rdma_post_read(id, reinterpret_cast<void*>(tensor_key), buffer->data(),
+                     buffer->size(), mr, IBV_SEND_SIGNALED, remote_mr.addr(),
+                     remote_mr.rkey())) {
+    done(errors::Unavailable(strerror(errno), ": ", "rdma_post_read failed"));
+    {
+      mutex_lock l(callback_mu_);
+      auto iter = tensor_callbacks_.find(tensor_key);
+      if (iter != std::end(tensor_callbacks_)) {
+        tensor_callbacks_.erase(iter);
+      }
+    }
+    if (copy) {
+      delete copy;
     }
-    CHECK(checksum == remote_mr.checksum())
-        << "Checksum mismatch: " << checksum << "!=" << remote_mr.checksum();
-#endif
   }
-  done(Status::OK());
 }
 
 Status GdrMemoryManager::CreateEndpoint(const string& host, const string& port,
@@ -663,7 +551,7 @@ Status GdrMemoryManager::CreateEndpoint(const string& host, const string& port,
   ibv_qp_init_attr init_attr = {};
   init_attr.qp_type = IBV_QPT_RC;
   init_attr.cap.max_recv_wr = 1;
-  init_attr.cap.max_send_wr = 32;
+  init_attr.cap.max_send_wr = 1024;
   init_attr.cap.max_recv_sge = 1;
   init_attr.cap.max_send_sge = 1;
 
@@ -687,8 +575,8 @@ Status GdrMemoryManager::CreateEndpoint(const string& host, const string& port,
   return Status::OK();
 }
 
-ibv_mr* GdrMemoryManager::FindMemoryRegion(void* addr, size_t length) {
-  if (length == 0) return nullptr;
+ibv_mr* GdrMemoryManager::FindMemoryRegion(const Tensor* tensor) {
+  const void* addr = DMAHelper::buffer(tensor)->data();
   mutex_lock l(alloc_mu_);
   auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator);
   if (iter == std::end(mrs_) || iter->get()->addr > addr) {
diff --git a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
index fbccbead03..5f8c300155 100644
--- a/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
+++ b/tensorflow/contrib/gdr/gdr_rendezvous_mgr.cc
@@ -58,11 +58,9 @@ class GdrRecvTensorCall : public BaseRecvTensorCall {
     resp_.InitAlloc(dst_device_, recv_args_.alloc_attrs);
     StatusCallback cb = [this, recv_done](const Status& s) {
       bool dma_ok = resp_.metadata().has_transport_options();
-      if (s.ok() && tensor().TotalBytes() > 0 && (!is_dead()) && dma_ok) {
+      if (s.ok() && tensor().TotalBytes() > 1024 && (!is_dead()) && dma_ok) {
         auto transport_options = resp_.metadata().transport_options();
-        const bool on_host =
-            (dst_device_->tensorflow_gpu_device_info() == nullptr) ||
-            recv_args_.alloc_attrs.on_host();
+        const bool on_host = recv_args_.alloc_attrs.on_host();
         remote_memory_manager_->TensorFromTransportOptions(
             const_cast<Tensor*>(&tensor()), transport_options, dst_device_,
             recv_args_.device_context, on_host,
@@ -70,9 +68,6 @@ class GdrRecvTensorCall : public BaseRecvTensorCall {
               if (!s.ok()) {
                 mutex_lock l(mu_);
                 status_.Update(s);
-                LOG(ERROR) << "Cannot find pinned memory region from allocator "
-                           << dst_device_->GetAllocator(recv_args_.alloc_attrs)
-                                  ->Name();
               }
               recv_done();
             });
diff --git a/tensorflow/contrib/gdr/gdr_server_lib.cc b/tensorflow/contrib/gdr/gdr_server_lib.cc
index b3f48ec1dd..dc0d5d548b 100644
--- a/tensorflow/contrib/gdr/gdr_server_lib.cc
+++ b/tensorflow/contrib/gdr/gdr_server_lib.cc
@@ -74,9 +74,8 @@ Status GdrServer::Start() {
 }
 
 Status GdrServer::Stop() {
-  TF_RETURN_IF_ERROR(GrpcServer::Stop());
   remote_memory_manager_->Stop();
-  return Status::OK();
+  return GrpcServer::Stop();
 }
 
 Status GdrServer::Join() {
diff --git a/tensorflow/contrib/gdr/gdr_worker.cc b/tensorflow/contrib/gdr/gdr_worker.cc
index 867cb83f42..016e5ea27b 100644
--- a/tensorflow/contrib/gdr/gdr_worker.cc
+++ b/tensorflow/contrib/gdr/gdr_worker.cc
@@ -18,9 +18,6 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/device_mgr.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
-#if GOOGLE_CUDA
-#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
-#endif  // GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/common_runtime/step_stats_collector.h"
 #include "tensorflow/core/distributed_runtime/graph_mgr.h"
@@ -78,7 +75,7 @@ void GdrWorker::GrpcRecvTensorAsync(CallOptions* opts,
   const bool dma_ok = request->dma_ok();
   env_->rendezvous_mgr->RecvLocalAsync(
       step_id, parsed,
-      [this, opts, response, done, src_dev, dma_ok](
+      [this, opts, response, done, src_dev, request, dma_ok](
           const Status& status, const Rendezvous::Args& send_args,
           const Rendezvous::Args&, const Tensor& val, const bool is_dead) {
         opts->ClearCancelCallback();
@@ -89,10 +86,8 @@ void GdrWorker::GrpcRecvTensorAsync(CallOptions* opts,
           // 3) the tensor has the on_host allocation attribute,
           // i.e. it's in CPU RAM *independent of its assigned
           // device type*.
-          const bool on_host =
-              (src_dev->tensorflow_gpu_device_info() == nullptr) ||
-              send_args.alloc_attrs.on_host();
-          if (val.TotalBytes() > 0 && (!is_dead) &&
+          const bool on_host = send_args.alloc_attrs.on_host();
+          if (val.TotalBytes() > 1024 && (!is_dead) &&
               DMAHelper::CanUseDMA(&val) && dma_ok) {
             // DMA cases.
             RecvTensorResponse* proto = new RecvTensorResponse;
@@ -117,8 +112,7 @@ void GdrWorker::GrpcRecvTensorAsync(CallOptions* opts,
           } else {
             // Non-DMA cases.
             if (src_dev->tensorflow_gpu_device_info() && (!on_host)) {
-#if GOOGLE_CUDA
-              const DeviceContext* send_dev_context = send_args.device_context;
+              DeviceContext* send_dev_context = send_args.device_context;
               AllocatorAttributes alloc_attrs;
               alloc_attrs.set_gpu_compatible(true);
               alloc_attrs.set_on_host(true);
@@ -127,7 +121,8 @@ void GdrWorker::GrpcRecvTensorAsync(CallOptions* opts,
               CHECK(send_dev_context)
                   << "send dev name: " << src_dev->name()
                   << " gpu_info: " << src_dev->tensorflow_gpu_device_info();
-              // "val" is on a GPU. Uses GPUUtil to fill the response proto.
+              // "val" is on an accelerator device. Uses the device_context to
+              // fill the copy on host.
               StatusCallback copy_ready = [response, done, copy,
                                            is_dead](const Status& s) {
                 // The value is now ready to be returned on the wire.
@@ -136,11 +131,8 @@ void GdrWorker::GrpcRecvTensorAsync(CallOptions* opts,
                 delete copy;
               };
 
-              GPUUtil::CopyGPUTensorToCPU(src_dev, send_dev_context, &val, copy,
-                                          copy_ready);
-#else
-              done(errors::Internal("No GPU device in process"));
-#endif  // GOOGLE_CUDA
+              send_dev_context->CopyDeviceTensorToCPU(
+                  &val, request->rendezvous_key(), src_dev, copy, copy_ready);
             } else {
               grpc::EncodeTensorToByteBuffer(is_dead, val, response);
               done(Status::OK());
-- 
GitLab


From 0a109261334273042a63a4feea97c791ac59a2e5 Mon Sep 17 00:00:00 2001
From: AG Ramesh <ag.ramesh@intel.com>
Date: Wed, 5 Dec 2018 22:05:26 -0800
Subject: [PATCH 116/873] Clang format fixes

---
 tensorflow/core/graph/mkl_layout_pass.cc      | 4 ++--
 tensorflow/core/kernels/mkl_conv_ops.cc       | 6 +++---
 tensorflow/core/kernels/mkl_fused_ops_test.cc | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index f597b3c76c..8d2f142532 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1584,9 +1584,9 @@ int MklLayoutRewritePass::SetUpContiguousInputs(
     for (const Edge* e : filter_node->out_edges()) {
       if ((e->dst()->type_string() == csinfo_.mkl_conv2d ||
            e->dst()->type_string() == csinfo_.mkl_pad_with_conv2d ||
-           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias || 
+           e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias ||
            e->dst()->type_string() == csinfo_.mkl_fused_conv2d) &&
-           e->dst_input() == kConv2DFilterInputSlotIdx
+          e->dst_input() == kConv2DFilterInputSlotIdx
           /* filter is 2nd input of Conv2D and _MklConv2D. */) {
         if (conv2d_node != nullptr) {
           VLOG(1) << "MklLayoutRewritePass: unusual case of same filter"
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index eb17c29bb0..4eea1711e5 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -1299,11 +1299,11 @@ class MklConvOp : public OpKernel {
 template <typename Device, typename Tinput, typename Tfilter, typename Tbias,
           typename Toutput, typename Ttemp_output>
 class MklFusedConvOp : public MklConvOp<Device, Tinput, Tfilter, Tbias, Toutput,
-                                        Ttemp_output, false> {
+                                        Ttemp_output, int32, false, false> {
  public:
   explicit MklFusedConvOp(OpKernelConstruction* context)
-      : MklConvOp<Device, Tinput, Tfilter, Tbias, Toutput, Ttemp_output, false>(
-            context) {
+      : MklConvOp<Device, Tinput, Tfilter, Tbias, Toutput, Ttemp_output, int32,
+                  false, false>(context) {
     // Since we came here through the registration of _MklFusedConv2D then get
     // all information from 'fused_ops' and 'num_args'
     std::vector<string> fused_ops;
diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc
index ce4c1aec04..c9416e154b 100644
--- a/tensorflow/core/kernels/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc
@@ -339,8 +339,8 @@ class FusedPadConvOpTest : public OpsTestBase {
     // Compare output to expected results
     const Tensor& first = *GetOutput(0);
     const Tensor& second = *GetOutput(2);
-    ConvMklToTF conv_comp;
-    conv_comp.ConvertAndCompare<T>(dtype, first, second, expected);
+    ConvMklToTF<T> conv_comp;
+    conv_comp.ConvertAndCompare(dtype, first, second, expected);
   }
 };
 
-- 
GitLab


From ed17f60a544e31ba79a649d1decafd29887fd6d9 Mon Sep 17 00:00:00 2001
From: manhyuk <manhyuk@kw.ac.kr>
Date: Thu, 6 Dec 2018 15:47:30 +0900
Subject: [PATCH 117/873] fix typo

---
 tensorflow/compiler/xla/shape_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 84a27f662a..30bf1dfb03 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -266,7 +266,7 @@ class ShapeUtil {
   }
 
   // Returns the higher-precision element type if a and b are both floating
-  // point types; otherwise, checks that that they have the same element type
+  // point types; otherwise, checks that they have the same element type
   // and returns it.
   static PrimitiveType HigherPrecisionElementType(const Shape& a,
                                                   const Shape& b) {
-- 
GitLab


From aec214bc3870ecc0f5f831c2523a1609d8c12871 Mon Sep 17 00:00:00 2001
From: manhyuk <manhyuk@kw.ac.kr>
Date: Thu, 6 Dec 2018 15:48:47 +0900
Subject: [PATCH 118/873] fix typo

---
 tensorflow/python/kernel_tests/cond_v2_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 843d007cc8..502d504bfe 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -1036,7 +1036,7 @@ class CondV2ColocationGroupAndDeviceTest(test.TestCase):
                 self.evaluate(cond_v2.cond_v2(constant_op.constant(True),
                                               fn2, fn2)))
         else:
-          self.skipTest("Test requrires a GPU to check GPU device placement.")
+          self.skipTest("Test requires a GPU to check GPU device placement.")
 
   def testDeviceInAndOutOfCond(self):
     with ops.Graph().as_default() as g:
-- 
GitLab


From d14f39054f44d0a641db3a0997fe5d720918cd07 Mon Sep 17 00:00:00 2001
From: manhyuk <manhyuk@kw.ac.kr>
Date: Thu, 6 Dec 2018 15:50:58 +0900
Subject: [PATCH 119/873] fix typo

---
 tensorflow/contrib/cmake/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md
index df8b48dfc4..b2badc5785 100644
--- a/tensorflow/contrib/cmake/README.md
+++ b/tensorflow/contrib/cmake/README.md
@@ -157,7 +157,7 @@ suitable interface for project configuration and dependency setting.
     press `Finish`. Wait for a moment, the default project dependecy would
     automatically generate.
 6.  There are a few options that you can customize your own build. **The setting
-    here is crucial for a sucessful build, please check all items carefully.**
+    here is crucial for a successful build, please check all items carefully.**
 
     *   `tensorflow_BUILD_ALL_KERNELS` should alway be `on`
     *   `tensorflow_BUILD_CC_EXAMPLE` is default to be `on`. This can help you
-- 
GitLab


From 2b13b2f52bee1317a7cb6320e269d32afcbd7e97 Mon Sep 17 00:00:00 2001
From: vanderliang <vanderliang@gmail.com>
Date: Wed, 5 Dec 2018 16:39:56 +0800
Subject: [PATCH 120/873] Fix ClusterSpec.as_dict with only chief and ps

If the worker num is zero, continue the loop.
---
 tensorflow/python/training/server_lib.py      |  3 +++
 tensorflow/python/training/server_lib_test.py | 22 +++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/tensorflow/python/training/server_lib.py b/tensorflow/python/training/server_lib.py
index 46543413e4..bb6ad0e599 100644
--- a/tensorflow/python/training/server_lib.py
+++ b/tensorflow/python/training/server_lib.py
@@ -332,6 +332,9 @@ class ClusterSpec(object):
     ret = {}
     for job in self.jobs:
       task_indices = self.task_indices(job)
+      if len(task_indices) == 0:
+        ret[job] = {}
+        continue
       if max(task_indices) + 1 == len(task_indices):
         # Return a list because the task indices are dense. This
         # matches the behavior of `as_dict()` before support for
diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py
index cf995707fc..653235a5ca 100644
--- a/tensorflow/python/training/server_lib_test.py
+++ b/tensorflow/python/training/server_lib_test.py
@@ -456,6 +456,28 @@ class ClusterSpecTest(test.TestCase):
         expected_proto,
         server_lib.ClusterSpec(cluster_spec.as_dict()).as_cluster_def())
 
+  def testProtoDictDefEquivalencesWithZeroWorker(self):
+    cluster_spec = server_lib.ClusterSpec({
+        "ps": ["ps0:2222", "ps1:2222"],
+        "worker": []
+    })
+
+    expected_proto = """
+    job { name: 'ps' tasks { key: 0 value: 'ps0:2222' }
+                     tasks { key: 1 value: 'ps1:2222' } }
+    job { name: 'worker' }
+    """
+
+    self.assertProtoEquals(expected_proto, cluster_spec.as_cluster_def())
+    self.assertProtoEquals(
+        expected_proto, server_lib.ClusterSpec(cluster_spec).as_cluster_def())
+    self.assertProtoEquals(
+        expected_proto,
+        server_lib.ClusterSpec(cluster_spec.as_cluster_def()).as_cluster_def())
+    self.assertProtoEquals(
+        expected_proto,
+        server_lib.ClusterSpec(cluster_spec.as_dict()).as_cluster_def())
+
   def testClusterSpecAccessors(self):
     original_dict = {
         "ps": ["ps0:2222", "ps1:2222"],
-- 
GitLab


From 2295e1b7320328ff5659a75613c457d8a6e7d1ac Mon Sep 17 00:00:00 2001
From: Bairen Yi <byronyi@clustar.ai>
Date: Thu, 6 Dec 2018 08:34:36 +0000
Subject: [PATCH 121/873] Cleanup unnecessary GOOGLE_CUDA and tf_cuda_library

---
 tensorflow/contrib/gdr/BUILD                 |  2 +-
 tensorflow/contrib/gdr/gdr_memory_manager.cc | 10 ++++------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/gdr/BUILD b/tensorflow/contrib/gdr/BUILD
index 7ec3c5ff5d..704be917b3 100644
--- a/tensorflow/contrib/gdr/BUILD
+++ b/tensorflow/contrib/gdr/BUILD
@@ -37,7 +37,7 @@ tf_proto_library_cc(
     ],
 )
 
-tf_cuda_library(
+cc_library(
     name = "gdr_memory_manager",
     srcs = ["gdr_memory_manager.cc"],
     hdrs = ["gdr_memory_manager.h"],
diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc
index 69bbab1c39..d677592d9a 100644
--- a/tensorflow/contrib/gdr/gdr_memory_manager.cc
+++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc
@@ -30,12 +30,10 @@ limitations under the License.
 #include "tensorflow/contrib/gdr/gdr.pb.h"
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/common_runtime/dma_helper.h"
-#include "tensorflow/core/common_runtime/process_state.h"
-#include "tensorflow/core/lib/random/random.h"
-#if GOOGLE_CUDA
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
-#endif  // GOOGLE_CUDA
+#include "tensorflow/core/common_runtime/process_state.h"
 #include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/random/random.h"
 #include "tensorflow/core/platform/macros.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/numa.h"
@@ -248,13 +246,13 @@ Status GdrMemoryManager::Init() {
   ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
   LOG(INFO) << "Instrumenting CPU allocator(s)";
 
-#if GOOGLE_CUDA
   for (int numa_idx = 0; numa_idx < port::NUMANumNodes(); ++numa_idx) {
     GPUProcessState::singleton()->AddCUDAHostAllocVisitor(numa_idx,
                                                           alloc_visitor);
     GPUProcessState::singleton()->AddCUDAHostFreeVisitor(numa_idx,
                                                          free_visitor);
   }
+
   if (IsGDRAvailable()) {
     SubAllocator::Visitor cuda_alloc_visitor = [this](void* ptr, int gpu_id,
                                                       size_t num_bytes) {
@@ -265,7 +263,7 @@ Status GdrMemoryManager::Init() {
                                                      cuda_alloc_visitor);
     LOG(INFO) << "Instrumenting GPU allocator for NUMA " << numa_node_;
   }
-#endif  // GOOGLE_CUDA
+
   return Status::OK();
 }
 
-- 
GitLab


From 7667f9747c000fe5c29f4728b9b134ea2bb5dfd8 Mon Sep 17 00:00:00 2001
From: lxl910915 <lxl910915@gmail.com>
Date: Thu, 6 Dec 2018 18:32:06 +0800
Subject: [PATCH 122/873] #21745: set timeout for closing worker session

---
 tensorflow/core/distributed_runtime/master_session.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index bc8ba6e47d..59bb18e7eb 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -1352,7 +1352,9 @@ Status MasterSession::DeleteWorkerSessions() {
         &workers[i].call_opts, &workers[i].request, &workers[i].response, cb);
   }
 
-  done.Wait();
+  if (!done.WaitFor(std::chrono::milliseconds(10000))) {
+    LOG(WARNING) << "Timeout for closing worker session";
+  }
   for (size_t i = 0; i < workers.size(); ++i) {
     status.Update(workers[i].status);
   }
-- 
GitLab


From 636207ade13e6f9c0b110b21497cc6ed040ea4d6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 08:32:41 -0800
Subject: [PATCH 123/873] Fix wrongly exported dtypes.uint16 to dtypes.int16

PiperOrigin-RevId: 224342753
---
 tensorflow/python/framework/dtypes.py                  | 2 +-
 tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt | 4 ++++
 tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt | 4 ++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/dtypes.py b/tensorflow/python/framework/dtypes.py
index f7a12d27df..9a4fe4e93b 100644
--- a/tensorflow/python/framework/dtypes.py
+++ b/tensorflow/python/framework/dtypes.py
@@ -347,7 +347,7 @@ tf_export("dtypes.uint32", "uint32").export_constant(__name__, "uint32")
 uint64 = DType(types_pb2.DT_UINT64)
 tf_export("dtypes.uint64", "uint64").export_constant(__name__, "uint64")
 int16 = DType(types_pb2.DT_INT16)
-tf_export("dtypes.uint16", "int16").export_constant(__name__, "int16")
+tf_export("dtypes.int16", "int16").export_constant(__name__, "int16")
 int8 = DType(types_pb2.DT_INT8)
 tf_export("dtypes.int8", "int8").export_constant(__name__, "int8")
 string = DType(types_pb2.DT_STRING)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
index 848fc303aa..01b870a816 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.dtypes.pbtxt
@@ -44,6 +44,10 @@ tf_module {
     name: "half"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "int16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
   member {
     name: "int32"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
index 848fc303aa..01b870a816 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.dtypes.pbtxt
@@ -44,6 +44,10 @@ tf_module {
     name: "half"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "int16"
+    mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
+  }
   member {
     name: "int32"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
-- 
GitLab


From be4b5c34ecc0a99929b590cc6f956a4dc4eece55 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 08:37:29 -0800
Subject: [PATCH 124/873] Update Eigen to
 https://bitbucket.org/eigen/eigen/commits/729d33d11c81fd023834a2d61f0f280ba9da48c8

PiperOrigin-RevId: 224343375
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index ad3cecd3a1..e6b4a89e3b 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -134,11 +134,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "eigen_archive",
         build_file = clean_dep("//third_party:eigen.BUILD"),
-        sha256 = "37a483ec219c43219b6e0fc07e799277a4a36abb2b9f4162cfcd256aa211eae8",
-        strip_prefix = "eigen-eigen-2e50f4a5542a",
+        sha256 = "aae7a680d141c978301dfae2c7945c06039f65849fcf64269595a9cdbba82638",
+        strip_prefix = "eigen-eigen-729d33d11c81",
         urls = [
-            "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/2e50f4a5542a.tar.gz",
-            "https://bitbucket.org/eigen/eigen/get/2e50f4a5542a.tar.gz",
+            "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/729d33d11c81.tar.gz",
+            "https://bitbucket.org/eigen/eigen/get/729d33d11c81.tar.gz",
         ],
     )
 
-- 
GitLab


From 0334b504c6c7179a322a0293e09616201cabc805 Mon Sep 17 00:00:00 2001
From: Karim Nosir <karimnosseir@google.com>
Date: Thu, 6 Dec 2018 09:19:24 -0800
Subject: [PATCH 125/873] Fix test failure.

PiperOrigin-RevId: 224350277
---
 tensorflow/lite/tools/benchmark/benchmark_test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/lite/tools/benchmark/benchmark_test.cc b/tensorflow/lite/tools/benchmark/benchmark_test.cc
index 0cf66c6e4d..a4f830122f 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_test.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_test.cc
@@ -76,10 +76,10 @@ TEST(BenchmarkTest, ParametersArePopulatedWhenInputShapeIsNotSpecified) {
   ASSERT_GE(inputs.size(), 1);
   auto input_tensor = interpreter->tensor(inputs[0]);
 
-  std::vector<uint8_t> input_bytes;
+  std::vector<char> input_bytes;
   input_bytes.reserve(input_tensor->bytes);
   for (size_t i = 0; i < input_tensor->bytes; i++) {
-    input_bytes.push_back(input_tensor->data.b[i]);
+    input_bytes.push_back(input_tensor->data.raw_const[i]);
   }
   benchmark.Prepare();
 
@@ -87,7 +87,7 @@ TEST(BenchmarkTest, ParametersArePopulatedWhenInputShapeIsNotSpecified) {
   EXPECT_EQ(input_bytes.size(), input_tensor->bytes);
   bool is_same = true;
   for (size_t i = 0; i < input_tensor->bytes; i++) {
-    if (input_bytes[i] != input_tensor->data.b[i]) {
+    if (input_bytes[i] != input_tensor->data.raw_const[i]) {
       is_same = false;
       break;
     }
-- 
GitLab


From f83053f48c1d262bd8dd61b87890eb34d550d8f7 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Thu, 6 Dec 2018 09:35:15 -0800
Subject: [PATCH 126/873] Use return value of TensorShapeUtils::MakeShape

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index ac0e2f684c..777a80bbc4 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -120,11 +120,13 @@ inline nvinfer1::Dims TensorShapeToTrtDims(const TensorShapeType& shape,
   return trt_dims;
 }
 
-inline nvinfer1::Dims TensorShapeArrayToTrtDims(const std::vector<int>& shape,
-                                                bool ignore_first_dim = false) {
+Status TensorShapeArrayToTrtDims(const std::vector<int>& shape,
+                                 nvinfer1::Dims* out,
+                                 bool ignore_first_dim = false) {
   PartialTensorShape tensor_shape;
-  TensorShapeUtils::MakeShape(shape, &tensor_shape);
-  return TensorShapeToTrtDims(tensor_shape, ignore_first_dim);
+  TF_RETURN_IF_ERROR(TensorShapeUtils::MakeShape(shape, &tensor_shape));
+  *out = TensorShapeToTrtDims(tensor_shape, ignore_first_dim);
+  return tensorflow::Status::OK();
 }
 
 void GetOutputProperties(const grappler::GraphProperties& graph_properties,
@@ -1937,8 +1939,9 @@ tensorflow::Status ConvertExpandDims(OpConverterParams* params) {
   // ExpandDims: Insert new dim of size 1.
   input_dims.insert(input_dims.begin() + axis, 1);
   // Reshape tensor.
-  nvinfer1::Dims new_dims =
-      TensorShapeArrayToTrtDims(input_dims, /*ignore_first_dim=*/true);
+  nvinfer1::Dims new_dims;
+  TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims,
+                                               /*ignore_first_dim=*/true));
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
@@ -2002,8 +2005,9 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
   input_dims.erase(std::remove(input_dims.begin(), input_dims.end(), 0),
                    input_dims.end());
   // Reshape tensor.
-  nvinfer1::Dims new_dims =
-      TensorShapeArrayToTrtDims(input_dims, /*ignore_first_dim=*/true);
+  nvinfer1::Dims new_dims;
+  TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims,
+                                               /*ignore_first_dim=*/true));
   const nvinfer1::ITensor* output_tensor = nullptr;
   TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
       input_tensor, new_dims, &output_tensor));
-- 
GitLab


From 322254f7e00bb739ded8611a925cca450f214ec2 Mon Sep 17 00:00:00 2001
From: Karim Nosir <karimnosseir@google.com>
Date: Thu, 6 Dec 2018 09:34:36 -0800
Subject: [PATCH 127/873] Add explicit for one param constructor.

PiperOrigin-RevId: 224352578
---
 tensorflow/lite/kernels/resize_bilinear_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/lite/kernels/resize_bilinear_test.cc b/tensorflow/lite/kernels/resize_bilinear_test.cc
index 530bb32b94..d3f4837a28 100644
--- a/tensorflow/lite/kernels/resize_bilinear_test.cc
+++ b/tensorflow/lite/kernels/resize_bilinear_test.cc
@@ -26,8 +26,8 @@ using uint8 = std::uint8_t;
 
 class ResizeBilinearOpModel : public SingleOpModel {
  public:
-  ResizeBilinearOpModel(const TensorData& input,
-                        std::initializer_list<int> size_data = {}) {
+  explicit ResizeBilinearOpModel(const TensorData& input,
+                                 std::initializer_list<int> size_data = {}) {
     bool const_size = size_data.size() != 0;
     input_ = AddInput(input);
     if (const_size) {
-- 
GitLab


From 48662695e0cabea065de19958132789d931ac78c Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 5 Dec 2018 11:22:57 -0800
Subject: [PATCH 128/873] Update
 tensorflow/contrib/tensorrt/convert/convert_graph.cc

Co-Authored-By: Trevor Morris <tmorris@nvidia.com>
---
 tensorflow/contrib/tensorrt/convert/convert_graph.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 3e599b9174..21c69326df 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -588,7 +588,7 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos,
   // We don't support segments with no inputs. Fall back to native TF here to
   // avoid crash later. Constant folding should've folded the ops that make up
   // these segments.
-  if (inputs.size() == 0) {
+  if (inputs.empty()) {
     return tensorflow::errors::Internal("Segment has no inputs (possible "
                                         "constfold failure)");
   }
-- 
GitLab


From 4f6613441e7f13abd93568445eb0b4ae69828632 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Thu, 6 Dec 2018 10:16:45 -0800
Subject: [PATCH 129/873] Update model save/load for unified_lstm layer.

The bias weights during load is changed a bit. The original approach is to split the weights into half and give them to recurrent bias and input gate bias. In the new approach, since it is unknown that whether the newly constructed layer will be running on CPU or GPU, if we still split the weight into half, then the input_gate_bias value is lost since canonical LSTM only use recurrent bias. The new approach is to give recurrent_bias 100% of the sum, and 0% to input_gate_bias.

PiperOrigin-RevId: 224360388
---
 tensorflow/python/keras/engine/saving.py      | 58 +++++++++++++++++
 tensorflow/python/keras/engine/saving_test.py | 64 +++++++++++++++++++
 tensorflow/python/keras/layers/__init__.py    |  1 +
 3 files changed, 123 insertions(+)

diff --git a/tensorflow/python/keras/engine/saving.py b/tensorflow/python/keras/engine/saving.py
index 54d9e32fb2..15ba5f78d9 100644
--- a/tensorflow/python/keras/engine/saving.py
+++ b/tensorflow/python/keras/engine/saving.py
@@ -551,10 +551,68 @@ def preprocess_weights_for_loading(layer,
       if layer.__class__.__name__ == 'ConvLSTM2D':
         weights[1] = np.transpose(weights[1], (3, 2, 0, 1))
 
+  weights = _convert_unified_lstm_weights(layer, weights)
+
   # convert CuDNN layers
   return _convert_rnn_weights(layer, weights)
 
 
+def _convert_unified_lstm_weights(layer, weights):
+  """Converts weights for Unified LSTM layer.
+
+  The input weights suppose to have 2, 3 or 4 items.
+  1. kernel. (i, f, c, o gates concat among axis 1)
+  2. recurrent_kernel. (i, f, c, o concat among axis 1)
+  3. recurrent_bias. (optional, only available when use bias)
+  4. input_bias (optional, only available when use bias and cudnn).
+  Kernel and recurrent_kernel does not need any conversion. During load(),
+  since the layer could be built with the parameter that does not support the
+  defun approach, it is possible that cudnn_bias variable is not created, or
+  even created but not used during actual run. Because of that, we sum up the
+  value of two biases, and give it to recurrent_bias only. Mathematically, the
+  LSTM is calculated as following formula:
+
+    i_t = sigmoid(w_i * x_t + r_i * h_(t-1) + b_wi + b_ri)
+    f_t = sigmoid(w_f * x_t + r_f * h_(t-1) + b_wf + b_rf)
+    o_t = sigmoid(w_o * x_t + r_o * h_(t-1) + b_wo + b_ro)
+    c'_t = tanh(w_c * x_t + r_c * h_(t-1) + b_wc + b_rc)
+    c_t = f_t . c_(t-1) + i_t . c'_t
+    h_t = o_t . tanh(c_t)
+
+  Note that b_w{x} is the input_bias, and b_r{x} is the recurrent_bias.
+  Since it is a linear add, it is fine to give b_r{x} 100% and b_w{x} 0%, as
+  long as the sum are the same.
+
+  Args:
+    layer: The keras layer that will be loaded with weights.
+    weights: the list of numpy arrays which hold the weights to be loaded.
+
+  Returns:
+    weights: the processed list of numpy arrays.
+  """
+  if layer.__class__.__name__ == 'UnifiedLSTM':
+    if len(weights) not in [3, 4]:
+      # Only handles the bias conversion in this function, in the case when
+      # bias is not used or weights in unexpected length, do nothing and return.
+      return weights
+
+    if len(weights) == 3:
+      recurrent_bias = weights[2]
+    else:
+      # Add all the bias value to recurrent_bias
+      recurrent_bias = weights[2] + weights[3]
+
+    if len(layer.weights) == 3:
+      weights = weights[:2] + [recurrent_bias]
+    elif len(layer.weights) == 4:
+      # Create a zero filled input_bias, since all the weights have given
+      # to recurrent bias.
+      input_bias = np.zeros_like(recurrent_bias)
+      weights = weights[:2] + [recurrent_bias, input_bias]
+
+  return weights
+
+
 def _convert_rnn_weights(layer, weights):
   """Converts weights for RNN layers between native and CuDNN format.
 
diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 6d9d9a2fca..8fcefce748 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -221,6 +221,70 @@ class TestWeightSavingAndLoading(test.TestCase, parameterized.TestCase):
           for (x, y) in zip(weights1, weights2)
       ]
 
+  @parameterized.named_parameters(
+      # test_name, use_bias, bias_initializer, activation
+      ('normal', True, 'zeros', 'tanh'),
+      ('no_bias', False, 'zeros', 'tanh'),
+      # TODO(scottzhu): Reenable this test case when the approach is decided.
+      # ('random_bias', True, 'random_uniform', 'tanh'),
+      ('no_cudnn_bias', True, 'zeros', 'relu')
+  )
+  def test_process_weights_for_loading_unified_lstm(
+      self, use_bias, bias_initializer, activation):
+    if h5py is None:
+      return
+
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir)
+    h5_path = os.path.join(temp_dir, 'test.h5')
+
+    batch = 10
+    timestep = 3
+    input_dim = 5
+    units = 2
+
+    x = np.random.random((batch, timestep, input_dim))
+
+    def build_model():
+      inputs = keras.layers.Input(
+          shape=[timestep, input_dim], dtype=dtypes.float32)
+      layer = keras.layers.UnifiedLSTM(
+          units,
+          activation=activation,
+          use_bias=use_bias,
+          bias_initializer=bias_initializer)
+      output = layer(inputs)
+      return keras.models.Model(inputs, output), layer
+
+    with self.cached_session():
+      model, layer = build_model()
+      y_ref = model.predict(x)
+      model.save_weights(h5_path)
+
+      cloned_model, new_layer = build_model()
+      cloned_model.load_weights(h5_path)
+      y = cloned_model.predict(x)
+
+      self.assertAllClose(y, y_ref)
+
+      # Test the individual layer weights.
+      weights1 = layer.get_weights()
+      weights2 = new_layer.get_weights()
+      self.assertLen(weights1, len(weights2))
+      # kernel and current kernel should be the same.
+      self.assertAllClose(weights1[:2], weights2[:2])
+
+      if len(weights2) >= 3:
+        # Test recurrent bias
+        expected_recurrent_bias = weights1[2]
+        if len(weights1) == 4:
+          expected_recurrent_bias += weights1[3]
+        self.assertAllClose(weights2[2], expected_recurrent_bias)
+
+      if len(weights2) == 4:
+        # Test recovered input_gate_bias to be always zero
+        self.assertAllClose(weights2[3], np.zeros_like(weights1[3]))
+
   def test_sequential_weight_loading(self):
     if h5py is None:
       return
diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py
index 49990b6bf4..df7571e5d5 100644
--- a/tensorflow/python/keras/layers/__init__.py
+++ b/tensorflow/python/keras/layers/__init__.py
@@ -149,6 +149,7 @@ from tensorflow.python.keras.layers.recurrent import PeepholeLSTMCell
 from tensorflow.python.keras.layers.recurrent import SimpleRNN
 from tensorflow.python.keras.layers.recurrent import GRU
 from tensorflow.python.keras.layers.recurrent import LSTM
+from tensorflow.python.keras.layers.recurrent import UnifiedLSTM
 
 # Convolutional-recurrent layers.
 from tensorflow.python.keras.layers.convolutional_recurrent import ConvLSTM2D
-- 
GitLab


From e576acf5dbd7b800d3b6aa4de4b69952a9e2c0fb Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Thu, 6 Dec 2018 10:28:10 -0800
Subject: [PATCH 130/873] Internal-only change.

PiperOrigin-RevId: 224362520
---
 .../distribute/cluster_resolver/tpu_cluster_resolver.py      | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
index fe8e09f561..1fd674c8a4 100644
--- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
@@ -197,13 +197,14 @@ class TPUClusterResolver(ClusterResolver):
     elif tpu == 'local' or not tpu:
       # Google environment, where the TPU is attached to the host.
       self._environment = 'google'
-    elif tpu.startswith('/bns'):
+    elif tpu.startswith('/bns') or tpu.startswith('uptc://'):
       # Google environment, where we reach the TPU through BNS.
       self._environment = 'google'
 
     # If TPU is in the Google environment or exists locally, we don't use any
     # RPC layer.
-    if tpu.startswith('/bns') or tpu == 'local' or not tpu:
+    if tpu.startswith('/bns') or tpu.startswith(
+        'uptc://') or tpu == 'local' or not tpu:
       self.rpc_layer = None
     else:
       self.rpc_layer = 'grpc'
-- 
GitLab


From 05b2aaacf1f62ad08d4d37c6fccf009482c3e086 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 6 Dec 2018 10:36:56 -0800
Subject: [PATCH 131/873] Automated rollback of commit
 8f6c5d3252cf1fa7c97c46c93b55803660487136

PiperOrigin-RevId: 224364244
---
 tensorflow/python/eager/pywrap_tensor.cc |  8 ++++++++
 tensorflow/python/eager/tensor_test.py   | 12 ++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc
index 0d0f70d543..30a93fb0e4 100644
--- a/tensorflow/python/eager/pywrap_tensor.cc
+++ b/tensorflow/python/eager/pywrap_tensor.cc
@@ -220,6 +220,14 @@ TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype) {
       return nullptr;
     }
   }
+  tensorflow::Safe_PyObjectPtr value_decrefer;
+  if (PyArray_IsScalar(value, Generic)) {
+    // Convert numpy scalars to numpy arrays.
+    value = PyArray_FromScalar(value, nullptr);
+    // The returned value needs to be DECREF'd, but the original value was
+    // created in python code, and doesn't need to be DECREF'd.
+    value_decrefer.reset(value);
+  }
   if (PyArray_Check(value)) {
     int desired_np_dtype = -1;
     if (desired_dtype >= 0) {
diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py
index 25442ff048..0ee2ff68c2 100644
--- a/tensorflow/python/eager/tensor_test.py
+++ b/tensorflow/python/eager/tensor_test.py
@@ -95,6 +95,18 @@ class TFETensorTest(test_util.TensorFlowTestCase):
     t = _create_tensor(values)
     self.assertAllEqual(values, t)
 
+  @test_util.assert_no_new_pyobjects_executing_eagerly
+  def testNumpyDtypeSurvivesThroughTensorConversion(self):
+    scalar_creators = [np.int32, np.int64, np.float32, np.float64]
+    conversion_functions = [ops.convert_to_tensor, constant_op.constant]
+
+    for scalar_creator in scalar_creators:
+      for conversion_function in conversion_functions:
+        np_val = scalar_creator(3)
+        tensor_val = conversion_function(np_val)
+        self.assertEqual(tensor_val.numpy().dtype, np_val.dtype)
+        self.assertEqual(tensor_val.numpy(), np_val)
+
   def testNumpyValueWithCast(self):
     values = np.array([3.0], dtype=np.float32)
     t = _create_tensor(values, dtype=dtypes.float64)
-- 
GitLab


From 2bf928bcbbd51d31b817360a34d6b2b9881f7539 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 10:52:09 -0800
Subject: [PATCH 132/873] Disabling flaky tests in LossWeightingTest.

PiperOrigin-RevId: 224367330
---
 tensorflow/python/keras/engine/training_test.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index bd3d7d26d3..c3c3f06ffd 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -793,7 +793,8 @@ class TestExceptionsAndWarnings(keras_parameterized.TestCase):
 class LossWeightingTest(keras_parameterized.TestCase):
 
   @keras_parameterized.run_all_keras_modes
-  def test_class_weights(self):
+  # TODO(b/120562577): Test failing with assertion error.
+  def DISABLED_test_class_weights(self):
     num_classes = 5
     batch_size = 5
     epochs = 5
@@ -960,7 +961,8 @@ class LossWeightingTest(keras_parameterized.TestCase):
       self.assertTrue(msg_found)
 
   @keras_parameterized.run_all_keras_modes
-  def test_temporal_sample_weights(self):
+  # TODO(b/120562577): Test failing with assertion error.
+  def DISABLED_test_temporal_sample_weights(self):
     num_classes = 5
     batch_size = 5
     epochs = 5
-- 
GitLab


From 39c50c9bcbe49a5852f3dd1085e6dc143424a7a0 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 6 Dec 2018 10:52:26 -0800
Subject: [PATCH 133/873] [tf.data] Enable `noop_elimination` optimization by
 default.

PiperOrigin-RevId: 224367395
---
 .../core/kernels/data/optimize_dataset_op.cc  |  5 +--
 .../optimization/optimize_dataset_test.py     | 30 +++++++++++++++-
 .../kernel_tests/stats_dataset_ops_test.py    |  9 ++---
 .../experimental/ops/optimization_options.py  | 34 +++++++++++++++++++
 .../data/experimental/ops/prefetching_ops.py  |  4 +++
 tensorflow/python/data/ops/dataset_ops.py     | 31 +++++------------
 tensorflow/python/data/ops/iterator_ops.py    |  2 ++
 .../data/ops/multi_device_iterator_ops.py     |  7 +++-
 ...a.experimental.-optimization-options.pbtxt |  4 +++
 ...a.experimental.-optimization-options.pbtxt |  4 +++
 10 files changed, 100 insertions(+), 30 deletions(-)

diff --git a/tensorflow/core/kernels/data/optimize_dataset_op.cc b/tensorflow/core/kernels/data/optimize_dataset_op.cc
index ab184c232e..9c50d8050a 100644
--- a/tensorflow/core/kernels/data/optimize_dataset_op.cc
+++ b/tensorflow/core/kernels/data/optimize_dataset_op.cc
@@ -305,8 +305,9 @@ class OptimizeDatasetOp : public UnaryDatasetOpKernel {
         // removing unused graph nodes)
         // TODO(b/118175421): This should be part of the tf.data optimization
         // pass manager.
-        for (const auto& optimizer : {"pruning", "function", "constfold",
-                                      "shape", "arithmetic", "dependency"}) {
+        // TODO(b/120437209): Apply `constfold` optimization when it is fixed.
+        for (const auto& optimizer :
+             {"pruning", "function", "shape", "arithmetic", "dependency"}) {
           rewriter_config.add_optimizers(optimizer);
         }
       }
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
index 751be83326..150cc7b5e4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
@@ -163,7 +163,15 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     dataset = dataset_ops.Dataset.range(1)
     dataset = dataset.flat_map(flat_map_fn)
-    dataset = dataset_ops._OptimizeDataset(dataset, ["map_and_batch_fusion"])
+
+    # TODO(b/120558523): We use Options instead of _OptimizeDataset directly
+    # here because of a bug with chaining _OptimizeDatasets when there are
+    # nested dataset functions
+    options = dataset_ops.Options()
+    opt_options = optimization_options.OptimizationOptions()
+    opt_options.map_and_batch_fusion = True
+    options.experimental_optimization = opt_options
+    dataset = dataset.with_options(options)
     self.assertDatasetProduces(dataset, expected_output=[[0]])
 
   def testOptimizationThreadPoolDataset(self):
@@ -245,6 +253,26 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       except errors.OutOfRangeError:
         break
 
+  def testOptimizationEnabledByDefault(self):
+    """Tests that some optimizations are applied to datasets by default."""
+    options = dataset_ops.Options()
+    expected_optimizations = ["noop_elimination"]
+    self.assertEqual(options._static_optimizations(), expected_optimizations)
+
+  def testOptimizationDisableDefault(self):
+    """Tests that we can disable all static optimizations enabled by default.
+
+    If the `apply_default_optimizations` optimization options flag is False,
+    only explicitly enabled optimizations will be applied.
+    """
+    options = dataset_ops.Options()
+    opt_options = optimization_options.OptimizationOptions()
+    opt_options.hoist_random_uniform = True
+    opt_options.apply_default_optimizations = False
+    options.experimental_optimization = opt_options
+    expected_optimizations = ["hoist_random_uniform"]
+    self.assertEqual(options._static_optimizations(), expected_optimizations)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index 8a300364f9..f19b08a2dd 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -138,9 +138,10 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
 
   @test_util.run_deprecated_v1
   def testPrefetchBufferScalars(self, dataset_transformation):
+    def map_fn(x):
+      return array_ops.tile([x], ops.convert_to_tensor([x]))
     aggregator = stats_aggregator.StatsAggregator()
-    dataset = dataset_ops.Dataset.range(10).map(
-        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(0)
+    dataset = dataset_ops.Dataset.range(10).map(map_fn).prefetch(1)
     dataset = dataset_transformation(dataset, aggregator)
     iterator = dataset_ops.make_initializable_iterator(dataset)
     next_element = iterator.get_next()
@@ -153,9 +154,9 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
             np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
         summary_str = self.evaluate(summary_t)
         self._assertSummaryHasScalarValue(summary_str,
-                                          "Prefetch::buffer_capacity", 0)
+                                          "Prefetch::buffer_capacity", 1)
         self._assertSummaryHasScalarValue(summary_str, "Prefetch::buffer_size",
-                                          0)
+                                          1)
       with self.assertRaises(errors.OutOfRangeError):
         self.evaluate(next_element)
 
diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py
index dc9d319374..daf65cd5cd 100644
--- a/tensorflow/python/data/experimental/ops/optimization_options.py
+++ b/tensorflow/python/data/experimental/ops/optimization_options.py
@@ -35,6 +35,12 @@ class OptimizationOptions(options.OptionsBase):
   dataset = dataset.with_options(options)
   ```
   """
+  apply_default_optimizations = options.create_option(
+      name="apply_default_optimizations",
+      ty=bool,
+      docstring=
+      "Whether to apply default static optimizations. If False, only static "
+      "optimizations that have been explicitly enabled will be applied.")
 
   filter_fusion = options.create_option(
       name="filter_fusion",
@@ -81,3 +87,31 @@ class OptimizationOptions(options.OptionsBase):
       name="shuffle_and_repeat_fusion",
       ty=bool,
       docstring="Whether to fuse shuffle and repeat transformations.")
+
+  def _static_optimizations(self):
+    """Produces the list of enabled static optimizations."""
+    result = []
+    optimizations_to_enable = [
+        "filter_fusion",
+        "hoist_random_uniform",
+        "map_and_batch_fusion",
+        "map_and_filter_fusion",
+        "map_fusion",
+        "map_parallelization",
+        "map_vectorization",
+        "shuffle_and_repeat_fusion",
+    ]
+    for optimization in optimizations_to_enable:
+      if getattr(self, optimization):
+        result.append(optimization)
+
+    if self.apply_default_optimizations is not False:
+      # The following optimizations are turned on by default, unless the
+      # user explicitly disables them.
+      optimizations_to_disable = [
+          "noop_elimination",
+      ]
+      for optimization in optimizations_to_disable:
+        if getattr(self, optimization) is not False:
+          result.append(optimization)
+    return result
diff --git a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py
index 50d9803c34..aba1786646 100644
--- a/tensorflow/python/data/experimental/ops/prefetching_ops.py
+++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.util import nest
@@ -73,6 +74,9 @@ def copy_to_device(target_device, source_device="/cpu:0"):
   def _apply_fn(dataset):
     options = dataset_ops.Options()
     options.experimental_autotune = False
+    opt_options = optimization_options.OptimizationOptions()
+    opt_options.apply_default_optimizations = False
+    options.experimental_optimization = opt_options
     return _CopyToDeviceDataset(
         dataset, target_device=target_device,
         source_device=source_device).with_options(options)
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index d7a2547fc8..a08a2b5787 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -187,8 +187,7 @@ class DatasetV2(object):
       RuntimeError: If eager execution is not enabled.
     """
     if context.executing_eagerly():
-      dataset = self._apply_options()
-      return iterator_ops.EagerIterator(dataset)
+      return iterator_ops.EagerIterator(self)
     else:
       raise RuntimeError("dataset.__iter__() is only supported when eager "
                          "execution is enabled.")
@@ -1318,8 +1317,7 @@ class DatasetV1(DatasetV2):
       An `Iterator` over the elements of this dataset.
     """
     if context.executing_eagerly():
-      dataset = self._apply_options()
-      return iterator_ops.EagerIterator(dataset)
+      return iterator_ops.EagerIterator(self)
 
     graph_level_seed, op_level_seed = core_random_seed.get_seed(None)
 
@@ -1702,8 +1700,8 @@ class Options(options_lib.OptionsBase):
       name="experimental_deterministic",
       ty=bool,
       docstring=
-      "Whether to dynamically adjust the values of tunable parameters (e.g. "
-      "degrees of parallelism).")
+      "Whether the outputs need to be produced in deterministic order."
+  )
 
   experimental_numa_aware = options_lib.create_option(
       name="experimental_numa_aware",
@@ -1729,22 +1727,11 @@ class Options(options_lib.OptionsBase):
     """Produces the list of enabled static optimizations."""
 
     result = []
-    exp_optimization_options = self.experimental_optimization
-    if exp_optimization_options:
-      optimizations = [
-          "filter_fusion",
-          "hoist_random_uniform",
-          "map_and_batch_fusion",
-          "map_and_filter_fusion",
-          "map_fusion",
-          "map_parallelization",
-          "map_vectorization",
-          "noop_elimination",
-          "shuffle_and_repeat_fusion",
-      ]
-      for optimization in optimizations:
-        if getattr(exp_optimization_options, optimization):
-          result.append(optimization)
+    exp_optimization_options = (
+        self.experimental_optimization or
+        optimization_options.OptimizationOptions())  # If not set, use default
+    result.extend(exp_optimization_options._static_optimizations())  # pylint: disable=protected-access
+
     if self.experimental_numa_aware:
       result.append("make_numa_aware")
     if self.experimental_deterministic is False:
diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py
index eb330d83ae..6f9c494f39 100644
--- a/tensorflow/python/data/ops/iterator_ops.py
+++ b/tensorflow/python/data/ops/iterator_ops.py
@@ -528,6 +528,7 @@ class EagerIterator(checkpointable.CheckpointableBase):
     self._device = context.context().device_name
     with ops.device("/cpu:0"):
       # pylint: disable=protected-access
+      dataset = dataset._apply_options()
       ds_variant = dataset._as_variant_tensor()
       self._structure = structure_lib.convert_legacy_structure(
           dataset.output_types, dataset.output_shapes, dataset.output_classes)
@@ -541,6 +542,7 @@ class EagerIterator(checkpointable.CheckpointableBase):
         # Delete the resource when this object is deleted
         self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
             handle=self._resource, handle_device=self._device)
+      # pylint: enable=protected-access
 
   def __iter__(self):
     return self
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index 4638cee02a..7586012574 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
@@ -192,9 +193,13 @@ class MultiDeviceIterator(object):
           self._source_device_tensor, device, dataset._element_structure)  # pylint: disable=protected-access
       if prefetch_buffer_size > 0:
         ds = ds.prefetch(prefetch_buffer_size)
-      # TODO(jsimsa): Enable auto-tuning when supported for non-CPU devices.
+      # TODO(jsimsa): Enable auto-tuning and optimizations when supported for
+      # non-CPU devices.
       options = dataset_ops.Options()
       options.experimental_autotune = False
+      opt_options = optimization_options.OptimizationOptions()
+      opt_options.apply_default_optimizations = False
+      options.experimental_optimization = opt_options
       ds = ds.with_options(options)
       with ops.device(device):
         self._device_iterators.append(ds.make_initializable_iterator())
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt
index 9ca75828e5..3b7ad64f51 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optimization-options.pbtxt
@@ -3,6 +3,10 @@ tf_class {
   is_instance: "<class \'tensorflow.python.data.experimental.ops.optimization_options.OptimizationOptions\'>"
   is_instance: "<class \'tensorflow.python.data.util.options.OptionsBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "apply_default_optimizations"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "filter_fusion"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt
index 9ca75828e5..3b7ad64f51 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optimization-options.pbtxt
@@ -3,6 +3,10 @@ tf_class {
   is_instance: "<class \'tensorflow.python.data.experimental.ops.optimization_options.OptimizationOptions\'>"
   is_instance: "<class \'tensorflow.python.data.util.options.OptionsBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "apply_default_optimizations"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "filter_fusion"
     mtype: "<type \'property\'>"
-- 
GitLab


From 76e1a2a4767d8c697a7594b816bee13d401666a4 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 6 Dec 2018 11:07:20 -0800
Subject: [PATCH 134/873] [XLA:GPU] Add a flag to disable ptxas optimizations

PiperOrigin-RevId: 224370488
---
 .../compiler/xla/debug_options_flags.cc       |  6 ++++++
 .../xla/service/gpu/nvptx_compiler.cc         | 21 ++++++++++++-------
 .../compiler/xla/service/gpu/nvptx_compiler.h |  5 +++--
 tensorflow/compiler/xla/xla.proto             |  3 +++
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc
index d7e7b9e621..20609cad58 100644
--- a/tensorflow/compiler/xla/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/debug_options_flags.cc
@@ -334,6 +334,12 @@ void AllocateFlags() {
           "overhead from context switching but we let the user override this "
           "behavior to help run tests on the host that run models in parallel "
           "across multiple devices."),
+      tensorflow::Flag(
+          "xla_gpu_disable_ptxas_optimizations",
+          bool_setter_for(
+              &DebugOptions::set_xla_gpu_disable_ptxas_optimizations),
+          flag_values->xla_gpu_disable_ptxas_optimizations(),
+          "In XLA:GPU run ptxas in -O0 (default is -O3)."),
   });
   ParseFlagsFromEnvAndDieIfUnknown("XLA_FLAGS", *flag_objects);
 }
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index e934cbda17..f3e17d8882 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -479,7 +479,8 @@ void WarnIfBadDriverJITVersion() {
 // Compiles the given PTX string using ptxas and returns the resulting machine
 // code (i.e. a cubin) as a byte array.
 StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
-                                        int cc_minor) {
+                                        int cc_minor,
+                                        bool disable_ptx_optimizations) {
   tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
   const string ptxas_path =
       tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
@@ -519,6 +520,9 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
   if (VLOG_IS_ON(2)) {
     ptxas_args.push_back("-v");
   }
+  if (disable_ptx_optimizations) {
+    ptxas_args.push_back("-O0");
+  }
   ptxas_info_dumper.SetProgram(ptxas_path, ptxas_args);
   ptxas_info_dumper.SetChannelAction(tensorflow::CHAN_STDERR,
                                      tensorflow::ACTION_PIPE);
@@ -739,8 +743,9 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
     }
   }
 
-  const std::vector<uint8> cubin =
-      CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor);
+  const std::vector<uint8> cubin = CompilePtxOrGetCachedResult(
+      ptx, cc_major, cc_minor,
+      module->config().debug_options().xla_gpu_disable_ptxas_optimizations());
 
   auto thunk_schedule = absl::make_unique<ThunkSchedule>(
       ir_emitter.ConsumeThunkSequence(), std::move(stream_assignment),
@@ -772,9 +777,9 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
   return std::unique_ptr<Executable>(gpu_executable);
 }
 
-std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(const string& ptx,
-                                                              int cc_major,
-                                                              int cc_minor) {
+std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
+    const string& ptx, int cc_major, int cc_minor,
+    bool disable_ptx_optimizations) {
   XLA_SCOPED_LOGGING_TIMER("NVPTXCompiler::CompilePtxOrGetCachedResult");
   tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
   bool inserted;
@@ -802,8 +807,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(const string& ptx,
     if (inserted) {
       CHECK(!cache_value->compilation_done);
       if (!ptx.empty()) {
-        StatusOr<std::vector<uint8>> maybe_cubin =
-            CompilePtx(*cache_ptx, cc_major, cc_minor);
+        StatusOr<std::vector<uint8>> maybe_cubin = CompilePtx(
+            *cache_ptx, cc_major, cc_minor, disable_ptx_optimizations);
         if (maybe_cubin.ok()) {
           cache_value->cubin_data = std::move(maybe_cubin).ValueOrDie();
           VLOG(2) << "Compiled PTX size:" << ptx.size()
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
index f79ae2990a..be5e31a501 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@@ -97,8 +97,9 @@ class NVPTXCompiler : public LLVMCompiler {
 
   // Tries to compile the given ptx string to cubin.  Returns a vector with the
   // compiled cubin.  If compilation was unsuccessful, returns an empty vector.
-  std::vector<uint8> CompilePtxOrGetCachedResult(const string& ptx,
-                                                 int cc_major, int cc_minor);
+  std::vector<uint8> CompilePtxOrGetCachedResult(
+      const string& ptx, int cc_major, int cc_minor,
+      bool disable_ptx_optimizations);
 
   // The compilation_cache_ map is a cache from {ptx string, cc_major, cc_minor}
   // -> cubin so we don't recompile the same ptx twice.  This is important for
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index bdeb1728fa..a37eac7fe4 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -213,6 +213,9 @@ message DebugOptions {
   // the host that run models in parallel across multiple devices.
   int32 xla_force_host_platform_device_count = 102;
 
+  // If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3).
+  bool xla_gpu_disable_ptxas_optimizations = 103;
+
   // Extra options to pass to the compilation backend (e.g. LLVM); specific
   // interpretation of these values is left to the backend.
   map<string, string> xla_backend_extra_options = 500;
-- 
GitLab


From d3106e4138e28f62179cd6a7b8041729f5163190 Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Thu, 6 Dec 2018 11:07:35 -0800
Subject: [PATCH 135/873] Change num_accelerators_per_worker to
 num_accelerators within Cluster Resolver as we will support querying remote,
 heterogenous GPU clusters soon.

PiperOrigin-RevId: 224370540
---
 .../cluster_resolver/cluster_resolver.py      | 57 ++++++++++++++-----
 .../cluster_resolver/cluster_resolver_test.py | 12 ++--
 .../cluster_resolver/gce_cluster_resolver.py  | 16 ++++--
 .../kubernetes_cluster_resolver.py            | 15 ++++-
 .../slurm_cluster_resolver.py                 |  9 ++-
 .../tfconfig_cluster_resolver.py              | 19 +++++--
 .../tfconfig_cluster_resolver_test.py         |  4 +-
 .../cluster_resolver/tpu_cluster_resolver.py  | 21 +++++--
 8 files changed, 111 insertions(+), 42 deletions(-)

diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py
index 7774ac0e12..ca40e60a55 100644
--- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py
@@ -92,7 +92,11 @@ class ClusterResolver(object):
     raise NotImplementedError()
 
   @abc.abstractmethod
-  def num_accelerators_per_worker(self, session_config=None):
+  def num_accelerators(self,
+                       task_type=None,
+                       task_index=None,
+                       accelerator_type='GPU',
+                       config_proto=None):
     """Returns the number of accelerator cores per worker.
 
     This returns the number of accelerator cores (such as GPUs and TPUs)
@@ -100,8 +104,19 @@ class ClusterResolver(object):
     should return 0. This method will query the master for this information
     if it is not otherwise known.
 
+    Optionally, we allow callers to specify the task_type, task_index, and
+    rpc_layer, if they want to target a specific TensorFlow process to query
+    the number of accelerators. This is to support heterogenous environments,
+    where the number of accelerators cores per host is different.
+
     Args:
-      session_config: (Optional) Configuration for starting a new session to
+      task_type: (Optional) The type of the TensorFlow task of the machine we
+        want to query.
+      task_index: (Optional) The index of the TensorFlow task of the machine we
+        want to query.
+      accelerator_type: (Optional) The type of accelerator we are trying to
+        query (defaults to 'GPU').
+      config_proto: (Optional) Configuration for starting a new session to
         query how many accelerator cores it has.
     """
     raise NotImplementedError()
@@ -116,7 +131,7 @@ class SimpleClusterResolver(ClusterResolver):
   """Simple implementation of ClusterResolver that accepts a ClusterSpec."""
 
   def __init__(self, cluster_spec, master='', task_type=None, task_index=None,
-               environment='', num_accelerators_per_worker=0,
+               environment='', num_accelerators=0,
                rpc_layer=None):
     """Creates a SimpleClusterResolver from a ClusterSpec."""
     super(SimpleClusterResolver, self).__init__()
@@ -124,7 +139,7 @@ class SimpleClusterResolver(ClusterResolver):
     self._task_type = task_type
     self._task_index = task_index
     self._environment = environment
-    self._num_accelerators_per_worker = num_accelerators_per_worker
+    self._num_accelerators = num_accelerators
     self._rpc_layer = rpc_layer
 
     if not isinstance(cluster_spec, ClusterSpec):
@@ -180,17 +195,27 @@ class SimpleClusterResolver(ClusterResolver):
   def environment(self):
     return self._environment
 
-  def num_accelerators_per_worker(self, session_config=None):
+  def num_accelerators(self,
+                       task_type=None,
+                       task_index=None,
+                       accelerator_type='GPU',
+                       config_proto=None):
     """Returns the number of accelerator cores per worker.
 
+    The SimpleClusterResolver does not do automatic detection of accelerators,
+    so a TensorFlow session will never be created, and thus all arguments are
+    unused and we simply return whatever was passed in when this object was
+    initialized.
+
     Args:
-      session_config: Unused. The SimpleClusterResolver does not do automatic
-        detection of accelerators, so a TensorFlow session will never be
-        created, and thus a `session_config` is never necessary here, and will
-        be ignored.
+      task_type: Unused.
+      task_index: Unused.
+      accelerator_type: Unused.
+      config_proto: Unused.
     """
-    del session_config
-    return self._num_accelerators_per_worker
+    # Unused
+    del task_type, task_index, accelerator_type, config_proto
+    return self._num_accelerators
 
   @property
   def rpc_layer(self):
@@ -361,9 +386,13 @@ class UnionClusterResolver(ClusterResolver):
   def environment(self):
     return self._cluster_resolvers[0].environment
 
-  def num_accelerators_per_worker(self, session_config=None):
-    return self._cluster_resolvers[0].num_accelerators_per_worker(
-        session_config)
+  def num_accelerators(self,
+                       task_type=None,
+                       task_index=None,
+                       accelerator_type='GPU',
+                       config_proto=None):
+    return self._cluster_resolvers[0].num_accelerators(
+        task_type, task_index, accelerator_type, config_proto)
 
   @property
   def rpc_layer(self):
diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py
index b5448faec6..3f7b469727 100644
--- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py
+++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py
@@ -65,13 +65,13 @@ class UnionClusterResolverTest(test.TestCase):
 
     simple_resolver = SimpleClusterResolver(base_cluster_spec, task_type="ps",
                                             task_index=1, environment="cloud",
-                                            num_accelerators_per_worker=8,
+                                            num_accelerators=8,
                                             rpc_layer="grpc")
 
     self.assertEqual(simple_resolver.task_type, "ps")
     self.assertEqual(simple_resolver.task_index, 1)
     self.assertEqual(simple_resolver.environment, "cloud")
-    self.assertEqual(simple_resolver.num_accelerators_per_worker(), 8)
+    self.assertEqual(simple_resolver.num_accelerators(), 8)
     self.assertEqual(simple_resolver.rpc_layer, "grpc")
 
   def testOverrideSimpleClusterResolver(self):
@@ -82,7 +82,7 @@ class UnionClusterResolverTest(test.TestCase):
 
     simple_resolver = SimpleClusterResolver(base_cluster_spec, task_type="ps",
                                             task_index=1, environment="cloud",
-                                            num_accelerators_per_worker=8,
+                                            num_accelerators=8,
                                             rpc_layer="grpc")
 
     simple_resolver.task_type = "worker"
@@ -130,7 +130,7 @@ class UnionClusterResolverTest(test.TestCase):
     })
     resolver1 = SimpleClusterResolver(cluster_spec_1, task_type="ps",
                                       task_index=1, environment="cloud",
-                                      num_accelerators_per_worker=8,
+                                      num_accelerators=8,
                                       rpc_layer="grpc")
 
     cluster_spec_2 = server_lib.ClusterSpec({
@@ -139,7 +139,7 @@ class UnionClusterResolverTest(test.TestCase):
     })
     resolver2 = SimpleClusterResolver(cluster_spec_2, task_type="worker",
                                       task_index=2, environment="local",
-                                      num_accelerators_per_worker=16,
+                                      num_accelerators=16,
                                       rpc_layer="http")
 
     union_resolver = UnionClusterResolver(resolver1, resolver2)
@@ -147,7 +147,7 @@ class UnionClusterResolverTest(test.TestCase):
     self.assertEqual(union_resolver.task_type, "ps")
     self.assertEqual(union_resolver.task_index, 1)
     self.assertEqual(union_resolver.environment, "cloud")
-    self.assertEqual(union_resolver.num_accelerators_per_worker(), 8)
+    self.assertEqual(union_resolver.num_accelerators(), 8)
     self.assertEqual(union_resolver.rpc_layer, "grpc")
 
     union_resolver.task_type = "worker"
diff --git a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
index b167bc8fc8..2412f6dad0 100644
--- a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
@@ -51,7 +51,7 @@ class GceClusterResolver(ClusterResolver):
                task_type='worker',
                task_index=0,
                rpc_layer='grpc',
-               num_accelerators_per_worker=0,
+               num_accelerators=0,
                credentials='default',
                service=None):
     """Creates a new GceClusterResolver object.
@@ -73,7 +73,7 @@ class GceClusterResolver(ClusterResolver):
         can be distinguished from each other.
       rpc_layer: The RPC layer TensorFlow should use to communicate across
         instances.
-      num_accelerators_per_worker: Number of accelerators (GPUs) present per
+      num_accelerators: Number of accelerators (GPUs) present per
         instance.
       credentials: GCE Credentials. If nothing is specified, this defaults to
         GoogleCredentials.get_application_default().
@@ -90,6 +90,7 @@ class GceClusterResolver(ClusterResolver):
     self._task_type = task_type
     self._task_index = task_index
     self._rpc_layer = rpc_layer
+    self._num_accelerators = num_accelerators
     self._port = port
     self._credentials = credentials
 
@@ -201,6 +202,11 @@ class GceClusterResolver(ClusterResolver):
   def rpc_layer(self, rpc_layer):
     self._rpc_layer = rpc_layer
 
-  def num_accelerators_per_worker(self, session_config=None):
-    del session_config  # Unused, since this is set manually in __init__.
-    return self._num_accelerators_per_worker
+  def num_accelerators(self,
+                       task_type=None,
+                       task_index=None,
+                       accelerator_type='GPU',
+                       config_proto=None):
+    # Unused
+    del task_type, task_index, accelerator_type, config_proto
+    return self._num_accelerators
diff --git a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
index 041c081540..b21c3676be 100644
--- a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
@@ -168,6 +168,15 @@ class KubernetesClusterResolver(ClusterResolver):
     """
     return ''
 
-  def num_accelerators_per_worker(self, session_config=None):
-    local_devices = device_lib.list_local_devices(session_config)
-    return len([d for d in local_devices if d.device_type == 'GPU'])
+  def num_accelerators(self,
+                       task_type=None,
+                       task_index=None,
+                       accelerator_type='GPU',
+                       config_proto=None):
+    # TODO(frankchn): Make querying non-local accelerators work
+    if task_type is not None or task_index is not None:
+      raise NotImplementedError('Querying non-local accelerators is not yet'
+                                'implemented.')
+
+    local_devices = device_lib.list_local_devices(config_proto)
+    return sum(d.device_type == accelerator_type for d in local_devices)
diff --git a/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py
index fd3c6d6a18..1ab81731b7 100644
--- a/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py
@@ -221,6 +221,11 @@ class SlurmClusterResolver(ClusterResolver):
     """
     return ''
 
-  def num_accelerators_per_worker(self, session_config=None):
-    del session_config  # Unused, since this is set in __init__ manually.
+  def num_accelerators(self,
+                       task_type=None,
+                       task_index=None,
+                       accelerator_type='GPU',
+                       config_proto=None):
+    # Unused, since this is set in __init__ manually.
+    del task_type, task_index, accelerator_type, config_proto
     return self._gpus_per_node
diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
index a3246e77f4..b4465714b2 100644
--- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
@@ -55,7 +55,7 @@ class TFConfigClusterResolver(ClusterResolver):
                task_index=None,
                rpc_layer=None,
                environment=None,
-               num_accelerators_per_worker=0):
+               num_accelerators=0):
     """Creates a new TFConfigClusterResolver.
 
     Args:
@@ -66,15 +66,17 @@ class TFConfigClusterResolver(ClusterResolver):
       rpc_layer: (String, optional) Overrides the rpc layer TensorFlow uses.
       environment: (String, optional) Overrides the environment TensorFlow
         operates in.
-      num_accelerators_per_worker: (Integer, optional) Specifies the number of
+      num_accelerators: (Integer, optional) Specifies the number of
         accelerators (e.g. GPUs, TPUs, others) that each node has.
     """
+    # TODO(frankchn): num_accelerators is a stop-gap and will be removed
+    # in favor of autodetection of devices soon.
 
     self._task_type = task_type
     self._task_index = task_index
     self._rpc_layer = rpc_layer
     self._environment = environment
-    self._num_accelerators_per_worker = num_accelerators_per_worker
+    self._num_accelerators = num_accelerators
 
   @property
   def task_type(self):
@@ -115,10 +117,15 @@ class TFConfigClusterResolver(ClusterResolver):
   def rpc_layer(self, rpc_layer):
     self._rpc_layer = rpc_layer
 
-  def num_accelerators_per_worker(self, session_config=None):
+  def num_accelerators(self,
+                       task_type=None,
+                       task_index=None,
+                       accelerator_type='GPU',
+                       config_proto=None):
     # TODO(frankchn): Connect to server (w/ session_config) in the future.
-    del session_config  # Unused, we do not connect to another server here.
-    return self._num_accelerators_per_worker
+    # Unused, we do not connect to another server here right now.
+    del task_type, task_index, accelerator_type, config_proto
+    return self._num_accelerators
 
   def cluster_spec(self):
     """Returns a ClusterSpec based on the TF_CONFIG environment variable.
diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
index c20e51bc0b..197eba1739 100644
--- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
+++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
@@ -169,12 +169,12 @@ class TFConfigClusterResolverTest(test.TestCase):
     """
 
     cluster_resolver = TFConfigClusterResolver(task_type='ps', task_index=0,
-                                               num_accelerators_per_worker=8)
+                                               num_accelerators=8)
 
     self.assertEqual('grpc://ps0:2222', cluster_resolver.master())
     self.assertEqual('ps', cluster_resolver.task_type)
     self.assertEqual(0, cluster_resolver.task_index)
-    self.assertEqual(8, cluster_resolver.num_accelerators_per_worker())
+    self.assertEqual(8, cluster_resolver.num_accelerators())
 
     cluster_resolver.task_type = 'worker'
     cluster_resolver.task_index = 1
diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
index 1fd674c8a4..99a7a0922f 100644
--- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
@@ -387,17 +387,30 @@ class TPUClusterResolver(ClusterResolver):
 
     return server_lib.ClusterSpec(cluster_spec)
 
-  def num_accelerators_per_worker(self, session_config=None):
+  def num_accelerators(self,
+                       task_type=None,
+                       task_index=None,
+                       accelerator_type='TPU',
+                       config_proto=None):
     """Returns the number of TPU cores per worker.
 
     This defaults to 8 for all current TPU configurations, and we do not need
     to query any remote systems for this.
 
     Args:
-      session_config: Unused. Not currently necessary to query anything as this
-        number is 8 for all TPU configurations.
+      task_type: Unused.
+      task_index: Unused.
+      accelerator_type: Unused.
+      config_proto: Unused.
+
+    Raises:
+      RuntimeError: If this is used with a non-TPU accelerator_type.
     """
-    del session_config  # Unused. Not necessary to query anything.
+    # Unused. Not necessary to query anything.
+    del task_type, task_index, config_proto
+
+    if accelerator_type != 'TPU':
+      raise ValueError('This Cluster Resolver is only compatible with TPUs.')
     return 8
 
   @property
-- 
GitLab


From e7f4f4d2a662ea24826af717d5ee401e67883b7d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 11:08:53 -0800
Subject: [PATCH 136/873] Increase timeout to deflake ctc_loss_op_test.

PiperOrigin-RevId: 224370778
---
 tensorflow/python/kernel_tests/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index 242e3b595c..df8c14970a 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -272,7 +272,7 @@ tf_py_test(
 
 cuda_py_test(
     name = "ctc_loss_op_test",
-    size = "small",
+    size = "medium",
     srcs = ["ctc_loss_op_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
-- 
GitLab


From a5c2d3ecf905e310ebf4f73089b0b5bf8b6dcf81 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Thu, 6 Dec 2018 11:24:25 -0800
Subject: [PATCH 137/873] Apply API symbols upgrade for TF 2.0

PiperOrigin-RevId: 224374109
---
 tensorflow/python/ops/array_ops.py            | 20 ++++-
 .../python/ops/candidate_sampling_ops.py      | 11 +--
 tensorflow/python/ops/embedding_ops.py        | 78 ++++++++++++++++++-
 tensorflow/python/ops/nn_impl.py              |  2 +-
 tensorflow/python/ops/nn_ops.py               | 10 ++-
 tensorflow/python/ops/sparse_ops.py           |  4 +-
 .../api/golden/v1/tensorflow.random.pbtxt     | 12 +++
 .../tools/api/golden/v2/tensorflow.math.pbtxt |  2 +-
 .../tools/api/golden/v2/tensorflow.nn.pbtxt   | 14 ++--
 .../api/golden/v2/tensorflow.random.pbtxt     |  4 +
 .../api/golden/v2/tensorflow.sparse.pbtxt     |  2 +-
 tensorflow/tools/compatibility/renames_v2.py  |  5 +-
 tensorflow/tools/compatibility/reorders_v2.py |  8 ++
 .../tools/compatibility/tf_upgrade_v2.py      | 11 +++
 .../tools/compatibility/tf_upgrade_v2_test.py | 48 ++++++++++++
 15 files changed, 205 insertions(+), 26 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index b555f63ceb..185452e1ab 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -2667,7 +2667,7 @@ def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=r
 space_to_batch.__doc__ = gen_array_ops.space_to_batch.__doc__
 
 
-@tf_export("nn.space_to_depth", v1=["nn.space_to_depth", "space_to_depth"])
+@tf_export(v1=["nn.space_to_depth", "space_to_depth"])
 @deprecation.deprecated_endpoints("space_to_depth")
 def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
   return gen_array_ops.space_to_depth(input, block_size, data_format, name=name)
@@ -2676,7 +2676,15 @@ def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint:
 space_to_depth.__doc__ = gen_array_ops.space_to_depth.__doc__
 
 
-@tf_export("nn.depth_to_space", v1=["nn.depth_to_space", "depth_to_space"])
+@tf_export("nn.space_to_depth", v1=[])
+def space_to_depth_v2(input, block_size, data_format="NHWC", name=None):  # pylint: disable=redefined-builtin
+  return gen_array_ops.space_to_depth(input, block_size, data_format, name=name)
+
+
+space_to_depth_v2.__doc__ = gen_array_ops.space_to_depth.__doc__
+
+
+@tf_export(v1=["nn.depth_to_space", "depth_to_space"])
 @deprecation.deprecated_endpoints("depth_to_space")
 def depth_to_space(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
   return gen_array_ops.depth_to_space(input, block_size, data_format, name=name)
@@ -2685,6 +2693,14 @@ def depth_to_space(input, block_size, name=None, data_format="NHWC"):  # pylint:
 depth_to_space.__doc__ = gen_array_ops.depth_to_space.__doc__
 
 
+@tf_export("nn.depth_to_space", v1=[])
+def depth_to_space_v2(input, block_size, data_format="NHWC", name=None):  # pylint: disable=redefined-builtin
+  return gen_array_ops.depth_to_space(input, block_size, data_format, name=name)
+
+
+depth_to_space_v2.__doc__ = gen_array_ops.depth_to_space.__doc__
+
+
 @tf_export(v1=["batch_to_space"])
 def batch_to_space(input, crops, block_size, name=None):  # pylint: disable=redefined-builtin
   result = batch_to_space_nd(
diff --git a/tensorflow/python/ops/candidate_sampling_ops.py b/tensorflow/python/ops/candidate_sampling_ops.py
index c64000b65d..56f76a49d5 100644
--- a/tensorflow/python/ops/candidate_sampling_ops.py
+++ b/tensorflow/python/ops/candidate_sampling_ops.py
@@ -151,7 +151,10 @@ def log_uniform_candidate_sampler(true_classes, num_true, num_sampled, unique,
       seed2=seed2, name=name)
 
 
-@tf_export('nn.learned_unigram_candidate_sampler')
+@tf_export(
+    'random.learned_unigram_candidate_sampler',
+    'nn.learned_unigram_candidate_sampler')
+@deprecation.deprecated_endpoints(['nn.learned_unigram_candidate_sampler'])
 def learned_unigram_candidate_sampler(true_classes, num_true, num_sampled,
                                       unique, range_max, seed=None, name=None):
   """Samples a set of classes from a distribution learned during training.
@@ -209,8 +212,7 @@ def learned_unigram_candidate_sampler(true_classes, num_true, num_sampled,
 
 
 @tf_export('random.fixed_unigram_candidate_sampler',
-           'nn.fixed_unigram_candidate_sampler',
-           v1=['nn.fixed_unigram_candidate_sampler'])
+           'nn.fixed_unigram_candidate_sampler')
 def fixed_unigram_candidate_sampler(true_classes,
                                     num_true,
                                     num_sampled,
@@ -302,8 +304,7 @@ def fixed_unigram_candidate_sampler(true_classes,
       unigrams=unigrams, seed=seed1, seed2=seed2, name=name)
 
 
-@tf_export('random.all_candidate_sampler', 'nn.all_candidate_sampler',
-           v1=['nn.all_candidate_sampler'])
+@tf_export('random.all_candidate_sampler', 'nn.all_candidate_sampler')
 def all_candidate_sampler(true_classes, num_true, num_sampled, unique,
                           seed=None, name=None):
   """Generate the set of all classes.
diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py
index f2f5da7d7a..d0291e2095 100644
--- a/tensorflow/python/ops/embedding_ops.py
+++ b/tensorflow/python/ops/embedding_ops.py
@@ -247,7 +247,7 @@ def _embedding_lookup_and_transform(params,
       return ret
 
 
-@tf_export("nn.embedding_lookup")
+@tf_export(v1=["nn.embedding_lookup"])
 def embedding_lookup(
     params,
     ids,
@@ -316,7 +316,66 @@ def embedding_lookup(
       transform_fn=None)
 
 
-@tf_export("nn.embedding_lookup_sparse")
+@tf_export("nn.embedding_lookup", v1=[])
+def embedding_lookup_v2(
+    params,
+    ids,
+    partition_strategy="mod",
+    max_norm=None,
+    name=None):
+  """Looks up `ids` in a list of embedding tensors.
+
+  This function is used to perform parallel lookups on the list of
+  tensors in `params`.  It is a generalization of
+  `tf.gather`, where `params` is
+  interpreted as a partitioning of a large embedding tensor.  `params` may be
+  a `PartitionedVariable` as returned by using `tf.get_variable()` with a
+  partitioner.
+
+  If `len(params) > 1`, each element `id` of `ids` is partitioned between
+  the elements of `params` according to the `partition_strategy`.
+  In all strategies, if the id space does not evenly divide the number of
+  partitions, each of the first `(max_id + 1) % len(params)` partitions will
+  be assigned one more id.
+
+  If `partition_strategy` is `"mod"`, we assign each id to partition
+  `p = id % len(params)`. For instance,
+  13 ids are split across 5 partitions as:
+  `[[0, 5, 10], [1, 6, 11], [2, 7, 12], [3, 8], [4, 9]]`
+
+  If `partition_strategy` is `"div"`, we assign ids to partitions in a
+  contiguous manner. In this case, 13 ids are split across 5 partitions as:
+  `[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10], [11, 12]]`
+
+  The results of the lookup are concatenated into a dense
+  tensor. The returned tensor has shape `shape(ids) + shape(params)[1:]`.
+
+  Args:
+    params: A single tensor representing the complete embedding tensor,
+      or a list of P tensors all of same shape except for the first dimension,
+      representing sharded embedding tensors.  Alternatively, a
+      `PartitionedVariable`, created by partitioning along dimension 0. Each
+      element must be appropriately sized for the given `partition_strategy`.
+    ids: A `Tensor` with type `int32` or `int64` containing the ids to be looked
+      up in `params`.
+    partition_strategy: A string specifying the partitioning strategy, relevant
+      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
+      is `"mod"`.
+    max_norm: If not `None`, each embedding is clipped if its l2-norm is
+      larger than this value.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `Tensor` with the same type as the tensors in `params`.
+
+  Raises:
+    ValueError: If `params` is empty.
+  """
+  return embedding_lookup(params, ids, partition_strategy, name,
+                          max_norm=max_norm)
+
+
+@tf_export(v1=["nn.embedding_lookup_sparse"])
 def embedding_lookup_sparse(params,
                             sp_ids,
                             sp_weights,
@@ -491,6 +550,21 @@ def embedding_lookup_sparse(params,
     return embeddings
 
 
+@tf_export("nn.embedding_lookup_sparse", v1=[])
+def embedding_lookup_sparse_v2(params,
+                               sp_ids,
+                               sp_weights,
+                               partition_strategy="mod",
+                               combiner=None,
+                               max_norm=None,
+                               name=None):
+  return embedding_lookup_sparse_v2(
+      params, sp_ids, sp_weights, partition_strategy, name, combiner, max_norm)
+
+
+embedding_lookup_sparse_v2.__doc__ = embedding_lookup_sparse.__doc__
+
+
 @tf_export("nn.safe_embedding_lookup_sparse", v1=[])
 def safe_embedding_lookup_sparse_v2(embedding_weights,
                                     sparse_ids,
diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 292cca8b9e..48dcab4842 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -262,7 +262,7 @@ def weighted_cross_entropy_with_logits(targets, logits, pos_weight, name=None):
         name=name)
 
 
-@tf_export("nn.relu_layer")
+@tf_export(v1=["nn.relu_layer"])
 def relu_layer(x, weights, biases, name=None):
   """Computes Relu(x * weight + biases).
 
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 97d405cd0e..611bfdac9a 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -3776,7 +3776,7 @@ def erosion2d_v2(value,
             name=name))
 
 
-@tf_export("math.in_top_k", "nn.in_top_k")
+@tf_export(v1=["math.in_top_k", "nn.in_top_k"])
 def in_top_k(predictions, targets, k, name=None):
   r"""Says whether the targets are in the top `K` predictions.
 
@@ -3810,6 +3810,14 @@ def in_top_k(predictions, targets, k, name=None):
     return gen_nn_ops.in_top_kv2(predictions, targets, k, name=name)
 
 
+@tf_export("math.in_top_k", "nn.in_top_k", v1=[])
+def in_top_k_v2(targets, predictions, k, name=None):
+  return in_top_k(predictions, targets, k, name)
+
+
+in_top_k_v2.__doc__ = in_top_k.__doc__
+
+
 tf_export(v1=["nn.quantized_avg_pool"])(gen_nn_ops.quantized_avg_pool)
 tf_export(v1=["nn.quantized_conv2d"])(gen_nn_ops.quantized_conv2d)
 tf_export(v1=["nn.quantized_relu_x"])(gen_nn_ops.quantized_relu_x)
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index feff7df850..d7346b7371 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -299,7 +299,7 @@ def sparse_concat(axis,
 
 
 @tf_export("sparse.concat", v1=[])
-def sparse_concat_v2(axis, sp_inputs, expand_nonconcat_dim=False, name=None):  # pylint: disable=missing-docstring
+def sparse_concat_v2(axis, sp_inputs, expand_nonconcat_dims=False, name=None):  # pylint: disable=missing-docstring
   sp_inputs = _convert_to_sparse_tensors(sp_inputs)
 
   if len(sp_inputs) == 1:  # Degenerate case of one tensor.
@@ -309,7 +309,7 @@ def sparse_concat_v2(axis, sp_inputs, expand_nonconcat_dim=False, name=None):  #
   vals = [sp_input.values for sp_input in sp_inputs]
   shapes = [sp_input.dense_shape for sp_input in sp_inputs]
 
-  if expand_nonconcat_dim:
+  if expand_nonconcat_dims:
     max_shape = math_ops.reduce_max(
         array_ops.concat(
             [array_ops.reshape(shape, [1, -1]) for shape in shapes], 0), 0)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
index 107534e086..1eefb1c70c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.random.pbtxt
@@ -1,9 +1,17 @@
 path: "tensorflow.random"
 tf_module {
+  member_method {
+    name: "all_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "categorical"
     argspec: "args=[\'logits\', \'num_samples\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
   }
+  member_method {
+    name: "fixed_unigram_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'vocab_file\', \'distortion\', \'num_reserved_ids\', \'num_shards\', \'shard\', \'unigrams\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'\', \'1.0\', \'0\', \'1\', \'0\', \'()\', \'None\', \'None\'], "
+  }
   member_method {
     name: "gamma"
     argspec: "args=[\'shape\', \'alpha\', \'beta\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
@@ -12,6 +20,10 @@ tf_module {
     name: "get_seed"
     argspec: "args=[\'op_seed\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "learned_unigram_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "log_uniform_candidate_sampler"
     argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
index 979d77ea6b..4ac0484050 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.math.pbtxt
@@ -170,7 +170,7 @@ tf_module {
   }
   member_method {
     name: "in_top_k"
-    argspec: "args=[\'predictions\', \'targets\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'targets\', \'predictions\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "invert_permutation"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
index 6bf4a9d00c..9e5e82483a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
@@ -106,7 +106,7 @@ tf_module {
   }
   member_method {
     name: "depth_to_space"
-    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+    argspec: "args=[\'input\', \'block_size\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
   }
   member_method {
     name: "depthwise_conv2d"
@@ -134,11 +134,11 @@ tf_module {
   }
   member_method {
     name: "embedding_lookup"
-    argspec: "args=[\'params\', \'ids\', \'partition_strategy\', \'name\', \'validate_indices\', \'max_norm\'], varargs=None, keywords=None, defaults=[\'mod\', \'None\', \'True\', \'None\'], "
+    argspec: "args=[\'params\', \'ids\', \'partition_strategy\', \'max_norm\', \'name\'], varargs=None, keywords=None, defaults=[\'mod\', \'None\', \'None\'], "
   }
   member_method {
     name: "embedding_lookup_sparse"
-    argspec: "args=[\'params\', \'sp_ids\', \'sp_weights\', \'partition_strategy\', \'name\', \'combiner\', \'max_norm\'], varargs=None, keywords=None, defaults=[\'mod\', \'None\', \'None\', \'None\'], "
+    argspec: "args=[\'params\', \'sp_ids\', \'sp_weights\', \'partition_strategy\', \'combiner\', \'max_norm\', \'name\'], varargs=None, keywords=None, defaults=[\'mod\', \'None\', \'None\', \'None\'], "
   }
   member_method {
     name: "erosion2d"
@@ -158,7 +158,7 @@ tf_module {
   }
   member_method {
     name: "in_top_k"
-    argspec: "args=[\'predictions\', \'targets\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'targets\', \'predictions\', \'k\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "l2_loss"
@@ -228,10 +228,6 @@ tf_module {
     name: "relu6"
     argspec: "args=[\'features\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
-  member_method {
-    name: "relu_layer"
-    argspec: "args=[\'x\', \'weights\', \'biases\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "safe_embedding_lookup_sparse"
     argspec: "args=[\'embedding_weights\', \'sparse_ids\', \'sparse_weights\', \'combiner\', \'default_id\', \'max_norm\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'mean\', \'None\', \'None\', \'None\'], "
@@ -278,7 +274,7 @@ tf_module {
   }
   member_method {
     name: "space_to_depth"
-    argspec: "args=[\'input\', \'block_size\', \'name\', \'data_format\'], varargs=None, keywords=None, defaults=[\'None\', \'NHWC\'], "
+    argspec: "args=[\'input\', \'block_size\', \'data_format\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\'], "
   }
   member_method {
     name: "sparse_softmax_cross_entropy_with_logits"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
index de5cb6b717..d49c23e59c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.random.pbtxt
@@ -16,6 +16,10 @@ tf_module {
     name: "gamma"
     argspec: "args=[\'shape\', \'alpha\', \'beta\', \'dtype\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \"<dtype: \'float32\'>\", \'None\', \'None\'], "
   }
+  member_method {
+    name: "learned_unigram_candidate_sampler"
+    argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "log_uniform_candidate_sampler"
     argspec: "args=[\'true_classes\', \'num_true\', \'num_sampled\', \'unique\', \'range_max\', \'seed\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
index 9808200d72..b8bd2c0b72 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.sparse.pbtxt
@@ -14,7 +14,7 @@ tf_module {
   }
   member_method {
     name: "concat"
-    argspec: "args=[\'axis\', \'sp_inputs\', \'expand_nonconcat_dim\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+    argspec: "args=[\'axis\', \'sp_inputs\', \'expand_nonconcat_dims\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
   }
   member_method {
     name: "cross"
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index 02212c3810..b27fd5acc7 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -134,7 +134,7 @@ renames = {
     'tf.decode_json_example': 'tf.io.decode_json_example',
     'tf.decode_raw': 'tf.io.decode_raw',
     'tf.delete_session_tensor': 'tf.compat.v1.delete_session_tensor',
-    'tf.depth_to_space': 'tf.nn.depth_to_space',
+    'tf.depth_to_space': 'tf.compat.v1.depth_to_space',
     'tf.dequantize': 'tf.quantization.dequantize',
     'tf.deserialize_many_sparse': 'tf.io.deserialize_many_sparse',
     'tf.diag': 'tf.linalg.tensor_diag',
@@ -386,6 +386,7 @@ renames = {
     'tf.nn.quantized_max_pool': 'tf.compat.v1.nn.quantized_max_pool',
     'tf.nn.quantized_relu_x': 'tf.compat.v1.nn.quantized_relu_x',
     'tf.nn.raw_rnn': 'tf.compat.v1.nn.raw_rnn',
+    'tf.nn.relu_layer': 'tf.compat.v1.nn.relu_layer',
     'tf.nn.rnn_cell.BasicLSTMCell': 'tf.compat.v1.nn.rnn_cell.BasicLSTMCell',
     'tf.nn.rnn_cell.BasicRNNCell': 'tf.compat.v1.nn.rnn_cell.BasicRNNCell',
     'tf.nn.rnn_cell.GRUCell': 'tf.compat.v1.nn.rnn_cell.GRUCell',
@@ -511,7 +512,7 @@ renames = {
     'tf.sets.set_size': 'tf.sets.size',
     'tf.sets.set_union': 'tf.sets.union',
     'tf.space_to_batch': 'tf.nn.space_to_batch',
-    'tf.space_to_depth': 'tf.nn.space_to_depth',
+    'tf.space_to_depth': 'tf.compat.v1.space_to_depth',
     'tf.sparse.matmul': 'tf.sparse.sparse_dense_matmul',
     'tf.sparse.merge': 'tf.compat.v1.sparse.merge',
     'tf.sparse.placeholder': 'tf.compat.v1.sparse.placeholder',
diff --git a/tensorflow/tools/compatibility/reorders_v2.py b/tensorflow/tools/compatibility/reorders_v2.py
index 4152d37db9..76f71d5804 100644
--- a/tensorflow/tools/compatibility/reorders_v2.py
+++ b/tensorflow/tools/compatibility/reorders_v2.py
@@ -33,6 +33,7 @@ reorders = {
     'tf.confusion_matrix': ['labels', 'predictions', 'num_classes', 'dtype', 'name', 'weights'],
     'tf.convert_to_tensor': ['value', 'dtype', 'name', 'preferred_dtype'],
     'tf.decode_csv': ['records', 'record_defaults', 'field_delim', 'use_quote_delim', 'name', 'na_value', 'select_cols'],
+    'tf.depth_to_space': ['input', 'block_size', 'name', 'data_format'],
     'tf.feature_column.categorical_column_with_vocabulary_file': ['key', 'vocabulary_file', 'vocabulary_size', 'num_oov_buckets', 'default_value', 'dtype'],
     'tf.io.decode_csv': ['records', 'record_defaults', 'field_delim', 'use_quote_delim', 'name', 'na_value', 'select_cols'],
     'tf.io.parse_example': ['serialized', 'features', 'name', 'example_names'],
@@ -42,6 +43,7 @@ reorders = {
     'tf.math.argmax': ['input', 'axis', 'name', 'dimension', 'output_type'],
     'tf.math.argmin': ['input', 'axis', 'name', 'dimension', 'output_type'],
     'tf.math.confusion_matrix': ['labels', 'predictions', 'num_classes', 'dtype', 'name', 'weights'],
+    'tf.math.in_top_k': ['predictions', 'targets', 'k', 'name'],
     'tf.math.reduce_all': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'],
     'tf.math.reduce_any': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'],
     'tf.math.reduce_logsumexp': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'],
@@ -53,10 +55,15 @@ reorders = {
     'tf.multinomial': ['logits', 'num_samples', 'seed', 'name', 'output_dtype'],
     'tf.nn.convolution': ['input', 'filter', 'padding', 'strides', 'dilation_rate', 'name', 'data_format'],
     'tf.nn.crelu': ['features', 'name', 'axis'],
+    'tf.nn.depth_to_space': ['input', 'block_size', 'name', 'data_format'],
     'tf.nn.depthwise_conv2d': ['input', 'filter', 'strides', 'padding', 'rate', 'name', 'data_format'],
+    'tf.nn.embedding_lookup': ['params', 'ids', 'partition_strategy', 'name', 'validate_indices', 'max_norm'],
+    'tf.nn.embedding_lookup_sparse': ['params', 'sp_ids', 'sp_weights', 'partition_strategy', 'name', 'combiner', 'max_norm'],
+    'tf.nn.in_top_k': ['predictions', 'targets', 'k', 'name'],
     'tf.nn.moments': ['x', 'axes', 'shift', 'name', 'keep_dims'],
     'tf.nn.pool': ['input', 'window_shape', 'pooling_type', 'padding', 'dilation_rate', 'strides', 'name', 'data_format'],
     'tf.nn.separable_conv2d': ['input', 'depthwise_filter', 'pointwise_filter', 'strides', 'padding', 'rate', 'name', 'data_format'],
+    'tf.nn.space_to_depth': ['input', 'block_size', 'name', 'data_format'],
     'tf.nn.weighted_moments': ['x', 'axes', 'frequency_weights', 'name', 'keep_dims'],
     'tf.pad': ['tensor', 'paddings', 'mode', 'name', 'constant_values'],
     'tf.parse_example': ['serialized', 'features', 'name', 'example_names'],
@@ -78,6 +85,7 @@ reorders = {
     'tf.serialize_sparse': ['sp_input', 'name', 'out_type'],
     'tf.shape': ['input', 'name', 'out_type'],
     'tf.size': ['input', 'name', 'out_type'],
+    'tf.space_to_depth': ['input', 'block_size', 'name', 'data_format'],
     'tf.sparse.add': ['a', 'b', 'threshold', 'thresh'],
     'tf.sparse.concat': ['axis', 'sp_inputs', 'name', 'expand_nonconcat_dim', 'concat_dim'],
     'tf.sparse.segment_mean': ['data', 'indices', 'segment_ids', 'name', 'num_segments'],
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py
index f4165c9456..0630ce28cf 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py
@@ -552,6 +552,12 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.reduce_join",
         "tf.confusion_matrix",
         "tf.math.confusion_matrix",
+        "tf.math.in_top_k",
+        "tf.nn.depth_to_space",
+        "tf.nn.embedding_lookup",
+        "tf.nn.embedding_lookup_sparse",
+        "tf.nn.in_top_k",
+        "tf.nn.space_to_depth",
     }
 
     # Functions that were reordered should be changed to the new keyword args
@@ -711,6 +717,11 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             tf_01s_like_no_optimize_comment,
         "tf.ones_like":
             tf_01s_like_no_optimize_comment,
+        "tf.nn.embedding_lookup":
+            "WARNING: validate_indices argument has been removed.",
+        "tf.sparse.concat":
+            ("WARNING: expand_nonconcat_dim was renamed to "
+             "expand_nonconcat_dims."),
     }
 
     self.symbol_renames = {
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index 57700c07d6..b362a8081f 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -567,6 +567,54 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
+  def testInTopK(self):
+    text = "tf.math.in_top_k(a, b, c, n)"
+    expected_text = (
+        "tf.math.in_top_k(predictions=a, targets=b, k=c, name=n)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+  def testDepthToSpace(self):
+    text = "tf.nn.depth_to_space(input, block_size, name, data_format)"
+    expected_text = (
+        "tf.nn.depth_to_space(input=input, block_size=block_size, "
+        "name=name, data_format=data_format)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+  def testEmbeddingLookup(self):
+    text = ("tf.nn.embedding_lookup(params, ids, partition_strategy, name, "
+            "validate_indices, max_norm)")
+    expected_text = ("tf.nn.embedding_lookup(params=params, ids=ids, "
+                     "partition_strategy=partition_strategy, name=name, "
+                     "validate_indices=validate_indices, max_norm=max_norm)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+  def testEmbeddingLookupSparse(self):
+    text = ("tf.nn.embedding_lookup_sparse(params, sp_ids, sp_weights, "
+            "partition_strategy, name, combiner, max_norm)")
+    expected_text = ("tf.nn.embedding_lookup_sparse(params=params, "
+                     "sp_ids=sp_ids, sp_weights=sp_weights, "
+                     "partition_strategy=partition_strategy, name=name, "
+                     "combiner=combiner, max_norm=max_norm)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+  def testNnInTopK(self):
+    text = "tf.nn.in_top_k(predictions, targets, k, name)"
+    expected_text = ("tf.nn.in_top_k(predictions=predictions, "
+                     "targets=targets, k=k, name=name)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+  def testSpaceToDepth(self):
+    text = "tf.nn.space_to_depth(input, block_size, name, data_format)"
+    expected_text = ("tf.nn.space_to_depth(input=input, block_size=block_size, "
+                     "name=name, data_format=data_format)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
 
 class TestUpgradeFiles(test_util.TensorFlowTestCase):
 
-- 
GitLab


From ef46e5d3a3e3095517b7528890704230139d95c1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 11:40:10 -0800
Subject: [PATCH 138/873] Internal change.

PiperOrigin-RevId: 224377474
---
 tensorflow/lite/delegates/nnapi/BUILD | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/lite/delegates/nnapi/BUILD b/tensorflow/lite/delegates/nnapi/BUILD
index c24f0f71ac..fd954ba222 100644
--- a/tensorflow/lite/delegates/nnapi/BUILD
+++ b/tensorflow/lite/delegates/nnapi/BUILD
@@ -23,10 +23,7 @@ tf_cc_test(
     name = "nnapi_delegate_test",
     size = "small",
     srcs = ["nnapi_delegate_test.cc"],
-    tags = [
-        "no_oss",
-        "noasan",  # TODO(b/112326936): re-enable for asan once fixed.
-    ],
+    tags = ["no_oss"],
     deps = [
         ":nnapi_delegate",
         "//tensorflow/lite:framework",
-- 
GitLab


From 52aac384bf12ced33ea1ef3ec7bc8183a9cf4ca0 Mon Sep 17 00:00:00 2001
From: Dalmo Cirne <dalmo@clarifai.com>
Date: Wed, 31 Oct 2018 05:43:57 -0400
Subject: [PATCH 139/873] Fix downloading protobuf dependency

Protobuf has been failing due to its repository having dependencies. Downloading the .tar.gz form GitHub doesn't work because it does not bring its dependencies with it.

This PR introduces a function to clone a repository, initializa and update its submodules. The function is used to download protobuf as a repository dependency, rather than just a file.

The function is also implemented in a generic way, so if in the future other dependencies fall into the same situation, it can be reused.

With these changes, `tensorflow/workspace.bzl` goes back to using the commit sha in `PROTOBUF_URLS` and `PROTOBUF_STRIP_PREFIX`, rather than the tag of a release.

A command at the end of the `download_dependencies.sh` script was commented out. The comment says for it to be removed once protobug is fixed. Perhaps no longer necessary.
---
 .../contrib/makefile/download_dependencies.sh | 43 +++++++++++++++----
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index b396c52767..d22e2c179b 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -30,11 +30,13 @@ EIGEN_URL="$(grep -o 'http.*bitbucket.org/eigen/eigen/get/.*tar\.gz' "${BZL_FILE
 GEMMLOWP_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/gemmlowp/.*zip' "${BZL_FILE_PATH}" | head -n1)"
 GOOGLETEST_URL="https://github.com/google/googletest/archive/release-1.8.0.tar.gz"
 NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
-# Note: The Protobuf source in `tensorflow/workspace.bzl` in TensorFlow
-# 1.10 branch does not work. `make distclean` fails and blocks the build
-# process. For now we're hardcoding to the version which is used by
-# TensorFlow 1.9.
-PROTOBUF_URL="https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz"
+
+# Note: The protobuf repo needs to be cloned due to its submodules.
+# These variables contain the GitHub repo and the sha, from `tensorflow/workspace.bzl`,
+# from which to clone it from and checkout to.
+readonly PROTOBUF_REPO="https://github.com/protocolbuffers/protobuf.git"
+readonly PROTOBUF_SHA="$(grep -o 'https://github.com/google/protobuf/archive/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1 | awk '{print substr($0, index($0, "archive") + 8, 40) }')"
+
 # TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' once
 # the archive has been propagated in mirror.bazel.build.
 RE2_URL="$(grep -o 'https://github.com/google/re2/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1)"
@@ -91,11 +93,34 @@ download_and_extract() {
   find "${dir}" -type f -name '*BUILD' -delete
 }
 
+function clone_repository() {
+  local repo_url="${1}"
+  local destination_directory="${2}"
+  local commit_sha="${3}"
+
+  if [[ -d "${destination_directory}" ]]; then
+    rm -rf "${destination_directory}"
+  fi
+
+  git clone "${repo_url}" "${destination_directory}"
+
+  pushd "$(pwd)" 1>/dev/null
+
+  cd "${destination_directory}"
+
+  if [[ -n "${commit_sha}" ]]; then
+    git checkout "${PROTOBUF_SHA}"
+  fi
+
+  git submodule update --init
+
+  popd 1>/dev/null
+}
+
 download_and_extract "${EIGEN_URL}" "${DOWNLOADS_DIR}/eigen"
 download_and_extract "${GEMMLOWP_URL}" "${DOWNLOADS_DIR}/gemmlowp"
 download_and_extract "${GOOGLETEST_URL}" "${DOWNLOADS_DIR}/googletest"
 download_and_extract "${NSYNC_URL}" "${DOWNLOADS_DIR}/nsync"
-download_and_extract "${PROTOBUF_URL}" "${DOWNLOADS_DIR}/protobuf"
 download_and_extract "${RE2_URL}" "${DOWNLOADS_DIR}/re2"
 download_and_extract "${FFT2D_URL}" "${DOWNLOADS_DIR}/fft2d"
 download_and_extract "${DOUBLE_CONVERSION_URL}" "${DOWNLOADS_DIR}/double_conversion"
@@ -106,6 +131,8 @@ download_and_extract "${CUB_URL}" "${DOWNLOADS_DIR}/cub/external/cub_archive"
 download_and_extract "${FARMHASH_URL}" "${DOWNLOADS_DIR}/farmhash"
 download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers"
 
+clone_repository "${PROTOBUF_REPO}" "${DOWNLOADS_DIR}/protobuf" "${PROTOBUF_SHA}"
+
 replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
   "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
 replace_by_sed 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#static uint32x2_t p2ui_CONJ_XOR;// = vld1_u32( conj_XOR_DATA ); - Removed by scripts#' \
@@ -113,7 +140,7 @@ replace_by_sed 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#s
 replace_by_sed 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \
   "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
 # TODO(satok): Remove this once protobuf/autogen.sh is fixed.
-replace_by_sed 's#https://googlemock.googlecode.com/files/gmock-1.7.0.zip#http://download.tensorflow.org/deps/gmock-1.7.0.zip#' \
-  "${DOWNLOADS_DIR}/protobuf/autogen.sh"
+# replace_by_sed 's#https://googlemock.googlecode.com/files/gmock-1.7.0.zip#http://download.tensorflow.org/deps/gmock-1.7.0.zip#' \
+#   "${DOWNLOADS_DIR}/protobuf/autogen.sh"
 
 echo "download_dependencies.sh completed successfully." >&2
-- 
GitLab


From a3a6ab05170eb95221e7ae486d30197f874ea784 Mon Sep 17 00:00:00 2001
From: Dalmo Cirne <dalmo@clarifai.com>
Date: Thu, 1 Nov 2018 21:21:45 -0400
Subject: [PATCH 140/873] Add comment to indicate the protobuf version

---
 tensorflow/workspace.bzl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index e6b4a89e3b..9e59f1c710 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -346,6 +346,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         },
     )
 
+    # The commit sha in the URLs correspond to v3.6.0 of protobuf
     PROTOBUF_URLS = [
         "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.1.2.tar.gz",
         "https://github.com/google/protobuf/archive/v3.6.1.2.tar.gz",
-- 
GitLab


From 1aeb29692a7e3e285c1572fca1dec9824c520962 Mon Sep 17 00:00:00 2001
From: Dalmo Cirne <dalmo@clarifai.com>
Date: Thu, 6 Dec 2018 14:07:02 -0500
Subject: [PATCH 141/873] Update the Flatbuffer and Protobuf versions

---
 tensorflow/contrib/makefile/download_dependencies.sh | 2 +-
 tensorflow/workspace.bzl                             | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index d22e2c179b..6582ba793b 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -47,7 +47,7 @@ CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v mirror
 
 # Required for TensorFlow Lite Flex runtime.
 FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz"
-FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz"
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/v1.9.0.tar.gz"
 
 # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
 #                   so work around it by patching the source.
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 9e59f1c710..e6b4a89e3b 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -346,7 +346,6 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
         },
     )
 
-    # The commit sha in the URLs correspond to v3.6.0 of protobuf
     PROTOBUF_URLS = [
         "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.1.2.tar.gz",
         "https://github.com/google/protobuf/archive/v3.6.1.2.tar.gz",
-- 
GitLab


From 6ef428bd6e83b0930266bf922eaa2f4a60e8328a Mon Sep 17 00:00:00 2001
From: Dalmo Cirne <dalmo@clarifai.com>
Date: Thu, 6 Dec 2018 14:42:06 -0500
Subject: [PATCH 142/873] Fix URLs for protobuf 3.6.1.2

---
 tensorflow/workspace.bzl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index e6b4a89e3b..6712925b49 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -347,8 +347,8 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     )
 
     PROTOBUF_URLS = [
-        "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.1.2.tar.gz",
-        "https://github.com/google/protobuf/archive/v3.6.1.2.tar.gz",
+        "https://mirror.bazel.build/github.com/protocolbuffers/protobuf/archive/v3.6.1.2.tar.gz",
+        "https://github.com/protocolbuffers/protobuf/archive/v3.6.1.2.tar.gz",
     ]
     PROTOBUF_SHA256 = "2244b0308846bb22b4ff0bcc675e99290ff9f1115553ae9671eba1030af31bc0"
     PROTOBUF_STRIP_PREFIX = "protobuf-3.6.1.2"
-- 
GitLab


From 0adae252576e5cdafab6598cbb0522fb3c703bf3 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 6 Dec 2018 11:50:41 -0800
Subject: [PATCH 143/873] Replace uses of deprecated
 `Dataset.make_one_shot_iterator()` with
 `tf.compat.v1.data.make_one_shot_iterator()`.

PiperOrigin-RevId: 224379500
---
 .../tpu/python/tpu/tpu_estimator_signals_test.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
index 55235556de..e3ea983abf 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py
@@ -21,8 +21,8 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.contrib.tpu.python.tpu import tpu_estimator
-from tensorflow.python import data as dataset_lib
 from tensorflow.python.client import session
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
@@ -34,10 +34,10 @@ def make_input_fn(num_samples):
 
   def input_fn(params):
     batch_size = params['batch_size']
-    da1 = dataset_lib.Dataset.from_tensor_slices(a)
-    da2 = dataset_lib.Dataset.from_tensor_slices(b)
+    da1 = dataset_ops.Dataset.from_tensor_slices(a)
+    da2 = dataset_ops.Dataset.from_tensor_slices(b)
 
-    dataset = dataset_lib.Dataset.zip((da1, da2))
+    dataset = dataset_ops.Dataset.zip((da1, da2))
     dataset = dataset.map(lambda fa, fb: {'a': fa, 'b': fb})
     dataset = dataset.batch(batch_size)
     return dataset
@@ -50,10 +50,10 @@ def make_input_fn_with_labels(num_samples):
 
   def input_fn(params):
     batch_size = params['batch_size']
-    da1 = dataset_lib.Dataset.from_tensor_slices(a)
-    da2 = dataset_lib.Dataset.from_tensor_slices(b)
+    da1 = dataset_ops.Dataset.from_tensor_slices(a)
+    da2 = dataset_ops.Dataset.from_tensor_slices(b)
 
-    dataset = dataset_lib.Dataset.zip((da1, da2))
+    dataset = dataset_ops.Dataset.zip((da1, da2))
     dataset = dataset.map(lambda fa, fb: ({'a': fa}, fb))
     dataset = dataset.batch(batch_size)
     return dataset
@@ -71,7 +71,7 @@ class TPUEstimatorStoppingSignalsTest(test.TestCase):
 
     with ops.Graph().as_default():
       dataset = input_fn(params)
-      features = dataset_lib.make_one_shot_iterator(dataset).get_next()
+      features = dataset_ops.make_one_shot_iterator(dataset).get_next()
 
       # With tf.data.Dataset.batch, the batch is None, i.e., dynamic shape.
       self.assertIsNone(features['a'].shape.as_list()[0])
-- 
GitLab


From 0fb46cf9139f1e8cf88195e0b8f555eb10d00b3b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 11:53:13 -0800
Subject: [PATCH 144/873] Update imports to point to new location of
 distribution_strategy_context.py.

PiperOrigin-RevId: 224380057
---
 tensorflow/contrib/metrics/python/metrics/classification.py | 2 +-
 tensorflow/contrib/optimizer_v2/optimizer_v2.py             | 2 +-
 tensorflow/python/keras/layers/normalization.py             | 2 +-
 tensorflow/python/keras/metrics.py                          | 2 +-
 tensorflow/python/keras/optimizer_v2/optimizer_v2.py        | 2 +-
 tensorflow/python/keras/optimizers.py                       | 2 +-
 tensorflow/python/ops/metrics_impl.py                       | 2 +-
 tensorflow/python/ops/summary_op_util.py                    | 2 +-
 tensorflow/python/training/checkpoint_utils.py              | 2 +-
 tensorflow/python/training/moving_averages.py               | 2 +-
 tensorflow/python/training/optimizer.py                     | 2 +-
 tensorflow/python/training/slot_creator.py                  | 2 +-
 tensorflow/python/training/sync_replicas_optimizer.py       | 2 +-
 13 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/metrics/python/metrics/classification.py b/tensorflow/contrib/metrics/python/metrics/classification.py
index 062deb74b1..9aabc4bec3 100644
--- a/tensorflow/contrib/metrics/python/metrics/classification.py
+++ b/tensorflow/contrib/metrics/python/metrics/classification.py
@@ -18,13 +18,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics_impl
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.training import distribution_strategy_context
 
 # TODO(nsilberman): move into metrics/python/ops/
 
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index 73a556f0b2..11a9248a01 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -25,6 +25,7 @@ import abc
 import six
 
 from tensorflow.python.distribute import distribute_lib
+from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
@@ -36,7 +37,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
-from tensorflow.python.training import distribution_strategy_context as distribute_ctx
 from tensorflow.python.training import optimizer as optimizer_v1
 from tensorflow.python.training import slot_creator
 from tensorflow.python.training.checkpointable import base as checkpointable
diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py
index 37894a3d3d..75b10222ed 100644
--- a/tensorflow/python/keras/layers/normalization.py
+++ b/tensorflow/python/keras/layers/normalization.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import contextlib
 
 from tensorflow.python import tf2
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -39,7 +40,6 @@ from tensorflow.python.ops import nn
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.util.tf_export import tf_export
 
 
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 3c2682e4c6..1d3c6e67d7 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -28,6 +28,7 @@ from enum import Enum
 import numpy as np
 import six
 
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
 from tensorflow.python.framework import dtypes
@@ -60,7 +61,6 @@ from tensorflow.python.ops import nn
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.ops import weights_broadcast_ops
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util.tf_export import tf_export
 from tensorflow.tools.docs import doc_controls
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index b26b3cefc8..e6cd52c817 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -24,6 +24,7 @@ import abc
 
 import six
 
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
@@ -36,7 +37,6 @@ from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import optimizer as optimizer_v1
 from tensorflow.python.util import nest
 
diff --git a/tensorflow/python/keras/optimizers.py b/tensorflow/python/keras/optimizers.py
index 10466eb573..ee6dbba5ad 100644
--- a/tensorflow/python/keras/optimizers.py
+++ b/tensorflow/python/keras/optimizers.py
@@ -23,6 +23,7 @@ import six
 from six.moves import zip  # pylint: disable=redefined-builtin
 
 from tensorflow.python import tf2
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.framework import ops
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.optimizer_v2 import adadelta as adadelta_v2
@@ -38,7 +39,6 @@ from tensorflow.python.keras.utils.generic_utils import serialize_keras_object
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import optimizer as tf_optimizer_module
 from tensorflow.python.training import training_util
 from tensorflow.python.training.checkpointable import base as checkpointable
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index cb42199011..ec39b1790e 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -34,7 +35,6 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import weights_broadcast_ops
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.tf_export import tf_export
 
diff --git a/tensorflow/python/ops/summary_op_util.py b/tensorflow/python/ops/summary_op_util.py
index c72a9aefc3..93d8d50842 100644
--- a/tensorflow/python/ops/summary_op_util.py
+++ b/tensorflow/python/ops/summary_op_util.py
@@ -21,10 +21,10 @@ from __future__ import print_function
 import contextlib
 import re
 
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.platform import tf_logging
-from tensorflow.python.training import distribution_strategy_context
 
 
 def collect(val, collections, default_collections):
diff --git a/tensorflow/python/training/checkpoint_utils.py b/tensorflow/python/training/checkpoint_utils.py
index 58166dbb68..99b1f4c0d7 100644
--- a/tensorflow/python/training/checkpoint_utils.py
+++ b/tensorflow/python/training/checkpoint_utils.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import six
 
 from tensorflow.python import pywrap_tensorflow
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -29,7 +30,6 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import saver
 from tensorflow.python.util.tf_export import tf_export
 
diff --git a/tensorflow/python/training/moving_averages.py b/tensorflow/python/training/moving_averages.py
index 8785f9a8e7..72670f0ca3 100644
--- a/tensorflow/python/training/moving_averages.py
+++ b/tensorflow/python/training/moving_averages.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -26,7 +27,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import slot_creator
 from tensorflow.python.util.tf_export import tf_export
 
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index a9508b862a..d9ebdcad1f 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -25,6 +25,7 @@ import abc
 import six
 
 from tensorflow.python.distribute import distribute_lib
+from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
@@ -38,7 +39,6 @@ from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
-from tensorflow.python.training import distribution_strategy_context as distribute_ctx
 from tensorflow.python.training import slot_creator
 from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import nest
diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py
index d76b22acd8..ecf5a96ed4 100644
--- a/tensorflow/python/training/slot_creator.py
+++ b/tensorflow/python/training/slot_creator.py
@@ -39,13 +39,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.eager import context
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
-from tensorflow.python.training import distribution_strategy_context
 
 
 def _create_slot_var(primary, val, scope, validate_shape, shape, dtype):
diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py
index 501d9bc8d3..cd4590db7f 100644
--- a/tensorflow/python/training/sync_replicas_optimizer.py
+++ b/tensorflow/python/training/sync_replicas_optimizer.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.core.framework import types_pb2
+from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -27,7 +28,6 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import optimizer
 from tensorflow.python.training import queue_runner
 from tensorflow.python.training import session_manager
-- 
GitLab


From 5bedbfd6211aedadb70b40a8e2729cb792754ff3 Mon Sep 17 00:00:00 2001
From: Dalmo Cirne <dalmo@clarifai.com>
Date: Thu, 6 Dec 2018 15:13:04 -0500
Subject: [PATCH 145/873] Updates from the PR review

`PROTOBUF_SHA` renamed to `PROTOBUF_TAG`. The variable is used to checkout to a specific tag after cloning the protobuf repository. The value is obtained by parsing the protobuf URL in the `workspace.bzl` file.
---
 tensorflow/contrib/makefile/download_dependencies.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/makefile/download_dependencies.sh b/tensorflow/contrib/makefile/download_dependencies.sh
index 6582ba793b..2a5232b476 100755
--- a/tensorflow/contrib/makefile/download_dependencies.sh
+++ b/tensorflow/contrib/makefile/download_dependencies.sh
@@ -35,7 +35,7 @@ NSYNC_URL="$(grep -o 'https://mirror.bazel.build/github.com/google/nsync/.*tar\.
 # These variables contain the GitHub repo and the sha, from `tensorflow/workspace.bzl`,
 # from which to clone it from and checkout to.
 readonly PROTOBUF_REPO="https://github.com/protocolbuffers/protobuf.git"
-readonly PROTOBUF_SHA="$(grep -o 'https://github.com/google/protobuf/archive/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1 | awk '{print substr($0, index($0, "archive") + 8, 40) }')"
+readonly PROTOBUF_TAG="$(grep -o 'https://github.com/protocolbuffers/protobuf/archive/.*tar\.gz' "${BZL_FILE_PATH}" | head -n1 | awk '{print substr($0, index($0, "archive") + 8, index($0, "tar") - index($0, "archive") - 9) }')"
 
 # TODO (yongtang): Replace the following with 'https://mirror.bazel.build/github.com/google/re2/.*tar\.gz' once
 # the archive has been propagated in mirror.bazel.build.
@@ -47,7 +47,7 @@ CUB_URL="$(grep -o 'https.*cub/archive.*zip' "${BZL_FILE_PATH}" | grep -v mirror
 
 # Required for TensorFlow Lite Flex runtime.
 FARMHASH_URL="https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz"
-FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/v1.9.0.tar.gz"
+FLATBUFFERS_URL="https://github.com/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz"
 
 # TODO(petewarden): Some new code in Eigen triggers a clang bug with iOS arm64,
 #                   so work around it by patching the source.
@@ -109,7 +109,7 @@ function clone_repository() {
   cd "${destination_directory}"
 
   if [[ -n "${commit_sha}" ]]; then
-    git checkout "${PROTOBUF_SHA}"
+    git checkout "${PROTOBUF_TAG}"
   fi
 
   git submodule update --init
@@ -131,7 +131,7 @@ download_and_extract "${CUB_URL}" "${DOWNLOADS_DIR}/cub/external/cub_archive"
 download_and_extract "${FARMHASH_URL}" "${DOWNLOADS_DIR}/farmhash"
 download_and_extract "${FLATBUFFERS_URL}" "${DOWNLOADS_DIR}/flatbuffers"
 
-clone_repository "${PROTOBUF_REPO}" "${DOWNLOADS_DIR}/protobuf" "${PROTOBUF_SHA}"
+clone_repository "${PROTOBUF_REPO}" "${DOWNLOADS_DIR}/protobuf" "${PROTOBUF_TAG}"
 
 replace_by_sed 's#static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );#static uint32x4_t p4ui_CONJ_XOR; // = vld1q_u32( conj_XOR_DATA ); - Removed by script#' \
   "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
@@ -140,7 +140,7 @@ replace_by_sed 's#static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );#s
 replace_by_sed 's#static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );#static uint64x2_t p2ul_CONJ_XOR;// = vld1q_u64( p2ul_conj_XOR_DATA ); - Removed by script#' \
   "${DOWNLOADS_DIR}/eigen/Eigen/src/Core/arch/NEON/Complex.h"
 # TODO(satok): Remove this once protobuf/autogen.sh is fixed.
-# replace_by_sed 's#https://googlemock.googlecode.com/files/gmock-1.7.0.zip#http://download.tensorflow.org/deps/gmock-1.7.0.zip#' \
-#   "${DOWNLOADS_DIR}/protobuf/autogen.sh"
+replace_by_sed 's#https://googlemock.googlecode.com/files/gmock-1.7.0.zip#http://download.tensorflow.org/deps/gmock-1.7.0.zip#' \
+  "${DOWNLOADS_DIR}/protobuf/autogen.sh"
 
 echo "download_dependencies.sh completed successfully." >&2
-- 
GitLab


From 2947a2851011782a020d59ef8624d191bef5c279 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 6 Dec 2018 12:00:29 -0800
Subject: [PATCH 146/873] [tf.data] Add documentation for supporting
 `tf.data.experimental.AUTOTUNE` as a value for the `num_parallel_calls`
 argument of `map()`, `interleave()`, and `map_and_batch()`.

PiperOrigin-RevId: 224381264
---
 tensorflow/python/data/experimental/ops/batching.py | 7 ++++---
 tensorflow/python/data/ops/dataset_ops.py           | 8 ++++++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py
index 9db9a28dd8..f70a4e0537 100644
--- a/tensorflow/python/data/experimental/ops/batching.py
+++ b/tensorflow/python/data/experimental/ops/batching.py
@@ -626,9 +626,10 @@ def map_and_batch(map_func,
       whether the last batch should be dropped in case its size is smaller than
       desired; the default behavior is not to drop the smaller batch.
     num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
-        representing the number of elements to process in parallel. If not
-        specified, `batch_size * num_parallel_batches` elements will be
-        processed in parallel.
+      representing the number of elements to process in parallel. If not
+      specified, `batch_size * num_parallel_batches` elements will be processed
+      in parallel. If the value `tf.data.experimental.AUTOTUNE` is used, then
+      the number of parallel calls is set dynamically based on available CPU.
 
   Returns:
     A `Dataset` transformation function, which can be passed to
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index a08a2b5787..cd7b1a4a5a 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -947,7 +947,9 @@ class DatasetV2(object):
        `self.output_types`) to another nested structure of tensors.
       num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
         representing the number elements to process in parallel. If not
-        specified, elements will be processed sequentially.
+        specified, elements will be processed sequentially. If the value
+        `tf.data.experimental.AUTOTUNE` is used, then the number of parallel
+        calls is set dynamically based on available CPU.
 
     Returns:
       Dataset: A `Dataset`.
@@ -1058,7 +1060,9 @@ class DatasetV2(object):
       num_parallel_calls: (Optional.) If specified, the implementation creates
         a threadpool, which is used to fetch inputs from cycle elements
         asynchronously and in parallel. The default behavior is to fetch inputs
-        from cycle elements synchronously with no parallelism.
+        from cycle elements synchronously with no parallelism. If the value
+        `tf.data.experimental.AUTOTUNE` is used, then the number of parallel
+        calls is set dynamically based on available CPU.
 
     Returns:
       Dataset: A `Dataset`.
-- 
GitLab


From 51561e0354a1556e848c920cd47a718360dc720d Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Thu, 6 Dec 2018 12:25:27 -0800
Subject: [PATCH 147/873] Use coordinator in distribute coordinator and
 independent_worker tests.

PiperOrigin-RevId: 224385727
---
 .../python/estimator_training_test.py         |  7 ++--
 .../python/multi_worker_test_base.py          | 10 ++++--
 .../distribute/distribute_coordinator.py      | 35 +++++++++++++------
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/estimator_training_test.py b/tensorflow/contrib/distribute/python/estimator_training_test.py
index b369a7fefe..3f55a8a1c8 100644
--- a/tensorflow/contrib/distribute/python/estimator_training_test.py
+++ b/tensorflow/contrib/distribute/python/estimator_training_test.py
@@ -375,11 +375,13 @@ class DistributeCoordinatorIntegrationTest(
     threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                  cluster_spec, train_distribute,
                                                  eval_distribute)
+    threads_to_join = []
     for task_type, ts in threads.items():
       if task_type == PS:
         continue
       for t in ts:
-        t.join()
+        threads_to_join.append(t)
+    self.join_independent_workers(threads_to_join)
 
     estimator = self._get_estimator(train_distribute, eval_distribute)
     self._inspect_train_and_eval_events(estimator)
@@ -413,8 +415,7 @@ class DistributeCoordinatorIntegrationTest(
     threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                  cluster_spec, train_distribute,
                                                  eval_distribute)
-    threads[WORKER][0].join()
-    threads[EVALUATOR][0].join()
+    self.join_independent_workers([threads[WORKER][0], threads[EVALUATOR][0]])
 
     estimator = self._get_estimator(train_distribute, eval_distribute)
     self._inspect_train_and_eval_events(estimator)
diff --git a/tensorflow/contrib/distribute/python/multi_worker_test_base.py b/tensorflow/contrib/distribute/python/multi_worker_test_base.py
index 147c9b83f8..b05aac431f 100644
--- a/tensorflow/contrib/distribute/python/multi_worker_test_base.py
+++ b/tensorflow/contrib/distribute/python/multi_worker_test_base.py
@@ -40,6 +40,7 @@ from tensorflow.python.client import session
 from tensorflow.python.estimator import run_config
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import coordinator
 from tensorflow.python.training import server_lib
 
 ASSIGNED_PORTS = set()
@@ -360,6 +361,7 @@ class IndependentWorkerTestBase(test.TestCase):
     self._mock_os_env = MockOsEnv()
     self._mock_context = test.mock.patch.object(os, 'environ',
                                                 self._mock_os_env)
+    self._coord = coordinator.Coordinator()
     super(IndependentWorkerTestBase, self).setUp()
     self._mock_context.__enter__()
 
@@ -368,8 +370,9 @@ class IndependentWorkerTestBase(test.TestCase):
     super(IndependentWorkerTestBase, self).tearDown()
 
   def _task_thread(self, task_fn, tf_config, *args, **kwargs):
-    os.environ['TF_CONFIG'] = json.dumps(tf_config)
-    task_fn(*args, **kwargs)
+    with self._coord.stop_on_exception():
+      os.environ['TF_CONFIG'] = json.dumps(tf_config)
+      task_fn(*args, **kwargs)
 
   def _run_task_in_thread(self, task_fn, cluster_spec, task_type, task_id,
                           *args, **kwargs):
@@ -403,3 +406,6 @@ class IndependentWorkerTestBase(test.TestCase):
                                      *args, **kwargs)
         threads[task_type].append(t)
     return threads
+
+  def join_independent_workers(self, worker_threads):
+    self._coord.join(worker_threads)
diff --git a/tensorflow/python/distribute/distribute_coordinator.py b/tensorflow/python/distribute/distribute_coordinator.py
index c0f9b8a1fd..78c995a578 100644
--- a/tensorflow/python/distribute/distribute_coordinator.py
+++ b/tensorflow/python/distribute/distribute_coordinator.py
@@ -29,6 +29,7 @@ from tensorflow.python.client import session
 from tensorflow.python.distribute import distribute_coordinator_context
 from tensorflow.python.distribute import multi_worker_util
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import coordinator
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import server_lib
 
@@ -328,7 +329,8 @@ def _run_single_worker(worker_fn,
                        task_id,
                        session_config,
                        rpc_layer="",
-                       worker_barrier=None):
+                       worker_barrier=None,
+                       coord=None):
   """Runs a single worker by calling `worker_fn` under context."""
   session_config = copy.deepcopy(session_config)
   strategy = copy.deepcopy(strategy)
@@ -350,7 +352,11 @@ def _run_single_worker(worker_fn,
       rpc_layer=rpc_layer,
       worker_barrier=worker_barrier)
   with context:
-    return worker_fn(strategy)
+    if coord:
+      with coord.stop_on_exception():
+        return worker_fn(strategy)
+    else:
+      return worker_fn(strategy)
 
 
 def _split_cluster_for_evaluator(cluster_spec, task_type):
@@ -423,6 +429,7 @@ def _run_std_server(cluster_spec=None,
 def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                               cluster_spec, session_config, rpc_layer):
   """Runs a standalone client for between-graph replication."""
+  coord = coordinator.Coordinator()
   eval_thread = None
   if _TaskType.EVALUATOR in cluster_spec.jobs:
     eval_thread = threading.Thread(
@@ -431,6 +438,7 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
               session_config),
         kwargs={
             "rpc_layer": rpc_layer,
+            "coord": coord,
         })
     eval_thread.start()
 
@@ -444,18 +452,18 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                 session_config),
           kwargs={
               "rpc_layer": rpc_layer,
-              "worker_barrier": worker_barrier
+              "worker_barrier": worker_barrier,
+              "coord": coord,
           })
       t.start()
       threads.append(t)
 
-  # TODO(yuefengz): wrap threads into thread coordinator?
-  for t in threads:
-    t.join()
-
-  # TODO(yuefengz): is it necessary to join eval thread?
   if eval_thread:
-    eval_thread.join()
+    # TODO(yuefengz): is it necessary to join eval thread?
+    threads_to_join = threads + [eval_thread]
+  else:
+    threads_to_join = threads
+  coord.join(threads_to_join)
 
   # TODO(yuefengz): we probably want to return results from all workers?
   return None
@@ -464,6 +472,7 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
 def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                          cluster_spec, session_config, rpc_layer):
   """Runs a standalone client for in-graph replication."""
+  coord = coordinator.Coordinator()
   eval_thread = None
   if _TaskType.EVALUATOR in cluster_spec.jobs:
     eval_thread = threading.Thread(
@@ -472,6 +481,7 @@ def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
               session_config),
         kwargs={
             "rpc_layer": rpc_layer,
+            "coord": coord,
         })
     eval_thread.start()
 
@@ -482,9 +492,12 @@ def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
       None,
       None,
       session_config,
-      rpc_layer=rpc_layer)
+      rpc_layer=rpc_layer,
+      coord=coord)
+
   if eval_thread:
-    eval_thread.join()
+    coord.join([eval_thread])
+
   return worker_result
 
 
-- 
GitLab


From 844dbd8730b7aba7b9f51e7455023070f8953c98 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Thu, 6 Dec 2018 13:01:00 -0800
Subject: [PATCH 148/873] Populate some additional parameters when running
 eager operations.

PiperOrigin-RevId: 224391662
---
 tensorflow/core/common_runtime/eager/context.cc      | 10 ++++++++++
 tensorflow/core/common_runtime/eager/context.h       |  8 ++++++++
 tensorflow/core/common_runtime/eager/execute.cc      |  3 ++-
 .../core/common_runtime/eager/kernel_and_device.cc   | 12 ++++++++++++
 .../core/common_runtime/eager/kernel_and_device.h    | 10 +++++++++-
 tensorflow/core/framework/op_kernel.h                |  2 ++
 tensorflow/python/eager/context.py                   |  8 +++-----
 7 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index 583ae64edd..1727c04560 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -15,6 +15,9 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/eager/context.h"
 
+#include "tensorflow/core/common_runtime/collective_executor_mgr.h"
+#include "tensorflow/core/common_runtime/collective_param_resolver_local.h"
+#include "tensorflow/core/common_runtime/device_resolver_local.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/common_runtime/process_util.h"
 #include "tensorflow/core/framework/resource_mgr.h"
@@ -71,6 +74,13 @@ EagerContext::EagerContext(const SessionOptions& opts,
   runner_ = [this](std::function<void()> closure) {
     this->thread_pool_->Schedule(std::move(closure));
   };
+
+  std::unique_ptr<DeviceResolverInterface> drl(
+      new DeviceResolverLocal(local_device_mgr()));
+  std::unique_ptr<ParamResolverInterface> cprl(new CollectiveParamResolverLocal(
+      local_device_mgr(), drl.get(), "/job:localhost/replica:0/task:0"));
+  collective_executor_mgr_.reset(new CollectiveExecutorMgr(
+      opts.config, local_device_mgr(), std::move(drl), std::move(cprl)));
 }
 
 void EagerContext::InitDeviceMapAndAsync() {
diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h
index 51109f8f1a..cdef947893 100644
--- a/tensorflow/core/common_runtime/eager/context.h
+++ b/tensorflow/core/common_runtime/eager/context.h
@@ -33,6 +33,7 @@ limitations under the License.
 #include "tensorflow/core/distributed_runtime/eager/eager_client.h"
 #include "tensorflow/core/distributed_runtime/server_lib.h"
 #endif
+#include "tensorflow/core/framework/collective.h"
 #include "tensorflow/core/framework/log_memory.h"
 #include "tensorflow/core/framework/rendezvous.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
@@ -147,6 +148,11 @@ class EagerContext {
   bool LogMemory() { return log_memory_; }
 
   Rendezvous* GetRendezvous() { return rendezvous_; }
+  std::unique_ptr<CollectiveExecutor::Handle> GetCollectiveExecutorHandle() {
+    return std::unique_ptr<CollectiveExecutor::Handle>(
+        new CollectiveExecutor::Handle(
+            collective_executor_mgr_->FindOrCreate(0), true /*inherit_ref*/));
+  }
 
   const tensorflow::DeviceMgr* local_device_mgr() const {
     return (local_device_manager_ != nullptr) ? local_device_manager_.get()
@@ -273,6 +279,8 @@ class EagerContext {
 
   Env* const env_;
 
+  std::unique_ptr<CollectiveExecutorMgrInterface> collective_executor_mgr_;
+
 #ifndef __ANDROID__
   void CloseRemoteContexts();
 
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index 5bf7888fad..a6199f2aeb 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -284,7 +284,8 @@ Status EagerLocalExecute(EagerOperation* op,
           "Unable to find a FunctionLibraryRuntime corresponding to device ",
           device->name());
     }
-    kernel = new KernelAndDevice(ctx->GetRendezvous(), ctx->LogMemory());
+    kernel = new KernelAndDevice(ctx->GetRendezvous(), ctx->LogMemory(),
+                                 ctx->GetCollectiveExecutorHandle());
     status = KernelAndDevice::Init(ndef, flr, ctx->runner(), kernel);
     if (!status.ok()) {
       delete kernel;
diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.cc b/tensorflow/core/common_runtime/eager/kernel_and_device.cc
index 192d22dfd5..317e9a1607 100644
--- a/tensorflow/core/common_runtime/eager/kernel_and_device.cc
+++ b/tensorflow/core/common_runtime/eager/kernel_and_device.cc
@@ -84,6 +84,15 @@ Status KernelAndDevice::Run(ScopedStepContainer* step_container,
                              tensorflow::HOST_MEMORY);
   }
 
+  gtl::InlinedVector<DeviceContext*, 4> input_device_contexts;
+  for (int i = 0; i < inputs->size(); i++) {
+    DeviceContext* device_context = nullptr;
+    if (device_->tensorflow_gpu_device_info() != nullptr) {
+      device_context = device_->tensorflow_gpu_device_info()->default_context;
+    }
+    input_device_contexts.push_back(device_context);
+  }
+
   OpKernelContext::Params params;
   params.device = device_;
   params.frame_iter = FrameAndIter(0, 0);
@@ -110,6 +119,9 @@ Status KernelAndDevice::Run(ScopedStepContainer* step_container,
   }
 
   params.step_container = step_container;
+  params.collective_executor =
+      collective_executor_ ? collective_executor_->get() : nullptr;
+  params.input_device_contexts = &input_device_contexts;
 
   OpKernelContext context(&params);
 
diff --git a/tensorflow/core/common_runtime/eager/kernel_and_device.h b/tensorflow/core/common_runtime/eager/kernel_and_device.h
index 52dac94ccc..ee430b7fc7 100644
--- a/tensorflow/core/common_runtime/eager/kernel_and_device.h
+++ b/tensorflow/core/common_runtime/eager/kernel_and_device.h
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/cancellation.h"
+#include "tensorflow/core/framework/collective.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/types.h"
@@ -55,10 +56,16 @@ class KernelAndDevice {
                      KernelAndDevice* out);
 
   KernelAndDevice(tensorflow::Rendezvous* rendez, bool log_memory)
+      : KernelAndDevice(rendez, log_memory, nullptr) {}
+
+  KernelAndDevice(
+      tensorflow::Rendezvous* rendez, bool log_memory,
+      std::unique_ptr<CollectiveExecutor::Handle> collective_executor)
       : device_(nullptr),
         flr_(nullptr),
         rendez_(rendez),
-        log_memory_(log_memory) {}
+        log_memory_(log_memory),
+        collective_executor_(std::move(collective_executor)) {}
 
   // TODO(ashankar): Handle list-valued inputs.
   Status Run(std::vector<Tensor>* inputs, std::vector<Tensor>* outputs,
@@ -92,6 +99,7 @@ class KernelAndDevice {
   std::function<void(std::function<void()>)>* runner_;
   std::function<void(std::function<void()>)> default_runner_;
   const bool log_memory_;
+  const std::unique_ptr<CollectiveExecutor::Handle> collective_executor_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 9f4c57e880..19a0c5e5be 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -1527,6 +1527,7 @@ T* OpKernelContext::op_device_context() {
 
 template <typename T>
 T* OpKernelContext::input_device_context(int index) {
+  DCHECK_NE(params_->input_device_contexts, nullptr);
   DCHECK_GE(index, 0);
   DCHECK_LT(index, params_->input_device_contexts->size());
   static_assert(std::is_base_of<DeviceContext, T>::value,
@@ -1535,6 +1536,7 @@ T* OpKernelContext::input_device_context(int index) {
 }
 
 inline DeviceContext* OpKernelContext::input_device_context(int index) {
+  DCHECK_NE(params_->input_device_contexts, nullptr);
   DCHECK_GE(index, 0);
   DCHECK_LT(index, params_->input_device_contexts->size());
   return (*params_->input_device_contexts)[index];
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 2f6b038dda..cbbe5cf49e 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -478,10 +478,6 @@ class Context(object):
     Raises:
       ValueError: If name is not a string or is an invalid device name.
     """
-    devices = self._context_devices
-    if devices is None:
-      self._initialize_handle_and_devices()
-      devices = self._context_devices
     eager_context = self._eager_context
     old_device_name = eager_context.device_name
     old_device_spec = eager_context.device_spec
@@ -502,7 +498,9 @@ class Context(object):
         if old_device_name:
           new_device_spec = copy.copy(old_device_spec)
         else:
-          new_device_spec = pydev.DeviceSpec.from_string(devices[0])
+          self._initialize_handle_and_devices()
+          new_device_spec = pydev.DeviceSpec.from_string(
+              self._context_devices[0])
         new_device_spec.merge_from(device_spec)
       else:
         new_device_spec = pydev.DeviceSpec.from_string("")
-- 
GitLab


From a6e4af0a5a8434898f1ed1e7eedc972c42c285f0 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Thu, 6 Dec 2018 13:04:58 -0800
Subject: [PATCH 149/873] Specify architectures in TFLite
 build_ios_universal_lib.sh

PiperOrigin-RevId: 224392376
---
 .../tools/make/build_ios_universal_lib.sh     | 35 +++++++++++--------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/tensorflow/lite/tools/make/build_ios_universal_lib.sh b/tensorflow/lite/tools/make/build_ios_universal_lib.sh
index be8064ec7c..8b617ef593 100755
--- a/tensorflow/lite/tools/make/build_ios_universal_lib.sh
+++ b/tensorflow/lite/tools/make/build_ios_universal_lib.sh
@@ -19,31 +19,36 @@ set -e
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR/../../../.."
 
-profiling_args=
+usage() {
+  echo "Usage: $(basename "$0") [-a]"
+  echo "-a [build_arch] build for specified arch comma separate for multiple archs (eg: x86_64 arm64)"
+  echo "  default is [x86_64 armv7 armv7s arm64]"
+  echo "-p enable profiling"
+  exit 1
+}
 
-while getopts "p" opt; do
-  case $opt in
+profiling_args=""
+BUILD_ARCHS="x86_64 armv7 armv7s arm64"
+while getopts "a:p" opt_name; do
+  case "$opt_name" in
+    a) BUILD_ARCHS="${OPTARG}";;
     p) profiling_args='-DGEMMLOWP_PROFILING,-DTFLITE_PROFILING_ENABLED';;
-    *) printf "if you want to enable profiling: pass in [-p]\n"
-      exit 2;;
+    *) usage;;
   esac
 done
-
 shift $(($OPTIND - 1))
+
 # Build library for supported architectures and packs them in a fat binary.
 make_library() {
-    for arch in x86_64 armv7 armv7s arm64
+    LIBS=""
+    for arch in $BUILD_ARCHS
     do
-        make -f tensorflow/lite/tools/make/Makefile TARGET=ios TARGET_ARCH=${arch} EXTRA_CXXFLAGS=$profiling_args \
-        -j 8
+        make -f tensorflow/lite/tools/make/Makefile TARGET=ios TARGET_ARCH=${arch} \
+            EXTRA_CXXFLAGS=$profiling_args -j 8
+        LIBS="${LIBS} tensorflow/lite/tools/make/gen/ios_${arch}/lib/${1}"
     done
     mkdir -p tensorflow/lite/tools/make/gen/lib
-    lipo \
-    tensorflow/lite/tools/make/gen/ios_x86_64/lib/${1} \
-    tensorflow/lite/tools/make/gen/ios_armv7/lib/${1} \
-    tensorflow/lite/tools/make/gen/ios_armv7s/lib/${1} \
-    tensorflow/lite/tools/make/gen/ios_arm64/lib/${1} \
-    -create \
+    lipo $LIBS -create \
     -output tensorflow/lite/tools/make/gen/lib/${1}
 }
 
-- 
GitLab


From e33bca07dec7f59052b3d818ce8eb98703b22603 Mon Sep 17 00:00:00 2001
From: Shining Sun <shiningsun@google.com>
Date: Thu, 6 Dec 2018 13:11:25 -0800
Subject: [PATCH 150/873] When doing validation within fit using distributed
 strategy, do not copy the weight over.

PiperOrigin-RevId: 224393616
---
 .../contrib/distribute/python/keras_test.py   |  6 ---
 .../python/keras/engine/training_arrays.py    | 17 +++++--
 .../keras/engine/training_distributed.py      | 46 ++++++++-----------
 3 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index e530ab6f17..331bf7c496 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1273,12 +1273,6 @@ class TestDistributionStrategyCorrectness(test.TestCase,
         # TODO(b/119257215): use the default one once the flakyness is fixed.
         tolerance = 1e-4
 
-      if (use_validation_data and
-          not isinstance(distribution, tpu_strategy.TPUStrategy)):
-        # TODO(b/120435565): Enable tests with use_validation_data once the
-        # the underlying bug is fixed.
-        return
-
       keras.backend.set_image_data_format('channels_last')
       np.random.seed(_RANDOM_SEED)
       random_seed.set_random_seed(_RANDOM_SEED)
diff --git a/tensorflow/python/keras/engine/training_arrays.py b/tensorflow/python/keras/engine/training_arrays.py
index e9dfbcbcc0..26a809d298 100644
--- a/tensorflow/python/keras/engine/training_arrays.py
+++ b/tensorflow/python/keras/engine/training_arrays.py
@@ -138,6 +138,7 @@ def model_iteration(model,
                     steps_per_epoch=None,
                     validation_steps=None,
                     mode='train',
+                    validation_in_fit=False,
                     **kwargs):
   """Loop function for arrays of data with modes 'train'/'test'/'predict'.
 
@@ -164,6 +165,9 @@ def model_iteration(model,
       validation_steps: Number of steps to run validation for (only if doing
         validation from data tensors). Ignored with the default value of `None`.
       mode: One of 'train'/'test'/'predict'.
+      validation_in_fit: if true, then this method is invoked from within
+        training iteration (for validation). In this case, do not copy weights
+        when using a tf.distribute.Strategy.
       **kwargs: Additional arguments for backwards compatibility.
 
   Returns:
@@ -230,8 +234,9 @@ def model_iteration(model,
     aggregator = training_utils.MetricsAggregator(use_steps,
                                                   num_samples_or_steps)
 
-  if model._distribution_strategy:
-    training_distributed._copy_weights_to_distributed_model(model)
+  if model._distribution_strategy and not validation_in_fit:
+    training_distributed._copy_weights_to_distributed_model(
+        model, model._grouped_model)
 
   callbacks.model.stop_training = False
   callbacks._call_begin_hook(mode)
@@ -356,7 +361,8 @@ def model_iteration(model,
           steps_per_epoch=validation_steps,
           callbacks=callbacks,
           verbose=0,
-          mode='test')
+          mode='test',
+          validation_in_fit=True)
       if not isinstance(val_results, list):
         val_results = [val_results]
       epoch_logs.update(
@@ -367,7 +373,10 @@ def model_iteration(model,
   callbacks._call_end_hook(mode)
 
   if model._distribution_strategy:
-    training_distributed._copy_weights_to_original_model(model, mode)
+    if not validation_in_fit:
+      training_distributed._copy_weights_to_original_model(
+          model, model._grouped_model, mode)
+
     scope.__exit__(None, None, None)
 
   if mode == 'train':
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 473f06ded7..9ca5082673 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -163,11 +163,9 @@ def experimental_fit_loop(model,
   do_validation = bool(validation_steps)
 
   # Copy the weights from the original model to each of the replicated models.
-  orig_model_weights = model.get_weights()
   with current_strategy.scope():
-    distributed_model = current_strategy.unwrap(model._grouped_model_train)[0]
-    distributed_training_utils.set_weights(
-        current_strategy, distributed_model, orig_model_weights)
+    _copy_weights_to_distributed_model(model, model._grouped_model_train)
+
   callbacks = cbks.configure_callbacks(
       callbacks,
       model,
@@ -217,9 +215,8 @@ def experimental_fit_loop(model,
       # Since we create a new clone from the original model we need to copy
       # the weights back to the original model before we can run validation.
       with current_strategy.scope():
-        updated_weights = current_strategy.unwrap(
-            model._grouped_model_train)[0].get_weights()
-        model.set_weights(updated_weights)
+        _copy_weights_to_original_model(model, model._grouped_model_train,
+                                        'train')
 
       val_outs = experimental_test_loop(  # pylint: disable=undefined-variable
           model,
@@ -240,9 +237,7 @@ def experimental_fit_loop(model,
 
   # Copy the weights back from the replicated model to the original model.
   with current_strategy.scope():
-    updated_weights = current_strategy.unwrap(
-        model._grouped_model_train)[0].get_weights()
-    model.set_weights(updated_weights)
+    _copy_weights_to_original_model(model, model._grouped_model_train, 'train')
 
   K.get_session().run(current_strategy.finalize())
   return model.history
@@ -345,11 +340,8 @@ def experimental_test_loop(model,
     progbar = Progbar(target=steps)
 
   # Copy the weights from the original model to each of the replicated models.
-  orig_model_weights = model.get_weights()
   with current_strategy.scope():
-    distributed_model = current_strategy.unwrap(model._grouped_model_test)[0]
-    distributed_training_utils.set_weights(
-        current_strategy, distributed_model, orig_model_weights)
+    _copy_weights_to_distributed_model(model, model._grouped_model_test)
 
   assert steps is not None
   outs = [0.] * len(model.metrics_names)
@@ -455,11 +447,8 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
     progbar = Progbar(target=steps)
 
   # Copy the weights from the original model to each of the replicated models.
-  orig_model_weights = model.get_weights()
   with current_strategy.scope():
-    distributed_model = current_strategy.unwrap(model._grouped_model_predict)[0]
-    distributed_training_utils.set_weights(
-        current_strategy, distributed_model, orig_model_weights)
+    _copy_weights_to_distributed_model(model, model._grouped_model_predict)
 
   assert steps is not None
   # Since we do not know how many samples we will see, we cannot pre-allocate
@@ -695,22 +684,23 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode):
   return ins
 
 
-def _copy_weights_to_distributed_model(model):
+def _copy_weights_to_distributed_model(original_model, grouped_model):
   """Copies weights from original model to distributed models."""
-  if model._distribution_strategy:
-    # Copy the weights from the original model to each of the replicated models.
-    orig_model_weights = model.get_weights()
-    distributed_model = model._distribution_strategy.unwrap(
-        model._grouped_model)[0]
-    distributed_training_utils.set_weights(
-        model._distribution_strategy, distributed_model, orig_model_weights)
+  strategy = original_model._distribution_strategy
+  if strategy:
+    # Copy the weights from the original model to each of the replicated
+    # models.
+    orig_model_weights = original_model.get_weights()
+    distributed_model = strategy.unwrap(grouped_model)[0]
+    distributed_training_utils.set_weights(strategy, distributed_model,
+                                           orig_model_weights)
 
 
-def _copy_weights_to_original_model(model, mode):
+def _copy_weights_to_original_model(model, grouped_model, mode):
   """Copies weights from first distributed model back to original model."""
   if model._distribution_strategy and mode == 'train':
     updated_weights = model._distribution_strategy.unwrap(
-        model._grouped_model)[0].get_weights()
+        grouped_model)[0].get_weights()
     model.set_weights(updated_weights)
 
 
-- 
GitLab


From 7a9363e16590c3a713ce06dbf5857149b460b8aa Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Thu, 6 Dec 2018 13:17:22 -0800
Subject: [PATCH 151/873] Update model for classification app

- Use model referred from our models.md list.
- Download both float/quant

PiperOrigin-RevId: 224394551
---
 tensorflow/lite/examples/android/BUILD        |  2 +-
 .../android/app/download-models.gradle        | 27 ++++++----
 .../tensorflow/demo/ClassifierActivity.java   |  2 +-
 tensorflow/lite/java/demo/app/build.gradle    | 53 +++++++++++++++----
 tensorflow/lite/java/demo/app/src/main/BUILD  |  3 +-
 .../ImageClassifierQuantizedMobileNet.java    |  5 +-
 tensorflow/opensource_only.files              |  2 +
 tensorflow/workspace.bzl                      | 20 +++++--
 third_party/tflite_mobilenet_float.BUILD      | 12 +++++
 third_party/tflite_mobilenet_quant.BUILD      | 12 +++++
 10 files changed, 108 insertions(+), 30 deletions(-)
 create mode 100644 third_party/tflite_mobilenet_float.BUILD
 create mode 100644 third_party/tflite_mobilenet_quant.BUILD

diff --git a/tensorflow/lite/examples/android/BUILD b/tensorflow/lite/examples/android/BUILD
index 761a60314e..80cefd415a 100644
--- a/tensorflow/lite/examples/android/BUILD
+++ b/tensorflow/lite/examples/android/BUILD
@@ -34,7 +34,7 @@ android_binary(
     # to reduce APK size.
     assets = [
         "//tensorflow/lite/examples/android/app/src/main/assets:labels_mobilenet_quant_v1_224.txt",
-        "@tflite_mobilenet//:mobilenet_quant_v1_224.tflite",
+        "@tflite_mobilenet_quant//:mobilenet_v1_1.0_224_quant.tflite",
         "@tflite_conv_actions_frozen//:conv_actions_frozen.tflite",
         "//tensorflow/lite/examples/android/app/src/main/assets:conv_actions_labels.txt",
         "@tflite_mobilenet_ssd//:mobilenet_ssd.tflite",
diff --git a/tensorflow/lite/examples/android/app/download-models.gradle b/tensorflow/lite/examples/android/app/download-models.gradle
index d2f03db5f6..36bd177a1f 100644
--- a/tensorflow/lite/examples/android/app/download-models.gradle
+++ b/tensorflow/lite/examples/android/app/download-models.gradle
@@ -8,13 +8,12 @@
  *     3 model files will be downloaded into given folder of ext.ASSET_DIR
  */
 // hard coded model files
-// LINT.IfChange
 
-def models = ['conv_actions_tflite.zip',
-              'mobilenet_ssd_tflite_v1.zip',
-              'mobilenet_v1_224_android_quant_2017_11_08.zip',
-              'coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip']
-// LINT.ThenChange(//tensorflow/lite/examples/android/BUILD)
+def models = ['https://storage.googleapis.com/download.tensorflow.org/models/tflite/conv_actions_tflite.zip',
+              'https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_ssd_tflite_v1.zip',
+              'https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip',
+              'http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz',
+              'http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz']
 
 // Root URL for model archives
 def MODEL_URL = 'https://storage.googleapis.com/download.tensorflow.org/models/tflite'
@@ -30,9 +29,9 @@ buildscript {
 
 import de.undercouch.gradle.tasks.download.Download
 task downloadFile(type: Download){
-    for (f in models) {
-        def modelUrl = MODEL_URL + "/" + f
-        println "Downloading ${f} from ${modelUrl}"
+    for (modelUrl in models) {
+        def localFile = modelUrl.split("/")[-1]
+        println "Downloading ${localFile} from ${modelUrl}"
         src modelUrl
     }
 
@@ -43,7 +42,12 @@ task downloadFile(type: Download){
 task extractModels(type: Copy) {
     for (f in models) {
         def localFile = f.split("/")[-1]
-        from zipTree(project.ext.TMP_DIR + '/' + localFile)
+        def localExt = localFile.split("[.]")[-1]
+        if (localExt == "tgz") {
+            from tarTree(project.ext.TMP_DIR + '/' + localFile)
+        } else {
+            from zipTree(project.ext.TMP_DIR + '/' + localFile)
+        }
     }
 
     into file(project.ext.ASSET_DIR)
@@ -63,6 +67,9 @@ task extractModels(type: Copy) {
     }
 }
 
+
+
+
 tasks.whenTaskAdded { task ->
     if (task.name == 'assembleDebug') {
         task.dependsOn 'extractModels'
diff --git a/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/ClassifierActivity.java b/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/ClassifierActivity.java
index dcbbefbeab..698251d8b4 100644
--- a/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/ClassifierActivity.java
+++ b/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/ClassifierActivity.java
@@ -65,7 +65,7 @@ public class ClassifierActivity extends CameraActivity implements OnImageAvailab
   // --input_binary=true
   private static final int INPUT_SIZE = 224;
 
-  private static final String MODEL_FILE = "mobilenet_quant_v1_224.tflite";
+  private static final String MODEL_FILE = "mobilenet_v1_1.0_224_quant.tflite";
   private static final String LABEL_FILE = "labels_mobilenet_quant_v1_224.txt";
 
   private static final boolean MAINTAIN_ASPECT = true;
diff --git a/tensorflow/lite/java/demo/app/build.gradle b/tensorflow/lite/java/demo/app/build.gradle
index 05301ebf88..5e50ed4b94 100644
--- a/tensorflow/lite/java/demo/app/build.gradle
+++ b/tensorflow/lite/java/demo/app/build.gradle
@@ -52,28 +52,60 @@ dependencies {
     compile 'org.tensorflow:tensorflow-lite:0.0.0-nightly'
 }
 
-def modelDownloadUrl = "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip"
-def localCache = "build/intermediates/mobilenet_v1_224_android_quant_2017_11_08.zip"
 def targetFolder = "src/main/assets"
+def modelFloatDownloadUrl = "http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/mobilenet_v1_1.0_224.tgz"
+def modelQuantDownloadUrl = "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz"
+def localCacheFloat = "build/intermediates/mobilenet_v1_1.0_224.tgz"
+def localCacheQuant = "build/intermediates/mmobilenet_v1_1.0_224_quant.tgz"
 
-task downloadModel(type: DownloadUrlTask) {
+
+task downloadModelFloat(type: DownloadUrlTask) {
+    doFirst {
+        println "Downloading ${modelFloatDownloadUrl}"
+    }
+    sourceUrl = "${modelFloatDownloadUrl}"
+    target = file("${localCacheFloat}")
+}
+
+task downloadModelQuant(type: DownloadUrlTask) {
     doFirst {
-        println "Downloading ${modelDownloadUrl}"
+        println "Downloading ${modelQuantDownloadUrl}"
     }
-    sourceUrl = "${modelDownloadUrl}"
-    target = file("${localCache}")
+    sourceUrl = "${modelQuantDownloadUrl}"
+    target = file("${localCacheQuant}")
 }
 
-task unzipModel(type: Copy, dependsOn: 'downloadModel') {
+task unzipModelFloat(type: Copy, dependsOn: 'downloadModelFloat') {
     doFirst {
-        println "Unzipping ${localCache}"
+        println "Unzipping ${localCacheFloat}"
     }
-    from zipTree("${localCache}")
+    from tarTree("${localCacheFloat}")
     into "${targetFolder}"
 }
 
+task unzipModelQuant(type: Copy, dependsOn: 'downloadModelQuant') {
+    doFirst {
+        println "Unzipping ${localCacheQuant}"
+    }
+    from tarTree("${localCacheQuant}")
+    into "${targetFolder}"
+}
+
+task cleanUnusedFiles(type: Delete, dependsOn: ['unzipModelFloat', 'unzipModelQuant']) {
+    delete fileTree("${targetFolder}").matching {
+        include "*.pb"
+        include "*.ckpt.*"
+        include "*.pbtxt.*"
+        include "*.quant_info.*"
+        include "*.meta"
+    }
+}
+
+
 // Ensure the model file is downloaded and extracted before every build
-preBuild.dependsOn unzipModel
+preBuild.dependsOn unzipModelFloat
+preBuild.dependsOn unzipModelQuant
+preBuild.dependsOn cleanUnusedFiles
 
 class DownloadUrlTask extends DefaultTask {
     @Input
@@ -87,3 +119,4 @@ class DownloadUrlTask extends DefaultTask {
         ant.get(src: sourceUrl, dest: target)
     }
 }
+
diff --git a/tensorflow/lite/java/demo/app/src/main/BUILD b/tensorflow/lite/java/demo/app/src/main/BUILD
index df8a024a57..9a7c1d0b61 100644
--- a/tensorflow/lite/java/demo/app/src/main/BUILD
+++ b/tensorflow/lite/java/demo/app/src/main/BUILD
@@ -10,7 +10,8 @@ android_binary(
     aapt_version = "aapt",
     assets = [
         "//tensorflow/lite/java/demo/app/src/main/assets:labels_mobilenet_quant_v1_224.txt",
-        "@tflite_mobilenet//:mobilenet_quant_v1_224.tflite",
+        "@tflite_mobilenet_quant//:mobilenet_v1_1.0_224_quant.tflite",
+        "@tflite_mobilenet_float//:mobilenet_v1_1.0_224.tflite",
     ],
     assets_dir = "",
     custom_package = "com.example.android.tflitecamerademo",
diff --git a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java
index e164ac7554..6310a56168 100644
--- a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java
+++ b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java
@@ -42,8 +42,9 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier {
   @Override
   protected String getModelPath() {
     // you can download this file from
-    // https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip
-    return "mobilenet_quant_v1_224.tflite";
+    // see build.gradle for where to obtain this file. It should be auto
+    // downloaded into assets.
+    return "mobilenet_v1_1.0_224_quant.tflite";
   }
 
   @Override
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 0c29ac6a30..688a837dac 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -173,6 +173,7 @@ tensorflow/third_party/common.bzl
 tensorflow/third_party/com_google_absl.BUILD
 tensorflow/third_party/pprof.BUILD
 tensorflow/third_party/BUILD
+tensorflow/third_party/tflite_mobilenet_quant.BUILD
 tensorflow/third_party/lmdb.BUILD
 tensorflow/third_party/git/BUILD.tpl
 tensorflow/third_party/git/BUILD
@@ -198,6 +199,7 @@ tensorflow/third_party/nanopb.BUILD
 tensorflow/third_party/gif.BUILD
 tensorflow/third_party/double_conversion.BUILD
 tensorflow/third_party/six.BUILD
+tensorflow/third_party/tflite_mobilenet_float.BUILD
 tensorflow/third_party/repo.bzl
 tensorflow/third_party/codegen.BUILD
 tensorflow/third_party/cub.BUILD
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index e6b4a89e3b..f475493446 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -734,12 +734,22 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     )
 
     tf_http_archive(
-        name = "tflite_mobilenet",
-        build_file = clean_dep("//third_party:tflite_mobilenet.BUILD"),
-        sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b",
+        name = "tflite_mobilenet_float",
+        build_file = clean_dep("//third_party:tflite_mobilenet_float.BUILD"),
+        sha256 = "2fadeabb9968ec6833bee903900dda6e61b3947200535874ce2fe42a8493abc0",
         urls = [
-            "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
-            "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip",
+            "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz",
+            "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz",
+        ],
+    )
+
+    tf_http_archive(
+        name = "tflite_mobilenet_quant",
+        build_file = clean_dep("//third_party:tflite_mobilenet_quant.BUILD"),
+        sha256 = "d32432d28673a936b2d6281ab0600c71cf7226dfe4cdcef3012555f691744166",
+        urls = [
+            "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
+            "http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
         ],
     )
 
diff --git a/third_party/tflite_mobilenet_float.BUILD b/third_party/tflite_mobilenet_float.BUILD
new file mode 100644
index 0000000000..de47ed61f9
--- /dev/null
+++ b/third_party/tflite_mobilenet_float.BUILD
@@ -0,0 +1,12 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(
+    glob(
+        ["**/*"],
+        exclude = [
+            "BUILD",
+        ],
+    ),
+)
diff --git a/third_party/tflite_mobilenet_quant.BUILD b/third_party/tflite_mobilenet_quant.BUILD
new file mode 100644
index 0000000000..de47ed61f9
--- /dev/null
+++ b/third_party/tflite_mobilenet_quant.BUILD
@@ -0,0 +1,12 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(
+    glob(
+        ["**/*"],
+        exclude = [
+            "BUILD",
+        ],
+    ),
+)
-- 
GitLab


From 3c257d337439f863759d1884c88d17c2b0d685b5 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Thu, 6 Dec 2018 13:31:57 -0800
Subject: [PATCH 152/873] [tf.data] Export `tf.data.experimental.Structure` and
 its subclasses.

These classes are most useful when implementing the
`tf.data.Dataset._element_structure` property in `Dataset`
subclasses. This change enables users to implement `Dataset`
subclasses without depending on an internal module.

PiperOrigin-RevId: 224397031
---
 .../python/data/experimental/__init__.py      | 10 ++++++++++
 tensorflow/python/data/ops/BUILD              |  1 +
 tensorflow/python/data/ops/dataset_ops.py     |  1 +
 tensorflow/python/data/ops/optional_ops.py    |  2 ++
 tensorflow/python/data/util/structure.py      |  4 ++++
 ...data.experimental.-dataset-structure.pbtxt | 18 +++++++++++++++++
 ...ata.experimental.-optional-structure.pbtxt | 18 +++++++++++++++++
 ...xperimental.-sparse-tensor-structure.pbtxt | 18 +++++++++++++++++
 ...sorflow.data.experimental.-structure.pbtxt | 16 +++++++++++++++
 ....data.experimental.-tensor-structure.pbtxt | 18 +++++++++++++++++
 .../v1/tensorflow.data.experimental.pbtxt     | 20 +++++++++++++++++++
 ...data.experimental.-dataset-structure.pbtxt | 18 +++++++++++++++++
 ...ata.experimental.-optional-structure.pbtxt | 18 +++++++++++++++++
 ...xperimental.-sparse-tensor-structure.pbtxt | 18 +++++++++++++++++
 ...sorflow.data.experimental.-structure.pbtxt | 16 +++++++++++++++
 ....data.experimental.-tensor-structure.pbtxt | 18 +++++++++++++++++
 .../v2/tensorflow.data.experimental.pbtxt     | 20 +++++++++++++++++++
 17 files changed, 234 insertions(+)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-dataset-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sparse-tensor-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-tensor-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-dataset-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sparse-tensor-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-tensor-structure.pbtxt

diff --git a/tensorflow/python/data/experimental/__init__.py b/tensorflow/python/data/experimental/__init__.py
index 14dfec37cd..365c53405c 100644
--- a/tensorflow/python/data/experimental/__init__.py
+++ b/tensorflow/python/data/experimental/__init__.py
@@ -25,14 +25,19 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
 @@Counter
 @@CheckpointInputPipelineHook
 @@CsvDataset
+@@DatasetStructure
 @@OptimizationOptions
 @@Optional
+@@OptionalStructure
 @@RandomDataset
 @@Reducer
+@@SparseTensorStructure
 @@SqlDataset
 @@StatsAggregator
 @@StatsOptions
+@@Structure
 @@TFRecordWriter
+@@TensorStructure
 @@ThreadingOptions
 
 @@bucket_by_sequence_length
@@ -112,8 +117,13 @@ from tensorflow.python.data.experimental.ops.stats_options import StatsOptions
 from tensorflow.python.data.experimental.ops.threading_options import ThreadingOptions
 from tensorflow.python.data.experimental.ops.unique import unique
 from tensorflow.python.data.experimental.ops.writers import TFRecordWriter
+from tensorflow.python.data.ops.dataset_ops import DatasetStructure
 from tensorflow.python.data.ops.iterator_ops import get_next_as_optional
 from tensorflow.python.data.ops.optional_ops import Optional
+from tensorflow.python.data.ops.optional_ops import OptionalStructure
+from tensorflow.python.data.util.structure import SparseTensorStructure
+from tensorflow.python.data.util.structure import Structure
+from tensorflow.python.data.util.structure import TensorStructure
 # pylint: enable=unused-import
 
 from tensorflow.python.util.all_util import remove_undocumented
diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD
index 45d65956f3..fbff7df9c3 100644
--- a/tensorflow/python/data/ops/BUILD
+++ b/tensorflow/python/data/ops/BUILD
@@ -87,6 +87,7 @@ py_library(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:resource_variable_ops",
         "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
         "//tensorflow/python/data/util:structure",
     ],
 )
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index cd7b1a4a5a..a7cd2f54c7 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1915,6 +1915,7 @@ class _VariantDataset(DatasetV2):
     return self._structure
 
 
+@tf_export("data.experimental.DatasetStructure")
 class DatasetStructure(structure_lib.Structure):
   """Represents a `Dataset` of structured values."""
 
diff --git a/tensorflow/python/data/ops/optional_ops.py b/tensorflow/python/data/ops/optional_ops.py
index 15ec755c67..66011d8518 100644
--- a/tensorflow/python/data/ops/optional_ops.py
+++ b/tensorflow/python/data/ops/optional_ops.py
@@ -26,6 +26,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
 @six.add_metaclass(abc.ABCMeta)
@@ -145,6 +146,7 @@ class _OptionalImpl(Optional):
     return self._value_structure
 
 
+@tf_export("data.experimental.OptionalStructure")
 class OptionalStructure(structure.Structure):
   """Represents an optional potentially containing a structured value."""
 
diff --git a/tensorflow/python/data/util/structure.py b/tensorflow/python/data/util/structure.py
index 92450abda8..4f2c6cd853 100644
--- a/tensorflow/python/data/util/structure.py
+++ b/tensorflow/python/data/util/structure.py
@@ -28,11 +28,13 @@ from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
 _STRUCTURE_CONVERSION_FUNCTION_REGISTRY = {}
 
 
+@tf_export("data.experimental.Structure")
 @six.add_metaclass(abc.ABCMeta)
 class Structure(object):
   """Represents structural information, such as type and shape, about a value.
@@ -376,6 +378,7 @@ class NestedStructure(Structure):
         lambda s: s._batch(batch_size), self._nested_structure))
 
 
+@tf_export("data.experimental.TensorStructure")
 class TensorStructure(Structure):
   """Represents structural information about a `tf.Tensor`."""
 
@@ -439,6 +442,7 @@ class TensorStructure(Structure):
         tensor_shape.TensorShape([batch_size]).concatenate(self._shape))
 
 
+@tf_export("data.experimental.SparseTensorStructure")
 class SparseTensorStructure(Structure):
   """Represents structural information about a `tf.SparseTensor`."""
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-dataset-structure.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-dataset-structure.pbtxt
new file mode 100644
index 0000000000..dcb304f763
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-dataset-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.DatasetStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'element_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional-structure.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional-structure.pbtxt
new file mode 100644
index 0000000000..bf41c1d1d6
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-optional-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.OptionalStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.optional_ops.OptionalStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'value_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sparse-tensor-structure.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sparse-tensor-structure.pbtxt
new file mode 100644
index 0000000000..f97376b328
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-sparse-tensor-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.SparseTensorStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.util.structure.SparseTensorStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtype\', \'dense_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-structure.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-structure.pbtxt
new file mode 100644
index 0000000000..a99db4542e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-structure.pbtxt
@@ -0,0 +1,16 @@
+path: "tensorflow.data.experimental.Structure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-tensor-structure.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-tensor-structure.pbtxt
new file mode 100644
index 0000000000..f5c8864a9d
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-tensor-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.TensorStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.util.structure.TensorStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtype\', \'shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
index f981b1af17..234507e5de 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "CsvDataset"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "DatasetStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "INFINITE_CARDINALITY"
     mtype: "<type \'int\'>"
@@ -24,6 +28,10 @@ tf_module {
     name: "Optional"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "OptionalStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "RandomDataset"
     mtype: "<type \'type\'>"
@@ -32,6 +40,10 @@ tf_module {
     name: "Reducer"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SparseTensorStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "SqlDataset"
     mtype: "<type \'type\'>"
@@ -44,10 +56,18 @@ tf_module {
     name: "StatsOptions"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Structure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TFRecordWriter"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "TensorStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ThreadingOptions"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-dataset-structure.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-dataset-structure.pbtxt
new file mode 100644
index 0000000000..dcb304f763
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-dataset-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.DatasetStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.dataset_ops.DatasetStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'element_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional-structure.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional-structure.pbtxt
new file mode 100644
index 0000000000..bf41c1d1d6
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-optional-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.OptionalStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.ops.optional_ops.OptionalStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'value_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sparse-tensor-structure.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sparse-tensor-structure.pbtxt
new file mode 100644
index 0000000000..f97376b328
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-sparse-tensor-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.SparseTensorStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.util.structure.SparseTensorStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtype\', \'dense_shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-structure.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-structure.pbtxt
new file mode 100644
index 0000000000..a99db4542e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-structure.pbtxt
@@ -0,0 +1,16 @@
+path: "tensorflow.data.experimental.Structure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-tensor-structure.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-tensor-structure.pbtxt
new file mode 100644
index 0000000000..f5c8864a9d
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-tensor-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.TensorStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.util.structure.TensorStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtype\', \'shape\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
index f981b1af17..234507e5de 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
@@ -12,6 +12,10 @@ tf_module {
     name: "CsvDataset"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "DatasetStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "INFINITE_CARDINALITY"
     mtype: "<type \'int\'>"
@@ -24,6 +28,10 @@ tf_module {
     name: "Optional"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "OptionalStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "RandomDataset"
     mtype: "<type \'type\'>"
@@ -32,6 +40,10 @@ tf_module {
     name: "Reducer"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SparseTensorStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "SqlDataset"
     mtype: "<type \'type\'>"
@@ -44,10 +56,18 @@ tf_module {
     name: "StatsOptions"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Structure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TFRecordWriter"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "TensorStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ThreadingOptions"
     mtype: "<type \'type\'>"
-- 
GitLab


From 479abd88927e54205ea418f68e64057e5b837e2d Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Wed, 5 Dec 2018 13:58:00 -0800
Subject: [PATCH 153/873] Add subdirectory-output support and change official
 releases

This PR does a few things:

* Add slice option `dockerfile_subdirectory` for
  https://github.com/tensorflow/tensorflow/pull/24180, a slice option that
  places any Dockerfile using the slice into a subdirectory of the
  Dockerfiles directory.
* Rearrange releases and slices slightly. The `devel` images are now
  going to be built nightly, to best support master, and are tagged
  `devel-*`.
---
 tensorflow/tools/dockerfiles/assembler.py     | 27 ++++++++++-------
 ...ockerfile => devel-cpu-jupyter.Dockerfile} |  3 ++
 ...-devel.Dockerfile => devel-cpu.Dockerfile} |  3 ++
 ...ockerfile => devel-gpu-jupyter.Dockerfile} |  4 +++
 ...-devel.Dockerfile => devel-gpu.Dockerfile} |  4 +++
 ...ockerfile => devel-cpu.partial.Dockerfile} |  3 ++
 ...erfile => devel-nvidia.partial.Dockerfile} |  4 +++
 tensorflow/tools/dockerfiles/spec.yml         | 29 ++++++++++++++-----
 8 files changed, 59 insertions(+), 18 deletions(-)
 rename tensorflow/tools/dockerfiles/dockerfiles/{cpu-devel-jupyter.Dockerfile => devel-cpu-jupyter.Dockerfile} (94%)
 rename tensorflow/tools/dockerfiles/dockerfiles/{cpu-devel.Dockerfile => devel-cpu.Dockerfile} (92%)
 rename tensorflow/tools/dockerfiles/dockerfiles/{gpu-devel-jupyter.Dockerfile => devel-gpu-jupyter.Dockerfile} (95%)
 rename tensorflow/tools/dockerfiles/dockerfiles/{gpu-devel.Dockerfile => devel-gpu.Dockerfile} (95%)
 rename tensorflow/tools/dockerfiles/partials/ubuntu/{cpu-devel.partial.Dockerfile => devel-cpu.partial.Dockerfile} (73%)
 rename tensorflow/tools/dockerfiles/partials/ubuntu/{nvidia-devel.partial.Dockerfile => devel-nvidia.partial.Dockerfile} (90%)

diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py
index bd8ed73a41..67a0320241 100644
--- a/tensorflow/tools/dockerfiles/assembler.py
+++ b/tensorflow/tools/dockerfiles/assembler.py
@@ -170,6 +170,8 @@ slice_sets:
              type: string
            dockerfile_exclusive_name:
              type: string
+           dockerfile_subdirectory:
+             type: string
            partials:
              type: list
              schema:
@@ -353,8 +355,9 @@ def gather_slice_list_items(slices, key):
 def find_first_slice_value(slices, key):
   """For a list of slices, get the first value for a certain key."""
   for s in slices:
-    if key in s:
+    if key in s and s[key] is not None:
       return s[key]
+  return None
 
 
 def assemble_tags(spec, cli_args, enabled_releases, all_partials):
@@ -389,6 +392,8 @@ def assemble_tags(spec, cli_args, enabled_releases, all_partials):
         used_partials = gather_slice_list_items(slices, 'partials')
         used_tests = gather_slice_list_items(slices, 'tests')
         test_runtime = find_first_slice_value(slices, 'test_runtime')
+        dockerfile_subdirectory = find_first_slice_value(
+            slices, 'dockerfile_subdirectory')
         dockerfile_contents = merge_partials(spec['header'], used_partials,
                                              all_partials)
 
@@ -398,6 +403,7 @@ def assemble_tags(spec, cli_args, enabled_releases, all_partials):
             'is_dockerfiles': release['is_dockerfiles'],
             'upload_images': release['upload_images'],
             'cli_args': tag_args,
+            'dockerfile_subdirectory': dockerfile_subdirectory or '',
             'partials': used_partials,
             'tests': used_tests,
             'test_runtime': test_runtime,
@@ -416,8 +422,7 @@ def merge_partials(header, used_partials, all_partials):
 def upload_in_background(hub_repository, dock, image, tag):
   """Upload a docker image (to be used by multiprocessing)."""
   image.tag(hub_repository, tag=tag)
-  for line in list(dock.images.push(hub_repository, tag=tag, stream=True)):
-    print(line)
+  print(dock.images.push(hub_repository, tag=tag))
 
 
 def mkdir_p(path):
@@ -525,13 +530,15 @@ def main(argv):
         continue
 
       # Write releases marked "is_dockerfiles" into the Dockerfile directory
-      if FLAGS.construct_dockerfiles:
-        path = os.path.join(FLAGS.dockerfile_dir, tag + '.Dockerfile')
-        if tag_def['is_dockerfiles']:
-          eprint('>> Writing {}...'.format(path))
-          if not FLAGS.dry_run:
-            with open(path, 'w') as f:
-              f.write(tag_def['dockerfile_contents'])
+      if FLAGS.construct_dockerfiles and tag_def['is_dockerfiles']:
+        path = os.path.join(FLAGS.dockerfile_dir,
+                            tag_def['dockerfile_subdirectory'],
+                            tag + '.Dockerfile')
+        eprint('>> Writing {}...'.format(path))
+        if not FLAGS.dry_run:
+          mkdir_p(os.path.dirname(path))
+          with open(path, 'w') as f:
+            f.write(tag_def['dockerfile_contents'])
 
       # Don't build any images for dockerfile-only releases
       if not FLAGS.build_images:
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
similarity index 94%
rename from tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile
rename to tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
index 14ddf08199..43265676f8 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
@@ -46,6 +46,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  
 ENV CI_BUILD_PYTHON python
 
+# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
 
 ARG USE_PYTHON_3_NOT_2
 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
similarity index 92%
rename from tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile
rename to tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
index 16973b47af..5c5b2f9163 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
@@ -46,6 +46,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  
 ENV CI_BUILD_PYTHON python
 
+# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
 
 ARG USE_PYTHON_3_NOT_2
 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu-devel-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
similarity index 95%
rename from tensorflow/tools/dockerfiles/dockerfiles/gpu-devel-jupyter.Dockerfile
rename to tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
index 9ecaec38c2..8769e4e9cd 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/gpu-devel-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
@@ -79,6 +79,10 @@ ENV TF_CUDNN_VERSION=7
 # NCCL 2.x
 ENV TF_NCCL_VERSION=2
 
+# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
+
 ARG USE_PYTHON_3_NOT_2
 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
 ARG PYTHON=python${_PY_SUFFIX}
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu-devel.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
similarity index 95%
rename from tensorflow/tools/dockerfiles/dockerfiles/gpu-devel.Dockerfile
rename to tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
index c79bc3cf4c..809cda679e 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/gpu-devel.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
@@ -79,6 +79,10 @@ ENV TF_CUDNN_VERSION=7
 # NCCL 2.x
 ENV TF_NCCL_VERSION=2
 
+# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
+
 ARG USE_PYTHON_3_NOT_2
 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3}
 ARG PYTHON=python${_PY_SUFFIX}
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/cpu-devel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
similarity index 73%
rename from tensorflow/tools/dockerfiles/partials/ubuntu/cpu-devel.partial.Dockerfile
rename to tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
index 901652cc28..a61dfbbe54 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/cpu-devel.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
@@ -23,3 +23,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  
 ENV CI_BUILD_PYTHON python
 
+# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia-devel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
similarity index 90%
rename from tensorflow/tools/dockerfiles/partials/ubuntu/nvidia-devel.partial.Dockerfile
rename to tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
index 48d457e40c..95f9875012 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia-devel.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
@@ -55,3 +55,7 @@ ENV TF_CUDNN_VERSION=7
 
 # NCCL 2.x
 ENV TF_NCCL_VERSION=2
+
+# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+ARG CHECKOUT_TF_SRC=0
+RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml
index 4826ddd8e2..19d96e7a3d 100644
--- a/tensorflow/tools/dockerfiles/spec.yml
+++ b/tensorflow/tools/dockerfiles/spec.yml
@@ -33,21 +33,28 @@ header: |
 #   - nightly-py3
 #   - nightly-gpu (similar)
 #   - nightly-gpu-py3
-
+#
+# Releases are all treated differently by TensorFlow's CI systems.
 releases:
+    # Built Nightly and pushed to tensorflow/tensorflow
     nightly:
         tag_specs:
             - "{nightly}{py}{jupyter}"
+            - "{ubuntu-devel}{py}"
 
+    # Built per-release and pushed to tensorflow/tensorflow
+    # --arg _TAG_PREFIX=<val> should be set to "1.11" (for example) or "latest".
     versioned:
         tag_specs:
             - "{_TAG_PREFIX}{ubuntu}{py}{jupyter}"
 
-    ubuntu-dockerfiles:
+    # Dockerfiles stored in the TF repo; not pushed anywhere
+    dockerfiles:
         is_dockerfiles: true
         upload_images: false
         tag_specs:
             - "{ubuntu}{jupyter}"
+            - "{ubuntu-devel}{jupyter}"
 
 slice_sets:
 
@@ -87,27 +94,33 @@ slice_sets:
           tests:
               - import-gpu.sh
           test_runtime: nvidia
-        - add_to_name: "-devel"
-          dockerfile_exclusive_name: "cpu-devel"
+
+    ubuntu-devel:
+        - add_to_name: "devel"
+          dockerfile_exclusive_name: "devel-cpu"
           partials:
               - ubuntu/version
-              - ubuntu/cpu-devel
+              - ubuntu/devel-cpu
               - ubuntu/python
               - ubuntu/bazel
               - shell
           tests:
               - build-cpu.sh
-        - add_to_name: "-gpu-devel"
-          dockerfile_exclusive_name: "gpu-devel"
+          args:
+              - CHECKOUT_TF_SRC=1
+        - add_to_name: "devel-gpu"
+          dockerfile_exclusive_name: "devel-gpu"
           partials:
               - ubuntu/version
-              - ubuntu/nvidia-devel
+              - ubuntu/devel-nvidia
               - ubuntu/python
               - ubuntu/bazel
               - shell
           tests:
               - build-gpu.sh
           test_runtime: nvidia
+          args:
+              - CHECKOUT_TF_SRC=1
 
     nightly:
         - add_to_name: "nightly"
-- 
GitLab


From 441471313ca001920987f4f8d3b1ba47e4c73dfc Mon Sep 17 00:00:00 2001
From: Pavithra Vijay <psv@google.com>
Date: Thu, 6 Dec 2018 14:34:02 -0800
Subject: [PATCH 154/873] Add v2 APIs for metrics: `SensitivityAtSpecificity`,
 `SpecificityAtSensitivity`

PiperOrigin-RevId: 224409174
---
 tensorflow/python/keras/metrics.py            |  82 ++++----
 .../tools/api/generator/api_init_files.bzl    |   1 -
 ...metrics.-sensitivity-at-specificity.pbtxt} |   9 +-
 ...metrics.-specificity-at-sensitivity.pbtxt} |   8 +-
 .../golden/v1/tensorflow.keras.metrics.pbtxt  |   8 +
 .../tensorflow.metrics.-binary-accuracy.pbtxt | 194 ------------------
 ...orflow.metrics.-categorical-accuracy.pbtxt | 194 ------------------
 .../golden/v1/tensorflow.metrics.-mean.pbtxt  | 192 -----------------
 .../v1/tensorflow.metrics.-recall.pbtxt       | 192 -----------------
 ...metrics.-sparse-categorical-accuracy.pbtxt | 194 ------------------
 .../tensorflow.metrics.-true-negatives.pbtxt  | 193 -----------------
 .../tensorflow.metrics.-true-positives.pbtxt  | 193 -----------------
 .../api/golden/v1/tensorflow.metrics.pbtxt    |  44 ----
 ...metrics.-sensitivity-at-specificity.pbtxt} |   7 +-
 ...metrics.-specificity-at-sensitivity.pbtxt} |   8 +-
 .../golden/v2/tensorflow.keras.metrics.pbtxt  |   8 +
 .../v2/tensorflow.metrics.-accuracy.pbtxt     | 194 ------------------
 .../tensorflow.metrics.-binary-accuracy.pbtxt | 194 ------------------
 ...orflow.metrics.-categorical-accuracy.pbtxt | 194 ------------------
 .../tensorflow.metrics.-false-negatives.pbtxt | 193 -----------------
 .../tensorflow.metrics.-false-positives.pbtxt | 193 -----------------
 .../golden/v2/tensorflow.metrics.-mean.pbtxt  | 192 -----------------
 .../v2/tensorflow.metrics.-precision.pbtxt    | 192 -----------------
 .../v2/tensorflow.metrics.-recall.pbtxt       | 192 -----------------
 ...metrics.-sparse-categorical-accuracy.pbtxt | 194 ------------------
 .../tensorflow.metrics.-true-negatives.pbtxt  | 193 -----------------
 .../tensorflow.metrics.-true-positives.pbtxt  | 193 -----------------
 .../api/golden/v2/tensorflow.metrics.pbtxt    |  47 -----
 .../tools/api/golden/v2/tensorflow.pbtxt      |   4 -
 29 files changed, 74 insertions(+), 3628 deletions(-)
 rename tensorflow/tools/api/golden/v1/{tensorflow.metrics.-accuracy.pbtxt => tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt} (93%)
 rename tensorflow/tools/api/golden/v1/{tensorflow.metrics.-false-positives.pbtxt => tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt} (93%)
 delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.metrics.-binary-accuracy.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.metrics.-categorical-accuracy.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.metrics.-mean.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.metrics.-recall.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.metrics.-true-negatives.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v1/tensorflow.metrics.-true-positives.pbtxt
 rename tensorflow/tools/api/golden/{v1/tensorflow.metrics.-precision.pbtxt => v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt} (93%)
 rename tensorflow/tools/api/golden/{v1/tensorflow.metrics.-false-negatives.pbtxt => v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt} (93%)
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-accuracy.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-accuracy.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-accuracy.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-negatives.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-positives.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-precision.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-recall.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-negatives.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-positives.pbtxt
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.metrics.pbtxt

diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 1d3c6e67d7..331a8636d1 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -511,7 +511,7 @@ class Metric(Layer):
   ### End: For use by subclasses ###
 
 
-@tf_export('metrics.Mean', 'keras.metrics.Mean')
+@tf_export('keras.metrics.Mean')
 class Mean(Metric):
   """Computes the (weighted) mean of the given values.
 
@@ -528,7 +528,7 @@ class Mean(Metric):
   Usage:
 
   ```python
-  m = tf.metrics.Mean()
+  m = tf.keras.metrics.Mean()
   m.update_state([1, 3, 5, 7])
   print('Final result: ', m.result().numpy())  # Final result: 4.0
   ```
@@ -537,7 +537,7 @@ class Mean(Metric):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.add_metric(metrics_module.Mean(name='mean_1')(outputs))
+  model.add_metric(tf.keras.metrics.Mean(name='mean_1')(outputs))
   model.compile('sgd', loss='mse')
   ```
   """
@@ -651,7 +651,7 @@ class MeanMetricWrapper(Mean):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('metrics.Accuracy', 'keras.metrics.Accuracy')
+@tf_export('keras.metrics.Accuracy')
 class Accuracy(MeanMetricWrapper):
   """Calculates how often predictions matches labels.
 
@@ -670,7 +670,7 @@ class Accuracy(MeanMetricWrapper):
   Usage:
 
   ```python
-  m = tf.metrics.Accuracy()
+  m = tf.keras.metrics.Accuracy()
   m.update_state([1, 2, 3, 4], [0, 2, 3, 4])
   print('Final result: ', m.result().numpy())  # Final result: 0.75
   ```
@@ -679,7 +679,7 @@ class Accuracy(MeanMetricWrapper):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.Accuracy()])
+  model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.Accuracy()])
   ```
   """
 
@@ -693,7 +693,7 @@ class Accuracy(MeanMetricWrapper):
     return super(Accuracy, cls).from_config(config)
 
 
-@tf_export('metrics.BinaryAccuracy', 'keras.metrics.BinaryAccuracy')
+@tf_export('keras.metrics.BinaryAccuracy')
 class BinaryAccuracy(MeanMetricWrapper):
   """Calculates how often predictions matches labels.
 
@@ -712,7 +712,7 @@ class BinaryAccuracy(MeanMetricWrapper):
   Usage:
 
   ```python
-  m = tf.metrics.BinaryAccuracy()
+  m = tf.keras.metrics.BinaryAccuracy()
   m.update_state([1, 1, 0, 0], [0.98, 1, 0, 0.6])
   print('Final result: ', m.result().numpy())  # Final result: 0.75
   ```
@@ -721,7 +721,7 @@ class BinaryAccuracy(MeanMetricWrapper):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.BinaryAccuracy()])
+  model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.BinaryAccuracy()])
   ```
   """
 
@@ -744,8 +744,7 @@ class BinaryAccuracy(MeanMetricWrapper):
     return super(BinaryAccuracy, cls).from_config(config)
 
 
-@tf_export(
-    'metrics.CategoricalAccuracy', 'keras.metrics.CategoricalAccuracy')
+@tf_export('keras.metrics.CategoricalAccuracy')
 class CategoricalAccuracy(MeanMetricWrapper):
   """Calculates how often predictions matches labels.
 
@@ -768,7 +767,7 @@ class CategoricalAccuracy(MeanMetricWrapper):
   Usage:
 
   ```python
-  m = tf.metrics.CategoricalAccuracy()
+  m = tf.keras.metrics.CategoricalAccuracy()
   m.update_state([[0, 0, 1], [0, 1, 0]], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]])
   print('Final result: ', m.result().numpy())  # Final result: 0.5
   ```
@@ -777,7 +776,10 @@ class CategoricalAccuracy(MeanMetricWrapper):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.CategoricalAccuracy()])
+  model.compile(
+    'sgd',
+    loss='mse',
+    metrics=[tf.keras.metrics.CategoricalAccuracy()])
   ```
   """
 
@@ -798,9 +800,7 @@ class CategoricalAccuracy(MeanMetricWrapper):
     return super(CategoricalAccuracy, cls).from_config(config)
 
 
-@tf_export(
-    'metrics.SparseCategoricalAccuracy',
-    'keras.metrics.SparseCategoricalAccuracy')
+@tf_export('keras.metrics.SparseCategoricalAccuracy')
 class SparseCategoricalAccuracy(MeanMetricWrapper):
   """Calculates how often predictions matches integer labels.
 
@@ -820,7 +820,7 @@ class SparseCategoricalAccuracy(MeanMetricWrapper):
   Usage:
 
   ```python
-  m = tf.metrics.SparseCategoricalAccuracy()
+  m = tf.keras.metrics.SparseCategoricalAccuracy()
   m.update_state([[2], [1]], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]])
   print('Final result: ', m.result().numpy())  # Final result: 0.5
   ```
@@ -832,7 +832,7 @@ class SparseCategoricalAccuracy(MeanMetricWrapper):
   model.compile(
       'sgd',
       loss='mse',
-      metrics=[tf.metrics.SparseCategoricalAccuracy()])
+      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
   ```
   """
 
@@ -907,7 +907,7 @@ class _ConfusionMatrixConditionCount(Metric):
       K.set_value(v, np.zeros((num_thresholds,)))
 
 
-@tf_export('metrics.FalsePositives', 'keras.metrics.FalsePositives')
+@tf_export('keras.metrics.FalsePositives')
 class FalsePositives(_ConfusionMatrixConditionCount):
   """Calculates the number of false positives.
 
@@ -925,7 +925,7 @@ class FalsePositives(_ConfusionMatrixConditionCount):
   Usage:
 
   ```python
-  m = tf.metrics.FalsePositives()
+  m = tf.keras.metrics.FalsePositives()
   m.update_state([0, 1, 0, 0], [0, 0, 1, 1])
   print('Final result: ', m.result().numpy())  # Final result: 2
   ```
@@ -934,7 +934,7 @@ class FalsePositives(_ConfusionMatrixConditionCount):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.FalsePositives()])
+  model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.FalsePositives()])
   ```
   """
 
@@ -957,7 +957,7 @@ class FalsePositives(_ConfusionMatrixConditionCount):
         dtype=dtype)
 
 
-@tf_export('metrics.FalseNegatives', 'keras.metrics.FalseNegatives')
+@tf_export('keras.metrics.FalseNegatives')
 class FalseNegatives(_ConfusionMatrixConditionCount):
   """Calculates the number of false negatives.
 
@@ -975,7 +975,7 @@ class FalseNegatives(_ConfusionMatrixConditionCount):
   Usage:
 
   ```python
-  m = tf.metrics.FalseNegatives()
+  m = tf.keras.metrics.FalseNegatives()
   m.update_state([0, 1, 1, 1], [0, 1, 0, 0])
   print('Final result: ', m.result().numpy())  # Final result: 2
   ```
@@ -984,7 +984,7 @@ class FalseNegatives(_ConfusionMatrixConditionCount):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.FalseNegatives()])
+  model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.FalseNegatives()])
   ```
   """
 
@@ -1007,7 +1007,7 @@ class FalseNegatives(_ConfusionMatrixConditionCount):
         dtype=dtype)
 
 
-@tf_export('metrics.TrueNegatives', 'keras.metrics.TrueNegatives')
+@tf_export('keras.metrics.TrueNegatives')
 class TrueNegatives(_ConfusionMatrixConditionCount):
   """Calculates the number of true negatives.
 
@@ -1025,7 +1025,7 @@ class TrueNegatives(_ConfusionMatrixConditionCount):
   Usage:
 
   ```python
-  m = tf.metrics.TrueNegatives()
+  m = tf.keras.metrics.TrueNegatives()
   m.update_state([0, 1, 0, 0], [1, 1, 0, 0])
   print('Final result: ', m.result().numpy())  # Final result: 2
   ```
@@ -1034,7 +1034,7 @@ class TrueNegatives(_ConfusionMatrixConditionCount):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.TrueNegatives()])
+  model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.TrueNegatives()])
   ```
   """
 
@@ -1057,7 +1057,7 @@ class TrueNegatives(_ConfusionMatrixConditionCount):
         dtype=dtype)
 
 
-@tf_export('metrics.TruePositives', 'keras.metrics.TruePositives')
+@tf_export('keras.metrics.TruePositives')
 class TruePositives(_ConfusionMatrixConditionCount):
   """Calculates the number of true positives.
 
@@ -1075,7 +1075,7 @@ class TruePositives(_ConfusionMatrixConditionCount):
   Usage:
 
   ```python
-  m = tf.metrics.TruePositives()
+  m = tf.keras.metrics.TruePositives()
   m.update_state([0, 1, 1, 1], [1, 0, 1, 1])
   print('Final result: ', m.result().numpy())  # Final result: 2
   ```
@@ -1084,7 +1084,7 @@ class TruePositives(_ConfusionMatrixConditionCount):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.TruePositives()])
+  model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.TruePositives()])
   ```
   """
 
@@ -1107,7 +1107,7 @@ class TruePositives(_ConfusionMatrixConditionCount):
         dtype=dtype)
 
 
-@tf_export('metrics.Precision', 'keras.metrics.Precision')
+@tf_export('keras.metrics.Precision')
 class Precision(Metric):
   """Computes the precision of the predictions with respect to the labels.
 
@@ -1126,7 +1126,7 @@ class Precision(Metric):
   Usage:
 
   ```python
-  m = tf.metrics.Precision()
+  m = tf.keras.metrics.Precision()
   m.update_state([0, 1, 1, 1], [1, 0, 1, 1])
   print('Final result: ', m.result().numpy())  # Final result: 0.66
   ```
@@ -1135,7 +1135,7 @@ class Precision(Metric):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.Precision()])
+  model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.Precision()])
   ```
   """
 
@@ -1192,7 +1192,7 @@ class Precision(Metric):
       K.set_value(v, np.zeros((num_thresholds,)))
 
 
-@tf_export('metrics.Recall', 'keras.metrics.Recall')
+@tf_export('keras.metrics.Recall')
 class Recall(Metric):
   """Computes the recall of the predictions with respect to the labels.
 
@@ -1211,7 +1211,7 @@ class Recall(Metric):
   Usage:
 
   ```python
-  m = tf.metrics.Recall()
+  m = tf.keras.metrics.Recall()
   m.update_state([0, 1, 1, 1], [1, 0, 1, 1])
   print('Final result: ', m.result().numpy())  # Final result: 0.66
   ```
@@ -1220,7 +1220,7 @@ class Recall(Metric):
 
   ```python
   model = keras.models.Model(inputs, outputs)
-  model.compile('sgd', loss='mse', metrics=[tf.metrics.Recall()])
+  model.compile('sgd', loss='mse', metrics=[tf.keras.metrics.Recall()])
   ```
   """
 
@@ -1341,6 +1341,7 @@ class SensitivitySpecificityBase(Metric):
       K.set_value(v, np.zeros((num_thresholds,)))
 
 
+@tf_export('keras.metrics.SensitivityAtSpecificity')
 class SensitivityAtSpecificity(SensitivitySpecificityBase):
   """Computes the sensitivity at a given specificity.
 
@@ -1363,7 +1364,7 @@ class SensitivityAtSpecificity(SensitivitySpecificityBase):
   Usage:
 
   ```python
-  m = tf.metrics.SensitivityAtSpecificity(0.4, num_thresholds=1)
+  m = tf.keras.metrics.SensitivityAtSpecificity(0.4, num_thresholds=1)
   m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9])
   print('Final result: ', m.result().numpy())  # Final result: 0.5
   ```
@@ -1375,7 +1376,7 @@ class SensitivityAtSpecificity(SensitivitySpecificityBase):
   model.compile(
       'sgd',
       loss='mse',
-      metrics=[tf.metrics.SensitivityAtSpecificity()])
+      metrics=[tf.keras.metrics.SensitivityAtSpecificity()])
   ```
   """
 
@@ -1409,6 +1410,7 @@ class SensitivityAtSpecificity(SensitivitySpecificityBase):
                                self.tp[min_index] + self.fn[min_index])
 
 
+@tf_export('keras.metrics.SpecificityAtSensitivity')
 class SpecificityAtSensitivity(SensitivitySpecificityBase):
   """Computes the specificity at a given sensitivity.
 
@@ -1431,7 +1433,7 @@ class SpecificityAtSensitivity(SensitivitySpecificityBase):
   Usage:
 
   ```python
-  m = tf.metrics.SpecificityAtSensitivity(0.8, num_thresholds=1)
+  m = tf.keras.metrics.SpecificityAtSensitivity(0.8, num_thresholds=1)
   m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9])
   print('Final result: ', m.result().numpy())  # Final result: 1.0
   ```
@@ -1443,7 +1445,7 @@ class SpecificityAtSensitivity(SensitivitySpecificityBase):
   model.compile(
       'sgd',
       loss='mse',
-      metrics=[tf.metrics.SpecificityAtSensitivity()])
+      metrics=[tf.keras.metrics.SpecificityAtSensitivity()])
   ```
   """
 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 3517c11cc9..0245ac50a6 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -64,7 +64,6 @@ TENSORFLOW_API_INIT_FILES = [
     "lite/constants/__init__.py",
     "losses/__init__.py",
     "math/__init__.py",
-    "metrics/__init__.py",
     "nn/__init__.py",
     "nn/rnn_cell/__init__.py",
     "quantization/__init__.py",
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
similarity index 93%
rename from tensorflow/tools/api/golden/v1/tensorflow.metrics.-accuracy.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
index f8e12f8817..aa77d1972c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
@@ -1,8 +1,7 @@
-path: "tensorflow.metrics.Accuracy"
+path: "tensorflow.keras.metrics.SensitivityAtSpecificity"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.Accuracy\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivityAtSpecificity\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
   is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
@@ -85,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'accuracy\', \'None\'], "
+    argspec: "args=[\'self\', \'specificity\', \'num_thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'200\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-false-positives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
similarity index 93%
rename from tensorflow/tools/api/golden/v1/tensorflow.metrics.-false-positives.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
index 9953162ea3..67857aa89f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-false-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
@@ -1,7 +1,7 @@
-path: "tensorflow.metrics.FalsePositives"
+path: "tensorflow.keras.metrics.SpecificityAtSensitivity"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.FalsePositives\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SpecificityAtSensitivity\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
   is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'sensitivity\', \'num_thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'200\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt
index 8cab17edc5..905021dd79 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.pbtxt
@@ -32,10 +32,18 @@ tf_module {
     name: "Recall"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SensitivityAtSpecificity"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "SparseCategoricalAccuracy"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SpecificityAtSensitivity"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TrueNegatives"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-binary-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.metrics.-binary-accuracy.pbtxt
deleted file mode 100644
index b9bc6a716a..0000000000
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-binary-accuracy.pbtxt
+++ /dev/null
@@ -1,194 +0,0 @@
-path: "tensorflow.metrics.BinaryAccuracy"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.BinaryAccuracy\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\', \'threshold\'], varargs=None, keywords=None, defaults=[\'binary_accuracy\', \'None\', \'0.5\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.metrics.-categorical-accuracy.pbtxt
deleted file mode 100644
index 0ef75d8756..0000000000
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-categorical-accuracy.pbtxt
+++ /dev/null
@@ -1,194 +0,0 @@
-path: "tensorflow.metrics.CategoricalAccuracy"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.CategoricalAccuracy\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'categorical_accuracy\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-mean.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.metrics.-mean.pbtxt
deleted file mode 100644
index 7fe6d6fda9..0000000000
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-mean.pbtxt
+++ /dev/null
@@ -1,192 +0,0 @@
-path: "tensorflow.metrics.Mean"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'values\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-recall.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.metrics.-recall.pbtxt
deleted file mode 100644
index 840a68bbc7..0000000000
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-recall.pbtxt
+++ /dev/null
@@ -1,192 +0,0 @@
-path: "tensorflow.metrics.Recall"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.Recall\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt
deleted file mode 100644
index 7bce43fbde..0000000000
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt
+++ /dev/null
@@ -1,194 +0,0 @@
-path: "tensorflow.metrics.SparseCategoricalAccuracy"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.SparseCategoricalAccuracy\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'sparse_categorical_accuracy\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-true-negatives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.metrics.-true-negatives.pbtxt
deleted file mode 100644
index 83cd5b736b..0000000000
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-true-negatives.pbtxt
+++ /dev/null
@@ -1,193 +0,0 @@
-path: "tensorflow.metrics.TrueNegatives"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.TrueNegatives\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-true-positives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.metrics.-true-positives.pbtxt
deleted file mode 100644
index 5b2502eafe..0000000000
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-true-positives.pbtxt
+++ /dev/null
@@ -1,193 +0,0 @@
-path: "tensorflow.metrics.TruePositives"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.TruePositives\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.metrics.pbtxt
index f5c267a166..e9b996c9f5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.metrics.pbtxt
@@ -1,49 +1,5 @@
 path: "tensorflow.metrics"
 tf_module {
-  member {
-    name: "Accuracy"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "BinaryAccuracy"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "CategoricalAccuracy"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "FalseNegatives"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "FalsePositives"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "Mean"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "Precision"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "Recall"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "SparseCategoricalAccuracy"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "TrueNegatives"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "TruePositives"
-    mtype: "<type \'type\'>"
-  }
   member_method {
     name: "accuracy"
     argspec: "args=[\'labels\', \'predictions\', \'weights\', \'metrics_collections\', \'updates_collections\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-precision.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
similarity index 93%
rename from tensorflow/tools/api/golden/v1/tensorflow.metrics.-precision.pbtxt
rename to tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
index 8c3271a109..aa77d1972c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-precision.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
@@ -1,6 +1,7 @@
-path: "tensorflow.metrics.Precision"
+path: "tensorflow.keras.metrics.SensitivityAtSpecificity"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.Precision\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivityAtSpecificity\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
   is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
@@ -83,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'specificity\', \'num_thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'200\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-false-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
similarity index 93%
rename from tensorflow/tools/api/golden/v1/tensorflow.metrics.-false-negatives.pbtxt
rename to tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
index 33226a2df6..67857aa89f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.metrics.-false-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
@@ -1,7 +1,7 @@
-path: "tensorflow.metrics.FalseNegatives"
+path: "tensorflow.keras.metrics.SpecificityAtSensitivity"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.FalseNegatives\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SpecificityAtSensitivity\'>"
+  is_instance: "<class \'tensorflow.python.keras.metrics.SensitivitySpecificityBase\'>"
   is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
   is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
@@ -84,7 +84,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'sensitivity\', \'num_thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'200\', \'None\', \'None\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt
index 8cab17edc5..905021dd79 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.pbtxt
@@ -32,10 +32,18 @@ tf_module {
     name: "Recall"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SensitivityAtSpecificity"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "SparseCategoricalAccuracy"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SpecificityAtSensitivity"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TrueNegatives"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-accuracy.pbtxt
deleted file mode 100644
index f8e12f8817..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-accuracy.pbtxt
+++ /dev/null
@@ -1,194 +0,0 @@
-path: "tensorflow.metrics.Accuracy"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.Accuracy\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'accuracy\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-accuracy.pbtxt
deleted file mode 100644
index b9bc6a716a..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-binary-accuracy.pbtxt
+++ /dev/null
@@ -1,194 +0,0 @@
-path: "tensorflow.metrics.BinaryAccuracy"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.BinaryAccuracy\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\', \'threshold\'], varargs=None, keywords=None, defaults=[\'binary_accuracy\', \'None\', \'0.5\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-accuracy.pbtxt
deleted file mode 100644
index 0ef75d8756..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-categorical-accuracy.pbtxt
+++ /dev/null
@@ -1,194 +0,0 @@
-path: "tensorflow.metrics.CategoricalAccuracy"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.CategoricalAccuracy\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'categorical_accuracy\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-negatives.pbtxt
deleted file mode 100644
index 33226a2df6..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-negatives.pbtxt
+++ /dev/null
@@ -1,193 +0,0 @@
-path: "tensorflow.metrics.FalseNegatives"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.FalseNegatives\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-positives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-positives.pbtxt
deleted file mode 100644
index 9953162ea3..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-false-positives.pbtxt
+++ /dev/null
@@ -1,193 +0,0 @@
-path: "tensorflow.metrics.FalsePositives"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.FalsePositives\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean.pbtxt
deleted file mode 100644
index 7fe6d6fda9..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-mean.pbtxt
+++ /dev/null
@@ -1,192 +0,0 @@
-path: "tensorflow.metrics.Mean"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'mean\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'values\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-precision.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-precision.pbtxt
deleted file mode 100644
index 8c3271a109..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-precision.pbtxt
+++ /dev/null
@@ -1,192 +0,0 @@
-path: "tensorflow.metrics.Precision"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.Precision\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-recall.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-recall.pbtxt
deleted file mode 100644
index 840a68bbc7..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-recall.pbtxt
+++ /dev/null
@@ -1,192 +0,0 @@
-path: "tensorflow.metrics.Recall"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.Recall\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt
deleted file mode 100644
index 7bce43fbde..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-sparse-categorical-accuracy.pbtxt
+++ /dev/null
@@ -1,194 +0,0 @@
-path: "tensorflow.metrics.SparseCategoricalAccuracy"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.SparseCategoricalAccuracy\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.MeanMetricWrapper\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Mean\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'sparse_categorical_accuracy\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-negatives.pbtxt
deleted file mode 100644
index 83cd5b736b..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-negatives.pbtxt
+++ /dev/null
@@ -1,193 +0,0 @@
-path: "tensorflow.metrics.TrueNegatives"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.TrueNegatives\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-positives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-positives.pbtxt
deleted file mode 100644
index 5b2502eafe..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.-true-positives.pbtxt
+++ /dev/null
@@ -1,193 +0,0 @@
-path: "tensorflow.metrics.TruePositives"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.metrics.TruePositives\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics._ConfusionMatrixConditionCount\'>"
-  is_instance: "<class \'tensorflow.python.keras.metrics.Metric\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'thresholds\', \'name\', \'dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'aggregation\', \'synchronization\', \'initializer\'], varargs=None, keywords=None, defaults=[\'()\', \'VariableAggregation.SUM\', \'VariableSynchronization.ON_READ\', \'None\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "result"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "update_state"
-    argspec: "args=[\'self\', \'y_true\', \'y_pred\', \'sample_weight\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.metrics.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.metrics.pbtxt
deleted file mode 100644
index 773efd03fc..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.metrics.pbtxt
+++ /dev/null
@@ -1,47 +0,0 @@
-path: "tensorflow.metrics"
-tf_module {
-  member {
-    name: "Accuracy"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "BinaryAccuracy"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "CategoricalAccuracy"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "FalseNegatives"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "FalsePositives"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "Mean"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "Precision"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "Recall"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "SparseCategoricalAccuracy"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "TrueNegatives"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "TruePositives"
-    mtype: "<type \'type\'>"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 7b1c96c2e8..542e5cbe9a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -216,10 +216,6 @@ tf_module {
     name: "math"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "metrics"
-    mtype: "<type \'module\'>"
-  }
   member {
     name: "name_scope"
     mtype: "<type \'type\'>"
-- 
GitLab


From 2efcc2905fd6f3c19d73a56a4d17a89d6a691ec3 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Thu, 6 Dec 2018 22:48:52 +0000
Subject: [PATCH 155/873] Replace  shape.ndims with array_ops.rank

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/ops/nn_ops.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 4a36aa1550..a0ffbc85ec 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1699,11 +1699,12 @@ def _softmax(logits, compute_op, dim=-1, name=None):
   # still perform softmax on its last dimension.
 
   # In case dim is negative (and is not last dimension -1), add shape.ndims
+  ndims = array_ops.rank(logits)
   if not isinstance(dim, ops.Tensor):
     if dim < 0:
-      dim += shape.ndims
+      dim += ndims
   else:
-    dim = array_ops.where(math_ops.less(dim, 0), dim + shape.ndims, dim)
+    dim = array_ops.where(math_ops.less(dim, 0), dim + ndims, dim)
 
   # Swap logits' dimension of dim and its last dimension.
   input_rank = array_ops.rank(logits)
-- 
GitLab


From ac7864268b480abf216e437d64c1311c7d1362b5 Mon Sep 17 00:00:00 2001
From: Jian Li <jianlijianli@google.com>
Date: Thu, 6 Dec 2018 14:55:13 -0800
Subject: [PATCH 156/873] Make kInputLayerNormWeightsTensor optional.

PiperOrigin-RevId: 224413157
---
 tensorflow/lite/kernels/layer_norm_lstm.cc    |  41 ++--
 .../lite/kernels/layer_norm_lstm_test.cc      | 223 +++++++++++++++++-
 2 files changed, 243 insertions(+), 21 deletions(-)

diff --git a/tensorflow/lite/kernels/layer_norm_lstm.cc b/tensorflow/lite/kernels/layer_norm_lstm.cc
index 5b0046a7b3..49e8a53c82 100644
--- a/tensorflow/lite/kernels/layer_norm_lstm.cc
+++ b/tensorflow/lite/kernels/layer_norm_lstm.cc
@@ -55,7 +55,7 @@ constexpr int kCellToForgetWeightsTensor = 10;  // Optional
 constexpr int kCellToOutputWeightsTensor = 11;  // Optional
 
 // Layer norm weights tensors of size {n_cell}, representing a diagonal matrix.
-constexpr int kInputLayerNormWeightsTensor = 12;
+constexpr int kInputLayerNormWeightsTensor = 12;  // Optional
 constexpr int kForgetLayerNormWeightsTensor = 13;
 constexpr int kCellLayerNormWeightsTensor = 14;
 constexpr int kOutputLayerNormWeightsTensor = 15;
@@ -118,7 +118,8 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
-  if (input_to_input_weights != nullptr) {
+  const bool use_cifg = (input_to_input_weights == nullptr);
+  if (!use_cifg) {
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input);
@@ -138,7 +139,9 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor);
-  if (recurrent_to_input_weights != nullptr) {
+  if (use_cifg) {
+    TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights, nullptr);
+  } else {
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0],
                       n_cell);
@@ -161,15 +164,6 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1],
                     n_output);
 
-  // We make sure the input-gate's parameters are either both present (regular
-  // LSTM) or not at all (CIFG-LSTM).
-  const bool cifg_weights_all_or_none =
-      ((input_to_input_weights != nullptr) &&
-       (recurrent_to_input_weights != nullptr)) ||
-      ((input_to_input_weights == nullptr) &&
-       (recurrent_to_input_weights == nullptr));
-  TF_LITE_ENSURE(context, cifg_weights_all_or_none == true);
-
   const TfLiteTensor* cell_to_input_weights =
       GetOptionalInputTensor(context, node, kCellToInputWeightsTensor);
   if (cell_to_input_weights) {
@@ -192,7 +186,6 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   }
 
   // Making sure the peephole weights are there all or none.
-  const bool use_cifg = (input_to_input_weights == nullptr);
   const bool peephole_weights_all_or_none =
       ((cell_to_input_weights != nullptr || use_cifg) &&
        (cell_to_forget_weights != nullptr) &&
@@ -204,10 +197,14 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   // Making sure layer norm weights are not null and have the right dimension.
   const TfLiteTensor* input_layer_norm_weights =
-      GetInput(context, node, kInputLayerNormWeightsTensor);
-  TF_LITE_ENSURE(context, input_layer_norm_weights != nullptr);
-  TF_LITE_ENSURE_EQ(context, input_layer_norm_weights->dims->size, 1);
-  TF_LITE_ENSURE_EQ(context, input_layer_norm_weights->dims->data[0], n_cell);
+      GetOptionalInputTensor(context, node, kInputLayerNormWeightsTensor);
+  if (use_cifg) {
+    TF_LITE_ENSURE_EQ(context, input_layer_norm_weights, nullptr);
+  } else {
+    TF_LITE_ENSURE(context, input_layer_norm_weights != nullptr);
+    TF_LITE_ENSURE_EQ(context, input_layer_norm_weights->dims->size, 1);
+    TF_LITE_ENSURE_EQ(context, input_layer_norm_weights->dims->data[0], n_cell);
+  }
 
   const TfLiteTensor* forget_layer_norm_weights =
       GetInput(context, node, kForgetLayerNormWeightsTensor);
@@ -978,6 +975,9 @@ TfLiteStatus EvalFloat(
       (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
   const float* projection_bias_ptr =
       (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
+  const float* input_layer_norm_weight_ptr =
+      (input_layer_norm_weights == nullptr) ? nullptr
+                                            : input_layer_norm_weights->data.f;
 
   // Required tensors, pointers are non-null.
   const float* input_ptr_batch = input->data.f;
@@ -990,7 +990,6 @@ TfLiteStatus EvalFloat(
       recurrent_to_cell_weights->data.f;
   const float* recurrent_to_output_weights_ptr =
       recurrent_to_output_weights->data.f;
-  const float* input_layer_norm_weight_ptr = input_layer_norm_weights->data.f;
   const float* forget_layer_norm_weight_ptr = forget_layer_norm_weights->data.f;
   const float* cell_layer_norm_weight_ptr = cell_layer_norm_weights->data.f;
   const float* output_layer_norm_weight_ptr = output_layer_norm_weights->data.f;
@@ -1115,6 +1114,9 @@ TfLiteStatus EvalHybrid(
       (projection_weights == nullptr) ? 1.0f : projection_weights->params.scale;
   const float* projection_bias_ptr =
       (projection_bias == nullptr) ? nullptr : projection_bias->data.f;
+  const float* input_layer_norm_weight_ptr =
+      (input_layer_norm_weights == nullptr) ? nullptr
+                                            : input_layer_norm_weights->data.f;
 
   // Required tensors, pointers are non-null.
   const float* input_ptr_batch = input->data.f;
@@ -1141,7 +1143,6 @@ TfLiteStatus EvalHybrid(
       reinterpret_cast<int8_t*>(recurrent_to_output_weights->data.uint8);
   const float recurrent_to_output_weights_scale =
       recurrent_to_output_weights->params.scale;
-  const float* input_layer_norm_weight_ptr = input_layer_norm_weights->data.f;
   const float* forget_layer_norm_weight_ptr = forget_layer_norm_weights->data.f;
   const float* cell_layer_norm_weight_ptr = cell_layer_norm_weights->data.f;
   const float* output_layer_norm_weight_ptr = output_layer_norm_weights->data.f;
@@ -1221,7 +1222,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor);
 
   const TfLiteTensor* input_layer_norm_weights =
-      GetInput(context, node, kInputLayerNormWeightsTensor);
+      GetOptionalInputTensor(context, node, kInputLayerNormWeightsTensor);
   const TfLiteTensor* forget_layer_norm_weights =
       GetInput(context, node, kForgetLayerNormWeightsTensor);
   const TfLiteTensor* cell_layer_norm_weights =
diff --git a/tensorflow/lite/kernels/layer_norm_lstm_test.cc b/tensorflow/lite/kernels/layer_norm_lstm_test.cc
index e89bce50c3..1c13cee1c3 100644
--- a/tensorflow/lite/kernels/layer_norm_lstm_test.cc
+++ b/tensorflow/lite/kernels/layer_norm_lstm_test.cc
@@ -83,7 +83,11 @@ class LayerNormLSTMOpModel : public SingleOpModel {
       cell_to_output_weights_ = AddNullInput();
     }
 
-    input_layer_norm_weights_ = AddInput(TensorType_FLOAT32);
+    if (use_cifg) {
+      input_layer_norm_weights_ = AddNullInput();
+    } else {
+      input_layer_norm_weights_ = AddInput(TensorType_FLOAT32);
+    }
     forget_layer_norm_weights_ = AddInput(TensorType_FLOAT32);
     cell_layer_norm_weights_ = AddInput(TensorType_FLOAT32);
     output_layer_norm_weights_ = AddInput(TensorType_FLOAT32);
@@ -650,6 +654,223 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest,
                 &layer_norm_lstm);
 }
 
+class CifgPeepholeProjectionNoClippingLayerNormLstmTest
+    : public BaseLayerNormLstmTest {
+  void SetUp() override {
+    input_to_forget_weights_ = {-0.6, -0.1, 0.3,  0.2,  0.9,  -0.5, -0.2,
+                                -0.4, 0.3,  -0.8, -0.4, 0.3,  -0.5, -0.4,
+                                -0.6, 0.3,  -0.4, -0.6, -0.5, -0.5};
+    input_to_cell_weights_ = {-0.4, -0.3, -0.2, -0.1, -0.5, 0.5,  -0.2,
+                              -0.3, -0.2, -0.6, 0.6,  -0.1, -0.4, -0.3,
+                              -0.7, 0.7,  -0.9, -0.5, 0.8,  0.6};
+    input_to_output_weights_ = {-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3,
+                                -0.3, -0.8, -0.2, 0.6,  -0.2, 0.4,  -0.7,
+                                -0.3, -0.5, 0.1,  0.5,  -0.6, -0.4};
+
+    forget_gate_bias_ = {0.1, -0.3, -0.2, 0.1};
+    cell_gate_bias_ = {-0.05, 0.72, 0.25, 0.08};
+    output_gate_bias_ = {0.05, -0.01, 0.2, 0.1};
+
+    recurrent_to_cell_weights_ = {-0.3, 0.2, 0.1, -0.3, 0.8,  -0.08,
+                                  -0.2, 0.3, 0.8, -0.6, -0.1, 0.2};
+    recurrent_to_forget_weights_ = {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4,
+                                    0.9,  0.3,  -0.1, 0.2,  0.5, 0.2};
+    recurrent_to_output_weights_ = {0.3,  -0.1, 0.1,  -0.2, -0.5, -0.7,
+                                    -0.2, -0.6, -0.1, -0.4, -0.7, -0.2};
+
+    cell_to_forget_weights_ = {-0.02, -0.15, -0.25, -0.03};
+    cell_to_output_weights_ = {0.1, -0.1, -0.5, 0.05};
+
+    forget_layer_norm_weights_ = {0.2, 0.2, 0.4, 0.3};
+    cell_layer_norm_weights_ = {0.7, 0.2, 0.3, 0.8};
+    output_layer_norm_weights_ = {0.6, 0.2, 0.2, 0.5};
+    projection_weights_ = {-0.1, 0.2,  0.01, -0.2, 0.1,  0.5,
+                           0.3,  0.08, 0.07, 0.2,  -0.4, 0.2};
+
+    layer_norm_lstm_input_ = {
+        {// Batch0: 3 (input_sequence_size) * 5 (n_input)
+         0.7, 0.8, 0.1, 0.2, 0.3,   // seq 0
+         0.8, 0.1, 0.2, 0.4, 0.5,   // seq 1
+         0.2, 0.7, 0.7, 0.1, 0.7},  // seq 2
+
+        {// Batch1: 3 (input_sequence_size) * 5 (n_input)
+         0.3, 0.2, 0.9, 0.8, 0.1,   // seq 0
+         0.1, 0.5, 0.2, 0.4, 0.2,   // seq 1
+         0.6, 0.9, 0.2, 0.5, 0.7},  // seq 2
+    };
+  }
+};
+
+TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest,
+       LayerNormLstmBlackBoxTest) {
+  const int n_batch = 2;
+  const int n_input = 5;
+  const int n_cell = 4;
+  const int n_output = 3;
+  const float ceil_clip = 0.0;
+  const float proj_clip = 0.0;
+
+  LayerNormLSTMOpModel layer_norm_lstm(
+      n_batch, n_input, n_cell, n_output,
+      /*use_cifg=*/true, /*use_peephole=*/true,
+      /*use_projection_weights=*/true,
+      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      {
+          {n_batch, n_input},  // input tensor
+
+          {0, 0},             // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {0, 0},              // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},       // cell_to_input_weight tensor
+          {n_cell},  // cell_to_forget_weight tensor
+          {n_cell},  // cell_to_output_weight tensor
+
+          {0},       // input_layer_norm_weight tensor
+          {n_cell},  // forget_layer_norm_weight tensor
+          {n_cell},  // cell_layer_norm_weight tensor
+          {n_cell},  // output_layer_norm_weight tensor
+
+          {0},       // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {n_output, n_cell},  // projection_weight tensor
+          {0},                 // projection_bias tensor
+      });
+
+  layer_norm_lstm.SetInputToCellWeights(input_to_cell_weights_);
+  layer_norm_lstm.SetInputToForgetWeights(input_to_forget_weights_);
+  layer_norm_lstm.SetInputToOutputWeights(input_to_output_weights_);
+
+  layer_norm_lstm.SetCellBias(cell_gate_bias_);
+  layer_norm_lstm.SetForgetGateBias(forget_gate_bias_);
+  layer_norm_lstm.SetOutputGateBias(output_gate_bias_);
+
+  layer_norm_lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_);
+  layer_norm_lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_);
+  layer_norm_lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_);
+
+  layer_norm_lstm.SetCellToForgetWeights(cell_to_forget_weights_);
+  layer_norm_lstm.SetCellToOutputWeights(cell_to_output_weights_);
+
+  layer_norm_lstm.SetForgetLayerNormWeights(forget_layer_norm_weights_);
+  layer_norm_lstm.SetCellLayerNormWeights(cell_layer_norm_weights_);
+  layer_norm_lstm.SetOutputLayerNormWeights(output_layer_norm_weights_);
+
+  layer_norm_lstm.SetProjectionWeights(projection_weights_);
+
+  // Verify the final output.
+  const std::vector<std::vector<float>> layer_norm_lstm_golden_output = {
+      {
+          // Batch0: 3 (input_sequence_size) * 3 (n_output)
+          0.02129706, 0.140816242, 0.0112733059,     // seq 0
+          0.0132302344, 0.152308047, 0.0346313119,   // seq 1
+          -0.0123688057, 0.165790111, 0.0893077999,  // seq 2
+      },
+      {
+          // Batch1: 3 (input_sequence_size) * 3 (n_output)
+          -0.0226350538, 0.0916948169, 0.0769175813,  // seq 0
+          -0.0269966982, 0.149707705, 0.094149217,    // seq 1
+          -0.0103429332, 0.173016444, 0.0720508844,   // seq 2
+      }};
+
+  VerifyGoldens(layer_norm_lstm_input_, layer_norm_lstm_golden_output,
+                &layer_norm_lstm);
+}
+
+TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest,
+       HybridLayerNormLstmBlackBoxTest) {
+  const int n_batch = 2;
+  const int n_input = 5;
+  const int n_cell = 4;
+  const int n_output = 3;
+  const float ceil_clip = 0.0;
+  const float proj_clip = 0.0;
+
+  HybridLayerNormLSTMOpModel layer_norm_lstm(
+      n_batch, n_input, n_cell, n_output,
+      /*use_cifg=*/true, /*use_peephole=*/true,
+      /*use_projection_weights=*/true,
+      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      {
+          {n_batch, n_input},  // input tensor
+
+          {0, 0},             // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {0, 0},              // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},       // cell_to_input_weight tensor
+          {n_cell},  // cell_to_forget_weight tensor
+          {n_cell},  // cell_to_output_weight tensor
+
+          {0},       // input_layer_norm_weight tensor
+          {n_cell},  // forget_layer_norm_weight tensor
+          {n_cell},  // cell_layer_norm_weight tensor
+          {n_cell},  // output_layer_norm_weight tensor
+
+          {0},       // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {n_output, n_cell},  // projection_weight tensor
+          {0},                 // projection_bias tensor
+      });
+
+  layer_norm_lstm.SetInputToCellWeights(input_to_cell_weights_);
+  layer_norm_lstm.SetInputToForgetWeights(input_to_forget_weights_);
+  layer_norm_lstm.SetInputToOutputWeights(input_to_output_weights_);
+
+  layer_norm_lstm.SetCellBias(cell_gate_bias_);
+  layer_norm_lstm.SetForgetGateBias(forget_gate_bias_);
+  layer_norm_lstm.SetOutputGateBias(output_gate_bias_);
+
+  layer_norm_lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_);
+  layer_norm_lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_);
+  layer_norm_lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_);
+
+  layer_norm_lstm.SetCellToForgetWeights(cell_to_forget_weights_);
+  layer_norm_lstm.SetCellToOutputWeights(cell_to_output_weights_);
+
+  layer_norm_lstm.SetForgetLayerNormWeights(forget_layer_norm_weights_);
+  layer_norm_lstm.SetCellLayerNormWeights(cell_layer_norm_weights_);
+  layer_norm_lstm.SetOutputLayerNormWeights(output_layer_norm_weights_);
+
+  layer_norm_lstm.SetProjectionWeights(projection_weights_);
+
+  // Verify the final output.
+  const std::vector<std::vector<float>> layer_norm_lstm_golden_output = {
+      {
+          // Batch0: 3 (input_sequence_size) * 3 (n_output)
+          0.0212250091, 0.140474007, 0.0115012666,   // seq 0
+          0.0130806509, 0.152660668, 0.0347516984,   // seq 1
+          -0.0124010444, 0.166042402, 0.0898982584,  // seq 2
+      },
+      {
+          // Batch1: 3 (input_sequence_size) * 3 (n_output)
+          -0.0228835996, 0.0917588323, 0.0778886303,  // seq 0
+          -0.0275101066, 0.148769245, 0.0938384682,   // seq 1
+          -0.0103605557, 0.172605693, 0.0728750974,   // seq 2
+      }};
+
+  VerifyGoldens(layer_norm_lstm_input_, layer_norm_lstm_golden_output,
+                &layer_norm_lstm);
+}
+
 }  // namespace
 }  // namespace custom
 }  // namespace ops
-- 
GitLab


From 3ca8908b14ce55151ce0e043ae6423fb50d29ade Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 15:06:07 -0800
Subject: [PATCH 157/873] gradient_checker_v2.py: some internal improvements.

PiperOrigin-RevId: 224415291
---
 tensorflow/python/ops/gradient_checker_v2.py  | 33 ++++++++++------
 .../python/ops/gradient_checker_v2_test.py    | 38 +++++++++----------
 2 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/tensorflow/python/ops/gradient_checker_v2.py b/tensorflow/python/ops/gradient_checker_v2.py
index cf84484112..5d473eeb5f 100644
--- a/tensorflow/python/ops/gradient_checker_v2.py
+++ b/tensorflow/python/ops/gradient_checker_v2.py
@@ -44,27 +44,19 @@ def _product(t):
     return y
 
 
-def _to_numpy(a):
-  """Converts tensors to numpy arrays.
+def _eval_indexed_slices(a):
+  """Converts IndexedSlices to IndexedSlicesValue with numpy indices/values.
 
-  Converts Tensors and EagerTensors to numpy arrays.
   When eager execution is enabled, converts IndexedSlices
-  to IndexedSlicesValue with numpy indices/values
+  to IndexedSlicesValue with numpy indices/values.
 
   Args:
     a: any value.
 
   Returns:
-    If a is EagerTensor or Tensor, returns the evaluation of a by calling
-    numpy() or run().
     If a is IndexedSlices and eager execution is enabled, calls numpy() on a's
     fields. Otherwise returns a unchanged.
   """
-  if isinstance(a, ops.EagerTensor):
-    return a.numpy()
-  if isinstance(a, ops.Tensor):
-    sess = ops.get_default_session()
-    return sess.run(a)
   if isinstance(a, ops.IndexedSlices) and context.executing_eagerly():
     return ops.IndexedSlicesValue(
         indices=[x.numpy() for x in a.indices],
@@ -73,6 +65,24 @@ def _to_numpy(a):
   return a
 
 
+def _to_numpy(a):
+  """Converts Tensors and EagerTensors to numpy arrays.
+
+  Args:
+    a: any value.
+
+  Returns:
+    If a is EagerTensor or Tensor, returns the evaluation of a by calling
+    numpy() or run(). Otherwise returns a unchanged.
+  """
+  if isinstance(a, ops.EagerTensor):
+    return a.numpy()
+  if isinstance(a, ops.Tensor):
+    sess = ops.get_default_session()
+    return sess.run(a)
+  return a
+
+
 def _prepare(f, xs_dtypes):
   """Return a function that executes 'f'.
 
@@ -147,6 +157,7 @@ def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
   for col in range(y_size):
     dy_data_flat[col] = 1
     grad = _to_numpy(grad_fn(dy_data, *xs)[0])
+    grad = _eval_indexed_slices(grad)
     dy_data_flat[col] = 0
     if isinstance(grad, ops.IndexedSlicesValue):
       for i, v in zip(grad.indices, grad.values):
diff --git a/tensorflow/python/ops/gradient_checker_v2_test.py b/tensorflow/python/ops/gradient_checker_v2_test.py
index ce9ff47d61..191b2b6568 100644
--- a/tensorflow/python/ops/gradient_checker_v2_test.py
+++ b/tensorflow/python/ops/gradient_checker_v2_test.py
@@ -22,7 +22,6 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
@@ -38,13 +37,17 @@ from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 
 
+def _random_complex(shape, dtype):
+  data = np.random.random_sample(shape).astype(dtype.as_numpy_dtype)
+  if dtype.is_complex:
+    data.imag = np.random.random_sample(shape)
+  return data
+
+
 @test_util.run_all_in_graph_and_eager_modes
 class GradientCheckerTest(test.TestCase):
 
   def testAddSimple(self):
-    # if context.executing_eagerly():
-    #   return
-    np.random.seed(1)  # Fix seed to avoid flakiness
     size = (2, 3)
     x1 = constant_op.constant(2.0, shape=size, name="x1")
     x2 = constant_op.constant(3.0, shape=size, name="x2")
@@ -54,7 +57,6 @@ class GradientCheckerTest(test.TestCase):
     assert error < 1e-4
 
   def testAddCustomized(self):
-    np.random.seed(3)  # Fix seed to avoid flakiness
     size = (2, 3)
     x1 = constant_op.constant(
         2.0, shape=size, dtype=dtypes.float64, name="x1")
@@ -67,7 +69,6 @@ class GradientCheckerTest(test.TestCase):
     assert error < 1e-10
 
   def testGather(self):
-    np.random.seed(4)  # Fix seed to avoid flakiness
     def f(params):
       index_values = [1, 3]
       indices = constant_op.constant(index_values, name="i")
@@ -82,7 +83,6 @@ class GradientCheckerTest(test.TestCase):
     assert error < 1e-4
 
   def testNestedGather(self):
-    np.random.seed(5)  # Fix seed to avoid flakiness
     def f(params):
       index_values = [1, 3, 5, 6]
       indices = constant_op.constant(index_values, name="i")
@@ -100,33 +100,37 @@ class GradientCheckerTest(test.TestCase):
     assert error < 1e-4
 
   def testComplexMul(self):
-    if not context.executing_eagerly():
-      return
     c = constant_op.constant(5 + 7j, dtype=dtypes.complex64)
     def f(x):
       return c * x
-    x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
+    x_shape = c.shape
+    x_dtype = c.dtype
+    x = constant_op.constant(_random_complex(x_shape, x_dtype))
     analytical, numerical = gradient_checker.compute_gradient(
-        f, [x], delta=0.1)
+        f, [x])
     correct = np.array([[5, 7], [-7, 5]])
     self.assertAllEqual(correct, analytical[0])
     self.assertAllClose(correct, numerical[0], rtol=1e-4)
+    x = constant_op.constant(_random_complex(x_shape, x_dtype))
     self.assertLess(
         gradient_checker.max_error(*gradient_checker.compute_gradient(
-            f, [x], delta=0.1)), 2e-4)
+            f, [x])), 3e-4)
 
   def testComplexConj(self):
     def f(x):
       return math_ops.conj(x)
-    x = constant_op.constant(11 - 13j, dtype=dtypes.complex64)
+    x_shape = ()
+    x_dtype = dtypes.complex64
+    x = constant_op.constant(_random_complex(x_shape, x_dtype))
     analytical, numerical = gradient_checker.compute_gradient(
-        f, [x], delta=0.1)
+        f, [x])
     correct = np.array([[1, 0], [0, -1]])
     self.assertAllEqual(correct, analytical[0])
     self.assertAllClose(correct, numerical[0], rtol=2e-5)
+    x = constant_op.constant(_random_complex(x_shape, x_dtype))
     self.assertLess(
         gradient_checker.max_error(*gradient_checker.compute_gradient(
-            f, [x], delta=0.1)), 2e-5)
+            f, [x])), 2e-5)
 
   def testEmptySucceeds(self):
     def f(x):
@@ -140,8 +144,6 @@ class GradientCheckerTest(test.TestCase):
     self.assertEqual(error, 0)
 
   def testEmptyFails(self):
-    # if not context.executing_eagerly():
-    #   return
     @custom_gradient.custom_gradient
     def id_bad_grad(x):
       y = array_ops.identity(x)
@@ -279,8 +281,6 @@ class MiniMNISTTest(test.TestCase):
     return err
 
   def testInputGradient(self):
-    # if context.executing_eagerly():
-    #   return
     self.assertLess(self._BuildAndTestMiniMNIST(0, "input"), 1e-8)
 
   def testHiddenWeightGradient(self):
-- 
GitLab


From 9797cc2e91d01b492ed946e05f8f07c2a7f8c157 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 15:15:34 -0800
Subject: [PATCH 158/873] Fix PREDICT bug.

PiperOrigin-RevId: 224416988
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index fb1316cf33..84816d70d0 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -530,13 +530,16 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
 
 class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook):
 
-  def __init__(self, ctx, enqueue_ops, dequeue_ops, rendezvous=None):
+  def __init__(self, ctx, enqueue_ops, dequeue_ops, rendezvous=None,
+               master=None, session_config=None):
     super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__(
         ctx,
         enqueue_ops,
         dequeue_ops,
         run_infeed_loop_on_coordinator=False,
-        rendezvous=rendezvous)
+        rendezvous=rendezvous,
+        master=master,
+        session_config=session_config)
 
   def _create_infeed_controller(self, name, target, args):
     return _OpSignalOnceQueueContext(name=name, target=target, args=args)
@@ -2748,7 +2751,9 @@ class TPUEstimator(estimator_lib.Estimator):
         hooks = [
             _StoppingPredictHook(scalar_stopping_signal),
             TPUInfeedOutfeedSessionHookForPrediction(
-                ctx, enqueue_ops, host_ops, rendezvous=self._rendezvous[mode]),
+                ctx, enqueue_ops, host_ops, rendezvous=self._rendezvous[mode],
+                master=self._config.master,
+                session_config=self._session_config),
         ] + input_hooks
 
         if prediction_hooks:
-- 
GitLab


From d453dfad7c383db0ded338e8de3142a17d55910b Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Thu, 6 Dec 2018 15:20:01 -0800
Subject: [PATCH 159/873] Exports OpsSet in Python API.

PiperOrigin-RevId: 224417739
---
 tensorflow/lite/python/convert.py                    |  1 +
 .../api/golden/v1/tensorflow.lite.-ops-set.pbtxt     | 12 ++++++++++++
 tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt |  4 ++++
 .../api/golden/v2/tensorflow.lite.-ops-set.pbtxt     | 12 ++++++++++++
 tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt |  4 ++++
 5 files changed, 33 insertions(+)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.lite.-ops-set.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.lite.-ops-set.pbtxt

diff --git a/tensorflow/lite/python/convert.py b/tensorflow/lite/python/convert.py
index 563312e027..9c60399871 100644
--- a/tensorflow/lite/python/convert.py
+++ b/tensorflow/lite/python/convert.py
@@ -97,6 +97,7 @@ def convert_dtype_to_tflite_type(tf_dtype):
   return result
 
 
+@_tf_export("lite.OpsSet")
 class OpsSet(enum.Enum):
   """Enum class defining the sets of ops available to generate TFLite models.
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.lite.-ops-set.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.lite.-ops-set.pbtxt
new file mode 100644
index 0000000000..68c651a3c9
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.lite.-ops-set.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.lite.OpsSet"
+tf_class {
+  is_instance: "<enum \'OpsSet\'>"
+  member {
+    name: "SELECT_TF_OPS"
+    mtype: "<enum \'OpsSet\'>"
+  }
+  member {
+    name: "TFLITE_BUILTINS"
+    mtype: "<enum \'OpsSet\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt
index f5013c250b..154dd00821 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "OpHint"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "OpsSet"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
   member {
     name: "TFLiteConverter"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.-ops-set.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.lite.-ops-set.pbtxt
new file mode 100644
index 0000000000..68c651a3c9
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.lite.-ops-set.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.lite.OpsSet"
+tf_class {
+  is_instance: "<enum \'OpsSet\'>"
+  member {
+    name: "SELECT_TF_OPS"
+    mtype: "<enum \'OpsSet\'>"
+  }
+  member {
+    name: "TFLITE_BUILTINS"
+    mtype: "<enum \'OpsSet\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt
index f5013c250b..154dd00821 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt
@@ -8,6 +8,10 @@ tf_module {
     name: "OpHint"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "OpsSet"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
   member {
     name: "TFLiteConverter"
     mtype: "<type \'type\'>"
-- 
GitLab


From fa06cffc09699de4215cf47b2e00ed6d7cdd4dfe Mon Sep 17 00:00:00 2001
From: Yanhui Liang <yhliang@google.com>
Date: Thu, 6 Dec 2018 15:30:33 -0800
Subject: [PATCH 160/873] Update sdca_optimizer in core to its v2 version, and
 migrate linear_optimizer from contrib to core.

PiperOrigin-RevId: 224419608
---
 .../python/kernel_tests/sdca_ops_test.py         |  7 ++++++-
 .../linear_optimizer/python/ops/sdca_ops.py      | 16 ++++++++++++++--
 .../ops/sharded_mutable_dense_hashtable.py       | 12 +++++++++++-
 .../ops/sharded_mutable_dense_hashtable_test.py  |  7 ++++++-
 .../python/ops/sparse_feature_column.py          | 12 +++++++++++-
 .../python/ops/sparse_feature_column_test.py     |  7 ++++++-
 6 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
index 8466dc36d1..d49834dc86 100644
--- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for SdcaModel."""
+"""Tests for SdcaModel (deprecated).
+
+This module and all its submodules are deprecated. To UPDATE or USE linear
+optimizers, please check its latest version in core:
+tensorflow_estimator/python/estimator/canned/linear_optimizer/.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
index f3f1dcd98d..c056a12fa5 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py
@@ -12,7 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Proximal stochastic dual coordinate ascent optimizer for linear models."""
+# pylint: disable=line-too-long
+"""Proximal stochastic dual coordinate ascent optimizer for linear models (deprecated).
+
+This module and all its submodules are deprecated. To UPDATE or USE linear
+optimizers, please check its latest version in core:
+tensorflow_estimator/python/estimator/canned/linear_optimizer/.
+"""
+# pylint: enable=line-too-long
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -40,6 +47,7 @@ from tensorflow.python.ops import variables as var_ops
 from tensorflow.python.ops.nn import log_poisson_loss
 from tensorflow.python.ops.nn import sigmoid_cross_entropy_with_logits
 from tensorflow.python.summary import summary
+from tensorflow.python.util import deprecation
 
 __all__ = ['SdcaModel']
 
@@ -48,7 +56,7 @@ __all__ = ['SdcaModel']
 class SdcaModel(object):
   """Stochastic dual coordinate ascent solver for linear models.
 
-    Loss functions supported:
+  Loss functions supported:
 
      * Binary logistic loss
      * Squared loss
@@ -109,6 +117,10 @@ class SdcaModel(object):
     ```
   """
 
+  @deprecation.deprecated(
+      None, 'This class is deprecated. To UPDATE or USE linear optimizers, '
+      'please check its latest version in core: '
+      'tensorflow_estimator/python/estimator/canned/linear_optimizer/.')
   def __init__(self, examples, variables, options):
     """Create a new sdca optimizer."""
 
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
index a001555e8f..a28394964a 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Sharded mutable dense hash table."""
+"""Sharded mutable dense hash table (deprecated).
+
+This module and all its submodules are deprecated. To UPDATE or USE linear
+optimizers, please check its latest version in core:
+tensorflow_estimator/python/estimator/canned/linear_optimizer/.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -28,6 +33,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.util import deprecation
 
 
 # TODO(rohanj): This should subclass Checkpointable and implement
@@ -45,6 +51,10 @@ class ShardedMutableDenseHashTable(object):
 
   # TODO(andreasst): consider moving this to lookup module
 
+  @deprecation.deprecated(
+      None, 'This class is deprecated. To UPDATE or USE linear optimizers, '
+      'please check its latest version in core: '
+      'tensorflow_estimator/python/estimator/canned/linear_optimizer/.')
   def __init__(self,
                key_dtype,
                value_dtype,
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
index 2b56d0fa3a..2d1457f9e4 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for sharded_mutable_dense_hashtable.py."""
+"""Tests for sharded_mutable_dense_hashtable.py (deprecated).
+
+This module and all its submodules are deprecated. To UPDATE or USE linear
+optimizers, please check its latest version in core:
+tensorflow_estimator/python/estimator/canned/linear_optimizer/.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sparse_feature_column.py b/tensorflow/contrib/linear_optimizer/python/ops/sparse_feature_column.py
index 003795233f..64730f8eed 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sparse_feature_column.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sparse_feature_column.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Sparse feature column."""
+"""Sparse feature column (deprecated).
+
+This module and all its submodules are deprecated. To UPDATE or USE linear
+optimizers, please check its latest version in core:
+tensorflow_estimator/python/estimator/canned/linear_optimizer/.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -21,6 +26,7 @@ from __future__ import print_function
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework.ops import internal_convert_to_tensor
 from tensorflow.python.framework.ops import name_scope
+from tensorflow.python.util import deprecation
 
 
 class SparseFeatureColumn(object):
@@ -68,6 +74,10 @@ class SparseFeatureColumn(object):
   @@feature_values
   """
 
+  @deprecation.deprecated(
+      None, 'This class is deprecated. To UPDATE or USE linear optimizers, '
+      'please check its latest version in core: '
+      'tensorflow_estimator/python/estimator/canned/linear_optimizer/.')
   def __init__(self, example_indices, feature_indices, feature_values):
     """Creates a `SparseFeatureColumn` representation.
 
diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sparse_feature_column_test.py b/tensorflow/contrib/linear_optimizer/python/ops/sparse_feature_column_test.py
index 51c4f68543..0ae780e1a1 100644
--- a/tensorflow/contrib/linear_optimizer/python/ops/sparse_feature_column_test.py
+++ b/tensorflow/contrib/linear_optimizer/python/ops/sparse_feature_column_test.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for sparse_feature_column.py."""
+"""Tests for sparse_feature_column.py (deprecated).
+
+This module and all its submodules are deprecated. To UPDATE or USE linear
+optimizers, please check its latest version in core:
+tensorflow_estimator/python/estimator/canned/linear_optimizer/.
+"""
 
 from __future__ import absolute_import
 from __future__ import division
-- 
GitLab


From 938af66d71256aac8c04ac022a4f0390e10146f8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 15:46:13 -0800
Subject: [PATCH 161/873] Add missing namespace qualifier that ADL was "saving"
 us from having to provide, but which would break once absl::string_view is
 really std::string_view. PiperOrigin-RevId: 224422387

---
 tensorflow/compiler/xla/service/hlo_pass_pipeline.cc | 2 +-
 tensorflow/compiler/xla/shape_util.cc                | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 312b5d020c..51177f24f5 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -113,7 +113,7 @@ void HloPassPipeline::MaybeDumpHlo(const HloModule& module,
   }
 
   const string message =
-      StrCat("after ", after_pass_name, ", before ", before_pass_name);
+      absl::StrCat("after ", after_pass_name, ", before ", before_pass_name);
   hlo_graph_dumper::MaybeDumpHloModule(module, message);
   VLOG(3) << "HLO " << message << ":";
   VLOG(3) << module.entry_computation_layout().ToString();
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index f3cc51ca91..a4d4e1e53e 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -584,7 +584,7 @@ namespace {
 // Parses shapes with simple recursive descent structure -- consumes from the
 // front of s and passes that view recursively as required.
 StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
-  *s = StripLeadingAsciiWhitespace(*s);
+  *s = absl::StripLeadingAsciiWhitespace(*s);
 
   if (absl::ConsumePrefix(s, "(")) {  // Tuple.
     std::vector<Shape> shapes;
@@ -597,7 +597,7 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
       }
       shapes.emplace_back();
       TF_ASSIGN_OR_RETURN(shapes.back(), ParseShapeStringInternal(s));
-      *s = StripLeadingAsciiWhitespace(*s);
+      *s = absl::StripLeadingAsciiWhitespace(*s);
       must_end = !absl::ConsumePrefix(s, ",");
     }
     return ShapeUtil::MakeTupleShape(shapes);
-- 
GitLab


From fc4b1847dde6511fb6bfb8d4c3aa1e2586307516 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Thu, 6 Dec 2018 15:51:28 -0800
Subject: [PATCH 162/873] Update tests to reflect that expanddims and squeeze
 are now converted

---
 tensorflow/contrib/tensorrt/test/rank_two_test.py | 6 ++++--
 tensorflow/contrib/tensorrt/test/unary_test.py    | 3 +--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/test/rank_two_test.py b/tensorflow/contrib/tensorrt/test/rank_two_test.py
index 0cd733dca1..563232fc12 100644
--- a/tensorflow/contrib/tensorrt/test/rank_two_test.py
+++ b/tensorflow/contrib/tensorrt/test/rank_two_test.py
@@ -51,8 +51,10 @@ class RankTwoTest(trt_test.TfTrtIntegrationTestBase):
         c = constant_op.constant(3.0, name="c%d_3" % i)
         q = math_ops.add(q, c, name="add%d_3" % i)
         if i == 0:
+          axis = constant_op.constant(-1, dtype=dtypes.int32, name="axis")
           for j in range(2):
-            q = array_ops.expand_dims(q, -1, name="expand%d_%d" % (i, j))
+            q = array_ops.expand_dims(q, axis, name="expand%d_%d" % (i, j))
+          q = self.trt_incompatible_op(q)
         q = gen_math_ops.reciprocal(q, name="reciprocal%d" % i)
         outputs.append(q)
       # Combine both paths
@@ -70,7 +72,7 @@ class RankTwoTest(trt_test.TfTrtIntegrationTestBase):
     return {
         "TRTEngineOp_0": [
             "add0_1", "add0_2", "add0_3", "c0_1", "c0_2", "c0_3", "abs0_1",
-            "abs0_2"
+            "abs0_2", "expand0_0", "expand0_1", "axis"
         ],
         "TRTEngineOp_1": [
             "add", "add1_1", "add1_2", "add1_3", "c1_1", "c1_2", "c1_3",
diff --git a/tensorflow/contrib/tensorrt/test/unary_test.py b/tensorflow/contrib/tensorrt/test/unary_test.py
index 9fc50e0595..b77abc4032 100644
--- a/tensorflow/contrib/tensorrt/test/unary_test.py
+++ b/tensorflow/contrib/tensorrt/test/unary_test.py
@@ -107,8 +107,7 @@ class UnaryTest(trt_test.TfTrtIntegrationTestBase):
   def ExpectedEnginesToBuild(self, run_params):
     """Return the expected engines to build."""
     return [
-        "TRTEngineOp_0", "TRTEngineOp_1", "TRTEngineOp_2", "TRTEngineOp_3",
-        "TRTEngineOp_4"
+        "TRTEngineOp_0"
     ]
 
 
-- 
GitLab


From 4ca599630233afe5240deae44212f19412ab6423 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Thu, 6 Dec 2018 15:53:05 -0800
Subject: [PATCH 163/873] Fix stateful metrics in Keras + Distribution Strategy
 codepath.

PiperOrigin-RevId: 224423535
---
 .../contrib/distribute/python/keras_test.py   | 42 +++++++++++++++++--
 tensorflow/python/keras/engine/training.py    | 12 +++++-
 .../python/keras/engine/training_arrays.py    |  6 +--
 .../keras/engine/training_distributed.py      | 15 ++++++-
 4 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 331bf7c496..796c4ed9f6 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1259,9 +1259,43 @@ class TestDistributionStrategyCorrectness(test.TestCase,
       train_dataset = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
       train_dataset = batch_wrapper(train_dataset, batch_size, distribution)
 
-      history = model.fit(x=train_dataset, epochs=1, steps_per_epoch=10)
-      self.assertEqual(history.history['binary_accuracy'], [1.0])
+      history = model.fit(x=train_dataset, epochs=2, steps_per_epoch=10)
+      self.assertEqual(history.history['binary_accuracy'], [1.0, 1.0])
 
+  @combinations.generate(all_strategy_combinations())
+  def test_eval_metrics_correctness(self, distribution):
+    with self.cached_session():
+      model = keras.Sequential()
+      model.add(
+          keras.layers.Dense(
+              3, activation='relu', input_dim=4, kernel_initializer='ones'))
+      model.add(
+          keras.layers.Dense(
+              1, activation='sigmoid', kernel_initializer='ones'))
+      model.compile(
+          loss='mae',
+          metrics=['accuracy', keras.metrics.BinaryAccuracy()],
+          optimizer=gradient_descent.GradientDescentOptimizer(0.001),
+          distribute=distribution)
+
+      # verify correctness of stateful and stateless metrics.
+      x = np.ones((100, 4)).astype('float32')
+      y = np.ones((100, 1)).astype('float32')
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat()
+      dataset = batch_wrapper(dataset, 4, distribution)
+      outs = model.evaluate(dataset, steps=10)
+      self.assertEqual(outs[1], 1.)
+      self.assertEqual(outs[2], 1.)
+
+      y = np.zeros((100, 1)).astype('float32')
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat()
+      dataset = batch_wrapper(dataset, 4, distribution)
+      outs = model.evaluate(dataset, steps=10)
+      self.assertEqual(outs[1], 0.)
+      self.assertEqual(outs[2], 0.)
+
+  # TODO(priyag): Add metrics correctness to this test to compare with and
+  # without distribution strategies.
   @combinations.generate(strategy_and_input_combinations())
   def test_correctness(self, distribution, use_numpy, use_validation_data):
 
@@ -1319,7 +1353,7 @@ class TestDistributionStrategyCorrectness(test.TestCase,
                                         with_distribution,
                                         x_train, y_train, x_predict))
 
-        traning_history = model.fit(**training_inputs).history
+        training_history = model.fit(**training_inputs).history
 
         if eval_inputs is not None:
           eval_result = model.evaluate(**eval_inputs)
@@ -1330,7 +1364,7 @@ class TestDistributionStrategyCorrectness(test.TestCase,
         weights = model.get_weights()
         predict_result = model.predict(**predict_inputs)
 
-        return weights, traning_history, eval_result, predict_result
+        return weights, training_history, eval_result, predict_result
 
       wts_with_ds, history_with_ds, eval_with_ds, predict_with_ds = (
           fit_eval_and_predict(with_distribution=distribution))
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index c902d928a0..90f8a7b252 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -918,11 +918,17 @@ class Model(Network):
                                     [self.total_loss] + metrics_tensors)
 
   def _make_eval_function(self):
+    # TODO(psv,anjalisridhar): Remove updates after we fix b/118841692
+    # Stateful metrics updates
+    metric_updates = []
+    for m in self.metrics:
+      metric_updates += m.updates
+
     metrics_tensors = [
         self._all_stateful_metrics_tensors[m] for m in self.metrics_names[1:]
     ]
-    self._make_test_function_helper('_eval_function',
-                                    [self.total_loss] + metrics_tensors)
+    self._make_test_function_helper(
+        '_eval_function', [self.total_loss] + metrics_tensors, metric_updates)
 
   def _make_predict_function(self):
     if not hasattr(self, 'predict_function'):
@@ -2095,6 +2101,8 @@ class Model(Network):
     if hasattr(self, 'metrics'):
       for m in self.metrics:
         m.reset_states()
+      if self._distribution_strategy:
+        training_distributed._reset_metrics(self)  # pylint: disable=protected-access
 
   def train_on_batch(self,
                      x,
diff --git a/tensorflow/python/keras/engine/training_arrays.py b/tensorflow/python/keras/engine/training_arrays.py
index 26a809d298..196d48faec 100644
--- a/tensorflow/python/keras/engine/training_arrays.py
+++ b/tensorflow/python/keras/engine/training_arrays.py
@@ -241,15 +241,14 @@ def model_iteration(model,
   callbacks.model.stop_training = False
   callbacks._call_begin_hook(mode)
   progbar.on_train_begin()
+
   for epoch in range(initial_epoch, epochs):
     if callbacks.model.stop_training:
       break
 
     # Setup work for each epoch
     epoch_logs = {}
-    if hasattr(model, 'metrics'):
-      for m in model.metrics:
-        m.reset_states()
+    model.reset_metrics()
     callbacks.on_epoch_begin(epoch, epoch_logs, mode=mode)
     progbar.on_epoch_begin(epoch, epoch_logs)
 
@@ -373,6 +372,7 @@ def model_iteration(model,
   callbacks._call_end_hook(mode)
 
   if model._distribution_strategy:
+    # TODO(priyag, psv): Copy back metrics to the original model as well?
     if not validation_in_fit:
       training_distributed._copy_weights_to_original_model(
           model, model._grouped_model, mode)
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 9ca5082673..fc72f6bd15 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -183,6 +183,8 @@ def experimental_fit_loop(model,
 
   callbacks.on_train_begin()
   for epoch in range(initial_epoch, epochs):
+    with current_strategy.scope():
+      _reset_metrics(model, model._grouped_model_train)
     callbacks.on_epoch_begin(epoch)
     epoch_logs = {}
     step_index = 0
@@ -342,7 +344,7 @@ def experimental_test_loop(model,
   # Copy the weights from the original model to each of the replicated models.
   with current_strategy.scope():
     _copy_weights_to_distributed_model(model, model._grouped_model_test)
-
+    _reset_metrics(model, model._grouped_model_test)
   assert steps is not None
   outs = [0.] * len(model.metrics_names)
   for step in range(steps):
@@ -449,7 +451,7 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
   # Copy the weights from the original model to each of the replicated models.
   with current_strategy.scope():
     _copy_weights_to_distributed_model(model, model._grouped_model_predict)
-
+    _reset_metrics(model, model._grouped_model_predict)
   assert steps is not None
   # Since we do not know how many samples we will see, we cannot pre-allocate
   # the returned Numpy arrays. Instead, we store one array per batch seen
@@ -714,3 +716,12 @@ def _per_device_aggregate_batch(batch_outs, model, mode):
       total_batch_outs.append(np.concatenate(nest.flatten(nested_outs)))
     return total_batch_outs
   return batch_outs
+
+
+def _reset_metrics(model, distributed_model=None):
+  if model._distribution_strategy:
+    distributed_model = (
+        distributed_model or
+        model._distribution_strategy.unwrap(model._grouped_model)[0])
+    distributed_model.reset_metrics()
+
-- 
GitLab


From 9cbc061187bb994acc950730bbb844426c572db1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 15:55:11 -0800
Subject: [PATCH 164/873] Return tuples instead of lists from unwrap and device
 methods of tf.distribute.Strategy.

PiperOrigin-RevId: 224423866
---
 .../python/collective_all_reduce_strategy.py  | 14 ++++----
 .../distribute/python/minimize_loss_test.py   |  4 +--
 .../python/mirrored_strategy_multigpu_test.py |  2 +-
 .../distribute/python/one_device_strategy.py  | 12 +++----
 .../python/parameter_server_strategy.py       | 35 +++++++++----------
 .../distribute/python/strategy_test_lib.py    |  2 +-
 .../contrib/distribute/python/tpu_strategy.py | 10 +++---
 .../contrib/optimizer_v2/optimizer_v2.py      |  8 ++---
 .../python/distribute/cross_device_ops.py     |  6 ++--
 .../python/distribute/distribute_lib.py       | 14 ++++----
 .../python/distribute/mirrored_strategy.py    | 19 +++++-----
 11 files changed, 62 insertions(+), 64 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
index e988b63a28..5c50a20490 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
@@ -77,11 +77,11 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
     self._num_workers = 1
 
     if num_gpus_per_worker:
-      local_devices = [
+      local_devices = tuple(
           "/device:GPU:%d" % i for i in range(num_gpus_per_worker)
-      ]
+      )
     else:
-      local_devices = ["/device:CPU:0"]
+      local_devices = ("/device:CPU:0",)
     self._worker_device = device_util.canonicalize("/device:CPU:0")
 
     self._collective_keys = cross_device_utils.CollectiveKeys()
@@ -104,7 +104,7 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
     if task_type is None or task_id is None:
       raise ValueError("When `cluster_spec` is given, you must also specify "
                        "`task_type` and `task_id`")
-    if task_type not in ["chief", "worker"]:
+    if task_type not in ("chief", "worker"):
       raise ValueError(
           "Unrecognized task_type: %r, valid task types are: \"chief\", "
           "\"worker\"." % task_type)
@@ -119,12 +119,12 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
 
     self._worker_device = "/job:%s/task:%d" % (task_type, task_id)
     if num_gpus_per_worker:
-      local_devices = [
+      local_devices = tuple(
           "%s/device:GPU:%d" % (self._worker_device, i)
           for i in range(num_gpus_per_worker)
-      ]
+      )
     else:
-      local_devices = [self._worker_device]
+      local_devices = (self._worker_device,)
 
     self._collective_keys = cross_device_utils.CollectiveKeys()
     self._initialize_local(local_devices)
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index dcc9df4cda..f09483cb56 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -232,7 +232,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
         fetches = distribution.unwrap(
             distribution.call_for_each_replica(model_fn, args=inputs))
         if update_ops_in_cross_replica_mode:
-          fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS)
+          fetches += tuple(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
         return control_flow_ops.group(fetches)
 
       iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
@@ -443,7 +443,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
             step_fn, iterator, iterations=2,
             initial_loop_values=initial_loop_values)
 
-        self.assertEqual({key1: [value1]}, ctx.non_tensor_outputs)
+        self.assertEqual({key1: (value1,)}, ctx.non_tensor_outputs)
         self._verify_loss_output(
             initial_loss(),
             loss_output=ctx.last_step_outputs["replica_loss_reduced"],
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index 66512f983e..36be5c83f8 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -180,7 +180,7 @@ class MirroredStrategyVariableCreatorStackTest(
         variable_scope.variable_creator_scope(main_thread_creator):
       result = distribution.extended.call_for_each_replica(model_fn)
       result = distribution.unwrap(result)
-      expected = ["main_thread:thread_0", "main_thread:thread_1"]
+      expected = ("main_thread:thread_0", "main_thread:thread_1")
       self.assertEqual(expected, result)
 
 
diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py
index e322b6acb8..fdbfba4e04 100644
--- a/tensorflow/contrib/distribute/python/one_device_strategy.py
+++ b/tensorflow/contrib/distribute/python/one_device_strategy.py
@@ -60,7 +60,7 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended):
     if isinstance(colocate_with, six.string_types):
       with ops.device(colocate_with):
         return next_creator(*args, **kwargs)
-    if (isinstance(colocate_with, list) and len(colocate_with) == 1 and
+    if (isinstance(colocate_with, (list, tuple)) and len(colocate_with) == 1 and
         isinstance(colocate_with[0], six.string_types)):
       with ops.device(colocate_with[0]):
         return next_creator(*args, **kwargs)
@@ -166,7 +166,7 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended):
     return array_ops.identity(replica_local_var)
 
   def _unwrap(self, value):
-    return [value]
+    return (value,)
 
   def value_container(self, value):
     return value
@@ -177,15 +177,15 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended):
 
   @property
   def worker_devices(self):
-    return [self._device]
+    return (self._device,)
 
   @property
   def parameter_devices(self):
-    return [self._device]
+    return (self._device,)
 
   def non_slot_devices(self, var_list):
     del var_list
-    return [self._device]
+    return (self._device,)
 
   @property
   def experimental_should_init(self):
@@ -216,4 +216,4 @@ class _OneDeviceReplicaContext(distribute_lib.ReplicaContext):
 
   @property
   def devices(self):
-    return [self._distribution_strategy.extended.worker_devices[0]]
+    return self._distribution_strategy.extended.worker_devices
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy.py b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
index eaeb4d7030..2c7766f95f 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
@@ -145,14 +145,14 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
     # replica. When there are GPUs, replicate operations on these GPUs.
     # Otherwise, place operations on CPU.
     if num_gpus_per_worker > 0:
-      self._compute_devices = [
+      self._compute_devices = tuple(
           "%s/device:GPU:%d" % (self._worker_device, i)
           for i in range(num_gpus_per_worker)
-      ]
+      )
     else:
-      self._compute_devices = [self._worker_device]
+      self._compute_devices = (self._worker_device,)
 
-    self._compute_devices = list(
+    self._compute_devices = tuple(
         map(device_util.resolve, self._compute_devices))
     self._canonical_compute_device_set = set(self._compute_devices)
 
@@ -176,8 +176,8 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
     # The `_parameter_devices` is needed for the `parameter_devices` property
     # and is a list of all variable devices. Here parameter devices are all
     # tasks of the "ps" job.
-    self._parameter_devices = map("/job:ps/task:{}".format,
-                                  range(num_ps_replicas))
+    self._parameter_devices = tuple(map("/job:ps/task:{}".format,
+                                        range(num_ps_replicas)))
 
     # Add a default device so that ops without specified devices will not end up
     # on other workers.
@@ -204,24 +204,24 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
     # replica. When there are GPUs, replicate operations on these GPUs.
     # Otherwise, place operations on CPU.
     if num_gpus_per_worker > 0:
-      self._compute_devices = list(
+      self._compute_devices = tuple(
           map("/device:GPU:{}".format, range(num_gpus_per_worker)))
     else:
-      self._compute_devices = [_LOCAL_CPU]
+      self._compute_devices = (_LOCAL_CPU,)
 
-    self._compute_devices = list(
+    self._compute_devices = tuple(
         map(device_util.resolve, self._compute_devices))
     self._canonical_compute_device_set = set(self._compute_devices)
 
     # If there is only one GPU, put everything on that GPU. Otherwise, place
     # variables on CPU.
     if num_gpus_per_worker == 1:
-      assert len(list(self._compute_devices)) == 1
+      assert len(self._compute_devices) == 1
       self._variable_device = _LOCAL_GPU_0
-      self._parameter_devices = [_LOCAL_GPU_0]
+      self._parameter_devices = (_LOCAL_GPU_0,)
     else:
       self._variable_device = _LOCAL_CPU
-      self._parameter_devices = [_LOCAL_CPU]
+      self._parameter_devices = (_LOCAL_CPU,)
 
     self._is_chief = True
     self._cluster_spec = None
@@ -417,9 +417,9 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
     if isinstance(val, values.DistributedValues):
       # Return in a deterministic order.
       if set(val.devices) == self._canonical_compute_device_set:
-        return [val.get(device=d) for d in self._compute_devices]
-      return [val.get(device=d) for d in sorted(val.devices)]
-    return [val]
+        return tuple(val.get(device=d) for d in self._compute_devices)
+      return tuple(val.get(device=d) for d in sorted(val.devices))
+    return (val,)
 
   def value_container(self, val):
     if (hasattr(val, "_aggregating_container") and
@@ -497,12 +497,11 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
 
   @property
   def worker_devices(self):
-    # Make a copy to prevent users from accidentally mutating our copy.
-    return list(self._compute_devices)
+    return self._compute_devices
 
   @property
   def parameter_devices(self):
-    return list(self._parameter_devices)
+    return self._parameter_devices
 
   def non_slot_devices(self, var_list):
     return min(var_list, key=lambda x: x.name)
diff --git a/tensorflow/contrib/distribute/python/strategy_test_lib.py b/tensorflow/contrib/distribute/python/strategy_test_lib.py
index d50b142c5e..d441b5af5f 100644
--- a/tensorflow/contrib/distribute/python/strategy_test_lib.py
+++ b/tensorflow/contrib/distribute/python/strategy_test_lib.py
@@ -290,4 +290,4 @@ class DistributionTestBase(test.TestCase):
       self.evaluate(strategy.group(train_ops))
       global_step_tensors = strategy.unwrap(value)
       global_step_values = self.evaluate(global_step_tensors)
-      self.assertEqual([1] * len(global_step_tensors), global_step_values)
+      self.assertEqual((1,) * len(global_step_tensors), global_step_values)
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index 806ff0ac61..b6f5b49201 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -162,7 +162,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
                   if "device:TPU:" in d.name}
     self._device_index = values.PerReplica(device_map)
     self._host_device = self.get_host_cpu_device(0)
-    self._tpu_devices = sorted(device_map.keys())
+    self._tpu_devices = tuple(sorted(device_map.keys()))
     # Only create variables for the number of replicas we're running.
     self._tpu_devices = self._tpu_devices[:self._num_replicas_in_sync]
 
@@ -507,13 +507,13 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
   def _unwrap(self, val):
     if isinstance(val, values.DistributedValues):
       # Return in a deterministic order.
-      return [val.get(device=d) for d in sorted(val.devices)]
+      return tuple(val.get(device=d) for d in sorted(val.devices))
     elif isinstance(val, list):
       # TODO(josh11b): We need to remove this case; per device values should
       # be represented using a PerReplica wrapper instead of a list with
       # one entry per device.
-      return val
-    return [val]
+      return tuple(val)
+    return (val,)
 
   def value_container(self, value):
     return value
@@ -619,4 +619,4 @@ class _TPUReplicaContext(distribute_lib.ReplicaContext):
     distribute_lib.require_replica_context(self)
     ds = self._distribution_strategy
     replica_id = tensor_util.constant_value(self._replica_id_in_sync_group)
-    return [ds.extended.worker_devices[replica_id]]
+    return (ds.extended.worker_devices[replica_id],)
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index 11a9248a01..7fb23abc38 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -997,10 +997,10 @@ class OptimizerV2(optimizer_v1.Optimizer):
       with ops.control_dependencies([update_ops]):
         finish_updates = distribution.extended.update_non_slot(
             non_slot_devices, finish, group=False)
-      # We said grouped=False, which means finish_updates is always a list.
-      # It will be [None] when finish() returns None.
-      if finish_updates == [None]:
-        finish_updates = [update_ops]
+      # We said group=False, which means finish_updates is always a tuple.
+      # It will be (None,) when finish() returns None.
+      if finish_updates == (None,):
+        finish_updates = (update_ops,)
 
       # Update `global_step` (if any).
       if global_step is None:
diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py
index a88ed62533..57c552ca8f 100644
--- a/tensorflow/python/distribute/cross_device_ops.py
+++ b/tensorflow/python/distribute/cross_device_ops.py
@@ -53,10 +53,10 @@ def validate_destinations(destinations):
   if not isinstance(
       destinations,
       (value_lib.DistributedValues, resource_variable_ops.ResourceVariable,
-       value_lib.AggregatingVariable, six.string_types, list)):
+       value_lib.AggregatingVariable, six.string_types, list, tuple)):
     raise ValueError("destinations must be one of a `DistributedValues` object,"
-                     " a tf.Variable object, a device string, a list of device "
-                     "strings")
+                     " a tf.Variable object, a device string, a list or tuple "
+                     "of device strings")
 
   if not check_destinations(destinations):
     raise ValueError("destinations can not be empty")
diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py
index eddd6ff8b1..87bf510ec5 100644
--- a/tensorflow/python/distribute/distribute_lib.py
+++ b/tensorflow/python/distribute/distribute_lib.py
@@ -565,8 +565,8 @@ class DistributionStrategy(object):
         variable created in `scope`.
 
     Returns:
-      A list of values contained in `value`. If `value` represents a single
-      value, this returns `[value].`
+      A tuple of values contained in `value`. If `value` represents a single
+      value, this returns `(value,).`
     """
     return self._extended._unwrap(value)  # pylint: disable=protected-access
 
@@ -1346,14 +1346,14 @@ class DistributionStrategyExtended(object):
 
   @property
   def worker_devices(self):
-    """Returns the list of devices used to run `call_for_each_replica()` calls.
+    """Returns the tuple of all devices used to for compute replica execution.
     """
     # TODO(josh11b): More docstring
     raise NotImplementedError("must be implemented in descendants")
 
   @property
   def parameter_devices(self):
-    """Returns the list of devices used for variable and `update` placement."""
+    """Returns the tuple of all devices used to place variables."""
     # TODO(josh11b): More docstring
     raise NotImplementedError("must be implemented in descendants")
 
@@ -1513,9 +1513,9 @@ class ReplicaContext(object):
 
   @property
   def devices(self):
-    """The devices this replica is to be executed on, as a list of strings."""
+    """The devices this replica is to be executed on, as a tuple of strings."""
     require_replica_context(self)
-    return [device_util.current()]
+    return (device_util.current(),)
 
   # TODO(josh11b): Implement `start_all_reduce(method, t)` for efficient
   # all-reduce. It would return a function returning the result of reducing `t`
@@ -1605,7 +1605,7 @@ class _DefaultDistributionExtended(DistributionStrategyExtended):
     return array_ops.identity(replica_local_var)
 
   def _unwrap(self, distributed_value):
-    return [distributed_value]
+    return (distributed_value,)
 
   def value_container(self, value):
     return value
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index 562f2328a5..56948b2bcb 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -472,7 +472,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     assert len(set(devices)) == len(devices), (
         "No duplicates allowed in `devices` argument.")
     # TODO(josh11b): Require at least 2 devices?
-    self._devices = [device_util.resolve(d) for d in devices]
+    self._devices = tuple(device_util.resolve(d) for d in devices)
     self._canonical_device_set = set(self._devices)
     self._device_index = values.PerReplica(
         {d: i for i, d in enumerate(devices)})
@@ -488,7 +488,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     assert len(set(devices)) == len(devices), (
         "No duplicates allowed in `devices` argument.")
     # TODO(josh11b): Require at least 2 devices?
-    self._devices = [device_util.resolve(d) for d in devices]
+    self._devices = tuple(device_util.resolve(d) for d in devices)
     self._canonical_device_set = set(self._devices)
     self._device_index = values.PerReplica(
         {d: i for i, d in enumerate(devices)})
@@ -727,7 +727,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     return values.update_regroup(self, updates, group)
 
   def _update_non_slot(self, colocate_with, fn, args, kwargs, group):
-    assert isinstance(colocate_with, list)
+    assert isinstance(colocate_with, tuple)
     # TODO(josh11b): In eager mode, use one thread per device.
     updates = {}
     for d in colocate_with:
@@ -748,9 +748,9 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     if isinstance(val, values.DistributedValues):
       # Return in a deterministic order.
       if set(val.devices) == self._canonical_device_set:
-        return [val.get(device=d) for d in self._devices]
-      return [val.get(device=d) for d in sorted(val.devices)]
-    return [val]
+        return tuple(val.get(device=d) for d in self._devices)
+      return tuple(val.get(device=d) for d in sorted(val.devices))
+    return (val,)
 
   def value_container(self, val):
     return values.value_container(val)
@@ -761,12 +761,11 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
 
   @property
   def worker_devices(self):
-    # Make a copy to prevent users from accidentally mutating our copy.
-    return list(self._devices)
+    return self._devices
 
   @property
   def parameter_devices(self):
-    return list(self._devices)
+    return self._devices
 
   @property
   def experimental_between_graph(self):
@@ -786,7 +785,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
 
   def non_slot_devices(self, var_list):
     del var_list
-    return list(self._devices)
+    return tuple(self._devices)
 
   def _get_devices_from(self, colocate_with=None):
     if colocate_with is None:
-- 
GitLab


From d6074a1afb8dc7dff25cea759f5249898cf1affe Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Thu, 6 Dec 2018 16:01:40 -0800
Subject: [PATCH 165/873] Fix a flaky test.

Move from absolute time measurement to relative.

PiperOrigin-RevId: 224424896
---
 tensorflow/lite/profiling/profiler_test.cc | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/tensorflow/lite/profiling/profiler_test.cc b/tensorflow/lite/profiling/profiler_test.cc
index 82d053729c..addebabe1b 100644
--- a/tensorflow/lite/profiling/profiler_test.cc
+++ b/tensorflow/lite/profiling/profiler_test.cc
@@ -27,11 +27,8 @@ namespace tflite {
 namespace profiling {
 namespace {
 
-void AssertDurationOfEventAroundMs(const ProfileEvent* event,
-                                   double expected_ms, double eps_ms) {
-  double duration_ms =
-      (event->end_timestamp_us - event->begin_timestamp_us) / 1e3;
-  EXPECT_NEAR(expected_ms, duration_ms, eps_ms);
+double GetDurationOfEventMs(const ProfileEvent* event) {
+  return (event->end_timestamp_us - event->begin_timestamp_us) / 1e3;
 }
 
 void SleepForQuarterSecond(Profiler* profiler) {
@@ -84,12 +81,17 @@ TEST(ProfilingTest, ProfilesAreCollected) {
 
 #ifndef ADDRESS_SANITIZER
   // ASAN build is sometimes very slow. Set a large epsilon to avoid flakiness.
+  // Due to flakiness, just verify relative values match.
   const int eps_ms = 50;
-  AssertDurationOfEventAroundMs(profile_events[0], /*expected_ms*/ 500, eps_ms);
-  AssertDurationOfEventAroundMs(profile_events[1], /*expected_ms*/ 250, eps_ms);
-  AssertDurationOfEventAroundMs(profile_events[2], /*expected_ms*/ 250, eps_ms);
-  AssertDurationOfEventAroundMs(profile_events[3], /*expected_ms*/ 250, eps_ms);
-  AssertDurationOfEventAroundMs(profile_events[4], /*expected_ms*/ 250, eps_ms);
+  auto parent_ms = GetDurationOfEventMs(profile_events[0]);
+  double child_ms[2], sleep_for_quarter_ms[2];
+  child_ms[0] = GetDurationOfEventMs(profile_events[1]);
+  child_ms[1] = GetDurationOfEventMs(profile_events[3]);
+  sleep_for_quarter_ms[0] = GetDurationOfEventMs(profile_events[2]);
+  sleep_for_quarter_ms[1] = GetDurationOfEventMs(profile_events[4]);
+  EXPECT_NEAR(parent_ms, child_ms[0] + child_ms[1], eps_ms);
+  EXPECT_NEAR(child_ms[0], sleep_for_quarter_ms[0], eps_ms);
+  EXPECT_NEAR(child_ms[1], sleep_for_quarter_ms[1], eps_ms);
 #endif
 }
 
-- 
GitLab


From a5be8bd6b31f49c1ebe7899b1bf9123664785415 Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Thu, 6 Dec 2018 16:06:18 -0800
Subject: [PATCH 166/873] Populate for int8 type.

PiperOrigin-RevId: 224425691
---
 tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
index ad025b19d9..32cf4e4292 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -281,6 +281,11 @@ void BenchmarkTfLiteModel::PrepareInputsAndOutputs() {
           interpreter->typed_tensor<uint8_t>(i),
           std::vector<int>(sizes.begin() + 1, sizes.end()),
           []() { return static_cast<uint8_t>(rand()) % 255; });
+    } else if (t->type == kTfLiteInt8) {
+      FillRandomValue<int8_t>(
+          interpreter->typed_tensor<int8_t>(i),
+          std::vector<int>(sizes.begin() + 1, sizes.end()),
+          []() { return static_cast<int8_t>(rand()) % 255 - 127; });
     } else if (t->type == kTfLiteString) {
       tflite::DynamicBuffer buffer;
       FillRandomString(&buffer, sizes, []() {
-- 
GitLab


From d3cf56c7280629bccfd4a1015d60dd4f2fda3ac7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 16:15:20 -0800
Subject: [PATCH 167/873] In gpu_process_state.cc enforce that BusIdForGPU
 always returns a non-negative integer, because the value will be used as an
 index.  The default implementation uses NUMA node, and some platforms will
 return -1 (kUnknownNumaNode).

PiperOrigin-RevId: 224427213
---
 tensorflow/core/common_runtime/gpu/gpu_process_state.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
index a9a19f0fe0..8167cfb9d7 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.cc
@@ -70,7 +70,10 @@ int GPUProcessState::BusIdForGPU(TfGpuId tf_gpu_id) {
   // Return the NUMA node associated with the GPU's StreamExecutor.
   se::StreamExecutor* se =
       GpuIdUtil::ExecutorForTfGpuId(tf_gpu_id).ValueOrDie();
-  return se->GetDeviceDescription().numa_node();
+  int numa_node = se->GetDeviceDescription().numa_node();
+  // bus_id must be non-negative.  If the numa_node is not known,
+  // use 0.
+  return numa_node >= 0 ? numa_node : 0;
 }
 
 Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
@@ -97,6 +100,7 @@ Allocator* GPUProcessState::GetGPUAllocator(const GPUOptions& options,
     PlatformGpuId platform_gpu_id;
     TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id));
     int bus_id = BusIdForGPU(tf_gpu_id);
+    DCHECK_GE(bus_id, 0);
     while (bus_id >= gpu_visitors_.size()) {
       gpu_visitors_.push_back({});
     }
@@ -249,6 +253,7 @@ void GPUProcessState::AddGPUAllocVisitor(int bus_id,
   CHECK(gpu_allocators_.empty())  // Crash OK
       << "AddGPUAllocVisitor must be called before "
          "first call to GetGPUAllocator.";
+  DCHECK_GE(bus_id, 0);
   while (bus_id >= static_cast<int64>(gpu_visitors_.size())) {
     gpu_visitors_.push_back(std::vector<SubAllocator::Visitor>());
   }
-- 
GitLab


From f8006837d3e2d1ee6bc03d48a51f17284cd7034d Mon Sep 17 00:00:00 2001
From: Zhenyu Tan <tanzheny@google.com>
Date: Thu, 6 Dec 2018 16:23:47 -0800
Subject: [PATCH 168/873] scale loss instead of scale gradients.

PiperOrigin-RevId: 224428618
---
 .../python/keras/optimizer_v2/optimizer_v2.py | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index e6cd52c817..15f3009a4a 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -24,7 +24,8 @@ import abc
 
 import six
 
-from tensorflow.python.distribute import distribution_strategy_context
+from tensorflow.python.distribute import distribute_lib
+from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
@@ -252,12 +253,14 @@ class OptimizerV2(optimizer_v1.Optimizer):
       with backprop.GradientTape() as tape:
         tape.watch(var_list)
         loss_value = loss()
+        loss_value = self._scale_loss(loss_value)
       grads = tape.gradient(loss_value, var_list, grad_loss)
     else:
       if context.executing_eagerly():
         raise RuntimeError("`loss` passed to Optimizer.compute_gradients "
                            "should be a function when eager execution is "
                            "enabled.")
+      loss = self._scale_loss(loss)
       self._assert_valid_dtypes([loss])
       if grad_loss is not None:
         self._assert_valid_dtypes([grad_loss])
@@ -277,6 +280,15 @@ class OptimizerV2(optimizer_v1.Optimizer):
 
     return grads_and_vars
 
+  @staticmethod
+  def _scale_loss(loss_value):
+    if distribute_lib.get_loss_reduction() == ds_reduce_util.ReduceOp.MEAN:
+      num_replicas = \
+        distribute_ctx.get_distribution_strategy().num_replicas_in_sync
+      if num_replicas > 1:
+        loss_value *= (1. / num_replicas)
+    return loss_value
+
   def apply_gradients(self, grads_and_vars, name=None):
     """Apply gradients to variables.
 
@@ -299,7 +311,7 @@ class OptimizerV2(optimizer_v1.Optimizer):
     """
     grads_and_vars = _filter_grads(grads_and_vars)
     var_list = [v for (_, v) in grads_and_vars]
-    if distribution_strategy_context.has_distribution_strategy():
+    if distribute_ctx.has_distribution_strategy():
       reduced_grads = merge_grads(grads_and_vars)
       grads_and_vars = zip(reduced_grads, var_list)
 
@@ -598,7 +610,7 @@ def merge_update_step(update_ops, local_step):
       incre_op = local_step.assign_add(1).op
     return incre_op
 
-  return distribution_strategy_context.get_replica_context().merge_call(
+  return distribute_ctx.get_replica_context().merge_call(
       merge_update_step_fn, args=(update_ops, local_step))
 
 
@@ -606,11 +618,11 @@ def merge_grads(grads_and_vars):
   """Merge gradients from different replicas."""
 
   def merge_grad_fn(strategy, grads_and_vars):
-    reduced_grads = strategy.batch_reduce(
-        ds_reduce_util.ReduceOp.MEAN, grads_and_vars)
+    reduced_grads = strategy.batch_reduce(ds_reduce_util.ReduceOp.SUM,
+                                          grads_and_vars)
     return reduced_grads
 
-  return distribution_strategy_context.get_replica_context().merge_call(
+  return distribute_ctx.get_replica_context().merge_call(
       merge_grad_fn, args=(grads_and_vars,))
 
 
@@ -629,7 +641,7 @@ def _var_key(var):
   """
 
   # pylint: disable=protected-access
-  if distribution_strategy_context.has_distribution_strategy() and hasattr(
+  if distribute_ctx.has_distribution_strategy() and hasattr(
       var, "_primary_var"):
     var = var._primary_var
   if hasattr(var, "op"):
-- 
GitLab


From b8f5de61851acba53ca9ecf79b60bb34d753f34b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 16:26:49 -0800
Subject: [PATCH 169/873] Propagate quantization up selects with constant
 unquantize input. This helps for applying padding values or other masking
 operations

PiperOrigin-RevId: 224429126
---
 .../graph_transformations/hardcode_min_max.cc | 28 ++++++++++++++++---
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/tensorflow/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/lite/toco/graph_transformations/hardcode_min_max.cc
index df50f31de8..2e41767095 100644
--- a/tensorflow/lite/toco/graph_transformations/hardcode_min_max.cc
+++ b/tensorflow/lite/toco/graph_transformations/hardcode_min_max.cc
@@ -208,12 +208,32 @@ bool HardcodeMinMaxForSelect(Model* model, Operator* op) {
   if (output_array.minmax) {
     return false;
   }
-  const auto& input_array_1 = model->GetArray(op->inputs[1]);
-  if (!input_array_1.minmax) {
+
+  auto& input_array_1 = model->GetArray(op->inputs[1]);
+  auto& input_array_2 = model->GetArray(op->inputs[2]);
+
+  if (!input_array_1.minmax && !input_array_2.minmax) {
     return false;
   }
-  const auto& input_array_2 = model->GetArray(op->inputs[2]);
-  if (!input_array_2.minmax) {
+
+  // Propagate up if one input is quantized and the other is constant.
+  if (!input_array_1.minmax &&
+      IsConstantParameterArray(*model, op->inputs[1])) {
+    auto& minmax_1 = input_array_1.GetOrCreateMinMax();
+    const auto& minmax_2 = input_array_2.GetMinMax();
+    minmax_1.min = minmax_2.min;
+    minmax_1.max = minmax_2.max;
+  }
+
+  if (!input_array_2.minmax &&
+      IsConstantParameterArray(*model, op->inputs[2])) {
+    auto& minmax_2 = input_array_2.GetOrCreateMinMax();
+    const auto& minmax_1 = input_array_1.GetMinMax();
+    minmax_2.min = minmax_1.min;
+    minmax_2.max = minmax_1.max;
+  }
+
+  if (!input_array_1.minmax || !input_array_2.minmax) {
     return false;
   }
 
-- 
GitLab


From 3f5dae940e67ce9a1c7a87f4c0d2c7ed051215be Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Thu, 6 Dec 2018 16:39:22 -0800
Subject: [PATCH 170/873] Clarify interface docs for DeleteRecursively in Env
 and FileSystem

PiperOrigin-RevId: 224431070
---
 tensorflow/core/platform/env.h         | 21 +++++++++++++++++----
 tensorflow/core/platform/file_system.h | 19 ++++++++++++++++---
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/platform/env.h b/tensorflow/core/platform/env.h
index 7374fccdc2..1b53828415 100644
--- a/tensorflow/core/platform/env.h
+++ b/tensorflow/core/platform/env.h
@@ -167,11 +167,24 @@ class Env {
   Status DeleteFile(const string& fname);
 
   /// \brief Deletes the specified directory and all subdirectories and files
-  /// underneath it. undeleted_files and undeleted_dirs stores the number of
-  /// files and directories that weren't deleted (unspecified if the return
-  /// status is not OK).
+  /// underneath it. This is accomplished by traversing the directory tree
+  /// rooted at dirname and deleting entries as they are encountered.
+  ///
+  /// If dirname itself is not readable or does not exist, *undeleted_dir_count
+  /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status
+  /// (e.g. NOT_FOUND) is returned.
+  ///
+  /// If dirname and all its descendants were successfully deleted, TF_OK is
+  /// returned and both error counters are set to zero.
+  ///
+  /// Otherwise, while traversing the tree, undeleted_file_count and
+  /// undeleted_dir_count are updated if an entry of the corresponding type
+  /// could not be deleted. The returned error status represents the reason that
+  /// any one of these entries could not be deleted.
+  ///
   /// REQUIRES: undeleted_files, undeleted_dirs to be not null.
-  /// Typical return codes
+  ///
+  /// Typical return codes:
   ///  * OK - dirname exists and we were able to delete everything underneath.
   ///  * NOT_FOUND - dirname doesn't exist
   ///  * PERMISSION_DENIED - dirname or some descendant is not writable
diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h
index 156af6cdea..c84a93b1bf 100644
--- a/tensorflow/core/platform/file_system.h
+++ b/tensorflow/core/platform/file_system.h
@@ -167,10 +167,23 @@ class FileSystem {
   virtual Status DeleteDir(const string& dirname) = 0;
 
   /// \brief Deletes the specified directory and all subdirectories and files
-  /// underneath it. undeleted_files and undeleted_dirs stores the number of
-  /// files and directories that weren't deleted (unspecified if the return
-  /// status is not OK).
+  /// underneath it. This is accomplished by traversing the directory tree
+  /// rooted at dirname and deleting entries as they are encountered.
+  ///
+  /// If dirname itself is not readable or does not exist, *undeleted_dir_count
+  /// is set to 1, *undeleted_file_count is set to 0 and an appropriate status
+  /// (e.g. NOT_FOUND) is returned.
+  ///
+  /// If dirname and all its descendants were successfully deleted, TF_OK is
+  /// returned and both error counters are set to zero.
+  ///
+  /// Otherwise, while traversing the tree, undeleted_file_count and
+  /// undeleted_dir_count are updated if an entry of the corresponding type
+  /// could not be deleted. The returned error status represents the reason that
+  /// any one of these entries could not be deleted.
+  ///
   /// REQUIRES: undeleted_files, undeleted_dirs to be not null.
+  ///
   /// Typical return codes:
   ///  * OK - dirname exists and we were able to delete everything underneath.
   ///  * NOT_FOUND - dirname doesn't exist
-- 
GitLab


From 5eaca08c7efdad8cf795b00212d4e3653e0d7466 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 16:42:03 -0800
Subject: [PATCH 171/873] Remove broken test that computed gradients of fused
 batch norm in inference mode.

PiperOrigin-RevId: 224431514
---
 tensorflow/python/ops/parallel_for/control_flow_ops_test.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
index cc20d7ca6a..933bddd8cc 100644
--- a/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
+++ b/tensorflow/python/ops/parallel_for/control_flow_ops_test.py
@@ -927,7 +927,10 @@ class NNTest(PForTest):
               outputs[1] = constant_op.constant(0.)
               outputs[2] = constant_op.constant(0.)
             loss = nn.l2_loss(outputs[0])
-          gradients = g.gradient(loss, [x1, scale, offset])
+          if is_training:
+            gradients = g.gradient(loss, [x1, scale, offset])
+          else:
+            gradients = [constant_op.constant(0.)] * 3
           return outputs + gradients
 
         # pylint: enable=cell-var-from-loop
-- 
GitLab


From ceee617b3675037da8637997f673ab9a993ee5de Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Thu, 6 Dec 2018 16:44:31 -0800
Subject: [PATCH 172/873] Add missing 'void' parameter list for C forward
 declarations.

PiperOrigin-RevId: 224431819
---
 tensorflow/c/c_api.h              | 15 ++++++++-------
 tensorflow/c/c_api_experimental.h |  2 +-
 tensorflow/c/eager/c_api.h        |  2 +-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index 3d56268110..c7abba8552 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -91,7 +91,7 @@ extern "C" {
 // --------------------------------------------------------------------------
 // TF_Version returns a string describing version information of the
 // TensorFlow library. TensorFlow using semantic versioning.
-TF_CAPI_EXPORT extern const char* TF_Version();
+TF_CAPI_EXPORT extern const char* TF_Version(void);
 
 // --------------------------------------------------------------------------
 // TF_DataType holds the type for a scalar value.  E.g., one slot in a tensor.
@@ -157,7 +157,7 @@ typedef enum TF_Code {
 typedef struct TF_Status TF_Status;
 
 // Return a new status object.
-TF_CAPI_EXPORT extern TF_Status* TF_NewStatus();
+TF_CAPI_EXPORT extern TF_Status* TF_NewStatus(void);
 
 // Delete a previously created status object.
 TF_CAPI_EXPORT extern void TF_DeleteStatus(TF_Status*);
@@ -196,7 +196,7 @@ TF_CAPI_EXPORT extern TF_Buffer* TF_NewBufferFromString(const void* proto,
                                                         size_t proto_len);
 
 // Useful for passing *out* a protobuf.
-TF_CAPI_EXPORT extern TF_Buffer* TF_NewBuffer();
+TF_CAPI_EXPORT extern TF_Buffer* TF_NewBuffer(void);
 
 TF_CAPI_EXPORT extern void TF_DeleteBuffer(TF_Buffer*);
 
@@ -305,7 +305,7 @@ TF_CAPI_EXPORT extern size_t TF_StringEncodedSize(size_t len);
 typedef struct TF_SessionOptions TF_SessionOptions;
 
 // Return a new options object.
-TF_CAPI_EXPORT extern TF_SessionOptions* TF_NewSessionOptions();
+TF_CAPI_EXPORT extern TF_SessionOptions* TF_NewSessionOptions(void);
 
 // Set the target in TF_SessionOptions.options.
 // target can be empty, a single entry, or a comma separated list of entries.
@@ -338,7 +338,7 @@ TF_CAPI_EXPORT extern void TF_DeleteSessionOptions(TF_SessionOptions*);
 typedef struct TF_Graph TF_Graph;
 
 // Return a new graph object.
-TF_CAPI_EXPORT extern TF_Graph* TF_NewGraph();
+TF_CAPI_EXPORT extern TF_Graph* TF_NewGraph(void);
 
 // Destroy an options object.  Graph will be deleted once no more
 // TFSession's are referencing it.
@@ -890,7 +890,8 @@ TF_CAPI_EXPORT extern void TF_GraphVersions(TF_Graph* graph,
 // TF_GraphImportGraphDef.
 typedef struct TF_ImportGraphDefOptions TF_ImportGraphDefOptions;
 
-TF_CAPI_EXPORT extern TF_ImportGraphDefOptions* TF_NewImportGraphDefOptions();
+TF_CAPI_EXPORT extern TF_ImportGraphDefOptions* TF_NewImportGraphDefOptions(
+    void);
 TF_CAPI_EXPORT extern void TF_DeleteImportGraphDefOptions(
     TF_ImportGraphDefOptions* opts);
 
@@ -1611,7 +1612,7 @@ TF_CAPI_EXPORT extern void TF_DeleteLibraryHandle(TF_Library* lib_handle);
 //
 // The data in the buffer will be the serialized OpList proto for ops registered
 // in this address space.
-TF_CAPI_EXPORT extern TF_Buffer* TF_GetAllOpList();
+TF_CAPI_EXPORT extern TF_Buffer* TF_GetAllOpList(void);
 
 // TF_ApiDefMap encapsulates a collection of API definitions for an operation.
 //
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index 80c8bfe594..3e3a485eb7 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -239,7 +239,7 @@ TF_CAPI_EXPORT void TF_InitMain(const char* usage, int* argc, char*** argv);
 
 // Platform-specific implementation to return an unused port. (This should used
 // in tests only.)
-TF_CAPI_EXPORT int TF_PickUnusedPortOrDie();
+TF_CAPI_EXPORT int TF_PickUnusedPortOrDie(void);
 
 // Fast path method that makes constructing a single scalar tensor require less
 // overhead and copies.
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index 8d6c8d958d..f80ae5a6d0 100755
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -48,7 +48,7 @@ extern "C" {
 typedef struct TFE_ContextOptions TFE_ContextOptions;
 
 // Return a new options object.
-TF_CAPI_EXPORT extern TFE_ContextOptions* TFE_NewContextOptions();
+TF_CAPI_EXPORT extern TFE_ContextOptions* TFE_NewContextOptions(void);
 
 // Set the config in TF_ContextOptions.options.
 // config should be a serialized tensorflow.ConfigProto proto.
-- 
GitLab


From d19733b470d6c5737b95fa193e4129255e3e1d83 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 16:49:28 -0800
Subject: [PATCH 173/873] Makes the tensorflow should_use decorator not produce
 any warnings when running inside functions, to prevent extraneous warnings.

PiperOrigin-RevId: 224432616
---
 tensorflow/python/BUILD                        |  2 ++
 tensorflow/python/ops/control_flow_ops_test.py | 13 +++++++++++++
 tensorflow/python/util/tf_should_use.py        |  6 +++++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 035830b911..bc6dc413e7 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3251,6 +3251,7 @@ cuda_py_test(
         ":util",
         ":variable_scope",
         ":variables",
+        "//tensorflow/python/eager:def_function",
     ],
 )
 
@@ -3835,6 +3836,7 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":util",
+        "//tensorflow/python:framework_ops",
         "//tensorflow/python/eager:context",
         "@six_archive//:six",
     ],
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index c020189ad6..b19ec4bd61 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -24,6 +24,7 @@ import numpy as np
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
 from tensorflow.python.client import session
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -1014,6 +1015,18 @@ class AssertTest(test_util.TensorFlowTestCase):
     with self.assertRaises(errors.InvalidArgumentError):
       self.evaluate(c)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testAssertInFunction(self):
+
+    @def_function.function
+    def whiny(value):
+      control_flow_ops.Assert(value, ["Raised false"])
+      return constant_op.constant(5)
+
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(whiny(False))
+
+    self.assertAllEqual(whiny(True), 5)
 
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/python/util/tf_should_use.py b/tensorflow/python/util/tf_should_use.py
index ca6710bcf2..63de4a7a96 100644
--- a/tensorflow/python/util/tf_should_use.py
+++ b/tensorflow/python/util/tf_should_use.py
@@ -23,6 +23,7 @@ import traceback
 
 import six  # pylint: disable=unused-import
 
+from tensorflow.python.framework import ops
 from tensorflow.python.platform import tf_logging
 from tensorflow.python.util import tf_decorator
 # pylint: enable=g-bad-import-order,g-import-not-at-top
@@ -32,7 +33,8 @@ class _TFShouldUseHelper(object):
   """Object stored in TFShouldUse-wrapped objects.
 
   When it is deleted it will emit a warning or error if its `sate` method
-  has not been called by time of deletion.
+  has not been called by time of deletion, and Tensorflow is not executing
+  eagerly outside of functions.
   """
 
   def __init__(self, type_, repr_, stack_frame, fatal_error_if_unsated):
@@ -50,6 +52,8 @@ class _TFShouldUseHelper(object):
     self._logging_module = None
 
   def __del__(self):
+    if ops.executing_eagerly_outside_functions():
+      return
     if self._sated:
       return
     if self._fatal_error_if_unsated:
-- 
GitLab


From 7e0ae39bad55bbd88b2e8f3d436b8608d49ede94 Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Thu, 6 Dec 2018 16:51:07 -0800
Subject: [PATCH 174/873] Add TPU core detection to TPUClusterResolver's
 num_accelerators_per_worker

PiperOrigin-RevId: 224432865
---
 .../tpu/python/tpu/tpu_system_metadata.py     |   1 +
 .../cluster_resolver/tpu_cluster_resolver.py  |  80 ++++++-
 .../tpu_cluster_resolver_test.py              | 218 +++++++++++++-----
 3 files changed, 231 insertions(+), 68 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py
index ec682e5829..d66ecfcf4a 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py
@@ -52,6 +52,7 @@ def _query_tpu_system_metadata(master_address, cluster_def=None,
   devices = []
   device_dict = collections.defaultdict(list)
 
+  # TODO(b/120564445): Replace with standard library for retries.
   retry_count = 1
   while True:
     logging.info('Querying Tensorflow master (%s) for TPU system metadata.',
diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
index 99a7a0922f..e907d6fde4 100644
--- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
@@ -18,13 +18,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 import os
+import re
 
 from six.moves.urllib.request import Request
 from six.moves.urllib.request import urlopen
 
+from tensorflow.python.client import session
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
 
@@ -41,6 +47,45 @@ _ENDPOINTS_SEPARATOR = ','
 _DEFAULT_ENV_VARIABLE = 'TPU_NAME'
 _DISCOVERY_SERVICE_URL_ENV_VARIABLE = 'TPU_API_DISCOVERY_URL'
 
+_TPU_DEVICE_REGEX = re.compile(
+    r'.*task:(?P<host_id>\d+)/.*device:TPU:(?P<core_id>\d+)$')
+_TPU_CONN_RETRIES = 120
+
+DeviceDetails = collections.namedtuple(
+    'DeviceDetails', ['device_map', 'total_cores'])
+
+
+def _get_device_dict_and_cores(devices):
+  """Returns a dict of hosts to cores and total cores given devices names.
+
+  Returns a namedtuple with two attributes:
+    device_map: A map of host_ids to a list of core_ids.
+    total_cores: The total number of cores within the TPU system.
+
+  Args:
+    devices: A list of devices returned by session.list_devices()
+  """
+  device_map = collections.defaultdict(list)
+  num_cores = 0
+  for device in devices:
+    match = _TPU_DEVICE_REGEX.match(device.name)
+    if match:
+      host_id = match.group('host_id')
+      core_id = match.group('core_id')
+      device_map[host_id].append(core_id)
+      num_cores += 1
+  return DeviceDetails(device_map, num_cores)
+
+
+def _verify_and_return_same_core_count(device_dict):
+  """Verifies that every device in device_dict has the same number of cores."""
+  num_cores_per_host_set = (
+      {len(core_ids) for core_ids in device_dict.values()})
+  if len(num_cores_per_host_set) != 1:
+    raise RuntimeError('TPU cores on each device is not the same. This '
+                       'should never happen. Devices: {}'.format(device_dict))
+  return num_cores_per_host_set.pop()
+
 
 class TPUClusterResolver(ClusterResolver):
   """Cluster Resolver for Google Cloud TPUs.
@@ -394,24 +439,43 @@ class TPUClusterResolver(ClusterResolver):
                        config_proto=None):
     """Returns the number of TPU cores per worker.
 
-    This defaults to 8 for all current TPU configurations, and we do not need
-    to query any remote systems for this.
+    Connects to the master and list all the devices present in the master,
+    and counts them up. Also verifies that the device counts per host in the
+    cluster is the same before returning the number of TPU cores per host.
 
     Args:
       task_type: Unused.
       task_index: Unused.
       accelerator_type: Unused.
-      config_proto: Unused.
+      config_proto: Used to create a connection to a TPU master in order to
+        retrieve the system metadata.
 
     Raises:
       RuntimeError: If this is used with a non-TPU accelerator_type.
     """
-    # Unused. Not necessary to query anything.
-    del task_type, task_index, config_proto
+    retry_count = 1
+    # TODO(b/120564445): Replace with standard library for retries.
+    while True:
+      try:
+        with ops.Graph().as_default():
+          with session.Session(self.master(), config=config_proto) as s:
+            devices = s.list_devices()
+            device_details = _get_device_dict_and_cores(devices)
+            break
+      except errors.DeadlineExceededError:
+        error_message = ('Failed to connect to master. The TPU might not be '
+                         'ready (e.g. still scheduling) or the master '
+                         'address is incorrect: got (%s)' % self.master())
+        if retry_count <= _TPU_CONN_RETRIES:
+          logging.warning(error_message)
+          logging.warning('Retrying (%d/%d)...', retry_count, _TPU_CONN_RETRIES)
+          retry_count += 1
+        else:
+          raise RuntimeError(error_message)
 
-    if accelerator_type != 'TPU':
-      raise ValueError('This Cluster Resolver is only compatible with TPUs.')
-    return 8
+    if device_details.total_cores:
+      return _verify_and_return_same_core_count(device_details.device_map)
+    return 0
 
   @property
   def environment(self):
diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py
index 0f22ede3d9..27d92608fa 100644
--- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py
+++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py
@@ -20,7 +20,10 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.python.distribute.cluster_resolver import TPUClusterResolver
+from tensorflow.python.client import session
+from tensorflow.python.distribute import cluster_resolver
+from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
+from tensorflow.python.framework import errors
 from tensorflow.python.platform import test
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
@@ -101,7 +104,8 @@ class TPUClusterResolverTest(test.TestCase):
 
     return mock_client
 
-  @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata',
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_requestComputeMetadata',
                      mock_request_compute_metadata)
   def testRetrieveProjectAndZoneFromMetadata(self):
     tpu_map = {
@@ -112,7 +116,7 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project=None,
         zone=None,
         tpu=['test-tpu-1'],
@@ -120,7 +124,7 @@ class TPUClusterResolverTest(test.TestCase):
         service=self.mock_service_client(tpu_map=tpu_map),
         coordinator_name='coordinator')
 
-    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+    actual_cluster_spec = resolver.cluster_spec()
     expected_proto = """
     job {
       name: 'coordinator'
@@ -130,11 +134,12 @@ class TPUClusterResolverTest(test.TestCase):
       name: 'worker'
       tasks { key: 0 value: '10.1.2.3:8470' }
     }
-    """ % tpu_cluster_resolver._coordinator_port
+    """ % resolver._coordinator_port
     self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto))
-    self.assertEqual(tpu_cluster_resolver.master(), 'grpc://10.1.2.3:8470')
+    self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
 
-  @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata',
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_requestComputeMetadata',
                      mock_request_compute_metadata)
   def testRetrieveProjectAndZoneFromMetadataNoCoordinator(self):
     tpu_map = {
@@ -145,7 +150,7 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project=None,
         zone=None,
         tpu=['test-tpu-1'],
@@ -153,14 +158,15 @@ class TPUClusterResolverTest(test.TestCase):
         credentials=None,
         service=self.mock_service_client(tpu_map=tpu_map))
 
-    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+    actual_cluster_spec = resolver.cluster_spec()
     expected_proto = """
     job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } }
     """
     self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
-    self.assertEqual(tpu_cluster_resolver.master(), 'grpc://10.1.2.3:8470')
+    self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
 
-  @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata',
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_requestComputeMetadata',
                      mock_request_compute_metadata)
   def testUnhealthyCloudTpu(self):
     tpu_map = {
@@ -171,7 +177,7 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project=None,
         zone=None,
         tpu='test-tpu-1',
@@ -180,9 +186,10 @@ class TPUClusterResolverTest(test.TestCase):
         service=self.mock_service_client(tpu_map=tpu_map))
 
     with self.assertRaises(RuntimeError):
-      tpu_cluster_resolver.cluster_spec()
+      resolver.cluster_spec()
 
-  @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata',
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_requestComputeMetadata',
                      mock_request_compute_metadata)
   def testNotReadyCloudTpu(self):
     tpu_map = {
@@ -193,7 +200,7 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project=None,
         zone=None,
         tpu='test-tpu-1',
@@ -202,7 +209,7 @@ class TPUClusterResolverTest(test.TestCase):
         service=self.mock_service_client(tpu_map=tpu_map))
 
     with self.assertRaises(RuntimeError):
-      tpu_cluster_resolver.cluster_spec()
+      resolver.cluster_spec()
 
   def testSimpleSuccessfulRetrieval(self):
     tpu_map = {
@@ -213,7 +220,7 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project='test-project',
         zone='us-central1-c',
         tpu=['test-tpu-1'],
@@ -222,13 +229,13 @@ class TPUClusterResolverTest(test.TestCase):
         credentials=None,
         service=self.mock_service_client(tpu_map=tpu_map))
 
-    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+    actual_cluster_spec = resolver.cluster_spec()
     expected_proto = """
     job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } }
     job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } }
     """
     self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
-    self.assertEqual(tpu_cluster_resolver.master(), 'grpc://10.1.2.3:8470')
+    self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
 
   def testNewNetworkEndpointFormat(self):
     tpu_map = {
@@ -241,7 +248,7 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project='test-project',
         zone='us-central1-c',
         tpu='test-tpu-1',
@@ -250,15 +257,16 @@ class TPUClusterResolverTest(test.TestCase):
         credentials=None,
         service=self.mock_service_client(tpu_map=tpu_map))
 
-    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+    actual_cluster_spec = resolver.cluster_spec()
     expected_proto = """
     job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } }
     job { name: 'worker' tasks { key: 0 value: '10.2.3.4:8470' } }
     """
     self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
-    self.assertEqual('grpc://10.2.3.4:8470', tpu_cluster_resolver.master())
+    self.assertEqual('grpc://10.2.3.4:8470', resolver.master())
 
-  @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata',
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_requestComputeMetadata',
                      mock_request_compute_metadata)
   def testPodResolution(self):
     tpu_map = {
@@ -286,13 +294,13 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         tpu='test-tpu-1',
         credentials=None,
         service=self.mock_service_client(tpu_map=tpu_map),
         coordinator_name='coordinator')
 
-    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+    actual_cluster_spec = resolver.cluster_spec()
     expected_proto = """
     job {
       name: 'coordinator',
@@ -305,9 +313,9 @@ class TPUClusterResolverTest(test.TestCase):
       tasks { key: 2 value: '10.2.3.6:8470' }
       tasks { key: 3 value: '10.2.3.7:8470' }
     }
-    """ % tpu_cluster_resolver._coordinator_port
+    """ % resolver._coordinator_port
     self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto))
-    self.assertEqual(tpu_cluster_resolver.master(), 'grpc://10.2.3.4:8470')
+    self.assertEqual(resolver.master(), 'grpc://10.2.3.4:8470')
 
   def testPodResolutionNoCoordinator(self):
     tpu_map = {
@@ -335,7 +343,7 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project='test-project',
         zone='us-central1-c',
         tpu='test-tpu-1',
@@ -343,7 +351,7 @@ class TPUClusterResolverTest(test.TestCase):
         credentials=None,
         service=self.mock_service_client(tpu_map=tpu_map))
 
-    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+    actual_cluster_spec = resolver.cluster_spec()
     expected_proto = """
     job {
       name: 'worker'
@@ -354,13 +362,13 @@ class TPUClusterResolverTest(test.TestCase):
     }
     """
     self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto)
-    self.assertEqual(tpu_cluster_resolver.master(), 'grpc://10.2.3.4:8470')
+    self.assertEqual(resolver.master(), 'grpc://10.2.3.4:8470')
 
   def testGetMasterNoEntries(self):
     tpu_map = {}
 
     with self.assertRaises(ValueError):
-      TPUClusterResolver(
+      cluster_resolver.TPUClusterResolver(
           project='test-project',
           zone='us-central1-c',
           tpu=[],
@@ -370,14 +378,14 @@ class TPUClusterResolverTest(test.TestCase):
 
   # TODO(saeta): Convert to parameterized test when included in OSS TF.
   def verifyShouldResolve(self, tpu, should_resolve):
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project='test-project',
         zone='us-central1-c',
         tpu=tpu,
         coordinator_name=None,
         credentials=None,
         service=self.mock_service_client(tpu_map={}))
-    self.assertEqual(should_resolve, tpu_cluster_resolver._shouldResolve(),
+    self.assertEqual(should_resolve, resolver._shouldResolve(),
                      "TPU: '%s'" % tpu)
 
   def testShouldResolveNoName(self):
@@ -402,25 +410,26 @@ class TPUClusterResolverTest(test.TestCase):
     self.verifyShouldResolve('grpctpu', True)
 
   def testNoCallComputeMetadata(self):
-    tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/foo/bar')
+    resolver = cluster_resolver.TPUClusterResolver(
+        tpu='/bns/foo/bar')
     self.assertEqual(
-        compat.as_bytes('/bns/foo/bar'), tpu_cluster_resolver.master())
-    self.assertEqual(None, tpu_cluster_resolver.cluster_spec())
+        compat.as_bytes('/bns/foo/bar'), resolver.master())
+    self.assertEqual(None, resolver.cluster_spec())
 
   def testGkeEnvironmentForDonut(self):
     os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] = 'grpc://10.120.27.5:8470'
 
     self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ)
-    self.assertTrue(TPUClusterResolver._inGke())
+    self.assertTrue(cluster_resolver.TPUClusterResolver._inGke())
     self.assertEqual(
         compat.as_bytes('grpc://10.120.27.5:8470'),
-        compat.as_bytes(TPUClusterResolver._gkeEndpoints()))
+        compat.as_bytes(cluster_resolver.TPUClusterResolver._gkeEndpoints()))
 
-    tpu_cluster_resolver = TPUClusterResolver()
+    resolver = cluster_resolver.TPUClusterResolver()
     self.assertEqual(
         compat.as_bytes('grpc://10.120.27.5:8470'),
-        compat.as_bytes(tpu_cluster_resolver.master()))
-    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+        compat.as_bytes(resolver.master()))
+    actual_cluster_spec = resolver.cluster_spec()
     expected_proto = """
     job {
       name: 'worker'
@@ -438,19 +447,19 @@ class TPUClusterResolverTest(test.TestCase):
                                                      'grpc://10.120.27.8:8470')
 
     self.assertIn('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS', os.environ)
-    self.assertTrue(TPUClusterResolver._inGke())
+    self.assertTrue(cluster_resolver.TPUClusterResolver._inGke())
     self.assertEqual(
         compat.as_bytes('grpc://10.120.27.5:8470,'
                         'grpc://10.120.27.6:8470,'
                         'grpc://10.120.27.7:8470,'
                         'grpc://10.120.27.8:8470'),
-        compat.as_bytes(TPUClusterResolver._gkeEndpoints()))
+        compat.as_bytes(cluster_resolver.TPUClusterResolver._gkeEndpoints()))
 
-    tpu_cluster_resolver = TPUClusterResolver()
+    resolver = cluster_resolver.TPUClusterResolver()
     self.assertEqual(
         compat.as_bytes('grpc://10.120.27.5:8470'),
-        compat.as_bytes(tpu_cluster_resolver.master()))
-    actual_cluster_spec = tpu_cluster_resolver.cluster_spec()
+        compat.as_bytes(resolver.master()))
+    actual_cluster_spec = resolver.cluster_spec()
     expected_proto = """
     job {
       name: 'worker'
@@ -467,18 +476,21 @@ class TPUClusterResolverTest(test.TestCase):
   def testEnvironmentDiscoveryUrl(self):
     os.environ['TPU_API_DISCOVERY_URL'] = 'https://{api}.internal/{apiVersion}'
     self.assertEqual('https://{api}.internal/{apiVersion}',
-                     TPUClusterResolver._environmentDiscoveryUrl())
+                     (cluster_resolver.TPUClusterResolver.
+                      _environmentDiscoveryUrl()))
 
   def testEnvironmentAndRpcDetectionForGoogle(self):
-    tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/ab/cd/ef')
-    self.assertEqual(tpu_cluster_resolver.environment, 'google')
-    self.assertEqual(tpu_cluster_resolver.rpc_layer, None)
+    resolver = cluster_resolver.TPUClusterResolver(
+        tpu='/bns/ab/cd/ef')
+    self.assertEqual(resolver.environment, 'google')
+    self.assertEqual(resolver.rpc_layer, None)
 
   def testEnvironmentAndRpcDetectionForGrpcString(self):
-    tpu_cluster_resolver = TPUClusterResolver(tpu='grpc://10.1.2.3:8470')
-    self.assertEqual(tpu_cluster_resolver.environment, '')
-    self.assertEqual(tpu_cluster_resolver.rpc_layer, 'grpc')
-    self.assertEqual(tpu_cluster_resolver.master(), 'grpc://10.1.2.3:8470')
+    resolver = cluster_resolver.TPUClusterResolver(
+        tpu='grpc://10.1.2.3:8470')
+    self.assertEqual(resolver.environment, '')
+    self.assertEqual(resolver.rpc_layer, 'grpc')
+    self.assertEqual(resolver.master(), 'grpc://10.1.2.3:8470')
 
   def testOverrideTaskTypeAndIndexAndGetMaster(self):
     tpu_map = {
@@ -506,7 +518,7 @@ class TPUClusterResolverTest(test.TestCase):
         }
     }
 
-    tpu_cluster_resolver = TPUClusterResolver(
+    resolver = cluster_resolver.TPUClusterResolver(
         project='test-project',
         zone='us-central1-c',
         tpu='test-tpu-1',
@@ -514,17 +526,103 @@ class TPUClusterResolverTest(test.TestCase):
         credentials=None,
         service=self.mock_service_client(tpu_map=tpu_map))
 
-    self.assertEqual(tpu_cluster_resolver.master(), 'grpc://10.2.3.4:8470')
+    self.assertEqual(resolver.master(), 'grpc://10.2.3.4:8470')
 
-    tpu_cluster_resolver.task_type = 'worker'
-    tpu_cluster_resolver.task_index = 3
-    self.assertEqual(tpu_cluster_resolver.master(), 'grpc://10.2.3.7:8470')
+    resolver.task_type = 'worker'
+    resolver.task_index = 3
+    self.assertEqual(resolver.master(), 'grpc://10.2.3.7:8470')
 
     self.assertEqual(
-        tpu_cluster_resolver.master(
+        resolver.master(
             task_type='worker', task_index=2, rpc_layer='test'),
         'test://10.2.3.6:8470')
 
+  def testGetDeviceDictAndCoresWithTPUs(self):
+    device_names = [
+        '/job:tpu_worker/task:0/device:TPU:0',
+        '/job:tpu_worker/task:1/device:TPU:1',
+        '/job:tpu_worker/task:2/device:TPU:0',
+        '/job:tpu_worker/task:3/device:TPU:1',
+        '/job:tpu_worker/task:0/device:TPU:4',
+        '/job:tpu_worker/task:1/device:TPU:5',
+        '/job:tpu_worker/task:2/device:TPU:4',
+        '/job:tpu_worker/task:3/device:TPU:5',
+    ]
+    device_list = [
+        session._DeviceAttributes(
+            name, 'TPU', 1024, 0) for name in device_names
+    ]
+
+    device_details = tpu_cluster_resolver._get_device_dict_and_cores(
+        device_list)
+    self.assertEqual(device_details.total_cores, 8)
+    self.assertEqual(device_details.device_map,
+                     {'0': ['0', '4'],
+                      '1': ['1', '5'],
+                      '2': ['0', '4'],
+                      '3': ['1', '5']})
+
+  def testGetDeviceDictAndCoresWithCPUsAndGPUs(self):
+    device_names = [
+        '/job:tpu_worker/task:0/device:CPU:0',
+        '/job:tpu_worker/task:1/device:CPU:0',
+        '/job:tpu_worker/task:2/device:CPU:0',
+        '/job:tpu_worker/task:3/device:CPU:0',
+        '/job:tpu_worker/task:0/device:GPU:1',
+        '/job:tpu_worker/task:1/device:GPU:1',
+        '/job:tpu_worker/task:2/device:GPU:1',
+        '/job:tpu_worker/task:3/device:GPU:1',
+    ]
+    device_list = [
+        session._DeviceAttributes(
+            name, 'XLA', 1024, 0) for name in device_names
+    ]
+
+    device_dict, num_cores = tpu_cluster_resolver._get_device_dict_and_cores(
+        device_list)
+    self.assertEqual(num_cores, 0)
+    self.assertEqual(device_dict, {})
+
+  def testVerifySameCoreCount(self):
+    self.assertEqual(
+        tpu_cluster_resolver._verify_and_return_same_core_count(
+            {0: [0, 1, 2, 3, 4, 5, 6, 7]}), 8)
+    self.assertEqual(
+        tpu_cluster_resolver._verify_and_return_same_core_count(
+            {0: [0, 1], 1: [2, 3]}), 2)
+    with self.assertRaises(RuntimeError):
+      tpu_cluster_resolver._verify_and_return_same_core_count(
+          {0: [0], 1: [1, 2]})
+
+  @mock.patch.object(session.BaseSession, 'list_devices')
+  def testNumAcceleratorsSuccess(self, mock_list_devices):
+    device_names = [
+        '/job:tpu_worker/task:0/device:TPU:0',
+        '/job:tpu_worker/task:1/device:TPU:1',
+        '/job:tpu_worker/task:2/device:TPU:0',
+        '/job:tpu_worker/task:3/device:TPU:1',
+        '/job:tpu_worker/task:0/device:TPU:4',
+        '/job:tpu_worker/task:1/device:TPU:5',
+        '/job:tpu_worker/task:2/device:TPU:4',
+        '/job:tpu_worker/task:3/device:TPU:5',
+    ]
+    device_list = [
+        session._DeviceAttributes(
+            name, 'TPU', 1024, 0) for name in device_names
+    ]
+    mock_list_devices.return_value = device_list
+
+    resolver = cluster_resolver.TPUClusterResolver(tpu='')
+    self.assertEqual(resolver.num_accelerators(), 2)
+
+  @mock.patch.object(session.BaseSession, 'list_devices')
+  def testNumAcceleratorsRetryFailure(self, mock_list_devices):
+    resolver = cluster_resolver.TPUClusterResolver(tpu='')
+    mock_list_devices.side_effect = errors.DeadlineExceededError(
+        None, None, 'timeout')
+    with self.assertRaises(RuntimeError):
+      resolver.num_accelerators()
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 59d535a0df17eaf3033bbff73ef4e1e1988c454e Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Thu, 6 Dec 2018 17:03:34 -0800
Subject: [PATCH 175/873] create_ios_frameworks.sh : Add an option for GPU
 support

The GPU library is still working-in-progress.

PiperOrigin-RevId: 224434789
---
 .../lite/lib_package/create_ios_frameworks.sh | 47 +++++++++++++++++--
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/tensorflow/lite/lib_package/create_ios_frameworks.sh b/tensorflow/lite/lib_package/create_ios_frameworks.sh
index 7901655b7c..abf40e7dec 100755
--- a/tensorflow/lite/lib_package/create_ios_frameworks.sh
+++ b/tensorflow/lite/lib_package/create_ios_frameworks.sh
@@ -1,4 +1,4 @@
-#!/bin/bash -x
+#!/bin/bash
 # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,10 +20,41 @@ set -e
 echo "Starting"
 TFLITE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
 
+usage() {
+  echo "Usage: $(basename "$0") [-a]"
+  echo "-g build with GPU delegate"
+  exit 1
+}
+
+USE_GPU_DELEGATE="false"
+FRAMEWORK_NAME="tensorflow_lite"
+while getopts "g" opt_name; do
+  case "$opt_name" in
+    g)
+        USE_GPU_DELEGATE="true"
+        FRAMEWORK_NAME="tensorflow_lite_gpu"
+        ;;
+    *) usage;;
+  esac
+done
+shift $((OPTIND - 1))
+readonly USE_GPU_DELEGATE
+readonly FRAMEWORK_NAME
+
+if [ $USE_GPU_DELEGATE == "true" ] ; then
+  for filename in metal_delegate.h libmetal_delegate.a ; do
+    if [[ ! -f "${TFLITE_DIR}/delegates/gpu/${filename}" ]] ; then
+      echo "File ${TFLITE_DIR}/delegates/gpu/${filename} doesn't exist."
+      echo "It's requried for building TFLite Framework with GPU. Aborting."
+      exit 1
+    fi
+  done
+fi
+
 TMP_DIR=$(mktemp -d)
 echo "Package dir: " $TMP_DIR
 FW_DIR=$TMP_DIR/tensorflow_lite_ios_frameworks
-FW_DIR_TFLITE=$FW_DIR/tensorflow_lite.framework
+FW_DIR_TFLITE=$FW_DIR/$FRAMEWORK_NAME.framework
 FW_DIR_TFLITE_HDRS=$FW_DIR_TFLITE/Headers
 
 echo "Creating target Headers directories"
@@ -58,8 +89,14 @@ cp $TFLITE_DIR/../../bazel-genfiles/tensorflow/tools/lib_package/include/tensorf
    $FW_DIR_TFLITE
 
 echo "Copying static libraries"
+# Note: There must be a static library with the same name
+# as the framework name.
 cp $TFLITE_DIR/tools/make/gen/lib/libtensorflow-lite.a \
-   $FW_DIR_TFLITE/tensorflow_lite
+    $FW_DIR_TFLITE/$FRAMEWORK_NAME
+if [ $USE_GPU_DELEGATE == "true" ] ; then
+  cp "${TFLITE_DIR}/delegates/gpu/libmetal_delegate.a" \
+      $FW_DIR_TFLITE/libmetal_delegate.a
+fi
 
 # This is required, otherwise they interfere with the documentation of the
 # pod at cocoapods.org.
@@ -71,10 +108,10 @@ find . -type f -name readme\* -exec rm -f {} \;
 TARGET_GEN_LOCATION="$TFLITE_DIR/gen/ios_frameworks"
 echo "Moving results to target: " $TARGET_GEN_LOCATION
 cd $FW_DIR
-zip -q -r tensorflow_lite.framework.zip tensorflow_lite.framework -x .DS_Store
+zip -q -r $FRAMEWORK_NAME.framework.zip $FRAMEWORK_NAME.framework -x .DS_Store
 rm -rf $TARGET_GEN_LOCATION
 mkdir -p $TARGET_GEN_LOCATION
-cp -r tensorflow_lite.framework.zip $TARGET_GEN_LOCATION
+cp -r $FRAMEWORK_NAME.framework.zip $TARGET_GEN_LOCATION
 
 echo "Cleaning up"
 rm -rf $TMP_DIR
-- 
GitLab


From fe2799dfb0e92eab722d52dd8b54e8256a290753 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 6 Dec 2018 17:10:55 -0800
Subject: [PATCH 176/873] Internal change.

PiperOrigin-RevId: 224435855
---
 tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index a6871dd8f1..2820e466f3 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -109,7 +109,7 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then
   if [ -z ${PROJECT_NAME} ]; then
     EXTRA_PIP_FLAGS="--nightly_flag"
   else
-    EXTRA_PIP_FLAGS="--project_name=${PROJECT_NAME} --nightly_flag"
+    EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME} --nightly_flag"
   fi
 fi
 
-- 
GitLab


From b397935bae2530e1cf5022ca6f85a0b898a795d6 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Thu, 6 Dec 2018 17:13:19 -0800
Subject: [PATCH 177/873] Moving state created within a DatasetKernel to a
 resource_mgr owned by the IteratorResource instead. This helps us do better
 state management for state that is owned by dataset objects and would allow
 us to separate dataset and iterator kernel creation

PiperOrigin-RevId: 224436180
---
 tensorflow/core/framework/dataset.h           |   7 +
 .../core/kernels/data/cache_dataset_ops.cc    |  49 +++--
 tensorflow/core/kernels/data/iterator_ops.cc  |  12 +-
 .../kernels/data/multi_device_iterator_ops.cc |   5 +
 .../core/kernels/data/shuffle_dataset_op.cc   | 176 ++++++++++++------
 5 files changed, 167 insertions(+), 82 deletions(-)

diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 8dcb5f3076..7d3776a6ec 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -285,6 +285,7 @@ class IteratorContext {
           function_library(ctx->function_library()),
           lib(ctx->lib()),
           function_handle_cache(ctx->function_handle_cache()),
+          resource_mgr(ctx->resource_mgr()),
           model(ctx->model()),
           runner(*(ctx->runner())),
           runner_threadpool_size(ctx->runner_threadpool_size()),
@@ -324,6 +325,10 @@ class IteratorContext {
     // A FunctionHandleCache that owns all the function handles. Not owned.
     FunctionHandleCache* function_handle_cache = nullptr;
 
+    // A resource manager for storing dataset-related state, e.g. random
+    // seeds or cached tensors. Not owned.
+    ResourceMgr* resource_mgr = nullptr;
+
     // If non-null, identifies the object used for performance modeling.
     std::shared_ptr<model::Model> model = nullptr;
 
@@ -363,6 +368,8 @@ class IteratorContext {
     return params_.function_handle_cache;
   }
 
+  ResourceMgr* resource_mgr() { return params_.resource_mgr; }
+
   const std::shared_ptr<model::Model>& model() { return params_.model; }
 
   std::function<void(std::function<void()>)>* runner() {
diff --git a/tensorflow/core/kernels/data/cache_dataset_ops.cc b/tensorflow/core/kernels/data/cache_dataset_ops.cc
index ab85736890..f00b38e732 100644
--- a/tensorflow/core/kernels/data/cache_dataset_ops.cc
+++ b/tensorflow/core/kernels/data/cache_dataset_ops.cc
@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
@@ -564,9 +565,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
   class MemoryDataset : public DatasetBase {
    public:
     explicit MemoryDataset(OpKernelContext* ctx, const DatasetBase* input)
-        : DatasetBase(DatasetContext(ctx)),
-          input_(input),
-          cache_(new MemoryCache()) {
+        : DatasetBase(DatasetContext(ctx)), input_(input) {
       input->Ref();
     }
 
@@ -574,8 +573,8 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      return std::unique_ptr<IteratorBase>(new MemoryIterator(
-          {this, strings::StrCat(prefix, "::MemoryCache")}, cache_));
+      return std::unique_ptr<IteratorBase>(
+          new MemoryIterator({this, strings::StrCat(prefix, "::MemoryCache")}));
     }
 
     const DataTypeVector& output_dtypes() const override {
@@ -611,10 +610,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
     // The expected use is that a single `MemoryWriterIterator` populates the
     // cache with dataset elements. Once all elements are cached, the cache can
     // be used by one or more `MemoryReaderIterator`s.
-    class MemoryCache {
+    class MemoryCache : public ResourceBase {
      public:
       MemoryCache() = default;
 
+      string DebugString() override { return "CacheDataset::MemoryCache"; }
+
       // Marks the cache as completed.
       void Complete() {
         mutex_lock l(mu_);
@@ -681,15 +682,25 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
 
     class MemoryIterator : public DatasetIterator<MemoryDataset> {
      public:
-      explicit MemoryIterator(const Params& params,
-                              const std::shared_ptr<MemoryCache>& cache)
-          : DatasetIterator<MemoryDataset>(params), cache_(cache) {
-        mode_ = cache->MaybeClaim() ? Mode::write : Mode::read;
-        InitializeIterator();
-      }
+      explicit MemoryIterator(const Params& params)
+          : DatasetIterator<MemoryDataset>(params) {}
+
+      ~MemoryIterator() override { cache_->Unref(); }
 
       Status Initialize(IteratorContext* ctx) override {
         mutex_lock l(mu_);
+        // Use the resource manager in the iterator context to get / create
+        // a cache.
+        ResourceMgr* mgr = ctx->resource_mgr();
+        const string name =
+            strings::StrCat(prefix(), "::", dataset()->name(), "::MemoryCache");
+        TF_RETURN_IF_ERROR(mgr->LookupOrCreate<MemoryCache>(
+            "tf_data", name, &cache_, [](MemoryCache** cache) {
+              *cache = new MemoryCache();
+              return Status::OK();
+            }));
+        mode_ = cache_->MaybeClaim() ? Mode::write : Mode::read;
+        InitializeIterator();
         if (mode_ == Mode::read && !cache_->IsCompleted()) {
           return errors::Internal(
               "Cache should only be read after it has been completed.");
@@ -788,8 +799,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
      private:
       class MemoryWriterIterator : public DatasetIterator<MemoryDataset> {
        public:
-        explicit MemoryWriterIterator(const Params& params,
-                                      const std::shared_ptr<MemoryCache>& cache)
+        explicit MemoryWriterIterator(const Params& params, MemoryCache* cache)
             : DatasetIterator<MemoryDataset>(params), cache_(cache) {
           CHECK(cache_);
         }
@@ -848,13 +858,12 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
        private:
         mutex mu_;
         std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
-        std::shared_ptr<MemoryCache> cache_;
+        MemoryCache* const cache_ GUARDED_BY(mu_);  // not owned.
       };  // MemoryWriterIterator
 
       class MemoryReaderIterator : public DatasetIterator<MemoryDataset> {
        public:
-        explicit MemoryReaderIterator(const Params& params,
-                                      const std::shared_ptr<MemoryCache>& cache)
+        explicit MemoryReaderIterator(const Params& params, MemoryCache* cache)
             : DatasetIterator<MemoryDataset>(params), cache_(cache), index_(0) {
           CHECK(cache);
         }
@@ -865,6 +874,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
           // thus we record the memory allocated for the cache here. The caveat
           // is that this is incorrect if there are concurrent instances of this
           // iterator.
+          tf_shared_lock l(mu_);
           for (size_t i = 0; i < cache_->size(); ++i) {
             RecordBufferEnqueue(ctx, cache_->at(i));
           }
@@ -914,7 +924,7 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
 
        private:
         mutex mu_;
-        const std::shared_ptr<MemoryCache> cache_;
+        MemoryCache* const cache_ GUARDED_BY(mu_);  // not owned.
         size_t index_ GUARDED_BY(mu_);
       };  // MemoryReaderIterator
 
@@ -931,14 +941,13 @@ class CacheDatasetOp : public UnaryDatasetOpKernel {
       }
 
       mutex mu_;
-      std::shared_ptr<MemoryCache> cache_;
+      MemoryCache* cache_ GUARDED_BY(mu_);  // not owned.
       enum Mode { read, write };
       Mode mode_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> iterator_ GUARDED_BY(mu_);
     };  // MemoryIterator
 
     const DatasetBase* const input_;
-    const std::shared_ptr<MemoryCache> cache_;
   };  // MemoryDataset
 };    // CacheDatasetOp
 
diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index cb7477f9e2..d5b4bfa5c5 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -77,6 +77,7 @@ class IteratorResource : public ResourceBase {
       params.lib = captured_state->lib;
       params.function_handle_cache =
           captured_state->function_handle_cache.get();
+      params.resource_mgr = &captured_state->resource_mgr;
       return captured_state->iterator->GetNext(
           IteratorContext(std::move(params)), out_tensors, end_of_sequence);
     } else {
@@ -135,8 +136,8 @@ class IteratorResource : public ResourceBase {
     std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(nullptr);
     TF_RETURN_IF_ERROR(ctx->function_library()->Clone(&flib_def, &pflr, &lib));
     TF_RETURN_IF_ERROR(flib_def->AddLibrary(graph_def.library()));
-    std::unique_ptr<State> new_state(
-        new State(std::move(flib_def), std::move(pflr), lib, nullptr));
+    std::unique_ptr<State> new_state(new State(
+        std::move(flib_def), std::move(pflr), lib, nullptr /* iterator */));
 
     TF_RETURN_IF_ERROR(
         graph_runner.Run(&graph, new_state->lib, {}, {output_node}, &outputs));
@@ -145,6 +146,7 @@ class IteratorResource : public ResourceBase {
     IteratorContext::Params params(ctx);
     params.lib = new_state->lib;
     params.function_handle_cache = new_state->function_handle_cache.get();
+    params.resource_mgr = &new_state->resource_mgr;
     TF_RETURN_IF_ERROR(dataset->MakeIterator(IteratorContext(std::move(params)),
                                              "Iterator", &new_state->iterator));
     TF_RETURN_IF_ERROR(
@@ -156,6 +158,7 @@ class IteratorResource : public ResourceBase {
       IteratorContext::Params params(ctx);
       params.lib = new_state->lib;
       params.function_handle_cache = new_state->function_handle_cache.get();
+      params.resource_mgr = &new_state->resource_mgr;
       DeviceBase* device = new_state->lib->device();
       params.allocator_getter = [device](AllocatorAttributes attrs) {
         return device->GetAllocator(attrs);
@@ -180,7 +183,8 @@ class IteratorResource : public ResourceBase {
       tf_shared_lock l(mu_);
       new_state.reset(new State(iterator_state_->flib_def,
                                 iterator_state_->pflr, iterator_state_->lib,
-                                nullptr, nullptr));
+                                nullptr /* function_handle_cache */,
+                                nullptr /* iterator */));
     }
 
     // Ensure that the iterator has access to all functions in the current
@@ -212,6 +216,7 @@ class IteratorResource : public ResourceBase {
     IteratorContext::Params params(ctx);
     params.lib = new_state->lib;
     params.function_handle_cache = new_state->function_handle_cache.get();
+    params.resource_mgr = &new_state->resource_mgr;
     TF_RETURN_IF_ERROR(dataset->MakeIterator(IteratorContext(std::move(params)),
                                              "Iterator", &iterator));
     TF_RETURN_IF_ERROR(
@@ -259,6 +264,7 @@ class IteratorResource : public ResourceBase {
     std::shared_ptr<ProcessFunctionLibraryRuntime> pflr;
     FunctionLibraryRuntime* lib = nullptr;  // not owned.
     std::unique_ptr<FunctionHandleCache> function_handle_cache;
+    ResourceMgr resource_mgr;
     std::unique_ptr<IteratorBase> iterator;
   };
 
diff --git a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
index a070456414..ba2125a66e 100644
--- a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
+++ b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc
@@ -98,6 +98,7 @@ class MultiDeviceIterator : public ResourceBase {
       IteratorContext::Params params(ctx);
       params.lib = lib_;
       params.function_handle_cache = function_handle_cache_.get();
+      params.resource_mgr = &resource_mgr_;
       IteratorContext iter_ctx(std::move(params));
       tf_shared_lock l(mu_);
       multi_device_buffer_->GetNextFromShard(
@@ -125,6 +126,8 @@ class MultiDeviceIterator : public ResourceBase {
     return function_handle_cache_.get();
   }
 
+  ResourceMgr* resource_mgr() { return &resource_mgr_; }
+
  private:
   // A private class that uses a background thread to keep a per device buffer
   // full.
@@ -350,6 +353,7 @@ class MultiDeviceIterator : public ResourceBase {
   const std::unique_ptr<ProcessFunctionLibraryRuntime> pflr_;
   FunctionLibraryRuntime* const lib_ = nullptr;  // not owned.
   const std::unique_ptr<FunctionHandleCache> function_handle_cache_;
+  ResourceMgr resource_mgr_;
   std::shared_ptr<const FunctionLibraryDefinition> lib_def_ GUARDED_BY(mu_);
 
   int64 incarnation_id_ GUARDED_BY(mu_) = 0;
@@ -477,6 +481,7 @@ class MultiDeviceIteratorInitOp : public OpKernel {
     IteratorContext::Params params(ctx);
     params.lib = resource->lib();
     params.function_handle_cache = resource->function_handle_cache();
+    params.resource_mgr = resource->resource_mgr();
     IteratorContext iter_ctx(std::move(params));
     OP_REQUIRES_OK(
         ctx, dataset->MakeIterator(std::move(iter_ctx), "Iterator", &iterator));
diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 3cf005dbda..7134793e26 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include "tensorflow/core/framework/dataset.h"
 #include "tensorflow/core/framework/partial_tensor_shape.h"
+#include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/random/philox_random.h"
 #include "tensorflow/core/lib/random/random.h"
@@ -70,9 +71,9 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
       explicit Iterator(const typename DatasetIterator<T>::Params& params,
                         int64 seed, int64 seed2)
           : DatasetIterator<T>(params),
-            input_impl_(nullptr),
             seed_(seed),
             seed2_(seed2),
+            input_impl_(nullptr),
             epoch_(0),
             num_elements_(0),
             parent_generator_(seed, seed2),
@@ -174,6 +175,14 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
                                          /*ratio=*/1);
       }
 
+      void ResetRngs() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+        // Reset the generators based on the current iterator seeds.
+        parent_generator_ = random::PhiloxRandom(seed_, seed2_);
+        generator_ = random::SingleSampleAdapter<random::PhiloxRandom>(
+            &parent_generator_);
+        generator_.Skip(num_random_samples_);
+      }
+
       Status SaveInternal(IteratorStateWriter* writer) override {
         mutex_lock l(mu_);
         // Save state needed to restore the random number generators.
@@ -281,6 +290,10 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
         return Status::OK();
       }
 
+      mutex mu_;
+      int64 seed_ GUARDED_BY(mu_);
+      int64 seed2_ GUARDED_BY(mu_);
+
      private:
       // Used to represent slices of `buffer_` that belong to different epochs.
       // The invariant maintained by the implementation is: `start` <= `end`.
@@ -301,19 +314,8 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
         return out;
       }
 
-      void ResetRngs() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-        // Reset the generators based on the current iterator seeds.
-        parent_generator_ = random::PhiloxRandom(seed_, seed2_);
-        generator_ = random::SingleSampleAdapter<random::PhiloxRandom>(
-            &parent_generator_);
-        generator_.Skip(num_random_samples_);
-      }
-
-      mutex mu_;
       std::unique_ptr<std::vector<Tensor>[]> buffer_ GUARDED_BY(mu_);
       std::unique_ptr<IteratorBase> input_impl_ GUARDED_BY(mu_);
-      int64 seed_ GUARDED_BY(mu_);
-      int64 seed2_ GUARDED_BY(mu_);
       int64 epoch_ GUARDED_BY(mu_);
       int64 num_elements_ GUARDED_BY(mu_);
       std::deque<std::unique_ptr<Slice>> slices_ GUARDED_BY(mu_);
@@ -370,7 +372,7 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
   }
 
  private:
-  // A dataset that uses a pseduorandom sequence of seeds for the iterators
+  // A dataset that uses a pseudorandom sequence of seeds for the iterators
   // created from it. Used when `reshuffle_each_iteration` is true.
   class ReshufflingDataset : public ShuffleDatasetBase {
    public:
@@ -378,37 +380,114 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
                        int64 buffer_size, int64 seed, int64 seed2, int64 count)
         : ShuffleDatasetBase(ctx, input, buffer_size, count),
           seed_(seed),
-          seed2_(seed2),
-          parent_generator_(seed, seed2),
-          generator_(&parent_generator_) {}
+          seed2_(seed2) {}
 
     string DebugString() const override {
-      mutex_lock l(mu_);
       return strings::StrCat("ShuffleDatasetOp(", buffer_size_, ", ", seed_,
                              ", ", seed2_, ")::ReshufflingDataset");
     }
 
     std::unique_ptr<IteratorBase> MakeIteratorInternal(
         const string& prefix) const override {
-      int64 iterator_seed;
-      int64 iterator_seed2;
-      {
-        mutex_lock l(mu_);
-        iterator_seed = Random();
-        iterator_seed2 = Random();
-      }
-      return std::unique_ptr<IteratorBase>(
-          new Iterator({this, strings::StrCat(prefix, "::Shuffle")},
-                       iterator_seed, iterator_seed2));
+      return std::unique_ptr<IteratorBase>(new Iterator(
+          {this, strings::StrCat(prefix, "::Shuffle")}, seed_, seed2_));
     }
 
    protected:
+    class RandomSeedGenerator : public ResourceBase {
+     public:
+      RandomSeedGenerator(int64 seed, int64 seed2)
+          : seed_(seed),
+            seed2_(seed2),
+            parent_generator_(seed, seed2),
+            generator_(&parent_generator_) {}
+
+      string DebugString() override {
+        return "ReshufflingDataset::RandomSeedGenerator";
+      }
+
+      void GenerateRandomSeeds(int64* seed1, int64* seed2) {
+        mutex_lock l(mu_);
+        num_random_samples_++;
+        *seed1 = generator_();
+        num_random_samples_++;
+        *seed2 = generator_();
+      }
+
+      int64 num_random_samples() {
+        tf_shared_lock l(mu_);
+        return num_random_samples_;
+      }
+
+      void set_num_random_samples(int64 num_random_samples) {
+        mutex_lock l(mu_);
+        num_random_samples_ = num_random_samples;
+      }
+
+      void Reset() {
+        mutex_lock l(mu_);
+        // Reset the generators based on the current seeds.
+        parent_generator_ = random::PhiloxRandom(seed_, seed2_);
+        generator_ = random::SingleSampleAdapter<random::PhiloxRandom>(
+            &parent_generator_);
+        generator_.Skip(num_random_samples_);
+      }
+
+     private:
+      const int64 seed_;
+      const int64 seed2_;
+      mutex mu_;
+      random::PhiloxRandom parent_generator_ GUARDED_BY(mu_);
+      random::SingleSampleAdapter<random::PhiloxRandom> generator_
+          GUARDED_BY(mu_);
+      int64 num_random_samples_ GUARDED_BY(mu_) = 0;
+    };
+
     class Iterator : public ShuffleDatasetBase::Iterator<ReshufflingDataset> {
      public:
       explicit Iterator(const Params& params, int64 seed, int64 seed2)
           : ShuffleDatasetBase::Iterator<ReshufflingDataset>(params, seed,
                                                              seed2) {}
 
+      ~Iterator() override { seed_generator_->Unref(); }
+
+      Status Initialize(IteratorContext* ctx) override {
+        // Firstly, lookup or create a seed generator from the IteratorResource
+        // resource_mgr.
+        ResourceMgr* mgr = ctx->resource_mgr();
+        RandomSeedGenerator* seed_generator;
+        const string name = strings::StrCat(prefix(), "::", dataset()->name(),
+                                            "::RandomSeedGenerator");
+
+        int64 dataset_seed, dataset_seed2;
+        {
+          tf_shared_lock l(mu_);
+          // Ideally we'd like to hold this lock in the LookupOrCreate method,
+          // but that trips up our Deadlock detection code.
+          dataset_seed = seed_;
+          dataset_seed2 = seed2_;
+        }
+        TF_RETURN_IF_ERROR(mgr->LookupOrCreate<RandomSeedGenerator>(
+            "tf_data", name, &seed_generator,
+            [dataset_seed,
+             dataset_seed2](RandomSeedGenerator** seed_generator) {
+              // On the first iterator creation, use the original seeds from the
+              // dataset to seed a `RandomSeedGenerator` that will provide seeds
+              // for subsequent repetitions of the same dataset.
+              *seed_generator =
+                  new RandomSeedGenerator(dataset_seed, dataset_seed2);
+              return Status::OK();
+            }));
+        // Now use the seed generator to update the base class Iterator seeds
+        // and random number generator with generated seeds for the current
+        // repetition.
+        mutex_lock l(mu_);
+        seed_generator->GenerateRandomSeeds(&seed_, &seed2_);
+        ResetRngs();
+        seed_generator_ = seed_generator;
+        return Status::OK();
+      }
+
      protected:
       std::shared_ptr<model::Node> CreateNode(
           IteratorContext* ctx, model::Node::Args args) const override {
@@ -417,12 +496,10 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
       }
 
       Status SaveInternal(IteratorStateWriter* writer) override {
-        mutex_lock l(dataset()->mu_);
-
         // Save RNG state of Dataset.
         TF_RETURN_IF_ERROR(
             writer->WriteScalar(full_name("ds_num_random_samples"),
-                                dataset()->num_random_samples_));
+                                seed_generator_->num_random_samples()));
 
         // Save the Iterator.
         return ShuffleDatasetBase::Iterator<ReshufflingDataset>::SaveInternal(
@@ -431,24 +508,25 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
 
       Status RestoreInternal(IteratorContext* ctx,
                              IteratorStateReader* reader) override {
-        mutex_lock l(dataset()->mu_);
-
         // Restore RNG state of Dataset.
-        TF_RETURN_IF_ERROR(
-            reader->ReadScalar(full_name("ds_num_random_samples"),
-                               &dataset()->num_random_samples_));
-        dataset()->ResetRngs();
+        int64 num_random_samples;
+        TF_RETURN_IF_ERROR(reader->ReadScalar(
+            full_name("ds_num_random_samples"), &num_random_samples));
+        seed_generator_->set_num_random_samples(num_random_samples);
+        seed_generator_->Reset();
 
         // Restore the Iterator.
         return ShuffleDatasetBase::Iterator<
             ReshufflingDataset>::RestoreInternal(ctx, reader);
       }
+
+     private:
+      RandomSeedGenerator* seed_generator_;
     };
 
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      mutex_lock l(mu_);
       Node* input_graph_node = nullptr;
       TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
       Node* buffer_size = nullptr;
@@ -469,28 +547,8 @@ class ShuffleDatasetOp : public ShuffleDatasetOpBase {
     }
 
    private:
-    random::SingleSampleAdapter<random::PhiloxRandom>::ResultType Random() const
-        EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-      num_random_samples_++;
-      auto out = generator_();
-      return out;
-    }
-
-    void ResetRngs() const EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-      // Reset the generators based on the current seeds.
-      parent_generator_ = random::PhiloxRandom(seed_, seed2_);
-      generator_ =
-          random::SingleSampleAdapter<random::PhiloxRandom>(&parent_generator_);
-      generator_.Skip(num_random_samples_);
-    }
-
-    mutable int64 seed_ GUARDED_BY(mu_);
-    mutable int64 seed2_ GUARDED_BY(mu_);
-    mutable mutex mu_;
-    mutable random::PhiloxRandom parent_generator_ GUARDED_BY(mu_);
-    mutable random::SingleSampleAdapter<random::PhiloxRandom> generator_
-        GUARDED_BY(mu_);
-    mutable int64 num_random_samples_ GUARDED_BY(mu_) = 0;
+    const int64 seed_;
+    const int64 seed2_;
   };
 
   // A dataset that uses the same fixed seed for all iterators created from it.
-- 
GitLab


From 983b7403151f07740fa81ef9a58b7f3768e9431f Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Thu, 6 Dec 2018 17:14:35 -0800
Subject: [PATCH 178/873] Adding a whitelited_stateful_ops argument to the old
 Defun in which stateful ops belonging to this whitelist are recreated instead
 of being re-used when captured by value. This is useful for dataset
 make_one_shot_iterator which captures all inputs by value.

PiperOrigin-RevId: 224436333
---
 tensorflow/python/framework/function.py      | 40 +++++++++++++++-----
 tensorflow/python/framework/function_test.py | 22 +++++++++++
 2 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 622686ce00..cfdc915a1b 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -209,6 +209,7 @@ class _DefinedFunction(object):
                out_names=None,
                shape_func=None,
                capture_by_value=False,
+               whitelisted_stateful_ops=None,
                **kwargs):
     """Creates _DefinedFunction.
 
@@ -229,6 +230,8 @@ class _DefinedFunction(object):
         output shapes.
       capture_by_value: Boolean (defaults to False). If True, captured values
         will be copied into the function body.
+      whitelisted_stateful_ops: A set of ops that if stateful we ignore and
+        copy into the function body, when `capture_by_value` is True.
       **kwargs: The keyword arguments. **kwargs is passed to every call
         site of this function.
 
@@ -244,6 +247,9 @@ class _DefinedFunction(object):
     self._out_names = out_names
     self._shape_func = shape_func
     self._capture_by_value = capture_by_value
+    self._whitelisted_stateful_ops = whitelisted_stateful_ops
+    if self._whitelisted_stateful_ops is None:
+      self._whitelisted_stateful_ops = set()
     self._extra_kwargs = kwargs
     # Constructed only when C API is disabled, lazily
     self._definition = None
@@ -340,8 +346,13 @@ class _DefinedFunction(object):
       return
 
     temp_graph = func_graph_from_py_func(
-        self._func, self._arg_names, self._arg_types, self._func_name,
-        self._capture_by_value, self._caller_device)
+        self._func,
+        self._arg_names,
+        self._arg_types,
+        self._func_name,
+        self._capture_by_value,
+        self._caller_device,
+        whitelisted_stateful_ops=self._whitelisted_stateful_ops)
 
     self._extra_inputs = temp_graph.extra_inputs
     # pylint: disable=protected-access
@@ -625,9 +636,11 @@ class _FuncGraph(ops.Graph):
   function argument and the caller passes in the captured tensor.
   """
 
-  def __init__(self, name, capture_by_value, *args, **kwargs):
+  def __init__(self, name, capture_by_value, whitelisted_stateful_ops, *args,
+               **kwargs):
     super(_FuncGraph, self).__init__(*args, **kwargs)
     self._capture_by_value = capture_by_value
+    self._whitelisted_stateful_ops = whitelisted_stateful_ops
     self._building_function = True
     self._outer_graph = ops.get_default_graph()
     self._vscope = vs.get_variable_scope()
@@ -785,7 +798,7 @@ class _FuncGraph(ops.Graph):
     # pylint: disable=protected-access
     op_def = graph_to_function_def._get_op_def(op)
     # pylint: enable=protected-access
-    if op_def.is_stateful:
+    if op_def.is_stateful and op not in self._whitelisted_stateful_ops:
       raise ValueError("Cannot capture a stateful node (name:%s, type:%s) "
                        "by value." % (op.name, op.type))
     elif op.type in ("Placeholder", "PlaceholderV2"):
@@ -807,10 +820,17 @@ class _FuncGraph(ops.Graph):
     return captured_op
 
 
-def func_graph_from_py_func(func, arg_names, arg_types, name=None,
-                            capture_by_value=False, device=None,
-                            colocation_stack=None, container=None,
-                            collections_ref=None, arg_shapes=None):
+def func_graph_from_py_func(func,
+                            arg_names,
+                            arg_types,
+                            name=None,
+                            capture_by_value=False,
+                            device=None,
+                            colocation_stack=None,
+                            container=None,
+                            collections_ref=None,
+                            arg_shapes=None,
+                            whitelisted_stateful_ops=None):
   """Returns a _FuncGraph generated from `func`.
 
   Args:
@@ -828,6 +848,8 @@ def func_graph_from_py_func(func, arg_names, arg_types, name=None,
     collections_ref: A reference to a collections dict the _FuncGraph should
       use internally.
     arg_shapes: A sequence of the function's argument shapes.
+    whitelisted_stateful_ops: A set of ops that if stateful we ignore and
+      re-create.
 
   Returns:
     A _FuncGraph.
@@ -837,7 +859,7 @@ def func_graph_from_py_func(func, arg_names, arg_types, name=None,
   """
   if not name:
     name = function_utils.get_func_name(func)
-  func_graph = _FuncGraph(name, capture_by_value)
+  func_graph = _FuncGraph(name, capture_by_value, whitelisted_stateful_ops)
 
   with func_graph.as_default(), ops.device(device):
     # pylint: disable=protected-access
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index d71f06ea52..6ec71ba8e9 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -1054,6 +1054,28 @@ class FunctionTest(test.TestCase):
         self.assertFalse(all(val3 == val1))
         self.assertFalse(all(val4 == val2))
 
+  def testStatefulFunctionWithWhitelisting(self):
+    t = random_ops.random_uniform([100], maxval=10, dtype=dtypes.int32)
+
+    @function.Defun(capture_by_value=True)
+    def StatefulFn():
+      return t + constant_op.constant(3, dtype=dtypes.int32)
+
+    # First time we try to capture a stateful RandomUniform op.
+    with self.assertRaisesRegexp(ValueError, "Cannot capture a stateful node"):
+      res = StatefulFn()
+
+    # This time we whitelist this op, so that its recreated.
+    @function.Defun(capture_by_value=True, whitelisted_stateful_ops=set([t.op]))
+    def StatefulFn2():
+      return t + constant_op.constant(3, dtype=dtypes.int32)
+
+    res = StatefulFn2()
+    with session.Session() as sess:
+      r = sess.run(res)
+      for i in r:
+        self.assertGreaterEqual(i, 3)
+
   @test_util.run_deprecated_v1
   def testSameFunctionOnTwoDevices(self):
 
-- 
GitLab


From 9e4dca5db91d9ac587514cd4cf19f334130f9837 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Thu, 6 Dec 2018 17:27:18 -0800
Subject: [PATCH 179/873] Make SelfAdjointEigGradTest use gradient_checker_v2

PiperOrigin-RevId: 224437857
---
 .../kernel_tests/self_adjoint_eig_op_test.py  | 60 +++++++++----------
 1 file changed, 29 insertions(+), 31 deletions(-)

diff --git a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
index 42577f7e42..323d14b7d8 100644
--- a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
+++ b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
@@ -23,7 +23,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_lib
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
@@ -185,53 +185,51 @@ def _GetSelfAdjointEigGradTest(dtype_, shape_, compute_v_):
     n = shape_[-1]
     batch_shape = shape_[:-2]
     np_dtype = dtype_.as_numpy_dtype
-    a = np.random.uniform(
-        low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
-    if dtype_.is_complex:
-      a += 1j * np.random.uniform(
+
+    def RandomInput():
+      a = np.random.uniform(
           low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
-    a += np.conj(a.T)
-    a = np.tile(a, batch_shape + (1, 1))
+      if dtype_.is_complex:
+        a += 1j * np.random.uniform(
+            low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
+      a += np.conj(a.T)
+      a = np.tile(a, batch_shape + (1, 1))
+      return a
+
     # Optimal stepsize for central difference is O(epsilon^{1/3}).
     epsilon = np.finfo(np_dtype).eps
     delta = 0.1 * epsilon**(1.0 / 3.0)
     # tolerance obtained by looking at actual differences using
     # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
+    # after discarding one random input sample
+    _ = RandomInput()
     if dtype_ in (dtypes_lib.float32, dtypes_lib.complex64):
       tol = 1e-2
     else:
       tol = 1e-7
     with self.session(use_gpu=True):
-      tf_a = constant_op.constant(a)
-      if compute_v_:
-        tf_e, tf_v = linalg_ops.self_adjoint_eig(tf_a)
+      def Compute(x):
+        e, v = linalg_ops.self_adjoint_eig(x)
         # (complex) Eigenvectors are only unique up to an arbitrary phase
         # We normalize the vectors such that the first component has phase 0.
-        top_rows = tf_v[..., 0:1, :]
-        if tf_a.dtype.is_complex:
+        top_rows = v[..., 0:1, :]
+        if dtype_.is_complex:
           angle = -math_ops.angle(top_rows)
           phase = math_ops.complex(math_ops.cos(angle), math_ops.sin(angle))
         else:
           phase = math_ops.sign(top_rows)
-        tf_v *= phase
-        outputs = [tf_e, tf_v]
+        v *= phase
+        return e, v
+
+      if compute_v_:
+        funcs = [lambda x: Compute(x)[0], lambda x: Compute(x)[1]]
       else:
-        tf_e = linalg_ops.self_adjoint_eigvals(tf_a)
-        outputs = [tf_e]
-      for b in outputs:
-        x_init = np.random.uniform(
-            low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
-        if dtype_.is_complex:
-          x_init += 1j * np.random.uniform(
-              low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
-        x_init += np.conj(x_init.T)
-        x_init = np.tile(x_init, batch_shape + (1, 1))
-        theoretical, numerical = gradient_checker.compute_gradient(
-            tf_a,
-            tf_a.get_shape().as_list(),
-            b,
-            b.get_shape().as_list(),
-            x_init_value=x_init,
+        funcs = [linalg_ops.self_adjoint_eigvals]
+
+      for f in funcs:
+        theoretical, numerical = gradient_checker_v2.compute_gradient(
+            f,
+            [RandomInput()],
             delta=delta)
         self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
 
@@ -245,7 +243,7 @@ if __name__ == "__main__":
       for size in 1, 2, 5, 10:
         for batch_dims in [(), (3,)] + [(3, 2)] * (max(size, size) < 10):
           shape = batch_dims + (size, size)
-          name = "%s_%s_%s" % (dtype, "_".join(map(str, shape)), compute_v)
+          name = "%s_%s_%s" % (dtype.name, "_".join(map(str, shape)), compute_v)
           _AddTest(SelfAdjointEigTest, "SelfAdjointEig", name,
                    _GetSelfAdjointEigTest(dtype, shape, compute_v))
           _AddTest(SelfAdjointEigGradTest, "SelfAdjointEigGrad", name,
-- 
GitLab


From 24cd8686127694c5fadc1abc3e5183c7f8e2bb87 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Thu, 6 Dec 2018 18:02:42 -0800
Subject: [PATCH 180/873] Fixed the metrics for TPUStrategy with Keras. And
 added a test for that.

PiperOrigin-RevId: 224441940
---
 .../contrib/distribute/python/keras_test.py      | 16 ++++++++++++----
 .../python/keras/engine/training_distributed.py  | 14 ++++++++++----
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 796c4ed9f6..771c48bdd8 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -165,7 +165,9 @@ def get_multi_inputs_multi_outputs_data():
   return (train_data, test_data)
 
 
-def batch_wrapper(dataset, batch_size, distribution):
+def batch_wrapper(dataset, batch_size, distribution, repeat=None):
+  if repeat:
+    dataset = dataset.repeat(repeat)
   # TPUs currently require fully defined input shapes, drop_remainder ensures
   # the input will have fully defined shapes.
   if isinstance(distribution, tpu_strategy.TPUStrategy):
@@ -216,6 +218,7 @@ def get_correctness_test_inputs(use_numpy, use_validation_data,
                                 with_distribution,
                                 x_train, y_train, x_predict):
   """Generates the inputs for correctness check when enable Keras with DS."""
+  training_epochs = 2
   global_batch_size = 64
   batch_size = global_batch_size
   # TODO(b/118776054): Use global batch size for Keras/DS support.
@@ -231,7 +234,7 @@ def get_correctness_test_inputs(use_numpy, use_validation_data,
         'batch_size': batch_size,
         'x': x_train,
         'y': y_train,
-        'epochs': 1,
+        'epochs': training_epochs,
         'shuffle': False,
     }
 
@@ -252,13 +255,14 @@ def get_correctness_test_inputs(use_numpy, use_validation_data,
     # keras.fit/evaluate/predict. The batch size is part of the dataset.
     train_dataset = dataset_ops.Dataset.from_tensor_slices(
         (x_train, y_train))
-    x = batch_wrapper(train_dataset, batch_size, with_distribution)
+    x = batch_wrapper(
+        train_dataset, batch_size, with_distribution, repeat=training_epochs)
 
     training_inputs = {
         'batch_size': None,
         'x': x,
         'y': None,
-        'epochs': 1,
+        'epochs': training_epochs,
         'shuffle': False,
         'steps_per_epoch': len(x_train) // global_batch_size,
     }
@@ -1301,11 +1305,14 @@ class TestDistributionStrategyCorrectness(test.TestCase,
 
     with self.cached_session():
       tolerance = 1e-5
+      metrics = ['mse']
 
       if isinstance(distribution, (mirrored_strategy.MirroredStrategy,
                                    mirrored_strategy.CoreMirroredStrategy)):
         # TODO(b/119257215): use the default one once the flakyness is fixed.
         tolerance = 1e-4
+        # TODO(b/120570676): Enable metrics check once the bug is fixed.
+        metrics = None
 
       keras.backend.set_image_data_format('channels_last')
       np.random.seed(_RANDOM_SEED)
@@ -1346,6 +1353,7 @@ class TestDistributionStrategyCorrectness(test.TestCase,
         model.compile(
             loss=keras.losses.mean_squared_error,
             optimizer=gradient_descent_keras.SGD(0.5),
+            metrics=metrics,
             distribute=with_distribution)
 
         training_inputs, eval_inputs, predict_inputs = (
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index fc72f6bd15..d20d092d8e 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -350,11 +350,18 @@ def experimental_test_loop(model,
   for step in range(steps):
     _, batch_outs = K.get_session().run([test_op, output_tensors])
     for i, label in enumerate(model.metrics_names):
-      outs[i] += batch_outs[label]
+      if i == 0:
+        # Loss is stateless metrics.
+        outs[i] += batch_outs[label]
+      else:
+        # For all stateful metrics, the aggregation is handled by mirrored vars.
+        outs[i] = batch_outs[label]
+
     if verbose >= 1:
       progbar.update(step + 1)
-  for i in range(len(outs)):
-    outs[i] /= (steps)
+
+  if len(outs) >= 0:
+    outs[0] /= (steps)
 
   if initialize_finalize_strategy:
     K.get_session().run(current_strategy.finalize())
@@ -724,4 +731,3 @@ def _reset_metrics(model, distributed_model=None):
         distributed_model or
         model._distribution_strategy.unwrap(model._grouped_model)[0])
     distributed_model.reset_metrics()
-
-- 
GitLab


From 3640da49c3731807a3dbc27d813e8ab68a86328a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Dec 2018 18:38:24 -0800
Subject: [PATCH 181/873] Use tpu.rewrite() instead of
 tpu.rewrite_for_inference() in tpu_estimator.py for
 TPUEstimator.export_savedmodel().

PiperOrigin-RevId: 224445962
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 84816d70d0..96b9556e13 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -2234,7 +2234,7 @@ class TPUEstimator(estimator_lib.Estimator):
     def computation():
       """Compute tpu tensors used in export_outputs.
 
-      Passed to rewrite_for_inference so that model_fn will be called under
+      Passed to rewrite so that model_fn will be called under
       the rewriting contexts. Only tpu tensors are returned, but export_outputs
       and scaffold are captured.
 
@@ -2243,7 +2243,7 @@ class TPUEstimator(estimator_lib.Estimator):
          outside_compilation.
       """
       # We should only call model fn once and it should be inside `computation`
-      # so that building the graph will happen under `rewrite_for_inference`.
+      # so that building the graph will happen under `rewrite`.
       mode = model_fn_lib.ModeKeys.PREDICT
       estimator_spec = self._call_model_fn(features, labels, mode, config)
 
@@ -2260,7 +2260,7 @@ class TPUEstimator(estimator_lib.Estimator):
       capture.capture((estimator_spec, tensors_dict, tensors))
       return tpu_tensors
 
-    tpu_tensors_on_cpu = tpu.rewrite_for_inference(computation)
+    tpu_tensors_on_cpu = tpu.rewrite(computation)
     estimator_spec, tensors_dict, tensors = capture.get()
 
     # Reconstruct `tensors`, but with `tpu_tensors` replaced with
-- 
GitLab


From 607d43181c55cb17eab67497c66384ddf66fdd2f Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 6 Dec 2018 21:59:20 -0800
Subject: [PATCH 182/873] Set bazel version to 0.20.0

---
 tensorflow/tools/ci_build/install/install_bazel.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/install/install_bazel.sh b/tensorflow/tools/ci_build/install/install_bazel.sh
index 7472053209..f45ac3eab3 100755
--- a/tensorflow/tools/ci_build/install/install_bazel.sh
+++ b/tensorflow/tools/ci_build/install/install_bazel.sh
@@ -15,7 +15,7 @@
 # ==============================================================================
 
 # Select bazel version.
-BAZEL_VERSION="0.18.0"
+BAZEL_VERSION="0.20.0"
 
 set +e
 local_bazel_ver=$(bazel version 2>&1 | grep -i label | awk '{print $3}')
-- 
GitLab


From b34707000d4cd408f4e286dc083ae0328b98009a Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 6 Dec 2018 21:59:35 -0800
Subject: [PATCH 183/873] Set bazel version to 0.20.0

---
 tensorflow/tools/ci_build/install/install_bazel_from_source.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/install/install_bazel_from_source.sh b/tensorflow/tools/ci_build/install/install_bazel_from_source.sh
index 4f83815d77..9501a6d94b 100755
--- a/tensorflow/tools/ci_build/install/install_bazel_from_source.sh
+++ b/tensorflow/tools/ci_build/install/install_bazel_from_source.sh
@@ -18,7 +18,7 @@
 # It will compile bazel from source and install it in /usr/local/bin
 
 # Select bazel version.
-BAZEL_VERSION="0.18.0"
+BAZEL_VERSION="0.20.0"
 
 set +e
 local_bazel_ver=$(bazel version 2>&1 | grep -i label | awk '{print $3}')
-- 
GitLab


From 55bbb4c92567732ee6712c0201b94bef50df6083 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 6 Dec 2018 21:59:54 -0800
Subject: [PATCH 184/873] Set bazel version to 0.20.0

---
 tensorflow/tools/docker/Dockerfile.devel | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel
index 5ddcd3a2fd..9ea29c0e20 100644
--- a/tensorflow/tools/docker/Dockerfile.devel
+++ b/tensorflow/tools/docker/Dockerfile.devel
@@ -65,7 +65,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.18.0
+ENV BAZEL_VERSION 0.20.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
-- 
GitLab


From f674bcc9d8e057406c727fe6449053356c69d598 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 6 Dec 2018 22:00:14 -0800
Subject: [PATCH 185/873] Set bazel version to 0.20.0

---
 tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu
index 767e5f4a4f..1ad359ddcc 100644
--- a/tensorflow/tools/docker/Dockerfile.devel-gpu
+++ b/tensorflow/tools/docker/Dockerfile.devel-gpu
@@ -87,7 +87,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.18.0
+ENV BAZEL_VERSION 0.20.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
-- 
GitLab


From 15a1ba9bdc56ef3e32bd7e0f86480f1a8d9af3ec Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 6 Dec 2018 22:00:32 -0800
Subject: [PATCH 186/873] Set bazel version to 0.20.0

---
 tensorflow/tools/docker/Dockerfile.devel-mkl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl
index 0980502bcc..4eefd31d00 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl
@@ -88,7 +88,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.18.0
+ENV BAZEL_VERSION 0.20.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
-- 
GitLab


From 234959092788197d674f9c49495a979f47f75a7b Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 6 Dec 2018 22:00:47 -0800
Subject: [PATCH 187/873] Set bazel version to 0.20.0

---
 tensorflow/tools/docker/Dockerfile.devel-mkl-horovod | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
index 90db249e3d..3810daefa5 100755
--- a/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
+++ b/tensorflow/tools/docker/Dockerfile.devel-mkl-horovod
@@ -79,7 +79,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc
 RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \
     >>/etc/bazel.bazelrc
 # Install the most recent bazel release.
-ENV BAZEL_VERSION 0.18.0
+ENV BAZEL_VERSION 0.20.0
 WORKDIR /
 RUN mkdir /bazel && \
     cd /bazel && \
-- 
GitLab


From 9408b2835c84f7bd1ad25b95b2cb85682ee3f348 Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Thu, 6 Dec 2018 22:01:21 -0800
Subject: [PATCH 188/873] Add the beginnings of ::tensorflow::Env support for
 C.

PiperOrigin-RevId: 224461412
---
 tensorflow/c/BUILD       |  49 +++++++++++++
 tensorflow/c/env.cc      | 147 ++++++++++++++++++++++++++++++++++++++
 tensorflow/c/env.h       | 148 +++++++++++++++++++++++++++++++++++++++
 tensorflow/c/env_test.cc |  94 +++++++++++++++++++++++++
 tensorflow/core/BUILD    |   5 +-
 5 files changed, 442 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/c/env.cc
 create mode 100644 tensorflow/c/env.h
 create mode 100644 tensorflow/c/env_test.cc

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index ad2ae08a37..25df970eca 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -175,6 +175,34 @@ tf_cuda_library(
     ],
 )
 
+tf_cuda_library(
+    name = "env",
+    srcs = [
+        "env.cc",
+    ],
+    hdrs = [
+        "env.h",
+    ],
+    copts = tf_copts(),
+    visibility = ["//visibility:public"],
+    deps = select({
+        "//tensorflow:android": [
+            ":c_api",
+            ":tf_status_helper",
+            "//tensorflow/core:android_tensorflow_lib_lite",
+            "//tensorflow/core:platform_env",
+            "//tensorflow/core:lib",
+        ],
+        "//conditions:default": [
+            ":c_api",
+            ":tf_status_helper",
+            "//tensorflow/core:framework",
+            "//tensorflow/core:platform_env",
+            "//tensorflow/core:lib",
+        ],
+    }) + [":c_api_internal"],
+)
+
 tf_cuda_library(
     name = "kernels",
     srcs = [
@@ -334,6 +362,27 @@ tf_kernel_library(
     alwayslink = 1,
 )
 
+tf_cuda_cc_test(
+    name = "env_test",
+    size = "small",
+    srcs = ["env_test.cc"],
+    linkopts = select({
+        "//tensorflow:darwin": ["-headerpad_max_install_names"],
+        "//conditions:default": [],
+    }),
+    tags = ["noasan"],
+    # We must ensure that the dependencies can be dynamically linked since
+    # the shared library must be able to use core:framework.
+    # linkstatic = tf_kernel_tests_linkstatic(),
+    deps = [
+        ":c_api",
+        ":env",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 tf_cuda_cc_test(
     name = "kernels_test",
     size = "small",
diff --git a/tensorflow/c/env.cc b/tensorflow/c/env.cc
new file mode 100644
index 0000000000..fcff1e413d
--- /dev/null
+++ b/tensorflow/c/env.cc
@@ -0,0 +1,147 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/c/env.h"
+
+#include "tensorflow/c/c_api_internal.h"
+#include "tensorflow/c/tf_status_helper.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/types.h"
+
+struct TF_StringStream {
+  std::vector<::tensorflow::string>* list;
+  size_t position;
+};
+
+void TF_CreateDir(const char* dirname, TF_Status* status) {
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(
+      status, ::tensorflow::Env::Default()->CreateDir(dirname));
+}
+
+void TF_DeleteDir(const char* dirname, TF_Status* status) {
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(
+      status, ::tensorflow::Env::Default()->DeleteDir(dirname));
+}
+
+void TF_DeleteRecursively(const char* dirname, int64_t* undeleted_file_count,
+                          int64_t* undeleted_dir_count, TF_Status* status) {
+  ::tensorflow::int64 f, d;
+
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(
+      status, ::tensorflow::Env::Default()->DeleteRecursively(dirname, &f, &d));
+  *undeleted_file_count = f;
+  *undeleted_dir_count = d;
+}
+
+void TF_FileStat(const char* filename, TF_FileStatistics* stats,
+                 TF_Status* status) {
+  ::tensorflow::FileStatistics cc_stats;
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Status s =
+      ::tensorflow::Env::Default()->Stat(filename, &cc_stats);
+  ::tensorflow::Set_TF_Status_from_Status(status, s);
+  if (s.ok()) {
+    stats->length = cc_stats.length;
+    stats->mtime_nsec = cc_stats.mtime_nsec;
+    stats->is_directory = cc_stats.is_directory;
+  }
+}
+
+void TF_NewWritableFile(const char* filename, TF_WritableFileHandle** handle,
+                        TF_Status* status) {
+  std::unique_ptr<::tensorflow::WritableFile> f;
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Status s =
+      ::tensorflow::Env::Default()->NewWritableFile(filename, &f);
+  ::tensorflow::Set_TF_Status_from_Status(status, s);
+
+  if (s.ok()) {
+    *handle = reinterpret_cast<TF_WritableFileHandle*>(f.release());
+  }
+}
+
+void TF_CloseWritableFile(TF_WritableFileHandle* handle, TF_Status* status) {
+  auto* cc_file = reinterpret_cast<::tensorflow::WritableFile*>(handle);
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(status, cc_file->Close());
+  delete cc_file;
+}
+
+void TF_SyncWritableFile(TF_WritableFileHandle* handle, TF_Status* status) {
+  auto* cc_file = reinterpret_cast<::tensorflow::WritableFile*>(handle);
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(status, cc_file->Sync());
+}
+
+void TF_FlushWritableFile(TF_WritableFileHandle* handle, TF_Status* status) {
+  auto* cc_file = reinterpret_cast<::tensorflow::WritableFile*>(handle);
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(status, cc_file->Flush());
+}
+
+void TF_AppendWritableFile(TF_WritableFileHandle* handle, const char* data,
+                           size_t length, TF_Status* status) {
+  auto* cc_file = reinterpret_cast<::tensorflow::WritableFile*>(handle);
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(
+      status, cc_file->Append(::tensorflow::StringPiece{data, length}));
+}
+
+void TF_DeleteFile(const char* filename, TF_Status* status) {
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(
+      status, ::tensorflow::Env::Default()->DeleteFile(filename));
+}
+
+bool TF_StringStreamNext(TF_StringStream* list, const char** result) {
+  if (list->position >= list->list->size()) {
+    *result = nullptr;
+    return false;
+  }
+
+  *result = list->list->at(list->position++).c_str();
+  return true;
+}
+
+void TF_StringStreamDone(TF_StringStream* list) {
+  delete list->list;
+  delete list;
+}
+TF_StringStream* TF_GetChildren(const char* dirname, TF_Status* status) {
+  auto* children = new std::vector<::tensorflow::string>;
+
+  TF_SetStatus(status, TF_OK, "");
+  ::tensorflow::Set_TF_Status_from_Status(
+      status, ::tensorflow::Env::Default()->GetChildren(dirname, children));
+
+  auto* list = new TF_StringStream;
+  list->list = children;
+  list->position = 0;
+  return list;
+}
+
+TF_StringStream* TF_GetLocalTempDirectories() {
+  auto* tmpdirs = new std::vector<::tensorflow::string>;
+
+  ::tensorflow::Env::Default()->GetLocalTempDirectories(tmpdirs);
+
+  auto* list = new TF_StringStream;
+  list->list = tmpdirs;
+  list->position = 0;
+  return list;
+}
diff --git a/tensorflow/c/env.h b/tensorflow/c/env.h
new file mode 100644
index 0000000000..32d3f1f745
--- /dev/null
+++ b/tensorflow/c/env.h
@@ -0,0 +1,148 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_C_ENV_H_
+#define TENSORFLOW_C_ENV_H_
+
+#include "tensorflow/c/c_api.h"
+
+// --------------------------------------------------------------------------
+// C API for tensorflow::Env.
+
+struct TF_WritableFileHandle;
+struct TF_StringStream;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct TF_FileStatistics {
+  // The length of the file in bytes.
+  int64_t length;
+  // The last modified time in nanoseconds.
+  int64_t mtime_nsec;
+  // Whether the name refers to a directory.
+  bool is_directory;
+} TF_FileStatistics;
+
+// Creates the specified directory. Typical status code are:
+//  * TF_OK - successfully created the directory
+//  * TF_ALREADY_EXISTS - directory already exists
+//  * TF_PERMISSION_DENIED - dirname is not writable
+TF_CAPI_EXPORT extern void TF_CreateDir(const char* dirname, TF_Status* status);
+
+// Deletes the specified directory. Typical status codes are:
+//  * TF_OK - successfully deleted the directory
+//  * TF_FAILED_PRECONDITION - the directory is not empty
+TF_CAPI_EXPORT extern void TF_DeleteDir(const char* dirname, TF_Status* status);
+
+// Deletes the specified directory and all subdirectories and files underneath
+// it. This is accomplished by traversing the directory tree rooted at dirname
+// and deleting entries as they are encountered.
+//
+// If dirname itself is not readable or does not exist, *undeleted_dir_count is
+// set to 1, *undeleted_file_count is set to 0 and an appropriate status (e.g.
+// TF_NOT_FOUND) is returned.
+//
+// If dirname and all its descendants were successfully deleted, TF_OK is
+// returned and both error counters are set to zero.
+//
+// Otherwise, while traversing the tree, undeleted_file_count and
+// undeleted_dir_count are updated if an entry of the corresponding type could
+// not be deleted. The returned error status represents the reason that any one
+// of these entries could not be deleted.
+//
+// Typical status codes:
+//  * TF_OK - dirname exists and we were able to delete everything underneath
+//  * TF_NOT_FOUND - dirname doesn't exist
+//  * TF_PERMISSION_DENIED - dirname or some descendant is not writable
+//  * TF_UNIMPLEMENTED - some underlying functions (like Delete) are not
+//    implemented
+TF_CAPI_EXPORT extern void TF_DeleteRecursively(const char* dirname,
+                                                int64_t* undeleted_file_count,
+                                                int64_t* undeleted_dir_count,
+                                                TF_Status* status);
+
+// Obtains statistics for the given path. If status is TF_OK, *stats is
+// updated, otherwise it is not touched.
+TF_CAPI_EXPORT extern void TF_FileStat(const char* filename,
+                                       TF_FileStatistics* stats,
+                                       TF_Status* status);
+
+// Creates or truncates the given filename and returns a handle to be used for
+// appending data to the file. If status is TF_OK, *handle is updated and the
+// caller is responsible for freeing it (see TF_CloseWritableFile).
+TF_CAPI_EXPORT extern void TF_NewWritableFile(const char* filename,
+                                              TF_WritableFileHandle** handle,
+                                              TF_Status* status);
+
+// Closes the given handle and frees its memory. If there was a problem closing
+// the file, it is indicated by status. Memory is freed in any case.
+TF_CAPI_EXPORT extern void TF_CloseWritableFile(TF_WritableFileHandle* handle,
+                                                TF_Status* status);
+
+// Syncs content of the handle to the filesystem. Blocks waiting for the
+// filesystem to indicate that the content has been persisted.
+TF_CAPI_EXPORT extern void TF_SyncWritableFile(TF_WritableFileHandle* handle,
+                                               TF_Status* status);
+
+// Flush local buffers to the filesystem. If the process terminates after a
+// successful flush, the contents may still be persisted, since the underlying
+// filesystem may eventually flush the contents.  If the OS or machine crashes
+// after a successful flush, the contents may or may not be persisted, depending
+// on the implementation.
+TF_CAPI_EXPORT extern void TF_FlushWritableFile(TF_WritableFileHandle* handle,
+                                                TF_Status* status);
+
+// Appends the given bytes to the file. Any failure to do so is indicated in
+// status.
+TF_CAPI_EXPORT extern void TF_AppendWritableFile(TF_WritableFileHandle* handle,
+                                                 const char* data,
+                                                 size_t length,
+                                                 TF_Status* status);
+
+// Deletes the named file and indicates whether successful in *status.
+TF_CAPI_EXPORT extern void TF_DeleteFile(const char* filename,
+                                         TF_Status* status);
+
+// Retrieves the next item from the given TF_StringStream and places a pointer
+// to it in *result. If no more items are in the list, *result is set to NULL
+// and false is returned.
+//
+// Ownership of the items retrieved with this function remains with the library.
+// Item points are invalidated after a call to TF_StringStreamDone.
+TF_CAPI_EXPORT extern bool TF_StringStreamNext(TF_StringStream* list,
+                                               const char** result);
+
+// Frees the resources associated with given string list. All pointers returned
+// by TF_StringStreamNext are invalid after this call.
+TF_CAPI_EXPORT extern void TF_StringStreamDone(TF_StringStream* list);
+
+// Retrieves the list of children of the given directory. You can iterate
+// through the list with TF_StringStreamNext. The caller is responsible for
+// freeing the list (see TF_StringStreamDone).
+TF_CAPI_EXPORT extern TF_StringStream* TF_GetChildren(const char* filename,
+                                                      TF_Status* status);
+
+// Retrieves a list of directory names on the local machine that may be used for
+// temporary storage. You can iterate through the list with TF_StringStreamNext.
+// The caller is responsible for freeing the list (see TF_StringStreamDone).
+TF_CAPI_EXPORT extern TF_StringStream* TF_GetLocalTempDirectories(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // TENSORFLOW_C_ENV_H_
diff --git a/tensorflow/c/env_test.cc b/tensorflow/c/env_test.cc
new file mode 100644
index 0000000000..08e803f12f
--- /dev/null
+++ b/tensorflow/c/env_test.cc
@@ -0,0 +1,94 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/c/env.h"
+
+#include "tensorflow/c/c_api.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/platform/types.h"
+
+#define ASSERT_TF_OK(x) ASSERT_EQ(TF_OK, TF_GetCode(x))
+
+TEST(TestEnv, TestDirHandling) {
+  TF_StringStream* tempdirs = TF_GetLocalTempDirectories();
+  const char* tempdir;
+  bool found = false;
+  while (TF_StringStreamNext(tempdirs, &tempdir)) {
+    found = true;
+
+    TF_Status* s = TF_NewStatus();
+
+    ::tensorflow::string dirpath =
+        ::tensorflow::io::JoinPath(tempdir, "somedir");
+    TF_CreateDir(dirpath.c_str(), s);
+    ASSERT_TF_OK(s) << "TF_CreateDir failed for " << dirpath << ": "
+                    << TF_Message(s);
+
+    ::tensorflow::string filepath =
+        ::tensorflow::io::JoinPath(dirpath, "somefile.txt");
+    TF_WritableFileHandle* handle;
+    TF_NewWritableFile(filepath.c_str(), &handle, s);
+    ASSERT_TF_OK(s) << "NewWritableFile failed for " << filepath << ": "
+                    << TF_Message(s);
+
+    const char* data = "Hello, world!\n";
+    TF_AppendWritableFile(handle, data, strlen(data), s);
+    ASSERT_TF_OK(s) << "TF_AppendWritableFile failed to append data to file at "
+                    << filepath << ": " << TF_Message(s);
+
+    TF_CloseWritableFile(handle, s);
+    ASSERT_TF_OK(s) << "TF_CloseWritableFile failed to close handle to "
+                    << filepath << ": " << TF_Message(s);
+
+    TF_StringStream* children = TF_GetChildren(dirpath.c_str(), s);
+    ASSERT_TF_OK(s) << "TF_GetChildren failed for " << dirpath;
+    const char* childpath;
+    ASSERT_TRUE(TF_StringStreamNext(children, &childpath));
+    ASSERT_EQ(::tensorflow::string(childpath), "somefile.txt");
+    // There should only be one file in this directory.
+    ASSERT_FALSE(TF_StringStreamNext(children, &childpath));
+    ASSERT_EQ(childpath, nullptr);
+    TF_StringStreamDone(children);
+
+    TF_FileStatistics stats;
+    TF_FileStat(filepath.c_str(), &stats, s);
+    ASSERT_EQ(stats.length, strlen(data));
+    ASSERT_FALSE(stats.is_directory);
+    ASSERT_GT(stats.mtime_nsec, 0);
+
+    // Trying to delete a non-empty directory should fail.
+    TF_DeleteDir(dirpath.c_str(), s);
+    ASSERT_NE(TF_OK, TF_GetCode(s))
+        << "TF_DeleteDir unexpectedly succeeded with a non-empty directory "
+        << dirpath;
+
+    TF_DeleteFile(filepath.c_str(), s);
+    ASSERT_TF_OK(s) << "TF_DeleteFile failed for " << filepath << ": "
+                    << TF_Message(s);
+
+    // Now deleting the directory should work.
+    TF_DeleteDir(dirpath.c_str(), s);
+    ASSERT_TF_OK(s) << "TF_DeleteDir failed for " << dirpath << ": "
+                    << TF_Message(s);
+
+    TF_DeleteStatus(s);
+    break;
+  }
+
+  ASSERT_TRUE(found) << "expected at least one temp dir";
+
+  TF_StringStreamDone(tempdirs);
+}
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 575edfe7a9..328bfe474d 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -492,7 +492,10 @@ cc_library(
         ":platform_env_internal_hdrs",
     ],
     copts = tf_copts(),
-    visibility = ["//tensorflow/core:__subpackages__"],
+    visibility = [
+        "//tensorflow/c:__subpackages__",
+        "//tensorflow/core:__subpackages__",
+    ],
     deps = [
         ":error_codes_proto_cc",
         ":lib",
-- 
GitLab


From 3570d7957ae81380a5584d8c00ab08ffb583fef4 Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Fri, 7 Dec 2018 14:13:22 +0800
Subject: [PATCH 189/873] Some minor changes.

---
 tensorflow/core/kernels/mkl_slice_op.cc | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index a85d80f9b3..577aa5c8db 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -59,11 +59,10 @@ gtl::InlinedVector<int64, 4> IntTensorToInt64Vec(const Tensor& tensor) {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 
 // A version of SharedValidation (slice_op.h) written for input that is in
-// either Mkl layout or Tensorflow layout.
-// A shared code to validate input shapes and check for identity, which is not
-// dependent on the type of T.
-// We do this to reduce code size by not duplicating
-// all this for all T (float, double, int32, etc.)
+// either Mkl layout or Tensorflow layout. A shared code to validate input
+// shapes and check for identity, which is not dependent on the type of T.
+// We do this to reduce code size by not duplicating all this for all T
+// (float, double, int32, etc.)
 static void ValidateMklInputs(OpKernelContext* context, bool* is_identity,
                               gtl::InlinedVector<int64, 4>* begin,
                               gtl::InlinedVector<int64, 4>* size) {
-- 
GitLab


From faa7db2df4b467cc97fab6184faa6de5ea88f7f0 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Thu, 6 Dec 2018 22:36:48 -0800
Subject: [PATCH 190/873] TF MirroredStrategy: Respect control_dependencies
 added in replica context during a merge_call. Also enable keras metrics
 correctness which was broken due to this issue.

PiperOrigin-RevId: 224463968
---
 .../contrib/distribute/python/keras_test.py   | 39 ++++++++++++-------
 .../python/distribute/mirrored_strategy.py    |  9 ++++-
 tensorflow/python/keras/engine/training.py    | 16 +-------
 3 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 771c48bdd8..d66fc52cf2 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1298,22 +1298,20 @@ class TestDistributionStrategyCorrectness(test.TestCase,
       self.assertEqual(outs[1], 0.)
       self.assertEqual(outs[2], 0.)
 
-  # TODO(priyag): Add metrics correctness to this test to compare with and
-  # without distribution strategies.
   @combinations.generate(strategy_and_input_combinations())
   def test_correctness(self, distribution, use_numpy, use_validation_data):
 
     with self.cached_session():
-      tolerance = 1e-5
-      metrics = ['mse']
+      weights_tolerance = 1e-5
+      metrics_tolerance = 1e-4
 
       if isinstance(distribution, (mirrored_strategy.MirroredStrategy,
                                    mirrored_strategy.CoreMirroredStrategy)):
-        # TODO(b/119257215): use the default one once the flakyness is fixed.
-        tolerance = 1e-4
-        # TODO(b/120570676): Enable metrics check once the bug is fixed.
-        metrics = None
+        # TODO(b/119257215): Weights are not exactly the same, so use lower
+        # tolerance for now.
+        weights_tolerance = 1e-4
 
+      metrics = ['mse']
       keras.backend.set_image_data_format('channels_last')
       np.random.seed(_RANDOM_SEED)
       random_seed.set_random_seed(_RANDOM_SEED)
@@ -1383,22 +1381,33 @@ class TestDistributionStrategyCorrectness(test.TestCase,
       # Verify that the weights, training history, eval results, predict outputs
       # are the same within some limits of tolerance.
       self.assertAllClose(
-          wts_with_ds, wts_without_ds, atol=tolerance, rtol=tolerance,
+          wts_with_ds,
+          wts_without_ds,
+          atol=weights_tolerance,
+          rtol=weights_tolerance,
           msg='Fail to assert weights after training.')
-
       self.assertAllClose(
-          eval_with_ds, eval_without_ds, atol=tolerance, rtol=tolerance,
+          eval_with_ds,
+          eval_without_ds,
+          atol=metrics_tolerance,
+          rtol=metrics_tolerance,
           msg='Fail to assert eval results.')
       self.assertAllClose(
-          predict_with_ds, predict_without_ds, atol=tolerance, rtol=tolerance,
+          predict_with_ds,
+          predict_without_ds,
+          atol=weights_tolerance,
+          rtol=weights_tolerance,
           msg='Fail to assert predict results.')
 
-      if not (isinstance(distribution, tpu_strategy.TPUStrategy)
-              and distribution.extended.steps_per_run > 1):
+      if not (isinstance(distribution, tpu_strategy.TPUStrategy) and
+              distribution.extended.steps_per_run > 1):
         # TODO(b/119894254): Enable this test for all cases once the underlying
         # bug is fixed.
         self.assertAllClose(
-            history_with_ds, history_without_ds, atol=tolerance, rtol=tolerance,
+            history_with_ds,
+            history_without_ds,
+            atol=metrics_tolerance,
+            rtol=metrics_tolerance,
             msg='Fail to assert training history.')
 
 
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index 56948b2bcb..cb94dfcfbd 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -169,7 +169,12 @@ def _call_for_each_replica(distribution, fn, args, kwargs):
           # capture the name_scope from the first MRT and assume it is
           # the same for all other MRTs.
           mtt_captured_name_scope = threads[0].captured_name_scope
-          with ops.name_scope(mtt_captured_name_scope):
+          # Capture and merge the control dependencies from all the threads.
+          mtt_captured_control_deps = set()
+          for t in threads:
+            mtt_captured_control_deps.update(t.captured_control_deps)
+          with ops.name_scope(mtt_captured_name_scope),\
+              ops.control_dependencies(mtt_captured_control_deps):
             merge_result = threads[0].merge_fn(distribution, *merge_args,
                                                **merge_kwargs)
           for t in threads:
@@ -898,6 +903,8 @@ class MirroredReplicaContext(distribute_lib.ReplicaContext):
     # Adding a "/" at end lets us re-enter this scope later.
     if t.captured_name_scope:
       t.captured_name_scope += "/"
+
+    t.captured_control_deps = t.graph._current_control_dependencies()  # pylint: disable=protected-access
     t.has_paused.set()
     t.should_run.wait()
     t.should_run.clear()
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 90f8a7b252..462694fda6 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -875,17 +875,11 @@ class Model(Network):
                                      [self.total_loss] + metrics_tensors)
 
   def _make_fit_function(self):
-    # TODO(psv/anjalisridhar): Remove updates after we fix b/118841692
-    # Stateful metrics updates
-    metric_updates = []
-    for m in self.metrics:
-      metric_updates += m.updates
-
     metrics_tensors = [
         self._all_stateful_metrics_tensors[m] for m in self.metrics_names[1:]
     ]
     self._make_train_function_helper(
-        '_fit_function', [self.total_loss] + metrics_tensors, metric_updates)
+        '_fit_function', [self.total_loss] + metrics_tensors)
 
   def _make_test_function_helper(self, fn_name, outputs, metric_updates=None):
     if not hasattr(self, fn_name):
@@ -918,17 +912,11 @@ class Model(Network):
                                     [self.total_loss] + metrics_tensors)
 
   def _make_eval_function(self):
-    # TODO(psv,anjalisridhar): Remove updates after we fix b/118841692
-    # Stateful metrics updates
-    metric_updates = []
-    for m in self.metrics:
-      metric_updates += m.updates
-
     metrics_tensors = [
         self._all_stateful_metrics_tensors[m] for m in self.metrics_names[1:]
     ]
     self._make_test_function_helper(
-        '_eval_function', [self.total_loss] + metrics_tensors, metric_updates)
+        '_eval_function', [self.total_loss] + metrics_tensors)
 
   def _make_predict_function(self):
     if not hasattr(self, 'predict_function'):
-- 
GitLab


From cd4cc57282ad9621c369f103e9b48961863b9c9d Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Fri, 7 Dec 2018 00:04:50 -0800
Subject: [PATCH 191/873] Fix a typo in estimator_training.

PiperOrigin-RevId: 224470630
---
 tensorflow/python/distribute/estimator_training.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/distribute/estimator_training.py b/tensorflow/python/distribute/estimator_training.py
index 549fa8fb8a..7d5f231c37 100644
--- a/tensorflow/python/distribute/estimator_training.py
+++ b/tensorflow/python/distribute/estimator_training.py
@@ -354,7 +354,7 @@ def estimator_evaluate(estimator, evaluate_distributed_fn, hooks):
   if (estimator._config._distribute_coordinator_mode !=
       dc.CoordinatorMode.STANDALONE_CLIENT):
     raise ValueError('Only `STANDALONE_CLIENT` mode is supported when you call '
-                     '`Estimator.train`')
+                     '`Estimator.evaluate`')
 
   if estimator._config._eval_distribute.extended.experimental_between_graph:
     # TODO(yuefengz): remove this limitation once we figure out how to merge
-- 
GitLab


From 6bcc66c2d2b413a5d0487638c3d6b6bb63716e8f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 01:02:31 -0800
Subject: [PATCH 192/873] compat: Update forward compatibility horizon to
 2018-12-07

PiperOrigin-RevId: 224475966
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 269d2e286a..e66c29ae39 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 6)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 7)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From c2e2ecedfef715b8f756c5e2df4dab3b88531253 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 01:02:57 -0800
Subject: [PATCH 193/873] Internal change.

PiperOrigin-RevId: 224476105
---
 tensorflow/core/BUILD | 70 +++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 33 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 328bfe474d..66714235b5 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1682,6 +1682,9 @@ filegroup(
 # operators, use :android_tensorflow_lib if you want full operator
 # support.
 #
+# If you just need TensorFlow types, e.g. Tensors, use
+# :android_tensorflow_lib_lite_no_runtime.
+#
 # Compiles to a trivial library on non-Android to prevent irrelevant
 # build errors. If not building this as part of an android_binary,
 # a command such as the following must be used:
@@ -1692,7 +1695,33 @@ filegroup(
 cc_library(
     name = "android_tensorflow_lib_lite",
     srcs = if_android(["//tensorflow/core:android_srcs"]),
-    copts = tf_copts(android_optimization_level_override = None),
+    copts = tf_copts(android_optimization_level_override = None) + [
+        "-DSUPPORT_SELECTIVE_REGISTRATION",
+    ],
+    linkopts = ["-lz"],
+    tags = [
+        "manual",
+        "notap",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":mobile_additional_lib_deps",
+        ":protos_all_cc_impl",
+        ":stats_calculator_portable",
+        "//third_party/eigen3",
+        "@double_conversion//:double-conversion",
+        "@nsync//:nsync_cpp",
+        "@protobuf_archive//:protobuf",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "android_tensorflow_lib_lite_nortti",
+    srcs = if_android(["//tensorflow/core:android_srcs"]),
+    copts = tf_copts(android_optimization_level_override = None) + [
+        "-DSUPPORT_SELECTIVE_REGISTRATION",
+    ] + tf_opts_nortti_if_android(),
     linkopts = ["-lz"],
     tags = [
         "manual",
@@ -1804,46 +1833,21 @@ cc_library(
 # Does not contain operators. In contrast to android_tensorflow_lib_lite,
 # this links in framework support for all types, relying on selective
 # registration of ops to prune code size.
-cc_library(
+#
+# TODO(gonnet): Move all users of these aliases to the corresponding
+#     :android_tensorflow_lib_lite* targets and remove.
+alias(
     name = "android_tensorflow_lib_selective_registration",
-    srcs = if_android(["//tensorflow/core:android_srcs_only_runtime"]),
-    copts = tf_copts(android_optimization_level_override = None) + [
-        "-DSUPPORT_SELECTIVE_REGISTRATION",
-    ],
-    linkopts = if_android(["-lz"]),
-    tags = [
-        "manual",
-        "notap",
-    ],
+    actual = ":android_tensorflow_lib_lite",
     visibility = ["//visibility:public"],
-    deps = [
-        ":protos_all_cc_impl",
-        "@com_google_absl//absl/container:flat_hash_set",
-        "@protobuf_archive//:protobuf",
-    ],
-    alwayslink = 1,
 )
 
 # Android library for use with the SELECTIVE_REGISTRATION feature with
 # no proto_rtti.
-cc_library(
+alias(
     name = "android_tensorflow_lib_selective_registration_nortti",
-    srcs = if_android(["//tensorflow/core:android_srcs_only_runtime"]),
-    copts = tf_copts(android_optimization_level_override = None) + tf_opts_nortti_if_android() + [
-        "-DSUPPORT_SELECTIVE_REGISTRATION",
-    ],
-    linkopts = if_android(["-lz"]),
-    tags = [
-        "manual",
-        "notap",
-    ],
+    actual = ":android_tensorflow_lib_lite_nortti",
     visibility = ["//visibility:public"],
-    deps = [
-        ":protos_all_cc_impl",
-        "@com_google_absl//absl/container:flat_hash_set",
-        "@protobuf_archive//:protobuf",
-    ],
-    alwayslink = 1,
 )
 
 filegroup(
-- 
GitLab


From 3bd4a240020d1c62d84c1c940e8b1f5b3d838d3d Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Fri, 7 Dec 2018 03:08:17 -0800
Subject: [PATCH 194/873] Enable @run_v1_only to be applied on a class

Some entire suite of tests do not work on v1. Rather
than annotating the individual test cases, it makes more
sense to annotate the entire class. Note that we do not
intend this for the @run_deprecated_v1 annotation.

PiperOrigin-RevId: 224489413
---
 tensorflow/python/framework/test_util.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index fc1a5fbe85..4bc9bf01d2 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -494,7 +494,8 @@ def with_control_flow_v2(cls):
     return cls
 
   for name, value in cls.__dict__.copy().items():
-    if (callable(value) and name.startswith("test") and
+    if (callable(value) and
+        name.startswith(unittest.TestLoader.testMethodPrefix) and
         not getattr(value, "_disable_control_flow_v2", False)):
       setattr(cls, name + "WithControlFlowV2", enable_control_flow_v2(value))
   return cls
@@ -893,8 +894,10 @@ def run_all_in_graph_and_eager_modes(cls):
   """Execute all test methods in the given class with and without eager."""
   base_decorator = run_in_graph_and_eager_modes
   for name, value in cls.__dict__.copy().items():
-    if callable(value) and name.startswith("test") and not (
-        name.startswith("testSkipEager") or name.startswith("test_skip_eager")):
+    if (callable(value) and
+        name.startswith(unittest.TestLoader.testMethodPrefix) and
+        not (name.startswith("testSkipEager")
+             or name.startswith("test_skip_eager"))):
       setattr(cls, name, base_decorator(value))
   return cls
 
@@ -1059,7 +1062,16 @@ def run_v1_only(reason, func=None):
 
   def decorator(f):
     if tf_inspect.isclass(f):
-      raise ValueError("`run_v1_only` only supports test methods.")
+      setup = f.__dict__.get("setUp")
+      if setup is not None:
+        setattr(f, "setUp", decorator(setup))
+
+      for name, value in f.__dict__.copy().items():
+        if (callable(value) and
+            name.startswith(unittest.TestLoader.testMethodPrefix)):
+          setattr(f, name, decorator(value))
+
+      return f
 
     def decorated(self, *args, **kwargs):
       if tf2.enabled():
-- 
GitLab


From 0d3b58cfe91c6b865a14701345d7a84ce949c0e3 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Fri, 7 Dec 2018 04:17:58 -0800
Subject: [PATCH 195/873] Disable some tests using placeholders when running in
 v2

In order to do so we introduce the concept of a FakeEagerSession
which allows tests to run sess.run() with an empty feed_dict.

PiperOrigin-RevId: 224495103
---
 .../python/kernel_tests/lanczos_test.py       |  4 +-
 .../python/kernel_tests/least_squares_test.py |  4 +-
 .../kernel_tests/linear_equations_test.py     |  4 +-
 tensorflow/python/framework/test_util.py      | 59 ++++++++++++++++++-
 .../kernel_tests/batch_matmul_op_test.py      |  1 +
 .../kernel_tests/matrix_solve_ls_op_test.py   | 16 +++--
 tensorflow/python/kernel_tests/qr_op_test.py  |  8 ++-
 tensorflow/python/kernel_tests/svd_op_test.py |  8 ++-
 8 files changed, 89 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/solvers/python/kernel_tests/lanczos_test.py b/tensorflow/contrib/solvers/python/kernel_tests/lanczos_test.py
index 8fcd7aeef6..f31bdbd399 100644
--- a/tensorflow/contrib/solvers/python/kernel_tests/lanczos_test.py
+++ b/tensorflow/contrib/solvers/python/kernel_tests/lanczos_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.contrib.solvers.python.ops import lanczos
 from tensorflow.contrib.solvers.python.ops import util
 from tensorflow.python.framework import constant_op
@@ -80,7 +81,8 @@ if __name__ == "__main__":
     for shape in [[4, 4], [7, 4], [5, 8]]:
       for orthogonalize in True, False:
         for steps in range(1, min(shape) + 1):
-          for use_static_shape in True, False:
+          # TF2 does not support placeholders so we skip it
+          for use_static_shape in set([True, tf2.enabled()]):
             arg_string = "%s_%s_%s_%s_staticshape_%s" % (
                 dtype.__name__, "_".join(map(str, shape)), orthogonalize, steps,
                 use_static_shape)
diff --git a/tensorflow/contrib/solvers/python/kernel_tests/least_squares_test.py b/tensorflow/contrib/solvers/python/kernel_tests/least_squares_test.py
index 2a9100903a..841a41a233 100644
--- a/tensorflow/contrib/solvers/python/kernel_tests/least_squares_test.py
+++ b/tensorflow/contrib/solvers/python/kernel_tests/least_squares_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.contrib.solvers.python.ops import least_squares
 from tensorflow.contrib.solvers.python.ops import util
 from tensorflow.python.framework import constant_op
@@ -76,7 +77,8 @@ def _get_least_squares_tests(dtype_, use_static_shape_, shape_):
 if __name__ == "__main__":
   for dtype in np.float32, np.float64:
     for shape in [[4, 4], [8, 5], [3, 7]]:
-      for use_static_shape in True, False:
+      # TF2 does not support placeholders under eager so we skip it
+      for use_static_shape in set([True, tf2.enabled()]):
         arg_string = "%s_%s_staticshape_%s" % (dtype.__name__,
                                                "_".join(map(str, shape)),
                                                use_static_shape)
diff --git a/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py b/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py
index a0e6eb87bc..10807f7a80 100644
--- a/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py
+++ b/tensorflow/contrib/solvers/python/kernel_tests/linear_equations_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.contrib.solvers.python.ops import linear_equations
 from tensorflow.contrib.solvers.python.ops import util
 from tensorflow.python.framework import constant_op
@@ -113,7 +114,8 @@ def _get_linear_equations_tests(dtype_, use_static_shape_, shape_):
 if __name__ == "__main__":
   for dtype in np.float32, np.float64:
     for size in 1, 4, 10:
-      for use_static_shape in True, False:
+      # TF2 does not support placeholders under eager so we skip it
+      for use_static_shape in set([True, tf2.enabled()]):
         shape = [size, size]
         arg_string = "%s_%s_staticshape_%s" % (dtype.__name__, size,
                                                use_static_shape)
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 4bc9bf01d2..b0c3c9b506 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1277,6 +1277,63 @@ class CapturedWrites(object):
     return output_data
 
 
+class FakeEagerSession(object):
+  """Fake session so tests that conditionally use placeholders can use eager.
+
+  There are a number of tests that conditionally use placeholders for shape
+  inference. The pattern is demonstrated here:
+
+  ```python
+  with self.cached_session() as sess:
+    if static_shape:
+      y = math_ops.matmul(x, ...)
+      feed_dict = {}
+    else:
+      x_ph = array_ops.placeholder(...)
+      y = math_ops.matmul(x_ph, ...)
+      feed_dict = {x_ph: x}
+    val = sess.run(y, feed_dict=feed_dict)
+  ```
+
+  Since the feed_dict is empty when not using placeholders we should be able to
+  call self.evaluate(), however this requires rewriting the test case.
+  This class shold be considered a stop-gap solution to get tests running with
+  eager with minimal changes to the actual test.
+  """
+
+  def __init__(self, test_case):
+    self._test_case = test_case
+
+  def run(self, fetches, *args, **kwargs):
+    """Evalaute `fetches`.
+
+    Fail if additional args are specified.
+
+    Args:
+      fetches: A Tensor or a nested list/tuple of Tensors.
+      *args: Positional arguments
+      **kwargs: Keyword arguments
+
+    Raises:
+      RuntimeError: If args or kwargs are specified.
+
+    Returns:
+      Tensors as numpy values.
+    """
+    feed_dict = kwargs.pop("feed_dict", {})
+    if feed_dict:
+      raise RuntimeError(
+          "feed_dict is not supported when eager execution is enabled "
+          "(in this case, sess.run(t) is shorthand for t.numpy()")
+
+    if args or kwargs:
+      raise RuntimeError(
+          "Optional args are not supported when eager execution is enabled "
+          "(in this case, sess.run(t) is shorthand for t.numpy()")
+
+    return self._test_case.evaluate(fetches)
+
+
 class ErrorLoggingSession(session.Session):
   """Wrapper around a Session that logs errors in run().
   """
@@ -1584,7 +1641,7 @@ class TensorFlowTestCase(googletest.TestCase):
       the graph building and execution code in a test case.
     """
     if context.executing_eagerly():
-      yield None
+      yield FakeEagerSession(self)
     else:
       sess = self._get_cached_session(
           graph, config, force_gpu, crash_if_inconsistent_args=True)
diff --git a/tensorflow/python/kernel_tests/batch_matmul_op_test.py b/tensorflow/python/kernel_tests/batch_matmul_op_test.py
index 931bb41424..f2f0291b89 100644
--- a/tensorflow/python/kernel_tests/batch_matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_matmul_op_test.py
@@ -198,6 +198,7 @@ if __name__ == "__main__":
     for adjoint_a_ in False, True:
       for adjoint_b_ in False, True:
         name = "%s_%s_%s" % (dtype_.__name__, adjoint_a_, adjoint_b_)
+        # TF2 does not support placeholders under eager so we skip it
         for use_static_shape_ in set([True, tf2.enabled()]):
           setattr(BatchMatmulOpTest,
                   "testBatchMatmulOp_" + name + ("_%s" % use_static_shape_),
diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
index a6f5da9d3d..42fd95d311 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -150,13 +151,17 @@ class MatrixSolveLsOpTest(test_lib.TestCase):
     empty1 = np.empty([0, 2])
     for fast in [True, False]:
       with self.cached_session(use_gpu=True):
-        tf_ans = linalg_ops.matrix_solve_ls(empty0, empty0, fast=fast).eval()
+        tf_ans = self.evaluate(
+            linalg_ops.matrix_solve_ls(empty0, empty0, fast=fast))
         self.assertEqual(tf_ans.shape, (0, 0))
-        tf_ans = linalg_ops.matrix_solve_ls(empty0, full, fast=fast).eval()
+        tf_ans = self.evaluate(
+            linalg_ops.matrix_solve_ls(empty0, full, fast=fast))
         self.assertEqual(tf_ans.shape, (0, 2))
-        tf_ans = linalg_ops.matrix_solve_ls(full, empty0, fast=fast).eval()
+        tf_ans = self.evaluate(
+            linalg_ops.matrix_solve_ls(full, empty0, fast=fast))
         self.assertEqual(tf_ans.shape, (2, 0))
-        tf_ans = linalg_ops.matrix_solve_ls(empty1, empty1, fast=fast).eval()
+        tf_ans = self.evaluate(
+            linalg_ops.matrix_solve_ls(empty1, empty1, fast=fast))
         self.assertEqual(tf_ans.shape, (2, 2))
 
   @test_util.run_deprecated_v1
@@ -350,7 +355,8 @@ class MatrixSolveLsBenchmark(test_lib.Benchmark):
 
 if __name__ == "__main__":
   for dtype_ in [np.float32, np.float64, np.complex64, np.complex128]:
-    for use_placeholder_ in [True, False]:
+    # TF2 does not support placeholders under eager so we skip it
+    for use_placeholder_ in set([False, not tf2.enabled()]):
       for fast_ in [True, False]:
         l2_regularizers = [0] if dtype_ == np.complex128 else [0, 0.1]
         for l2_regularizer_ in l2_regularizers:
diff --git a/tensorflow/python/kernel_tests/qr_op_test.py b/tensorflow/python/kernel_tests/qr_op_test.py
index 0f2537b371..5e9767f20c 100644
--- a/tensorflow/python/kernel_tests/qr_op_test.py
+++ b/tensorflow/python/kernel_tests/qr_op_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
@@ -102,7 +103,7 @@ def _GetQrOpTest(dtype_, shape_, full_matrices_, use_static_shape_):
       tol = 1e-14
     # Tests that a ~= q*r.
     a_recon = math_ops.matmul(q, r)
-    self.assertAllClose(a_recon.eval(), a, rtol=tol, atol=tol)
+    self.assertAllClose(a_recon, a, rtol=tol, atol=tol)
 
   def CheckUnitary(self, x):
     # Tests that x[...,:,:]^H * x[...,:,:] is close to the identity.
@@ -112,7 +113,7 @@ def _GetQrOpTest(dtype_, shape_, full_matrices_, use_static_shape_):
       tol = 1e-5
     else:
       tol = 1e-14
-    self.assertAllClose(identity.eval(), self.evaluate(xx), atol=tol)
+    self.assertAllClose(identity, xx, atol=tol)
 
   def Test(self):
     np.random.seed(1)
@@ -202,7 +203,8 @@ if __name__ == "__main__":
       for cols in 1, 2, 5, 10, 32, 100:
         for full_matrices in False, True:
           for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10):
-            for use_static_shape in True, False:
+            # TF2 does not support placeholders under eager so we skip it
+            for use_static_shape in set([True, tf2.enabled()]):
               shape = batch_dims + (rows, cols)
               name = "%s_%s_full_%s_static_%s" % (dtype.__name__,
                                                   "_".join(map(str, shape)),
diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py
index 97a280ef51..94155fd117 100644
--- a/tensorflow/python/kernel_tests/svd_op_test.py
+++ b/tensorflow/python/kernel_tests/svd_op_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
@@ -117,13 +118,13 @@ def _GetSvdOpTest(dtype_, shape_, use_static_shape_, compute_uv_,
         diag_s = array_ops.concat([diag_s, zeros], a.ndim - 1)
     a_recon = math_ops.matmul(u, diag_s)
     a_recon = math_ops.matmul(a_recon, v, adjoint_b=True)
-    self.assertAllClose(a_recon.eval(), a, rtol=tol, atol=tol)
+    self.assertAllClose(a_recon, a, rtol=tol, atol=tol)
 
   def CheckUnitary(self, x, tol):
     # Tests that x[...,:,:]^H * x[...,:,:] is close to the identity.
     xx = math_ops.matmul(x, x, adjoint_a=True)
     identity = array_ops.matrix_band_part(array_ops.ones_like(xx), 0, 0)
-    self.assertAllClose(identity.eval(), self.evaluate(xx), atol=tol)
+    self.assertAllClose(identity, xx, atol=tol)
 
   def Test(self):
     is_complex = dtype_ in (np.complex64, np.complex128)
@@ -263,7 +264,8 @@ if __name__ == "__main__":
           for cols in 1, 2, 5, 10, 32, 100:
             for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10):
               shape = batch_dims + (rows, cols)
-              for use_static_shape in True, False:
+              # TF2 does not support placeholders under eager so we skip it
+              for use_static_shape in set([True, tf2.enabled()]):
                 name = "%s_%s_static_shape_%s__compute_uv_%s_full_%s" % (
                     dtype.__name__, "_".join(map(str, shape)), use_static_shape,
                     compute_uv, full_matrices)
-- 
GitLab


From 9109842678c385b4ab31825c53bf4373fcadf15e Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Fri, 7 Dec 2018 07:42:26 -0800
Subject: [PATCH 196/873] Add time functions to TF_Env

PiperOrigin-RevId: 224513745
---
 tensorflow/c/env.cc      | 14 ++++++++++++++
 tensorflow/c/env.h       |  9 +++++++++
 tensorflow/c/env_test.cc |  6 ++++++
 3 files changed, 29 insertions(+)

diff --git a/tensorflow/c/env.cc b/tensorflow/c/env.cc
index fcff1e413d..07b9e8b940 100644
--- a/tensorflow/c/env.cc
+++ b/tensorflow/c/env.cc
@@ -145,3 +145,17 @@ TF_StringStream* TF_GetLocalTempDirectories() {
   list->position = 0;
   return list;
 }
+
+TF_CAPI_EXPORT extern uint64_t TF_NowNanos(void) {
+  return ::tensorflow::Env::Default()->NowNanos();
+}
+
+// Returns the number of microseconds since the Unix epoch.
+TF_CAPI_EXPORT extern uint64_t TF_NowMicros(void) {
+  return ::tensorflow::Env::Default()->NowMicros();
+}
+
+// Returns the number of seconds since the Unix epoch.
+TF_CAPI_EXPORT extern uint64_t TF_NowSeconds(void) {
+  return ::tensorflow::Env::Default()->NowSeconds();
+}
diff --git a/tensorflow/c/env.h b/tensorflow/c/env.h
index 32d3f1f745..9d27c5da37 100644
--- a/tensorflow/c/env.h
+++ b/tensorflow/c/env.h
@@ -141,6 +141,15 @@ TF_CAPI_EXPORT extern TF_StringStream* TF_GetChildren(const char* filename,
 // The caller is responsible for freeing the list (see TF_StringStreamDone).
 TF_CAPI_EXPORT extern TF_StringStream* TF_GetLocalTempDirectories(void);
 
+// Returns the number of nanoseconds since the Unix epoch.
+TF_CAPI_EXPORT extern uint64_t TF_NowNanos(void);
+
+// Returns the number of microseconds since the Unix epoch.
+TF_CAPI_EXPORT extern uint64_t TF_NowMicros(void);
+
+// Returns the number of seconds since the Unix epoch.
+TF_CAPI_EXPORT extern uint64_t TF_NowSeconds(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tensorflow/c/env_test.cc b/tensorflow/c/env_test.cc
index 08e803f12f..e2206c6bef 100644
--- a/tensorflow/c/env_test.cc
+++ b/tensorflow/c/env_test.cc
@@ -92,3 +92,9 @@ TEST(TestEnv, TestDirHandling) {
 
   TF_StringStreamDone(tempdirs);
 }
+
+TEST(TestEnv, TestTimeFunctions) {
+  ASSERT_GE(TF_NowSeconds(), 946684800);  // Midnight Jan 1, 2000
+  ASSERT_GE(TF_NowMicros(), 946684800 * 1e6);
+  ASSERT_GE(TF_NowNanos(), 946684800 * 1e9);
+}
-- 
GitLab


From 840be3029ce5ee97b345a8c317a76a68eb4e5374 Mon Sep 17 00:00:00 2001
From: Brennan Saeta <saeta@google.com>
Date: Fri, 7 Dec 2018 08:21:42 -0800
Subject: [PATCH 197/873] [Distributed Runtime] Mark SessionOptions as const.

#cleanup

PiperOrigin-RevId: 224518435
---
 tensorflow/core/distributed_runtime/rpc/grpc_session.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_session.h b/tensorflow/core/distributed_runtime/rpc/grpc_session.h
index e00cf97e38..a3ed3ec736 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_session.h
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_session.h
@@ -111,7 +111,7 @@ class GrpcSession : public Session {
       LOCKS_EXCLUDED(mu_);
 
  private:
-  SessionOptions options_;
+  const SessionOptions options_;
   std::unique_ptr<MasterInterface> master_;
   mutex mu_;
 
-- 
GitLab


From 989e78c412a7e0f5361d4d7dfdfb230c8136e749 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Fri, 7 Dec 2018 09:32:55 -0800
Subject: [PATCH 198/873] [TF Keras] keras.Flatten()(tensor : shaped (batch,))
 -> tensor : shaped (batch, 1)

This provides a consistent API for users who have scalar "channels".

PiperOrigin-RevId: 224528301
---
 .../features/python/conditioning_utils_impl.py     |  3 +++
 .../features/python/conditioning_utils_test.py     |  2 +-
 .../contrib/layers/python/layers/layers_test.py    | 13 ++++++-------
 tensorflow/python/keras/engine/training_utils.py   | 14 +++++++++++---
 tensorflow/python/keras/layers/core.py             | 13 ++++++++++---
 tensorflow/python/keras/layers/core_test.py        | 14 ++++++++++++++
 tensorflow/python/keras/testing_utils.py           |  6 +++++-
 tensorflow/python/layers/core_test.py              |  9 ++++++---
 8 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/tensorflow/contrib/gan/python/features/python/conditioning_utils_impl.py b/tensorflow/contrib/gan/python/features/python/conditioning_utils_impl.py
index e2594faf85..364fa4eb46 100644
--- a/tensorflow/contrib/gan/python/features/python/conditioning_utils_impl.py
+++ b/tensorflow/contrib/gan/python/features/python/conditioning_utils_impl.py
@@ -64,6 +64,9 @@ def condition_tensor(tensor, conditioning):
   """
   tensor.shape[1:].assert_is_fully_defined()
   num_features = tensor.shape[1:].num_elements()
+  if conditioning.shape.ndims < 2:
+    raise ValueError('conditioning must be at least 2D, but saw shape: %s'
+                     % conditioning.shape)
 
   mapped_conditioning = layers.linear(
       layers.flatten(conditioning), num_features)
diff --git a/tensorflow/contrib/gan/python/features/python/conditioning_utils_test.py b/tensorflow/contrib/gan/python/features/python/conditioning_utils_test.py
index 0aad769793..f5c7d53cf2 100644
--- a/tensorflow/contrib/gan/python/features/python/conditioning_utils_test.py
+++ b/tensorflow/contrib/gan/python/features/python/conditioning_utils_test.py
@@ -45,7 +45,7 @@ class ConditioningUtilsTest(test.TestCase):
           array_ops.placeholder(dtypes.float32, (5, None)),
           array_ops.placeholder(dtypes.float32, (5, 1)))
 
-    with self.assertRaisesRegexp(ValueError, 'expected min_ndim=2'):
+    with self.assertRaisesRegexp(ValueError, 'at least 2D'):
       conditioning_utils.condition_tensor(
           array_ops.placeholder(dtypes.float32, (5, 2)),
           array_ops.placeholder(dtypes.float32, (5)))
diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py
index 0a4d2c6d4c..d791418c9d 100644
--- a/tensorflow/contrib/layers/python/layers/layers_test.py
+++ b/tensorflow/contrib/layers/python/layers/layers_test.py
@@ -1459,13 +1459,6 @@ class DropoutTest(test.TestCase):
 
 class FlattenTest(test.TestCase):
 
-  def testInvalidRank(self):
-    with ops.Graph().as_default() as g, self.session(g):
-      inputs = array_ops.placeholder(dtype=dtypes.float32)
-      inputs.set_shape(tensor_shape.TensorShape((5,)))
-      with self.assertRaisesRegexp(ValueError, 'incompatible with the layer'):
-        _layers.flatten(inputs)
-
   def testUnknownLastDim(self):
     with ops.Graph().as_default() as g, self.session(g):
       inputs = array_ops.placeholder(dtype=dtypes.float32)
@@ -1502,6 +1495,12 @@ class FlattenTest(test.TestCase):
                        images.get_shape().num_elements())
       self.assertEqual(output.get_shape()[0], images.get_shape()[0])
 
+  def testFlatten0D(self):
+    with self.cached_session():
+      scalars = random_ops.random_uniform((5,), seed=1, name='scalars')
+      output = _layers.flatten(scalars)
+      self.assertEqual(output.shape, (5, 1))
+
   def testFlattenBatchSize(self):
     height, width = 3, 3
     with self.cached_session() as sess:
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 9301302f4a..01a09eb031 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -232,10 +232,14 @@ def check_num_samples(ins,
   return None  # Edge case where ins == [static_learning_phase]
 
 
-def standardize_single_array(x):
+def standardize_single_array(x, expected_shape=None):
+  """Expand data of shape (x,) to (x, 1), unless len(expected_shape)==1."""
   if x is None:
     return None
-  if x.shape is not None and len(x.shape) == 1:
+
+  if (x.shape is not None
+      and len(x.shape) == 1
+      and (expected_shape is None or len(expected_shape) != 1)):
     if tensor_util.is_tensor(x):
       x = array_ops.expand_dims(x, axis=1)
     else:
@@ -301,7 +305,11 @@ def standardize_input_data(data,
   else:
     data = data.values if data.__class__.__name__ == 'DataFrame' else data
     data = [data]
-  data = [standardize_single_array(x) for x in data]
+  if shapes is not None:
+    data = [standardize_single_array(x, shape)
+            for (x, shape) in zip(data, shapes)]
+  else:
+    data = [standardize_single_array(x) for x in data]
 
   if len(data) != len(names):
     if data and hasattr(data[0], 'shape'):
diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py
index 56dd70558c..854774c569 100644
--- a/tensorflow/python/keras/layers/core.py
+++ b/tensorflow/python/keras/layers/core.py
@@ -506,6 +506,9 @@ class Permute(Layer):
 class Flatten(Layer):
   """Flattens the input. Does not affect the batch size.
 
+  If inputs are shaped `(batch,)` without a channel dimension, then flattening
+  adds an extra channel dimension and output shapes are `(batch, 1)`.
+
   Arguments:
       data_format: A string,
           one of `channels_last` (default) or `channels_first`.
@@ -534,23 +537,27 @@ class Flatten(Layer):
   def __init__(self, data_format=None, **kwargs):
     super(Flatten, self).__init__(**kwargs)
     self.data_format = conv_utils.normalize_data_format(data_format)
-    self.input_spec = InputSpec(min_ndim=2)
+    self.input_spec = InputSpec(min_ndim=1)
 
   def call(self, inputs):
-    if self.data_format == 'channels_first':
+    if (self.data_format == 'channels_first'
+        and K.ndim(inputs) is not None and K.ndim(inputs) > 1):
       permutation = [0]
       permutation.extend([i for i in
                           range(2, K.ndim(inputs))])
       permutation.append(1)
       inputs = array_ops.transpose(inputs, perm=permutation)
 
-    outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1))
+    outputs = array_ops.reshape(
+        inputs, (inputs.shape[0].value or array_ops.shape(inputs)[0], -1))
     if not context.executing_eagerly():
       outputs.set_shape(self.compute_output_shape(inputs.get_shape()))
     return outputs
 
   def compute_output_shape(self, input_shape):
     input_shape = tensor_shape.TensorShape(input_shape).as_list()
+    if not input_shape:
+      output_shape = tensor_shape.TensorShape([1])
     output_shape = [input_shape[0]]
     if all(input_shape[1:]):
       output_shape += [np.prod(input_shape[1:])]
diff --git a/tensorflow/python/keras/layers/core_test.py b/tensorflow/python/keras/layers/core_test.py
index aad6ab8171..f138adf760 100644
--- a/tensorflow/python/keras/layers/core_test.py
+++ b/tensorflow/python/keras/layers/core_test.py
@@ -149,6 +149,20 @@ class CoreLayersTest(test.TestCase):
         np.transpose(inputs, (0, 2, 3, 1)), (-1, 5 * 5 * 3))
     self.assertAllClose(outputs, target_outputs)
 
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_flatten_scalar_channels(self):
+    testing_utils.layer_test(
+        keras.layers.Flatten, kwargs={}, input_shape=(3,))
+
+    # Test channels_first
+    inputs = np.random.random((10,)).astype('float32')
+    outputs = testing_utils.layer_test(
+        keras.layers.Flatten,
+        kwargs={'data_format': 'channels_first'},
+        input_data=inputs)
+    target_outputs = np.expand_dims(inputs, -1)
+    self.assertAllClose(outputs, target_outputs)
+
   @tf_test_util.run_in_graph_and_eager_modes
   def test_repeat_vector(self):
     testing_utils.layer_test(
diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py
index 42f8f10fca..fd062b0ab3 100644
--- a/tensorflow/python/keras/testing_utils.py
+++ b/tensorflow/python/keras/testing_utils.py
@@ -77,9 +77,13 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None,
   Returns:
     The output data (Numpy array) returned by the layer, for additional
     checks to be done by the calling code.
+
+  Raises:
+    ValueError: if `input_shape is None`.
   """
   if input_data is None:
-    assert input_shape
+    if input_shape is None:
+      raise ValueError('input_shape is None')
     if not input_dtype:
       input_dtype = 'float32'
     input_data_shape = list(input_shape)
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index cf6f0fbb70..b40a268238 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -533,10 +533,13 @@ class FlattenTest(test.TestCase):
     self.assertEqual(y.get_shape().as_list(), [None, 6])
 
   @test_util.run_deprecated_v1
-  def testFlattenValueError(self):
+  def testFlatten0D(self):
     x = array_ops.placeholder(shape=(None,), dtype='float32')
-    with self.assertRaises(ValueError):
-      core_layers.Flatten()(x)
+    y = core_layers.Flatten()(x)
+    with self.cached_session() as sess:
+      np_output = sess.run(y, feed_dict={x: np.zeros((5,))})
+    self.assertEqual(list(np_output.shape), [5, 1])
+    self.assertEqual(y.shape.as_list(), [None, 1])
 
   @test_util.run_deprecated_v1
   def testFlattenUnknownAxes(self):
-- 
GitLab


From 8d3be0f555b14013d4d93336cd5206153a6fa63b Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Fri, 7 Dec 2018 09:45:24 -0800
Subject: [PATCH 199/873] Add default value for topdown aerg of io.gfile.walk.
 The default exists in Python's os.walk and in the v1 gfile too.

PiperOrigin-RevId: 224530098
---
 tensorflow/python/lib/io/file_io.py                      | 2 +-
 tensorflow/tools/api/golden/v1/tensorflow.io.gfile.pbtxt | 2 +-
 tensorflow/tools/api/golden/v2/tensorflow.io.gfile.pbtxt | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py
index 4caa5750bf..ee55d89bff 100644
--- a/tensorflow/python/lib/io/file_io.py
+++ b/tensorflow/python/lib/io/file_io.py
@@ -671,7 +671,7 @@ def walk(top, in_order=True):
 
 
 @tf_export("io.gfile.walk")
-def walk_v2(top, topdown, onerror=None):
+def walk_v2(top, topdown=True, onerror=None):
   """Recursive directory tree generator for directories.
 
   Args:
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.io.gfile.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.io.gfile.pbtxt
index 93d9b0fd75..cfa3372b12 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.io.gfile.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.io.gfile.pbtxt
@@ -46,6 +46,6 @@ tf_module {
   }
   member_method {
     name: "walk"
-    argspec: "args=[\'top\', \'topdown\', \'onerror\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'top\', \'topdown\', \'onerror\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
   }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.gfile.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.gfile.pbtxt
index 93d9b0fd75..cfa3372b12 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.gfile.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.gfile.pbtxt
@@ -46,6 +46,6 @@ tf_module {
   }
   member_method {
     name: "walk"
-    argspec: "args=[\'top\', \'topdown\', \'onerror\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'top\', \'topdown\', \'onerror\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], "
   }
 }
-- 
GitLab


From 0d9781711b16a897a220e33f5f6c60a038fb5bac Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Fri, 7 Dec 2018 09:53:37 -0800
Subject: [PATCH 200/873] Creating a wrapper around DatasetVariantWrapper's
 that can be passed from CPU to GPU while keeping everything in host memory.
 This is necessary for when we move to creating datasets on the go and for
 transformations such as copy_to_device we need to ferry the
 DatasetVariantWrapper.

PiperOrigin-RevId: 224531292
---
 tensorflow/compiler/jit/xla_device_ops.h      |   2 +
 .../api_def_UnwrapDatasetVariant.pbtxt        |   4 +
 .../base_api/api_def_WrapDatasetVariant.pbtxt |   4 +
 tensorflow/core/framework/dataset.cc          | 109 ++++++++++++++++++
 tensorflow/core/kernels/function_ops.cc       |   3 +
 tensorflow/core/ops/dataset_ops.cc            |  10 ++
 .../data/experimental/kernel_tests/BUILD      |  11 ++
 .../kernel_tests/wrap_unwrap_test.py          |  69 +++++++++++
 8 files changed, 212 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_UnwrapDatasetVariant.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_WrapDatasetVariant.pbtxt
 create mode 100644 tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py

diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h
index adf0f994b8..927f983ba9 100644
--- a/tensorflow/compiler/jit/xla_device_ops.h
+++ b/tensorflow/compiler/jit/xla_device_ops.h
@@ -203,6 +203,8 @@ class XlaAssignVariableOp : public OpKernel {
                               .HostMemory("output")                            \
                               .TypeConstraint<ResourceHandle>("T"),            \
                           ArgOp);                                              \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name(kArgOp).Device(DEVICE).TypeConstraint<Variant>("T"), ArgOp);        \
                                                                                \
   REGISTER_KERNEL_BUILDER(Name(kRetOp)                                         \
                               .Device(DEVICE)                                  \
diff --git a/tensorflow/core/api_def/base_api/api_def_UnwrapDatasetVariant.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnwrapDatasetVariant.pbtxt
new file mode 100644
index 0000000000..7b3f88a1da
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UnwrapDatasetVariant.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "UnwrapDatasetVariant"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_WrapDatasetVariant.pbtxt b/tensorflow/core/api_def/base_api/api_def_WrapDatasetVariant.pbtxt
new file mode 100644
index 0000000000..40f5c7a0d2
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_WrapDatasetVariant.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "WrapDatasetVariant"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc
index 0e48932da7..6e21433271 100644
--- a/tensorflow/core/framework/dataset.cc
+++ b/tensorflow/core/framework/dataset.cc
@@ -17,6 +17,8 @@ limitations under the License.
 
 #include "tensorflow/core/framework/device_base.h"
 #include "tensorflow/core/framework/function.h"
+#include "tensorflow/core/framework/variant_encode_decode.h"
+#include "tensorflow/core/framework/variant_op_registry.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
 #include "tensorflow/core/graph/node_builder.h"
 #include "tensorflow/core/platform/mutex.h"
@@ -74,6 +76,113 @@ class DatasetVariantWrapper {
   DatasetBase* const dataset_;  // Owns one reference.
 };
 
+const char kWrappedDatasetVariantTypeName[] =
+    "tensorflow::data::WrappedDatasetVariant";
+
+class WrappedDatasetVariantWrapper {
+ public:
+  WrappedDatasetVariantWrapper() {}
+
+  explicit WrappedDatasetVariantWrapper(const Tensor& ds_tensor)
+      : ds_tensor_(ds_tensor) {}
+
+  Tensor get() const { return ds_tensor_; }
+
+  string TypeName() const { return "tensorflow::WrappedDatasetVariantWrapper"; }
+
+  string DebugString() const {
+    return "tensorflow::WrappedDatasetVariantWrapper::DebugString";
+  }
+
+  void Encode(VariantTensorData* data) const {
+    *(data->add_tensors()) = ds_tensor_;
+  }
+
+  bool Decode(const VariantTensorData& data) {
+    ds_tensor_ = data.tensors(0);
+    return true;
+  }
+
+ private:
+  Tensor ds_tensor_;
+};
+
+class WrapDatasetVariantOp : public OpKernel {
+ public:
+  explicit WrapDatasetVariantOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& tensor = ctx->input(0);
+    OP_REQUIRES(ctx,
+                tensor.dtype() == DT_VARIANT &&
+                    TensorShapeUtils::IsScalar(tensor.shape()),
+                errors::InvalidArgument(
+                    "Dataset tensor must be a scalar of dtype DT_VARIANT."));
+    DatasetBase* unused;
+    OP_REQUIRES_OK(ctx, GetDatasetFromVariantTensor(tensor, &unused));
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output));
+    output->scalar<Variant>()() = WrappedDatasetVariantWrapper(tensor);
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("WrapDatasetVariant").Device(DEVICE_CPU),
+                        WrapDatasetVariantOp);
+REGISTER_KERNEL_BUILDER(Name("WrapDatasetVariant")
+                            .HostMemory("input_handle")
+                            .HostMemory("output_handle")
+                            .Device(DEVICE_GPU),
+                        WrapDatasetVariantOp);
+
+class UnwrapDatasetVariantOp : public OpKernel {
+ public:
+  explicit UnwrapDatasetVariantOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& tensor = ctx->input(0);
+    OP_REQUIRES(ctx,
+                tensor.dtype() == DT_VARIANT &&
+                    TensorShapeUtils::IsScalar(tensor.shape()),
+                errors::InvalidArgument(
+                    "Dataset tensor must be a scalar of dtype DT_VARIANT."));
+    Variant variant = tensor.scalar<Variant>()();
+    const WrappedDatasetVariantWrapper* wrapper =
+        variant.get<WrappedDatasetVariantWrapper>();
+    OP_REQUIRES(ctx, wrapper != nullptr,
+                errors::InvalidArgument(
+                    "Tensor must be a WrappedDataset variant object."));
+    Tensor ds_tensor = wrapper->get();
+    OP_REQUIRES_OK(ctx, ctx->set_output("output_handle", ds_tensor));
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name("UnwrapDatasetVariant").Device(DEVICE_CPU),
+                        UnwrapDatasetVariantOp);
+REGISTER_KERNEL_BUILDER(Name("UnwrapDatasetVariant")
+                            .HostMemory("input_handle")
+                            .HostMemory("output_handle")
+                            .Device(DEVICE_GPU),
+                        UnwrapDatasetVariantOp);
+
+static Status WrappedDatasetVariantDeviceCopy(
+    const WrappedDatasetVariantWrapper& from, WrappedDatasetVariantWrapper* to,
+    const UnaryVariantOpRegistry::AsyncTensorDeviceCopyFn& copy) {
+  *to = WrappedDatasetVariantWrapper(from);
+  return Status::OK();
+}
+
+#define REGISTER_OPTIONAL_COPY(DIRECTION)               \
+  INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION( \
+      WrappedDatasetVariantWrapper, DIRECTION,          \
+      WrappedDatasetVariantDeviceCopy)
+
+REGISTER_OPTIONAL_COPY(VariantDeviceCopyDirection::HOST_TO_DEVICE);
+REGISTER_OPTIONAL_COPY(VariantDeviceCopyDirection::DEVICE_TO_HOST);
+REGISTER_OPTIONAL_COPY(VariantDeviceCopyDirection::DEVICE_TO_DEVICE);
+
+REGISTER_UNARY_VARIANT_DECODE_FUNCTION(WrappedDatasetVariantWrapper,
+                                       kWrappedDatasetVariantTypeName);
+
 }  // namespace
 
 Status GraphDefBuilderWrapper::AddDataset(
diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index cca3cfbd7c..90f94ee4a0 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -122,6 +122,9 @@ REGISTER_KERNEL_BUILDER(Name(kArgOp)
                             .TypeConstraint<string>("T"),
                         ArgOp);
 
+REGISTER_KERNEL_BUILDER(
+    Name(kArgOp).Device(DEVICE_GPU).TypeConstraint<Variant>("T"), ArgOp);
+
 #define REGISTER(type)     \
   REGISTER_KERNEL_BUILDER( \
       Name(kRetOp).Device(DEVICE_GPU).TypeConstraint<type>("T"), RetvalOp);
diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc
index 9163ed715d..1c117166de 100644
--- a/tensorflow/core/ops/dataset_ops.cc
+++ b/tensorflow/core/ops/dataset_ops.cc
@@ -687,6 +687,16 @@ REGISTER_OP("MapDefun")
       return Status::OK();
     });
 
+REGISTER_OP("WrapDatasetVariant")
+    .Input("input_handle: variant")
+    .Output("output_handle: variant")
+    .SetShapeFn(shape_inference::ScalarShape);
+
+REGISTER_OP("UnwrapDatasetVariant")
+    .Input("input_handle: variant")
+    .Output("output_handle: variant")
+    .SetShapeFn(shape_inference::ScalarShape);
+
 REGISTER_OP("MultiDeviceIterator")
     .Output("handle: resource")
     .Attr("devices: list(string) >= 1")
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index 07ed4d2bce..548eb422ed 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -716,3 +716,14 @@ py_test(
         "//tensorflow/python/data/ops:dataset_ops",
     ],
 )
+
+cuda_py_test(
+    name = "wrap_unwrap_test",
+    size = "small",
+    srcs = ["wrap_unwrap_test.py"],
+    additional_deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/kernel_tests:test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
diff --git a/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py b/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py
new file mode 100644
index 0000000000..9c734b65e0
--- /dev/null
+++ b/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py
@@ -0,0 +1,69 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Wrapping / Unwrapping dataset variants."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.platform import test
+
+
+class WrapDatasetVariantTest(test_base.DatasetTestBase):
+
+  def testBasic(self):
+    ds = dataset_ops.Dataset.range(100)
+    ds_variant = ds._as_variant_tensor()  # pylint: disable=protected-access
+
+    wrapped_variant = gen_dataset_ops.wrap_dataset_variant(ds_variant)
+    unwrapped_variant = gen_dataset_ops.unwrap_dataset_variant(wrapped_variant)
+
+    variant_ds = dataset_ops._VariantDataset(unwrapped_variant,
+                                             ds._element_structure)
+    iterator = dataset_ops.make_initializable_iterator(variant_ds)
+    get_next = iterator.get_next()
+
+    with self.cached_session():
+      self.evaluate(iterator.initializer)
+      for i in range(100):
+        self.assertEqual(i, self.evaluate(get_next))
+
+  def testGPU(self):
+    ds = dataset_ops.Dataset.range(100)
+    ds_variant = ds._as_variant_tensor()  # pylint: disable=protected-access
+    wrapped_variant = gen_dataset_ops.wrap_dataset_variant(ds_variant)
+
+    with ops.device("/gpu:0"):
+      gpu_wrapped_variant = array_ops.identity(wrapped_variant)
+
+    unwrapped_variant = gen_dataset_ops.unwrap_dataset_variant(
+        gpu_wrapped_variant)
+    variant_ds = dataset_ops._VariantDataset(unwrapped_variant,
+                                             ds._element_structure)
+    iterator = dataset_ops.make_initializable_iterator(variant_ds)
+    get_next = iterator.get_next()
+
+    with self.cached_session():
+      self.evaluate(iterator.initializer)
+      for i in range(100):
+        self.assertEqual(i, self.evaluate(get_next))
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From b56d14fbcb33d974f593e0c874bc404d1f1192b3 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Fri, 7 Dec 2018 09:56:19 -0800
Subject: [PATCH 201/873] Rework the Cudnn LSTM implementation to get rid the
 extra input bias.

This heavily clear up the logic for save/load model since the weights for input bias is gone. The new implementation is mathematically same as the canonical LSTM.

PiperOrigin-RevId: 224531752
---
 tensorflow/python/keras/engine/saving.py      | 58 -------------
 tensorflow/python/keras/engine/saving_test.py | 64 --------------
 tensorflow/python/keras/layers/recurrent.py   | 46 +++-------
 .../python/keras/layers/unified_lstm_test.py  | 84 +++++++++++++++++++
 4 files changed, 94 insertions(+), 158 deletions(-)

diff --git a/tensorflow/python/keras/engine/saving.py b/tensorflow/python/keras/engine/saving.py
index 15ba5f78d9..54d9e32fb2 100644
--- a/tensorflow/python/keras/engine/saving.py
+++ b/tensorflow/python/keras/engine/saving.py
@@ -551,68 +551,10 @@ def preprocess_weights_for_loading(layer,
       if layer.__class__.__name__ == 'ConvLSTM2D':
         weights[1] = np.transpose(weights[1], (3, 2, 0, 1))
 
-  weights = _convert_unified_lstm_weights(layer, weights)
-
   # convert CuDNN layers
   return _convert_rnn_weights(layer, weights)
 
 
-def _convert_unified_lstm_weights(layer, weights):
-  """Converts weights for Unified LSTM layer.
-
-  The input weights suppose to have 2, 3 or 4 items.
-  1. kernel. (i, f, c, o gates concat among axis 1)
-  2. recurrent_kernel. (i, f, c, o concat among axis 1)
-  3. recurrent_bias. (optional, only available when use bias)
-  4. input_bias (optional, only available when use bias and cudnn).
-  Kernel and recurrent_kernel does not need any conversion. During load(),
-  since the layer could be built with the parameter that does not support the
-  defun approach, it is possible that cudnn_bias variable is not created, or
-  even created but not used during actual run. Because of that, we sum up the
-  value of two biases, and give it to recurrent_bias only. Mathematically, the
-  LSTM is calculated as following formula:
-
-    i_t = sigmoid(w_i * x_t + r_i * h_(t-1) + b_wi + b_ri)
-    f_t = sigmoid(w_f * x_t + r_f * h_(t-1) + b_wf + b_rf)
-    o_t = sigmoid(w_o * x_t + r_o * h_(t-1) + b_wo + b_ro)
-    c'_t = tanh(w_c * x_t + r_c * h_(t-1) + b_wc + b_rc)
-    c_t = f_t . c_(t-1) + i_t . c'_t
-    h_t = o_t . tanh(c_t)
-
-  Note that b_w{x} is the input_bias, and b_r{x} is the recurrent_bias.
-  Since it is a linear add, it is fine to give b_r{x} 100% and b_w{x} 0%, as
-  long as the sum are the same.
-
-  Args:
-    layer: The keras layer that will be loaded with weights.
-    weights: the list of numpy arrays which hold the weights to be loaded.
-
-  Returns:
-    weights: the processed list of numpy arrays.
-  """
-  if layer.__class__.__name__ == 'UnifiedLSTM':
-    if len(weights) not in [3, 4]:
-      # Only handles the bias conversion in this function, in the case when
-      # bias is not used or weights in unexpected length, do nothing and return.
-      return weights
-
-    if len(weights) == 3:
-      recurrent_bias = weights[2]
-    else:
-      # Add all the bias value to recurrent_bias
-      recurrent_bias = weights[2] + weights[3]
-
-    if len(layer.weights) == 3:
-      weights = weights[:2] + [recurrent_bias]
-    elif len(layer.weights) == 4:
-      # Create a zero filled input_bias, since all the weights have given
-      # to recurrent bias.
-      input_bias = np.zeros_like(recurrent_bias)
-      weights = weights[:2] + [recurrent_bias, input_bias]
-
-  return weights
-
-
 def _convert_rnn_weights(layer, weights):
   """Converts weights for RNN layers between native and CuDNN format.
 
diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 8fcefce748..6d9d9a2fca 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -221,70 +221,6 @@ class TestWeightSavingAndLoading(test.TestCase, parameterized.TestCase):
           for (x, y) in zip(weights1, weights2)
       ]
 
-  @parameterized.named_parameters(
-      # test_name, use_bias, bias_initializer, activation
-      ('normal', True, 'zeros', 'tanh'),
-      ('no_bias', False, 'zeros', 'tanh'),
-      # TODO(scottzhu): Reenable this test case when the approach is decided.
-      # ('random_bias', True, 'random_uniform', 'tanh'),
-      ('no_cudnn_bias', True, 'zeros', 'relu')
-  )
-  def test_process_weights_for_loading_unified_lstm(
-      self, use_bias, bias_initializer, activation):
-    if h5py is None:
-      return
-
-    temp_dir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, temp_dir)
-    h5_path = os.path.join(temp_dir, 'test.h5')
-
-    batch = 10
-    timestep = 3
-    input_dim = 5
-    units = 2
-
-    x = np.random.random((batch, timestep, input_dim))
-
-    def build_model():
-      inputs = keras.layers.Input(
-          shape=[timestep, input_dim], dtype=dtypes.float32)
-      layer = keras.layers.UnifiedLSTM(
-          units,
-          activation=activation,
-          use_bias=use_bias,
-          bias_initializer=bias_initializer)
-      output = layer(inputs)
-      return keras.models.Model(inputs, output), layer
-
-    with self.cached_session():
-      model, layer = build_model()
-      y_ref = model.predict(x)
-      model.save_weights(h5_path)
-
-      cloned_model, new_layer = build_model()
-      cloned_model.load_weights(h5_path)
-      y = cloned_model.predict(x)
-
-      self.assertAllClose(y, y_ref)
-
-      # Test the individual layer weights.
-      weights1 = layer.get_weights()
-      weights2 = new_layer.get_weights()
-      self.assertLen(weights1, len(weights2))
-      # kernel and current kernel should be the same.
-      self.assertAllClose(weights1[:2], weights2[:2])
-
-      if len(weights2) >= 3:
-        # Test recurrent bias
-        expected_recurrent_bias = weights1[2]
-        if len(weights1) == 4:
-          expected_recurrent_bias += weights1[3]
-        self.assertAllClose(weights2[2], expected_recurrent_bias)
-
-      if len(weights2) == 4:
-        # Test recovered input_gate_bias to be always zero
-        self.assertAllClose(weights2[3], np.zeros_like(weights1[3]))
-
   def test_sequential_weight_loading(self):
     if h5py is None:
       return
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 189ad98794..fdc2acd538 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -2666,21 +2666,6 @@ class UnifiedLSTM(LSTM):
         activation == 'tanh' and recurrent_dropout == 0 and
         not unroll and use_bias and bias_regularizer is None)
 
-  def build(self, input_shape):
-    super(UnifiedLSTM, self).build(input_shape)
-    if self.could_use_cudnn:
-      # Add a new set of bias for CuDNN implementation only. Standard LSTM only
-      # has bias for recurrent kernel, while CuDNN LSTM has an extra set for
-      # input gate as well.
-      self.cudnn_bias = self.add_weight(
-          shape=(self.units * 4,),
-          name='cudnn_bias',
-          use_resource=True,
-          initializer=self.bias_initializer,
-          regularizer=self.bias_regularizer,
-          constraint=self.bias_constraint)
-    self.built = True
-
   def call(self, inputs, mask=None, training=None, initial_state=None):
     # LSTM does not support constants. Ignore it during process.
     inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)
@@ -2721,8 +2706,6 @@ class UnifiedLSTM(LSTM):
         # Reverse time axis.
         inputs = K.reverse(inputs, 1)
 
-      combined_bias = array_ops.concat([self.cudnn_bias, self.cell.bias], 0)
-
       if 0 < self.dropout < 1:
         if self._dropout_mask is None:
           self._dropout_mask = _generate_dropout_mask(
@@ -2757,23 +2740,23 @@ class UnifiedLSTM(LSTM):
         if context.num_gpus() > 0:
           last_output, outputs, new_h, new_c, runtime = defun_cudnn_lstm(
               inputs, initial_state[0], initial_state[1], self.cell.kernel,
-              self.cell.recurrent_kernel, combined_bias, self.time_major)
+              self.cell.recurrent_kernel, self.cell.bias, self.time_major)
         else:
           last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(
               inputs, initial_state[0], initial_state[1], self.cell.kernel,
-              self.cell.recurrent_kernel, combined_bias, self.activation,
+              self.cell.recurrent_kernel, self.cell.bias, self.activation,
               self.recurrent_activation, self.time_major)
       else:
         # Call the normal LSTM impl and register the CuDNN impl function. The
         # grappler will kick in during session execution to optimize the graph.
         last_output, outputs, new_h, new_c, runtime = defun_standard_lstm(
             inputs, initial_state[0], initial_state[1], self.cell.kernel,
-            self.cell.recurrent_kernel, combined_bias, self.activation,
+            self.cell.recurrent_kernel, self.cell.bias, self.activation,
             self.recurrent_activation, self.time_major)
 
         function.register(defun_cudnn_lstm, inputs, initial_state[0],
                           initial_state[1], self.cell.kernel,
-                          self.cell.recurrent_kernel, combined_bias,
+                          self.cell.recurrent_kernel, self.cell.bias,
                           self.time_major)
       states = [new_h, new_c]
 
@@ -2800,8 +2783,6 @@ class UnifiedLSTM(LSTM):
     if self.trainable:
       weights = []
       weights += self.cell.trainable_weights
-      if getattr(self, 'cudnn_bias', None) is not None:
-        weights += [self.cudnn_bias]
       return weights
     return []
 
@@ -2810,8 +2791,6 @@ class UnifiedLSTM(LSTM):
     if not self.trainable:
       weights = []
       weights += self.cell.non_trainable_weights
-      if getattr(self, 'cudnn_bias', None) is not None:
-        weights += [self.cudnn_bias]
       return weights
     return []
 
@@ -2830,8 +2809,6 @@ class UnifiedLSTM(LSTM):
   def get_weights(self):
     weights = []
     weights += self.cell.weights
-    if getattr(self, 'cudnn_bias', None) is not None:
-      weights += [self.cudnn_bias]
     return K.batch_get_value(weights)
 
   def set_weights(self, weights):
@@ -2839,10 +2816,6 @@ class UnifiedLSTM(LSTM):
     cell_weights = weights[:len(self.cell.weights)]
     if cell_weights:
       tuples.append((self.cell.weights, cell_weights))
-    if getattr(self, 'cudnn_bias', None) is not None:
-      cudnn_bias_weights = weights[len(self.cell.weights):]
-      if cudnn_bias_weights:
-        tuples.append((self.cudnn_bias, cudnn_bias_weights))
     K.batch_set_value(tuples)
 
 
@@ -2919,9 +2892,6 @@ def standard_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias,
   input_shape = K.int_shape(inputs)
   timesteps = input_shape[0] if time_major else input_shape[1]
 
-  # Only use the second half of the bias weights.
-  _, real_bias = array_ops.split(bias, 2)
-
   def step(cell_inputs, cell_states):
     """Step function that will be used by Keras RNN backend."""
     h_tm1 = cell_states[0]  # previous memory state
@@ -2929,7 +2899,7 @@ def standard_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias,
 
     z = K.dot(cell_inputs, kernel)
     z += K.dot(h_tm1, recurrent_kernel)
-    z = K.bias_add(z, real_bias)
+    z = K.bias_add(z, bias)
 
     z0, z1, z2, z3 = array_ops.split(z, 4, axis=1)
 
@@ -2962,9 +2932,13 @@ def cudnn_lstm(inputs, input_h, input_c, kernel, recurrent_kernel, bias,
 
   weights = array_ops.split(kernel, 4, axis=1)
   weights += array_ops.split(recurrent_kernel, 4, axis=1)
+  # CuDNN has an extra set of bias for inputs, we disable them (setting to 0),
+  # so that mathematically it is same as the canonical LSTM implementation.
+  full_bias = array_ops.concat((array_ops.zeros_like(bias), bias), 0)
+
   params = _canonical_to_params(
       weights=weights,
-      biases=array_ops.split(bias, 8),
+      biases=array_ops.split(full_bias, 8),
       shape=constant_op.constant([-1]),
       transpose_weights=True)
 
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index b004284140..2cb3eff8fd 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+import shutil
 import time
 
 from absl.testing import parameterized
@@ -26,6 +28,7 @@ import numpy as np
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import keras
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
@@ -175,6 +178,86 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
                         bias_regularizer=bias_regularizer)
     self.assertFalse(layer.could_use_cudnn)
 
+  def test_unified_lstm_feature_parity_with_canonical_lstm(self):
+    with context.eager_mode():
+      # Run this test under eager only due to b/120160788 for model.set_weights.
+      input_shape = 10
+      rnn_state_size = 8
+      timestep = 4
+      batch = 20
+
+      (x_train, y_train), _ = testing_utils.get_test_data(
+          train_samples=batch,
+          test_samples=0,
+          input_shape=(timestep, input_shape),
+          num_classes=rnn_state_size)
+      y_train = keras.utils.to_categorical(y_train, rnn_state_size)
+
+      inputs = keras.layers.Input(
+          shape=[timestep, input_shape], dtype=dtypes.float32)
+      lstm_layer = keras.layers.LSTM(rnn_state_size,
+                                     recurrent_activation='sigmoid')
+      output = lstm_layer(inputs)
+      lstm_model = keras.models.Model(inputs, output)
+      weights = lstm_model.get_weights()
+      y_1 = lstm_model.predict(x_train)
+      lstm_model.compile('rmsprop', 'mse')
+      lstm_model.fit(x_train, y_train)
+      y_2 = lstm_model.predict(x_train)
+
+      with test_util.device(use_gpu=True):
+        cudnn_layer = keras.layers.UnifiedLSTM(rnn_state_size,
+                                               recurrent_activation='sigmoid')
+        cudnn_model = keras.models.Model(inputs, cudnn_layer(inputs))
+      cudnn_model.set_weights(weights)
+      y_3 = cudnn_model.predict(x_train)
+      cudnn_model.compile('rmsprop', 'mse')
+      cudnn_model.fit(x_train, y_train)
+      y_4 = cudnn_model.predict(x_train)
+
+      self.assertAllClose(y_1, y_3)
+      self.assertAllClose(y_2, y_4)
+
+  @parameterized.named_parameters(
+      # test_name, use_bias, bias_initializer, activation
+      ('normal', True, 'zeros'),
+      ('no_bias', False, 'zeros'),
+      ('random_bias', True, 'random_uniform'),
+  )
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_unified_lstm_model_save_load(self, use_bias, bias_initializer):
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir)
+    h5_path = os.path.join(temp_dir, 'test.h5')
+
+    batch = 10
+    timestep = 3
+    input_dim = 5
+    units = 2
+
+    x = np.random.random((batch, timestep, input_dim))
+
+    def build_model():
+      inputs = keras.layers.Input(
+          shape=[timestep, input_dim], dtype=dtypes.float32)
+      layer = keras.layers.UnifiedLSTM(
+          units,
+          use_bias=use_bias,
+          bias_initializer=bias_initializer)
+      output = layer(inputs)
+      return keras.models.Model(inputs, output), layer
+
+    model, layer = build_model()
+    y_ref = model.predict(x)
+    model.save_weights(h5_path)
+
+    cloned_model, new_layer = build_model()
+    cloned_model.load_weights(h5_path)
+    y = cloned_model.predict(x)
+
+    self.assertAllClose(y, y_ref)
+    self.assertAllClose(layer.get_weights(), new_layer.get_weights())
+
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_unified_lstm_output_on_multiple_kernel(self):
     input_shape = 10
@@ -240,6 +323,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     model.compile('rmsprop', loss='mse')
     model.fit(x_train, y_train, epochs=epoch)
     model.evaluate(x_train, y_train)
+    model.predict(x_train)
 
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_return_sequences_LSTM(self):
-- 
GitLab


From 8f2bc575d908bf1d1e743dd0e67c7115c5b757a9 Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Fri, 7 Dec 2018 09:57:11 -0800
Subject: [PATCH 202/873] Reuse GPUs for multiple ranks in NcclManager test.

Since NCCL 2.1.2, NCCL uses cooperative group launch by default which
results in deadlocks in the NcclManager test if number of ranks > number of
GPUs.  Setting NCCL_LAUNCH_MODE to PARALLEL, which was the default before NCCL
2.1.2, fixes this.

PiperOrigin-RevId: 224531851
---
 tensorflow/core/nccl/nccl_manager_test.cc | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tensorflow/core/nccl/nccl_manager_test.cc b/tensorflow/core/nccl/nccl_manager_test.cc
index f43103e120..f9ed4d0b9a 100644
--- a/tensorflow/core/nccl/nccl_manager_test.cc
+++ b/tensorflow/core/nccl/nccl_manager_test.cc
@@ -65,6 +65,7 @@ class NcclManagerTest : public ::testing::Test {
 
   static void SetUpTestCase() {
     setenv("NCCL_DEBUG", "WARN", 1 /* replace */);
+    setenv("NCCL_LAUNCH_MODE", "PARALLEL", 1 /* replace */);
     devices_ = new std::vector<std::unique_ptr<BaseGPUDevice>>(GetGPUDevices());
     LOG(ERROR) << "Running test with " << devices_->size() << " gpus";
   }
@@ -200,7 +201,7 @@ TYPED_TEST_CASE(NcclManagerTest, TypeList);
 
 // Test basic sum reduction.
 TYPED_TEST(NcclManagerTest, BasicSumReduction) {
-  const int num_ranks = this->NumGPUs();
+  const int num_ranks = 4;
 
   for (int op = 0; op < 4; ++op) {
     ncclRedOp_t reduction_op = static_cast<ncclRedOp_t>(op);
@@ -208,6 +209,7 @@ TYPED_TEST(NcclManagerTest, BasicSumReduction) {
         this->MakeTestCase(num_ranks, reduction_op, TensorShape({2, 3}), 0.0f));
     for (int rank = 0; rank < num_ranks; ++rank) {
       auto* device = this->GetDevice(rank);
+      VLOG(2) << "rank " << rank << " device " << device->name();
       auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
       auto* stream = device->tensorflow_gpu_device_info()->stream;
       NcclManager::instance()->AddToAllReduce(
@@ -224,14 +226,12 @@ TYPED_TEST(NcclManagerTest, BasicSumReduction) {
 // Same as the Basic test, but with multiple threads launching parts of many
 // reductions.
 //
-// Testing the multi-rank execution is currently reduced as it can hang when run
-// with num_ranks > devices->size(), for some GPUs (e.g. K20m).
-// To test the higher settings, increase num_ranks,
-// num_collectives_per_iteration and time_limit_micros.
+// To run test longer, increase num_ranks, num_collectives_per_iteration and
+// time_limit_micros.
 TYPED_TEST(NcclManagerTest, MultipleCallers) {
-  const int num_ranks = this->NumGPUs();
+  const int num_ranks = 4;
   const int num_collectives_per_iteration = 10;  // 1000;
-  const int num_threads = 3;
+  const int num_threads = num_ranks * 2;
   const int time_limit_micros = 100;  // 60 * 30 * 1000 * 1000;
 
   int64 start = Env::Default()->NowMicros();
-- 
GitLab


From 19b07482d6a80943c832d167df6f49c0a4008462 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <hinsu@google.com>
Date: Fri, 7 Dec 2018 09:57:49 -0800
Subject: [PATCH 203/873] Fix tensorflow nccl CODEOWNERS

PiperOrigin-RevId: 224531965
---
 CODEOWNERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CODEOWNERS b/CODEOWNERS
index bfcdc2a23f..cb3fa23124 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -1,7 +1,7 @@
 # Where component owners are known, add them here.
 
 /tenosrflow/core/debug @caisq
-/tensorflow/core/nccl/ @azaks @csigg
+/tensorflow/core/nccl/ @azaks2 @chsigg
 /tensorflow/core/platform/windows/ @mrry
 /tensorflow/core/platform/s3 @yongtang
 /tensorflow/go @asimshankar
-- 
GitLab


From cc079ee316c75993f86a19c6b8820d2bc36a33b6 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Fri, 7 Dec 2018 10:09:13 -0800
Subject: [PATCH 204/873] Lower the tol for metrics to 1e-5. And add a
 predict_tol.

PiperOrigin-RevId: 224534209
---
 tensorflow/contrib/distribute/python/keras_test.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index d66fc52cf2..dea208232a 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1302,16 +1302,18 @@ class TestDistributionStrategyCorrectness(test.TestCase,
   def test_correctness(self, distribution, use_numpy, use_validation_data):
 
     with self.cached_session():
-      weights_tolerance = 1e-5
-      metrics_tolerance = 1e-4
+      default_tolerance = 1e-5
+      weights_tolerance = default_tolerance
+      metrics_tolerance = default_tolerance
+      predict_tolerance = default_tolerance
 
       if isinstance(distribution, (mirrored_strategy.MirroredStrategy,
                                    mirrored_strategy.CoreMirroredStrategy)):
         # TODO(b/119257215): Weights are not exactly the same, so use lower
         # tolerance for now.
         weights_tolerance = 1e-4
+        predict_tolerance = 1e-4
 
-      metrics = ['mse']
       keras.backend.set_image_data_format('channels_last')
       np.random.seed(_RANDOM_SEED)
       random_seed.set_random_seed(_RANDOM_SEED)
@@ -1351,7 +1353,7 @@ class TestDistributionStrategyCorrectness(test.TestCase,
         model.compile(
             loss=keras.losses.mean_squared_error,
             optimizer=gradient_descent_keras.SGD(0.5),
-            metrics=metrics,
+            metrics=['mse'],
             distribute=with_distribution)
 
         training_inputs, eval_inputs, predict_inputs = (
@@ -1395,8 +1397,8 @@ class TestDistributionStrategyCorrectness(test.TestCase,
       self.assertAllClose(
           predict_with_ds,
           predict_without_ds,
-          atol=weights_tolerance,
-          rtol=weights_tolerance,
+          atol=predict_tolerance,
+          rtol=predict_tolerance,
           msg='Fail to assert predict results.')
 
       if not (isinstance(distribution, tpu_strategy.TPUStrategy) and
-- 
GitLab


From 89912db5f751778b0a24c7e77e4083bbe7c042a3 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Fri, 7 Dec 2018 10:14:25 -0800
Subject: [PATCH 205/873] Change API for TF 2.0 for tf.space_to_batch and
 tf.space_to_batch_nd.

PiperOrigin-RevId: 224535013
---
 tensorflow/python/ops/array_ops.py                  | 10 +++++++++-
 tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt  |  2 +-
 tensorflow/tools/api/golden/v2/tensorflow.pbtxt     |  4 ++++
 tensorflow/tools/compatibility/renames_v2.py        |  1 -
 tensorflow/tools/compatibility/reorders_v2.py       |  2 ++
 tensorflow/tools/compatibility/tf_upgrade_v2.py     |  8 ++++++++
 .../tools/compatibility/tf_upgrade_v2_test.py       | 13 +++++++++++++
 7 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 185452e1ab..216b08a8b0 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -2652,7 +2652,7 @@ def required_space_to_batch_paddings(input_shape,
     return result_paddings, result_crops
 
 
-@tf_export("nn.space_to_batch", v1=["nn.space_to_batch", "space_to_batch"])
+@tf_export(v1=["nn.space_to_batch", "space_to_batch"])
 @deprecation.deprecated_endpoints("space_to_batch")
 def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=redefined-builtin
   result = space_to_batch_nd(
@@ -2667,6 +2667,14 @@ def space_to_batch(input, paddings, block_size, name=None):  # pylint: disable=r
 space_to_batch.__doc__ = gen_array_ops.space_to_batch.__doc__
 
 
+@tf_export("space_to_batch", "nn.space_to_batch", v1=[])
+def space_to_batch_v2(input, block_shape, paddings, name=None):  # pylint: disable=redefined-builtin
+  return space_to_batch_nd(input, block_shape, paddings, name)
+
+
+space_to_batch_v2.__doc__ = gen_array_ops.space_to_batch_nd.__doc__
+
+
 @tf_export(v1=["nn.space_to_depth", "space_to_depth"])
 @deprecation.deprecated_endpoints("space_to_depth")
 def space_to_depth(input, block_size, name=None, data_format="NHWC"):  # pylint: disable=redefined-builtin
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
index 9e5e82483a..c75c75f2ef 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt
@@ -270,7 +270,7 @@ tf_module {
   }
   member_method {
     name: "space_to_batch"
-    argspec: "args=[\'input\', \'paddings\', \'block_size\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'input\', \'block_shape\', \'paddings\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "space_to_depth"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 542e5cbe9a..4432cae53b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -928,6 +928,10 @@ tf_module {
     name: "sort"
     argspec: "args=[\'values\', \'axis\', \'direction\', \'name\'], varargs=None, keywords=None, defaults=[\'-1\', \'ASCENDING\', \'None\'], "
   }
+  member_method {
+    name: "space_to_batch"
+    argspec: "args=[\'input\', \'block_shape\', \'paddings\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "space_to_batch_nd"
     argspec: "args=[\'input\', \'block_shape\', \'paddings\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index b27fd5acc7..e4ef8eb528 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -511,7 +511,6 @@ renames = {
     'tf.sets.set_intersection': 'tf.sets.intersection',
     'tf.sets.set_size': 'tf.sets.size',
     'tf.sets.set_union': 'tf.sets.union',
-    'tf.space_to_batch': 'tf.nn.space_to_batch',
     'tf.space_to_depth': 'tf.compat.v1.space_to_depth',
     'tf.sparse.matmul': 'tf.sparse.sparse_dense_matmul',
     'tf.sparse.merge': 'tf.compat.v1.sparse.merge',
diff --git a/tensorflow/tools/compatibility/reorders_v2.py b/tensorflow/tools/compatibility/reorders_v2.py
index 76f71d5804..e6596755c3 100644
--- a/tensorflow/tools/compatibility/reorders_v2.py
+++ b/tensorflow/tools/compatibility/reorders_v2.py
@@ -63,6 +63,7 @@ reorders = {
     'tf.nn.moments': ['x', 'axes', 'shift', 'name', 'keep_dims'],
     'tf.nn.pool': ['input', 'window_shape', 'pooling_type', 'padding', 'dilation_rate', 'strides', 'name', 'data_format'],
     'tf.nn.separable_conv2d': ['input', 'depthwise_filter', 'pointwise_filter', 'strides', 'padding', 'rate', 'name', 'data_format'],
+    'tf.nn.space_to_batch': ['input', 'paddings', 'block_size', 'name'],
     'tf.nn.space_to_depth': ['input', 'block_size', 'name', 'data_format'],
     'tf.nn.weighted_moments': ['x', 'axes', 'frequency_weights', 'name', 'keep_dims'],
     'tf.pad': ['tensor', 'paddings', 'mode', 'name', 'constant_values'],
@@ -85,6 +86,7 @@ reorders = {
     'tf.serialize_sparse': ['sp_input', 'name', 'out_type'],
     'tf.shape': ['input', 'name', 'out_type'],
     'tf.size': ['input', 'name', 'out_type'],
+    'tf.space_to_batch': ['input', 'paddings', 'block_size', 'name'],
     'tf.space_to_depth': ['input', 'block_size', 'name', 'data_format'],
     'tf.sparse.add': ['a', 'b', 'threshold', 'thresh'],
     'tf.sparse.concat': ['axis', 'sp_inputs', 'name', 'expand_nonconcat_dim', 'concat_dim'],
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py
index 0630ce28cf..b2477a541e 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py
@@ -51,6 +51,9 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.batch_to_space": {
             "block_size": "block_shape",
         },
+        "tf.space_to_batch": {
+            "block_size": "block_shape",
+        },
         "tf.constant": {
             "verify_shape": "verify_shape_is_now_always_true",
         },
@@ -294,6 +297,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
     self.manual_symbol_renames = {
         "tf.batch_to_space_nd":
             "tf.batch_to_space",
+        "tf.space_to_batch_nd":
+            "tf.space_to_batch",
+        "tf.nn.space_to_batch":
+            "tf.space_to_batch",
         "tf.extract_image_patches":
             "tf.image.extract_image_patches",
         "tf.gfile.Copy":
@@ -497,6 +504,7 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.argmax",
         "tf.argmin",
         "tf.batch_to_space",
+        "tf.nn.space_to_batch",
         "tf.boolean_mask",
         "tf.convert_to_tensor",
         "tf.nn.moments",
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index b362a8081f..4460ad5182 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -567,6 +567,19 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
+  def testSpacetoBatch(self):
+    text = "tf.space_to_batch_nd(input, shape, paddings, name)"
+    expected_text = "tf.space_to_batch(input, shape, paddings, name)"
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+    text = "tf.nn.space_to_batch(input, paddings, block_size, name)"
+    expected_text = (
+        "tf.space_to_batch(input=input, paddings=paddings, "
+        "block_size=block_size, name=name)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
   def testInTopK(self):
     text = "tf.math.in_top_k(a, b, c, n)"
     expected_text = (
-- 
GitLab


From a40ad0ce882e5cc482b16892162f2ce061aea055 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 7 Dec 2018 10:14:36 -0800
Subject: [PATCH 206/873] [tf.data] tiny test change

PiperOrigin-RevId: 224535049
---
 .../python/data/experimental/kernel_tests/optimization/BUILD | 1 -
 .../kernel_tests/optimization/noop_elimination_test.py       | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index f214944254..2fc243aa13 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -279,7 +279,6 @@ py_test(
         "//tensorflow/python:errors",
         "//tensorflow/python:math_ops",
         "//tensorflow/python/data/experimental/ops:optimization",
-        "//tensorflow/python/data/experimental/ops:optimization_options",
         "//tensorflow/python/data/kernel_tests:test_base",
         "//tensorflow/python/data/ops:dataset_ops",
     ],
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
index ce86bfa4e0..8058f53eea 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/noop_elimination_test.py
@@ -18,7 +18,6 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -42,10 +41,6 @@ class NoopEliminationTest(test_base.DatasetTestBase):
             ["FiniteRepeat", "FiniteSkip", "Prefetch", "MemoryCacheImpl"]))
     dataset = dataset.repeat(some_tensor).skip(5).take(-1).skip(0).repeat(
         1).prefetch(0).prefetch(1).cache()
-    options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
-    options.experimental_optimization.noop_elimination = True
-    dataset = dataset.with_options(options)
     self.assertDatasetProduces(dataset, expected_output=range(5))
 
 
-- 
GitLab


From 6d456192027c309a902fb63e15c57254e4d7ab42 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Fri, 7 Dec 2018 10:43:54 -0800
Subject: [PATCH 207/873] Fix TFLite iOS example Podfile

PiperOrigin-RevId: 224540375
---
 tensorflow/lite/examples/ios/camera/Podfile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/lite/examples/ios/camera/Podfile b/tensorflow/lite/examples/ios/camera/Podfile
index 96a0d23426..2ed5c8da1e 100644
--- a/tensorflow/lite/examples/ios/camera/Podfile
+++ b/tensorflow/lite/examples/ios/camera/Podfile
@@ -1,5 +1,7 @@
 platform :ios, '8.0'
 inhibit_all_warnings!
 
+project 'tflite_camera_example.xcodeproj'
+
 target 'tflite_camera_example'
        pod 'TensorFlowLite', '1.12.0'
-- 
GitLab


From c8e92c679ec4c098bb9404bf9f38a5f4f02e3c42 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 7 Dec 2018 10:52:12 -0800
Subject: [PATCH 208/873] Better propagation of variable shape inference.

PiperOrigin-RevId: 224541903
---
 .../resource_variable_ops_test.py             | 29 +++++++++++++++++++
 tensorflow/python/ops/array_ops.py            |  6 +++-
 tensorflow/python/ops/custom_gradient.py      |  4 +++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index b57d9d47aa..3992d6bdfb 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -33,6 +33,8 @@ from tensorflow.python.framework import tensor_util
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import custom_gradient
+from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -585,6 +587,33 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
     v.load(2.0)
     self.assertEqual(2.0, self.evaluate(v.value()))
 
+  def testShapePassedToGradient(self):
+    with ops.Graph().as_default():
+      @custom_gradient.custom_gradient
+      def differentiable_scatter_update(handle, indices, values):
+        with ops.control_dependencies([
+            resource_variable_ops.resource_scatter_update(
+                handle, indices, values)]):
+          new_handle = array_ops.identity(handle)
+
+        def grad(dresult):
+          self.assertIsNotNone(
+              tensor_util.constant_value(dresult.dense_shape))
+          return [dresult, None, None]
+
+        return new_handle, grad
+
+      var = variable_scope.get_variable(
+          "foo", shape=[20], initializer=init_ops.zeros_initializer,
+          dtype=dtypes.float64, use_resource=True)
+
+      indices = math_ops.range(10)
+      updates = math_ops.range(9, -1, -1, dtype=dtypes.float64)
+      new_handle = differentiable_scatter_update(var.handle, indices, updates)
+      gathered = resource_variable_ops.resource_gather(
+          new_handle, indices, dtype=var.dtype)
+      gradients_impl.gradients([gathered], [updates])
+
   def testToFromProtoCachedValue(self):
     with ops.Graph().as_default():
       v_def = resource_variable_ops.ResourceVariable(
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 216b08a8b0..9dabbffb13 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -78,7 +78,11 @@ def identity(input, name=None):  # pylint: disable=redefined-builtin
       return input._copy()  # pylint: disable=protected-access
     return input
   else:
-    return gen_array_ops.identity(input, name=name)
+    ret = gen_array_ops.identity(input, name=name)
+    # Propagate handle data for happier shape inference for resource variables.
+    if hasattr(input, "_handle_data"):
+      ret._handle_data = input._handle_data  # pylint: disable=protected-access
+    return ret
 
 
 # pylint: disable=redefined-builtin,protected-access
diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py
index 1426e8851c..d96601ac21 100644
--- a/tensorflow/python/ops/custom_gradient.py
+++ b/tensorflow/python/ops/custom_gradient.py
@@ -236,6 +236,10 @@ def _graph_mode_decorator(f, *args, **kwargs):
   original_tensors = all_tensors
   with ops.get_default_graph().gradient_override_map({"IdentityN": name}):
     all_tensors = array_ops.identity_n(all_tensors)
+  # Propagate handle data for happier shape inference for resource variables.
+  for i, t in enumerate(original_tensors):
+    if t.dtype == dtypes.resource and hasattr(t, "_handle_data"):
+      all_tensors[i]._handle_data = t._handle_data  # pylint: disable=protected-access
   tape_lib.record_operation(
       f.__name__, all_tensors, original_tensors, tape_grad_fn)
   for ot, t in zip(original_tensors, all_tensors):
-- 
GitLab


From 504afac32db928bfb196299030ac2af28ce06d9d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 10:58:45 -0800
Subject: [PATCH 209/873] Increase max buffer size.

PiperOrigin-RevId: 224543122
---
 .../lite/kernels/internal/optimized/optimized_ops.h       | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
index c7691e2763..c79b69a22e 100644
--- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
@@ -3582,8 +3582,8 @@ inline void AveragePool(const PoolParams& params,
             std::min(params.filter_height, input_height - in_y_origin);
         const int filter_count =
             (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
-        // 1280 required by Inception v3
-        static constexpr int kAccBufferMaxSize = 2048;
+        // 2560 is required by MobileNetV2 with depth multiplier 2.
+        static constexpr int kAccBufferMaxSize = 4096;
         TFLITE_DCHECK_LE(depth, kAccBufferMaxSize);
         uint16 acc[kAccBufferMaxSize];
         memset(acc, 0, depth * sizeof(acc[0]));
@@ -3748,8 +3748,8 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
         const int filter_y_start = std::max(0, -in_y_origin);
         const int filter_y_end =
             std::min(params.filter_height, input_height - in_y_origin);
-        // 2048 required by Inception v3
-        static constexpr int kAccBufferMaxSize = 2048;
+        // 2560 is required by MobileNetV2 with depth multiplier 2.
+        static constexpr int kAccBufferMaxSize = 4096;
         TFLITE_DCHECK_LE(depth, kAccBufferMaxSize);
         uint8 acc[kAccBufferMaxSize];
         memset(acc, 0, depth * sizeof(acc[0]));
-- 
GitLab


From 629834552e79edf13dbdf2306420698b3d64def2 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Fri, 7 Dec 2018 11:00:57 -0800
Subject: [PATCH 210/873] Fix up control_flow_ops_tests to work with v2

- Add cond_v2/while_v2 dependencies
- Add run_deprecated_v1 and run_v1_only where needed
- Remove non-strict cond tests

PiperOrigin-RevId: 224543529
---
 tensorflow/python/BUILD                       |   2 +
 .../python/ops/control_flow_ops_test.py       | 367 ++++++++++--------
 2 files changed, 199 insertions(+), 170 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index bc6dc413e7..cc36f1fc0e 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3236,6 +3236,7 @@ cuda_py_test(
     srcs = ["ops/control_flow_ops_test.py"],
     additional_deps = [
         ":array_ops",
+        ":cond_v2",
         ":control_flow_ops",
         ":embedding_ops",
         ":framework_for_generated_wrappers",
@@ -3251,6 +3252,7 @@ cuda_py_test(
         ":util",
         ":variable_scope",
         ":variables",
+        ":while_v2",
         "//tensorflow/python/eager:def_function",
     ],
 )
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index b19ec4bd61..0c18b7208f 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -21,9 +21,9 @@ from __future__ import print_function
 import collections
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.core.framework import graph_pb2
 from tensorflow.core.framework import node_def_pb2
-from tensorflow.python.client import session
 from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -127,56 +127,56 @@ class GroupTestCase(test_util.TensorFlowTestCase):
       node { name: "root" op: "NoOp" input: "^a" input: "^b" }
     """, self._StripGraph(gd))
 
+  @test_util.run_deprecated_v1
   def testPassingNonTensors(self):
-    with ops.Graph().as_default():
-      with self.assertRaises(TypeError):
-        control_flow_ops.group(1, 2)
+    with self.assertRaises(TypeError):
+      control_flow_ops.group(1, 2)
 
 
 class ShapeTestCase(test_util.TensorFlowTestCase):
 
   def testShape(self):
-    with ops.Graph().as_default():
-      tensor = constant_op.constant([1.0, 2.0])
-      self.assertEquals([2], tensor.get_shape())
-      self.assertEquals([2],
-                        control_flow_ops.with_dependencies(
-                            [constant_op.constant(1.0)], tensor).get_shape())
+    tensor = constant_op.constant([1.0, 2.0])
+    self.assertEquals([2], tensor.get_shape())
+    self.assertEquals([2],
+                      control_flow_ops.with_dependencies(
+                          [constant_op.constant(1.0)], tensor).get_shape())
 
 
 class WithDependenciesTestCase(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testTupleDependencies(self):
-    with ops.Graph().as_default():
-      counter = variable_scope.get_variable(
-          "my_counter", shape=[], initializer=init_ops.zeros_initializer())
-      increment_counter = state_ops.assign_add(counter, 1)
-      const_with_dep = control_flow_ops.with_dependencies(
-          (increment_counter, constant_op.constant(42)),
-          constant_op.constant(7))
-      with self.cached_session():
-        variables.global_variables_initializer().run()
-        self.assertEquals(0, self.evaluate(counter))
-        self.assertEquals(7, self.evaluate(const_with_dep))
-        self.assertEquals(1, self.evaluate(counter))
-
+    counter = variable_scope.get_variable(
+        "my_counter", shape=[], initializer=init_ops.zeros_initializer())
+    increment_counter = state_ops.assign_add(counter, 1)
+    const_with_dep = control_flow_ops.with_dependencies(
+        (increment_counter, constant_op.constant(42)),
+        constant_op.constant(7))
+
+    self.evaluate(variables.global_variables_initializer())
+    self.assertEquals(0, self.evaluate(counter))
+    self.assertEquals(7, self.evaluate(const_with_dep))
+    self.assertEquals(1, self.evaluate(counter))
+
+  @test_util.run_deprecated_v1
   def testListDependencies(self):
-    with ops.Graph().as_default():
-      counter = variable_scope.get_variable(
-          "my_counter", shape=[], initializer=init_ops.zeros_initializer())
-      increment_counter = state_ops.assign_add(counter, 1)
-      const_with_dep = control_flow_ops.with_dependencies(
-          [increment_counter, constant_op.constant(42)],
-          constant_op.constant(7))
-      with self.cached_session():
-        variables.global_variables_initializer().run()
-        self.assertEquals(0, self.evaluate(counter))
-        self.assertEquals(7, self.evaluate(const_with_dep))
-        self.assertEquals(1, self.evaluate(counter))
+    counter = variable_scope.get_variable(
+        "my_counter", shape=[], initializer=init_ops.zeros_initializer())
+    increment_counter = state_ops.assign_add(counter, 1)
+    const_with_dep = control_flow_ops.with_dependencies(
+        [increment_counter, constant_op.constant(42)],
+        constant_op.constant(7))
+
+    self.evaluate(variables.global_variables_initializer())
+    self.assertEquals(0, self.evaluate(counter))
+    self.assertEquals(7, self.evaluate(const_with_dep))
+    self.assertEquals(1, self.evaluate(counter))
 
 
 class SwitchTestCase(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testIndexedSlicesWithDenseShape(self):
     with self.cached_session():
       data = ops.IndexedSlices(
@@ -190,68 +190,64 @@ class SwitchTestCase(test_util.TensorFlowTestCase):
       self.assertAllEqual([1, 2, 3], switch_true.values.eval())
       self.assertAllEqual([0, 1], switch_true.indices.eval())
 
+  @test_util.run_deprecated_v1
   def testIndexedSlicesGradient(self):
-    with ops.Graph().as_default():
-      embedding_matrix = variable_scope.get_variable(
-          "embedding_matrix", [5, 5],
-          initializer=init_ops.random_normal_initializer())
-
-      def cond(it, _):
-        return it < 5
-
-      def body(it, cost):
-        embedding = embedding_ops.embedding_lookup(embedding_matrix + 0.0, [0])
-        cost += math_ops.reduce_sum(embedding)
-        return it + 1, cost
-
-      _, cost = control_flow_ops.while_loop(
-          cond, body, [constant_op.constant(0),
-                       constant_op.constant(0.0)])
-      optimizer = momentum.MomentumOptimizer(0.1, 0.9)
-      train_op = optimizer.minimize(cost)
-      with self.cached_session() as sess:
-        self.evaluate(variables.global_variables_initializer())
-        for _ in range(10):
-          self.evaluate([train_op])
+    embedding_matrix = variable_scope.get_variable(
+        "embedding_matrix", [5, 5],
+        initializer=init_ops.random_normal_initializer())
+
+    def cond(it, _):
+      return it < 5
+
+    def body(it, cost):
+      embedding = embedding_ops.embedding_lookup(embedding_matrix + 0.0, [0])
+      cost += math_ops.reduce_sum(embedding)
+      return it + 1, cost
+
+    _, cost = control_flow_ops.while_loop(
+        cond, body, [constant_op.constant(0),
+                     constant_op.constant(0.0)])
+    optimizer = momentum.MomentumOptimizer(0.1, 0.9)
+    train_op = optimizer.minimize(cost)
+    with self.cached_session():
+      self.evaluate(variables.global_variables_initializer())
+      for _ in range(10):
+        self.evaluate([train_op])
 
   def testResourceReadInLoop(self):
-    with ops.Graph().as_default():
-      embedding_matrix = variable_scope.get_variable(
-          "embedding_matrix",
-          initializer=[[2.0], [3.0]],
-          use_resource=True)
+    embedding_matrix = variable_scope.get_variable(
+        "embedding_matrix", initializer=[[2.0], [3.0]], use_resource=True)
 
-      def cond(it, _):
-        return it < 5
+    def cond(it, _):
+      return it < 5
 
-      def body(it, cost):
-        embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
-        cost += math_ops.reduce_sum(embedding)
-        return it + 1, cost
+    def body(it, cost):
+      embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
+      cost += math_ops.reduce_sum(embedding)
+      return it + 1, cost
 
-      _, cost = control_flow_ops.while_loop(
-          cond, body, [constant_op.constant(0),
-                       constant_op.constant(0.0)])
-      with self.cached_session() as sess:
-        self.evaluate(variables.global_variables_initializer())
-        self.assertAllEqual(10.0, self.evaluate(cost))
+    _, cost = control_flow_ops.while_loop(
+        cond, body, [constant_op.constant(0),
+                     constant_op.constant(0.0)])
+    with self.cached_session():
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllEqual(10.0, self.evaluate(cost))
 
   def doTestIndexedSlicesGradientInCondInWhileLoop(self, use_resource=False):
-    with ops.Graph().as_default():
-      embedding_matrix = variable_scope.get_variable(
-          "embedding_matrix", [5, 5],
-          initializer=init_ops.random_normal_initializer(),
-          use_resource=use_resource)
-
-      def cond(it, _):
-        return it < 5
-
-      def body(it, cost):
-        embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
-        cost = control_flow_ops.cond(
-            math_ops.equal(it, 3), lambda: math_ops.square(cost),
-            lambda: cost + math_ops.reduce_sum(embedding))
-        return it + 1, cost
+    embedding_matrix = variable_scope.get_variable(
+        "embedding_matrix", [5, 5],
+        initializer=init_ops.random_normal_initializer(),
+        use_resource=use_resource)
+
+    def cond(it, _):
+      return it < 5
+
+    def body(it, cost):
+      embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
+      cost = control_flow_ops.cond(
+          math_ops.equal(it, 3), lambda: math_ops.square(cost),
+          (lambda: cost + math_ops.reduce_sum(embedding)))
+      return it + 1, cost
 
       _, cost = control_flow_ops.while_loop(
           cond, body, [constant_op.constant(0),
@@ -269,7 +265,7 @@ class SwitchTestCase(test_util.TensorFlowTestCase):
       static_grads = math_ops.segment_sum(static_grads.values,
                                           static_grads.indices)
 
-      with self.cached_session() as sess:
+      with self.cached_session():
         self.evaluate(variables.global_variables_initializer())
         self.assertAllEqual(*self.evaluate([static_grads, dynamic_grads]))
 
@@ -279,6 +275,7 @@ class SwitchTestCase(test_util.TensorFlowTestCase):
   def testIndexedSlicesGradientInCondInWhileLoopResource(self):
     self.doTestIndexedSlicesGradientInCondInWhileLoop(use_resource=True)
 
+  @test_util.run_v1_only("b/120545219")
   def testIndexedSlicesWithShapeGradientInWhileLoop(self):
     for dtype in [dtypes.float32, dtypes.float64]:
       with self.cached_session() as sess:
@@ -308,6 +305,7 @@ class SwitchTestCase(test_util.TensorFlowTestCase):
         self.assertEquals(o, 20)
         self.assertAllEqual(grad, [1] * num_steps)
 
+  @test_util.run_v1_only("b/120545219")
   def testIndexedSlicesWithDynamicShapeGradientInWhileLoop(self):
     for dtype in [dtypes.float32, dtypes.float64]:
       with self.cached_session() as sess:
@@ -335,105 +333,94 @@ class SwitchTestCase(test_util.TensorFlowTestCase):
         self.assertEquals(o, 6)
         self.assertAllEqual(grad, [1] * 3)
 
+  @test_util.run_deprecated_v1
   def testGradientThroughSingleBranchOutsideOfContext(self):
-    with self.cached_session():
-      x = constant_op.constant(2.)
-      s = constant_op.constant(True)
-      x_false, x_true = control_flow_ops.switch(x, s)
-      grad_x_true = gradients_impl.gradients(x_true, x)[0]
-      grad_x_false = gradients_impl.gradients(x_false, x)[0]
-      self.assertEquals(grad_x_true.eval(), 1.)
-      self.assertEquals(grad_x_false.eval(), 0.)
+    x = constant_op.constant(2.)
+    s = constant_op.constant(True)
+    x_false, x_true = control_flow_ops.switch(x, s)
+    grad_x_true = gradients_impl.gradients(x_true, x)[0]
+    grad_x_false = gradients_impl.gradients(x_false, x)[0]
+    self.assertEquals(self.evaluate(grad_x_true), 1.)
+    self.assertEquals(self.evaluate(grad_x_false), 0.)
 
 
 class CondTest(test_util.TensorFlowTestCase):
 
   def testCondTrue(self):
-    with ops.Graph().as_default():
-      with session.Session():
-        x = constant_op.constant(2)
-        y = constant_op.constant(5)
-        z = control_flow_ops.cond(
-            math_ops.less(x, y), lambda: math_ops.multiply(x, 17),
-            lambda: math_ops.add(y, 23))
-        self.assertEquals(z.eval(), 34)
+    x = constant_op.constant(2)
+    y = constant_op.constant(5)
+    z = control_flow_ops.cond(
+        math_ops.less(
+            x,
+            y), lambda: math_ops.multiply(x, 17), lambda: math_ops.add(y, 23))
+    self.assertEquals(self.evaluate(z), 34)
 
   def testCondFalse(self):
-    with ops.Graph().as_default():
-      with session.Session():
-        x = constant_op.constant(2)
-        y = constant_op.constant(1)
-        z = control_flow_ops.cond(
-            math_ops.less(x, y), lambda: math_ops.multiply(x, 17),
-            lambda: math_ops.add(y, 23))
-        self.assertEquals(z.eval(), 24)
+    x = constant_op.constant(2)
+    y = constant_op.constant(1)
+    z = control_flow_ops.cond(
+        math_ops.less(
+            x,
+            y), lambda: math_ops.multiply(x, 17), lambda: math_ops.add(y, 23))
+    self.assertEquals(self.evaluate(z), 24)
 
   def testCondTrueLegacy(self):
-    with ops.Graph().as_default():
-      with session.Session():
-        x = constant_op.constant(2)
-        y = constant_op.constant(5)
-        z = control_flow_ops.cond(
-            math_ops.less(x, y), fn1=lambda: math_ops.multiply(x, 17),
-            fn2=lambda: math_ops.add(y, 23))
-        self.assertEquals(z.eval(), 34)
+    x = constant_op.constant(2)
+    y = constant_op.constant(5)
+    z = control_flow_ops.cond(
+        math_ops.less(x, y),
+        fn1=lambda: math_ops.multiply(x, 17),
+        fn2=lambda: math_ops.add(y, 23))
+    self.assertEquals(self.evaluate(z), 34)
 
   def testCondFalseLegacy(self):
-    with ops.Graph().as_default():
-      with session.Session():
-        x = constant_op.constant(2)
-        y = constant_op.constant(1)
-        z = control_flow_ops.cond(
-            math_ops.less(x, y), fn1=lambda: math_ops.multiply(x, 17),
-            fn2=lambda: math_ops.add(y, 23))
-        self.assertEquals(z.eval(), 24)
-
+    x = constant_op.constant(2)
+    y = constant_op.constant(1)
+    z = control_flow_ops.cond(
+        math_ops.less(x, y),
+        fn1=lambda: math_ops.multiply(x, 17),
+        fn2=lambda: math_ops.add(y, 23))
+    self.assertEquals(self.evaluate(z), 24)
+
+  @test_util.run_deprecated_v1
   def testCondModifyBoolPred(self):
     # This test in particular used to fail only when running in GPU, hence
     # use_gpu=True.
-    with ops.Graph().as_default():
-      with session.Session() as sess:
-        bool_var = variable_scope.get_variable("bool_var", dtype=dtypes.bool,
-                                               initializer=True)
-        cond_on_bool_var = control_flow_ops.cond(
-            pred=bool_var,
-            true_fn=lambda: state_ops.assign(bool_var, False),
-            false_fn=lambda: True)
-        self.evaluate(bool_var.initializer)
-        self.assertEquals(self.evaluate(cond_on_bool_var), False)
-        self.assertEquals(self.evaluate(cond_on_bool_var), True)
+    with test_util.use_gpu():
+      bool_var = variable_scope.get_variable(
+          "bool_var", dtype=dtypes.bool, initializer=True)
+      cond_on_bool_var = control_flow_ops.cond(
+          pred=bool_var,
+          true_fn=lambda: state_ops.assign(bool_var, False),
+          false_fn=lambda: True)
+      self.evaluate(bool_var.initializer)
+      self.assertEquals(self.evaluate(cond_on_bool_var), False)
+      self.assertEquals(self.evaluate(cond_on_bool_var), True)
 
   def testCondMissingArg1(self):
-    with ops.Graph().as_default():
-      with session.Session():
-        x = constant_op.constant(1)
-        with self.assertRaises(TypeError):
-          control_flow_ops.cond(True, false_fn=lambda: x)
+    x = constant_op.constant(1)
+    with self.assertRaises(TypeError):
+      control_flow_ops.cond(True, false_fn=lambda: x)
 
   def testCondMissingArg2(self):
-    with ops.Graph().as_default():
-      with session.Session():
-        x = constant_op.constant(1)
-        with self.assertRaises(TypeError):
-          control_flow_ops.cond(True, lambda: x)
+    x = constant_op.constant(1)
+    with self.assertRaises(TypeError):
+      control_flow_ops.cond(True, lambda: x)
 
   def testCondDuplicateArg1(self):
-    with ops.Graph().as_default():
-      with session.Session():
-        x = constant_op.constant(1)
-        with self.assertRaises(TypeError):
-          control_flow_ops.cond(True, lambda: x, lambda: x, fn1=lambda: x)
+    x = constant_op.constant(1)
+    with self.assertRaises(TypeError):
+      control_flow_ops.cond(True, lambda: x, lambda: x, fn1=lambda: x)
 
   def testCondDuplicateArg2(self):
-    with ops.Graph().as_default():
-      with session.Session():
-        x = constant_op.constant(1)
-        with self.assertRaises(TypeError):
-          control_flow_ops.cond(True, lambda: x, lambda: x, fn2=lambda: x)
+    x = constant_op.constant(1)
+    with self.assertRaises(TypeError):
+      control_flow_ops.cond(True, lambda: x, lambda: x, fn2=lambda: x)
 
 
 class ContextTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testCondContext(self):
     with self.cached_session() as sess:
       x = constant_op.constant(2)
@@ -463,12 +450,15 @@ class ContextTest(test_util.TensorFlowTestCase):
               control_flow_ops.WhileContext.from_proto(
                   control_flow_context.to_proto()).to_proto())
 
+  @test_util.run_deprecated_v1
   def testWhileContext(self):
     self._testWhileContextHelper()
 
+  @test_util.run_deprecated_v1
   def testWhileContextWithMaximumIterations(self):
     self._testWhileContextHelper(maximum_iterations=10)
 
+  @test_util.run_deprecated_v1
   def testControlContextImportScope(self):
     class NoABCControlFlowContext(control_flow_ops.ControlFlowContext):
       """A noop wrapper around `ControlFlowContext`.
@@ -591,6 +581,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
       if check_cond:
         self.assertAllEqualNested(result_case, expected_value_false)
 
+  @test_util.run_deprecated_v1
   def test_int(self):
     shape = tensor_shape.TensorShape([])
     fn_true = lambda: 1
@@ -600,6 +591,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self._testShape(fn_true, fn_false, shape, strict=True)
     self._testReturnValues(fn_true, fn_false, 1, 2, strict=True)
 
+  @test_util.run_deprecated_v1
   def test_float(self):
     shape = tensor_shape.TensorShape([])
     fn_true = lambda: 1.0
@@ -607,12 +599,14 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self._testShape(fn_true, fn_false, shape)
     self._testReturnValues(fn_true, fn_false, 1.0, 2.0)
 
+  @test_util.run_deprecated_v1
   def test_noop(self):
     shape = tensor_shape.TensorShape(None)
     self._testShape(control_flow_ops.no_op, control_flow_ops.no_op, shape)
     self._testReturnValues(control_flow_ops.no_op, control_flow_ops.no_op,
                            True, False, check_cond=False)
 
+  @test_util.run_deprecated_v1
   def test_string(self):
     shape = tensor_shape.TensorShape([])
     fn_true = lambda: "abc"
@@ -620,6 +614,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self._testShape(fn_true, fn_false, shape)
     self._testReturnValues(fn_true, fn_false, b"abc", b"xyz")
 
+  @test_util.run_deprecated_v1
   def test_variable(self):
     shape = tensor_shape.TensorShape([])
     fn_true = lambda: variables.Variable(3.0)
@@ -627,6 +622,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self._testShape(fn_true, fn_false, shape)
     self._testReturnValues(fn_true, fn_false, 3.0, 4.0)
 
+  @test_util.run_v1_only("b/120553181")
   def test_none(self):
     fn_none = lambda: None
     fn_tensor = lambda: constant_op.constant(1)
@@ -637,6 +633,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     with self.assertRaises(ValueError):
       control_flow_ops.cond(constant_op.constant(True), fn_tensor, fn_none)
 
+  @test_util.run_deprecated_v1
   def test_tensors(self):
 
     def _build_true_branch(dtype):
@@ -665,6 +662,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
                              (np.zeros([2, 2]), np.ones([3, 3])),
                              (np.ones([2, 2]), np.zeros([3, 3])))
 
+  @test_util.run_deprecated_v1
   def test_tensors_unknown_shape(self):
 
     def _build_true_branch(dtype):
@@ -693,6 +691,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
                              feed_dict={true_tensor: np.zeros([2, 2]),
                                         false_tensor: np.ones([2, 2])})
 
+  @test_util.run_deprecated_v1
   def test_sparse_tensors(self):
     shape = tensor_shape.TensorShape([None, None])
 
@@ -708,11 +707,14 @@ class DataTypesTest(test_util.TensorFlowTestCase):
                                              values=[1, 2], dense_shape=[3, 4])
     value2 = sparse_tensor.SparseTensorValue(indices=[[0, 0], [2, 1]],
                                              values=[3, 4], dense_shape=[3, 4])
-    self._testShape(true_fn, false_fn, shape)
-    self._testReturnValues(true_fn, false_fn, value1, value2)
+    # Non-strict cond is only available in v1
+    if not tf2.enabled():
+      self._testShape(true_fn, false_fn, shape)
+      self._testReturnValues(true_fn, false_fn, value1, value2)
     self._testShape(true_fn, false_fn, [shape], strict=True)
     self._testReturnValues(true_fn, false_fn, [value1], [value2], strict=True)
 
+  @test_util.run_deprecated_v1
   def test_tensors_with_partially_specified_shapes(self):
 
     def _build_branch(dtype, shape):
@@ -742,6 +744,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
                                         true_tensors[2]: np.ones([3, 3]),
                                         false_tensors[2]: np.ones([3, 3])})
 
+  @test_util.run_deprecated_v1
   def test_tensor_arrays(self):
     element_shape = tensor_shape.TensorShape([2])
     ta1 = _create_tensor_array(4, element_shape)
@@ -751,6 +754,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     fn_false = lambda: ta2
     self._testShape(fn_true, fn_false, shape)
 
+  @test_util.run_deprecated_v1
   def test_tensor_array_reads(self):
     shape = tensor_shape.TensorShape([2])
     ta = _create_tensor_array(4, shape)
@@ -758,6 +762,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     fn_false = lambda: ta.read(1)
     self._testShape(fn_true, fn_false, shape)
 
+  @test_util.run_deprecated_v1
   def test_list(self):
     shape = [tensor_shape.TensorShape([]), tensor_shape.TensorShape([]),
              tensor_shape.TensorShape([])]
@@ -766,6 +771,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self._testShape(fn_true, fn_false, shape)
     self._testReturnValues(fn_true, fn_false, [1, 2, 3.0], [3, 4, 5.0])
 
+  @test_util.run_v1_only("Non-strict cond is only available in v1")
   def test_non_strict(self):
     shape = tensor_shape.TensorShape([])
     fn_tensor = lambda: constant_op.constant(1)
@@ -778,6 +784,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self._testReturnValues(fn_tensor, fn_tuple, 1, 3)
     self._testReturnValues(fn_list, fn_tuple, 2, 3)
 
+  @test_util.run_v1_only("b/120553181")
   def test_singleton_strict(self):
     fn_tensor = lambda: constant_op.constant(1)
     fn_list = lambda: [constant_op.constant(2)]
@@ -799,36 +806,46 @@ class DataTypesTest(test_util.TensorFlowTestCase):
       control_flow_ops.case([(constant_op.constant(True), fn_list)], fn_tuple,
                             strict=True)
 
+  @test_util.run_deprecated_v1
   def test_singleton_list(self):
     shape = tensor_shape.TensorShape([])
     fn_true = lambda: [constant_op.constant(1)]
     fn_false = lambda: [constant_op.constant(3)]
-    self._testShape(fn_true, fn_false, shape)
-    self._testReturnValues(fn_true, fn_false, 1, 3)
+    # Non-strict cond is only available in v1
+    if not tf2.enabled():
+      self._testShape(fn_true, fn_false, shape)
+      self._testReturnValues(fn_true, fn_false, 1, 3)
     self._testShape(fn_true, fn_false, [shape], strict=True)
     self._testReturnValues(fn_true, fn_false, [1], [3], strict=True)
 
+  @test_util.run_deprecated_v1
   def test_singleton_tuple(self):
     shape = tensor_shape.TensorShape([])
     fn_true = lambda: (constant_op.constant(1),)
     fn_false = lambda: (constant_op.constant(3),)
-    self._testShape(fn_true, fn_false, shape)
-    self._testReturnValues(fn_true, fn_false, 1, 3)
+    # Non-strict cond is only available in v1
+    if not tf2.enabled():
+      self._testShape(fn_true, fn_false, shape)
+      self._testReturnValues(fn_true, fn_false, 1, 3)
     self._testShape(fn_true, fn_false, (shape,), strict=True)
     self._testReturnValues(fn_true, fn_false, (1,), (3,),
                            strict=True)
 
+  @test_util.run_deprecated_v1
   def test_singleton_namedtuple(self):
     shape = tensor_shape.TensorShape([])
     fn_true = lambda: SingletonTestTuple(constant_op.constant(1))
     fn_false = lambda: SingletonTestTuple(constant_op.constant(3))
-    self._testShape(fn_true, fn_false, shape)
-    self._testReturnValues(fn_true, fn_false, 1, 3)
+    # Non-strict cond is only available in v1
+    if not tf2.enabled():
+      self._testShape(fn_true, fn_false, shape)
+      self._testReturnValues(fn_true, fn_false, 1, 3)
     self._testShape(fn_true, fn_false, SingletonTestTuple(shape),
                     strict=True)
     self._testReturnValues(fn_true, fn_false, SingletonTestTuple(1),
                            SingletonTestTuple(3), strict=True)
 
+  @test_util.run_deprecated_v1
   def test_tuple(self):
     shape = (tensor_shape.TensorShape([]), tensor_shape.TensorShape([]))
     fn_true = lambda: (constant_op.constant(1), 2)
@@ -836,6 +853,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self._testShape(fn_true, fn_false, shape)
     self._testReturnValues(fn_true, fn_false, (1, 2), (3, 4))
 
+  @test_util.run_deprecated_v1
   def test_namedtuple(self):
     shape = TestTuple(tensor_shape.TensorShape([]),
                       tensor_shape.TensorShape([]))
@@ -844,6 +862,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
     self._testShape(fn_true, fn_false, shape)
     self._testReturnValues(fn_true, fn_false, TestTuple(1, 2), TestTuple(3, 4))
 
+  @test_util.run_deprecated_v1
   def test_nested(self):
     shape = [tensor_shape.TensorShape([]),
              TestTuple(tensor_shape.TensorShape([]),
@@ -869,6 +888,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
         [11, TestTuple(12, [13, 14]),
          np.ones([5, 5]), 16])
 
+  @test_util.run_deprecated_v1
   def test_cond_inside_while_loop(self):
 
     def body(i, matrix):
@@ -890,6 +910,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
 class CaseTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testCase_withDefault(self):
     x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
     conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)),
@@ -901,6 +922,7 @@ class CaseTest(test_util.TensorFlowTestCase):
       self.assertEqual(sess.run(output, feed_dict={x: 2}), 4)
       self.assertEqual(sess.run(output, feed_dict={x: 3}), 6)
 
+  @test_util.run_deprecated_v1
   def testCase_multiple_matches_exclusive(self):
     x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
     conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)),
@@ -914,6 +936,7 @@ class CaseTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
         sess.run(output, feed_dict={x: 2})
 
+  @test_util.run_deprecated_v1
   def testCase_multiple_matches_non_exclusive(self):
     x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
     conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)),
@@ -926,6 +949,7 @@ class CaseTest(test_util.TensorFlowTestCase):
       self.assertEqual(sess.run(output, feed_dict={x: 2}), 4)
       self.assertEqual(sess.run(output, feed_dict={x: 3}), 8)
 
+  @test_util.run_deprecated_v1
   def testCase_withoutDefault(self):
     x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
     conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2)),
@@ -939,6 +963,7 @@ class CaseTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegexp(errors.InvalidArgumentError, "Input error:"):
         sess.run(output, feed_dict={x: 4})
 
+  @test_util.run_deprecated_v1
   def testCase_withoutDefault_oneCondition(self):
     x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
     conditions = [(math_ops.equal(x, 1), lambda: constant_op.constant(2))]
@@ -980,6 +1005,7 @@ class WhileLoopTestCase(test_util.TensorFlowTestCase):
     # Expect a tuple since that is what the body returns.
     self.assertEqual(self.evaluate(r), (10,))
 
+  @test_util.run_deprecated_v1
   def testWhileLoopSameReturnShape_False(self):
     i = constant_op.constant(0)
     c = lambda i, _: math_ops.less(i, 10)
@@ -1005,6 +1031,7 @@ class WhileLoopTestCase(test_util.TensorFlowTestCase):
 
 class AssertTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testAssert(self):
     i = constant_op.constant(0)
     c = control_flow_ops.Assert(i < 10, [i, [10], [i + 1]])
-- 
GitLab


From 49aa21b5b6468309e0acef82a6a15314ba8f5a8f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 11:24:44 -0800
Subject: [PATCH 211/873] Automated rollback of commit
 51561e0354a1556e848c920cd47a718360dc720d

PiperOrigin-RevId: 224548216
---
 .../python/estimator_training_test.py         |  7 ++--
 .../python/multi_worker_test_base.py          | 10 ++----
 .../distribute/distribute_coordinator.py      | 35 ++++++-------------
 3 files changed, 16 insertions(+), 36 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/estimator_training_test.py b/tensorflow/contrib/distribute/python/estimator_training_test.py
index 3f55a8a1c8..b369a7fefe 100644
--- a/tensorflow/contrib/distribute/python/estimator_training_test.py
+++ b/tensorflow/contrib/distribute/python/estimator_training_test.py
@@ -375,13 +375,11 @@ class DistributeCoordinatorIntegrationTest(
     threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                  cluster_spec, train_distribute,
                                                  eval_distribute)
-    threads_to_join = []
     for task_type, ts in threads.items():
       if task_type == PS:
         continue
       for t in ts:
-        threads_to_join.append(t)
-    self.join_independent_workers(threads_to_join)
+        t.join()
 
     estimator = self._get_estimator(train_distribute, eval_distribute)
     self._inspect_train_and_eval_events(estimator)
@@ -415,7 +413,8 @@ class DistributeCoordinatorIntegrationTest(
     threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                  cluster_spec, train_distribute,
                                                  eval_distribute)
-    self.join_independent_workers([threads[WORKER][0], threads[EVALUATOR][0]])
+    threads[WORKER][0].join()
+    threads[EVALUATOR][0].join()
 
     estimator = self._get_estimator(train_distribute, eval_distribute)
     self._inspect_train_and_eval_events(estimator)
diff --git a/tensorflow/contrib/distribute/python/multi_worker_test_base.py b/tensorflow/contrib/distribute/python/multi_worker_test_base.py
index b05aac431f..147c9b83f8 100644
--- a/tensorflow/contrib/distribute/python/multi_worker_test_base.py
+++ b/tensorflow/contrib/distribute/python/multi_worker_test_base.py
@@ -40,7 +40,6 @@ from tensorflow.python.client import session
 from tensorflow.python.estimator import run_config
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import coordinator
 from tensorflow.python.training import server_lib
 
 ASSIGNED_PORTS = set()
@@ -361,7 +360,6 @@ class IndependentWorkerTestBase(test.TestCase):
     self._mock_os_env = MockOsEnv()
     self._mock_context = test.mock.patch.object(os, 'environ',
                                                 self._mock_os_env)
-    self._coord = coordinator.Coordinator()
     super(IndependentWorkerTestBase, self).setUp()
     self._mock_context.__enter__()
 
@@ -370,9 +368,8 @@ class IndependentWorkerTestBase(test.TestCase):
     super(IndependentWorkerTestBase, self).tearDown()
 
   def _task_thread(self, task_fn, tf_config, *args, **kwargs):
-    with self._coord.stop_on_exception():
-      os.environ['TF_CONFIG'] = json.dumps(tf_config)
-      task_fn(*args, **kwargs)
+    os.environ['TF_CONFIG'] = json.dumps(tf_config)
+    task_fn(*args, **kwargs)
 
   def _run_task_in_thread(self, task_fn, cluster_spec, task_type, task_id,
                           *args, **kwargs):
@@ -406,6 +403,3 @@ class IndependentWorkerTestBase(test.TestCase):
                                      *args, **kwargs)
         threads[task_type].append(t)
     return threads
-
-  def join_independent_workers(self, worker_threads):
-    self._coord.join(worker_threads)
diff --git a/tensorflow/python/distribute/distribute_coordinator.py b/tensorflow/python/distribute/distribute_coordinator.py
index 78c995a578..c0f9b8a1fd 100644
--- a/tensorflow/python/distribute/distribute_coordinator.py
+++ b/tensorflow/python/distribute/distribute_coordinator.py
@@ -29,7 +29,6 @@ from tensorflow.python.client import session
 from tensorflow.python.distribute import distribute_coordinator_context
 from tensorflow.python.distribute import multi_worker_util
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import coordinator
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import server_lib
 
@@ -329,8 +328,7 @@ def _run_single_worker(worker_fn,
                        task_id,
                        session_config,
                        rpc_layer="",
-                       worker_barrier=None,
-                       coord=None):
+                       worker_barrier=None):
   """Runs a single worker by calling `worker_fn` under context."""
   session_config = copy.deepcopy(session_config)
   strategy = copy.deepcopy(strategy)
@@ -352,11 +350,7 @@ def _run_single_worker(worker_fn,
       rpc_layer=rpc_layer,
       worker_barrier=worker_barrier)
   with context:
-    if coord:
-      with coord.stop_on_exception():
-        return worker_fn(strategy)
-    else:
-      return worker_fn(strategy)
+    return worker_fn(strategy)
 
 
 def _split_cluster_for_evaluator(cluster_spec, task_type):
@@ -429,7 +423,6 @@ def _run_std_server(cluster_spec=None,
 def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                               cluster_spec, session_config, rpc_layer):
   """Runs a standalone client for between-graph replication."""
-  coord = coordinator.Coordinator()
   eval_thread = None
   if _TaskType.EVALUATOR in cluster_spec.jobs:
     eval_thread = threading.Thread(
@@ -438,7 +431,6 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
               session_config),
         kwargs={
             "rpc_layer": rpc_layer,
-            "coord": coord,
         })
     eval_thread.start()
 
@@ -452,18 +444,18 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                 session_config),
           kwargs={
               "rpc_layer": rpc_layer,
-              "worker_barrier": worker_barrier,
-              "coord": coord,
+              "worker_barrier": worker_barrier
           })
       t.start()
       threads.append(t)
 
+  # TODO(yuefengz): wrap threads into thread coordinator?
+  for t in threads:
+    t.join()
+
+  # TODO(yuefengz): is it necessary to join eval thread?
   if eval_thread:
-    # TODO(yuefengz): is it necessary to join eval thread?
-    threads_to_join = threads + [eval_thread]
-  else:
-    threads_to_join = threads
-  coord.join(threads_to_join)
+    eval_thread.join()
 
   # TODO(yuefengz): we probably want to return results from all workers?
   return None
@@ -472,7 +464,6 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
 def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                          cluster_spec, session_config, rpc_layer):
   """Runs a standalone client for in-graph replication."""
-  coord = coordinator.Coordinator()
   eval_thread = None
   if _TaskType.EVALUATOR in cluster_spec.jobs:
     eval_thread = threading.Thread(
@@ -481,7 +472,6 @@ def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
               session_config),
         kwargs={
             "rpc_layer": rpc_layer,
-            "coord": coord,
         })
     eval_thread.start()
 
@@ -492,12 +482,9 @@ def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
       None,
       None,
       session_config,
-      rpc_layer=rpc_layer,
-      coord=coord)
-
+      rpc_layer=rpc_layer)
   if eval_thread:
-    coord.join([eval_thread])
-
+    eval_thread.join()
   return worker_result
 
 
-- 
GitLab


From 0b2afecc4b5d76c4d4976b3fb8155c2ac8ee6c5a Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Dec 2018 19:41:11 +0000
Subject: [PATCH 212/873] Add float16 suport for scatter_max/scatter_min on gpu

This fix tries to address the issue raised in 24219 where
there were no float16 supports for scatter_max/scatter_min on gpu.
This fix adds the float16 support for scatter_max/scatter_min on gpu.

This fix fixes 24219.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/scatter_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/scatter_op.cc b/tensorflow/core/kernels/scatter_op.cc
index 0fbde764d5..ee3c583347 100644
--- a/tensorflow/core/kernels/scatter_op.cc
+++ b/tensorflow/core/kernels/scatter_op.cc
@@ -288,7 +288,7 @@ TF_CALL_ALL_TYPES(REGISTER_SCATTER_UPDATE_CPU);
 #define REGISTER_SCATTER_UPDATE_GPU(type) REGISTER_SCATTER_UPDATE(type, GPU);
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_GPU);
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_GPU);
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_MINMAX_GPU);
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_UPDATE_GPU);
 
 #endif  // GOOGLE_CUDA
-- 
GitLab


From fda42f7afdd11155d2267669e2b41f94beb725de Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Dec 2018 19:45:37 +0000
Subject: [PATCH 213/873] Enable template specification for ScatterMax/Min on
 gpu

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/core/kernels/scatter_op_gpu.cu.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/kernels/scatter_op_gpu.cu.cc b/tensorflow/core/kernels/scatter_op_gpu.cu.cc
index 0df329310f..d4defb8503 100644
--- a/tensorflow/core/kernels/scatter_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/scatter_op_gpu.cu.cc
@@ -41,6 +41,7 @@ typedef Eigen::GpuDevice GPUDevice;
   DEFINE_GPU_SPECS_INDEX(T, int32); \
   DEFINE_GPU_SPECS_INDEX(T, int64);
 
+DEFINE_GPU_SPECS(Eigen::half);
 DEFINE_GPU_SPECS(float);
 DEFINE_GPU_SPECS(double);
 // TODO: The following fails to compile.
-- 
GitLab


From 22497517ac15d961697bbda4d20d78c3a6e33141 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Dec 2018 19:48:07 +0000
Subject: [PATCH 214/873] Add test case for float16 of scatter_max/min

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/python/kernel_tests/scatter_ops_test.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py
index 1c7006ac0b..44431791ef 100644
--- a/tensorflow/python/kernel_tests/scatter_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_ops_test.py
@@ -191,6 +191,10 @@ class ScatterTest(test.TestCase):
     if tf_scatter != state_ops.scatter_div:
       vtypes.append(np.int32)
 
+    if (tf_scatter == state_ops.scatter_min or
+        tf_scatter == state_ops.scatter_max):
+      vtypes.append(np.float16)
+
     for vtype in vtypes:
       for itype in (np.int32, np.int64):
         self._VariableRankTest(tf_scatter, vtype, itype, repeat_indices,
-- 
GitLab


From 48a224c33e6cc258226e029a6eacb71aca0aa7b9 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Fri, 7 Dec 2018 12:27:20 -0800
Subject: [PATCH 215/873] Update documentation on string support for TensorFlow
 Lite

Strings are not compatible with ByteBuffer inputs/outputs. Make that
clear in the documentation, also highlighting how they differ from
primitive types.

PiperOrigin-RevId: 224559052
---
 tensorflow/lite/g3doc/apis.md                 |  7 ++++
 .../java/org/tensorflow/lite/Interpreter.java | 37 +++++++++++++++----
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/tensorflow/lite/g3doc/apis.md b/tensorflow/lite/g3doc/apis.md
index b85570f41c..b15159ce41 100644
--- a/tensorflow/lite/g3doc/apis.md
+++ b/tensorflow/lite/g3doc/apis.md
@@ -304,6 +304,13 @@ one of the following primitive types:
 *   `long`
 *   `byte`
 
+`String` types are also supported, but they are encoded differently than the
+primitive types. In particular, the shape of a string Tensor dictates the number
+and arrangement of strings in the Tensor, with each element itself being a
+variable length string. In this sense, the (byte) size of the Tensor cannot be
+computed from the shape and type alone, and consequently strings cannot be
+provided as a single, flat `ByteBuffer` argument.
+
 If other data types, including boxed types like `Integer` and `Float`, are used,
 an `IllegalArgumentException` will be thrown.
 
diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
index a03d7b5676..2203d5fbdb 100644
--- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
+++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
@@ -43,15 +43,34 @@ import org.checkerframework.checker.nullness.qual.NonNull;
  * <pre>{@code
  * Object[] inputs = {input0, input1, ...};
  * Map<Integer, Object> map_of_indices_to_outputs = new HashMap<>();
- * float[][][] ith_output = new float[3][2][4];
+ * ByteBuffer ith_output = ByteBuffer.allocateDirect(3 * 2 * 4 * 4);  // Float tensor, shape 3x2x4.
+ * ith_output.order(ByteOrder.nativeOrder());
  * map_of_indices_to_outputs.put(i, ith_output);
  * try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model)) {
  *   interpreter.runForMultipleInputsOutputs(inputs, map_of_indices_to_outputs);
  * }
  * }</pre>
  *
+ * <p>If a model takes or produces string tensors:
+ *
+ * <pre>{@code
+ * String[] input = {"foo", "bar"};  // Input tensor shape is [2].
+ * String[] output = new String[3][2];  // Output tensor shape is [3, 2].
+ * try (Interpreter interpreter = new Interpreter(file_of_a_tensorflowlite_model)) {
+ *   interpreter.runForMultipleInputsOutputs(input, output);
+ * }
+ * }</pre>
+ *
  * <p>Orders of inputs and outputs are determined when converting TensorFlow model to TensorFlowLite
- * model with Toco.
+ * model with Toco, as are the default shapes of the inputs.
+ *
+ * <p>When inputs are provided as (multi-dimensional) arrays, the corresponding input tensor(s) will
+ * be implicitly resized according to that array's shape. When inputs are provided as {@link
+ * ByteBuffer} types, no implicit resizing is done; the caller must ensure that the {@link
+ * ByteBuffer} byte size either matches that of the corresponding tensor, or that they first resize
+ * the tensor via {@link #resizeInput()}. Tensor shape and type information can be obtained via the
+ * {@link Tensor} class, available via {@link #getInputTensor(int)} and {@link
+ * #getOutputTensor(int)}.
  *
  * <p><b>WARNING:</b>Instances of a {@code Interpreter} is <b>not</b> thread-safe. A {@code
  * Interpreter} owns resources that <b>must</b> be explicitly freed by invoking {@link #close()}
@@ -192,12 +211,13 @@ public final class Interpreter implements AutoCloseable {
    * Runs model inference if the model takes only one input, and provides only one output.
    *
    * <p>Warning: The API runs much faster if {@link ByteBuffer} is used as input data type. Please
-   * consider using {@link ByteBuffer} to feed input data for better performance.
+   * consider using {@link ByteBuffer} to feed primitive input data for better performance.
    *
    * @param input an array or multidimensional array, or a {@link ByteBuffer} of primitive types
    *     including int, float, long, and byte. {@link ByteBuffer} is the preferred way to pass large
-   *     input data. When {@link ByteBuffer} is used, its content should remain unchanged until
-   *     model inference is done.
+   *     input data for primitive types, whereas string types require using the (multi-dimensional)
+   *     array input path. When {@link ByteBuffer} is used, its content should remain unchanged
+   *     until model inference is done.
    * @param output a multidimensional array of output data, or a {@link ByteBuffer} of primitive
    *     types including int, float, long, and byte.
    */
@@ -212,13 +232,14 @@ public final class Interpreter implements AutoCloseable {
    * Runs model inference if the model takes multiple inputs, or returns multiple outputs.
    *
    * <p>Warning: The API runs much faster if {@link ByteBuffer} is used as input data type. Please
-   * consider using {@link ByteBuffer} to feed input data for better performance.
+   * consider using {@link ByteBuffer} to feed primitive input data for better performance.
    *
    * @param inputs an array of input data. The inputs should be in the same order as inputs of the
    *     model. Each input can be an array or multidimensional array, or a {@link ByteBuffer} of
    *     primitive types including int, float, long, and byte. {@link ByteBuffer} is the preferred
-   *     way to pass large input data. When {@link ByteBuffer} is used, its content should remain
-   *     unchanged until model inference is done.
+   *     way to pass large input data, whereas string types require using the (multi-dimensional)
+   *     array input path. When {@link ByteBuffer} is used, its content should remain unchanged
+   *     until model inference is done.
    * @param outputs a map mapping output indices to multidimensional arrays of output data or {@link
    *     ByteBuffer}s of primitive types including int, float, long, and byte. It only needs to keep
    *     entries for the outputs to be used.
-- 
GitLab


From 9fffd26041e40bb3c89eabebfd35f92fe37b2c2a Mon Sep 17 00:00:00 2001
From: Igor Ganichev <iga@google.com>
Date: Fri, 7 Dec 2018 12:39:30 -0800
Subject: [PATCH 216/873] Support XLA_CPU/GPU devices in eager runtime

Compiling a defun can be done with:

  @function.defun
  def f(x):
    return x + 1

  with tf.device("XLA_GPU"):
    f(5)

PiperOrigin-RevId: 224560868
---
 tensorflow/compiler/jit/BUILD                      |  2 ++
 .../compiler/jit/xla_compile_on_demand_op.cc       | 14 +++++++++++++-
 tensorflow/compiler/jit/xla_cpu_device.cc          | 14 +++++++++++++-
 tensorflow/compiler/tests/BUILD                    |  7 -------
 tensorflow/core/common_runtime/eager/execute.cc    |  3 ++-
 .../core/common_runtime/eager/tensor_handle.cc     |  5 +----
 tensorflow/python/framework/func_graph.py          |  4 +++-
 7 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 7ebcd120f6..15dcbb2641 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -76,6 +76,7 @@ cc_library(
     srcs = ["xla_cpu_device.cc"],
     visibility = [":friends"],
     deps = [
+        ":create_xla_launch_op",  # buildcleaner: keep
         ":flags",
         ":jit_compilation_passes",
         ":xla_device",
@@ -95,6 +96,7 @@ cc_library(
     srcs = ["xla_gpu_device.cc"],
     visibility = [":friends"],
     deps = [
+        ":create_xla_launch_op",  # buildcleaner: keep
         ":jit_compilation_passes",
         ":xla_device",
         "//tensorflow/compiler/jit/kernels:xla_ops",
diff --git a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
index 1fe612d43d..c7e8d61d28 100644
--- a/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
+++ b/tensorflow/compiler/jit/xla_compile_on_demand_op.cc
@@ -142,11 +142,22 @@ Status XlaCompileOnDemandOp::Compile(
         TF_RETURN_IF_ERROR(ctx->allocate_temp(
             device_tensor.dtype(), device_tensor.shape(), &host_tensor, attrs));
         Notification n;
+        Status status;
         ctx->op_device_context()->CopyDeviceTensorToCPU(
             &device_tensor, "ConstantArgument",
             reinterpret_cast<Device*>(ctx->device()), &host_tensor,
-            [&](Status status) { n.Notify(); });
+            [&](Status s) {
+              status = s;
+              n.Notify();
+            });
         n.WaitForNotification();
+        if (!status.ok()) {
+          LOG(ERROR) << "Copying tensor of shape "
+                     << device_tensor.shape().DebugString() << " from "
+                     << ctx->device()->name() << "to CPU failed with "
+                     << status.ToString();
+          return status;
+        }
         constant_arguments[i] = host_tensor;
       }
     }
@@ -189,6 +200,7 @@ Status XlaCompileOnDemandOp::Compile(
   std::map<int, OptionalTensor> variable_args = GetVariables(ctx);
 
   std::vector<XlaCompiler::Argument> args;
+
   TF_RETURN_IF_ERROR(XlaComputationLaunchContext::BuildXlaCompilerArguments(
       constant_arguments, variable_args, ctx, &args));
 
diff --git a/tensorflow/compiler/jit/xla_cpu_device.cc b/tensorflow/compiler/jit/xla_cpu_device.cc
index 7df898ad12..e9770647e7 100644
--- a/tensorflow/compiler/jit/xla_cpu_device.cc
+++ b/tensorflow/compiler/jit/xla_cpu_device.cc
@@ -63,7 +63,19 @@ Status XlaCpuDeviceFactory::CreateDevices(
   options.device_ordinal = 0;
   options.compilation_device_name = DEVICE_CPU_XLA_JIT;
   options.use_multiple_streams = false;
-  devices->push_back(absl::make_unique<XlaDevice>(session_options, options));
+  auto device = absl::make_unique<XlaDevice>(session_options, options);
+
+  // Setting GpuDeviceInfo because eager runtime relies on the device
+  // context in tensorflow_gpu_device_info(). Also,
+  // tensorflow_gpu_device_info() == nullptr is used as an IsCPU test.
+  // We need XlaCpuDevice to be treated not as CPU because it allocates
+  // XlaTensors, not regular Tensors.
+  Status status = device->UseGpuDeviceInfo();
+  if (!status.ok()) {
+    errors::AppendToMessage(&status, "while setting up ", DEVICE_GPU_XLA_JIT);
+    return status;
+  }
+  devices->push_back(std::move(device));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index bc3d60b90e..093b61629c 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -408,13 +408,6 @@ tf_xla_py_test(
     name = "eager_test",
     size = "large",
     srcs = ["eager_test.py"],
-    disabled_backends = [
-        # TODO(b/78199195) Support XLA CPU devices in eager runtime
-        "cpu",
-        "cpu_ondemand",
-        # TODO(b/78468222) Enable GPU backend
-        "gpu",
-    ],
     deps = [
         ":xla_test",
         "//tensorflow/python:array_ops",
diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc
index a6199f2aeb..783baa96c9 100644
--- a/tensorflow/core/common_runtime/eager/execute.cc
+++ b/tensorflow/core/common_runtime/eager/execute.cc
@@ -263,7 +263,8 @@ Status EagerLocalExecute(EagerOperation* op,
     // Note that it is not ideal, but currently ok, to set this
     // attribute after computing the kernel cache key above.
     if (op->is_function() && device != nullptr &&
-        device->device_type() == "TPU") {
+        (device->device_type() == "TPU" || device->device_type() == "XLA_GPU" ||
+         device->device_type() == "XLA_CPU")) {
       op->MutableAttrs()->Set(kXlaCompileAttr, true);
     }
 
diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.cc b/tensorflow/core/common_runtime/eager/tensor_handle.cc
index d8d6b7a63b..0acd160936 100644
--- a/tensorflow/core/common_runtime/eager/tensor_handle.cc
+++ b/tensorflow/core/common_runtime/eager/tensor_handle.cc
@@ -184,10 +184,7 @@ Status TensorHandle::CopyToDevice(EagerContext* ctx, tensorflow::Device* dstd,
   bool is_same_device = (srcd == dstd) || (srcd->name() == dstd->name());
   const bool dst_cpu = dstd->tensorflow_gpu_device_info() == nullptr;
   const bool src_cpu = srcd->tensorflow_gpu_device_info() == nullptr;
-  // both_on_cpu can be true and yet is_same_device is false, if one of src/dst
-  // has device type XLA_CPU, and the other CPU.
-  const bool both_on_cpu = src_cpu && dst_cpu;
-  if (is_same_device || both_on_cpu) {
+  if (is_same_device) {
     *output = new tensorflow::TensorHandle(*src, dstd, dstd, ctx);
     return tensorflow::Status::OK();
   }
diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py
index 0f6db500cc..bd4ed5553e 100644
--- a/tensorflow/python/framework/func_graph.py
+++ b/tensorflow/python/framework/func_graph.py
@@ -122,7 +122,9 @@ class FuncGraph(ops.Graph):
     # restored.
     if context.executing_eagerly():
       self.seed = context.global_seed()
-      self._xla_compile = (context.context().device_spec.device_type == "TPU")
+      device_type = context.context().device_spec.device_type
+      self._xla_compile = (device_type == "TPU" or device_type == "XLA_GPU"
+                           or device_type == "XLA_CPU")
       if self._distribution_strategy_stack or self._xla_compile:
         self._add_device_to_stack(context.context().device_name)
     else:
-- 
GitLab


From 4f543e588a99caeb8265372127b889a4f67edba4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 12:39:53 -0800
Subject: [PATCH 217/873] Fix NVPTX TargetMachine construction to use
 reloc/code model flags only if explicitly set. Fixes failures caused by
 r348585.

PiperOrigin-RevId: 224560930
---
 .../xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc      | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
index abf9e7b6d6..bd53b90b42 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.cc
@@ -199,8 +199,7 @@ std::unique_ptr<llvm::TargetMachine> GetTargetMachine(
   }
   return absl::WrapUnique(target->createTargetMachine(
       triple.str(), llvm_ir::AsStringRef(cpu_name), "+ptx60", target_options,
-      Optional<Reloc::Model>(RelocModel), Optional<CodeModel::Model>(CMModel),
-      codegen_opt_level));
+      getRelocModel(), getCodeModel(), codegen_opt_level));
 }
 
 // Adds the standard LLVM optimization passes, based on the speed optimization
-- 
GitLab


From 66ca3cd10df0bf9bb6586bf0a09ac5c5ed0a25fb Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 7 Dec 2018 12:40:49 -0800
Subject: [PATCH 218/873] Add a functional saver, use it for object-based
 checkpointing

Pulls some utilities out of saver.py which are necessary to actually use it. The functional saver takes only SaveableObjects, so these are utilities for taking a list of whatever users pass in and converting them to those.

One other code move for object-based checkpointing to avoid circular imports.

Applications which need a SaverDef still use the old Saver. Serialization to SaverDef will be added to this saver in a followup.

Does not actually wrap the new Saver's methods in @tf.function yet, since there are memory issues which need to be fixed first.

PiperOrigin-RevId: 224561069
---
 .../training/elastic_average_optimizer.py     |   5 +-
 .../training/moving_average_optimizer.py      |   3 +-
 tensorflow/python/BUILD                       |  11 +-
 .../python/training/checkpoint_utils.py       |   6 +-
 .../python/training/checkpointable/BUILD      |   6 +-
 .../python/training/checkpointable/base.py    |  40 +--
 .../python/training/checkpointable/util.py    | 151 +++++---
 tensorflow/python/training/saver.py           | 331 ++---------------
 tensorflow/python/training/saving/BUILD       |  55 +++
 .../training/saving/functional_saver.py       | 101 ++++++
 .../training/saving/functional_saver_test.py  |  50 +++
 .../training/{ => saving}/saveable_object.py  |   0
 .../training/saving/saveable_object_util.py   | 340 ++++++++++++++++++
 .../python/training/warm_starting_util.py     |   4 +-
 14 files changed, 687 insertions(+), 416 deletions(-)
 create mode 100644 tensorflow/python/training/saving/BUILD
 create mode 100644 tensorflow/python/training/saving/functional_saver.py
 create mode 100644 tensorflow/python/training/saving/functional_saver_test.py
 rename tensorflow/python/training/{ => saving}/saveable_object.py (100%)
 create mode 100644 tensorflow/python/training/saving/saveable_object_util.py

diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
index 6c203e5519..fa1a7aaff0 100644
--- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.training import optimizer
 from tensorflow.python.training import saver
 from tensorflow.python.training import session_run_hook
+from tensorflow.python.training.saving import saveable_object_util
 
 LOCAL_VARIABLE_NAME = 'local_center_variable'
 GLOBAL_VARIABLE_NAME = 'global_center_variable'
@@ -424,7 +425,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer):
     if var_list is None:
       var_list = variables.trainable_variables()
     if not isinstance(var_list, dict):
-      var_list = saver.BaseSaverBuilder.OpListToDict(var_list)
+      var_list = saveable_object_util.op_list_to_dict(var_list)
 
     swapped_var_list = {}
     for key, var in var_list.items():
@@ -464,4 +465,4 @@ class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook):
 
   def after_create_session(self, session, coord):
     """Run initialization ops"""
-    session.run(self._variable_init_op)
\ No newline at end of file
+    session.run(self._variable_init_op)
diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
index b7fd2d2fb9..bf3e5c51f7 100644
--- a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
+++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py
@@ -26,6 +26,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.training import moving_averages
 from tensorflow.python.training import optimizer
 from tensorflow.python.training import saver
+from tensorflow.python.training.saving import saveable_object_util
 
 
 class MovingAverageOptimizer(optimizer.Optimizer):
@@ -165,7 +166,7 @@ class MovingAverageOptimizer(optimizer.Optimizer):
     if var_list is None:
       var_list = variables.global_variables()
     if not isinstance(var_list, dict):
-      var_list = saver.BaseSaverBuilder.OpListToDict(var_list)
+      var_list = saveable_object_util.op_list_to_dict(var_list)
 
     v_name_to_tensor = {}
     for k, tensor_or_list in six.iteritems(var_list):
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index cc36f1fc0e..0a3ee65bc4 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3515,13 +3515,13 @@ py_library(
         exclude = [
             "**/*test*",
             "training/checkpointable/**/*.py",
+            "training/saving/**/*.py",
             # The following targets have their own build rules (same name as the
             # file):
             "training/basic_session_run_hooks.py",
             "training/checkpoint_management.py",
             "training/distribute.py",
             "training/distribution_strategy_context.py",
-            "training/saveable_object.py",
             "training/saver.py",
             "training/session_run_hook.py",
             "training/training_util.py",
@@ -3596,12 +3596,6 @@ py_library(
     ],
 )
 
-py_library(
-    name = "saveable_object",
-    srcs = ["training/saveable_object.py"],
-    srcs_version = "PY2AND3",
-)
-
 py_library(
     name = "checkpoint_management",
     srcs = ["training/checkpoint_management.py"],
@@ -3655,7 +3649,6 @@ py_library(
         ":platform",
         ":pywrap_tensorflow",
         ":resource_variable_ops",
-        ":saveable_object",
         ":session",
         ":state_ops",
         ":string_ops",
@@ -3665,6 +3658,8 @@ py_library(
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/saving:saveable_object",
+        "//tensorflow/python/training/saving:saveable_object_util",
         "//third_party/py/numpy",
         "@six_archive//:six",
     ],
diff --git a/tensorflow/python/training/checkpoint_utils.py b/tensorflow/python/training/checkpoint_utils.py
index 99b1f4c0d7..74b46179e7 100644
--- a/tensorflow/python/training/checkpoint_utils.py
+++ b/tensorflow/python/training/checkpoint_utils.py
@@ -30,7 +30,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saver
+from tensorflow.python.training.saving import saveable_object_util
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -311,10 +311,10 @@ def _set_checkpoint_initializer(variable,
     restore_op = io_ops.restore_v2(
         ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0]
 
-    names_to_saveables = saver.BaseSaverBuilder.OpListToDict([variable])
+    names_to_saveables = saveable_object_util.op_list_to_dict([variable])
     saveable_objects = []
     for name, op in names_to_saveables.items():
-      for s in saver.BaseSaverBuilder.SaveableObjectsForOp(op, name):
+      for s in saveable_object_util.saveable_objects_for_op(op, name):
         saveable_objects.append(s)
 
     assert len(saveable_objects) == 1  # Should be only one variable.
diff --git a/tensorflow/python/training/checkpointable/BUILD b/tensorflow/python/training/checkpointable/BUILD
index 4ab5593d4f..26a0ac35b7 100644
--- a/tensorflow/python/training/checkpointable/BUILD
+++ b/tensorflow/python/training/checkpointable/BUILD
@@ -25,9 +25,9 @@ py_library(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:io_ops_gen",
         "//tensorflow/python:platform",
-        "//tensorflow/python:saveable_object",
         "//tensorflow/python:util",
         "//tensorflow/python/eager:context",
+        "//tensorflow/python/training/saving:saveable_object",
     ],
 )
 
@@ -114,7 +114,6 @@ py_library(
         "//tensorflow/python:init_ops",
         "//tensorflow/python:io_ops_gen",
         "//tensorflow/python:pywrap_tensorflow",
-        "//tensorflow/python:saveable_object",
         "//tensorflow/python:saver",
         "//tensorflow/python:session",
         "//tensorflow/python:tensor_shape",
@@ -123,6 +122,9 @@ py_library(
         "//tensorflow/python:variables",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/training/saving:functional_saver",
+        "//tensorflow/python/training/saving:saveable_object",
+        "//tensorflow/python/training/saving:saveable_object_util",
     ],
 )
 
diff --git a/tensorflow/python/training/checkpointable/base.py b/tensorflow/python/training/checkpointable/base.py
index 095a90ddd4..3cd1c6f9c8 100644
--- a/tensorflow/python/training/checkpointable/base.py
+++ b/tensorflow/python/training/checkpointable/base.py
@@ -25,7 +25,6 @@ import weakref
 
 import six
 
-from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -34,7 +33,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_io_ops as io_ops
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import saveable_object
+from tensorflow.python.training.saving import saveable_object
 from tensorflow.python.util import nest
 from tensorflow.python.util import serialization
 from tensorflow.python.util import tf_decorator
@@ -374,41 +373,10 @@ class _CheckpointPosition(object):
       eagerly.
     """
     (restore_ops,
-     named_saveables,
+     tensor_saveables,
      python_saveables) = self._gather_ops_or_named_saveables()
-
-    # Eagerly run restorations for Python state.
-    reader = pywrap_tensorflow.NewCheckpointReader(
-        self._checkpoint.save_path_string)
-    for saveable in python_saveables:
-      spec_names = [spec.name for spec in saveable.specs]
-      saveable.python_restore(
-          [reader.get_tensor(name) for name in spec_names])
-
-    # If we have new SaveableObjects, extract and cache restore ops.
-    if named_saveables:
-      validated_saveables = (
-          self._checkpoint.builder._ValidateAndSliceInputs(named_saveables))  # pylint: disable=protected-access
-      validated_names = set(saveable.name for saveable in validated_saveables)
-      if set(named_saveables.keys()) != validated_names:
-        raise AssertionError(
-            ("Saveable keys changed when validating. Got back %s, was "
-             "expecting %s") % (named_saveables.keys(), validated_names))
-      all_tensors = self._checkpoint.builder.bulk_restore(
-          filename_tensor=self._checkpoint.save_path_tensor,
-          saveables=validated_saveables, preferred_shard=-1,
-          restore_sequentially=False)
-      saveable_index = 0
-      for saveable in validated_saveables:
-        num_specs = len(saveable.specs)
-        saveable_tensors = all_tensors[
-            saveable_index:saveable_index + num_specs]
-        saveable_index += num_specs
-        restore_op = saveable.restore(saveable_tensors, restored_shapes=None)
-        if not context.executing_eagerly():
-          assert saveable.name not in self._checkpoint.restore_ops_by_name
-          self._checkpoint.restore_ops_by_name[saveable.name] = restore_op
-          restore_ops.append(restore_op)
+    restore_ops.extend(self._checkpoint.restore_saveables(
+        tensor_saveables, python_saveables))
     return restore_ops
 
   @property
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
index d183fbdcf9..a54f41a54f 100644
--- a/tensorflow/python/training/checkpointable/util.py
+++ b/tensorflow/python/training/checkpointable/util.py
@@ -40,11 +40,14 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import optimizer as optimizer_lib
-from tensorflow.python.training import saveable_object as saveable_object_lib
-from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training import saver as v1_saver_lib
 from tensorflow.python.training.checkpointable import base
 from tensorflow.python.training.checkpointable import data_structures
 from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.saving import functional_saver
+from tensorflow.python.training.saving import saveable_object as saveable_object_lib
+from tensorflow.python.training.saving import saveable_object_util
+from tensorflow.python.util import compat
 from tensorflow.python.util import deprecation
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
@@ -89,7 +92,6 @@ class _CheckpointRestoreCoordinator(object):
         referenced every restore (e.g. for Python state); otherwise they would
         create their own ops every restore.
     """
-    self.builder = saver_lib.BulkSaverBuilder()
     self.object_graph_proto = object_graph_proto
     self.restore_uid = ops.uid()
     # Maps from objects to lists of attributes which were in the checkpoint but
@@ -144,6 +146,57 @@ class _CheckpointRestoreCoordinator(object):
     if self.new_restore_ops_callback:
       self.new_restore_ops_callback(new_ops)  # pylint: disable=not-callable
 
+  def restore_saveables(self, tensor_saveables, python_saveables):
+    """Run or build restore operations for SaveableObjects.
+
+    Args:
+      tensor_saveables: `SaveableObject`s which correspond to Tensors.
+      python_saveables: `PythonStateSaveable`s which correspond to Python
+        values.
+
+    Returns:
+      When graph building, a list of restore operations, either cached or newly
+      created, to restore `tensor_saveables`.
+    """
+    restore_ops = []
+    # Eagerly run restorations for Python state.
+    reader = pywrap_tensorflow.NewCheckpointReader(
+        self.save_path_string)
+    for saveable in python_saveables:
+      spec_names = [spec.name for spec in saveable.specs]
+      saveable.python_restore(
+          [reader.get_tensor(name) for name in spec_names])
+
+    # If we have new SaveableObjects, extract and cache restore ops.
+    if tensor_saveables:
+      validated_saveables = saveable_object_util.validate_and_slice_inputs(
+          tensor_saveables)
+      validated_names = set(saveable.name for saveable in validated_saveables)
+      if set(tensor_saveables.keys()) != validated_names:
+        raise AssertionError(
+            ("Saveable keys changed when validating. Got back %s, was "
+             "expecting %s") % (tensor_saveables.keys(), validated_names))
+      for saveable in validated_saveables:
+        if saveable.device:
+          device = saveable_object_util.set_cpu0(saveable.device)
+        else:
+          device = None
+        with ops.device(device):
+          tensors = []
+          for spec in saveable.specs:
+            tensors.append(
+                io_ops.restore_v2(
+                    self.save_path_tensor,
+                    [spec.name],
+                    [spec.slice_spec],
+                    [spec.dtype])[0])
+          restore_op = saveable.restore(tensors, restored_shapes=None)
+        if not context.executing_eagerly():
+          assert saveable.name not in self.restore_ops_by_name
+          self.restore_ops_by_name[saveable.name] = restore_op
+          restore_ops.append(restore_op)
+    return restore_ops
+
 
 class _NameBasedRestoreCoordinator(object):
   """Keeps the status of a name-based checkpoint restore."""
@@ -183,11 +236,11 @@ class _NameBasedRestoreCoordinator(object):
           continue
       else:
         saveable = saveable_factory
-      names_to_saveables = saver_lib.BaseSaverBuilder.OpListToDict(
+      names_to_saveables = saveable_object_util.op_list_to_dict(
           [saveable],
           convert_variable_to_tensor=False)
       for name, op in names_to_saveables.items():
-        for saveable_object in saver_lib.BaseSaverBuilder.SaveableObjectsForOp(
+        for saveable_object in saveable_object_util.saveable_objects_for_op(
             op=op, name=name):
           yield saveable_object
 
@@ -606,10 +659,10 @@ def _add_attributes_to_object_graph(
           # Figure out the name-based Saver's name for this variable. If it's
           # already a SaveableObject we'd just get the checkpoint key back, so
           # we leave full_name blank.
-          saver_dict = saver_lib.BaseSaverBuilder.OpListToDict(
+          saver_dict = saveable_object_util.op_list_to_dict(
               [maybe_saveable], convert_variable_to_tensor=False)
           full_name, = saver_dict.keys()
-          saveables = tuple(saver_lib.BaseSaverBuilder.SaveableObjectsForOp(
+          saveables = tuple(saveable_object_util.saveable_objects_for_op(
               op=maybe_saveable, name=attribute.checkpoint_key))
           for saveable in saveables:
             saveable.full_name = full_name
@@ -1226,7 +1279,7 @@ class NameBasedSaverStatus(_LoadStatus):
       session = ops.get_default_session()
     with ops.device("/cpu:0"):
       saveables = self._gather_saveable_objects()
-      saver_lib.Saver(saveables).restore(
+      v1_saver_lib.Saver(saveables).restore(
           sess=session, save_path=self._checkpoint.save_path)
 
   def initialize_or_restore(self, session=None):
@@ -1251,18 +1304,6 @@ class _SessionWithFeedDictAdditions(session_lib.SessionInterface):
         fetches=fetches, feed_dict=feed_dict, **kwargs)
 
 
-def _copy_saver_with_new_var_list(old_saver, new_var_list):
-  """Copy a `tf.train.Saver`'s state to a new Saver with different variables."""
-  new_saver = saver_lib.Saver(var_list=new_var_list, max_to_keep=None)
-  # TODO(allenl): Move to copying functionality to Saver?
-  # pylint: disable=protected-access
-  new_saver._last_checkpoints = old_saver._last_checkpoints
-  new_saver._checkpoints_to_be_deleted = old_saver._checkpoints_to_be_deleted
-  new_saver._next_checkpoint_time = old_saver._next_checkpoint_time
-  # pylint: enable=protected-access
-  return new_saver
-
-
 class CheckpointableSaver(object):
   """Saves and restores a `Checkpointable` object and its dependencies.
 
@@ -1301,7 +1342,8 @@ class CheckpointableSaver(object):
     # Op caching for save
     self._object_graph_feed_tensor = None
     self._last_save_object_graph = None
-    self._last_save_saver = None
+    self._file_prefix_feed_tensor = None
+    self._cached_save_operation = None
 
     # Op caching for restore, shared between _CheckpointRestoreCoordinators
     self._restore_op_cache = {}
@@ -1368,13 +1410,16 @@ class CheckpointableSaver(object):
           base.NoRestoreSaveable(
               tensor=object_graph_tensor,
               name=base.OBJECT_GRAPH_PROTO_KEY))
-      # TODO(allenl, haoliang): Swap in a function-based saver here.
-      return saver_lib.Saver(
+      # TODO(allenl): Swap in a function-based saver here once it can serialize
+      # to a SaverDef.
+      return v1_saver_lib.Saver(
           var_list=named_saveable_objects, max_to_keep=None)
 
-  def _prepare_save(self,
-                    object_graph_tensor=None,
-                    saveable_object_cache=None):
+  def _save_cached_when_graph_building(
+      self,
+      file_prefix,
+      object_graph_tensor=None,
+      saveable_object_cache=None):
     """Create or retrieve save ops.
 
     When graph building, `saveable_object_cache` will typically be non-`None`,
@@ -1383,15 +1428,17 @@ class CheckpointableSaver(object):
     unnecessarily re-creating save ops.
 
     Args:
+      file_prefix: The prefix for saved checkpoint files.
       object_graph_tensor: A `Tensor` to which the current object graph will be
         fed.
       saveable_object_cache: A dictionary; if specified, used to cache
         `SaveableObject`s.
 
     Returns:
-      A two-element tuple with a `tf.train.Saver` and a feed_dict of `Tensor`s
-      to feed when running save ops. The feed dict contains the current object
-      graph and any Python state to be saved in the checkpoint.
+      A two-element tuple with a filename tensor and a feed_dict of tensors to
+      feed when running it (if graph building). The feed dict contains the
+      current object graph and any Python state to be saved in the
+      checkpoint. When executing eagerly only the first argument is meaningful.
     """
     (named_saveable_objects, graph_proto,
      feed_additions) = self._gather_saveables(
@@ -1403,15 +1450,11 @@ class CheckpointableSaver(object):
         # constructors. That means the Saver needs to be copied with a new
         # var_list.
         or context.executing_eagerly()):
-      if self._last_save_object_graph is not None:
-        self._last_save_saver = _copy_saver_with_new_var_list(
-            old_saver=self._last_save_saver,
-            new_var_list=named_saveable_objects)
-      else:
-        self._last_save_saver = saver_lib.Saver(
-            var_list=named_saveable_objects, max_to_keep=None)
+      saver = functional_saver.Saver(named_saveable_objects)
+      with ops.device("/cpu:0"):
+        self._cached_save_operation = saver.save(file_prefix)
       self._last_save_object_graph = graph_proto
-    return self._last_save_saver, feed_additions
+    return self._cached_save_operation, feed_additions
 
   def save(self, file_prefix, checkpoint_number=None, session=None):
     """Save a training checkpoint.
@@ -1435,36 +1478,42 @@ class CheckpointableSaver(object):
     Returns:
       The full path to the checkpoint.
     """
-    feed_additions = {}
+    feed_dict = {}
     graph_building = not context.executing_eagerly()
+    if checkpoint_number:
+      file_prefix = "%s-%d" % (file_prefix, checkpoint_number)
     if graph_building:
       if self._object_graph_feed_tensor is None:
         with ops.device("/cpu:0"):
           self._object_graph_feed_tensor = constant_op.constant(
               "", dtype=dtypes.string)
+          self._file_prefix_feed_tensor = constant_op.constant(
+              "", dtype=dtypes.string)
       object_graph_tensor = self._object_graph_feed_tensor
+      file_prefix_tensor = self._file_prefix_feed_tensor
+      feed_dict[file_prefix_tensor] = file_prefix
     else:
+      with ops.device("/cpu:0"):
+        file_prefix_tensor = constant_op.constant(
+            file_prefix, dtype=dtypes.string)
       object_graph_tensor = None
 
-    saver, new_feed_additions = self._prepare_save(
+    file_io.recursive_create_dir(os.path.dirname(file_prefix))
+    save_path, new_feed_additions = self._save_cached_when_graph_building(
+        file_prefix=file_prefix_tensor,
         object_graph_tensor=object_graph_tensor,
         saveable_object_cache=self._saveable_object_cache)
     if new_feed_additions:
-      feed_additions.update(new_feed_additions)
+      feed_dict.update(new_feed_additions)
     if not graph_building:
       session = None
     elif session is None:
       session = ops.get_default_session()
 
-    file_io.recursive_create_dir(os.path.dirname(file_prefix))
-    with ops.device("/cpu:0"):
-      save_path = saver.save(
-          sess=_SessionWithFeedDictAdditions(
-              session=session, feed_additions=feed_additions),
-          save_path=file_prefix,
-          write_meta_graph=False,
-          write_state=False,
-          global_step=checkpoint_number)
+    if session:
+      save_path = session.run(save_path, feed_dict=feed_dict)
+    else:
+      save_path = save_path.numpy()
     return save_path
 
   def restore(self, save_path):
@@ -1753,9 +1802,9 @@ class Checkpoint(tracking.Checkpointable):
     Returns:
       The full path to the checkpoint (i.e. `file_prefix`).
     """
-    return self._saver.save(
+    return compat.as_str(self._saver.save(
         file_prefix=file_prefix,
-        session=session)
+        session=session))
 
   @property
   def save_counter(self):
diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py
index 4cd09f8a1d..04a7216484 100644
--- a/tensorflow/python/training/saver.py
+++ b/tensorflow/python/training/saver.py
@@ -14,7 +14,11 @@
 # ==============================================================================
 
 # pylint: disable=invalid-name
-"""Save and restore variables."""
+"""Save and restore variables.
+
+Symbols in this file are deprecated. See replacements in
+tensorflow/python/training/checkpointable and tensorflow/python/training/saving.
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -25,7 +29,6 @@ import time
 import uuid
 
 import numpy as np
-import six
 
 from tensorflow.core.protobuf import checkpointable_object_graph_pb2
 from tensorflow.core.protobuf import meta_graph_pb2
@@ -42,16 +45,15 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.ops import io_ops
-from tensorflow.python.ops import resource_variable_ops
-from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import string_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import saveable_object
 from tensorflow.python.training import training_util
 from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.saving import saveable_object
+from tensorflow.python.training.saving import saveable_object_util
 from tensorflow.python.util import compat
 from tensorflow.python.util.tf_export import tf_export
 
@@ -67,31 +69,6 @@ get_checkpoint_mtimes = checkpoint_management.get_checkpoint_mtimes
 remove_checkpoint = checkpoint_management.remove_checkpoint
 
 
-# Op names which identify variable reads which should be saved.
-_VARIABLE_OPS = set(["Variable",
-                     "VariableV2",
-                     "AutoReloadVariable",
-                     "VarHandleOp",
-                     "ReadVariableOp"])
-
-
-def _set_cpu0(device_string):
-  """Creates a new device string based on `device_string` but using /CPU:0.
-
-  If the device is already on /CPU:0, this is a no-op.
-
-  Args:
-    device_string: A device string.
-
-  Returns:
-    A device string.
-  """
-  parsed_device = pydev.DeviceSpec.from_string(device_string)
-  parsed_device.device_type = "CPU"
-  parsed_device.device_index = 0
-  return parsed_device.to_string()
-
-
 class BaseSaverBuilder(object):
   """Base class for Savers.
 
@@ -101,64 +78,9 @@ class BaseSaverBuilder(object):
   SaveSpec = saveable_object.SaveSpec
   SaveableObject = saveable_object.SaveableObject
 
-  class VariableSaveable(SaveableObject):
-    """SaveableObject implementation that handles Variables."""
-
-    def __init__(self, var, slice_spec, name):
-      spec = BaseSaverBuilder.SaveSpec(var, slice_spec, name, dtype=var.dtype)
-      super(BaseSaverBuilder.VariableSaveable, self).__init__(var, [spec], name)
-
-    def restore(self, restored_tensors, restored_shapes):
-      restored_tensor = restored_tensors[0]
-      if restored_shapes is not None:
-        restored_tensor = array_ops.reshape(restored_tensor, restored_shapes[0])
-      return state_ops.assign(
-          self.op,
-          restored_tensor,
-          validate_shape=restored_shapes is None and
-          self.op.get_shape().is_fully_defined())
-
-  class ResourceVariableSaveable(SaveableObject):
-    """SaveableObject implementation that handles ResourceVariables."""
-
-    def __init__(self, var, slice_spec, name):
-      self._var_device = var.device
-      self._var_shape = var.shape
-      if isinstance(var, ops.Tensor):
-        self.handle_op = var.op.inputs[0]
-        tensor = var
-      elif isinstance(var, resource_variable_ops.ResourceVariable):
-
-        def _read_variable_closure(v):
-          def f():
-            with ops.device(v.device):
-              x = v.read_value()
-              # To allow variables placed on non-CPU devices to be checkpointed,
-              # we copy them to CPU on the same machine first.
-              with ops.device("/device:CPU:0"):
-                return array_ops.identity(x)
-          return f
-
-        self.handle_op = var.handle
-        tensor = _read_variable_closure(var)
-      else:
-        raise ValueError(
-            "Saveable is neither a resource variable nor a read operation."
-            " Got: %s" % repr(var))
-      spec = BaseSaverBuilder.SaveSpec(tensor, slice_spec, name,
-                                       dtype=var.dtype)
-      super(BaseSaverBuilder.ResourceVariableSaveable, self).__init__(
-          var, [spec], name)
-
-    def restore(self, restored_tensors, restored_shapes):
-      restored_tensor = restored_tensors[0]
-      if restored_shapes is not None:
-        restored_tensor = array_ops.reshape(restored_tensor, restored_shapes[0])
-      # Copy the restored tensor to the variable's device.
-      with ops.device(self._var_device):
-        restored_tensor = array_ops.identity(restored_tensor)
-        return resource_variable_ops.shape_safe_assign_variable_handle(
-            self.handle_op, self._var_shape, restored_tensor)
+  # Aliases for code which was moved but still has lots of users.
+  VariableSaveable = saveable_object_util.ReferenceVariableSaveable
+  ResourceVariableSaveable = saveable_object_util.ResourceVariableSaveable
 
   def __init__(self, write_version=saver_pb2.SaverDef.V2):
     self._write_version = write_version
@@ -224,7 +146,11 @@ class BaseSaverBuilder(object):
     del restore_sequentially
     all_tensors = []
     for saveable in saveables:
-      with ops.device(_set_cpu0(saveable.device) if saveable.device else None):
+      if saveable.device:
+        device = saveable_object_util.set_cpu0(saveable.device)
+      else:
+        device = None
+      with ops.device(device):
         all_tensors.extend(
             self.restore_op(filename_tensor, saveable, preferred_shard))
     return all_tensors
@@ -336,7 +262,7 @@ class BaseSaverBuilder(object):
     last_device = None
     for shard, (device, saveables) in enumerate(per_device):
       last_device = device
-      with ops.device(_set_cpu0(device)):
+      with ops.device(saveable_object_util.set_cpu0(device)):
         sharded_filename = self.sharded_filename(tmp_checkpoint_prefix, shard,
                                                  num_shards_tensor)
         sharded_prefixes.append(sharded_filename)
@@ -344,7 +270,7 @@ class BaseSaverBuilder(object):
 
     with ops.control_dependencies([x.op for x in sharded_saves]):
       # Co-locates the merge step with the last device.
-      with ops.device(_set_cpu0(last_device)):
+      with ops.device(saveable_object_util.set_cpu0(last_device)):
         # V2 format write path consists of a metadata merge step.  Once merged,
         # attempts to delete the temporary directory, "<user-fed prefix>_temp".
         merge_step = gen_io_ops.merge_v2_checkpoints(
@@ -459,10 +385,6 @@ class BaseSaverBuilder(object):
                 name="restore_shard"))
     return control_flow_ops.group(*sharded_restores, name="restore_all")
 
-  @staticmethod
-  def _IsVariable(v):
-    return isinstance(v, ops.Tensor) and v.op.type in _VARIABLE_OPS
-
   def _GroupByDevices(self, saveables):
     """Group Variable tensor slices per device.
 
@@ -490,220 +412,6 @@ class BaseSaverBuilder(object):
       per_device[canonical_device.pop()].append(saveable)
     return sorted(per_device.items(), key=lambda t: t[0])
 
-  @staticmethod
-  def OpListToDict(op_list, convert_variable_to_tensor=True):
-    """Create a dictionary of names to operation lists.
-
-    Args:
-      op_list: A list, tuple, or set of Variables or SaveableObjects.
-      convert_variable_to_tensor: Whether or not to convert single Variables
-        with no slice info into Tensors.
-
-    Returns:
-      A dictionary of names to the operations that must be saved under
-      that name.  Variables with save_slice_info are grouped together under the
-      same key in no particular order.
-
-    Raises:
-      TypeError: If the type of op_list or its elements is not supported.
-      ValueError: If at least two saveables share the same name.
-    """
-    if not isinstance(op_list, (list, tuple, set)):
-      raise TypeError("Variables to save should be passed in a dict or a "
-                      "list: %s" % op_list)
-    # When ResourceVariables are converted to Tensors, read ops are added to the
-    # graph. Sorting the op_list ensures that the resulting graph is always
-    # constructed in a deterministic way:
-    op_list = sorted(op_list, key=lambda x: x.name)
-    names_to_saveables = {}
-    # pylint: disable=protected-access
-    for var in op_list:
-      if isinstance(var, BaseSaverBuilder.SaveableObject):
-        names_to_saveables[var.name] = var
-      elif isinstance(var, variables.PartitionedVariable):
-        if var.name in names_to_saveables:
-          raise ValueError("At least two variables have the same name: %s" %
-                           var.name)
-        names_to_saveables[var.name] = var
-      elif isinstance(var, variables.Variable) and var._save_slice_info:
-        name = var._save_slice_info.full_name
-        if name in names_to_saveables:
-          if not isinstance(names_to_saveables[name], list):
-            raise ValueError("Mixing slices and non-slices with the same name: "
-                             "%s" % name)
-          names_to_saveables[name].append(var)
-        else:
-          names_to_saveables[name] = [var]
-      elif (isinstance(var, checkpointable.CheckpointableBase)
-            and not isinstance(var, variables.Variable)):
-        checkpointable_saveables = [
-            (factory() if callable(factory) else factory)
-            for factory in var._gather_saveables_for_checkpoint().values()]
-        names_to_saveables.update(
-            BaseSaverBuilder.OpListToDict(checkpointable_saveables))
-      else:
-        if context.executing_eagerly():
-          if not isinstance(var, resource_variable_ops.ResourceVariable):
-            raise ValueError(
-                "Can only save/restore ResourceVariables when eager execution "
-                "is enabled, type: %s." % type(var))
-          set_var = names_to_saveables.setdefault(var._shared_name, var)
-          if set_var is not var:
-            raise ValueError(
-                ("Two different ResourceVariable objects with the same "
-                 "shared_name '%s' were passed to the Saver. This likely means "
-                 "that they were created in different Graphs or isolation "
-                 "contexts, and may not be checkpointed together.") %
-                (var._shared_name,))
-        else:
-          if convert_variable_to_tensor:
-            if isinstance(var, resource_variable_ops.ResourceVariable):
-              var = var._graph_element  # pylint: disable=protected-access
-            else:
-              var = ops.internal_convert_to_tensor(var, as_ref=True)
-            if not BaseSaverBuilder._IsVariable(var):
-              raise TypeError("Variable to save is not a Variable: %s" % var)
-          if var.op.type == "ReadVariableOp":
-            name = var.op.inputs[0].op.name
-          else:
-            name = var.op.name
-          if name in names_to_saveables:
-            raise ValueError("At least two variables have the same name: %s" %
-                             name)
-          names_to_saveables[name] = var
-
-      # pylint: enable=protected-access
-    return names_to_saveables
-
-  @staticmethod
-  def SaveableObjectsForOp(op, name):
-    """Create `SaveableObject`s from an operation.
-
-    Args:
-      op: A variable, operation, or SaveableObject to coerce into a
-        SaveableObject.
-      name: A string name for the SaveableObject.
-
-    Yields:
-      `SaveableObject`s which together save/restore `op`.
-
-    Raises:
-      TypeError: If `name` is not a string.
-      ValueError: For operations with no known conversion to SaveableObject.
-    """
-    if not isinstance(name, six.string_types):
-      raise TypeError(
-          "names_to_saveables must be a dict mapping string names to "
-          "checkpointable operations. Name is not a string: %s" % name)
-    if isinstance(op, BaseSaverBuilder.SaveableObject):
-      yield op
-    elif isinstance(op, (list, tuple, variables.PartitionedVariable)):
-      if isinstance(op, variables.PartitionedVariable):
-        op = list(op)
-      # A set of slices.
-      slice_name = None
-      # pylint: disable=protected-access
-      for variable in op:
-        if not isinstance(variable, variables.Variable):
-          raise ValueError("Slices must all be Variables: %s" % variable)
-        if not variable._save_slice_info:
-          raise ValueError("Slices must all be slices: %s" % variable)
-        if slice_name is None:
-          slice_name = variable._save_slice_info.full_name
-        elif slice_name != variable._save_slice_info.full_name:
-          raise ValueError(
-              "Slices must all be from the same tensor: %s != %s" %
-              (slice_name, variable._save_slice_info.full_name))
-        if variable.op.type in ["Variable", "VariableV2",
-                                "AutoReloadVariable"]:
-          yield BaseSaverBuilder.VariableSaveable(
-              variable, variable._save_slice_info.spec, name)
-        else:
-          yield BaseSaverBuilder.ResourceVariableSaveable(
-              variable, variable._save_slice_info.spec, name)
-      # pylint: enable=protected-access
-    elif isinstance(op, checkpointable.CheckpointableBase) and not isinstance(
-        op, variables.Variable):
-      # pylint: disable=protected-access
-      for attr, factory in op._gather_saveables_for_checkpoint().items():
-        if attr == checkpointable.VARIABLE_VALUE_KEY:
-          # Keep original name for classes masquerading as variables.
-          full_name = name
-        else:
-          full_name = name + "_" + attr
-        op = (factory(full_name) if callable(factory) else factory)
-        for op in BaseSaverBuilder.SaveableObjectsForOp(op, op.name):
-          yield op
-      # pylint: enable=protected-access
-    else:
-      # A variable or tensor.
-      if context.executing_eagerly():
-        if not isinstance(op, resource_variable_ops.ResourceVariable):
-          raise ValueError("Can only save/restore ResourceVariable eager "
-                           "mode is enabled, type: %s." % type(op))
-        yield BaseSaverBuilder.ResourceVariableSaveable(op, "", name)
-      else:
-        if isinstance(op, resource_variable_ops.ResourceVariable):
-          variable = op._graph_element  # pylint: disable=protected-access
-        else:
-          variable = ops.internal_convert_to_tensor(op, as_ref=True)
-        if not BaseSaverBuilder._IsVariable(variable):
-          raise TypeError("names_to_saveables must be a dict mapping string "
-                          "names to Tensors/Variables. Not a variable: %s" %
-                          variable)
-        if variable.op.type in ["Variable", "VariableV2",
-                                "AutoReloadVariable"]:
-          yield BaseSaverBuilder.VariableSaveable(variable, "", name)
-        else:
-          yield BaseSaverBuilder.ResourceVariableSaveable(
-              variable, "", name)
-
-  def _ValidateAndSliceInputs(self, names_to_saveables):
-    """Returns the variables and names that will be used for a Saver.
-
-    Args:
-      names_to_saveables: A dict (k, v) where k is the name of an operation and
-         v is an operation to save or a BaseSaverBuilder.Saver.
-
-    Returns:
-      A list of BaseSaverBuilder.SaveableObject objects.
-
-    Raises:
-      TypeError: If any of the keys are not strings or any of the
-        values are not one of Tensor or Variable or a checkpointable operation.
-      ValueError: If the same operation is given in more than one value
-        (this also applies to slices of SlicedVariables).
-    """
-    if not isinstance(names_to_saveables, dict):
-      names_to_saveables = BaseSaverBuilder.OpListToDict(names_to_saveables)
-
-    saveables = []
-    seen_ops = set()
-    for name, op in sorted(names_to_saveables.items(),
-                           # Avoid comparing ops, sort only by name.
-                           key=lambda x: x[0]):
-      for converted_saveable_object in self.SaveableObjectsForOp(op, name):
-        self._AddSaveable(saveables, seen_ops, converted_saveable_object)
-    return saveables
-
-  def _AddSaveable(self, saveables, seen_ops, saveable):
-    """Adds the saveable to the saveables list.
-
-    Args:
-      saveables: List to append the SaveableObject to.
-      seen_ops: Set of the ops of the saveables already processed.  Used to
-        check that each saveable is only saved once.
-      saveable: The saveable.
-
-    Raises:
-      ValueError: If the saveable has already been processed.
-    """
-    if saveable.op in seen_ops:
-      raise ValueError("The same saveable will be restored with two names: %s" %
-                       saveable.name)
-    saveables.append(saveable)
-    seen_ops.add(saveable.op)
-
   def build(self,
             names_to_saveables,
             reshape=False,
@@ -775,7 +483,8 @@ class BaseSaverBuilder(object):
       raise ValueError("save and restore operations need to be built together "
                        " when eager execution is not enabled.")
 
-    saveables = self._ValidateAndSliceInputs(names_to_saveables)
+    saveables = saveable_object_util.validate_and_slice_inputs(
+        names_to_saveables)
     if max_to_keep is None:
       max_to_keep = 0
 
@@ -1910,7 +1619,7 @@ def saver_from_object_based_checkpoint(
   if builder is None:
     builder = BulkSaverBuilder()
 
-  saveables = builder._ValidateAndSliceInputs(var_list)  # pylint: disable=protected-access
+  saveables = saveable_object_util.validate_and_slice_inputs(var_list)
   current_names = set()
   for saveable in saveables:
     for spec in saveable.specs:
diff --git a/tensorflow/python/training/saving/BUILD b/tensorflow/python/training/saving/BUILD
new file mode 100644
index 0000000000..67ccd59b88
--- /dev/null
+++ b/tensorflow/python/training/saving/BUILD
@@ -0,0 +1,55 @@
+# Description:
+#   Low-level utilities for reading and writing checkpoints.
+
+package(
+    default_visibility = [
+        "//tensorflow:internal",
+    ],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+
+py_library(
+    name = "functional_saver",
+    srcs = ["functional_saver.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":saveable_object",
+        ":saveable_object_util",
+        "//tensorflow/python/eager:def_function",
+    ],
+)
+
+cuda_py_test(
+    name = "functional_saver_test",
+    size = "medium",
+    srcs = [
+        "functional_saver_test.py",
+    ],
+    additional_deps = [
+        ":functional_saver",
+        "//tensorflow/python/eager:test",
+    ],
+)
+
+py_library(
+    name = "saveable_object",
+    srcs = ["saveable_object.py"],
+    srcs_version = "PY2AND3",
+)
+
+py_library(
+    name = "saveable_object_util",
+    srcs = ["saveable_object_util.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/training/checkpointable:base",
+        "@six_archive//:six",
+    ],
+)
diff --git a/tensorflow/python/training/saving/functional_saver.py b/tensorflow/python/training/saving/functional_saver.py
new file mode 100644
index 0000000000..7eed333662
--- /dev/null
+++ b/tensorflow/python/training/saving/functional_saver.py
@@ -0,0 +1,101 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Saves and restore variables inside traced @tf.functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import io_ops
+from tensorflow.python.training.saving import saveable_object
+from tensorflow.python.training.saving import saveable_object_util
+
+
+class Saver(object):
+  """A minimal utility class for saving and restoring checkpoints.
+
+  Note that this is a low-level utility which stores Tensors in the keys
+  specified by `SaveableObject`s. Higher-level utilities for object-based
+  checkpointing are built on top of it.
+  """
+
+  def __init__(self, saveable_objects):
+    """Specify a list of `SaveableObject`s to save and restore.
+
+    Args:
+      saveable_objects: A list of `SaveableObject`s.
+    """
+    saveable_objects = list(saveable_objects)
+    for saveable in saveable_objects:
+      if not isinstance(saveable, saveable_object.SaveableObject):
+        raise ValueError(
+            "Saver expected a list of SaveableObjects, got %s." % (saveable,))
+    self._saveable_objects = saveable_objects
+
+  # TODO(b/120569892): Use tf.function here
+  def save(self, file_prefix):
+    """Save the saveable objects to a checkpoint with `file_prefix`.
+
+    Args:
+      file_prefix: A string or scalar string Tensor containing the prefix to
+        save under.
+    Returns:
+      A scalar string Tensor containing `file_prefix` with control dependencies
+      on the save ops.
+    """
+    tensor_names = []
+    tensors = []
+    tensor_slices = []
+    for saveable in self._saveable_objects:
+      for spec in saveable.specs:
+        tensor_names.append(spec.name)
+        tensors.append(spec.tensor)
+        tensor_slices.append(spec.slice_spec)
+    with ops.control_dependencies(
+        [io_ops.save_v2(file_prefix, tensor_names, tensor_slices, tensors)]):
+      return array_ops.identity(file_prefix)
+
+  # TODO(b/120569892): Use tf.function here
+  def restore(self, file_prefix):
+    """Restore the saveable objects from a checkpoint with `file_prefix`.
+
+    Args:
+      file_prefix: A string or scalar string Tensor containing the prefix for
+        files to read from.
+
+    Returns:
+      An operation which restores the `Saver`'s `SaveableObject`s when run, or
+      None if executing eagerly.
+    """
+    restore_ops = []
+    for saveable in self._saveable_objects:
+      if saveable.device:
+        device = saveable_object_util.set_cpu0(saveable.device)
+      else:
+        device = None
+      with ops.device(device):
+        tensors = []
+        for spec in saveable.specs:
+          tensors.append(
+              io_ops.restore_v2(
+                  file_prefix,
+                  [spec.name],
+                  [spec.slice_spec],
+                  [spec.dtype])[0])
+        restore_ops.append(saveable.restore(tensors, restored_shapes=None))
+    return control_flow_ops.group(restore_ops)
diff --git a/tensorflow/python/training/saving/functional_saver_test.py b/tensorflow/python/training/saving/functional_saver_test.py
new file mode 100644
index 0000000000..40002255aa
--- /dev/null
+++ b/tensorflow/python/training/saving/functional_saver_test.py
@@ -0,0 +1,50 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Tests for the functional saver."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.training.saving import functional_saver
+from tensorflow.python.training.saving import saveable_object_util
+
+
+class SaverTest(test.TestCase):
+
+  def test_resource_variable(self):
+    v1 = resource_variable_ops.ResourceVariable(2.)
+    saver = functional_saver.Saver(
+        saveable_object_util.saveable_objects_for_op(v1, "x"))
+    prefix = os.path.join(self.get_temp_dir(), "ckpt")
+    save_path = saver.save(constant_op.constant(prefix))
+    v1.assign(1.)
+    saver.restore(save_path)
+    self.assertEqual(2., self.evaluate(v1))
+
+    v2 = resource_variable_ops.ResourceVariable(3.)
+    second_saver = functional_saver.Saver(
+        saveable_object_util.saveable_objects_for_op(v2, "x"))
+    second_saver.restore(save_path)
+    self.assertEqual(2., self.evaluate(v2))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/training/saveable_object.py b/tensorflow/python/training/saving/saveable_object.py
similarity index 100%
rename from tensorflow/python/training/saveable_object.py
rename to tensorflow/python/training/saving/saveable_object.py
diff --git a/tensorflow/python/training/saving/saveable_object_util.py b/tensorflow/python/training/saving/saveable_object_util.py
new file mode 100644
index 0000000000..fa88d2c6eb
--- /dev/null
+++ b/tensorflow/python/training/saving/saveable_object_util.py
@@ -0,0 +1,340 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for working with and creating SaveableObjects."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import device as pydev
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.saving import saveable_object
+
+
+# Op names which identify variable reads which should be saved.
+_VARIABLE_OPS = set(["Variable",
+                     "VariableV2",
+                     "AutoReloadVariable",
+                     "VarHandleOp",
+                     "ReadVariableOp"])
+
+
+def set_cpu0(device_string):
+  """Creates a new device string based on `device_string` but using /CPU:0.
+
+  If the device is already on /CPU:0, this is a no-op.
+
+  Args:
+    device_string: A device string.
+
+  Returns:
+    A device string.
+  """
+  parsed_device = pydev.DeviceSpec.from_string(device_string)
+  parsed_device.device_type = "CPU"
+  parsed_device.device_index = 0
+  return parsed_device.to_string()
+
+
+class ReferenceVariableSaveable(saveable_object.SaveableObject):
+  """SaveableObject implementation that handles reference variables."""
+
+  def __init__(self, var, slice_spec, name):
+    spec = saveable_object.SaveSpec(var, slice_spec, name, dtype=var.dtype)
+    super(ReferenceVariableSaveable, self).__init__(var, [spec], name)
+
+  def restore(self, restored_tensors, restored_shapes):
+    restored_tensor = restored_tensors[0]
+    if restored_shapes is not None:
+      restored_tensor = array_ops.reshape(restored_tensor, restored_shapes[0])
+    return state_ops.assign(
+        self.op,
+        restored_tensor,
+        validate_shape=restored_shapes is None and
+        self.op.get_shape().is_fully_defined())
+
+
+class ResourceVariableSaveable(saveable_object.SaveableObject):
+  """SaveableObject implementation that handles ResourceVariables."""
+
+  def __init__(self, var, slice_spec, name):
+    self._var_device = var.device
+    self._var_shape = var.shape
+    if isinstance(var, ops.Tensor):
+      self.handle_op = var.op.inputs[0]
+      tensor = var
+    elif isinstance(var, resource_variable_ops.ResourceVariable):
+
+      def _read_variable_closure(v):
+        def f():
+          with ops.device(v.device):
+            x = v.read_value()
+            # To allow variables placed on non-CPU devices to be checkpointed,
+            # we copy them to CPU on the same machine first.
+            with ops.device("/device:CPU:0"):
+              return array_ops.identity(x)
+        return f
+
+      self.handle_op = var.handle
+      tensor = _read_variable_closure(var)
+    else:
+      raise ValueError(
+          "Saveable is neither a resource variable nor a read operation."
+          " Got: %s" % repr(var))
+    spec = saveable_object.SaveSpec(tensor, slice_spec, name,
+                                    dtype=var.dtype)
+    super(ResourceVariableSaveable, self).__init__(var, [spec], name)
+
+  def restore(self, restored_tensors, restored_shapes):
+    restored_tensor = restored_tensors[0]
+    if restored_shapes is not None:
+      restored_tensor = array_ops.reshape(restored_tensor, restored_shapes[0])
+    # Copy the restored tensor to the variable's device.
+    with ops.device(self._var_device):
+      restored_tensor = array_ops.identity(restored_tensor)
+      return resource_variable_ops.shape_safe_assign_variable_handle(
+          self.handle_op, self._var_shape, restored_tensor)
+
+
+def _tensor_comes_from_variable(v):
+  return isinstance(v, ops.Tensor) and v.op.type in _VARIABLE_OPS
+
+
+def saveable_objects_for_op(op, name):
+  """Create `SaveableObject`s from an operation.
+
+  Args:
+    op: A variable, operation, or SaveableObject to coerce into a
+      SaveableObject.
+    name: A string name for the SaveableObject.
+
+  Yields:
+    `SaveableObject`s which together save/restore `op`.
+
+  Raises:
+    TypeError: If `name` is not a string.
+    ValueError: For operations with no known conversion to SaveableObject.
+  """
+  if not isinstance(name, six.string_types):
+    raise TypeError(
+        "names_to_saveables must be a dict mapping string names to "
+        "checkpointable operations. Name is not a string: %s" % name)
+  if isinstance(op, saveable_object.SaveableObject):
+    yield op
+  elif isinstance(op, (list, tuple, variables.PartitionedVariable)):
+    if isinstance(op, variables.PartitionedVariable):
+      op = list(op)
+    # A set of slices.
+    slice_name = None
+    # pylint: disable=protected-access
+    for variable in op:
+      if not isinstance(variable, variables.Variable):
+        raise ValueError("Slices must all be Variables: %s" % variable)
+      if not variable._save_slice_info:
+        raise ValueError("Slices must all be slices: %s" % variable)
+      if slice_name is None:
+        slice_name = variable._save_slice_info.full_name
+      elif slice_name != variable._save_slice_info.full_name:
+        raise ValueError(
+            "Slices must all be from the same tensor: %s != %s" %
+            (slice_name, variable._save_slice_info.full_name))
+      if variable.op.type in ["Variable", "VariableV2",
+                              "AutoReloadVariable"]:
+        yield ReferenceVariableSaveable(
+            variable, variable._save_slice_info.spec, name)
+      else:
+        yield ResourceVariableSaveable(
+            variable, variable._save_slice_info.spec, name)
+    # pylint: enable=protected-access
+  elif isinstance(op, checkpointable.CheckpointableBase) and not isinstance(
+      op, variables.Variable):
+    # pylint: disable=protected-access
+    for attr, factory in op._gather_saveables_for_checkpoint().items():
+      if attr == checkpointable.VARIABLE_VALUE_KEY:
+        # Keep original name for classes masquerading as variables.
+        full_name = name
+      else:
+        full_name = name + "_" + attr
+      op = (factory(full_name) if callable(factory) else factory)
+      for op in saveable_objects_for_op(op, op.name):
+        yield op
+    # pylint: enable=protected-access
+  else:
+    # A variable or tensor.
+    if isinstance(op, resource_variable_ops.ResourceVariable):
+      # pylint: disable=protected-access
+      if op._in_graph_mode:
+        variable = op._graph_element
+      else:
+        variable = op
+      # pylint: enable=protected-access
+      yield ResourceVariableSaveable(variable, "", name)
+    else:
+      with ops.init_scope():
+        if context.executing_eagerly():
+          raise ValueError("Can only save/restore ResourceVariables when "
+                           "executing eagerly, got type: %s." % type(op))
+
+      variable = ops.internal_convert_to_tensor(op, as_ref=True)
+      if not _tensor_comes_from_variable(variable):
+        raise TypeError("names_to_saveables must be a dict mapping string "
+                        "names to Tensors/Variables. Not a variable: %s" %
+                        variable)
+      if variable.op.type in ["Variable", "VariableV2",
+                              "AutoReloadVariable"]:
+        yield ReferenceVariableSaveable(variable, "", name)
+      else:
+        yield ResourceVariableSaveable(
+            variable, "", name)
+
+
+def op_list_to_dict(op_list, convert_variable_to_tensor=True):
+  """Create a dictionary of names to operation lists.
+
+  Args:
+    op_list: A list, tuple, or set of Variables or SaveableObjects.
+    convert_variable_to_tensor: Whether or not to convert single Variables
+      with no slice info into Tensors.
+
+  Returns:
+    A dictionary of names to the operations that must be saved under
+    that name.  Variables with save_slice_info are grouped together under the
+    same key in no particular order.
+
+  Raises:
+    TypeError: If the type of op_list or its elements is not supported.
+    ValueError: If at least two saveables share the same name.
+  """
+  if not isinstance(op_list, (list, tuple, set)):
+    raise TypeError("Variables to save should be passed in a dict or a "
+                    "list: %s" % op_list)
+  # When ResourceVariables are converted to Tensors, read ops are added to the
+  # graph. Sorting the op_list ensures that the resulting graph is always
+  # constructed in a deterministic way:
+  op_list = sorted(op_list, key=lambda x: x.name)
+  names_to_saveables = {}
+  # pylint: disable=protected-access
+  for var in op_list:
+    if isinstance(var, saveable_object.SaveableObject):
+      names_to_saveables[var.name] = var
+    elif isinstance(var, variables.PartitionedVariable):
+      if var.name in names_to_saveables:
+        raise ValueError("At least two variables have the same name: %s" %
+                         var.name)
+      names_to_saveables[var.name] = var
+    elif isinstance(var, variables.Variable) and var._save_slice_info:
+      name = var._save_slice_info.full_name
+      if name in names_to_saveables:
+        if not isinstance(names_to_saveables[name], list):
+          raise ValueError("Mixing slices and non-slices with the same name: "
+                           "%s" % name)
+        names_to_saveables[name].append(var)
+      else:
+        names_to_saveables[name] = [var]
+    elif (isinstance(var, checkpointable.CheckpointableBase)
+          and not isinstance(var, variables.Variable)):
+      checkpointable_saveables = [
+          (factory() if callable(factory) else factory)
+          for factory in var._gather_saveables_for_checkpoint().values()]
+      names_to_saveables.update(
+          op_list_to_dict(checkpointable_saveables))
+    else:
+      if context.executing_eagerly():
+        if not isinstance(var, resource_variable_ops.ResourceVariable):
+          raise ValueError(
+              "Can only save/restore ResourceVariables when eager execution "
+              "is enabled, type: %s." % type(var))
+        set_var = names_to_saveables.setdefault(var._shared_name, var)
+        if set_var is not var:
+          raise ValueError(
+              ("Two different ResourceVariable objects with the same "
+               "shared_name '%s' were passed to the Saver. This likely means "
+               "that they were created in different Graphs or isolation "
+               "contexts, and may not be checkpointed together.") %
+              (var._shared_name,))
+      else:
+        if convert_variable_to_tensor:
+          if isinstance(var, resource_variable_ops.ResourceVariable):
+            var = var._graph_element  # pylint: disable=protected-access
+          else:
+            var = ops.internal_convert_to_tensor(var, as_ref=True)
+          if not _tensor_comes_from_variable(var):
+            raise TypeError("Variable to save is not a Variable: %s" % var)
+        if var.op.type == "ReadVariableOp":
+          name = var.op.inputs[0].op.name
+        else:
+          name = var.op.name
+        if name in names_to_saveables:
+          raise ValueError("At least two variables have the same name: %s" %
+                           name)
+        names_to_saveables[name] = var
+
+    # pylint: enable=protected-access
+  return names_to_saveables
+
+
+def _add_saveable(saveables, seen_ops, saveable):
+  """Adds the saveable to the saveables list.
+
+  Args:
+    saveables: List to append the SaveableObject to.
+    seen_ops: Set of the ops of the saveables already processed.  Used to
+      check that each saveable is only saved once.
+    saveable: The saveable.
+
+  Raises:
+    ValueError: If the saveable has already been processed.
+  """
+  if saveable.op in seen_ops:
+    raise ValueError("The same saveable will be restored with two names: %s" %
+                     saveable.name)
+  saveables.append(saveable)
+  seen_ops.add(saveable.op)
+
+
+def validate_and_slice_inputs(names_to_saveables):
+  """Returns the variables and names that will be used for a Saver.
+
+  Args:
+    names_to_saveables: A dict (k, v) where k is the name of an operation and
+       v is an operation to save or a BaseSaverBuilder.Saver.
+
+  Returns:
+    A list of SaveableObjects.
+
+  Raises:
+    TypeError: If any of the keys are not strings or any of the
+      values are not one of Tensor or Variable or a checkpointable operation.
+    ValueError: If the same operation is given in more than one value
+      (this also applies to slices of SlicedVariables).
+  """
+  if not isinstance(names_to_saveables, dict):
+    names_to_saveables = op_list_to_dict(names_to_saveables)
+
+  saveables = []
+  seen_ops = set()
+  for name, op in sorted(names_to_saveables.items(),
+                         # Avoid comparing ops, sort only by name.
+                         key=lambda x: x[0]):
+    for converted_saveable_object in saveable_objects_for_op(op, name):
+      _add_saveable(saveables, seen_ops, converted_saveable_object)
+  return saveables
diff --git a/tensorflow/python/training/warm_starting_util.py b/tensorflow/python/training/warm_starting_util.py
index 8c97f101da..1382b8ce72 100644
--- a/tensorflow/python/training/warm_starting_util.py
+++ b/tensorflow/python/training/warm_starting_util.py
@@ -28,7 +28,7 @@ from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_ops
 from tensorflow.python.training import checkpoint_utils
-from tensorflow.python.training import saver
+from tensorflow.python.training.saving import saveable_object_util
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -139,7 +139,7 @@ def _infer_var_name(var):
   Returns:
     Name of the `var`
   """
-  name_to_var_dict = saver.BaseSaverBuilder.OpListToDict(var)
+  name_to_var_dict = saveable_object_util.op_list_to_dict(var)
   if len(name_to_var_dict) > 1:
     raise TypeError("`var` = %s passed as arg violates the constraints.  "
                     "name_to_var_dict = %s" % (var, name_to_var_dict))
-- 
GitLab


From abc2610c5ca689dce98683c63ccf8d886c42c37a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 13:09:16 -0800
Subject: [PATCH 219/873] Update ops-related pbtxt files.

PiperOrigin-RevId: 224565203
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 22 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 22 +++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index ce71b21507..1492741e8b 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -79062,6 +79062,17 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "UnwrapDatasetVariant"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+}
 op {
   name: "UpperBound"
   input_arg {
@@ -79495,6 +79506,17 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "WrapDatasetVariant"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+}
 op {
   name: "WriteAudioSummary"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 1fa59b7087..89bdcc571e 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -38153,6 +38153,17 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "UnwrapDatasetVariant"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+}
 op {
   name: "UpperBound"
   input_arg {
@@ -38471,6 +38482,17 @@ op {
     minimum: 1
   }
 }
+op {
+  name: "WrapDatasetVariant"
+  input_arg {
+    name: "input_handle"
+    type: DT_VARIANT
+  }
+  output_arg {
+    name: "output_handle"
+    type: DT_VARIANT
+  }
+}
 op {
   name: "WriteAudioSummary"
   input_arg {
-- 
GitLab


From 32b16083085c0bf954228e8c9160561a576ec405 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 13:11:24 -0800
Subject: [PATCH 220/873] Go: Update generated wrapper functions for TensorFlow
 ops. PiperOrigin-RevId: 224565479

---
 tensorflow/go/op/wrappers.go | 266 +++++++++++++++++------------------
 1 file changed, 133 insertions(+), 133 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 440756bb38..6e49fbb9ea 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -5862,6 +5862,121 @@ func MapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output
 	return scope.AddOperation(opspec)
 }
 
+// StageClearAttr is an optional argument to StageClear.
+type StageClearAttr func(optionalAttr)
+
+// StageClearCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StageClearCapacity(value int64) StageClearAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// StageClearMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StageClearMemoryLimit(value int64) StageClearAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// StageClearContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func StageClearContainer(value string) StageClearAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// StageClearSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func StageClearSharedName(value string) StageClearAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op removes all elements in the underlying container.
+//
+// Returns the created operation.
+func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StageClear",
+
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// StageSizeAttr is an optional argument to StageSize.
+type StageSizeAttr func(optionalAttr)
+
+// StageSizeCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StageSizeCapacity(value int64) StageSizeAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// StageSizeMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StageSizeMemoryLimit(value int64) StageSizeAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// StageSizeContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func StageSizeContainer(value string) StageSizeAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// StageSizeSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func StageSizeSharedName(value string) StageSizeAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op returns the number of elements in the underlying container.
+func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StageSize",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
 //
 // The regularized incomplete beta integral is defined as:
@@ -11434,24 +11549,6 @@ func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr)
 	return op.Output(0)
 }
 
-// Returns the truth value of (x > y) element-wise.
-//
-// *NOTE*: `Greater` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Greater",
-		Input: []tf.Input{
-			x, y,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp.
 type ResourceSparseApplyRMSPropAttr func(optionalAttr)
 
@@ -17210,64 +17307,6 @@ func NonMaxSuppressionWithOverlaps(scope *Scope, overlaps tf.Output, scores tf.O
 	return op.Output(0)
 }
 
-// StageClearAttr is an optional argument to StageClear.
-type StageClearAttr func(optionalAttr)
-
-// StageClearCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func StageClearCapacity(value int64) StageClearAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// StageClearMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func StageClearMemoryLimit(value int64) StageClearAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// StageClearContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func StageClearContainer(value string) StageClearAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// StageClearSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func StageClearSharedName(value string) StageClearAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op removes all elements in the underlying container.
-//
-// Returns the created operation.
-func StageClear(scope *Scope, dtypes []tf.DataType, optional ...StageClearAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StageClear",
-
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-
 // Computes softmax cross entropy cost and gradients to backpropagate.
 //
 // Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept
@@ -20236,63 +20275,6 @@ func NotEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
 	return op.Output(0)
 }
 
-// StageSizeAttr is an optional argument to StageSize.
-type StageSizeAttr func(optionalAttr)
-
-// StageSizeCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func StageSizeCapacity(value int64) StageSizeAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// StageSizeMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func StageSizeMemoryLimit(value int64) StageSizeAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// StageSizeContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func StageSizeContainer(value string) StageSizeAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// StageSizeSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func StageSizeSharedName(value string) StageSizeAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op returns the number of elements in the underlying container.
-func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StageSize",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // Produces the max pool of the input tensor for quantized types.
 //
 // Arguments:
@@ -31550,6 +31532,24 @@ func ModelDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataT
 	return op.Output(0)
 }
 
+// Returns the truth value of (x > y) element-wise.
+//
+// *NOTE*: `Greater` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Greater",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Performs a padding as a preprocess during a convolution.
 //
 // Similar to FusedResizeAndPadConv2d, this op allows for an optimized
-- 
GitLab


From d09435e0cc8b21e5b10eb0f9750e7a24c2031e85 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 13:13:45 -0800
Subject: [PATCH 221/873] Change `Status::error_message()` usage to
 `Status::message()`.

PiperOrigin-RevId: 224565798
---
 tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
index b4b06a40a2..ef35e84ba5 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
@@ -98,7 +98,7 @@ Status DumpOpProfileToLogDirectory(StringPiece run_dir,
   if (!status.ok()) {
     return errors::Internal(
         "Failed to convert op profile to json. Skipping... ",
-        string(status.error_message()));
+        string(status.message()));
   }
   TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, json));
   if (os) {
-- 
GitLab


From 939206b56b6dd243f1aae52184c680e6b6f8b30e Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Fri, 7 Dec 2018 13:25:27 -0800
Subject: [PATCH 222/873] Update the performance test for unified LSTM to be
 benchmark test.

PiperOrigin-RevId: 224567398
---
 .../python/keras/layers/unified_lstm_test.py  | 51 +++++++++++--------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 2cb3eff8fd..8cbc370633 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -28,6 +28,7 @@ import numpy as np
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import keras
+from tensorflow.python.client import session as session_lib
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -732,7 +733,7 @@ class LSTMLayerGraphOnlyTest(test.TestCase):
       self.assertEqual(len(layer.get_losses_for(x)), 1)
 
 
-class UnifiedLSTMPerformanceTest(test.TestCase):
+class UnifiedLSTMPerformanceTest(test.Benchmark):
 
   def _measure_performance(self, test_config, model, x_train, y_train):
     batch = test_config['batch']
@@ -808,11 +809,11 @@ class UnifiedLSTMPerformanceTest(test.TestCase):
                  'Normal LSTM', sec_per_epoch)
     return sec_per_epoch
 
-  @test_util.run_in_graph_and_eager_modes(config=_config, use_gpu=True)
-  def test_performance_with_standard_cudnn_impl(self):
+  def _benchmark_performance_with_standard_cudnn_impl(self):
     if not test.is_gpu_available():
       self.skipTest('performance test will only run on GPU')
 
+    mode = 'eager' if context.executing_eagerly() else 'graph'
     batch = 64
     num_batch = 10
     test_config = {
@@ -832,34 +833,42 @@ class UnifiedLSTMPerformanceTest(test.TestCase):
         num_classes=test_config['output_shape'])
     y_train = keras.utils.to_categorical(y_train, test_config['output_shape'])
 
-    cudnn_duration = self._time_performance_run_cudnn_lstm(
+    cudnn_sec_per_epoch = self._time_performance_run_cudnn_lstm(
         test_config, x_train, y_train)
-    unified_lstm_gpu_duration = self._time_performance_run_unifed_lstm_gpu(
+    unified_lstm_sec_per_epoch = self._time_performance_run_unifed_lstm_gpu(
         test_config, x_train, y_train)
-    normal_lstm_duration = self._time_performance_run_normal_lstm(
+    normal_lstm_sec_per_epoch = self._time_performance_run_normal_lstm(
         test_config, x_train, y_train)
 
-    cudnn_vs_unified = cudnn_duration / unified_lstm_gpu_duration
-    unified_vs_normal = normal_lstm_duration / unified_lstm_gpu_duration
+    cudnn_vs_unified = cudnn_sec_per_epoch / unified_lstm_sec_per_epoch
+    unified_vs_normal = normal_lstm_sec_per_epoch / unified_lstm_sec_per_epoch
+
+    self.report_benchmark(name='keras_cudnn_lstm_' + mode,
+                          wall_time=cudnn_sec_per_epoch,
+                          iters=test_config['epoch'],
+                          extras=test_config)
+    self.report_benchmark(name='keras_unified_lstm_' + mode,
+                          wall_time=unified_lstm_sec_per_epoch,
+                          iters=test_config['epoch'],
+                          extras=test_config)
+    self.report_benchmark(name='keras_canonical_lstm_' + mode,
+                          wall_time=normal_lstm_sec_per_epoch,
+                          iters=test_config['epoch'],
+                          extras=test_config)
 
-    # TODO(scottzhu): reeanble the test after moving it to benchmark test suite.
-    # The current test has performance flakiness issue.
     logging.info('Expect the performance of Unified LSTM is within 80% of '
                  'CuDNN LSTM, got {0:.2f}%'.format(cudnn_vs_unified * 100))
     logging.info('Expect the performance of Unified LSTM is more than 5 times'
                  ' of normal LSTM, got {0:.2f}'.format(unified_vs_normal))
 
-    # Assert the performance diff should be within 80% of the native cudnn.
-    # self.assertGreaterEqual(
-    #     cudnn_vs_unified, 0.80,
-    #     'Expect the performance of Unified LSTM is within 80% of CuDNN LSTM, '
-    #     'but got {0:.2f}%'.format(cudnn_vs_unified * 100))
-    # # Assert the performance diff between CPU impl and GPU impl should be more
-    # # than 5 times.
-    # self.assertGreaterEqual(
-    #     unified_vs_normal, 5,
-    #     'Expect the performance of Unified LSTM is more than 5 times of '
-    #     'normal LSTM, but got {0:.2f}'.format(unified_vs_normal))
+  def benchmark_performance_graph(self):
+    with context.graph_mode(), session_lib.Session(config=_config):
+      self._benchmark_performance_with_standard_cudnn_impl()
+
+  def benchmark_performance_eager(self):
+    with context.eager_mode():
+      self._benchmark_performance_with_standard_cudnn_impl()
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 56429fce51cc81bb49d57c0217bd2c59deff774f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 13:42:24 -0800
Subject: [PATCH 223/873] Enables some previously disabled Keras test
 parameterizations.

PiperOrigin-RevId: 224570187
---
 .../keras/engine/training_dataset_test.py      | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/keras/engine/training_dataset_test.py b/tensorflow/python/keras/engine/training_dataset_test.py
index a8a81fa842..d6cc93d1ef 100644
--- a/tensorflow/python/keras/engine/training_dataset_test.py
+++ b/tensorflow/python/keras/engine/training_dataset_test.py
@@ -24,6 +24,7 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util as tf_test_util
 from tensorflow.python.keras import keras_parameterized
@@ -147,11 +148,14 @@ class TestTrainingWithDatasetIterators(keras_parameterized.TestCase):
 
 class TestTrainingWithDataset(keras_parameterized.TestCase):
 
-  # TODO(kaftan) Run w/ all model types.
-  # Seems like subclass models has a bug, file ticket
-  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_with_all_model_types
   @keras_parameterized.run_all_keras_modes
   def test_calling_model_on_same_dataset(self):
+    if ((not testing_utils.should_run_eagerly())
+        and testing_utils.get_model_type() == 'subclass'
+        and context.executing_eagerly()):
+      self.skipTest('b/120673224')
+
     model = testing_utils.get_small_mlp(1, 4, input_dim=3)
     optimizer = RMSPropOptimizer(learning_rate=0.001)
     loss = 'mse'
@@ -234,9 +238,7 @@ class TestTrainingWithDataset(keras_parameterized.TestCase):
                                  'you should specify the `steps` argument'):
       model.predict(dataset, verbose=0)
 
-  # TODO(kaftan) Run w/ all model types.
-  # Seems like subclass models has a bug, file ticket
-  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_with_all_model_types
   @keras_parameterized.run_all_keras_modes
   def test_dataset_with_sample_weights(self):
     model = testing_utils.get_small_mlp(1, 4, input_dim=3)
@@ -308,9 +310,7 @@ class TestTrainingWithDataset(keras_parameterized.TestCase):
 
 class TestMetricsWithDatasetIterators(keras_parameterized.TestCase):
 
-  # TODO(kaftan) Run w/ all model types.
-  # Seems like subclass models has a bug, file ticket
-  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_with_all_model_types
   @keras_parameterized.run_all_keras_modes
   def test_metrics_correctness_with_iterator(self):
     layers = [
-- 
GitLab


From 68e8a87bcd2b97a7f9a09f4cdba86ec254fad978 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Fri, 7 Dec 2018 13:45:51 -0800
Subject: [PATCH 224/873] Improve the correctness test by invoking train/eval
 multiple times (w/w.o. validation data).

PiperOrigin-RevId: 224570819
---
 .../contrib/distribute/python/keras_test.py   | 86 +++++++++----------
 1 file changed, 39 insertions(+), 47 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index dea208232a..c0bcb5ecd5 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1303,16 +1303,17 @@ class TestDistributionStrategyCorrectness(test.TestCase,
 
     with self.cached_session():
       default_tolerance = 1e-5
-      weights_tolerance = default_tolerance
-      metrics_tolerance = default_tolerance
-      predict_tolerance = default_tolerance
+      tol_table = {}
 
       if isinstance(distribution, (mirrored_strategy.MirroredStrategy,
                                    mirrored_strategy.CoreMirroredStrategy)):
-        # TODO(b/119257215): Weights are not exactly the same, so use lower
-        # tolerance for now.
-        weights_tolerance = 1e-4
-        predict_tolerance = 1e-4
+        # TODO(b/119257215): Weights are not exactly the same, so use larger
+        # tolerance for now. Predict should be related to weights.
+        tol_table = {
+            'weights_1': 1e-4,
+            'weights_2': 1e-4,
+            'predict_result_1': 1e-4,
+        }
 
       keras.backend.set_image_data_format('channels_last')
       np.random.seed(_RANDOM_SEED)
@@ -1361,56 +1362,47 @@ class TestDistributionStrategyCorrectness(test.TestCase,
                                         with_distribution,
                                         x_train, y_train, x_predict))
 
-        training_history = model.fit(**training_inputs).history
+        result = {}
+        result['training_history_1'] = model.fit(**training_inputs).history
 
         if eval_inputs is not None:
-          eval_result = model.evaluate(**eval_inputs)
-        else:
-          # Creates a dummy identical eval_result to be compared later.
-          eval_result = 1.0
+          result['eval_result_1'] = model.evaluate(**eval_inputs)
 
-        weights = model.get_weights()
-        predict_result = model.predict(**predict_inputs)
+        result['weights_1'] = model.get_weights()
+        result['predict_result_1'] = model.predict(**predict_inputs)
 
-        return weights, training_history, eval_result, predict_result
+        # Train and eval again to mimic user's flow.
 
-      wts_with_ds, history_with_ds, eval_with_ds, predict_with_ds = (
-          fit_eval_and_predict(with_distribution=distribution))
+        result['training_history_2'] = model.fit(**training_inputs).history
 
-      (wts_without_ds, history_without_ds, eval_without_ds,
-       predict_without_ds) = fit_eval_and_predict(with_distribution=None)
+        if eval_inputs is not None:
+          result['eval_result_2'] = model.evaluate(**eval_inputs)
+
+        result['weights_2'] = model.get_weights()
+
+        return result
+
+      results_with_ds = fit_eval_and_predict(with_distribution=distribution)
+      results_without_ds = fit_eval_and_predict(with_distribution=None)
 
       # Verify that the weights, training history, eval results, predict outputs
       # are the same within some limits of tolerance.
-      self.assertAllClose(
-          wts_with_ds,
-          wts_without_ds,
-          atol=weights_tolerance,
-          rtol=weights_tolerance,
-          msg='Fail to assert weights after training.')
-      self.assertAllClose(
-          eval_with_ds,
-          eval_without_ds,
-          atol=metrics_tolerance,
-          rtol=metrics_tolerance,
-          msg='Fail to assert eval results.')
-      self.assertAllClose(
-          predict_with_ds,
-          predict_without_ds,
-          atol=predict_tolerance,
-          rtol=predict_tolerance,
-          msg='Fail to assert predict results.')
-
-      if not (isinstance(distribution, tpu_strategy.TPUStrategy) and
-              distribution.extended.steps_per_run > 1):
-        # TODO(b/119894254): Enable this test for all cases once the underlying
-        # bug is fixed.
+      for key in results_with_ds:
+        if (key.startswith('training_history') and
+            isinstance(distribution, tpu_strategy.TPUStrategy) and
+            distribution.extended.steps_per_run > 1):
+          # TODO(b/119894254): Enable this test for all cases once the
+          # underlying bug is fixed.
+          continue
+
+        tolerance = tol_table.get(key, default_tolerance)
+
         self.assertAllClose(
-            history_with_ds,
-            history_without_ds,
-            atol=metrics_tolerance,
-            rtol=metrics_tolerance,
-            msg='Fail to assert training history.')
+            results_with_ds[key],
+            results_without_ds[key],
+            atol=tolerance,
+            rtol=tolerance,
+            msg='Fail to assert {}.'.format(key))
 
 
 if __name__ == '__main__':
-- 
GitLab


From 1b03648fb58913992ae44d706f9e5fce06a919e5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 13:52:06 -0800
Subject: [PATCH 225/873] Update cmd line quantization documentation link.

PiperOrigin-RevId: 224571863
---
 tensorflow/lite/g3doc/convert/cmdline_examples.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/lite/g3doc/convert/cmdline_examples.md b/tensorflow/lite/g3doc/convert/cmdline_examples.md
index 59f26b3505..de81e2cfdd 100644
--- a/tensorflow/lite/g3doc/convert/cmdline_examples.md
+++ b/tensorflow/lite/g3doc/convert/cmdline_examples.md
@@ -94,11 +94,12 @@ tflite_convert \
 ### Convert a TensorFlow GraphDef for quantized inference <a name="graphdef_quant"></a>
 
 The TensorFlow Lite Converter is compatible with fixed point quantization models
-described [here](https://www.tensorflow.org/performance/quantization). These are
-float models with `FakeQuant*` ops inserted at the boundaries of fused layers
-to record min-max range information. This generates a quantized inference
-workload that reproduces the quantization behavior that was used during
-training.
+described
+[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/q
+uantize/README.md). These are float models with `FakeQuant*` ops inserted at the
+boundaries of fused layers to record min-max range information. This generates a
+quantized inference workload that reproduces the quantization behavior that was
+used during training.
 
 The following command generates a quantized TensorFlow Lite FlatBuffer from a
 "quantized" TensorFlow GraphDef.
-- 
GitLab


From a64a8d8d02f9d15b1f5d6fbdbc0fa0db3f37f97d Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Fri, 7 Dec 2018 13:58:08 -0800
Subject: [PATCH 226/873] Fix up a few tests to interact better with v2 mode

PiperOrigin-RevId: 224572844
---
 .../tutorials/mnist/mnist_with_summaries.py   |  3 ++-
 .../python/debug/examples/debug_errors.py     |  3 ++-
 .../python/debug/examples/debug_fibonacci.py  |  3 ++-
 .../python/debug/examples/debug_keras.py      |  3 ++-
 .../python/debug/examples/debug_mnist.py      |  3 ++-
 tensorflow/python/keras/models_test.py        |  4 ++-
 tensorflow/python/kernel_tests/lu_op_test.py  | 27 ++++++++++---------
 .../python/kernel_tests/matmul_op_test.py     |  4 ++-
 .../kernel_tests/self_adjoint_eig_op_test.py  |  4 +--
 .../python/kernel_tests/tensordot_op_test.py  | 10 ++++---
 10 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
index 7967e22d6a..1854e84d49 100644
--- a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
+++ b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py
@@ -183,7 +183,8 @@ def main(_):
   if tf.gfile.Exists(FLAGS.log_dir):
     tf.gfile.DeleteRecursively(FLAGS.log_dir)
   tf.gfile.MakeDirs(FLAGS.log_dir)
-  train()
+  with tf.Graph().as_default():
+    train()
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/debug/examples/debug_errors.py b/tensorflow/python/debug/examples/debug_errors.py
index 28abc97343..e3692072cc 100644
--- a/tensorflow/python/debug/examples/debug_errors.py
+++ b/tensorflow/python/debug/examples/debug_errors.py
@@ -77,4 +77,5 @@ if __name__ == "__main__":
       default=False,
       help="Use debugger to track down bad values during training")
   FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
+  with tf.Graph().as_default():
+    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/python/debug/examples/debug_fibonacci.py b/tensorflow/python/debug/examples/debug_fibonacci.py
index 3821b393ec..777fb08988 100644
--- a/tensorflow/python/debug/examples/debug_fibonacci.py
+++ b/tensorflow/python/debug/examples/debug_fibonacci.py
@@ -100,4 +100,5 @@ if __name__ == "__main__":
       "--debug flag.")
 
   FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
+  with tf.Graph().as_default():
+    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/python/debug/examples/debug_keras.py b/tensorflow/python/debug/examples/debug_keras.py
index 3272d85ade..019121fa0a 100644
--- a/tensorflow/python/debug/examples/debug_keras.py
+++ b/tensorflow/python/debug/examples/debug_keras.py
@@ -86,4 +86,5 @@ if __name__ == "__main__":
       default=2,
       help="Number of epochs to train the model for.")
   FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
+  with tf.Graph().as_default():
+    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/python/debug/examples/debug_mnist.py b/tensorflow/python/debug/examples/debug_mnist.py
index ab1c90371c..09fb06c9c0 100644
--- a/tensorflow/python/debug/examples/debug_mnist.py
+++ b/tensorflow/python/debug/examples/debug_mnist.py
@@ -190,4 +190,5 @@ if __name__ == "__main__":
       "the gRPC address (e.g., localhost:1234). Mutually exclusive with the "
       "--debug flag.")
   FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
+  with tf.Graph().as_default():
+    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py
index b0872ae3ab..907ac41d0e 100644
--- a/tensorflow/python/keras/models_test.py
+++ b/tensorflow/python/keras/models_test.py
@@ -365,7 +365,9 @@ class TestCloneAndBuildModel(test.TestCase):
 
     self.assertEqual('mse', model.loss)
     self.assertTrue(
-        isinstance(model.optimizer, keras.optimizers.RMSprop))
+        isinstance(model.optimizer,
+                   (keras.optimizers.RMSprop,
+                    keras.optimizer_v2.rmsprop.RMSprop)))
     self.assertEqual(['acc', metrics.categorical_accuracy],
                      model._compile_metrics)
 
diff --git a/tensorflow/python/kernel_tests/lu_op_test.py b/tensorflow/python/kernel_tests/lu_op_test.py
index f2defcae85..06deb0e1c8 100644
--- a/tensorflow/python/kernel_tests/lu_op_test.py
+++ b/tensorflow/python/kernel_tests/lu_op_test.py
@@ -24,6 +24,7 @@ from tensorflow.python.client import session
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import functional_ops
@@ -41,9 +42,9 @@ class LuOpTest(test.TestCase):
   def float_types(self):
     return set((np.float64, np.float32, np.complex64, np.complex128))
 
-  def _verifyLuBase(self, sess, x, lower, upper, perm, verification,
+  def _verifyLuBase(self, x, lower, upper, perm, verification,
                     output_idx_type):
-    lower_np, upper_np, perm_np, verification_np = sess.run(
+    lower_np, upper_np, perm_np, verification_np = self.evaluate(
         [lower, upper, perm, verification])
 
     self.assertAllClose(x, verification_np)
@@ -65,7 +66,7 @@ class LuOpTest(test.TestCase):
 
   def _verifyLu(self, x, output_idx_type=dtypes.int64):
     # Verify that Px = LU.
-    with self.cached_session(use_gpu=True) as sess:
+    with test_util.use_gpu():
 
       lu, perm = linalg_ops.lu(x, output_idx_type=output_idx_type)
 
@@ -121,7 +122,7 @@ class LuOpTest(test.TestCase):
         verification = array_ops.reshape(permuted_verification_reshaped,
                                          lu_shape)
 
-      self._verifyLuBase(sess, x, lower, upper, perm, verification,
+      self._verifyLuBase(x, lower, upper, perm, verification,
                          output_idx_type)
 
   def testBasic(self):
@@ -139,7 +140,7 @@ class LuOpTest(test.TestCase):
         self._verifyLu(complex_data, output_idx_type=output_idx_type)
 
   def testPivoting(self):
-    with self.session(use_gpu=True) as sess:
+    with test_util.use_gpu():
       # This matrix triggers partial pivoting because the first diagonal entry
       # is small.
       data = np.array([[1e-9, 1., 0.], [1., 0., 0], [0., 1., 5]])
@@ -148,7 +149,7 @@ class LuOpTest(test.TestCase):
       for dtype in (np.float32, np.float64):
         self._verifyLu(data.astype(dtype))
         _, p = linalg_ops.lu(data)
-        p_val = sess.run([p])
+        p_val = self.evaluate([p])
         # Make sure p_val is not the identity permutation.
         self.assertNotAllClose(np.arange(3), p_val)
 
@@ -158,7 +159,7 @@ class LuOpTest(test.TestCase):
         complex_data += data
         self._verifyLu(complex_data)
         _, p = linalg_ops.lu(data)
-        p_val = sess.run([p])
+        p_val = self.evaluate([p])
         # Make sure p_val is not the identity permutation.
         self.assertNotAllClose(np.arange(3), p_val)
 
@@ -166,15 +167,15 @@ class LuOpTest(test.TestCase):
     # LU factorization gives an error when the input is singular.
     # Note: A singular matrix may return without error but it won't be a valid
     # factorization.
-    with self.session(use_gpu=True) as sess:
+    with test_util.use_gpu():
       for dtype in self.float_types:
         with self.assertRaises(errors.InvalidArgumentError):
-          sess.run(
+          self.evaluate(
               linalg_ops.lu(
                   np.array([[1., 2., 3.], [2., 4., 6.], [2., 3., 4.]],
                            dtype=dtype)))
         with self.assertRaises(errors.InvalidArgumentError):
-          sess.run(
+          self.evaluate(
               linalg_ops.lu(
                   np.array([[[1., 2., 3.], [2., 4., 6.], [1., 2., 3.]],
                             [[1., 2., 3.], [3., 4., 5.], [5., 6., 7.]]],
@@ -212,17 +213,19 @@ class LuOpTest(test.TestCase):
     data = np.random.rand(n, n) + 1j * np.random.rand(n, n)
     self._verifyLu(data)
 
+  @test_util.run_v1_only("b/120545219")
   def testEmpty(self):
     self._verifyLu(np.empty([0, 2, 2]))
     self._verifyLu(np.empty([2, 0, 0]))
 
+  @test_util.run_deprecated_v1
   def testConcurrentExecutesWithoutError(self):
-    with self.session(use_gpu=True) as sess:
+    with test_util.use_gpu():
       matrix1 = random_ops.random_normal([5, 5], seed=42)
       matrix2 = random_ops.random_normal([5, 5], seed=42)
       lu1, p1 = linalg_ops.lu(matrix1)
       lu2, p2 = linalg_ops.lu(matrix2)
-      lu1_val, p1_val, lu2_val, p2_val = sess.run([lu1, p1, lu2, p2])
+      lu1_val, p1_val, lu2_val, p2_val = self.evaluate([lu1, p1, lu2, p2])
       self.assertAllEqual(lu1_val, lu2_val)
       self.assertAllEqual(p1_val, p2_val)
 
diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py
index cd99f1774a..d31ecbcd3f 100644
--- a/tensorflow/python/kernel_tests/matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/matmul_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import operator
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
@@ -224,7 +225,8 @@ class MatMulInfixOperatorTest(test_lib.TestCase):
 if __name__ == "__main__":
   sizes = [1, 3, 5]
   trans_options = [[False, False], [True, False], [False, True]]
-  for use_static_shape in [False, True]:
+  # TF2 does not support placeholders under eager so we skip it
+  for use_static_shape in set([True, tf2.enabled()]):
     for dtype in (np.int32, np.int64, np.float16, np.float32, np.float64,
                   np.complex64, np.complex128):
       if not use_static_shape and (dtype == np.int32 or dtype == np.int64):
diff --git a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
index 323d14b7d8..ce782dbc28 100644
--- a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
+++ b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
@@ -161,7 +161,7 @@ def _GetSelfAdjointEigTest(dtype_, shape_, compute_v_):
             math_ops.matmul(tf_v, array_ops.matrix_diag(tf_e)),
             tf_v,
             adjoint_b=True)
-        self.assertAllClose(a_ev.eval(), a, atol=atol)
+        self.assertAllClose(self.evaluate(a_ev), a, atol=atol)
 
         # Compare to numpy.linalg.eigh.
         CompareEigenDecompositions(self, np_e, np_v, self.evaluate(tf_e),
@@ -169,7 +169,7 @@ def _GetSelfAdjointEigTest(dtype_, shape_, compute_v_):
       else:
         tf_e = linalg_ops.self_adjoint_eigvals(constant_op.constant(a))
         self.assertAllClose(
-            np.sort(np_e, -1), np.sort(tf_e.eval(), -1), atol=atol)
+            np.sort(np_e, -1), np.sort(self.evaluate(tf_e), -1), atol=atol)
 
   return Test
 
diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py
index 123c9b376c..2894b7dd50 100644
--- a/tensorflow/python/kernel_tests/tensordot_op_test.py
+++ b/tensorflow/python/kernel_tests/tensordot_op_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -99,7 +100,7 @@ class TensordotTest(test_lib.TestCase):
 
         tf_a = array_ops.ones((3, 3), dtype=dtypes.float32)
         tf_b = constant_op.constant([2, 3, 1], dtype=dtypes.float32)[None, None]
-        tf_ans = math_ops.tensordot(tf_a, tf_b, axes_value).eval()
+        tf_ans = math_ops.tensordot(tf_a, tf_b, axes_value)
 
         self.assertAllEqual(tf_ans.shape, np_ans.shape)
         self.assertAllEqual(tf_ans, np_ans)
@@ -178,7 +179,7 @@ def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
                   axes: (a_dims_np, b_dims_np)
               })
         else:
-          tf_ans = math_ops.tensordot(a_np, b_np, (a_dims_np, b_dims_np)).eval()
+          tf_ans = math_ops.tensordot(a_np, b_np, (a_dims_np, b_dims_np))
       self.assertAllClose(tf_ans, np_ans, rtol=tol, atol=tol)
       self.assertAllEqual(tf_ans.shape, np_ans.shape)
 
@@ -208,7 +209,7 @@ def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
           c = math_ops.tensordot(a, b, axes=axes)
           tf_ans = sess.run(c, feed_dict={a: a_np, b: b_np})
         else:
-          tf_ans = math_ops.tensordot(a_np, b_np, axes=axes).eval()
+          tf_ans = math_ops.tensordot(a_np, b_np, axes=axes)
       self.assertAllClose(tf_ans, np_ans, rtol=tol, atol=tol)
       self.assertAllEqual(tf_ans.shape, np_ans.shape)
 
@@ -220,7 +221,8 @@ if __name__ == "__main__":
     for rank_a in 1, 2, 4, 5:
       for rank_b in 1, 2, 4, 5:
         for num_dims in range(0, min(rank_a, rank_b) + 1):
-          for dynamic_shape in False, True:
+          # TF2 does not support placeholders under eager so we skip it
+          for dynamic_shape in set([False, not tf2.enabled()]):
             for testcase in _get_tensordot_tests(dtype, rank_a, rank_b,
                                                  num_dims, dynamic_shape):
               name = "%s_%s_%s_%s_%s_%s" % (testcase.__name__, dtype.__name__,
-- 
GitLab


From 71ea120a1bdfea823c4e3d27c55c2d2b885b147d Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Fri, 7 Dec 2018 14:09:06 -0800
Subject: [PATCH 227/873] Use eager_py_func instead of the deprecated py_func.
 Add a bit of extra logic to account for the potential use of tf.print in
 Python 2.

PiperOrigin-RevId: 224574979
---
 .../python/autograph/operators/py_builtins.py      |  2 ++
 tensorflow/python/autograph/pyct/parser.py         | 13 ++++++++++++-
 tensorflow/python/autograph/utils/py_func.py       |  5 +++--
 tensorflow/python/autograph/utils/py_func_test.py  | 14 +++++++-------
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/autograph/operators/py_builtins.py b/tensorflow/python/autograph/operators/py_builtins.py
index 2f55d53892..ddf05f73f3 100644
--- a/tensorflow/python/autograph/operators/py_builtins.py
+++ b/tensorflow/python/autograph/operators/py_builtins.py
@@ -174,6 +174,7 @@ def _tf_py_func_print(objects, kwargs):
     override_kwargs['flush'] = True
 
   def print_wrapper(*vals):
+    vals = tuple(v.numpy() if tensor_util.is_tensor(v) else v for v in vals)
     if six.PY3:
       # TensorFlow doesn't seem to generate Unicode when passing strings to
       # py_func. This causes the print to add a "b'" wrapper to the output,
@@ -193,6 +194,7 @@ def range_(start_or_stop, stop=UNDEFINED, step=UNDEFINED):
 
 
 def _tf_range(start_or_stop, stop, step):
+  """Overload of range_ that generates a TF range tensor."""
   # Note: for static inputs (e.g. constants), tf.range errors out at graph
   # construction time, instead of returning an empty tensor. Preventing the
   # graph construction error aligns the semantics with Python.
diff --git a/tensorflow/python/autograph/pyct/parser.py b/tensorflow/python/autograph/pyct/parser.py
index 8f4037c5e2..39fc1a7ed0 100644
--- a/tensorflow/python/autograph/pyct/parser.py
+++ b/tensorflow/python/autograph/pyct/parser.py
@@ -24,6 +24,7 @@ from __future__ import print_function
 import textwrap
 
 import gast
+import six
 
 from tensorflow.python.util import tf_inspect
 
@@ -91,7 +92,17 @@ def parse_entity(entity):
 def parse_str(src):
   """Returns the AST of given piece of code."""
   # TODO(mdan): This should exclude the module things are autowrapped in.
-  return gast.parse(src)
+
+  if six.PY2 and '.print(' in src:
+    # This special treatment is required because gast.parse is not aware of
+    # whether print_function was present in the original context.
+    src = 'from __future__ import print_function\n' + src
+    parsed_module = gast.parse(src)
+    parsed_module.body = parsed_module.body[1:]
+  else:
+    parsed_module = gast.parse(src)
+
+  return parsed_module
 
 
 def parse_expression(src):
diff --git a/tensorflow/python/autograph/utils/py_func.py b/tensorflow/python/autograph/utils/py_func.py
index 11ebfb2e49..ee8b46b520 100644
--- a/tensorflow/python/autograph/utils/py_func.py
+++ b/tensorflow/python/autograph/utils/py_func.py
@@ -127,5 +127,6 @@ def wrap_py_func(f, return_dtypes, args, kwargs=None, use_dummy_return=False):
     retval = f(*f_args, **f_kwargs)
     return 1 if use_dummy_return else retval
 
-  return script_ops.py_func(f_wrapper, tensor_args, dtypes.int64
-                            if use_dummy_return else return_dtypes)
+  if use_dummy_return:
+    return_dtypes = dtypes.int32
+  return script_ops.eager_py_func(f_wrapper, tensor_args, return_dtypes)
diff --git a/tensorflow/python/autograph/utils/py_func_test.py b/tensorflow/python/autograph/utils/py_func_test.py
index 28cefd8c3e..d17ede7714 100644
--- a/tensorflow/python/autograph/utils/py_func_test.py
+++ b/tensorflow/python/autograph/utils/py_func_test.py
@@ -32,13 +32,13 @@ class PyFuncTest(test.TestCase):
       return a + b + c
 
     with self.cached_session() as sess:
-      result = py_func.wrap_py_func(test_fn, dtypes.int64,
+      result = py_func.wrap_py_func(test_fn, dtypes.int32,
                                     (1, constant_op.constant(1), 1))
       self.assertEqual(3, self.evaluate(result))
-      result = py_func.wrap_py_func(test_fn, dtypes.int64, (1, 1, 1))
+      result = py_func.wrap_py_func(test_fn, dtypes.int32, (1, 1, 1))
       self.assertEqual(3, self.evaluate(result))
       result = py_func.wrap_py_func(
-          test_fn, dtypes.int64,
+          test_fn, dtypes.int32,
           (constant_op.constant(1), 1, constant_op.constant(1)))
       self.assertEqual(3, self.evaluate(result))
 
@@ -53,9 +53,9 @@ class PyFuncTest(test.TestCase):
       return a * b.foo
 
     with self.cached_session() as sess:
-      result = py_func.wrap_py_func(test_fn, dtypes.int64, (7, TestClass()))
+      result = py_func.wrap_py_func(test_fn, dtypes.int32, (7, TestClass()))
       self.assertEqual(35, self.evaluate(result))
-      result = py_func.wrap_py_func(test_fn, dtypes.int64,
+      result = py_func.wrap_py_func(test_fn, dtypes.int32,
                                     (constant_op.constant(7), TestClass()))
       self.assertEqual(35, self.evaluate(result))
 
@@ -70,12 +70,12 @@ class PyFuncTest(test.TestCase):
       return a * b.foo + c * d.foo
 
     with self.cached_session() as sess:
-      result = py_func.wrap_py_func(test_fn, dtypes.int64, (7, TestClass(5)), {
+      result = py_func.wrap_py_func(test_fn, dtypes.int32, (7, TestClass(5)), {
           'c': 11,
           'd': TestClass(13)
       })
       self.assertEqual(178, self.evaluate(result))
-      result = py_func.wrap_py_func(test_fn, dtypes.int64,
+      result = py_func.wrap_py_func(test_fn, dtypes.int32,
                                     (constant_op.constant(7), TestClass(5)), {
                                         'c': constant_op.constant(11),
                                         'd': TestClass(13)
-- 
GitLab


From 093f0363238c773255c580ea1390e9fdf666d69c Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 7 Dec 2018 14:14:36 -0800
Subject: [PATCH 228/873] Utility to run tests inside tf.function and eager.

Relies on being able to run the assert* test methods inside a
py_func to run them inside the graph, so there's no need for
self.evaluate or similar methods which create a graph/eager
hybrid programming model.

PiperOrigin-RevId: 224575790
---
 tensorflow/python/eager/function_test.py      |  9 +--
 tensorflow/python/framework/test_util.py      | 72 ++++++++++++++++++-
 .../kernel_tests/variable_scope_test.py       |  5 +-
 3 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 71afbd24d8..e0854b0632 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -428,20 +428,21 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       self.evaluate(variables.global_variables_initializer())
     self.assertEqual(self.evaluate(value), 2.0)
 
-  @test_util.run_in_graph_and_eager_modes
+  @test_util.also_run_as_tf_function
   def testInitScopeTensorInitializationInFunction(self):
 
     @def_function.function
     def tensor_init():
       with ops.init_scope():
         const = constant_op.constant(2.0)
+      # Note: this variable bypasses tf.function's variable creation
+      # requirements by bypassing variable_creator_scope by using
+      # ResourceVariable instead of Variable.
       self.v = resource_variable_ops.ResourceVariable(const)
       return self.v.read_value()
 
     value = tensor_init()
-    if not context.executing_eagerly():
-      self.evaluate(variables.global_variables_initializer())
-    self.assertEqual(self.evaluate(value), 2.0)
+    self.assertAllEqual(value, 2.0)
 
   def testDefunShapeInferenceWithCapturedResourceVariable(self):
     v = resource_variable_ops.ResourceVariable([[1, 2], [3, 4]])
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index b0c3c9b506..06316ce2e9 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -54,6 +54,7 @@ from tensorflow.python import tf2
 from tensorflow.python.client import device_lib
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
 from tensorflow.python.eager import tape
 from tensorflow.python.framework import device as pydev
 from tensorflow.python.framework import dtypes
@@ -67,6 +68,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import versions
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
@@ -76,6 +78,7 @@ from tensorflow.python.util import compat
 from tensorflow.python.util import deprecation
 from tensorflow.python.util import memory
 from tensorflow.python.util import nest
+from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.protobuf import compare
 from tensorflow.python.util.tf_export import tf_export
@@ -1009,6 +1012,58 @@ def run_in_graph_and_eager_modes(func=None,
   return decorator
 
 
+def py_func_if_in_function(f):
+
+  def decorated(*args, **kwds):
+    if not ops.get_default_graph()._building_function:
+      return f(*args, **kwds)
+
+    tensor_args, tensor_indices = zip(
+        *[(x, i) for i, x in enumerate(args)
+          if isinstance(x, (ops.Tensor, variables.Variable))])
+
+    def inner_f(*inner_tensor_args):
+      my_args = list(args)
+      for i, n in zip(tensor_indices, inner_tensor_args):
+        my_args[i] = n
+      return f(*my_args, **kwds)
+
+    return script_ops.py_func(inner_f, tensor_args, [])
+
+  return tf_decorator.make_decorator(f, decorated)
+
+
+def also_run_as_tf_function(f):
+  """Runs the decorated test twice--once as is, once inside a tf.function.
+
+  This allows you to run a test both in eager execution and inside a
+  tf.function, exercising the two execution modes supported in tf 2.0. The test
+  assertions are automatically done inside tf.py_funcs, and tf.function ensures
+  that they run in the proper order and with the proper side effects.
+
+  Currently variable creation is not supported in tests annotated with this
+  decorator since it's tricky to ensure the variable doesn't get repeatedly
+  created when retracing the tf.function.
+
+  Args:
+    f: the test method to be decorated
+
+  Returns:
+    The decorated test method, which will run both in eager and inside a
+    tf.function.
+  """
+
+  def decorated(*args, **kwds):
+    with context.eager_mode():
+      # Running in eager mode
+      f(*args, **kwds)
+
+      defun_f = def_function.function(f)
+      defun_f(*args, **kwds)
+
+  return decorated
+
+
 def run_deprecated_v1(func=None):
   """Execute the decorated test in graph mode.
 
@@ -1783,8 +1838,8 @@ class TensorFlowTestCase(googletest.TestCase):
     return ret
 
 
-# pylint: enable=invalid-name
-
+  # pylint: enable=invalid-name
+  @py_func_if_in_function
   def assertNear(self, f1, f2, err, msg=None):
     """Asserts that two floats are near each other.
 
@@ -1803,6 +1858,7 @@ class TensorFlowTestCase(googletest.TestCase):
         "%f != %f +/- %f%s" % (f1, f2, err, " (%s)" % msg
                                if msg is not None else ""))
 
+  @py_func_if_in_function
   def assertArrayNear(self, farray1, farray2, err, msg=None):
     """Asserts that two float arrays are near each other.
 
@@ -1822,6 +1878,7 @@ class TensorFlowTestCase(googletest.TestCase):
   def _NDArrayNear(self, ndarray1, ndarray2, err):
     return np.linalg.norm(ndarray1 - ndarray2) < err
 
+  @py_func_if_in_function
   def assertNDArrayNear(self, ndarray1, ndarray2, err, msg=None):
     """Asserts that two numpy arrays have near values.
 
@@ -1959,6 +2016,7 @@ class TensorFlowTestCase(googletest.TestCase):
         e.args = ((e.args[0] + " : " + msg,) + e.args[1:])
         raise
 
+  @py_func_if_in_function
   def assertAllClose(self, a, b, rtol=1e-6, atol=1e-6, msg=None):
     """Asserts that two structures of numpy arrays or Tensors, have near values.
 
@@ -1984,6 +2042,7 @@ class TensorFlowTestCase(googletest.TestCase):
     """
     self._assertAllCloseRecursive(a, b, rtol=rtol, atol=atol, msg=msg)
 
+  @py_func_if_in_function
   def assertAllCloseAccordingToType(self,
                                     a,
                                     b,
@@ -2031,6 +2090,7 @@ class TensorFlowTestCase(googletest.TestCase):
 
     self.assertAllClose(a, b, rtol=rtol, atol=atol, msg=msg)
 
+  @py_func_if_in_function
   def assertNotAllClose(self, a, b, **kwargs):
     """Assert that two numpy arrays, or or Tensors, do not have near values.
 
@@ -2049,6 +2109,7 @@ class TensorFlowTestCase(googletest.TestCase):
       return
     raise AssertionError("The two values are close at all elements")
 
+  @py_func_if_in_function
   def assertAllEqual(self, a, b, msg=None):
     """Asserts that two numpy arrays or Tensors have the same values.
 
@@ -2091,6 +2152,7 @@ class TensorFlowTestCase(googletest.TestCase):
       msgs.append("not equal rhs = {}".format(y))
       np.testing.assert_array_equal(a, b, err_msg="\n".join(msgs))
 
+  @py_func_if_in_function
   def assertAllGreater(self, a, comparison_target):
     """Assert element values are all greater than a target value.
 
@@ -2102,6 +2164,7 @@ class TensorFlowTestCase(googletest.TestCase):
     a = self._GetNdArray(a)
     self.assertGreater(np.min(a), comparison_target)
 
+  @py_func_if_in_function
   def assertAllLess(self, a, comparison_target):
     """Assert element values are all less than a target value.
 
@@ -2113,6 +2176,7 @@ class TensorFlowTestCase(googletest.TestCase):
     a = self._GetNdArray(a)
     self.assertLess(np.max(a), comparison_target)
 
+  @py_func_if_in_function
   def assertAllGreaterEqual(self, a, comparison_target):
     """Assert element values are all greater than or equal to a target value.
 
@@ -2124,6 +2188,7 @@ class TensorFlowTestCase(googletest.TestCase):
     a = self._GetNdArray(a)
     self.assertGreaterEqual(np.min(a), comparison_target)
 
+  @py_func_if_in_function
   def assertAllLessEqual(self, a, comparison_target):
     """Assert element values are all less than or equal to a target value.
 
@@ -2166,6 +2231,7 @@ class TensorFlowTestCase(googletest.TestCase):
       lines.append(prefix + "...")
     return lines
 
+  @py_func_if_in_function
   def assertAllInRange(self,
                        target,
                        lower_bound,
@@ -2224,6 +2290,7 @@ class TensorFlowTestCase(googletest.TestCase):
           "Subscript(s) and value(s) of the offending elements:\n" +
           "\n".join(self._format_subscripts(violation_subscripts, target)))
 
+  @py_func_if_in_function
   def assertAllInSet(self, target, expected_set):
     """Assert that elements of a Tensor are all in a given closed set.
 
@@ -2245,6 +2312,7 @@ class TensorFlowTestCase(googletest.TestCase):
       raise AssertionError("%d unique element(s) are not in the set %s: %s" %
                            (np.size(diff), expected_set, diff))
 
+  @py_func_if_in_function
   def assertDTypeEqual(self, target, expected_dtype):
     """Assert ndarray data type is equal to expected.
 
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 44d4bd5e30..451eb38530 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -237,7 +237,8 @@ class VariableScopeTest(test.TestCase):
         _ = d2(x)
         self.assertEqual(len(d2.variables), 2)
         v3, v4 = d2.variables
-        self.assertAllEqual([v1, v2], [v3, v4])
+        self.assertEqual(v1, v3)
+        self.assertEqual(v2, v4)
       f()
 
   # TODO(mihaimaruseac): Not converted to use wrap_function because of
@@ -1684,7 +1685,7 @@ class VariableScopeWithCustomGetterTest(test.TestCase):
       with variable_scope.variable_creator_scope(creator_b):
         variable_scope.variable(1.0, name="one_name")
 
-    self.assertAllEqual(variable_names, ["forced_name"])
+    self.assertEqual(variable_names[0], "forced_name")
 
     called = [False]
 
-- 
GitLab


From 2a050766bf0556d7d92eea62d40fd2bebbcb399f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 14:18:35 -0800
Subject: [PATCH 229/873] human_readable_json.h uses the correct switched
 namespace protobuf but the implementation directly references
 google::protobuf. Fix it so that it agrees with its header file.

PiperOrigin-RevId: 224576410
---
 tensorflow/core/platform/default/human_readable_json.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/platform/default/human_readable_json.cc b/tensorflow/core/platform/default/human_readable_json.cc
index 9f97c8272c..bf9c7b7620 100644
--- a/tensorflow/core/platform/default/human_readable_json.cc
+++ b/tensorflow/core/platform/default/human_readable_json.cc
@@ -20,7 +20,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
+Status ProtoToHumanReadableJson(const protobuf::Message& proto,
                                 string* result) {
 #ifdef TENSORFLOW_LITE_PROTOS
   *result = "[human readable output not available on Android]";
@@ -28,7 +28,7 @@ Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
 #else
   result->clear();
 
-  auto status = google::protobuf::util::MessageToJsonString(proto, result);
+  auto status = protobuf::util::MessageToJsonString(proto, result);
   if (!status.ok()) {
     // Convert error_msg google::protobuf::StringPiece to
     // tensorflow::StringPiece.
@@ -41,8 +41,7 @@ Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
 #endif
 }
 
-Status HumanReadableJsonToProto(const string& str,
-                                ::google::protobuf::Message* proto) {
+Status HumanReadableJsonToProto(const string& str, protobuf::Message* proto) {
 #ifdef TENSORFLOW_LITE_PROTOS
   return errors::Internal("Cannot parse JSON protos on Android");
 #else
-- 
GitLab


From 9ed800cea0d5872511c1b45708d0e8970e4fc958 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Fri, 7 Dec 2018 14:23:42 -0800
Subject: [PATCH 230/873] Bug fix for unified LSTM with time major and
 go_backward combined.

Also added test case to cover that.

PiperOrigin-RevId: 224577299
---
 tensorflow/python/keras/layers/recurrent.py   |  2 +-
 .../python/keras/layers/unified_lstm_test.py  | 44 +++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index fdc2acd538..86a69e45d9 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -2704,7 +2704,7 @@ class UnifiedLSTM(LSTM):
       # both normal and CuDNN implementations.
       if self.go_backwards:
         # Reverse time axis.
-        inputs = K.reverse(inputs, 1)
+        inputs = K.reverse(inputs, 0 if self.time_major else 1)
 
       if 0 < self.dropout < 1:
         if self._dropout_mask is None:
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 8cbc370633..33351948ee 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -298,6 +298,50 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     self.assertAllClose(y_1, y_2)
     self.assertAllClose(y_2, y_3)
 
+  @parameterized.named_parameters(
+      # test_name, time_major, go_backwards
+      ('normal', False, False),
+      ('time_major', True, False),
+      ('go_backwards', False, True),
+      ('both', True, True),
+  )
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_time_major_and_go_backward(self, time_major, go_backwards):
+    input_shape = 10
+    rnn_state_size = 8
+    timestep = 4
+    batch = 100
+
+    x_train = np.random.random((batch, timestep, input_shape))
+
+    def build_model(layer_cls):
+      inputs = keras.layers.Input(
+          shape=[timestep, input_shape], dtype=dtypes.float32)
+      layer = layer_cls(rnn_state_size,
+                        recurrent_activation='sigmoid',
+                        time_major=time_major,
+                        return_sequences=True,
+                        go_backwards=go_backwards)
+      if time_major:
+        converted_input = keras.layers.Lambda(
+            lambda t: array_ops.transpose(t, [1, 0, 2]))(inputs)
+        outputs = layer(converted_input)
+        outputs = keras.layers.Lambda(
+            lambda t: array_ops.transpose(t, [1, 0, 2]))(outputs)
+      else:
+        outputs = layer(inputs)
+      return keras.models.Model(inputs, outputs)
+
+    lstm_model = build_model(keras.layers.LSTM)
+    y_ref = lstm_model.predict(x_train)
+    weights = lstm_model.get_weights()
+
+    unified_lstm_model = build_model(keras.layers.UnifiedLSTM)
+    unified_lstm_model.set_weights(weights)
+    y = unified_lstm_model.predict(x_train)
+
+    self.assertAllClose(y, y_ref)
+
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_keras_model_with_lstm(self):
     input_shape = 10
-- 
GitLab


From 7eeb77ab979acf284d68fbd9b48d54152fc5d90b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 14:29:40 -0800
Subject: [PATCH 231/873] [TF:XLA] Rename misnamed variables in a test.

PiperOrigin-RevId: 224578284
---
 .../compiler/xla/tests/array_elementwise_ops_test.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc
index f6be27bee2..915b456b52 100644
--- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc
+++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc
@@ -329,13 +329,13 @@ TEST_P(ArrayElementwiseOpTestParamCount, AddManyValues) {
   Literal b_literal = LiteralUtil::CreateR1<float>({b_values});
   std::unique_ptr<GlobalData> b_data =
       client_->TransferToServer(b_literal).ConsumeValueOrDie();
-  auto b_constant = Parameter(&builder, 1, a_literal.shape(), "b_param");
-  auto b_param = ConstantR1<float>(&builder, b_values);
+  auto b_param = Parameter(&builder, 1, a_literal.shape(), "b_param");
+  auto b_constant = ConstantR1<float>(&builder, b_values);
 
-  auto sum1 = Add(a_constant, b_constant);
-  auto sum2 = Add(a_constant, b_param);
-  auto sum3 = Add(a_param, b_constant);
-  auto sum4 = Add(a_param, b_param);
+  auto sum1 = Add(a_constant, b_param);
+  auto sum2 = Add(a_constant, b_constant);
+  auto sum3 = Add(a_param, b_param);
+  auto sum4 = Add(a_param, b_constant);
 
   auto sum = Add(sum1, sum2);
   sum = Add(sum, sum3);
-- 
GitLab


From 9c5d513df64aa61e8e7a8ba3939e32fdd13f49f1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 14:41:30 -0800
Subject: [PATCH 232/873] Update the remote execution toolchain to make it work
 with Bazel 0.20.0

PiperOrigin-RevId: 224580438
---
 .../preconfig/win_1803/bazel_018/BUILD        |  7 ++++
 .../preconfig/win_1803/bazel_018/CROSSTOOL    | 36 -------------------
 2 files changed, 7 insertions(+), 36 deletions(-)

diff --git a/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD b/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
index c00f005e46..edd9583648 100644
--- a/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
+++ b/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
@@ -39,6 +39,9 @@ cc_toolchain_suite(
         "x64_windows|msvc-cl": ":cc-compiler-x64_windows",
         "x64_windows|msys-gcc": ":cc-compiler-x64_windows_msys",
         "x64_windows|mingw-gcc": ":cc-compiler-x64_windows_mingw",
+        "x64_windows_msys": ":cc-compiler-x64_windows_msys",
+        "x64_windows": ":cc-compiler-x64_windows",
+        "armeabi-v7a": ":cc-compiler-armeabi-v7a",
     },
 )
 
@@ -54,6 +57,7 @@ cc_toolchain(
     static_runtime_libs = [":empty"],
     strip_files = ":empty",
     supports_param_files = 1,
+    toolchain_identifier = "msys_x64",
 )
 
 toolchain(
@@ -83,6 +87,7 @@ cc_toolchain(
     static_runtime_libs = [":empty"],
     strip_files = ":empty",
     supports_param_files = 0,
+    toolchain_identifier = "msys_x64_mingw",
 )
 
 toolchain(
@@ -112,6 +117,7 @@ cc_toolchain(
     static_runtime_libs = [":empty"],
     strip_files = ":empty",
     supports_param_files = 1,
+    toolchain_identifier = "msvc_x64",
 )
 
 toolchain(
@@ -140,6 +146,7 @@ cc_toolchain(
     static_runtime_libs = [":empty"],
     strip_files = ":empty",
     supports_param_files = 1,
+    toolchain_identifier = "stub_armeabi-v7a",
 )
 
 toolchain(
diff --git a/third_party/toolchains/preconfig/win_1803/bazel_018/CROSSTOOL b/third_party/toolchains/preconfig/win_1803/bazel_018/CROSSTOOL
index 04c8bcae45..38a80c22da 100644
--- a/third_party/toolchains/preconfig/win_1803/bazel_018/CROSSTOOL
+++ b/third_party/toolchains/preconfig/win_1803/bazel_018/CROSSTOOL
@@ -14,42 +14,6 @@
 
 major_version: "local"
 minor_version: ""
-default_target_cpu: "same_as_host"
-
-default_toolchain {
-  cpu: "x64_windows"
-  toolchain_identifier: "msvc_x64"
-}
-
-default_toolchain {
-  cpu: "local"
-  toolchain_identifier: "stub_armeabi-v7a"
-}
-
-default_toolchain {
-  cpu: "armeabi-v7a"
-  toolchain_identifier: "stub_armeabi-v7a"
-}
-
-default_toolchain {
-  cpu: "x64_windows"
-  toolchain_identifier: "msvc_x64"
-}
-
-default_toolchain {
-  cpu: "x64_windows_msvc"
-  toolchain_identifier: "msvc_x64"
-}
-
-default_toolchain {
-  cpu: "x64_windows_msys"
-  toolchain_identifier: "msys_x64"
-}
-
-default_toolchain {
-  cpu: "s390x"
-  toolchain_identifier: "msys_x64"
-}
 
 # Android tooling requires a default toolchain for the armeabi-v7a cpu.
 toolchain {
-- 
GitLab


From 5c4a09cc0b0597aba582ccaf41cd23cb8c541d23 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Fri, 7 Dec 2018 14:50:34 -0800
Subject: [PATCH 233/873] Int8 dequantize implementation.

PiperOrigin-RevId: 224581893
---
 tensorflow/lite/kernels/dequantize.cc         | 27 +++++++++---
 tensorflow/lite/kernels/dequantize_test.cc    | 33 ++++++++++++---
 tensorflow/lite/kernels/internal/BUILD        |  1 +
 .../internal/reference/integer_ops/README.md  |  8 ++++
 .../reference/integer_ops/dequantize.h        | 42 +++++++++++++++++++
 tensorflow/lite/kernels/register.cc           |  4 +-
 tensorflow/lite/toco/tflite/operator.cc       | 24 +++++++++++
 7 files changed, 127 insertions(+), 12 deletions(-)
 create mode 100644 tensorflow/lite/kernels/internal/reference/integer_ops/README.md
 create mode 100644 tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h

diff --git a/tensorflow/lite/kernels/dequantize.cc b/tensorflow/lite/kernels/dequantize.cc
index b2825bb9ea..7f03c73c9c 100644
--- a/tensorflow/lite/kernels/dequantize.cc
+++ b/tensorflow/lite/kernels/dequantize.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/c_api_internal.h"
 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h"
 #include "tensorflow/lite/kernels/internal/tensor.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/op_macros.h"
@@ -57,7 +58,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   OpContext op_context(context, node);
 
-  TF_LITE_ENSURE(context, op_context.input->type == kTfLiteUInt8);
+  TF_LITE_ENSURE(context, op_context.input->type == kTfLiteUInt8 ||
+                              op_context.input->type == kTfLiteInt8);
 
   op_context.output->type = kTfLiteFloat32;
   // If the input tensor is constant, we can persist the dequantized value in
@@ -80,10 +82,25 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   tflite::DequantizationParams op_params;
   op_params.zero_point = op_context.input->params.zero_point;
   op_params.scale = op_context.input->params.scale;
-  optimized_ops::Dequantize(op_params, GetTensorShape(op_context.input),
-                            GetTensorData<uint8_t>(op_context.input),
-                            GetTensorShape(op_context.output),
-                            GetTensorData<float>(op_context.output));
+  switch (op_context.input->type) {
+    case kTfLiteUInt8:
+      optimized_ops::Dequantize(op_params, GetTensorShape(op_context.input),
+                                GetTensorData<uint8_t>(op_context.input),
+                                GetTensorShape(op_context.output),
+                                GetTensorData<float>(op_context.output));
+      break;
+    case kTfLiteInt8:
+      reference_integer_ops::Dequantize(
+          op_params, GetTensorShape(op_context.input),
+          GetTensorData<int8_t>(op_context.input),
+          GetTensorShape(op_context.output),
+          GetTensorData<float>(op_context.output));
+      break;
+    default:
+      context->ReportError(context, "Type %d not supported.",
+                           op_context.input->type);
+      return kTfLiteError;
+  }
 
   if (IsConstantTensor(op_context.input)) {
     op_data->float_dequantized_weights_initialized = true;
diff --git a/tensorflow/lite/kernels/dequantize_test.cc b/tensorflow/lite/kernels/dequantize_test.cc
index 55265d93e5..bb5f1e74a8 100644
--- a/tensorflow/lite/kernels/dequantize_test.cc
+++ b/tensorflow/lite/kernels/dequantize_test.cc
@@ -25,8 +25,16 @@ using ::testing::ElementsAreArray;
 
 class DequantizeOpModel : public SingleOpModel {
  public:
-  DequantizeOpModel(std::initializer_list<int> shape, float min, float max) {
-    input_ = AddInput({TensorType_UINT8, shape, min, max});
+  DequantizeOpModel(TensorType type, std::initializer_list<int> shape,
+                    float scale, int32_t zero_point) {
+    TensorData input_tensor_data;
+    input_tensor_data.type = type;
+    input_tensor_data.shape = shape;
+    input_tensor_data.min = 0;
+    input_tensor_data.max = 0;
+    input_tensor_data.scale = scale;
+    input_tensor_data.zero_point = zero_point;
+    input_ = AddInput(input_tensor_data);
     output_ = AddOutput({TensorType_FLOAT32, shape});
     SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions,
                  CreateDequantizeOptions(builder_).Union());
@@ -34,7 +42,8 @@ class DequantizeOpModel : public SingleOpModel {
     BuildInterpreter({GetShape(input_)});
   }
 
-  void SetInput(std::initializer_list<uint8_t> data) {
+  template <typename T>
+  void SetInput(std::initializer_list<T> data) {
     PopulateTensor(input_, data);
   }
 
@@ -45,10 +54,22 @@ class DequantizeOpModel : public SingleOpModel {
   int output_;
 };
 
-TEST(SplitOpTest, FourDimensional) {
-  DequantizeOpModel m({2, 5}, -63.5, 64);
+TEST(DequantizeOpTest, UINT8) {
+  // [-63.5, 64] -> scale=0.5 zero_point=127 for UINT8
+  DequantizeOpModel m(TensorType_UINT8, {2, 5}, 0.5, 127);
 
-  m.SetInput({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
+  m.SetInput<uint8>({0, 1, 2, 3, 4, 251, 252, 253, 254, 255});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear(
+                  {-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64})));
+}
+
+TEST(DequantizeOpTest, INT8) {
+  // [-63.5, 64] -> scale=0.5, zero_point=1 for INT8
+  DequantizeOpModel m(TensorType_INT8, {2, 5}, 0.5, -1);
+
+  m.SetInput<int8>({-128, -127, -126, -125, -124, 123, 124, 125, 126, 127});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(),
               ElementsAreArray(ArrayFloatNear(
diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD
index d2f7f455e4..69816583f5 100644
--- a/tensorflow/lite/kernels/internal/BUILD
+++ b/tensorflow/lite/kernels/internal/BUILD
@@ -313,6 +313,7 @@ cc_library(
         "reference/depthwiseconv_float.h",
         "reference/depthwiseconv_uint8.h",
         "reference/fully_connected.h",
+        "reference/integer_ops/dequantize.h",
         "reference/reference_ops.h",
         "reference/softmax.h",
     ],
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/README.md b/tensorflow/lite/kernels/internal/reference/integer_ops/README.md
new file mode 100644
index 0000000000..4b1d3c91d5
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/README.md
@@ -0,0 +1,8 @@
+This directory contains reference implementations for int8 fully integer kernels.
+
+Weight filters of convs are expected to be symmetric per-channel quantized in
+the range [-127, 127].
+Inputs/activations are expected to be asymmetric per-layer quantized in the
+range [-128, 127].
+
+THESE ARE EXPERIMENTAL AND PRONE TO CHANGE.
diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h b/tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h
new file mode 100644
index 0000000000..03dcb6c220
--- /dev/null
+++ b/tensorflow/lite/kernels/internal/reference/integer_ops/dequantize.h
@@ -0,0 +1,42 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEQUANTIZE_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEQUANTIZE_H_
+
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace reference_integer_ops {
+
+inline void Dequantize(const tflite::DequantizationParams& op_params,
+                       const RuntimeShape& input_shape, const int8* input_data,
+                       const RuntimeShape& output_shape, float* output_data) {
+  const int32 zero_point = op_params.zero_point;
+  const double scale = op_params.scale;
+  const int flat_size = MatchingFlatSize(input_shape, output_shape);
+
+  for (int i = 0; i < flat_size; i++) {
+    const int32 val = input_data[i];
+    const float result = static_cast<float>(scale * (val - zero_point));
+    output_data[i] = result;
+  }
+}
+
+}  // namespace reference_integer_ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_DEQUANTIZE_H_
diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc
index 3c60d281b3..c0e6f6994f 100644
--- a/tensorflow/lite/kernels/register.cc
+++ b/tensorflow/lite/kernels/register.cc
@@ -222,7 +222,9 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_LOG, Register_LOG());
   AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX());
   AddBuiltin(BuiltinOperator_CAST, Register_CAST());
-  AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE());
+  AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
   AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
   AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc
index e0faed4927..205af23da5 100644
--- a/tensorflow/lite/toco/tflite/operator.cc
+++ b/tensorflow/lite/toco/tflite/operator.cc
@@ -1482,6 +1482,30 @@ class TensorFlowUnsupported : public BaseOperator {
   const bool enable_select_tf_ops_;
 };
 
+class Dequantize
+    : public BuiltinOperator<DequantizeOperator, ::tflite::DequantizeOptions,
+                             ::tflite::BuiltinOptions_DequantizeOptions> {
+ public:
+  using BuiltinOperator::BuiltinOperator;
+
+  flatbuffers::Offset<TfLiteOptions> WriteOptions(
+      const TocoOperator& op,
+      flatbuffers::FlatBufferBuilder* builder) const override {
+    return ::tflite::CreateDequantizeOptions(*builder);
+  }
+
+  void ReadOptions(const TfLiteOptions& options,
+                   TocoOperator* op) const override {}
+
+  int GetVersion(const Operator& op) const override {
+    // TODO(suharshs): Dequantize now supports INT8 in addition to
+    // QUANTIZED_UINT8. When TOCO can create models with INT8, we need
+    // to find a way to see the type here and return version 2. Right now
+    // version 2 will only be added by post training quantization tools.
+    return 1;
+  }
+};
+
 namespace {
 // Build a vector containing all the known operators.
 std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
-- 
GitLab


From 6e010b0d414f4aca6c7e87fdada46b683b5c9846 Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Fri, 7 Dec 2018 14:58:00 -0800
Subject: [PATCH 234/873] Addressing review comments

---
 tensorflow/core/graph/mkl_graph_util.h        |  8 -----
 tensorflow/core/graph/mkl_layout_pass.cc      |  6 ++--
 tensorflow/core/kernels/mkl_conv_ops.cc       | 36 +++++++++----------
 tensorflow/core/kernels/mkl_fused_ops_test.cc | 12 +++----
 4 files changed, 25 insertions(+), 37 deletions(-)

diff --git a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h
index a599ce3620..990b2fe9b0 100644
--- a/tensorflow/core/graph/mkl_graph_util.h
+++ b/tensorflow/core/graph/mkl_graph_util.h
@@ -72,14 +72,6 @@ int inline GetTensorMetaDataIndex(int n, int total_tensors) {
   return DataIndexToMetaDataIndex(tidx, total_tensors);
 }
 
-// Helper function to compare fused_ops attribute strings
-// TODO(Intel) this code is also defined in mkl_conv_ops.h, we need to move to
-// mkl_util.h so we have only one version.
-inline bool CompareFusedOps(const std::vector<string>& fused_ops,
-                            const std::vector<string>& expected) {
-  return fused_ops == expected;
-}
-
 namespace mkl_op_registry {
 static const char* kMklOpLabel = "MklOp";
 static const char* kMklOpLabelPattern = "label='MklOp'";
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 6933b033b1..4c060f54ca 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -1131,9 +1131,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
 
     std::vector<string> fused_ops;
     TF_CHECK_OK(GetNodeAttr(n->def(), "fused_ops", &fused_ops));
-    return (CompareFusedOps(fused_ops, {"BiasAdd"}) ||
-            CompareFusedOps(fused_ops, {"Relu"}) ||
-            CompareFusedOps(fused_ops, {"BiasAdd", "Relu"}));
+    return (fused_ops == std::vector<string>{"BiasAdd"} ||
+            fused_ops == std::vector<string>{"Relu"} ||
+            fused_ops == std::vector<string>{"BiasAdd", "Relu"});
   }
 
   // Rewrites input node to a new node specified by its matching rewrite info.
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index 4a4aaffead..d3bbb3d9e3 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -1099,12 +1099,12 @@ class MklConvOp : public OpKernel {
   }
 
  protected:
-  void FuseBiasAdd(bool fuse_bias_add) { fuse_biasadd_ = fuse_bias_add; }
-  void FuseRelu(bool fuse_relu) { fuse_relu_ = fuse_relu; }
+  void set_fuse_biasadd(bool fuse_biasadd) { fuse_biasadd_ = fuse_biasadd; }
+  void set_fuse_relu(bool fuse_relu) { fuse_relu_ = fuse_relu; }
 
-  // This method is called for the base class MklConvOp, which handles the
+  // This method is for the base class MklConvOp, which handles the
   // floating point implementation of Conv. The quantized conv implementations
-  // will use overiddern versions of this method.
+  // will use overidden versions of this method.
   virtual void ExtendConvFwdParams(OpKernelContext* context,
                                    MklConvFwdParams& params) {
     // Create a string from data types of input, filter, bias, and output.
@@ -1114,6 +1114,8 @@ class MklConvOp : public OpKernel {
     params.dtypes.append(typeid(Toutput).name());
 
     // Add fusions as post ops
+    // Note: Fusion of BiasAdd is handled directly inside MklConvOp by
+    // checking fuse_biasadd_ flag.
     if (fuse_relu_) params.post_op_params.push_back({"relu", {1.0, 0.0, 0.0}});
   }
 
@@ -1169,7 +1171,7 @@ class MklConvOp : public OpKernel {
   Padding padding_;
   TensorFormat data_format_;
 
-  // Initialize to value the template is instantiated with
+  // Initialize to values the template is instantiated with
   bool fuse_biasadd_ = biasEnabled;
   bool fuse_relu_ = false;
 
@@ -1177,11 +1179,6 @@ class MklConvOp : public OpKernel {
   const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;
   const int kDilationH = 0, kDilationW = 1;
 
-  // Helper function to compare fused_ops attribute strings
-  bool CompareFusedOps(const std::vector<string>& fused_ops,
-                       const std::vector<string>& expected) {
-    return fused_ops == expected;
-  }
   // Allocate filter output tensor.
   void AllocateFilterOutputTensor(
       OpKernelContext* context,
@@ -1254,27 +1251,27 @@ class MklFusedConvOp : public MklConvOp<Device, Tinput, Tfilter, Tbias, Toutput,
   explicit MklFusedConvOp(OpKernelConstruction* context)
       : MklConvOp<Device, Tinput, Tfilter, Tbias, Toutput, Ttemp_output, false>(
             context) {
-    // Since we came here through the registration of _MklFusedConv2D then get
+    // Since we came here through the registration of _MklFusedConv2D, get
     // all information from 'fused_ops' and 'num_args'
     std::vector<string> fused_ops;
     OP_REQUIRES_OK(context, context->GetAttr("fused_ops", &fused_ops));
 
     int num_args;
     OP_REQUIRES_OK(context, context->GetAttr("num_args", &num_args));
-    OP_REQUIRES(context, (num_args == 0 || !fused_ops.empty()),
+    OP_REQUIRES(context, !fused_ops.empty(),
                 errors::InvalidArgument(
                     "Fused Conv2D must have at least one fused op."));
 
-    if (CompareFusedOps(fused_ops, {"BiasAdd"})) {
-      this->FuseBiasAdd(true);
+    if (fused_ops == std::vector<string>{"BiasAdd"}) {
+      this->set_fuse_biasadd(true);
       OP_REQUIRES(context, num_args == 1,
                   errors::InvalidArgument(
                       "Fused Conv2D must have one extra argument: bias."));
-    } else if (CompareFusedOps(fused_ops, {"Relu"})) {
-      this->FuseRelu(true);
-    } else if (CompareFusedOps(fused_ops, {"BiasAdd", "Relu"})) {
-      this->FuseBiasAdd(true);
-      this->FuseRelu(true);
+    } else if (fused_ops == std::vector<string>{"Relu"}) {
+      this->set_fuse_relu(true);
+    } else if (fused_ops == std::vector<string>{"BiasAdd", "Relu"}) {
+      this->set_fuse_biasadd(true);
+      this->set_fuse_relu(true);
       OP_REQUIRES(context, num_args == 1,
                   errors::InvalidArgument(
                       "Fused Conv2D must have one extra argument: bias."));
@@ -1873,7 +1870,6 @@ TF_CALL_float(REGISTER_MKL_CPU_2D);
                               .TypeConstraint<T>("T")               \
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklFusedConvOp<CPUDevice, T, T, T, T, T>);
-// Note we are registering _MklFusedConv2D.
 // We check the fused_ops attributes to decide if bias is enabled or not.
 
 TF_CALL_float(REGISTER_MKL_CPU_2D_FUSED);
diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc
index 7f1965de85..657b3e63ff 100644
--- a/tensorflow/core/kernels/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc
@@ -66,7 +66,7 @@ class ConvMklToTF : public OpsTestBase {
     PerformConversion(dtype, tensor, mkl_meta_tensor, &output);
     test::ExpectTensorNear<T>(expected, output, 1e-5);
   }
-  void TestBody(){};
+  void TestBody() {}
 };
 
 // Testing MKL's fused convolution ops
@@ -175,6 +175,8 @@ class MklFusedConv2DOpTest : public OpsTestBase {
 
     // Compare output to expected results
     const Tensor& output_tensor = *GetOutput(0);
+    // Index 2 will need to be changed if the number of outputs produced
+    // by MklConv2D change.
     const Tensor& output_meta_tensor = *GetOutput(2);
     ConvMklToTF<T> conv_comp;
     conv_comp.PerformConversion(dtype, output_tensor, output_meta_tensor,
@@ -207,7 +209,7 @@ class MklFusedConv2DOpTest : public OpsTestBase {
     ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
     ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());
 
-    test::ExpectTensorNear<T>(conv_2d, fused_conv_2d, 1e-5);
+    test::ExpectClose(conv_2d, fused_conv_2d);
   }
 
   // Verifies that computing Conv2D+BiasAdd in a graph is identical to
@@ -293,10 +295,8 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
   this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
 }
 
-REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest,
-                           OneByOneConvolution,         //
-                           SpatialConvolution,          //
-                           OneByOneConvolutionAndRelu,  //
+REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest, OneByOneConvolution,
+                           SpatialConvolution, OneByOneConvolutionAndRelu,
                            SpatialConvolutionAndRelu);
 
 using MklFusedBiasAddDataTypes = ::testing::Types<float>;
-- 
GitLab


From cd231a62a664e577aa5171d7b3a44a04867f2035 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 7 Dec 2018 15:05:59 -0800
Subject: [PATCH 235/873] [tf.data] Expose
 `tf.data.experimental.NestedStructure`.

PiperOrigin-RevId: 224584699
---
 .../python/data/experimental/__init__.py       |  2 ++
 tensorflow/python/data/util/structure.py       |  1 +
 ...w.data.experimental.-nested-structure.pbtxt | 18 ++++++++++++++++++
 .../v1/tensorflow.data.experimental.pbtxt      |  4 ++++
 ...w.data.experimental.-nested-structure.pbtxt | 18 ++++++++++++++++++
 .../v2/tensorflow.data.experimental.pbtxt      |  4 ++++
 6 files changed, 47 insertions(+)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-nested-structure.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-nested-structure.pbtxt

diff --git a/tensorflow/python/data/experimental/__init__.py b/tensorflow/python/data/experimental/__init__.py
index 365c53405c..ffc2e5ef5f 100644
--- a/tensorflow/python/data/experimental/__init__.py
+++ b/tensorflow/python/data/experimental/__init__.py
@@ -26,6 +26,7 @@ See [Importing Data](https://tensorflow.org/guide/datasets) for an overview.
 @@CheckpointInputPipelineHook
 @@CsvDataset
 @@DatasetStructure
+@@NestedStructure
 @@OptimizationOptions
 @@Optional
 @@OptionalStructure
@@ -121,6 +122,7 @@ from tensorflow.python.data.ops.dataset_ops import DatasetStructure
 from tensorflow.python.data.ops.iterator_ops import get_next_as_optional
 from tensorflow.python.data.ops.optional_ops import Optional
 from tensorflow.python.data.ops.optional_ops import OptionalStructure
+from tensorflow.python.data.util.structure import NestedStructure
 from tensorflow.python.data.util.structure import SparseTensorStructure
 from tensorflow.python.data.util.structure import Structure
 from tensorflow.python.data.util.structure import TensorStructure
diff --git a/tensorflow/python/data/util/structure.py b/tensorflow/python/data/util/structure.py
index 4f2c6cd853..5e3addacaa 100644
--- a/tensorflow/python/data/util/structure.py
+++ b/tensorflow/python/data/util/structure.py
@@ -273,6 +273,7 @@ def convert_legacy_structure(output_types, output_shapes, output_classes):
 # NOTE(mrry): The following classes make extensive use of non-public methods of
 # their base class, so we disable the protected-access lint warning once here.
 # pylint: disable=protected-access
+@tf_export("data.experimental.NestedStructure")
 class NestedStructure(Structure):
   """Represents a nested structure in which each leaf is a `Structure`."""
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-nested-structure.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-nested-structure.pbtxt
new file mode 100644
index 0000000000..b4b066e563
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.-nested-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.NestedStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.util.structure.NestedStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'nested_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
index 234507e5de..2d11590492 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt
@@ -20,6 +20,10 @@ tf_module {
     name: "INFINITE_CARDINALITY"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "NestedStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "OptimizationOptions"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-nested-structure.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-nested-structure.pbtxt
new file mode 100644
index 0000000000..b4b066e563
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.-nested-structure.pbtxt
@@ -0,0 +1,18 @@
+path: "tensorflow.data.experimental.NestedStructure"
+tf_class {
+  is_instance: "<class \'tensorflow.python.data.util.structure.NestedStructure\'>"
+  is_instance: "<class \'tensorflow.python.data.util.structure.Structure\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'nested_structure\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "from_value"
+    argspec: "args=[\'value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_compatible_with"
+    argspec: "args=[\'self\', \'other\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
index 234507e5de..2d11590492 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt
@@ -20,6 +20,10 @@ tf_module {
     name: "INFINITE_CARDINALITY"
     mtype: "<type \'int\'>"
   }
+  member {
+    name: "NestedStructure"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "OptimizationOptions"
     mtype: "<type \'type\'>"
-- 
GitLab


From 47b751c9324522c0178536f792cc47a970f4f62b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 15:17:02 -0800
Subject: [PATCH 236/873] Changed CPU <-> NNAPI toggle UI element from
 ToggleButton to RadioButton.

PiperOrigin-RevId: 224586292
---
 .../Camera2BasicFragment.java                 | 41 ++++++++++++++-----
 .../tflitecamerademo/ImageClassifier.java     |  9 +++-
 .../layout-land/fragment_camera2_basic.xml    | 25 ++++++++---
 .../res/layout-v26/fragment_camera2_basic.xml | 29 +++++++++----
 .../res/layout/fragment_camera2_basic.xml     | 30 +++++++++-----
 .../demo/app/src/main/res/values/strings.xml  |  1 +
 6 files changed, 98 insertions(+), 37 deletions(-)

diff --git a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
index 3596e42011..20e96f586a 100644
--- a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
+++ b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
@@ -56,11 +56,10 @@ import android.view.Surface;
 import android.view.TextureView;
 import android.view.View;
 import android.view.ViewGroup;
-import android.widget.CompoundButton;
 import android.widget.NumberPicker;
+import android.widget.RadioButton;
 import android.widget.TextView;
 import android.widget.Toast;
-import android.widget.ToggleButton;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -87,10 +86,12 @@ public class Camera2BasicFragment extends Fragment
   private boolean runClassifier = false;
   private boolean checkedPermissions = false;
   private TextView textView;
-  private ToggleButton toggle;
   private NumberPicker np;
   private ImageClassifier classifier;
 
+  enum InferenceEngine { CPU, NNAPI };
+  private InferenceEngine inferenceEngine = InferenceEngine.CPU;
+
   /** Max preview width that is guaranteed by Camera2 API */
   private static final int MAX_PREVIEW_WIDTH = 1920;
 
@@ -303,14 +304,6 @@ public class Camera2BasicFragment extends Fragment
   public void onViewCreated(final View view, Bundle savedInstanceState) {
     textureView = (AutoFitTextureView) view.findViewById(R.id.texture);
     textView = (TextView) view.findViewById(R.id.text);
-    toggle = (ToggleButton) view.findViewById(R.id.button);
-
-    toggle.setOnCheckedChangeListener(
-        new CompoundButton.OnCheckedChangeListener() {
-          public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {
-            backgroundHandler.post(() -> classifier.setUseNNAPI(isChecked));
-          }
-        });
 
     np = (NumberPicker) view.findViewById(R.id.np);
     np.setMinValue(1);
@@ -323,6 +316,32 @@ public class Camera2BasicFragment extends Fragment
             backgroundHandler.post(() -> classifier.setNumThreads(newVal));
           }
         });
+
+    RadioButton cpuButton = (RadioButton) view.findViewById(R.id.radio_cpu);
+    cpuButton.setChecked(true);  // TFLite runs on CPU by default.
+    cpuButton.setOnClickListener(
+        new View.OnClickListener() {
+          @Override
+          public void onClick(View view) {
+            if (inferenceEngine == InferenceEngine.CPU) {
+              return;
+            }
+            inferenceEngine = InferenceEngine.CPU;
+            backgroundHandler.post(() -> classifier.useCPU());
+          }
+        });
+
+    ((RadioButton) view.findViewById(R.id.radio_nnapi)).setOnClickListener(
+        new View.OnClickListener() {
+          @Override
+          public void onClick(View view) {
+            if (inferenceEngine == InferenceEngine.NNAPI) {
+              return;
+            }
+            inferenceEngine = InferenceEngine.NNAPI;
+            backgroundHandler.post(() -> classifier.useNNAPI());
+          }
+        });
   }
 
   /** Load the model and labels. */
diff --git a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
index 39057aa776..700efc1c1a 100644
--- a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
+++ b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
@@ -163,8 +163,13 @@ public abstract class ImageClassifier {
     }
   }
 
-  public void setUseNNAPI(Boolean nnapi) {
-    tfliteOptions.setUseNNAPI(nnapi);
+  public void useCPU() {
+    tfliteOptions.setUseNNAPI(false);
+    recreateInterpreter();
+  }
+
+  public void useNNAPI() {
+    tfliteOptions.setUseNNAPI(true);
     recreateInterpreter();
   }
 
diff --git a/tensorflow/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml b/tensorflow/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml
index ef8a9e0845..ee71ab808f 100644
--- a/tensorflow/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml
+++ b/tensorflow/lite/java/demo/app/src/main/res/layout-land/fragment_camera2_basic.xml
@@ -40,12 +40,27 @@
         android:scaleType="centerInside"
         android:src="@drawable/logo"/>
 
-    <ToggleButton
-        android:id="@+id/button"
+    <RadioGroup
+        android:gravity="center"
         android:layout_width="match_parent"
-        android:layout_height="wrap_content"
-        android:textOff="@string/tflite"
-        android:textOn="@string/nnapi"/>
+        android:layout_height="match_parent"
+        android:orientation="horizontal">
+        <RadioButton
+            android:id="@+id/radio_cpu"
+            android:background="#0000000f"
+            android:layout_width="wrap_content"
+            android:layout_height="wrap_content"
+            android:text="@string/cpu"
+            android:textColor="@android:color/white" />
+        <RadioButton
+            android:id="@+id/radio_nnapi"
+            android:background="#0000000f"
+            android:layout_width="wrap_content"
+            android:layout_height="wrap_content"
+            android:text="@string/nnapi"
+            android:textColor="@android:color/white" />
+        </RadioGroup>
+
     <NumberPicker
         android:id="@+id/np"
         android:layout_width="wrap_content"
diff --git a/tensorflow/lite/java/demo/app/src/main/res/layout-v26/fragment_camera2_basic.xml b/tensorflow/lite/java/demo/app/src/main/res/layout-v26/fragment_camera2_basic.xml
index ddb099a950..19e0a9bab4 100644
--- a/tensorflow/lite/java/demo/app/src/main/res/layout-v26/fragment_camera2_basic.xml
+++ b/tensorflow/lite/java/demo/app/src/main/res/layout-v26/fragment_camera2_basic.xml
@@ -80,15 +80,26 @@
             android:layout_marginLeft="10dp"
             android:theme="@style/AppTheme.Picker"
             android:visibility="visible" />
-        <ToggleButton
-            android:id="@+id/button"
-            android:textOff="@string/tflite"
-            android:textOn="@string/nnapi"
-            android:layout_width="wrap_content"
-            android:layout_height="wrap_content"
-            android:layout_marginLeft="10dp"
-            android:background="#0000000f"
-            android:textColor="@android:color/white" />
+        <RadioGroup
+            android:gravity="center"
+            android:layout_width="match_parent"
+            android:layout_height="match_parent"
+            android:orientation="horizontal">
+            <RadioButton
+                android:id="@+id/radio_cpu"
+                android:background="#0000000f"
+                android:layout_width="wrap_content"
+                android:layout_height="wrap_content"
+                android:text="@string/cpu"
+                android:textColor="@android:color/white" />
+            <RadioButton
+                android:id="@+id/radio_nnapi"
+                android:background="#0000000f"
+                android:layout_width="wrap_content"
+                android:layout_height="wrap_content"
+                android:text="@string/nnapi"
+                android:textColor="@android:color/white" />
+        </RadioGroup>
     </LinearLayout>
 
 
diff --git a/tensorflow/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml b/tensorflow/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml
index e567009a42..be66eeac75 100644
--- a/tensorflow/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml
+++ b/tensorflow/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml
@@ -80,15 +80,25 @@
             android:layout_marginLeft="10dp"
             android:theme="@style/AppTheme.Picker"
             android:visibility="visible" />
-        <ToggleButton
-            android:id="@+id/button"
-            android:textOff="@string/tflite"
-            android:textOn="@string/nnapi"
-            android:layout_width="wrap_content"
-            android:layout_height="wrap_content"
-            android:layout_marginLeft="10dp"
-            android:background="#0000000f"
-            android:textColor="@android:color/white" />
-
+        <RadioGroup
+            android:gravity="center"
+            android:layout_width="match_parent"
+            android:layout_height="match_parent"
+            android:orientation="horizontal">
+            <RadioButton
+                android:id="@+id/radio_cpu"
+                android:background="#0000000f"
+                android:layout_width="wrap_content"
+                android:layout_height="wrap_content"
+                android:text="@string/cpu"
+                android:textColor="@android:color/white" />
+            <RadioButton
+                android:id="@+id/radio_nnapi"
+                android:background="#0000000f"
+                android:layout_width="wrap_content"
+                android:layout_height="wrap_content"
+                android:text="@string/nnapi"
+                android:textColor="@android:color/white" />
+        </RadioGroup>
     </LinearLayout>
 </RelativeLayout>
diff --git a/tensorflow/lite/java/demo/app/src/main/res/values/strings.xml b/tensorflow/lite/java/demo/app/src/main/res/values/strings.xml
index 29a033bcd4..45b12850e5 100644
--- a/tensorflow/lite/java/demo/app/src/main/res/values/strings.xml
+++ b/tensorflow/lite/java/demo/app/src/main/res/values/strings.xml
@@ -23,4 +23,5 @@
     <string name="toggle">Use NNAPI</string>
     <string name="tflite">tflite</string>
     <string name="nnapi">NNAPI</string>
+    <string name="cpu">CPU</string>
 </resources>
-- 
GitLab


From c707bb2b4da16b9fdf228c3680cf279034c3e9fb Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 7 Dec 2018 15:38:23 -0800
Subject: [PATCH 237/873] [tf.data] Update datasets that use legacy structures
 to use `Structure`.

PiperOrigin-RevId: 224589556
---
 .../experimental/ops/get_single_element.py    | 12 ++---
 .../data/experimental/ops/prefetching_ops.py  | 26 +++------
 .../python/data/experimental/ops/scan_ops.py  | 53 ++++++++++---------
 3 files changed, 37 insertions(+), 54 deletions(-)

diff --git a/tensorflow/python/data/experimental/ops/get_single_element.py b/tensorflow/python/data/experimental/ops/get_single_element.py
index 73116edf12..d649a07012 100644
--- a/tensorflow/python/data/experimental/ops/get_single_element.py
+++ b/tensorflow/python/data/experimental/ops/get_single_element.py
@@ -18,8 +18,6 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
 from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.util.tf_export import tf_export
 
@@ -63,10 +61,8 @@ def get_single_element(dataset):
   if not isinstance(dataset, dataset_ops.DatasetV2):
     raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
 
-  nested_ret = nest.pack_sequence_as(
-      dataset.output_types, gen_dataset_ops.dataset_to_single_element(
-          dataset._as_variant_tensor(),  # pylint: disable=protected-access
+  # pylint: disable=protected-access
+  return dataset._element_structure._from_compatible_tensor_list(
+      gen_dataset_ops.dataset_to_single_element(
+          dataset._as_variant_tensor(),
           **dataset_ops.flat_structure(dataset)))
-  return sparse.deserialize_sparse_tensors(
-      nested_ret, dataset.output_types, dataset.output_shapes,
-      dataset.output_classes)
diff --git a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py
index aba1786646..e46dfb6568 100644
--- a/tensorflow/python/data/experimental/ops/prefetching_ops.py
+++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py
@@ -20,8 +20,6 @@ from __future__ import print_function
 from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
-from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
 from tensorflow.python.eager import function
 from tensorflow.python.framework import device as framework_device
 from tensorflow.python.framework import dtypes
@@ -106,13 +104,6 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
     self._source_device_string = source_device
     self._source_device = ops.convert_to_tensor(source_device)
 
-    self._flat_output_shapes = nest.flatten(
-        sparse.as_dense_shapes(self._input_dataset.output_shapes,
-                               self._input_dataset.output_classes))
-    self._flat_output_types = nest.flatten(
-        sparse.as_dense_types(self._input_dataset.output_types,
-                              self._input_dataset.output_classes))
-
     @function.defun()
     def _init_func():
       """Creates an iterator for the input dataset.
@@ -123,8 +114,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
       # pylint: disable=protected-access
       ds_variant = self._input_dataset._as_variant_tensor()
       resource = gen_dataset_ops.anonymous_iterator(
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
+          **dataset_ops.flat_structure(self._input_dataset))
       with ops.control_dependencies(
           [gen_dataset_ops.make_iterator(ds_variant, resource)]):
         return gen_dataset_ops.iterator_to_string_handle(resource)
@@ -155,8 +145,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
         iterator = iterator_ops.Iterator.from_string_handle(
             string_handle, self.output_types, self.output_shapes,
             self.output_classes)
-      ret = iterator.get_next()
-      return nest.flatten(sparse.serialize_sparse_tensors(ret))
+      return self._element_structure._to_tensor_list(iterator.get_next())  # pylint: disable=protected-access
 
     next_func_concrete = _next_func._get_concrete_function_internal()  # pylint: disable=protected-access
 
@@ -166,7 +155,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
           target=self._source_device,
           args=[string_handle] +
           next_func_concrete.captured_inputs,
-          Tout=self._flat_output_types,
+          Tout=self._input_dataset._element_structure._flat_types,  # pylint: disable=protected-access
           f=next_func_concrete)
 
     self._next_func = _remote_next_func._get_concrete_function_internal()  # pylint: disable=protected-access
@@ -183,8 +172,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
       """
       iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
           string_handle,
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
+          **dataset_ops.flat_structure(self._input_dataset))
       with ops.control_dependencies([
           resource_variable_ops.destroy_resource_op(
               iterator_resource, ignore_lookup_error=True)]):
@@ -196,8 +184,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
     def _remote_finalize_func(string_handle):
       return functional_ops.remote_call(
           target=self._source_device,
-          args=[string_handle] +
-          finalize_func_concrete.captured_inputs,
+          args=[string_handle] + finalize_func_concrete.captured_inputs,
           Tout=[dtypes.int64],
           f=finalize_func_concrete)
 
@@ -233,8 +220,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
           init_func=self._init_func,
           next_func=self._next_func,
           finalize_func=self._finalize_func,
-          output_types=self._flat_output_types,
-          output_shapes=self._flat_output_shapes)
+          **dataset_ops.flat_structure(self._input_dataset))
 
 
 class _MapOnGpuDataset(dataset_ops.UnaryDataset):
diff --git a/tensorflow/python/data/experimental/ops/scan_ops.py b/tensorflow/python/data/experimental/ops/scan_ops.py
index 661f2c5928..5c77ad7343 100644
--- a/tensorflow/python/data/experimental/ops/scan_ops.py
+++ b/tensorflow/python/data/experimental/ops/scan_ops.py
@@ -21,7 +21,6 @@ import collections
 
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
@@ -50,13 +49,7 @@ class _ScanDataset(dataset_ops.UnaryDataset):
     # Compute initial values for the state classes, shapes and types based on
     # the initial state. The shapes may be refined by running `tf_scan_func` one
     # or more times below.
-    self._state_classes = sparse.get_classes(self._initial_state)
-    self._state_shapes = nest.pack_sequence_as(
-        self._initial_state,
-        [t.get_shape() for t in nest.flatten(self._initial_state)])
-    self._state_types = nest.pack_sequence_as(
-        self._initial_state,
-        [t.dtype for t in nest.flatten(self._initial_state)])
+    self._state_structure = structure.Structure.from_value(self._initial_state)
 
     # Iteratively rerun the scan function until reaching a fixed point on
     # `self._state_shapes`.
@@ -66,9 +59,8 @@ class _ScanDataset(dataset_ops.UnaryDataset):
       wrapped_func = dataset_ops.StructuredFunctionWrapper(
           scan_func,
           self._transformation_name(),
-          input_classes=(self._state_classes, input_dataset.output_classes),
-          input_shapes=(self._state_shapes, input_dataset.output_shapes),
-          input_types=(self._state_types, input_dataset.output_types),
+          input_structure=structure.NestedStructure(
+              (self._state_structure, input_dataset._element_structure)),  # pylint: disable=protected-access
           add_to_graph=False)
       if not (
           isinstance(wrapped_func.output_types, collections.Sequence) and
@@ -76,35 +68,38 @@ class _ScanDataset(dataset_ops.UnaryDataset):
         raise TypeError("The scan function must return a pair comprising the "
                         "new state and the output value.")
 
-      new_state_classes, output_classes = wrapped_func.output_classes
+      new_state_classes, self._output_classes = wrapped_func.output_classes
 
       # Extract and validate class information from the returned values.
-      for new_state_class, state_class in zip(
+      new_state_classes, output_classes = wrapped_func.output_classes
+      old_state_classes = self._state_structure._to_legacy_output_classes()  # pylint: disable=protected-access
+      for new_state_class, old_state_class in zip(
           nest.flatten(new_state_classes),
-          nest.flatten(self._state_classes)):
-        if not issubclass(new_state_class, state_class):
+          nest.flatten(old_state_classes)):
+        if not issubclass(new_state_class, old_state_class):
           raise TypeError(
               "The element classes for the new state must match the initial "
               "state. Expected %s; got %s." %
-              (self._state_classes, new_state_classes))
+              (old_state_classes, new_state_classes))
 
       # Extract and validate type information from the returned values.
       new_state_types, output_types = wrapped_func.output_types
-      for new_state_type, state_type in zip(
-          nest.flatten(new_state_types), nest.flatten(self._state_types)):
-        if new_state_type != state_type:
+      old_state_types = self._state_structure._to_legacy_output_types()  # pylint: disable=protected-access
+      for new_state_type, old_state_type in zip(
+          nest.flatten(new_state_types), nest.flatten(old_state_types)):
+        if new_state_type != old_state_type:
           raise TypeError(
               "The element types for the new state must match the initial "
               "state. Expected %s; got %s." %
-              (self._state_types, new_state_types))
+              (old_state_types, new_state_types))
 
       # Extract shape information from the returned values.
       new_state_shapes, output_shapes = wrapped_func.output_shapes
-
+      old_state_shapes = self._state_structure._to_legacy_output_shapes()  # pylint: disable=protected-access
       self._structure = structure.convert_legacy_structure(
           output_types, output_shapes, output_classes)
 
-      flat_state_shapes = nest.flatten(self._state_shapes)
+      flat_state_shapes = nest.flatten(old_state_shapes)
       flat_new_state_shapes = nest.flatten(new_state_shapes)
       weakened_state_shapes = [
           original.most_specific_compatible_shape(new)
@@ -121,8 +116,13 @@ class _ScanDataset(dataset_ops.UnaryDataset):
           break
 
       if need_to_rerun:
-        self._state_shapes = nest.pack_sequence_as(self._state_shapes,
-                                                   weakened_state_shapes)
+        # TODO(b/110122868): Support a "most specific compatible structure"
+        # method for combining structures, to avoid using legacy structures
+        # in this method.
+        self._state_structure = structure.convert_legacy_structure(
+            old_state_types,
+            nest.pack_sequence_as(old_state_shapes, weakened_state_shapes),
+            old_state_classes)
 
     self._scan_func = wrapped_func
     self._scan_func.function.add_to_graph(ops.get_default_graph())
@@ -131,10 +131,11 @@ class _ScanDataset(dataset_ops.UnaryDataset):
     return [self._scan_func]
 
   def _as_variant_tensor(self):
-    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+    # pylint: disable=protected-access
+    input_t = self._input_dataset._as_variant_tensor()
     return gen_experimental_dataset_ops.experimental_scan_dataset(
         input_t,
-        nest.flatten(sparse.serialize_sparse_tensors(self._initial_state)),
+        self._state_structure._to_tensor_list(self._initial_state),
         self._scan_func.function.captured_inputs,
         f=self._scan_func.function,
         preserve_cardinality=True,
-- 
GitLab


From 078e8fa05791cc55e71a13ea33475a436e62dba5 Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Fri, 7 Dec 2018 15:58:35 -0800
Subject: [PATCH 238/873] [TF:XLA] Change ArCrsCombiner to use division instead
 of subtraction. This way, when we move the AR down the graph, the adjustment
 stays before the AR, not after, and the AR moves closer to the CRS. Then,
 it's easier to move the AR past multiple ops one-at-a-time in future CLs.

Also, some bug fixes in InstructionsComputeSameValue.

PiperOrigin-RevId: 224592476
---
 .../compiler/xla/service/ar_crs_combiner.cc   | 69 +++++++-------
 .../compiler/xla/service/ar_crs_combiner.h    |  6 +-
 .../xla/service/ar_crs_combiner_test.cc       | 94 +++++++++++++++++--
 3 files changed, 124 insertions(+), 45 deletions(-)

diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner.cc b/tensorflow/compiler/xla/service/ar_crs_combiner.cc
index 24de693822..362bc44a1c 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner.cc
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner.cc
@@ -60,7 +60,7 @@ absl::optional<HloInstruction*> MatchesArCrsPattern(
 
 absl::optional<HloInstruction*> ArCrsCombiner::WhileFromBodyParameter(
     HloInstruction* instruction) {
-  CHECK(HloOpcode::kParameter == instruction->opcode());
+  CHECK_EQ(HloOpcode::kParameter, instruction->opcode());
   HloComputation* computation = instruction->parent();
   auto caller_instructions = call_graph_->GetComputationCallers(computation);
   if (caller_instructions.size() == 1) {
@@ -120,7 +120,7 @@ bool ArCrsCombiner::TupleElementsComputeSameValue(
     return false;
   }
   for (auto tuple : tuples) {
-    CHECK(tuple->opcode() == HloOpcode::kTuple);
+    CHECK_EQ(tuple->opcode(), HloOpcode::kTuple);
     if (!InstructionsComputeSameValue(tuple->mutable_operand(i1),
                                       tuple->mutable_operand(i2),
                                       visited_pairs)) {
@@ -133,7 +133,7 @@ bool ArCrsCombiner::TupleElementsComputeSameValue(
 /* static */
 bool ArCrsCombiner::TestInstructionsComputeSameValue(HloInstruction* i1,
                                                      HloInstruction* i2) {
-  ArCrsCombiner combiner(/*num_spatial_partitions=*/2, /*num_replicas=*/1);
+  ArCrsCombiner combiner(/*num_spatial_partitions=*/2);
   auto module = i1->parent()->parent();
   CHECK_EQ(module, i2->parent()->parent());
   combiner.call_graph_ = CallGraph::Build(module);
@@ -160,13 +160,6 @@ bool ArCrsCombiner::InstructionsComputeSameValue(
   if (opcode1 != i2->opcode() || operands1.size() != i2->operands().size()) {
     return false;
   }
-  if (opcode1 == HloOpcode::kConstant || i1->IsCrossModuleAllReduce()) {
-    return i1->Identical(
-        *i2,
-        /*eq_operands=*/std::equal_to<const HloInstruction*>(),
-        /*eq_computations=*/std::equal_to<const HloComputation*>(),
-        /*layout_sensitive=*/false);
-  }
   visited_pairs->emplace(min_uid, max_uid);
   for (int i = 0; i < operands1.size(); ++i) {
     auto operand1 = operands1[i];
@@ -175,14 +168,28 @@ bool ArCrsCombiner::InstructionsComputeSameValue(
       return false;
     }
   }
+  if (opcode1 == HloOpcode::kParameter) {
+    // In the general case, we don't try to prove equality of parameters.
+    // We only try in the context of get-tuple-element
+    // (see TupleElementsComputeSameValue).
+    return false;
+  }
   if (opcode1 == HloOpcode::kGetTupleElement) {
-    if (i1->tuple_index() == i2->tuple_index()) {
-      return true;
-    }
-    return TupleElementsComputeSameValue(operands1[0], i1->tuple_index(),
+    return i1->tuple_index() == i2->tuple_index() ||
+           TupleElementsComputeSameValue(operands1[0], i1->tuple_index(),
                                          i2->tuple_index(), visited_pairs);
   }
-  return true;
+  // Don't check that the operands are identical, because Identical can
+  // return false for instructions that compute the same value but are not
+  // identical, which we don't want. We have checked the arguments with
+  // InstructionsComputeSameValue earlier.
+  auto eq_instructions = [](const HloInstruction* i1,
+                            const HloInstruction* i2) -> bool { return true; };
+  auto eq_computations = [](const HloComputation* a, const HloComputation* b) {
+    return *a == *b;
+  };
+  return i1->Identical(*i2, eq_instructions, eq_computations,
+                       /*layout_sensitive=*/false);
 }
 
 void ArCrsCombiner::GroupAllReducesById(HloModule* module) {
@@ -203,12 +210,12 @@ void ArCrsCombiner::KeepProvablyEqualInstructionGroups() {
 
     auto instr_0 = instruction_vec[0];
     auto add_0 = instr_0->users()[0]->users()[0];
-    CHECK(HloOpcode::kAdd == add_0->opcode());
+    CHECK_EQ(HloOpcode::kAdd, add_0->opcode());
 
     for (int i = 1; i < instruction_vec.size(); ++i) {
       auto instr_i = instruction_vec[i];
       auto add_i = instr_i->users()[0]->users()[0];
-      CHECK(HloOpcode::kAdd == add_i->opcode());
+      CHECK_EQ(HloOpcode::kAdd, add_i->opcode());
       absl::flat_hash_map<int64, int64> visited_pairs;
       if (!InstructionsComputeSameValue(add_0, add_i, &visited_pairs)) {
         all_reduce_map_.erase(it.first);
@@ -242,26 +249,22 @@ StatusOr<bool> ArCrsCombiner::RewriteGraph() {
       HloInstruction* other_summand = (add->operands()[0] == convert)
                                           ? add->operands()[1]
                                           : add->operands()[0];
-      // Remove the AllReduce and replace the CRS with an all-core AllReduce,
-      // then subtract:
-      // other_summand * num_replicas_ * (num_spatial_partitions_ - 1)
+      // To move the AR past the addition, we need to divide other_summand by
+      // the number of spatial partitions.
+      CHECK_EQ(all_reduce->user_count(), 1);
       TF_CHECK_OK(
           all_reduce->ReplaceAllUsesWith(all_reduce->mutable_operand(0)));
-      crs->set_all_reduce_id(all_reduce->all_reduce_id());
-      auto new_shape = crs->shape();
-      Literal lit(new_shape);
-      lit.PopulateWithValue<float>(num_replicas_ *
-                                   (num_spatial_partitions_ - 1));
-      auto partitions_minus_1_const = parent_computation->AddInstruction(
+      auto shape = other_summand->shape();
+      Literal lit(shape);
+      lit.PopulateWithValue<float>(num_spatial_partitions_);
+      auto divisor = parent_computation->AddInstruction(
           HloInstruction::CreateConstant(lit.Clone()));
-      auto to_subtract =
+      auto division =
           parent_computation->AddInstruction(HloInstruction::CreateBinary(
-              new_shape, HloOpcode::kMultiply, other_summand,
-              partitions_minus_1_const));
-      auto sub =
-          parent_computation->AddInstruction(HloInstruction::CreateBinary(
-              new_shape, HloOpcode::kSubtract, crs, to_subtract));
-      TF_CHECK_OK(crs->ReplaceAllUsesWith(sub));
+              shape, HloOpcode::kDivide, other_summand, divisor));
+      TF_CHECK_OK(other_summand->ReplaceUseWith(add, division));
+      // The AllReduce and the CRS are combined to an all-core AllReduce.
+      crs->set_all_reduce_id(all_reduce->all_reduce_id());
       TF_CHECK_OK(parent_computation->RemoveInstruction(all_reduce));
     }
   }
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner.h b/tensorflow/compiler/xla/service/ar_crs_combiner.h
index 4abdb1f57d..f6a7ef76ec 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner.h
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner.h
@@ -30,9 +30,8 @@ namespace xla {
 // fully utilizes the interconnect bandwidth.
 class ArCrsCombiner : public HloModulePass {
  public:
-  ArCrsCombiner(int num_spatial_partitions, int num_replicas)
-      : num_spatial_partitions_(num_spatial_partitions),
-        num_replicas_(num_replicas) {}
+  ArCrsCombiner(int num_spatial_partitions)
+      : num_spatial_partitions_(num_spatial_partitions) {}
   absl::string_view name() const override { return "ar-crs-combiner"; }
   StatusOr<bool> Run(HloModule* module) override;
 
@@ -77,7 +76,6 @@ class ArCrsCombiner : public HloModulePass {
   StatusOr<bool> RewriteGraph();
 
   int num_spatial_partitions_;
-  int num_replicas_;
 
   // Map from all-reduce ids to the all reduce instructions.
   absl::flat_hash_map<int64, std::vector<HloInstruction*>> all_reduce_map_;
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
index 23d9aa9eb3..10171835d8 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
@@ -48,6 +48,43 @@ ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
   EXPECT_TRUE(ArCrsCombiner::TestInstructionsComputeSameValue(i1, i2));
 }
 
+TEST_F(ArCrsCombinerTest, SameValueTestBasecase2) {
+  const char* module_str = R"(
+HloModule foobar
+
+ENTRY %entrycomp (x: f32[]) -> (f32[], f32[]) {
+  %x = f32[] parameter(0)
+  ROOT %tuple = (f32[], f32[]) tuple(%x, %x)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  auto root_tuple = module->entry_computation()->root_instruction();
+  auto i1 = root_tuple->operands()[0];
+  auto i2 = root_tuple->operands()[1];
+  EXPECT_TRUE(ArCrsCombiner::TestInstructionsComputeSameValue(i1, i2));
+}
+
+TEST_F(ArCrsCombinerTest, SameValueTestBasecase3) {
+  const char* module_str = R"(
+HloModule foobar
+
+ENTRY %entrycomp (x: f32[], y: f32[]) -> (f32[], f32[]) {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %tuple = (f32[], f32[]) tuple(%x, %y)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  auto root_tuple = module->entry_computation()->root_instruction();
+  auto i1 = root_tuple->operands()[0];
+  auto i2 = root_tuple->operands()[1];
+  EXPECT_FALSE(ArCrsCombiner::TestInstructionsComputeSameValue(i1, i2));
+}
+
 TEST_F(ArCrsCombinerTest, SameValueTestNumOperands) {
   const char* module_str = R"(
 HloModule foobar
@@ -69,6 +106,46 @@ ENTRY %entrycomp (p: f32[2,2]) -> ((f32[2,2]), (f32[2,2], f32[2,2])) {
   EXPECT_FALSE(ArCrsCombiner::TestInstructionsComputeSameValue(i1, i2));
 }
 
+TEST_F(ArCrsCombinerTest, SameValueTestSliceIndicesMatch) {
+  const char* module_str = R"(
+HloModule foobar
+
+ENTRY %entrycomp (p: f32[2]) -> (f32[1], f32[1]) {
+  %p = f32[2] parameter(0)
+  %slice.1 = f32[1] slice(f32[2] %p), slice={[0:1]}
+  %slice.2 = f32[1] slice(f32[2] %p), slice={[0:1]}
+  ROOT %tuple = (f32[1], f32[1]) tuple(%slice.1, %slice.2)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  auto root_tuple = module->entry_computation()->root_instruction();
+  auto i1 = root_tuple->operands()[0];
+  auto i2 = root_tuple->operands()[1];
+  EXPECT_TRUE(ArCrsCombiner::TestInstructionsComputeSameValue(i1, i2));
+}
+
+TEST_F(ArCrsCombinerTest, SameValueTestSliceIndicesDontMatch) {
+  const char* module_str = R"(
+HloModule foobar
+
+ENTRY %entrycomp (p: f32[2]) -> (f32[1], f32[1]) {
+  %p = f32[2] parameter(0)
+  %slice.1 = f32[1] slice(f32[2] %p), slice={[0:1]}
+  %slice.2 = f32[1] slice(f32[2] %p), slice={[1:2]}
+  ROOT %tuple = (f32[1], f32[1]) tuple(%slice.1, %slice.2)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  auto root_tuple = module->entry_computation()->root_instruction();
+  auto i1 = root_tuple->operands()[0];
+  auto i2 = root_tuple->operands()[1];
+  EXPECT_FALSE(ArCrsCombiner::TestInstructionsComputeSameValue(i1, i2));
+}
+
 TEST_F(ArCrsCombinerTest, SameValueTestTupleElementSameIndex) {
   const char* module_str = R"(
 HloModule foobar
@@ -317,17 +394,18 @@ ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
   auto crs_before =
       module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_before = crs_before->replica_groups();
-  ArCrsCombiner combiner(2, 1);
+  ArCrsCombiner combiner(2);
   auto changed = combiner.Run(module.get()).ValueOrDie();
   EXPECT_TRUE(changed);
   EXPECT_THAT(
       module->entry_computation()->root_instruction(),
-      op::Tuple(op::Subtract(op::CrossReplicaSum(),
-                             op::Multiply(op::Constant(), op::Constant())),
-                op::Subtract(op::CrossReplicaSum(),
-                             op::Multiply(op::Constant(), op::Constant()))));
-  auto sub = module->entry_computation()->root_instruction()->operands()[0];
-  auto crs_after = sub->operands()[0];
+      op::Tuple(
+          op::CrossReplicaSum(op::Add(
+              op::Divide(op::Constant(), op::Constant()), op::Convert())),
+          op::CrossReplicaSum(op::Add(
+              op::Divide(op::Constant(), op::Constant()), op::Convert()))));
+  auto crs_after =
+      module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_after = crs_after->replica_groups();
   ASSERT_EQ(replica_groups_before.size(), replica_groups_after.size());
   for (int i = 0; i < replica_groups_before.size(); ++i) {
@@ -409,7 +487,7 @@ ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
 
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
                           ParseAndReturnVerifiedModule(module_str));
-  ArCrsCombiner combiner(2, 1);
+  ArCrsCombiner combiner(2);
   auto changed = combiner.Run(module.get()).ValueOrDie();
   EXPECT_FALSE(changed);
 }
-- 
GitLab


From 44157872848ae4f07b533a1fea35a6b00d97cc1e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 16:13:24 -0800
Subject: [PATCH 239/873] Track XRT memory allocations.

PiperOrigin-RevId: 224594734
---
 tensorflow/compiler/xrt/xrt_state.cc | 47 +++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xrt/xrt_state.cc b/tensorflow/compiler/xrt/xrt_state.cc
index 5c7c537c34..31603e044d 100644
--- a/tensorflow/compiler/xrt/xrt_state.cc
+++ b/tensorflow/compiler/xrt/xrt_state.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/compiler/xrt/xrt_state.h"
 
 #include <stdint.h>
+#include <map>
 #include <memory>
 #include <string>
 #include <utility>
@@ -34,6 +35,7 @@ limitations under the License.
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/stream_executor/stream_executor.h"
 
@@ -41,6 +43,34 @@ namespace tensorflow {
 
 namespace {
 
+class BufferAllocStats {
+ public:
+  struct Stats {
+    int64 count = 0;
+    int64 size = 0;
+  };
+
+  Stats ReportAlloc(int64 device, int64 msize) {
+    mutex_lock lock(lock_);
+    Stats* device_stats = &stats_[device];
+    device_stats->count += 1;
+    device_stats->size += msize;
+    return *device_stats;
+  }
+
+  Stats ReportFree(int64 device, int64 msize) {
+    mutex_lock lock(lock_);
+    Stats* device_stats = &stats_[device];
+    device_stats->count -= 1;
+    device_stats->size -= msize;
+    return *device_stats;
+  }
+
+ private:
+  mutable mutex lock_;
+  std::map<int64, Stats> stats_;
+};
+
 const char* kTupleContainer = "tuples";
 
 int64 get_uid() {
@@ -48,6 +78,11 @@ int64 get_uid() {
   return static_cast<int64>(unsigned_rand);
 }
 
+BufferAllocStats* GetAllocStats() {
+  static BufferAllocStats* stats = new BufferAllocStats();
+  return stats;
+}
+
 Status AllocateScopedShapedBuffer(
     xla::Backend* backend, int device_ordinal, const xla::Shape& shape,
     std::unique_ptr<xla::ScopedShapedBuffer>* buffer) {
@@ -100,9 +135,19 @@ XRTBufferAllocation::XRTBufferAllocation(const se::DeviceMemoryBase& allocation,
                                          xla::DeviceMemoryAllocator* allocator)
     : allocation_(allocation),
       device_ordinal_(device_ordinal),
-      allocator_(allocator) {}
+      allocator_(allocator) {
+  if (VLOG_IS_ON(2)) {
+    auto stats =
+        GetAllocStats()->ReportAlloc(device_ordinal_, allocation_.size());
+    LOG(INFO) << "XRT Allocation Stats: device=" << device_ordinal_
+              << " count=" << stats.count << " size=" << stats.size;
+  }
+}
 
 XRTBufferAllocation::~XRTBufferAllocation() {
+  if (VLOG_IS_ON(2)) {
+    GetAllocStats()->ReportFree(device_ordinal_, allocation_.size());
+  }
   // Deallocate explicitly allows allocation_ to be null.
   Status s = allocator_->Deallocate(device_ordinal_, allocation_);
   // Nothing to do but check fail here if memory datastructures are corrupted.
-- 
GitLab


From 294ad7140234fb383e28bd059d6d761ba879c09e Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 7 Dec 2018 16:22:49 -0800
Subject: [PATCH 240/873] Internal change.

PiperOrigin-RevId: 224596031
---
 tensorflow/tools/api/tests/api_compatibility_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index e7f23a1174..40162daf14 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -277,6 +277,9 @@ class ApiCompatibilityTest(test.TestCase):
 
     public_api_visitor = public_api.PublicAPIVisitor(visitor)
     public_api_visitor.private_map['tf'] = ['contrib']
+    if api_version == 2:
+      public_api_visitor.private_map['tf'].append('enable_v2_behavior')
+
     public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental']
     if FLAGS.only_test_core_api:
       public_api_visitor.do_not_descend_map['tf'].extend(_NON_CORE_PACKAGES)
-- 
GitLab


From 5dfbcfbba04197117cff01a07397f499682d3f56 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Fri, 7 Dec 2018 16:23:28 -0800
Subject: [PATCH 241/873] Consolidate JNI inclusion code

Introduce a single proxy target that exports JNI headers appropriately
for the given platform.

PiperOrigin-RevId: 224596100
---
 tensorflow/lite/java/jni/BUILD                | 47 ++++++++++++++
 tensorflow/lite/java/src/main/native/BUILD    | 62 ++-----------------
 tensorflow/lite/java/src/test/native/BUILD    | 19 ++----
 .../models/smartreply/demo/app/src/main/BUILD |  1 +
 4 files changed, 58 insertions(+), 71 deletions(-)
 create mode 100644 tensorflow/lite/java/jni/BUILD

diff --git a/tensorflow/lite/java/jni/BUILD b/tensorflow/lite/java/jni/BUILD
new file mode 100644
index 0000000000..ce17ac4fa0
--- /dev/null
+++ b/tensorflow/lite/java/jni/BUILD
@@ -0,0 +1,47 @@
+package(default_visibility = ["//tensorflow/lite:__subpackages__"])
+
+licenses(["notice"])  # Apache 2.0
+
+# Helper target for exposing JNI headers across multiple platforms.
+cc_library(
+    name = "jni",
+    hdrs = select({
+        # The Android toolchain makes "jni.h" available in the include path.
+        # For non-Android toolchains, generate jni.h and jni_md.h.
+        "//tensorflow:android": [],
+        "//conditions:default": [
+            ":jni.h",
+            ":jni_md.h",
+        ],
+    }),
+    includes = select({
+        "//tensorflow:android": [],
+        "//conditions:default": ["."],
+    }),
+)
+
+# Silly rules to make
+# #include <jni.h>
+# in the source headers work
+# (in combination with the "includes" attribute of the tf_cuda_library rule
+# above. Not needed when using the Android toolchain).
+#
+# Inspired from:
+# https://github.com/bazelbuild/bazel/blob/f99a0543f8d97339d32075c7176b79f35be84606/src/main/native/BUILD
+# but hopefully there is a simpler alternative to this.
+genrule(
+    name = "copy_jni_h",
+    srcs = ["@bazel_tools//tools/jdk:jni_header"],
+    outs = ["jni.h"],
+    cmd = "cp -f $< $@",
+)
+
+genrule(
+    name = "copy_jni_md_h",
+    srcs = select({
+        "//tensorflow:darwin": ["@bazel_tools//tools/jdk:jni_md_header-darwin"],
+        "//conditions:default": ["@bazel_tools//tools/jdk:jni_md_header-linux"],
+    }),
+    outs = ["jni_md.h"],
+    cmd = "cp -f $< $@",
+)
diff --git a/tensorflow/lite/java/src/main/native/BUILD b/tensorflow/lite/java/src/main/native/BUILD
index 8f95f14518..52194e86db 100644
--- a/tensorflow/lite/java/src/main/native/BUILD
+++ b/tensorflow/lite/java/src/main/native/BUILD
@@ -15,15 +15,7 @@ cc_library(
         "nativeinterpreterwrapper_jni.cc",
         "tensor_jni.cc",
         "tensorflow_lite_jni.cc",
-    ] + select({
-        # The Android toolchain makes "jni.h" available in the include path.
-        # For non-Android toolchains, generate jni.h and jni_md.h.
-        "//tensorflow:android": [],
-        "//conditions:default": [
-            ":jni.h",
-            ":jni_md.h",
-        ],
-    }),
+    ],
     hdrs = [
         "exception_jni.h",
         "nativeinterpreterwrapper_jni.h",
@@ -31,75 +23,31 @@ cc_library(
         "tensorflow_lite_jni.h",
     ],
     copts = tflite_copts(),
-    includes = select({
-        "//tensorflow:android": [],
-        "//conditions:default": ["."],
-    }),
     linkopts = [
         "-lm",
         "-ldl",
     ],
     deps = [
-        "//tensorflow/lite:context",
         "//tensorflow/lite:framework",
         "//tensorflow/lite:schema_fbs_version",
         "//tensorflow/lite:string_util",
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/java/jni",
     ],
     alwayslink = 1,
 )
 
-# Silly rules to make
-# #include <jni.h>
-# in the source headers work
-# (in combination with the "includes" attribute of the tf_cuda_library rule
-# above. Not needed when using the Android toolchain).
-#
-# Inspired from:
-# https://github.com/bazelbuild/bazel/blob/f99a0543f8d97339d32075c7176b79f35be84606/src/main/native/BUILD
-# but hopefully there is a simpler alternative to this.
-genrule(
-    name = "copy_jni_h",
-    srcs = ["@bazel_tools//tools/jdk:jni_header"],
-    outs = ["jni.h"],
-    cmd = "cp -f $< $@",
-)
-
-genrule(
-    name = "copy_jni_md_h",
-    srcs = select({
-        "//tensorflow:darwin": ["@bazel_tools//tools/jdk:jni_md_header-darwin"],
-        "//conditions:default": ["@bazel_tools//tools/jdk:jni_md_header-linux"],
-    }),
-    outs = ["jni_md.h"],
-    cmd = "cp -f $< $@",
-)
-
 cc_library(
     name = "init_tensorflow",
     srcs = [
         "init_tensorflow_jni.cc",
-    ] + select({
-        # The Android toolchain makes "jni.h" available in the include path.
-        # For non-Android toolchains, generate jni.h and jni_md.h.
-        "//tensorflow:android": [],
-        "//conditions:default": [
-            ":jni.h",
-            ":jni_md.h",
-        ],
-    }),
+    ],
     hdrs = [
         "init_tensorflow_jni.h",
     ],
     copts = tflite_copts(),
-    includes = select({
-        "//tensorflow:android": [],
-        "//conditions:default": ["."],
-    }),
-    linkopts = [
-        "-lm",
-        "-ldl",
-    ],
     deps = [
+        "//tensorflow/lite/java/jni",
         "//tensorflow/lite/testing:init_tensorflow",
     ],
     alwayslink = 1,
diff --git a/tensorflow/lite/java/src/test/native/BUILD b/tensorflow/lite/java/src/test/native/BUILD
index 4d3e82b1ac..481aea7ecd 100644
--- a/tensorflow/lite/java/src/test/native/BUILD
+++ b/tensorflow/lite/java/src/test/native/BUILD
@@ -12,20 +12,11 @@ cc_library(
     testonly = 1,
     srcs = [
         "interpreter_test_jni.cc",
-    ] + select({
-        # The Android toolchain makes "jni.h" available in the include path.
-        # For non-Android toolchains, generate jni.h and jni_md.h.
-        "//tensorflow:android": [],
-        "//conditions:default": [
-            "//tensorflow/lite/java/src/main/native:jni.h",
-            "//tensorflow/lite/java/src/main/native:jni_md.h",
-        ],
-    }),
-    includes = select({
-        "//tensorflow:android": [],
-        "//conditions:default": ["../../main/native/."],
-    }),
-    deps = ["//tensorflow/lite/c:c_api_internal"],
+    ],
+    deps = [
+        "//tensorflow/lite/c:c_api_internal",
+        "//tensorflow/lite/java/jni",
+    ],
 )
 
 tflite_jni_binary(
diff --git a/tensorflow/lite/models/smartreply/demo/app/src/main/BUILD b/tensorflow/lite/models/smartreply/demo/app/src/main/BUILD
index b14af4cb20..73326e994b 100644
--- a/tensorflow/lite/models/smartreply/demo/app/src/main/BUILD
+++ b/tensorflow/lite/models/smartreply/demo/app/src/main/BUILD
@@ -62,6 +62,7 @@ cc_library(
     ],
     deps = [
         "//tensorflow/lite:framework",
+        "//tensorflow/lite/java/jni",
         "//tensorflow/lite/models/smartreply:predictor_lib",
     ],
     alwayslink = 1,
-- 
GitLab


From f0e09e4ab12414aba6c3c47750287515ea12feea Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Dec 2018 16:37:37 -0800
Subject: [PATCH 242/873] Fix bazel test failure with mac llvm

This fix tries to address the issue raised in 24212 where
the bazel test for trt_allocator_test fails. The reason was that
1ul is unsigned long while uint64_t is unsigned long long.

This fix fixes the issue.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index ad6b1d7d4c..ab3541ef6e 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -48,11 +48,11 @@ TEST(TRTAllocatorTest, Align) {
         513ul, 700ul, 12345ul, 1ul << 32}) {
     for (uint64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
       for (const uintptr_t ptr_val :
-           {1ul, alignment == 1 ? 1ul : alignment - 1, alignment, alignment + 1,
+           {1ull, alignment == 1 ? 1ull : alignment - 1, alignment, alignment + 1,
             alignment + (alignment / 2)}) {
         if (ptr_val % alignment == 0) {
           for (const uint64_t size :
-               {1ul, space == 1 ? 1ul : space - 1, space, space + 1}) {
+               {1ull, space == 1 ? 1ull : space - 1, space, space + 1}) {
             EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
           }
         } else {
-- 
GitLab


From e87bf9a3da0c396fe88d664d5a0c84a892eed9af Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Dec 2018 16:40:06 -0800
Subject: [PATCH 243/873] Additional fix of trt_allocator_test.cc

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index ab3541ef6e..18b983b9ed 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -62,7 +62,7 @@ TEST(TRTAllocatorTest, Align) {
             EXPECT_TRUE(
                 RunTest(alignment, space - diff, ptr_val + diff, space - diff));
             for (const uint64_t size :
-                 {1ul, space - diff > 1 ? space - diff - 1 : 1ul, space - diff,
+                 {1ull, space - diff > 1 ? space - diff - 1 : 1ull, space - diff,
                   space - diff + 1, space - 1}) {
               EXPECT_EQ(space - diff >= size,
                         RunTest(alignment, size, ptr_val, space));
-- 
GitLab


From f9f50b6cf831cdfef15d952152f43ba6542a14ad Mon Sep 17 00:00:00 2001
From: Eddie Zhou <eddz@google.com>
Date: Fri, 7 Dec 2018 16:44:06 -0800
Subject: [PATCH 244/873] Move skipping of test_session to setUp(), so child
 tests will also skip setUp(), which can be expensive.

PiperOrigin-RevId: 224598690
---
 tensorflow/python/framework/test_util.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 06316ce2e9..28f25daa6d 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1430,6 +1430,10 @@ class TensorFlowTestCase(googletest.TestCase):
     ops.reset_default_graph()
     random_seed.set_random_seed(random_seed.DEFAULT_GRAPH_SEED)
 
+    # Avoiding calling setUp() for the poorly named test_session method.
+    if self.id().endswith(".test_session"):
+      self.skipTest("Not a test.")
+
   def tearDown(self):
     for thread in self._threads:
       thread.check_termination()
@@ -1713,9 +1717,6 @@ class TensorFlowTestCase(googletest.TestCase):
                    use_gpu=False,
                    force_gpu=False):
     """Use cached_session instead."""
-    if self.id().endswith(".test_session"):
-      self.skipTest("Not a test.")
-
     if context.executing_eagerly():
       yield None
     else:
-- 
GitLab


From ff563b9436509a35bbb5087952c7fbfda44df46f Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Fri, 7 Dec 2018 16:44:40 -0800
Subject: [PATCH 245/873] Fixed the bug in keras where the callback attributes
 are not correctly checked.

PiperOrigin-RevId: 224598769
---
 tensorflow/contrib/distribute/python/BUILD       |  1 -
 .../contrib/distribute/python/keras_test.py      | 14 ++++----------
 .../keras/engine/distributed_training_utils.py   | 16 ++++++++++++----
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 4ea1fa050a..4c9c35da5a 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -611,7 +611,6 @@ cuda_py_test(
         "no_oss",  # TODO(b/117919883): Fix python error.
         "no_pip",
         "no_windows_gpu",
-        "noguitar",  # TODO(b/120025010): Re-enable this test on Guitar.
         "notsan",
     ],
 )
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index c0bcb5ecd5..683cc89bfb 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1085,8 +1085,8 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(
       distribution=[
-          combinations.mirrored_strategy_with_two_gpus,
-          combinations.core_mirrored_strategy_with_two_gpus],
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
       mode=['graph', 'eager']))
   def test_unsupported_features(self, distribution):
     with self.cached_session():
@@ -1134,8 +1134,8 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(
       distribution=[
-          combinations.mirrored_strategy_with_two_gpus,
-          combinations.core_mirrored_strategy_with_two_gpus],
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
       mode=['graph', 'eager']))
   def test_calling_with_unsupported_predefined_callbacks(self, distribution):
     with self.cached_session():
@@ -1161,12 +1161,6 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
                                    'using'):
         model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                   callbacks=[keras.callbacks.ReduceLROnPlateau()])
-      with self.assertRaisesRegexp(ValueError,
-                                   'histogram_freq in the TensorBoard callback '
-                                   'is not supported when using '
-                                   'DistributionStrategy.'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)])
 
 
 class TestDistributionStrategyWithLossMasking(test.TestCase,
diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py
index d100182381..32129afe64 100644
--- a/tensorflow/python/keras/engine/distributed_training_utils.py
+++ b/tensorflow/python/keras/engine/distributed_training_utils.py
@@ -199,11 +199,19 @@ def validate_callbacks(input_callbacks, optimizer, current_strategy):
       # running ops.
       if isinstance(callback, callbacks.TensorBoard):
         if callback.__getattribute__('histogram_freq'):
-          raise ValueError('histogram_freq in the TensorBoard callback is not '
-                           'supported when using DistributionStrategy.')
+          logging.warning(
+              UserWarning(
+                  '`histogram_freq` in the TensorBoard callback is not '
+                  'supported when using DistributionStrategy. Setting '
+                  '`histogram_freq` to `0`.'))
+          callback.histogram_freq = 0
         if callback.__getattribute__('write_grads'):
-          raise ValueError('write_grads in the TensorBoard callback is not '
-                           'supported when using DistributionStrategy.')
+          logging.warning(
+              UserWarning(
+                  '`write_grads` in the TensorBoard callback is not supported '
+                  'when using DistributionStrategy. Setting `write_grads` '
+                  'to `False`.'))
+          callback.histogram_freq = False
 
 
 def validate_distributed_dataset_inputs(distribution_strategy, x, y,
-- 
GitLab


From b659523f8acec043478ee7378dd17c8df4c132e2 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Fri, 7 Dec 2018 16:48:22 -0800
Subject: [PATCH 246/873] Use parent sessions configuration to ensure we
 connect to the same set of workers.

PiperOrigin-RevId: 224599211
---
 .../contrib/tpu/python/tpu/session_support.py      | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py
index a952754878..3e463823c8 100644
--- a/tensorflow/contrib/tpu/python/tpu/session_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/session_support.py
@@ -43,12 +43,19 @@ class CoordinatorShutdownException(Exception):
   pass
 
 
+def _clone_session(session, graph=None):
+  return session_lib.Session(
+      target=session.sess_str,
+      config=session._config,  # pylint: disable=protected-access
+      graph=graph if graph else session.graph)
+
+
 def _make_heartbeat_op(session, device, request_ph):
   """Return a heartbeat op or None if heartbeats are not supported by device."""
   try:
     # Test if we can connect in a isolated graph + session
     with ops.Graph().as_default():
-      with session_lib.Session(target=session.sess_str) as temp_session:
+      with _clone_session(session) as temp_session:
         with ops.device(device):
           heartbeat_op = tpu_ops.worker_heartbeat('')
           options = config_pb2.RunOptions(timeout_in_ms=5000)
@@ -220,6 +227,7 @@ class WatchdogManager(threading.Thread):
     self.ping_interval = ping_interval
     self.shutdown_timeout = shutdown_timeout
     self.daemon = True
+    self._config = session._config  # pylint: disable=protected-access
     self._target = session.sess_str
     self._running = False
     self._devices = devices
@@ -234,6 +242,7 @@ class WatchdogManager(threading.Thread):
     self._session = session_lib.Session(
         target=self._target,
         graph=self._graph,
+        config=self._config,
     )
 
     if self._devices is None:
@@ -334,8 +343,7 @@ class GracefulShutdownHook(session_run_hook.SessionRunHook):
 
     with self._graph.as_default():
       logging.info('Installing graceful shutdown hook.')
-      self._session = session_lib.Session(
-          target=training_session.sess_str, graph=self._graph)
+      self._session = _clone_session(training_session, self._graph)
       self._workers = WorkerHeartbeatManager.from_devices(
           self._session, all_worker_devices(self._session))
       self._heartbeat_supported = self._workers.num_workers() > 0
-- 
GitLab


From 8e1eb82d53c1d60936910847bd38844227c0bd3b Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 7 Dec 2018 16:49:43 -0800
Subject: [PATCH 247/873] Split keyword argument test in tf_upgrade_v2_test.py
 into 2 tests. Extend one of the tests to run API conversion for all TF
 functions instead of just the ones updated by tf_upgrade_v2 script. Fix
 issues found by this extended test.

PiperOrigin-RevId: 224599358
---
 tensorflow/python/ops/image_ops_impl.py       |   5 +-
 tensorflow/python/ops/sparse_ops.py           |  10 +-
 tensorflow/tools/compatibility/reorders_v2.py |   7 +
 .../tools/compatibility/tf_upgrade_v2.py      | 115 ++++++++++++-
 .../tools/compatibility/tf_upgrade_v2_test.py | 151 +++++++++++++++---
 5 files changed, 258 insertions(+), 30 deletions(-)

diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py
index 229393c970..24d049b726 100644
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@@ -2046,9 +2046,8 @@ def sample_distorted_bounding_box_v2(image_size,
       3-D with shape `[batch, N, 4]` describing the N bounding boxes
       associated with the image.
     seed: An optional `int`. Defaults to `0`.
-      If either `seed` or `seed2` are set to non-zero, the random number
-      generator is seeded by the given `seed`.  Otherwise, it is seeded by a
-      random seed.
+      If `seed` is set to non-zero, the random number generator is seeded by
+      the given `seed`.  Otherwise, it is seeded by a random seed.
     min_object_covered: A Tensor of type `float32`. Defaults to `0.1`.
       The cropped area of the image must contain at least this
       fraction of any bounding box supplied. The value of this parameter should
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index d7346b7371..097b485a11 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -1093,6 +1093,9 @@ def sparse_reduce_max_v2(
 @deprecation.deprecated_endpoints("sparse_reduce_max")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
+@deprecation.deprecated_args(
+    None, "reduction_axes is deprecated, use axis instead",
+    "reduction_axes")
 def sparse_reduce_max(sp_input, axis=None, keepdims=None,
                       reduction_axes=None, keep_dims=None):
   """Computes the max of elements across dimensions of a SparseTensor.
@@ -1141,7 +1144,7 @@ def sparse_reduce_max(sp_input, axis=None, keepdims=None,
     axis: The dimensions to reduce; list or scalar. If `None` (the
       default), reduces all dimensions.
     keepdims: If true, retain reduced dimensions with length 1.
-    reduction_axes: Deprecated name of axis.
+    reduction_axes: Deprecated name of `axis`.
     keep_dims:  Deprecated alias for `keepdims`.
 
   Returns:
@@ -1279,6 +1282,9 @@ def sparse_reduce_sum_v2(
 @deprecation.deprecated_endpoints("sparse_reduce_sum")
 @deprecation.deprecated_args(
     None, "keep_dims is deprecated, use keepdims instead", "keep_dims")
+@deprecation.deprecated_args(
+    None, "reduction_axes is deprecated, use axis instead",
+    "reduction_axes")
 def sparse_reduce_sum(sp_input, axis=None, keepdims=None,
                       reduction_axes=None, keep_dims=None):
   """Computes the sum of elements across dimensions of a SparseTensor.
@@ -1314,7 +1320,7 @@ def sparse_reduce_sum(sp_input, axis=None, keepdims=None,
     axis: The dimensions to reduce; list or scalar. If `None` (the
       default), reduces all dimensions.
     keepdims: If true, retain reduced dimensions with length 1.
-    reduction_axes: Deprecated name of axis.
+    reduction_axes: Deprecated name of `axis`.
     keep_dims: Deprecated alias for `keepdims`.
 
   Returns:
diff --git a/tensorflow/tools/compatibility/reorders_v2.py b/tensorflow/tools/compatibility/reorders_v2.py
index e6596755c3..44494ac148 100644
--- a/tensorflow/tools/compatibility/reorders_v2.py
+++ b/tensorflow/tools/compatibility/reorders_v2.py
@@ -40,6 +40,7 @@ reorders = {
     'tf.io.parse_single_example': ['serialized', 'features', 'name', 'example_names'],
     'tf.io.serialize_many_sparse': ['sp_input', 'name', 'out_type'],
     'tf.io.serialize_sparse': ['sp_input', 'name', 'out_type'],
+    'tf.linalg.norm': ['tensor', 'ord', 'axis', 'keepdims', 'name', 'keep_dims'],
     'tf.math.argmax': ['input', 'axis', 'name', 'dimension', 'output_type'],
     'tf.math.argmin': ['input', 'axis', 'name', 'dimension', 'output_type'],
     'tf.math.confusion_matrix': ['labels', 'predictions', 'num_classes', 'dtype', 'name', 'weights'],
@@ -66,6 +67,7 @@ reorders = {
     'tf.nn.space_to_batch': ['input', 'paddings', 'block_size', 'name'],
     'tf.nn.space_to_depth': ['input', 'block_size', 'name', 'data_format'],
     'tf.nn.weighted_moments': ['x', 'axes', 'frequency_weights', 'name', 'keep_dims'],
+    'tf.norm': ['tensor', 'ord', 'axis', 'keepdims', 'name', 'keep_dims'],
     'tf.pad': ['tensor', 'paddings', 'mode', 'name', 'constant_values'],
     'tf.parse_example': ['serialized', 'features', 'name', 'example_names'],
     'tf.parse_single_example': ['serialized', 'features', 'name', 'example_names'],
@@ -82,6 +84,7 @@ reorders = {
     'tf.reduce_min': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'],
     'tf.reduce_prod': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'],
     'tf.reduce_sum': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'],
+    'tf.reverse_sequence': ['input', 'seq_lengths', 'seq_axis', 'batch_axis', 'name', 'seq_dim', 'batch_dim'],
     'tf.serialize_many_sparse': ['sp_input', 'name', 'out_type'],
     'tf.serialize_sparse': ['sp_input', 'name', 'out_type'],
     'tf.shape': ['input', 'name', 'out_type'],
@@ -90,15 +93,19 @@ reorders = {
     'tf.space_to_depth': ['input', 'block_size', 'name', 'data_format'],
     'tf.sparse.add': ['a', 'b', 'threshold', 'thresh'],
     'tf.sparse.concat': ['axis', 'sp_inputs', 'name', 'expand_nonconcat_dim', 'concat_dim'],
+    'tf.sparse.reduce_max': ['sp_input', 'axis', 'keepdims', 'reduction_axes', 'keep_dims'],
     'tf.sparse.segment_mean': ['data', 'indices', 'segment_ids', 'name', 'num_segments'],
     'tf.sparse.segment_sqrt_n': ['data', 'indices', 'segment_ids', 'name', 'num_segments'],
     'tf.sparse.segment_sum': ['data', 'indices', 'segment_ids', 'name', 'num_segments'],
+    'tf.sparse.split': ['keyword_required', 'sp_input', 'num_split', 'axis', 'name', 'split_dim'],
     'tf.sparse_add': ['a', 'b', 'threshold', 'thresh'],
     'tf.sparse_concat': ['axis', 'sp_inputs', 'name', 'expand_nonconcat_dim', 'concat_dim'],
     'tf.sparse_matmul': ['a', 'b', 'transpose_a', 'transpose_b', 'a_is_sparse', 'b_is_sparse', 'name'],
+    'tf.sparse_reduce_max': ['sp_input', 'axis', 'keepdims', 'reduction_axes', 'keep_dims'],
     'tf.sparse_segment_mean': ['data', 'indices', 'segment_ids', 'name', 'num_segments'],
     'tf.sparse_segment_sqrt_n': ['data', 'indices', 'segment_ids', 'name', 'num_segments'],
     'tf.sparse_segment_sum': ['data', 'indices', 'segment_ids', 'name', 'num_segments'],
+    'tf.sparse_split': ['keyword_required', 'sp_input', 'num_split', 'axis', 'name', 'split_dim'],
     'tf.strings.length': ['input', 'name', 'unit'],
     'tf.strings.reduce_join': ['inputs', 'axis', 'keep_dims', 'separator', 'name', 'reduction_indices'],
     'tf.strings.substr': ['input', 'pos', 'len', 'name', 'unit'],
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py
index b2477a541e..ea86da42f6 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py
@@ -36,6 +36,18 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.argmax": {
             "dimension": "axis",
         },
+        "tf.arg_min": {
+            "dimension": "axis",
+        },
+        "tf.arg_max": {
+            "dimension": "axis",
+        },
+        "tf.math.argmin": {
+            "dimension": "axis",
+        },
+        "tf.math.argmax": {
+            "dimension": "axis",
+        },
         "tf.image.crop_and_resize": {
             "box_ind": "box_indices",
         },
@@ -54,6 +66,9 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.space_to_batch": {
             "block_size": "block_shape",
         },
+        "tf.nn.space_to_batch": {
+            "block_size": "block_shape",
+        },
         "tf.constant": {
             "verify_shape": "verify_shape_is_now_always_true",
         },
@@ -66,6 +81,15 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.linalg.l2_normalize": {
             "dim": "axis",
         },
+        "tf.linalg.norm": {
+            "keep_dims": "keepdims",
+        },
+        "tf.norm": {
+            "keep_dims": "keepdims",
+        },
+        "tf.load_file_system_library": {
+            "library_filename": "library_location",
+        },
         "tf.math.count_nonzero": {
             "input_tensor": "input",
             "keep_dims": "keepdims",
@@ -99,6 +123,9 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.nn.separable_conv2d": {
             "rate": "dilations"
         },
+        "tf.nn.depthwise_conv2d": {
+            "rate": "dilations"
+        },
         "tf.nn.softmax": {
             "dim": "axis"
         },
@@ -117,14 +144,35 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         },
         "tf.sparse.concat": {
             "concat_dim": "axis",
+            "expand_nonconcat_dim": "expand_nonconcat_dims",
         },
         "tf.sparse_concat": {
             "concat_dim": "axis",
+            "expand_nonconcat_dim": "expand_nonconcat_dims",
         },
         "tf.sparse.split": {
             "split_dim": "axis",
         },
-        "tf.max_pool_with_argmax": {
+        "tf.sparse_split": {
+            "split_dim": "axis",
+        },
+        "tf.sparse.reduce_max": {
+            "reduction_axes": "axis",
+            "keep_dims": "keepdims",
+        },
+        "tf.sparse_reduce_max": {
+            "reduction_axes": "axis",
+            "keep_dims": "keepdims",
+        },
+        "tf.sparse.reduce_sum": {
+            "reduction_axes": "axis",
+            "keep_dims": "keepdims",
+        },
+        "tf.sparse_reduce_sum": {
+            "reduction_axes": "axis",
+            "keep_dims": "keepdims",
+        },
+        "tf.nn.max_pool_with_argmax": {
             "Targmax": "output_dtype",
         },
         "tf.multinomial": {
@@ -133,6 +181,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.random.multinomial": {
             "output_dtype": "dtype",
         },
+        "tf.reverse_sequence": {
+            "seq_dim": "seq_axis",
+            "batch_dim": "batch_axis",
+        },
         "tf.nn.batch_norm_with_global_normalization": {
             "t": "input",
             "m": "mean",
@@ -151,6 +203,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.ones_like": {
             "tensor": "input",
         },
+        "tf.nn.conv2d_transpose": {
+            "value": "input",
+            "filter": "filters",
+        },
         "tf.nn.conv3d_transpose": {
             "value": "input",
             "filter": "filters",
@@ -287,6 +343,9 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.squeeze": {
             "squeeze_dims": "axis",
         },
+        "tf.nn.weighted_moments": {
+            "keep_dims": "keepdims"
+        },
     }
 
     # pylint: disable=line-too-long
@@ -427,6 +486,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             "tf.sparse.split",
         "tf.sparse_matmul":
             "tf.linalg.matmul",
+        "tf.sparse_reduce_sum":
+            "tf.sparse.reduce_sum",
+        "tf.sparse_reduce_max":
+            "tf.sparse.reduce_max",
         "tf.random.stateless_multinomial":
             "tf.random.stateless_categorical",
         "tf.string_to_hash_bucket":
@@ -481,6 +544,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             "tf.argmax",
         "tf.arg_min":
             "tf.argmin",
+        # tf.nn.ctc_loss is still available in 2.0 but behavior
+        # changed significantly.
+        "tf.nn.ctc_loss":
+            "tf.compat.v1.nn.ctc_loss",
     }
     # pylint: enable=line-too-long
 
@@ -530,6 +597,8 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.sparse.segment_sqrt_n",
         "tf.sparse.segment_sum",
         "tf.sparse_matmul",
+        "tf.sparse.reduce_max",
+        "tf.sparse_reduce_max",
         "tf.io.decode_csv",
         "tf.strings.substr",
         "tf.strings.reduce_join",
@@ -566,6 +635,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.nn.embedding_lookup_sparse",
         "tf.nn.in_top_k",
         "tf.nn.space_to_depth",
+        "tf.linalg.norm",
+        "tf.norm",
+        "tf.reverse_sequence",
+        "tf.sparse_split",
     }
 
     # Functions that were reordered should be changed to the new keyword args
@@ -634,6 +707,9 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             assert_return_type_comment,
         "tf.assert_rank":
             assert_rank_comment,
+        "tf.cond": "tf.cond no longer takes 'strict'. "
+                   "Now 'strict' defaults to True."
+                   "fn1/fn2 arguments are replaced by true_fn/false_fn.",
         "tf.debugging.assert_equal":
             assert_return_type_comment,
         "tf.debugging.assert_greater":
@@ -664,6 +740,9 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             assert_rank_comment,
         "tf.debugging.assert_rank_in":
             assert_rank_comment,
+        "tf.device": "tf.device no longer takes function as an argument. "
+                     "'devide_name_or_function' argument has been renamed to "
+                     "'device_name'.",
         "tf.flags":
             "tf.flags has been removed, please use the argparse or absl"
             " module if you need command line parsing.",
@@ -701,6 +780,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             default_loss_reduction_changed,
         "tf.estimator.BaselineRegressor":
             default_loss_reduction_changed,
+        "tf.hessians": "tf.hessians no longer takes "
+                       "'colocate_gradients_with_ops' argument. Also, "
+                       "arguments have been reordered so that 'name' is the "
+                       "last argument.",
         "tf.nn.conv1d":
             "WARNING: use_cudnn_on_gpu argument has been removed and \"value\""
             " was renamed to \"input\"",
@@ -727,9 +810,33 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             tf_01s_like_no_optimize_comment,
         "tf.nn.embedding_lookup":
             "WARNING: validate_indices argument has been removed.",
-        "tf.sparse.concat":
-            ("WARNING: expand_nonconcat_dim was renamed to "
-             "expand_nonconcat_dims."),
+        "tf.while_loop":
+            "tf.while_loop no longer takes 'return_same_structure' argument. "
+            "'return_same_structure' now defaults to True. Also, 'name'"
+            "argument is now the last argument.",
+        "tf.image.sample_distorted_bounding_box":
+            "tf.image.sample_distorted_bounding_box no longer takes 'seed2' "
+            "argument.",
+        "tf.nn.ctc_beam_search_decoder":
+            "tf.nn.ctc_beam_search_decoder no longer takes 'merge_repeated' "
+            "argument. 'merge_repeated' now defaults to False.",
+        "tf.nn.fractional_avg_pool":
+            "tf.nn.fractional_avg_pool no longer takes 'seed2' and "
+            "'deterministic' arguments. Now it takes a single 'seed' arg. If "
+            "'seed' is zero, the execution is random and deterministic "
+            "otherwise",
+        "tf.nn.fractional_max_pool":
+            "tf.nn.fractional_max_pool no longer takes 'seed2' and "
+            "'deterministic' arguments. Now it takes a single 'seed' arg. If "
+            "'seed' is zero, the execution is random and deterministic "
+            "otherwise",
+        "tf.nn.softmax_cross_entropy_with_logits":
+            "tf.nn.softmax_cross_entropy_with_logits behavior has changed. "
+            "'labels' needs to be wrapped with tf.stop_gradient to keep the "
+            "old behavior. Also, 'dim' argument has been renamed to 'axis'.",
+        "tf.test.assert_equal_graph_def":
+            "tf.assert_equal_graph_def no longer takes 'checkpoint_v2' "
+            "argument. 'checkpoint_v2' now defaults to True.",
     }
 
     self.symbol_renames = {
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index 4460ad5182..63aa5f0c6b 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import inspect
 import os
 import tempfile
 
@@ -25,7 +26,6 @@ import six
 import tensorflow as tf
 # OSS TF V2 import placeholder.
 
-
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test as test_lib
 from tensorflow.python.util import tf_decorator
@@ -72,6 +72,15 @@ def get_symbol_for_name(root, name):
   return symbol
 
 
+def get_args(symbol):
+  if hasattr(inspect, "signature"):
+    signature = inspect.signature(symbol)
+    # Ignore *args and **kwargs for now.
+    return [param.name for param in signature.parameters.values()
+            if param.kind == param.POSITIONAL_OR_KEYWORD]
+  return tf_inspect.getargspec(symbol)[0]
+
+
 def get_func_and_args_from_str(call_str):
   """Parse call string to get function and argument names.
 
@@ -88,6 +97,7 @@ def get_func_and_args_from_str(call_str):
   function_name = call_str[:call_str.find("(")]
   args = call_str[open_paren_index+1:close_paren_index].split(",")
   args = [arg.split("=")[0].strip() for arg in args]
+  args = [arg for arg in args if arg]  # filter out empty strings
   return function_name, args
 
 
@@ -200,17 +210,12 @@ class TestUpgrade(test_util.TensorFlowTestCase):
     collect = False
     traverse.traverse(tf.compat.v1, visitor)
 
-  def testKeywordArgNames(self):
-    if not hasattr(tf.compat, "v2"):
-      return
-
+  def testV1KeywordArgNames(self):
     all_keyword_renames = (
         tf_upgrade_v2.TFAPIChangeSpec().function_keyword_renames)
-    v2_name_exceptions = {"verify_shape_is_now_always_true"}
 
-    # Visitor that verifies V1 argument names, converts to V2 and checks
-    # V2 argument names.
-    def conversion_visitor(unused_path, unused_parent, children):
+    # Visitor that verifies V1 argument names.
+    def arg_test_visitor(unused_path, unused_parent, children):
       for child in children:
         _, attr = tf_decorator.unwrap(child[1])
         names_v1 = get_v1_names(attr)
@@ -230,25 +235,77 @@ class TestUpgrade(test_util.TensorFlowTestCase):
                 "%s not found in %s arguments: %s" %
                 (from_name, name, str(arg_names_v1)))
 
+    visitor = public_api.PublicAPIVisitor(arg_test_visitor)
+    visitor.do_not_descend_map["tf"].append("contrib")
+    visitor.private_map["tf.compat"] = ["v1", "v2"]
+    traverse.traverse(tf.compat.v1, visitor)
+
+  def testV2KeywordArgNames(self):
+    # This test converts a call of the form:
+    # tf.foo(arg1=0, arg2=1, ...)
+    # to 2.0. Then, checks that converted function has valid argument names.
+    if not hasattr(tf.compat, "v2"):
+      return
+    v2_arg_exceptions = {
+        "verify_shape_is_now_always_true",
+        # These arguments should not be used, they just specify
+        # that a function takes named arguments.
+        "keyword_required",
+        "_sentinel",
+    }
+    v1_name_exceptions = {
+        "tf.print",  # requires print_function import
+    }
+    function_warnings = (
+        tf_upgrade_v2.TFAPIChangeSpec().function_warnings)
+    function_handles = (
+        tf_upgrade_v2.TFAPIChangeSpec().function_handle)
+    keyword_renames = (
+        tf_upgrade_v2.TFAPIChangeSpec().function_keyword_renames)
+
+    # Visitor that converts to V2 and checks V2 argument names.
+    def conversion_visitor(unused_path, unused_parent, children):
+      for child in children:
+        _, attr = tf_decorator.unwrap(child[1])
+        if not tf_inspect.isfunction(attr):
+          continue
+        names_v1 = get_v1_names(attr)
+        arg_names_v1 = get_args(attr)
+
+        for name in names_v1:
+          tf_name = "tf.%s" % name
+          if tf_name in function_warnings or tf_name in function_handles:
+            continue  # These require manual change
+          if tf_name in v1_name_exceptions:
+            continue
           # Assert that arg names after converting to v2 are present in
           # v2 function.
           # 1. First, create an input of the form:
           #    tf.foo(arg1=val1, arg2=val2, ...)
           args = ",".join(
               ["%s=%d" % (from_name, from_index)
-               for from_index, from_name in enumerate(keyword_renames.keys())])
-          text_input = "%s(%s)" % (name, args)
+               for from_index, from_name in enumerate(arg_names_v1)])
+          text_input = "%s(%s)" % (tf_name, args)
           # 2. Convert the input to V2.
           _, _, _, text = self._upgrade(text_input)
           new_function_name, new_args = get_func_and_args_from_str(text)
+          if new_function_name == "tf.compat.v1.%s" % name:
+            if tf_name in keyword_renames:
+              # If we rename arguments, new function must be available in 2.0.
+              # We should not be using compat.v1 in this case.
+              self.assertFalse(
+                  "Function '%s' is not in 2.0 when converting\n%s\nto\n%s" %
+                  (new_function_name, text_input, text))
+            continue
           # 3. Verify V2 function and arguments.
-          # Note: If we rename arguments, new function must be available in 2.0.
-          # We should not be using compat.v1 in this case.
-          self.assertIn(new_function_name, self.v2_symbols)
-          args_v2 = tf_inspect.getargspec(self.v2_symbols[new_function_name])[0]
-          args_v2.extend(v2_name_exceptions)
+          args_v2 = get_args(self.v2_symbols[new_function_name])
+          args_v2.extend(v2_arg_exceptions)
           for new_arg in new_args:
-            self.assertIn(new_arg, args_v2)
+            self.assertIn(
+                new_arg, args_v2,
+                "Invalid argument '%s' in 2.0 when converting\n%s\nto\n%s.\n"
+                "Supported arguments: %s" % (
+                    new_arg, text_input, text, str(args_v2)))
 
     visitor = public_api.PublicAPIVisitor(conversion_visitor)
     visitor.do_not_descend_map["tf"].append("contrib")
@@ -526,9 +583,24 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     text = "tf.nn.softmax_cross_entropy_with_logits_v2(labels, logits, dim=2)"
     expected_text = (
         "tf.nn.softmax_cross_entropy_with_logits(labels, logits, axis=2)")
-    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    _, unused_report, errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
+    self.assertFalse(errors)
+
+  def testSoftMaxCrossEntropyWithLogits(self):
+    text = "tf.nn.softmax_cross_entropy_with_logits(labels, logits, dim=2)"
+    expected_text = (
+        "tf.nn.softmax_cross_entropy_with_logits(labels, logits, dim=2)")
+    _, report, errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+    self.assertIn(
+        "tf.nn.softmax_cross_entropy_with_logits requires manual check.",
+        errors[0])
+    self.assertIn(
+        "tf.nn.softmax_cross_entropy_with_logits behavior has changed. ",
+        report)
+
   def testSparseMatmul(self):
     text = ("tf.sparse_matmul(a, b, c, d, e, f, g)\n")
     expected_text = ("tf.linalg.matmul(a=a, b=b, transpose_a=c, transpose_b=d, "
@@ -540,7 +612,7 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     text = "tf.nn.weighted_moments(x, axes, freq, name, kd)"
     expected_text = (
         "tf.nn.weighted_moments(x=x, axes=axes, frequency_weights=freq, "
-        "name=name, keep_dims=kd)")
+        "name=name, keepdims=kd)")
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
@@ -554,7 +626,7 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     text = "tf.sparse.concat(ax, inp, name, exp, concat)"
     expected_text = (
         "tf.sparse.concat(axis=ax, sp_inputs=inp, name=name, "
-        "expand_nonconcat_dim=exp, axis=concat)")
+        "expand_nonconcat_dims=exp, axis=concat)")
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
@@ -576,7 +648,7 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     text = "tf.nn.space_to_batch(input, paddings, block_size, name)"
     expected_text = (
         "tf.space_to_batch(input=input, paddings=paddings, "
-        "block_size=block_size, name=name)")
+        "block_shape=block_size, name=name)")
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
@@ -628,6 +700,43 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
+  def testPrint(self):
+    # tf.print() cannot be parsed unless we import print_function
+    text = """from __future__ import print_function
+tf.print()
+tf.print('abc')
+"""
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, text)  # Text should stay the same
+
+  def testSparseSplit(self):
+    text = (
+        "tf.sparse_split(sp_input=sp_input, num_split=num_split, axis=axis, "
+        "name=name)")
+    expected_text = (
+        "tf.sparse.split(sp_input=sp_input, num_split=num_split, axis=axis, "
+        "name=name)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+    text = (
+        "tf.sparse_split(sp_input=sp_input, num_split=num_split, "
+        "name=name, split_dim=axis)")
+    expected_text = (
+        "tf.sparse.split(sp_input=sp_input, num_split=num_split, "
+        "name=name, axis=axis)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+    text = (
+        "tf.sparse.split(sp_input=sp_input, num_split=num_split, "
+        "name=name, split_dim=axis)")
+    expected_text = (
+        "tf.sparse.split(sp_input=sp_input, num_split=num_split, "
+        "name=name, axis=axis)")
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
 
 class TestUpgradeFiles(test_util.TensorFlowTestCase):
 
-- 
GitLab


From 623cc7e829568118683070d761c21823c7038338 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 7 Dec 2018 16:54:42 -0800
Subject: [PATCH 248/873] [tf.data] Enable `map_and_batch_fusion` optimization

PiperOrigin-RevId: 224599990
---
 .../benchmarks/map_and_batch_benchmark.py     | 43 ++++++++++---------
 .../benchmarks/map_vectorization_benchmark.py | 21 ++++++---
 .../optimization/map_and_batch_fusion_test.py |  5 ---
 .../optimization/map_vectorization_test.py    | 19 +++++---
 .../optimization/optimize_dataset_test.py     |  5 ++-
 .../dataset_serialization_test_base.py        | 33 ++++++++++----
 .../experimental/ops/optimization_options.py  |  2 +-
 7 files changed, 81 insertions(+), 47 deletions(-)

diff --git a/tensorflow/python/data/experimental/benchmarks/map_and_batch_benchmark.py b/tensorflow/python/data/experimental/benchmarks/map_and_batch_benchmark.py
index fbd06a5a78..b17f2bcd12 100644
--- a/tensorflow/python/data/experimental/benchmarks/map_and_batch_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/map_and_batch_benchmark.py
@@ -26,6 +26,7 @@ import numpy as np
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.data.experimental.ops import batching
+from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -139,47 +140,49 @@ class MapAndBatchBenchmark(test.Benchmark):
 
         num_iters = 1024 // (
             (element_size * batch_size) // min(num_calls, inter_op))
-        dataset = make_base_dataset(element_size)
-        chained_dataset = dataset.map(
+        fused_dataset = make_base_dataset(element_size)
+        fused_dataset = fused_dataset.map(
             math_ops.matmul,
             num_parallel_calls=num_calls).batch(batch_size=batch_size)
-        chained_iterator = dataset_ops.make_one_shot_iterator(chained_dataset)
-        chained_get_next = chained_iterator.get_next()
 
-        chained_deltas = []
+        fused_iterator = dataset_ops.make_one_shot_iterator(fused_dataset)
+        fused_get_next = fused_iterator.get_next()
+
+        fused_deltas = []
         with session.Session(
             config=config_pb2.ConfigProto(
                 inter_op_parallelism_threads=inter_op,
                 use_per_session_threads=True)) as sess:
+
           for _ in range(5):
-            sess.run(chained_get_next.op)
+            sess.run(fused_get_next.op)
           for _ in range(num_iters):
             start = time.time()
-            sess.run(chained_get_next.op)
+            sess.run(fused_get_next.op)
             end = time.time()
-            chained_deltas.append(end - start)
+            fused_deltas.append(end - start)
 
-        fused_dataset = dataset.apply(
-            batching.map_and_batch(
-                math_ops.matmul,
-                num_parallel_calls=num_calls,
-                batch_size=batch_size))
-        fused_iterator = dataset_ops.make_one_shot_iterator(fused_dataset)
-        fused_get_next = fused_iterator.get_next()
+        # `map_and_batch_fusion` is optimized by default. To get the chained
+        # dataset, with have to disable it.
+        options = dataset_ops.Options()
+        options.experimental_optimization = OptimizationOptions()
+        options.experimental_optimization.map_and_batch_fusion = False
+        chained_dataset = fused_dataset.with_options(options)
+        chained_iterator = dataset_ops.make_one_shot_iterator(chained_dataset)
+        chained_get_next = chained_iterator.get_next()
 
-        fused_deltas = []
+        chained_deltas = []
         with session.Session(
             config=config_pb2.ConfigProto(
                 inter_op_parallelism_threads=inter_op,
                 use_per_session_threads=True)) as sess:
-
           for _ in range(5):
-            sess.run(fused_get_next.op)
+            sess.run(chained_get_next.op)
           for _ in range(num_iters):
             start = time.time()
-            sess.run(fused_get_next.op)
+            sess.run(chained_get_next.op)
             end = time.time()
-            fused_deltas.append(end - start)
+            chained_deltas.append(end - start)
 
         print(
             "batch size: %d, num parallel calls: %d, inter-op parallelism: %d, "
diff --git a/tensorflow/python/data/experimental/benchmarks/map_vectorization_benchmark.py b/tensorflow/python/data/experimental/benchmarks/map_vectorization_benchmark.py
index 47ec6391f7..a60ba0a857 100644
--- a/tensorflow/python/data/experimental/benchmarks/map_vectorization_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/map_vectorization_benchmark.py
@@ -24,6 +24,7 @@ import numpy as np
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.client import session
+from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import constant_op
@@ -115,13 +116,23 @@ class MapVectorizationBenchmark(test.Benchmark):
   def _compare(self, input_dataset, map_fn, batch_size, input_size, str_id):
     num_elems = int(np.sum([np.prod(x) for x in input_size]))
     name_template = "{}__batch_size_{}_input_element_size_{}_{}"
-    unoptimized = input_dataset.map(map_fn).batch(batch_size)
-    unoptimized_op = dataset_ops.make_one_shot_iterator(unoptimized).get_next()
 
-    optimized = input_dataset.map(map_fn).batch(batch_size)
+    base_dataset = input_dataset.map(map_fn).batch(batch_size)
+
     options = dataset_ops.Options()
-    options.experimental_map_vectorization = True
-    optimized = optimized.with_options(options)
+    opt_options = optimization_options.OptimizationOptions()
+    # Disable default map_and_batch_fusion optimization
+    opt_options.map_and_batch_fusion = False
+    options.experimental_optimization = opt_options
+    base_dataset = base_dataset.with_options(options)
+
+    unoptimized_op = dataset_ops.make_one_shot_iterator(base_dataset).get_next()
+
+    optimized_options = dataset_ops.Options()
+    opt_options = optimization_options.OptimizationOptions()
+    opt_options.map_vectorization = True
+    optimized_options.experimental_optimization = opt_options
+    optimized = base_dataset.with_options(optimized_options)
     optimized_op = dataset_ops.make_one_shot_iterator(optimized).get_next()
 
     unoptimized_time = self._run(
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_batch_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_batch_fusion_test.py
index 801f664f09..e2ff3116ec 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_batch_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_batch_fusion_test.py
@@ -18,7 +18,6 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import test_util
@@ -32,10 +31,6 @@ class MapAndBatchFusionTest(test_base.DatasetTestBase):
     dataset = dataset_ops.Dataset.range(10).apply(
         optimization.assert_next(
             ["MapAndBatch"])).map(lambda x: x * x).batch(10)
-    options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
-    options.experimental_optimization.map_and_batch_fusion = True
-    dataset = dataset.with_options(options)
     self.assertDatasetProduces(
         dataset, expected_output=[[x * x for x in range(10)]])
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index c2e08e2cd8..adc411bfb5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -23,7 +23,7 @@ import numpy as np
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
+from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -344,18 +344,25 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
       Tuple of (unoptimized dataset, optimized dataset).
     """
     map_node_name = "Map" if num_parallel_calls is None else "ParallelMap"
-    batch_size = 100
 
     def _make_dataset(node_names):
-      return base_dataset.apply(optimization.assert_next(node_names)).map(
-          map_fn, num_parallel_calls=num_parallel_calls).batch(batch_size)
+      dataset = base_dataset.apply(optimization.assert_next(node_names))
+      dataset = dataset.map(map_fn, num_parallel_calls)
+      dataset = dataset.batch(100)
+      options = dataset_ops.Options()
+      opt_options = optimization_options.OptimizationOptions()
+      opt_options.map_and_batch_fusion = False
+      options.experimental_optimization = opt_options
+      dataset = dataset.with_options(options)
+      return dataset
 
     unoptimized = _make_dataset([map_node_name, "Batch"])
     optimized = _make_dataset(["Batch", map_node_name]
                               if expect_optimized else [map_node_name, "Batch"])
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
-    options.experimental_optimization.map_vectorization = True
+    opt_options = optimization_options.OptimizationOptions()
+    opt_options.map_vectorization = True
+    options.experimental_optimization = opt_options
     optimized = optimized.with_options(options)
     return unoptimized, optimized
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
index 150cc7b5e4..05d88eb071 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
@@ -256,8 +256,9 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testOptimizationEnabledByDefault(self):
     """Tests that some optimizations are applied to datasets by default."""
     options = dataset_ops.Options()
-    expected_optimizations = ["noop_elimination"]
-    self.assertEqual(options._static_optimizations(), expected_optimizations)
+    expected_optimizations = ["noop_elimination", "map_and_batch_fusion"]
+    self.assertEqual(
+        set(options._static_optimizations()), set(expected_optimizations))
 
   def testOptimizationDisableDefault(self):
     """Tests that we can disable all static optimizations enabled by default.
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
index e65aa44d06..bdbd8702b7 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/dataset_serialization_test_base.py
@@ -23,6 +23,7 @@ import os
 import numpy as np
 
 from tensorflow.python.data.experimental.ops import iterator_ops as contrib_iterator_ops
+from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import dtypes
@@ -74,23 +75,39 @@ class DatasetSerializationTestBase(test.TestCase):
     Raises:
       AssertionError if any test fails.
     """
+    # NOTE: We disable all default optimizations in serialization tests in order
+    # to test the actual dataset in question.
+    options = dataset_ops.Options()
+    options.experimental_optimization = OptimizationOptions()
+    options.experimental_optimization.apply_default_optimizations = False
+
+    def ds_fn1_no_opt():
+      return ds_fn1().with_options(options)
+
     self.verify_unused_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+        ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
     self.verify_fully_used_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+        ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
     self.verify_exhausted_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+        ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
     self.verify_init_before_restore(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+        ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
     self.verify_multiple_breaks(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+        ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
     self.verify_reset_restored_iterator(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+        ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
     self.verify_restore_in_empty_graph(
-        ds_fn1, num_outputs, sparse_tensors=sparse_tensors)
+        ds_fn1_no_opt, num_outputs, sparse_tensors=sparse_tensors)
     if ds_fn2:
+
+      def ds_fn2_no_opt():
+        return ds_fn2().with_options(options)
+
       self.verify_restore_in_modified_graph(
-          ds_fn1, ds_fn2, num_outputs, sparse_tensors=sparse_tensors)
+          ds_fn1_no_opt,
+          ds_fn2_no_opt,
+          num_outputs,
+          sparse_tensors=sparse_tensors)
 
   def verify_unused_iterator(self,
                              ds_fn,
diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py
index daf65cd5cd..73849435a9 100644
--- a/tensorflow/python/data/experimental/ops/optimization_options.py
+++ b/tensorflow/python/data/experimental/ops/optimization_options.py
@@ -94,7 +94,6 @@ class OptimizationOptions(options.OptionsBase):
     optimizations_to_enable = [
         "filter_fusion",
         "hoist_random_uniform",
-        "map_and_batch_fusion",
         "map_and_filter_fusion",
         "map_fusion",
         "map_parallelization",
@@ -109,6 +108,7 @@ class OptimizationOptions(options.OptionsBase):
       # The following optimizations are turned on by default, unless the
       # user explicitly disables them.
       optimizations_to_disable = [
+          "map_and_batch_fusion",
           "noop_elimination",
       ]
       for optimization in optimizations_to_disable:
-- 
GitLab


From 034565a9a1683201040de985a0d251615921219b Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Fri, 7 Dec 2018 17:01:20 -0800
Subject: [PATCH 249/873] Update TFLite iOS Camera example to use GPU Delegate.

Note: GPU Delegate library isn't released yet. We're
working on it.
PiperOrigin-RevId: 224600723
---
 .../ios/camera/CameraExampleViewController.h  | 26 ++++++++---
 .../ios/camera/CameraExampleViewController.mm | 36 ++++++++++++++-
 tensorflow/lite/examples/ios/camera/Podfile   |  8 +++-
 .../project.pbxproj                           | 44 +++----------------
 .../lite/examples/ios/download_models.sh      |  4 +-
 5 files changed, 70 insertions(+), 48 deletions(-)

diff --git a/tensorflow/lite/examples/ios/camera/CameraExampleViewController.h b/tensorflow/lite/examples/ios/camera/CameraExampleViewController.h
index fb5800e86d..438e6adc79 100644
--- a/tensorflow/lite/examples/ios/camera/CameraExampleViewController.h
+++ b/tensorflow/lite/examples/ios/camera/CameraExampleViewController.h
@@ -17,8 +17,26 @@
 
 #include <vector>
 
-#include "tensorflow/contrib/lite/kernels/register.h"
-#include "tensorflow/contrib/lite/model.h"
+// TensorFlow Lite was migrated out of `contrib/` directory. The change
+// wasn't reflected in newest CocoaPod release yet (1.12.0).
+// Change this to 0 when using a TFLite version which is newer than 1.12.0.
+// TODO(ycling): Remove the macro when we release the next version.
+#ifndef TFLITE_USE_CONTRIB_LITE
+#define TFLITE_USE_CONTRIB_LITE 1
+#endif
+
+// Set TFLITE_USE_GPU_DELEGATE to 1 to use TFLite GPU Delegate.
+// Note: TFLite GPU Delegate binary isn't releast yet, and we're working
+// on it.
+#ifndef TFLITE_USE_GPU_DELEGATE
+#define TFLITE_USE_GPU_DELEGATE 0
+#endif
+
+#if TFLITE_USE_GPU_DELEGATE && TFLITE_USE_CONTRIB_LITE
+// Sanity check.
+#error "GPU Delegate only works with newer TFLite " \
+    "after migrating out of contrib"
+#endif
 
 @interface CameraExampleViewController
     : UIViewController<UIGestureRecognizerDelegate, AVCaptureVideoDataOutputSampleBufferDelegate> {
@@ -33,10 +51,6 @@
   AVCaptureSession* session;
 
   std::vector<std::string> labels;
-  std::unique_ptr<tflite::FlatBufferModel> model;
-  tflite::ops::builtin::BuiltinOpResolver resolver;
-  std::unique_ptr<tflite::Interpreter> interpreter;
-
   double total_latency;
   int total_count;
 }
diff --git a/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm
index a3e6e11095..48cd313c9d 100644
--- a/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm
+++ b/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm
@@ -23,10 +23,20 @@
 #include <iostream>
 #include <queue>
 
+#if TFLITE_USE_CONTRIB_LITE
 #include "tensorflow/contrib/lite/kernels/register.h"
 #include "tensorflow/contrib/lite/model.h"
 #include "tensorflow/contrib/lite/op_resolver.h"
 #include "tensorflow/contrib/lite/string_util.h"
+#else
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/op_resolver.h"
+#include "tensorflow/lite/string_util.h"
+#if TFLITE_USE_GPU_DELEGATE
+#include "tensorflow/lite/delegates/gpu/metal_delegate.h"
+#endif
+#endif
 
 #define LOG(x) std::cerr
 
@@ -34,7 +44,12 @@ namespace {
 
 // If you have your own model, modify this to the file name, and make sure
 // you've added the file to your app resources too.
+#if TFLITE_USE_GPU_DELEGATE
+// GPU Delegate only supports float model now.
 NSString* model_file_name = @"mobilenet_v1_1.0_224";
+#else
+NSString* model_file_name = @"mobilenet_quant_v1_224.tflite";
+#endif
 NSString* model_file_type = @"tflite";
 // If you have your own model, point this to the labels file.
 NSString* labels_file_name = @"labels";
@@ -151,7 +166,12 @@ void ProcessInputWithQuantizedModel(
 - (void)teardownAVCapture;
 @end
 
-@implementation CameraExampleViewController
+@implementation CameraExampleViewController {
+  std::unique_ptr<tflite::FlatBufferModel> model;
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  std::unique_ptr<tflite::Interpreter> interpreter;
+  TfLiteDelegate* delegate;
+}
 
 - (void)setupAVCapture {
   NSError* error = nil;
@@ -363,6 +383,11 @@ void ProcessInputWithQuantizedModel(
 }
 
 - (void)dealloc {
+#if TFLITE_USE_GPU_DELEGATE
+  if (delegate) {
+    DeleteGpuDelegate(delegate);
+  }
+#endif
   [self teardownAVCapture];
 }
 
@@ -388,6 +413,15 @@ void ProcessInputWithQuantizedModel(
   LoadLabels(labels_file_name, labels_file_type, &labels);
 
   tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+
+#if TFLITE_USE_GPU_DELEGATE
+  GpuDelegateOptions options;
+  options.allow_precision_loss = true;
+  options.wait_type = GpuDelegateOptions::WaitType::kActive;
+  delegate = NewGpuDelegate(&options);
+  interpreter->ModifyGraphWithDelegate(delegate);
+#endif
+
   // Explicitly resize the input tensor.
   {
     int input = interpreter->inputs()[0];
diff --git a/tensorflow/lite/examples/ios/camera/Podfile b/tensorflow/lite/examples/ios/camera/Podfile
index 2ed5c8da1e..2e15cc63de 100644
--- a/tensorflow/lite/examples/ios/camera/Podfile
+++ b/tensorflow/lite/examples/ios/camera/Podfile
@@ -4,4 +4,10 @@ inhibit_all_warnings!
 project 'tflite_camera_example.xcodeproj'
 
 target 'tflite_camera_example'
-       pod 'TensorFlowLite', '1.12.0'
+  # Comment 'TensorFlowLite' pod and un-comment 'TensorFlowLiteGpuExperimental'
+  # to use TFLite GPU Delegate.
+  # Note: TFLite GPU Delegate binary isn't releast yet, and we're working
+  # on it.
+
+  pod 'TensorFlowLite', '1.12.0'
+  # pod 'TensorFlowLiteGpuExperimental', '0.0.1'
diff --git a/tensorflow/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj b/tensorflow/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
index 9522c41dea..9b5c2b32a8 100644
--- a/tensorflow/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
+++ b/tensorflow/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
@@ -15,6 +15,7 @@
 		1CDB2D4A1ED3A9CD007929E9 /* CameraExampleViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1CDB2D451ED3A9CD007929E9 /* CameraExampleViewController.mm */; };
 		54DC6C3C5F734F3A58069F0C /* libPods-tflite_camera_example.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 3BA8BF92C84895BFE59D8236 /* libPods-tflite_camera_example.a */; };
 		AC1F82661FBA3CBD0052BA77 /* labels.txt in Resources */ = {isa = PBXBuildFile; fileRef = AC1F82641FBA3CBD0052BA77 /* labels.txt */; };
+		AC31178921BB3FF900AFF1D2 /* mobilenet_quant_v1_224.tflite in Resources */ = {isa = PBXBuildFile; fileRef = AC31178821BB3FF900AFF1D2 /* mobilenet_quant_v1_224.tflite */; };
 		AC3BB41720114C400084552C /* mobilenet_v1_1.0_224.tflite in Resources */ = {isa = PBXBuildFile; fileRef = AC3BB41620114C400084552C /* mobilenet_v1_1.0_224.tflite */; };
 /* End PBXBuildFile section */
 
@@ -36,6 +37,7 @@
 		3BC5BE4BBD09374D3E98F082 /* Pods-tflite_camera_example.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-tflite_camera_example.debug.xcconfig"; path = "Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example.debug.xcconfig"; sourceTree = "<group>"; };
 		55ED318E8D29C8AFEF03DF1E /* Pods-tflite_camera_example.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-tflite_camera_example.release.xcconfig"; path = "Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example.release.xcconfig"; sourceTree = "<group>"; };
 		AC1F82641FBA3CBD0052BA77 /* labels.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = labels.txt; sourceTree = "<group>"; };
+		AC31178821BB3FF900AFF1D2 /* mobilenet_quant_v1_224.tflite */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_quant_v1_224.tflite; sourceTree = "<group>"; };
 		AC3BB41620114C400084552C /* mobilenet_v1_1.0_224.tflite */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_v1_1.0_224.tflite; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
@@ -103,6 +105,7 @@
 		59A3CFF31CF4E68100C4259F /* data */ = {
 			isa = PBXGroup;
 			children = (
+				AC31178821BB3FF900AFF1D2 /* mobilenet_quant_v1_224.tflite */,
 				AC3BB41620114C400084552C /* mobilenet_v1_1.0_224.tflite */,
 				AC1F82641FBA3CBD0052BA77 /* labels.txt */,
 			);
@@ -120,8 +123,6 @@
 				1C564C091ED3A92E00087306 /* Sources */,
 				1C564C0A1ED3A92E00087306 /* Frameworks */,
 				1C564C0B1ED3A92E00087306 /* Resources */,
-				00E875C3B066535AE6B77101 /* [CP] Embed Pods Frameworks */,
-				5C2D02120E3E5E09567AA946 /* [CP] Copy Pods Resources */,
 			);
 			buildRules = (
 			);
@@ -175,42 +176,13 @@
 				AC3BB41720114C400084552C /* mobilenet_v1_1.0_224.tflite in Resources */,
 				1C99111C1ED3B0E600A6BFB9 /* MainStoryboard_iPhone.storyboard in Resources */,
 				AC1F82661FBA3CBD0052BA77 /* labels.txt in Resources */,
+				AC31178921BB3FF900AFF1D2 /* mobilenet_quant_v1_224.tflite in Resources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
 /* End PBXResourcesBuildPhase section */
 
 /* Begin PBXShellScriptBuildPhase section */
-		00E875C3B066535AE6B77101 /* [CP] Embed Pods Frameworks */ = {
-			isa = PBXShellScriptBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			inputPaths = (
-			);
-			name = "[CP] Embed Pods Frameworks";
-			outputPaths = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-			shellPath = /bin/sh;
-			shellScript = "\"${SRCROOT}/Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example-frameworks.sh\"\n";
-			showEnvVarsInLog = 0;
-		};
-		5C2D02120E3E5E09567AA946 /* [CP] Copy Pods Resources */ = {
-			isa = PBXShellScriptBuildPhase;
-			buildActionMask = 2147483647;
-			files = (
-			);
-			inputPaths = (
-			);
-			name = "[CP] Copy Pods Resources";
-			outputPaths = (
-			);
-			runOnlyForDeploymentPostprocessing = 0;
-			shellPath = /bin/sh;
-			shellScript = "\"${SRCROOT}/Pods/Target Support Files/Pods-tflite_camera_example/Pods-tflite_camera_example-resources.sh\"\n";
-			showEnvVarsInLog = 0;
-		};
 		66DAEAAEE9EF6550C3A061E0 /* [CP] Check Pods Manifest.lock */ = {
 			isa = PBXShellScriptBuildPhase;
 			buildActionMask = 2147483647;
@@ -322,9 +294,7 @@
 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
-				HEADER_SEARCH_PATHS = (
-					"$(inherited)",
-				);
+				HEADER_SEARCH_PATHS = "$(inherited)";
 				IPHONEOS_DEPLOYMENT_TARGET = 8.0;
 				MTL_ENABLE_DEBUG_INFO = YES;
 				ONLY_ACTIVE_ARCH = YES;
@@ -365,9 +335,7 @@
 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
-				HEADER_SEARCH_PATHS = (
-					"$(inherited)",
-				);
+				HEADER_SEARCH_PATHS = "$(inherited)";
 				IPHONEOS_DEPLOYMENT_TARGET = 8.0;
 				MTL_ENABLE_DEBUG_INFO = NO;
 				SDKROOT = iphoneos;
diff --git a/tensorflow/lite/examples/ios/download_models.sh b/tensorflow/lite/examples/ios/download_models.sh
index ad6ccd1b0a..4828617d95 100755
--- a/tensorflow/lite/examples/ios/download_models.sh
+++ b/tensorflow/lite/examples/ios/download_models.sh
@@ -53,6 +53,6 @@ download_and_extract "${QUANTIZED_MODELS_URL}" "${DOWNLOADS_DIR}/quantized_model
 file ${DOWNLOADS_DIR}/models
 
 cp ${DOWNLOADS_DIR}/models/models/* simple/data/
-cp "${DOWNLOADS_DIR}/quantized_models/labels.txt" camera/data/
+cp ${DOWNLOADS_DIR}/models/models/* camera/data/
 cp "${DOWNLOADS_DIR}/quantized_models/mobilenet_quant_v1_224.tflite" \
-   'camera/data/mobilenet_v1_1.0_224.tflite'
+   'camera/data/mobilenet_quant_v1_224.tflite'
-- 
GitLab


From 7b6a1018b9185542be1630b3267075012fe6e862 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 7 Dec 2018 17:01:29 -0800
Subject: [PATCH 250/873] [tf.data] Update `tf.data.Iterator` to use
 `Structure` more internally.

PiperOrigin-RevId: 224600744
---
 tensorflow/python/data/ops/iterator_ops.py | 45 ++++++++++------------
 1 file changed, 20 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py
index 6f9c494f39..d0e91b01f9 100644
--- a/tensorflow/python/data/ops/iterator_ops.py
+++ b/tensorflow/python/data/ops/iterator_ops.py
@@ -23,7 +23,6 @@ import warnings
 from tensorflow.python.compat import compat
 from tensorflow.python.data.ops import optional_ops
 from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
 from tensorflow.python.data.util import structure as structure_lib
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
@@ -188,34 +187,32 @@ class Iterator(checkpointable.CheckpointableBase):
     if output_classes is None:
       output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
     nest.assert_same_structure(output_types, output_shapes)
+    output_structure = structure_lib.convert_legacy_structure(
+        output_types, output_shapes, output_classes)
     if shared_name is None:
       shared_name = ""
+    # pylint: disable=protected-access
     if compat.forward_compatible(2018, 8, 3):
       if _device_stack_is_empty():
         with ops.device("/cpu:0"):
           iterator_resource = gen_dataset_ops.iterator_v2(
               container="",
               shared_name=shared_name,
-              output_types=nest.flatten(
-                  sparse.as_dense_types(output_types, output_classes)),
-              output_shapes=nest.flatten(
-                  sparse.as_dense_shapes(output_shapes, output_classes)))
+              output_types=output_structure._flat_types,
+              output_shapes=output_structure._flat_shapes)
       else:
         iterator_resource = gen_dataset_ops.iterator_v2(
             container="",
             shared_name=shared_name,
-            output_types=nest.flatten(
-                sparse.as_dense_types(output_types, output_classes)),
-            output_shapes=nest.flatten(
-                sparse.as_dense_shapes(output_shapes, output_classes)))
+            output_types=output_structure._flat_types,
+            output_shapes=output_structure._flat_shapes)
     else:
       iterator_resource = gen_dataset_ops.iterator(
           container="",
           shared_name=shared_name,
-          output_types=nest.flatten(
-              sparse.as_dense_types(output_types, output_classes)),
-          output_shapes=nest.flatten(
-              sparse.as_dense_shapes(output_shapes, output_classes)))
+          output_types=output_structure._flat_types,
+          output_shapes=output_structure._flat_shapes)
+    # pylint: enable=protected-access
     return Iterator(iterator_resource, None, output_types, output_shapes,
                     output_classes)
 
@@ -278,30 +275,28 @@ class Iterator(checkpointable.CheckpointableBase):
     if output_classes is None:
       output_classes = nest.map_structure(lambda _: ops.Tensor, output_types)
     nest.assert_same_structure(output_types, output_shapes)
+    output_structure = structure_lib.convert_legacy_structure(
+        output_types, output_shapes, output_classes)
     string_handle = ops.convert_to_tensor(string_handle, dtype=dtypes.string)
+    # pylint: disable=protected-access
     if compat.forward_compatible(2018, 8, 3):
       if _device_stack_is_empty():
         with ops.device("/cpu:0"):
           iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
               string_handle,
-              output_types=nest.flatten(
-                  sparse.as_dense_types(output_types, output_classes)),
-              output_shapes=nest.flatten(
-                  sparse.as_dense_shapes(output_shapes, output_classes)))
+              output_types=output_structure._flat_types,
+              output_shapes=output_structure._flat_shapes)
       else:
         iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2(
             string_handle,
-            output_types=nest.flatten(
-                sparse.as_dense_types(output_types, output_classes)),
-            output_shapes=nest.flatten(
-                sparse.as_dense_shapes(output_shapes, output_classes)))
+            output_types=output_structure._flat_types,
+            output_shapes=output_structure._flat_shapes)
     else:
       iterator_resource = gen_dataset_ops.iterator_from_string_handle(
           string_handle,
-          output_types=nest.flatten(
-              sparse.as_dense_types(output_types, output_classes)),
-          output_shapes=nest.flatten(
-              sparse.as_dense_shapes(output_shapes, output_classes)))
+          output_types=output_structure._flat_types,
+          output_shapes=output_structure._flat_shapes)
+    # pylint: enable=protected-access
     return Iterator(iterator_resource, None, output_types, output_shapes,
                     output_classes)
 
-- 
GitLab


From 602b6b86848c27439686f869590f432a54425bb3 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 7 Dec 2018 17:02:42 -0800
Subject: [PATCH 251/873] [tf.data] Update `Dataset.reduce()` to use (more of)
 `Structure`.

We still explode the `Structure` into the types/shapes/classes to perform the "most specific compatible structure" calculation. It would probably make sense to support that calculation as a method on `Structure` and its subclasses.

PiperOrigin-RevId: 224600910
---
 tensorflow/python/data/ops/dataset_ops.py | 49 +++++++++++------------
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index a7cd2f54c7..e08a3af852 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1195,27 +1195,23 @@ class DatasetV2(object):
 
     # Compute initial values for the state classes, shapes and types based on
     # the initial state.
-    state_classes = sparse.get_classes(initial_state)
-    state_shapes = nest.pack_sequence_as(
-        initial_state, [t.get_shape() for t in nest.flatten(initial_state)])
-    state_types = nest.pack_sequence_as(
-        initial_state, [t.dtype for t in nest.flatten(initial_state)])
+    state_structure = structure_lib.Structure.from_value(initial_state)
 
     # Iteratively rerun the reduce function until reaching a fixed point on
-    # `self._state_shapes`.
+    # `state_structure`.
     need_to_rerun = True
     while need_to_rerun:
 
       wrapped_func = StructuredFunctionWrapper(
           reduce_func,
           "reduce()",
-          input_classes=(state_classes, self.output_classes),
-          input_shapes=(state_shapes, self.output_shapes),
-          input_types=(state_types, self.output_types),
+          input_structure=structure_lib.NestedStructure(
+              (state_structure, self._element_structure)),
           add_to_graph=False)
 
       # Extract and validate class information from the returned values.
       output_classes = wrapped_func.output_classes
+      state_classes = state_structure._to_legacy_output_classes()  # pylint: disable=protected-access
       for new_state_class, state_class in zip(
           nest.flatten(output_classes), nest.flatten(state_classes)):
         if not issubclass(new_state_class, state_class):
@@ -1226,6 +1222,7 @@ class DatasetV2(object):
 
       # Extract and validate type information from the returned values.
       output_types = wrapped_func.output_types
+      state_types = state_structure._to_legacy_output_types()  # pylint: disable=protected-access
       for new_state_type, state_type in zip(
           nest.flatten(output_types), nest.flatten(state_types)):
         if new_state_type != state_type:
@@ -1236,6 +1233,7 @@ class DatasetV2(object):
 
       # Extract shape information from the returned values.
       output_shapes = wrapped_func.output_shapes
+      state_shapes = state_structure._to_legacy_output_shapes()  # pylint: disable=protected-access
       flat_state_shapes = nest.flatten(state_shapes)
       flat_new_state_shapes = nest.flatten(output_shapes)
       weakened_state_shapes = [
@@ -1253,27 +1251,26 @@ class DatasetV2(object):
           break
 
       if need_to_rerun:
-        state_shapes = nest.pack_sequence_as(state_shapes,
-                                             weakened_state_shapes)
+        # TODO(b/110122868): Support a "most specific compatible structure"
+        # method for combining structures, to avoid using legacy structures
+        # here.
+        state_structure = structure_lib.convert_legacy_structure(
+            state_types,
+            nest.pack_sequence_as(state_shapes, weakened_state_shapes),
+            state_classes)
 
     reduce_func = wrapped_func.function
     reduce_func.add_to_graph(ops.get_default_graph())
 
-    return sparse.deserialize_sparse_tensors(
-        nest.pack_sequence_as(
-            output_types,
-            gen_dataset_ops.reduce_dataset(
-                self._as_variant_tensor(),  # pylint: disable=protected-access
-                nest.flatten(sparse.serialize_sparse_tensors(initial_state)),
-                reduce_func.captured_inputs,
-                f=reduce_func,
-                output_shapes=nest.flatten(
-                    sparse.as_dense_shapes(output_shapes, output_classes)),
-                output_types=nest.flatten(
-                    sparse.as_dense_types(output_types, output_classes)))),
-        output_types,
-        output_shapes,
-        output_classes)
+    # pylint: disable=protected-access
+    return state_structure._from_compatible_tensor_list(
+        gen_dataset_ops.reduce_dataset(
+            self._as_variant_tensor(),
+            state_structure._to_tensor_list(initial_state),
+            reduce_func.captured_inputs,
+            f=reduce_func,
+            output_shapes=state_structure._flat_shapes,
+            output_types=state_structure._flat_types))
 
   def with_options(self, options):
     """Returns a new `tf.data.Dataset` with the given options set.
-- 
GitLab


From a9bfbc26263ca6af4ea9410daea093633a9b49fa Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Fri, 7 Dec 2018 17:16:07 -0800
Subject: [PATCH 252/873] Add an Android APK wrapper for the benchmark_model
 utility

This APK offers a more faithful representation of on-device performance
by executing the benchmark in the context of a foreground Activity.
See the README for more details.

PiperOrigin-RevId: 224602509
---
 tensorflow/lite/build_def.bzl                 |  4 +-
 tensorflow/lite/tools/benchmark/README.md     |  5 +
 .../benchmark/android/AndroidManifest.xml     | 42 +++++++++
 tensorflow/lite/tools/benchmark/android/BUILD | 44 +++++++++
 .../lite/tools/benchmark/android/README.md    | 65 +++++++++++++
 .../android/jni/benchmark_model_jni.cc        | 92 +++++++++++++++++++
 .../lite/benchmark/BenchmarkModel.java        | 32 +++++++
 .../benchmark/BenchmarkModelActivity.java     | 44 +++++++++
 8 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/lite/tools/benchmark/android/AndroidManifest.xml
 create mode 100644 tensorflow/lite/tools/benchmark/android/BUILD
 create mode 100644 tensorflow/lite/tools/benchmark/android/README.md
 create mode 100644 tensorflow/lite/tools/benchmark/android/jni/benchmark_model_jni.cc
 create mode 100644 tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModel.java
 create mode 100644 tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModelActivity.java

diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl
index c61ee63c7e..c17eddf47b 100644
--- a/tensorflow/lite/build_def.bzl
+++ b/tensorflow/lite/build_def.bzl
@@ -112,7 +112,8 @@ def tflite_jni_binary(
         linkshared = 1,
         linkstatic = 1,
         testonly = 0,
-        deps = []):
+        deps = [],
+        srcs = []):
     """Builds a jni binary for TFLite."""
     linkopts = linkopts + [
         "-Wl,--version-script",  # Export only jni functions & classes.
@@ -124,6 +125,7 @@ def tflite_jni_binary(
         linkshared = linkshared,
         linkstatic = linkstatic,
         deps = deps + [linkscript],
+        srcs = srcs,
         linkopts = linkopts,
         testonly = testonly,
     )
diff --git a/tensorflow/lite/tools/benchmark/README.md b/tensorflow/lite/tools/benchmark/README.md
index a71a2fa1c0..a4d9c879eb 100644
--- a/tensorflow/lite/tools/benchmark/README.md
+++ b/tensorflow/lite/tools/benchmark/README.md
@@ -11,6 +11,11 @@ The instructions below are for running the binary on Desktop and Android,
 for iOS please use the
 [iOS benchmark app](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark/ios).
 
+An experimental Android APK wrapper for the benchmark model utility offers more
+faithful execution behavior on Android (via a foreground Activity). It is
+located
+[here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark/android).
+
 ## Parameters
 
 The binary takes the following required parameters:
diff --git a/tensorflow/lite/tools/benchmark/android/AndroidManifest.xml b/tensorflow/lite/tools/benchmark/android/AndroidManifest.xml
new file mode 100644
index 0000000000..7cdca2885d
--- /dev/null
+++ b/tensorflow/lite/tools/benchmark/android/AndroidManifest.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="org.tensorflow.lite.benchmark">
+
+    <!-- Necessary for loading custom models from disk. -->
+    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE"/>
+
+    <!-- Target SDK 21 (<23) to avoid the need for requesting storage
+         permissions. This APK will almost always be used from the command-line
+         anyway, and be expicitly installed by the developer. -->
+    <uses-sdk
+        android:minSdkVersion="21"
+        android:targetSdkVersion="21" />
+
+    <application>
+        <!-- This Activity runs the TensorFlow Lite benchmark at creation, using
+             a provided set of arguments, then immediately terminates. -->
+        <activity android:name="org.tensorflow.lite.benchmark.BenchmarkModelActivity"
+                  android:screenOrientation="portrait"
+                  android:label="TFLite Benchmark"
+                  android:theme="@android:style/Theme.NoDisplay"
+                  android:exported="true"
+                  android:noHistory="true" />
+    </application>
+
+</manifest>
diff --git a/tensorflow/lite/tools/benchmark/android/BUILD b/tensorflow/lite/tools/benchmark/android/BUILD
new file mode 100644
index 0000000000..a291effddc
--- /dev/null
+++ b/tensorflow/lite/tools/benchmark/android/BUILD
@@ -0,0 +1,44 @@
+# Description:
+#   BenchmarkModel Android harness for TensorFlow Lite benchmarks.
+package(default_visibility = ["//visibility:private"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+load("//tensorflow/lite:build_def.bzl", "tflite_jni_binary")
+load("@build_bazel_rules_android//android:rules.bzl", "android_binary")
+
+# See README.md for details about building and executing this benchmark.
+android_binary(
+    name = "benchmark_model",
+    srcs = glob([
+        "src/**/*.java",
+    ]),
+    custom_package = "org.tensorflow.lite.benchmark",
+    manifest = "AndroidManifest.xml",
+    # In some platforms we don't have an Android SDK/NDK and this target
+    # can't be built. We need to prevent the build system from trying to
+    # use the target in that case.
+    tags = ["manual"],
+    deps = [":tensorflowlite_benchmark_native"],
+)
+
+tflite_jni_binary(
+    name = "libtensorflowlite_benchmark.so",
+    srcs = glob([
+        "jni/**/*.cc",
+        "jni/**/*.h",
+    ]),
+    deps = [
+        "//tensorflow/lite/java/jni",
+        "//tensorflow/lite/tools/benchmark:benchmark_tflite_model_lib",
+        "//tensorflow/lite/tools/benchmark:logging",
+    ],
+)
+
+cc_library(
+    name = "tensorflowlite_benchmark_native",
+    srcs = ["libtensorflowlite_benchmark.so"],
+    visibility = ["//visibility:private"],
+)
diff --git a/tensorflow/lite/tools/benchmark/android/README.md b/tensorflow/lite/tools/benchmark/android/README.md
new file mode 100644
index 0000000000..f5b67e3f79
--- /dev/null
+++ b/tensorflow/lite/tools/benchmark/android/README.md
@@ -0,0 +1,65 @@
+# TFLite Android Model Benchmark Tool
+
+## Description
+
+This Android benchmark app is a simple wrapper around the TensorFlow Lite
+[command-line benchmark utility](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark).
+
+Pushing and executing binaries directly on Android is a valid approach to
+benchmarking, but it can result in subtle (but observable) differences in
+performance relative to execution within an actual Android app. In particular,
+Android's scheduler tailors behavior based on thread and process priorities,
+which differ between a foreground Activity/Application and a regular background
+binary executed via `adb shell ...`. This tailored behavior is most evident when
+enabling multi-threaded CPU execution with TensorFlow Lite.
+
+To that end, this app offers perhaps a more faithful view of runtime performance
+that developers can expected when deploying TensorFlow Lite with their
+application.
+
+## To build/install/run
+
+(0) Refer to
+https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/android
+to edit the `WORKSPACE` to configure the android NDK/SDK.
+
+(1) Build for your specific platform, e.g.:
+
+```
+bazel build -c opt \
+  --config=android_arm64 \
+  --cxxopt='--std=c++11' \
+  tensorflow/lite/tools/benchmark/android:benchmark_model
+```
+
+(2) Connect your phone. Install the benchmark APK to your phone with adb:
+
+```
+adb install -r -d bazel-bin/tensorflow/lite/tools/benchmark/android/benchmark_model.apk
+```
+
+(3) Push the compute graph that you need to test.
+
+```
+adb push mobilenet_quant_v1_224.tflite /data/local/tmp
+```
+
+(4) Run the benchmark. Additional command-line flags are documented
+[here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/tools/benchmark/README.md)
+and can be appended to the `args` string alongside the required `--graph` flag
+(note that all args must be nested in the single quoted string that follows the
+args key).
+
+```
+adb shell am start -S -n
+  org.tensorflow.lite.benchmark/org.tensorflow.lite.benchmark.BenchmarkModelActivity \
+  --es args '"--graph=/data/local/tmp/mobilenet_quant_v1_224.tflite --num_threads=4"'
+```
+
+(5) The results will be available in Android's logcat, e.g.:
+
+```
+adb logcat | grep "Average inference"
+
+... tflite  : Average inference timings in us: Warmup: 91471, Init: 4108, Inference: 80660.1
+```
diff --git a/tensorflow/lite/tools/benchmark/android/jni/benchmark_model_jni.cc b/tensorflow/lite/tools/benchmark/android/jni/benchmark_model_jni.cc
new file mode 100644
index 0000000000..ee67bdafb0
--- /dev/null
+++ b/tensorflow/lite/tools/benchmark/android/jni/benchmark_model_jni.cc
@@ -0,0 +1,92 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <jni.h>
+
+#include <sstream>
+#include <string>
+
+#include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"
+#include "tensorflow/lite/tools/benchmark/logging.h"
+
+#ifdef __ANDROID__
+#include <android/log.h>
+#endif
+
+namespace tflite {
+namespace benchmark {
+namespace {
+
+class AndroidBenchmarkLoggingListener : public BenchmarkListener {
+  void OnBenchmarkEnd(const BenchmarkResults& results) override {
+    auto inference_us = results.inference_time_us();
+    auto init_us = results.startup_latency_us();
+    auto warmup_us = results.warmup_time_us();
+    std::stringstream results_output;
+    results_output << "Average inference timings in us: "
+                   << "Warmup: " << warmup_us.avg() << ", "
+                   << "Init: " << init_us << ", "
+                   << "Inference: " << inference_us.avg();
+#ifdef __ANDROID__
+    __android_log_print(ANDROID_LOG_ERROR, "tflite", "%s",
+                        results_output.str().c_str());
+#else
+    fprintf(stderr, "%s", results_output.str().c_str());
+#endif
+  }
+};
+
+void Run(int argc, char** argv) {
+  BenchmarkTfLiteModel benchmark;
+  AndroidBenchmarkLoggingListener listener;
+  benchmark.AddListener(&listener);
+  benchmark.Run(argc, argv);
+}
+
+}  // namespace
+}  // namespace benchmark
+}  // namespace tflite
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+JNIEXPORT void JNICALL
+Java_org_tensorflow_lite_benchmark_BenchmarkModel_nativeRun(JNIEnv* env,
+                                                            jclass clazz,
+                                                            jstring args_obj) {
+  const char* args_chars = env->GetStringUTFChars(args_obj, nullptr);
+
+  // Split the args string into individual arg tokens.
+  std::istringstream iss(args_chars);
+  std::vector<std::string> args_split{std::istream_iterator<std::string>(iss),
+                                      {}};
+
+  // Construct a fake argv command-line object for the benchmark.
+  std::vector<char*> argv;
+  std::string arg0 = "(BenchmarkModelAndroid)";
+  argv.push_back(const_cast<char*>(arg0.data()));
+  for (auto& arg : args_split) {
+    argv.push_back(const_cast<char*>(arg.data()));
+  }
+
+  tflite::benchmark::Run(static_cast<int>(argv.size()), argv.data());
+
+  env->ReleaseStringUTFChars(args_obj, args_chars);
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif  // __cplusplus
diff --git a/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModel.java b/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModel.java
new file mode 100644
index 0000000000..a6cf8d78d5
--- /dev/null
+++ b/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModel.java
@@ -0,0 +1,32 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package org.tensorflow.lite.benchmark;
+
+/** Helper class for running a native TensorFlow Lite benchmark. */
+class BenchmarkModel {
+  static {
+    System.loadLibrary("tensorflowlite_benchmark");
+  }
+
+  // Executes a standard TensorFlow Lite benchmark according to the provided args.
+  //
+  // Note that {@code args} will be split by the native execution code.
+  public static void run(String args) {
+    nativeRun(args);
+  }
+
+  private static native void nativeRun(String args);
+}
diff --git a/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModelActivity.java b/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModelActivity.java
new file mode 100644
index 0000000000..12410adf3d
--- /dev/null
+++ b/tensorflow/lite/tools/benchmark/android/src/org/tensorflow/lite/benchmark/BenchmarkModelActivity.java
@@ -0,0 +1,44 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package org.tensorflow.lite.benchmark;
+
+import android.app.Activity;
+import android.content.Intent;
+import android.os.Bundle;
+import android.util.Log;
+
+/** Main {@code Activity} class for the benchmark app. */
+public class BenchmarkModelActivity extends Activity {
+
+  private static final String TAG = "tflite_BenchmarkModelActivity";
+
+  private static final String ARGS_INTENT_KEY_0 = "args";
+  private static final String ARGS_INTENT_KEY_1 = "--args";
+
+  @Override
+  public void onCreate(Bundle savedInstanceState) {
+    super.onCreate(savedInstanceState);
+
+    Intent intent = getIntent();
+    Bundle bundle = intent.getExtras();
+    String args = bundle.getString(ARGS_INTENT_KEY_0, bundle.getString(ARGS_INTENT_KEY_1));
+    Log.i(TAG, "Running TensorFlow Lite benchmark with args: " + args);
+
+    BenchmarkModel.run(args);
+
+    finish();
+  }
+}
-- 
GitLab


From bcd4d48cee1ec070f00a401107a85ae416c57abc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 17:17:17 -0800
Subject: [PATCH 253/873] Automated rollback of commit
 c05e0bd84fb8a20f746addde640ac0bc35f5026d. Revert #24081.

PiperOrigin-RevId: 224602628
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 459 +++++---------------
 tensorflow/stream_executor/cuda/cuda_dnn.h  | 194 +--------
 tensorflow/stream_executor/dnn.h            | 207 ++-------
 tensorflow/stream_executor/stream.cc        | 172 ++------
 4 files changed, 179 insertions(+), 853 deletions(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 387afefc21..1f2e2f48bb 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -2380,7 +2380,7 @@ bool ShouldIncludeWinogradNonfusedAlgo(
 }  // namespace
 
 template <class T>
-port::Status CudnnSupport::PrepareForConvolutionImpl(
+port::Status CudnnSupport::DoConvolveImpl(
     Stream* stream, const dnn::BatchDescriptor& input_descriptor,
     const DeviceMemory<T>& input_data,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -2389,34 +2389,6 @@ port::Status CudnnSupport::PrepareForConvolutionImpl(
     const dnn::BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data,
     dnn::DataType accumulator_type, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
-  CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type);
-  CudnnTensorDescriptor output_nd(output_descriptor, cudnn_type);
-  CudnnFilterDescriptor filter(filter_descriptor, cudnn_type);
-  CudnnConvolutionDescriptor conv(convolution_descriptor,
-                                  ToCudnnDataType(accumulator_type));
-
-  auto cudnn = cudnn_->GetHandle(parent_, stream);
-
-  SE_ASSIGN_OR_RETURN(*algorithm_desc,
-                      GetCudnnConvolutionForwardAlgorithm(
-                          stream, cudnn, algorithm_config, input_nd, filter,
-                          conv, output_nd, scratch_allocator, scratch_memory));
-
-  return port::Status::OK();
-}
-
-template <class T>
-port::Status CudnnSupport::DoConvolveImpl(
-    Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-    const DeviceMemory<T>& input_data,
-    const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<T>& filter_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::BatchDescriptor& output_descriptor, DeviceMemory<T>* output_data,
-    dnn::DataType accumulator_type, const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
     dnn::ProfileResult* output_profile_result) {
   cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
   CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type);
@@ -2439,6 +2411,12 @@ port::Status CudnnSupport::DoConvolveImpl(
 
   const bool is_profiling = output_profile_result != nullptr;
 
+  DeviceMemory<uint8> scratch;
+  SE_ASSIGN_OR_RETURN(dnn::AlgorithmDesc algo_desc,
+                      GetCudnnConvolutionForwardAlgorithm(
+                          stream, cudnn, algorithm_config, input_nd, filter,
+                          conv, output_nd, scratch_allocator, &scratch));
+
   std::unique_ptr<CUDATimer, TimerDeleter> timer;
   if (is_profiling) {
     timer.reset(new CUDATimer(parent_));  // NOLINT
@@ -2454,7 +2432,7 @@ port::Status CudnnSupport::DoConvolveImpl(
   // memory. See nvbugs/2138754, b/80018418.
   if (CUDNN_VERSION < 7300) {
     SE_RETURN_IF_ERROR([&] {
-      if (algorithm_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
+      if (algo_desc.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) {
         return port::Status::OK();
       }
       if (input_descriptor.ndims() < 3) {
@@ -2479,8 +2457,7 @@ port::Status CudnnSupport::DoConvolveImpl(
     }());
   }
 
-  if (algorithm_desc.algo_id() ==
-          CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+  if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
     return port::Status(port::error::FAILED_PRECONDITION,
                         "This configuration has potential integer overflow in "
@@ -2492,19 +2469,18 @@ port::Status CudnnSupport::DoConvolveImpl(
       /*alpha=*/alpha, /*srcDesc=*/input_nd.handle(),
       /*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(),
       /*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(),
-      /*algo=*/ToConvForwardAlgo(algorithm_desc),
-      /*workSpace=*/scratch_memory->opaque(),
-      /*workSpaceSizeInBytes=*/scratch_memory->size(), /*beta=*/beta,
+      /*algo=*/ToConvForwardAlgo(algo_desc), /*workSpace=*/scratch.opaque(),
+      /*workSpaceSizeInBytes=*/scratch.size(), /*beta=*/beta,
       /*yDesc=*/output_nd.handle(), /*y=*/output_data->opaque()));
 
   if (is_profiling) {
     if (!timer->Stop(AsCUDAStream(stream))) {
       return port::Status(port::error::INTERNAL, "Failed to stop timer");
     }
-    output_profile_result->set_algorithm(algorithm_desc);
+    output_profile_result->set_algorithm(algo_desc);
     output_profile_result->set_elapsed_time_in_ms(
         timer->GetElapsedMilliseconds());
-    output_profile_result->set_scratch_size(scratch_memory->size());
+    output_profile_result->set_scratch_size(scratch.size());
   }
 
   return port::Status::OK();
@@ -2901,7 +2877,7 @@ port::Status CudnnSupport::DoBatchNormalizationBackwardImpl(
   return port::Status::OK();
 }
 
-bool CudnnSupport::PrepareForConvolution(
+bool CudnnSupport::DoConvolve(
     Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
     const DeviceMemory<float>& input_data,
     const dnn::FilterDescriptor& filter_descriptor,
@@ -2910,70 +2886,12 @@ bool CudnnSupport::PrepareForConvolution(
     const dnn::BatchDescriptor& output_descriptor,
     DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  return IsStatusOk(PrepareForConvolutionImpl<float>(
-                        stream, batch_descriptor, input_data, filter_descriptor,
-                        filter_data, convolution_descriptor, output_descriptor,
-                        output_data, dnn::DataType::kFloat, scratch_allocator,
-                        algorithm_config, algorithm_desc, scratch_memory),
-                    /*report_error=*/true);
-}
-
-bool CudnnSupport::PrepareForConvolution(
-    Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
-    const DeviceMemory<double>& input_data,
-    const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<double>& filter_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
-    const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  return IsStatusOk(PrepareForConvolutionImpl<double>(
-                        stream, batch_descriptor, input_data, filter_descriptor,
-                        filter_data, convolution_descriptor, output_descriptor,
-                        output_data, dnn::DataType::kDouble, scratch_allocator,
-                        algorithm_config, algorithm_desc, scratch_memory),
-                    /*report_error=*/true);
-}
-
-bool CudnnSupport::PrepareForConvolution(
-    Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
-    const DeviceMemory<Eigen::half>& input_data,
-    const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<Eigen::half>& filter_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
-    const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  dnn::DataType acc_type =
-      CudnnEnvVar<ConvDoFP32ComputationFP16Input>::IsEnabled()
-          ? dnn::DataType::kFloat
-          : dnn::DataType::kHalf;
-  return IsStatusOk(
-      PrepareForConvolutionImpl<Eigen::half>(
-          stream, batch_descriptor, input_data, filter_descriptor, filter_data,
-          convolution_descriptor, output_descriptor, output_data, acc_type,
-          scratch_allocator, algorithm_config, algorithm_desc, scratch_memory),
-      /*report_error=*/true);
-}
-
-bool CudnnSupport::DoConvolve(
-    Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
-    const DeviceMemory<float>& input_data,
-    const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<float>& filter_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<float>* output_data, const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
     dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
       DoConvolveImpl(stream, batch_descriptor, input_data, filter_descriptor,
                      filter_data, convolution_descriptor, output_descriptor,
-                     output_data, dnn::DataType::kFloat, algorithm_desc,
-                     scratch_memory, output_profile_result),
+                     output_data, dnn::DataType::kFloat, scratch_allocator,
+                     algorithm_config, output_profile_result),
       /*report_error=*/!output_profile_result);
 }
 
@@ -2984,14 +2902,14 @@ bool CudnnSupport::DoConvolve(
     const DeviceMemory<double>& filter_data,
     const dnn::ConvolutionDescriptor& convolution_descriptor,
     const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<double>* output_data, const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
+    DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
+    const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
       DoConvolveImpl(stream, batch_descriptor, input_data, filter_descriptor,
                      filter_data, convolution_descriptor, output_descriptor,
-                     output_data, dnn::DataType::kDouble, algorithm_desc,
-                     scratch_memory, output_profile_result),
+                     output_data, dnn::DataType::kDouble, scratch_allocator,
+                     algorithm_config, output_profile_result),
       /*report_error=*/!output_profile_result);
 }
 
@@ -3002,9 +2920,8 @@ bool CudnnSupport::DoConvolve(
     const DeviceMemory<Eigen::half>& filter_data,
     const dnn::ConvolutionDescriptor& convolution_descriptor,
     const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<Eigen::half>* output_data,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
+    DeviceMemory<Eigen::half>* output_data, ScratchAllocator* scratch_allocator,
+    const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
   dnn::DataType acc_type =
       CudnnEnvVar<ConvDoFP32ComputationFP16Input>::IsEnabled()
@@ -3013,7 +2930,7 @@ bool CudnnSupport::DoConvolve(
   return IsStatusOk(
       DoConvolveImpl(stream, batch_descriptor, input_data, filter_descriptor,
                      filter_data, convolution_descriptor, output_descriptor,
-                     output_data, acc_type, algorithm_desc, scratch_memory,
+                     output_data, acc_type, scratch_allocator, algorithm_config,
                      output_profile_result),
       /*report_error=*/!output_profile_result);
 }
@@ -3149,7 +3066,7 @@ bool CudnnSupport::DoTransformTensor(Stream* stream,
 }
 
 template <class T>
-port::Status CudnnSupport::PrepareForConvolutionBackwardDataImpl(
+port::Status CudnnSupport::DoConvolveBackwardDataImpl(
     Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
     const DeviceMemory<T>& filter_data,
     const dnn::BatchDescriptor& output_descriptor,
@@ -3159,36 +3076,6 @@ port::Status CudnnSupport::PrepareForConvolutionBackwardDataImpl(
     DeviceMemory<T>* backward_input_data, dnn::DataType accumulator_type,
     ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
-  auto cudnn = cudnn_->GetHandle(parent_, stream);
-
-  CudnnTensorDescriptor out_back_nd(output_descriptor, cudnn_type);
-  CudnnTensorDescriptor in_back_nd(input_descriptor, cudnn_type);
-  CudnnFilterDescriptor filter(filter_descriptor, cudnn_type);
-  CudnnConvolutionDescriptor conv(convolution_descriptor,
-                                  ToCudnnDataType(accumulator_type));
-
-  SE_ASSIGN_OR_RETURN(
-      *algorithm_desc,
-      GetCudnnConvolutionBackwardDataAlgorithm(
-          stream, cudnn, algorithm_config, in_back_nd, filter, conv,
-          out_back_nd, scratch_allocator, scratch_memory));
-
-  return port::Status::OK();
-}
-
-template <class T>
-port::Status CudnnSupport::DoConvolveBackwardDataImpl(
-    Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<T>& filter_data,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<T> backward_output_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::BatchDescriptor& input_descriptor,
-    DeviceMemory<T>* backward_input_data, dnn::DataType accumulator_type,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
     dnn::ProfileResult* output_profile_result) {
   cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
   // Alpha is the scaling factor for input.
@@ -3212,6 +3099,12 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
 
   const bool is_profiling = output_profile_result != nullptr;
 
+  DeviceMemory<uint8> scratch;
+  SE_ASSIGN_OR_RETURN(dnn::AlgorithmDesc algo_desc,
+                      GetCudnnConvolutionBackwardDataAlgorithm(
+                          stream, cudnn, algorithm_config, in_back_nd, filter,
+                          conv, out_back_nd, scratch_allocator, &scratch));
+
   std::unique_ptr<CUDATimer, TimerDeleter> timer;
   if (is_profiling) {
     timer.reset(new CUDATimer(parent_));  // NOLINT
@@ -3223,8 +3116,7 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
     }
   }
 
-  if (algorithm_desc.algo_id() ==
-          CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+  if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
     return port::Status(port::error::FAILED_PRECONDITION,
                         "This configuration has potential integer overflow in "
@@ -3234,44 +3126,44 @@ port::Status CudnnSupport::DoConvolveBackwardDataImpl(
   // Cudnn 7.1.4 has a bug if the workspace of the following convolution is not
   // zero-initialized, nvbugs/2254619.
   if (CUDNN_VERSION >= 7000 && CUDNN_VERSION < 7300 &&
-      algorithm_desc.algo_id() == CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
-      cudnn_type == CUDNN_DATA_HALF && algorithm_desc.tensor_ops_enabled() &&
+      algo_desc.algo_id() == CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 &&
+      cudnn_type == CUDNN_DATA_HALF && algo_desc.tensor_ops_enabled() &&
       input_descriptor.layout() == dnn::DataLayout::kBatchYXDepth &&
       filter_descriptor.layout() == dnn::FilterLayout::kOutputInputYX &&
       output_descriptor.layout() == dnn::DataLayout::kBatchDepthYX &&
       (convolution_descriptor.vertical_filter_stride() > 1 ||
        convolution_descriptor.horizontal_filter_stride() > 1)) {
-    stream->ThenMemZero(scratch_memory, scratch_memory->size());
+    stream->ThenMemZero(&scratch, scratch.size());
   }
 
-  RETURN_IF_CUDNN_ERROR(cudnnConvolutionBackwardData(
-      cudnn.handle(),
-      /*alpha=*/alpha,
-      /*wDesc=*/filter.handle(),
-      /*w=*/filter_data.opaque(),
-      /*dyDesc=*/out_back_nd.handle(),
-      /*dy=*/backward_output_data.opaque(),
-      /*convDesc=*/conv.handle(),
-      /*algo=*/ToConvBackwardDataAlgo(algorithm_desc),
-      /*workSpace=*/scratch_memory->opaque(),
-      /*workSpaceSizeInBytes=*/scratch_memory->size(),
-      /*beta=*/beta,
-      /*dxDesc=*/in_back_nd.handle(),
-      /*dx=*/backward_input_data->opaque()));
+  RETURN_IF_CUDNN_ERROR(
+      cudnnConvolutionBackwardData(cudnn.handle(),
+                                   /*alpha=*/alpha,
+                                   /*wDesc=*/filter.handle(),
+                                   /*w=*/filter_data.opaque(),
+                                   /*dyDesc=*/out_back_nd.handle(),
+                                   /*dy=*/backward_output_data.opaque(),
+                                   /*convDesc=*/conv.handle(),
+                                   /*algo=*/ToConvBackwardDataAlgo(algo_desc),
+                                   /*workSpace=*/scratch.opaque(),
+                                   /*workSpaceSizeInBytes=*/scratch.size(),
+                                   /*beta=*/beta,
+                                   /*dxDesc=*/in_back_nd.handle(),
+                                   /*dx=*/backward_input_data->opaque()));
   if (is_profiling) {
     if (!timer->Stop(AsCUDAStream(stream))) {
       return port::Status(port::error::INTERNAL, "Failed to stop timer");
     }
-    output_profile_result->set_algorithm(algorithm_desc);
+    output_profile_result->set_algorithm(algo_desc);
     output_profile_result->set_elapsed_time_in_ms(
         timer->GetElapsedMilliseconds());
-    output_profile_result->set_scratch_size(scratch_memory->size());
+    output_profile_result->set_scratch_size(scratch.size());
   }
 
   return port::Status::OK();
 }
 
-bool CudnnSupport::PrepareForConvolutionBackwardData(
+bool CudnnSupport::DoConvolveBackwardData(
     Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
     const DeviceMemory<double>& filter_data,
     const dnn::BatchDescriptor& output_descriptor,
@@ -3281,77 +3173,13 @@ bool CudnnSupport::PrepareForConvolutionBackwardData(
     DeviceMemory<double>* backward_input_data,
     ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  return IsStatusOk(
-      PrepareForConvolutionBackwardDataImpl(
-          stream, filter_descriptor, filter_data, output_descriptor,
-          backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, dnn::DataType::kDouble, scratch_allocator,
-          algorithm_config, algorithm_desc, scratch_memory),
-      /*report_error=*/true);
-}
-
-bool CudnnSupport::PrepareForConvolutionBackwardData(
-    Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<float>& filter_data,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<float> backward_output_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::BatchDescriptor& input_descriptor,
-    DeviceMemory<float>* backward_input_data,
-    ScratchAllocator* scratch_allocator,
-    const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  return IsStatusOk(
-      PrepareForConvolutionBackwardDataImpl(
-          stream, filter_descriptor, filter_data, output_descriptor,
-          backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, dnn::DataType::kFloat, scratch_allocator,
-          algorithm_config, algorithm_desc, scratch_memory),
-      /*report_error=*/true);
-}
-
-bool CudnnSupport::PrepareForConvolutionBackwardData(
-    Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<Eigen::half>& filter_data,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<Eigen::half> backward_output_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::BatchDescriptor& input_descriptor,
-    DeviceMemory<Eigen::half>* backward_input_data,
-    ScratchAllocator* scratch_allocator,
-    const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  dnn::DataType acc_type =
-      CudnnEnvVar<ConvDoFP32ComputationFP16Input>::IsEnabled()
-          ? dnn::DataType::kFloat
-          : dnn::DataType::kHalf;
-  return IsStatusOk(
-      PrepareForConvolutionBackwardDataImpl(
-          stream, filter_descriptor, filter_data, output_descriptor,
-          backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, acc_type, scratch_allocator, algorithm_config,
-          algorithm_desc, scratch_memory),
-      /*report_error=*/true);
-}
-
-bool CudnnSupport::DoConvolveBackwardData(
-    Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-    const DeviceMemory<double>& filter_data,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<double> backward_output_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::BatchDescriptor& input_descriptor,
-    DeviceMemory<double>* backward_input_data,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
     dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
       DoConvolveBackwardDataImpl(
           stream, filter_descriptor, filter_data, output_descriptor,
           backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, dnn::DataType::kDouble, algorithm_desc,
-          scratch_memory, output_profile_result),
+          backward_input_data, dnn::DataType::kDouble, scratch_allocator,
+          algorithm_config, output_profile_result),
       /*report_error=*/!output_profile_result);
 }
 
@@ -3363,15 +3191,15 @@ bool CudnnSupport::DoConvolveBackwardData(
     const dnn::ConvolutionDescriptor& convolution_descriptor,
     const dnn::BatchDescriptor& input_descriptor,
     DeviceMemory<float>* backward_input_data,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
+    ScratchAllocator* scratch_allocator,
+    const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
       DoConvolveBackwardDataImpl(
           stream, filter_descriptor, filter_data, output_descriptor,
           backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, dnn::DataType::kFloat, algorithm_desc,
-          scratch_memory, output_profile_result),
+          backward_input_data, dnn::DataType::kFloat, scratch_allocator,
+          algorithm_config, output_profile_result),
       /*report_error=*/!output_profile_result);
 }
 
@@ -3383,24 +3211,24 @@ bool CudnnSupport::DoConvolveBackwardData(
     const dnn::ConvolutionDescriptor& convolution_descriptor,
     const dnn::BatchDescriptor& input_descriptor,
     DeviceMemory<Eigen::half>* backward_input_data,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
+    ScratchAllocator* scratch_allocator,
+    const dnn::AlgorithmConfig& algorithm_config,
     dnn::ProfileResult* output_profile_result) {
   dnn::DataType acc_type =
       CudnnEnvVar<ConvDoFP32ComputationFP16Input>::IsEnabled()
           ? dnn::DataType::kFloat
           : dnn::DataType::kHalf;
   return IsStatusOk(
-      DoConvolveBackwardDataImpl(stream, filter_descriptor, filter_data,
-                                 output_descriptor, backward_output_data,
-                                 convolution_descriptor, input_descriptor,
-                                 backward_input_data, acc_type, algorithm_desc,
-                                 scratch_memory, output_profile_result),
+      DoConvolveBackwardDataImpl(
+          stream, filter_descriptor, filter_data, output_descriptor,
+          backward_output_data, convolution_descriptor, input_descriptor,
+          backward_input_data, acc_type, scratch_allocator, algorithm_config,
+          output_profile_result),
       /*report_error=*/!output_profile_result);
 }
 
 template <class T>
-port::Status CudnnSupport::PrepareForConvolutionBackwardFilterImpl(
+port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
     Stream* stream, const dnn::BatchDescriptor& input_descriptor,
     const DeviceMemory<T>& input_data,
     const dnn::BatchDescriptor& output_descriptor,
@@ -3410,36 +3238,6 @@ port::Status CudnnSupport::PrepareForConvolutionBackwardFilterImpl(
     DeviceMemory<T>* backward_filter_data, dnn::DataType accumulator_type,
     ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
-  auto cudnn = cudnn_->GetHandle(parent_, stream);
-
-  CudnnTensorDescriptor out_back_nd(output_descriptor, cudnn_type);
-  CudnnTensorDescriptor input_nd(input_descriptor, cudnn_type);
-  CudnnFilterDescriptor filter(filter_descriptor, cudnn_type);
-  CudnnConvolutionDescriptor conv(convolution_descriptor,
-                                  ToCudnnDataType(accumulator_type));
-
-  SE_ASSIGN_OR_RETURN(
-      *algorithm_desc,
-      GetCudnnConvolutionBackwardFilterAlgorithm(
-          stream, cudnn, algorithm_config, input_nd, filter, conv, out_back_nd,
-          scratch_allocator, scratch_memory));
-
-  return port::Status::OK();
-}
-
-template <class T>
-port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
-    Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-    const DeviceMemory<T>& input_data,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<T> backward_output_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::FilterDescriptor& filter_descriptor,
-    DeviceMemory<T>* backward_filter_data, dnn::DataType accumulator_type,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
     dnn::ProfileResult* output_profile_result) {
   cudnnDataType_t cudnn_type = GetCudnnDataType<T>();
   // Alpha is the scaling factor for input.
@@ -3463,6 +3261,12 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
 
   const bool is_profiling = output_profile_result != nullptr;
 
+  DeviceMemory<uint8> scratch;
+  SE_ASSIGN_OR_RETURN(dnn::AlgorithmDesc algo_desc,
+                      GetCudnnConvolutionBackwardFilterAlgorithm(
+                          stream, cudnn, algorithm_config, input_nd, filter,
+                          conv, out_back_nd, scratch_allocator, &scratch));
+
   std::unique_ptr<CUDATimer, TimerDeleter> timer;
   if (is_profiling) {
     timer.reset(new CUDATimer(parent_));  // NOLINT
@@ -3478,8 +3282,7 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
   // results. See nvbugs/2072856
   if (CUDNN_VERSION < 7300) {
     SE_RETURN_IF_ERROR([&] {
-      if (algorithm_desc.algo_id() !=
-          CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
+      if (algo_desc.algo_id() != CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING) {
         return port::Status::OK();
       }
       if (output_descriptor.height() > 1 && output_descriptor.width() > 1) {
@@ -3505,8 +3308,7 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
     }());
   }
 
-  if (algorithm_desc.algo_id() ==
-          CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
+  if (algo_desc.algo_id() == CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
       !ShouldIncludeWinogradNonfusedAlgo(input_descriptor, output_descriptor)) {
     return port::Status(port::error::FAILED_PRECONDITION,
                         "This configuration has potential integer overflow in "
@@ -3522,7 +3324,7 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
   //
   // See nvbugs/2379553.
   if (CUDNN_VERSION >= 7100 && CUDNN_VERSION < 7300 &&
-      algorithm_desc.algo_id() == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 &&
+      algo_desc.algo_id() == CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 &&
       cudnn_type == CUDNN_DATA_HALF &&
       input_descriptor.layout() == dnn::DataLayout::kBatchYXDepth &&
       filter_descriptor.layout() == dnn::FilterLayout::kOutputYXInput &&
@@ -3540,9 +3342,9 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
       /*diffDesc=*/out_back_nd.handle(),
       /*diffData=*/backward_output_data.opaque(),
       /*convDesc=*/conv.handle(),
-      /*algo=*/ToConvBackwardFilterAlgo(algorithm_desc),
-      /*workSpace=*/scratch_memory->opaque(),
-      /*workSpaceSizeInBytes=*/scratch_memory->size(),
+      /*algo=*/ToConvBackwardFilterAlgo(algo_desc),
+      /*workSpace=*/scratch.opaque(),
+      /*workSpaceSizeInBytes=*/scratch.size(),
       /*beta=*/beta,
       /*gradDesc=*/filter.handle(),
       /*dw=*/backward_filter_data->opaque()));
@@ -3550,16 +3352,16 @@ port::Status CudnnSupport::DoConvolveBackwardFilterImpl(
     if (!timer->Stop(AsCUDAStream(stream))) {
       return port::Status(port::error::INTERNAL, "Failed to stop timer");
     }
-    output_profile_result->set_algorithm(algorithm_desc);
+    output_profile_result->set_algorithm(algo_desc);
     output_profile_result->set_elapsed_time_in_ms(
         timer->GetElapsedMilliseconds());
-    output_profile_result->set_scratch_size(scratch_memory->size());
+    output_profile_result->set_scratch_size(scratch.size());
   }
 
   return port::Status::OK();
 }
 
-bool CudnnSupport::PrepareForConvolutionBackwardFilter(
+bool CudnnSupport::DoConvolveBackwardFilter(
     Stream* stream, const dnn::BatchDescriptor& input_descriptor,
     const DeviceMemory<double>& input_data,
     const dnn::BatchDescriptor& output_descriptor,
@@ -3569,17 +3371,18 @@ bool CudnnSupport::PrepareForConvolutionBackwardFilter(
     DeviceMemory<double>* backward_filter_data,
     ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
+    dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
-      PrepareForConvolutionBackwardFilterImpl(
+      DoConvolveBackwardFilterImpl(
           stream, input_descriptor, input_data, output_descriptor,
           backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, dnn::DataType::kDouble, scratch_allocator,
-          algorithm_config, algorithm_desc, scratch_memory),
-      /*report_error=*/true);
+          backward_filter_data, dnn::DataType::kDouble,
+
+          scratch_allocator, algorithm_config, output_profile_result),
+      /*report_error=*/!output_profile_result);
 }
 
-bool CudnnSupport::PrepareForConvolutionBackwardFilter(
+bool CudnnSupport::DoConvolveBackwardFilter(
     Stream* stream, const dnn::BatchDescriptor& input_descriptor,
     const DeviceMemory<float>& input_data,
     const dnn::BatchDescriptor& output_descriptor,
@@ -3589,17 +3392,18 @@ bool CudnnSupport::PrepareForConvolutionBackwardFilter(
     DeviceMemory<float>* backward_filter_data,
     ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-  return IsStatusOk(
-      PrepareForConvolutionBackwardFilterImpl(
-          stream, input_descriptor, input_data, output_descriptor,
-          backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, dnn::DataType::kFloat, scratch_allocator,
-          algorithm_config, algorithm_desc, scratch_memory),
-      /*report_error=*/true);
+    dnn::ProfileResult* output_profile_result) {
+  return IsStatusOk(DoConvolveBackwardFilterImpl(
+                        stream, input_descriptor, input_data, output_descriptor,
+                        backward_output_data, convolution_descriptor,
+                        filter_descriptor, backward_filter_data,
+
+                        dnn::DataType::kFloat, scratch_allocator,
+                        algorithm_config, output_profile_result),
+                    /*report_error=*/!output_profile_result);
 }
 
-bool CudnnSupport::PrepareForConvolutionBackwardFilter(
+bool CudnnSupport::DoConvolveBackwardFilter(
     Stream* stream, const dnn::BatchDescriptor& input_descriptor,
     const DeviceMemory<Eigen::half>& input_data,
     const dnn::BatchDescriptor& output_descriptor,
@@ -3609,83 +3413,20 @@ bool CudnnSupport::PrepareForConvolutionBackwardFilter(
     DeviceMemory<Eigen::half>* backward_filter_data,
     ScratchAllocator* scratch_allocator,
     const dnn::AlgorithmConfig& algorithm_config,
-    dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
+    dnn::ProfileResult* output_profile_result) {
   dnn::DataType acc_type =
       CudnnEnvVar<ConvDoFP32ComputationFP16Input>::IsEnabled()
           ? dnn::DataType::kFloat
           : dnn::DataType::kHalf;
-  return IsStatusOk(
-      PrepareForConvolutionBackwardFilterImpl(
-          stream, input_descriptor, input_data, output_descriptor,
-          backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, acc_type, scratch_allocator, algorithm_config,
-          algorithm_desc, scratch_memory),
-      /*report_error=*/true);
-}
-
-bool CudnnSupport::DoConvolveBackwardFilter(
-    Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-    const DeviceMemory<double>& input_data,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<double> backward_output_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::FilterDescriptor& filter_descriptor,
-    DeviceMemory<double>* backward_filter_data,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
-    dnn::ProfileResult* output_profile_result) {
   return IsStatusOk(
       DoConvolveBackwardFilterImpl(
           stream, input_descriptor, input_data, output_descriptor,
           backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, dnn::DataType::kDouble, algorithm_desc,
-          scratch_memory, output_profile_result),
-      /*report_error=*/!output_profile_result);
-}
-
-bool CudnnSupport::DoConvolveBackwardFilter(
-    Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-    const DeviceMemory<float>& input_data,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<float> backward_output_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::FilterDescriptor& filter_descriptor,
-    DeviceMemory<float>* backward_filter_data,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
-    dnn::ProfileResult* output_profile_result) {
-  return IsStatusOk(
-      DoConvolveBackwardFilterImpl(
-          stream, input_descriptor, input_data, output_descriptor,
-          backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, dnn::DataType::kFloat, algorithm_desc,
-          scratch_memory, output_profile_result),
+          backward_filter_data, acc_type, scratch_allocator, algorithm_config,
+          output_profile_result),
       /*report_error=*/!output_profile_result);
 }
 
-bool CudnnSupport::DoConvolveBackwardFilter(
-    Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-    const DeviceMemory<Eigen::half>& input_data,
-    const dnn::BatchDescriptor& output_descriptor,
-    DeviceMemory<Eigen::half> backward_output_data,
-    const dnn::ConvolutionDescriptor& convolution_descriptor,
-    const dnn::FilterDescriptor& filter_descriptor,
-    DeviceMemory<Eigen::half>* backward_filter_data,
-    const dnn::AlgorithmDesc& algorithm_desc,
-    DeviceMemory<uint8>* scratch_memory,
-    dnn::ProfileResult* output_profile_result) {
-  dnn::DataType acc_type =
-      CudnnEnvVar<ConvDoFP32ComputationFP16Input>::IsEnabled()
-          ? dnn::DataType::kFloat
-          : dnn::DataType::kHalf;
-  return IsStatusOk(DoConvolveBackwardFilterImpl(
-                        stream, input_descriptor, input_data, output_descriptor,
-                        backward_output_data, convolution_descriptor,
-                        filter_descriptor, backward_filter_data, acc_type,
-                        algorithm_desc, scratch_memory, output_profile_result),
-                    /*report_error=*/!output_profile_result);
-}
-
 template <class T>
 port::Status CudnnSupport::DoConvolveBackwardBiasImpl(
     Stream* stream, const dnn::BatchDescriptor& input_descriptor,
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index 6b5b6c690f..0641be140d 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -252,43 +252,6 @@ class CudnnSupport : public dnn::DnnSupport {
       DeviceMemory<float>* scale_backprop,
       DeviceMemory<float>* offset_backprop) override;
 
-  bool PrepareForConvolution(
-      Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
-      const DeviceMemory<float>& input_data,
-      const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<float>& filter_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
-
-  bool PrepareForConvolution(
-      Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
-      const DeviceMemory<double>& input_data,
-      const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<double>& filter_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
-
-  bool PrepareForConvolution(
-      Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
-      const DeviceMemory<Eigen::half>& input_data,
-      const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<Eigen::half>& filter_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<Eigen::half>* output_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
-
   bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
                   const DeviceMemory<float>& input_data,
                   const dnn::FilterDescriptor& filter_descriptor,
@@ -296,8 +259,8 @@ class CudnnSupport : public dnn::DnnSupport {
                   const dnn::ConvolutionDescriptor& convolution_descriptor,
                   const dnn::BatchDescriptor& output_descriptor,
                   DeviceMemory<float>* output_data,
-                  const dnn::AlgorithmDesc& algorithm_desc,
-                  DeviceMemory<uint8>* scratch_memory,
+                  ScratchAllocator* scratch_allocator,
+                  const dnn::AlgorithmConfig& algorithm_config,
                   dnn::ProfileResult* output_profile_result) override;
 
   bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
@@ -307,8 +270,8 @@ class CudnnSupport : public dnn::DnnSupport {
                   const dnn::ConvolutionDescriptor& convolution_descriptor,
                   const dnn::BatchDescriptor& output_descriptor,
                   DeviceMemory<double>* output_data,
-                  const dnn::AlgorithmDesc& algorithm_desc,
-                  DeviceMemory<uint8>* scratch_memory,
+                  ScratchAllocator* scratch_allocator,
+                  const dnn::AlgorithmConfig& algorithm_config,
                   dnn::ProfileResult* output_profile_result) override;
 
   bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor,
@@ -318,8 +281,8 @@ class CudnnSupport : public dnn::DnnSupport {
                   const dnn::ConvolutionDescriptor& convolution_descriptor,
                   const dnn::BatchDescriptor& output_descriptor,
                   DeviceMemory<Eigen::half>* output_data,
-                  const dnn::AlgorithmDesc& algorithm_desc,
-                  DeviceMemory<uint8>* scratch_memory,
+                  ScratchAllocator* scratch_allocator,
+                  const dnn::AlgorithmConfig& algorithm_config,
                   dnn::ProfileResult* output_profile_result) override;
 
   bool DoFusedConvolve(
@@ -421,20 +384,7 @@ class CudnnSupport : public dnn::DnnSupport {
     return false;
   }
 
-  bool PrepareForConvolutionBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<float>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<float> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<float>* backward_input_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
-
-  bool PrepareForConvolutionBackwardData(
+  bool DoConvolveBackwardData(
       Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
       const DeviceMemory<double>& filter_data,
       const dnn::BatchDescriptor& output_descriptor,
@@ -444,32 +394,6 @@ class CudnnSupport : public dnn::DnnSupport {
       DeviceMemory<double>* backward_input_data,
       ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
-
-  bool PrepareForConvolutionBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<Eigen::half>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<Eigen::half> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<Eigen::half>* backward_input_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
-
-  bool DoConvolveBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<double>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<double> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<double>* backward_input_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
       dnn::ProfileResult* output_profile_result) override;
 
   bool DoConvolveBackwardData(
@@ -480,8 +404,8 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::BatchDescriptor& input_descriptor,
       DeviceMemory<float>* backward_input_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
   bool DoConvolveBackwardData(
@@ -492,48 +416,9 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::BatchDescriptor& input_descriptor,
       DeviceMemory<Eigen::half>* backward_input_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
-      dnn::ProfileResult* output_profile_result) override;
-
-  bool PrepareForConvolutionBackwardFilter(
-      Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-      const DeviceMemory<double>& input_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<double> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::FilterDescriptor& filter_descriptor,
-      DeviceMemory<double>* backward_filter_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
-
-  bool PrepareForConvolutionBackwardFilter(
-      Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-      const DeviceMemory<float>& input_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<float> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::FilterDescriptor& filter_descriptor,
-      DeviceMemory<float>* backward_filter_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
-
-  bool PrepareForConvolutionBackwardFilter(
-      Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-      const DeviceMemory<Eigen::half>& input_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<Eigen::half> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::FilterDescriptor& filter_descriptor,
-      DeviceMemory<Eigen::half>* backward_filter_data,
       ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory) override;
+      dnn::ProfileResult* output_profile_result) override;
 
   bool DoConvolveBackwardFilter(
       Stream* stream, const dnn::BatchDescriptor& input_descriptor,
@@ -543,8 +428,8 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::FilterDescriptor& filter_descriptor,
       DeviceMemory<double>* backward_filter_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
   bool DoConvolveBackwardFilter(
@@ -555,8 +440,8 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::FilterDescriptor& filter_descriptor,
       DeviceMemory<float>* backward_filter_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
   bool DoConvolveBackwardFilter(
@@ -567,8 +452,8 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::FilterDescriptor& filter_descriptor,
       DeviceMemory<Eigen::half>* backward_filter_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) override;
 
   bool DoConvolveBackwardBias(
@@ -778,7 +663,7 @@ class CudnnSupport : public dnn::DnnSupport {
       DeviceMemory<U>* offset_backprop);
 
   template <class T>
-  port::Status PrepareForConvolutionImpl(
+  port::Status DoConvolveImpl(
       Stream* stream, const dnn::BatchDescriptor& input_descriptor,
       const DeviceMemory<T>& input_data,
       const dnn::FilterDescriptor& filter_descriptor,
@@ -788,19 +673,6 @@ class CudnnSupport : public dnn::DnnSupport {
       DeviceMemory<T>* output_data, dnn::DataType accumulator_type,
       ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory);
-
-  template <class T>
-  port::Status DoConvolveImpl(
-      Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-      const DeviceMemory<T>& input_data,
-      const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<T>& filter_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<T>* output_data, dnn::DataType accumulator_type,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
       dnn::ProfileResult* output_profile_result);
 
   template <typename ElementType, typename BiasType, typename ScaleType>
@@ -820,19 +692,6 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result);
 
-  template <class T>
-  port::Status PrepareForConvolutionBackwardDataImpl(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<T>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<T> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<T>* backward_input_data, dnn::DataType accumulator_type,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory);
-
   template <class T>
   port::Status DoConvolveBackwardDataImpl(
       Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
@@ -842,22 +701,9 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::BatchDescriptor& input_descriptor,
       DeviceMemory<T>* backward_input_data, dnn::DataType accumulator_type,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
-      dnn::ProfileResult* output_profile_result);
-
-  template <class T>
-  port::Status PrepareForConvolutionBackwardFilterImpl(
-      Stream* stream, const dnn::BatchDescriptor& input_descriptor,
-      const DeviceMemory<T>& input_data,
-      const dnn::BatchDescriptor& output_descriptor_in,
-      DeviceMemory<T> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::FilterDescriptor& filter_descriptor,
-      DeviceMemory<T>* backward_filter_data, dnn::DataType accumulator_type,
       ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory);
+      dnn::ProfileResult* output_profile_result);
 
   template <class T>
   port::Status DoConvolveBackwardFilterImpl(
@@ -868,8 +714,8 @@ class CudnnSupport : public dnn::DnnSupport {
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::FilterDescriptor& filter_descriptor,
       DeviceMemory<T>* backward_filter_data, dnn::DataType accumulator_type,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result);
 
   template <class T>
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 04949c4813..c044a356ef 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -730,7 +730,6 @@ class PoolingDescriptor {
 class AlgorithmDesc {
  public:
   typedef int64 Index;
-  AlgorithmDesc() : AlgorithmDesc(0, false) {}
   AlgorithmDesc(Index a, bool use_tensor_ops) {
     proto_.set_algo_id(a);
     proto_.set_math_type(use_tensor_ops ? AlgorithmProto::TENSOR_OP_MATH
@@ -1176,52 +1175,6 @@ class DnnSupport {
     return false;
   }
 
-  virtual bool PrepareForConvolution(
-      Stream* stream, const BatchDescriptor& batch_descriptor,
-      const DeviceMemory<float>& input_data,
-      const FilterDescriptor& filter_descriptor,
-      const DeviceMemory<float>& filter_data,
-      const ConvolutionDescriptor& convolution_descriptor,
-      const BatchDescriptor& output_descriptor,
-      DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
-
-  virtual bool PrepareForConvolution(
-      Stream* stream, const BatchDescriptor& batch_descriptor,
-      const DeviceMemory<double>& input_data,
-      const FilterDescriptor& filter_descriptor,
-      const DeviceMemory<double>& filter_data,
-      const ConvolutionDescriptor& convolution_descriptor,
-      const BatchDescriptor& output_descriptor,
-      DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
-
-  virtual bool PrepareForConvolution(
-      Stream* stream, const BatchDescriptor& batch_descriptor,
-      const DeviceMemory<Eigen::half>& input_data,
-      const FilterDescriptor& filter_descriptor,
-      const DeviceMemory<Eigen::half>& filter_data,
-      const ConvolutionDescriptor& convolution_descriptor,
-      const BatchDescriptor& output_descriptor,
-      DeviceMemory<Eigen::half>* output_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
-
   // Enqueues a single-precision convolution operation onto the stream.
   //
   // Arguments (all borrowed):
@@ -1235,10 +1188,10 @@ class DnnSupport {
   //  output_descriptor: dimensions of the output layer.
   //  output_data: un-owned device memory region in which to place the
   //    convolution result.
-  //  algorithm_desc: specifies which algorithm should be used for the
+  //  scratch_allocator: un-owned, may-be-null object that may allocate scratch
+  //    space in order to speed up the convolution operation.
+  //  algorithm_config: specifies which algorithm should be used for the
   //    operation.
-  //  scratch: un-owned device memory for scratch space in order to speed up
-  //    the convolution operation.
   //  output_profile_result: the output profile result for this call. The
   //    profiling is only enabled when this is not nullptr.
   //
@@ -1263,9 +1216,8 @@ class DnnSupport {
       const DeviceMemory<float>& filter_data,
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<float>* output_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      DeviceMemory<float>* output_data, ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       ProfileResult* output_profile_result) = 0;
 
   // Enqueues a double-precision convolution operation onto the stream.
@@ -1277,9 +1229,8 @@ class DnnSupport {
       const DeviceMemory<double>& filter_data,
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<double>* output_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      DeviceMemory<double>* output_data, ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       dnn::ProfileResult* output_profile_result) = 0;
 
   // Enqueues a half-precision convolution operation onto the stream.
@@ -1292,8 +1243,8 @@ class DnnSupport {
       const dnn::ConvolutionDescriptor& convolution_descriptor,
       const dnn::BatchDescriptor& output_descriptor,
       DeviceMemory<Eigen::half>* output_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       ProfileResult* output_profile_result) = 0;
 
   // Return a list of algorithms supported by the forward convolution pass.
@@ -1349,54 +1300,6 @@ class DnnSupport {
       const BatchDescriptor& output_descriptor,
       DeviceMemory<float>* output_data) = 0;
 
-  virtual bool PrepareForConvolutionBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<float>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<float> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<float>* backward_input_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
-
-  virtual bool PrepareForConvolutionBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<double>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<double> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<double>* backward_input_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
-
-  virtual bool PrepareForConvolutionBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<Eigen::half>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<Eigen::half> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<Eigen::half>* backward_input_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
-
   // Enqueues a single-precision backward convolution (for data) operation onto
   // the stream.
   //
@@ -1416,15 +1319,15 @@ class DnnSupport {
   //  scratch_allocator: un-owned, may-be-null object that may allocate scratch
   //    space in order to speed up the convolution operation.
   virtual bool DoConvolveBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
+      Stream* stream, const FilterDescriptor& filter_descriptor,
       const DeviceMemory<float>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
+      const BatchDescriptor& output_descriptor,
       DeviceMemory<float> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
+      const ConvolutionDescriptor& convolution_descriptor,
+      const BatchDescriptor& input_descriptor,
       DeviceMemory<float>* backward_input_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       ProfileResult* output_profile_result) = 0;
 
   // Return a list of algorithms supported by the backward convolution pass for
@@ -1434,76 +1337,28 @@ class DnnSupport {
       std::vector<AlgorithmDesc>* out_algorithms);
 
   virtual bool DoConvolveBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
+      Stream* stream, const FilterDescriptor& filter_descriptor,
       const DeviceMemory<double>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<double> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<double>* backward_input_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
-      ProfileResult* output_profile_result) = 0;
-
-  virtual bool DoConvolveBackwardData(
-      Stream* stream, const dnn::FilterDescriptor& filter_descriptor,
-      const DeviceMemory<Eigen::half>& filter_data,
-      const dnn::BatchDescriptor& output_descriptor,
-      DeviceMemory<Eigen::half> backward_output_data,
-      const dnn::ConvolutionDescriptor& convolution_descriptor,
-      const dnn::BatchDescriptor& input_descriptor,
-      DeviceMemory<Eigen::half>* backward_input_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
-      ProfileResult* output_profile_result) = 0;
-
-  virtual bool PrepareForConvolutionBackwardFilter(
-      Stream* stream, const BatchDescriptor& input_descriptor,
-      const DeviceMemory<float>& input_data,
-      const BatchDescriptor& output_descriptor,
-      DeviceMemory<float> backward_output_data,
-      const ConvolutionDescriptor& convolution_descriptor,
-      const FilterDescriptor& filter_descriptor,
-      DeviceMemory<float>* backward_filter_data,
-      ScratchAllocator* scratch_allocator,
-      const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
-
-  virtual bool PrepareForConvolutionBackwardFilter(
-      Stream* stream, const BatchDescriptor& input_descriptor,
-      const DeviceMemory<double>& input_data,
       const BatchDescriptor& output_descriptor,
       DeviceMemory<double> backward_output_data,
       const ConvolutionDescriptor& convolution_descriptor,
-      const FilterDescriptor& filter_descriptor,
-      DeviceMemory<double>* backward_filter_data,
+      const BatchDescriptor& input_descriptor,
+      DeviceMemory<double>* backward_input_data,
       ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
+      ProfileResult* output_profile_result) = 0;
 
-  virtual bool PrepareForConvolutionBackwardFilter(
-      Stream* stream, const BatchDescriptor& input_descriptor,
-      const DeviceMemory<Eigen::half>& input_data,
+  virtual bool DoConvolveBackwardData(
+      Stream* stream, const FilterDescriptor& filter_descriptor,
+      const DeviceMemory<Eigen::half>& filter_data,
       const BatchDescriptor& output_descriptor,
       DeviceMemory<Eigen::half> backward_output_data,
       const ConvolutionDescriptor& convolution_descriptor,
-      const FilterDescriptor& filter_descriptor,
-      DeviceMemory<Eigen::half>* backward_filter_data,
+      const BatchDescriptor& input_descriptor,
+      DeviceMemory<Eigen::half>* backward_input_data,
       ScratchAllocator* scratch_allocator,
       const dnn::AlgorithmConfig& algorithm_config,
-      dnn::AlgorithmDesc* algorithm_desc, DeviceMemory<uint8>* scratch_memory) {
-    *algorithm_desc = {};
-    *scratch_memory = {};
-    return true;
-  }
+      ProfileResult* output_profile_result) = 0;
 
   // Enqueues a single-precision backward convolution (for filter) operation
   // onto the stream.
@@ -1532,8 +1387,8 @@ class DnnSupport {
       const ConvolutionDescriptor& convolution_descriptor,
       const FilterDescriptor& filter_descriptor,
       DeviceMemory<float>* backward_filter_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       ProfileResult* output_profile_result) = 0;
 
   // Return a list of algorithms supported by the backward convolution pass for
@@ -1550,8 +1405,8 @@ class DnnSupport {
       const ConvolutionDescriptor& convolution_descriptor,
       const FilterDescriptor& filter_descriptor,
       DeviceMemory<double>* backward_filter_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       ProfileResult* output_profile_result) = 0;
 
   virtual bool DoConvolveBackwardFilter(
@@ -1562,8 +1417,8 @@ class DnnSupport {
       const ConvolutionDescriptor& convolution_descriptor,
       const FilterDescriptor& filter_descriptor,
       DeviceMemory<Eigen::half>* backward_filter_data,
-      const dnn::AlgorithmDesc& algorithm_desc,
-      DeviceMemory<uint8>* scratch_memory,
+      ScratchAllocator* scratch_allocator,
+      const dnn::AlgorithmConfig& algorithm_config,
       ProfileResult* output_profile_result) = 0;
 
   // Enqueues a single-precision backward convolution (for bias) operation onto
diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc
index 4503127bee..3edc66cde8 100644
--- a/tensorflow/stream_executor/stream.cc
+++ b/tensorflow/stream_executor/stream.cc
@@ -549,16 +549,11 @@ Stream &Stream::ThenConvolveWithScratch(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      CheckError(dnn->PrepareForConvolution(
-          this, input_descriptor, input_data, filter_descriptor, filter_data,
-          convolution_descriptor, output_descriptor, output, scratch_allocator,
-          dnn::AlgorithmConfig(), &algorithm_desc, &scratch_memory));
       CheckError(dnn->DoConvolve(
           this, input_descriptor, input_data, filter_descriptor, filter_data,
-          convolution_descriptor, output_descriptor, output, algorithm_desc,
-          &scratch_memory, nullptr));
+          convolution_descriptor, output_descriptor, output, scratch_allocator,
+          dnn::AlgorithmConfig(),
+          /*output_profile_result=*/nullptr));
     } else {
       SetErrorAndLogNoDnnSupport();
     }
@@ -581,16 +576,11 @@ Stream &Stream::ThenConvolveWithScratch(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      CheckError(dnn->PrepareForConvolution(
-          this, input_descriptor, input_data, filter_descriptor, filter_data,
-          convolution_descriptor, output_descriptor, output, scratch_allocator,
-          dnn::AlgorithmConfig(), &algorithm_desc, &scratch_memory));
       CheckError(dnn->DoConvolve(
           this, input_descriptor, input_data, filter_descriptor, filter_data,
-          convolution_descriptor, output_descriptor, output, algorithm_desc,
-          &scratch_memory, nullptr));
+          convolution_descriptor, output_descriptor, output, scratch_allocator,
+          dnn::AlgorithmConfig(),
+          /*output_profile_result=*/nullptr));
     } else {
       SetErrorAndLogNoDnnSupport();
     }
@@ -768,18 +758,10 @@ Stream &Stream::ThenConvolveWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolution(
+      auto status = dnn->DoConvolve(
           this, input_descriptor, input_data, filter_descriptor, filter_data,
           convolution_descriptor, output_descriptor, output, scratch_allocator,
-          algorithm_config, &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolve(
-            this, input_descriptor, input_data, filter_descriptor, filter_data,
-            convolution_descriptor, output_descriptor, output, algorithm_desc,
-            &scratch_memory, output_profile_result);
-      }
+          algorithm_config, output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
@@ -807,18 +789,10 @@ Stream &Stream::ThenConvolveWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolution(
+      auto status = dnn->DoConvolve(
           this, input_descriptor, input_data, filter_descriptor, filter_data,
           convolution_descriptor, output_descriptor, output, scratch_allocator,
-          algorithm_config, &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolve(
-            this, input_descriptor, input_data, filter_descriptor, filter_data,
-            convolution_descriptor, output_descriptor, output, algorithm_desc,
-            &scratch_memory, output_profile_result);
-      }
+          algorithm_config, output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
@@ -846,18 +820,10 @@ Stream &Stream::ThenConvolveWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolution(
+      auto status = dnn->DoConvolve(
           this, input_descriptor, input_data, filter_descriptor, filter_data,
           convolution_descriptor, output_descriptor, output, scratch_allocator,
-          algorithm_config, &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolve(
-            this, input_descriptor, input_data, filter_descriptor, filter_data,
-            convolution_descriptor, output_descriptor, output, algorithm_desc,
-            &scratch_memory, output_profile_result);
-      }
+          algorithm_config, output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
@@ -1003,17 +969,10 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      CheckError(dnn->PrepareForConvolutionBackwardData(
-          this, filter_descriptor, filter_data, output_descriptor,
-          backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
-          &algorithm_desc, &scratch_memory));
       CheckError(dnn->DoConvolveBackwardData(
           this, filter_descriptor, filter_data, output_descriptor,
           backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, algorithm_desc, &scratch_memory,
+          backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
           /*output_profile_result=*/nullptr));
     } else {
       SetErrorAndLogNoDnnSupport();
@@ -1040,20 +999,11 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolutionBackwardData(
+      auto status = dnn->DoConvolveBackwardData(
           this, filter_descriptor, filter_data, output_descriptor,
           backward_output_data, convolution_descriptor, input_descriptor,
           backward_input_data, scratch_allocator, algorithm_config,
-          &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolveBackwardData(
-            this, filter_descriptor, filter_data, output_descriptor,
-            backward_output_data, convolution_descriptor, input_descriptor,
-            backward_input_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
+          output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
@@ -1082,20 +1032,11 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolutionBackwardData(
+      auto status = dnn->DoConvolveBackwardData(
           this, filter_descriptor, filter_data, output_descriptor,
           backward_output_data, convolution_descriptor, input_descriptor,
           backward_input_data, scratch_allocator, algorithm_config,
-          &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolveBackwardData(
-            this, filter_descriptor, filter_data, output_descriptor,
-            backward_output_data, convolution_descriptor, input_descriptor,
-            backward_input_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
+          output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
@@ -1124,20 +1065,11 @@ Stream &Stream::ThenConvolveBackwardDataWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolutionBackwardData(
+      auto status = dnn->DoConvolveBackwardData(
           this, filter_descriptor, filter_data, output_descriptor,
           backward_output_data, convolution_descriptor, input_descriptor,
           backward_input_data, scratch_allocator, algorithm_config,
-          &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolveBackwardData(
-            this, filter_descriptor, filter_data, output_descriptor,
-            backward_output_data, convolution_descriptor, input_descriptor,
-            backward_input_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
+          output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
@@ -1164,17 +1096,10 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      CheckError(dnn->PrepareForConvolutionBackwardData(
-          this, filter_descriptor, filter_data, output_descriptor,
-          backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
-          &algorithm_desc, &scratch_memory));
       CheckError(dnn->DoConvolveBackwardData(
           this, filter_descriptor, filter_data, output_descriptor,
           backward_output_data, convolution_descriptor, input_descriptor,
-          backward_input_data, algorithm_desc, &scratch_memory,
+          backward_input_data, scratch_allocator, dnn::AlgorithmConfig(),
           /*output_profile_result=*/nullptr));
     } else {
       SetErrorAndLogNoDnnSupport();
@@ -1213,17 +1138,10 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      CheckError(dnn->PrepareForConvolutionBackwardFilter(
-          this, input_descriptor, input_data, output_descriptor,
-          backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
-          &algorithm_desc, &scratch_memory));
       CheckError(dnn->DoConvolveBackwardFilter(
           this, input_descriptor, input_data, output_descriptor,
           backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, algorithm_desc, &scratch_memory,
+          backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
           /*output_profile_result=*/nullptr));
     } else {
       SetErrorAndLogNoDnnSupport();
@@ -1250,20 +1168,11 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolutionBackwardFilter(
+      auto status = dnn->DoConvolveBackwardFilter(
           this, input_descriptor, input_data, output_descriptor,
           backward_output_data, convolution_descriptor, filter_descriptor,
           backward_filter_data, scratch_allocator, algorithm_config,
-          &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolveBackwardFilter(
-            this, input_descriptor, input_data, output_descriptor,
-            backward_output_data, convolution_descriptor, filter_descriptor,
-            backward_filter_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
+          output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
@@ -1292,20 +1201,11 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolutionBackwardFilter(
+      auto status = dnn->DoConvolveBackwardFilter(
           this, input_descriptor, input_data, output_descriptor,
           backward_output_data, convolution_descriptor, filter_descriptor,
           backward_filter_data, scratch_allocator, algorithm_config,
-          &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolveBackwardFilter(
-            this, input_descriptor, input_data, output_descriptor,
-            backward_output_data, convolution_descriptor, filter_descriptor,
-            backward_filter_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
+          output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
@@ -1332,17 +1232,10 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      CheckError(dnn->PrepareForConvolutionBackwardFilter(
-          this, input_descriptor, input_data, output_descriptor,
-          backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
-          &algorithm_desc, &scratch_memory));
       CheckError(dnn->DoConvolveBackwardFilter(
           this, input_descriptor, input_data, output_descriptor,
           backward_output_data, convolution_descriptor, filter_descriptor,
-          backward_filter_data, algorithm_desc, &scratch_memory,
+          backward_filter_data, scratch_allocator, dnn::AlgorithmConfig(),
           /*output_profile_result=*/nullptr));
     } else {
       SetErrorAndLogNoDnnSupport();
@@ -1369,20 +1262,11 @@ Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm(
 
   if (ok()) {
     if (dnn::DnnSupport *dnn = parent_->AsDnn()) {
-      DeviceMemory<uint8> scratch_memory;
-      dnn::AlgorithmDesc algorithm_desc;
-      auto status = dnn->PrepareForConvolutionBackwardFilter(
+      auto status = dnn->DoConvolveBackwardFilter(
           this, input_descriptor, input_data, output_descriptor,
           backward_output_data, convolution_descriptor, filter_descriptor,
           backward_filter_data, scratch_allocator, algorithm_config,
-          &algorithm_desc, &scratch_memory);
-      if (status) {
-        status = dnn->DoConvolveBackwardFilter(
-            this, input_descriptor, input_data, output_descriptor,
-            backward_output_data, convolution_descriptor, filter_descriptor,
-            backward_filter_data, algorithm_desc, &scratch_memory,
-            output_profile_result);
-      }
+          output_profile_result);
       if (!status && !output_profile_result) {
         SetError();
       }
-- 
GitLab


From d35b41c2619cd520c0fa3fd786160d19dcc6339c Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Fri, 7 Dec 2018 17:21:52 -0800
Subject: [PATCH 254/873] [TF:XLA] Bump open source abseil revision to
 f197d7c72a54064cfde5a2058f1513a4a0ee36fb

PiperOrigin-RevId: 224603073
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index ee91cd677b..60dcca3207 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -123,11 +123,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "f7e3734c3c5854cf2bae1e193fbbd8f7d261673e0f2e042b1fca52732f688a0a",
-        strip_prefix = "abseil-cpp-284378a71b32dfb3af4e3661f585e671d1b603a3",
+        sha256 = "3ad76de484192b2d5afd49d90492b5ed0bc59eb1a4e8e0deecc7a2a077a90251",
+        strip_prefix = "abseil-cpp-f197d7c72a54064cfde5a2058f1513a4a0ee36fb",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/284378a71b32dfb3af4e3661f585e671d1b603a3.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/284378a71b32dfb3af4e3661f585e671d1b603a3.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f197d7c72a54064cfde5a2058f1513a4a0ee36fb.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/f197d7c72a54064cfde5a2058f1513a4a0ee36fb.tar.gz",
         ],
     )
 
-- 
GitLab


From 4890b781b6b77fcc6c751d351c2f320ef9043e8d Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Fri, 7 Dec 2018 17:36:37 -0800
Subject: [PATCH 255/873] Annotate tests as @run_v1_only

Skip individual test cases or entire suites that are not
running in v1. Also replace some @run_deprecated_v1
annotations since simply running the test in graph mode
was not enough.

PiperOrigin-RevId: 224604547
---
 .../audio_microfrontend_op_test.py            |   5 +
 .../lite/python/convert_saved_model_test.py   |   7 ++
 tensorflow/lite/python/convert_test.py        |   2 +
 tensorflow/lite/python/lite_test.py           |   4 +
 .../model_coverage/model_coverage_lib_test.py |  10 ++
 tensorflow/python/autograph/impl/api_test.py  |   1 +
 tensorflow/python/client/session_test.py      |  23 ++++
 .../data/kernel_tests/interleave_test.py      |   2 +
 .../kernel_tests/iterator_cluster_test.py     |   5 +
 .../python/data/kernel_tests/map_test.py      |   2 +
 .../multi_device_iterator_test.py             |   9 ++
 .../python/debug/cli/analyzer_cli_test.py     |  36 +-----
 .../python/debug/cli/cli_shared_test.py       |   3 +-
 .../debug/cli/profile_analyzer_cli_test.py    |   7 +-
 .../python/debug/cli/stepper_cli_test.py      |   1 +
 .../python/debug/lib/debug_gradients_test.py  |  15 +--
 .../python/debug/lib/debug_utils_test.py      |  20 +--
 .../debug/lib/dist_session_debug_grpc_test.py |   1 +
 .../debug/lib/session_debug_file_test.py      |   2 +-
 .../debug/lib/session_debug_grpc_test.py      |   3 +
 .../python/debug/lib/session_debug_testlib.py |   1 +
 .../python/debug/lib/source_utils_test.py     |   1 +
 tensorflow/python/debug/lib/stepper_test.py   |   4 +
 .../python/debug/wrappers/disk_usage_test.py  |   1 +
 .../debug/wrappers/dumping_wrapper_test.py    |   1 +
 .../python/debug/wrappers/framework_test.py   |   1 +
 .../debug/wrappers/local_cli_wrapper_test.py  |   1 +
 .../distribute/distribute_coordinator_test.py |   2 +
 tensorflow/python/eager/backprop_test.py      |  19 +++
 .../python/eager/function_gradients_test.py   |   2 +-
 tensorflow/python/eager/function_test.py      |   2 +
 .../framework/auto_control_deps_test.py       |   7 ++
 .../framework/error_interpolation_test.py     |   8 ++
 .../python/framework/graph_util_test.py       |   2 +-
 .../python/framework/meta_graph_test.py       |   2 +-
 tensorflow/python/framework/ops_test.py       |  13 +-
 tensorflow/python/framework/subscribe_test.py |   6 +-
 tensorflow/python/grappler/item_test.py       |   2 +-
 .../python/grappler/memory_optimizer_test.py  |   2 +-
 .../python/grappler/tf_optimizer_test.py      |   4 +-
 tensorflow/python/keras/backend_test.py       |   2 +-
 tensorflow/python/keras/callbacks_test.py     |  10 +-
 tensorflow/python/keras/engine/saving_test.py |   6 +-
 .../python/keras/engine/sequential_test.py    |   2 +-
 .../python/keras/engine/topology_test.py      |   3 +-
 .../python/keras/engine/training_test.py      |   8 +-
 tensorflow/python/keras/integration_test.py   |   9 +-
 tensorflow/python/keras/layers/lstm_test.py   |   6 +-
 .../python/keras/layers/normalization_test.py |   3 +-
 .../python/keras/layers/simplernn_test.py     |   4 +-
 .../python/keras/layers/unified_lstm_test.py  |   4 +-
 .../python/keras/layers/wrappers_test.py      |  10 +-
 .../python/keras/model_subclassing_test.py    |   2 +
 tensorflow/python/keras/models_test.py        |  13 +-
 tensorflow/python/keras/optimizers_test.py    |  18 +--
 .../kernel_tests/atrous_convolution_test.py   |   3 +
 .../python/kernel_tests/base64_ops_test.py    |   1 +
 .../kernel_tests/batch_matmul_op_test.py      |   3 +
 .../boosted_trees/quantile_ops_test.py        |   8 +-
 .../kernel_tests/checkpoint_ops_test.py       |   1 +
 .../python/kernel_tests/cholesky_op_test.py   |   2 +
 .../python/kernel_tests/cond_v2_test.py       |   4 +
 .../conditional_accumulator_test.py           |  12 +-
 .../kernel_tests/control_flow_ops_py_test.py  | 118 +++++++++++++++---
 .../kernel_tests/control_flow_util_test.py    |   4 +
 .../python/kernel_tests/ctc_loss_op_test.py   |  36 +++---
 .../dense_update_ops_no_tsan_test.py          |   8 +-
 .../kernel_tests/dense_update_ops_test.py     |   6 +-
 .../kernel_tests/depthwise_conv_op_test.py    |   4 +
 .../kernel_tests/determinant_op_test.py       |   3 +
 .../distributions/multinomial_test.py         |  17 +++
 .../python/kernel_tests/fifo_queue_test.py    |   4 +
 .../kernel_tests/functional_ops_test.py       |  10 +-
 .../kernel_tests/identity_op_py_test.py       |   2 +-
 .../linalg/linear_operator_test.py            |   6 +-
 .../python/kernel_tests/linalg_grad_test.py   |   2 +
 tensorflow/python/kernel_tests/losses_test.py |  52 ++++----
 .../kernel_tests/matrix_band_part_op_test.py  |   3 +
 .../kernel_tests/matrix_logarithm_op_test.py  |   3 +
 .../kernel_tests/matrix_solve_ls_op_test.py   |   5 +-
 .../matrix_square_root_op_test.py             |   3 +
 .../python/kernel_tests/norm_op_test.py       |   4 +
 .../python/kernel_tests/numerics_test.py      |   6 +-
 .../kernel_tests/padding_fifo_queue_test.py   |   3 +
 .../partitioned_variables_test.py             |   8 +-
 .../kernel_tests/priority_queue_test.py       |   7 ++
 .../python/kernel_tests/py_func_test.py       |  26 +++-
 tensorflow/python/kernel_tests/qr_op_test.py  |   3 +
 .../random/random_shuffle_queue_test.py       |   3 +
 .../resource_variable_ops_test.py             |  10 +-
 tensorflow/python/kernel_tests/rnn_test.py    |   2 +-
 .../kernel_tests/scatter_nd_ops_test.py       |  11 +-
 .../kernel_tests/self_adjoint_eig_op_test.py  |   3 +
 .../python/kernel_tests/session_ops_test.py   |   5 +
 .../signal/reconstruction_ops_test.py         |   3 +
 .../sparse_conditional_accumulator_test.py    |  24 ++--
 .../python/kernel_tests/stack_ops_test.py     |   4 +-
 tensorflow/python/kernel_tests/svd_op_test.py |   5 +
 .../kernel_tests/tensor_array_ops_test.py     |  32 ++---
 .../python/kernel_tests/tensordot_op_test.py  |   4 +
 .../kernel_tests/unicode_encode_op_test.py    |  13 ++
 .../python/kernel_tests/variables_test.py     |  26 ++--
 .../python/kernel_tests/while_v2_test.py      |   4 +-
 .../python/layers/normalization_test.py       |  37 +-----
 tensorflow/python/ops/gradients_test.py       |  23 ++++
 tensorflow/python/ops/nn_test.py              |   1 +
 .../ops/ragged/ragged_batch_gather_op_test.py |   4 +-
 .../ops/ragged/ragged_boolean_mask_op_test.py |   2 +-
 .../ops/ragged/ragged_concat_op_test.py       |   4 +-
 .../python/ops/ragged/ragged_dispatch_test.py |   2 +-
 .../ops/ragged/ragged_from_sparse_op_test.py  |   8 +-
 .../ops/ragged/ragged_from_tensor_op_test.py  |   6 +-
 .../ops/ragged/ragged_gather_nd_op_test.py    |   4 +-
 .../ops/ragged/ragged_gather_op_test.py       |  10 +-
 .../ops/ragged/ragged_map_fn_op_test.py       |   6 +-
 .../ragged/ragged_map_inner_values_op_test.py |  18 +--
 .../ops/ragged/ragged_operators_test.py       |   4 +-
 .../ops/ragged/ragged_reduce_op_test.py       |   2 +-
 .../ops/ragged/ragged_row_lengths_op_test.py  |   2 +-
 .../ops/ragged/ragged_segment_op_test.py      |   4 +-
 .../python/ops/ragged/ragged_stack_op_test.py |   2 +-
 .../ops/ragged/ragged_tensor_shape_test.py    |   6 +
 .../python/ops/ragged/ragged_tensor_test.py   |  12 +-
 .../ops/ragged/ragged_to_sparse_op_test.py    |   4 +-
 .../python/ops/ragged/ragged_util_test.py     |   2 +
 .../python/ops/ragged/ragged_where_op_test.py |   2 +-
 .../profiler/internal/run_metadata_test.py    |   2 +-
 .../python/profiler/model_analyzer_test.py    |   4 +
 .../python/profiler/pprof_profiler_test.py    |   2 +-
 tensorflow/python/saved_model/loader_test.py  |  10 +-
 tensorflow/python/saved_model/save_test.py    |   2 +-
 .../python/saved_model/saved_model_test.py    |  15 +--
 .../saved_model/signature_def_utils_test.py   |   2 +
 tensorflow/python/saved_model/utils_test.py   |   8 ++
 tensorflow/python/tools/freeze_graph_test.py  |   4 +-
 tensorflow/python/training/adagrad_test.py    |   4 +-
 .../training/basic_session_run_hooks_test.py  |   4 +-
 .../python/training/checkpoint_ops_test.py    |   2 +
 .../checkpointable/data_structures_test.py    |   4 +
 .../training/checkpointable/util_test.py      |   1 +
 tensorflow/python/training/input_test.py      |  32 ++---
 .../training/learning_rate_decay_test.py      |   2 +-
 .../localhost_cluster_performance_test.py     |   2 +
 .../python/training/monitored_session_test.py |   1 +
 .../python/training/moving_averages_test.py   |  32 ++---
 .../python/training/quantize_training_test.py |   2 +-
 .../python/training/queue_runner_test.py      |   8 +-
 tensorflow/python/training/saver_test.py      |  24 ++--
 ...server_lib_same_variables_no_clear_test.py |   2 +-
 tensorflow/python/training/server_lib_test.py |   7 ++
 .../python/training/session_manager_test.py   |  30 ++---
 .../python/training/slot_creator_test.py      |   6 +-
 tensorflow/python/training/supervisor_test.py |  22 ++--
 .../training/sync_replicas_optimizer_test.py  |   5 +
 .../training/tensorboard_logging_test.py      |   4 +
 .../python/training/training_ops_test.py      |  14 +--
 .../python/training/training_util_test.py     |   4 +-
 .../tools/api/tests/api_compatibility_test.py |   4 +-
 .../compatibility/testdata/test_file_v0_11.py |   8 ++
 .../compatibility/testdata/test_file_v1_12.py |   4 +
 160 files changed, 827 insertions(+), 480 deletions(-)

diff --git a/tensorflow/lite/experimental/microfrontend/python/kernel_tests/audio_microfrontend_op_test.py b/tensorflow/lite/experimental/microfrontend/python/kernel_tests/audio_microfrontend_op_test.py
index 561f5f7a50..3ce861707f 100644
--- a/tensorflow/lite/experimental/microfrontend/python/kernel_tests/audio_microfrontend_op_test.py
+++ b/tensorflow/lite/experimental/microfrontend/python/kernel_tests/audio_microfrontend_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import tensorflow as tf
 
 from tensorflow.lite.experimental.microfrontend.python.ops import audio_microfrontend_op as frontend_op
+from tensorflow.python.framework import test_util
 
 SAMPLE_RATE = 1000
 WINDOW_SIZE = 25
@@ -33,6 +34,7 @@ SMOOTHING_BITS = 10
 
 class AudioFeatureGenerationTest(tf.test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testSimple(self):
     with self.test_session():
       audio = tf.constant(
@@ -51,6 +53,7 @@ class AudioFeatureGenerationTest(tf.test.TestCase):
       self.assertAllEqual(filterbanks.eval(),
                           [[479, 425], [436, 378], [410, 350], [391, 325]])
 
+  @test_util.run_v1_only("b/120545219")
   def testSimpleFloatScaled(self):
     with self.test_session():
       audio = tf.constant(
@@ -72,6 +75,7 @@ class AudioFeatureGenerationTest(tf.test.TestCase):
                           [[7.484375, 6.640625], [6.8125, 5.90625],
                            [6.40625, 5.46875], [6.109375, 5.078125]])
 
+  @test_util.run_v1_only("b/120545219")
   def testStacking(self):
     with self.test_session():
       audio = tf.constant(
@@ -114,6 +118,7 @@ class AudioFeatureGenerationTest(tf.test.TestCase):
           [[479, 425, 479, 425, 436, 378], [479, 425, 436, 378, 410, 350],
            [436, 378, 410, 350, 391, 325], [410, 350, 391, 325, 391, 325]])
 
+  @test_util.run_v1_only("b/120545219")
   def testStackingDropFrame(self):
     with self.test_session():
       audio = tf.constant(
diff --git a/tensorflow/lite/python/convert_saved_model_test.py b/tensorflow/lite/python/convert_saved_model_test.py
index 76113853ca..11bfcdc795 100644
--- a/tensorflow/lite/python/convert_saved_model_test.py
+++ b/tensorflow/lite/python/convert_saved_model_test.py
@@ -39,6 +39,7 @@ from tensorflow.python.saved_model import tag_constants
 
 class TensorFunctionsTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testGetTensorsValid(self):
     in_tensor = array_ops.placeholder(
         shape=[1, 16, 16, 3], dtype=dtypes.float32)
@@ -49,6 +50,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase):
         sess.graph, ["Placeholder"])
     self.assertEqual("Placeholder:0", tensors[0].name)
 
+  @test_util.run_v1_only("b/120545219")
   def testGetTensorsInvalid(self):
     in_tensor = array_ops.placeholder(
         shape=[1, 16, 16, 3], dtype=dtypes.float32)
@@ -61,6 +63,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase):
     self.assertEqual("Invalid tensors 'invalid-input' were found.",
                      str(error.exception))
 
+  @test_util.run_v1_only("b/120545219")
   def testSetTensorShapeValid(self):
     tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32)
     self.assertEqual([None, 3, 5], tensor.shape.as_list())
@@ -68,6 +71,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase):
     convert_saved_model.set_tensor_shapes([tensor], {"Placeholder": [5, 3, 5]})
     self.assertEqual([5, 3, 5], tensor.shape.as_list())
 
+  @test_util.run_v1_only("b/120545219")
   def testSetTensorShapeNoneValid(self):
     tensor = array_ops.placeholder(dtype=dtypes.float32)
     self.assertEqual(None, tensor.shape)
@@ -75,6 +79,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase):
     convert_saved_model.set_tensor_shapes([tensor], {"Placeholder": [1, 3, 5]})
     self.assertEqual([1, 3, 5], tensor.shape.as_list())
 
+  @test_util.run_v1_only("b/120545219")
   def testSetTensorShapeArrayInvalid(self):
     # Tests set_tensor_shape where the tensor name passed in doesn't exist.
     tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32)
@@ -88,6 +93,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase):
         str(error.exception))
     self.assertEqual([None, 3, 5], tensor.shape.as_list())
 
+  @test_util.run_v1_only("b/120545219")
   def testSetTensorShapeDimensionInvalid(self):
     # Tests set_tensor_shape where the shape passed in is incompatiable.
     tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32)
@@ -101,6 +107,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase):
         "(?, 3, 5) to [1, 5, 5].", str(error.exception))
     self.assertEqual([None, 3, 5], tensor.shape.as_list())
 
+  @test_util.run_v1_only("b/120545219")
   def testSetTensorShapeEmpty(self):
     tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32)
     self.assertEqual([None, 3, 5], tensor.shape.as_list())
diff --git a/tensorflow/lite/python/convert_test.py b/tensorflow/lite/python/convert_test.py
index 2a6f1f634f..cf49ee2b47 100644
--- a/tensorflow/lite/python/convert_test.py
+++ b/tensorflow/lite/python/convert_test.py
@@ -34,6 +34,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_v1_only("b/120545219")
 class ConvertTest(test_util.TensorFlowTestCase):
 
   def testBasic(self):
@@ -176,6 +177,7 @@ class ConvertTest(test_util.TensorFlowTestCase):
         "QUANTIZED_UINT8.", str(error.exception))
 
 
+@test_util.run_v1_only("b/120545219")
 class ConvertTestOpHint(test_util.TensorFlowTestCase):
   """Test the hint to stub functionality."""
 
diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py
index 2c630588ca..1f9c768b44 100644
--- a/tensorflow/lite/python/lite_test.py
+++ b/tensorflow/lite/python/lite_test.py
@@ -80,6 +80,7 @@ class FromConstructor(test_util.TensorFlowTestCase):
     self.assertTrue(converter._has_valid_tensors())
 
 
+@test_util.run_v1_only('b/120545219')
 class FromSessionTest(test_util.TensorFlowTestCase):
 
   def testFloat(self):
@@ -497,6 +498,7 @@ class FromSessionTest(test_util.TensorFlowTestCase):
     interpreter.allocate_tensors()
 
 
+@test_util.run_v1_only('b/120545219')
 class FromFrozenGraphFile(test_util.TensorFlowTestCase):
 
   def testFloat(self):
@@ -744,6 +746,7 @@ class FromFrozenGraphFile(test_util.TensorFlowTestCase):
     interpreter.allocate_tensors()
 
 
+@test_util.run_v1_only('b/120545219')
 class FromSavedModelTest(test_util.TensorFlowTestCase):
 
   def _createSavedModel(self, shape):
@@ -888,6 +891,7 @@ class FromSavedModelTest(test_util.TensorFlowTestCase):
     interpreter.allocate_tensors()
 
 
+@test_util.run_v1_only('b/120545219')
 class FromKerasFile(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/lite/testing/model_coverage/model_coverage_lib_test.py
index 6b4e7427ed..4e329ac97d 100644
--- a/tensorflow/lite/testing/model_coverage/model_coverage_lib_test.py
+++ b/tensorflow/lite/testing/model_coverage/model_coverage_lib_test.py
@@ -28,6 +28,7 @@ from tensorflow.python import keras
 from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
@@ -42,6 +43,7 @@ class EvaluateFrozenGraph(test.TestCase):
     write_graph(sess.graph_def, '', graph_def_file, False)
     return graph_def_file
 
+  @test_util.run_v1_only('b/120545219')
   def testFloat(self):
     with session.Session().as_default() as sess:
       in_tensor = array_ops.placeholder(
@@ -51,6 +53,7 @@ class EvaluateFrozenGraph(test.TestCase):
 
     model_coverage.test_frozen_graph(filename, ['Placeholder'], ['add'])
 
+  @test_util.run_v1_only('b/120545219')
   def testMultipleOutputs(self):
     with session.Session().as_default() as sess:
       in_tensor_1 = array_ops.placeholder(
@@ -84,15 +87,18 @@ class EvaluateFrozenGraph(test.TestCase):
     filename = self._saveFrozenGraph(sess)
     return filename
 
+  @test_util.run_v1_only('b/120545219')
   def testQuantized(self):
     filename = self._getQuantizedModel()
     model_coverage.test_frozen_graph_quant(filename, ['inputA'], ['output'])
 
+  @test_util.run_v1_only('b/120545219')
   def testQuantizedInputShapes(self):
     filename = self._getQuantizedModel()
     model_coverage.test_frozen_graph_quant(
         filename, ['inputA'], ['output'], input_shapes={'inputA': [33, 33]})
 
+  @test_util.run_v1_only('b/120545219')
   def testQuantizedFlexAll(self):
     filename = self._getQuantizedModel()
     model_coverage.test_frozen_graph_quant(
@@ -102,6 +108,7 @@ class EvaluateFrozenGraph(test.TestCase):
 
 class EvaluateSavedModel(test.TestCase):
 
+  @test_util.run_v1_only('b/120545219')
   def testFloat(self):
     saved_model_dir = os.path.join(self.get_temp_dir(), 'simple_savedmodel')
     with session.Session().as_default() as sess:
@@ -139,18 +146,21 @@ class EvaluateKerasModel(test.TestCase):
       os.close(fd)
     return keras_file
 
+  @test_util.run_v1_only('b/120545219')
   def testFloat(self):
     model = self._getSingleInputKerasModel()
     keras_file = self._saveKerasModel(model)
 
     model_coverage.test_keras_model(keras_file)
 
+  @test_util.run_v1_only('b/120545219')
   def testPostTrainingQuantize(self):
     model = self._getSingleInputKerasModel()
     keras_file = self._saveKerasModel(model)
 
     model_coverage.test_keras_model(keras_file, post_training_quantize=True)
 
+  @test_util.run_v1_only('b/120545219')
   def testTargetOps(self):
     model = self._getSingleInputKerasModel()
     keras_file = self._saveKerasModel(model)
diff --git a/tensorflow/python/autograph/impl/api_test.py b/tensorflow/python/autograph/impl/api_test.py
index 66edda5119..d5561ba824 100644
--- a/tensorflow/python/autograph/impl/api_test.py
+++ b/tensorflow/python/autograph/impl/api_test.py
@@ -218,6 +218,7 @@ class ApiTest(test.TestCase):
                              constant_op.constant(-1))
       self.assertEqual(1, self.evaluate(x))
 
+  @test_util.run_v1_only('b/120545219')
   def test_converted_call_functools_partial(self):
 
     def test_fn(x, y, z):
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index 347833ce8f..c4a118a414 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -312,6 +312,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertEqual(None, res[2])
       self.assertEqual(44.0, res[1])
 
+  @test_util.run_v1_only('b/120545219')
   def testFetchAttrs(self):
     if attr is None:
       self.skipTest('attr module is unavailable.')
@@ -340,6 +341,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(val3, result.field1)
       self.assertAllEqual(val2, result.field2)
 
+  @test_util.run_v1_only('b/120545219')
   def testFetchNestedAttrs(self):
     if attr is None:
       self.skipTest('attr module is unavailable.')
@@ -1024,6 +1026,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       fed_c_val = c.eval(feed_dict={a.name: [[4.0, 4.0]]})
       self.assertAllEqual([[16.0, 16.0, 16.0]], fed_c_val)
 
+  @test_util.run_v1_only('b/120545219')
   def testOperationRunMethod(self):
     with session.Session():
       a = constant_op.constant(1.0, shape=[1, 2])
@@ -1154,6 +1157,7 @@ class SessionTest(test_util.TensorFlowTestCase):
         else:
           importer.import_graph_def(gdef, name='import')
 
+  @test_util.run_v1_only('b/120545219')
   def testParallelRunAndSingleBuild(self):
     with session.Session() as sess:
       c = constant_op.constant(5.0)
@@ -1174,6 +1178,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       for t in threads:
         t.join()
 
+  @test_util.run_v1_only('b/120545219')
   def testParallelRunAndParallelBuild(self):
     with session.Session() as sess:
       c = constant_op.constant(5.0)
@@ -1274,6 +1279,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegexp(RuntimeError, 'The Session graph is empty.'):
         sess.run({})
 
+  @test_util.run_v1_only('b/120545219')
   def testNotEntered(self):
     # pylint: disable=protected-access
     self.assertEqual(ops._default_session_stack.get_default(), None)
@@ -1289,6 +1295,7 @@ class SessionTest(test_util.TensorFlowTestCase):
           ValueError, lambda e: 'No default session is registered.' in str(e)):
         c_2.eval()
 
+  @test_util.run_v1_only('b/120545219')
   def testInteractive(self):
     with ops.device('/cpu:0'):
       sess = session.InteractiveSession()
@@ -1301,6 +1308,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertAllEqual([[24.0]], e.eval())
       sess.close()
 
+  @test_util.run_v1_only('b/120545219')
   def testMultipleInteractiveSessionsWarning(self):
     # Reinitialize the global state to ensure that the expected warnings will
     # be emitted.
@@ -1328,6 +1336,7 @@ class SessionTest(test_util.TensorFlowTestCase):
     sess2.close()
     sess.close()
 
+  @test_util.run_v1_only('b/120545219')
   def testInteractivePlacePrunedGraph(self):
     sess = session.InteractiveSession()
 
@@ -1349,6 +1358,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       a.eval()
     sess.close()
 
+  @test_util.run_v1_only('b/120545219')
   def testDefaultSessionPlacePrunedGraph(self):
     sess = session.Session()
 
@@ -1769,9 +1779,11 @@ class SessionTest(test_util.TensorFlowTestCase):
     sess.run(a, run_metadata=run_metadata)
     self.assertEqual(len(run_metadata.partition_graphs), 0)
 
+  @test_util.run_v1_only('b/120545219')
   def testOutputPartitionGraphsDirect(self):
     self.runTestOutputPartitionGraphs(session.Session())
 
+  @test_util.run_v1_only('b/120545219')
   def testOutputPartitionGraphsDistributed(self):
     server = server_lib.Server.create_local_server()
     self.runTestOutputPartitionGraphs(session.Session(server.target))
@@ -1796,6 +1808,7 @@ class SessionTest(test_util.TensorFlowTestCase):
     del sess1
     del sess2
 
+  @test_util.run_v1_only('b/120545219')
   def testAsDefault(self):
     c = constant_op.constant(37)
     sess = session.Session()
@@ -1821,6 +1834,7 @@ class SessionTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(TypeError, 'graph must be a tf.Graph'):
       session.Session(graph=37)
 
+  @test_util.run_v1_only('b/120545219')
   def testTimeoutWithShortOperations(self):
     num_epochs = 5
     q = data_flow_ops.FIFOQueue(capacity=50, dtypes=[dtypes.int32], shapes=[()])
@@ -1834,6 +1848,7 @@ class SessionTest(test_util.TensorFlowTestCase):
         sess.run(enqueue_op)
       self.assertEqual(sess.run(q.size()), num_epochs * 2)
 
+  @test_util.run_v1_only('b/120545219')
   def testRegisterFetchAndFeedConversionFunctions(self):
 
     class SquaredTensor(object):
@@ -1865,6 +1880,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       squared_eval = sess.partial_run(partial_run, squared_tensor)
       self.assertAllClose(np2 * np2, squared_eval)
 
+  @test_util.run_v1_only('b/120545219')
   def testDefaultLogDevicePlacement(self):
 
     class CaptureStderr(str):
@@ -1914,6 +1930,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       self.assertTrue('/job:local/replica:0/task:0/device:CPU:0' in str(log),
                       str(log))
 
+  @test_util.run_v1_only('b/120545219')
   def testLocalMasterSessionTimeout(self):
     # Test that the timeout passed in a config to the session works correctly.
     config = config_pb2.ConfigProto(operation_timeout_in_ms=1000)
@@ -1927,6 +1944,7 @@ class SessionTest(test_util.TensorFlowTestCase):
       with self.assertRaises(errors.DeadlineExceededError):
         sess.run(dequeued_t)
 
+  @test_util.run_v1_only('b/120545219')
   def testDefaultServerTimeout(self):
     # Test that the default server config timeout gets used when no Session
     # config is provided.
@@ -1952,9 +1970,11 @@ class SessionTest(test_util.TensorFlowTestCase):
     with self.assertRaisesOpError('has inputs from different frames'):
       sess.run(res, feed_dict={data: 1.0})
 
+  @test_util.run_v1_only('b/120545219')
   def testBuildGraphErrorDirect(self):
     self.runTestBuildGraphError(session.Session())
 
+  @test_util.run_v1_only('b/120545219')
   def testBuildGraphErrorDist(self):
     server = server_lib.Server.create_local_server()
     self.runTestBuildGraphError(session.Session(server.target))
@@ -1993,9 +2013,11 @@ class SessionTest(test_util.TensorFlowTestCase):
       result = sess.run(f)
       self.assertEqual(result, 2.0)
 
+  @test_util.run_v1_only('b/120545219')
   def testAddFunctionToSession(self):
     self.runTestAddFunctionToSession()
 
+  @test_util.run_v1_only('b/120545219')
   def testAddFunctionToGrpcSession(self):
     server = server_lib.Server.create_local_server()
     self.runTestAddFunctionToSession(server.target)
@@ -2009,6 +2031,7 @@ class SessionTest(test_util.TensorFlowTestCase):
     with session.Session():
       pass
 
+  @test_util.run_v1_only('b/120545219')
   def testAutoConvertAndCheckData(self):
     with self.cached_session() as sess:
       a = array_ops.placeholder(dtype=dtypes.string)
diff --git a/tensorflow/python/data/kernel_tests/interleave_test.py b/tensorflow/python/data/kernel_tests/interleave_test.py
index c3450e6525..05a211afcc 100644
--- a/tensorflow/python/data/kernel_tests/interleave_test.py
+++ b/tensorflow/python/data/kernel_tests/interleave_test.py
@@ -264,6 +264,7 @@ class InterleaveTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("8", np.int64([4, 0, 6]), 2, 3, 1),
       ("9", np.int64([4, 0, 6]), 2, 3, 2),
   )
+  @test_util.run_v1_only("b/120545219")
   def testSkipEagerSloppyInterleaveInOrder(self, input_values, cycle_length,
                                            block_length, num_parallel_calls):
     get_next, coordination_events = _make_coordinated_sloppy_dataset(
@@ -286,6 +287,7 @@ class InterleaveTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("3", np.int64([4, 5, 6]), 3, 2, 3),
       ("4", np.int64([4, 0, 6]), 2, 3, 2),
   )
+  @test_util.run_v1_only("b/120545219")
   def testSkipEagerSloppyInterleaveOutOfOrder(self, input_values, cycle_length,
                                               block_length, num_parallel_calls):
     get_next, coordination_events = _make_coordinated_sloppy_dataset(
diff --git a/tensorflow/python/data/kernel_tests/iterator_cluster_test.py b/tensorflow/python/data/kernel_tests/iterator_cluster_test.py
index 728bed20a1..2008823495 100644
--- a/tensorflow/python/data/kernel_tests/iterator_cluster_test.py
+++ b/tensorflow/python/data/kernel_tests/iterator_cluster_test.py
@@ -39,6 +39,7 @@ from tensorflow.python.platform import test
 
 class IteratorClusterTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testRemoteIteratorWithoutRemoteCallFail(self):
     worker_config = config_pb2.ConfigProto()
     worker_config.device_count["CPU"] = 2
@@ -92,6 +93,7 @@ class IteratorClusterTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(remote_op, feed_dict={target_placeholder: device1})
 
+  @test_util.run_v1_only("b/120545219")
   def testRemoteIteratorUsingRemoteCallOp(self):
     worker_config = config_pb2.ConfigProto()
     worker_config.device_count["CPU"] = 2
@@ -102,6 +104,7 @@ class IteratorClusterTest(test.TestCase):
                                    "/job:worker/replica:0/task:0/cpu:1",
                                    worker[0].target)
 
+  @test_util.run_v1_only("b/120545219")
   def testRemoteIteratorUsingRemoteCallOpCrossProcess(self):
     workers, _ = test_util.create_local_cluster(2, 1)
 
@@ -109,6 +112,7 @@ class IteratorClusterTest(test.TestCase):
                                    "/job:worker/replica:0/task:1/cpu:0",
                                    workers[0].target)
 
+  @test_util.run_v1_only("b/120545219")
   def testCaptureHashTableInSharedIterator(self):
     worker, _ = test_util.create_local_cluster(1, 1)
 
@@ -143,6 +147,7 @@ class IteratorClusterTest(test.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  @test_util.run_v1_only("b/120545219")
   def testImplicitDisposeParallelMapDataset(self):
     # Tests whether a parallel map dataset will be cleaned up correctly when
     # the pipeline does not run it until exhaustion.
diff --git a/tensorflow/python/data/kernel_tests/map_test.py b/tensorflow/python/data/kernel_tests/map_test.py
index 8b22542e11..e07706413d 100644
--- a/tensorflow/python/data/kernel_tests/map_test.py
+++ b/tensorflow/python/data/kernel_tests/map_test.py
@@ -34,6 +34,7 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_util
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
@@ -82,6 +83,7 @@ def _make_coordinated_sloppy_dataset(num_elements, num_parallel_calls):
   return next_element, coordination_events
 
 
+@test_util.run_v1_only("b/120545219")
 class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _buildMapDataset(self, components, count):
diff --git a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
index 622ebb55de..0322d1f2c6 100644
--- a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
+++ b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
@@ -35,6 +35,7 @@ from tensorflow.python.platform import test
 # TODO(b/117581999): Add eager coverage.
 class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
+  @test_util.run_v1_only("b/120545219")
   def testNoGetNext(self):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
@@ -44,6 +45,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
     with self.test_session(config=config) as sess:
       self.evaluate(multi_device_iterator.initializer)
 
+  @test_util.run_v1_only("b/120545219")
   def testBasic(self):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
@@ -60,6 +62,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
+  @test_util.run_v1_only("b/120545219")
   def testOneOnSameDevice(self):
     with ops.device("/cpu:0"):
       dataset = dataset_ops.Dataset.range(10)
@@ -77,6 +80,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
+  @test_util.run_v1_only("b/120545219")
   def testRepeatDevices(self):
     with ops.device("/cpu:0"):
       dataset = dataset_ops.Dataset.range(20)
@@ -99,6 +103,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
         self.evaluate(elem_on_3)
         self.evaluate(elem_on_4)
 
+  @test_util.run_v1_only("b/120545219")
   def testNotFullyDivisible(self):
     dataset = dataset_ops.Dataset.range(9)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
@@ -116,6 +121,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
+  @test_util.run_v1_only("b/120545219")
   def testGetNextAsOptional(self):
     dataset = dataset_ops.Dataset.range(9)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
@@ -149,6 +155,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.InvalidArgumentError):
         self.evaluate(elem_on_2_t)
 
+  @test_util.run_v1_only("b/120545219")
   def testUneven(self):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
@@ -166,6 +173,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
+  @test_util.run_v1_only("b/120545219")
   def testMultipleInitializations(self):
     with ops.device("/cpu:0"):
       epoch = array_ops.placeholder(dtypes.int64, shape=[])
@@ -259,6 +267,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.InvalidArgumentError):
         self.evaluate(elem_on_2_t)
 
+  @test_util.run_v1_only("b/120545219")
   def testOptimization(self):
     dataset = dataset_ops.Dataset.range(10)
     dataset = dataset.apply(optimization.assert_next(["MemoryCacheImpl"]))
diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py
index 322ecf9466..586982dc4b 100644
--- a/tensorflow/python/debug/cli/analyzer_cli_test.py
+++ b/tensorflow/python/debug/cli/analyzer_cli_test.py
@@ -573,6 +573,7 @@ def create_analyzer_cli(dump):
   return analyzer, registry
 
 
+@test_util.run_v1_only("b/120545219")
 class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
 
   @classmethod
@@ -645,7 +646,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     self.assertEqual(len("Size (B)") + 1, dump_size_col_width)
     self.assertEqual(len("Op type") + 1, op_type_col_width)
 
-  @test_util.run_deprecated_v1
   def testMeasureTensorListColumnWidthsGivesRightAnswerForData(self):
     dump = self._debug_dump.dumped_tensor_data[0]
     self.assertLess(dump.dump_size_bytes, 1000)
@@ -661,7 +661,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     # column should be determined by the length of "VariableV2".
     self.assertEqual(len("VariableV2") + 1, op_type_col_width)
 
-  @test_util.run_deprecated_v1
   def testListTensors(self):
     # Use shorthand alias for the command prefix.
     out = self._registry.dispatch_command("lt", [])
@@ -675,7 +674,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     # Check the main menu.
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorsInReverseTimeOrderWorks(self):
     # Use shorthand alias for the command prefix.
     out = self._registry.dispatch_command("lt", ["-s", "timestamp", "-r"])
@@ -691,7 +689,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         reverse=True)
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorsInDumpSizeOrderWorks(self):
     out = self._registry.dispatch_command("lt", ["-s", "dump_size"])
     assert_listed_tensors(
@@ -705,7 +702,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         sort_by="dump_size")
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorsInReverseDumpSizeOrderWorks(self):
     out = self._registry.dispatch_command("lt", ["-s", "dump_size", "-r"])
     assert_listed_tensors(
@@ -725,7 +721,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     self.assertIn("ValueError: Unsupported key to sort tensors by: foobar",
                   out.lines)
 
-  @test_util.run_deprecated_v1
   def testListTensorsInOpTypeOrderWorks(self):
     # Use shorthand alias for the command prefix.
     out = self._registry.dispatch_command("lt", ["-s", "op_type"])
@@ -741,7 +736,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         reverse=False)
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorsInReverseOpTypeOrderWorks(self):
     # Use shorthand alias for the command prefix.
     out = self._registry.dispatch_command("lt", ["-s", "op_type", "-r"])
@@ -757,7 +751,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         reverse=True)
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorsInTensorNameOrderWorks(self):
     # Use shorthand alias for the command prefix.
     out = self._registry.dispatch_command("lt", ["-s", "tensor_name"])
@@ -773,7 +766,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         reverse=False)
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorsInReverseTensorNameOrderWorks(self):
     # Use shorthand alias for the command prefix.
     out = self._registry.dispatch_command("lt", ["-s", "tensor_name", "-r"])
@@ -789,7 +781,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         reverse=True)
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorsFilterByNodeNameRegex(self):
     out = self._registry.dispatch_command("list_tensors",
                                           ["--node_name_filter", ".*read.*"])
@@ -803,7 +794,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     assert_listed_tensors(self, out, [], [], node_name_regex="^read")
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorFilterByOpTypeRegex(self):
     out = self._registry.dispatch_command("list_tensors",
                                           ["--op_type_filter", "Identity"])
@@ -832,7 +822,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         op_type_regex="(Add|MatMul)")
     check_main_menu(self, out, list_tensors_enabled=False)
 
-  @test_util.run_deprecated_v1
   def testListTensorWithFilterAndNodeNameExclusionWorks(self):
     # First, create and register the filter.
     def is_2x1_vector(datum, tensor):
@@ -889,7 +878,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     out = self._registry.dispatch_command("list_tensors", ["--bar"])
     check_syntax_error_output(self, out, "list_tensors")
 
-  @test_util.run_deprecated_v1
   def testNodeInfoByNodeName(self):
     node_name = "simple_mul_add/matmul"
     out = self._registry.dispatch_command("node_info", [node_name])
@@ -914,7 +902,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         [(len(out.lines[0]) - len(node_name), len(out.lines[0]), "bold")],
         out.font_attr_segs[0])
 
-  @test_util.run_deprecated_v1
   def testNodeInfoShowAttributes(self):
     node_name = "simple_mul_add/matmul"
     out = self._registry.dispatch_command("node_info", ["-a", node_name])
@@ -938,7 +925,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         print_tensor_node_name=node_name,
         list_outputs_node_name=node_name)
 
-  @test_util.run_deprecated_v1
   def testNodeInfoShowDumps(self):
     node_name = "simple_mul_add/matmul"
     out = self._registry.dispatch_command("node_info", ["-d", node_name])
@@ -963,7 +949,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
                     len(out.lines[16]) - len(out.lines[16].strip()),
                     len(out.lines[16]), "pt %s:0 -n 0" % node_name)
 
-  @test_util.run_deprecated_v1
   def testNodeInfoShowStackTraceUnavailableIsIndicated(self):
     self._debug_dump.set_python_graph(None)
 
@@ -987,7 +972,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         print_tensor_node_name=node_name,
         list_outputs_node_name=node_name)
 
-  @test_util.run_deprecated_v1
   def testNodeInfoShowStackTraceAvailableWorks(self):
     self._debug_dump.set_python_graph(self._sess.graph)
 
@@ -1011,7 +995,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         print_tensor_node_name=node_name,
         list_outputs_node_name=node_name)
 
-  @test_util.run_deprecated_v1
   def testNodeInfoByTensorName(self):
     node_name = "simple_mul_add/u/read"
     tensor_name = node_name + ":0"
@@ -1381,7 +1364,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         break
     return index
 
-  @test_util.run_deprecated_v1
   def testPrintSourceForOpNamesWholeFileWorks(self):
     self._debug_dump.set_python_graph(self._sess.graph)
     out = self._registry.dispatch_command(
@@ -1434,7 +1416,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     self.assertEqual("pt simple_mul_add/add",
                      out.font_attr_segs[index + 1][0][2].content)
 
-  @test_util.run_deprecated_v1
   def testPrintSourceForTensorNamesWholeFileWorks(self):
     self._debug_dump.set_python_graph(self._sess.graph)
     out = self._registry.dispatch_command(
@@ -1455,7 +1436,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     self.assertEqual("pt simple_mul_add/u:0",
                      out.font_attr_segs[index + 2][0][2].content)
 
-  @test_util.run_deprecated_v1
   def testPrintSourceForOpNamesStartingAtSpecifiedLineWorks(self):
     self._debug_dump.set_python_graph(self._sess.graph)
     out = self._registry.dispatch_command(
@@ -1482,7 +1462,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
     self.assertEqual("pt simple_mul_add/u/read",
                      out.font_attr_segs[index + 3][0][2].content)
 
-  @test_util.run_deprecated_v1
   def testPrintSourceForOpNameSettingMaximumElementCountWorks(self):
     self._debug_dump.set_python_graph(self._sess.graph)
     out = self._registry.dispatch_command(
@@ -1527,7 +1506,6 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
         self.assertTrue(cli_shared.COLOR_GRAY in attr_seg[2] or
                         attr_seg[2] == cli_shared.COLOR_GRAY)
 
-  @test_util.run_deprecated_v1
   def testListSourceWithNodeNameFilterWithMatchesWorks(self):
     self._debug_dump.set_python_graph(self._sess.graph)
     out = self._registry.dispatch_command("list_source", ["-n", ".*/read"])
@@ -1691,6 +1669,7 @@ class AnalyzerCLIPrintLargeTensorTest(test_util.TensorFlowTestCase):
     self.assertNotIn("...,", out.lines[4])
 
 
+@test_util.run_v1_only("b/120545219")
 class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
 
   @classmethod
@@ -1742,7 +1721,6 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
     # Tear down temporary dump directory.
     shutil.rmtree(cls._dump_root)
 
-  @test_util.run_deprecated_v1
   def testNodeInfoWithControlDependencies(self):
     # Call node_info on a node with control inputs.
     out = self._registry.dispatch_command("node_info",
@@ -1783,7 +1761,6 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
                     len(out.lines[z_line]),
                     "ni -a -d -t control_deps/ctrl_dep_z")
 
-  @test_util.run_deprecated_v1
   def testListInputsNonRecursiveNoControl(self):
     """List inputs non-recursively, without any control inputs."""
 
@@ -1826,7 +1803,6 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
                     len(out.lines[3]) - len("control_deps/ctrl_dep_y"),
                     len(out.lines[3]), "li -c -r control_deps/ctrl_dep_y")
 
-  @test_util.run_deprecated_v1
   def testListInputsNonRecursiveNoControlUsingTensorName(self):
     """List inputs using the name of an output tensor of the node."""
 
@@ -1855,7 +1831,6 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
                     len(out.lines[3]) - len("control_deps/ctrl_dep_y"),
                     len(out.lines[3]), "li -c -r control_deps/ctrl_dep_y")
 
-  @test_util.run_deprecated_v1
   def testListInputsNonRecursiveWithControls(self):
     """List inputs non-recursively, with control inputs."""
     node_name = "control_deps/ctrl_dep_z"
@@ -1886,7 +1861,6 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
                     len(out.lines[5]) - len("control_deps/x"),
                     len(out.lines[5]), "li -c -r control_deps/x")
 
-  @test_util.run_deprecated_v1
   def testListInputsRecursiveWithControls(self):
     """List inputs recursively, with control inputs."""
     node_name = "control_deps/ctrl_dep_z"
@@ -1932,7 +1906,6 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
                     len(out.lines[18]) - len("control_deps/x"),
                     len(out.lines[18]), "li -c -r control_deps/x")
 
-  @test_util.run_deprecated_v1
   def testListInputsRecursiveWithControlsWithDepthLimit(self):
     """List inputs recursively, with control inputs and a depth limit."""
     node_name = "control_deps/ctrl_dep_z"
@@ -1992,7 +1965,6 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
         "ERROR: There is no node named \"control_deps/z/foo\" in the "
         "partition graphs"], out.lines)
 
-  @test_util.run_deprecated_v1
   def testListRecipientsRecursiveWithControlsWithDepthLimit(self):
     """List recipients recursively, with control inputs and a depth limit."""
 
@@ -2025,6 +1997,7 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase):
                      out.font_attr_segs[0])
 
 
+@test_util.run_v1_only("b/120545219")
 class AnalyzerCLIWhileLoopTest(test_util.TensorFlowTestCase):
 
   @classmethod
@@ -2064,7 +2037,6 @@ class AnalyzerCLIWhileLoopTest(test_util.TensorFlowTestCase):
     # Tear down temporary dump directory.
     shutil.rmtree(cls._dump_root)
 
-  @test_util.run_deprecated_v1
   def testMultipleDumpsPrintTensorNoNumber(self):
     output = self._registry.dispatch_command("pt", ["while/Identity:0"])
 
@@ -2082,7 +2054,6 @@ class AnalyzerCLIWhileLoopTest(test_util.TensorFlowTestCase):
     self.assertEqual("For example:", output.lines[-2])
     self.assertEqual("  print_tensor while/Identity:0 -n 0", output.lines[-1])
 
-  @test_util.run_deprecated_v1
   def testMultipleDumpsPrintTensorWithNumber(self):
     for i in xrange(5):
       output = self._registry.dispatch_command(
@@ -2096,7 +2067,6 @@ class AnalyzerCLIWhileLoopTest(test_util.TensorFlowTestCase):
       self.assertTrue(output.lines[4].startswith("array(%d" % i))
       self.assertTrue(output.lines[4].endswith(")"))
 
-  @test_util.run_deprecated_v1
   def testMultipleDumpsPrintTensorInvalidNumber(self):
     output = self._registry.dispatch_command("pt",
                                              ["while/Identity:0", "-n", "10"])
diff --git a/tensorflow/python/debug/cli/cli_shared_test.py b/tensorflow/python/debug/cli/cli_shared_test.py
index d191a234fd..66a12efda5 100644
--- a/tensorflow/python/debug/cli/cli_shared_test.py
+++ b/tensorflow/python/debug/cli/cli_shared_test.py
@@ -105,6 +105,7 @@ class TimeToReadableStrTest(test_util.TensorFlowTestCase):
       cli_shared.time_to_readable_str(100, force_time_unit="ks")
 
 
+@test_util.run_v1_only("b/120545219")
 class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -334,6 +335,7 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
     self.assertEqual("run #1: 1 fetch (a:0); 1 feed (foo)", short_description)
 
 
+@test_util.run_v1_only("b/120545219")
 class GetErrorIntroTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -342,7 +344,6 @@ class GetErrorIntroTest(test_util.TensorFlowTestCase):
   def tearDown(self):
     ops.reset_default_graph()
 
-  @test_util.run_deprecated_v1
   def testShapeError(self):
     tf_error = errors.OpError(None, self.var_a.initializer, "foo description",
                               None)
diff --git a/tensorflow/python/debug/cli/profile_analyzer_cli_test.py b/tensorflow/python/debug/cli/profile_analyzer_cli_test.py
index effcd500c7..d6d2b58b5f 100644
--- a/tensorflow/python/debug/cli/profile_analyzer_cli_test.py
+++ b/tensorflow/python/debug/cli/profile_analyzer_cli_test.py
@@ -70,6 +70,7 @@ def _assert_no_lines_match(pattern, lines):
         "%s matched at least one line in %s." % (pattern, str(lines)))
 
 
+@test_util.run_v1_only("b/120545219")
 class ProfileAnalyzerListProfileTest(test_util.TensorFlowTestCase):
 
   def testNodeInfoEmpty(self):
@@ -321,6 +322,7 @@ class ProfileAnalyzerListProfileTest(test_util.TensorFlowTestCase):
     _assert_at_least_one_line_matches(r"Device Total.*0\.009ms", prof_output)
 
 
+@test_util.run_v1_only("b/120545219")
 class ProfileAnalyzerPrintSourceTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -348,7 +350,6 @@ class ProfileAnalyzerPrintSourceTest(test_util.TensorFlowTestCase):
     ops.reset_default_graph()
     super(ProfileAnalyzerPrintSourceTest, self).tearDown()
 
-  @test_util.run_deprecated_v1
   def testPrintSourceForWhileLoop(self):
     prof_output = self.prof_analyzer.print_source([__file__])
 
@@ -362,7 +363,6 @@ class ProfileAnalyzerPrintSourceTest(test_util.TensorFlowTestCase):
         r"\[(\|)+(\s)*\] .*us .*7\(55\) .*L%d.*(\S)+" % self.loop_lineno,
         prof_output.lines)
 
-  @test_util.run_deprecated_v1
   def testPrintSourceOutputContainsClickableLinks(self):
     prof_output = self.prof_analyzer.print_source([__file__])
     any_match, line_index = _at_least_one_line_matches(
@@ -379,7 +379,6 @@ class ProfileAnalyzerPrintSourceTest(test_util.TensorFlowTestCase):
         break
     self.assertTrue(any_menu_item_match)
 
-  @test_util.run_deprecated_v1
   def testPrintSourceWithNonDefaultTimeUnit(self):
     prof_output = self.prof_analyzer.print_source([
         __file__, "--time_unit", "ms"])
@@ -394,7 +393,6 @@ class ProfileAnalyzerPrintSourceTest(test_util.TensorFlowTestCase):
         r"\[(\|)+(\s)*\] .*ms .*7\(55\) .*L%d.*(\S)+" % self.loop_lineno,
         prof_output.lines)
 
-  @test_util.run_deprecated_v1
   def testPrintSourceWithNodeNameFilter(self):
     prof_output = self.prof_analyzer.print_source([
         __file__, "--node_name_filter", "x$"])
@@ -427,7 +425,6 @@ class ProfileAnalyzerPrintSourceTest(test_util.TensorFlowTestCase):
         break
     self.assertTrue(any_menu_item_match)
 
-  @test_util.run_deprecated_v1
   def testPrintSourceWithOpTypeFilter(self):
     prof_output = self.prof_analyzer.print_source([
         __file__, "--op_type_filter", "Less"])
diff --git a/tensorflow/python/debug/cli/stepper_cli_test.py b/tensorflow/python/debug/cli/stepper_cli_test.py
index 7b8a42c253..5cf69d0168 100644
--- a/tensorflow/python/debug/cli/stepper_cli_test.py
+++ b/tensorflow/python/debug/cli/stepper_cli_test.py
@@ -129,6 +129,7 @@ def _parse_updated(lines):
   return updated
 
 
+@test_util.run_v1_only("b/120545219")
 class NodeStepperSimpleGraphTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/debug/lib/debug_gradients_test.py b/tensorflow/python/debug/lib/debug_gradients_test.py
index 1c53147863..885691c3ef 100644
--- a/tensorflow/python/debug/lib/debug_gradients_test.py
+++ b/tensorflow/python/debug/lib/debug_gradients_test.py
@@ -36,6 +36,7 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import gradient_descent
 
 
+@test_util.run_v1_only("b/120545219")
 class IdentifyGradientTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -54,7 +55,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     ops.reset_default_graph()
     debug_gradients.clear_gradient_debuggers()
 
-  @test_util.run_deprecated_v1
   def testIdentifyGradientGivesCorrectTensorObjectWithoutContextManager(self):
     grad_debugger = debug_gradients.GradientsDebugger()
     id_grad_w = grad_debugger.identify_gradient(self.w)
@@ -85,7 +85,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertIsInstance(w_grad, ops.Tensor)
     self.assertAllClose(1.0, self.sess.run(w_grad))
 
-  @test_util.run_deprecated_v1
   def testIdentifyGradientGivesCorrectTensorObjectWithTfGradients(self):
     grad_debugger = debug_gradients.GradientsDebugger()
     id_grad_w = grad_debugger.identify_gradient(self.w)
@@ -117,7 +116,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertIsInstance(w_grad, ops.Tensor)
     self.assertAllClose(1.0, self.sess.run(w_grad))
 
-  @test_util.run_deprecated_v1
   def testCallingIdentifyGradientTwiceWithTheSameGradientsDebuggerErrors(self):
     grad_debugger = debug_gradients.GradientsDebugger()
     grad_debugger.identify_gradient(self.w)
@@ -125,7 +123,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
                                  "The graph already contains an op named .*"):
       grad_debugger.identify_gradient(self.w)
 
-  @test_util.run_deprecated_v1
   def testIdentifyGradientWorksOnMultipleLosses(self):
     grad_debugger_1 = debug_gradients.GradientsDebugger()
     grad_debugger_2 = debug_gradients.GradientsDebugger()
@@ -154,7 +151,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertAllClose(2.0 * 5.0, self.sess.run(dz1_dy))
     self.assertAllClose(0.5 * (5.0**-0.5), self.sess.run(dz2_dy))
 
-  @test_util.run_deprecated_v1
   def testIdentifyGradientRaisesLookupErrorForUnknownXTensor(self):
     grad_debugger_1 = debug_gradients.GradientsDebugger()
     grad_debugger_2 = debug_gradients.GradientsDebugger()
@@ -175,7 +171,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
         r"This GradientsDebugger has not received any gradient tensor for "):
       grad_debugger_2.gradient_tensor(self.w)
 
-  @test_util.run_deprecated_v1
   def testIdentifyGradientRaisesTypeErrorForNonTensorOrTensorNameInput(self):
     grad_debugger = debug_gradients.GradientsDebugger()
     with self.assertRaisesRegexp(
@@ -184,7 +179,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
         r"has type .*Operation.*"):
       grad_debugger.gradient_tensor(variables.global_variables_initializer())
 
-  @test_util.run_deprecated_v1
   def testIdentifyGradientTensorWorksWithGradientDescentOptimizer(self):
     grad_debugger = debug_gradients.GradientsDebugger()
     id_grad_w = grad_debugger.identify_gradient(self.w)
@@ -200,7 +194,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertIsInstance(w_grad, ops.Tensor)
     self.assertAllClose(1.0, self.sess.run(w_grad))
 
-  @test_util.run_deprecated_v1
   def testWatchGradientsByXTensorNamesWorks(self):
     y = math_ops.add(self.w, -1.0, name="y")
 
@@ -227,7 +220,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertIsInstance(w_grad, ops.Tensor)
     self.assertAllClose(1.0, self.sess.run(w_grad))
 
-  @test_util.run_deprecated_v1
   def testWatchGradientsByXTensorNamesWorksWithoutContextManager(self):
     y = math_ops.add(self.w, -1.0, name="y")
 
@@ -254,7 +246,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertIsInstance(w_grad, ops.Tensor)
     self.assertAllClose(1.0, self.sess.run(w_grad))
 
-  @test_util.run_deprecated_v1
   def testWatchGradientsWorksOnRefTensor(self):
     y = math_ops.add(self.w, -1.0, name="y")
 
@@ -273,7 +264,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertAllClose(3.0, self.sess.run(
         grad_debugger.gradient_tensor("u:0")))
 
-  @test_util.run_deprecated_v1
   def testWatchGradientsWorksOnMultipleTensors(self):
     y = math_ops.add(self.w, -1.0, name="y")
 
@@ -294,7 +284,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertAllClose(3.0, self.sess.run(
         grad_debugger.gradient_tensor("u:0")))
 
-  @test_util.run_deprecated_v1
   def testWatchGradientsByXTensorsWorks(self):
     y = math_ops.add(self.w, -1.0, name="foo/y")
     z = math_ops.square(y, name="foo/z")
@@ -317,7 +306,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertAllClose(10.0, self.sess.run(w_grad))
     self.assertAllClose(30.0, self.sess.run(u_grad))
 
-  @test_util.run_deprecated_v1
   def testWatchGradientsByTensorCanWorkOnMultipleLosses(self):
     y = math_ops.add(self.w, -1.0, name="y")
     z1 = math_ops.square(y, name="z1")
@@ -343,7 +331,6 @@ class IdentifyGradientTest(test_util.TensorFlowTestCase):
     self.assertAllClose(2.0 * 5.0, self.sess.run(dz1_dy))
     self.assertAllClose(0.5 * (5.0**-0.5), self.sess.run(dz2_dy))
 
-  @test_util.run_deprecated_v1
   def testGradientsValuesFromDumpWorks(self):
     y = math_ops.add(self.w, -1.0, name="y")
     z = math_ops.square(y, name="z")
diff --git a/tensorflow/python/debug/lib/debug_utils_test.py b/tensorflow/python/debug/lib/debug_utils_test.py
index cf59b30e3d..9d59cfc179 100644
--- a/tensorflow/python/debug/lib/debug_utils_test.py
+++ b/tensorflow/python/debug/lib/debug_utils_test.py
@@ -185,7 +185,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
     self.assertEqual(["file:///tmp/tfdbg_1", "file:///tmp/tfdbg_2"],
                      watch_0.debug_urls)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_allNodes(self):
     debug_utils.watch_graph(
         self._run_options,
@@ -217,7 +217,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
     self.assertTrue("p1" in node_names)
     self.assertTrue("s" in node_names)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_nodeNameWhitelist(self):
     debug_utils.watch_graph(
         self._run_options,
@@ -232,7 +232,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
         sorted(["a1_init", "a1", "a1/Assign", "a1/read", "p1"]),
         sorted(node_names))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_opTypeWhitelist(self):
     debug_utils.watch_graph(
         self._run_options,
@@ -258,7 +258,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
         ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
     self.assertEqual(["p1"], node_names)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_tensorDTypeWhitelist(self):
     debug_utils.watch_graph(
         self._run_options,
@@ -271,7 +271,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
         ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
     self.assertItemsEqual(["a1", "a1/Assign", "b", "b/Assign"], node_names)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_nodeNameAndTensorDTypeWhitelists(self):
     debug_utils.watch_graph(
         self._run_options,
@@ -285,7 +285,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
         ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
     self.assertItemsEqual(["a1", "a1/Assign"], node_names)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_nodeNameBlacklist(self):
     debug_utils.watch_graph_with_blacklists(
         self._run_options,
@@ -300,7 +300,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
         sorted(["b_init", "b", "b/Assign", "b/read", "c", "s"]),
         sorted(node_names))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_opTypeBlacklist(self):
     debug_utils.watch_graph_with_blacklists(
         self._run_options,
@@ -313,7 +313,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
         ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
     self.assertEqual(sorted(["p1", "s"]), sorted(node_names))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_nodeNameAndOpTypeBlacklists(self):
     debug_utils.watch_graph_with_blacklists(
         self._run_options,
@@ -327,7 +327,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
         ["DebugIdentity"], ["file:///tmp/tfdbg_1"])
     self.assertEqual(["s"], node_names)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_tensorDTypeBlacklists(self):
     debug_utils.watch_graph_with_blacklists(
         self._run_options,
@@ -344,7 +344,7 @@ class DebugUtilsTest(test_util.TensorFlowTestCase):
     self.assertNotIn("b/Assign", node_names)
     self.assertIn("s", node_names)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWatchGraph_nodeNameAndTensorDTypeBlacklists(self):
     debug_utils.watch_graph_with_blacklists(
         self._run_options,
diff --git a/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py b/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py
index 74498c8ea3..2405e29aaa 100644
--- a/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py
+++ b/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py
@@ -44,6 +44,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 
 
+@test_util.run_v1_only("b/120545219")
 class DistributedSessionDebugTest(test_util.TensorFlowTestCase):
   """Test the debugging of distributed sessions."""
 
diff --git a/tensorflow/python/debug/lib/session_debug_file_test.py b/tensorflow/python/debug/lib/session_debug_file_test.py
index f5f9ba29ab..16ab815d92 100644
--- a/tensorflow/python/debug/lib/session_debug_file_test.py
+++ b/tensorflow/python/debug/lib/session_debug_file_test.py
@@ -34,6 +34,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 
 
+@test_util.run_v1_only("b/120545219")
 class SessionDebugFileTest(session_debug_testlib.SessionDebugTestBase):
 
   def _debug_urls(self, run_number=None):
@@ -45,7 +46,6 @@ class SessionDebugFileTest(session_debug_testlib.SessionDebugTestBase):
     else:
       return os.path.join(self._dump_root, "run_%d" % run_number)
 
-  @test_util.run_deprecated_v1
   def testAllowsDifferentWatchesOnDifferentRuns(self):
     """Test watching different tensors on different runs of the same graph."""
 
diff --git a/tensorflow/python/debug/lib/session_debug_grpc_test.py b/tensorflow/python/debug/lib/session_debug_grpc_test.py
index bfc9a3a382..472e244915 100644
--- a/tensorflow/python/debug/lib/session_debug_grpc_test.py
+++ b/tensorflow/python/debug/lib/session_debug_grpc_test.py
@@ -91,6 +91,7 @@ class GrpcDebugServerTest(test_util.TensorFlowTestCase):
     server.stop_server().wait()
 
 
+@test_util.run_v1_only("b/120545219")
 class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase):
 
   @classmethod
@@ -353,6 +354,7 @@ class SessionDebugConcurrentTest(
     return urls
 
 
+@test_util.run_v1_only("b/120545219")
 class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
   """Test server gating of debug ops."""
 
@@ -730,6 +732,7 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase):
       self.assertEqual("DebugNumericSummary", debug_watch.debug_op)
 
 
+@test_util.run_v1_only("b/120545219")
 class DelayedDebugServerTest(test_util.TensorFlowTestCase):
 
   def testDebuggedSessionRunWorksWithDelayedDebugServerStartup(self):
diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py
index 25ef91b575..5165febff5 100644
--- a/tensorflow/python/debug/lib/session_debug_testlib.py
+++ b/tensorflow/python/debug/lib/session_debug_testlib.py
@@ -84,6 +84,7 @@ class _RNNCellForTest(rnn_cell_impl.RNNCell):
     return (math_ops.multiply(self._w, input_), state)
 
 
+@test_util.run_v1_only("b/120545219")
 class SessionDebugTestBase(test_util.TensorFlowTestCase):
   """Base class for unit tests of tfdbg running with tf.Session."""
 
diff --git a/tensorflow/python/debug/lib/source_utils_test.py b/tensorflow/python/debug/lib/source_utils_test.py
index 9083297fdb..4f4aea0321 100644
--- a/tensorflow/python/debug/lib/source_utils_test.py
+++ b/tensorflow/python/debug/lib/source_utils_test.py
@@ -216,6 +216,7 @@ class SourceHelperTest(test_util.TensorFlowTestCase):
     os.remove(unrelated_source_path)
 
 
+@test_util.run_v1_only("b/120545219")
 class ListSourceAgainstDumpTest(test_util.TensorFlowTestCase):
 
   def createAndRunGraphWithWhileLoop(self):
diff --git a/tensorflow/python/debug/lib/stepper_test.py b/tensorflow/python/debug/lib/stepper_test.py
index 3839c67198..9e78e207b8 100644
--- a/tensorflow/python/debug/lib/stepper_test.py
+++ b/tensorflow/python/debug/lib/stepper_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import gradient_descent
 
 
+@test_util.run_v1_only("b/120545219")
 class StepperTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -443,6 +444,7 @@ class StepperTest(test_util.TensorFlowTestCase):
           self.assertAllClose(-4.0, result["fz"]["z"])
 
 
+@test_util.run_v1_only("b/120545219")
 class StepperTestWithPlaceHolders(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -577,6 +579,7 @@ class StepperTestWithPlaceHolders(test_util.TensorFlowTestCase):
       self.assertAllClose([[-1.0], [6.0]], stepper.finalize())
 
 
+@test_util.run_v1_only("b/120545219")
 class StepperAssignAddTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -692,6 +695,7 @@ class StepperAssignAddTest(test_util.TensorFlowTestCase):
       self.assertAllClose(12.0, stepper.cont(self.v))
 
 
+@test_util.run_v1_only("b/120545219")
 class StepperBackwardRunTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/debug/wrappers/disk_usage_test.py b/tensorflow/python/debug/wrappers/disk_usage_test.py
index 0874525966..88b1cd540d 100644
--- a/tensorflow/python/debug/wrappers/disk_usage_test.py
+++ b/tensorflow/python/debug/wrappers/disk_usage_test.py
@@ -32,6 +32,7 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import monitored_session
 
 
+@test_util.run_v1_only("b/120545219")
 class DumpingDebugWrapperDiskUsageLimitTest(test_util.TensorFlowTestCase):
 
   @classmethod
diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py
index 11011a5c13..42e3b09382 100644
--- a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py
+++ b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py
@@ -41,6 +41,7 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import monitored_session
 
 
+@test_util.run_v1_only("b/120545219")
 class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/debug/wrappers/framework_test.py b/tensorflow/python/debug/wrappers/framework_test.py
index 68584b4ede..a50fa7cf4b 100644
--- a/tensorflow/python/debug/wrappers/framework_test.py
+++ b/tensorflow/python/debug/wrappers/framework_test.py
@@ -141,6 +141,7 @@ class TestDebugWrapperSessionBadAction(framework.BaseDebugWrapperSession):
     return framework.OnRunEndResponse()
 
 
+@test_util.run_v1_only("b/120545219")
 class DebugWrapperSessionTest(test_util.TensorFlowTestCase):
 
   def _no_rewrite_session_config(self):
diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py
index 149a7497df..e38df861f5 100644
--- a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py
+++ b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py
@@ -127,6 +127,7 @@ class LocalCLIDebuggerWrapperSessionForTest(
         return e.exit_token
 
 
+@test_util.run_v1_only("b/120545219")
 class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/distribute/distribute_coordinator_test.py b/tensorflow/python/distribute/distribute_coordinator_test.py
index f2cb950aad..7598c105c2 100644
--- a/tensorflow/python/distribute/distribute_coordinator_test.py
+++ b/tensorflow/python/distribute/distribute_coordinator_test.py
@@ -427,6 +427,7 @@ class DistributeCoordinatorTestStandaloneMode(DistributeCoordinatorTestBase):
     # Each finished worker will increment self._result_correct.
     self.assertEqual(self._result_correct, NUM_WORKERS)
 
+  @test_util.run_v1_only("b/120545219")
   def testBetweenGraphWithMonitoredSession(self):
     """Test monitored session in standalone client mode."""
     distribute_coordinator.run_distribute_coordinator(
@@ -600,6 +601,7 @@ class DistributeCoordinatorTestInpendentWorkerMode(
     # Each finished worker will increment self._result_correct.
     self.assertEqual(self._result_correct, NUM_WORKERS)
 
+  @test_util.run_v1_only("b/120545219")
   def testBetweenGraphWithMonitoredSession(self):
     cluster_spec = self._create_cluster_spec(
         num_workers=NUM_WORKERS, num_ps=NUM_PS)
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 3cec40a48f..61c47a29fd 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -648,6 +648,7 @@ class BackpropTest(test.TestCase):
       g.gradient(x, y)
 
   @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only('b/120545219')
   def testGradientTapeWithCond(self):
     x = constant_op.constant(3.0)
 
@@ -669,6 +670,7 @@ class BackpropTest(test.TestCase):
       self.assertEqual(self.evaluate(dy), 6.0)
 
   @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only('b/120545219')
   def testGradientTapeWithWhileLoop(self):
     i = constant_op.constant(1)
     x = constant_op.constant(2.)
@@ -704,6 +706,7 @@ class BackpropTest(test.TestCase):
 
   @test_util.assert_no_new_tensors
   @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only('b/120545219')
   def testPersistentTape(self):
     with backprop.GradientTape(persistent=True) as g:
       x = constant_op.constant(3.0)
@@ -1243,16 +1246,19 @@ class JacobianTest(test.TestCase):
     answer = [array_ops.diag(2 * x * y), array_ops.diag(x * x)]
     return jacobian, answer
 
+  @test_util.run_v1_only('b/120545219')
   def testPfor(self):
     jacobian, answer = self._jacobian(experimental_use_pfor=True)
     for j, a in zip(jacobian, answer):
       self.assertAllEqual(a, j)
 
+  @test_util.run_v1_only('b/120545219')
   def testWhileLoop(self):
     jacobian, answer = self._jacobian(experimental_use_pfor=False)
     for j, a in zip(jacobian, answer):
       self.assertAllEqual(a, j)
 
+  @test_util.run_v1_only('b/120545219')
   def testPforDefun(self):
 
     @function.defun
@@ -1263,6 +1269,7 @@ class JacobianTest(test.TestCase):
     for j, a in zip(jacobian, answer):
       self.assertAllEqual(a, j)
 
+  @test_util.run_v1_only('b/120545219')
   def testWhileLoopDefun(self):
 
     @function.defun
@@ -1273,6 +1280,7 @@ class JacobianTest(test.TestCase):
     for j, a in zip(jacobian, answer):
       self.assertAllEqual(a, j)
 
+  @test_util.run_v1_only('b/120545219')
   def testPersistentTape(self):
     if not context.executing_eagerly():
       return
@@ -1283,6 +1291,7 @@ class JacobianTest(test.TestCase):
     with self.assertRaisesRegexp(RuntimeError, 'persistent'):
       g.jacobian(y, x, experimental_use_pfor=False)
 
+  @test_util.run_v1_only('b/120545219')
   def testPforException(self):
     var = variables.Variable([1.])
 
@@ -1303,6 +1312,7 @@ class JacobianTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'No converter'):
       g.jacobian(y, x, experimental_use_pfor=True)
 
+  @test_util.run_v1_only('b/120545219')
   def test_parallel_iterations(self):
     with backprop.GradientTape(persistent=True) as g:
       x = constant_op.constant([[1., 2], [3, 4]])
@@ -1328,14 +1338,17 @@ class BatchJacobianTest(test.TestCase):
                               array_ops.diag(2 * x[1] * y[1])])
     return batch_jacobian, answer
 
+  @test_util.run_v1_only('b/120545219')
   def testPfor(self):
     batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=True)
     self.assertAllEqual(answer, batch_jacobian)
 
+  @test_util.run_v1_only('b/120545219')
   def testWhileLoop(self):
     batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=False)
     self.assertAllEqual(answer, batch_jacobian)
 
+  @test_util.run_v1_only('b/120545219')
   def testPforDefun(self):
 
     @function.defun
@@ -1345,6 +1358,7 @@ class BatchJacobianTest(test.TestCase):
     batch_jacobian, answer = _f()
     self.assertAllEqual(answer, batch_jacobian)
 
+  @test_util.run_v1_only('b/120545219')
   def testWhileLoopDefun(self):
 
     @function.defun
@@ -1354,6 +1368,7 @@ class BatchJacobianTest(test.TestCase):
     batch_jacobian, answer = _f()
     self.assertAllEqual(answer, batch_jacobian)
 
+  @test_util.run_v1_only('b/120545219')
   def testPersistentTape(self):
     if not context.executing_eagerly():
       return
@@ -1364,6 +1379,7 @@ class BatchJacobianTest(test.TestCase):
     with self.assertRaisesRegexp(RuntimeError, 'persistent'):
       g.batch_jacobian(y, x, experimental_use_pfor=False)
 
+  @test_util.run_v1_only('b/120545219')
   def testBadShape(self):
     x = random_ops.random_uniform([2, 3])
     with backprop.GradientTape() as g:
@@ -1371,6 +1387,7 @@ class BatchJacobianTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'Need first dimension'):
       g.batch_jacobian(y, x)
 
+  @test_util.run_v1_only('b/120545219')
   def testBadInputRank(self):
     x = random_ops.random_uniform([2])
     with backprop.GradientTape() as g:
@@ -1385,6 +1402,7 @@ class BatchJacobianTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'must have rank at least 2'):
       g.batch_jacobian(y, x)
 
+  @test_util.run_v1_only('b/120545219')
   def testPforException(self):
     var = variables.Variable([1.])
 
@@ -1405,6 +1423,7 @@ class BatchJacobianTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'No converter'):
       g.batch_jacobian(y, x, experimental_use_pfor=True)
 
+  @test_util.run_v1_only('b/120545219')
   def test_parallel_iterations(self):
     with backprop.GradientTape(persistent=True) as g:
       x = constant_op.constant([[1., 2], [3, 4]])
diff --git a/tensorflow/python/eager/function_gradients_test.py b/tensorflow/python/eager/function_gradients_test.py
index 9b83f57089..98dec0b361 100644
--- a/tensorflow/python/eager/function_gradients_test.py
+++ b/tensorflow/python/eager/function_gradients_test.py
@@ -187,7 +187,7 @@ class FunctionGradientsTest(test.TestCase, parameterized.TestCase):
 
     self.assertAllEqual(2, g(constant_op.constant(2.)))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testGraphModeEagerGradError(self):
     with context.graph_mode():
       def f():
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index e0854b0632..50d1b4b6f7 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -963,6 +963,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
   # construction. Eager's configuration is controlled in `__main__`.
   @test_util.run_in_graph_and_eager_modes(
       config=config_pb2.ConfigProto(device_count={'CPU': 4}))
+  @test_util.run_v1_only('b/120545219')
   def testDeviceAnnotationsRespected(self):
 
     def multi_device_fn():
@@ -1001,6 +1002,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes(
       config=config_pb2.ConfigProto(device_count={'CPU': 2}))
+  @test_util.run_v1_only('b/120545219')
   def testCallingGraphFunctionOnDifferentDevice(self):
 
     def func():
diff --git a/tensorflow/python/framework/auto_control_deps_test.py b/tensorflow/python/framework/auto_control_deps_test.py
index a1dff9e834..5f5de45b9e 100644
--- a/tensorflow/python/framework/auto_control_deps_test.py
+++ b/tensorflow/python/framework/auto_control_deps_test.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import auto_control_deps as acd
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -46,6 +47,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
         val = c.mark_as_return(val)
       self.assertAllEqual(val.eval(), 4.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondMustRun(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
@@ -67,6 +69,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
       self.assertAllEqual(val.eval(feed_dict={p: False}), 5.0)
       self.assertAllEqual(val.eval(feed_dict={p: True}), 6.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondMustRunSeparateRead(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
@@ -90,6 +93,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
       one.eval(feed_dict={p: True})
       self.assertAllEqual(v.read_value().eval(), 6.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondNested(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
@@ -124,6 +128,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
       self.assertAllEqual(val.eval(feed_dict={p: True, q: True}), 7.0)
       self.assertAllEqual(val.eval(feed_dict={p: True, q: False}), 8.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondOneBranch(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
@@ -144,6 +149,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
       self.assertAllEqual(val.eval(feed_dict={p: False}), 5.0)
       self.assertAllEqual(val.eval(feed_dict={p: True}), 5.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondOneBranchUpdateBefore(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
@@ -165,6 +171,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
       self.assertAllEqual(val.eval(feed_dict={p: False}), 6.0)
       self.assertAllEqual(val.eval(feed_dict={p: True}), 12.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondOneBranchUpdateAfter(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
diff --git a/tensorflow/python/framework/error_interpolation_test.py b/tensorflow/python/framework/error_interpolation_test.py
index 1b77548592..9eaa4a5f2d 100644
--- a/tensorflow/python/framework/error_interpolation_test.py
+++ b/tensorflow/python/framework/error_interpolation_test.py
@@ -23,6 +23,7 @@ import os
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import error_interpolation
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.framework import traceable_stack
 from tensorflow.python.platform import test
 from tensorflow.python.util import tf_stack
@@ -112,6 +113,7 @@ class ComputeColocationSummaryFromOpTest(test.TestCase):
     self.assertIn("No node-device colocations", summary)
 
 
+@test_util.run_v1_only("b/120545219")
 class InterpolateFilenamesAndLineNumbersTest(test.TestCase):
 
   def setUp(self):
@@ -193,6 +195,7 @@ class InterpolateFilenamesAndLineNumbersTest(test.TestCase):
     self.assertRegexpMatches(interpolated_string, "constant_op.py:[0-9]+.*")
 
 
+@test_util.run_v1_only("b/120545219")
 class InterpolateDeviceSummaryTest(test.TestCase):
 
   def _fancy_device_function(self, unused_op):
@@ -236,6 +239,7 @@ class InterpolateDeviceSummaryTest(test.TestCase):
     self.assertRegexpMatches(result, expected_re)
 
 
+@test_util.run_v1_only("b/120545219")
 class InterpolateColocationSummaryTest(test.TestCase):
 
   def setUp(self):
@@ -260,11 +264,13 @@ class InterpolateColocationSummaryTest(test.TestCase):
 
     self.graph = node_three.graph
 
+  @test_util.run_v1_only("b/120545219")
   def testNodeThreeHasColocationInterpolation(self):
     message = "{{colocation_node Three_with_one}}"
     result = error_interpolation.interpolate(message, self.graph)
     self.assertIn("colocate_with(One)", result)
 
+  @test_util.run_v1_only("b/120545219")
   def testNodeFourHasColocationInterpolationForNodeThreeOnly(self):
     message = "{{colocation_node Four_with_three}}"
     result = error_interpolation.interpolate(message, self.graph)
@@ -273,12 +279,14 @@ class InterpolateColocationSummaryTest(test.TestCase):
         "One", result,
         "Node One should not appear in Four_with_three's summary:\n%s" % result)
 
+  @test_util.run_v1_only("b/120545219")
   def testNodeFiveHasColocationInterpolationForNodeOneAndTwo(self):
     message = "{{colocation_node Five_with_one_with_two}}"
     result = error_interpolation.interpolate(message, self.graph)
     self.assertIn("colocate_with(One)", result)
     self.assertIn("colocate_with(Two)", result)
 
+  @test_util.run_v1_only("b/120545219")
   def testColocationInterpolationForNodeLackingColocation(self):
     message = "{{colocation_node One}}"
     result = error_interpolation.interpolate(message, self.graph)
diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py
index 4e7408ad49..dd26b8a78e 100644
--- a/tensorflow/python/framework/graph_util_test.py
+++ b/tensorflow/python/framework/graph_util_test.py
@@ -103,7 +103,7 @@ class DeviceFunctionsTest(test.TestCase):
     self.assertDeviceEqual(var_5.device, "/device:GPU:0")
     self.assertDeviceEqual(var_6.device, "/device:CPU:0")
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNestedDeviceFunctions(self):
     with ops.Graph().as_default():
       var_0 = variables.VariableV1(0)
diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py
index 46ce4616a5..e6e8788164 100644
--- a/tensorflow/python/framework/meta_graph_test.py
+++ b/tensorflow/python/framework/meta_graph_test.py
@@ -528,7 +528,7 @@ class ScopedMetaGraphTest(test.TestCase):
         actual_grad_value = self.evaluate(grad)
         self.assertEqual(expected_grad_value, actual_grad_value)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testImportWhileLoopInWhileLoop(self):
     # Create a simple while loop.
     with ops.Graph().as_default():
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 7baa02b446..0fcbcd6ee4 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -605,6 +605,7 @@ class OperationTest(test_util.TensorFlowTestCase):
       x.op._update_input(1, x)  # pylint: disable=protected-access
 
   @test_util.enable_control_flow_v2
+  @test_util.run_v1_only("b/120545219")
   def testAddWhileInput(self):
     @eager_function.defun
     def test():
@@ -780,7 +781,7 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     self.assertEqual(op3.name, "myop_2")
     self.assertEqual(op4.name, "myop_1_1")
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCond(self):
     g = ops.Graph()
     with g.as_default():
@@ -810,7 +811,7 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
                      "cond/cond_text")
     # pylint: enable=protected-access
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileLoop(self):
     g = ops.Graph()
     with g.as_default():
@@ -840,7 +841,7 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
                      "myloop/while_context")
     # pylint: enable=protected-access
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileLoopWithInternalControlDep(self):
     g = ops.Graph()
     with g.as_default():
@@ -864,7 +865,7 @@ class CreateOpFromTFOperationTest(test_util.TensorFlowTestCase):
     # Internal control dep is preserved
     self.assertEqual(op.control_inputs, [c])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileLoopWithExternalControlDep(self):
     g = ops.Graph()
     with g.as_default():
@@ -2283,7 +2284,7 @@ class InitScopeTest(test_util.TensorFlowTestCase):
       self.assertEqual(4, int(compiled_outer(inner=compiled_inner)))
       self.assertEqual(7, int(compiled_outer(inner=compiled_inner)))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testFallsBackToGlobalGraphWhenAllGraphsAreBuildingFunctions(self):
     with context.graph_mode():
       ops.reset_default_graph()
@@ -2994,7 +2995,7 @@ class TracebackTest(test_util.TensorFlowTestCase):
 
 class EnableEagerExecutionTest(test_util.TensorFlowTestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testBadArgumentsToEnableEagerExecution(self):
     with self.assertRaisesRegexp(TypeError, "config must be a tf.ConfigProto"):
       ops.enable_eager_execution(context.DEVICE_PLACEMENT_SILENT)
diff --git a/tensorflow/python/framework/subscribe_test.py b/tensorflow/python/framework/subscribe_test.py
index 61c6ea6519..a74e96f9d9 100644
--- a/tensorflow/python/framework/subscribe_test.py
+++ b/tensorflow/python/framework/subscribe_test.py
@@ -215,7 +215,7 @@ class SubscribeTest(test_util.TensorFlowTestCase):
     self.assertIn('graph2', shared)
     self.assertIn('graph3', shared)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testSubscribeVariable(self):
     """Confirm that variables can be subscribed."""
     v1 = variables.VariableV1(0.0)
@@ -254,7 +254,7 @@ class SubscribeTest(test_util.TensorFlowTestCase):
       # Make sure the values read from the variable match the expected ones.
       self.assertEqual([0.0, 3.0], shared)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testResourceType(self):
     """Confirm that subscribe correctly handles tensors with 'resource' type."""
     tensor_array = tensor_array_ops.TensorArray(
@@ -344,7 +344,7 @@ class SubscribeTest(test_util.TensorFlowTestCase):
     self.assertEqual(add.device, add_sub.device)
     self.assertEqual(mul.device, mul_sub.device)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_subscribe_tensors_within_control_flow_context(self):
     """Side effect ops are added with the same control flow context."""
     c1 = constant_op.constant(10)
diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py
index 78604b259c..c02fd9f55b 100644
--- a/tensorflow/python/grappler/item_test.py
+++ b/tensorflow/python/grappler/item_test.py
@@ -108,7 +108,7 @@ class ItemTest(test.TestCase):
     newest_tf_item = grappler_item.tf_item
     self.assertEqual(new_tf_item, newest_tf_item)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testColocationContraints(self):
     with ops.Graph().as_default() as g:
       c = constant_op.constant([10])
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py
index 6eb16fbd39..e2864ebb4d 100644
--- a/tensorflow/python/grappler/memory_optimizer_test.py
+++ b/tensorflow/python/grappler/memory_optimizer_test.py
@@ -62,7 +62,7 @@ class MemoryOptimizerSwapTest(test.TestCase):
     self.assertEqual(len(graph.node), graph_size)
     self.assertItemsEqual([node.name for node in graph.node], nodes)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testSimpleSwap(self):
     """Check that the swap annotations are followed."""
     a = variables.VariableV1(10, name='a')
diff --git a/tensorflow/python/grappler/tf_optimizer_test.py b/tensorflow/python/grappler/tf_optimizer_test.py
index 06ccaa813f..8186c81378 100644
--- a/tensorflow/python/grappler/tf_optimizer_test.py
+++ b/tensorflow/python/grappler/tf_optimizer_test.py
@@ -57,7 +57,7 @@ class PyWrapOptimizeGraphTest(test.TestCase):
     self.assertEqual(len(graph.node), 1)
     self.assertItemsEqual([node.name for node in graph.node], ['d'])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testKeepNodes(self):
     g = ops.Graph()
     with g.as_default():
@@ -86,7 +86,7 @@ class PyWrapOptimizeGraphTest(test.TestCase):
     self.assertEqual(len(optimized_graph_nodes), len(expected_nodes))
     self.assertAllInSet(optimized_graph_nodes, expected_nodes)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testLoops(self):
     g = ops.Graph()
     with g.as_default():
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index 6401e39e53..af01b46fa9 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -1422,7 +1422,7 @@ class TestCTC(test.TestCase):
                 decode_truth[i] == keras.backend.eval(decode_pred_tf[i])))
       self.assertAllClose(log_prob_truth, log_prob_pred)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_ctc_batch_cost(self):
     with self.cached_session():
       label_lens = np.expand_dims(np.asarray([5, 4]), 1)
diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index 6c9a382b32..4a65ade33c 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -403,7 +403,7 @@ class KerasCallbacksTest(test.TestCase):
           float(keras.backend.get_value(
               model.optimizer.lr)) - 0.01 / 4) < keras.backend.epsilon()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_ReduceLROnPlateau(self):
     with self.cached_session():
       np.random.seed(1337)
@@ -675,7 +675,7 @@ class KerasCallbacksTest(test.TestCase):
       self.assertEqual(len(loss), 1)
       self.assertEqual(loss[0], np.inf)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_TensorBoard(self):
     np.random.seed(1337)
 
@@ -779,7 +779,7 @@ class KerasCallbacksTest(test.TestCase):
           data_generator(True), len(x_train), epochs=2, callbacks=cbks)
       assert os.path.exists(temp_dir)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_TensorBoard_multi_input_output(self):
     np.random.seed(1337)
     tmpdir = self.get_temp_dir()
@@ -851,7 +851,7 @@ class KerasCallbacksTest(test.TestCase):
                           callbacks=callbacks_factory(histogram_freq=1))
       assert os.path.isdir(filepath)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_Tensorboard_histogram_summaries_in_test_function(self):
 
     class FileWriterStub(object):
@@ -929,7 +929,7 @@ class KerasCallbacksTest(test.TestCase):
 
       self.assertAllEqual(tsb.writer.steps_seen, [0, 1, 2, 3, 4, 5])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_Tensorboard_histogram_summaries_with_generator(self):
     np.random.seed(1337)
     tmpdir = self.get_temp_dir()
diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index 6d9d9a2fca..bc33a3ea7f 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -332,7 +332,7 @@ class TestWeightSavingAndLoading(test.TestCase, parameterized.TestCase):
 
 class TestWholeModelSaving(test.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_sequential_model_saving(self):
     if h5py is None:
       self.skipTest('h5py required to run this test')
@@ -635,7 +635,7 @@ class TestWholeModelSaving(test.TestCase):
       os.close(fd)
       os.remove(fname)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_saving_model_with_long_weights_names(self):
     if h5py is None:
       self.skipTest('h5py required to run this test')
@@ -756,7 +756,7 @@ class SubclassedModel(training.Model):
 
 class TestWeightSavingAndLoadingTFFormat(test.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_keras_optimizer_warning(self):
     graph = ops.Graph()
     with graph.as_default(), self.session(graph):
diff --git a/tensorflow/python/keras/engine/sequential_test.py b/tensorflow/python/keras/engine/sequential_test.py
index 001c3d749b..10f69da061 100644
--- a/tensorflow/python/keras/engine/sequential_test.py
+++ b/tensorflow/python/keras/engine/sequential_test.py
@@ -226,7 +226,7 @@ class TestSequential(keras_parameterized.TestCase):
     inner_model.trainable = True
     self.assertEqual(len(model.trainable_weights), 4)
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_sequential_update_disabling(self):
     val_a = np.random.random((10, 4))
     val_out = np.random.random((10, 4))
diff --git a/tensorflow/python/keras/engine/topology_test.py b/tensorflow/python/keras/engine/topology_test.py
index 03bfd35589..4071e2c091 100644
--- a/tensorflow/python/keras/engine/topology_test.py
+++ b/tensorflow/python/keras/engine/topology_test.py
@@ -107,6 +107,7 @@ class TopologyConstructionTest(test.TestCase):
     self.assertEqual(len(network.updates), 5)
     self.assertEqual(len(network.get_updates_for(x4)), 2)
 
+  @test_util.run_v1_only('b/120545219')
   def test_get_updates_bn(self):
     x1 = input_layer_lib.Input(shape=(1,))
     layer = keras.layers.BatchNormalization()
@@ -833,7 +834,7 @@ class TopologyConstructionTest(test.TestCase):
       output_val_2 = m2.predict(x_val)
       self.assertAllClose(output_val, output_val_2, atol=1e-6)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_explicit_training_argument(self):
     with self.cached_session():
       a = keras.layers.Input(shape=(2,))
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index c3c3f06ffd..91a0c7cc2f 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -471,7 +471,7 @@ class TrainingTest(keras_parameterized.TestCase):
         metrics=['accuracy'],
         run_eagerly=testing_utils.should_run_eagerly())
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_that_trainable_disables_updates(self):
     val_a = np.random.random((10, 4))
     val_out = np.random.random((10, 4))
@@ -864,6 +864,7 @@ class LossWeightingTest(keras_parameterized.TestCase):
     self.assertLess(score[0], ref_score[0])
 
   @keras_parameterized.run_all_keras_modes
+  @tf_test_util.run_v1_only('b/120545219')
   def test_sample_weights(self):
     num_classes = 5
     batch_size = 5
@@ -961,6 +962,7 @@ class LossWeightingTest(keras_parameterized.TestCase):
       self.assertTrue(msg_found)
 
   @keras_parameterized.run_all_keras_modes
+  @tf_test_util.run_v1_only('b/120545219')
   # TODO(b/120562577): Test failing with assertion error.
   def DISABLED_test_temporal_sample_weights(self):
     num_classes = 5
@@ -1283,7 +1285,7 @@ class LossMaskingTest(keras_parameterized.TestCase):
 
 class TestDynamicTrainability(keras_parameterized.TestCase):
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_trainable_warning(self):
     with self.cached_session():
       x = np.random.random((5, 3))
@@ -1297,7 +1299,7 @@ class TestDynamicTrainability(keras_parameterized.TestCase):
       model.train_on_batch(x, y)
       self.assertRaises(Warning)
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_trainable_argument(self):
     with self.cached_session():
       x = np.random.random((5, 3))
diff --git a/tensorflow/python/keras/integration_test.py b/tensorflow/python/keras/integration_test.py
index f1a0932613..c516514f63 100644
--- a/tensorflow/python/keras/integration_test.py
+++ b/tensorflow/python/keras/integration_test.py
@@ -35,7 +35,7 @@ class KerasIntegrationTest(test.TestCase):
   def test_version(self):
     self.assertTrue(keras.__version__.endswith('-tf'))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_vector_classification_sequential(self):
     with self.cached_session():
       np.random.seed(1337)
@@ -134,6 +134,7 @@ class KerasIntegrationTest(test.TestCase):
                           verbose=2)
       self.assertGreater(history.history['val_acc'][-1], 0.7)
 
+  @test_util.run_v1_only('b/120545219')
   def test_image_classification_sequential(self):
     with self.cached_session():
       np.random.seed(1337)
@@ -168,7 +169,7 @@ class KerasIntegrationTest(test.TestCase):
                           verbose=2)
       self.assertGreater(history.history['val_acc'][-1], 0.7)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_video_classification_functional(self):
     with self.cached_session():
       np.random.seed(1337)
@@ -197,7 +198,7 @@ class KerasIntegrationTest(test.TestCase):
                           verbose=2)
       self.assertGreater(history.history['val_acc'][-1], 0.7)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_vector_classification_shared_sequential(self):
     # Test that Sequential models that feature internal updates
     # and internal losses can be shared.
@@ -232,7 +233,7 @@ class KerasIntegrationTest(test.TestCase):
                           verbose=2)
       self.assertGreater(history.history['val_acc'][-1], 0.7)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_vector_classification_shared_model(self):
     # Test that functional models that feature internal updates
     # and internal losses can be shared.
diff --git a/tensorflow/python/keras/layers/lstm_test.py b/tensorflow/python/keras/layers/lstm_test.py
index 3f89cc398e..aea4261502 100644
--- a/tensorflow/python/keras/layers/lstm_test.py
+++ b/tensorflow/python/keras/layers/lstm_test.py
@@ -115,7 +115,7 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
     self.assertEqual(layer.cell.bias.constraint, b_constraint)
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_with_masking_layer_LSTM(self):
     layer_class = keras.layers.LSTM
     inputs = np.random.random((2, 3, 4))
@@ -128,7 +128,7 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
                   optimizer=RMSPropOptimizer(0.01))
     model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_masking_with_stacking_LSTM(self):
     inputs = np.random.random((2, 3, 4))
     targets = np.abs(np.random.random((2, 3, 5)))
@@ -314,7 +314,7 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
 
 class LSTMLayerGraphOnlyTest(test.TestCase):
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_statefulness_LSTM(self):
     num_samples = 2
     timesteps = 3
diff --git a/tensorflow/python/keras/layers/normalization_test.py b/tensorflow/python/keras/layers/normalization_test.py
index 9138c0a08a..c1acc2eb3a 100644
--- a/tensorflow/python/keras/layers/normalization_test.py
+++ b/tensorflow/python/keras/layers/normalization_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.training import gradient_descent
 
 
 @tf_test_util.run_all_in_graph_and_eager_modes
+@tf_test_util.run_v1_only('b/120545219')
 class NormalizationLayersTest(test.TestCase):
 
   def test_basic_batchnorm(self):
@@ -227,6 +228,7 @@ class NormalizationLayersTest(test.TestCase):
       norm(inp)
 
 
+@tf_test_util.run_v1_only('b/120545219')
 class NormalizationLayersGraphModeOnlyTest(test.TestCase):
 
   def test_shared_batchnorm(self):
@@ -301,7 +303,6 @@ class NormalizationLayersGraphModeOnlyTest(test.TestCase):
       x2 = model.predict(val_a)
       self.assertAllClose(x1, x2, atol=1e-7)
 
-  @tf_test_util.run_deprecated_v1
   def test_batchnorm_trainable(self):
     """Tests that batchnorm layer is trainable when learning phase is enabled.
 
diff --git a/tensorflow/python/keras/layers/simplernn_test.py b/tensorflow/python/keras/layers/simplernn_test.py
index b49b159b71..bb3fea2692 100644
--- a/tensorflow/python/keras/layers/simplernn_test.py
+++ b/tensorflow/python/keras/layers/simplernn_test.py
@@ -98,7 +98,7 @@ class SimpleRNNLayerTest(test.TestCase):
     self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
     self.assertEqual(layer.cell.bias.constraint, b_constraint)
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_with_masking_layer_SimpleRNN(self):
     layer_class = keras.layers.SimpleRNN
     inputs = np.random.random((2, 3, 4))
@@ -121,7 +121,7 @@ class SimpleRNNLayerTest(test.TestCase):
 
 class SimpleRNNLayerGraphOnlyTest(test.TestCase):
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_statefulness_SimpleRNN(self):
     num_samples = 2
     timesteps = 3
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 33351948ee..932b2d331d 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -56,9 +56,9 @@ _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites)
 _config = config_pb2.ConfigProto(graph_options=_graph_options)
 
 
+@test_util.run_v1_only('b/120545219')
 class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
 
-  @test_util.run_deprecated_v1
   def test_unifiedLSTM(self):
     input_shape = 10
     rnn_state_size = 8
@@ -103,7 +103,6 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
         self.assertNotEqual(existing_loss, loss_value)
         existing_loss = loss_value
 
-  @test_util.run_deprecated_v1
   def test_unifiedLSTM_with_cond(self):
     # This test is to demonstrate the graph rewrite of grappler plugin under
     # the condition that the function returns different number of internal
@@ -692,6 +691,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     model.train_on_batch([main_inputs] + initial_state, targets)
 
 
+@test_util.run_v1_only('b/120545219')
 class LSTMLayerGraphOnlyTest(test.TestCase):
 
   def test_statefulness_LSTM(self):
diff --git a/tensorflow/python/keras/layers/wrappers_test.py b/tensorflow/python/keras/layers/wrappers_test.py
index b9196416dd..727f33dadc 100644
--- a/tensorflow/python/keras/layers/wrappers_test.py
+++ b/tensorflow/python/keras/layers/wrappers_test.py
@@ -165,6 +165,7 @@ class TimeDistributedTest(test.TestCase):
       y = model.predict(np.random.random((10, 3, 2)))
       self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1)
 
+  @tf_test_util.run_v1_only('b/120545219')
   def test_TimeDistributed_batchnorm(self):
     with self.cached_session():
       # test that wrapped BN updates still work.
@@ -187,6 +188,7 @@ class TimeDistributedTest(test.TestCase):
       # Verify input_map has one mapping from inputs to reshaped inputs.
       self.assertEqual(len(td._input_map.keys()), 1)
 
+  @tf_test_util.run_v1_only('b/120545219')
   def test_TimeDistributed_trainable(self):
     # test layers that need learning_phase to be set
     x = keras.layers.Input(shape=(3, 2))
@@ -201,7 +203,7 @@ class TimeDistributedTest(test.TestCase):
     assert len(layer.updates) == 2
     assert len(layer.trainable_weights) == 2
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(self):
     with self.cached_session():
       # test with unspecified shape and Embeddings with mask_zero
@@ -234,7 +236,7 @@ class TimeDistributedTest(test.TestCase):
         self.assertAllEqual(mask_outputs_val[i], ref_mask_val[i])
       self.assertIs(mask_outputs[-1], None)  # final layer
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_TimeDistributed_with_masking_layer(self):
     with self.cached_session():
       # test with Masking layer
@@ -377,7 +379,7 @@ class BidirectionalTest(test.TestCase):
       model.compile(loss='mse', optimizer='sgd')
       model.fit(x, y, epochs=1, batch_size=1)
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_Bidirectional_merged_value(self):
     rnn = keras.layers.LSTM
     samples = 2
@@ -508,7 +510,7 @@ class BidirectionalTest(test.TestCase):
       layer.trainable = True
       assert len(layer.trainable_weights) == 6
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_Bidirectional_updates(self):
     with self.cached_session():
       x = keras.layers.Input(shape=(3, 2))
diff --git a/tensorflow/python/keras/model_subclassing_test.py b/tensorflow/python/keras/model_subclassing_test.py
index 620275e50f..553c7fb009 100644
--- a/tensorflow/python/keras/model_subclassing_test.py
+++ b/tensorflow/python/keras/model_subclassing_test.py
@@ -187,6 +187,7 @@ def get_nested_model_3(input_dim, num_classes):
 
 
 @test_util.run_all_in_graph_and_eager_modes
+@test_util.run_v1_only('b/120545219')
 class ModelSubclassingTest(test.TestCase):
 
   def test_custom_build(self):
@@ -915,6 +916,7 @@ class ModelSubclassingTest(test.TestCase):
       self.assertEqual(1, len(model.get_updates_for(x)))
 
 
+@test_util.run_v1_only('b/120545219')
 class GraphSpecificModelSubclassingTests(test.TestCase):
 
   @test_util.run_deprecated_v1
diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py
index 907ac41d0e..c466d94fed 100644
--- a/tensorflow/python/keras/models_test.py
+++ b/tensorflow/python/keras/models_test.py
@@ -69,7 +69,7 @@ def sequential_model(add_input_layer, include_input_shape=True):
 
 class TestModelCloning(test.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_clone_sequential_model(self):
     with self.cached_session():
       val_a = np.random.random((10, 4))
@@ -102,10 +102,9 @@ class TestModelCloning(test.TestCase):
       new_model.compile('rmsprop', 'mse')
       new_model.train_on_batch(None, val_out)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_clone_sequential_model_input_layer(self):
 
-    @test_util.run_deprecated_v1
     def test_input_layer(include_inputs):
       with self.cached_session():
         val_a = np.random.random((10, 4))
@@ -142,7 +141,7 @@ class TestModelCloning(test.TestCase):
     test_input_layer(True)
     test_input_layer(False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_clone_functional_model(self):
     with self.cached_session():
       val_a = np.random.random((10, 4))
@@ -318,6 +317,7 @@ class TestModelDeepCopy(test.TestCase):
                       model_copy.get_weights()[0]))
 
 
+@test_util.run_v1_only('b/120545219')
 class TestCloneAndBuildModel(test.TestCase):
 
   def test_clone_and_build_non_compiled_model(self):
@@ -404,7 +404,6 @@ class TestCloneAndBuildModel(test.TestCase):
       new_model.train_on_batch(inp, out)
       new_model.evaluate(inp, out)
 
-  @test_util.run_deprecated_v1
   def test_clone_and_build_compiled_sequential_model(self):
     with self.cached_session():
       model = keras.models.Sequential()
@@ -417,7 +416,6 @@ class TestCloneAndBuildModel(test.TestCase):
 
     self._clone_and_build_test_helper(model)
 
-  @test_util.run_deprecated_v1
   def test_clone_and_build_functional_model(self):
     with self.cached_session():
       input_a = keras.Input(shape=(4,))
@@ -434,7 +432,6 @@ class TestCloneAndBuildModel(test.TestCase):
 
     self._clone_and_build_test_helper(model)
 
-  @test_util.run_deprecated_v1
   def test_clone_and_build_subclassed_model(self):
     class SubclassedModel(keras.Model):
 
@@ -483,11 +480,9 @@ class TestCloneAndBuildModel(test.TestCase):
   def test_replace_tf_optimizer_iterations_variable(self):
     self.assert_optimizer_iterations_increases(adam.AdamOptimizer(0.01))
 
-  @test_util.run_deprecated_v1
   def test_replace_keras_optimizer_iterations_variable(self):
     self.assert_optimizer_iterations_increases('adam')
 
-  @test_util.run_deprecated_v1
   def test_clone_and_build_sequential_model_without_inputs_defined(self):
     with self.cached_session():
       model = sequential_model(False, False)
diff --git a/tensorflow/python/keras/optimizers_test.py b/tensorflow/python/keras/optimizers_test.py
index d3cacb702c..77104a5d4d 100644
--- a/tensorflow/python/keras/optimizers_test.py
+++ b/tensorflow/python/keras/optimizers_test.py
@@ -91,26 +91,26 @@ def _test_optimizer(optimizer, target=0.75):
 
 class KerasOptimizersTest(test.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_sgd(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.SGD(lr=0.01,
                                            momentum=0.9,
                                            nesterov=True))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_rmsprop(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.RMSprop())
       _test_optimizer(keras.optimizers.RMSprop(decay=1e-3))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_adagrad(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adagrad())
       _test_optimizer(keras.optimizers.Adagrad(decay=1e-3))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_adadelta(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adadelta(), target=0.6)
@@ -119,32 +119,32 @@ class KerasOptimizersTest(test.TestCase):
       # the accuracy.
       _test_optimizer(keras.optimizers.Adadelta(decay=1e-3), target=0.4)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_adam(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adam())
       _test_optimizer(keras.optimizers.Adam(decay=1e-3))
       _test_optimizer(keras.optimizers.Adam(amsgrad=True))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_adamax(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adamax())
       _test_optimizer(keras.optimizers.Adamax(decay=1e-3))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_nadam(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Nadam())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_clipnorm(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.SGD(lr=0.01,
                                            momentum=0.9,
                                            clipnorm=0.5))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def test_clipvalue(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.SGD(lr=0.01,
diff --git a/tensorflow/python/kernel_tests/atrous_convolution_test.py b/tensorflow/python/kernel_tests/atrous_convolution_test.py
index 6b16fca29d..2fb8a37e2b 100644
--- a/tensorflow/python/kernel_tests/atrous_convolution_test.py
+++ b/tensorflow/python/kernel_tests/atrous_convolution_test.py
@@ -110,6 +110,7 @@ class AtrousConvolutionTest(test.TestCase):
 
     add_check(check, y1, y2)
 
+  @test_util.run_v1_only("b/120545219")
   def test_unknown_spatial_dims_for_channel_last_format(self):
     x = array_ops.placeholder(dtypes.float32, [1, None, None, 10])
     w = array_ops.zeros([3, 3, 10, 20])
@@ -117,6 +118,7 @@ class AtrousConvolutionTest(test.TestCase):
         x, w, "VALID", dilation_rate=[2, 2], data_format="NHWC")
     self.assertEqual(y.shape.as_list(), [1, None, None, 20])
 
+  @test_util.run_v1_only("b/120545219")
   def test_unknown_spatial_dims_for_channel_first_format(self):
     x = array_ops.placeholder(dtypes.float32, [1, 10, None, None])
     w = array_ops.zeros([3, 3, 10, 20])
@@ -262,6 +264,7 @@ class AtrousConvolutionTest(test.TestCase):
     err_tolerance = 1e-3
     self.assertLess(err, err_tolerance)
 
+  @test_util.run_v1_only("b/120545219")
   def testGradient(self):
     with self.cached_session():
       for padding in ["SAME", "VALID"]:
diff --git a/tensorflow/python/kernel_tests/base64_ops_test.py b/tensorflow/python/kernel_tests/base64_ops_test.py
index bb903d827f..381f190b8d 100644
--- a/tensorflow/python/kernel_tests/base64_ops_test.py
+++ b/tensorflow/python/kernel_tests/base64_ops_test.py
@@ -31,6 +31,7 @@ from tensorflow.python.ops import string_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_v1_only("b/120545219")
 class Base64OpsTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/kernel_tests/batch_matmul_op_test.py b/tensorflow/python/kernel_tests/batch_matmul_op_test.py
index f2f0291b89..c32a6c7e41 100644
--- a/tensorflow/python/kernel_tests/batch_matmul_op_test.py
+++ b/tensorflow/python/kernel_tests/batch_matmul_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python import tf2
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import math_ops
@@ -135,6 +136,7 @@ class BatchMatmulOpTest(test.TestCase):
 
 def _GetBatchMatmulOpTest(dtype, adjoint_a, adjoint_b, use_static_shape):
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     np.random.seed(42)
     self._testNonEmpty(dtype, adjoint_a, adjoint_b, use_static_shape)
@@ -184,6 +186,7 @@ class BatchMatmulGradientTest(test.TestCase):
 
 def _GetBatchMatmulGradientTest(dtype, adjoint_a, adjoint_b):
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     self._compare(1, 2, 3, 5, dtype, adjoint_a, adjoint_b)
     self._compare(3, 4, 7, 10, dtype, adjoint_a, adjoint_b)
diff --git a/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
index 390672febe..2b9863fb89 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
@@ -82,7 +82,7 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
     self.max_elements = 1 << 16
     self.num_quantiles = constant_op.constant(3, dtype=dtypes.int64)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testBasicQuantileBucketsSingleResource(self):
     with self.cached_session() as sess:
       quantile_accumulator_handle = self.create_resource("floats", self.eps,
@@ -107,7 +107,7 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self._feature_0_quantiles, quantiles[0].eval())
       self.assertAllClose(self._feature_1_quantiles, quantiles[1].eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testBasicQuantileBucketsMultipleResources(self):
     with self.cached_session() as sess:
       quantile_accumulator_handle_0 = self.create_resource("float_0", self.eps,
@@ -142,7 +142,7 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self._feature_0_quantiles, quantiles[0].eval())
       self.assertAllClose(self._feature_1_quantiles, quantiles[1].eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSaveRestoreAfterFlush(self):
     save_dir = os.path.join(self.get_temp_dir(), "save_restore")
     save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), "hash")
@@ -175,7 +175,7 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self._feature_0_boundaries, buckets[0].eval())
       self.assertAllClose(self._feature_1_boundaries, buckets[1].eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSaveRestoreBeforeFlush(self):
     save_dir = os.path.join(self.get_temp_dir(), "save_restore")
     save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), "hash")
diff --git a/tensorflow/python/kernel_tests/checkpoint_ops_test.py b/tensorflow/python/kernel_tests/checkpoint_ops_test.py
index b8c8c9edb5..6e289bf9b7 100644
--- a/tensorflow/python/kernel_tests/checkpoint_ops_test.py
+++ b/tensorflow/python/kernel_tests/checkpoint_ops_test.py
@@ -105,6 +105,7 @@ class GenerateVocabRemappingTest(test.TestCase):
       self.assertAllEqual(expected_num_present, self.evaluate(num_present))
 
 
+@test_util.run_v1_only('b/120545219')
 class LoadAndRemapMatrixTest(test.TestCase):
   """Tests for the load_and_remap_matrix() op."""
 
diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py
index f3947236b1..a08cfe960d 100644
--- a/tensorflow/python/kernel_tests/cholesky_op_test.py
+++ b/tensorflow/python/kernel_tests/cholesky_op_test.py
@@ -155,6 +155,7 @@ class CholeskyOpTest(test.TestCase):
           np.array([[[1., 2., 3.], [3., 4., 5.]], [[1., 2., 3.], [3., 4., 5.]]
                    ]))
 
+  @test_util.run_v1_only("b/120545219")
   def testWrongDimensions(self):
     tensor3 = constant_op.constant([1., 2.])
     with self.assertRaises(ValueError):
@@ -233,6 +234,7 @@ class CholeskyGradTest(test.TestCase):
     self.runFiniteDifferences(
         shapes, dtypes=(dtypes_lib.float64,), scalarTest=True)
 
+  @test_util.run_v1_only("b/120545219")
   def testTwoBlockMatrixComplexFloat(self):
     np.random.seed(0)
     shapes = self.getShapes([2 * self._backprop_block_size + 1])
diff --git a/tensorflow/python/kernel_tests/cond_v2_test.py b/tensorflow/python/kernel_tests/cond_v2_test.py
index 843d007cc8..8fe3ba41e2 100644
--- a/tensorflow/python/kernel_tests/cond_v2_test.py
+++ b/tensorflow/python/kernel_tests/cond_v2_test.py
@@ -170,6 +170,7 @@ class CondV2Test(test.TestCase):
         self.assertRegexpMatches(
             cond2_op.get_attr("else_branch").name, r"foo_cond_1_false_\d*")
 
+  @test_util.run_v1_only("b/120545219")
   def testDefunInCond(self):
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
@@ -189,6 +190,7 @@ class CondV2Test(test.TestCase):
     self._testCond(true_fn, false_fn, [x, y])
     self._testCond(true_fn, false_fn, [y])
 
+  @test_util.run_deprecated_v1
   def testNestedDefunInCond(self):
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
@@ -213,6 +215,7 @@ class CondV2Test(test.TestCase):
     self._testCond(true_fn, false_fn, [x, y])
     self._testCond(true_fn, false_fn, [y])
 
+  @test_util.run_deprecated_v1
   def testDoubleNestedDefunInCond(self):
     x = constant_op.constant(1.0, name="x")
     y = constant_op.constant(2.0, name="y")
@@ -773,6 +776,7 @@ class CondV2Test(test.TestCase):
     self.assertAllEqual(
         self.evaluate(output_t), [-5, -4, -3, -2, -1, 0, 1, 4, 9, 16])
 
+  @test_util.run_deprecated_v1
   def testForwardPassRewrite(self):
     x = constant_op.constant(1.0, name="x")
     output = cond_v2.cond_v2(constant_op.constant(True),
diff --git a/tensorflow/python/kernel_tests/conditional_accumulator_test.py b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
index 5847e4639b..ce34201706 100644
--- a/tensorflow/python/kernel_tests/conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
@@ -199,7 +199,7 @@ class ConditionalAccumulatorTest(test.TestCase):
           is_all_equal &= (val[i][j] == elems_ave[i][j])
       self.assertTrue(is_all_equal)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAccumulatorWrongDynamicShape(self):
     with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
@@ -321,7 +321,7 @@ class ConditionalAccumulatorTest(test.TestCase):
           shape=tensor_shape.TensorShape([1]),
           reduction_type="Invalid")
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAccumulatorInvalidTakeGrad(self):
     with self.cached_session():
       q = data_flow_ops.ConditionalAccumulator(
@@ -435,7 +435,7 @@ class ConditionalAccumulatorTest(test.TestCase):
                                    if x >= ls) / sum(1 for x in local_steps
                                                      if x >= ls), val)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelApplyGrad(self):
     with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
@@ -461,7 +461,7 @@ class ConditionalAccumulatorTest(test.TestCase):
 
       self.assertEqual(val, sum(elems) / len(elems))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelTakeGrad(self):
     with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
@@ -494,7 +494,7 @@ class ConditionalAccumulatorTest(test.TestCase):
 
       self.assertItemsEqual(elems, results)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAccumulatorApplyAndBlockingTake(self):
     with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
@@ -528,7 +528,7 @@ class ConditionalAccumulatorTest(test.TestCase):
     with self.assertRaisesOpError("was cancelled"):
       self.evaluate(takeg_op)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAccumulatorCancel(self):
     with self.cached_session() as sess:
       q = data_flow_ops.ConditionalAccumulator(
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 95420fc0f1..0fd293ebba 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -129,6 +129,7 @@ def isum(s, maximum_iterations=None):
 @test_util.with_control_flow_v2
 class ControlFlowTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testRefIdentity(self):
     with self.cached_session():
       v = variables.VariableV1(7)
@@ -141,7 +142,7 @@ class ControlFlowTest(test.TestCase):
       variables.global_variables_initializer().run()
       self.assertEqual(9, self.evaluate(v2))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRefEnter(self):
     with self.cached_session():
       v = variables.VariableV1(7)
@@ -155,7 +156,7 @@ class ControlFlowTest(test.TestCase):
       variables.global_variables_initializer().run()
       self.assertEqual(9, self.evaluate(v3))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRefSwitch(self):
     with self.cached_session():
       v = variables.VariableV1(7)
@@ -193,6 +194,7 @@ class ControlFlowTest(test.TestCase):
           v, "frame2", is_constant=False)
       self.assertEqual(enter_v_non_constant.shape, None)
 
+  @test_util.run_v1_only("b/120545219")
   def testSwitchMergeIndexedSlices(self):
     with self.cached_session():
       values = constant_op.constant([1, 2, 3, 4, 5, 6])
@@ -207,6 +209,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(np.arange(1, 7), val)
     self.assertAllEqual(np.arange(0, 12, 2), ind)
 
+  @test_util.run_v1_only("b/120545219")
   def testSwitchDeadBranch(self):
     with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
@@ -219,6 +222,7 @@ class ControlFlowTest(test.TestCase):
           lambda e: "Retval[0] does not have value" in str(e)):
         self.evaluate(dead_branch)
 
+  @test_util.run_v1_only("b/120545219")
   def testSwitchMergeLess(self):
     with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
@@ -231,6 +235,7 @@ class ControlFlowTest(test.TestCase):
       result = self.evaluate(merge_op)
     self.assertAllEqual(np.arange(1, 7), result)
 
+  @test_util.run_v1_only("b/120545219")
   def testSwitchMergeAddIdentity(self):
     with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
@@ -244,6 +249,7 @@ class ControlFlowTest(test.TestCase):
       result = self.evaluate(merge_op)
     self.assertAllEqual(np.array([x + 1 for x in [1, 2, 3, 4, 5, 6]]), result)
 
+  @test_util.run_v1_only("b/120545219")
   def testSwitchMergeAddMul(self):
     with self.cached_session():
       data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
@@ -258,6 +264,7 @@ class ControlFlowTest(test.TestCase):
       result = self.evaluate(merge_op)
     self.assertAllEqual(np.array([x * 5 for x in [1, 2, 3, 4, 5, 6]]), result)
 
+  @test_util.run_v1_only("b/120545219")
   def testLoop_false(self):
     with self.cached_session():
       false = ops.convert_to_tensor(False)
@@ -302,6 +309,7 @@ class ControlFlowTest(test.TestCase):
       result = self.evaluate(exit_i)
     self.assertAllEqual(10, result)
 
+  @test_util.run_v1_only("b/120545219")
   def testLoop_2(self):
     with self.cached_session():
       zero = constant_op.constant(0)
@@ -328,6 +336,7 @@ class ControlFlowTest(test.TestCase):
       result = self.evaluate(exit_i)
     self.assertAllEqual(10, result)
 
+  @test_util.run_v1_only("b/120545219")
   def testDifferentFrame(self):
     with self.cached_session():
       data = array_ops.placeholder(dtypes.float32, shape=[])
@@ -362,6 +371,7 @@ class ControlFlowTest(test.TestCase):
         lambda: math_ops.subtract(x, 1.))
     self.assertEqual(b.shape, tensor_shape.scalar())
 
+  @test_util.run_v1_only("b/120545219")
   def testFetchable(self):
     with self.cached_session() as sess:
       x = array_ops.placeholder(dtypes.float32)
@@ -378,6 +388,7 @@ class ControlFlowTest(test.TestCase):
               sess.run(t, feed_dict={x: 3})
 
   @test_util.disable_control_flow_v2("Not relevant")
+  @test_util.run_v1_only("b/120545219")
   def testFeedable(self):
     with self.cached_session() as sess:
       c = constant_op.constant(2)
@@ -395,6 +406,7 @@ class ControlFlowTest(test.TestCase):
             with self.assertRaisesRegexp(ValueError, "may not be fed"):
               sess.run(r, feed_dict={t: 3})
 
+  @test_util.run_v1_only("b/120545219")
   def testCondIndexedSlices(self):
     with self.cached_session():
       values = constant_op.constant(10)
@@ -410,6 +422,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(11, val)
     self.assertAllEqual(0, ind)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondSparseTensor(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
@@ -427,6 +440,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual([[1], [4]], r.indices.eval())
       self.assertAllEqual(r.values.get_shape(), (2,))
 
+  @test_util.run_v1_only("b/120545219")
   def testCondResource(self):
 
     with self.cached_session():
@@ -441,6 +455,7 @@ class ControlFlowTest(test.TestCase):
 
       self.assertEqual(1.0, control_flow_ops.cond(rv, case, lambda: t).eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testCondWithTensorArrayGrad(self):
     with self.cached_session() as sess:
       with ops.device(test.gpu_device_name()):
@@ -455,6 +470,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(sess.run(g, {pred: False}), [0.0, 0.0, 0.0])
 
   @test_util.disable_control_flow_v2("b/113293074")
+  @test_util.run_v1_only("b/120545219")
   def testCondIndexedSlicesDifferentTypes(self):
     with self.cached_session():
       values = constant_op.constant(10)
@@ -472,6 +488,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(0, ind)
     self.assertTrue(ind.dtype == np.int64)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondColocation(self):
     with self.session(use_gpu=True):
       with ops.device("/cpu:0"):
@@ -576,6 +593,7 @@ class ControlFlowTest(test.TestCase):
         alive, count = body(i)
       self.assertAllEqual(4, self.evaluate(count))
 
+  @test_util.run_v1_only("b/120545219")
   def testCond_6(self):
     with self.cached_session():
       v1 = variables.Variable([7])
@@ -671,6 +689,7 @@ class ControlFlowTest(test.TestCase):
       test_result = self.evaluate(r)
       self.assertDictEqual({"a": {"c": 210}, "b": {"d": 210}}, test_result)
 
+  @test_util.run_v1_only("b/120545219")
   def testCheckNestedOutputStruct(self):
     with self.cached_session() as sess:
       x = constant_op.constant(10)
@@ -701,7 +720,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual([2.0], self.evaluate(r))
 
   @test_util.disable_control_flow_v2("b/79881896 (control deps)")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCondWithControl(self):
     with self.cached_session():
       control_holder = array_ops.placeholder(dtypes.float32, shape=())
@@ -717,6 +736,7 @@ class ControlFlowTest(test.TestCase):
           lambda: constant_op.constant(1))
       self.assertEqual(5, self.evaluate(r))
 
+  @test_util.run_v1_only("b/120545219")
   def testUninitializedRefIdentity(self):
     with self.cached_session() as sess:
       v = gen_state_ops.variable(
@@ -771,6 +791,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(pred, fn1, fn2)
       self.evaluate(r)
 
+  @test_util.run_v1_only("b/120545219")
   def testCondGrad_1(self):
     with self.cached_session():
       x = constant_op.constant(10.0, name="x")
@@ -845,6 +866,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(sess.run(grad_grad, {pred: True, x: 1.0, y: 2.0}), 0.0)
       self.assertEqual(sess.run(grad_grad, {pred: False, x: 1.0, y: 2.0}), 0.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testNestedCond_Simple(self):
     with self.cached_session():
       x = constant_op.constant(0., name="X")
@@ -861,7 +883,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(1.0, self.evaluate(result))
 
   @test_util.disable_control_flow_v2("b/113327884")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCondGrad_Gather(self):
     with self.cached_session() as sess:
       v1 = variables.Variable([1.0, 42.0])
@@ -885,6 +907,7 @@ class ControlFlowTest(test.TestCase):
       ]
       self.assertAllEqual(dense_gv, [0.0, 2.0])
 
+  @test_util.run_v1_only("b/120545219")
   def testCondPredicateTensor(self):
     """Regression test for lowering predicate from non-first output of an op."""
 
@@ -1011,6 +1034,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10000, self.evaluate(r))
 
   @test_util.disable_control_flow_v2("b/79881896 (control deps)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileExternalControlDependencies(self):
     with self.cached_session():
       v = variables.Variable(0.0)
@@ -1027,6 +1051,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(v.eval(), 1.0)
 
   @test_util.disable_control_flow_v2("b/79881896 (control deps)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileExternalControlDependenciesNoInput(self):
     with self.cached_session():
       v = variables.Variable(0.0)
@@ -1043,7 +1068,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(v.eval(), 1.0)
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileWithRefs_1(self):
     with self.cached_session() as sess:
       x = variables.VariableV1(0)._ref()  # pylint: disable=protected-access
@@ -1080,6 +1105,7 @@ class ControlFlowTest(test.TestCase):
       r = isum(s, maximum_iterations=3)
       self.assertAllEqual([1 + 3, 2 + 3, 3 + 3, 4 + 3, 5 + 3], self.evaluate(r))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileWithMaximumIterationsAndSingleArgument(self):
     with self.cached_session():
       r = control_flow_ops.while_loop(
@@ -1087,6 +1113,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(1, self.evaluate(r))
 
   @test_util.disable_control_flow_v2("b/115776323 (max_iters)")
+  @test_util.run_v1_only("b/120545219")
   def testSingleNestedMaximumIterationsWhileLoopGradientInXLAContext(self):
     v = constant_op.constant(1.0)
 
@@ -1112,6 +1139,7 @@ class ControlFlowTest(test.TestCase):
     # Should execute without issue.
     self.assertEqual(3, self.evaluate(loop_execute))
 
+  @test_util.run_v1_only("b/120545219")
   def testInvalidMaximumIterationsWhileLoopGradientInXLAContext(self):
     v = constant_op.constant(1.0)
 
@@ -1172,6 +1200,7 @@ class ControlFlowTest(test.TestCase):
           r"context '.*' \(currently defined in '.*'\)"):
         _ = gradients_impl.gradients(loop_with_maxiter, v)
 
+  @test_util.run_v1_only("b/120545219")
   def testInvalidMaximumIterationsFromSiblingContextWhileLoopInXLAContext(self):
     v = constant_op.constant(1.0)
 
@@ -1215,6 +1244,7 @@ class ControlFlowTest(test.TestCase):
         _ = gradients_impl.gradients(loop, v)
 
   @test_util.disable_control_flow_v2("b/118457764")
+  @test_util.run_v1_only("b/120545219")
   def testNestedWhileLoopWithMaxItersFromOuterContextInXLAContext(self):
     v = constant_op.constant(1.0)
 
@@ -1326,6 +1356,7 @@ class ControlFlowTest(test.TestCase):
       result = r[3].eval()
     self.assertAllEqual(42, result)
 
+  @test_util.run_v1_only("b/120545219")
   def testWhile_5(self):
     with self.cached_session():
 
@@ -1351,6 +1382,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllEqual(np.array([0, 1, 2, 3, 4, 5, 6]), result)
 
   @test_util.disable_control_flow_v2("b/116338794 (buffer_reuse)")
+  @test_util.run_v1_only("b/120545219")
   def testBufferForwarding(self):
     run_options = config_pb2.RunOptions(
         trace_level=config_pb2.RunOptions.FULL_TRACE)
@@ -1435,6 +1467,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n], parallel_iterations=20)
       self.assertEqual([10000], self.evaluate(r))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileShapeInference(self):
     with self.cached_session():
       i = constant_op.constant(0)
@@ -1461,6 +1494,7 @@ class ControlFlowTest(test.TestCase):
         r = control_flow_ops.while_loop(c, b, [i, m])
 
   @test_util.disable_control_flow_v2("b/116328420 (SparseTensor)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileShapeInferenceSparseTensor(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
@@ -1493,7 +1527,7 @@ class ControlFlowTest(test.TestCase):
             [i.get_shape(), tensor_shape.TensorShape([5])])
 
   @test_util.disable_control_flow_v2("b/116282023 (IndexedSlices)")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileShapeInferenceIndexedSlices(self):
     with self.cached_session():
       values = constant_op.constant([[2.0, 4.0], [3.0, 5.0]], name="values")
@@ -1584,6 +1618,7 @@ class ControlFlowTest(test.TestCase):
     self._testNestedWhile_2(use_gpu=False)
     self._testNestedWhile_2(use_gpu=True)
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileWithControl_1(self):
     with self.cached_session():
       n = constant_op.constant(0)
@@ -1615,6 +1650,7 @@ class ControlFlowTest(test.TestCase):
           condition, body, [r], parallel_iterations=1)
       self.assertAllEqual(12, self.evaluate(res))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileWithControl_3(self):
     with self.cached_session() as sess:
       b = array_ops.placeholder(dtypes.bool)
@@ -1624,6 +1660,7 @@ class ControlFlowTest(test.TestCase):
         r = control_flow_ops.while_loop(lambda x: x < 10, lambda x: x + c, [x0])
       self.assertEqual(10, sess.run(r, {b: True}))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileWithControl_4(self):
     with self.cached_session() as sess:
       b = array_ops.placeholder(dtypes.bool)
@@ -1635,6 +1672,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10, sess.run(r, {b: True}))
 
   @test_util.disable_control_flow_v2("b/79881896 (control_deps)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileWithControl_5(self):
     with self.cached_session() as sess:
       b = array_ops.placeholder(dtypes.bool)
@@ -1663,6 +1701,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(0, self.evaluate(loop))
 
   @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileCondWithControl_1(self):
     with self.cached_session():
       v = variable_scope.get_variable(
@@ -1686,6 +1725,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(65536.0, self.evaluate(v))
 
   @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileCondExitControl(self):
 
     with self.cached_session():
@@ -1855,6 +1895,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileUpdateVariable_2(self):
     with self.cached_session():
       select1 = variables.Variable([3.0, 4.0, 5.0])
@@ -1905,7 +1946,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileUpdateVariable_4(self):
     with self.cached_session():
       var_a = variables.Variable(0, name="a")
@@ -1934,7 +1975,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10, self.evaluate(var_b))
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileUpdateVariable_5(self):
     with self.cached_session():
       # Create some variables.
@@ -1965,6 +2006,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(10, self.evaluate(var_b))
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileUpdateVariable_6(self):
     with self.cached_session():
       # Create some variables.
@@ -1994,6 +2036,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(55, self.evaluate(var_b))
       self.assertEqual(10, self.evaluate(var_a))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileQueue_1(self):
     with self.cached_session():
       q = data_flow_ops.FIFOQueue(-1, dtypes.int32)
@@ -2012,6 +2055,7 @@ class ControlFlowTest(test.TestCase):
       for i in xrange(10):
         self.assertEqual([i], q.dequeue().eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileTimeOut(self):
     run_options = config_pb2.RunOptions(timeout_in_ms=1)
     with self.cached_session() as sess:
@@ -2023,6 +2067,7 @@ class ControlFlowTest(test.TestCase):
         sess.run(r, options=run_options)
 
   @test_util.disable_control_flow_v2("b/117119329 (stack)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileStack_1(self):
     with self.cached_session():
       s = gen_data_flow_ops.stack_v2(-1, dtypes.int32, stack_name="foo")
@@ -2092,10 +2137,12 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1024.0, self.evaluate(r))
 
   @test_util.disable_control_flow_v2("b/116351701 (colocation)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_ColocateGradients(self):
     self._testWhileGrad_ColocateGradients(colocate=False)
     self._testWhileGrad_ColocateGradients(colocate=True)
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_Square(self):
     with self.cached_session():
       v = constant_op.constant(2.0, name="v")
@@ -2107,6 +2154,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(1024.0, self.evaluate(r))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_Shape(self):
     with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=[None])
@@ -2137,6 +2185,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients([r, y], x)[0]
       self.assertAllClose([2.0, 4.0], sess.run(r, feed_dict={x: [1.0, 2.0]}))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_MultipleUses(self):
     with self.cached_session():
       v = constant_op.constant(2.0, name="v")
@@ -2148,6 +2197,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertEqual(524288.0, self.evaluate(r))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_LoopAdd(self):
     with self.cached_session():
       v = constant_op.constant(2.0, name="v")
@@ -2211,6 +2261,7 @@ class ControlFlowTest(test.TestCase):
   def testNestedWhileCondWhileGradGpu(self):
     self._testNestedWhileCondWhileGrad(use_gpu=True)
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_Variable(self):
     with self.cached_session():
       a = variables.Variable(3.0)
@@ -2236,6 +2287,7 @@ class ControlFlowTest(test.TestCase):
       variables.global_variables_initializer().run()
       self.assertAllClose(216.0, g[0].eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGradInCond(self):
 
     with self.cached_session():
@@ -2253,7 +2305,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(9.0, r.eval(feed_dict={x: 1.0}))
 
   @test_util.disable_control_flow_v2("b/116340060")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testGradInWhileWrtInitialLoopVal(self):
     with self.cached_session():
       x = array_ops.placeholder(dtypes.float32, shape=(), name="x")
@@ -2271,6 +2323,7 @@ class ControlFlowTest(test.TestCase):
           "loop invariants or wrt the input parameters to the loop body."):
         control_flow_ops.while_loop(lambda i, x: i < 3, body, [0, y])
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGradInWhile(self):
     with self.cached_session():
       n = ops.convert_to_tensor(1.0, name="n")
@@ -2287,6 +2340,7 @@ class ControlFlowTest(test.TestCase):
                                       [tensor_shape.unknown_shape()])
       self.assertAllClose(9.0, r.eval(feed_dict={x: 1.0}))
 
+  @test_util.run_v1_only("b/120545219")
   def testCondGradInNestedWhiles(self):
 
     def outer_body(i, x):
@@ -2375,6 +2429,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual([100.0, 1.0, 102.0, 3.0, 4.0 + 100 * 2.0],
                        self.evaluate(r_flattened))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhile_NestedBadArityFails(self):
     with self.cached_session():
       named = collections.namedtuple("named", ("a", "b"))
@@ -2391,6 +2446,7 @@ class ControlFlowTest(test.TestCase):
       with self.assertRaisesRegexp(ValueError, "the same number of elements"):
         control_flow_ops.while_loop(c, b, loop_vars)
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_ys_xs(self):
     with self.cached_session():
       x = constant_op.constant(3.0, name="x")
@@ -2435,6 +2491,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1024.0, r[0].eval())
 
   @test_util.disable_control_flow_v2("b/116355153 (back_prop flag)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_NoGradient(self):
     with self.cached_session():
       v = constant_op.constant(2.0, name="v")
@@ -2446,6 +2503,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(1.0, r[0].eval())
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_NoDependency(self):
     with self.cached_session() as sess:
       variable = variables.Variable(array_ops.ones([2, 3]))
@@ -2486,6 +2544,7 @@ class ControlFlowTest(test.TestCase):
       grad = gradients_impl.gradients(cost, [c0])
       self.assertAllClose(0.0, sess.run(grad[0]))
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_SerialTwoLoops(self):
     with self.cached_session():
       i = constant_op.constant(0, name="i")
@@ -2504,6 +2563,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients([rx], x)
       self.assertAllClose(1024.0, r[0].eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_ParallelTwoLoops(self):
     with self.cached_session():
       i = constant_op.constant(0, name="i")
@@ -2523,6 +2583,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients([rx], x)
       self.assertAllClose(64.0, r[0].eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_OneOutputWithControlDependencyOnSecond(self):
     with self.cached_session():
       i = constant_op.constant(0, name="i")
@@ -2566,6 +2627,7 @@ class ControlFlowTest(test.TestCase):
     self._testNestedWhileGrad_Simple(use_gpu=False)
     self._testNestedWhileGrad_Simple(use_gpu=True)
 
+  @test_util.run_v1_only("b/120545219")
   def testNestedWhileGrad_SerialInner(self):
     with self.cached_session():
       v = constant_op.constant(1.0)
@@ -2613,6 +2675,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, v)[0]
       self.assertAllClose(512.0, self.evaluate(r))
 
+  @test_util.run_v1_only("b/120545219")
   def testNestedWhileGrad_ParallelIterations(self):
     # Make sure the stack pushes and pops of an inner loop are executed in
     # the sequential order of the iterations of its outer loop.
@@ -2702,6 +2765,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose([[0.98000002, 1.98000002]], self.evaluate(x))
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileWithRefsWithGradients_1(self):
     with self.cached_session() as sess:
       x = variables.VariableV1(0.)._ref()  # pylint: disable=protected-access
@@ -2731,6 +2795,7 @@ class ControlFlowTest(test.TestCase):
     self.assertEqual(73, value_x_grad)
 
   @test_util.disable_control_flow_v2("b/116282023 (IndexedSlices)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_IndexedSlices(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
@@ -2753,7 +2818,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.array([1024.0, 1024.0]), self.evaluate(r))
 
   @test_util.disable_control_flow_v2("b/116328420 (SparseTensor)")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileGrad_SparseTensor(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name="values")
@@ -2777,6 +2842,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.array([1024.0, 1024.0]), self.evaluate(r))
 
   @test_util.disable_control_flow_v2("b/115920078 (gradients)")
+  @test_util.run_v1_only("b/120545219")
   def testCallGradInLoop(self):
     with self.cached_session() as sess:
       i0 = constant_op.constant(0)
@@ -2959,6 +3025,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose([0., 0.], self.evaluate(dy_dq))
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
+  @test_util.run_v1_only("b/120545219")
   def testWhileGradientWithNontrainablePath2(self):
     q = variables.Variable([7., 8.])
 
@@ -2977,6 +3044,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose([1., 1.], self.evaluate(dy_dq))
 
   @test_util.disable_control_flow_v2("b/115920078 (gradients)")
+  @test_util.run_v1_only("b/120545219")
   def testIssue16504(self):
     c = constant_op.constant(np.arange(100), dtype=dtypes.float32)
     w = variables.Variable(
@@ -3000,6 +3068,7 @@ class ControlFlowTest(test.TestCase):
     grad, = gradients_impl.gradients(w, c)
     self.assertIsNotNone(grad)
 
+  @test_util.run_v1_only("b/120545219")
   def testStopGradMultiFlows(self):
     with self.cached_session():
 
@@ -3026,6 +3095,7 @@ class ControlFlowTest(test.TestCase):
       variables.global_variables_initializer().run()
       self.assertEqual(5.0, self.evaluate(result))
 
+  @test_util.run_v1_only("b/120545219")
   def testOneValueCond(self):
 
     with self.cached_session():
@@ -3059,6 +3129,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(4.0, i.eval(feed_dict={d: 1}))
       self.assertAllClose(2.0 * math.sqrt(2), i.eval(feed_dict={d: 2}))
 
+  @test_util.run_v1_only("b/120545219")
   def testCase(self):
     with self.cached_session():
       x = constant_op.constant(1)
@@ -3111,6 +3182,7 @@ class ControlFlowTest(test.TestCase):
 
       self.assertAllEqual(r6.eval(), 0)
 
+  @test_util.run_v1_only("b/120545219")
   def testCaseSideEffects(self):
     with self.cached_session() as sess:
       v0 = variables.Variable(-1)
@@ -3147,6 +3219,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllEqual(self.evaluate([v0, v1, v2]), [0, -1, -1])
 
   @test_util.disable_control_flow_v2("b/113324949 (ref vars)")
+  @test_util.run_v1_only("b/120545219")
   def testOneOpCond(self):
     with self.cached_session():
       v = variables.Variable(0)
@@ -3175,6 +3248,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(2, i.eval(feed_dict={c.name: 0}))
       self.assertEqual(2, self.evaluate(v))
 
+  @test_util.run_v1_only("b/120545219")
   def testWithOpsDependencies(self):
     with self.cached_session() as sess:
       v = variables.VariableV1(0.0)
@@ -3198,6 +3272,7 @@ class ControlFlowTest(test.TestCase):
     # Ensure that 'v' is initialized
     self.assertAllClose(0.0, real_v_val)
 
+  @test_util.run_v1_only("b/120545219")
   def testWithTensorDependencies(self):
     with self.cached_session():
       v = variables.VariableV1(0.0)
@@ -3224,6 +3299,7 @@ class ControlFlowTest(test.TestCase):
       # Ensure that 'v' is initialized
       self.assertAllClose(0.0, self.evaluate(v))
 
+  @test_util.run_v1_only("b/120545219")
   def testWithIndexedSlicesDependencies(self):
     with self.cached_session():
       v = variables.VariableV1(
@@ -3270,6 +3346,7 @@ class ControlFlowTest(test.TestCase):
         self.assertDeviceEqual("", with_vdef_dep.device)
         self.assertEqual([b"loc:@vdef"], with_vdef_dep.op.colocation_groups())
 
+  @test_util.run_v1_only("b/120545219")
   def testGroup(self):
     with self.cached_session() as sess:
       v1 = variables.VariableV1([0.0])
@@ -3289,6 +3366,7 @@ class ControlFlowTest(test.TestCase):
     self.assertAllClose([0.0], v1_val)
     self.assertAllClose([1.0], v2_val)
 
+  @test_util.run_v1_only("b/120545219")
   def testGroupEmpty(self):
     op = control_flow_ops.group()
     self.assertEqual(op.type, "NoOp")
@@ -3349,7 +3427,7 @@ class ControlFlowTest(test.TestCase):
     self.assertEqual([None, None], m.get_shape().as_list())
     self.assertEqual([], index.get_shape())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRefSelect(self):
     index = array_ops.placeholder(dtypes.int32)
 
@@ -3404,6 +3482,7 @@ class ControlFlowTest(test.TestCase):
       with self.assertRaises(ValueError):
         sess.run(tensor_list[0])
 
+  @test_util.run_v1_only("b/120545219")
   def testWhilePyFuncBasic(self):
 
     def func(x):
@@ -3417,6 +3496,7 @@ class ControlFlowTest(test.TestCase):
           [tensor_shape.unknown_shape(), tensor_shape.unknown_shape()])
       self.assertEqual(r[1].eval(), 65536.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileFuncBasic(self):
 
     @function.Defun(dtypes.float32)
@@ -3440,6 +3520,7 @@ class ControlFlowTest(test.TestCase):
                 ]), 1)
 
 
+  @test_util.run_v1_only("b/120545219")
   def testQIntSwitchMerge(self):
     with self.cached_session(force_gpu=test.is_gpu_available()) as sess:
       constant_qint = constant_op.constant(np.array([42]), dtypes.qint8)
@@ -3448,6 +3529,7 @@ class ControlFlowTest(test.TestCase):
       result = control_flow_ops.merge([v_f, v_t])
       self.evaluate(result)
 
+  @test_util.run_v1_only("b/120545219")
   def testQIntRefSwitchMerge(self):
     with self.cached_session(use_gpu=test.is_gpu_available()) as sess:
       var_qint = gen_state_ops.variable(
@@ -3461,6 +3543,7 @@ class ControlFlowTest(test.TestCase):
       result = control_flow_ops.ref_merge([v_f, v_t])
       self.evaluate(result)
 
+  @test_util.run_v1_only("b/120545219")
   def testUInt64SwitchMerge(self):
     with self.cached_session(force_gpu=test.is_gpu_available()) as sess:
       constant_uint64 = constant_op.constant(np.array([42]), dtypes.uint64)
@@ -3508,6 +3591,7 @@ class ControlFlowContextCheckTest(test.TestCase):
         math_ops.less(1, 2), true_fn, lambda: constant_op.constant(0))
     return cond_tensor[0]
 
+  @test_util.run_v1_only("b/120545219")
   def testInvalidContext(self):
     # Accessing a while loop tensor outside of control flow is illegal.
     while_tensor = self._getWhileTensor()
@@ -3517,7 +3601,7 @@ class ControlFlowContextCheckTest(test.TestCase):
         "is in a while loop. See info log for more details."):
       math_ops.add(1, while_tensor)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInvalidContextInCond(self):
     # Accessing a while loop tensor in cond is illegal.
     while_tensor = self._getWhileTensor()
@@ -3530,6 +3614,7 @@ class ControlFlowContextCheckTest(test.TestCase):
           math_ops.less(1, 2), lambda: math_ops.add(1, while_tensor),
           lambda: constant_op.constant(0))
 
+  @test_util.run_v1_only("b/120545219")
   def testInvalidContextInWhile(self):
     # Accessing a while loop tensor in a different while loop is illegal.
     while_tensor = self._getWhileTensor()
@@ -3564,6 +3649,7 @@ class ControlFlowContextCheckTest(test.TestCase):
 
     control_flow_ops.cond(math_ops.less(1, 2), branch_fn, branch_fn)
 
+  @test_util.run_v1_only("b/120545219")
   def testValidWhileContext(self):
     # Accessing a tensor in a nested while is OK.
     def body(_):
@@ -3572,6 +3658,7 @@ class ControlFlowContextCheckTest(test.TestCase):
 
     control_flow_ops.while_loop(lambda i: i < 5, body, [0])
 
+  @test_util.run_v1_only("b/120545219")
   def testValidNestedContexts(self):
     # Accessing a tensor from a cond context in a while context, all inside an
     # outer while context, is OK.
@@ -3586,7 +3673,7 @@ class ControlFlowContextCheckTest(test.TestCase):
 
     control_flow_ops.while_loop(lambda i: i < 5, body, [0])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInvalidNestedContexts(self):
     # Accessing a tensor from a while context in a different while context, all
     # inside a cond context, is illegal.
@@ -3605,6 +3692,7 @@ class ControlFlowContextCheckTest(test.TestCase):
 
 class TupleTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testTensors(self):
     for v1_first in [True, False]:
       with self.cached_session():
@@ -3635,7 +3723,7 @@ class TupleTest(test.TestCase):
           self.assertAllClose([30.0], self.evaluate(t2))
           self.assertAllClose([1.0], self.evaluate(v1))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testIndexedSlices(self):
     for v1_first in [True, False]:
       with self.cached_session():
@@ -3887,6 +3975,7 @@ class EagerTest(test.TestCase):
           isum(tensor, maximum_iterations=3).numpy(),
           [1 + 3, 2 + 3, 3 + 3, 4 + 3, 5 + 3])
 
+  @test_util.run_v1_only("b/120545219")
   def testWhileWithMaximumIterationsAndSingleArgument(self):
     with context.eager_mode():
       tensor = constant_op.constant(0)
@@ -3909,6 +3998,7 @@ class EagerTest(test.TestCase):
       self.assertAllEqual(t1.numpy(), tup1.numpy())
       self.assertAllEqual(t2.numpy(), tup2.numpy())
 
+  @test_util.run_v1_only("b/120545219")
   def testCase(self):
     with context.eager_mode():
       x = constant_op.constant(1)
diff --git a/tensorflow/python/kernel_tests/control_flow_util_test.py b/tensorflow/python/kernel_tests/control_flow_util_test.py
index 762c445da0..573f4b0d25 100644
--- a/tensorflow/python/kernel_tests/control_flow_util_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_util_test.py
@@ -22,6 +22,7 @@ from __future__ import print_function
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
@@ -32,6 +33,7 @@ from tensorflow.python.platform import test
 
 class ControlFlowUtilTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testIsSwitch(self):
     switch_false, _ = control_flow_ops.switch(1, True)
     switch = switch_false.op
@@ -44,6 +46,7 @@ class ControlFlowUtilTest(test.TestCase):
 
     self.assertFalse(control_flow_util.IsSwitch(test_ops.int_output().op))
 
+  @test_util.run_v1_only("b/120545219")
   def testIsLoopEnter(self):
     enter = gen_control_flow_ops.enter(1, frame_name="name").op
     self.assertTrue(control_flow_util.IsLoopEnter(enter))
@@ -61,6 +64,7 @@ class ControlFlowUtilTest(test.TestCase):
 
     self.assertFalse(control_flow_util.IsLoopEnter(test_ops.int_output().op))
 
+  @test_util.run_v1_only("b/120545219")
   def testIsLoopExit(self):
     exit_op = control_flow_ops.exit(1).op
     self.assertTrue(control_flow_util.IsLoopExit(exit_op))
diff --git a/tensorflow/python/kernel_tests/ctc_loss_op_test.py b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
index e6b5835079..e24f304c1b 100644
--- a/tensorflow/python/kernel_tests/ctc_loss_op_test.py
+++ b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
@@ -106,7 +106,7 @@ class CTCLossTest(test.TestCase):
         with self.assertRaisesOpError(expected_err_re):
           self.evaluate([loss, grad])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testBasic(self):
     """Test two batch entries."""
     # Input and ground truth from Alex Graves' implementation.
@@ -242,7 +242,7 @@ class CTCLossTest(test.TestCase):
 
     self._testCTCLoss(inputs, seq_lens, labels, loss_truth, grad_truth)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_time_major(self):
     """Testing time_major param.
 
@@ -272,7 +272,7 @@ class CTCLossTest(test.TestCase):
       (tf_loss, tf_loss_transposed) = self.evaluate([loss, loss_transposed])
       self.assertAllEqual(tf_loss, tf_loss_transposed)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInvalidSecondGradient(self):
     inputs = np.random.randn(2, 2, 3).astype(np.float32)
     inputs_t = constant_op.constant(inputs)
@@ -289,7 +289,7 @@ class CTCLossTest(test.TestCase):
                                    "explicitly disabled"):
         _ = gradients_impl._hessian_vector_product(loss, [inputs_t], v)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testEmptyBatch(self):
     inputs = constant_op.constant([], dtype=dtypes.float32, shape=(1, 0, 2))
     sequence_lengths = constant_op.constant([], dtype=dtypes.int32)
@@ -306,7 +306,7 @@ class CTCLossTest(test.TestCase):
 
 class CTCLossTestV2(test.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCtcLossV2(self):
     random_seed.set_random_seed(5)
 
@@ -351,7 +351,7 @@ class CTCLossTestV2(test.TestCase):
             logit_length=logit_length,
             blank_index=0))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCtcLossDenseIsSameAsCtcLoss(self):
     with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
       random_seed.set_random_seed(5)
@@ -405,7 +405,7 @@ class CTCLossTestV2(test.TestCase):
               rtol=2e-06,
               atol=2e-06)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCtcLossDenseUniqueFastPathIsSameAsCtcLoss(self):
     random_seed.set_random_seed(5)
 
@@ -459,7 +459,7 @@ class CTCLossTestV2(test.TestCase):
             rtol=2e-06,
             atol=2e-06)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCtcLossDenseWithBlankIndexIsSameAsCtcLoss(self):
     random_seed.set_random_seed(5)
 
@@ -516,7 +516,7 @@ class CTCLossTestV2(test.TestCase):
             rtol=2e-06,
             atol=2e-06)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCtcLossDenseWithNegativeBlankIndexIsSameAsCtcLoss(self):
     with ops.device("/GPU:0" if test.is_gpu_available() else "/CPU:0"):
       random_seed.set_random_seed(5)
@@ -565,7 +565,7 @@ class CTCLossTestV2(test.TestCase):
               rtol=2e-06,
               atol=2e-06)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeated(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=[[1, 3, 3, 3, 0],
@@ -579,7 +579,7 @@ class CTCLossTestV2(test.TestCase):
          [1, 4, 0, 0],
          [4, 2, 9, 4]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeatedPreservesDtypes(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=constant_op.constant(
@@ -597,7 +597,7 @@ class CTCLossTestV2(test.TestCase):
          [1, 4, 0, 0],
          [4, 2, 9, 4]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeatedExtraPadding(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=[[1, 3, 3, 3, 0, 0, 0],
@@ -611,7 +611,7 @@ class CTCLossTestV2(test.TestCase):
          [1, 4, 0, 0],
          [4, 2, 9, 4]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeatedFrontRepeats(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=[[1, 1, 1, 2, 2],
@@ -625,7 +625,7 @@ class CTCLossTestV2(test.TestCase):
          [1, 2],
          [1, 0]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeatedAllLabelsTheSame(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=[[1, 1, 1, 1, 1],
@@ -658,7 +658,7 @@ class CTCLossTestV2(test.TestCase):
 
     self.assertAllEqual(padded_dense, new_dense)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testUnique(self):
     labels = [
         [3, 4, 4, 3],
@@ -674,7 +674,7 @@ class CTCLossTestV2(test.TestCase):
         [0, 0, 0, 1],
     ], idx)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSumStates(self):
     idx = [
         [0, 1, 0, 1],
@@ -694,7 +694,7 @@ class CTCLossTestV2(test.TestCase):
          [1.8, 0.8, 0.0, 0.0]]
     ], sum_of_states)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testStateToOlabel(self):
     labels = [
         [3, 4, 3, 4],
@@ -733,7 +733,7 @@ class CTCLossTestV2(test.TestCase):
          [22.0 + 23.0 + 24.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
     ])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testStateToOlabelUnique(self):
     labels = [
         [3, 4, 3, 4],
diff --git a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
index 4f74e1e741..4e3da068b8 100644
--- a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
+++ b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
@@ -33,7 +33,7 @@ class AssignOpTest(test.TestCase):
   # NOTE(mrry): We exclude thess tests from the TSAN TAP target, because they
   #   contain benign and deliberate data races when multiple threads update
   #   the same parameters without a lock.
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelUpdateWithoutLocking(self):
     with self.cached_session() as sess:
       ones_t = array_ops.fill([1024, 1024], 1.0)
@@ -61,7 +61,7 @@ class AssignOpTest(test.TestCase):
       self.assertTrue((vals >= ones).all())
       self.assertTrue((vals <= ones * 20).all())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelAssignWithoutLocking(self):
     with self.cached_session() as sess:
       ones_t = array_ops.fill([1024, 1024], float(1))
@@ -94,7 +94,7 @@ class AssignOpTest(test.TestCase):
   # contain non-benign but known data races between the variable assignment and
   # returning the output tensors. This issue will be resolved with the new
   # resource variables.
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelUpdateWithLocking(self):
     with self.cached_session() as sess:
       zeros_t = array_ops.fill([1024, 1024], 0.0)
@@ -122,7 +122,7 @@ class AssignOpTest(test.TestCase):
       ones = np.ones((1024, 1024)).astype(np.float32)
       self.assertAllEqual(vals, ones * 20)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelAssignWithLocking(self):
     with self.cached_session() as sess:
       zeros_t = array_ops.fill([1024, 1024], 0.0)
diff --git a/tensorflow/python/kernel_tests/dense_update_ops_test.py b/tensorflow/python/kernel_tests/dense_update_ops_test.py
index 309da88bef..545de87ca1 100644
--- a/tensorflow/python/kernel_tests/dense_update_ops_test.py
+++ b/tensorflow/python/kernel_tests/dense_update_ops_test.py
@@ -86,7 +86,7 @@ class AssignOpTest(test.TestCase):
   def testBasic(self):
     self._testTypes(np.arange(0, 20).reshape([4, 5]))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAssignNonStrictShapeChecking(self):
     with self.cached_session():
       data = array_ops.fill([1024, 1024], 0)
@@ -101,7 +101,7 @@ class AssignOpTest(test.TestCase):
       a2.op.run()
       self.assertAllEqual(p.eval(), self.evaluate(data2))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInitRequiredAssignAdd(self):
     with self.cached_session():
       p = variables.VariableV1(array_ops.fill([1024, 1024], 1), dtypes.int32)
@@ -109,7 +109,7 @@ class AssignOpTest(test.TestCase):
       with self.assertRaisesOpError("use uninitialized"):
         a.op.run()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInitRequiredAssignSub(self):
     with self.cached_session():
       p = variables.VariableV1(array_ops.fill([1024, 1024], 1), dtypes.int32)
diff --git a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
index f6d834c2f8..5b1a47fb03 100644
--- a/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
+++ b/tensorflow/python/kernel_tests/depthwise_conv_op_test.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import nn_impl
@@ -185,6 +186,7 @@ class DepthwiseConv2DTest(test.TestCase):
     self.assertShapeEqual(native_result, conv_native)
     self.assertShapeEqual(native_result, conv_interface)
 
+  @test_util.run_v1_only("b/120545219")
   def testDepthwiseConv2D(self):
     for index, (input_size, filter_size, _, stride,
                 padding) in enumerate(ConfigsToTest()):
@@ -428,6 +430,7 @@ class DepthwiseConv2DTest(test.TestCase):
           use_gpu, grouped_conv, err)
       self.assertLess(err, tolerance)
 
+  @test_util.run_v1_only("b/120545219")
   def testDepthwiseConv2DInputGrad(self):
     for index, (input_size, filter_size, output_size, stride,
                 padding) in enumerate(CheckGradConfigsToTest()):
@@ -477,6 +480,7 @@ class DepthwiseConv2DTest(test.TestCase):
             use_gpu=True,
             data_format="NCHW")
 
+  @test_util.run_v1_only("b/120545219")
   def testDepthwiseConv2DFilterGrad(self):
     for index, (input_size, filter_size, output_size, stride,
                 padding) in enumerate(CheckGradConfigsToTest()):
diff --git a/tensorflow/python/kernel_tests/determinant_op_test.py b/tensorflow/python/kernel_tests/determinant_op_test.py
index d6ef9e70b8..dbfda385ed 100644
--- a/tensorflow/python/kernel_tests/determinant_op_test.py
+++ b/tensorflow/python/kernel_tests/determinant_op_test.py
@@ -133,6 +133,7 @@ class DeterminantOpTest(test.TestCase):
     huge_matrix = np.array([[max_double, 0.0], [0.0, max_double]])
     self._compareDeterminant(huge_matrix)
 
+  @test_util.run_v1_only("b/120545219")
   def testNonSquareMatrix(self):
     # When the determinant of a non-square matrix is attempted we should return
     # an error
@@ -140,6 +141,7 @@ class DeterminantOpTest(test.TestCase):
       linalg_ops.matrix_determinant(
           np.array([[1., 2., 3.], [3., 5., 4.]]).astype(np.float32))
 
+  @test_util.run_v1_only("b/120545219")
   def testWrongDimensions(self):
     # The input to the determinant should be a 2-dimensional tensor.
     tensor1 = constant_op.constant([1., 2.])
@@ -150,6 +152,7 @@ class DeterminantOpTest(test.TestCase):
     self._compareDeterminant(np.empty([0, 2, 2]))
     self._compareDeterminant(np.empty([2, 0, 0]))
 
+  @test_util.run_v1_only("b/120545219")
   def testConcurrentExecutesWithoutError(self):
     with self.session(use_gpu=True) as sess:
       matrix1 = random_ops.random_normal([5, 5], seed=42)
diff --git a/tensorflow/python/kernel_tests/distributions/multinomial_test.py b/tensorflow/python/kernel_tests/distributions/multinomial_test.py
index b3f3416a52..187ddd4cf4 100644
--- a/tensorflow/python/kernel_tests/distributions/multinomial_test.py
+++ b/tensorflow/python/kernel_tests/distributions/multinomial_test.py
@@ -22,6 +22,7 @@ from tensorflow.python.eager import backprop
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.distributions import multinomial
@@ -33,6 +34,7 @@ class MultinomialTest(test.TestCase):
   def setUp(self):
     self._rng = np.random.RandomState(42)
 
+  @test_util.run_v1_only("b/120545219")
   def testSimpleShapes(self):
     with self.cached_session():
       p = [.1, .3, .6]
@@ -42,6 +44,7 @@ class MultinomialTest(test.TestCase):
       self.assertEqual(tensor_shape.TensorShape([3]), dist.event_shape)
       self.assertEqual(tensor_shape.TensorShape([]), dist.batch_shape)
 
+  @test_util.run_v1_only("b/120545219")
   def testComplexShapes(self):
     with self.cached_session():
       p = 0.5 * np.ones([3, 2, 2], dtype=np.float32)
@@ -52,6 +55,7 @@ class MultinomialTest(test.TestCase):
       self.assertEqual(tensor_shape.TensorShape([2]), dist.event_shape)
       self.assertEqual(tensor_shape.TensorShape([3, 2]), dist.batch_shape)
 
+  @test_util.run_v1_only("b/120545219")
   def testN(self):
     p = [[0.1, 0.2, 0.7], [0.2, 0.3, 0.5]]
     n = [[3.], [4]]
@@ -60,6 +64,7 @@ class MultinomialTest(test.TestCase):
       self.assertEqual((2, 1), dist.total_count.get_shape())
       self.assertAllClose(n, dist.total_count.eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testP(self):
     p = [[0.1, 0.2, 0.7]]
     with self.cached_session():
@@ -68,6 +73,7 @@ class MultinomialTest(test.TestCase):
       self.assertEqual((1, 3), dist.logits.get_shape())
       self.assertAllClose(p, dist.probs.eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testLogits(self):
     p = np.array([[0.1, 0.2, 0.7]], dtype=np.float32)
     logits = np.log(p) - 50.
@@ -78,6 +84,7 @@ class MultinomialTest(test.TestCase):
       self.assertAllClose(p, multinom.probs.eval())
       self.assertAllClose(logits, multinom.logits.eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testPmfUnderflow(self):
     logits = np.array([[-200, 0]], dtype=np.float32)
     with self.cached_session():
@@ -85,6 +92,7 @@ class MultinomialTest(test.TestCase):
       lp = dist.log_prob([1., 0.]).eval()[0]
       self.assertAllClose(-200, lp, atol=0, rtol=1e-6)
 
+  @test_util.run_v1_only("b/120545219")
   def testPmfandCountsAgree(self):
     p = [[0.1, 0.2, 0.7]]
     n = [[5.]]
@@ -97,6 +105,7 @@ class MultinomialTest(test.TestCase):
       with self.assertRaisesOpError("counts must sum to `self.total_count`"):
         dist.prob([3., 3, 0]).eval()
 
+  @test_util.run_v1_only("b/120545219")
   def testPmfNonIntegerCounts(self):
     p = [[0.1, 0.2, 0.7]]
     n = [[5.]]
@@ -157,6 +166,7 @@ class MultinomialTest(test.TestCase):
       self.assertAllClose([0.1, 0.9], self.evaluate(pmf))
       self.assertEqual((2), pmf.get_shape())
 
+  @test_util.run_v1_only("b/120545219")
   def testPmfCountsStretchedInBroadcastWhenSameRank(self):
     with self.cached_session():
       p = [[0.1, 0.9], [0.7, 0.3]]
@@ -165,6 +175,7 @@ class MultinomialTest(test.TestCase):
       self.assertAllClose(pmf.eval(), [0.1, 0.7])
       self.assertEqual((2), pmf.get_shape())
 
+  @test_util.run_v1_only("b/120545219")
   def testPmfCountsStretchedInBroadcastWhenLowerRank(self):
     with self.cached_session():
       p = [[0.1, 0.9], [0.7, 0.3]]
@@ -194,6 +205,7 @@ class MultinomialTest(test.TestCase):
       self.evaluate(pmf)
       self.assertEqual((4, 3), pmf.get_shape())
 
+  @test_util.run_v1_only("b/120545219")
   def testMultinomialMean(self):
     with self.cached_session():
       n = 5.
@@ -203,6 +215,7 @@ class MultinomialTest(test.TestCase):
       self.assertEqual((3,), dist.mean().get_shape())
       self.assertAllClose(expected_means, dist.mean().eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testMultinomialCovariance(self):
     with self.cached_session():
       n = 5.
@@ -214,6 +227,7 @@ class MultinomialTest(test.TestCase):
       self.assertEqual((3, 3), dist.covariance().get_shape())
       self.assertAllClose(expected_covariances, dist.covariance().eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testMultinomialCovarianceBatch(self):
     with self.cached_session():
       # Shape [2]
@@ -246,6 +260,7 @@ class MultinomialTest(test.TestCase):
       self.assertEqual((3, 5, 4, 4), covariance.get_shape())
       self.assertEqual((6, 3, 3, 3), covariance2.get_shape())
 
+  @test_util.run_v1_only("b/120545219")
   def testCovarianceFromSampling(self):
     # We will test mean, cov, var, stddev on a DirichletMultinomial constructed
     # via broadcast between alpha, n.
@@ -288,6 +303,7 @@ class MultinomialTest(test.TestCase):
       self.assertAllClose(sample_var_, analytic_var, atol=0.01, rtol=0.01)
       self.assertAllClose(sample_stddev_, analytic_stddev, atol=0.01, rtol=0.01)
 
+  @test_util.run_v1_only("b/120545219")
   def testSampleUnbiasedNonScalarBatch(self):
     with self.cached_session() as sess:
       dist = multinomial.Multinomial(
@@ -317,6 +333,7 @@ class MultinomialTest(test.TestCase):
       self.assertAllClose(
           actual_covariance_, sample_covariance_, atol=0., rtol=0.20)
 
+  @test_util.run_v1_only("b/120545219")
   def testSampleUnbiasedScalarBatch(self):
     with self.cached_session() as sess:
       dist = multinomial.Multinomial(
diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py
index 9655351a01..0579dddb70 100644
--- a/tensorflow/python/kernel_tests/fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/fifo_queue_test.py
@@ -39,6 +39,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
+@test_util.run_v1_only("b/120545219")
 class FIFOQueueTest(test.TestCase):
 
   def testConstructor(self):
@@ -1423,6 +1424,7 @@ class FIFOQueueTest(test.TestCase):
         session.run([a, c])
 
 
+@test_util.run_v1_only("b/120545219")
 class FIFOQueueDictTest(test.TestCase):
 
   def testConstructor(self):
@@ -1583,6 +1585,7 @@ class FIFOQueueDictTest(test.TestCase):
       self.assertTrue([compat.as_bytes("dd"), compat.as_bytes("ee")], list(s))
 
 
+@test_util.run_v1_only("b/120545219")
 class FIFOQueueWithTimeoutTest(test.TestCase):
 
   def testDequeueWithTimeout(self):
@@ -1617,6 +1620,7 @@ class FIFOQueueWithTimeoutTest(test.TestCase):
       self.assertEqual(37, self.evaluate(dequeued_t))
 
 
+@test_util.run_v1_only("b/120545219")
 class QueueContainerTest(test.TestCase):
 
   def testContainer(self):
diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py
index c489623fe5..95ee454614 100644
--- a/tensorflow/python/kernel_tests/functional_ops_test.py
+++ b/tensorflow/python/kernel_tests/functional_ops_test.py
@@ -494,7 +494,7 @@ class FunctionalOpsTest(test.TestCase):
 
   @test_util.disable_control_flow_v2("b/119323354")
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMapEmptyScalar(self):
     map_return = functional_ops.map_fn(lambda x: 1, constant_op.constant([]))
     self.assertAllEqual([0], map_return.get_shape().dims)
@@ -503,7 +503,7 @@ class FunctionalOpsTest(test.TestCase):
   # TODO(akshayka): this test fails in eager: the iterable is of length 0 so
   # so the body of the while loop never executes
   @test_util.disable_control_flow_v2("b/119323354")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMapEmptyTensor(self):
     with self.cached_session():
       map_return = functional_ops.map_fn(lambda x: array_ops.zeros([3, 2]),
@@ -797,7 +797,7 @@ class FunctionalOpsTest(test.TestCase):
     self.assertAllEqual(Run(100., False), 5050.)
     self.assertAllEqual(Run(100., True), 5050.)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileError(self):
     for use_gpu in (True, False):
       with ops.Graph().as_default() as g:
@@ -1027,7 +1027,7 @@ class FunctionalOpsTest(test.TestCase):
   def testForMLPWhile(self):
     self._testForMLP(True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testForError(self):
 
     @function.Defun(dtypes.int32, dtypes.float32)
@@ -1233,7 +1233,7 @@ class PartitionedCallTest(test.TestCase):
       self.assertAllEqual(expected, result)
 
   # Use an invalid executor name to test the plumbing of the executor_type attr.
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testExecutorTypeAttrExecutorNotFound(self):
     @function.Defun(dtypes.int32)
     def AddFive(x):
diff --git a/tensorflow/python/kernel_tests/identity_op_py_test.py b/tensorflow/python/kernel_tests/identity_op_py_test.py
index 1a6794e896..40ec9db422 100644
--- a/tensorflow/python/kernel_tests/identity_op_py_test.py
+++ b/tensorflow/python/kernel_tests/identity_op_py_test.py
@@ -62,7 +62,7 @@ class IdentityOpTest(test.TestCase):
       self.assertEquals(shape,
                         array_ops.identity(np.array(array_2x3)).get_shape())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRefIdentityShape(self):
     with self.cached_session():
       shape = [2, 3]
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_test.py
index 8f8b15e8ed..18e13a76a0 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_test.py
@@ -214,7 +214,7 @@ class LinearOperatorTest(test.TestCase):
     operator = LinearOperatorMatmulSolve(matrix, is_square=True)
     self.assertTrue(operator.is_square)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_linear_operator_matmul_hints_closed(self):
     matrix = array_ops.placeholder(dtypes.float32)
     operator1 = LinearOperatorMatmulSolve(matrix)
@@ -241,7 +241,7 @@ class LinearOperatorTest(test.TestCase):
     self.assertTrue(operator_matmul.is_self_adjoint)
     self.assertEqual(None, operator_matmul.is_positive_definite)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_linear_operator_matmul_hints_false(self):
     matrix = array_ops.placeholder(dtypes.float32)
     operator1 = LinearOperatorMatmulSolve(
@@ -274,7 +274,7 @@ class LinearOperatorTest(test.TestCase):
     self.assertEqual(None, operator_matmul.is_self_adjoint)
     self.assertEqual(None, operator_matmul.is_positive_definite)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_linear_operator_matmul_hint_infer_square(self):
     matrix1 = array_ops.placeholder(shape=[2, 3], dtype=dtypes.float32)
     matrix2 = array_ops.placeholder(shape=[3, 2], dtype=dtypes.float32)
diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py
index 28e1d7e168..ff84221611 100644
--- a/tensorflow/python/kernel_tests/linalg_grad_test.py
+++ b/tensorflow/python/kernel_tests/linalg_grad_test.py
@@ -61,6 +61,7 @@ class MatrixUnaryFunctorGradientTest(test_lib.TestCase):
 
 def _GetMatrixUnaryFunctorGradientTest(functor_, dtype_, shape_, **kwargs_):
 
+  @test_util.run_v1_only('b/120545219')
   def Test(self):
     with self.session(use_gpu=True):
       np.random.seed(1)
@@ -103,6 +104,7 @@ def _GetMatrixBinaryFunctorGradientTest(functor_,
                                         float32_tol_fudge=1.0,
                                         **kwargs_):
 
+  @test_util.run_v1_only('b/120545219')
   def Test(self):
     # TODO(rmlarsen): Debug illegal address bug on CUDA and re-enable
     # GPU test for matrix_solve.
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index abff61f81b..4584a27e62 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -51,26 +51,26 @@ class AbsoluteDifferenceLossTest(test.TestCase):
         losses.absolute_difference(
             self._predictions, self._predictions, weights=None)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeight(self):
     loss = losses.absolute_difference(self._predictions, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(0.0, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLoss(self):
     loss = losses.absolute_difference(self._labels, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(5.5, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.absolute_difference(self._labels, self._predictions, weights)
     with self.cached_session():
       self.assertAlmostEqual(5.5 * weights, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.absolute_difference(self._labels, self._predictions,
@@ -148,7 +148,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
       self.assertEquals(loss.op.name, 'softmax_cross_entropy_loss/value')
       self.assertAlmostEqual(loss.eval(), 10.0, 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
@@ -158,7 +158,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
       loss = losses.softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(weights * 10.0, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
@@ -311,7 +311,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
       self.assertEquals(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
       self.assertAlmostEqual(loss.eval(), 10.0, 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
@@ -321,7 +321,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(weights * 10.0, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
@@ -677,13 +677,13 @@ class LogLossTest(test.TestCase):
       with self.assertRaises(ValueError):
         losses.log_loss(self._labels, self._labels, weights=None)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeight(self):
     loss = losses.log_loss(self._labels, self._labels)
     with self.cached_session():
       self.assertAlmostEqual(0.0, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeightWithPlaceholder(self):
     tf_predictions = array_ops.placeholder(
         dtypes.float32, shape=self._np_labels.shape)
@@ -692,14 +692,14 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(
           0.0, loss.eval(feed_dict={tf_predictions: self._np_labels}), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLoss(self):
     loss = losses.log_loss(self._labels, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(-np.sum(self._expected_losses) / 6.0,
                              self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.log_loss(self._labels, self._predictions, weights)
@@ -707,7 +707,7 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.log_loss(self._labels, self._predictions,
@@ -716,7 +716,7 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeightAndPlaceholder(self):
     tf_predictions = array_ops.placeholder(
         dtypes.float32, shape=self._np_predictions.shape)
@@ -728,7 +728,7 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              loss, 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeightAndPlaceholderWithRankOnly(self):
     tf_predictions = array_ops.placeholder(dtypes.float32, shape=[None, None])
     weights = 2.3
@@ -788,7 +788,7 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(-np.sum(expected_losses) / 5.0,
                              self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithMeasurementSpecificWeightsWithPlaceholder(self):
     weights = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3))
     expected_losses = np.multiply(self._expected_losses, weights)
@@ -816,7 +816,7 @@ class LogLossTest(test.TestCase):
     with self.cached_session():
       self.assertAlmostEqual(-np.sum(expected_losses), self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithSampleSpecificWeightsMostZeroWithPlaceholder(self):
     weights = np.array([0, 0, 0, 0, 0, 2]).reshape((2, 3))
     expected_losses = np.multiply(self._expected_losses, weights)
@@ -955,26 +955,26 @@ class MeanSquaredErrorTest(test.TestCase):
           losses.mean_squared_error(predictions=constant_op.constant(0),
                                     labels=constant_op.constant(0)).eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeight(self):
     loss = losses.mean_squared_error(self._predictions, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(0.0, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLoss(self):
     loss = losses.mean_squared_error(self._labels, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(49.5, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.mean_squared_error(self._labels, self._predictions, weights)
     with self.cached_session():
       self.assertAlmostEqual(49.5 * weights, self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.mean_squared_error(self._labels, self._predictions,
@@ -1068,12 +1068,12 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
       self.assertAlmostEqual(
           expected_loss, dynamic_inputs_op.eval(feed_dict=feed_dict), places=3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeight(self):
     self._test_valid_weights(
         self._labels, self._labels, expected_loss=0.0)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLoss(self):
     self._test_valid_weights(
         self._labels, self._predictions,
@@ -1104,7 +1104,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
           np_grad = self.evaluate(grad)
           self.assertFalse(np.isnan(np_grad).any())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     weight = 2.3
     self._test_valid_weights(
@@ -1112,7 +1112,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
         expected_loss=weight * np.sum(self._expected_losses),
         weights=weight)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.mean_pairwise_squared_error(
@@ -1179,7 +1179,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
             weights_placeholder: weights,
         })
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInvalid3dWeighted2x0(self):
     labels = np.array([
         [[1, 9, 2], [12, 11, 10], [9, 8, 7]],
diff --git a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
index 129ea40dfe..fdb7e4a1a4 100644
--- a/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_band_part_op_test.py
@@ -23,6 +23,7 @@ from tensorflow.python.client import session
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_lib
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradient_checker
@@ -44,6 +45,7 @@ class MatrixBandPartTest(test_lib.TestCase):
 
 def _GetMatrixBandPartTest(dtype_, batch_shape_, shape_):
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     mat = np.ones(shape_).astype(dtype_)
     batch_mat = np.tile(mat, batch_shape_ + (1, 1))
@@ -73,6 +75,7 @@ class MatrixBandPartGradTest(test_lib.TestCase):
 
 def _GetMatrixBandPartGradTest(dtype_, batch_shape_, shape_):
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     shape = batch_shape_ + shape_
     x = constant_op.constant(np.random.rand(*shape), dtype=dtype_)
diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
index b0bce6a1b9..682ac12adc 100644
--- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py
@@ -84,6 +84,7 @@ class LogarithmOpTest(test.TestCase):
     # Complex batch
     self._verifyLogarithmComplex(self._makeBatch(matrix1, matrix2))
 
+  @test_util.run_v1_only("b/120545219")
   def testNonSquareMatrix(self):
     # When the logarithm of a non-square matrix is attempted we should return
     # an error
@@ -91,6 +92,7 @@ class LogarithmOpTest(test.TestCase):
       gen_linalg_ops.matrix_logarithm(
           np.array([[1., 2., 3.], [3., 4., 5.]], dtype=np.complex64))
 
+  @test_util.run_v1_only("b/120545219")
   def testWrongDimensions(self):
     # The input to the logarithm should be at least a 2-dimensional tensor.
     tensor3 = constant_op.constant([1., 2.], dtype=dtypes.complex64)
@@ -121,6 +123,7 @@ class LogarithmOpTest(test.TestCase):
             size=np.prod(shape)).reshape(shape).astype(np.complex128)
         self._verifyLogarithmComplex(matrix)
 
+  @test_util.run_v1_only("b/120545219")
   def testConcurrentExecutesWithoutError(self):
     with self.session(use_gpu=True) as sess:
       matrix1 = math_ops.cast(
diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
index 42fd95d311..463477a6a2 100644
--- a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
@@ -135,7 +135,7 @@ class MatrixSolveLsOpTest(test_lib.TestCase):
       self.assertEqual(np_ans.shape, tf_ans_val.shape)
       self.assertAllClose(np_ans, tf_ans_val, atol=2 * tol, rtol=2 * tol)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWrongDimensions(self):
     # The matrix and right-hand sides should have the same number of rows.
     with self.session(use_gpu=True):
@@ -144,7 +144,6 @@ class MatrixSolveLsOpTest(test_lib.TestCase):
       with self.assertRaises(ValueError):
         linalg_ops.matrix_solve_ls(matrix, rhs)
 
-  @test_util.run_deprecated_v1
   def testEmpty(self):
     full = np.array([[1., 2.], [3., 4.], [5., 6.]])
     empty0 = np.empty([3, 0])
@@ -164,7 +163,7 @@ class MatrixSolveLsOpTest(test_lib.TestCase):
             linalg_ops.matrix_solve_ls(empty1, empty1, fast=fast))
         self.assertEqual(tf_ans.shape, (2, 2))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testBatchResultSize(self):
     # 3x3x3 matrices, 3x3x1 right-hand sides.
     matrix = np.array([1., 2., 3., 4., 5., 6., 7., 8., 9.] * 3).reshape(3, 3, 3)
diff --git a/tensorflow/python/kernel_tests/matrix_square_root_op_test.py b/tensorflow/python/kernel_tests/matrix_square_root_op_test.py
index 1e2109b8c4..3edb390c72 100644
--- a/tensorflow/python/kernel_tests/matrix_square_root_op_test.py
+++ b/tensorflow/python/kernel_tests/matrix_square_root_op_test.py
@@ -90,17 +90,20 @@ class SquareRootOpTest(test.TestCase):
     self._verifySquareRootReal(np.empty([0, 2, 2]))
     self._verifySquareRootReal(np.empty([2, 0, 0]))
 
+  @test_util.run_v1_only("b/120545219")
   def testWrongDimensions(self):
     # The input to the square root should be at least a 2-dimensional tensor.
     tensor = constant_op.constant([1., 2.])
     with self.assertRaises(ValueError):
       gen_linalg_ops.matrix_square_root(tensor)
 
+  @test_util.run_v1_only("b/120545219")
   def testNotSquare(self):
     with self.assertRaises(ValueError):
       tensor = constant_op.constant([[1., 0., -1.], [-1., 1., 0.]])
       self.evaluate(gen_linalg_ops.matrix_square_root(tensor))
 
+  @test_util.run_v1_only("b/120545219")
   def testConcurrentExecutesWithoutError(self):
     with test_util.use_gpu():
       matrix1 = random_ops.random_normal([5, 5], seed=42)
diff --git a/tensorflow/python/kernel_tests/norm_op_test.py b/tensorflow/python/kernel_tests/norm_op_test.py
index 5ff0c58bf1..20b9ad95c8 100644
--- a/tensorflow/python/kernel_tests/norm_op_test.py
+++ b/tensorflow/python/kernel_tests/norm_op_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
 from tensorflow.python.platform import test as test_lib
@@ -35,6 +36,7 @@ def _AddTest(test, test_name, fn):
 
 class NormOpTest(test_lib.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testBadOrder(self):
     matrix = [[0., 1.], [2., 3.]]
     for ord_ in "fro", -7, -1.1, 0:
@@ -52,6 +54,7 @@ class NormOpTest(test_lib.TestCase):
                                    "'ord' must be a supported matrix norm"):
         linalg_ops.norm(matrix, ord=ord_, axis=[-2, -1])
 
+  @test_util.run_v1_only("b/120545219")
   def testInvalidAxis(self):
     matrix = [[0., 1.], [2., 3.]]
     for axis_ in [], [1, 2, 3], [[1]], [[1], [2]], [3.1415], [1, 1]:
@@ -78,6 +81,7 @@ def _GetNormOpTest(dtype_, shape_, ord_, axis_, keep_dims_, use_static_shape_):
         tf_norm_val = sess.run(tf_norm, feed_dict={tf_matrix: matrix})
     self.assertAllClose(np_norm, tf_norm_val, rtol=1e-5, atol=1e-5)
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     is_matrix_norm = (isinstance(axis_, tuple) or
                       isinstance(axis_, list)) and len(axis_) == 2
diff --git a/tensorflow/python/kernel_tests/numerics_test.py b/tensorflow/python/kernel_tests/numerics_test.py
index 5751f3fe76..f13f9d6806 100644
--- a/tensorflow/python/kernel_tests/numerics_test.py
+++ b/tensorflow/python/kernel_tests/numerics_test.py
@@ -64,9 +64,9 @@ class VerifyTensorAllFiniteTest(test.TestCase):
         self.evaluate(t_verified)
 
 
+@test_util.run_v1_only("b/120545219")
 class NumericsTest(test.TestCase):
 
-  @test_util.run_deprecated_v1
   def testInf(self):
     with self.session(graph=ops.Graph()):
       t1 = constant_op.constant(1.0)
@@ -77,7 +77,6 @@ class NumericsTest(test.TestCase):
       with self.assertRaisesOpError("Inf"):
         self.evaluate(a)
 
-  @test_util.run_deprecated_v1
   def testNaN(self):
     with self.session(graph=ops.Graph()):
       t1 = constant_op.constant(0.0)
@@ -88,7 +87,6 @@ class NumericsTest(test.TestCase):
       with self.assertRaisesOpError("NaN"):
         self.evaluate(a)
 
-  @test_util.run_deprecated_v1
   def testBoth(self):
     with self.session(graph=ops.Graph()):
       t1 = constant_op.constant([1.0, 0.0])
@@ -107,7 +105,6 @@ class NumericsTest(test.TestCase):
       self.assertAllEqual(np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]), value)
       self.assertEqual([2, 3], checked.get_shape())
 
-  @test_util.run_deprecated_v1
   def testControlFlowCond(self):
     predicate = array_ops.placeholder(dtypes.bool, shape=[])
     _ = control_flow_ops.cond(predicate,
@@ -120,7 +117,6 @@ class NumericsTest(test.TestCase):
         r"or `tf.while_loop\(\)`\."):
       numerics.add_check_numerics_ops()
 
-  @test_util.run_deprecated_v1
   def testControlFlowWhile(self):
     predicate = array_ops.placeholder(dtypes.bool, shape=[])
     _ = control_flow_ops.while_loop(lambda _: predicate,
diff --git a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
index b4818360d5..e3999695d0 100644
--- a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
@@ -29,11 +29,13 @@ from tensorflow.python.framework import dtypes as dtypes_lib
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_v1_only("b/120545219")
 class PaddingFIFOQueueTest(test.TestCase):
 
   def testConstructor(self):
@@ -1393,6 +1395,7 @@ class PaddingFIFOQueueTest(test.TestCase):
     with self.assertRaisesOpError("was cancelled"):
       self.evaluate(enqueue_many_op)
 
+  @test_util.run_deprecated_v1
   def testResetOfBlockingOperation(self):
     with self.cached_session() as sess:
       q_empty = data_flow_ops.PaddingFIFOQueue(5, dtypes_lib.float32, ((),))
diff --git a/tensorflow/python/kernel_tests/partitioned_variables_test.py b/tensorflow/python/kernel_tests/partitioned_variables_test.py
index 48655391fa..da79b4ecfc 100644
--- a/tensorflow/python/kernel_tests/partitioned_variables_test.py
+++ b/tensorflow/python/kernel_tests/partitioned_variables_test.py
@@ -412,7 +412,7 @@ class PartitionedVariablesTestCase(test.TestCase):
   def testResourceName(self):
     self._testNameHelper(use_resource=True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRandomInitValue(self):
     with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([200, 40]))
@@ -430,7 +430,7 @@ class PartitionedVariablesTestCase(test.TestCase):
           "200 40 0,200:36,4"
       ])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRandomInitUnevenPartitions(self):
     with self.cached_session():
       rnd = variables.Variable(
@@ -469,7 +469,7 @@ class PartitionedVariablesTestCase(test.TestCase):
         if i < len(save_specs):
           self._TestSaveSpec(vs, save_specs[i])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testDegenerate(self):
     with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([10, 43]))
@@ -481,7 +481,7 @@ class PartitionedVariablesTestCase(test.TestCase):
       self.assertAllClose(rnd, val)
       self._TestSaveSpec(vs, ["10 43 0,10:0,43"])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSliceSizeOne(self):
     with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([10, 43]))
diff --git a/tensorflow/python/kernel_tests/priority_queue_test.py b/tensorflow/python/kernel_tests/priority_queue_test.py
index 9be682ea52..49ec7ee483 100644
--- a/tensorflow/python/kernel_tests/priority_queue_test.py
+++ b/tensorflow/python/kernel_tests/priority_queue_test.py
@@ -27,6 +27,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import data_flow_ops
 import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
@@ -35,6 +36,7 @@ from tensorflow.python.platform import test
 
 class PriorityQueueTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testRoundTripInsertReadOnceSorts(self):
     with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), (
@@ -112,6 +114,7 @@ class PriorityQueueTest(test.TestCase):
         missed.remove((dv0, dv1))
       self.assertEqual(missed, set())
 
+  @test_util.run_v1_only("b/120545219")
   def testRoundTripFillsCapacityMultiThreadedEnqueueAndDequeue(self):
     with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(10, (dtypes.int64), (()))
@@ -267,6 +270,7 @@ class PriorityQueueTest(test.TestCase):
         missed.remove((dv0, dv1))
       self.assertEqual(missed, set())
 
+  @test_util.run_v1_only("b/120545219")
   def testRoundTripInsertOnceReadOnceSorts(self):
     with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), (
@@ -288,6 +292,7 @@ class PriorityQueueTest(test.TestCase):
       for e, dv0, dv1 in zip(deq_elem, deq_value_0, deq_value_1):
         self.assertTrue((dv0, dv1) in allowed[e])
 
+  @test_util.run_v1_only("b/120545219")
   def testRoundTripInsertOnceReadManySorts(self):
     with self.cached_session():
       q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (()))
@@ -296,6 +301,7 @@ class PriorityQueueTest(test.TestCase):
       deq_values = np.hstack((q.dequeue_many(100)[0].eval() for _ in range(10)))
       self.assertAllEqual(deq_values, sorted(elem))
 
+  @test_util.run_v1_only("b/120545219")
   def testRoundTripInsertOnceReadOnceLotsSorts(self):
     with self.cached_session():
       q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (()))
@@ -311,6 +317,7 @@ class PriorityQueueTest(test.TestCase):
       with self.assertRaises(TypeError):
         q.enqueue_many((["a", "b", "c"], ["a", "b", "c"])).run()
 
+  @test_util.run_v1_only("b/120545219")
   def testInsertingNonScalarFails(self):
     with self.cached_session() as sess:
       input_priority = array_ops.placeholder(dtypes.int64)
diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py
index 1f3f02a9f0..482633d539 100644
--- a/tensorflow/python/kernel_tests/py_func_test.py
+++ b/tensorflow/python/kernel_tests/py_func_test.py
@@ -102,6 +102,7 @@ class PyFuncTest(test.TestCase):
           script_ops.eager_py_func(np_func, [x, y], [dtypes.float32]))
       self.assertEqual(z[0], np_func(1.0, 2.0).astype(np.float32))
 
+  @test_util.run_v1_only("b/120545219")
   def testArray(self):
     with self.cached_session():
       x = constant_op.constant([1.0, 2.0], dtypes.float64)
@@ -168,6 +169,7 @@ class PyFuncTest(test.TestCase):
                              (dtypes.float64, dtypes.float64)))
       self.assertAllClose(y, [0.0, 1.0])
 
+  @test_util.run_v1_only("b/120545219")
   def testStrings(self):
 
     def read_fixed_length_numpy_strings():
@@ -185,6 +187,7 @@ class PyFuncTest(test.TestCase):
           script_ops.py_func(read_and_return_strings, [x, y], dtypes.string))
       self.assertAllEqual(z, [b"hello there", b"hi there"])
 
+  @test_util.run_v1_only("b/120545219")
   def testStringsAreConvertedToBytes(self):
 
     def read_fixed_length_numpy_strings():
@@ -202,6 +205,7 @@ class PyFuncTest(test.TestCase):
           script_ops.py_func(read_and_return_strings, [x, y], dtypes.string))
       self.assertAllEqual(z, [b"hello there", b"hi there"])
 
+  @test_util.run_v1_only("b/120545219")
   def testObjectArraysAreConvertedToBytes(self):
 
     def read_object_array():
@@ -217,12 +221,14 @@ class PyFuncTest(test.TestCase):
       z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string])
       self.assertListEqual(list(z.eval()), [b"hello there", b"hi ya"])
 
+  @test_util.run_v1_only("b/120545219")
   def testStringPadding(self):
     correct = [b"this", b"is", b"a", b"test"]
     with self.cached_session():
       s, = script_ops.py_func(lambda: [correct], [], [dtypes.string])
       self.assertAllEqual(s.eval(), correct)
 
+  @test_util.run_v1_only("b/120545219")
   def testStringPaddingAreConvertedToBytes(self):
     inp = ["this", "is", "a", "test"]
     correct = [b"this", b"is", b"a", b"test"]
@@ -230,6 +236,7 @@ class PyFuncTest(test.TestCase):
       s, = script_ops.py_func(lambda: [inp], [], [dtypes.string])
       self.assertAllEqual(s.eval(), correct)
 
+  @test_util.run_v1_only("b/120545219")
   def testLarge(self):
     with self.cached_session() as sess:
       x = array_ops.zeros([1000000], dtype=np.float32)
@@ -243,6 +250,7 @@ class PyFuncTest(test.TestCase):
       x = self.evaluate(script_ops.py_func(lambda: 42.0, [], dtypes.float64))
       self.assertAllClose(x, 42.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testAlias(self):
     with self.cached_session():
       np_array = np.array([1.0, 2.0], dtype=np.float32)
@@ -251,6 +259,7 @@ class PyFuncTest(test.TestCase):
       value.op.run()
       self.assertAllEqual(np_array, [1.0, 2.0])
 
+  @test_util.run_v1_only("b/120545219")
   def testReturnUnicodeString(self):
     with self.cached_session():
       correct = u"你好 世界"
@@ -261,6 +270,7 @@ class PyFuncTest(test.TestCase):
       z, = script_ops.py_func(unicode_string, [], [dtypes.string])
       self.assertEqual(z.eval(), correct.encode("utf8"))
 
+  @test_util.run_v1_only("b/120545219")
   def testBadNumpyReturnType(self):
     with self.cached_session():
 
@@ -274,6 +284,7 @@ class PyFuncTest(test.TestCase):
                                    "Unsupported numpy type"):
         self.evaluate(y)
 
+  @test_util.run_v1_only("b/120545219")
   def testBadReturnType(self):
     with self.cached_session():
 
@@ -287,6 +298,7 @@ class PyFuncTest(test.TestCase):
                                    "Unsupported object type"):
         self.evaluate(z)
 
+  @test_util.run_v1_only("b/120545219")
   def testReturnInput(self):
     with self.cached_session():
 
@@ -321,6 +333,7 @@ class PyFuncTest(test.TestCase):
       self.assertEqual(self.evaluate(x), 0)
       self.assertEqual(self.evaluate(x), 0)
 
+  @test_util.run_v1_only("b/120545219")
   def testGradientFunction(self):
     # Input to tf.py_func is necessary, otherwise get_gradient_function()
     # returns None per default.
@@ -330,6 +343,7 @@ class PyFuncTest(test.TestCase):
     self.assertEqual(None, ops.get_gradient_function(x.op))
     self.assertEqual(None, ops.get_gradient_function(y.op))
 
+  @test_util.run_v1_only("b/120545219")
   def testCOrder(self):
     with self.cached_session():
       val = [[1, 2], [3, 4]]
@@ -337,6 +351,7 @@ class PyFuncTest(test.TestCase):
                               [dtypes.int64])
       self.assertAllEqual(val, self.evaluate(x))
 
+  @test_util.run_v1_only("b/120545219")
   def testParallel(self):
     # Tests that tf.py_func's can run in parallel if they release the GIL.
     with self.cached_session() as session:
@@ -382,6 +397,7 @@ class PyFuncTest(test.TestCase):
       self.assertIsNone(ret)
       self.assertAllEqual([3], s.value)
 
+  @test_util.run_v1_only("b/120545219")
   def testNoReturnValueStateless(self):
 
     def do_nothing(unused_x):
@@ -420,6 +436,7 @@ class PyFuncTest(test.TestCase):
     with self.assertRaisesWithPredicateMatch(tf_exp, expected_error_check):
       self.evaluate(f)
 
+  @test_util.run_v1_only("b/120545219")
   def testExceptionHandling(self):
     with self.cached_session():
       self._testExceptionHandling(ValueError, errors.InvalidArgumentError)
@@ -514,7 +531,7 @@ class PyFuncTest(test.TestCase):
       self.assertAllEqual(ret, [[3.0], [3.0], [3.0]])
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testEagerExceptionHandling(self):
     with test_util.device(use_gpu=True):
       self._testExceptionHandling(
@@ -534,7 +551,7 @@ class PyFuncTest(test.TestCase):
       self._testExceptionHandling(WeirdError, errors.UnknownError, eager=True)
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testEagerReturningVariableRaisesError(self):
     def return_variable():
       return resource_variable_ops.ResourceVariable(0.0)
@@ -558,6 +575,7 @@ class PyFuncTest(test.TestCase):
     dy_dx = tape.gradient(y, x)
     self.assertEqual(self.evaluate(dy_dx), 6.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testEagerGradientGraph(self):
 
     def f(x):
@@ -568,6 +586,7 @@ class PyFuncTest(test.TestCase):
     dy_dx = gradients_impl.gradients(y, x)[0]
     self.assertEqual(self.evaluate(dy_dx), 6.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testEagerGradientGraphTwoOutputs(self):
 
     def f(x, y):
@@ -597,6 +616,7 @@ class PyFuncTest(test.TestCase):
     self.assertEqual(self.evaluate(dz_dx), 6.0)
     self.assertEqual(self.evaluate(dz_dy), 8.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testEagerGradientGraphMultipleArgs(self):
 
     def f(x, y):
@@ -610,6 +630,7 @@ class PyFuncTest(test.TestCase):
     self.assertEqual(self.evaluate(dz_dx), 6.0)
     self.assertEqual(self.evaluate(dz_dy), 8.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testEagerGradientGraphLogHuber(self):
 
     def log_huber(x, m):
@@ -631,6 +652,7 @@ class PyFuncTest(test.TestCase):
       self.assertEqual(y, 1.0)
       self.assertEqual(dy_dx, 2.0)
 
+  @test_util.run_v1_only("b/120545219")
   def testEagerRespectsDevicePlacmentOfOp(self):
 
     def f(x):
diff --git a/tensorflow/python/kernel_tests/qr_op_test.py b/tensorflow/python/kernel_tests/qr_op_test.py
index 5e9767f20c..5adb95c7d6 100644
--- a/tensorflow/python/kernel_tests/qr_op_test.py
+++ b/tensorflow/python/kernel_tests/qr_op_test.py
@@ -40,6 +40,7 @@ def _AddTest(test_class, op_name, testcase_name, fn):
 
 class QrOpTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testWrongDimensions(self):
     # The input to qr should be a tensor of at least rank 2.
     scalar = constant_op.constant(1.)
@@ -115,6 +116,7 @@ def _GetQrOpTest(dtype_, shape_, full_matrices_, use_static_shape_):
       tol = 1e-14
     self.assertAllClose(identity, xx, atol=tol)
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     np.random.seed(1)
     x_np = np.random.uniform(
@@ -163,6 +165,7 @@ class QrGradOpTest(test.TestCase):
 
 def _GetQrGradOpTest(dtype_, shape_, full_matrices_):
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     np.random.seed(42)
     a = np.random.uniform(low=-1.0, high=1.0, size=shape_).astype(dtype_)
diff --git a/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py b/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
index ed4f5434d9..dd814a22b4 100644
--- a/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
+++ b/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
@@ -29,11 +29,13 @@ from tensorflow.python.framework import dtypes as dtypes_lib
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 
 
+@test_util.run_v1_only("b/120545219")
 class RandomShuffleQueueTest(test.TestCase):
 
   def setUp(self):
@@ -1415,6 +1417,7 @@ class RandomShuffleQueueTest(test.TestCase):
 
       self.assertItemsEqual(elem, results)
 
+  @test_util.run_v1_only("b/120545219")
   def testBigDequeueMany(self):
     with self.cached_session() as sess:
       q = data_flow_ops.RandomShuffleQueue(2, 0, dtypes_lib.int32, ((),))
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 3992d6bdfb..433957fd1d 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -628,7 +628,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
           variable_def=other_v_def)
       self.assertTrue(other_v_prime._cached_value is not None)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testVariableDefInitializedInstances(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
       v_def = resource_variable_ops.ResourceVariable(
@@ -733,7 +733,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
     self.assertEqual(0.0, self.evaluate(v.value()))
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testDestroyResource(self):
     v = resource_variable_ops.ResourceVariable(3.0, name="var0")
     self.evaluate(variables.global_variables_initializer())
@@ -792,7 +792,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       with self.assertRaises(ValueError):
         _ = w.value().op.get_attr("_class")
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSharedName(self):
     with self.cached_session():
       v = resource_variable_ops.ResourceVariable(300.0, name="var4")
@@ -849,7 +849,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       v.initializer.run(feed_dict={v.initial_value: 3.0})
       self.assertEqual(3.0, v.value().eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testControlFlowInitialization(self):
     """Expects an error if an initializer is in a control-flow scope."""
 
@@ -986,7 +986,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(self.evaluate(v.assign_add(1)), [1, 2, 3, 4])
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCopyToGraphUninitialized(self):
     v = resource_variable_ops.ResourceVariable([0, 1, 2, 3])
     copy_to_graph = ops.Graph()
diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index 3bc457f8fb..a49496e4ef 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -262,7 +262,7 @@ class RNNTest(test.TestCase):
       rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32, sequence_length=[4])
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTensorArrayStateIsAccepted(self):
     cell = TensorArrayStateRNNCell()
     in_eager_mode = context.executing_eagerly()
diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
index c1241ba87e..8510a08f0c 100644
--- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
+++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py
@@ -217,7 +217,7 @@ class StatefulScatterNdTest(test.TestCase):
   def testVariableRankAdd(self):
     self._VariableRankTests(_NumpyAdd, state_ops.scatter_nd_add)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testVariableRankSub(self):
     self._VariableRankTests(_NumpySub, state_ops.scatter_nd_sub)
 
@@ -235,7 +235,7 @@ class StatefulScatterNdTest(test.TestCase):
         self._VariableRankTest(
             np_scatter, tf_scatter, vtype, itype, repeat_indices=True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testScatterRepeatIndices(self):
     """This tests scatter_add using indices that repeat."""
     self._ScatterRepeatIndicesTest(_NumpyAdd, state_ops.scatter_nd_add)
@@ -257,7 +257,7 @@ class StatefulScatterNdTest(test.TestCase):
   #     session.run([update0, update1])
   #     self.assertAllEqual([False, True], self.evaluate(var))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testScatterOutOfRangeCpu(self):
     # TODO(simister): Re-enable once binary size increase due to
     # scatter_nd ops is under control.
@@ -294,7 +294,7 @@ class StatefulScatterNdTest(test.TestCase):
         state_ops.scatter_nd_update(ref, indices,
                                     updates).get_shape().as_list(), shape)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testResVarInvalidOutputShape(self):
     res = variables.Variable(
         initial_value=lambda: array_ops.zeros(shape=[], dtype=dtypes.float32),
@@ -509,7 +509,7 @@ class ScatterNdTest(test.TestCase):
         ValueError, "Indices and updates specified for empty output shape"):
       self.scatter_nd(indices, updates, shape)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testEmptyOutputShape2(self):
     indices = array_ops.placeholder(dtypes.int32, shape=None)
     updates = array_ops.placeholder(dtypes.int32, shape=None)
@@ -717,6 +717,7 @@ class ScatterNdTensorTest(test.TestCase):
     self.assertAllEqual(subbed,
                         constant_op.constant([1, -10, 1, -9, -8, 1, 1, -11]))
 
+  @test_util.run_v1_only("b/120545219")
   def testUpdateAddSubGradients(self):
 
     with self.cached_session():
diff --git a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
index ce782dbc28..47b22ec296 100644
--- a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
+++ b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes as dtypes_lib
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import linalg_ops
@@ -39,6 +40,7 @@ def _AddTest(test_class, op_name, testcase_name, fn):
 
 class SelfAdjointEigTest(test.TestCase):
 
+  @test_util.run_deprecated_v1
   def testWrongDimensions(self):
     # The input to self_adjoint_eig should be a tensor of
     # at least rank 2.
@@ -49,6 +51,7 @@ class SelfAdjointEigTest(test.TestCase):
     with self.assertRaises(ValueError):
       linalg_ops.self_adjoint_eig(vector)
 
+  @test_util.run_deprecated_v1
   def testConcurrentExecutesWithoutError(self):
     all_ops = []
     with self.session(use_gpu=True) as sess:
diff --git a/tensorflow/python/kernel_tests/session_ops_test.py b/tensorflow/python/kernel_tests/session_ops_test.py
index dc663cb091..bc5d8e8151 100644
--- a/tensorflow/python/kernel_tests/session_ops_test.py
+++ b/tensorflow/python/kernel_tests/session_ops_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import session_ops
@@ -28,6 +29,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
+@test_util.run_v1_only("b/120545219")
 class SessionOpsTest(test.TestCase):
 
   def testHandleBasic(self):
@@ -232,6 +234,7 @@ class SessionOpsTest(test.TestCase):
                      b_p: b_handle.handle})
       self.assertEqual(3.0, c_handle.eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testFeedOneHandleDirectly(self):
     with self.cached_session() as sess:
       a = constant_op.constant(10.0)
@@ -243,6 +246,7 @@ class SessionOpsTest(test.TestCase):
 
       self.assertAllClose(2500.0, sess.run(d, feed_dict={c: h_c}))
 
+  @test_util.run_v1_only("b/120545219")
   def testDirectHandleFeedOverlappingWithFetches(self):
     with self.cached_session() as sess:
       a = constant_op.constant(10.0)
@@ -283,6 +287,7 @@ class SessionOpsTest(test.TestCase):
       self.assertAllClose(48.0, sess.run(e, feed_dict={c: h_c, d: h_d}))
       self.assertAllClose(-48.0, sess.run(e, feed_dict={c: h_d, d: h_c}))
 
+  @test_util.run_v1_only("b/120545219")
   def testFeedHandleToVariableDirectly(self):
     with self.cached_session() as sess:
       a = variables.Variable(12.0)
diff --git a/tensorflow/python/kernel_tests/signal/reconstruction_ops_test.py b/tensorflow/python/kernel_tests/signal/reconstruction_ops_test.py
index 4cb6cedee9..e0ce06418a 100644
--- a/tensorflow/python/kernel_tests/signal/reconstruction_ops_test.py
+++ b/tensorflow/python/kernel_tests/signal/reconstruction_ops_test.py
@@ -66,6 +66,7 @@ class ReconstructionOpsTest(test.TestCase):
 
       self.assertAllClose(output, expected_output)
 
+  @test_util.run_deprecated_v1
   def test_unknown_shapes(self):
     # This test uses placeholders and does not work in eager mode.
     if context.executing_eagerly():
@@ -85,6 +86,7 @@ class ReconstructionOpsTest(test.TestCase):
 
       self.assertAllClose(output, expected_output)
 
+  @test_util.run_deprecated_v1
   def test_unknown_rank(self):
     # This test uses placeholders and does not work in eager mode.
     if context.executing_eagerly():
@@ -104,6 +106,7 @@ class ReconstructionOpsTest(test.TestCase):
 
       self.assertAllClose(output, expected_output)
 
+  @test_util.run_deprecated_v1
   def test_fast_path(self):
     # This test uses tensor names and does not work in eager mode.
     if context.executing_eagerly():
diff --git a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
index 275c86e534..4a967b6562 100644
--- a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py
@@ -267,7 +267,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       self.assertAllEqual(val.values, [[5, 5], [0, 20], [30, 0]])
       self.assertAllEqual(val.dense_shape, [-1, 2])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelApplyGradMean(self):
     with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -299,7 +299,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
           np.array([[expected_val, 0], [0, expected_val]]).astype(np.float32),
           val, sess)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelApplyGradSum(self):
     with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -334,7 +334,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
           np.array([[expected_val, 0], [0, expected_val]]).astype(np.float32),
           val, sess)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testParallelTakeGrad(self):
     with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -374,7 +374,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
         self._assertEqual_nparray(
             np.array([[0, 0], [elems[i], 0]]), results[i], sess)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAccumulatorApplyAndBlockingTake(self):
     with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -410,7 +410,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
     with self.assertRaisesOpError("was cancelled"):
       self.evaluate(takeg_op)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAccumulatorCancel(self):
     with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -430,7 +430,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
 
       takeg_thread.join()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonVectorIndices(self):
     with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -443,7 +443,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
             grad_indices=[[0, 1], [1, 0]],
             grad_values=np.array([1, 2]).astype(np.float32)).run()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testZeroDimensionValues(self):
     with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -454,7 +454,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
         q.apply_grad(
             grad_indices=[0], grad_values=np.array(1).astype(np.float32)).run()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWrongNonEmptyInputValues(self):
     with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -466,7 +466,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
             grad_indices=[0, 1],
             grad_values=np.array([[0, 1, 1]]).astype(np.float32)).run()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testDynamicNonVectorIndices(self):
     with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -486,7 +486,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
                      x_values: np.array([1, 2]).astype(np.float32)
                  })
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testDynamicWrongNonEmptyInputValues(self):
     with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -505,7 +505,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
                      x_values: np.array([[0, 1, 1]]).astype(np.float32)
                  })
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testEmptyShapeApply(self):
     with self.cached_session():
       q = data_flow_ops.SparseConditionalAccumulator(
@@ -531,7 +531,7 @@ class IndexedSlicesConditionalAccumulatorTest(test.TestCase):
       q.apply_grad(grad_indices=[0], grad_values=[1.0], grad_shape=[]).run()
       q.apply_grad(grad_indices=[0], grad_values=[1.0]).run()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testValidateShape(self):
     with self.cached_session() as sess:
       q = data_flow_ops.SparseConditionalAccumulator(
diff --git a/tensorflow/python/kernel_tests/stack_ops_test.py b/tensorflow/python/kernel_tests/stack_ops_test.py
index d50f3f4680..1930d2484f 100644
--- a/tensorflow/python/kernel_tests/stack_ops_test.py
+++ b/tensorflow/python/kernel_tests/stack_ops_test.py
@@ -96,7 +96,7 @@ class StackOpTest(test.TestCase):
           c1, b1, [r, v], [r.get_shape(), tensor_shape.unknown_shape()])
       self.assertAllClose(np.ones(2000) * 10.0, self.evaluate(ry))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testStackWhileSwap(self):
     self._testStackWhileSwap(use_gpu=False)
     self._testStackWhileSwap(use_gpu=True)
@@ -248,7 +248,7 @@ class StackOpRefTest(test.TestCase):
           c1, b1, [r, v], [r.get_shape(), tensor_shape.unknown_shape()])
       self.assertAllClose(np.ones(2000) * 10.0, self.evaluate(ry))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testStackWhileSwap(self):
     self._testStackWhileSwap(use_gpu=False)
     self._testStackWhileSwap(use_gpu=True)
diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py
index 94155fd117..cfa9f122d1 100644
--- a/tensorflow/python/kernel_tests/svd_op_test.py
+++ b/tensorflow/python/kernel_tests/svd_op_test.py
@@ -22,6 +22,7 @@ import numpy as np
 
 from tensorflow.python import tf2
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import linalg_ops
@@ -39,6 +40,7 @@ def _AddTest(test_class, op_name, testcase_name, fn):
 
 class SvdOpTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testWrongDimensions(self):
     # The input to svd should be a tensor of at least rank 2.
     scalar = constant_op.constant(1.)
@@ -50,6 +52,7 @@ class SvdOpTest(test.TestCase):
                                  "Shape must be at least rank 2 but is rank 1"):
       linalg_ops.svd(vector)
 
+  @test_util.run_v1_only("b/120545219")
   def testConcurrentExecutesWithoutError(self):
     with self.session(use_gpu=True) as sess:
       all_ops = []
@@ -126,6 +129,7 @@ def _GetSvdOpTest(dtype_, shape_, use_static_shape_, compute_uv_,
     identity = array_ops.matrix_band_part(array_ops.ones_like(xx), 0, 0)
     self.assertAllClose(identity, xx, atol=tol)
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     is_complex = dtype_ in (np.complex64, np.complex128)
     is_single = dtype_ in (np.float32, np.complex64)
@@ -214,6 +218,7 @@ def _GetSvdGradOpTest(dtype_, shape_, compute_uv_, full_matrices_):
     tf_v *= phase[..., :n]
     return tf_s, tf_u, tf_v
 
+  @test_util.run_v1_only("b/120545219")
   def Test(self):
     np.random.seed(42)
     a = np.random.uniform(low=-1.0, high=1.0, size=shape_).astype(dtype_)
diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index 884c04eb7a..88625841bc 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -309,7 +309,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArraySplitRead(dtypes.string)
 
   @test_util.disable_control_flow_v2("v2 does not support TensorArray.grad.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("v2 does not support TensorArray.grad.")
   def testSkipEagerTensorGradArrayWriteRead(self):
     with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
@@ -364,7 +364,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual([2.0], session.run(g2))
 
   @test_util.disable_control_flow_v2("v2 does not support TensorArray.grad.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("v2 does not support TensorArray.grad.")
   def testSkipEagerTensorGradArrayDynamicWriteRead(self):
     with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
@@ -407,7 +407,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(3, g_vs)
 
   @test_util.disable_control_flow_v2("v2 does not support TensorArray.grad.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("v2 does not support TensorArray.grad.")
   def testSkipEagerTensorGradAccessTwiceReceiveSameObject(self):
     with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
@@ -424,7 +424,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(t_g_ta_0, t_g_ta_1)
       self.assertAllEqual([[4.0, 5.0]], d_r1_0)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTensorArrayWriteWrongIndexOrDataTypeFails(self):
     with self.session(use_gpu=True):
       ta = _make_ta(3, "foo", dtype=dtypes.float32)
@@ -458,7 +458,7 @@ class TensorArrayTest(test.TestCase):
       with self.assertRaisesOpError(error_msg):
         self.evaluate(ta.write(3, 3.0).flow)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTensorArrayReadWrongIndexOrDataTypeFails(self):
     with self.session(use_gpu=True):
       ta = _make_ta(3, "foo", dtype=dtypes.float32)
@@ -493,7 +493,7 @@ class TensorArrayTest(test.TestCase):
         self.evaluate(ta.read(3))
 
   @test_util.disable_control_flow_v2("v2 allows multiple writes.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("v2 allows multiple writes.")
   def testSkipEagerTensorArrayWriteMultipleFails(self):
     with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
@@ -504,7 +504,7 @@ class TensorArrayTest(test.TestCase):
           "it has already been written to."):
         self.evaluate(ta.write(2, 3.0).write(2, 3.0).flow)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTensorArrayConcatIncompatibleShapesFails(self):
     with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
@@ -536,7 +536,7 @@ class TensorArrayTest(test.TestCase):
       with self.assertRaisesOpError("shape"):
         self.evaluate(w3.concat())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTensorArraySplitIncompatibleShapesFails(self):
     with self.session(use_gpu=True):
       in_eager_mode = context.executing_eagerly()
@@ -611,14 +611,14 @@ class TensorArrayTest(test.TestCase):
         wb1_grad.flow.eval()
 
   @test_util.disable_control_flow_v2("v2 does not support TensorArray.grad.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("v2 does not support TensorArray.grad.")
   def testSkipEagerTensorArrayWriteGradientAddMultipleAdds(self):
     for dtype in (dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64,
                   dtypes.complex64, dtypes.complex128):
       self._testTensorArrayWriteGradientAddMultipleAdds(dtype)
 
   @test_util.disable_control_flow_v2("Low level legacy TA op test.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("Low level legacy TA op test.")
   def testSkipEagerTensorArrayGradWithShapeKnownElementShape(self):
     with self.session(use_gpu=True) as sess:
       ta = tensor_array_ops.TensorArray(
@@ -649,7 +649,7 @@ class TensorArrayTest(test.TestCase):
                           sess.run(read_value, feed_dict={value: fed_value}))
 
   @test_util.disable_control_flow_v2("Low level legacy TA op test.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("Low level legacy TA op test.")
   def testSkipEagerTensorArrayGradWithShapeUnknownElementShape(self):
     with self.session(use_gpu=True) as sess:
       ta = tensor_array_ops.TensorArray(
@@ -778,7 +778,7 @@ class TensorArrayTest(test.TestCase):
     self._testTensorArrayGradientWritePackConcatAndRead()
 
   @test_util.disable_control_flow_v2("v2 does not support clear_after_read.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("v2 does not support clear_after_read.")
   def testTensorArrayReadTwice(self):
     with self.session(use_gpu=True):
       value = constant_op.constant([[1.0, -1.0], [10.0, -10.0]])
@@ -1351,7 +1351,7 @@ class TensorArrayTest(test.TestCase):
                                     .ENABLE_TENSOR_ARRAY_V2 else v1_msg):
         ta.stack().eval()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSkipEagerTensorArrayEvalEmpty(self):
     self._testTensorArrayEvalEmpty()
 
@@ -1443,7 +1443,7 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(expected_grad, grad_vals[0])
 
   @test_util.disable_control_flow_v2("colocate_with not supported in v2.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSkipEagerTensorArrayGetsDeviceFromFirstWrite(self):
     with ops.device("/job:worker/task:0/cpu:0"):
       # this initial device will be ignored.
@@ -1493,7 +1493,7 @@ class TensorArrayTest(test.TestCase):
             [s for s in dev_stats[d] if "/TensorArray" in s.node_name])
 
   @test_util.disable_control_flow_v2("colocate_with not supported in v2.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSkipEagerTensorArrayGetsDeviceFromFirstWriteInWhileLoop(self):
     with ops.device("/job:worker/task:0/cpu:0"):
       ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=2)
@@ -1524,7 +1524,7 @@ class TensorArrayTest(test.TestCase):
             [s for s in dev_stats[d] if "TensorArray" == s.node_name])
 
   @test_util.disable_control_flow_v2("colocate_with not supported in v2.")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSkipEagerTensorArrayDisabledColocateWithFirstWriteCall(self):
     with ops.device("/job:worker/task:0/cpu:0"):
       ta = tensor_array_ops.TensorArray(
diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py
index 2894b7dd50..febfe23b16 100644
--- a/tensorflow/python/kernel_tests/tensordot_op_test.py
+++ b/tensorflow/python/kernel_tests/tensordot_op_test.py
@@ -24,6 +24,7 @@ from tensorflow.python import tf2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test as test_lib
@@ -40,6 +41,7 @@ def _add_test(test, test_name, fn):
 
 class TensordotTest(test_lib.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def test_invalid_shape(self):
     a = [[1, 2], [3, 4]]
     b = [[1, 2], [3, 4], [5, 6]]
@@ -63,6 +65,7 @@ class TensordotTest(test_lib.TestCase):
                 axes_ph: (a_axes, b_axes)
             })
 
+  @test_util.run_v1_only("b/120545219")
   def test_invalid_axes(self):
     a = [[1, 2], [3, 4]]
     b = [[1, 2], [3, 4]]
@@ -105,6 +108,7 @@ class TensordotTest(test_lib.TestCase):
         self.assertAllEqual(tf_ans.shape, np_ans.shape)
         self.assertAllEqual(tf_ans, np_ans)
 
+  @test_util.run_v1_only("b/120545219")
   def test_partial_shape_inference(self):
     for axes in ([1], [0]), 1:
       a = array_ops.placeholder(dtypes.float32)
diff --git a/tensorflow/python/kernel_tests/unicode_encode_op_test.py b/tensorflow/python/kernel_tests/unicode_encode_op_test.py
index a5a5c2017c..72c8a26766 100644
--- a/tensorflow/python/kernel_tests/unicode_encode_op_test.py
+++ b/tensorflow/python/kernel_tests/unicode_encode_op_test.py
@@ -24,6 +24,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import errors_impl as errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_string_ops
 from tensorflow.python.platform import test
@@ -58,6 +59,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
         ragged_string_ops.unicode_encode(test_value, encoding, "strict").eval()
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def testIgnoreErrors(self, encoding):
     test_value = np.array([72, 101, 2147483647, -1, 111], np.int32)
     expected_value = u"Heo".encode(encoding)
@@ -69,6 +71,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def testReplaceErrors(self, encoding):
     test_value = np.array([72, 101, 2147483647, -1, 111], np.int32)
     expected_value = u"He\U0000fffd\U0000fffdo".encode(encoding)
@@ -109,6 +112,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
   # -- regular Tensor tests -- #
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def testVector(self, encoding):
     test_value = np.array([72, 101, 108, 108, 111], np.int32)
     expected_value = u"Hello".encode(encoding)
@@ -144,6 +148,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def testMatrix(self, encoding):
     test_value = np.array(
         [[72, 128516, 108, 108, 111], [87, 128516, 114, 108, 100]], np.int32)
@@ -157,6 +162,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def test3DimMatrix(self, encoding):
     test_value = constant_op.constant(
         [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]],
@@ -172,6 +178,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def test4DimMatrix(self, encoding):
     test_value = constant_op.constant(
         [[[[72, 101, 108, 108, 111]], [[87, 111, 114, 108, 100]]],
@@ -192,6 +199,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
   # -- Ragged Tensor tests -- #
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def testRaggedMatrix(self, encoding):
     test_value = ragged_factory_ops.constant(
         [[72, 195, 108, 108, 111], [87, 128516, 114, 108, 100, 46]], np.int32)
@@ -205,6 +213,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def test3DimMatrixWithRagged2ndDim(self, encoding):
     test_value = ragged_factory_ops.constant(
         [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]],
@@ -224,6 +233,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result.tolist(), expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def test3DimMatrixWithRagged3rdDim(self, encoding):
     test_value = ragged_factory_ops.constant(
         [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100, 46]],
@@ -241,6 +251,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result.tolist(), expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def test3DimMatrixWithRagged2ndAnd3rdDim(self, encoding):
     test_value = ragged_factory_ops.constant(
         [[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100, 46]], [],
@@ -254,6 +265,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result.tolist(), expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def test4DimRaggedMatrix(self, encoding):
     test_value = ragged_factory_ops.constant(
         [[[[72, 101, 108, 108, 111], [87, 111, 114, 108, 100]]],
@@ -267,6 +279,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
       self.assertAllEqual(result.tolist(), expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
+  @test_util.run_v1_only("b/120545219")
   def testRaggedMatrixWithMultiDimensionInnerValues(self, encoding):
     test_inner_values = constant_op.constant([[[72, 101, 108, 108, 111],
                                                [87, 111, 114, 108, 100]],
diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 08d885e8a8..336e9b0bca 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -43,7 +43,7 @@ from tensorflow.python.util import compat
 
 class VariablesTestCase(test.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInitialization(self):
     with self.cached_session():
       var0 = variables.VariableV1(0.0)
@@ -71,7 +71,7 @@ class VariablesTestCase(test.TestCase):
       self.assertAllClose(0.0, self.evaluate(var0))
       self.assertAllClose(1.1, self.evaluate(var1))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInitializationOrder(self):
     with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([3, 6]), name="rnd")
@@ -194,7 +194,7 @@ class VariablesTestCase(test.TestCase):
   def testCountUpToInt64(self):
     self._countUpToTest(dtypes.int64)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testControlDepsNone(self):
     with self.cached_session():
       c = constant_op.constant(1.0)
@@ -208,7 +208,7 @@ class VariablesTestCase(test.TestCase):
       self.assertEqual([], var_x.value().op.control_inputs)
       self.assertEqual([], var_x._ref().op.control_inputs)  # pylint: disable=protected-access
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testControlFlow(self):
     with self.cached_session() as sess:
       v0 = variables.Variable(0, name="v0")
@@ -245,7 +245,7 @@ class VariablesTestCase(test.TestCase):
       self.evaluate(v0.initializer)
       self.evaluate(add)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testControlFlowInitialization(self):
     """Expects an error if an initializer is in a control-flow scope."""
     def cond(i, _):
@@ -412,7 +412,7 @@ class VariablesTestCase(test.TestCase):
       variables.global_variables_initializer().run()
       self.assertAllClose([1, 12], self.evaluate(var))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testColocation(self):
     with ops.device("/job:ps"):
       var = variables.VariableV1(0, name="v")
@@ -421,7 +421,7 @@ class VariablesTestCase(test.TestCase):
     self.assertDeviceEqual("/job:ps", assign_op.device)
     self.assertEqual([b"loc:@v"], assign_op.op.colocation_groups())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInitializerFunction(self):
     value = [[-42], [133.7]]
     shape = [2, 1]
@@ -459,7 +459,7 @@ class VariablesTestCase(test.TestCase):
           lambda: constant_op.constant(1.),
           constraint=constraint)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNoRefDataRace(self):
     with self.cached_session():
       a = variables.Variable([1, 2, 3], dtype=dtypes.float32)
@@ -489,7 +489,7 @@ class VariablesTestCase(test.TestCase):
       for i in v2.initializer.inputs:
         self.assertEqual(expected_group_v2, i.op.colocation_groups())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testVariableDefInitializedInstances(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
       v_def = variables.Variable(
@@ -542,7 +542,7 @@ class VariablesTestCase(test.TestCase):
 
       self.assertAllClose(np.ones((5, 5), np.float32), self.evaluate(var))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRepr(self):
     var = variables.VariableV1(np.zeros((5, 5), np.float32), name="noop")
     self.assertEqual(
@@ -576,7 +576,7 @@ class IsInitializedTest(test.TestCase):
       variables.global_variables_initializer().run()
       self.assertEqual(0, self.evaluate(uninited).size)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testVariableList(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.VariableV1([1, 2], name="v")
@@ -614,7 +614,7 @@ class ObsoleteIsInitializedTest(test.TestCase):
     with ops.Graph().as_default():
       self.assertEqual(None, variables.assert_variables_initialized())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testVariables(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.VariableV1([1, 2])
@@ -626,7 +626,7 @@ class ObsoleteIsInitializedTest(test.TestCase):
       variables.global_variables_initializer().run()
       self.evaluate(inited)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testVariableList(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.VariableV1([1, 2])
diff --git a/tensorflow/python/kernel_tests/while_v2_test.py b/tensorflow/python/kernel_tests/while_v2_test.py
index d155544f3e..cae459a34e 100644
--- a/tensorflow/python/kernel_tests/while_v2_test.py
+++ b/tensorflow/python/kernel_tests/while_v2_test.py
@@ -53,6 +53,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
       self.assertEqual(self.evaluate(ret), 16.)
       self.assertSequenceEqual(self.evaluate(grad), [32.])
 
+  @test_util.run_v1_only("b/120545219")
   def testReturnSameStructureTrue(self):
     x = constant_op.constant(2.)
     ret = while_loop_v2(
@@ -145,7 +146,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
       self.assertSequenceEqual(self.evaluate(grad), [32.])
       self.assertSequenceEqual(self.evaluate(grad_grad), [48.])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testPruning(self):
     x = constant_op.constant(1)
 
@@ -441,6 +442,7 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     # grad = gradients_impl.gradients(output, [n])
     # self.assertEqual(self.evaluate(grad), 3.5)
 
+  @test_util.run_deprecated_v1
   def testForwardPassRewrite(self):
     x = constant_op.constant(1.0, name="x")
     output = while_v2.while_loop(lambda x: x < 10.0,
diff --git a/tensorflow/python/layers/normalization_test.py b/tensorflow/python/layers/normalization_test.py
index 07d8e40b75..6535f74129 100644
--- a/tensorflow/python/layers/normalization_test.py
+++ b/tensorflow/python/layers/normalization_test.py
@@ -38,6 +38,7 @@ from tensorflow.python.training import gradient_descent
 from tensorflow.python.training import saver as saver_lib
 
 
+@test_util.run_v1_only('b/120545219')
 class BNTest(test.TestCase):
 
   def _simple_model(self, image, fused, freeze_mode):
@@ -144,7 +145,6 @@ class BNTest(test.TestCase):
 
     return train_vars, loss_val
 
-  @test_util.run_deprecated_v1
   def testHalfPrecision(self):
     ref_vars, ref_loss = self._trainEvalSequence(
         dtype=dtypes.float32,
@@ -230,43 +230,33 @@ class BNTest(test.TestCase):
                                ckpt_b_use_gpu, use_gpu_test_a, use_gpu_test_b,
                                freeze_mode)
 
-  @test_util.run_deprecated_v1
   def testCheckpointFusedCPUAndFusedGPU(self):
     self._testCheckpointCrossDevice(True, False, True, True)
 
-  @test_util.run_deprecated_v1
   def testCheckpointFusedCPUAndFusedCPU(self):
     self._testCheckpointCrossDevice(True, False, True, False)
 
-  @test_util.run_deprecated_v1
   def testCheckpointFusedGPUAndFusedGPU(self):
     self._testCheckpointCrossDevice(True, True, True, True)
 
-  @test_util.run_deprecated_v1
   def testCheckpointNonFusedCPUAndNonFusedGPU(self):
     self._testCheckpointCrossDevice(False, False, False, True)
 
-  @test_util.run_deprecated_v1
   def testCheckpointNonFusedCPUAndNonFusedCPU(self):
     self._testCheckpointCrossDevice(False, False, False, False)
 
-  @test_util.run_deprecated_v1
   def testCheckpointNonFusedGPUAndNonFusedGPU(self):
     self._testCheckpointCrossDevice(False, True, False, True)
 
-  @test_util.run_deprecated_v1
   def testCheckpointNonFusedGPUAndFusedGPU(self):
     self._testCheckpointCrossDevice(False, True, True, True)
 
-  @test_util.run_deprecated_v1
   def testCheckpointNonFusedGPUAndFusedCPU(self):
     self._testCheckpointCrossDevice(False, True, True, False)
 
-  @test_util.run_deprecated_v1
   def testCheckpointNonFusedCPUAndFusedCPU(self):
     self._testCheckpointCrossDevice(False, False, True, False)
 
-  @test_util.run_deprecated_v1
   def testCreateBN(self):
     # Call layer.
     bn = normalization_layers.BatchNormalization(axis=1)
@@ -293,7 +283,6 @@ class BNTest(test.TestCase):
         ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES),
         bn.trainable_variables)
 
-  @test_util.run_deprecated_v1
   def testCreateFusedBNFloat16(self):
     # Call layer.
     bn = normalization_layers.BatchNormalization(axis=1, fused=True)
@@ -323,7 +312,6 @@ class BNTest(test.TestCase):
         ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES),
         bn.trainable_variables)
 
-  @test_util.run_deprecated_v1
   def test3DInputAxis1(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -367,7 +355,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def test3DInputAxis2(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -451,7 +438,6 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def test4DInputAxis2(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -493,7 +479,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def test4DInputAxis3(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -535,7 +520,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def test4DInputAxis3Fused(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -619,7 +603,6 @@ class BNTest(test.TestCase):
         self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
         self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def testNegativeAxis(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -662,7 +645,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def testBooleanLearningPhase(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -703,7 +685,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def testFunctionalNoReuse(self):
     inputs = variables.Variable(
         np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
@@ -756,7 +737,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def testFunctionalReuse(self):
     inputs1 = variables.Variable(
         np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
@@ -821,7 +801,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=2)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def testFunctionalReuseFromScope(self):
     inputs = variables.Variable(
         np.random.random((5, 4, 3, 6)), dtype=dtypes.float32)
@@ -836,7 +815,6 @@ class BNTest(test.TestCase):
           inputs, axis=-1, momentum=0.9, epsilon=epsilon, training=training)
       self.assertEqual(len(variables.global_variables()), 5)
 
-  @test_util.run_deprecated_v1
   def testNoCenter(self):
     bn = normalization_layers.BatchNormalization(axis=1, center=False)
     inputs = random_ops.random_uniform((5, 4, 3), seed=1)
@@ -852,7 +830,6 @@ class BNTest(test.TestCase):
     self.assertEqual(len(bn.trainable_variables), 1)
     self.assertEqual(len(bn.non_trainable_variables), 2)
 
-  @test_util.run_deprecated_v1
   def testNoScale(self):
     bn = normalization_layers.BatchNormalization(axis=1, scale=False)
     inputs = random_ops.random_uniform((5, 4, 3), seed=1)
@@ -868,7 +845,6 @@ class BNTest(test.TestCase):
     self.assertEqual(len(bn.trainable_variables), 1)
     self.assertEqual(len(bn.non_trainable_variables), 2)
 
-  @test_util.run_deprecated_v1
   def testRegularizers(self):
     reg = lambda x: 0.1 * math_ops.reduce_sum(x)
     bn = normalization_layers.BatchNormalization(axis=1, beta_regularizer=reg)
@@ -894,7 +870,6 @@ class BNTest(test.TestCase):
     self.assertEqual(bn.gamma_constraint, g_constraint)
     self.assertEqual(bn.beta_constraint, b_constraint)
 
-  @test_util.run_deprecated_v1
   def testRenorm(self):
     shape = (4, 3)
     xt = array_ops.placeholder(dtypes.float32, shape)
@@ -953,7 +928,6 @@ class BNTest(test.TestCase):
         self.assertAllClose(y_train, yt_val_train, atol=1e-5)
         self.assertAllClose(y_test, yt_val_test, atol=1e-5)
 
-  @test_util.run_deprecated_v1
   def testAdjustment(self):
     shape = (4, 3)
     xt = array_ops.placeholder(dtypes.float32, shape)
@@ -998,7 +972,6 @@ class BNTest(test.TestCase):
         self.assertAllClose(y_train, yt_val_train, atol=1e-5)
         self.assertAllClose(y_test, yt_val_test, atol=1e-5)
 
-  @test_util.run_deprecated_v1
   def testRenormWithAdjustment(self):
     shape = (4, 3)
     xt = array_ops.placeholder(dtypes.float32, shape)
@@ -1069,7 +1042,6 @@ class BNTest(test.TestCase):
       normalization_layers.batch_normalization(
           inp, virtual_batch_size=-1)
 
-  @test_util.run_deprecated_v1
   def testGhostBNVirtualBatchFull(self):
     shape = [6, 5, 4, 3]
     inp = random_ops.random_uniform(shape, seed=1)
@@ -1095,7 +1067,6 @@ class BNTest(test.TestCase):
         inp, virtual_batch_size=3)
     self.assertListEqual(out.shape.as_list(), shape)
 
-  @test_util.run_deprecated_v1
   def testGhostBNUnknownBatchSize(self):
     np_shape = [10, 5, 4]
     tf_shape = [None, 5, 4]
@@ -1111,7 +1082,6 @@ class BNTest(test.TestCase):
 
       self.assertListEqual(list(y.shape), np_shape)
 
-  @test_util.run_deprecated_v1
   def testGhostBN2Dims(self):
     shape = [6, 2]
     virtual_batch_size = 3
@@ -1165,7 +1135,6 @@ class BNTest(test.TestCase):
         self.assertAllClose(y_train, y_val_train, atol=1e-5)
         self.assertAllClose(y_test, y_val_test, atol=1e-5)
 
-  @test_util.run_deprecated_v1
   def testGhostBN4DimsAxis3(self):
     shape = [6, 10, 10, 3]
     virtual_batch_size = 2
@@ -1219,7 +1188,6 @@ class BNTest(test.TestCase):
         self.assertAllClose(y_train, y_val_train, atol=1e-2)
         self.assertAllClose(y_test, y_val_test, atol=1e-2)
 
-  @test_util.run_deprecated_v1
   def testGhostBN4DimsAxis1(self):
     shape = [6, 3, 10, 10]
     virtual_batch_size = 2
@@ -1290,7 +1258,6 @@ class BNTest(test.TestCase):
       normalization_layers.batch_normalization(
           inp, axis=[1, 2, 1])   # duplicate
 
-  @test_util.run_deprecated_v1
   def test3DInputMultiAxis12(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -1332,7 +1299,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def test5DInputMultiAxis123(self):
     epsilon = 1e-3
     bn = normalization_layers.BatchNormalization(
@@ -1374,7 +1340,6 @@ class BNTest(test.TestCase):
       self.assertAlmostEqual(np.mean(normed_np_output), 0., places=1)
       self.assertAlmostEqual(np.std(normed_np_output), 1., places=1)
 
-  @test_util.run_deprecated_v1
   def testGhostBN5DimsMultiAxis14(self):
     shape = [6, 3, 10, 10, 4]
     virtual_batch_size = 3
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index a9058c4a34..abdcbc7a3a 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -158,6 +158,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       grads = gradients.gradients(z, [x])
       self.assertTrue(all(x is not None for x in grads))
 
+  @test_util.run_v1_only("b/120545219")
   def testBoundaryContinue(self):
     # Test that we differentiate both 'x' and 'y' correctly when x is a
     # predecessor of y.
@@ -169,6 +170,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       self.assertTrue(all(x is not None for x in grads))
       self.assertEqual(6.0, grads[0].eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testAggregationMethodAccumulateN(self):
     with self.cached_session():
       x = constant(1.0)
@@ -182,6 +184,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       self.assertEqual(20.0, grads[0].eval())
       self.assertEqual(10.0, grads[1].eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testAggregationMethodAddN(self):
     with self.cached_session():
       x = constant(1.0)
@@ -193,6 +196,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       self.assertEqual(20.0, grads[0].eval())
       self.assertEqual(10.0, grads[1].eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testAggregationMethodTree(self):
     with self.cached_session():
       x = constant(1.0)
@@ -239,6 +243,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
             [dx, dy], feed_dict={x: [1.0], dy.indices: [0], dy.values: [2.0]})
       self.assertEqual(vdx, vdy)
 
+  @test_util.run_v1_only("b/120545219")
   def testNonDifferentiableSwitchInWhileLoop(self):
     with ops.Graph().as_default():
       v = array_ops.placeholder(dtypes.float32, [])
@@ -270,6 +275,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       gradient = gradients.gradients(graph.as_graph_element(var), var)
       self.assertIsNotNone(gradient)
 
+  @test_util.run_v1_only("b/120545219")
   def testVariableRefGradient(self):
     with ops.Graph().as_default():
       init = constant_op.constant(100.0)
@@ -277,6 +283,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       gradient = gradients.gradients(var._ref(), var)
       self.assertIsNotNone(gradient)
 
+  @test_util.run_v1_only("b/120545219")
   def testDependentYs(self):
     with self.cached_session():
       x = constant_op.constant(3.0)
@@ -292,6 +299,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       g = gradients.gradients([z, z2], x)
       self.assertAllClose(17502.0, g[0].eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testPartialDerivatives(self):
     with self.cached_session():
       x = constant_op.constant(1.)
@@ -302,6 +310,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
       partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y])
       self.assertEqual([1.0, 1.0], [g.eval() for g in partialg])
 
+  @test_util.run_v1_only("b/120545219")
   def testStopGradients(self):
     def _MakeGraph(rng, stop_gradients=()):
       def _FunctionOf(xs, k=3):
@@ -606,6 +615,7 @@ class PreventGradientTest(test_util.TensorFlowTestCase):
 
 class HessianVectorProductTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testHessianVectorProduct(self):
     # Manually compute the Hessian explicitly for a low-dimensional problem
     # and check that HessianVectorProduct matches multiplication by the
@@ -634,6 +644,7 @@ class HessianVectorProductTest(test_util.TensorFlowTestCase):
 
 class HessianTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testHessian1D(self):
     # Manually compute the Hessian explicitly for a low-dimensional problem
     # and check that `hessian` matches. Specifically, the Hessian of
@@ -651,6 +662,7 @@ class HessianTest(test_util.TensorFlowTestCase):
       hess_actual = self.evaluate(hess)
     self.assertAllClose(hess_value, hess_actual)
 
+  @test_util.run_v1_only("b/120545219")
   def testHessian1D_multi(self):
     # Test the computation of the hessian with respect to multiple tensors
     m = 4
@@ -671,6 +683,7 @@ class HessianTest(test_util.TensorFlowTestCase):
     for hess_value, hess_actual in zip(hess_values, hessians_actual):
       self.assertAllClose(hess_value, hess_actual)
 
+  @test_util.run_v1_only("b/120545219")
   def testHessianInvalidDimension(self):
     for shape in [(10, 10), None]:
       with self.cached_session(use_gpu=True):
@@ -679,6 +692,7 @@ class HessianTest(test_util.TensorFlowTestCase):
         with self.assertRaises(ValueError):
           gradients.hessians(x, x)
 
+  @test_util.run_v1_only("b/120545219")
   def testHessian2D_square_matrix(self):
     # Manually compute the Hessian explicitly for a low-dimensional problem
     # and check that `hessian` matches. Specifically, the Hessian of
@@ -700,6 +714,7 @@ class HessianTest(test_util.TensorFlowTestCase):
     self.assertAllEqual((m, m, m, m), hess_actual.shape)
     self.assertAllClose(hess_value, hess_actual.reshape((m * m, m * m)))
 
+  @test_util.run_v1_only("b/120545219")
   def testHessian2D_non_square_matrix(self):
     m = 3
     n = 4
@@ -722,6 +737,7 @@ class HessianTest(test_util.TensorFlowTestCase):
 
 class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testIndexedSlicesToTensor(self):
     with self.cached_session():
       np_val = np.random.rand(4, 4, 4, 4).astype(np.float32)
@@ -731,6 +747,7 @@ class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase):
       c_dense = math_ops.multiply(c_sparse, 1.0)
       self.assertAllClose(np_val, self.evaluate(c_dense))
 
+  @test_util.run_v1_only("b/120545219")
   def testIndexedSlicesToTensorList(self):
     with self.cached_session():
       numpy_list = []
@@ -747,6 +764,7 @@ class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase):
       packed_sparse = array_ops.stack(sparse_list)
       self.assertAllClose(packed_dense.eval(), self.evaluate(packed_sparse))
 
+  @test_util.run_v1_only("b/120545219")
   def testInt64Indices(self):
     with self.cached_session():
       np_val = np.random.rand(4, 4, 4, 4).astype(np.float32)
@@ -759,6 +777,7 @@ class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase):
       c_dense = math_ops.multiply(c_sparse, 1.0)
       self.assertAllClose(np_val, self.evaluate(c_dense))
 
+  @test_util.run_v1_only("b/120545219")
   def testWarnings(self):
     # TODO(gunan) Reenable after this issue is fixed:
     # https://github.com/google/protobuf/issues/2812
@@ -802,6 +821,7 @@ class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase):
 
 class OnlyRealGradientsTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testRealOnly(self):
     x = constant_op.constant(7+3j, dtype=dtypes.complex64)
     y = math_ops.square(x)
@@ -814,6 +834,7 @@ class OnlyRealGradientsTest(test_util.TensorFlowTestCase):
 
 class ResourceCondTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testBasic(self):
     gamma = resource_variable_ops.ResourceVariable(
         np.random.random((3,)),
@@ -943,6 +964,7 @@ class CustomGradientTest(test_util.TensorFlowTestCase):
       self.assertEqual(6., math_ops.reduce_sum(dx).numpy())
       self.assertEqual(8., math_ops.reduce_sum(dw).numpy())
 
+  @test_util.run_v1_only("b/120545219")
   def testCustomGradientErrorsWithNonResourceVariables(self):
 
     def F(x, use_resource=False):
@@ -993,6 +1015,7 @@ class CustomGradientTest(test_util.TensorFlowTestCase):
       # Smoke test to ensure numpy inputs are accepted
       F(x)
 
+  @test_util.run_v1_only("b/120545219")
   def testRVGradientsDynamicCond(self):
     with self.cached_session():
       alpha = resource_variable_ops.ResourceVariable(
diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py
index 414a5cdbbe..82fab74183 100644
--- a/tensorflow/python/ops/nn_test.py
+++ b/tensorflow/python/ops/nn_test.py
@@ -450,6 +450,7 @@ class DropoutTest(test_lib.TestCase):
     with self.assertRaises(ValueError):
       nn_ops.dropout(t, array_ops.placeholder(dtypes.float32, shape=[2]))
 
+  @test_util.run_deprecated_v1
   def testInvalidRate(self):
     x_dim = 40
     y_dim = 30
diff --git a/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py b/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
index d9d840500c..7fe185641f 100644
--- a/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
@@ -135,7 +135,7 @@ class RaggedBatchGatherOpTest(test_util.TensorFlowTestCase,
           expected=ragged.constant_value(
               [[[[b'c', b'a'], [b'd', b'd']], [[b'f', b'e']]]], ragged_rank=2)),
   ])
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedBatchGather(self, descr, params, indices, expected):
     result = ragged.batch_gather(params, indices)
     self.assertEqual(
@@ -188,7 +188,7 @@ class RaggedBatchGatherOpTest(test_util.TensorFlowTestCase,
            indices=[[[0]]],
            message='batch shape from indices does not match params shape'),
   ])
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedBatchGatherStaticError(self,
                                        params,
                                        indices,
diff --git a/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py b/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
index d939d9d634..763b016405 100644
--- a/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
@@ -298,7 +298,7 @@ class RaggedBooleanMaskOpTest(test_util.TensorFlowTestCase,
           keepdims=True,
           expected=ragged.constant_value([[[1], [4, 6]], [[7, 9], []]])),
   ])  # pyformat: disable
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testBooleanMask(self, descr, data, mask, keepdims, expected):
     actual = ragged.boolean_mask(data, mask, keepdims=keepdims)
     self.assertEqual(
diff --git a/tensorflow/python/ops/ragged/ragged_concat_op_test.py b/tensorflow/python/ops/ragged/ragged_concat_op_test.py
index 3699f90f46..ba7867418a 100644
--- a/tensorflow/python/ops/ragged/ragged_concat_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_concat_op_test.py
@@ -221,7 +221,7 @@ class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           axis=0,
           expected=[[b'a00', b'a01'], [], [b'a20', b'a21']]),
   )   # pyformat: disable
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedConcat(self,
                        descr,
                        rt_inputs,
@@ -280,7 +280,7 @@ class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           error=errors.InvalidArgumentError,
           message='Input tensors have incompatible shapes'),
   ])
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRuntimeError(self, rt_inputs, axis, error, message,
                        ragged_ranks=None):
     rt_inputs = [
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
index 2bb10adce0..2533c60c4e 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
@@ -126,6 +126,7 @@ BINARY_INT_OPS = [
 ]
 
 
+@test_util.run_v1_only('b/120545219')
 class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase,
                                parameterized.TestCase):
 
@@ -401,7 +402,6 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase,
         result_flat_values = array_ops.reshape(result, [-1])
       self.assertAllEqual(expected_flat_values, result_flat_values)
 
-  @test_util.run_deprecated_v1
   def testElementwiseOpUnknownRankError(self):
     x = ragged.constant([[1, 2], [3]])
     y = ragged.from_row_splits(
diff --git a/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py b/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
index 3c0db9e8fb..674dbab112 100644
--- a/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
@@ -29,7 +29,7 @@ from tensorflow.python.platform import googletest
 
 class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testDocStringExample(self):
     st = sparse_tensor.SparseTensor(
         indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]],
@@ -40,7 +40,7 @@ class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
     with self.test_session():
       self.assertEqual(rt.eval().tolist(), [[1, 2, 3], [4], [], [5]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testEmpty(self):
     st = sparse_tensor.SparseTensor(
         indices=array_ops.zeros([0, 2], dtype=dtypes.int64),
@@ -67,7 +67,7 @@ class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
     self.assertRaisesRegexp(ValueError, r'rank\(st_input\) must be 2',
                             ragged.from_sparse, st3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testGoodPartialSparseTensorRank(self):
     st1 = sparse_tensor.SparseTensor(
         indices=[[0, 0]],
@@ -82,7 +82,7 @@ class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
     ragged.from_sparse(st1)
     ragged.from_sparse(st2)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testNonRaggedSparseTensor(self):
     # "index_suffix" means the value of the innermost dimension of the index
     # (i.e., indices[i][-1]).
diff --git a/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py b/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
index 1d8a00cc18..09c17fad61 100644
--- a/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
@@ -31,7 +31,7 @@ from tensorflow.python.platform import googletest
 class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
                              parameterized.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testDocStringExamples(self):
     # The examples from ragged.from_tensor.__doc__.
     dt = constant_op.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
@@ -263,7 +263,7 @@ class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
                        [[[5, 6], [7]], [[0, 8], []]]]
       },
   )  # pyformat: disable
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedFromTensor(self,
                            tensor,
                            expected,
@@ -398,7 +398,7 @@ class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
           'expected': [[], []]
       },
   )
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testEmpty(self, dt_shape, expected, lengths=None, padding=None):
     dt = array_ops.zeros(dt_shape)
     rt = ragged.from_tensor(dt, lengths, padding)
diff --git a/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py b/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
index 62c6819374..b0a4fe9d31 100644
--- a/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import ragged
 from tensorflow.python.platform import googletest
 
 
+@test_util.run_v1_only('b/120545219')
 class RaggedGatherNdOpTest(test_util.TensorFlowTestCase,
                            parameterized.TestCase):
 
@@ -183,7 +184,6 @@ class RaggedGatherNdOpTest(test_util.TensorFlowTestCase,
           indices=[[0, 0, 1], [0, 0, 0], [0, 1, 0]],
           expected=[[b'c', b'd'], [b'a', b'b'], [b'e', b'f']]),
   ])  # pyformat: disable
-  @test_util.run_deprecated_v1
   def testRaggedGatherNd(self, descr, params, indices, expected):
     result = ragged.gather_nd(params, indices)
     self.assertEqual(
@@ -193,7 +193,6 @@ class RaggedGatherNdOpTest(test_util.TensorFlowTestCase,
         expected = expected.tolist()
       self.assertEqual(self.evaluate(result).tolist(), expected)
 
-  @test_util.run_deprecated_v1
   def testRaggedGatherNdUnknownRankError(self):
     params = ragged.constant([['a', 'b'], ['c', 'd']])
     indices1 = array_ops.placeholder(dtypes.int32, shape=None)
@@ -221,7 +220,6 @@ class RaggedGatherNdOpTest(test_util.TensorFlowTestCase,
           indices=ragged.constant([[0]]),
           message='The innermost dimension of indices may not be ragged'),
   ])
-  @test_util.run_deprecated_v1
   def testRaggedGatherNdStaticError(self,
                                     params,
                                     indices,
diff --git a/tensorflow/python/ops/ragged/ragged_gather_op_test.py b/tensorflow/python/ops/ragged/ragged_gather_op_test.py
index 76c90cdfee..d371745b73 100644
--- a/tensorflow/python/ops/ragged/ragged_gather_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_gather_op_test.py
@@ -30,7 +30,7 @@ from tensorflow.python.platform import googletest
 
 class RaggedTensorOpsTest(test_util.TensorFlowTestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testDocStringExamples(self):
     params = constant_op.constant(['a', 'b', 'c', 'd', 'e'])
     indices = constant_op.constant([3, 1, 2, 1, 0])
@@ -66,7 +66,7 @@ class RaggedTensorOpsTest(test_util.TensorFlowTestCase):
           ragged.gather(params, indices).eval().tolist(),
           [[b'f'], [b'a', b'b'], [b'f'], [b'c', b'd', b'e']])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testTensorParamsAndRaggedIndices(self):
     params = ['a', 'b', 'c', 'd', 'e']
     indices = ragged.constant([[2, 1], [1, 2, 0], [3]])
@@ -75,7 +75,7 @@ class RaggedTensorOpsTest(test_util.TensorFlowTestCase):
           ragged.gather(params, indices).eval().tolist(),
           [[b'c', b'b'], [b'b', b'c', b'a'], [b'd']])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedParamsAndRaggedIndices(self):
     params = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
     indices = ragged.constant([[2, 1], [1, 2, 0], [3]])
@@ -108,7 +108,7 @@ class RaggedTensorOpsTest(test_util.TensorFlowTestCase):
            [[[b'g']], [[b'g']]]]                                  #  [p2, p2]]
       )  # pyformat: disable
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testTensorParamsAnd4DRaggedIndices(self):
     indices = ragged.constant(
         [[[[3, 4], [0, 6]], []], [[[2, 1], [1, 0]], [[2, 5]], [[2, 3]]],
@@ -123,7 +123,7 @@ class RaggedTensorOpsTest(test_util.TensorFlowTestCase):
            [[[b'c', b'b'], [b'b', b'a']], [[b'c', b'f']], [[b'c', b'd']]],
            [[[b'b', b'a']]]])  # pyformat: disable
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testOutOfBoundsError(self):
     tensor_params = ['a', 'b', 'c']
     tensor_indices = [0, 1, 2]
diff --git a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
index ecd78a91b2..ab70d5a123 100644
--- a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.ops import string_ops
 from tensorflow.python.platform import googletest
 
 
+@test_util.run_v1_only('b/120545219')
 class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
   @parameterized.parameters([
       # The following test sets map over a RaggedTensor and apply a
@@ -141,7 +142,6 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       ),
   ])
 
-  @test_util.run_deprecated_v1
   def testRaggedMap(
       self,
       fn,
@@ -166,7 +166,6 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       output_values = self.evaluate(output)
       self.assertAllEqual(expected_output, output_values.tolist())
 
-  @test_util.run_deprecated_v1
   def testRaggedMapOnStructure(self):
     batman = ragged.constant([[1, 2, 3], [4], [5, 6, 7]])
     # [[10, 20, 30], [40], [50, 60, 70]]
@@ -187,7 +186,6 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       self.assertAllEqual(output.eval().tolist(), [66, 44, 198])
 
   # Test mapping over a dict of RTs can produce a dict of RTs.
-  @test_util.run_deprecated_v1
   def testRaggedMapOnStructure_RaggedOutputs(self):
     batman = ragged.constant([[1, 2, 3], [4], [5, 6, 7]])
     # [[10, 20, 30], [40], [50, 60, 70]]
@@ -219,7 +217,6 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       self.assertAllEqual(output['robin'].eval().tolist(),
                           [[11, 21, 31], [41], [51, 61, 71]])
 
-  @test_util.run_deprecated_v1
   def testZip(self):
     x = ragged.constant([[10, 20], [30, 40], [50, 60], [70], [80, 90, 100]],
                         dtypes.int64)
@@ -242,7 +239,6 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           result, [[[0, 10], [0, 20]], [[1, 30], [1, 40]], [[2, 50], [2, 60]],
                    [[3, 70]], [[4, 80], [4, 90], [4, 100]]])
 
-  @test_util.run_deprecated_v1
   def testBatchGather(self):
     tokens = ragged.constant([['hello', '.', 'there'], ['merhaba'],
                               ['bonjour', '.', 'ca va', '?']])
diff --git a/tensorflow/python/ops/ragged/ragged_map_inner_values_op_test.py b/tensorflow/python/ops/ragged/ragged_map_inner_values_op_test.py
index b5802cb82d..dc214f4437 100644
--- a/tensorflow/python/ops/ragged/ragged_map_inner_values_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_map_inner_values_op_test.py
@@ -43,7 +43,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
     with self.test_session():
       self.assertEqual(result.eval().tolist(), expected)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testDocStringExamples(self):
     """Test the examples in apply_op_to_ragged_values.__doc__."""
     rt = ragged.constant([[1, 2, 3], [], [4, 5], [6]])
@@ -63,7 +63,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         args=(tensor,),
         expected=[[0, 0, 0], [], [0, 0]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testOpWithTwoRaggedTensorArgs(self):
     x = ragged.constant([[3, 1, 4], [], [1, 5]])
     y = ragged.constant([[1, 2, 3], [], [4, 5]])
@@ -76,7 +76,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply, args=(5, y), expected=[[5, 10, 15], [], [20, 25]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testOpWithThreeRaggedTensorArgs(self):
     condition = ragged.constant(
         [[True, True, False], [], [True, False]])  # pyformat: disable
@@ -87,7 +87,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         args=(condition, x, y),
         expected=[[b'a', b'b', b'C'], [], [b'd', b'E']])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testOpWithRaggedTensorListArg(self):
     x = ragged.constant([[1, 2, 3], [], [4, 5]])
     y = ragged.constant([[10, 20, 30], [], [40, 50]])
@@ -96,7 +96,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         args=([x, y, x],),
         expected=[[12, 24, 36], [], [48, 60]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testOpWithKeywordArgs(self):
     x = ragged.constant([[3, 1, 4], [], [1, 5]])
     y = ragged.constant([[1, 2, 3], [], [4, 5]])
@@ -105,7 +105,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         kwargs=dict(x=x, y=y),
         expected=[[3, 2, 12], [], [4, 25]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testOpWithMixedPositionalAndKeywordArgs(self):
     x = ragged.constant([[3, 1, 4], [], [1, 5]])
     y = ragged.constant([[1, 2, 3], [], [4, 5]])
@@ -128,7 +128,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         },
         expected=[[8, 15, 13], [], [16, 25]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testOpWithRaggedRankGreaterThanOne(self):
     # ragged_rank=0
     x0 = [3, 1, 4, 1, 5, 9, 2, 6, 5]
@@ -173,7 +173,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
             [[[54, 14], [48, 45]]]    # row 3
         ])  # pyformat: disable
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testOpWithRaggedRankThree(self):
     x = ragged.constant([[[3, 1, 4]], [], [[], [1, 5]]])
     y = ragged.constant([[[1, 2, 3]], [], [[], [4, 5]]])
@@ -203,7 +203,7 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
                             r'Inputs must have identical ragged splits.*',
                             ragged.map_inner_values, math_ops.add, x, y)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedTensorSplitsMismatchErrorAtRuntime(self):
     splits1 = array_ops.placeholder_with_default(
         constant_op.constant([0, 3, 3, 5], dtypes.int64), None)
diff --git a/tensorflow/python/ops/ragged/ragged_operators_test.py b/tensorflow/python/ops/ragged/ragged_operators_test.py
index 7fe8159d82..c1223db307 100644
--- a/tensorflow/python/ops/ragged/ragged_operators_test.py
+++ b/tensorflow/python/ops/ragged/ragged_operators_test.py
@@ -23,11 +23,11 @@ from tensorflow.python.ops import ragged
 from tensorflow.python.platform import googletest
 
 
+@test_util.run_v1_only('b/120545219')
 class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase):
   # @TODO(edloper): Test right-handed versions of operators once we add
   # broadcasting support for elementwise ops.
 
-  @test_util.run_deprecated_v1
   def testOrderingOperators(self):
     x = ragged.constant([[1, 5], [3]])
     y = ragged.constant([[4, 5], [1]])
@@ -41,7 +41,6 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase):
     if a != b:
       print('%30s %s' % (b, a))
 
-  @test_util.run_deprecated_v1
   def testArithmeticOperators(self):
     x = ragged.constant([[1.0, -2.0], [8.0]])
     y = ragged.constant([[4.0, 4.0], [2.0]])
@@ -77,7 +76,6 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase):
       self.assertEqual((2.0 % y).eval().tolist(), [[2.0, 2.0], [0.0]])
       self.assertEqual((x % 2.0).eval().tolist(), [[1.0, 0.0], [0.0]])
 
-  @test_util.run_deprecated_v1
   def testLogicalOperators(self):
     a = ragged.constant([[True, True], [False]])
     b = ragged.constant([[True, False], [False]])
diff --git a/tensorflow/python/ops/ragged/ragged_reduce_op_test.py b/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
index 9f51d59ba3..2e4db2a423 100644
--- a/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
@@ -300,7 +300,7 @@ class RaggedReduceOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           axis=2,
           expected=[[mean(1, 2), mean(3, 4, 5)], [mean(6, 7), 8], [9]]),
   )
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testReduce(self, ragged_reduce_op, rt_input, axis, expected):
     rt_input = ragged.constant(rt_input)
     reduced = ragged_reduce_op(rt_input, axis)
diff --git a/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py b/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
index 4a705be484..9f4aa1b136 100644
--- a/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
@@ -143,7 +143,7 @@ class RaggedRowLengthsOp(test_util.TensorFlowTestCase, parameterized.TestCase):
           expected=[[2, 3, 0], [4, 1]],
           expected_ragged_rank=1),
   ])  # pyformat: disable
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRowLengths(self,
                      rt_input,
                      expected,
diff --git a/tensorflow/python/ops/ragged/ragged_segment_op_test.py b/tensorflow/python/ops/ragged/ragged_segment_op_test.py
index 9e4877ae3e..52fe739342 100644
--- a/tensorflow/python/ops/ragged/ragged_segment_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_segment_op_test.py
@@ -182,7 +182,7 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
                  []]  # pyformat: disable
     self.assertEqual(self.evaluate(segmented2).tolist(), expected2)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedSegmentIds(self):
     rt = ragged.constant([
         [[111, 112, 113, 114], [121],],  # row 0
@@ -205,7 +205,7 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
         'but segment_ids is ragged and data is not.', ragged.segment_sum, dt,
         segment_ids, 3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testShapeMismatchError2(self):
     rt = ragged.constant([
         [[111, 112, 113, 114], [121]],  # row 0
diff --git a/tensorflow/python/ops/ragged/ragged_stack_op_test.py b/tensorflow/python/ops/ragged/ragged_stack_op_test.py
index 4343471694..ed07aca6d4 100644
--- a/tensorflow/python/ops/ragged/ragged_stack_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_stack_op_test.py
@@ -265,7 +265,7 @@ class RaggedStackOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           axis=0,
           expected=[[[b'a00', b'a01'], [], [b'a20', b'a21']]]),
   )   # pyformat: disable
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedStack(self,
                       descr,
                       rt_inputs,
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py b/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
index 9c2dd26050..699dcc2bdb 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
@@ -82,6 +82,7 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
           value=ragged.constant_value([[[1, 2], [3]], [[4, 5]]]),
           expected_dim_sizes=[2, [2, 1], [2, 1, 2]]),
   ])
+  @test_util.run_v1_only('b/120545219')
   def testFromTensor(self, value, expected_dim_sizes):
     shape = ragged.RaggedTensorDynamicShape.from_tensor(value)
     expected = ragged.RaggedTensorDynamicShape.from_dim_sizes(
@@ -105,6 +106,7 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
           rank=5,
           expected_dim_sizes=[1, 3, [3, 2, 4], 2, 3]),
   ])
+  @test_util.run_v1_only('b/120545219')
   def testBroadcastToRank(self, dim_sizes, rank, expected_dim_sizes):
     shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(dim_sizes)
     expected = ragged.RaggedTensorDynamicShape.from_dim_sizes(
@@ -281,6 +283,7 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
            original_dim_sizes=[2, (2, 1), 2, 1],
            broadcast_dim_sizes=[2, (2, 1), 2, (2, 1, 2, 1, 2, 1)]),
   ])  # pyformat: disable
+  @test_util.run_v1_only('b/120545219')
   def testBroadcastDimension(self, axis, row_length, original_dim_sizes,
                              broadcast_dim_sizes):
     """Tests for the broadcast_dimension method.
@@ -370,6 +373,7 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
               y_dims=[1, 1, 2, (2, 1)],
               expected_dims=[2, (2, 1), 2, (2, 1, 2, 1, 2, 1)]),
       ])
+  @test_util.run_v1_only('b/120545219')
   def testBroadcastDynamicShape(self, x_dims, y_dims, expected_dims):
     x_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(x_dims)
     y_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(y_dims)
@@ -416,6 +420,7 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
           dim_sizes=[3, [3, 0, 2]],
           expected=ragged.constant_value([[10, 10, 10], [], [10, 10]])),
   ])
+  @test_util.run_v1_only('b/120545219')
   def testRaggedBroadcastTo(self, x, dim_sizes, expected):
     shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(dim_sizes)
     result = ragged.broadcast_to(x, shape)
@@ -470,6 +475,7 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
               [[[[11, 21], [32]], [[13, 23], [34]]],
                [[[15, 25], [36]]]])),
   ])
+  @test_util.run_v1_only('b/120545219')
   def testRaggedAddWithBroadcasting(self, x, y, expected, doc):
     expected_rrank = getattr(expected, 'ragged_rank', 0)
     x = ragged.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_test.py b/tensorflow/python/ops/ragged/ragged_tensor_test.py
index 608fbd6e5b..e86676f70a 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_test.py
@@ -861,7 +861,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       (SLICE_BUILDER[:, -2:], [row[-2:] for row in EXAMPLE_RAGGED_TENSOR_2D]),
       # TODO(edloper): Add tests for strided slices, once support is added.
   )
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedTensorGetItemWithRaggedRank1(self, slice_spec, expected):
     """Test that rt.__getitem__(slice_spec) == expected."""
     # Ragged tensor
@@ -903,7 +903,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       (SLICE_BUILDER[..., 0, 0, 0], IndexError,
        'Too many indices for RaggedTensor'),
   )
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedTensorGetItemErrorsWithRaggedRank1(self, slice_spec, expected,
                                                    message):
     """Test that rt.__getitem__(slice_spec) == expected."""
@@ -982,7 +982,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       # TODO(edloper): Add tests slicing inner ragged dimensions, one support
       # is added.
   )
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedTensorGetItemWithRaggedRank2(self, slice_spec, expected):
     """Test that rt.__getitem__(slice_spec) == expected."""
     rt = ragged.from_nested_row_splits(
@@ -1069,7 +1069,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     rt = ragged.from_row_splits(values, [0, 1])
     self._TestGetItemException(rt, slice_spec, expected, message)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testGetItemNewAxis(self):
     # rt: [[[['a', 'b'], ['c', 'd']], [], [['e', 'f']]], []]
     splits1 = [0, 3, 3]
@@ -1155,7 +1155,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
   # RaggedTensor.with_values() and RaggedTensor.with_inner_values().
   #=============================================================================
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testWithValues(self):
     rt1 = ragged.constant([[1, 2], [3, 4, 5], [6], [], [7]])
     rt2 = ragged.constant([[[1, 2], [3, 4, 5]], [[6]], [], [[], [7]]])
@@ -1208,7 +1208,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       self.assertEqual(result['rt1'].tolist(), [[1, 2, 3], [4]])
       self.assertEqual(result['rt2'].tolist(), [[[], [1, 2]], [[3]]])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testSessionPartialRunFeed(self):
     # Placeholder inputs.
     a = ragged.from_row_splits(
diff --git a/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py b/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
index 69b31ad0e9..9863e3b583 100644
--- a/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
@@ -140,7 +140,7 @@ class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
     self.assertEqual(st.values.shape.as_list(), [7])
     self.assertEqual(st.dense_shape.shape.as_list(), [3])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testKernelErrors(self):
     # An empty vector, defined using a placeholder to ensure that we can't
     # determine that it's invalid at graph-construction time.
@@ -179,7 +179,7 @@ class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
       self.assertRaisesRegexp(errors.InvalidArgumentError, empty_splits_error,
                               ragged.to_sparse(bad_rt5).eval)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testGradient(self):
     # rt1.shape == rt2.shape == [2, (D2), (D3), 2].
     rt1 = ragged.constant([[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0]]]],
diff --git a/tensorflow/python/ops/ragged/ragged_util_test.py b/tensorflow/python/ops/ragged/ragged_util_test.py
index c24ea65353..69c605dbf9 100644
--- a/tensorflow/python/ops/ragged/ragged_util_test.py
+++ b/tensorflow/python/ops/ragged/ragged_util_test.py
@@ -87,6 +87,7 @@ class RaggedRepeatTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       # Empty tensor
       dict(data=[], repeats=[], axis=0, expected=[]),
   ])
+  @test_util.run_v1_only('b/120545219')
   def testRepeat(self, data, repeats, expected, axis=None):
     result = ragged_util.repeat(data, repeats, axis)
     with self.test_session():
@@ -135,6 +136,7 @@ class RaggedRepeatTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           dict(data=TENSOR_4D, repeats=[1, 3, 0, 0, 2], axis=3),
       ]
   ])
+  @test_util.run_v1_only('b/120545219')
   def testValuesMatchesNumpy(self, mode, data, repeats, axis):
     # Exception: we can't handle negative axis if data.ndims is unknown.
     if axis < 0 and mode == 'unknown_shape':
diff --git a/tensorflow/python/ops/ragged/ragged_where_op_test.py b/tensorflow/python/ops/ragged/ragged_where_op_test.py
index de83a54977..6d645eefac 100644
--- a/tensorflow/python/ops/ragged/ragged_where_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_where_op_test.py
@@ -165,7 +165,7 @@ class RaggedWhereOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           y=ragged.constant_value([[[['a']]], [[['b']]]]),
           expected=ragged.constant_value([[[[], [b'A']]], [[[b'b']]]])),
   ])   # pyformat: disable
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testRaggedWhere(self, condition, expected, x=None, y=None):
     result = ragged.where(condition, x, y)
     self.assertEqual(
diff --git a/tensorflow/python/profiler/internal/run_metadata_test.py b/tensorflow/python/profiler/internal/run_metadata_test.py
index a8859f845b..f96d721f46 100644
--- a/tensorflow/python/profiler/internal/run_metadata_test.py
+++ b/tensorflow/python/profiler/internal/run_metadata_test.py
@@ -169,7 +169,7 @@ class RunMetadataTest(test.TestCase):
     ret = _extract_node(run_meta, 'MatMul:MatMul')
     self.assertEqual(len(ret), 0)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testLoopCPU(self):
     ops.reset_default_graph()
     with ops.device('/cpu:0'):
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 8648f0b514..1c7c15be4f 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -76,6 +76,7 @@ class PrintModelAnalysisTest(test.TestCase):
                          '  ScalarW (1, 1/1 params)\n',
                          lib.CheckAndRemoveDoc(f.read()))
 
+  @test_util.run_v1_only('b/120545219')
   def testSelectEverythingDetail(self):
     ops.reset_default_graph()
     dev = '/device:GPU:0' if test.is_gpu_available() else '/device:CPU:0'
@@ -203,6 +204,7 @@ class PrintModelAnalysisTest(test.TestCase):
             lib.CheckAndRemoveDoc(f.read())[0:80])
         # pylint: enable=line-too-long
 
+  @test_util.run_v1_only('b/120545219')
   def testComplexCodeView(self):
     ops.reset_default_graph()
     outfile = os.path.join(test.get_temp_dir(), 'dump')
@@ -619,6 +621,7 @@ class PrintModelAnalysisTest(test.TestCase):
           else:
             self.assertEqual(len(gfile.ListDirectory(profile_dir)), 0)
 
+  @test_util.run_v1_only('b/120545219')
   def testAutoProfiling(self):
     ops.reset_default_graph()
     time_dir = os.path.join(test.get_temp_dir(), 'time')
@@ -706,6 +709,7 @@ class PrintModelAnalysisTest(test.TestCase):
                       exception_str)
       self.assertTrue(mat is None)
 
+  @test_util.run_v1_only('b/120545219')
   def testTrackPersistentBytes(self):
     ops.reset_default_graph()
     a = array_ops.constant(np.ones((100, 100)))
diff --git a/tensorflow/python/profiler/pprof_profiler_test.py b/tensorflow/python/profiler/pprof_profiler_test.py
index 120a0d0eaa..3f5bd9e79b 100644
--- a/tensorflow/python/profiler/pprof_profiler_test.py
+++ b/tensorflow/python/profiler/pprof_profiler_test.py
@@ -136,7 +136,7 @@ comment: 9
       profile.ParseFromString(profile_contents)
       self.assertEquals(expected_proto, str(profile))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testProfileWithWhileLoop(self):
     options = config_pb2.RunOptions()
     options.trace_level = config_pb2.RunOptions.FULL_TRACE
diff --git a/tensorflow/python/saved_model/loader_test.py b/tensorflow/python/saved_model/loader_test.py
index 3b7f0b250e..3e27c0801c 100644
--- a/tensorflow/python/saved_model/loader_test.py
+++ b/tensorflow/python/saved_model/loader_test.py
@@ -94,7 +94,7 @@ class SavedModelLoaderTest(test.TestCase, parameterized.TestCase):
     super(SavedModelLoaderTest, self).tearDown()
     shutil.rmtree(test.get_temp_dir(), ignore_errors=True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_load_function(self, builder_cls):
     self.export_simple_graph(builder_cls)
     loader = loader_impl.SavedModelLoader(SIMPLE_ADD_SAVED_MODEL)
@@ -110,7 +110,7 @@ class SavedModelLoaderTest(test.TestCase, parameterized.TestCase):
       self.assertEqual(5, sess.graph.get_tensor_by_name("x:0").eval())
       self.assertEqual(7, sess.graph.get_tensor_by_name("y:0").eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_load_graph(self, builder_cls):
     self.export_simple_graph(builder_cls)
     loader = loader_impl.SavedModelLoader(SIMPLE_ADD_SAVED_MODEL)
@@ -130,7 +130,7 @@ class SavedModelLoaderTest(test.TestCase, parameterized.TestCase):
       with self.assertRaises(errors.FailedPreconditionError):
         self.evaluate(y)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_load_with_import_scope(self, builder_cls):
     self.export_graph_with_main_op(builder_cls)
     loader = loader_impl.SavedModelLoader(SAVED_MODEL_WITH_MAIN_OP)
@@ -179,7 +179,7 @@ class SavedModelLoaderTest(test.TestCase, parameterized.TestCase):
       loader.restore_variables(sess, tf_saver.Saver())
       self.assertEqual(55, self.evaluate(z))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_run_init_op(self, builder_cls):
     self.export_graph_with_main_op(builder_cls)
     loader = loader_impl.SavedModelLoader(SAVED_MODEL_WITH_MAIN_OP)
@@ -212,7 +212,7 @@ class SavedModelLoaderTest(test.TestCase, parameterized.TestCase):
     with self.assertRaises(RuntimeError):
       loader.get_meta_graph_def_from_tags(["not_a_graph"])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_load_saved_model_with_no_variables(self, builder_cls):
     """Test that SavedModel runs saver when there appear to be no variables.
 
diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py
index 668a374415..97218a98ea 100644
--- a/tensorflow/python/saved_model/save_test.py
+++ b/tensorflow/python/saved_model/save_test.py
@@ -286,7 +286,7 @@ class SaveTest(test.TestCase):
         {"out": model(array_ops.ones([1, 4]))},
         _import_and_infer(save_dir, {"x": [[1., 1., 1., 1.]]}))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def test_export_functional_keras_model_after_fit(self):
     x = input_layer.Input((1,))
     y = core.Dense(1, name="y")(x)
diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py
index d4c8d72011..8d94c7c989 100644
--- a/tensorflow/python/saved_model/saved_model_test.py
+++ b/tensorflow/python/saved_model/saved_model_test.py
@@ -328,7 +328,7 @@ class SavedModelTest(SavedModelTestBase):
       self.assertRaises(RuntimeError, loader.load, sess, ["foo", "baz"],
                         export_dir)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testVariables(self):
     export_dir = self._get_export_dir("test_variables")
     builder = saved_model_builder._SavedModelBuilder(export_dir)
@@ -474,7 +474,7 @@ class SavedModelTest(SavedModelTestBase):
       self.assertEqual(
           42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCollections(self):
     export_dir = self._get_export_dir("test_collections")
     builder = saved_model_builder._SavedModelBuilder(export_dir)
@@ -819,6 +819,7 @@ class SavedModelTest(SavedModelTestBase):
       self._validate_assets(export_dir, foo_graph.asset_file_def, "hello42.txt",
                             "foo bar baz 0", "asset_file_tensor_0:0")
 
+  @test_util.run_v1_only("b/120545219")
   def testCustomInitOp(self):
     export_dir = self._get_export_dir("test_main_op")
     builder = saved_model_builder._SavedModelBuilder(export_dir)
@@ -854,7 +855,7 @@ class SavedModelTest(SavedModelTestBase):
       # the main_op, following a restore.
       self.assertEqual(3, ops.get_collection("v")[2].eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTrainOp(self):
     export_dir = self._get_export_dir("test_train_op")
     builder = saved_model_builder._SavedModelBuilder(export_dir)
@@ -882,7 +883,7 @@ class SavedModelTest(SavedModelTestBase):
       self.assertIsInstance(
           loader_impl.get_train_op(meta_graph_def), ops.Tensor)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTrainOpGroup(self):
     export_dir = self._get_export_dir("test_train_op_group")
     builder = saved_model_builder._SavedModelBuilder(export_dir)
@@ -910,7 +911,7 @@ class SavedModelTest(SavedModelTestBase):
       self.assertIsInstance(
           loader_impl.get_train_op(meta_graph_def), ops.Operation)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTrainOpAfterVariables(self):
     export_dir = self._get_export_dir("test_train_op_after_variables")
     builder = saved_model_builder._SavedModelBuilder(export_dir)
@@ -1029,7 +1030,7 @@ class SavedModelTest(SavedModelTestBase):
       self._validate_assets(export_dir, bar_graph.asset_file_def, "foo.txt",
                             "content_foo", "asset_file_tensor:0")
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testOp(self):
     export_dir = self._get_export_dir("test_op")
     builder = saved_model_builder._SavedModelBuilder(export_dir)
@@ -1492,7 +1493,7 @@ class SavedModelV1Test(SavedModelTestBase):
     self.assertIn("T", node_def.attr)
     self.assertIn("Tout", node_def.attr)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testLegacyInitOp(self):
     export_dir = self._get_export_dir("test_legacy_init_op")
     builder = saved_model_builder.SavedModelBuilder(export_dir)
diff --git a/tensorflow/python/saved_model/signature_def_utils_test.py b/tensorflow/python/saved_model/signature_def_utils_test.py
index 53c452359f..d1347eb017 100644
--- a/tensorflow/python/saved_model/signature_def_utils_test.py
+++ b/tensorflow/python/saved_model/signature_def_utils_test.py
@@ -423,6 +423,7 @@ class SignatureDefUtilsTest(test.TestCase):
         {},
         signature_constants.PREDICT_METHOD_NAME)
 
+  @test_util.run_v1_only("b/120545219")
   def testOpSignatureDef(self):
     key = "adding_1_and_2_key"
     add_op = math_ops.add(1, 2, name="adding_1_and_2")
@@ -430,6 +431,7 @@ class SignatureDefUtilsTest(test.TestCase):
     self.assertIn(key, signature_def.outputs)
     self.assertEqual(add_op.name, signature_def.outputs[key].name)
 
+  @test_util.run_v1_only("b/120545219")
   def testLoadOpFromSignatureDef(self):
     key = "adding_1_and_2_key"
     add_op = math_ops.add(1, 2, name="adding_1_and_2")
diff --git a/tensorflow/python/saved_model/utils_test.py b/tensorflow/python/saved_model/utils_test.py
index 0888dcb411..2afe8abfd6 100644
--- a/tensorflow/python/saved_model/utils_test.py
+++ b/tensorflow/python/saved_model/utils_test.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.platform import test
@@ -32,6 +33,7 @@ from tensorflow.python.saved_model import utils
 
 class UtilsTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testBuildTensorInfoOp(self):
     x = constant_op.constant(1, name="x")
     y = constant_op.constant(2, name="y")
@@ -41,6 +43,7 @@ class UtilsTest(test.TestCase):
     self.assertEqual(types_pb2.DT_INVALID, z_op_info.dtype)
     self.assertEqual(0, len(z_op_info.tensor_shape.dim))
 
+  @test_util.run_v1_only("b/120545219")
   def testBuildTensorInfoDefunOp(self):
     @function.defun
     def my_init_fn(x, y):
@@ -54,6 +57,7 @@ class UtilsTest(test.TestCase):
     self.assertEqual(types_pb2.DT_INVALID, init_op_info.dtype)
     self.assertEqual(0, len(init_op_info.tensor_shape.dim))
 
+  @test_util.run_v1_only("b/120545219")
   def testBuildTensorInfoDense(self):
     x = array_ops.placeholder(dtypes.float32, 1, name="x")
     x_tensor_info = utils.build_tensor_info(x)
@@ -62,6 +66,7 @@ class UtilsTest(test.TestCase):
     self.assertEqual(1, len(x_tensor_info.tensor_shape.dim))
     self.assertEqual(1, x_tensor_info.tensor_shape.dim[0].size)
 
+  @test_util.run_v1_only("b/120545219")
   def testBuildTensorInfoSparse(self):
     x = array_ops.sparse_placeholder(dtypes.float32, [42, 69], name="x")
     x_tensor_info = utils.build_tensor_info(x)
@@ -76,6 +81,7 @@ class UtilsTest(test.TestCase):
     self.assertEqual(42, x_tensor_info.tensor_shape.dim[0].size)
     self.assertEqual(69, x_tensor_info.tensor_shape.dim[1].size)
 
+  @test_util.run_v1_only("b/120545219")
   def testGetTensorFromInfoDense(self):
     expected = array_ops.placeholder(dtypes.float32, 1, name="x")
     tensor_info = utils.build_tensor_info(expected)
@@ -83,6 +89,7 @@ class UtilsTest(test.TestCase):
     self.assertIsInstance(actual, ops.Tensor)
     self.assertEqual(expected.name, actual.name)
 
+  @test_util.run_v1_only("b/120545219")
   def testGetTensorFromInfoSparse(self):
     expected = array_ops.sparse_placeholder(dtypes.float32, name="x")
     tensor_info = utils.build_tensor_info(expected)
@@ -122,6 +129,7 @@ class UtilsTest(test.TestCase):
                                                  import_scope="foo")
       self.assertEqual(expected.name, actual.name)
 
+  @test_util.run_v1_only("b/120545219")
   def testGetTensorFromInfoRaisesErrors(self):
     expected = array_ops.placeholder(dtypes.float32, 1, name="x")
     tensor_info = utils.build_tensor_info(expected)
diff --git a/tensorflow/python/tools/freeze_graph_test.py b/tensorflow/python/tools/freeze_graph_test.py
index efdf7dd2cf..de2672db3c 100644
--- a/tensorflow/python/tools/freeze_graph_test.py
+++ b/tensorflow/python/tools/freeze_graph_test.py
@@ -161,11 +161,11 @@ class FreezeGraphTest(test_util.TensorFlowTestCase):
             },)
         builder.save(as_text=True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testFreezeGraphV1(self):
     self._testFreezeGraph(saver_pb2.SaverDef.V1)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testFreezeGraphV2(self):
     self._testFreezeGraph(saver_pb2.SaverDef.V2)
 
diff --git a/tensorflow/python/training/adagrad_test.py b/tensorflow/python/training/adagrad_test.py
index da26fcdb7f..1e2d29b337 100644
--- a/tensorflow/python/training/adagrad_test.py
+++ b/tensorflow/python/training/adagrad_test.py
@@ -306,7 +306,7 @@ class AdagradOptimizerTest(test.TestCase):
             np.array([2.715679168701172, 3.715679168701172]),
             self.evaluate(var1))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testDynamicShapeVariable_Ok(self):
     with self.cached_session():
       v = variable_scope.get_variable("v", initializer=constant_op.constant(1.),
@@ -315,7 +315,7 @@ class AdagradOptimizerTest(test.TestCase):
       # Creating optimizer should cause no exception.
       adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testDynamicShapeVariableWithCallableInit(self):
     var0 = variable_scope.get_variable("var0",
                                        initializer=constant_op.constant(1.),
diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py
index 08942c5bb6..1af27626ba 100644
--- a/tensorflow/python/training/basic_session_run_hooks_test.py
+++ b/tensorflow/python/training/basic_session_run_hooks_test.py
@@ -1122,6 +1122,7 @@ class StepCounterHookTest(test.TestCase):
         self.assertGreater(summary_value.simple_value, 0)
 
 
+@test_util.run_v1_only('b/120545219')
 class SummarySaverHookTest(test.TestCase):
 
   def setUp(self):
@@ -1148,13 +1149,11 @@ class SummarySaverHookTest(test.TestCase):
       basic_session_run_hooks.SummarySaverHook(
           scaffold=monitored_session.Scaffold(), summary_op=self.summary_op)
 
-  @test_util.run_deprecated_v1
   def test_raise_in_both_secs_and_steps(self):
     with self.assertRaises(ValueError):
       basic_session_run_hooks.SummarySaverHook(
           save_secs=10, save_steps=20, summary_writer=self.summary_writer)
 
-  @test_util.run_deprecated_v1
   def test_raise_in_none_secs_and_steps(self):
     with self.assertRaises(ValueError):
       basic_session_run_hooks.SummarySaverHook(
@@ -1405,6 +1404,7 @@ class FinalOpsHookTest(test.TestCase):
                              hook.final_ops_values.tolist())
 
 
+@test_util.run_v1_only('b/120545219')
 class ResourceSummarySaverHookTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/training/checkpoint_ops_test.py b/tensorflow/python/training/checkpoint_ops_test.py
index 21ad3df1c8..c481547139 100644
--- a/tensorflow/python/training/checkpoint_ops_test.py
+++ b/tensorflow/python/training/checkpoint_ops_test.py
@@ -23,6 +23,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import partitioned_variables
@@ -33,6 +34,7 @@ from tensorflow.python.training import checkpoint_ops
 from tensorflow.python.training import saver as saver_lib
 
 
+@test_util.run_v1_only('b/120545219')
 class LoadAndRemapWrappersTest(test.TestCase):
   """Tests for the functionality of the Python wrappers."""
 
diff --git a/tensorflow/python/training/checkpointable/data_structures_test.py b/tensorflow/python/training/checkpointable/data_structures_test.py
index 9cefd942ac..bcec6e0100 100644
--- a/tensorflow/python/training/checkpointable/data_structures_test.py
+++ b/tensorflow/python/training/checkpointable/data_structures_test.py
@@ -73,6 +73,7 @@ class HasList(training.Model):
 class ListTests(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only("b/120545219")
   def testTracking(self):
     model = HasList()
     output = model(array_ops.ones([32, 2]))
@@ -105,6 +106,7 @@ class ListTests(test.TestCase):
     self.assertIn(v, model.trainable_variables)
     self.assertNotIn(v, model.non_trainable_variables)
 
+  @test_util.run_v1_only("b/120545219")
   def testUpdatesForwarded(self):
     with context.graph_mode():
       model = HasList()
@@ -121,6 +123,7 @@ class ListTests(test.TestCase):
       self.assertEqual(0, len(model.updates))
 
   @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only("b/120545219")
   def testLossesForwarded(self):
     model = HasList()
     model_input = array_ops.ones([32, 2])
@@ -295,6 +298,7 @@ class HasMapping(training.Model):
 class MappingTests(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only("b/120545219")
   def testTracking(self):
     model = HasMapping()
     output = model(array_ops.ones([32, 2]))
diff --git a/tensorflow/python/training/checkpointable/util_test.py b/tensorflow/python/training/checkpointable/util_test.py
index 78047eda90..3bdab4cb0b 100644
--- a/tensorflow/python/training/checkpointable/util_test.py
+++ b/tensorflow/python/training/checkpointable/util_test.py
@@ -616,6 +616,7 @@ class CheckpointingTests(test.TestCase):
 
   # pylint: disable=cell-var-from-loop
   @test_util.run_in_graph_and_eager_modes
+  @test_util.run_v1_only("b/120545219")
   def testWithDefun(self):
     num_training_steps = 2
     checkpoint_directory = self.get_temp_dir()
diff --git a/tensorflow/python/training/input_test.py b/tensorflow/python/training/input_test.py
index a3d268a017..d89f5f3bbd 100644
--- a/tensorflow/python/training/input_test.py
+++ b/tensorflow/python/training/input_test.py
@@ -872,19 +872,19 @@ class BatchTest(test_lib.TestCase):
       for thread in threads:
         thread.join()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSingleThreadKeepInput(self):
     self._testKeepInputHelper(1, False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSingleThreadKeepInputEnqueueMany(self):
     self._testKeepInputHelper(1, True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultipleThreadKeepInput(self):
     self._testKeepInputHelper(5, False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultipleThreadKeepInputEnqueueMany(self):
     self._testKeepInputHelper(5, True)
 
@@ -1482,19 +1482,19 @@ class BatchJoinTest(test_lib.TestCase):
       for thread in threads:
         thread.join()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSingleThreadKeepInput(self):
     self._testKeepInputHelper(1, False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSingleThreadKeepInputEnqueueMany(self):
     self._testKeepInputHelper(1, True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultipleThreadKeepInput(self):
     self._testKeepInputHelper(5, False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultipleThreadKeepInputEnqueueMany(self):
     self._testKeepInputHelper(5, True)
 
@@ -1905,19 +1905,19 @@ class ShuffleBatchTest(test_lib.TestCase):
       for thread in threads:
         thread.join()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSingleThreadKeepInput(self):
     self._testKeepInputHelper(1, False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSingleThreadKeepInputEnqueueMany(self):
     self._testKeepInputHelper(1, True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultipleThreadKeepInput(self):
     self._testKeepInputHelper(5, False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultipleThreadKeepInputEnqueueMany(self):
     self._testKeepInputHelper(5, True)
 
@@ -2309,19 +2309,19 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
       for thread in threads:
         thread.join()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSingleThreadKeepInput(self):
     self._testKeepInputHelper(1, False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSingleThreadKeepInputEnqueueMany(self):
     self._testKeepInputHelper(1, True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultipleThreadKeepInput(self):
     self._testKeepInputHelper(5, False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultipleThreadKeepInputEnqueueMany(self):
     self._testKeepInputHelper(5, True)
 
diff --git a/tensorflow/python/training/learning_rate_decay_test.py b/tensorflow/python/training/learning_rate_decay_test.py
index 9de5bc8168..1029d4cea8 100644
--- a/tensorflow/python/training/learning_rate_decay_test.py
+++ b/tensorflow/python/training/learning_rate_decay_test.py
@@ -101,7 +101,7 @@ class LRDecayTest(test_util.TensorFlowTestCase):
     self.assertAllClose(self.evaluate(decayed_lr), 0.001, 1e-6)
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testPiecewiseConstantEdgeCases(self):
     x_int = resource_variable_ops.ResourceVariable(
         0, dtype=variables.dtypes.int32)
diff --git a/tensorflow/python/training/localhost_cluster_performance_test.py b/tensorflow/python/training/localhost_cluster_performance_test.py
index 7c097b943d..c4cbc8a55d 100644
--- a/tensorflow/python/training/localhost_cluster_performance_test.py
+++ b/tensorflow/python/training/localhost_cluster_performance_test.py
@@ -25,6 +25,7 @@ import numpy as np
 from tensorflow.python.client import session as session_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
@@ -34,6 +35,7 @@ from tensorflow.python.training import device_setter
 
 class CreateLocalClusterTest(test.TestCase):
 
+  @test_util.run_v1_only("b/120545219")
   def testCreateLocalCluster(self):
     workers, _ = test.create_local_cluster(num_workers=2, num_ps=2)
     worker_sessions = [session_lib.Session(w.target) for w in workers]
diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py
index 9dbcfa52b7..99ee9ea7e2 100644
--- a/tensorflow/python/training/monitored_session_test.py
+++ b/tensorflow/python/training/monitored_session_test.py
@@ -541,6 +541,7 @@ class WrappedSessionTest(test.TestCase):
       self.assertFalse(wrapped_sess1.should_stop())
       self.assertTrue(wrapped_sess1.should_stop())
 
+  @test_util.run_deprecated_v1
   def test_close_twice(self):
     with self.cached_session() as sess:
       wrapped_sess = monitored_session._WrappedSession(sess)
diff --git a/tensorflow/python/training/moving_averages_test.py b/tensorflow/python/training/moving_averages_test.py
index b15f7377f0..03bcde9c84 100644
--- a/tensorflow/python/training/moving_averages_test.py
+++ b/tensorflow/python/training/moving_averages_test.py
@@ -219,38 +219,38 @@ class ExponentialMovingAverageTest(test.TestCase):
                         (10.0 + 30.0) * (1 - dk)) / _Scale(dk, 2), dim)
     self.assertAllClose(expected, self.evaluate(avg2))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNoNumUpdates_Scalar(self):
     with self.cached_session():
       ema = moving_averages.ExponentialMovingAverage(0.25)
       self._CheckDecay(ema, actual_decay=0.25, dim=1)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNoNumUpdates_Scalar_Debias(self):
     with self.cached_session():
       ema = moving_averages.ExponentialMovingAverage(0.25, zero_debias=True)
       self._CheckDecay(ema, actual_decay=0.25, dim=1)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNoNumUpdates_Vector(self):
     with self.cached_session():
       ema = moving_averages.ExponentialMovingAverage(0.25)
       self._CheckDecay(ema, actual_decay=0.25, dim=5)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNoNumUpdates_Vector_Debias(self):
     with self.cached_session():
       ema = moving_averages.ExponentialMovingAverage(0.25, zero_debias=True)
       self._CheckDecay(ema, actual_decay=0.25, dim=5)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNumUpdates_Scalar(self):
     with self.cached_session():
       # With num_updates 1, the decay applied is 0.1818
       ema = moving_averages.ExponentialMovingAverage(0.25, num_updates=1)
       self._CheckDecay(ema, actual_decay=0.181818, dim=1)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNumUpdates_Scalar_Debias(self):
     with self.cached_session():
       # With num_updates 1, the decay applied is 0.1818
@@ -258,14 +258,14 @@ class ExponentialMovingAverageTest(test.TestCase):
           0.25, num_updates=1, zero_debias=True)
       self._CheckDecay(ema, actual_decay=0.181818, dim=1)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNumUpdates_Vector(self):
     with self.cached_session():
       # With num_updates 1, the decay applied is 0.1818
       ema = moving_averages.ExponentialMovingAverage(0.25, num_updates=1)
       self._CheckDecay(ema, actual_decay=0.181818, dim=5)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNumUpdates_Vector_Debias(self):
     with self.cached_session():
       # With num_updates 1, the decay applied is 0.1818
@@ -273,7 +273,7 @@ class ExponentialMovingAverageTest(test.TestCase):
           0.25, num_updates=1, zero_debias=True)
       self._CheckDecay(ema, actual_decay=0.181818, dim=5)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesWithControlDeps(self):
     with self.cached_session() as sess:
       v0 = variables.Variable(0, name="v0")
@@ -299,7 +299,7 @@ class ExponentialMovingAverageTest(test.TestCase):
       self.assertEqual([17.5], self.evaluate(v1_avg))
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testBasicEager(self):
     v0 = variables.Variable(1.0)
     v1 = variables.Variable(2.0)
@@ -355,11 +355,11 @@ class ExponentialMovingAverageTest(test.TestCase):
       self.assertEqual(ema.average(v1).op.name, ema.average_name(v1))
       self.assertEqual(ema.average(tensor2).op.name, ema.average_name(tensor2))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNames(self):
     self.averageVariablesNamesHelper(zero_debias=True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNamesNoDebias(self):
     self.averageVariablesNamesHelper(zero_debias=False)
 
@@ -405,15 +405,15 @@ class ExponentialMovingAverageTest(test.TestCase):
         self.assertEqual(
             ema.average(tensor2).op.name, ema.average_name(tensor2))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNamesRespectScope(self):
     self.averageVariablesNamesRespectScopeHelper(zero_debias=True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesNamesRespectScopeNoDebias(self):
     self.averageVariablesNamesRespectScopeHelper(zero_debias=False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSubsetAverageVariablesNames(self):
     with self.cached_session():
       v0 = variables.Variable(10.0, name="v0")
@@ -442,7 +442,7 @@ class ExponentialMovingAverageTest(test.TestCase):
       self.assertEqual(ema.average(v1).op.name, ema.average_name(v1))
       self.assertEqual(ema.average(tensor2).op.name, ema.average_name(tensor2))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAverageVariablesDeviceAssignment(self):
     with ops.device("/job:dev_v0"):
       v0 = variables.Variable(10.0, name="v0")
diff --git a/tensorflow/python/training/quantize_training_test.py b/tensorflow/python/training/quantize_training_test.py
index 62e783f200..2352af7e99 100644
--- a/tensorflow/python/training/quantize_training_test.py
+++ b/tensorflow/python/training/quantize_training_test.py
@@ -53,7 +53,7 @@ class PywrapQuantizeTrainingTest(test.TestCase):
 
   # Test that save/restoring works for EMA variables generated in the
   # quantized training rewrite.
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testQuantizedSaveRestore(self):
     save_path = os.path.join(self.get_temp_dir(), 'quantized_save_restore')
 
diff --git a/tensorflow/python/training/queue_runner_test.py b/tensorflow/python/training/queue_runner_test.py
index 4113cecf55..2f6e924f98 100644
--- a/tensorflow/python/training/queue_runner_test.py
+++ b/tensorflow/python/training/queue_runner_test.py
@@ -41,7 +41,7 @@ _MockOp = collections.namedtuple("MockOp", ["name"])
 
 class QueueRunnerTest(test.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testBasic(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
@@ -62,7 +62,7 @@ class QueueRunnerTest(test.TestCase):
       # The variable should be 3.
       self.assertEqual(3, self.evaluate(var))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testTwoOps(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
@@ -132,7 +132,7 @@ class QueueRunnerTest(test.TestCase):
       with self.assertRaisesRegexp(errors_impl.OutOfRangeError, "is closed"):
         self.evaluate(dequeue1)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRespectCoordShouldStop(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
@@ -221,7 +221,7 @@ class QueueRunnerTest(test.TestCase):
       new_threads = qr.create_threads(sess, coord=coord)
       self.assertEqual([], new_threads)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testThreads(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 5d621ba4ff..95c21cb815 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -390,7 +390,7 @@ class SaverTest(test.TestCase):
             ValueError, "The passed save_path is not a valid checkpoint:"):
           save.restore(sess, "invalid path")
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInt64(self):
     save_path = os.path.join(self.get_temp_dir(), "int64")
 
@@ -466,7 +466,7 @@ class SaverTest(test.TestCase):
       # Verify non-duplicate names work.
       saver_module.Saver({"v0": v0, "v2": v2.saveable})
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testBasicsWithListOfVariables(self):
     save_path = os.path.join(self.get_temp_dir(), "basics_with_list")
 
@@ -667,7 +667,7 @@ class SaverTest(test.TestCase):
       self.assertAllClose(1.0, one.eval())
       self.assertAllClose([2.0, 2.0, 2.0], twos.eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testReshape(self):
     save_path = os.path.join(self.get_temp_dir(), "variables_reshape")
     with session.Session("", graph=ops_lib.Graph()) as sess:
@@ -726,7 +726,7 @@ class SaverTest(test.TestCase):
   def testSaveWithGlobalStepWithPadding(self):
     self.testSaveWithGlobalStep(pad_step_number=True)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSaveToNonexistingPath(self):
     file_io.write_string_to_file(
         os.path.join(self.get_temp_dir(), "actually_a_file"), "")
@@ -1607,7 +1607,7 @@ class SaveRestoreWithVariableNameMap(test.TestCase):
       self.assertEqual(20.0, self.evaluate(v1))
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testNonReshapeResourceVariable(self):
     self._testNonReshape(resource_variable_ops.ResourceVariable)
 
@@ -1622,7 +1622,7 @@ class MetaGraphTest(test.TestCase):
     gfile.MakeDirs(test_dir)
     return test_dir
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testAddCollectionDef(self):
     test_dir = self._get_test_dir("good_collection")
     filename = os.path.join(test_dir, "metafile")
@@ -1772,13 +1772,13 @@ class MetaGraphTest(test.TestCase):
       v1 = sess.graph.get_tensor_by_name("v1:0")
       self.assertEqual(11.0, v1.eval())
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testMultiSaverCollection(self):
     test_dir = self._get_test_dir("saver_collection")
     self._testMultiSaverCollectionSave(test_dir)
     self._testMultiSaverCollectionRestore(test_dir)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testClearExtraneousSavers(self):
     test_dir = self._get_test_dir("clear_extraneous_savers")
     filename = os.path.join(test_dir, "metafile")
@@ -1866,7 +1866,7 @@ class MetaGraphTest(test.TestCase):
                                                lambda e: "does not exist"):
         saver_module.import_meta_graph(filename)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSliceVariable(self):
     test_dir = self._get_test_dir("slice_saver")
     filename = os.path.join(test_dir, "metafile")
@@ -2122,7 +2122,7 @@ class MetaGraphTest(test.TestCase):
                                       lambda: math_ops.multiply(x, -1.0))))
     # pylint: enable=g-long-lambda
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testStrippedOpListDef(self):
     with self.cached_session():
       # Creates a graph.
@@ -2988,7 +2988,7 @@ class CheckpointableCompatibilityTests(test.TestCase):
       # exception" block in Python 3.
       self.assertNotIn("NewCheckpointReader", cs.exception.message)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testGraphChangedForRestoreErrorRaised(self):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
@@ -3010,7 +3010,7 @@ class CheckpointableCompatibilityTests(test.TestCase):
             "a mismatch between the current graph and the graph"):
           a_saver.restore(sess=sess, save_path=save_path)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testLoadFromObjectBasedGraph(self):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
diff --git a/tensorflow/python/training/server_lib_same_variables_no_clear_test.py b/tensorflow/python/training/server_lib_same_variables_no_clear_test.py
index 1b2d588f44..ff3fab9f37 100644
--- a/tensorflow/python/training/server_lib_same_variables_no_clear_test.py
+++ b/tensorflow/python/training/server_lib_same_variables_no_clear_test.py
@@ -34,7 +34,7 @@ class SameVariablesNoClearTest(test.TestCase):
   # TODO(b/34465411): Starting multiple servers with different configurations
   # in the same test is flaky. Move this test case back into
   # "server_lib_test.py" when this is no longer the case.
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSameVariablesNoClear(self):
     server = server_lib.Server.create_local_server()
 
diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py
index 323e94c257..92cdc1c4ad 100644
--- a/tensorflow/python/training/server_lib_test.py
+++ b/tensorflow/python/training/server_lib_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import math_ops
@@ -55,6 +56,7 @@ class GrpcServerTest(test.TestCase):
       self.assertAllEqual([[4]], sess.run(e))
     # TODO(mrry): Add `server.stop()` and `server.join()` when these work.
 
+  @test_util.run_v1_only("b/120545219")
   def testMultipleSessions(self):
     server = self._cached_server
 
@@ -73,6 +75,7 @@ class GrpcServerTest(test.TestCase):
     # TODO(mrry): Add `server.stop()` and `server.join()` when these work.
 
   # Verifies various reset failures.
+  @test_util.run_v1_only("b/120545219")
   def testResetFails(self):
     # Creates variable with container name.
     with ops.container("test0"):
@@ -146,6 +149,7 @@ class GrpcServerTest(test.TestCase):
       self.assertEqual(0.5, min_val)
       self.assertEqual(0.5, max_val)
 
+  @test_util.run_v1_only("b/120545219")
   def testCloseCancelsBlockingOperation(self):
     server = self._cached_server
     sess = session.Session(server.target, config=self._useRPCConfig())
@@ -207,6 +211,7 @@ class GrpcServerTest(test.TestCase):
               "local": ["localhost"]
           }, job_name="local", task_index=0)
 
+  @test_util.run_v1_only("b/120545219")
   def testTimeoutRaisesException(self):
     server = self._cached_server
     q = data_flow_ops.FIFOQueue(1, [dtypes.float32])
@@ -241,6 +246,7 @@ class GrpcServerTest(test.TestCase):
       queue_runner_impl.start_queue_runners(sess)
       sess.run(var.assign(3.0))
 
+  @test_util.run_v1_only("b/120545219")
   def testIsolateSessionState(self):
     server = self._cached_server
 
@@ -296,6 +302,7 @@ class GrpcServerTest(test.TestCase):
     self.assertAllEqual(37, isolate_sess_0.run(v))
     self.assertAllEqual([19, 86], isolate_sess_1.run(v))
 
+  @test_util.run_v1_only("b/120545219")
   def testShapeChangingIsolateState(self):
     server = self._cached_server
     sharing_config = config_pb2.ConfigProto(isolate_session_state=False)
diff --git a/tensorflow/python/training/session_manager_test.py b/tensorflow/python/training/session_manager_test.py
index 4294ffa851..c9a0c56ffc 100644
--- a/tensorflow/python/training/session_manager_test.py
+++ b/tensorflow/python/training/session_manager_test.py
@@ -69,7 +69,7 @@ class SessionManagerTest(test.TestCase):
           "", init_fn=lambda sess: sess.run(v.initializer))
       self.assertAllClose([125], sess.run(v))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testPrepareSessionFails(self):
     checkpoint_dir = os.path.join(self.get_temp_dir(), "prepare_session")
     checkpoint_dir2 = os.path.join(self.get_temp_dir(), "prepare_session2")
@@ -154,7 +154,7 @@ class SessionManagerTest(test.TestCase):
               sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
       self.assertEquals(1, sess.run(v))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRecoverSession(self):
     # Create a checkpoint.
     checkpoint_dir = os.path.join(self.get_temp_dir(), "recover_session")
@@ -187,6 +187,7 @@ class SessionManagerTest(test.TestCase):
           checkpoint_filename_with_path=checkpoint_management.latest_checkpoint(
               checkpoint_dir))
 
+  @test_util.run_v1_only("b/120545219")
   def testWaitForSessionReturnsNoneAfterTimeout(self):
     with ops.Graph().as_default():
       variables.VariableV1(1, name="v")
@@ -209,7 +210,7 @@ class SessionManagerTest(test.TestCase):
               variables.global_variables()),
           local_init_op=None)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRecoverSessionWithReadyForLocalInitOp(self):
     # Create a checkpoint.
     checkpoint_dir = os.path.join(self.get_temp_dir(),
@@ -263,7 +264,7 @@ class SessionManagerTest(test.TestCase):
       self.assertEquals(1, sess.run(v))
       self.assertEquals(1, sess.run(w))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRecoverSessionWithReadyForLocalInitOpFailsToReadyLocal(self):
     # We use ready_for_local_init_op=tf.report_uninitialized_variables(),
     # which causes recover_session to not run local_init_op, and to return
@@ -320,7 +321,7 @@ class SessionManagerTest(test.TestCase):
               sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
       self.assertEquals(1, sess.run(v))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRecoverSessionNoChkptStillRunsLocalInitOp(self):
     # This test checks for backwards compatibility.
     # In particular, we continue to ensure that recover_session will execute
@@ -349,7 +350,7 @@ class SessionManagerTest(test.TestCase):
               sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
       self.assertEquals(1, sess.run(w))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRecoverSessionFailsStillRunsLocalInitOp(self):
     # Create a checkpoint.
     checkpoint_dir = os.path.join(
@@ -393,7 +394,7 @@ class SessionManagerTest(test.TestCase):
               sess.graph.get_tensor_by_name("w:0")).eval(session=sess))
       self.assertEquals(1, sess.run(w))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWaitForSessionLocalInit(self):
     server = server_lib.Server.create_local_server()
     with ops.Graph().as_default() as graph:
@@ -445,7 +446,7 @@ class SessionManagerTest(test.TestCase):
         # because of overly restrictive ready_for_local_init_op
         sm.wait_for_session("", max_wait_secs=3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWaitForSessionInsufficientReadyForLocalInitCheck(self):
     with ops.Graph().as_default() as graph:
       v = variables.VariableV1(1, name="v")
@@ -463,7 +464,7 @@ class SessionManagerTest(test.TestCase):
                                  "Session was not ready after waiting.*"):
       sm.wait_for_session("", max_wait_secs=3)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testPrepareSessionWithReadyForLocalInitOp(self):
     with ops.Graph().as_default():
       v = variables.VariableV1(1, name="v")
@@ -503,7 +504,7 @@ class SessionManagerTest(test.TestCase):
       self.assertEquals(1, sess.run(w))
       self.assertEquals(3, sess.run(x))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testPrepareSessionWithPartialInitOp(self):
     with ops.Graph().as_default():
       v = variables.VariableV1(1, name="v")
@@ -570,7 +571,7 @@ class SessionManagerTest(test.TestCase):
       self.assertEquals(1, sess.run(w_res))
       self.assertEquals(3, sess.run(x_res))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testPrepareSessionWithCyclicInitializer(self):
     # Regression test. Previously Variable._build_initializer_expr would enter
     # into an infinite recursion when the variable's initial_value involved
@@ -644,7 +645,7 @@ class SessionManagerTest(test.TestCase):
           "Init operations did not make model ready for local_init"):
         sm2.prepare_session("", init_op=None)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testPrepareSessionWithInsufficientReadyForLocalInitCheck(self):
     with ops.Graph().as_default():
       v = variables.VariableV1(1, name="v")
@@ -697,7 +698,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
           "", init_fn=lambda sess: sess.run(v.initializer))
       self.assertAllClose([125], sess.run(v))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testPrepareSessionFails(self):
     checkpoint_dir = os.path.join(self.get_temp_dir(), "prepare_session")
     checkpoint_dir2 = os.path.join(self.get_temp_dir(), "prepare_session2")
@@ -759,7 +760,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
           variables.is_variable_initialized(
               sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRecoverSession(self):
     # Create a checkpoint.
     checkpoint_dir = os.path.join(self.get_temp_dir(), "recover_session")
@@ -798,6 +799,7 @@ class ObsoleteSessionManagerTest(test.TestCase):
               sess.graph.get_tensor_by_name("v:0")).eval(session=sess))
       self.assertEquals(1, sess.run(v))
 
+  @test_util.run_v1_only("b/120545219")
   def testWaitForSessionReturnsNoneAfterTimeout(self):
     with ops.Graph().as_default():
       variables.VariableV1(1, name="v")
diff --git a/tensorflow/python/training/slot_creator_test.py b/tensorflow/python/training/slot_creator_test.py
index 1f26aaa434..f1f0d58a69 100644
--- a/tensorflow/python/training/slot_creator_test.py
+++ b/tensorflow/python/training/slot_creator_test.py
@@ -32,7 +32,7 @@ from tensorflow.python.training import slot_creator
 
 class SlotCreatorTest(test.TestCase):
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCreateSlotFromVariable(self):
     with self.cached_session():
       v = variables.Variable([1.0, 2.5], name="var")
@@ -73,7 +73,7 @@ class SlotCreatorTest(test.TestCase):
       self.assertEqual(dtypes.float64, slot.dtype.base_dtype)
       self.assertAllEqual([0.0, 0.0], self.evaluate(slot))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCreateZerosSlotFromDynamicShapedVariable(self):
     with self.cached_session():
       dyn_shape = constant_op.constant([2], dtype=dtypes.int32)
@@ -125,7 +125,7 @@ class SlotCreatorTest(test.TestCase):
       self.assertEqual(dtypes.float64, slot.dtype.base_dtype)
       self.assertAllEqual([0.0, 0.0], self.evaluate(slot))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testCreateSlotFromVariableRespectsScope(self):
     # See discussion on #2740.
     with self.cached_session():
diff --git a/tensorflow/python/training/supervisor_test.py b/tensorflow/python/training/supervisor_test.py
index f6505acc9a..180ddb5287 100644
--- a/tensorflow/python/training/supervisor_test.py
+++ b/tensorflow/python/training/supervisor_test.py
@@ -421,7 +421,7 @@ class SupervisorTest(test.TestCase):
       with self.assertRaisesRegexp(RuntimeError, "requires a summary writer"):
         sv.summary_computed(sess, sess.run(summ))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testLogdirButExplicitlyNoSummaryWriter(self):
     logdir = self._test_dir("explicit_no_summary_writer")
     with ops.Graph().as_default():
@@ -507,7 +507,7 @@ class SupervisorTest(test.TestCase):
       sv = supervisor.Supervisor(logdir="", session_manager=sm)
       sv.prepare_or_wait_for_session("")
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInitOp(self):
     logdir = self._test_dir("default_init_op")
     with ops.Graph().as_default():
@@ -517,7 +517,7 @@ class SupervisorTest(test.TestCase):
       self.assertAllClose([1.0, 2.0, 3.0], sess.run(v))
       sv.stop()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInitFn(self):
     logdir = self._test_dir("default_init_op")
     with ops.Graph().as_default():
@@ -531,7 +531,7 @@ class SupervisorTest(test.TestCase):
       self.assertAllClose([1.0, 2.0, 3.0], sess.run(v))
       sv.stop()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testInitOpWithFeedDict(self):
     logdir = self._test_dir("feed_dict_init_op")
     with ops.Graph().as_default():
@@ -545,7 +545,7 @@ class SupervisorTest(test.TestCase):
       self.assertAllClose([1.0, 2.0, 3.0], sess.run(v))
       sv.stop()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testReadyForLocalInitOp(self):
     server = server_lib.Server.create_local_server()
     logdir = self._test_dir("default_ready_for_local_init_op")
@@ -588,7 +588,7 @@ class SupervisorTest(test.TestCase):
     sv0.stop()
     sv1.stop()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testReadyForLocalInitOpRestoreFromCheckpoint(self):
     server = server_lib.Server.create_local_server()
     logdir = self._test_dir("ready_for_local_init_op_restore")
@@ -720,7 +720,7 @@ class SupervisorTest(test.TestCase):
                                    "Variables not initialized: w"):
         sv.prepare_or_wait_for_session(server.target)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSetupFail(self):
     logdir = self._test_dir("setup_fail")
     with ops.Graph().as_default():
@@ -731,7 +731,7 @@ class SupervisorTest(test.TestCase):
       variables.VariableV1([1.0, 2.0, 3.0], name="v")
       supervisor.Supervisor(logdir=logdir, is_chief=False)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testDefaultGlobalStep(self):
     logdir = self._test_dir("default_global_step")
     with ops.Graph().as_default():
@@ -741,7 +741,7 @@ class SupervisorTest(test.TestCase):
       self.assertEquals(287, sess.run(sv.global_step))
       sv.stop()
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testRestoreFromMetaGraph(self):
     logdir = self._test_dir("restore_from_meta_graph")
     with ops.Graph().as_default():
@@ -763,7 +763,7 @@ class SupervisorTest(test.TestCase):
   # This test is based on the fact that the standard services start
   # right away and get to run once before sv.stop() returns.
   # We still sleep a bit to make the test robust.
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testStandardServicesWithoutGlobalStep(self):
     logdir = self._test_dir("standard_services_without_global_step")
     # Create a checkpoint.
@@ -814,7 +814,7 @@ class SupervisorTest(test.TestCase):
 
   # Same as testStandardServicesNoGlobalStep but with a global step.
   # We should get a summary about the step time.
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testStandardServicesWithGlobalStep(self):
     logdir = self._test_dir("standard_services_with_global_step")
     # Create a checkpoint.
diff --git a/tensorflow/python/training/sync_replicas_optimizer_test.py b/tensorflow/python/training/sync_replicas_optimizer_test.py
index 1ef8756ef6..428583d048 100644
--- a/tensorflow/python/training/sync_replicas_optimizer_test.py
+++ b/tensorflow/python/training/sync_replicas_optimizer_test.py
@@ -22,6 +22,7 @@ import time
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.framework.test_util import create_local_cluster
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -88,6 +89,7 @@ class SyncReplicasOptimizerTest(test.TestCase):
   def _run(self, train_op, sess):
     sess.run(train_op)
 
+  @test_util.run_v1_only("b/120545219")
   def test2Workers(self):
     num_workers = 2
     replicas_to_aggregate = 2
@@ -178,6 +180,7 @@ class SyncReplicasOptimizerTest(test.TestCase):
                         sessions[1].run(var_1_g_1))
 
   # 3 workers and one of them is backup.
+  @test_util.run_v1_only("b/120545219")
   def test3Workers1Backup(self):
     num_workers = 3
     replicas_to_aggregate = 2
@@ -266,6 +269,7 @@ class SyncReplicasOptimizerHookTest(test.TestCase):
                                  "apply_gradient should be called"):
       hook.begin()
 
+  @test_util.run_v1_only("b/120545219")
   def testCanCreatedBeforeMinimizeCalled(self):
     """This behavior is required to be integrated with Estimators."""
     opt = training.SyncReplicasOptimizer(
@@ -278,6 +282,7 @@ class SyncReplicasOptimizerHookTest(test.TestCase):
     opt.minimize(v, global_step=global_step)
     hook.begin()
 
+  @test_util.run_v1_only("b/120545219")
   def testFetchVariableList(self):
     opt = training.SyncReplicasOptimizer(
         opt=adam.AdamOptimizer(0.01),
diff --git a/tensorflow/python/training/tensorboard_logging_test.py b/tensorflow/python/training/tensorboard_logging_test.py
index 5af6a0aa7b..5088ab07e5 100644
--- a/tensorflow/python/training/tensorboard_logging_test.py
+++ b/tensorflow/python/training/tensorboard_logging_test.py
@@ -25,6 +25,7 @@ import tempfile
 import time
 
 from tensorflow.core.util import event_pb2
+from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary_iterator
@@ -32,6 +33,7 @@ from tensorflow.python.summary.writer import writer
 from tensorflow.python.training import tensorboard_logging
 
 
+@test_util.run_v1_only("b/120545219")
 class EventLoggingTest(test.TestCase):
 
   def setUp(self):
@@ -85,6 +87,7 @@ class EventLoggingTest(test.TestCase):
                                   (event_pb2.LogMessage.ERROR, "format")])
     self.assertEqual(2, self.logged_message_count)
 
+  @test_util.run_v1_only("b/120545219")
   def testVerbosity(self):
     tensorboard_logging.set_summary_writer(self._sw)
     tensorboard_logging.set_verbosity(tensorboard_logging.ERROR)
@@ -112,6 +115,7 @@ class EventLoggingTest(test.TestCase):
     tensorboard_logging.warn("this should work")
     self.assertEqual(1, self.logged_message_count)
 
+  @test_util.run_v1_only("b/120545219")
   def testSummaryWriterFailsAfterClear(self):
     tensorboard_logging._clear_summary_writer()
     with self.assertRaises(RuntimeError):
diff --git a/tensorflow/python/training/training_ops_test.py b/tensorflow/python/training/training_ops_test.py
index 51f49ca081..ba0f40999b 100644
--- a/tensorflow/python/training/training_ops_test.py
+++ b/tensorflow/python/training/training_ops_test.py
@@ -60,7 +60,7 @@ class TrainingOpsTest(TensorFlowTestCase):
       self.assertShapeEqual(out, apply_sgd)
       self.assertAllCloseAccordingToType(x - alpha * delta, out)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testApplyGradientDescent(self):
     for (dtype, use_gpu) in itertools.product(
         [np.float16, np.float32, np.float64], [False, True]):
@@ -129,7 +129,7 @@ class TrainingOpsTest(TensorFlowTestCase):
         self.assertAllClose(linear_update, self.evaluate(linear))
         self.assertAllClose(expected_out, out)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testApplyAdagrad(self):
     for (dtype, use_gpu) in itertools.product(
         [np.float16, np.float32, np.float64], [False, True]):
@@ -139,7 +139,7 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad = np.arange(100).astype(dtype)
       self._testTypesForAdagrad(x, y, lr, grad, use_gpu)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testApplyFtrl(self):
     for dtype in [np.float16, np.float32, np.float64]:
       x = np.arange(100).astype(dtype)
@@ -211,7 +211,7 @@ class TrainingOpsTest(TensorFlowTestCase):
         self.assertAllCloseAccordingToType(y[index] + grad[i] * grad[i],
                                            self.evaluate(accum)[index])
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSparseApplyAdagrad(self):
     for (dtype, index_type) in itertools.product(
         [np.float16, np.float32, np.float64], [np.int32, np.int64]):
@@ -225,7 +225,7 @@ class TrainingOpsTest(TensorFlowTestCase):
       indices = np.array([0, 2]).astype(index_type)
       self._testTypesForSparseAdagrad(x, y, lr, grad, indices)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSparseApplyAdagradDim1(self):
     for (dtype, index_type) in itertools.product(
         [np.float16, np.float32, np.float64], [np.int32, np.int64]):
@@ -239,7 +239,7 @@ class TrainingOpsTest(TensorFlowTestCase):
       indices = np.array([0, 2]).astype(index_type)
       self._testTypesForSparseAdagrad(x, y, lr, grad, indices)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testSparseApplyFtrlDim1(self):
     for (dtype, index_type) in itertools.product(
         [np.float16, np.float32, np.float64], [np.int32, np.int64]):
@@ -255,7 +255,7 @@ class TrainingOpsTest(TensorFlowTestCase):
       indices = np.array([0, 2]).astype(index_type)
       self._testTypesForSparseFtrl(x, y, z, lr, grad, indices)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testApplyAdam(self):
     for dtype, use_gpu in itertools.product(
         [np.float16, np.float32, np.float64], [False, True]):
diff --git a/tensorflow/python/training/training_util_test.py b/tensorflow/python/training/training_util_test.py
index 3317008fce..3f9858a33b 100644
--- a/tensorflow/python/training/training_util_test.py
+++ b/tensorflow/python/training/training_util_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.training import monitored_session
 from tensorflow.python.training import training_util
 
 
+@test_util.run_v1_only('b/120545219')
 class GlobalStepTest(test.TestCase):
 
   def _assert_global_step(self, global_step, expected_dtype=dtypes.int64):
@@ -47,7 +48,6 @@ class GlobalStepTest(test.TestCase):
     self.assertRaisesRegexp(TypeError, 'does not have integer type',
                             training_util.get_global_step, g)
 
-  @test_util.run_deprecated_v1
   def test_invalid_shape(self):
     with ops.Graph().as_default() as g:
       self.assertIsNone(training_util.get_global_step())
@@ -72,7 +72,6 @@ class GlobalStepTest(test.TestCase):
                               training_util.create_global_step, g)
       self._assert_global_step(training_util.create_global_step(ops.Graph()))
 
-  @test_util.run_deprecated_v1
   def test_get_global_step(self):
     with ops.Graph().as_default() as g:
       self.assertIsNone(training_util.get_global_step())
@@ -93,6 +92,7 @@ class GlobalStepTest(test.TestCase):
       self._assert_global_step(training_util.get_or_create_global_step(g))
 
 
+@test_util.run_v1_only('b/120545219')
 class GlobalStepReadTest(test.TestCase):
 
   def test_global_step_read_is_none_if_there_is_no_global_step(self):
diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index 40162daf14..723fceef41 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -314,7 +314,7 @@ class ApiCompatibilityTest(test.TestCase):
         update_goldens=FLAGS.update_goldens,
         api_version=api_version)
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testAPIBackwardsCompatibility(self):
     api_version = 1
     golden_file_pattern = os.path.join(
@@ -333,7 +333,7 @@ class ApiCompatibilityTest(test.TestCase):
         'tensorflow.python.util.lazy_loader.LazyLoader'
         in str(type(tf.contrib)))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only('b/120545219')
   def testAPIBackwardsCompatibilityV1(self):
     api_version = 1
     golden_file_pattern = os.path.join(
diff --git a/tensorflow/tools/compatibility/testdata/test_file_v0_11.py b/tensorflow/tools/compatibility/testdata/test_file_v0_11.py
index 68ba7a2630..917236da4b 100644
--- a/tensorflow/tools/compatibility/testdata/test_file_v0_11.py
+++ b/tensorflow/tools/compatibility/testdata/test_file_v0_11.py
@@ -34,6 +34,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
   a unit test if the converter is successful.
   """
 
+  @test_util.run_v1_only("b/120545219")
   def testArgRenames(self):
     with self.cached_session():
 
@@ -97,6 +98,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
           tf.expand_dims([[1, 2], [3, 4]], axis=1).eval(),
           [[[1, 2]], [[3, 4]]])
 
+  @test_util.run_v1_only("b/120545219")
   def testArgMinMax(self):
     with self.cached_session():
       self.assertAllEqual(
@@ -112,6 +114,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
           tf.argmax([[1, 2, 3], [4, 1, 0]], dimension=0).eval(),
           [1, 0, 0])
 
+  @test_util.run_v1_only("b/120545219")
   def testExpandAndSqueeze(self):
     with self.cached_session():
 
@@ -139,6 +142,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
                   [[1, 2, 3]], dim=1), squeeze_dims=[1]).eval(),
           a)
 
+  @test_util.run_v1_only("b/120545219")
   def testArithmeticRenames(self):
     with self.cached_session() as s:
       stuff = tf.split(1, 2, [[1, 2, 3, 4], [4, 5, 6, 7]])
@@ -163,6 +167,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
       #     # TODO(aselle): (tf.batch_*)
       # ]
 
+  @test_util.run_v1_only("b/120545219")
   def testBatchAndSvd(self):
     with self.cached_session():
       mat = [[1., 2.], [2., 3.]]
@@ -174,6 +179,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
           tf.svd(mat, False, True).eval(),
           tf.svd(mat, compute_uv=False, full_matrices=True).eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testCrossEntropy(self):
     # TODO(aselle): Test sparse_softmax_...
     with self.cached_session():
@@ -190,6 +196,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
           tf.nn.sigmoid_cross_entropy_with_logits(
               labels=labels, logits=logits).eval())
 
+  @test_util.run_v1_only("b/120545219")
   def testVariables(self):
     with self.cached_session() as s:
 
@@ -200,6 +207,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
       _ = [v.name for v in tf.all_variables()]
       _ = [v.name for v in tf.local_variables()]
 
+  @test_util.run_v1_only("b/120545219")
   def testSummaries(self):
     with self.cached_session() as s:
       var = tf.Variable([1, 2, 3], dtype=tf.float32)
diff --git a/tensorflow/tools/compatibility/testdata/test_file_v1_12.py b/tensorflow/tools/compatibility/testdata/test_file_v1_12.py
index fd688781b0..5ce4dd49ad 100644
--- a/tensorflow/tools/compatibility/testdata/test_file_v1_12.py
+++ b/tensorflow/tools/compatibility/testdata/test_file_v1_12.py
@@ -28,11 +28,13 @@ class TestUpgrade(test_util.TensorFlowTestCase):
   def setUp(self):
     tf.enable_eager_execution()
 
+  @test_util.run_v1_only("b/120545219")
   def testRenames(self):
     with self.cached_session():
       self.assertAllClose(1.04719755, tf.acos(0.5))
       self.assertAllClose(0.5, tf.rsqrt(4.0))
 
+  @test_util.run_v1_only("b/120545219")
   def testSerializeSparseTensor(self):
     sp_input = tf.SparseTensor(
         indices=tf.constant([[1]], dtype=tf.int64),
@@ -44,6 +46,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
       self.assertEqual((3,), serialized_sp.shape)
       self.assertTrue(serialized_sp[0].numpy())  # check non-empty
 
+  @test_util.run_v1_only("b/120545219")
   def testSerializeManySparse(self):
     sp_input = tf.SparseTensor(
         indices=tf.constant([[0, 1]], dtype=tf.int64),
@@ -55,6 +58,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
           sp_input, 'serialize_name', tf.string)
       self.assertEqual((1, 3), serialized_sp.shape)
 
+  @test_util.run_v1_only("b/120545219")
   def testArgMaxMin(self):
     self.assertAllClose(
         [1],
-- 
GitLab


From 323f3eb4c2f15c713e992881ea8854069f2433f4 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Fri, 7 Dec 2018 17:41:38 -0800
Subject: [PATCH 256/873] [tf.data] Replace internal mechanism for copying
 batch elements in MapAndBatchDataset as the current mechanism could result in
 a segfault if the elements to be batched are DT_VARIANT.

PiperOrigin-RevId: 224605099
---
 .../data/experimental/map_and_batch_dataset_op.cc  |  4 ++--
 .../map_and_batch_dataset_serialization_test.py    | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
index d3ae845cfb..d86c3a1a63 100644
--- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
@@ -455,8 +455,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
                 // TODO(mrry): Add a version of DoParallelConcat that allows us
                 // to move `tensor` where possible, to speed up string tensor
                 // batching.
-                Status copy_status = ::tensorflow::functor::DoParallelConcat(
-                    *dataset()->device_, tensor, offset, batch);
+                Status copy_status =
+                    batch_util::CopyElementToSlice(tensor, batch, offset);
                 if (!copy_status.ok()) {
                   result->UpdateStatus(copy_status, offset);
                   break;
diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
index 166ffa99ca..8bfe6ce2f3 100644
--- a/tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/serialization/map_and_batch_dataset_serialization_test.py
@@ -22,6 +22,7 @@ import math
 from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base
 from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
@@ -83,6 +84,19 @@ class MapAndBatchDatasetSerializationTest(
     self.run_core_tests(lambda: build_ds(10, True), lambda: build_ds(15, True),
                         num_outputs_drop_remainder)
 
+  def testSparse(self):
+
+    def build_dataset():
+
+      def map_fn(i):
+        return sparse_tensor.SparseTensorValue(
+            indices=[[0]], values=(i * [1]), dense_shape=[1])
+
+      return dataset_ops.Dataset.range(10).apply(
+          batching.map_and_batch(map_fn, 5))
+
+    self.run_core_tests(build_dataset, None, 2)
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From b03cb954ac1e29b4eb8242173902cebda5c95230 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Dec 2018 17:52:03 -0800
Subject: [PATCH 257/873] Fix test failure on Ubuntu cc

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index 18b983b9ed..55186d5992 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -48,11 +48,11 @@ TEST(TRTAllocatorTest, Align) {
         513ul, 700ul, 12345ul, 1ul << 32}) {
     for (uint64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
       for (const uintptr_t ptr_val :
-           {1ull, alignment == 1 ? 1ull : alignment - 1, alignment, alignment + 1,
+           {static_cast<uint64_t>(1), alignment == 1 ? static_cast<uint64_t>(1) : alignment - 1, alignment, alignment + 1,
             alignment + (alignment / 2)}) {
         if (ptr_val % alignment == 0) {
           for (const uint64_t size :
-               {1ull, space == 1 ? 1ull : space - 1, space, space + 1}) {
+               {static_cast<uint64_t>(1), space == 1 ? static_cast<uint64_t>(1) : space - 1, space, space + 1}) {
             EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
           }
         } else {
@@ -62,7 +62,7 @@ TEST(TRTAllocatorTest, Align) {
             EXPECT_TRUE(
                 RunTest(alignment, space - diff, ptr_val + diff, space - diff));
             for (const uint64_t size :
-                 {1ull, space - diff > 1 ? space - diff - 1 : 1ull, space - diff,
+                 {static_cast<uint64_t>(1), space - diff > 1 ? space - diff - 1 : static_cast<uint64_t>(1), space - diff,
                   space - diff + 1, space - 1}) {
               EXPECT_EQ(space - diff >= size,
                         RunTest(alignment, size, ptr_val, space));
-- 
GitLab


From b05c71ac5e93b95f0265ed43102aa27f74f5ec26 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Fri, 7 Dec 2018 17:59:49 -0800
Subject: [PATCH 258/873] Use the new Dataset.reduce in `for` loops.

PiperOrigin-RevId: 224606535
---
 .../autograph/operators/control_flow.py       | 45 +++++--------------
 1 file changed, 10 insertions(+), 35 deletions(-)

diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py
index 670897744a..89f7b8522f 100644
--- a/tensorflow/python/autograph/operators/control_flow.py
+++ b/tensorflow/python/autograph/operators/control_flow.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 from tensorflow.python.autograph.operators import py_builtins
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_math_ops
@@ -100,6 +99,7 @@ def _known_len_for_stmt(iter_, extra_test, body, init_state):
       extra_deps=(iter_,),
       opts=dict(maximum_iterations=n))
   # Dropping the iteration index because it's not syntactically visible.
+  # TODO(mdan): Don't.
   results = results[1:]
 
   # TODO(mdan): Remove this special case.
@@ -110,40 +110,15 @@ def _known_len_for_stmt(iter_, extra_test, body, init_state):
 
 def _dataset_for_stmt(ds, extra_test, body, init_state):
   """Overload of for_stmt that iterates over TF Datasets."""
-  # Because Datsets only expose get_next, in the style of Python iterators,
-  # we are forced to unpack the loop as:
-  #
-  # epoch_number, iterate = ds.get_next()
-  # while epoch_number < 2:
-  #   <body>
-  #   epoch_number, iterate = ds.get_next()
-  epoch_numbers = dataset_ops.Dataset.range(2)
-  def tag_with(ds, tag):
-    return dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.from_tensors(tag).repeat(), ds))
-  ds_with_epoch = epoch_numbers.flat_map(lambda i: tag_with(ds, i))
-
-  iterator = dataset_ops.make_initializable_iterator(ds_with_epoch)
-  with ops.control_dependencies((iterator.initializer,)):
-    epoch_number, iterate = iterator.get_next()
-
-    def while_body(epoch_number, iterate, *state):
-      new_state = body(iterate, *state)
-      epoch_number, iterate = iterator.get_next()
-      return (epoch_number, iterate) + new_state
-
-    def while_cond(epoch_number, iterate, *state):
-      del iterate
-      return gen_math_ops.logical_and(epoch_number < 1, extra_test(*state))
-
-    results = while_stmt(
-        while_cond,
-        while_body,
-        init_state=(epoch_number, iterate) + init_state,
-        extra_deps=())
-  # Dropping the epoch number and iterate because they are not syntactically
-  # visible.
-  results = results[2:]
+  if extra_test(*init_state) is not True:
+    raise NotImplementedError(
+        'break statements are not yet supported in for/Dataset loops')
+
+  def reduce_body(state, iterate):
+    new_state = body(iterate, *state)
+    return new_state
+
+  results = ds.reduce(init_state, reduce_body)
 
   # TODO(mdan): Remove this special case.
   if len(results) == 1:
-- 
GitLab


From 4c27508f8185c3d92f35c665aa02411623a7b165 Mon Sep 17 00:00:00 2001
From: Derek Murray <mrry@google.com>
Date: Fri, 7 Dec 2018 18:00:43 -0800
Subject: [PATCH 259/873] [tf.data] Add support for unbatching to the
 `Structure` class.

PiperOrigin-RevId: 224606610
---
 .../python/data/experimental/ops/batching.py  | 13 ++--
 tensorflow/python/data/ops/dataset_ops.py     | 73 +++++++------------
 tensorflow/python/data/ops/optional_ops.py    |  8 ++
 tensorflow/python/data/util/BUILD             |  1 +
 tensorflow/python/data/util/structure.py      | 66 +++++++++++++++++
 tensorflow/python/data/util/structure_test.py | 72 +++++++++++++++++-
 6 files changed, 176 insertions(+), 57 deletions(-)

diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py
index f70a4e0537..29df98f4ea 100644
--- a/tensorflow/python/data/experimental/ops/batching.py
+++ b/tensorflow/python/data/experimental/ops/batching.py
@@ -24,7 +24,6 @@ from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import convert
 from tensorflow.python.data.util import nest
-from tensorflow.python.data.util import sparse
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -406,21 +405,19 @@ def unbatch():
 
   def _apply_fn(dataset):
     """Function from `Dataset` to `Dataset` that applies the transformation."""
-    if not sparse.any_sparse(dataset.output_classes):
-      return _UnbatchDataset(dataset)
-
     # NOTE(mrry): We must ensure that any SparseTensors in `dataset`
     # are normalized to the rank-1 dense representation, so that the
     # sparse-oblivious unbatching logic will slice them
     # appropriately. This leads to a somewhat inefficient re-encoding step
     # for all SparseTensor components.
-    # TODO(mrry): Consider optimizing this in future
-    # if it turns out to be a bottleneck.
+    # TODO(mrry): Consider optimizing this in future if it turns out to be
+    # a bottleneck.
     def normalize(arg, *rest):
+      # pylint: disable=protected-access
       if rest:
-        return sparse.serialize_many_sparse_tensors((arg,) + rest)
+        return dataset._element_structure._to_batched_tensor_list((arg,) + rest)
       else:
-        return sparse.serialize_many_sparse_tensors(arg)
+        return dataset._element_structure._to_batched_tensor_list(arg)
 
     normalized_dataset = dataset.map(normalize)
 
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index e08a3af852..bee04aaef2 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1800,21 +1800,12 @@ class TensorDataset(DatasetSource):
               t, name="component_%d" % i)
           for i, t in enumerate(nest.flatten(tensors))
       ])
-
-    self._tensors = sparse.serialize_sparse_tensors(tensors)
-    output_classes = sparse.get_classes(tensors)
-    output_shapes = nest.pack_sequence_as(
-        tensors, [t.get_shape() for t in nest.flatten(tensors)])
-    output_types = nest.pack_sequence_as(
-        tensors, [t.dtype for t in nest.flatten(tensors)])
-    self._structure = structure_lib.convert_legacy_structure(
-        output_types, output_shapes, output_classes)
+    self._structure = structure_lib.Structure.from_value(tensors)
+    self._tensors = self._structure._to_tensor_list(tensors)  # pylint: disable=protected-access
 
   def _as_variant_tensor(self):
-    # pylint: disable=protected-access
     return gen_dataset_ops.tensor_dataset(
-        nest.flatten(self._tensors),
-        output_shapes=self._structure._flat_shapes)
+        self._tensors, output_shapes=self._structure._flat_shapes)  # pylint: disable=protected-access
 
   @property
   def _element_structure(self):
@@ -1834,27 +1825,22 @@ class TensorSliceDataset(DatasetSource):
               t, name="component_%d" % i)
           for i, t in enumerate(nest.flatten(tensors))
       ])
-      flat_tensors = nest.flatten(tensors)
+
+    batched_structure = structure_lib.Structure.from_value(tensors)
+    # pylint: disable=protected-access
+    self._tensors = batched_structure._to_batched_tensor_list(tensors)
+    self._structure = batched_structure._unbatch()
+    # pylint: enable=protected-access
 
     batch_dim = tensor_shape.Dimension(tensor_shape.dimension_value(
-        flat_tensors[0].get_shape()[0]))
-    for t in flat_tensors[1:]:
+        self._tensors[0].get_shape()[0]))
+    for t in self._tensors[1:]:
       batch_dim.assert_is_compatible_with(tensor_shape.Dimension(
           tensor_shape.dimension_value(t.get_shape()[0])))
-    self._tensors = sparse.serialize_many_sparse_tensors(tensors)
-    output_classes = sparse.get_classes(tensors)
-    output_shapes = nest.pack_sequence_as(
-        tensors, [t.get_shape()[1:] for t in nest.flatten(tensors)])
-    output_types = nest.pack_sequence_as(
-        tensors, [t.dtype for t in nest.flatten(tensors)])
-    self._structure = structure_lib.convert_legacy_structure(
-        output_types, output_shapes, output_classes)
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.tensor_slice_dataset(
-        nest.flatten(self._tensors),
-        output_shapes=nest.flatten(
-            self._structure._to_legacy_output_shapes()))  # pylint: disable=protected-access
+        self._tensors, output_shapes=self._structure._flat_shapes)  # pylint: disable=protected-access
 
   @property
   def _element_structure(self):
@@ -1871,17 +1857,13 @@ class SparseTensorSliceDataset(DatasetSource):
       raise TypeError("`sparse_tensor` must be a `tf.SparseTensor` object.")
     self._sparse_tensor = sparse_tensor
 
-    output_classes = (ops.Tensor, ops.Tensor, ops.Tensor)
     indices_shape = self._sparse_tensor.indices.get_shape()
     shape_shape = self._sparse_tensor.dense_shape.get_shape()
     rank = (indices_shape.dims[1] - 1).merge_with(shape_shape.dims[0] - 1)
-    num_values = tensor_shape.Dimension(None)
-    output_shapes = (tensor_shape.TensorShape([num_values, rank]),
-                     tensor_shape.TensorShape([num_values]),
-                     tensor_shape.TensorShape([rank]))
-    output_types = (dtypes.int64, self._sparse_tensor.dtype, dtypes.int64)
-    self._structure = structure_lib.convert_legacy_structure(
-        output_types, output_shapes, output_classes)
+    self._structure = structure_lib.NestedStructure(
+        (structure_lib.TensorStructure(dtypes.int64, [None, rank]),
+         structure_lib.TensorStructure(self._sparse_tensor.dtype, [None]),
+         structure_lib.TensorStructure(dtypes.int64, [rank])))
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.sparse_tensor_slice_dataset(
@@ -1936,6 +1918,9 @@ class DatasetStructure(structure_lib.Structure):
   def _to_tensor_list(self, value):
     return [value._as_variant_tensor()]  # pylint: disable=protected-access
 
+  def _to_batched_tensor_list(self, value):
+    raise NotImplementedError("Unbatching for `tf.data.Dataset` objects.")
+
   def _from_tensor_list(self, flat_value):
     if (len(flat_value) != 1 or flat_value[0].dtype != dtypes.variant or
         not flat_value[0].shape.is_compatible_with(tensor_shape.scalar())):
@@ -1963,6 +1948,9 @@ class DatasetStructure(structure_lib.Structure):
   def _batch(self, batch_size):
     raise NotImplementedError("Batching for `tf.data.Dataset` objects.")
 
+  def _unbatch(self):
+    raise NotImplementedError("Unbatching for `tf.data.Dataset` objects.")
+
 
 # pylint: disable=protected-access
 structure_lib.Structure._register_custom_converter(DatasetV2,
@@ -2153,25 +2141,14 @@ class _GeneratorDataset(DatasetSource):
         destroyed. The return value is ignored.
     """
     super(_GeneratorDataset, self).__init__()
-    # These members will be initialized by `tf_init_func`.
-    self._state_classes = None
-    self._state_shapes = None
-    self._state_types = None
-
     self._init_args = init_args
 
-    init_args_classes = sparse.get_classes(init_args)
-    init_args_shapes = nest.pack_sequence_as(
-        init_args, [t.get_shape() for t in nest.flatten(init_args)])
-    init_args_types = nest.pack_sequence_as(
-        init_args, [t.dtype for t in nest.flatten(init_args)])
+    self._init_structure = structure_lib.Structure.from_value(init_args)
 
     self._init_func = StructuredFunctionWrapper(
         init_func,
         self._transformation_name(),
-        input_classes=init_args_classes,
-        input_shapes=init_args_shapes,
-        input_types=init_args_types)
+        input_structure=self._init_structure)
 
     self._next_func = StructuredFunctionWrapper(
         next_func,
@@ -2185,7 +2162,7 @@ class _GeneratorDataset(DatasetSource):
 
   def _as_variant_tensor(self):
     return gen_dataset_ops.generator_dataset(
-        nest.flatten(self._init_args)
+        self._init_structure._to_tensor_list(self._init_args)  # pylint: disable=protected-access
         + self._init_func.function.captured_inputs,
         self._next_func.function.captured_inputs,
         self._finalize_func.function.captured_inputs,
diff --git a/tensorflow/python/data/ops/optional_ops.py b/tensorflow/python/data/ops/optional_ops.py
index 66011d8518..dcb743bee0 100644
--- a/tensorflow/python/data/ops/optional_ops.py
+++ b/tensorflow/python/data/ops/optional_ops.py
@@ -169,6 +169,10 @@ class OptionalStructure(structure.Structure):
   def _to_tensor_list(self, value):
     return [value._variant_tensor]  # pylint: disable=protected-access
 
+  def _to_batched_tensor_list(self, value):
+    raise NotImplementedError(
+        "Unbatching for `tf.data.experimental.Optional` objects.")
+
   def _from_tensor_list(self, flat_value):
     if (len(flat_value) != 1 or flat_value[0].dtype != dtypes.variant or
         not flat_value[0].shape.is_compatible_with(tensor_shape.scalar())):
@@ -197,6 +201,10 @@ class OptionalStructure(structure.Structure):
     raise NotImplementedError(
         "Batching for `tf.data.experimental.Optional` objects.")
 
+  def _unbatch(self):
+    raise NotImplementedError(
+        "Unbatching for `tf.data.experimental.Optional` objects.")
+
 
 # pylint: disable=protected-access
 structure.Structure._register_custom_converter(Optional,
diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD
index f15ebc32a8..04e80299e0 100644
--- a/tensorflow/python/data/util/BUILD
+++ b/tensorflow/python/data/util/BUILD
@@ -93,6 +93,7 @@ py_test(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:variables",
+        "//tensorflow/python/data/kernel_tests:test_base",
         "@absl_py//absl/testing:parameterized",
     ],
 )
diff --git a/tensorflow/python/data/util/structure.py b/tensorflow/python/data/util/structure.py
index 5e3addacaa..9de0c4da0e 100644
--- a/tensorflow/python/data/util/structure.py
+++ b/tensorflow/python/data/util/structure.py
@@ -113,6 +113,26 @@ class Structure(object):
     """
     raise NotImplementedError("Structure._to_tensor_list()")
 
+  @abc.abstractmethod
+  def _to_batched_tensor_list(self, value):
+    """Returns a flat list of rank >= 1 `tf.Tensor` representing `value`.
+
+    This method can be used, along with `self._flat_shapes` and
+    `self._flat_types` to represent structured values in lower level APIs
+    (such as plain TensorFlow operations) that do not understand structure,
+    *and* that require that the plain tensors have a rank of at least one
+    (e.g. for the purpose of slicing the tensors).
+
+    Requires: `self.is_compatible_with(Structure.from_value(value))`.
+
+    Args:
+      value: A value with compatible structure.
+
+    Returns:
+      A flat list of `tf.Tensor` representing `value`.
+    """
+    raise NotImplementedError("Structure._to_batched_tensor_list()")
+
   @abc.abstractmethod
   def _from_tensor_list(self, flat_value):
     """Builds a flat list of `tf.Tensor` into a value matching this structure.
@@ -159,6 +179,10 @@ class Structure(object):
     """
     raise NotImplementedError("Structure._batch()")
 
+  @abc.abstractmethod
+  def _unbatch(self):
+    raise NotImplementedError("Structure._unbatch()")
+
   @staticmethod
   def from_value(value):
     """Returns a `Structure` that represents the given `value`.
@@ -329,6 +353,22 @@ class NestedStructure(Structure):
       ret.extend(structure._to_tensor_list(sub_value))
     return ret
 
+  def _to_batched_tensor_list(self, value):
+    ret = []
+
+    try:
+      flat_value = nest.flatten_up_to(self._nested_structure, value)
+    except (ValueError, TypeError):
+      raise ValueError("The value %r is not compatible with the nested "
+                       "structure %r." % (value, self._nested_structure))
+
+    for sub_value, structure in zip(flat_value, self._flat_nested_structure):
+      if not structure.is_compatible_with(Structure.from_value(sub_value)):
+        raise ValueError("Component value %r is not compatible with the nested "
+                         "structure %r." % (sub_value, structure))
+      ret.extend(structure._to_batched_tensor_list(sub_value))
+    return ret
+
   def _from_tensor_list(self, flat_value):
     if len(flat_value) != len(self._flat_types):
       raise ValueError("Expected %d flat values in NestedStructure but got %d."
@@ -378,6 +418,10 @@ class NestedStructure(Structure):
     return NestedStructure(nest.map_structure(
         lambda s: s._batch(batch_size), self._nested_structure))
 
+  def _unbatch(self):
+    return NestedStructure(nest.map_structure(
+        lambda s: s._unbatch(), self._nested_structure))
+
 
 @tf_export("data.experimental.TensorStructure")
 class TensorStructure(Structure):
@@ -406,6 +450,11 @@ class TensorStructure(Structure):
                        "and shape %s." % (value, self._dtype, self._shape))
     return [value]
 
+  def _to_batched_tensor_list(self, value):
+    if self._shape.merge_with(value.shape).ndims == 0:
+      raise ValueError("Unbatching a tensor is only supported for rank >= 1")
+    return [value]
+
   def _from_tensor_list(self, flat_value):
     if len(flat_value) != 1:
       raise ValueError("TensorStructure corresponds to a single tf.Tensor.")
@@ -442,6 +491,11 @@ class TensorStructure(Structure):
         self._dtype,
         tensor_shape.TensorShape([batch_size]).concatenate(self._shape))
 
+  def _unbatch(self):
+    if self._shape.ndims == 0:
+      raise ValueError("Unbatching a tensor is only supported for rank >= 1")
+    return TensorStructure(self._dtype, self._shape[1:])
+
 
 @tf_export("data.experimental.SparseTensorStructure")
 class SparseTensorStructure(Structure):
@@ -471,6 +525,13 @@ class SparseTensorStructure(Structure):
   def _to_tensor_list(self, value):
     return [sparse_ops.serialize_sparse(value, out_type=dtypes.variant)]
 
+  def _to_batched_tensor_list(self, value):
+    if self._dense_shape.merge_with(
+        tensor_util.constant_value_as_shape(value.dense_shape)).ndims == 0:
+      raise ValueError(
+          "Unbatching a sparse tensor is only supported for rank >= 1")
+    return [sparse_ops.serialize_many_sparse(value, out_type=dtypes.variant)]
+
   def _from_tensor_list(self, flat_value):
     if (len(flat_value) != 1 or flat_value[0].dtype != dtypes.variant or
         not flat_value[0].shape.is_compatible_with(tensor_shape.vector(3))):
@@ -505,3 +566,8 @@ class SparseTensorStructure(Structure):
     return SparseTensorStructure(
         self._dtype,
         tensor_shape.TensorShape([batch_size]).concatenate(self._dense_shape))
+
+  def _unbatch(self):
+    if self._dense_shape.ndims == 0:
+      raise ValueError("Unbatching a tensor is only supported for rank >= 1")
+    return SparseTensorStructure(self._dtype, self._dense_shape[1:])
diff --git a/tensorflow/python/data/util/structure_test.py b/tensorflow/python/data/util/structure_test.py
index 3425179e42..91dcfa6f60 100644
--- a/tensorflow/python/data/util/structure_test.py
+++ b/tensorflow/python/data/util/structure_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import constant_op
@@ -34,7 +35,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
-class StructureTest(test.TestCase, parameterized.TestCase):
+class StructureTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   # NOTE(mrry): The arguments must be lifted into lambdas because otherwise they
   # will be executed before the (eager- or graph-mode) test environment has been
@@ -418,6 +419,75 @@ class StructureTest(test.TestCase, parameterized.TestCase):
     self.assertTrue(
         expected_batched_structure.is_compatible_with(batched_structure))
 
+  @parameterized.named_parameters(
+      ("Tensor", structure.TensorStructure(dtypes.float32, [32]),
+       structure.TensorStructure(dtypes.float32, [])),
+      ("TensorUnknown", structure.TensorStructure(dtypes.float32, [None]),
+       structure.TensorStructure(dtypes.float32, [])),
+      ("SparseTensor",
+       structure.SparseTensorStructure(dtypes.float32, [32, None]),
+       structure.SparseTensorStructure(dtypes.float32, [None])),
+      ("SparseTensorUnknown",
+       structure.SparseTensorStructure(dtypes.float32, [None, 4]),
+       structure.SparseTensorStructure(dtypes.float32, [4])),
+      ("Nest", structure.NestedStructure({
+          "a": structure.TensorStructure(dtypes.float32, [128]),
+          "b": (structure.SparseTensorStructure(dtypes.int32, [128, 2, 2]),
+                structure.TensorStructure(dtypes.string, [None]))}),
+       structure.NestedStructure({
+           "a": structure.TensorStructure(dtypes.float32, []),
+           "b": (structure.SparseTensorStructure(dtypes.int32, [2, 2]),
+                 structure.TensorStructure(dtypes.string, []))})),
+  )
+  def testUnbatch(self, element_structure, expected_unbatched_structure):
+    unbatched_structure = element_structure._unbatch()
+    self.assertTrue(
+        unbatched_structure.is_compatible_with(expected_unbatched_structure))
+    self.assertTrue(
+        expected_unbatched_structure.is_compatible_with(unbatched_structure))
+
+  # pylint: disable=g-long-lambda
+  @parameterized.named_parameters(
+      ("Tensor", lambda: constant_op.constant([[1.0, 2.0], [3.0, 4.0]]),
+       lambda: constant_op.constant([1.0, 2.0])),
+      ("SparseTensor", lambda: sparse_tensor.SparseTensor(
+          indices=[[0, 0], [1, 1]], values=[13, 27], dense_shape=[2, 2]),
+       lambda: sparse_tensor.SparseTensor(
+           indices=[[0]], values=[13], dense_shape=[2])),
+      ("Nest", lambda: (
+          constant_op.constant([[1.0, 2.0], [3.0, 4.0]]),
+          sparse_tensor.SparseTensor(
+              indices=[[0, 0], [1, 1]], values=[13, 27], dense_shape=[2, 2])),
+       lambda: (constant_op.constant([1.0, 2.0]), sparse_tensor.SparseTensor(
+           indices=[[0]], values=[13], dense_shape=[2]))),
+  )
+  def testToBatchedTensorList(self, value_fn, element_0_fn):
+    batched_value = value_fn()
+    s = structure.Structure.from_value(batched_value)
+    batched_tensor_list = s._to_batched_tensor_list(batched_value)
+
+    # The batch dimension is 2 for all of the test cases.
+    # NOTE(mrry): `tf.shape()` does not currently work for the DT_VARIANT
+    # tensors in which we store sparse tensors.
+    for t in batched_tensor_list:
+      if t.dtype != dtypes.variant:
+        self.assertEqual(2, self.evaluate(array_ops.shape(t)[0]))
+
+    # Test that the 0th element from the unbatched tensor is equal to the
+    # expected value.
+    expected_element_0 = self.evaluate(element_0_fn())
+    unbatched_s = s._unbatch()
+    actual_element_0 = unbatched_s._from_tensor_list(
+        [t[0] for t in batched_tensor_list])
+
+    for expected, actual in zip(
+        nest.flatten(expected_element_0), nest.flatten(actual_element_0)):
+      if sparse_tensor.is_sparse(expected):
+        self.assertSparseValuesEqual(expected, actual)
+      else:
+        self.assertAllEqual(expected, actual)
+
+  # pylint: enable=g-long-lambda
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 7f3228ccf0147e73b986e00f4bcc3e915203ea54 Mon Sep 17 00:00:00 2001
From: Yong Tang <yong.tang.github@outlook.com>
Date: Fri, 7 Dec 2018 18:08:28 -0800
Subject: [PATCH 260/873] Fix `Experimental clang-format Check` failure

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
---
 .../tensorrt/resources/trt_allocator_test.cc      | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
index 55186d5992..beb1284208 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_allocator_test.cc
@@ -48,11 +48,14 @@ TEST(TRTAllocatorTest, Align) {
         513ul, 700ul, 12345ul, 1ul << 32}) {
     for (uint64_t alignment = 1; alignment <= space * 4; alignment *= 2) {
       for (const uintptr_t ptr_val :
-           {static_cast<uint64_t>(1), alignment == 1 ? static_cast<uint64_t>(1) : alignment - 1, alignment, alignment + 1,
-            alignment + (alignment / 2)}) {
+           {static_cast<uint64_t>(1),
+            alignment == 1 ? static_cast<uint64_t>(1) : alignment - 1,
+            alignment, alignment + 1, alignment + (alignment / 2)}) {
         if (ptr_val % alignment == 0) {
           for (const uint64_t size :
-               {static_cast<uint64_t>(1), space == 1 ? static_cast<uint64_t>(1) : space - 1, space, space + 1}) {
+               {static_cast<uint64_t>(1),
+                space == 1 ? static_cast<uint64_t>(1) : space - 1, space,
+                space + 1}) {
             EXPECT_EQ(space >= size, RunTest(alignment, size, ptr_val, space));
           }
         } else {
@@ -62,8 +65,10 @@ TEST(TRTAllocatorTest, Align) {
             EXPECT_TRUE(
                 RunTest(alignment, space - diff, ptr_val + diff, space - diff));
             for (const uint64_t size :
-                 {static_cast<uint64_t>(1), space - diff > 1 ? space - diff - 1 : static_cast<uint64_t>(1), space - diff,
-                  space - diff + 1, space - 1}) {
+                 {static_cast<uint64_t>(1),
+                  space - diff > 1 ? space - diff - 1
+                                   : static_cast<uint64_t>(1),
+                  space - diff, space - diff + 1, space - 1}) {
               EXPECT_EQ(space - diff >= size,
                         RunTest(alignment, size, ptr_val, space));
             }
-- 
GitLab


From cc52e9bfc9a30422868826514f41065bf35167ca Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 7 Dec 2018 18:02:49 -0800
Subject: [PATCH 261/873] [tf.data] Enable `shuffle_and_repeat_fusion`
 optimization by default

PiperOrigin-RevId: 224606882
---
 .../optimization/optimize_dataset_test.py          | 14 +++++++++-----
 .../optimization/shuffle_and_repeat_fusion_test.py |  5 -----
 .../data/experimental/ops/optimization_options.py  |  5 +++--
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
index 05d88eb071..230b74e9e8 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
@@ -230,10 +230,10 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     self.assertGreaterEqual(len(w), 1)
     expected = ("tf.data static optimizations are not compatible with "
-                "tf.Variable. The following optimizations will be disabled: "
-                "map_and_batch_fusion, noop_elimination. To enable "
-                "optimizations, use resource variables instead by calling "
-                "`tf.enable_resource_variables()` at the start of the program.")
+                "tf.Variable. The following optimizations will be disabled: %s."
+                " To enable optimizations, use resource variables instead by "
+                "calling `tf.enable_resource_variables()` at the start of the "
+                "program." % (", ".join(opt_options._static_optimizations())))
     self.assertTrue(any([expected in str(warning) for warning in w]))
 
     # Check that outputs are the same in the optimized and unoptimized cases,
@@ -256,7 +256,11 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testOptimizationEnabledByDefault(self):
     """Tests that some optimizations are applied to datasets by default."""
     options = dataset_ops.Options()
-    expected_optimizations = ["noop_elimination", "map_and_batch_fusion"]
+    expected_optimizations = [
+        "map_and_batch_fusion",
+        "noop_elimination",
+        "shuffle_and_repeat_fusion",
+    ]
     self.assertEqual(
         set(options._static_optimizations()), set(expected_optimizations))
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/shuffle_and_repeat_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/shuffle_and_repeat_fusion_test.py
index 5f746ec63a..594b59375f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/shuffle_and_repeat_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/shuffle_and_repeat_fusion_test.py
@@ -18,7 +18,6 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
@@ -32,10 +31,6 @@ class ShuffleAndRepeatFusionTest(test_base.DatasetTestBase):
   def testShuffleAndRepeatFusion(self):
     dataset = dataset_ops.Dataset.range(10).apply(
         optimization.assert_next(["ShuffleAndRepeat"])).shuffle(10).repeat(2)
-    options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
-    options.experimental_optimization.shuffle_and_repeat_fusion = True
-    dataset = dataset.with_options(options)
     get_next = self.getNext(dataset)
 
     for _ in range(2):
diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py
index 73849435a9..11b8b86f64 100644
--- a/tensorflow/python/data/experimental/ops/optimization_options.py
+++ b/tensorflow/python/data/experimental/ops/optimization_options.py
@@ -86,7 +86,8 @@ class OptimizationOptions(options.OptionsBase):
   shuffle_and_repeat_fusion = options.create_option(
       name="shuffle_and_repeat_fusion",
       ty=bool,
-      docstring="Whether to fuse shuffle and repeat transformations.")
+      docstring="Whether to fuse shuffle and repeat transformations. If None, "
+      "defaults to True.")
 
   def _static_optimizations(self):
     """Produces the list of enabled static optimizations."""
@@ -98,7 +99,6 @@ class OptimizationOptions(options.OptionsBase):
         "map_fusion",
         "map_parallelization",
         "map_vectorization",
-        "shuffle_and_repeat_fusion",
     ]
     for optimization in optimizations_to_enable:
       if getattr(self, optimization):
@@ -110,6 +110,7 @@ class OptimizationOptions(options.OptionsBase):
       optimizations_to_disable = [
           "map_and_batch_fusion",
           "noop_elimination",
+          "shuffle_and_repeat_fusion",
       ]
       for optimization in optimizations_to_disable:
         if getattr(self, optimization) is not False:
-- 
GitLab


From 798543c1e5cfaa6acf56b087fcb56bd0e227db5d Mon Sep 17 00:00:00 2001
From: Eddie Zhou <eddz@google.com>
Date: Fri, 7 Dec 2018 18:10:29 -0800
Subject: [PATCH 262/873] Also skip test_session post-setUp().

PiperOrigin-RevId: 224607628
---
 tensorflow/python/framework/test_util.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 28f25daa6d..df3cebd2e0 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1717,6 +1717,8 @@ class TensorFlowTestCase(googletest.TestCase):
                    use_gpu=False,
                    force_gpu=False):
     """Use cached_session instead."""
+    if self.id().endswith(".test_session"):
+      self.skipTest("Not a test.")
     if context.executing_eagerly():
       yield None
     else:
-- 
GitLab


From 1881396166c56fb04963683998e6a1569ebf3b19 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Fri, 7 Dec 2018 18:11:02 -0800
Subject: [PATCH 263/873] Internal change.

PiperOrigin-RevId: 224607664
---
 tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 2820e466f3..62e1eaa366 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -123,7 +123,8 @@ fi
 run_configure_for_cpu_build
 
 bazel build --announce_rc --config=opt ${EXTRA_BUILD_FLAGS} \
-  tensorflow/tools/pip_package:build_pip_package || exit $?
+  tensorflow/tools/pip_package:build_pip_package \
+  --incompatible_remove_native_http_archive=false || exit $?
 
 if [[ "$SKIP_TEST" == 1 ]]; then
   exit 0
-- 
GitLab


From 237851c57093edd5126561b2651f216d1726487b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 20:34:57 -0800
Subject: [PATCH 264/873] Internal Change

PiperOrigin-RevId: 224616455
---
 .../base_api/api_def_RaggedGather.pbtxt       |    6 +-
 .../base_api/api_def_RaggedRange.pbtxt        |    2 +-
 .../api_def_RaggedTensorToSparse.pbtxt        |    2 +-
 tensorflow/core/kernels/unicode_ops.cc        |    2 +-
 .../kernel_tests/unicode_encode_op_test.py    |  127 +-
 tensorflow/python/ops/ragged/BUILD            |   56 +-
 tensorflow/python/ops/ragged/__init__.py      |   76 +-
 ...vert_to_tensor_or_ragged_tensor_op_test.py |   19 +-
 .../python/ops/ragged/ragged_array_ops.py     |  415 ++----
 .../ops/ragged/ragged_batch_gather_op_test.py |   59 +-
 .../ops/ragged/ragged_boolean_mask_op_test.py |   36 +-
 .../ops/ragged/ragged_concat_op_test.py       |   35 +-
 .../python/ops/ragged/ragged_const_op_test.py |   46 +-
 .../ragged/ragged_constant_value_op_test.py   |   13 +-
 .../ops/ragged/ragged_conversion_ops.py       |  402 +-----
 .../python/ops/ragged/ragged_dispatch.py      |   25 +-
 .../python/ops/ragged/ragged_dispatch_test.py |   90 +-
 .../python/ops/ragged/ragged_eager_test.py    |   22 +-
 .../ops/ragged/ragged_expand_dims_op_test.py  |    8 +-
 .../python/ops/ragged/ragged_factory_ops.py   |  465 +------
 .../ops/ragged/ragged_from_sparse_op_test.py  |   87 +-
 .../ops/ragged/ragged_from_tensor_op_test.py  |   60 +-
 .../ops/ragged/ragged_functional_ops.py       |   33 +-
 .../ops/ragged/ragged_gather_nd_op_test.py    |   23 +-
 .../ops/ragged/ragged_gather_op_test.py       |  128 +-
 .../python/ops/ragged/ragged_getitem.py       |   12 +-
 ...t.py => ragged_map_flat_values_op_test.py} |   60 +-
 .../ops/ragged/ragged_map_fn_op_test.py       |   96 +-
 .../python/ops/ragged/ragged_map_ops.py       |   44 +-
 .../python/ops/ragged/ragged_math_ops.py      |   49 +-
 .../ops/ragged/ragged_operators_test.py       |   88 +-
 .../python/ops/ragged/ragged_range_op_test.py |  159 +--
 .../ops/ragged/ragged_reduce_op_test.py       |   27 +-
 .../ops/ragged/ragged_row_lengths_op_test.py  |   81 +-
 ...agged_row_splits_to_segment_ids_op_test.py |   13 +-
 ...agged_segment_ids_to_row_splits_op_test.py |   22 +-
 .../ops/ragged/ragged_segment_op_test.py      |   43 +-
 .../python/ops/ragged/ragged_stack_op_test.py |   13 +-
 .../python/ops/ragged/ragged_string_ops.py    |   19 +-
 tensorflow/python/ops/ragged/ragged_tensor.py | 1207 +++++++++++++++--
 .../ragged_tensor_bounding_shape_op_test.py   |   47 +-
 .../python/ops/ragged/ragged_tensor_shape.py  |   44 +-
 .../ops/ragged/ragged_tensor_shape_test.py    |   66 +-
 .../python/ops/ragged/ragged_tensor_test.py   |  623 ++++-----
 .../python/ops/ragged/ragged_tensor_value.py  |   15 +-
 .../python/ops/ragged/ragged_test_util.py     |   95 ++
 .../python/ops/ragged/ragged_tile_op_test.py  |   22 +-
 .../ops/ragged/ragged_to_sparse_op_test.py    |  225 ++-
 .../ops/ragged/ragged_to_tensor_op_test.py    |   33 +-
 .../python/ops/ragged/ragged_util_test.py     |   11 +-
 .../python/ops/ragged/ragged_where_op_test.py |   16 +-
 tensorflow/tools/pip_package/BUILD            |    1 +
 52 files changed, 2523 insertions(+), 2845 deletions(-)
 rename tensorflow/python/ops/ragged/{ragged_map_inner_values_op_test.py => ragged_map_flat_values_op_test.py} (79%)
 create mode 100644 tensorflow/python/ops/ragged/ragged_test_util.py

diff --git a/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt b/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
index 240c987dda..9c40332ea2 100644
--- a/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_RaggedGather.pbtxt
@@ -11,8 +11,8 @@ END
   in_arg {
     name: "params_dense_values"
     description: <<END
-The `inner_values` for the `params` RaggedTensor. There was a terminology change
-at the python level from dense_values to inner_values, so dense_values is the
+The `flat_values` for the `params` RaggedTensor. There was a terminology change
+at the python level from dense_values to flat_values, so dense_values is the
 deprecated name.
 END
   }
@@ -32,7 +32,7 @@ END
   }
   out_arg {
     name: "output_dense_values"
-    description: "The `inner_values` for the returned RaggedTensor."
+    description: "The `flat_values` for the returned RaggedTensor."
   }
   attr {
     name: "PARAMS_RAGGED_RANK"
diff --git a/tensorflow/core/api_def/base_api/api_def_RaggedRange.pbtxt b/tensorflow/core/api_def/base_api/api_def_RaggedRange.pbtxt
index 927e839b72..4a9b2af804 100644
--- a/tensorflow/core/api_def/base_api/api_def_RaggedRange.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_RaggedRange.pbtxt
@@ -19,7 +19,7 @@ op {
   }
   out_arg{
     name: "rt_dense_values"
-    description: "The `inner_values` for the returned `RaggedTensor`."
+    description: "The `flat_values` for the returned `RaggedTensor`."
   }
   summary: <<END
 Returns a `RaggedTensor` containing the specified sequences of numbers.
diff --git a/tensorflow/core/api_def/base_api/api_def_RaggedTensorToSparse.pbtxt b/tensorflow/core/api_def/base_api/api_def_RaggedTensorToSparse.pbtxt
index 8c73ea644c..958c71185e 100644
--- a/tensorflow/core/api_def/base_api/api_def_RaggedTensorToSparse.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_RaggedTensorToSparse.pbtxt
@@ -7,7 +7,7 @@ op {
   }
   in_arg {
     name: "rt_dense_values"
-    description: "The `inner_values` for the `RaggedTensor`."
+    description: "The `flat_values` for the `RaggedTensor`."
   }
   out_arg {
     name: "sparse_indices"
diff --git a/tensorflow/core/kernels/unicode_ops.cc b/tensorflow/core/kernels/unicode_ops.cc
index 6c4ed1eaaf..3ee0edb35a 100644
--- a/tensorflow/core/kernels/unicode_ops.cc
+++ b/tensorflow/core/kernels/unicode_ops.cc
@@ -493,7 +493,7 @@ class UnicodeEncodeOp : public OpKernel {
     const Tensor& input_splits = context->input(1);
     const auto input_splits_flat = input_splits.flat<int64>();
 
-    // Since we limit to a 2-D input (inner_values of rank 1 and a single splits
+    // Since we limit to a 2-D input (flat_values of rank 1 and a single splits
     // tensor), our output dimension will be 1 with it's size equal to the
     // number of splits (outer dimension or ragged tensor).
     TensorShape output_shape({input_splits.dim_size(0) - 1});
diff --git a/tensorflow/python/kernel_tests/unicode_encode_op_test.py b/tensorflow/python/kernel_tests/unicode_encode_op_test.py
index 72c8a26766..2f3cd8a657 100644
--- a/tensorflow/python/kernel_tests/unicode_encode_op_test.py
+++ b/tensorflow/python/kernel_tests/unicode_encode_op_test.py
@@ -23,15 +23,25 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import errors_impl as errors
-from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_string_ops
+from tensorflow.python.ops.ragged import ragged_tensor
+from tensorflow.python.ops.ragged import ragged_tensor_value
 from tensorflow.python.platform import test
 
 
 class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
 
+  def assertRaggedEqual(self, rt, expected):
+    with self.cached_session() as sess:
+      value = sess.run(rt)
+      if isinstance(value, np.ndarray):
+        value = value.tolist()
+      elif isinstance(value, ragged_tensor_value.RaggedTensorValue):
+        value = value.to_list()
+      self.assertEqual(value, expected)
+
   def testScalar(self):
     with self.cached_session():
       with self.assertRaises(ValueError):
@@ -54,9 +64,10 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   def testStrictErrors(self, encoding):
     test_value = np.array([72, 101, 2147483647, -1, 111], np.int32)
-    with self.cached_session():
+    with self.cached_session() as session:
       with self.assertRaises(errors.InvalidArgumentError):
-        ragged_string_ops.unicode_encode(test_value, encoding, "strict").eval()
+        session.run(
+            ragged_string_ops.unicode_encode(test_value, encoding, "strict"))
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
@@ -65,8 +76,8 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
     expected_value = u"Heo".encode(encoding)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding,
                                                          "ignore")
-    with self.cached_session():
-      result = unicode_encode_op.eval()
+    with self.cached_session() as session:
+      result = session.run(unicode_encode_op)
       self.assertIsInstance(result, bytes)
       self.assertAllEqual(result, expected_value)
 
@@ -77,37 +88,27 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
     expected_value = u"He\U0000fffd\U0000fffdo".encode(encoding)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding,
                                                          "replace")
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(result, bytes)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
     # Test custom replacement character
     test_value = np.array([72, 101, 2147483647, -1, 111], np.int32)
     expected_value = u"Heooo".encode(encoding)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding,
                                                          "replace", 111)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(result, bytes)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
     # Verify "replace" is default
     test_value = np.array([72, 101, 2147483647, -1, 111], np.int32)
     expected_value = u"He\U0000fffd\U0000fffdo".encode(encoding)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(result, bytes)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
     # Replacement_char must be within range
     test_value = np.array([72, 101, 2147483647, -1, 111], np.int32)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding,
                                                          "replace", 1114112)
-    with self.cached_session():
-      with self.assertRaises(errors.InvalidArgumentError):
-        unicode_encode_op.eval()
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(unicode_encode_op)
 
   # -- regular Tensor tests -- #
 
@@ -117,35 +118,23 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
     test_value = np.array([72, 101, 108, 108, 111], np.int32)
     expected_value = u"Hello".encode(encoding)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(result, bytes)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
     test_value = np.array([72, 101, 195, 195, 128516], np.int32)
     expected_value = u"He\xc3\xc3\U0001f604".encode(encoding)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(result, bytes)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
     # Single character string
     test_value = np.array([72], np.int32)
     expected_value = u"H".encode(encoding)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(result, bytes)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
     test_value = np.array([128516], np.int32)
     expected_value = u"\U0001f604".encode(encoding)
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(result, bytes)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
@@ -156,10 +145,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
         u"H\U0001f604llo".encode(encoding), u"W\U0001f604rld".encode(encoding)
     ]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(unicode_encode_op, ops.Tensor)
-      self.assertAllEqual(result, expected_value)
+    self.assertAllEqual(unicode_encode_op, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
@@ -172,10 +158,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
                       [u"fixed".encode(encoding), u"words".encode(encoding)],
                       [u"Hyper".encode(encoding), u"cube.".encode(encoding)]]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(unicode_encode_op, ops.Tensor)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
@@ -191,10 +174,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
                       [[u"Hyper".encode(encoding)],
                        [u"cube.".encode(encoding)]]]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(unicode_encode_op, ops.Tensor)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
   # -- Ragged Tensor tests -- #
 
@@ -207,10 +187,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
         u"H\xc3llo".encode(encoding), u"W\U0001f604rld.".encode(encoding)
     ]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertIsInstance(unicode_encode_op, ops.Tensor)
-      self.assertAllEqual(result, expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
@@ -227,10 +204,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
                           u"cube.".encode(encoding)
                       ]]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertEqual(unicode_encode_op.ragged_rank, 1)
-      self.assertAllEqual(result.tolist(), expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
@@ -245,10 +219,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
                           u"w\xc3rry, be".encode(encoding)
                       ], [u"\U0001f604".encode(encoding), u"".encode(encoding)]]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertEqual(unicode_encode_op.ragged_rank, 1)
-      self.assertAllEqual(result.tolist(), expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
@@ -259,10 +230,7 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
     expected_value = [[u"Hello".encode(encoding), u"World.".encode(encoding)],
                       [], [u"\U0001f604".encode(encoding)]]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertEqual(unicode_encode_op.ragged_rank, 1)
-      self.assertAllEqual(result.tolist(), expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
@@ -273,41 +241,30 @@ class UnicodeEncodeOpTest(test.TestCase, parameterized.TestCase):
     expected_value = [[[u"Hello".encode(encoding), u"World".encode(encoding)]],
                       [[u"".encode(encoding)], [u"Hype".encode(encoding)]]]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertEqual(unicode_encode_op.ragged_rank, 2)
-      self.assertAllEqual(result.tolist(), expected_value)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
   @parameterized.parameters("UTF-8", "UTF-16-BE", "UTF-32-BE")
   @test_util.run_v1_only("b/120545219")
   def testRaggedMatrixWithMultiDimensionInnerValues(self, encoding):
-    test_inner_values = constant_op.constant([[[72, 101, 108, 108, 111],
-                                               [87, 111, 114, 108, 100]],
-                                              [[102, 105, 120, 101, 100],
-                                               [119, 111, 114, 100, 115]],
-                                              [[72, 121, 112, 101, 114],
-                                               [99, 117, 98, 101, 46]]])
+    test_flat_values = constant_op.constant([[[72, 101, 108, 108, 111],
+                                              [87, 111, 114, 108, 100]],
+                                             [[102, 105, 120, 101, 100],
+                                              [119, 111, 114, 100, 115]],
+                                             [[72, 121, 112, 101, 114],
+                                              [99, 117, 98, 101, 46]]])
     test_row_splits = [
         constant_op.constant([0, 2, 3], dtype=np.int64),
         constant_op.constant([0, 1, 1, 3], dtype=np.int64)
     ]
-    test_value = ragged_factory_ops.from_nested_row_splits(test_inner_values,
-                                                           test_row_splits)
+    test_value = ragged_tensor.RaggedTensor.from_nested_row_splits(
+        test_flat_values, test_row_splits)
     expected_value = [[[[u"Hello".encode(encoding), u"World".encode(encoding)]],
                        []],
                       [[[u"fixed".encode(encoding), u"words".encode(encoding)],
                         [u"Hyper".encode(encoding),
                          u"cube.".encode(encoding)]]]]
     unicode_encode_op = ragged_string_ops.unicode_encode(test_value, encoding)
-    with self.cached_session():
-      result = unicode_encode_op.eval()
-      self.assertEqual(unicode_encode_op.ragged_rank, 2)
-      self.assertAllEqual(result.tolist(), expected_value)
-      # These next two assertions don't necessarily need to be here as they test
-      # internal representations and we already verified the value is correct.
-      self.assertAllEqual(len(result.nested_row_splits), len(test_row_splits))
-      self.assertEqual(unicode_encode_op.inner_values.shape.ndims,
-                       test_inner_values.shape.ndims - 1)
+    self.assertRaggedEqual(unicode_encode_op, expected_value)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/ops/ragged/BUILD b/tensorflow/python/ops/ragged/BUILD
index fcd9adad21..d88543c400 100644
--- a/tensorflow/python/ops/ragged/BUILD
+++ b/tensorflow/python/ops/ragged/BUILD
@@ -184,6 +184,8 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged_tensor_value",
+        ":ragged_util",
+        ":segment_id_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:session",
         "//tensorflow/python:tensor_shape",
@@ -295,6 +297,19 @@ py_library(
 # RaggedTensor Tests
 #-------------------------------------------------------------------------------
 
+py_library(
+    name = "ragged_test_util",
+    srcs = ["ragged_test_util.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":ragged_tensor",
+        ":ragged_tensor_value",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 py_test(
     name = "ragged_tensor_test",
     size = "medium",
@@ -305,6 +320,7 @@ py_test(
     ],
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
@@ -323,6 +339,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -336,6 +353,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -348,6 +366,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -359,6 +378,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
         "@absl_py//absl/testing:parameterized",
@@ -371,6 +391,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
@@ -387,6 +408,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
@@ -402,6 +424,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_test_lib",
@@ -417,6 +440,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -429,6 +453,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -441,6 +466,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:framework_test_lib",
@@ -459,6 +485,7 @@ py_test(
     ],
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
@@ -475,6 +502,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
@@ -490,6 +518,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
@@ -504,6 +533,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:errors",
@@ -519,6 +549,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
@@ -530,11 +561,12 @@ py_test(
 )
 
 py_test(
-    name = "ragged_map_inner_values_op_test",
-    srcs = ["ragged_map_inner_values_op_test.py"],
+    name = "ragged_map_flat_values_op_test",
+    srcs = ["ragged_map_flat_values_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
@@ -551,8 +583,8 @@ py_test(
     srcs = ["ragged_const_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged_factory_ops",
-        ":ragged_tensor",
+        ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -569,6 +601,7 @@ py_test(
     ],
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
         "//third_party/py/numpy",
@@ -582,6 +615,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_test_lib",
@@ -597,6 +631,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
@@ -612,6 +647,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
@@ -628,6 +664,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -640,8 +677,8 @@ py_test(
     srcs = ["ragged_tile_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged_array_ops",
-        ":ragged_factory_ops",
+        ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
@@ -656,6 +693,7 @@ py_test(
     srcs = ["ragged_util_test.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":ragged_test_util",
         ":ragged_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
@@ -672,6 +710,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
         "@absl_py//absl/testing:parameterized",
@@ -684,6 +723,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
         "@absl_py//absl/testing:parameterized",
@@ -696,6 +736,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:clip_ops",
         "//tensorflow/python:dtypes",
@@ -717,6 +758,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
     ],
@@ -729,6 +771,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_test_lib",
@@ -747,6 +790,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_test_util",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
diff --git a/tensorflow/python/ops/ragged/__init__.py b/tensorflow/python/ops/ragged/__init__.py
index bfcaa366fc..3d915ee269 100644
--- a/tensorflow/python/ops/ragged/__init__.py
+++ b/tensorflow/python/ops/ragged/__init__.py
@@ -9,47 +9,6 @@ different lengths.  For example, the inner (column) dimension of
 description of ragged tensors, see the `tf.RaggedTensor`
 class documentation.
 
-## `RaggedTensor` Operations
-
-### `RaggedTensor` Factory ops
-
-* `tf.ragged.constant`
-* `tf.ragged.from_row_splits`
-* `tf.ragged.from_row_splits`
-* `tf.ragged.from_row_lengths`
-* `tf.ragged.from_row_starts`
-* `tf.ragged.from_row_limits`
-* `tf.ragged.from_value_rowids`
-* `tf.ragged.from_nested_row_splits`
-* `tf.ragged.from_nested_value_rowids`
-
-### `RaggedTensor` Conversion ops
-
-* `tf.ragged.from_tensor`
-* `tf.ragged.to_tensor`
-* `tf.ragged.from_sparse`
-* `tf.ragged.to_sparse`
-* `tf.ragged.from_variant`
-* `tf.ragged.to_variant`
-* `tf.ragged.convert_to_tensor_or_ragged_tensor`
-
-### `RaggedTensor` Shape ops
-
-* `tf.ragged.row_splits`
-* `tf.ragged.row_lengths`
-* `tf.ragged.row_starts`
-* `tf.ragged.row_limits`
-* `tf.ragged.value_rowids`
-* `tf.ragged.nrows`
-* `tf.ragged.nested_row_splits`
-* `tf.ragged.row_splits_to_segment_ids`
-* `tf.ragged.segment_ids_to_row_splits`
-* `tf.ragged.bounding_shape`
-
-### Functional ops
-* `tf.ragged.map_inner_values`
-
-
 <!-- Ragged Classes & related helper functions -->
 @@RaggedTensor
 @@RaggedTensorType
@@ -57,15 +16,9 @@ class documentation.
 @@is_ragged
 
 <!-- Factory Ops -->
+@@ragged_factory_ops
 @@constant
 @@constant_value
-@@from_row_splits
-@@from_row_lengths
-@@from_row_starts
-@@from_row_limits
-@@from_value_rowids
-@@from_nested_row_splits
-@@from_nested_value_rowids
 @@convert_to_tensor_or_ragged_tensor
 
 <!-- Conversion Ops -->
@@ -77,14 +30,6 @@ class documentation.
 @@segment_ids_to_row_splits
 
 <!-- Array Ops -->
-@@row_splits
-@@row_lengths
-@@row_starts
-@@row_limits
-@@value_rowids
-@@nrows
-@@nested_row_splits
-@@bounding_shape
 @@gather
 @@batch_gather
 @@gather_nd
@@ -114,7 +59,7 @@ class documentation.
 @@reduce_any
 
 <!-- Functional Ops -->
-@@map_inner_values
+@@map_flat_values
 @@map_fn
 
 <!-- Shape & broadcasting -->
@@ -133,18 +78,12 @@ from tensorflow.python.ops.ragged import ragged_string_ops
 
 from tensorflow.python.ops.ragged.ragged_array_ops import batch_gather
 from tensorflow.python.ops.ragged.ragged_array_ops import boolean_mask
-from tensorflow.python.ops.ragged.ragged_array_ops import bounding_shape
 from tensorflow.python.ops.ragged.ragged_array_ops import concat
 from tensorflow.python.ops.ragged.ragged_array_ops import expand_dims
 from tensorflow.python.ops.ragged.ragged_array_ops import gather
 from tensorflow.python.ops.ragged.ragged_array_ops import gather_nd
-from tensorflow.python.ops.ragged.ragged_array_ops import nrows
-from tensorflow.python.ops.ragged.ragged_array_ops import row_lengths
-from tensorflow.python.ops.ragged.ragged_array_ops import row_limits
-from tensorflow.python.ops.ragged.ragged_array_ops import row_starts
 from tensorflow.python.ops.ragged.ragged_array_ops import stack
 from tensorflow.python.ops.ragged.ragged_array_ops import tile
-from tensorflow.python.ops.ragged.ragged_array_ops import value_rowids
 from tensorflow.python.ops.ragged.ragged_array_ops import where
 
 from tensorflow.python.ops.ragged.ragged_conversion_ops import from_sparse
@@ -154,16 +93,8 @@ from tensorflow.python.ops.ragged.ragged_conversion_ops import to_tensor
 
 from tensorflow.python.ops.ragged.ragged_factory_ops import constant
 from tensorflow.python.ops.ragged.ragged_factory_ops import constant_value
-from tensorflow.python.ops.ragged.ragged_factory_ops import convert_to_tensor_or_ragged_tensor
-from tensorflow.python.ops.ragged.ragged_factory_ops import from_nested_row_splits
-from tensorflow.python.ops.ragged.ragged_factory_ops import from_nested_value_rowids
-from tensorflow.python.ops.ragged.ragged_factory_ops import from_row_lengths
-from tensorflow.python.ops.ragged.ragged_factory_ops import from_row_limits
-from tensorflow.python.ops.ragged.ragged_factory_ops import from_row_splits
-from tensorflow.python.ops.ragged.ragged_factory_ops import from_row_starts
-from tensorflow.python.ops.ragged.ragged_factory_ops import from_value_rowids
 
-from tensorflow.python.ops.ragged.ragged_functional_ops import map_inner_values
+from tensorflow.python.ops.ragged.ragged_functional_ops import map_flat_values
 
 from tensorflow.python.ops.ragged.ragged_map_ops import map_fn
 
@@ -184,6 +115,7 @@ from tensorflow.python.ops.ragged.ragged_math_ops import segment_prod
 from tensorflow.python.ops.ragged.ragged_math_ops import segment_sqrt_n
 from tensorflow.python.ops.ragged.ragged_math_ops import segment_sum
 
+from tensorflow.python.ops.ragged.ragged_tensor import convert_to_tensor_or_ragged_tensor
 from tensorflow.python.ops.ragged.ragged_tensor import is_ragged
 from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor
 from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensorType
diff --git a/tensorflow/python/ops/ragged/convert_to_tensor_or_ragged_tensor_op_test.py b/tensorflow/python/ops/ragged/convert_to_tensor_or_ragged_tensor_op_test.py
index ef3464f243..b88f18c8b6 100644
--- a/tensorflow/python/ops/ragged/convert_to_tensor_or_ragged_tensor_op_test.py
+++ b/tensorflow/python/ops/ragged/convert_to_tensor_or_ragged_tensor_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.convert_to_tensor_or_ragged_tensor."""
+"""Tests for ragged.convert_to_tensor_or_ragged."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -25,11 +25,13 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedConvertToTensorOrRaggedTensorTest(test_util.TensorFlowTestCase,
-                                              parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedConvertToTensorOrRaggedTensorTest(
+    ragged_test_util.RaggedTensorTestCase, parameterized.TestCase):
 
   #=============================================================================
   # Tests where the 'value' param is a RaggedTensor
@@ -90,7 +92,6 @@ class RaggedConvertToTensorOrRaggedTensorTest(test_util.TensorFlowTestCase,
           preferred_dtype=dtypes.string,
           expected_dtype=dtypes.int32),
   ])
-  @test_util.run_deprecated_v1
   def testConvertRaggedTensorValue(self,
                                    value,
                                    dtype=None,
@@ -102,8 +103,7 @@ class RaggedConvertToTensorOrRaggedTensorTest(test_util.TensorFlowTestCase,
         value, dtype, preferred_dtype)
     self.assertEqual(value.ragged_rank, converted.ragged_rank)
     self.assertEqual(dtypes.as_dtype(expected_dtype), converted.dtype)
-    with self.test_session():
-      self.assertEqual(value.tolist(), self.evaluate(converted).tolist())
+    self.assertEqual(value.to_list(), self.eval_to_list(converted))
 
   @parameterized.parameters([
       dict(
@@ -131,8 +131,7 @@ class RaggedConvertToTensorOrRaggedTensorTest(test_util.TensorFlowTestCase,
     tensor = constant_op.constant(pylist)
     converted = ragged.convert_to_tensor_or_ragged_tensor(
         tensor, dtype, preferred_dtype)
-    with self.test_session():
-      self.assertIs(tensor, converted)
+    self.assertIs(tensor, converted)
 
   @parameterized.parameters([
       dict(
@@ -146,7 +145,6 @@ class RaggedConvertToTensorOrRaggedTensorTest(test_util.TensorFlowTestCase,
           message=('Tensor conversion requested dtype string for '
                    'Tensor with dtype int32')),
   ])
-  @test_util.run_deprecated_v1
   def testConvertTensorError(self,
                              pylist,
                              message,
@@ -189,8 +187,7 @@ class RaggedConvertToTensorOrRaggedTensorTest(test_util.TensorFlowTestCase,
     converted = ragged.convert_to_tensor_or_ragged_tensor(
         value, dtype, preferred_dtype)
     self.assertEqual(dtypes.as_dtype(expected_dtype), converted.dtype)
-    with self.test_session():
-      self.assertAllEqual(value, converted)
+    self.assertAllEqual(value, converted)
 
   @parameterized.parameters([
       dict(
diff --git a/tensorflow/python/ops/ragged/ragged_array_ops.py b/tensorflow/python/ops/ragged/ragged_array_ops.py
index 25317ba93e..b5917bc4ee 100644
--- a/tensorflow/python/ops/ragged/ragged_array_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_array_ops.py
@@ -27,282 +27,12 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import gen_ragged_array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_conversion_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_functional_ops
 from tensorflow.python.ops.ragged import ragged_math_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_util
 from tensorflow.python.ops.ragged import segment_id_ops
 
-#===============================================================================
-# Row Partitioning
-#===============================================================================
-
-
-def value_rowids(rt_input, name=None):
-  """Returns the row indices for the `values` in the given ragged tensor.
-
-  `value_rowids(rt)` corresponds one-to-one with the outermost dimension of
-  `rt.values`, and specifies the row containing each value.  In particular,
-  the row `rt[row]` consists of the values `rt.values[j]` where
-  `value_rowids(rt)[j] == row`.
-
-  Args:
-    rt_input: The RaggedTensor whose row indices should be returned.
-    name: A name prefix for the returned tensor (optional).
-
-  Returns:
-    A 1-D `int64` `Tensor` with shape `self.values.shape[:1]`.
-    The returned tensor is nonnegative, and is sorted in ascending order.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
-    >>> rt.values.eval()
-    [3, 1, 4, 1, 5, 9, 2, 6]
-    >>> ragged.value_rowids(rt).eval()
-    [0, 0, 0, 0, 2, 2, 2, 3]  # corresponds 1:1 with rt.values
-    ```
-  """
-  if not ragged_tensor.is_ragged(rt_input):
-    raise TypeError(
-        'rt_input expected RaggedTensor, got %s' % type(rt_input).__name__)
-  if (isinstance(rt_input, ragged_tensor.RaggedTensor) and
-      rt_input.cached_value_rowids is not None):
-    return rt_input.cached_value_rowids
-
-  with ops.name_scope(name, 'RaggedValueRowIds', [rt_input]):
-    return segment_id_ops.row_splits_to_segment_ids(rt_input.row_splits)
-
-
-def nrows(rt_input, out_type=dtypes.int64, name=None):
-  """Returns the number of rows in the given potentially ragged tensor.
-
-  I.e., the size of the outermost dimension of the tensor.
-
-  Args:
-    rt_input: The potentially ragged tensor whose number of rows should be
-      returned.
-    out_type: `dtype` for the returned tensor.
-    name: A name prefix for the returned tensor (optional).
-
-  Returns:
-    A scalar `Tensor` with dtype `out_type`.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
-    >>> ragged.nrows(rt).eval()  # rt has 5 rows.
-    5
-    ```
-  """
-  if (isinstance(rt_input, ragged_tensor.RaggedTensor) and
-      rt_input.cached_nrows is not None):
-    return rt_input.cached_nrows
-
-  with ops.name_scope(name, 'RaggedNRows', [rt_input]):
-    if ragged_tensor.is_ragged(rt_input):
-      return array_ops.shape(rt_input.row_splits, out_type=out_type)[0] - 1
-    else:
-      return array_ops.shape(rt_input, out_type=out_type)[0]
-
-
-def row_starts(rt_input, name=None):
-  """Returns the start indices for rows in the given ragged tensor.
-
-  These indices specify where the values for each row begin in
-  `rt_input.values`.  `ragged.row_starts(rt_input)` is equal to
-  `rt_input.row_splits[:-1]`.
-
-  Args:
-    rt_input: The RaggedTensor whose row starts should be returned.
-    name: A name prefix for the returned tensor (optional).
-
-  Returns:
-    A 1-D Tensor of int64 with shape `[nrows]`.
-    The returned tensor is nonnegative, and is sorted in ascending order.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
-    >>> ragged.values(rt).eval()
-    [3, 1, 4, 1, 5, 9, 2, 6]
-    >>> ragged.row_starts(rt).eval()  # indices of row starts in ragged.values
-    [0, 4, 4, 7, 8]
-    ```
-  """
-  if not ragged_tensor.is_ragged(rt_input):
-    raise TypeError(
-        'rt_input expected RaggedTensor, got %s' % type(rt_input).__name__)
-  with ops.name_scope(name, 'RaggedRowStarts', [rt_input]):
-    return rt_input.row_splits[:-1]
-
-
-def row_limits(rt_input, name=None):
-  """Returns the limit indices for rows in the given ragged tensor.
-
-  These indices specify where the values for each row end in
-  `rt_input.values`.  `ragged.row_limits(rt_input)` is equal to
-  `rt_input.row_splits[:-1]`.
-
-  Args:
-    rt_input: The RaggedTensor whose row limits should be returned.
-    name: A name prefix for the returned tensor (optional).
-
-  Returns:
-    A 1-D Tensor of int64 with shape `[nrows]`.
-    The returned tensor is nonnegative, and is sorted in ascending order.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
-    >>> ragged.values(rt).eval()
-    [3, 1, 4, 1, 5, 9, 2, 6]
-    >>> ragged.row_limits(rt).eval()  # indices of row limits in ragged.values
-    [4, 4, 7, 8, 8]
-    ```
-  """
-  if not ragged_tensor.is_ragged(rt_input):
-    raise TypeError(
-        'rt_input expected RaggedTensor, got %s' % type(rt_input).__name__)
-  with ops.name_scope(name, 'RaggedRowLimits', [rt_input]):
-    return rt_input.row_splits[1:]
-
-
-def row_lengths(rt_input, axis=1, name=None):
-  """Returns the lengths of the rows in the given potentially ragged tensor.
-
-  `ragged.row_lengths(rt_input)[i]` indicates the number of values in the
-  `i`th row of `rt_input`.
-
-  Args:
-    rt_input: The potentially ragged tensor whose row lengths should be
-      returned.  Must have at least `axis+1` dimensions.
-    axis: An integer constant indicating the axis whose row lengths should be
-      returned.
-    name: A name prefix for the returned tensor (optional).
-
-  Returns:
-    A potentially Tensor of int64 with shape `rt_input.shape[:axis]`.
-
-  Raises:
-    ValueError: If rt_input is a scalar, or `axis` is out of bounds.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.constant([[[3, 1, 4], [1]], [], [[5, 9], [2]], [[6]], []])
-    >>> ragged.row_lengths(rt).eval()  # lengths of rows in rt
-    [2, 0, 2, 1, 0]
-    >>> ragged.row_lengths(rt, axis=2).eval()  # lengths of axis=2 rows.
-    [[3, 1], [], [2, 1], [1], []]
-    ```
-  """
-  if (isinstance(rt_input, ragged_tensor.RaggedTensor) and
-      rt_input.cached_row_lengths is not None):
-    return rt_input.cached_row_lengths
-
-  with ops.name_scope(name, 'RaggedRowLengths', [rt_input]):
-    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
-        rt_input, name='rt_input')
-    ndims = rt_input.shape.ndims
-    if ndims is not None:
-      if ndims == 0:
-        raise ValueError('rt_input may not be a scalar.')
-      elif not -ndims <= axis < ndims:
-        raise ValueError('axis=%d out of bounds: expected %d<=axis<%d.' %
-                         (axis, -ndims, ndims))
-    if ragged_tensor.is_ragged(rt_input):
-      axis = ragged_util.get_positive_axis(axis, rt_input.shape.ndims)
-      if axis == 0:
-        return nrows(rt_input)
-      elif axis == 1:
-        splits = rt_input.row_splits
-        return splits[1:] - splits[:-1]
-      else:
-        return rt_input.with_values(row_lengths(rt_input.values, axis - 1))
-    else:
-      shape = array_ops.shape(rt_input, out_type=dtypes.int64)
-      return array_ops.ones(shape[:axis], dtypes.int64) * shape[axis]
-
-
-def nested_row_lengths(rt_input, name=None):
-  """Returns a tuple containing the row_lengths for all ragged dimensions.
-
-  `nested_row_lengths(rt)` is a tuple containing the `row_lengths` tensors for
-  all ragged dimensions in `rt`, ordered from outermost to innermost.
-
-  Args:
-    rt_input: A potentially ragged tensor.
-    name: A name prefix for the returned tensors (optional).
-
-  Returns:
-    A `tuple` of 1-D `int64` `Tensors`.  The length of the tuple is equal to
-    `rt_input.ragged_rank`.
-  """
-  with ops.name_scope(name, 'RaggedNestedRowLengths', [rt_input]):
-    rt_nested_row_lengths = []
-    while isinstance(rt_input, ragged_tensor.RaggedTensor):
-      rt_nested_row_lengths.append(row_lengths(rt_input))
-      rt_input = rt_input.values
-    return tuple(rt_nested_row_lengths)
-
-
-#===============================================================================
-# Bounding Shape
-#===============================================================================
-def bounding_shape(rt_input, axis=None, name=None):
-  """Returns the tight bounding box shape for a potentially ragged tensor.
-
-  Args:
-    rt_input: A potentially ragged tensor.
-    axis: An integer scalar or vector indicating which axes to return the
-      bounding box for.  If not specified, then the full bounding box is
-      returned.
-    name: A name prefix for the returned tensor (optional).
-
-  Returns:
-    An int64 `Tensor`.  If `axis` is not specified, then `output`
-    is a vector with `output.shape=[rt_input.shape.ndims]`.  If `axis` is a
-    scalar, then the `output` is a scalar.  If `axis` is a vector, then
-    `output` is a vector, where `output[i]` is the bounding size for
-    dimension `axis[i]`.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.constant([[1, 2, 3, 4], [5], [], [6, 7, 8, 9], [10]])
-    >>> ragged.bounding_shape(rt).eval().tolist()
-    [5, 4]
-    ```
-  """
-  with ops.name_scope(name, 'RaggedBoundingBox', [rt_input, axis]):
-    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
-        rt_input, name='rt_input')
-    if not ragged_tensor.is_ragged(rt_input):
-      bbox = array_ops.shape(rt_input)
-      return bbox if axis is None else array_ops.gather(bbox, axis)
-
-    nested_splits = rt_input.nested_row_splits
-    rt_inner_values = rt_input.inner_values
-
-    # Optimized special cases for when axis=0 or axis=1:
-    if isinstance(axis, int):
-      if axis == 0:
-        return array_ops.shape(nested_splits[0], out_type=dtypes.int64)[0] - 1
-      elif axis == 1:
-        return math_ops.maximum(math_ops.reduce_max(row_lengths(rt_input)), 0)
-
-    splits_shape = array_ops.shape(rt_input.row_splits, out_type=dtypes.int64)
-    inner_values_shape = array_ops.shape(rt_inner_values, out_type=dtypes.int64)
-
-    ragged_dimensions = array_ops.stack([splits_shape[0] - 1] + [
-        math_ops.maximum(math_ops.reduce_max(splits[1:] - splits[:-1]), 0)
-        for splits in nested_splits
-    ])
-    inner_dimensions = inner_values_shape[1:]
-
-    bbox = array_ops.concat([ragged_dimensions, inner_dimensions], axis=0)
-    return bbox if axis is None else array_ops.gather(bbox, axis)
-
 
 #===============================================================================
 # ragged_gather
@@ -331,13 +61,13 @@ def gather(params, indices, validate_indices=None, axis=0, name=None):
   >>> ragged_params = ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
   >>> ragged_indices = ragged.constant([[3, 1, 2], [1], [], [0]])
 
-  >>> print ragged.gather(params, ragged_indices).eval().tolist()
+  >>> print ragged.gather(params, ragged_indices)
   [['d', 'b', 'c'], ['b'], [], ['a']]
 
-  >>> print ragged.gather(ragged_params, indices).eval().tolist()
+  >>> print ragged.gather(ragged_params, indices)
   [['e'], ['d'], [], ['d'], ['a', 'b', 'c']]
 
-  >>> print ragged.gather(ragged_params, ragged_indices).eval().tolist()
+  >>> print ragged.gather(ragged_params, ragged_indices)
   [[['e'], ['d'], []], [['d']], [], [['a', 'b', 'c']]]
   ```
 
@@ -363,9 +93,9 @@ def gather(params, indices, validate_indices=None, axis=0, name=None):
   if not isinstance(axis, int) or axis != 0:
     raise ValueError('axis>0 is not supported for ragged gather yet.')
   with ops.name_scope(name, 'RaggedGather', [params, indices]):
-    params = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         params, name='params')
-    indices = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         indices, name='indices')
 
     if ragged_tensor.is_ragged(indices):
@@ -380,13 +110,13 @@ def gather(params, indices, validate_indices=None, axis=0, name=None):
 
     result = gen_ragged_array_ops.ragged_gather(
         indices=indices,
-        params_dense_values=params.inner_values,
+        params_dense_values=params.flat_values,
         params_nested_splits=params.nested_row_splits,
         OUTPUT_RAGGED_RANK=indices.shape.ndims + len(params.nested_row_splits) -
         1)
 
     # Compose the RaggedTensor from splits & values.
-    return ragged_factory_ops.from_nested_row_splits(
+    return ragged_tensor.RaggedTensor.from_nested_row_splits(
         result.output_dense_values, result.output_nested_splits)
 
 
@@ -429,9 +159,9 @@ def batch_gather(params, indices, name=None):
     return array_ops.batch_gather(params, indices, name)
 
   with ops.name_scope(name, 'RaggedBatchGather', [params, indices]):
-    params = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         params, name='params')
-    indices = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         indices, name='indices')
     indices_ndims = indices.shape.ndims
     if indices_ndims is None:
@@ -448,7 +178,7 @@ def batch_gather(params, indices, name=None):
                            'not match params shape')
         checks = [check_ops.assert_equal(params.row_splits, indices.row_splits)]
         with ops.control_dependencies(checks):
-          return ragged_factory_ops.from_row_splits(
+          return ragged_tensor.RaggedTensor.from_row_splits(
               batch_gather(params.values, indices.values), indices.row_splits)
 
       # Otherwise, indices is a 2D ragged tensor with 1 ragged dimension.
@@ -462,11 +192,11 @@ def batch_gather(params, indices, name=None):
 
         # Adjust indices from within-batch to global (in params.values), and
         # then use ragged.gather to gather them.
-        num_indices = row_lengths(indices)
-        params_starts = row_starts(params)
+        num_indices = indices.row_lengths()
+        params_starts = params.row_starts()
         adjustments = ragged_util.repeat(params_starts, num_indices, axis=0)
         adjusted_index_values = math_ops.to_int64(indices.values) + adjustments
-        return ragged_factory_ops.from_row_splits(
+        return ragged_tensor.RaggedTensor.from_row_splits(
             gather(params.values, adjusted_index_values), indices.row_splits)
 
     else:  # params is a RaggedTensor and indices is a Tensor.
@@ -474,7 +204,7 @@ def batch_gather(params, indices, name=None):
         return gather(params, indices)
       elif indices_ndims == 2:
         # Adjust indices from batch-local to global (in params.values)
-        adjustments = array_ops.expand_dims(row_starts(params), 1)
+        adjustments = array_ops.expand_dims(params.row_starts(), 1)
         adjusted_indices = math_ops.to_int64(indices) + adjustments
         return gather(params.values, adjusted_indices)
       else:
@@ -532,9 +262,9 @@ def gather_nd(params, indices, name=None):
 
   with ops.name_scope(name, 'RaggedGatherNd', [params, indices]):
 
-    params = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    params = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         params, name='params')
-    indices = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    indices = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         indices, name='indices')
     indices_shape = indices.shape
     indices_ndims = indices_shape.ndims
@@ -548,7 +278,7 @@ def gather_nd(params, indices, name=None):
 
     # `index_size` is the "n" in "gather_nd" -- i.e., the number of dimensions
     # that each index slices into.
-    index_size = indices_shape[-1].value
+    index_size = tensor_shape.dimension_value(indices_shape[-1])
     if index_size is None:
       raise ValueError('indices.shape[-1] must be statically known.')
 
@@ -560,8 +290,7 @@ def gather_nd(params, indices, name=None):
       if indices_is_dense:
         indices = ragged_conversion_ops.from_tensor(
             indices, ragged_rank=indices_ndims - 2)
-      result = indices.with_inner_values(
-          gather_nd(params, indices.inner_values))
+      result = indices.with_flat_values(gather_nd(params, indices.flat_values))
       if (indices_is_dense and ragged_tensor.is_ragged(result) and
           result.ragged_rank == indices_ndims - 2):
         result = ragged_conversion_ops.to_tensor(result)
@@ -575,7 +304,7 @@ def gather_nd(params, indices, name=None):
     # Handle corner case: An empty index tuple selects the entire `params`
     # value.  So if `index_size` is zero, then tile `params`.
     if index_size == 0:
-      params_ndims = params.ragged_rank + array_ops.rank(params.inner_values)
+      params_ndims = params.ragged_rank + array_ops.rank(params.flat_values)
       for dim in range(indices_ndims - 1):
         params = expand_dims(params, axis=0)
       multiples = array_ops.concat([
@@ -613,7 +342,7 @@ def gather_nd(params, indices, name=None):
           return array_ops.gather_nd(flattened_params, flattened_index_tuples)
 
         flattened_index_tuples = array_ops.gather(
-            row_starts(flattened_params), flattened_index_tuples)
+            flattened_params.row_starts(), flattened_index_tuples)
         flattened_index_tuples += indices[..., dim]
         flattened_params = flattened_params.values
 
@@ -709,9 +438,8 @@ def boolean_mask(data, mask, keepdims=False, name=None):
   """
   with ops.name_scope(name, 'RaggedMask', [data, mask]):
     # Convert inputs to tensors.
-    data = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
-        data, name='data')
-    mask = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
+    mask = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         mask, dtypes.bool, name='mask')
 
     # Get static rank of mask.
@@ -742,7 +470,7 @@ def boolean_mask(data, mask, keepdims=False, name=None):
           else:
             # Count the number of True mask values in each row to find the
             # lengths of the filtered rows; then convert to splits.
-            int_mask = ragged_functional_ops.map_inner_values(
+            int_mask = ragged_functional_ops.map_flat_values(
                 math_ops.cast, mask, dtype=dtypes.int64)
             masked_row_lengths = ragged_math_ops.reduce_sum(int_mask, axis=1)
             splits.append(ragged_util.lengths_to_splits(masked_row_lengths))
@@ -754,7 +482,7 @@ def boolean_mask(data, mask, keepdims=False, name=None):
 
         # Add the ragged `splits` back to the result.
         if keepdims:
-          masked_values = ragged_factory_ops.from_nested_row_splits(
+          masked_values = ragged_tensor.RaggedTensor.from_nested_row_splits(
               masked_values, splits)
 
         return masked_values
@@ -765,7 +493,7 @@ def boolean_mask(data, mask, keepdims=False, name=None):
       # Get the masked splits: first get the length of each row, then filter
       # out the rows that we are deleting, and convert that filtered set of
       # masks back to a splits tensor.
-      lengths = row_lengths(data)
+      lengths = data.row_lengths()
       masked_lengths = array_ops.boolean_mask(lengths, mask)
       masked_splits = ragged_util.lengths_to_splits(masked_lengths)
 
@@ -777,7 +505,8 @@ def boolean_mask(data, mask, keepdims=False, name=None):
       segment_mask = array_ops.gather(mask, segment_ids)
       masked_values = boolean_mask(data.values, segment_mask, keepdims=False)
 
-      return ragged_factory_ops.from_row_splits(masked_values, masked_splits)
+      return ragged_tensor.RaggedTensor.from_row_splits(masked_values,
+                                                        masked_splits)
 
     # If mask is non-ragged and has rank>1, then convert it to be ragged,
     # with a ragged rank matching data.
@@ -798,7 +527,7 @@ def boolean_mask(data, mask, keepdims=False, name=None):
         # and values to get the innermost ragged tensor.
         masked_lengths = math_ops.count_nonzero(mask, axis=-1)
         flattened_masked_lengths = array_ops.reshape(masked_lengths, [-1])
-        masked_values = ragged_factory_ops.from_row_lengths(
+        masked_values = ragged_tensor.RaggedTensor.from_row_lengths(
             masked_values, flattened_masked_lengths)
 
         # Wrap remaining ragged dimensions.
@@ -808,7 +537,7 @@ def boolean_mask(data, mask, keepdims=False, name=None):
           for dim in range(mask.shape.ndims - 3, -1, -1):
             elt_size = mask_shape[dim + 1]
             masked_splits = math_ops.range(split_size[dim]) * elt_size
-            masked_values = ragged_factory_ops.from_row_splits(
+            masked_values = ragged_tensor.RaggedTensor.from_row_splits(
                 masked_values, masked_splits)
 
       return masked_values
@@ -919,7 +648,7 @@ def _ragged_stack_concat_helper(rt_inputs, axis, stack_values):
 
   # Convert input tensors.
   rt_inputs = [
-      ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+      ragged_tensor.convert_to_tensor_or_ragged_tensor(
           rt_input, name='rt_input') for rt_input in rt_inputs
   ]
 
@@ -970,7 +699,7 @@ def _ragged_stack_concat_helper(rt_inputs, axis, stack_values):
     values = [rt.values for rt in rt_inputs]
     splits = [[rt_input.row_splits] for rt_input in rt_inputs]
     with ops.control_dependencies(ragged_util.assert_splits_match(splits)):
-      return ragged_factory_ops.from_row_splits(
+      return ragged_tensor.RaggedTensor.from_row_splits(
           _ragged_stack_concat_helper(values, axis - 1, stack_values),
           splits[0][0])
 
@@ -987,8 +716,8 @@ def _ragged_stack_concat_axis_0(rt_inputs, stack_values):
     A RaggedTensor.
   """
   # Concatenate the inner values together.
-  inner_values = [rt.inner_values for rt in rt_inputs]
-  concatenated_inner_values = array_ops.concat(inner_values, axis=0)
+  flat_values = [rt.flat_values for rt in rt_inputs]
+  concatenated_flat_values = array_ops.concat(flat_values, axis=0)
 
   # Concatenate the splits together for each ragged dimension (adjusting
   # split offsets as necessary).
@@ -1002,12 +731,12 @@ def _ragged_stack_concat_axis_0(rt_inputs, stack_values):
 
   # If we are performing a stack operation, then add another splits.
   if stack_values:
-    stack_lengths = array_ops.stack([nrows(rt) for rt in rt_inputs])
+    stack_lengths = array_ops.stack([_nrows(rt) for rt in rt_inputs])
     stack_splits = ragged_util.lengths_to_splits(stack_lengths)
     concatenated_nested_splits.insert(0, stack_splits)
 
-  return ragged_factory_ops.from_nested_row_splits(concatenated_inner_values,
-                                                   concatenated_nested_splits)
+  return ragged_tensor.RaggedTensor.from_nested_row_splits(
+      concatenated_flat_values, concatenated_nested_splits)
 
 
 def _ragged_stack_concat_axis_1(rt_inputs, stack_values):
@@ -1023,10 +752,10 @@ def _ragged_stack_concat_axis_1(rt_inputs, stack_values):
   """
   num_inputs = len(rt_inputs)
 
-  rt_nrows = nrows(rt_inputs[0])
+  rt_nrows = _nrows(rt_inputs[0])
   nrows_msg = 'Input tensors have incompatible shapes.'
   nrows_checks = [
-      check_ops.assert_equal(nrows(rt), rt_nrows, message=nrows_msg)
+      check_ops.assert_equal(_nrows(rt), rt_nrows, message=nrows_msg)
       for rt in rt_inputs[1:]
   ]
 
@@ -1050,14 +779,15 @@ def _ragged_stack_concat_axis_1(rt_inputs, stack_values):
       # Add a new splits tensor to group together the values.
       stack_splits = math_ops.range(0, rt_nrows * num_inputs + 1, num_inputs)
       _copy_row_shape(rt_inputs, stack_splits)
-      return ragged_factory_ops.from_row_splits(permuted_rt, stack_splits)
+      return ragged_tensor.RaggedTensor.from_row_splits(permuted_rt,
+                                                        stack_splits)
     else:
       # Merge together adjacent rows by dropping the row-split indices that
       # separate them.
       concat_splits = permuted_rt.row_splits[::num_inputs]
       _copy_row_shape(rt_inputs, concat_splits)
-      return ragged_factory_ops.from_row_splits(permuted_rt.values,
-                                                concat_splits)
+      return ragged_tensor.RaggedTensor.from_row_splits(permuted_rt.values,
+                                                        concat_splits)
 
 
 def _copy_row_shape(rt_inputs, splits):
@@ -1090,12 +820,12 @@ def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
   #### Example:
     ```python
     >>> rt = ragged.constant([[1, 2], [3]])
-    >>> ragged.tile(rt, [3, 2]).eval().tolist()
+    >>> ragged.tile(rt, [3, 2])
     [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
     ```
   """
   with ops.name_scope(name, 'RaggedTile', [input, multiples]):
-    input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         input, name='input')
     multiples = ragged_util.convert_to_int_tensor(
         multiples, name='multiples', dtype=dtypes.int64)
@@ -1107,16 +837,16 @@ def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
     # to skip tiling dimensions where `multiples=1`.
     const_multiples = tensor_util.constant_value(multiples)
 
-    return ragged_factory_ops.from_nested_row_splits(
+    return ragged_tensor.RaggedTensor.from_nested_row_splits(
         _tile_ragged_values(input, multiples, const_multiples),
         _tile_ragged_splits(input, multiples, const_multiples))
 
 
 def _tile_ragged_values(rt_input, multiples, const_multiples=None):
-  """Builds inner_values tensor for a tiled `RaggedTensor`.
+  """Builds flat_values tensor for a tiled `RaggedTensor`.
 
   Returns a tensor that repeats the values in
-  `rt_input.inner_values` in the
+  `rt_input.flat_values` in the
   appropriate pattern to construct a `RaggedTensor` that tiles `rt_input` as
   specified by `multiples`.
 
@@ -1128,19 +858,19 @@ def _tile_ragged_values(rt_input, multiples, const_multiples=None):
       dimensions where `multiples=1`.
 
   Returns:
-    A `Tensor` with the same type and rank as `rt_input.inner_values`.
+    A `Tensor` with the same type and rank as `rt_input.flat_values`.
 
   #### Example:
     ```python
     >>> rt = ragged.constant([[1, 2], [3]])
-    >>> _tile_ragged_values(rt, [3, 2]).eval().tolist()
+    >>> _tile_ragged_values(rt, [3, 2])
     [1, 2, 1, 2, 3, 3, 1, 2, 1, 2, 3, 3, 1, 2, 1, 2, 3, 3]
     ```
   """
   ragged_rank = rt_input.ragged_rank
   nested_splits = rt_input.nested_row_splits
 
-  # Pointers to the values in `rt_input.inner_values`.
+  # Pointers to the values in `rt_input.flat_values`.
   inner_value_ids = math_ops.range(nested_splits[-1][-1])
 
   # For each ragged dimension (working from the innermost to outermost),
@@ -1163,9 +893,9 @@ def _tile_ragged_values(rt_input, multiples, const_multiples=None):
     prev_splits = splits
 
   # Gather the tiled inner values.
-  ragged_tiled_values = array_ops.gather(rt_input.inner_values, inner_value_ids)
+  ragged_tiled_values = array_ops.gather(rt_input.flat_values, inner_value_ids)
 
-  # Tile the inner_values for the uniform dimensions (i.e., for `axis=0` plus
+  # Tile the flat_values for the uniform dimensions (i.e., for `axis=0` plus
   # `axis=range(ragged_rank, rank)`).
   inner_repeats = array_ops.concat([multiples[:1], multiples[ragged_rank + 1:]],
                                    axis=0)
@@ -1192,7 +922,7 @@ def _tile_ragged_splits(rt_input, multiples, const_multiples=None):
   #### Example:
     ```python
     >>> rt = ragged.constant([[1, 2], [3]])
-    >>> _tile_ragged_splits(rt, [3, 2]).eval().tolist()
+    >>> _tile_ragged_splits(rt, [3, 2])
     [0, 4, 6, 10, 12, 16, 18]
     ```
   """
@@ -1293,20 +1023,20 @@ def expand_dims(input, axis, name=None):  # pylint: disable=redefined-builtin
     TensorShape([2, None])
 
     >>> expanded = ragged.expand_dims(rt, axis=0)
-    >>> print(expanded.shape, expanded.eval().tolist())
+    >>> print(expanded.shape, expanded)
     TensorShape([1, None, None]) [[[1, 2], [3]]]
 
     >>> expanded = ragged.expand_dims(rt, axis=1)
-    >>> print(expanded.shape, expanded.eval().tolist())
+    >>> print(expanded.shape, expanded)
     TensorShape([2, None, None]) [[[1, 2]], [[3]]]
 
     >>> expanded = ragged.expand_dims(rt, axis=2)
-    >>> print(expanded.shape, expanded.eval().tolist())
+    >>> print(expanded.shape, expanded)
     TensorShape([2, None, 1]) [[[1], [2]], [[3]]]
     ```
   """
   with ops.name_scope(name, 'RaggedExpandDims', [input]):
-    input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         input, name='input')
 
     if not ragged_tensor.is_ragged(input):
@@ -1316,15 +1046,15 @@ def expand_dims(input, axis, name=None):  # pylint: disable=redefined-builtin
     axis = ragged_util.get_positive_axis(axis, ndims)
     if axis == 0:
       values = input
-      splits = array_ops.stack([0, nrows(input)])
+      splits = array_ops.stack([0, input.nrows()])
     elif axis == 1:
       values = input
-      splits = math_ops.range(nrows(input) + 1)
+      splits = math_ops.range(input.nrows() + 1)
     else:
       values = expand_dims(input.values, axis - 1)
       splits = input.row_splits
 
-    return ragged_factory_ops.from_row_splits(values, splits)
+    return ragged_tensor.RaggedTensor.from_row_splits(values, splits)
 
 
 #===============================================================================
@@ -1401,13 +1131,13 @@ def where(condition, x=None, y=None, name=None):
   if (x is None) != (y is None):
     raise ValueError('x and y must be either both None or both non-None')
   with ops.name_scope('RaggedWhere', name, [condition, x, y]):
-    condition = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    condition = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         condition, name='condition')
     if x is None:
       return _coordinate_where(condition)
     else:
-      x = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(x, name='x')
-      y = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(y, name='y')
+      x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, name='x')
+      y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y, name='y')
       return _elementwise_where(condition, x, y)
 
 
@@ -1421,15 +1151,15 @@ def _elementwise_where(condition, x, y):
     return array_ops.where(condition, x, y)
 
   elif condition_is_ragged and x_is_ragged and y_is_ragged:
-    return ragged_functional_ops.map_inner_values(array_ops.where, condition, x,
-                                                  y)
+    return ragged_functional_ops.map_flat_values(array_ops.where, condition, x,
+                                                 y)
   elif not condition_is_ragged:
     # Concatenate x and y, and then use `gather` to assemble the selected rows.
     condition.shape.assert_has_rank(1)
-    x_nrows = nrows(x)
+    x_nrows = _nrows(x)
     x_and_y = concat([x, y], axis=0)
     indices = array_ops.where(condition, math_ops.range(x_nrows),
-                              x_nrows + math_ops.range(nrows(y)))
+                              x_nrows + math_ops.range(_nrows(y)))
     return gather(x_and_y, indices)
 
   else:
@@ -1446,7 +1176,7 @@ def _coordinate_where(condition):
 
   # Convert the first index in each coordinate to a row index and column index.
   first_index = selected_coords[:, 0]
-  selected_rows = array_ops.gather(value_rowids(condition), first_index)
+  selected_rows = array_ops.gather(condition.value_rowids(), first_index)
   selected_row_starts = array_ops.gather(condition.row_splits, selected_rows)
   selected_cols = first_index - selected_row_starts
 
@@ -1482,3 +1212,12 @@ def _concat_ragged_splits(splits_list):
     pieces.append(splits[1:] + splits_offset)
     splits_offset += splits[-1]
   return array_ops.concat(pieces, axis=0)
+
+
+def _nrows(rt_input, out_type=dtypes.int64, name=None):
+  if isinstance(rt_input, ragged_tensor.RaggedTensor):
+    return rt_input.nrows(out_type=out_type, name=name)
+  else:
+    with ops.name_scope(name, 'RaggedNRows', [rt_input]):
+      return array_ops.shape(rt_input, out_type=out_type)[0]
+
diff --git a/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py b/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
index 7fe185641f..79f1ae591f 100644
--- a/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
@@ -20,15 +20,18 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedBatchGatherOpTest(test_util.TensorFlowTestCase,
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedBatchGatherOpTest(ragged_test_util.RaggedTensorTestCase,
                               parameterized.TestCase):
 
   @parameterized.parameters([
@@ -135,21 +138,16 @@ class RaggedBatchGatherOpTest(test_util.TensorFlowTestCase,
           expected=ragged.constant_value(
               [[[[b'c', b'a'], [b'd', b'd']], [[b'f', b'e']]]], ragged_rank=2)),
   ])
-  @test_util.run_v1_only('b/120545219')
   def testRaggedBatchGather(self, descr, params, indices, expected):
     result = ragged.batch_gather(params, indices)
-    self.assertEqual(
-        getattr(result, 'ragged_rank', 0), getattr(expected, 'ragged_rank', 0))
-    with self.test_session():
-      if hasattr(expected, 'tolist'):
-        expected = expected.tolist()
-      self.assertEqual(result.eval().tolist(), expected)
+    self.assertRaggedEqual(result, expected)
 
-  @test_util.run_deprecated_v1
   def testRaggedBatchGatherUnknownRankError(self):
+    if context.executing_eagerly():
+      return
     params = [['a', 'b'], ['c', 'd']]
     indices = array_ops.placeholder(dtypes.int32, shape=None)
-    ragged_indices = ragged.from_row_splits(indices, [0, 2, 4])
+    ragged_indices = ragged.RaggedTensor.from_row_splits(indices, [0, 2, 4])
 
     with self.assertRaisesRegexp(
         ValueError, 'batch_gather does not allow indices with unknown shape.'):
@@ -161,38 +159,39 @@ class RaggedBatchGatherOpTest(test_util.TensorFlowTestCase,
 
   @parameterized.parameters([
       dict(
-          params=ragged.constant([['a'], ['b'], ['c']]),
-          indices=ragged.constant([[0], [0]]),
+          params=ragged.constant_value([['a'], ['b'], ['c']]),
+          indices=ragged.constant_value([[0], [0]]),
           message='Dimensions 3 and 2 are not compatible'),
       dict(
           params=[[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
-          indices=ragged.constant([[[0, 0], [0, 0, 0]], [[0]]]),
+          indices=ragged.constant_value([[[0, 0], [0, 0, 0]], [[0]]]),
           message='batch shape from indices does not match params shape'),
+      dict(  # rank mismatch
+          params=ragged.constant_value([[[0, 0], [0, 0, 0]], [[0]]]),
+          indices=ragged.constant_value([[[0, 0]], [[0, 0, 0]], [[0]]]),
+          error=(ValueError, errors.InvalidArgumentError)),
       dict(
-          params=ragged.constant([[[0, 0], [0, 0, 0]], [[0]]]),
-          indices=ragged.constant([[[0, 0]], [[0, 0, 0]], [[0]]]),
-          message='Dimensions must be equal, but are 3 and 4'),
-      dict(
-          params=ragged.constant([[[0, 0], [0, 0, 0]], [[0]], [[0]]]),
-          indices=ragged.constant([[[0, 0]], [[0, 0, 0]], [[0]]]),
+          params=ragged.constant_value([[[0, 0], [0, 0, 0]], [[0]], [[0]]]),
+          indices=ragged.constant_value([[[0, 0]], [[0, 0, 0]], [[0]]]),
           error=errors.InvalidArgumentError,
-          message='Condition x == y did not hold element-wise'),
+          message='.*Condition x == y did not hold.*'),
+      dict(
+          params=ragged.constant_value(['a', 'b', 'c']),
+          indices=ragged.constant_value([[0], [0]]),
+          message='batch shape from indices does not match params shape'),
+      dict(
+          params=ragged.constant_value([['a']]),
+          indices=0,
+          message='indices.rank must be at least 1.'),
       dict(
-          params=ragged.constant(['a', 'b', 'c']),
-          indices=ragged.constant([[0], [0]]),
+          params=ragged.constant_value([['a']]),
+          indices=[[[0]]],
           message='batch shape from indices does not match params shape'),
-      dict(params=ragged.constant_value([['a']]),
-           indices=0,
-           message='indices.rank must be at least 1.'),
-      dict(params=ragged.constant_value([['a']]),
-           indices=[[[0]]],
-           message='batch shape from indices does not match params shape'),
   ])
-  @test_util.run_v1_only('b/120545219')
   def testRaggedBatchGatherStaticError(self,
                                        params,
                                        indices,
-                                       message,
+                                       message=None,
                                        error=ValueError):
     with self.assertRaisesRegexp(error, message):
       ragged.batch_gather(params, indices)
diff --git a/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py b/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
index 763b016405..b0f7459322 100644
--- a/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
@@ -20,15 +20,18 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedBooleanMaskOpTest(test_util.TensorFlowTestCase,
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedBooleanMaskOpTest(ragged_test_util.RaggedTensorTestCase,
                               parameterized.TestCase):
   # Define short constants for true & false, so the data & mask can be lined
   # up in the examples below.  This makes it easier to read the examples, to
@@ -298,26 +301,18 @@ class RaggedBooleanMaskOpTest(test_util.TensorFlowTestCase,
           keepdims=True,
           expected=ragged.constant_value([[[1], [4, 6]], [[7, 9], []]])),
   ])  # pyformat: disable
-  @test_util.run_v1_only('b/120545219')
   def testBooleanMask(self, descr, data, mask, keepdims, expected):
     actual = ragged.boolean_mask(data, mask, keepdims=keepdims)
-    self.assertEqual(
-        getattr(actual, 'ragged_rank', 0), getattr(expected, 'ragged_rank', 0))
-    with self.test_session():
-      if isinstance(expected, ragged.RaggedTensorValue):
-        expected = expected.tolist()
-      self.assertEqual(actual.eval().tolist(), expected)
+    self.assertRaggedEqual(actual, expected)
 
-  @test_util.run_deprecated_v1
   def testErrors(self):
-    self.assertRaisesRegexp(ValueError,
-                            r'mask\.shape\.ndims must be kown statically',
-                            ragged.boolean_mask, [[1, 2]],
-                            array_ops.placeholder(dtypes.bool))
+    if not context.executing_eagerly():
+      self.assertRaisesRegexp(ValueError,
+                              r'mask\.shape\.ndims must be kown statically',
+                              ragged.boolean_mask, [[1, 2]],
+                              array_ops.placeholder(dtypes.bool))
 
-    self.assertRaisesRegexp(TypeError,
-                            "Expected bool, got 0 of type 'int' instead.",
-                            ragged.boolean_mask, [[1, 2]], [[0, 1]])
+    self.assertRaises(TypeError, ragged.boolean_mask, [[1, 2]], [[0, 1]])
     self.assertRaisesRegexp(
         ValueError, 'Tensor conversion requested dtype bool for '
         'RaggedTensor with dtype int32', ragged.boolean_mask,
@@ -327,15 +322,6 @@ class RaggedBooleanMaskOpTest(test_util.TensorFlowTestCase,
         ValueError, r'Shapes \(1, 2\) and \(1, 3\) are incompatible',
         ragged.boolean_mask, [[1, 2]], [[True, False, True]])
 
-    # self.assertRaisesRegexp(ValueError,
-    #                         r'data=.* is non-ragged but mask=.* is ragged',
-    #                         ragged.boolean_mask, [[1, 2]],
-    #                         ragged.constant([[True, False]]))
-
-    # self.assertRaisesRegexp(
-    #     ValueError, r'data=.* is ragged but mask=.* is non-ragged',
-    #     ragged.boolean_mask, ragged.constant([[1, 2]]), [[True, False]])
-
     self.assertRaisesRegexp(errors.InvalidArgumentError,
                             r'Inputs must have identical ragged splits',
                             ragged.boolean_mask, ragged.constant([[1, 2]]),
diff --git a/tensorflow/python/ops/ragged/ragged_concat_op_test.py b/tensorflow/python/ops/ragged/ragged_concat_op_test.py
index ba7867418a..e72afb0448 100644
--- a/tensorflow/python/ops/ragged/ragged_concat_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_concat_op_test.py
@@ -20,16 +20,20 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedConcatOpTest(ragged_test_util.RaggedTensorTestCase,
+                         parameterized.TestCase):
 
   def _rt_inputs_to_tensors(self, rt_inputs, ragged_ranks=None):
     if ragged_ranks is None:
@@ -221,7 +225,6 @@ class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           axis=0,
           expected=[[b'a00', b'a01'], [], [b'a20', b'a21']]),
   )   # pyformat: disable
-  @test_util.run_v1_only('b/120545219')
   def testRaggedConcat(self,
                        descr,
                        rt_inputs,
@@ -236,8 +239,7 @@ class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       self.assertEqual(concatenated.ragged_rank, expected_ragged_rank)
     if expected_shape is not None:
       self.assertEqual(concatenated.shape.as_list(), expected_shape)
-    with self.test_session():
-      self.assertEqual(concatenated.eval().tolist(), expected)
+    self.assertRaggedEqual(concatenated, expected)
 
   @parameterized.parameters(
       dict(
@@ -264,11 +266,14 @@ class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           ragged_ranks=(0, 0),
           rt_inputs=([[1, 2]], [[3, 4], [5, 6]]),
           axis=1,
-          error=ValueError,
-          message='Dimension 0 in both shapes must be equal'),
+          error=(ValueError, errors.InvalidArgumentError)),
   )
-  @test_util.run_deprecated_v1
-  def testStaticError(self, rt_inputs, axis, error, message, ragged_ranks=None):
+  def testStaticError(self,
+                      rt_inputs,
+                      axis,
+                      error,
+                      message=None,
+                      ragged_ranks=None):
     rt_inputs = self._rt_inputs_to_tensors(rt_inputs, ragged_ranks)
     self.assertRaisesRegexp(error, message, ragged.concat, rt_inputs, axis)
 
@@ -280,18 +285,20 @@ class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           error=errors.InvalidArgumentError,
           message='Input tensors have incompatible shapes'),
   ])
-  @test_util.run_v1_only('b/120545219')
   def testRuntimeError(self, rt_inputs, axis, error, message,
                        ragged_ranks=None):
+    if context.executing_eagerly():
+      return
     rt_inputs = [
         array_ops.placeholder_with_default(rt, shape=None) for rt in rt_inputs
     ]
     concatenated = ragged.concat(rt_inputs, axis)
-    with self.test_session():
-      self.assertRaisesRegexp(error, message, concatenated.eval)
+    with self.assertRaisesRegexp(error, message):
+      self.evaluate(concatenated)
 
-  @test_util.run_deprecated_v1
   def testNegativeAxisWithUnknownRankError(self):
+    if context.executing_eagerly():
+      return
     rt_inputs = [
         array_ops.placeholder(dtypes.int64),
         array_ops.placeholder(dtypes.int64)
@@ -300,7 +307,6 @@ class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         ValueError, r'axis may only be negative if ndims is statically known.',
         ragged.concat, rt_inputs, -1)
 
-  @test_util.run_deprecated_v1
   def testSingleTensorInput(self):
     """Tests ragged_concat with a single tensor input.
 
@@ -310,8 +316,7 @@ class RaggedConcatOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     """
     rt_inputs = ragged.constant([[1, 2], [3, 4]])
     concatenated = ragged.concat(rt_inputs, 0)
-    with self.test_session():
-      self.assertEqual(concatenated.eval().tolist(), [[1, 2], [3, 4]])
+    self.assertRaggedEqual(concatenated, [[1, 2], [3, 4]])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_const_op_test.py b/tensorflow/python/ops/ragged/ragged_const_op_test.py
index 2505b23912..c014f71030 100644
--- a/tensorflow/python/ops/ragged/ragged_const_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_const_op_test.py
@@ -20,15 +20,16 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
-
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.ops.ragged import ragged_tensor
+from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedConstOpTest(ragged_test_util.RaggedTensorTestCase,
+                        parameterized.TestCase):
 
   @parameterized.parameters(
       #=========================================================================
@@ -133,7 +134,6 @@ class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       dict(pylist=[[b'a', b'b'], [b'c'], [b'd', b'e', b'f']],
            dtype=dtypes.string),
   )
-  @test_util.run_deprecated_v1
   def testRaggedConst(self,
                       pylist,
                       dtype=None,
@@ -157,7 +157,7 @@ class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       expected_dtype: The expected dtype for the resulting ragged tensor (used
         to test default/inferred types when dtype=None).
     """
-    rt = ragged_factory_ops.constant(
+    rt = ragged.constant(
         pylist, dtype=dtype, ragged_rank=ragged_rank, inner_shape=inner_shape)
 
     # If dtype was explicitly specified, check it.
@@ -168,31 +168,22 @@ class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
     # If ragged_rank was explicitly specified, check it.
     if ragged_rank is not None:
-      if isinstance(rt, ragged_tensor.RaggedTensor):
+      if isinstance(rt, ragged.RaggedTensor):
         self.assertEqual(rt.ragged_rank, ragged_rank)
       else:
         self.assertEqual(0, ragged_rank)
 
     # If inner_shape was explicitly specified, check it.
     if inner_shape is not None:
-      if isinstance(rt, ragged_tensor.RaggedTensor):
-        self.assertEqual(rt.inner_values.shape.as_list()[1:], list(inner_shape))
+      if isinstance(rt, ragged.RaggedTensor):
+        self.assertEqual(rt.flat_values.shape.as_list()[1:], list(inner_shape))
       else:
         self.assertEqual(rt.shape.as_list(), list(inner_shape))
 
     if expected_shape is not None:
       self.assertEqual(tuple(rt.shape.as_list()), expected_shape)
 
-    with self.test_session():
-      result = self.evaluate(rt)
-      if rt.shape.ndims > 0:
-        self.assertEqual(result.tolist(), pylist)
-        if expected_shape is not None:
-          self.assertEqual(result.shape, expected_shape)
-      else:
-        self.assertEqual(result, pylist)
-        if expected_shape is not None:
-          self.assertEqual((), expected_shape)
+    self.assertRaggedEqual(rt, pylist)
 
   @parameterized.parameters(
       dict(
@@ -236,11 +227,7 @@ class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           exception=ValueError,
           message='pylist has scalar values depth 2, but ragged_rank=2 '
           'requires scalar value depth greater than 2'),
-      dict(
-          pylist=[1, 2, 3],
-          inner_shape=(1, 1),
-          exception=TypeError,
-          message='Expected Tensor\'s shape'),
+      dict(pylist=[1, 2, 3], inner_shape=(1, 1), exception=TypeError),
       dict(
           pylist=[[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
           inner_shape=(2, 2),
@@ -259,7 +246,6 @@ class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           exception=ValueError,
           message='inner values have inconsistent shape'),
   )
-  @test_util.run_deprecated_v1
   def testRaggedConstError(self,
                            pylist,
                            dtype=None,
@@ -271,7 +257,7 @@ class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertRaisesRegexp(
         exception,
         message,
-        ragged_factory_ops.constant,
+        ragged.constant,
         pylist,
         dtype=dtype,
         ragged_rank=ragged_rank,
@@ -310,10 +296,10 @@ class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     if exception is not None:
       self.assertRaisesRegexp(
           exception, message,
-          ragged_factory_ops._find_scalar_and_max_depth, pylist)
+          ragged.ragged_factory_ops._find_scalar_and_max_depth, pylist)
     else:
       self.assertEqual(
-          ragged_factory_ops._find_scalar_and_max_depth(pylist),
+          ragged.ragged_factory_ops._find_scalar_and_max_depth(pylist),
           (scalar_depth, max_depth))
 
   @parameterized.parameters([
@@ -360,11 +346,11 @@ class RaggedConstOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     if exception is not None:
       self.assertRaisesRegexp(
           exception, message,
-          ragged_factory_ops._default_inner_shape_for_pylist, pylist,
+          ragged.ragged_factory_ops._default_inner_shape_for_pylist, pylist,
           ragged_rank)
     else:
       self.assertEqual(
-          ragged_factory_ops._default_inner_shape_for_pylist(
+          ragged.ragged_factory_ops._default_inner_shape_for_pylist(
               pylist, ragged_rank), inner_shape)
 
 
diff --git a/tensorflow/python/ops/ragged/ragged_constant_value_op_test.py b/tensorflow/python/ops/ragged/ragged_constant_value_op_test.py
index d80518930d..56768a9a47 100644
--- a/tensorflow/python/ops/ragged/ragged_constant_value_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_constant_value_op_test.py
@@ -23,10 +23,12 @@ import numpy as np
 
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedConstantValueOpTest(test_util.TensorFlowTestCase,
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedConstantValueOpTest(ragged_test_util.RaggedTensorTestCase,
                                 parameterized.TestCase):
 
   @parameterized.parameters(
@@ -144,7 +146,7 @@ class RaggedConstantValueOpTest(test_util.TensorFlowTestCase,
                        inner_shape=None,
                        expected_shape=None,
                        expected_dtype=None):
-    """Tests that `ragged_value(pylist).tolist() == pylist`."""
+    """Tests that `ragged_value(pylist).to_list() == pylist`."""
     rt = ragged.constant_value(
         pylist, dtype=dtype, ragged_rank=ragged_rank, inner_shape=inner_shape)
 
@@ -164,7 +166,7 @@ class RaggedConstantValueOpTest(test_util.TensorFlowTestCase,
     # If inner_shape was explicitly specified, check it.
     if inner_shape is not None:
       if isinstance(rt, ragged.RaggedTensorValue):
-        self.assertEqual(rt.inner_values.shape[1:], inner_shape)
+        self.assertEqual(rt.flat_values.shape[1:], inner_shape)
       else:
         self.assertEqual(rt.shape, inner_shape)
 
@@ -172,7 +174,10 @@ class RaggedConstantValueOpTest(test_util.TensorFlowTestCase,
       self.assertEqual(tuple(rt.shape), expected_shape)
 
     if rt.shape:
-      self.assertEqual(rt.tolist(), pylist)
+      if isinstance(rt, ragged.RaggedTensorValue):
+        self.assertEqual(rt.to_list(), pylist)
+      else:
+        self.assertEqual(rt.tolist(), pylist)
       if expected_shape is not None:
         self.assertEqual(rt.shape, expected_shape)
     else:
diff --git a/tensorflow/python/ops/ragged/ragged_conversion_ops.py b/tensorflow/python/ops/ragged/ragged_conversion_ops.py
index 83212e49cf..854c5b303c 100644
--- a/tensorflow/python/ops/ragged/ragged_conversion_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_conversion_ops.py
@@ -18,407 +18,27 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gen_ragged_conversion_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_tensor
-from tensorflow.python.ops.ragged import ragged_util
 
 
-#===============================================================================
-# RaggedTensor <-> Tensor conversion
-#===============================================================================
 def from_tensor(tensor, lengths=None, padding=None, ragged_rank=1, name=None):
-  """Converts a `Tensor` into a `RaggedTensor`.
-
-  The set of absent/default values may be specified using a vector of lengths
-  or a padding value (but not both).  If `lengths` is specified, then the
-  output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
-  If `padding` is specified, then any row *suffix* consisting entirely of
-  `padding` will be excluded from the returned `RaggedTensor`.  If neither
-  `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
-  have no absent/default values.
-
-  Examples:
-
-  ```python
-  >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
-  >>> ragged.from_tensor(dt).eval().tolist()
-  [[5, 7, 0], [0, 3, 0], [6, 0, 0]]
-  >>> ragged.from_tensor(dt, lengths=[2, 0, 3]).eval().tolist()
-  [[5, 7], [], [6, 0, 0]]
-  >>> ragged.from_tensor(dt, padding=0).eval().tolist()
-  [[5, 7], [0, 3], [6]]
-  ```
-
-  Args:
-    tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
-      higher.
-    lengths: An optional set of row lengths, specified using a 1-D integer
-      `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows in
-      `tensor`).  If specified, then `output[row]` will contain
-      `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
-    padding: An optional padding value.  If specified, then any row suffix
-      consisting entirely of `padding` will be excluded from the returned
-      RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
-      and with `shape=tensor.shape[ragged_rank + 1:]`.
-    ragged_rank: Integer specifying the ragged rank for the returned
-      `RaggedTensor`.  Must be greater than zero.
-    name: A name prefix for the returned tensors (optional).
-
-  Returns:
-    A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
-    returned ragged tensor is compatible with the shape of `tensor`.
-  Raises:
-    ValueError: If both `lengths` and `padding` are specified.
-  """
-  if lengths is not None and padding is not None:
-    raise ValueError('Specify lengths or padding, but not both')
-  if not isinstance(ragged_rank, int):
-    raise TypeError('ragged_rank expected int, got %r' % ragged_rank)
-  if ragged_rank <= 0:
-    raise ValueError('ragged_rank must be greater than 0; got %s' % ragged_rank)
-
-  with ops.name_scope(name, 'RaggedFromTensor', [tensor, lengths, padding]):
-    tensor = ops.convert_to_tensor(tensor, name='tensor')
-    tensor.shape.with_rank_at_least(ragged_rank + 1)
-    input_shape = array_ops.shape(tensor, out_type=dtypes.int64)
-    ncols = input_shape[1]
-
-    # Handle ragged_rank>1 via recursion:
-    # If the output should have multiple ragged dimensions, then first
-    # flatten the tensor to eliminate all but the last ragged dimension,
-    # and recursively convert that flattened tensor.  Then add on the splits
-    # for the dimensions that we flattened out.
-    if ragged_rank > 1:
-      # Flatten `tensor` to eliminate all but the last ragged dimension.
-      new_shape = array_ops.concat(
-          [constant_op.constant([-1], dtypes.int64), input_shape[ragged_rank:]],
-          axis=0)
-      flattened = array_ops.reshape(tensor, new_shape)
-      # Recursively convert the flattened tensor.
-      values = from_tensor(flattened, lengths, padding)
-      # The total number of elements in each  dimension.  E.g., if
-      # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
-      dim_size = math_ops.cumprod(input_shape)
-      # Construct splits tensors for the dimensions that were flattened.
-      new_splits = [
-          math_ops.range(0, dim_size[dim - 1] + 1) * input_shape[dim]
-          for dim in range(1, ragged_rank)
-      ]
-      return ragged_factory_ops.from_nested_row_splits(values, new_splits)
-
-    # If padding was specified, then use it to find row lengths.
-    if padding is not None:
-      padding = ops.convert_to_tensor(
-          padding, name='padding', dtype=tensor.dtype)
-      padding.shape.assert_is_compatible_with(tensor.shape[2:])
-
-      # Find places where the padding is equal to the tensor.  (This will
-      # broadcast `padding` across the outermost 2 dimensions of `tensor`,
-      # so `has_default_value.shape = tensor.shape`.)
-      has_default_value = math_ops.equal(padding, tensor)
-
-      # If the padding isn't a scalar, then require that all values in the
-      # padding match each item in the tensor.  After this block of code,
-      # `has_default.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
-      # use reduce_all for both cases, becaue when you pass an empty `axis`
-      # list to reduce_all, it reduces all axes; but we want it to reduce no
-      # axes -- i.e., to be a no-op.)
-      tensor_rank = array_ops.rank(tensor)
-      reduce_axis = math_ops.range(2, tensor_rank)
-      has_default = control_flow_ops.cond(
-          tensor_rank > 2,
-          lambda: math_ops.reduce_all(has_default_value, axis=reduce_axis),
-          lambda: has_default_value)
-      has_default.set_shape(tensor_shape.TensorShape([None, None]))
-      has_default.set_shape(tensor.shape[:2])
-
-      # Use has_default it to find the length of each row: for each non-default
-      # item in a row, calculate the length that the row needs to have to
-      # include that item; and then take the max of those values (across each
-      # row).
-      has_nondefault = math_ops.logical_not(has_default)
-      has_nondefault = math_ops.cast(has_nondefault, dtypes.int64)
-      length_for_nondefault_value = (
-          has_nondefault * array_ops.expand_dims(
-              math_ops.range(1, ncols + 1), 0))
-      lengths = math_ops.reduce_max(length_for_nondefault_value, axis=1)
-
-    # If we have lengths (either directly supplied, or computed from paddings),
-    # then use those to construct splits; and then use masking to get the
-    # corresponding values.
-    if lengths is not None:
-      lengths = ragged_util.convert_to_int_tensor(lengths, 'lengths',
-                                                  dtypes.int64)
-      lengths.shape.assert_has_rank(1)
-      lengths = math_ops.minimum(lengths, ncols)
-      lengths = math_ops.maximum(lengths, 0)
-      limits = math_ops.cumsum(lengths)
-      splits = array_ops.concat(
-          [array_ops.zeros([1], dtypes.int64), limits], axis=0)
-      mask = array_ops.sequence_mask(lengths, maxlen=ncols)
-      values = array_ops.boolean_mask(tensor, mask)
-      return ragged_factory_ops.from_row_splits(values, splits)
-
-    # If neither padding nor lengths were specified, then create a splits
-    # vector that contains no default values, and reshape the input tensor
-    # to form the values for the RaggedTensor.
-    nrows = input_shape[0]
-    nvals = nrows * ncols
-    splits = math_ops.range(nrows + 1) * ncols
-    values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0)
-    values = array_ops.reshape(tensor, values_shape)
-    return ragged_factory_ops.from_row_splits(values, splits)
+  if ragged_tensor.is_ragged(tensor):
+    return tensor
+  else:
+    return ragged_tensor.RaggedTensor.from_tensor(tensor, lengths, padding,
+                                                  ragged_rank, name)
 
 
 def to_tensor(rt_input, default_value=None, name=None):
-  """Converts a `RaggedTensor` into a `Tensor`.
-
-  Example:
-
-  ```python
-  >>> rt = ragged.constant([[9, 8, 7], [], [6, 5], [4]])
-  >>> print ragged.to_tensor(rt).eval()
-  [[9 8 7]
-   [0 0 0]
-   [6 5 0]
-   [4 0 0]]
-  ```
-
-  Args:
-    rt_input: The input `RaggedTensor`.
-    default_value: Value to set for indices not specified in `rt_input`.
-      Defaults to zero.  `default_value` must be broadcastable to
-      `rt_input.shape[rt_input.ragged_rank + 1:]`.
-    name: A name prefix for the returned tensors (optional).
-
-  Returns:
-    A `Tensor` with shape `ragged.bounding_shape(rt_input)` and the
-    values specified by the non-empty values in `rt_input`.  Empty values are
-    assigned `default_value`.
-  """
-  with ops.name_scope(name, 'RaggedToTensor', [rt_input, default_value]):
-    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
-        rt_input, name='rt_input')
-    if not ragged_tensor.is_ragged(rt_input):
-      return rt_input  # already dense
-    if default_value is not None:
-      default_value = ops.convert_to_tensor(
-          default_value, name='default_value', dtype=rt_input.dtype)
-
-    # If ragged_rank > 1, then recursively convert the ragged values into a
-    # `Tensor` before we proceed.
-    values = rt_input.values
-    if ragged_tensor.is_ragged(values):
-      values = to_tensor(values, default_value)
-
-    # Tile the default value, if necessary.
-    if default_value is not None:
-      if values.shape.ndims is not None:
-        default_value.shape.with_rank_at_most(values.shape.ndims - 1)
-      if (values.shape.ndims is None or default_value.shape.ndims is None or
-          values.shape.ndims != default_value.shape.ndims + 1):
-        value_shape = array_ops.shape(values)[1:]
-        default_value = array_ops.broadcast_to(default_value, value_shape)
-      default_value.shape.assert_is_compatible_with(values.shape[1:])
-
-    # Get the expected dense shape ([nrows, ncols] + value_shape).
-    rt_row_lengths = [rt_input.row_splits[1:] - rt_input.row_splits[:-1]]
-    nrows = array_ops.shape(rt_input.row_splits, out_type=dtypes.int64)[0] - 1
-    ncols = math_ops.maximum(math_ops.reduce_max(rt_row_lengths), 0)
-    values_shape = array_ops.shape(values, out_type=dtypes.int64)
-    value_shape = values_shape[1:]
-    nvals = values_shape[0]
-
-    # Build a default value if none was supplied.
-    if default_value is None:
-      default_value = array_ops.zeros(value_shape, dtype=values.dtype)
-    default_value.shape.assert_is_compatible_with(values.shape[1:])
-    default_value.set_shape(values.shape[1:])
-
-    # Get the row start indices, and expand to shape=[nrows, 1].
-    starts = array_ops.expand_dims(rt_input.row_splits[:-1], 1)
-
-    # Get the row limit indices, and expand to shape=[nrows, 1].
-    limits = array_ops.expand_dims(rt_input.row_splits[1:], 1)
-
-    # Get the column indices, and expand to shape=[1, ncols].
-    columns = array_ops.expand_dims(math_ops.range(0, ncols), 0)
-
-    # Build a list containing the values plus the default value.  We will use
-    # tf.gather to collect values from this list for the `Tensor` (using
-    # nvals as the index for the default value).
-    values_and_default = array_ops.concat(
-        [values, array_ops.stack([default_value])], axis=0)
-
-    # Construct a matrix "indices" pointing into values_and_default.  I.e.,
-    # output[r, c] = values_and_default[indices[r, c].
-    nondefault_index = starts + columns
-    has_value = nondefault_index < limits
-    default_index = array_ops.fill(array_ops.stack([nrows, ncols]), nvals)
-    indices = array_ops.where(has_value, nondefault_index, default_index)
-
-    # Gather the results into a `Tensor`.
-    return array_ops.gather(values_and_default, indices)
+  if ragged_tensor.is_ragged(rt_input):
+    return rt_input.to_tensor(default_value, name)
+  else:
+    return rt_input
 
 
-#===============================================================================
-# RaggedTensor <-> SparseTensor conversion
-#===============================================================================
 def to_sparse(rt_input, name=None):
-  """Converts a `RaggedTensor` into a sparse tensor.
-
-  Example:
-
-  ```python
-  >>> rt = ragged.constant([[1, 2, 3], [4], [], [5, 6]])
-  >>> ragged.to_sparse(rt).eval()
-  SparseTensorValue(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [3, 1]],
-                    values=[1, 2, 3, 4, 5, 6],
-                    dense_shape=[4, 3])
-  ```
-
-  Args:
-    rt_input: The input `RaggedTensor`.
-    name: A name prefix for the returned tensors (optional).
-
-  Returns:
-    A SparseTensor with the same values as `rt_input`.
-  """
-  if not ragged_tensor.is_ragged(rt_input):
-    raise TypeError('Expected RaggedTensor, got %s' % type(rt_input).__name__)
-  with ops.name_scope(name, 'RaggedToSparse', [rt_input]):
-    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
-        rt_input, name='rt_input')
-    result = gen_ragged_conversion_ops.ragged_tensor_to_sparse(
-        rt_input.nested_row_splits, rt_input.inner_values, name=name)
-    return sparse_tensor.SparseTensor(
-        result.sparse_indices, result.sparse_values, result.sparse_dense_shape)
-
-
-@ops.RegisterGradient('RaggedTensorToSparse')
-def _ragged_tensor_to_sparse_gradient(op, unused_sparse_indices_grad,
-                                      sparse_values_grad,
-                                      unused_sparse_shape_grad):
-  """Gradient for ragged.to_sparse."""
-  op_inputs_nested_row_splits = op.inputs[:-1]
-  op_inputs_inner_values = op.inputs[-1]
-
-  # No gradient for the RaggedTensor's nested_row_splits.
-  nested_row_splits_gradient = [None] * len(op_inputs_nested_row_splits)
-
-  # Gradient for the RaggedTensor's inner_values is formed by reshaping
-  # the gradient for the SparseTensor's values.
-  inner_values_shape = array_ops.shape(op_inputs_inner_values)
-  inner_values_gradient = array_ops.reshape(sparse_values_grad,
-                                            inner_values_shape)
-
-  return nested_row_splits_gradient + [inner_values_gradient]
+  return rt_input.to_sparse(name)
 
 
 def from_sparse(st_input, name=None):
-  """Converts a 2D `SparseTensor` to a `RaggedTensor`.
-
-  Each row of the `output` `RaggedTensor` will contain the explicit values from
-  the same row in `st_input`.  `st_input` must be ragged-right.  If not it is
-  not ragged-right, then an error will be generated.
-
-  Example:
-
-  ```python
-  >>> st = SparseTensor(indices=[[0, 1], [0, 2], [0, 3], [1, 0], [3, 0]],
-  ...                   values=[1, 2, 3, 4, 5],
-  ...                   dense_shape=[4, 3])
-  >>> ragged.from_sparse(st).eval().tolist()
-  [[1, 2, 3], [4], [], [5]]
-  ```
-
-  Currently, only two-dimensional `SparseTensors` are supported.
-
-  Args:
-    st_input: The sparse tensor to convert.  Must have rank 2.
-    name: A name prefix for the returned tensors (optional).
-
-  Returns:
-    A `RaggedTensor` with the same values as `st_input`.
-    `output.ragged_rank = rank(st_input) - 1`.
-    `output.shape = [st_input.dense_shape[0], None]`.
-  Raises:
-    ValueError: If the number of dimensions in `st_input` is not known
-      statically, or is not two.
-  """
-  if not sparse_tensor.is_sparse(st_input):
-    raise TypeError('Expected SparseTensor, got %s' % type(st_input).__name__)
-  with ops.name_scope(name, 'RaggedFromSparse', [st_input]):
-    st_input = sparse_tensor.convert_to_tensor_or_sparse_tensor(
-        st_input, name='rt_input')
-
-    static_rank_from_dense_shape = (
-        None if st_input.dense_shape.shape.ndims is None
-        else st_input.dense_shape.shape.dims[0].value)
-    static_rank_from_indices = (
-        None if st_input.indices.shape.ndims is None
-        else st_input.indices.shape.dims[1].value)
-
-    if static_rank_from_dense_shape != 2 and static_rank_from_indices != 2:
-      raise ValueError('rank(st_input) must be 2')
-
-    with ops.control_dependencies(
-        _assert_sparse_indices_are_ragged_right(st_input.indices)):
-      # Treat sparse row indices as segment ids to generate a splits tensor that
-      # we can pair with the sparse tensor values.  (Ignore sparse column
-      # indices.)
-      segment_ids = st_input.indices[:, 0]
-      num_segments = st_input.dense_shape[0]
-      return ragged_factory_ops.from_value_rowids(st_input.values, segment_ids,
-                                                  num_segments)
-
-
-def _assert_sparse_indices_are_ragged_right(indices):
-  """Checks that the given SparseTensor.indices tensor is ragged-right.
-
-  Example: `indices = [[0, 0], [0, 1], [2, 0], [3, 1]]` is not ragged right
-  because the entry `[3, 1]` skips a cell.
-
-  Args:
-    indices: The SparseTensor indices to check.
-
-  Returns:
-    A list of control dependency op tensors.
-  """
-  index_prefix = indices[:, :-1]
-  index_suffix = indices[:, -1]
-
-  # Check whether each index is starting a new row in the innermost dimension
-  # (prefix[i] != prefix[i-1]) or continuing a row (prefix[i] == prefix[i-1]).
-  # (Note: this skips the first index; we will check that separately below.)
-  index_prefix_changed = math_ops.reduce_any(
-      math_ops.not_equal(index_prefix[1:], index_prefix[:-1]), axis=1)
-
-  # Check two cases:
-  #   * For indices that start a new row: index_suffix[i] must be zero.
-  #   * For indices that continue a row: index_suffix[i] must be equal to
-  #     index_suffix[i-1]+1.
-  index_ok = array_ops.where(
-      index_prefix_changed, math_ops.equal(index_suffix[1:], 0),
-      math_ops.equal(index_suffix[1:], index_suffix[:-1] + 1))
-
-  # Also check that the very first index didn't skip any cells.  The first
-  # index starts a new row (by definition), so its suffix should be zero.
-  sparse_indices_are_ragged_right = math_ops.logical_and(
-      math_ops.reduce_all(math_ops.equal(index_suffix[:1], 0)),
-      math_ops.reduce_all(index_ok))
-
-  message = [
-      'SparseTensor is not right-ragged',
-      'SparseTensor.indices =', indices
-  ]
-  return [control_flow_ops.Assert(sparse_indices_are_ragged_right, message)]
+  return ragged_tensor.RaggedTensor.from_sparse(st_input, name)
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch.py b/tensorflow/python/ops/ragged/ragged_dispatch.py
index 7f44ac2ec1..7c74f7be62 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch.py
@@ -30,7 +30,6 @@ from tensorflow.python.ops import parsing_ops
 from tensorflow.python.ops import string_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.ragged import ragged_array_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_math_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_tensor_shape
@@ -122,22 +121,22 @@ class UnaryRaggedElementwiseDispatcher(dispatch.OpDispatcher):
         nested_splits_lists = [
             elt.nested_row_splits for elt in x if ragged_tensor.is_ragged(elt)
         ]
-        inner_values = [
-            elt.inner_values if ragged_tensor.is_ragged(elt) else elt
+        flat_values = [
+            elt.flat_values if ragged_tensor.is_ragged(elt) else elt
             for elt in x
         ]
         with ops.control_dependencies(
             ragged_util.assert_splits_match(nested_splits_lists)):
-          return ragged_factory_ops.from_nested_row_splits(
-              self._original_op(inner_values, *args, **kwargs),
+          return ragged_tensor.RaggedTensor.from_nested_row_splits(
+              self._original_op(flat_values, *args, **kwargs),
               nested_splits_lists[0])
       else:
         return self.NOT_SUPPORTED
     else:
       found_ragged = ragged_tensor.is_ragged(x)
       if found_ragged:
-        mapped_values = self._original_op(x.inner_values, *args, **kwargs)
-        return x.with_inner_values(mapped_values)
+        mapped_values = self._original_op(x.flat_values, *args, **kwargs)
+        return x.with_flat_values(mapped_values)
       else:
         return self.NOT_SUPPORTED
 
@@ -191,8 +190,8 @@ class BinaryRaggedElementwiseDispatcher(dispatch.OpDispatcher):
       return self.NOT_SUPPORTED
 
     if ((x_is_ragged and y_is_ragged) or
-        (x_is_ragged and x.inner_values.shape.ndims <= y.shape.ndims) or
-        (y_is_ragged and y.inner_values.shape.ndims <= x.shape.ndims)):
+        (x_is_ragged and x.flat_values.shape.ndims <= y.shape.ndims) or
+        (y_is_ragged and y.flat_values.shape.ndims <= x.shape.ndims)):
       bcast_shape = ragged_tensor_shape.broadcast_dynamic_shape(
           ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(x),
           ragged_tensor_shape.RaggedTensorDynamicShape.from_tensor(y))
@@ -201,13 +200,13 @@ class BinaryRaggedElementwiseDispatcher(dispatch.OpDispatcher):
       y = ragged_tensor_shape.broadcast_to(
           y, bcast_shape, broadcast_inner_dimensions=False)
 
-    x_values = x.inner_values if ragged_tensor.is_ragged(x) else x
-    y_values = y.inner_values if ragged_tensor.is_ragged(y) else y
+    x_values = x.flat_values if ragged_tensor.is_ragged(x) else x
+    y_values = y.flat_values if ragged_tensor.is_ragged(y) else y
     mapped_values = self._original_op(x_values, y_values, *args, **kwargs)
     if ragged_tensor.is_ragged(x):
-      return x.with_inner_values(mapped_values)
+      return x.with_flat_values(mapped_values)
     else:
-      return y.with_inner_values(mapped_values)
+      return y.with_flat_values(mapped_values)
 
 
 class RaggedDispatcher(dispatch.OpDispatcher):
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
index 2533c60c4e..82827aa2aa 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -32,6 +33,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.ops import ragged
 from tensorflow.python.ops import string_ops
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 # Constants listing various op types to test.  Each operation
@@ -126,8 +128,8 @@ BINARY_INT_OPS = [
 ]
 
 
-@test_util.run_v1_only('b/120545219')
-class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase,
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
                                parameterized.TestCase):
 
   def assertSameShape(self, x, y):
@@ -138,7 +140,7 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase,
       for (x_splits, y_splits) in zip(x.nested_row_splits, y.nested_row_splits):
         self.assertAllEqual(x_splits, y_splits)
       self.assertAllEqual(
-          array_ops.shape(x.inner_values), array_ops.shape(y.inner_values))
+          array_ops.shape(x.flat_values), array_ops.shape(y.flat_values))
     else:
       self.assertIsInstance(y, ops.Tensor)
       self.assertAllEqual(array_ops.shape(x), array_ops.shape(y))
@@ -215,19 +217,18 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase,
     result = op(x, **extra_args)
 
     # Run the wrapped op on the dense values, for comparison.
-    dense_x = x.inner_values if isinstance(x, ragged.RaggedTensor) else x
+    dense_x = x.flat_values if isinstance(x, ragged.RaggedTensor) else x
     expected_flat_values = array_ops.reshape(op(dense_x, **extra_args), [-1])
 
-    with self.test_session():
-      # Check that the result has the expected shape.
-      self.assertSameShape(x, result)
+    # Check that the result has the expected shape.
+    self.assertSameShape(x, result)
 
-      # Check that the result has the expected (flattened) values.
-      if isinstance(result, ragged.RaggedTensor):
-        result_flat_values = array_ops.reshape(result.inner_values, [-1])
-      else:
-        result_flat_values = array_ops.reshape(result, [-1])
-      self.assertAllEqual(expected_flat_values, result_flat_values)
+    # Check that the result has the expected (flattened) values.
+    if isinstance(result, ragged.RaggedTensor):
+      result_flat_values = array_ops.reshape(result.flat_values, [-1])
+    else:
+      result_flat_values = array_ops.reshape(result, [-1])
+    self.assertAllEqual(expected_flat_values, result_flat_values)
 
   @parameterized.parameters(
       [
@@ -327,21 +328,20 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase,
       result = op(x, y, **extra_args)
 
     # Run the wrapped op on the dense values, for comparison.
-    dense_x = x.inner_values if isinstance(x, ragged.RaggedTensor) else x
-    dense_y = y.inner_values if isinstance(y, ragged.RaggedTensor) else y
+    dense_x = x.flat_values if isinstance(x, ragged.RaggedTensor) else x
+    dense_y = y.flat_values if isinstance(y, ragged.RaggedTensor) else y
     expected_flat_values = array_ops.reshape(
         op(dense_x, dense_y, **extra_args), [-1])
 
-    with self.test_session():
-      # Check that the result has the expected shape.
-      self.assertSameShape(y, result)
+    # Check that the result has the expected shape.
+    self.assertSameShape(y, result)
 
-      # Check that the result has the expected (flattened) values.
-      if isinstance(result, ragged.RaggedTensor):
-        result_flat_values = array_ops.reshape(result.inner_values, [-1])
-      else:
-        result_flat_values = array_ops.reshape(result, [-1])
-      self.assertAllEqual(expected_flat_values, result_flat_values)
+    # Check that the result has the expected (flattened) values.
+    if isinstance(result, ragged.RaggedTensor):
+      result_flat_values = array_ops.reshape(result.flat_values, [-1])
+    else:
+      result_flat_values = array_ops.reshape(result, [-1])
+    self.assertAllEqual(expected_flat_values, result_flat_values)
 
   @parameterized.parameters(
       [
@@ -385,26 +385,27 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase,
 
     # Run the wrapped op on the dense values, for comparison.
     dense_inputs = [
-        x.inner_values if isinstance(x, ragged.RaggedTensor) else x
+        x.flat_values if isinstance(x, ragged.RaggedTensor) else x
         for x in inputs
     ]
     expected_flat_values = array_ops.reshape(
         op(dense_inputs, **extra_args), [-1])
 
-    with self.test_session():
-      # Check that the result has the expected shape.
-      self.assertSameShape(inputs[0], result)
+    # Check that the result has the expected shape.
+    self.assertSameShape(inputs[0], result)
 
-      # Check that the result has the expected (flattened) values.
-      if isinstance(result, ragged.RaggedTensor):
-        result_flat_values = array_ops.reshape(result.inner_values, [-1])
-      else:
-        result_flat_values = array_ops.reshape(result, [-1])
-      self.assertAllEqual(expected_flat_values, result_flat_values)
+    # Check that the result has the expected (flattened) values.
+    if isinstance(result, ragged.RaggedTensor):
+      result_flat_values = array_ops.reshape(result.flat_values, [-1])
+    else:
+      result_flat_values = array_ops.reshape(result, [-1])
+    self.assertAllEqual(expected_flat_values, result_flat_values)
 
   def testElementwiseOpUnknownRankError(self):
+    if context.executing_eagerly():
+      return
     x = ragged.constant([[1, 2], [3]])
-    y = ragged.from_row_splits(
+    y = ragged.RaggedTensor.from_row_splits(
         array_ops.placeholder_with_default([1, 2, 3], shape=None), x.row_splits)
     with self.assertRaisesRegexp(ValueError,
                                  r'Unable to broadcast: unknown rank'):
@@ -428,27 +429,22 @@ class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase,
     x = ragged.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
     y = ragged.convert_to_tensor_or_ragged_tensor(y, dtype=dtypes.int32)
     result = x + y
-    with self.cached_session():
-      self.assertEqual(result.eval().tolist(), expected)
+    self.assertRaggedEqual(result, expected)
 
   def testElementwiseOpShapeMismatch(self):
     x = ragged.constant([[1, 2, 3], [4, 5]])
     y = ragged.constant([[1, 2, 3], [4, 5, 6]])
-    with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                 'Incompatible shapes'):
-      with self.cached_session():
-        math_ops.add(x, y).eval()
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(math_ops.add(x, y))
 
   def testBinaryOpSparseAndRagged(self):
     x = ragged.constant([[1, 2, 3], [4, 5]])
     y = sparse_tensor.SparseTensor([[0, 0], [0, 1], [2, 0]], [1, 2, 3], [3, 2])
-    with self.assertRaises(TypeError):
-      with self.cached_session():
-        math_ops.add(x, y).eval()
+    with self.assertRaises((TypeError, ValueError)):
+      self.evaluate(math_ops.add(x, y))
 
-    with self.assertRaises(TypeError):
-      with self.cached_session():
-        math_ops.add_n([x, y]).eval()
+    with self.assertRaises((TypeError, ValueError)):
+      self.evaluate(math_ops.add_n([x, y]))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_eager_test.py b/tensorflow/python/ops/ragged/ragged_eager_test.py
index 731ff742aa..f1befbf961 100644
--- a/tensorflow/python/ops/ragged/ragged_eager_test.py
+++ b/tensorflow/python/ops/ragged/ragged_eager_test.py
@@ -17,17 +17,17 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import sys
 
 from absl.testing import parameterized
 
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
+                       parameterized.TestCase):
 
   @parameterized.parameters([
       dict(pylist=[[b'a', b'b'], [b'c']]),
@@ -36,21 +36,15 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
   ])
   def testRaggedTensorToList(self, pylist, ragged_rank=None):
     rt = ragged.constant(pylist, ragged_rank)
-    self.assertEqual(rt.tolist(), pylist)
-
-  expected = "RaggedTensor([['a', 'b'], ['c']])"
-  if sys.version_info[0] == 3:
-    expected = "RaggedTensor([[b'a', b'b'], [b'c']])"
+    self.assertRaggedEqual(rt, pylist)
 
   @parameterized.parameters([
-      dict(pylist=[['a', 'b'], ['c']],
-           expected=expected),
-      dict(pylist=[[[1, 2], [3]], [[4, 5, 6], [], [7]]],
-           expected='RaggedTensor([[[1, 2], [3]], [[4, 5, 6], [], [7]]])'),
+      dict(pylist=[[b'a', b'b'], [b'c']]),
+      dict(pylist=[[[1, 2], [3]], [[4, 5, 6], [], [7]]]),
   ])
-  def testRaggedTensorStr(self, pylist, expected):
+  def testRaggedTensorStr(self, pylist):
     rt = ragged.constant(pylist)
-    self.assertEqual(str(rt), expected)
+    self.assertEqual(str(rt), '<tf.RaggedTensor %s>' % pylist)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_expand_dims_op_test.py b/tensorflow/python/ops/ragged/ragged_expand_dims_op_test.py
index 3ff66973b6..072f330e3c 100644
--- a/tensorflow/python/ops/ragged/ragged_expand_dims_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_expand_dims_op_test.py
@@ -22,10 +22,12 @@ from absl.testing import parameterized
 
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedExpandDimsOpTest(test_util.TensorFlowTestCase,
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedExpandDimsOpTest(ragged_test_util.RaggedTensorTestCase,
                              parameterized.TestCase):
 
   # An example 4-d ragged tensor with shape [3, (D2), (D3), 2], and the
@@ -105,7 +107,6 @@ class RaggedExpandDimsOpTest(test_util.TensorFlowTestCase,
            expected=EXAMPLE4D_EXPAND_AXIS[4],
            expected_shape=[3, None, None, 2, 1]),
   ])  # pyformat: disable
-  @test_util.run_deprecated_v1
   def testRaggedExpandDims(self,
                            rt_input,
                            axis,
@@ -118,8 +119,7 @@ class RaggedExpandDimsOpTest(test_util.TensorFlowTestCase,
     if expected_shape is not None:
       self.assertEqual(expanded.shape.as_list(), expected_shape)
 
-    with self.test_session():
-      self.assertEqual(expanded.eval().tolist(), expected)
+    self.assertRaggedEqual(expanded, expected)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_factory_ops.py b/tensorflow/python/ops/ragged/ragged_factory_ops.py
index d1f301bc58..2c63e1c799 100644
--- a/tensorflow/python/ops/ragged/ragged_factory_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_factory_ops.py
@@ -21,11 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_tensor_value
 
@@ -56,8 +52,8 @@ def constant(pylist, dtype=None, ragged_rank=None, inner_shape=None, name=None):
       `pylist`.
     ragged_rank: An integer specifying the ragged rank of the returned
       `RaggedTensor`.  Must be nonnegative and less than `K`. Defaults to
-      `max(0, K - 1)` if `inner_shape` is not specified.  Defaults to
-      `max(0, K - 1 - len(inner_shape))` if `inner_shape` is specified.
+      `max(0, K - 1)` if `inner_shape` is not specified.  Defaults to `max(0, K
+      - 1 - len(inner_shape))` if `inner_shape` is specified.
     inner_shape: A tuple of integers specifying the shape for individual inner
       values in the returned `RaggedTensor`.  Defaults to `()` if `ragged_rank`
       is not specified.  If `ragged_rank` is specified, then a default is chosen
@@ -72,9 +68,10 @@ def constant(pylist, dtype=None, ragged_rank=None, inner_shape=None, name=None):
     ValueError: If the scalar values in `pylist` have inconsistent nesting
       depth; or if ragged_rank or inner_shape are incompatible with `pylist`.
   """
-  with ops.name_scope(name, 'RaggedConstant'):
-    return _constant_value(from_row_splits, constant_op.constant, pylist, dtype,
-                           ragged_rank, inner_shape)
+  with ops.name_scope(name, "RaggedConstant"):
+    return _constant_value(ragged_tensor.RaggedTensor.from_row_splits,
+                           constant_op.constant, pylist, dtype, ragged_rank,
+                           inner_shape)
 
 
 def constant_value(pylist, dtype=None, ragged_rank=None, inner_shape=None):
@@ -153,29 +150,29 @@ def _constant_value(ragged_factory, inner_factory, pylist, dtype, ragged_rank,
       depth; or if ragged_rank or inner_shape are incompatible with `pylist`.
   """
   if ragged_tensor.is_ragged(pylist):
-    raise TypeError('pylist may not be a RaggedTensor or RaggedTensorValue.')
+    raise TypeError("pylist may not be a RaggedTensor or RaggedTensorValue.")
 
   if not isinstance(pylist, (list, tuple)):
     # Scalar value
     if ragged_rank is not None and ragged_rank != 0:
-      raise ValueError('Invalid pylist=%r: incompatible with ragged_rank=%d' %
+      raise ValueError("Invalid pylist=%r: incompatible with ragged_rank=%d" %
                        (pylist, ragged_rank))
     if inner_shape is not None and inner_shape:
       raise ValueError(
-          'Invalid pylist=%r: incompatible with dim(inner_shape)=%d' %
+          "Invalid pylist=%r: incompatible with dim(inner_shape)=%d" %
           (pylist, len(inner_shape)))
     return inner_factory(pylist, dtype, ())
 
   if ragged_rank is not None and ragged_rank < 0:
     raise ValueError(
-        'Invalid ragged_rank=%r: must be nonnegative' % ragged_rank)
+        "Invalid ragged_rank=%r: must be nonnegative" % ragged_rank)
 
   # Find the depth of scalar values in `pylist`.
   scalar_depth, max_depth = _find_scalar_and_max_depth(pylist)
   if scalar_depth is not None:
     if max_depth > scalar_depth:
-      raise ValueError('Invalid pylist=%r: empty list nesting is greater '
-                       'than scalar value nesting' % pylist)
+      raise ValueError("Invalid pylist=%r: empty list nesting is greater "
+                       "than scalar value nesting" % pylist)
 
   # If both inner_shape and ragged_rank were specified, then check that
   # they are compatible with pylist.
@@ -184,8 +181,8 @@ def _constant_value(ragged_factory, inner_factory, pylist, dtype, ragged_rank,
     if ((scalar_depth is not None and expected_depth != scalar_depth) or
         (scalar_depth is None and expected_depth < max_depth)):
       raise ValueError(
-          'Invalid pylist=%r: incompatible with ragged_rank=%d '
-          'and dim(inner_shape)=%d' % (pylist, ragged_rank, len(inner_shape)))
+          "Invalid pylist=%r: incompatible with ragged_rank=%d "
+          "and dim(inner_shape)=%d" % (pylist, ragged_rank, len(inner_shape)))
 
   # Check if the result is a `Tensor`.
   if (ragged_rank == 0 or
@@ -221,7 +218,7 @@ def _constant_value(ragged_factory, inner_factory, pylist, dtype, ragged_rank,
     values = concatenated_values
 
   values = inner_factory(
-      values, dtype=dtype, shape=(len(values),) + inner_shape, name='values')
+      values, dtype=dtype, shape=(len(values),) + inner_shape, name="values")
   for row_splits in reversed(nested_splits):
     values = ragged_factory(values, row_splits)
   return values
@@ -249,7 +246,7 @@ def _find_scalar_and_max_depth(pylist):
       child_scalar_depth, child_max_depth = _find_scalar_and_max_depth(child)
       if child_scalar_depth is not None:
         if scalar_depth is not None and scalar_depth != child_scalar_depth + 1:
-          raise ValueError('all scalar values must have the same nesting depth')
+          raise ValueError("all scalar values must have the same nesting depth")
         scalar_depth = child_scalar_depth + 1
       max_depth = max(max_depth, child_max_depth + 1)
     return (scalar_depth, max_depth)
@@ -273,436 +270,24 @@ def _default_inner_shape_for_pylist(pylist, ragged_rank):
     """Checks that `item` has a consistent shape matching `shape`."""
     is_nested = isinstance(item, (list, tuple))
     if is_nested != bool(shape):
-      raise ValueError('inner values have inconsistent shape')
+      raise ValueError("inner values have inconsistent shape")
     if is_nested:
       if shape[0] != len(item):
-        raise ValueError('inner values have inconsistent shape')
+        raise ValueError("inner values have inconsistent shape")
       for child in item:
         check_inner_shape(child, shape[1:])
 
   # Collapse the ragged layers to get the list of inner values.
-  inner_values = pylist
+  flat_values = pylist
   for dim in range(ragged_rank):
-    if not all(isinstance(v, (list, tuple)) for v in inner_values):
-      raise ValueError('pylist has scalar values depth %d, but ragged_rank=%d '
-                       'requires scalar value depth greater than %d' %
+    if not all(isinstance(v, (list, tuple)) for v in flat_values):
+      raise ValueError("pylist has scalar values depth %d, but ragged_rank=%d "
+                       "requires scalar value depth greater than %d" %
                        (dim + 1, ragged_rank, ragged_rank))
-    inner_values = sum((list(v) for v in inner_values), [])
+    flat_values = sum((list(v) for v in flat_values), [])
 
   # Compute the inner shape looking only at the leftmost elements; and then
   # use check_inner_shape to verify that other elements have the same shape.
-  inner_shape = get_inner_shape(inner_values)
-  check_inner_shape(inner_values, inner_shape)
+  inner_shape = get_inner_shape(flat_values)
+  check_inner_shape(flat_values, inner_shape)
   return inner_shape[1:]
-
-
-#===============================================================================
-# Convert value -> tensor
-#===============================================================================
-def convert_to_tensor_or_ragged_tensor(value,
-                                       dtype=None,
-                                       preferred_dtype=None,
-                                       name=None):
-  """Converts value to a `RaggedTensor` or `Tensor`.
-
-  * If `value` is a `RaggedTensor`, then return it as-is.
-  * If `value` is a `RaggedTensorValue`, return a corresponding constant
-    `RaggedTensor`.
-  * Otherwise, use `convert_to_tensor` to convert `value` to a `Tensor`.
-
-  Args:
-    value: A `RaggedTensor`, a `RaggedTensorValue`, or an object whose type has
-      a registered `Tensor` conversion function.
-    dtype: Optional element type for the returned tensor.  If missing the type
-      is inferred from the type of `value`.
-    preferred_dtype: Optional element type for the returned tensor, used when
-      dtype is None.  This argument has no effect if `value` is already a
-      tensor, or when conversion is not possible.
-    name: Optional name to use if a new `Tensor` is created.
-
-  Returns:
-    A `Tensor` or `RaggedTensor`.
-  """
-  if isinstance(value, ragged_tensor.RaggedTensor):
-    if dtype and not dtype.is_compatible_with(value.dtype):
-      raise ValueError('Tensor conversion requested dtype %s for '
-                       'RaggedTensor with dtype %s: %r' %
-                       (dtype.name, value.dtype.name, value))
-    return value
-  elif isinstance(value, ragged_tensor_value.RaggedTensorValue):
-    with ops.name_scope(name, 'ConvertToTensorOrRaggedTensor', []):
-      inner_values = ops.convert_to_tensor(
-          value=value.inner_values,
-          dtype=dtype,
-          preferred_dtype=preferred_dtype,
-          name='inner_values')
-      return from_nested_row_splits(inner_values, value.nested_row_splits)
-  else:
-    return ops.convert_to_tensor(
-        value=value, dtype=dtype, preferred_dtype=preferred_dtype, name=name)
-
-
-#===============================================================================
-# Ops to construct RaggedTensor from row-partitioned values.
-#===============================================================================
-
-
-def from_value_rowids(values, value_rowids, nrows=None, name=None):
-  """Creates a `RaggedTensor` with rows partitioned by `value_rowids`.
-
-  The returned `RaggedTensor` corresponds with the python list defined by:
-
-  ```python
-  result = [[values[i] for i in range(len(values)) if value_rowids[i] == row]
-            for row in range(nrows)]
-  ```
-
-  Warning: currently, this needs to cast value_rowids to int64 before
-  converting, since `tf.bincount` only supports `int32`.
-
-  Args:
-    values: A potentially ragged tensor with shape `[nvals, ...]`.
-    value_rowids: A 1-D int64 tensor with shape `[nvals]`, which corresponds
-      one-to-one with `values`, and specifies each value's row index.  Must be
-      nonnegative, and must be sorted in ascending order.
-    nrows: An int64 scalar specifying the number of rows.  This should be
-      specified if the `RaggedTensor` may containing empty training rows.  Must
-      be greater than `value_rowids[-1]` (or zero if `value_rowids` is empty).
-      Defaults to `value_rowids[-1]` (or zero if `value_rowids` is empty).
-    name: A name prefix for the RaggedTensor (optional).
-
-  Returns:
-    A `RaggedTensor`.  `result.rank = values.rank + 1`.
-    `result.ragged_rank = values.ragged_rank + 1`.
-
-  Raises:
-    ValueError: If `nrows` is incompatible with `value_rowids`.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.from_value_rowids(
-    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
-    ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
-    ...     nrows=5)
-    >>> rt.eval().tolist()
-    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
-    ```
-  """
-  with ops.name_scope(name, 'RaggedFromValueRowIds',
-                      [values, value_rowids, nrows]):
-    values = convert_to_tensor_or_ragged_tensor(values, name='values')
-    value_rowids = ops.convert_to_tensor(
-        value_rowids, dtypes.int64, name='value_rowids')
-    if nrows is None:
-      const_rowids = tensor_util.constant_value(value_rowids)
-      if const_rowids is None:
-        nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1
-        const_nrows = None
-      else:
-        const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0
-        nrows = ops.convert_to_tensor(const_nrows, dtypes.int64, name='nrows')
-    else:
-      nrows = ops.convert_to_tensor(nrows, dtypes.int64, 'nrows')
-      const_nrows = tensor_util.constant_value(nrows)
-      if const_nrows is not None:
-        if const_nrows < 0:
-          raise ValueError('Expected nrows >= 0; got %d' % const_nrows)
-        const_rowids = tensor_util.constant_value(value_rowids)
-        if const_rowids is not None and const_rowids.size > 0:
-          if not const_nrows >= const_rowids[-1] + 1:
-            raise ValueError(
-                'Expected nrows >= value_rowids[-1] + 1; got nrows=%d, '
-                'value_rowids[-1]=%d' % (const_nrows, const_rowids[-1]))
-
-    value_rowids.shape.assert_has_rank(1)
-    nrows.shape.assert_has_rank(0)
-    values.shape[:1].assert_is_compatible_with(value_rowids.shape)
-
-    # Convert value_rowids & nrows to row_splits.
-    # Note: we don't use segment_ids_to_row_splits() here because we want
-    # to save the intermediate value `row_lengths`, so we can cache it.
-    # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the cast
-    # (Remove the warning in the docstring when we do.)
-    value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
-    nrows_int32 = math_ops.cast(nrows, dtypes.int32)
-    row_lengths = math_ops.bincount(
-        value_rowids_int32,
-        minlength=nrows_int32,
-        maxlength=nrows_int32,
-        dtype=dtypes.int64)
-    row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)
-    if const_nrows is not None:
-      row_lengths.set_shape([const_nrows])
-      row_splits.set_shape([const_nrows + 1])
-
-    return ragged_tensor.RaggedTensor(
-        values,
-        row_splits,
-        cached_row_lengths=row_lengths,
-        cached_value_rowids=value_rowids,
-        cached_nrows=nrows,
-        internal=True)
-
-
-def from_row_splits(values, row_splits, name=None):
-  """Creates a `RaggedTensor` with rows partitioned by `row_splits`.
-
-  The returned `RaggedTensor` corresponds with the python list defined by:
-
-  ```python
-  result = [values[row_splits[i]:row_splits[i + 1]]
-            for i in range(len(row_splits) - 1)]
-  ```
-
-  Args:
-    values: A potentially ragged tensor with shape `[nvals, ...]`.
-    row_splits: A 1-D int64 tensor with shape `[nrows+1]`.  Must not be empty,
-      and must be sorted in ascending order.  `row_splits[0]` must be zero and
-      `row_splits[-1]` must be `nvals`.
-    name: A name prefix for the RaggedTensor (optional).
-
-  Returns:
-    A `RaggedTensor`.  `result.rank = values.rank + 1`.
-    `result.ragged_rank = values.ragged_rank + 1`.
-
-  Raises:
-    ValueError: If `row_splits` is an empty list.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.from_row_splits(
-    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
-    ...     row_splits=[0, 4, 4, 7, 8, 8])
-    >>> rt.eval().tolist()
-    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
-    ```
-  """
-  if isinstance(row_splits, (list, tuple)) and not row_splits:
-    raise ValueError('row_splits tensor may not be empty.')
-  with ops.name_scope(name, 'RaggedFromRowSplits', [values, row_splits]):
-    values = convert_to_tensor_or_ragged_tensor(values, name='values')
-    row_splits = ops.convert_to_tensor(row_splits, dtypes.int64, 'row_splits')
-    row_splits.shape.assert_has_rank(1)
-    return ragged_tensor.RaggedTensor(
-        values=values, row_splits=row_splits, internal=True)
-
-
-def from_row_lengths(values, row_lengths, name=None):
-  """Creates a `RaggedTensor` with rows partitioned by `row_lengths`.
-
-  The returned `RaggedTensor` corresponds with the python list defined by:
-
-  ```python
-  result = [[values.pop(0) for i in range(length)]
-            for length in row_lengths]
-  ```
-
-  Args:
-    values: A potentially ragged tensor with shape `[nvals, ...]`.
-    row_lengths: A 1-D int64 tensor with shape `[nrows]`.  Must be nonnegative.
-      `sum(row_lengths)` must be `nvals`.
-    name: A name prefix for the RaggedTensor (optional).
-
-  Returns:
-    A `RaggedTensor`.  `result.rank = values.rank + 1`.
-    `result.ragged_rank = values.ragged_rank + 1`.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.from_row_lengths(
-    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
-    ...     row_lengths=[4, 0, 3, 1, 0])
-    >>> rt.eval().tolist()
-    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
-    ```
-  """
-  with ops.name_scope(name, 'RaggedFromRowLengths', [values, row_lengths]):
-    values = convert_to_tensor_or_ragged_tensor(values, name='values')
-    row_lengths = ops.convert_to_tensor(row_lengths, dtypes.int64,
-                                        'row_lengths')
-    row_lengths.shape.assert_has_rank(1)
-    row_limits = math_ops.cumsum(row_lengths)
-    row_splits = array_ops.concat([[0], row_limits], axis=0)
-    return ragged_tensor.RaggedTensor(
-        values=values,
-        row_splits=row_splits,
-        cached_row_lengths=row_lengths,
-        internal=True)
-
-
-def from_row_starts(values, row_starts, name=None):
-  """Creates a `RaggedTensor` with rows partitioned by `row_starts`.
-
-  Equivalent to: `from_row_splits(values, concat([row_starts, nvals]))`.
-
-  Args:
-    values: A potentially ragged tensor with shape `[nvals, ...]`.
-    row_starts: A 1-D int64 tensor with shape `[nrows]`.  Must be nonnegative
-      and sorted in ascending order.  If `nrows>0`, then `row_starts[0]` must be
-      zero.
-    name: A name prefix for the RaggedTensor (optional).
-
-  Returns:
-    A `RaggedTensor`.  `result.rank = values.rank + 1`.
-    `result.ragged_rank = values.ragged_rank + 1`.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.from_row_starts(
-    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
-    ...     row_starts=[0, 4, 4, 7, 8])
-    >>> rt.eval().tolist()
-    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
-    ```
-  """
-  with ops.name_scope(name, 'RaggedFromRowStarts', [values, row_starts]):
-    values = convert_to_tensor_or_ragged_tensor(values, name='values')
-    row_starts = ops.convert_to_tensor(row_starts, dtypes.int64, 'row_starts')
-    row_starts.shape.assert_has_rank(1)
-    nvals = array_ops.shape(values, out_type=dtypes.int64)[:1]
-    row_splits = array_ops.concat([row_starts, nvals], axis=0)
-    return ragged_tensor.RaggedTensor(
-        values=values, row_splits=row_splits, internal=True)
-
-
-def from_row_limits(values, row_limits, name=None):
-  """Creates a `RaggedTensor` with rows partitioned by `row_limits`.
-
-  Equivalent to: `from_row_splits(values, concat([0, row_limits]))`.
-
-  Args:
-    values: A potentially ragged tensor with shape `[nvals, ...]`.
-    row_limits: A 1-D int64 tensor with shape `[nrows]`.  Must be sorted in
-      ascending order.  If `nrows>0`, then `row_limits[-1]` must be `nvals`.
-    name: A name prefix for the RaggedTensor (optional).
-
-  Returns:
-    A `RaggedTensor`.  `result.rank = values.rank + 1`.
-    `result.ragged_rank = values.ragged_rank + 1`.
-
-  #### Example:
-    ```python
-    >>> rt = ragged.from_row_limits(
-    ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
-    ...     row_limits=[4, 4, 7, 8, 8])
-    >>> rt.eval().tolist()
-    [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
-    ```
-  """
-  with ops.name_scope(name, 'RaggedFromRowLimits', [values, row_limits]):
-    values = convert_to_tensor_or_ragged_tensor(values, name='values')
-    row_limits = ops.convert_to_tensor(row_limits, dtypes.int64, 'row_limits')
-    row_limits.shape.assert_has_rank(1)
-    zero = array_ops.zeros([1], dtypes.int64)
-    row_splits = array_ops.concat([zero, row_limits], axis=0)
-    return ragged_tensor.RaggedTensor(
-        values=values, row_splits=row_splits, internal=True)
-
-
-def from_nested_value_rowids(inner_values,
-                             nested_value_rowids,
-                             nested_nrows=None,
-                             name=None):
-  """Creates a `RaggedTensor` from a nested list of `value_rowids` tensors.
-
-  Equivalent to:
-
-  ```python
-  result = inner_values
-  for (value_rowids, nrows) in reversed(zip(nested_value_rowids, nested_nrows)):
-    result = from_value_rowids(result, value_rowids, nrows)
-  ```
-
-  Args:
-    inner_values: A potentially ragged tensor.
-    nested_value_rowids: A list of 1-D int64 tensors.  The `i`th tensor is used
-      as the `value_rowids` for the `i`th ragged dimension.
-    nested_nrows: A list of int64 scalars.  The `i`th scalar is used as the
-      `nrows` for the `i`th ragged dimension.
-    name: A name prefix for the RaggedTensor (optional).
-
-  Returns:
-    A `RaggedTensor` (or `inner_values` if `nested_value_rowids` is empty).
-
-  Raises:
-    ValueError: If `len(nested_values_rowids) != len(nested_nrows)`.
-  """
-  if isinstance(nested_value_rowids, ops.Tensor):
-    raise TypeError('nested_value_rowids must be a list of Tensors')
-  if nested_nrows is None:
-    nested_nrows = [None] * len(nested_value_rowids)
-  else:
-    if isinstance(nested_nrows, ops.Tensor):
-      raise TypeError('nested_nrows must be a list of Tensors')
-    if len(nested_nrows) != len(nested_value_rowids):
-      raise ValueError('nested_nrows must have the same length as '
-                       'nested_value_rowids')
-
-  with ops.name_scope(
-      name, 'RaggedFromNestedValueRowIds',
-      [inner_values] + list(nested_value_rowids) + list(nested_nrows)):
-    result = inner_values
-    for value_rowids, nrows in reversed(
-        list(zip(nested_value_rowids, nested_nrows))):
-      result = from_value_rowids(result, value_rowids, nrows)
-    return result
-
-
-def from_nested_row_splits(inner_values, nested_row_splits, name=None):
-  """Creates a `RaggedTensor` from a nested list of `row_splits` tensors.
-
-  Equivalent to:
-
-  ```python
-  result = inner_values
-  for row_splits in reversed(nested_row_splits):
-    result = from_row_splits(result, row_splits)
-  ```
-
-  Args:
-    inner_values: A potentially ragged tensor.
-    nested_row_splits: A list of 1-D int64 tensors.  The `i`th tensor is used as
-      the `row_splits` for the `i`th ragged dimension.
-    name: A name prefix for the RaggedTensor (optional).
-
-  Returns:
-    A `RaggedTensor` (or `inner_values` if `nested_row_splits` is empty).
-  """
-  if isinstance(nested_row_splits, ops.Tensor):
-    raise TypeError('nested_row_splits must be a list of Tensors')
-  with ops.name_scope(name, 'RaggedFromNestedRowSplits',
-                      [inner_values] + list(nested_row_splits)):
-    result = inner_values
-    for splits in reversed(nested_row_splits):
-      result = from_row_splits(result, splits)
-    return result
-
-
-def from_nested_row_lengths(inner_values, nested_row_lengths, name=None):
-  """Creates a `RaggedTensor` from a nested list of `row_lengths` tensors.
-
-  Equivalent to:
-
-  ```python
-  result = inner_values
-  for row_lengths in reversed(nested_row_lengths):
-    result = from_row_lengths(result, row_lengths)
-  ```
-
-  Args:
-    inner_values: A potentially ragged tensor.
-    nested_row_lengths: A list of 1-D int64 tensors.  The `i`th tensor is used
-      as the `row_lengths` for the `i`th ragged dimension.
-    name: A name prefix for the RaggedTensor (optional).
-
-  Returns:
-    A `RaggedTensor` (or `inner_values` if `nested_row_lengths` is empty).
-  """
-  if isinstance(nested_row_lengths, ops.Tensor):
-    raise TypeError('nested_row_lengths must be a list of Tensors')
-  with ops.name_scope(name, 'RaggedFromNestedRowlengths',
-                      [inner_values] + list(nested_row_lengths)):
-    result = inner_values
-    for lengths in reversed(nested_row_lengths):
-      result = from_row_lengths(result, lengths)
-    return result
diff --git a/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py b/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
index 674dbab112..07cf910202 100644
--- a/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
@@ -12,77 +12,77 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.from_sparse."""
+"""Tests for RaggedTensor.from_sparse."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
+from tensorflow.python.ops.ragged import RaggedTensor
 from tensorflow.python.platform import googletest
 
 
-class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
 
-  @test_util.run_v1_only('b/120545219')
   def testDocStringExample(self):
     st = sparse_tensor.SparseTensor(
         indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]],
         values=[1, 2, 3, 4, 5],
         dense_shape=[4, 3])
-    rt = ragged.from_sparse(st)
+    rt = RaggedTensor.from_sparse(st)
 
-    with self.test_session():
-      self.assertEqual(rt.eval().tolist(), [[1, 2, 3], [4], [], [5]])
+    self.assertRaggedEqual(rt, [[1, 2, 3], [4], [], [5]])
 
-  @test_util.run_v1_only('b/120545219')
   def testEmpty(self):
     st = sparse_tensor.SparseTensor(
         indices=array_ops.zeros([0, 2], dtype=dtypes.int64),
         values=[],
         dense_shape=[4, 3])
-    rt = ragged.from_sparse(st)
+    rt = RaggedTensor.from_sparse(st)
 
-    with self.test_session():
-      self.assertEqual(rt.eval().tolist(), [[], [], [], []])
+    self.assertRaggedEqual(rt, [[], [], [], []])
 
-  @test_util.run_deprecated_v1
   def testBadSparseTensorRank(self):
     st1 = sparse_tensor.SparseTensor(indices=[[0]], values=[0], dense_shape=[3])
+    self.assertRaisesRegexp(ValueError, r'rank\(st_input\) must be 2',
+                            RaggedTensor.from_sparse, st1)
+
     st2 = sparse_tensor.SparseTensor(
         indices=[[0, 0, 0]], values=[0], dense_shape=[3, 3, 3])
-    st3 = sparse_tensor.SparseTensor(
-        indices=array_ops.placeholder(dtypes.int64),
-        values=[0],
-        dense_shape=array_ops.placeholder(dtypes.int64))
-    self.assertRaisesRegexp(ValueError, r'rank\(st_input\) must be 2',
-                            ragged.from_sparse, st1)
-    self.assertRaisesRegexp(ValueError, r'rank\(st_input\) must be 2',
-                            ragged.from_sparse, st2)
     self.assertRaisesRegexp(ValueError, r'rank\(st_input\) must be 2',
-                            ragged.from_sparse, st3)
+                            RaggedTensor.from_sparse, st2)
+
+    if not context.executing_eagerly():
+      st3 = sparse_tensor.SparseTensor(
+          indices=array_ops.placeholder(dtypes.int64),
+          values=[0],
+          dense_shape=array_ops.placeholder(dtypes.int64))
+      self.assertRaisesRegexp(ValueError, r'rank\(st_input\) must be 2',
+                              RaggedTensor.from_sparse, st3)
 
-  @test_util.run_v1_only('b/120545219')
   def testGoodPartialSparseTensorRank(self):
-    st1 = sparse_tensor.SparseTensor(
-        indices=[[0, 0]],
-        values=[0],
-        dense_shape=array_ops.placeholder(dtypes.int64))
-    st2 = sparse_tensor.SparseTensor(
-        indices=array_ops.placeholder(dtypes.int64),
-        values=[0],
-        dense_shape=[4, 3])
+    if not context.executing_eagerly():
+      st1 = sparse_tensor.SparseTensor(
+          indices=[[0, 0]],
+          values=[0],
+          dense_shape=array_ops.placeholder(dtypes.int64))
+      st2 = sparse_tensor.SparseTensor(
+          indices=array_ops.placeholder(dtypes.int64),
+          values=[0],
+          dense_shape=[4, 3])
 
-    # Shouldn't throw ValueError
-    ragged.from_sparse(st1)
-    ragged.from_sparse(st2)
+      # Shouldn't throw ValueError
+      RaggedTensor.from_sparse(st1)
+      RaggedTensor.from_sparse(st2)
 
-  @test_util.run_v1_only('b/120545219')
   def testNonRaggedSparseTensor(self):
     # "index_suffix" means the value of the innermost dimension of the index
     # (i.e., indices[i][-1]).
@@ -92,22 +92,21 @@ class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
     # index_suffix of first index is not zero.
     st1 = sparse_tensor.SparseTensor(
         indices=[[0, 1], [0, 2], [2, 0]], values=[1, 2, 3], dense_shape=[3, 3])
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 r'.*SparseTensor is not right-ragged'):
+      self.evaluate(RaggedTensor.from_sparse(st1))
     # index_suffix of an index that starts a new row is not zero.
     st2 = sparse_tensor.SparseTensor(
         indices=[[0, 0], [0, 1], [2, 1]], values=[1, 2, 3], dense_shape=[3, 3])
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 r'.*SparseTensor is not right-ragged'):
+      self.evaluate(RaggedTensor.from_sparse(st2))
     # index_suffix of an index that continues a row skips a cell.
     st3 = sparse_tensor.SparseTensor(
         indices=[[0, 1], [0, 1], [0, 3]], values=[1, 2, 3], dense_shape=[3, 3])
-    rt1 = ragged.from_sparse(st1)
-    rt2 = ragged.from_sparse(st2)
-    rt3 = ragged.from_sparse(st3)
-    with self.test_session():
-      self.assertRaisesRegexp(errors.InvalidArgumentError,
-                              r'.*SparseTensor is not right-ragged', rt1.eval)
-      self.assertRaisesRegexp(errors.InvalidArgumentError,
-                              r'.*SparseTensor is not right-ragged', rt2.eval)
-      self.assertRaisesRegexp(errors.InvalidArgumentError,
-                              r'.*SparseTensor is not right-ragged', rt3.eval)
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 r'.*SparseTensor is not right-ragged'):
+      self.evaluate(RaggedTensor.from_sparse(st3))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py b/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
index 09c17fad61..6a3d639c5e 100644
--- a/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.from_tensor."""
+"""Tests for RaggedTensor.from_tensor."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -24,29 +24,26 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
+from tensorflow.python.ops.ragged import RaggedTensor
 from tensorflow.python.platform import googletest
 
 
-class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
-                             parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase,
+                                 parameterized.TestCase):
 
-  @test_util.run_v1_only('b/120545219')
   def testDocStringExamples(self):
-    # The examples from ragged.from_tensor.__doc__.
+    # The examples from RaggedTensor.from_tensor.__doc__.
     dt = constant_op.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
-    with self.test_session():
-      self.assertEqual(
-          ragged.from_tensor(dt).eval().tolist(),
-          [[5, 7, 0], [0, 3, 0], [6, 0, 0]])
+    self.assertRaggedEqual(
+        RaggedTensor.from_tensor(dt), [[5, 7, 0], [0, 3, 0], [6, 0, 0]])
 
-      self.assertEqual(
-          ragged.from_tensor(dt, lengths=[1, 0, 3]).eval().tolist(),
-          [[5], [], [6, 0, 0]])
+    self.assertRaggedEqual(
+        RaggedTensor.from_tensor(dt, lengths=[1, 0, 3]), [[5], [], [6, 0, 0]])
 
-      self.assertEqual(
-          ragged.from_tensor(dt, padding=0).eval().tolist(),
-          [[5, 7], [0, 3], [6]])
+    self.assertRaggedEqual(
+        RaggedTensor.from_tensor(dt, padding=0), [[5, 7], [0, 3], [6]])
 
   @parameterized.parameters(
       # 2D test cases, no length or padding.
@@ -263,7 +260,6 @@ class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
                        [[[5, 6], [7]], [[0, 8], []]]]
       },
   )  # pyformat: disable
-  @test_util.run_v1_only('b/120545219')
   def testRaggedFromTensor(self,
                            tensor,
                            expected,
@@ -271,30 +267,27 @@ class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
                            padding=None,
                            ragged_rank=1):
     dt = constant_op.constant(tensor)
-    rt = ragged.from_tensor(dt, lengths, padding, ragged_rank)
-    self.assertEqual(type(rt), ragged.RaggedTensor)
+    rt = RaggedTensor.from_tensor(dt, lengths, padding, ragged_rank)
+    self.assertEqual(type(rt), RaggedTensor)
     self.assertEqual(rt.ragged_rank, ragged_rank)
     self.assertTrue(
         dt.shape.is_compatible_with(rt.shape),
         '%s is incompatible with %s' % (dt.shape, rt.shape))
-    with self.test_session():
-      self.assertEqual(rt.eval().tolist(), expected)
+    self.assertRaggedEqual(rt, expected)
 
-  @test_util.run_deprecated_v1
   def testHighDimensions(self):
     # Use distinct prime numbers for all dimension shapes in this test, so
     # we can see any errors that are caused by mixing up dimension sizes.
     dt = array_ops.reshape(
         math_ops.range(3 * 5 * 7 * 11 * 13 * 17), [3, 5, 7, 11, 13, 17])
     for ragged_rank in range(1, 4):
-      rt = ragged.from_tensor(dt, ragged_rank=ragged_rank)
-      self.assertEqual(type(rt), ragged.RaggedTensor)
+      rt = RaggedTensor.from_tensor(dt, ragged_rank=ragged_rank)
+      self.assertEqual(type(rt), RaggedTensor)
       self.assertEqual(rt.ragged_rank, ragged_rank)
       self.assertTrue(
           dt.shape.is_compatible_with(rt.shape),
           '%s is incompatible with %s' % (dt.shape, rt.shape))
-      with self.test_session():
-        self.assertEqual(rt.eval().tolist(), self.evaluate(dt).tolist())
+      self.assertRaggedEqual(rt, self.evaluate(dt).tolist())
 
   @parameterized.parameters(
       # With no padding or lengths
@@ -398,15 +391,13 @@ class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
           'expected': [[], []]
       },
   )
-  @test_util.run_v1_only('b/120545219')
   def testEmpty(self, dt_shape, expected, lengths=None, padding=None):
     dt = array_ops.zeros(dt_shape)
-    rt = ragged.from_tensor(dt, lengths, padding)
-    self.assertEqual(type(rt), ragged.RaggedTensor)
+    rt = RaggedTensor.from_tensor(dt, lengths, padding)
+    self.assertEqual(type(rt), RaggedTensor)
     self.assertEqual(rt.ragged_rank, 1)
     self.assertTrue(dt.shape.is_compatible_with(rt.shape))
-    with self.test_session():
-      self.assertEqual(rt.eval().tolist(), expected)
+    self.assertRaggedEqual(rt, expected)
 
   @parameterized.parameters(
       {
@@ -423,7 +414,7 @@ class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
       {
           'tensor': [[1]],
           'padding': 'a',
-          'error': (TypeError, "Expected int32, got 'a'.*")
+          'error': (TypeError, '.*')
       },
       {
           'tensor': [[1]],
@@ -451,7 +442,6 @@ class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
           'error': (ValueError, r'ragged_rank must be greater than 0; got -1')
       },
   )
-  @test_util.run_deprecated_v1
   def testErrors(self,
                  tensor,
                  lengths=None,
@@ -459,8 +449,8 @@ class RaggedFromTensorOpTest(test_util.TensorFlowTestCase,
                  ragged_rank=1,
                  error=None):
     dt = constant_op.constant(tensor)
-    self.assertRaisesRegexp(error[0], error[1], ragged.from_tensor, dt, lengths,
-                            padding, ragged_rank)
+    self.assertRaisesRegexp(error[0], error[1], RaggedTensor.from_tensor, dt,
+                            lengths, padding, ragged_rank)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_functional_ops.py b/tensorflow/python/ops/ragged/ragged_functional_ops.py
index 6b71d88435..751f2c7359 100644
--- a/tensorflow/python/ops/ragged/ragged_functional_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_functional_ops.py
@@ -19,15 +19,14 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.framework import ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_util
 
 
-def map_inner_values(op, *args, **kwargs):
+def map_flat_values(op, *args, **kwargs):
   """Applies `op` to the inner values of one or more RaggedTensors.
 
-  Replaces any `RaggedTensor` in `args` or `kwargs` with its `inner_values`
+  Replaces any `RaggedTensor` in `args` or `kwargs` with its `flat_values`
   tensor, and then calls `op`.  Returns a `RaggedTensor` that is constructed
   from the input `RaggedTensor`s' `splits` and the value returned by
   the `op`.
@@ -39,20 +38,20 @@ def map_inner_values(op, *args, **kwargs):
 
   ```python
   >>> rt = ragged.constant([[1, 2, 3], [], [4, 5], [6]])
-  >>> ragged.map_inner_values(tf.ones_like, rt).eval().tolist()
+  >>> ragged.map_flat_values(tf.ones_like, rt).eval().tolist()
   [[1, 1, 1], [], [1, 1], [1]]
-  >>> ragged.map_inner_values(tf.multiply, rt, rt).eval().tolist()
+  >>> ragged.map_flat_values(tf.multiply, rt, rt).eval().tolist()
   [[1, 4, 9], [], [16, 25], [36]]
-  >>> ragged.map_inner_values(tf.add, rt, 5).eval().tolist()
+  >>> ragged.map_flat_values(tf.add, rt, 5).eval().tolist()
   [[6, 7, 8], [], [9, 10], [11]]
   ```
 
   Args:
-    op: The operation that should be applied to the RaggedTensor `inner_values`.
+    op: The operation that should be applied to the RaggedTensor `flat_values`.
       `op` is typically an element-wise operation (such as math_ops.add), but
       any operation that preserves the size of the outermost dimension can be
       used.  I.e., `shape[0]` of the value returned by `op` must match
-      `shape[0]` of the `RaggedTensor`s' `inner_values` tensors.
+      `shape[0]` of the `RaggedTensor`s' `flat_values` tensors.
     *args: Arguments for `op`.
     **kwargs: Keyword arguments for `op`.
 
@@ -66,8 +65,8 @@ def map_inner_values(op, *args, **kwargs):
   # Replace RaggedTensors with their values; and collect the splits tensors
   # from each RaggedTensor.
   nested_splits_lists = []
-  inner_args = _replace_ragged_with_inner_values(args, nested_splits_lists)
-  inner_kwargs = _replace_ragged_with_inner_values(kwargs, nested_splits_lists)
+  inner_args = _replace_ragged_with_flat_values(args, nested_splits_lists)
+  inner_kwargs = _replace_ragged_with_flat_values(kwargs, nested_splits_lists)
   if not nested_splits_lists:
     return op(*args, **kwargs)
 
@@ -75,15 +74,15 @@ def map_inner_values(op, *args, **kwargs):
       ragged_util.assert_splits_match(nested_splits_lists)):
     # Delegate to op, and then compose the result from the transformed values
     # and the splits.
-    return ragged_factory_ops.from_nested_row_splits(
+    return ragged_tensor.RaggedTensor.from_nested_row_splits(
         op(*inner_args, **inner_kwargs), nested_splits_lists[0])
 
 
-def _replace_ragged_with_inner_values(value, nested_splits_lists):
-  """Replace RaggedTensors with their inner_values, and record their splits.
+def _replace_ragged_with_flat_values(value, nested_splits_lists):
+  """Replace RaggedTensors with their flat_values, and record their splits.
 
   Returns a copy of `value`, with any nested `RaggedTensor`s replaced by their
-  `inner_values` tensor.  Looks inside lists, tuples, and dicts.
+  `flat_values` tensor.  Looks inside lists, tuples, and dicts.
 
   Appends each `RaggedTensor`'s `nested_splits` to `nested_splits_lists`.
 
@@ -97,13 +96,13 @@ def _replace_ragged_with_inner_values(value, nested_splits_lists):
   """
   # Base case
   if ragged_tensor.is_ragged(value):
-    value = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(value)
+    value = ragged_tensor.convert_to_tensor_or_ragged_tensor(value)
     nested_splits_lists.append(value.nested_row_splits)
-    return value.inner_values
+    return value.flat_values
 
   # Recursion cases
   def recurse(v):
-    return _replace_ragged_with_inner_values(v, nested_splits_lists)
+    return _replace_ragged_with_flat_values(v, nested_splits_lists)
 
   if isinstance(value, list):
     return [recurse(v) for v in value]
diff --git a/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py b/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
index b0a4fe9d31..6673192752 100644
--- a/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
@@ -21,15 +21,18 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-@test_util.run_v1_only('b/120545219')
-class RaggedGatherNdOpTest(test_util.TensorFlowTestCase,
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedGatherNdOpTest(ragged_test_util.RaggedTensorTestCase,
                            parameterized.TestCase):
 
   DOCSTRING_PARAMS = [[['000', '001'], ['010']],
@@ -186,14 +189,11 @@ class RaggedGatherNdOpTest(test_util.TensorFlowTestCase,
   ])  # pyformat: disable
   def testRaggedGatherNd(self, descr, params, indices, expected):
     result = ragged.gather_nd(params, indices)
-    self.assertEqual(
-        getattr(result, 'ragged_rank', 0), getattr(expected, 'ragged_rank', 0))
-    with self.test_session() as sess:
-      if hasattr(expected, 'tolist'):
-        expected = expected.tolist()
-      self.assertEqual(self.evaluate(result).tolist(), expected)
+    self.assertRaggedEqual(result, expected)
 
   def testRaggedGatherNdUnknownRankError(self):
+    if context.executing_eagerly():
+      return
     params = ragged.constant([['a', 'b'], ['c', 'd']])
     indices1 = array_ops.placeholder(dtypes.int32, shape=None)
     indices2 = array_ops.placeholder(dtypes.int32, shape=[None])
@@ -209,21 +209,20 @@ class RaggedGatherNdOpTest(test_util.TensorFlowTestCase,
       dict(
           params=['a'],
           indices=0,
-          message='Shape must be at least rank 1 but is rank 0'
-          " for 'GatherNd'"),
+          error=(ValueError, errors.InvalidArgumentError)),
       dict(
           params=ragged.constant_value([['a']]),
           indices=0,
           message='indices.rank must be at least 1.'),
       dict(
           params=['a', 'b', 'c'],
-          indices=ragged.constant([[0]]),
+          indices=ragged.constant_value([[0]]),
           message='The innermost dimension of indices may not be ragged'),
   ])
   def testRaggedGatherNdStaticError(self,
                                     params,
                                     indices,
-                                    message,
+                                    message=None,
                                     error=ValueError):
     with self.assertRaisesRegexp(error, message):
       ragged.gather_nd(params, indices)
diff --git a/tensorflow/python/ops/ragged/ragged_gather_op_test.py b/tensorflow/python/ops/ragged/ragged_gather_op_test.py
index d371745b73..42efdc8a7d 100644
--- a/tensorflow/python/ops/ragged/ragged_gather_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_gather_op_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -25,90 +26,75 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedTensorOpsTest(test_util.TensorFlowTestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedGatherOpTest(ragged_test_util.RaggedTensorTestCase):
 
-  @test_util.run_v1_only('b/120545219')
   def testDocStringExamples(self):
     params = constant_op.constant(['a', 'b', 'c', 'd', 'e'])
     indices = constant_op.constant([3, 1, 2, 1, 0])
     ragged_params = ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
     ragged_indices = ragged.constant([[3, 1, 2], [1], [], [0]])
-    with self.test_session():
-      self.assertEqual(
-          ragged.gather(params, ragged_indices).eval().tolist(),
-          [[b'd', b'b', b'c'], [b'b'], [], [b'a']])
-      self.assertEqual(
-          ragged.gather(ragged_params, indices).eval().tolist(),
-          [[b'e'], [b'd'], [], [b'd'], [b'a', b'b', b'c']])
-      self.assertEqual(
-          ragged.gather(ragged_params, ragged_indices).eval().tolist(),
-          [[[b'e'], [b'd'], []], [[b'd']], [], [[b'a', b'b', b'c']]])
-
-  @test_util.run_deprecated_v1
+    self.assertRaggedEqual(
+        ragged.gather(params, ragged_indices),
+        [[b'd', b'b', b'c'], [b'b'], [], [b'a']])
+    self.assertRaggedEqual(
+        ragged.gather(ragged_params, indices),
+        [[b'e'], [b'd'], [], [b'd'], [b'a', b'b', b'c']])
+    self.assertRaggedEqual(
+        ragged.gather(ragged_params, ragged_indices),
+        [[[b'e'], [b'd'], []], [[b'd']], [], [[b'a', b'b', b'c']]])
+
   def testTensorParamsAndTensorIndices(self):
     params = ['a', 'b', 'c', 'd', 'e']
     indices = [2, 0, 2, 1]
-    with self.test_session():
-      self.assertEqual(
-          ragged.gather(params, indices).eval().tolist(),
-          [b'c', b'a', b'c', b'b'])
-      self.assertEqual(type(ragged.gather(params, indices)), ops.Tensor)
+    self.assertRaggedEqual(
+        ragged.gather(params, indices), [b'c', b'a', b'c', b'b'])
+    self.assertIsInstance(ragged.gather(params, indices), ops.Tensor)
 
-  @test_util.run_deprecated_v1
   def testRaggedParamsAndTensorIndices(self):
     params = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
     indices = [2, 0, 2, 1]
-    with self.test_session():
-      self.assertEqual(
-          ragged.gather(params, indices).eval().tolist(),
-          [[b'f'], [b'a', b'b'], [b'f'], [b'c', b'd', b'e']])
+    self.assertRaggedEqual(
+        ragged.gather(params, indices),
+        [[b'f'], [b'a', b'b'], [b'f'], [b'c', b'd', b'e']])
 
-  @test_util.run_v1_only('b/120545219')
   def testTensorParamsAndRaggedIndices(self):
     params = ['a', 'b', 'c', 'd', 'e']
     indices = ragged.constant([[2, 1], [1, 2, 0], [3]])
-    with self.test_session():
-      self.assertEqual(
-          ragged.gather(params, indices).eval().tolist(),
-          [[b'c', b'b'], [b'b', b'c', b'a'], [b'd']])
+    self.assertRaggedEqual(
+        ragged.gather(params, indices),
+        [[b'c', b'b'], [b'b', b'c', b'a'], [b'd']])
 
-  @test_util.run_v1_only('b/120545219')
   def testRaggedParamsAndRaggedIndices(self):
     params = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
     indices = ragged.constant([[2, 1], [1, 2, 0], [3]])
-    with self.test_session():
-      self.assertEqual(
-          ragged.gather(params, indices).eval().tolist(),
-          [[[b'f'], [b'c', b'd', b'e']],                # [[p[2], p[1]      ],
-           [[b'c', b'd', b'e'], [b'f'], [b'a', b'b']],  #  [p[1], p[2], p[0]],
-           [[]]]                                        #  [p[3]            ]]
-      )  # pyformat: disable
-
-  @test_util.run_deprecated_v1
+    self.assertRaggedEqual(
+        ragged.gather(params, indices),
+        [[[b'f'], [b'c', b'd', b'e']],                # [[p[2], p[1]      ],
+         [[b'c', b'd', b'e'], [b'f'], [b'a', b'b']],  #  [p[1], p[2], p[0]],
+         [[]]]                                        #  [p[3]            ]]
+    )  # pyformat: disable
+
   def testRaggedParamsAndScalarIndices(self):
     params = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
     indices = 1
-    with self.test_session():
-      self.assertEqual(
-          ragged.gather(params, indices).eval().tolist(), [b'c', b'd', b'e'])
+    self.assertRaggedEqual(ragged.gather(params, indices), [b'c', b'd', b'e'])
 
-  @test_util.run_deprecated_v1
   def test3DRaggedParamsAnd2DTensorIndices(self):
     params = ragged.constant([[['a', 'b'], []], [['c', 'd'], ['e'], ['f']],
                               [['g']]])
     indices = [[1, 2], [0, 1], [2, 2]]
-    with self.test_session():
-      self.assertEqual(
-          ragged.gather(params, indices).eval().tolist(),
-          [[[[b'c', b'd'], [b'e'], [b'f']], [[b'g']]],            # [[p1, p2],
-           [[[b'a', b'b'], []], [[b'c', b'd'], [b'e'], [b'f']]],  #  [p0, p1],
-           [[[b'g']], [[b'g']]]]                                  #  [p2, p2]]
-      )  # pyformat: disable
-
-  @test_util.run_v1_only('b/120545219')
+    self.assertRaggedEqual(
+        ragged.gather(params, indices),
+        [[[[b'c', b'd'], [b'e'], [b'f']], [[b'g']]],            # [[p1, p2],
+         [[[b'a', b'b'], []], [[b'c', b'd'], [b'e'], [b'f']]],  #  [p0, p1],
+         [[[b'g']], [[b'g']]]]                                  #  [p2, p2]]
+    )  # pyformat: disable
+
   def testTensorParamsAnd4DRaggedIndices(self):
     indices = ragged.constant(
         [[[[3, 4], [0, 6]], []], [[[2, 1], [1, 0]], [[2, 5]], [[2, 3]]],
@@ -116,32 +102,30 @@ class RaggedTensorOpsTest(test_util.TensorFlowTestCase):
         ragged_rank=2,
         inner_shape=(2,))
     params = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
-    with self.test_session():
-      self.assertEqual(
-          ragged.gather(params, indices).eval().tolist(),
-          [[[[b'd', b'e'], [b'a', b'g']], []],
-           [[[b'c', b'b'], [b'b', b'a']], [[b'c', b'f']], [[b'c', b'd']]],
-           [[[b'b', b'a']]]])  # pyformat: disable
-
-  @test_util.run_v1_only('b/120545219')
+    self.assertRaggedEqual(
+        ragged.gather(params, indices),
+        [[[[b'd', b'e'], [b'a', b'g']], []],
+         [[[b'c', b'b'], [b'b', b'a']], [[b'c', b'f']], [[b'c', b'd']]],
+         [[[b'b', b'a']]]])  # pyformat: disable
+
   def testOutOfBoundsError(self):
     tensor_params = ['a', 'b', 'c']
     tensor_indices = [0, 1, 2]
     ragged_params = ragged.constant([['a', 'b'], ['c']])
     ragged_indices = ragged.constant([[0, 3]])
-    with self.test_session():
-      self.assertRaisesRegexp(errors.InvalidArgumentError,
-                              r'indices\[1\] = 3 is not in \[0, 3\)',
-                              ragged.gather(tensor_params, ragged_indices).eval)
-      self.assertRaisesRegexp(errors.InvalidArgumentError,
-                              r'indices\[2\] = 2 is not in \[0, 2\)',
-                              ragged.gather(ragged_params, tensor_indices).eval)
-      self.assertRaisesRegexp(errors.InvalidArgumentError,
-                              r'indices\[1\] = 3 is not in \[0, 2\)',
-                              ragged.gather(ragged_params, ragged_indices).eval)
-
-  @test_util.run_deprecated_v1
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 r'indices\[1\] = 3 is not in \[0, 3\)'):
+      self.evaluate(ragged.gather(tensor_params, ragged_indices))
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 r'indices\[2\] = 2 is not in \[0, 2\)'):
+      self.evaluate(ragged.gather(ragged_params, tensor_indices))
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 r'indices\[1\] = 3 is not in \[0, 2\)'):
+      self.evaluate(ragged.gather(ragged_params, ragged_indices))
+
   def testUnknownIndicesRankError(self):
+    if context.executing_eagerly():
+      return
     params = ragged.constant([], ragged_rank=1)
     indices = constant_op.constant([0], dtype=dtypes.int64)
     indices = array_ops.placeholder_with_default(indices, None)
diff --git a/tensorflow/python/ops/ragged/ragged_getitem.py b/tensorflow/python/ops/ragged/ragged_getitem.py
index 9821695046..0fa72a3658 100644
--- a/tensorflow/python/ops/ragged/ragged_getitem.py
+++ b/tensorflow/python/ops/ragged/ragged_getitem.py
@@ -24,7 +24,6 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_array_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_math_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 
@@ -137,8 +136,8 @@ def _ragged_getitem(rt_input, key_list):
   if row_key is array_ops.newaxis:
     inner_rt = _ragged_getitem(rt_input, inner_keys)
     nsplits = array_ops.shape(inner_rt.row_splits, out_type=dtypes.int64)[0]
-    return ragged_factory_ops.from_row_splits(inner_rt,
-                                              array_ops.stack([0, nsplits - 1]))
+    return ragged_tensor.RaggedTensor.from_row_splits(
+        inner_rt, array_ops.stack([0, nsplits - 1]))
 
   # Slicing a range of rows: first slice the outer dimension, and then
   # call `_ragged_getitem_inner_dimensions` to handle the inner keys.
@@ -184,7 +183,7 @@ def _slice_ragged_row_dimension(rt_input, row_key):
         axis=0)
     values_start = new_splits[0]
     values_limit = new_splits[-1]
-    return ragged_factory_ops.from_row_splits(
+    return ragged_tensor.RaggedTensor.from_row_splits(
         rt_input.values[values_start:values_limit], new_splits - values_start)
 
   # If there is a slice step (aka a strided slice), then use ragged_gather to
@@ -225,7 +224,8 @@ def _ragged_getitem_inner_dimensions(rt_input, key_list):
   if column_key is array_ops.newaxis:
     inner_rt = _ragged_getitem_inner_dimensions(rt_input, key_list[1:])
     nsplits = array_ops.shape(inner_rt.row_splits, out_type=dtypes.int64)[0]
-    return ragged_factory_ops.from_row_splits(inner_rt, math_ops.range(nsplits))
+    return ragged_tensor.RaggedTensor.from_row_splits(inner_rt,
+                                                      math_ops.range(nsplits))
 
   # Slicing a range of columns in a ragged inner dimension.  We use a
   # recursive call to process the values, and then assemble a RaggedTensor
@@ -239,7 +239,7 @@ def _ragged_getitem_inner_dimensions(rt_input, key_list):
     else:
       # Nontrivial slice: use ragged_gather to extract the indicated slice as
       # a new RaggedTensor (inner_rt), and then recursively process its values.
-      # The splits can be taken from ragged.row_splits(inner_rt).
+      # The splits can be taken from inner_rt.row_splits().
       inner_rt_starts = rt_input.row_splits[:-1]
       inner_rt_limits = rt_input.row_splits[1:]
       if column_key.start is not None and column_key.start != 0:
diff --git a/tensorflow/python/ops/ragged/ragged_map_inner_values_op_test.py b/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
similarity index 79%
rename from tensorflow/python/ops/ragged/ragged_map_inner_values_op_test.py
rename to tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
index dc214f4437..8b28cac99d 100644
--- a/tensorflow/python/ops/ragged/ragged_map_inner_values_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
@@ -12,14 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.map_inner_values."""
+"""Tests for ragged.map_flat_values."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from absl.testing import parameterized
-
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -27,11 +25,12 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
-                                 parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
 
   def assertRaggedMapInnerValuesReturns(self,
                                         op,
@@ -39,23 +38,21 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
                                         args=(),
                                         kwargs=None):
     kwargs = kwargs or {}
-    result = ragged.map_inner_values(op, *args, **kwargs)
+    result = ragged.map_flat_values(op, *args, **kwargs)
     with self.test_session():
-      self.assertEqual(result.eval().tolist(), expected)
+      self.assertRaggedEqual(result, expected)
 
-  @test_util.run_v1_only('b/120545219')
   def testDocStringExamples(self):
     """Test the examples in apply_op_to_ragged_values.__doc__."""
     rt = ragged.constant([[1, 2, 3], [], [4, 5], [6]])
-    v1 = ragged.map_inner_values(array_ops.ones_like, rt)
-    v2 = ragged.map_inner_values(math_ops.multiply, rt, rt)
-    v3 = ragged.map_inner_values(math_ops.add, rt, 5)
+    v1 = ragged.map_flat_values(array_ops.ones_like, rt)
+    v2 = ragged.map_flat_values(math_ops.multiply, rt, rt)
+    v3 = ragged.map_flat_values(math_ops.add, rt, 5)
     with self.test_session():
-      self.assertEqual(v1.eval().tolist(), [[1, 1, 1], [], [1, 1], [1]])
-      self.assertEqual(v2.eval().tolist(), [[1, 4, 9], [], [16, 25], [36]])
-      self.assertEqual(v3.eval().tolist(), [[6, 7, 8], [], [9, 10], [11]])
+      self.assertRaggedEqual(v1, [[1, 1, 1], [], [1, 1], [1]])
+      self.assertRaggedEqual(v2, [[1, 4, 9], [], [16, 25], [36]])
+      self.assertRaggedEqual(v3, [[6, 7, 8], [], [9, 10], [11]])
 
-  @test_util.run_deprecated_v1
   def testOpWithSingleRaggedTensorArg(self):
     tensor = ragged.constant([[1, 2, 3], [], [4, 5]])
     self.assertRaggedMapInnerValuesReturns(
@@ -63,20 +60,17 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         args=(tensor,),
         expected=[[0, 0, 0], [], [0, 0]])
 
-  @test_util.run_v1_only('b/120545219')
   def testOpWithTwoRaggedTensorArgs(self):
     x = ragged.constant([[3, 1, 4], [], [1, 5]])
     y = ragged.constant([[1, 2, 3], [], [4, 5]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply, args=(x, y), expected=[[3, 2, 12], [], [4, 25]])
 
-  @test_util.run_deprecated_v1
   def testOpWithRaggedTensorAndScalarArgs(self):
     y = ragged.constant([[1, 2, 3], [], [4, 5]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply, args=(5, y), expected=[[5, 10, 15], [], [20, 25]])
 
-  @test_util.run_v1_only('b/120545219')
   def testOpWithThreeRaggedTensorArgs(self):
     condition = ragged.constant(
         [[True, True, False], [], [True, False]])  # pyformat: disable
@@ -87,7 +81,6 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         args=(condition, x, y),
         expected=[[b'a', b'b', b'C'], [], [b'd', b'E']])
 
-  @test_util.run_v1_only('b/120545219')
   def testOpWithRaggedTensorListArg(self):
     x = ragged.constant([[1, 2, 3], [], [4, 5]])
     y = ragged.constant([[10, 20, 30], [], [40, 50]])
@@ -96,7 +89,6 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         args=([x, y, x],),
         expected=[[12, 24, 36], [], [48, 60]])
 
-  @test_util.run_v1_only('b/120545219')
   def testOpWithKeywordArgs(self):
     x = ragged.constant([[3, 1, 4], [], [1, 5]])
     y = ragged.constant([[1, 2, 3], [], [4, 5]])
@@ -105,7 +97,6 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         kwargs=dict(x=x, y=y),
         expected=[[3, 2, 12], [], [4, 25]])
 
-  @test_util.run_v1_only('b/120545219')
   def testOpWithMixedPositionalAndKeywordArgs(self):
     x = ragged.constant([[3, 1, 4], [], [1, 5]])
     y = ragged.constant([[1, 2, 3], [], [4, 5]])
@@ -115,7 +106,6 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         kwargs=dict(y=y),
         expected=[[3, 2, 12], [], [4, 25]])
 
-  @test_util.run_deprecated_v1
   def testNonElementWiseOp(self):
     x = ragged.constant(
         [[[3, 1, 4], [1, 5, 9], [2, 6, 5]], [], [[3, 5, 8], [9, 7, 9]]],
@@ -128,15 +118,13 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         },
         expected=[[8, 15, 13], [], [16, 25]])
 
-  @test_util.run_v1_only('b/120545219')
   def testOpWithRaggedRankGreaterThanOne(self):
     # ragged_rank=0
     x0 = [3, 1, 4, 1, 5, 9, 2, 6, 5]
     y0 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
     with self.test_session():
-      self.assertEqual(
-          math_ops.multiply(x0, y0).eval().tolist(),
-          [3, 2, 12, 4, 25, 54, 14, 48, 45])
+      self.assertRaggedEqual(
+          math_ops.multiply(x0, y0), [3, 2, 12, 4, 25, 54, 14, 48, 45])
 
     # ragged_rank=1
     x1 = ragged.constant([[3, 1, 4], [], [1, 5], [9, 2], [6, 5]])
@@ -173,7 +161,6 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
             [[[54, 14], [48, 45]]]    # row 3
         ])  # pyformat: disable
 
-  @test_util.run_v1_only('b/120545219')
   def testOpWithRaggedRankThree(self):
     x = ragged.constant([[[3, 1, 4]], [], [[], [1, 5]]])
     y = ragged.constant([[[1, 2, 3]], [], [[], [4, 5]]])
@@ -182,7 +169,6 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
         args=(x, y),
         expected=[[[3, 2, 12]], [], [[], [4, 25]]])
 
-  @test_util.run_deprecated_v1
   def testOpWithInnerValuesOnly(self):
     x = constant_op.constant([[1, 2], [3, 4], [5, 6]])
     y = constant_op.constant(2)
@@ -194,29 +180,25 @@ class RaggedMapInnerValuesOpTest(test_util.TensorFlowTestCase,
     y = ragged.constant([[[3, 1, 4], []], [], [[1, 5]]])
     self.assertRaisesRegexp(ValueError,
                             r'Inputs must have identical ragged splits.*',
-                            ragged.map_inner_values, math_ops.add, x, y)
+                            ragged.map_flat_values, math_ops.add, x, y)
 
   def testRaggedTensorSplitsValueMismatchError(self):
     x = ragged.constant([[3, 1, 4], [], [1, 5]])
     y = ragged.constant([[1], [2, 3], [4, 5]])
     self.assertRaisesRegexp(errors.InvalidArgumentError,
                             r'Inputs must have identical ragged splits.*',
-                            ragged.map_inner_values, math_ops.add, x, y)
+                            ragged.map_flat_values, math_ops.add, x, y)
 
-  @test_util.run_v1_only('b/120545219')
   def testRaggedTensorSplitsMismatchErrorAtRuntime(self):
     splits1 = array_ops.placeholder_with_default(
         constant_op.constant([0, 3, 3, 5], dtypes.int64), None)
     splits2 = array_ops.placeholder_with_default(
         constant_op.constant([0, 1, 3, 5], dtypes.int64), None)
-    x = ragged.from_row_splits([3, 1, 4, 1, 5], splits1)
-    y = ragged.from_row_splits([1, 2, 3, 4, 5], splits2)
-    result = ragged.map_inner_values(math_ops.add, x, y)
-    with self.test_session():
-      self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          r'\[Inputs must have identical ragged splits\] '
-          r'\[Condition x == y did not hold element-wise:\].*', result.eval)
+    x = ragged.RaggedTensor.from_row_splits([3, 1, 4, 1, 5], splits1)
+    y = ragged.RaggedTensor.from_row_splits([1, 2, 3, 4, 5], splits2)
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 r'.*Inputs must have identical ragged splits'):
+      self.evaluate(ragged.map_flat_values(math_ops.add, x, y))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
index ab70d5a123..49c0996b24 100644
--- a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
@@ -27,11 +27,14 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops as mo
 from tensorflow.python.ops import ragged
 from tensorflow.python.ops import string_ops
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-@test_util.run_v1_only('b/120545219')
-class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
+                      parameterized.TestCase):
+
   @parameterized.parameters([
       # The following test sets map over a RaggedTensor and apply a
       # transformation that returns with shape:
@@ -54,6 +57,7 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           elems=[[1, 2, 3], [4, 5], [6, 7]],
           expected_output=[[2, 6], [4.5, 9], [6.5, 13]],
           dtype=dtypes.float32,
+          expected_ragged_rank=0,
       ),
       # [d1, (d2)] -> [d1, (d2)]
       dict(
@@ -61,8 +65,8 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           elems=[[1, 2, 3], [4, 5], [6, 7]],
           expected_output=[[2, 3, 4], [5, 6], [7, 8]],
           dtype=dtypes.int64,
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=1),
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=1),
       ),
       # [d1, (d2), d3] -> [d1, (d2), d3]
       dict(
@@ -70,41 +74,41 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           elems=[[[1, 2], [3, 4]], [], [[5, 6], [7, 8], [9, 0]]],
           elems_ragged_rank=1,
           expected_ragged_rank=1,
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=1),
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=1),
           expected_output=[[[2, 3], [4, 5]], [], [[6, 7], [8, 9], [10, 1]]],
       ),
       # [d1, (d2)] -> [d1, (d2), (d3)]
       dict(
-          fn=lambda x: ragged.from_row_starts(x, [0]),
+          fn=lambda x: ragged.RaggedTensor.from_row_starts(x, [0]),
           elems=[[1, 2, 3], [4, 5], [6, 7]],
           expected_output=[[[1, 2, 3]], [[4, 5]], [[6, 7]]],
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=2),
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=2),
       ),
       # [d1, (d2), (d3)] -> [d1, (d2), (d3)]
       dict(
-          fn=lambda x: ragged.map_inner_values(mo.add, x, 1),
+          fn=lambda x: ragged.map_flat_values(mo.add, x, 1),
           elems=[[[1, 2, 3]], [[4, 5], [6, 7]]],
           expected_output=[[[2, 3, 4]], [[5, 6], [7, 8]]],
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=2),
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=2),
       ),
       # [d1, (d2), (d3)] -> [d1, (d2)]
       dict(
           fn=lambda x: ragged.reduce_sum(x, axis=1),
           elems=[[[1, 2, 3]], [[4, 5], [6, 7]]],
           expected_output=[[6], [9, 13]],
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=1),
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=1),
       ),
       # [d1, (d2), (d3)] -> [d1, (d3)]
       dict(
           fn=lambda x: ragged.reduce_sum(x, axis=0),
           elems=[[[1, 2, 3]], [[4, 5], [6, 7]]],
           expected_output=[[1, 2, 3], [10, 12]],
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=1),
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=1),
       ),
       # [d1, (d2), (d3)] -> [d1]
       dict(
@@ -118,27 +122,26 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           fn=mo.range,
           elems=[4, 0, 2],
           expected_output=[[0, 1, 2, 3], [], [0, 1]],
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=1),
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=1),
       ),
       # [d1] -> [d1, (d2), (d3)]
       dict(
           fn=lambda x: ragged.range(mo.range(x)),
           elems=[5, 0, 3],
-          expected_output=[
-              [[], [0], [0, 1], [0, 1, 2], [0, 1, 2, 3]], [], [[], [0], [0, 1]]
-          ],
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=2),
+          expected_output=[[[], [0], [0, 1], [0, 1, 2], [0, 1, 2, 3]], [],
+                           [[], [0], [0, 1]]],
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=2),
       ),
       # [d1, (d2), (d3), (d4a), (d5)] ->  [d1, (d2), (d3), (d4b), (d5)]
       dict(
           fn=lambda x: x + np.int64(1),
           elems=[[[[[1, 2, 3]], [[4], [5]]]], [[[[6, 7]]], [[[8], []]]]],
-          expected_output=[[[[[2, 3, 4]], [[5], [6]]]],
-                           [[[[7, 8]]], [[[9], []]]]],
-          result_dtype=ragged.RaggedTensorType(dtype=dtypes.int64,
-                                               ragged_rank=4),
+          expected_output=[[[[[2, 3, 4]], [[5], [6]]]], [[[[7, 8]]], [[[9],
+                                                                       []]]]],
+          result_dtype=ragged.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=4),
       ),
   ])
 
@@ -160,16 +163,12 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
     expected_rt = ragged.constant(
         expected_output, ragged_rank=expected_ragged_rank)
-    with self.test_session():
-      if ragged.is_ragged(expected_output):
-        self.assertEqual(output.ragged_rank, expected_rt.ragged_rank)
-      output_values = self.evaluate(output)
-      self.assertAllEqual(expected_output, output_values.tolist())
+    self.assertRaggedEqual(expected_rt, output)
 
   def testRaggedMapOnStructure(self):
     batman = ragged.constant([[1, 2, 3], [4], [5, 6, 7]])
     # [[10, 20, 30], [40], [50, 60, 70]]
-    robin = ragged.map_inner_values(mo.multiply, batman, 10)
+    robin = ragged.map_flat_values(mo.multiply, batman, 10)
 
     features = {'batman': batman, 'robin': robin}
 
@@ -182,14 +181,13 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         dtype=dtypes.int32,
     )
 
-    with self.test_session():
-      self.assertAllEqual(output.eval().tolist(), [66, 44, 198])
+    self.assertRaggedEqual(output, [66, 44, 198])
 
   # Test mapping over a dict of RTs can produce a dict of RTs.
   def testRaggedMapOnStructure_RaggedOutputs(self):
     batman = ragged.constant([[1, 2, 3], [4], [5, 6, 7]])
     # [[10, 20, 30], [40], [50, 60, 70]]
-    robin = ragged.map_inner_values(mo.multiply, batman, 10)
+    robin = ragged.map_flat_values(mo.multiply, batman, 10)
 
     features = {'batman': batman, 'robin': robin}
 
@@ -211,17 +209,13 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         },
     )
 
-    with self.test_session():
-      self.assertAllEqual(output['batman'].eval().tolist(),
-                          [[2, 3, 4], [5], [6, 7, 8]])
-      self.assertAllEqual(output['robin'].eval().tolist(),
-                          [[11, 21, 31], [41], [51, 61, 71]])
+    self.assertRaggedEqual(output['batman'], [[2, 3, 4], [5], [6, 7, 8]])
+    self.assertRaggedEqual(output['robin'], [[11, 21, 31], [41], [51, 61, 71]])
 
   def testZip(self):
     x = ragged.constant([[10, 20], [30, 40], [50, 60], [70], [80, 90, 100]],
                         dtypes.int64)
-    y = array_ops.expand_dims(
-        mo.range(ragged.nrows(x), dtype=dtypes.int64), axis=1)
+    y = array_ops.expand_dims(mo.range(x.nrows(), dtype=dtypes.int64), axis=1)
 
     def _zip(foo):
       y_val, x_val = foo
@@ -233,11 +227,9 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         dtype=ragged.RaggedTensorType(dtype=dtypes.int64, ragged_rank=1),
         infer_shape=False)
 
-    with self.test_session():
-      result = self.evaluate(output).tolist()
-      self.assertAllEqual(
-          result, [[[0, 10], [0, 20]], [[1, 30], [1, 40]], [[2, 50], [2, 60]],
-                   [[3, 70]], [[4, 80], [4, 90], [4, 100]]])
+    self.assertRaggedEqual(
+        output, [[[0, 10], [0, 20]], [[1, 30], [1, 40]], [[2, 50], [2, 60]],
+                 [[3, 70]], [[4, 80], [4, 90], [4, 100]]])
 
   def testBatchGather(self):
     tokens = ragged.constant([['hello', '.', 'there'], ['merhaba'],
@@ -255,10 +247,8 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         dtype=ragged.RaggedTensorType(dtype=dtypes.string, ragged_rank=1),
         infer_shape=False)
 
-    with self.test_session():
-      self.assertAllEqual(
-          self.evaluate(out).tolist(),
-          [[b'hello', b'there'], [b'merhaba'], [b'bonjour', b'ca va']])
+    self.assertRaggedEqual(
+        out, [[b'hello', b'there'], [b'merhaba'], [b'bonjour', b'ca va']])
 
   def testMismatchRaggedRank(self):
     elems = ragged.constant([[[1, 2, 3]], [[4, 5], [6, 7]]])
@@ -272,7 +262,7 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
   def testMismatchRaggedRank2(self):
     elems = ragged.constant([[1, 2, 3], [4, 5], [6, 7]])
-    fn = lambda x: ragged.from_row_starts(x, [0])
+    fn = lambda x: ragged.RaggedTensor.from_row_starts(x, [0])
     with self.assertRaisesWithLiteralMatch(
         ValueError, r'The declared ragged rank (10) mismatches the result (1)'):
       _ = ragged.map_fn(
diff --git a/tensorflow/python/ops/ragged/ragged_map_ops.py b/tensorflow/python/ops/ragged/ragged_map_ops.py
index fafa23b8dc..af40352b1d 100644
--- a/tensorflow/python/ops/ragged/ragged_map_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_map_ops.py
@@ -27,12 +27,11 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variable_scope as vs
-from tensorflow.python.ops.ragged import ragged_array_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
@@ -216,8 +215,8 @@ def map_fn(fn,
         varscope_caching_device_was_none = True
 
     elems_flat = [
-        ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
-            elem, name="elem") for elem in elems_flat
+        ragged_tensor.convert_to_tensor_or_ragged_tensor(elem, name="elem")
+        for elem in elems_flat
     ]
 
     # We can either infer the output, or we can assume that it will be the same
@@ -226,7 +225,7 @@ def map_fn(fn,
 
     # Find the number of iterations, n may be known statically.
     if isinstance(elems_flat[0], ragged_tensor.RaggedTensor):
-      n = ragged_array_ops.nrows(elems_flat[0], out_type=dtypes.int32)
+      n = elems_flat[0].nrows(out_type=dtypes.int32)
     else:
       static_shape = elems_flat[0].shape
       if static_shape.ndims is not None and static_shape.ndims < 1:
@@ -236,7 +235,8 @@ def map_fn(fn,
         else:
           raise ValueError(
               "elements in elems must be 1+ dimensional Tensors, not scalars")
-      n = static_shape[0].value or array_ops.shape(elems_flat[0])[0]
+      n = (tensor_shape.dimension_value(static_shape[0]) or
+           array_ops.shape(elems_flat[0])[0])
 
     # Create a flat list of TAs.
 
@@ -334,7 +334,7 @@ def map_fn(fn,
 class _RaggedTensorComponents(
     collections.namedtuple(
         "_RaggedTensorComponents",
-        ["inner_values", "nested_row_lengths", "outer_row_length"])):
+        ["flat_values", "nested_row_lengths", "outer_row_length"])):
   """A namedtuple of components which represent a `RaggedTensor`.
 
   _RaggedTensorComponents is a list of components which can be used to create a
@@ -344,7 +344,7 @@ class _RaggedTensorComponents(
 
   The following are a list of components for a `RaggedTensor`:
 
-  inner_values: The flat and inner values of a RaggedTensor. This could be
+  flat_values: The flat and inner values of a RaggedTensor. This could be
     a `Tensor`, a `TensorArray`, or a data type.
   nested_row_lengths: a tuple containing the row lengths of each rank. The
     elements of the tuple could be `Tensor`s or `TensorArray`s.
@@ -357,12 +357,12 @@ class _RaggedTensorComponents(
 
 
 def _concat_ragged_tensor_components(rt_ta):
-  inner_values = rt_ta.inner_values.concat()
+  flat_values = rt_ta.flat_values.concat()
   nested_row_lengths = tuple(
       row_lengths_ta.concat() for row_lengths_ta in rt_ta.nested_row_lengths)
   outer_row_length = rt_ta.outer_row_length.concat()
   return _RaggedTensorComponents(
-      inner_values=inner_values,
+      flat_values=flat_values,
       nested_row_lengths=nested_row_lengths,
       outer_row_length=outer_row_length)
 
@@ -374,17 +374,17 @@ def _maybe_decompose_tensor(rt):
 
   # The three component pieces we need:
   # - inner values
-  inner_values = rt.inner_values
+  flat_values = rt.flat_values
 
   # - row_splits of the RT
   splits = rt.nested_row_splits
   nested_row_lengths = tuple(split[1:] - split[:-1] for split in splits)
 
   # - outer row length
-  outer_row_length = array_ops.expand_dims(ragged_array_ops.nrows(rt), axis=0)
+  outer_row_length = array_ops.expand_dims(rt.nrows(), axis=0)
 
   return _RaggedTensorComponents(
-      inner_values=inner_values,
+      flat_values=flat_values,
       nested_row_lengths=nested_row_lengths,
       outer_row_length=outer_row_length,
   )
@@ -395,11 +395,12 @@ def _maybe_recompose_tensor(t):
   if not isinstance(t, _RaggedTensorComponents):
     return t
 
-  values = t.inner_values
+  values = t.flat_values
   nested_row_lengths = tuple(t.nested_row_lengths)
   for nested_row_length in reversed(nested_row_lengths):
-    values = ragged_factory_ops.from_row_lengths(values, nested_row_length)
-  return ragged_factory_ops.from_row_lengths(values, t.outer_row_length)
+    values = ragged_tensor.RaggedTensor.from_row_lengths(
+        values, nested_row_length)
+  return ragged_tensor.RaggedTensor.from_row_lengths(values, t.outer_row_length)
 
 
 def _maybe_decompose_dtype(d):
@@ -408,7 +409,7 @@ def _maybe_decompose_dtype(d):
     return d
 
   result = _RaggedTensorComponents(
-      inner_values=d.dtype,
+      flat_values=d.dtype,
       nested_row_lengths=tuple(dtypes.int64 for i in range(d.ragged_rank - 1)),
       outer_row_length=dtypes.int64,
   )
@@ -435,10 +436,13 @@ def _convert_declared(fn_output_flat, output_declared):
               "The declared ragged rank (%d) mismatches the result (1)" %
               declared.ragged_rank)
 
-        row_length = array_ops.expand_dims(
-            ragged_array_ops.nrows(current), axis=0)
+        if isinstance(current, ragged_tensor.RaggedTensor):
+          nrows = current.nrows()
+        else:
+          nrows = array_ops.shape(current, out_type=dtypes.int64)[0]
+        row_length = array_ops.expand_dims(nrows, axis=0)
         rt = _RaggedTensorComponents(
-            inner_values=current,
+            flat_values=current,
             nested_row_lengths=(),
             outer_row_length=row_length)
         yield rt
diff --git a/tensorflow/python/ops/ragged/ragged_math_ops.py b/tensorflow/python/ops/ragged/ragged_math_ops.py
index d661563a9f..92f82be84a 100644
--- a/tensorflow/python/ops/ragged/ragged_math_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_math_ops.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
+
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
@@ -25,7 +27,6 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import gen_ragged_math_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_functional_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_util
@@ -97,8 +98,8 @@ def range(starts, limits=None, deltas=1, dtype=None, name=None):
           [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64])
 
     result = gen_ragged_math_ops.ragged_range(starts, limits, deltas, name=name)
-    return ragged_factory_ops.from_row_splits(result.rt_dense_values,
-                                              result.rt_nested_splits)
+    return ragged_tensor.RaggedTensor.from_row_splits(result.rt_dense_values,
+                                                      result.rt_nested_splits)
 
 
 def _infer_matching_dtype(tensors, dtype_hierarchy):
@@ -184,9 +185,8 @@ def _ragged_segment_aggregate(unsorted_segment_op,
 
   with ops.name_scope(name, 'RaggedSegment',
                       [data, segment_ids, num_segments]) as name:
-    data = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
-        data, name='data')
-    segment_ids = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
+    segment_ids = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         segment_ids, name='segment_ids')
 
     if ragged_tensor.is_ragged(segment_ids):
@@ -236,7 +236,8 @@ def _ragged_segment_aggregate(unsorted_segment_op,
     output_values = _ragged_segment_aggregate(unsorted_segment_op, data.values,
                                               data_val_to_out_val_index,
                                               output_splits[-1])
-    return ragged_factory_ops.from_row_splits(output_values, output_splits)
+    return ragged_tensor.RaggedTensor.from_row_splits(output_values,
+                                                      output_splits)
 
 
 def segment_sum(data, segment_ids, num_segments, name=None):
@@ -272,11 +273,11 @@ def segment_mean(data, segment_ids, num_segments, name=None):
   with ops.name_scope(name, 'RaggedSegmentMean',
                       [data, segment_ids, num_segments]):
     total = segment_sum(data, segment_ids, num_segments)
-    ones = ragged_factory_ops.from_nested_row_splits(
-        array_ops.ones_like(data.inner_values), data.nested_row_splits)
+    ones = ragged_tensor.RaggedTensor.from_nested_row_splits(
+        array_ops.ones_like(data.flat_values), data.nested_row_splits)
     count = segment_sum(ones, segment_ids, num_segments)
-    return ragged_factory_ops.from_nested_row_splits(
-        total.inner_values / count.inner_values, total.nested_row_splits)
+    return ragged_tensor.RaggedTensor.from_nested_row_splits(
+        total.flat_values / count.flat_values, total.nested_row_splits)
 
 
 def segment_sqrt_n(data, segment_ids, num_segments, name=None):
@@ -284,11 +285,11 @@ def segment_sqrt_n(data, segment_ids, num_segments, name=None):
   with ops.name_scope(name, 'RaggedSegmentSqrtN',
                       [data, segment_ids, num_segments]):
     total = segment_sum(data, segment_ids, num_segments)
-    ones = ragged_factory_ops.from_nested_row_splits(
-        array_ops.ones_like(data.inner_values), data.nested_row_splits)
+    ones = ragged_tensor.RaggedTensor.from_nested_row_splits(
+        array_ops.ones_like(data.flat_values), data.nested_row_splits)
     count = segment_sum(ones, segment_ids, num_segments)
-    return ragged_factory_ops.from_nested_row_splits(
-        total.inner_values / math_ops.sqrt(count.inner_values),
+    return ragged_tensor.RaggedTensor.from_nested_row_splits(
+        total.flat_values / math_ops.sqrt(count.flat_values),
         total.nested_row_splits)
 
 
@@ -440,10 +441,12 @@ def _ragged_reduce_aggregate(reduce_op,
     axis = tensor_util.constant_value(axis)
     if axis is None:
       raise ValueError('axis must be known at graph construction time.')
+    if isinstance(axis, np.ndarray):
+      axis = axis.tolist()
 
   # When reducing all axes, just ignore splits & reduce the inner values.
   if axis is None:
-    return reduce_op(rt_input.inner_values, None, name=name)
+    return reduce_op(rt_input.flat_values, None, name=name)
 
   with ops.name_scope(name, 'RaggedReduce', [rt_input, axis]):
     if isinstance(axis, (tuple, list)):
@@ -464,7 +467,7 @@ def _ragged_reduce_aggregate(reduce_op,
 
     axis = ragged_util.get_positive_axis(axis, rt_input.shape.ndims)
 
-    rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(
+    rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         rt_input, name='rt_input')
 
     if axis == 0:
@@ -522,22 +525,22 @@ def reduce_mean(input_tensor, axis=None, keepdims=None, name=None):
   with ops.name_scope(name, 'RaggedReduceMean', [input_tensor, axis]):
     total = reduce_sum(input_tensor, axis, keepdims)
     if ragged_tensor.is_ragged(input_tensor):
-      ones = ragged_factory_ops.from_nested_row_splits(
-          array_ops.ones_like(input_tensor.inner_values),
+      ones = ragged_tensor.RaggedTensor.from_nested_row_splits(
+          array_ops.ones_like(input_tensor.flat_values),
           input_tensor.nested_row_splits)
     else:
       ones = array_ops.ones_like(input_tensor)
     count = reduce_sum(ones, axis, keepdims)
     if ragged_tensor.is_ragged(total):
-      return ragged_factory_ops.from_nested_row_splits(
-          total.inner_values / count.inner_values, total.nested_row_splits)
+      return ragged_tensor.RaggedTensor.from_nested_row_splits(
+          total.flat_values / count.flat_values, total.nested_row_splits)
     else:
       return total / count
 
 
 def _cast(input_tensor, dtype):
-  return ragged_functional_ops.map_inner_values(math_ops.cast, input_tensor,
-                                                dtype)
+  return ragged_functional_ops.map_flat_values(math_ops.cast, input_tensor,
+                                               dtype)
 
 
 def reduce_all(input_tensor, axis=None, keepdims=None, name=None):
diff --git a/tensorflow/python/ops/ragged/ragged_operators_test.py b/tensorflow/python/ops/ragged/ragged_operators_test.py
index c1223db307..78bb37c341 100644
--- a/tensorflow/python/ops/ragged/ragged_operators_test.py
+++ b/tensorflow/python/ops/ragged/ragged_operators_test.py
@@ -20,79 +20,71 @@ from __future__ import print_function
 
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-@test_util.run_v1_only('b/120545219')
-class RaggedElementwiseOpsTest(test_util.TensorFlowTestCase):
-  # @TODO(edloper): Test right-handed versions of operators once we add
-  # broadcasting support for elementwise ops.
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase):
 
   def testOrderingOperators(self):
     x = ragged.constant([[1, 5], [3]])
     y = ragged.constant([[4, 5], [1]])
-    with self.test_session():
-      self.assertEqual((x > y).eval().tolist(), [[False, False], [True]])
-      self.assertEqual((x >= y).eval().tolist(), [[False, True], [True]])
-      self.assertEqual((x < y).eval().tolist(), [[True, False], [False]])
-      self.assertEqual((x <= y).eval().tolist(), [[True, True], [False]])
-
-  def assertEqual(self, a, b):
-    if a != b:
-      print('%30s %s' % (b, a))
+    self.assertRaggedEqual((x > y), [[False, False], [True]])
+    self.assertRaggedEqual((x >= y), [[False, True], [True]])
+    self.assertRaggedEqual((x < y), [[True, False], [False]])
+    self.assertRaggedEqual((x <= y), [[True, True], [False]])
 
   def testArithmeticOperators(self):
     x = ragged.constant([[1.0, -2.0], [8.0]])
     y = ragged.constant([[4.0, 4.0], [2.0]])
-    with self.test_session():
-      self.assertEqual(abs(x).eval().tolist(), [[1.0, 2.0], [8.0]])
+    self.assertRaggedEqual(abs(x), [[1.0, 2.0], [8.0]])
 
-      self.assertEqual((-x).eval().tolist(), [[-1.0, 2.0], [-8.0]])
+    self.assertRaggedEqual((-x), [[-1.0, 2.0], [-8.0]])
 
-      self.assertEqual((x + y).eval().tolist(), [[5.0, 2.0], [10.0]])
-      self.assertEqual((3.0 + y).eval().tolist(), [[7.0, 7.0], [5.0]])
-      self.assertEqual((x + 3.0).eval().tolist(), [[4.0, 1.0], [11.0]])
+    self.assertRaggedEqual((x + y), [[5.0, 2.0], [10.0]])
+    self.assertRaggedEqual((3.0 + y), [[7.0, 7.0], [5.0]])
+    self.assertRaggedEqual((x + 3.0), [[4.0, 1.0], [11.0]])
 
-      self.assertEqual((x - y).eval().tolist(), [[-3.0, -6.0], [6.0]])
-      self.assertEqual((3.0 - y).eval().tolist(), [[-1.0, -1.0], [1.0]])
-      self.assertEqual((x + 3.0).eval().tolist(), [[4.0, 1.0], [11.0]])
+    self.assertRaggedEqual((x - y), [[-3.0, -6.0], [6.0]])
+    self.assertRaggedEqual((3.0 - y), [[-1.0, -1.0], [1.0]])
+    self.assertRaggedEqual((x + 3.0), [[4.0, 1.0], [11.0]])
 
-      self.assertEqual((x * y).eval().tolist(), [[4.0, -8.0], [16.0]])
-      self.assertEqual((3.0 * y).eval().tolist(), [[12.0, 12.0], [6.0]])
-      self.assertEqual((x * 3.0).eval().tolist(), [[3.0, -6.0], [24.0]])
+    self.assertRaggedEqual((x * y), [[4.0, -8.0], [16.0]])
+    self.assertRaggedEqual((3.0 * y), [[12.0, 12.0], [6.0]])
+    self.assertRaggedEqual((x * 3.0), [[3.0, -6.0], [24.0]])
 
-      self.assertEqual((x / y).eval().tolist(), [[0.25, -0.5], [4.0]])
-      self.assertEqual((y / x).eval().tolist(), [[4.0, -2.0], [0.25]])
-      self.assertEqual((2.0 / y).eval().tolist(), [[0.5, 0.5], [1.0]])
-      self.assertEqual((x / 2.0).eval().tolist(), [[0.5, -1.0], [4.0]])
+    self.assertRaggedEqual((x / y), [[0.25, -0.5], [4.0]])
+    self.assertRaggedEqual((y / x), [[4.0, -2.0], [0.25]])
+    self.assertRaggedEqual((2.0 / y), [[0.5, 0.5], [1.0]])
+    self.assertRaggedEqual((x / 2.0), [[0.5, -1.0], [4.0]])
 
-      self.assertEqual((x // y).eval().tolist(), [[0.0, -1.0], [4.0]])
-      self.assertEqual((y // x).eval().tolist(), [[4.0, -2.0], [0.0]])
-      self.assertEqual((2.0 // y).eval().tolist(), [[0.0, 0.0], [1.0]])
-      self.assertEqual((x // 2.0).eval().tolist(), [[0.0, -1.0], [4.0]])
+    self.assertRaggedEqual((x // y), [[0.0, -1.0], [4.0]])
+    self.assertRaggedEqual((y // x), [[4.0, -2.0], [0.0]])
+    self.assertRaggedEqual((2.0 // y), [[0.0, 0.0], [1.0]])
+    self.assertRaggedEqual((x // 2.0), [[0.0, -1.0], [4.0]])
 
-      self.assertEqual((x % y).eval().tolist(), [[1.0, 2.0], [0.0]])
-      self.assertEqual((y % x).eval().tolist(), [[0.0, -0.0], [2.0]])
-      self.assertEqual((2.0 % y).eval().tolist(), [[2.0, 2.0], [0.0]])
-      self.assertEqual((x % 2.0).eval().tolist(), [[1.0, 0.0], [0.0]])
+    self.assertRaggedEqual((x % y), [[1.0, 2.0], [0.0]])
+    self.assertRaggedEqual((y % x), [[0.0, -0.0], [2.0]])
+    self.assertRaggedEqual((2.0 % y), [[2.0, 2.0], [0.0]])
+    self.assertRaggedEqual((x % 2.0), [[1.0, 0.0], [0.0]])
 
   def testLogicalOperators(self):
     a = ragged.constant([[True, True], [False]])
     b = ragged.constant([[True, False], [False]])
-    with self.test_session():
-      self.assertEqual((~a).eval().tolist(), [[False, False], [True]])
+    self.assertRaggedEqual((~a), [[False, False], [True]])
 
-      self.assertEqual((a & b).eval().tolist(), [[True, False], [False]])
-      self.assertEqual((a & True).eval().tolist(), [[True, True], [False]])
-      self.assertEqual((True & b).eval().tolist(), [[True, False], [False]])
+    self.assertRaggedEqual((a & b), [[True, False], [False]])
+    self.assertRaggedEqual((a & True), [[True, True], [False]])
+    self.assertRaggedEqual((True & b), [[True, False], [False]])
 
-      self.assertEqual((a | b).eval().tolist(), [[True, True], [False]])
-      self.assertEqual((a | False).eval().tolist(), [[True, True], [False]])
-      self.assertEqual((False | b).eval().tolist(), [[True, False], [False]])
+    self.assertRaggedEqual((a | b), [[True, True], [False]])
+    self.assertRaggedEqual((a | False), [[True, True], [False]])
+    self.assertRaggedEqual((False | b), [[True, False], [False]])
 
-      self.assertEqual((a ^ b).eval().tolist(), [[False, True], [False]])
-      self.assertEqual((a ^ True).eval().tolist(), [[False, False], [True]])
-      self.assertEqual((True ^ b).eval().tolist(), [[False, True], [True]])
+    self.assertRaggedEqual((a ^ b), [[False, True], [False]])
+    self.assertRaggedEqual((a ^ True), [[False, False], [True]])
+    self.assertRaggedEqual((True ^ b), [[False, True], [True]])
 
   def testDummyOperators(self):
     a = ragged.constant([[True, True], [False]])
diff --git a/tensorflow/python/ops/ragged/ragged_range_op_test.py b/tensorflow/python/ops/ragged/ragged_range_op_test.py
index 644423ecb7..5ab3d4abc3 100644
--- a/tensorflow/python/ops/ragged/ragged_range_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_range_op_test.py
@@ -21,111 +21,102 @@ from __future__ import print_function
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedRangeOpTest(test_util.TensorFlowTestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedRangeOpTest(ragged_test_util.RaggedTensorTestCase):
 
-  @test_util.run_deprecated_v1
   def testDocStringExamples(self):
     """Examples from ragged_range.__doc__."""
-    with self.test_session():
-      rt1 = ragged.range([3, 5, 2]).eval().tolist()
-      self.assertEqual(rt1, [[0, 1, 2], [0, 1, 2, 3, 4], [0, 1]])
+    rt1 = ragged.range([3, 5, 2])
+    self.assertRaggedEqual(rt1, [[0, 1, 2], [0, 1, 2, 3, 4], [0, 1]])
 
-      rt2 = ragged.range([0, 5, 8], [3, 3, 12]).eval().tolist()
-      self.assertEqual(rt2, [[0, 1, 2], [], [8, 9, 10, 11]])
+    rt2 = ragged.range([0, 5, 8], [3, 3, 12])
+    self.assertRaggedEqual(rt2, [[0, 1, 2], [], [8, 9, 10, 11]])
 
-      rt3 = ragged.range([0, 5, 8], [3, 3, 12], 2).eval().tolist()
-      self.assertEqual(rt3, [[0, 2], [], [8, 10]])
+    rt3 = ragged.range([0, 5, 8], [3, 3, 12], 2)
+    self.assertRaggedEqual(rt3, [[0, 2], [], [8, 10]])
 
-  @test_util.run_deprecated_v1
   def testBasicRanges(self):
-    with self.test_session():
-      # Specify limits only.
-      self.assertEqual(
-          ragged.range([0, 3, 5]).eval().tolist(),
-          [list(range(0)), list(range(3)), list(range(5))])
-
-      # Specify starts and limits.
-      self.assertEqual(
-          ragged.range([0, 3, 5], [2, 3, 10]).eval().tolist(),
-          [list(range(0, 2)), list(range(3, 3)), list(range(5, 10))])
-
-      # Specify starts, limits, and deltas.
-      self.assertEqual(
-          ragged.range([0, 3, 5], [4, 4, 15], [2, 3, 4]).eval().tolist(),
-          [list(range(0, 4, 2)), list(range(3, 4, 3)),
-           list(range(5, 15, 4))])
-
-  @test_util.run_deprecated_v1
+    # Specify limits only.
+    self.assertRaggedEqual(
+        ragged.range([0, 3, 5]),
+        [list(range(0)), list(range(3)),
+         list(range(5))])
+
+    # Specify starts and limits.
+    self.assertRaggedEqual(
+        ragged.range([0, 3, 5], [2, 3, 10]),
+        [list(range(0, 2)),
+         list(range(3, 3)),
+         list(range(5, 10))])
+
+    # Specify starts, limits, and deltas.
+    self.assertRaggedEqual(
+        ragged.range([0, 3, 5], [4, 4, 15], [2, 3, 4]),
+        [list(range(0, 4, 2)),
+         list(range(3, 4, 3)),
+         list(range(5, 15, 4))])
+
   def testFloatRanges(self):
-    with self.test_session():
-      expected = [[0.0, 0.4, 0.8, 1.2, 1.6, 2.0, 2.4, 2.8, 3.2, 3.6], [3.0],
-                  [5.0, 7.2, 9.4, 11.6, 13.8]]
-      actual = ragged.range([0.0, 3.0, 5.0], [3.9, 4.0, 15.0],
-                            [0.4, 1.5, 2.2]).eval().tolist()
-      self.assertEqual(expected, [[round(v, 5) for v in row] for row in actual])
-
-  @test_util.run_deprecated_v1
+    expected = [[0.0, 0.4, 0.8, 1.2, 1.6, 2.0, 2.4, 2.8, 3.2, 3.6], [3.0],
+                [5.0, 7.2, 9.4, 11.6, 13.8]]
+    actual = ragged.range([0.0, 3.0, 5.0], [3.9, 4.0, 15.0], [0.4, 1.5, 2.2])
+    self.assertEqual(
+        expected,
+        [[round(v, 5) for v in row] for row in self.eval_to_list(actual)])
+
   def testNegativeDeltas(self):
-    with self.test_session():
-      self.assertEqual(
-          ragged.range([0, 3, 5], limits=0, deltas=-1).eval().tolist(),
-          [list(range(0, 0, -1)), list(range(3, 0, -1)),
-           list(range(5, 0, -1))])
-
-      self.assertEqual(
-          ragged.range([0, -3, 5], limits=0, deltas=[-1, 1,
-                                                     -2]).eval().tolist(),
-          [list(range(0, 0, -1)), list(range(-3, 0, 1)),
-           list(range(5, 0, -2))])
-
-  @test_util.run_deprecated_v1
+    self.assertRaggedEqual(
+        ragged.range([0, 3, 5], limits=0, deltas=-1),
+        [list(range(0, 0, -1)),
+         list(range(3, 0, -1)),
+         list(range(5, 0, -1))])
+
+    self.assertRaggedEqual(
+        ragged.range([0, -3, 5], limits=0, deltas=[-1, 1, -2]),
+        [list(range(0, 0, -1)),
+         list(range(-3, 0, 1)),
+         list(range(5, 0, -2))])
+
   def testBroadcast(self):
-    with self.test_session():
-      # Specify starts and limits, broadcast deltas.
-      self.assertEqual(
-          ragged.range([0, 3, 5], [4, 4, 15], 3).eval().tolist(),
-          [list(range(0, 4, 3)), list(range(3, 4, 3)),
-           list(range(5, 15, 3))])
-
-      # Broadcast all arguments.
-      self.assertEqual(
-          ragged.range(0, 5, 1).eval().tolist(), [list(range(0, 5, 1))])
-
-  @test_util.run_deprecated_v1
+    # Specify starts and limits, broadcast deltas.
+    self.assertRaggedEqual(
+        ragged.range([0, 3, 5], [4, 4, 15], 3),
+        [list(range(0, 4, 3)),
+         list(range(3, 4, 3)),
+         list(range(5, 15, 3))])
+
+    # Broadcast all arguments.
+    self.assertRaggedEqual(ragged.range(0, 5, 1), [list(range(0, 5, 1))])
+
   def testEmptyRanges(self):
     rt1 = ragged.range([0, 5, 3], [0, 3, 5])
     rt2 = ragged.range([0, 5, 5], [0, 3, 5], -1)
-    with self.test_session():
-      self.assertEqual(rt1.eval().tolist(), [[], [], [3, 4]])
-      self.assertEqual(rt2.eval().tolist(), [[], [5, 4], []])
+    self.assertRaggedEqual(rt1, [[], [], [3, 4]])
+    self.assertRaggedEqual(rt2, [[], [5, 4], []])
 
-  @test_util.run_deprecated_v1
   def testShapeFnErrors(self):
-    with self.test_session():
-      self.assertRaisesRegexp(ValueError, r'Shape must be at most rank 1.*',
-                              ragged.range, [[0]], 5)
-      self.assertRaisesRegexp(ValueError, r'Shape must be at most rank 1.*',
-                              ragged.range, 0, [[5]])
-      self.assertRaisesRegexp(ValueError, r'Shape must be at most rank 1.*',
-                              ragged.range, 0, 5, [[0]])
-      self.assertRaisesRegexp(ValueError, r'Dimensions must be equal.*',
-                              ragged.range, [0], [1, 2])
-
-  @test_util.run_deprecated_v1
+    self.assertRaises((ValueError, errors.InvalidArgumentError), ragged.range,
+                      [[0]], 5)
+    self.assertRaises((ValueError, errors.InvalidArgumentError), ragged.range,
+                      0, [[5]])
+    self.assertRaises((ValueError, errors.InvalidArgumentError), ragged.range,
+                      0, 5, [[0]])
+    self.assertRaises((ValueError, errors.InvalidArgumentError), ragged.range,
+                      [0], [1, 2])
+
   def testKernelErrors(self):
-    with self.test_session():
-      self.assertRaisesRegexp(errors.InvalidArgumentError,
-                              r'Requires delta != 0',
-                              ragged.range(0, 0, 0).eval)
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 r'Requires delta != 0'):
+      self.evaluate(ragged.range(0, 0, 0))
 
-  @test_util.run_deprecated_v1
   def testShape(self):
-    self.assertEqual(ragged.range(0, 0, 0).shape.as_list(), [1, None])
-    self.assertEqual(ragged.range([1, 2, 3]).shape.as_list(), [3, None])
-    self.assertEqual(
+    self.assertRaggedEqual(ragged.range(0, 0, 1).shape.as_list(), [1, None])
+    self.assertRaggedEqual(ragged.range([1, 2, 3]).shape.as_list(), [3, None])
+    self.assertRaggedEqual(
         ragged.range([1, 2, 3], [4, 5, 6]).shape.as_list(), [3, None])
 
 
diff --git a/tensorflow/python/ops/ragged/ragged_reduce_op_test.py b/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
index 2e4db2a423..890460221b 100644
--- a/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
@@ -21,11 +21,13 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 _MAX_INT32 = dtypes.int32.max
@@ -37,7 +39,9 @@ def mean(*values):
   return 1.0 * sum(values) / len(values)
 
 
-class RaggedReduceOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedReduceOpsTest(ragged_test_util.RaggedTensorTestCase,
+                          parameterized.TestCase):
 
   @parameterized.parameters(
       #=========================================================================
@@ -300,19 +304,16 @@ class RaggedReduceOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           axis=2,
           expected=[[mean(1, 2), mean(3, 4, 5)], [mean(6, 7), 8], [9]]),
   )
-  @test_util.run_v1_only('b/120545219')
   def testReduce(self, ragged_reduce_op, rt_input, axis, expected):
     rt_input = ragged.constant(rt_input)
     reduced = ragged_reduce_op(rt_input, axis)
-    with self.test_session():
-      self.assertEqual(reduced.eval().tolist(), expected)
+    self.assertRaggedEqual(reduced, expected)
 
   def assertEqualWithNan(self, actual, expected):
     """Like assertEqual, but NaN==NaN."""
     self.assertTrue(
         ((actual == expected) | (np.isnan(actual) & np.isnan(expected))).all())
 
-  @test_util.run_deprecated_v1
   def testMeanNan(self):
     rt_as_list = [[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]]
     expected = (
@@ -320,24 +321,22 @@ class RaggedReduceOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
             [4, 1, 0, 2, 1, 2]))
     rt_input = ragged.constant(rt_as_list)
     reduced = ragged.reduce_mean(rt_input, axis=1)
-    with self.test_session():
-      self.assertEqualWithNan(reduced.eval(), expected)
+    self.assertEqualWithNan(self.evaluate(reduced), expected)
 
-  @test_util.run_deprecated_v1
   def testMeanWithTensorInputs(self):
     tensor = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
     expected = [2.0, 20.0]
     reduced = ragged.reduce_mean(tensor, axis=1)
-    with self.test_session():
-      self.assertAllEqual(reduced.eval(), expected)
+    self.assertRaggedEqual(reduced, expected)
 
-  @test_util.run_deprecated_v1
   def testErrors(self):
     rt_input = ragged.constant([[1, 2, 3], [4, 5]])
     axis = array_ops.placeholder_with_default(constant_op.constant([0]), None)
-    self.assertRaisesRegexp(ValueError,
-                            r'axis must be known at graph construction time.',
-                            ragged.reduce_sum, rt_input, axis)
+
+    if not context.executing_eagerly():
+      self.assertRaisesRegexp(
+          ValueError, r'axis must be known at graph construction time.',
+          ragged.reduce_sum, rt_input, axis)
     self.assertRaisesRegexp(TypeError,
                             r'axis must be an int; got str.*',
                             ragged.reduce_sum, rt_input, ['x'])
diff --git a/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py b/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
index 9f4aa1b136..15112d6c9c 100644
--- a/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
@@ -20,12 +20,16 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedRowLengthsOp(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedRowLengthsOp(ragged_test_util.RaggedTensorTestCase,
+                         parameterized.TestCase):
 
   @parameterized.parameters([
       # Docstring Example
@@ -37,24 +41,6 @@ class RaggedRowLengthsOp(test_util.TensorFlowTestCase, parameterized.TestCase):
           axis=2,
           expected=[[3, 1], [], [2, 1], [1], []]),
 
-      # 1D tensor
-      dict(
-          rt_input=[1, 2, 3, 4, 5],
-          ragged_rank=0,
-          axis=0,
-          expected=5),
-
-      # 2D Tensor (0 ragged dimensions)
-      dict(
-          rt_input=[[1, 2], [3, 4], [5, 6], [7, 8]],
-          ragged_rank=0,
-          expected=[2, 2, 2, 2]),
-      dict(
-          rt_input=[[1, 2], [3, 4], [5, 6], [7, 8]],
-          ragged_rank=0,
-          axis=0,
-          expected=4),
-
       # 2D Tensor (1 ragged dimension)
       dict(
           rt_input=[['a'], ['b', 'c', 'd'], ['e'], [], ['f']],
@@ -79,24 +65,6 @@ class RaggedRowLengthsOp(test_util.TensorFlowTestCase, parameterized.TestCase):
           axis=0,
           expected=0),
 
-      # 3D Tensor (0 ragged dimensions)
-      dict(
-          rt_input=[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]],
-          ragged_rank=0,
-          axis=0,
-          expected=2),
-      dict(
-          rt_input=[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]],
-          ragged_rank=0,
-          axis=1,
-          expected=[3, 3]),
-      dict(
-          rt_input=[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11, 12]]],
-          ragged_rank=0,
-          axis=2,
-          expected=[[2, 2, 2], [2, 2, 2]],
-          expected_ragged_rank=0),
-
       # 3D Tensor (1 ragged dimension)
       dict(
           rt_input=[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10]]],
@@ -143,7 +111,6 @@ class RaggedRowLengthsOp(test_util.TensorFlowTestCase, parameterized.TestCase):
           expected=[[2, 3, 0], [4, 1]],
           expected_ragged_rank=1),
   ])  # pyformat: disable
-  @test_util.run_v1_only('b/120545219')
   def testRowLengths(self,
                      rt_input,
                      expected,
@@ -151,34 +118,28 @@ class RaggedRowLengthsOp(test_util.TensorFlowTestCase, parameterized.TestCase):
                      ragged_rank=None,
                      expected_ragged_rank=None):
     rt = ragged.constant(rt_input, ragged_rank=ragged_rank)
-    lengths = ragged.row_lengths(rt, axis)
-    with self.test_session():
-      self.assertEqual(lengths.eval().tolist(), expected)
-      if expected_ragged_rank is not None:
-        if isinstance(lengths, ragged.RaggedTensor):
-          self.assertEqual(lengths.ragged_rank, expected_ragged_rank)
-        else:
-          self.assertEqual(0, expected_ragged_rank)
+    lengths = rt.row_lengths(axis)
+    self.assertRaggedEqual(lengths, expected)
+    if expected_ragged_rank is not None:
+      if isinstance(lengths, ragged.RaggedTensor):
+        self.assertEqual(lengths.ragged_rank, expected_ragged_rank)
+      else:
+        self.assertEqual(0, expected_ragged_rank)
 
   @parameterized.parameters([
-      dict(
-          rt_input=10,
-          exception=ValueError,
-          message='rt_input may not be a scalar.'),
-      dict(
-          rt_input=[10, 20],
-          axis=1,
-          exception=ValueError,
-          message='axis=1 out of bounds: expected -1<=axis<1.'),
-      dict(
+      dict(  # axis=2 out of bounds: expected -2<=axis<2.
+          rt_input=[[10, 20], [30]],
+          axis=2,
+          exception=(ValueError, errors.InvalidArgumentError)),
+      dict(  # axis=-3 out of bounds: expected -2<=axis<2.
           rt_input=[[2, 3, 0], [4, 1, 2]],
           axis=-3,
-          exception=ValueError,
-          message='axis=-3 out of bounds: expected -2<=axis<2.'),
+          exception=(ValueError, errors.InvalidArgumentError)),
   ])
-  def testErrors(self, rt_input, exception, message, axis=1):
+  def testErrors(self, rt_input, exception, message=None, axis=1):
+    rt = ragged.constant(rt_input)
     with self.assertRaisesRegexp(exception, message):
-      ragged.row_lengths(rt_input, axis)
+      rt.row_lengths(axis)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_row_splits_to_segment_ids_op_test.py b/tensorflow/python/ops/ragged/ragged_row_splits_to_segment_ids_op_test.py
index 7f5f4e91bd..2970540f3e 100644
--- a/tensorflow/python/ops/ragged/ragged_row_splits_to_segment_ids_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_row_splits_to_segment_ids_op_test.py
@@ -21,27 +21,24 @@ from __future__ import print_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedSplitsToSegmentIdsOpTest(test_util.TensorFlowTestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedSplitsToSegmentIdsOpTest(ragged_test_util.RaggedTensorTestCase):
 
-  @test_util.run_deprecated_v1
   def testDocStringExample(self):
     splits = [0, 3, 3, 5, 6, 9]
     expected = [0, 0, 0, 2, 2, 3, 4, 4, 4]
     segment_ids = ragged.row_splits_to_segment_ids(splits)
-    with self.test_session():
-      self.assertEqual(segment_ids.eval().tolist(), expected)
+    self.assertAllEqual(segment_ids, expected)
 
-  @test_util.run_deprecated_v1
   def testEmptySplits(self):
     # Note: the splits for an empty ragged tensor contains a single zero.
     segment_ids = ragged.row_splits_to_segment_ids([0])
-    with self.test_session():
-      self.assertEqual(segment_ids.eval().tolist(), [])
+    self.assertAllEqual(segment_ids, [])
 
-  @test_util.run_deprecated_v1
   def testErrors(self):
     self.assertRaisesRegexp(ValueError, r'Invalid row_splits: \[\]',
                             ragged.row_splits_to_segment_ids, [])
diff --git a/tensorflow/python/ops/ragged/ragged_segment_ids_to_row_splits_op_test.py b/tensorflow/python/ops/ragged/ragged_segment_ids_to_row_splits_op_test.py
index 7e52f2d844..4ed9626767 100644
--- a/tensorflow/python/ops/ragged/ragged_segment_ids_to_row_splits_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_segment_ids_to_row_splits_op_test.py
@@ -21,25 +21,23 @@ from __future__ import print_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedSplitsToSegmentIdsOpTest(test_util.TensorFlowTestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedSplitsToSegmentIdsOpTest(ragged_test_util.RaggedTensorTestCase):
 
-  @test_util.run_deprecated_v1
   def testDocStringExample(self):
     segment_ids = [0, 0, 0, 2, 2, 3, 4, 4, 4]
     expected = [0, 3, 3, 5, 6, 9]
     splits = ragged.segment_ids_to_row_splits(segment_ids)
-    with self.test_session():
-      self.assertEqual(splits.eval().tolist(), expected)
+    self.assertAllEqual(splits, expected)
 
-  @test_util.run_deprecated_v1
   def testEmptySegmentIds(self):
     # Note: the splits for an empty ragged tensor contains a single zero.
     segment_ids = ragged.segment_ids_to_row_splits([])
-    with self.test_session():
-      self.assertEqual(segment_ids.eval().tolist(), [0])
+    self.assertAllEqual(segment_ids, [0])
 
   def testErrors(self):
     self.assertRaisesRegexp(TypeError,
@@ -51,16 +49,13 @@ class RaggedSplitsToSegmentIdsOpTest(test_util.TensorFlowTestCase):
     self.assertRaisesRegexp(ValueError, r'Shape \(1, 1\) must have rank 1',
                             ragged.segment_ids_to_row_splits, [[0]])
 
-  @test_util.run_deprecated_v1
   def testNumSegments(self):
     segment_ids = [0, 0, 0, 2, 2, 3, 4, 4, 4]
     num_segments = 7
     expected = [0, 3, 3, 5, 6, 9, 9, 9]
     splits = ragged.segment_ids_to_row_splits(segment_ids, num_segments)
-    with self.test_session():
-      self.assertEqual(splits.eval().tolist(), expected)
+    self.assertAllEqual(splits, expected)
 
-  @test_util.run_deprecated_v1
   def testUnsortedSegmentIds(self):
     # Segment ids are not required to be sorted.
     segment_ids = [0, 4, 3, 2, 4, 4, 2, 0, 0]
@@ -69,9 +64,8 @@ class RaggedSplitsToSegmentIdsOpTest(test_util.TensorFlowTestCase):
 
     splits2 = ragged.segment_ids_to_row_splits(segment_ids, 7)
     expected2 = [0, 3, 3, 5, 6, 9, 9, 9]
-    with self.test_session():
-      self.assertEqual(splits1.eval().tolist(), expected1)
-      self.assertEqual(splits2.eval().tolist(), expected2)
+    self.assertAllEqual(splits1, expected1)
+    self.assertAllEqual(splits2, expected2)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_segment_op_test.py b/tensorflow/python/ops/ragged/ragged_segment_op_test.py
index 52fe739342..be1f39afef 100644
--- a/tensorflow/python/ops/ragged/ragged_segment_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_segment_op_test.py
@@ -26,6 +26,7 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
@@ -45,21 +46,10 @@ def sqrt_n(values):
   return 1.0 * sum(values) / math.sqrt(len(values))
 
 
-class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedSegmentOpsTest(ragged_test_util.RaggedTensorTestCase,
                            parameterized.TestCase):
 
-  def assertNestedListAmostEqual(self, lhs, rhs, places=7, context='value'):
-    self.assertEqual(type(lhs), type(rhs))
-    if isinstance(lhs, (list, tuple)):
-      self.assertEqual(len(lhs), len(rhs), 'Length differs for %s' % context)
-      for i in range(len(lhs)):
-        self.assertNestedListAmostEqual(lhs[i], rhs[i], places,
-                                        '%s[%s]' % (context, i))
-    else:
-      self.assertAlmostEqual(
-          lhs, rhs, places,
-          '%s != %s within %s places at %s' % (lhs, rhs, places, context))
-
   def expected_value(self, data, segment_ids, num_segments, combiner):
     """Find the expected value for a call to ragged_segment_<aggregate>.
 
@@ -110,7 +100,6 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
       (ragged.segment_mean, mean, [5, 4, 3, 2, 1, 0]),
       (ragged.segment_mean, mean, [0, 0, 0, 10, 10, 10]),
   )
-  @test_util.run_deprecated_v1
   def testRaggedSegment_Int(self, segment_op, combiner, segment_ids):
     rt_as_list = [[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]]
     rt = ragged.constant(rt_as_list)
@@ -119,7 +108,7 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
                                    combiner)
 
     segmented = segment_op(rt, segment_ids, num_segments)
-    self.assertListEqual(self.evaluate(segmented).tolist(), expected)
+    self.assertRaggedEqual(segmented, expected)
 
   @parameterized.parameters(
       (ragged.segment_sum, sum, [0, 0, 1, 1, 2, 2]),
@@ -147,7 +136,6 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
       (ragged.segment_sqrt_n, sqrt_n, [5, 4, 3, 2, 1, 0]),
       (ragged.segment_sqrt_n, sqrt_n, [0, 0, 0, 10, 10, 10]),
   )
-  @test_util.run_deprecated_v1
   def testRaggedSegment_Float(self, segment_op, combiner, segment_ids):
     rt_as_list = [[0., 1., 2., 3.], [4.], [], [5., 6.], [7.], [8., 9.]]
     rt = ragged.constant(rt_as_list)
@@ -156,10 +144,8 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
                                    combiner)
 
     segmented = segment_op(rt, segment_ids, num_segments)
-    self.assertNestedListAmostEqual(
-        self.evaluate(segmented).tolist(), expected, places=5)
+    self.assertRaggedAlmostEqual(segmented, expected, places=5)
 
-  @test_util.run_deprecated_v1
   def testRaggedRankTwo(self):
     rt = ragged.constant([
         [[111, 112, 113, 114], [121],],  # row 0
@@ -173,16 +159,15 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
                  [],                                # row 1
                  [[411, 412], [321, 322], [331]]    # row 2
                 ]  # pyformat: disable
-    self.assertEqual(self.evaluate(segmented1).tolist(), expected1)
+    self.assertRaggedEqual(segmented1, expected1)
 
     segment_ids2 = [1, 2, 1, 1]
     segmented2 = ragged.segment_sum(rt, segment_ids2, 3)
     expected2 = [[],
                  [[111+411, 112+412, 113, 114], [121+321, 322], [331]],
                  []]  # pyformat: disable
-    self.assertEqual(self.evaluate(segmented2).tolist(), expected2)
+    self.assertRaggedEqual(segmented2, expected2)
 
-  @test_util.run_v1_only('b/120545219')
   def testRaggedSegmentIds(self):
     rt = ragged.constant([
         [[111, 112, 113, 114], [121],],  # row 0
@@ -195,7 +180,7 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
     expected = [[],
                 [111+321, 112+322, 113, 114],
                 [121+331+411, 412]]  # pyformat: disable
-    self.assertEqual(self.evaluate(segmented).tolist(), expected)
+    self.assertRaggedEqual(segmented, expected)
 
   def testShapeMismatchError1(self):
     dt = constant_op.constant([1, 2, 3, 4, 5, 6])
@@ -205,7 +190,6 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
         'but segment_ids is ragged and data is not.', ragged.segment_sum, dt,
         segment_ids, 3)
 
-  @test_util.run_v1_only('b/120545219')
   def testShapeMismatchError2(self):
     rt = ragged.constant([
         [[111, 112, 113, 114], [121]],  # row 0
@@ -222,14 +206,13 @@ class RaggedSegmentOpsTest(test_util.TensorFlowTestCase,
         ragged.segment_sum, rt, segment_ids, 3)
 
     # Otherwise, error is raised when we run the graph.
-    segment_ids2 = ragged.from_row_splits(
+    segment_ids2 = ragged.RaggedTensor.from_row_splits(
         array_ops.placeholder_with_default(segment_ids.values, None),
         array_ops.placeholder_with_default(segment_ids.row_splits, None))
-    segmented2 = ragged.segment_sum(rt, segment_ids2, 3)
-    with self.cached_session():
-      self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          'segment_ids.shape must be a prefix of data.shape.*', segmented2.eval)
+    with self.assertRaisesRegexp(
+        errors.InvalidArgumentError,
+        'segment_ids.shape must be a prefix of data.shape.*'):
+      self.evaluate(ragged.segment_sum(rt, segment_ids2, 3))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_stack_op_test.py b/tensorflow/python/ops/ragged/ragged_stack_op_test.py
index ed07aca6d4..17d80b5aad 100644
--- a/tensorflow/python/ops/ragged/ragged_stack_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_stack_op_test.py
@@ -23,10 +23,13 @@ from absl.testing import parameterized
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedStackOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedStackOpTest(ragged_test_util.RaggedTensorTestCase,
+                        parameterized.TestCase):
 
   @parameterized.parameters(
       dict(
@@ -265,7 +268,6 @@ class RaggedStackOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           axis=0,
           expected=[[[b'a00', b'a01'], [], [b'a20', b'a21']]]),
   )   # pyformat: disable
-  @test_util.run_v1_only('b/120545219')
   def testRaggedStack(self,
                       descr,
                       rt_inputs,
@@ -286,8 +288,7 @@ class RaggedStackOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       self.assertEqual(stacked.ragged_rank, expected_ragged_rank)
     if expected_shape is not None:
       self.assertEqual(stacked.shape.as_list(), expected_shape)
-    with self.test_session():
-      self.assertEqual(stacked.eval().tolist(), expected)
+    self.assertRaggedEqual(stacked, expected)
 
   @parameterized.parameters(
       dict(
@@ -314,7 +315,6 @@ class RaggedStackOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
   def testError(self, rt_inputs, axis, error, message):
     self.assertRaisesRegexp(error, message, ragged.stack, rt_inputs, axis)
 
-  @test_util.run_deprecated_v1
   def testSingleTensorInput(self):
     """Tests ragged_stack with a single tensor input.
 
@@ -324,8 +324,7 @@ class RaggedStackOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     """
     rt_inputs = ragged.constant([[1, 2], [3, 4]])
     stacked = ragged.stack(rt_inputs, 0)
-    with self.test_session():
-      self.assertEqual(stacked.eval().tolist(), [[[1, 2], [3, 4]]])
+    self.assertRaggedEqual(stacked, [[[1, 2], [3, 4]]])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_string_ops.py b/tensorflow/python/ops/ragged/ragged_string_ops.py
index cdcdbdff07..1f9f0abe4f 100644
--- a/tensorflow/python/ops/ragged/ragged_string_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_string_ops.py
@@ -23,7 +23,6 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_string_ops
 from tensorflow.python.ops.ragged import ragged_conversion_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.util.tf_export import tf_export
 
@@ -65,16 +64,16 @@ def unicode_encode(input, output_encoding, errors="replace",
     ```
   """
   with ops.name_scope(name, "UnicodeEncode", [input]):
-    input_tensor = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(input)
+    input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input)
     if input_tensor.shape.ndims is None:
       raise ValueError("Rank of input_tensor must be statically known.")
     if ragged_tensor.is_ragged(input_tensor):
-      if input_tensor.inner_values.shape.ndims > 1:
-        # If the inner_values of our ragged tensor is multi-dimensional, we can
+      if input_tensor.flat_values.shape.ndims > 1:
+        # If the flat_values of our ragged tensor is multi-dimensional, we can
         # process it separately and our output will have the same nested splits
         # as our input.
-        return input_tensor.with_inner_values(
-            unicode_encode(input_tensor.inner_values, output_encoding, errors,
+        return input_tensor.with_flat_values(
+            unicode_encode(input_tensor.flat_values, output_encoding, errors,
                            replacement_char))
       elif input_tensor.ragged_rank > 1:
         # Recursively process the values of the ragged tensor.
@@ -82,7 +81,7 @@ def unicode_encode(input, output_encoding, errors="replace",
             unicode_encode(input_tensor.values, output_encoding, errors,
                            replacement_char))
       else:
-        # Our ragged tensor is of the correct shape (rank 1 inner_values tensor
+        # Our ragged tensor is of the correct shape (rank 1 flat_values tensor
         # with ragged_rank of 1) so we can process it as normal.
         return gen_string_ops.unicode_encode(
             input_values=input_tensor.values,
@@ -110,10 +109,10 @@ def unicode_encode(input, output_encoding, errors="replace",
         # Our input tensor is rank 1, so we create a ragged tensor with an added
         # dimension to create the correct input shape & type, and then remove
         # the additional dimension from the output and return the string scalar.
-        ragged_input_tensor = ragged_factory_ops.from_row_splits(
+        ragged_input_tensor = ragged_tensor.RaggedTensor.from_row_splits(
             input_tensor,
-            array_ops.stack([0, array_ops.shape(input_tensor,
-                                                out_type=dtypes.int64)[0]]))
+            array_ops.stack(
+                [0, array_ops.shape(input_tensor, out_type=dtypes.int64)[0]]))
         output_tensor = unicode_encode(ragged_input_tensor, output_encoding,
                                        errors, replacement_char)
         return array_ops.reshape(output_tensor, [])
diff --git a/tensorflow/python/ops/ragged/ragged_tensor.py b/tensorflow/python/ops/ragged/ragged_tensor.py
index ddeabfb464..567c50203a 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor.py
@@ -19,9 +19,19 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.client import session
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_ragged_conversion_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_tensor_value
+from tensorflow.python.ops.ragged import ragged_util
+from tensorflow.python.ops.ragged import segment_id_ops
 
 # pylint: disable=protected-access
 _eval_using_default_session = ops._eval_using_default_session
@@ -84,10 +94,10 @@ class RaggedTensor(object):
   Example:
 
   ```python
-  >>> rt = ragged.from_row_splits(values=[3, 1, 4, 1, 5, 9, 2, 6],
-  ...                             row_splits=[0, 4, 4, 7, 8, 8])
-  >>> rt.tolist()
-  [[3, 1, 4, 1], [], [5, 9, 2], [6], []]
+  >>> print(tf.RaggedTensor.from_row_splits(
+  ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
+  ...     row_splits=[0, 4, 4, 7, 8, 8]))
+  <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
   ```
 
   ### Alternative Row-Partitioning Schemes
@@ -116,13 +126,12 @@ class RaggedTensor(object):
 
   ```python
   >>> values = [3, 1, 4, 1, 5, 9, 2, 6]
-  >>> rt1 = ragged.from_row_splits(values, row_splits=[0, 4, 4, 7, 8, 8])
-  >>> rt2 = ragged.from_row_lengths(values, row_lengths=[4, 0, 3, 1, 0])
-  >>> rt3 = ragged.from_value_rowids(values,
-  ...                                value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
-  ...                                nrows=5)
-  >>> rt4 = ragged.from_row_starts(values, row_starts=[0, 4, 4, 7, 8])
-  >>> rt5 = ragged.from_row_limits(values, row_limits=[4, 4, 7, 8, 8])
+  >>> rt1 = RaggedTensor.from_row_splits(values, row_splits=[0, 4, 4, 7, 8, 8])
+  >>> rt2 = RaggedTensor.from_row_lengths(values, row_lengths=[4, 0, 3, 1, 0])
+  >>> rt3 = RaggedTensor.from_value_rowids(
+  ...     values, value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], nrows=5)
+  >>> rt4 = RaggedTensor.from_row_starts(values, row_starts=[0, 4, 4, 7, 8])
+  >>> rt5 = RaggedTensor.from_row_limits(values, row_limits=[4, 4, 7, 8, 8])
   ```
 
   ### Multiple Ragged Dimensions
@@ -132,24 +141,24 @@ class RaggedTensor(object):
   adds a single ragged dimension.
 
   ```python
-  >>> inner_rt = ragged.from_row_splits(  # =rt1 from above
+  >>> inner_rt = RaggedTensor.from_row_splits(  # =rt1 from above
   ...     values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8])
-  >>> outer_rt = ragged.from_row_splits(
+  >>> outer_rt = RaggedTensor.from_row_splits(
   ...     values=inner_rt, row_splits=[0, 3, 3, 5])
-  >>> print outer_rt.tolist()
+  >>> print outer_rt.to_list()
   [[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]]
   >>> print outer_rt.ragged_rank
   2
   ```
 
-  The factory function `ragged.from_nested_row_splits` may be used to
+  The factory function `RaggedTensor.from_nested_row_splits` may be used to
   construct a `RaggedTensor` with multiple ragged dimensions directly, by
   providing a list of `row_splits` tensors:
 
   ```python
-  >>> ragged.from_nested_row_splits(
-  ...     inner_values=[3, 1, 4, 1, 5, 9, 2, 6],
-  ...     nested_row_splits=([0, 3, 3, 5], [0, 4, 4, 7, 8, 8])).tolist()
+  >>> RaggedTensor.from_nested_row_splits(
+  ...     flat_values=[3, 1, 4, 1, 5, 9, 2, 6],
+  ...     nested_row_splits=([0, 3, 3, 5], [0, 4, 4, 7, 8, 8])).to_list()
   [[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]]
   ```
 
@@ -159,12 +168,13 @@ class RaggedTensor(object):
   by using a multidimensional `Tensor` for `values`.
 
   ```python
-  >>> rt = ragged.from_row_splits(values=tf.ones([5, 3]), row_splits=[0, 2, 5])
-  >>> print rt.tolist()
+  >>> rt = RaggedTensor.from_row_splits(values=tf.ones([5, 3]),
+  ..                                    row_splits=[0, 2, 5])
+  >>> print rt.to_list()
   [[[1, 1, 1], [1, 1, 1]],
    [[1, 1, 1], [1, 1, 1], [1, 1, 1]]]
-   >>> print rt.shape.as_list()
-   [2, None, 3]
+   >>> print rt.shape
+   (2, ?, 3)
   ```
 
   ### RaggedTensor Shape Restrictions
@@ -181,31 +191,6 @@ class RaggedTensor(object):
   dimension followed by a ragged dimension.
   """
 
-  #=============================================================================
-  # Implementation notes
-  #=============================================================================
-  # Currently, the RaggedTensor class uses a single row-partitioning scheme
-  # (row_splits).
-  #
-  # We are considering adding value_rowids+nvals as a secondary
-  # row-partitioning scheme.  This change would not impact the functional
-  # interface of the RaggedTensor class, but it would impact the efficiency
-  # of several operations.  In particular:
-  #
-  #   * The functions `ragged.value_rowids` and `ragged.nrows` would always
-  #     return pre-existing tensors; they would not need to add any ops to
-  #     the graph.
-  #
-  #   * The `RaggedTensor` constructor would construct all row-partitioning
-  #     tensors (row_splits, value_rowids, and nvals).  In eager mode, this
-  #     would mean that conversion operations would occur whenever a
-  #     `RaggedTensor` is constructed.  But in graph mode, the converted
-  #     row-partitioning tensors would only be evaluated if they are used.
-  #
-  # Since this change impacts efficiency but not functionality, we would like
-  # to perform additional profiling with real-world use cases before we
-  # decide whether to make this change.
-
   #=============================================================================
   # Constructor (private)
   #=============================================================================
@@ -221,13 +206,14 @@ class RaggedTensor(object):
     This constructor is private -- please use one of the following ops to
     build `RaggedTensor`s:
 
-      * [`ragged.from_row_lengths()`](from_row_lengths.md)
-      * [`ragged.from_value_rowids()`](from_value_rowids.md)
-      * [`ragged.from_row_splits()`](from_row_splits.md)
-      * [`ragged.from_row_starts()`](from_row_starts.md)
-      * [`ragged.from_row_limits()`](from_row_limits.md)
-      * [`ragged.from_nested_row_splits()`](from_nested_row_splits.md)
-      * [`ragged.from_nested_value_rowids()`](from_nested_value_rowids.md)
+      * `tf.RaggedTensor.from_row_lengths`
+      * `tf.RaggedTensor.from_value_rowids`
+      * `tf.RaggedTensor.from_row_splits`
+      * `tf.RaggedTensor.from_row_starts`
+      * `tf.RaggedTensor.from_row_limits`
+      * `tf.RaggedTensor.from_nested_row_splits`
+      * `tf.RaggedTensor.from_nested_row_lengths`
+      * `tf.RaggedTensor.from_nested_value_rowids`
 
     Args:
       values: A potentially ragged tensor of any dtype and shape `[nvals, ...]`.
@@ -248,7 +234,7 @@ class RaggedTensor(object):
     if not internal:
       raise ValueError("RaggedTensor constructor is private; please use one "
                        "of the factory methods instead (e.g., "
-                       "ragged.from_row_lengths())")
+                       "RaggedTensor.from_row_lengths())")
 
     # Validate the arguments.
     if not isinstance(values, (RaggedTensor, ops.Tensor)):
@@ -272,6 +258,364 @@ class RaggedTensor(object):
     self._cached_value_rowids = cached_value_rowids
     self._cached_nrows = cached_nrows
 
+  #=============================================================================
+  # Factory Methods
+  #=============================================================================
+
+  @classmethod
+  def from_value_rowids(cls, values, value_rowids, nrows=None, name=None):
+    """Creates a `RaggedTensor` with rows partitioned by `value_rowids`.
+
+    The returned `RaggedTensor` corresponds with the python list defined by:
+
+    ```python
+    result = [[values[i] for i in range(len(values)) if value_rowids[i] == row]
+              for row in range(nrows)]
+    ```
+
+    Warning: currently, this needs to cast value_rowids to int64 before
+    converting, since `tf.bincount` only supports `int32`.
+
+    Args:
+      values: A potentially ragged tensor with shape `[nvals, ...]`.
+      value_rowids: A 1-D int64 tensor with shape `[nvals]`, which corresponds
+        one-to-one with `values`, and specifies each value's row index.  Must be
+        nonnegative, and must be sorted in ascending order.
+      nrows: An int64 scalar specifying the number of rows.  This should be
+        specified if the `RaggedTensor` may containing empty training rows. Must
+        be greater than `value_rowids[-1]` (or zero if `value_rowids` is empty).
+        Defaults to `value_rowids[-1]` (or zero if `value_rowids` is empty).
+      name: A name prefix for the RaggedTensor (optional).
+
+    Returns:
+      A `RaggedTensor`.  `result.rank = values.rank + 1`.
+      `result.ragged_rank = values.ragged_rank + 1`.
+
+    Raises:
+      ValueError: If `nrows` is incompatible with `value_rowids`.
+
+    #### Example:
+      ```python
+      >>> print(tf.RaggedTensor.from_value_rowids(
+      ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
+      ...     value_rowids=[0, 0, 0, 0, 2, 2, 2, 3],
+      ...     nrows=5))
+      <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
+      ```
+    """
+    with ops.name_scope(name, "RaggedFromValueRowIds",
+                        [values, value_rowids, nrows]):
+      values = convert_to_tensor_or_ragged_tensor(values, name="values")
+      value_rowids = ops.convert_to_tensor(
+          value_rowids, dtypes.int64, name="value_rowids")
+      if nrows is None:
+        const_rowids = tensor_util.constant_value(value_rowids)
+        if const_rowids is None:
+          nrows = array_ops.concat([value_rowids[-1:], [-1]], axis=0)[0] + 1
+          const_nrows = None
+        else:
+          const_nrows = const_rowids[-1] + 1 if const_rowids.size > 0 else 0
+          nrows = ops.convert_to_tensor(const_nrows, dtypes.int64, name="nrows")
+      else:
+        nrows = ops.convert_to_tensor(nrows, dtypes.int64, "nrows")
+        const_nrows = tensor_util.constant_value(nrows)
+        if const_nrows is not None:
+          if const_nrows < 0:
+            raise ValueError("Expected nrows >= 0; got %d" % const_nrows)
+          const_rowids = tensor_util.constant_value(value_rowids)
+          if const_rowids is not None and const_rowids.size > 0:
+            if not const_nrows >= const_rowids[-1] + 1:
+              raise ValueError(
+                  "Expected nrows >= value_rowids[-1] + 1; got nrows=%d, "
+                  "value_rowids[-1]=%d" % (const_nrows, const_rowids[-1]))
+
+      value_rowids.shape.assert_has_rank(1)
+      nrows.shape.assert_has_rank(0)
+      values.shape[:1].assert_is_compatible_with(value_rowids.shape)
+
+      # Convert value_rowids & nrows to row_splits.
+      # Note: we don't use segment_ids_to_row_splits() here because we want
+      # to save the intermediate value `row_lengths`, so we can cache it.
+      # TODO(b/116708836) Upgrade bincount to accept int64 so we can skip the
+      # cast (Remove the warning in the docstring when we do.)
+      value_rowids_int32 = math_ops.cast(value_rowids, dtypes.int32)
+      nrows_int32 = math_ops.cast(nrows, dtypes.int32)
+      row_lengths = math_ops.bincount(
+          value_rowids_int32,
+          minlength=nrows_int32,
+          maxlength=nrows_int32,
+          dtype=dtypes.int64)
+      row_splits = array_ops.concat([[0], math_ops.cumsum(row_lengths)], axis=0)
+      if const_nrows is not None:
+        row_lengths.set_shape([const_nrows])
+        row_splits.set_shape([const_nrows + 1])
+
+      return cls(
+          values,
+          row_splits,
+          cached_row_lengths=row_lengths,
+          cached_value_rowids=value_rowids,
+          cached_nrows=nrows,
+          internal=True)
+
+  @classmethod
+  def from_row_splits(cls, values, row_splits, name=None):
+    """Creates a `RaggedTensor` with rows partitioned by `row_splits`.
+
+    The returned `RaggedTensor` corresponds with the python list defined by:
+
+    ```python
+    result = [values[row_splits[i]:row_splits[i + 1]]
+              for i in range(len(row_splits) - 1)]
+    ```
+
+    Args:
+      values: A potentially ragged tensor with shape `[nvals, ...]`.
+      row_splits: A 1-D int64 tensor with shape `[nrows+1]`.  Must not be empty,
+        and must be sorted in ascending order.  `row_splits[0]` must be zero and
+        `row_splits[-1]` must be `nvals`.
+      name: A name prefix for the RaggedTensor (optional).
+
+    Returns:
+      A `RaggedTensor`.  `result.rank = values.rank + 1`.
+      `result.ragged_rank = values.ragged_rank + 1`.
+
+    Raises:
+      ValueError: If `row_splits` is an empty list.
+
+    #### Example:
+      ```python
+      >>> print(tf.RaggedTensor.from_row_splits(
+      ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
+      ...     row_splits=[0, 4, 4, 7, 8, 8]))
+      <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
+      ```
+    """
+    if isinstance(row_splits, (list, tuple)) and not row_splits:
+      raise ValueError("row_splits tensor may not be empty.")
+    with ops.name_scope(name, "RaggedFromRowSplits", [values, row_splits]):
+      values = convert_to_tensor_or_ragged_tensor(values, name="values")
+      row_splits = ops.convert_to_tensor(row_splits, dtypes.int64, "row_splits")
+      row_splits.shape.assert_has_rank(1)
+      return cls(values=values, row_splits=row_splits, internal=True)
+
+  @classmethod
+  def from_row_lengths(cls, values, row_lengths, name=None):
+    """Creates a `RaggedTensor` with rows partitioned by `row_lengths`.
+
+    The returned `RaggedTensor` corresponds with the python list defined by:
+
+    ```python
+    result = [[values.pop(0) for i in range(length)]
+              for length in row_lengths]
+    ```
+
+    Args:
+      values: A potentially ragged tensor with shape `[nvals, ...]`.
+      row_lengths: A 1-D int64 tensor with shape `[nrows]`.  Must be
+        nonnegative.  `sum(row_lengths)` must be `nvals`.
+      name: A name prefix for the RaggedTensor (optional).
+
+    Returns:
+      A `RaggedTensor`.  `result.rank = values.rank + 1`.
+      `result.ragged_rank = values.ragged_rank + 1`.
+
+    #### Example:
+      ```python
+      >>> print(tf.RaggedTensor.from_row_lengths(
+      ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
+      ...     row_lengths=[4, 0, 3, 1, 0]))
+      <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []])>
+      ```
+    """
+    with ops.name_scope(name, "RaggedFromRowLengths", [values, row_lengths]):
+      values = convert_to_tensor_or_ragged_tensor(values, name="values")
+      row_lengths = ops.convert_to_tensor(row_lengths, dtypes.int64,
+                                          "row_lengths")
+      row_lengths.shape.assert_has_rank(1)
+      row_limits = math_ops.cumsum(row_lengths)
+      row_splits = array_ops.concat([[0], row_limits], axis=0)
+      return cls(
+          values=values,
+          row_splits=row_splits,
+          cached_row_lengths=row_lengths,
+          internal=True)
+
+  @classmethod
+  def from_row_starts(cls, values, row_starts, name=None):
+    """Creates a `RaggedTensor` with rows partitioned by `row_starts`.
+
+    Equivalent to: `from_row_splits(values, concat([row_starts, nvals]))`.
+
+    Args:
+      values: A potentially ragged tensor with shape `[nvals, ...]`.
+      row_starts: A 1-D int64 tensor with shape `[nrows]`.  Must be nonnegative
+        and sorted in ascending order.  If `nrows>0`, then `row_starts[0]` must
+        be zero.
+      name: A name prefix for the RaggedTensor (optional).
+
+    Returns:
+      A `RaggedTensor`.  `result.rank = values.rank + 1`.
+      `result.ragged_rank = values.ragged_rank + 1`.
+
+    #### Example:
+      ```python
+      >>> print(tf.RaggedTensor.from_row_starts(
+      ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
+      ...     row_starts=[0, 4, 4, 7, 8]))
+      <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
+      ```
+    """
+    with ops.name_scope(name, "RaggedFromRowStarts", [values, row_starts]):
+      values = convert_to_tensor_or_ragged_tensor(values, name="values")
+      row_starts = ops.convert_to_tensor(row_starts, dtypes.int64, "row_starts")
+      row_starts.shape.assert_has_rank(1)
+      nvals = array_ops.shape(values, out_type=dtypes.int64)[:1]
+      row_splits = array_ops.concat([row_starts, nvals], axis=0)
+      return cls(values=values, row_splits=row_splits, internal=True)
+
+  @classmethod
+  def from_row_limits(cls, values, row_limits, name=None):
+    """Creates a `RaggedTensor` with rows partitioned by `row_limits`.
+
+    Equivalent to: `from_row_splits(values, concat([0, row_limits]))`.
+
+    Args:
+      values: A potentially ragged tensor with shape `[nvals, ...]`.
+      row_limits: A 1-D int64 tensor with shape `[nrows]`.  Must be sorted in
+        ascending order.  If `nrows>0`, then `row_limits[-1]` must be `nvals`.
+      name: A name prefix for the RaggedTensor (optional).
+
+    Returns:
+      A `RaggedTensor`.  `result.rank = values.rank + 1`.
+      `result.ragged_rank = values.ragged_rank + 1`.
+
+    #### Example:
+      ```python
+      >>> print(tf.RaggedTensor.from_row_limits(
+      ...     values=[3, 1, 4, 1, 5, 9, 2, 6],
+      ...     row_limits=[4, 4, 7, 8, 8]))
+      <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
+      ```
+    """
+    with ops.name_scope(name, "RaggedFromRowLimits", [values, row_limits]):
+      values = convert_to_tensor_or_ragged_tensor(values, name="values")
+      row_limits = ops.convert_to_tensor(row_limits, dtypes.int64, "row_limits")
+      row_limits.shape.assert_has_rank(1)
+      zero = array_ops.zeros([1], dtypes.int64)
+      row_splits = array_ops.concat([zero, row_limits], axis=0)
+      return cls(values=values, row_splits=row_splits, internal=True)
+
+  @classmethod
+  def from_nested_value_rowids(cls,
+                               flat_values,
+                               nested_value_rowids,
+                               nested_nrows=None,
+                               name=None):
+    """Creates a `RaggedTensor` from a nested list of `value_rowids` tensors.
+
+    Equivalent to:
+
+    ```python
+    result = flat_values
+    for (rowids, nrows) in reversed(zip(nested_value_rowids, nested_nrows)):
+      result = from_value_rowids(result, rowids, nrows)
+    ```
+
+    Args:
+      flat_values: A potentially ragged tensor.
+      nested_value_rowids: A list of 1-D int64 tensors.  The `i`th tensor is
+        used as the `value_rowids` for the `i`th ragged dimension.
+      nested_nrows: A list of int64 scalars.  The `i`th scalar is used as the
+        `nrows` for the `i`th ragged dimension.
+      name: A name prefix for the RaggedTensor (optional).
+
+    Returns:
+      A `RaggedTensor` (or `flat_values` if `nested_value_rowids` is empty).
+
+    Raises:
+      ValueError: If `len(nested_values_rowids) != len(nested_nrows)`.
+    """
+    if isinstance(nested_value_rowids, ops.Tensor):
+      raise TypeError("nested_value_rowids must be a list of Tensors")
+    if nested_nrows is None:
+      nested_nrows = [None] * len(nested_value_rowids)
+    else:
+      if isinstance(nested_nrows, ops.Tensor):
+        raise TypeError("nested_nrows must be a list of Tensors")
+      if len(nested_nrows) != len(nested_value_rowids):
+        raise ValueError("nested_nrows must have the same length as "
+                         "nested_value_rowids")
+
+    with ops.name_scope(
+        name, "RaggedFromNestedValueRowIds",
+        [flat_values] + list(nested_value_rowids) + list(nested_nrows)):
+      result = flat_values
+      for value_rowids, nrows in reversed(
+          list(zip(nested_value_rowids, nested_nrows))):
+        result = cls.from_value_rowids(result, value_rowids, nrows)
+      return result
+
+  @classmethod
+  def from_nested_row_splits(cls, flat_values, nested_row_splits, name=None):
+    """Creates a `RaggedTensor` from a nested list of `row_splits` tensors.
+
+    Equivalent to:
+
+    ```python
+    result = flat_values
+    for row_splits in reversed(nested_row_splits):
+      result = from_row_splits(result, row_splits)
+    ```
+
+    Args:
+      flat_values: A potentially ragged tensor.
+      nested_row_splits: A list of 1-D int64 tensors.  The `i`th tensor is used
+        as the `row_splits` for the `i`th ragged dimension.
+      name: A name prefix for the RaggedTensor (optional).
+
+    Returns:
+      A `RaggedTensor` (or `flat_values` if `nested_row_splits` is empty).
+    """
+    if isinstance(nested_row_splits, ops.Tensor):
+      raise TypeError("nested_row_splits must be a list of Tensors")
+    with ops.name_scope(name, "RaggedFromNestedRowSplits",
+                        [flat_values] + list(nested_row_splits)):
+      result = flat_values
+      for splits in reversed(nested_row_splits):
+        result = cls.from_row_splits(result, splits)
+      return result
+
+  @classmethod
+  def from_nested_row_lengths(cls, flat_values, nested_row_lengths, name=None):
+    """Creates a `RaggedTensor` from a nested list of `row_lengths` tensors.
+
+    Equivalent to:
+
+    ```python
+    result = flat_values
+    for row_lengths in reversed(nested_row_lengths):
+      result = from_row_lengths(result, row_lengths)
+    ```
+
+    Args:
+      flat_values: A potentially ragged tensor.
+      nested_row_lengths: A list of 1-D int64 tensors.  The `i`th tensor is used
+        as the `row_lengths` for the `i`th ragged dimension.
+      name: A name prefix for the RaggedTensor (optional).
+
+    Returns:
+      A `RaggedTensor` (or `flat_values` if `nested_row_lengths` is empty).
+    """
+    if isinstance(nested_row_lengths, ops.Tensor):
+      raise TypeError("nested_row_lengths must be a list of Tensors")
+    with ops.name_scope(name, "RaggedFromNestedRowlengths",
+                        [flat_values] + list(nested_row_lengths)):
+      result = flat_values
+      for lengths in reversed(nested_row_lengths):
+        result = cls.from_row_lengths(result, lengths)
+      return result
+
   #=============================================================================
   # Accessors
   #=============================================================================
@@ -334,8 +678,8 @@ class RaggedTensor(object):
     #### Example:
       ```python
       >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
-      >>> rt.values.eval()
-      [3, 1, 4, 1, 5, 9, 2, 6]
+      >>> print rt.values
+      tf.Tensor([3, 1, 4, 1, 5, 9, 2, 6])
       ```
     """
     return self._values
@@ -357,26 +701,24 @@ class RaggedTensor(object):
     #### Example:
       ```python
       >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
-      >>> rt.values.eval()
-      [3, 1, 4, 1, 5, 9, 2, 6]
-      >>> rt.row_splits.eval()  # indices of row splits in ragged.values
-      [0, 4, 4, 7, 8, 8]
+      >>> print rt.row_splits  # indices of row splits in rt.values
+      tf.Tensor([0, 4, 4, 7, 8, 8])
       ```
     """
     return self._row_splits
 
   @property
-  def inner_values(self):
+  def flat_values(self):
     """The innermost `values` tensor for this ragged tensor.
 
-    Concretely, if `rt.values` is a `Tensor`, then `rt.inner_values` is
-    `rt.values`; otherwise, `rt.inner_values` is `rt.values.inner_values`.
+    Concretely, if `rt.values` is a `Tensor`, then `rt.flat_values` is
+    `rt.values`; otherwise, `rt.flat_values` is `rt.values.flat_values`.
 
-    Conceptually, `inner_values` is the tensor formed by flattening the
+    Conceptually, `flat_values` is the tensor formed by flattening the
     outermost dimension and all of the ragged dimensions into a single
     dimension.
 
-    `rt.inner_values.shape = [nvals] + rt.shape[rt.ragged_rank + 1:]`
+    `rt.flat_values.shape = [nvals] + rt.shape[rt.ragged_rank + 1:]`
     (where `nvals` is the number of items in the flattened dimensions).
 
     Returns:
@@ -386,8 +728,8 @@ class RaggedTensor(object):
 
       ```python
       >>> rt = ragged.constant([[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]])
-      >>> ragged.inner_values(rt).eval()
-      [3, 1, 4, 1, 5, 9, 2, 6]
+      >>> print rt.flat_values()
+      tf.Tensor([3, 1, 4, 1, 5, 9, 2, 6])
       ```
     """
     rt_values = self.values
@@ -413,8 +755,8 @@ class RaggedTensor(object):
 
       ```python
       >>> rt = ragged.constant([[[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]]])
-      >>> for i, splits in enumerate(ragged.nested_row_splits(rt)):
-      ...   print('Splits for dimension %d: %s' % (i+1, splits.eval()))
+      >>> for i, splits in enumerate(rt.nested_row_splits()):
+      ...   print('Splits for dimension %d: %s' % (i+1, splits))
       Splits for dimension 1: [0, 1]
       Splits for dimension 2: [0, 3, 3, 5]
       Splits for dimension 3: [0, 4, 4, 7, 8, 8]
@@ -428,38 +770,220 @@ class RaggedTensor(object):
       rt_values = rt_values.values
     return tuple(rt_nested_splits)
 
-  @property
-  def cached_value_rowids(self):
-    """The row lengths for this `RaggedTensor`, or `None`.
+  def value_rowids(self, name=None):
+    """Returns the row indices for the `values` in this ragged tensor.
+
+    `rt.value_rowids()` corresponds one-to-one with the outermost dimension of
+    `rt.values`, and specifies the row containing each value.  In particular,
+    the row `rt[row]` consists of the values `rt.values[j]` where
+    `rt.value_rowids()[j] == row`.
+
+    Args:
+      name: A name prefix for the returned tensor (optional).
 
     Returns:
-      The `value_rowids` tensor that was used to construct this `RaggedTensor`
-      if it was constructed using
-      [`ragged.from_value_rowids`](from_value_rowids.md); or `None` otherwise.
+      A 1-D `int64` `Tensor` with shape `self.values.shape[:1]`.
+      The returned tensor is nonnegative, and is sorted in ascending order.
+
+    #### Example:
+      ```python
+      >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
+      >>> rt.values
+      tf.Tensor([3, 1, 4, 1, 5, 9, 2, 6])
+      >>> rt.value_rowids()
+      tf.Tensor([0, 0, 0, 0, 2, 2, 2, 3])  # corresponds 1:1 with rt.values
+      ```
     """
-    return self._cached_value_rowids
+    if self._cached_value_rowids is not None:
+      return self._cached_value_rowids
 
-  @property
-  def cached_nrows(self):
-    """The row lengths for this `RaggedTensor`, or `None`.
+    with ops.name_scope(name, "RaggedValueRowIds", [self]):
+      return segment_id_ops.row_splits_to_segment_ids(self.row_splits)
+
+  def nrows(self, out_type=dtypes.int64, name=None):
+    """Returns the number of rows in this ragged tensor.
+
+    I.e., the size of the outermost dimension of the tensor.
+
+    Args:
+      out_type: `dtype` for the returned tensor.
+      name: A name prefix for the returned tensor (optional).
 
     Returns:
-      The `nrows` tensor that was used to construct this `RaggedTensor`
-      if it was constructed using
-      [`ragged.from_value_rowids`](from_value_rowids.md); or `None` otherwise.
+      A scalar `Tensor` with dtype `out_type`.
+
+    #### Example:
+      ```python
+      >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
+      >>> rt.nrows()  # rt has 5 rows.
+      5
+      ```
     """
-    return self._cached_nrows
+    if self._cached_nrows is not None:
+      return self._cached_nrows
 
-  @property
-  def cached_row_lengths(self):
-    """The row lengths for this `RaggedTensor`, or `None`.
+    with ops.name_scope(name, "RaggedNRows", [self]):
+      return array_ops.shape(self.row_splits, out_type=out_type)[0] - 1
+
+  def row_starts(self, name=None):
+    """Returns the start indices for rows in this ragged tensor.
+
+    These indices specify where the values for each row begin in
+    `self.values`.  `rt.row_starts()` is equal to `rt.row_splits[:-1]`.
+
+    Args:
+      name: A name prefix for the returned tensor (optional).
+
+    Returns:
+      A 1-D Tensor of int64 with shape `[nrows]`.
+      The returned tensor is nonnegative, and is sorted in ascending order.
+
+    #### Example:
+      ```python
+      >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
+      >>> rt.values
+      tf.Tensor([3, 1, 4, 1, 5, 9, 2, 6])
+      >>> rt.row_starts()  # indices of row starts in rt.values
+      tf.Tensor([0, 4, 4, 7, 8])
+      ```
+    """
+    with ops.name_scope(name, "RaggedRowStarts", [self]):
+      return self.row_splits[:-1]
+
+  def row_limits(self, name=None):
+    """Returns the limit indices for rows in this ragged tensor.
+
+    These indices specify where the values for each row end in
+    `self.values`.  `rt.row_limits(self)` is equal to `rt.row_splits[:-1]`.
+
+    Args:
+      name: A name prefix for the returned tensor (optional).
+
+    Returns:
+      A 1-D Tensor of int64 with shape `[nrows]`.
+      The returned tensor is nonnegative, and is sorted in ascending order.
+
+    #### Example:
+      ```python
+      >>> rt = ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
+      >>> rt.values
+      tf.Tensor([3, 1, 4, 1, 5, 9, 2, 6])
+      >>> rt.row_limits()  # indices of row limits in rt.values
+      tf.Tensor([4, 4, 7, 8, 8])
+      ```
+    """
+    with ops.name_scope(name, "RaggedRowLimits", [self]):
+      return self.row_splits[1:]
+
+  def row_lengths(self, axis=1, name=None):
+    """Returns the lengths of the rows in this ragged tensor.
+
+    `rt.row_lengths()[i]` indicates the number of values in the
+    `i`th row of `rt`.
+
+    Args:
+      axis: An integer constant indicating the axis whose row lengths should be
+        returned.
+      name: A name prefix for the returned tensor (optional).
+
+    Returns:
+      A potentially ragged Tensor of int64 with shape `self.shape[:axis]`.
+
+    Raises:
+      ValueError: If `axis` is out of bounds.
+
+    #### Example:
+      ```python
+      >>> rt = ragged.constant([[[3, 1, 4], [1]], [], [[5, 9], [2]], [[6]], []])
+      >>> rt.row_lengths(rt)  # lengths of rows in rt
+      tf.Tensor([2, 0, 2, 1, 0])
+      >>> rt.row_lengths(axis=2)  # lengths of axis=2 rows.
+      <tf.RaggedTensor [[3, 1], [], [2, 1], [1], []]>
+      ```
+    """
+    if self._cached_row_lengths is not None:
+      return self._cached_row_lengths
+
+    with ops.name_scope(name, "RaggedRowLengths", [self]):
+      axis = ragged_util.get_positive_axis(axis, self.shape.ndims)
+      if axis == 0:
+        return self.nrows()
+      elif axis == 1:
+        splits = self.row_splits
+        return splits[1:] - splits[:-1]
+      elif isinstance(self.values, RaggedTensor):
+        return self.with_values(self.values.row_lengths(axis - 1))
+      else:
+        shape = array_ops.shape(self.values, out_type=dtypes.int64)
+        return self.with_values(
+            array_ops.ones(shape[:axis - 1], dtypes.int64) * shape[axis - 1])
+
+  def nested_row_lengths(self, name=None):
+    """Returns a tuple containing the row_lengths for all ragged dimensions.
+
+    `rtnested_row_lengths()` is a tuple containing the `row_lengths` tensors for
+    all ragged dimensions in `rt`, ordered from outermost to innermost.
+
+    Args:
+      name: A name prefix for the returned tensors (optional).
+
+    Returns:
+      A `tuple` of 1-D `int64` `Tensors`.  The length of the tuple is equal to
+      `self.ragged_rank`.
+    """
+    with ops.name_scope(name, "RaggedNestedRowLengths", [self]):
+      rt_nested_row_lengths = []
+      rt = self
+      while isinstance(rt, RaggedTensor):
+        rt_nested_row_lengths.append(rt.row_lengths())
+        rt = rt.values
+      return tuple(rt_nested_row_lengths)
+
+  def bounding_shape(self, axis=None, name=None):
+    """Returns the tight bounding box shape for this `RaggedTensor`.
+
+    Args:
+      axis: An integer scalar or vector indicating which axes to return the
+        bounding box for.  If not specified, then the full bounding box is
+        returned.
+      name: A name prefix for the returned tensor (optional).
 
     Returns:
-      The `row_lengths` tensor that was used to construct this `RaggedTensor`
-      if it was constructed using
-      [`ragged.from_row_lengths`](from_row_lengths.md); or `None` otherwise.
+      An int64 `Tensor`.  If `axis` is not specified, then `output`
+      is a vector with `output.shape=[self.shape.ndims]`.  If `axis` is a
+      scalar, then the `output` is a scalar.  If `axis` is a vector, then
+      `output` is a vector, where `output[i]` is the bounding size for
+      dimension `axis[i]`.
+
+    #### Example:
+      ```python
+      >>> rt = ragged.constant([[1, 2, 3, 4], [5], [], [6, 7, 8, 9], [10]])
+      >>> rt.bounding_shape()
+      [5, 4]
+      ```
     """
-    return self._cached_row_lengths
+    with ops.name_scope(name, "RaggedBoundingBox", [self, axis]):
+      nested_splits = self.nested_row_splits
+      rt_flat_values = self.flat_values
+
+      # Optimized special cases for when axis=0 or axis=1:
+      if isinstance(axis, int):
+        if axis == 0:
+          return array_ops.shape(nested_splits[0], out_type=dtypes.int64)[0] - 1
+        elif axis == 1:
+          return math_ops.maximum(math_ops.reduce_max(self.row_lengths()), 0)
+
+      splits_shape = array_ops.shape(self.row_splits, out_type=dtypes.int64)
+      flat_values_shape = array_ops.shape(rt_flat_values, out_type=dtypes.int64)
+
+      ragged_dimensions = array_ops.stack([splits_shape[0] - 1] + [
+          math_ops.maximum(math_ops.reduce_max(splits[1:] - splits[:-1]), 0)
+          for splits in nested_splits
+      ])
+      inner_dimensions = flat_values_shape[1:]
+
+      bbox = array_ops.concat([ragged_dimensions, inner_dimensions], axis=0)
+      return bbox if axis is None else array_ops.gather(bbox, axis)
 
   #=============================================================================
   # Transformation
@@ -481,7 +1005,7 @@ class RaggedTensor(object):
       `result.ragged_rank = 1 + new_values.ragged_rank`
     """
     new_values.shape.with_rank_at_least(1)
-    self.values.shape[0].assert_is_compatible_with(new_values.shape[0])
+    self.values.shape[:1].assert_is_compatible_with(new_values.shape[:1])
     return RaggedTensor(
         new_values,
         self._row_splits,
@@ -490,16 +1014,16 @@ class RaggedTensor(object):
         self._cached_nrows,
         internal=True)
 
-  def with_inner_values(self, new_values):
-    """Returns a copy of `self` with `inner_values` replaced by `new_value`.
+  def with_flat_values(self, new_values):
+    """Returns a copy of `self` with `flat_values` replaced by `new_value`.
 
     Preserves cached row-partitioning tensors such as `self.cached_nrows` and
     `self.cached_value_rowids` if they have values.
 
     Args:
       new_values: Potentially ragged tensor that should replace
-      `self.inner_values`.  Must have `rank > 0`, and must have the same
-      number of rows as `self.inner_values`.
+      `self.flat_values`.  Must have `rank > 0`, and must have the same
+      number of rows as `self.flat_values`.
 
     Returns:
       A `RaggedTensor`.
@@ -509,46 +1033,369 @@ class RaggedTensor(object):
     if isinstance(self._values, ops.Tensor):
       return self.with_values(new_values)
     else:
-      return self.with_values(self.values.with_inner_values(new_values))
+      return self.with_values(self.values.with_flat_values(new_values))
+
+  #=============================================================================
+  # Tensor Type Conversions
+  #=============================================================================
+
+  @classmethod
+  def from_tensor(cls,
+                  tensor,
+                  lengths=None,
+                  padding=None,
+                  ragged_rank=1,
+                  name=None):
+    """Converts a `tf.Tensor` into a `RaggedTensor`.
+
+    The set of absent/default values may be specified using a vector of lengths
+    or a padding value (but not both).  If `lengths` is specified, then the
+    output tensor will satisfy `output[row] = tensor[row][:lengths[row]]`.
+    If `padding` is specified, then any row *suffix* consisting entirely of
+    `padding` will be excluded from the returned `RaggedTensor`.  If neither
+    `lengths` nor `padding` is specified, then the returned `RaggedTensor` will
+    have no absent/default values.
+
+    Examples:
+
+    ```python
+    >>> dt = tf.constant([[5, 7, 0], [0, 3, 0], [6, 0, 0]])
+    >>> tf.RaggedTensor.from_tensor(dt)
+    <tf.RaggedTensor [[5, 7, 0], [0, 3, 0], [6, 0, 0]]>
+    >>> tf.RaggedTensor.from_tensor(dt, lengths=[2, 0, 3])
+    <tf.RaggedTensor [[5, 7], [], [6, 0, 0]]>
+    >>> tf.RaggedTensor.from_tensor(dt, padding=0)
+    <tf.RaggedTensor [[5, 7], [0, 3], [6]]>
+    ```
+
+    Args:
+      tensor: The `Tensor` to convert.  Must have rank `ragged_rank + 1` or
+        higher.
+      lengths: An optional set of row lengths, specified using a 1-D integer
+        `Tensor` whose length is equal to `tensor.shape[0]` (the number of rows
+        in `tensor`).  If specified, then `output[row]` will contain
+        `tensor[row][:lengths[row]]`.  Negative lengths are treated as zero.
+      padding: An optional padding value.  If specified, then any row suffix
+        consisting entirely of `padding` will be excluded from the returned
+        RaggedTensor.  `padding` is a `Tensor` with the same dtype as `tensor`
+        and with `shape=tensor.shape[ragged_rank + 1:]`.
+      ragged_rank: Integer specifying the ragged rank for the returned
+        `RaggedTensor`.  Must be greater than zero.
+      name: A name prefix for the returned tensors (optional).
+
+    Returns:
+      A `RaggedTensor` with the specified `ragged_rank`.  The shape of the
+      returned ragged tensor is compatible with the shape of `tensor`.
+    Raises:
+      ValueError: If both `lengths` and `padding` are specified.
+    """
+    if lengths is not None and padding is not None:
+      raise ValueError("Specify lengths or padding, but not both")
+    if not isinstance(ragged_rank, int):
+      raise TypeError("ragged_rank expected int, got %r" % ragged_rank)
+    if ragged_rank <= 0:
+      raise ValueError(
+          "ragged_rank must be greater than 0; got %s" % ragged_rank)
+
+    with ops.name_scope(name, "RaggedFromTensor", [tensor, lengths, padding]):
+      tensor = ops.convert_to_tensor(tensor, name="tensor")
+      tensor.shape.with_rank_at_least(ragged_rank + 1)
+      input_shape = array_ops.shape(tensor, out_type=dtypes.int64)
+      ncols = input_shape[1]
+
+      # Handle ragged_rank>1 via recursion:
+      # If the output should have multiple ragged dimensions, then first
+      # flatten the tensor to eliminate all but the last ragged dimension,
+      # and recursively convert that flattened tensor.  Then add on the splits
+      # for the dimensions that we flattened out.
+      if ragged_rank > 1:
+        # Flatten `tensor` to eliminate all but the last ragged dimension.
+        new_shape = array_ops.concat([
+            constant_op.constant([-1], dtypes.int64), input_shape[ragged_rank:]
+        ],
+                                     axis=0)
+        flattened = array_ops.reshape(tensor, new_shape)
+        # Recursively convert the flattened tensor.
+        values = cls.from_tensor(flattened, lengths, padding)
+        # The total number of elements in each  dimension.  E.g., if
+        # input_shape=[3, 4, 5, 6], then dim[2] has 3*4*5 elements in total.
+        dim_size = math_ops.cumprod(input_shape)
+        # Construct splits tensors for the dimensions that were flattened.
+        new_splits = [
+            math_ops.range(0, dim_size[dim - 1] + 1) * input_shape[dim]
+            for dim in range(1, ragged_rank)
+        ]
+        return cls.from_nested_row_splits(values, new_splits)
+
+      # If padding was specified, then use it to find row lengths.
+      if padding is not None:
+        padding = ops.convert_to_tensor(
+            padding, name="padding", dtype=tensor.dtype)
+        padding.shape.assert_is_compatible_with(tensor.shape[2:])
+
+        # Find places where the padding is equal to the tensor.  (This will
+        # broadcast `padding` across the outermost 2 dimensions of `tensor`,
+        # so `has_default_value.shape = tensor.shape`.)
+        has_default_value = math_ops.equal(padding, tensor)
+
+        # If the padding isn't a scalar, then require that all values in the
+        # padding match each item in the tensor.  After this block of code,
+        # `has_default.shape = tensor.shape[:2]`.  (Unfortunately, we can't just
+        # use reduce_all for both cases, becaue when you pass an empty `axis`
+        # list to reduce_all, it reduces all axes; but we want it to reduce no
+        # axes -- i.e., to be a no-op.)
+        tensor_rank = array_ops.rank(tensor)
+        reduce_axis = math_ops.range(2, tensor_rank)
+        has_default = control_flow_ops.cond(
+            tensor_rank > 2,
+            lambda: math_ops.reduce_all(has_default_value, axis=reduce_axis),
+            lambda: has_default_value)
+        has_default.set_shape(tensor_shape.TensorShape([None, None]))
+        has_default.set_shape(tensor.shape[:2])
+
+        # Use has_default it to find the length of each row: for each
+        # non-default item in a row, calculate the length that the row needs to
+        # have to include that item; and then take the max of those values
+        # (across each row).
+        has_nondefault = math_ops.logical_not(has_default)
+        has_nondefault = math_ops.cast(has_nondefault, dtypes.int64)
+        length_for_nondefault_value = (
+            has_nondefault * array_ops.expand_dims(
+                math_ops.range(1, ncols + 1), 0))
+        lengths = math_ops.reduce_max(length_for_nondefault_value, axis=1)
+
+      # If we have lengths (either directly supplied, or computed from
+      # paddings), then use those to construct splits; and then use masking
+      # to get the corresponding values.
+      if lengths is not None:
+        lengths = ragged_util.convert_to_int_tensor(lengths, "lengths",
+                                                    dtypes.int64)
+        lengths.shape.assert_has_rank(1)
+        lengths = math_ops.minimum(lengths, ncols)
+        lengths = math_ops.maximum(lengths, 0)
+        limits = math_ops.cumsum(lengths)
+        splits = array_ops.concat([array_ops.zeros([1], dtypes.int64), limits],
+                                  axis=0)
+        mask = array_ops.sequence_mask(lengths, maxlen=ncols)
+        values = array_ops.boolean_mask(tensor, mask)
+        return cls.from_row_splits(values, splits)
+
+      # If neither padding nor lengths were specified, then create a splits
+      # vector that contains no default values, and reshape the input tensor
+      # to form the values for the RaggedTensor.
+      nrows = input_shape[0]
+      nvals = nrows * ncols
+      splits = math_ops.range(nrows + 1) * ncols
+      values_shape = array_ops.concat([[nvals], input_shape[2:]], axis=0)
+      values = array_ops.reshape(tensor, values_shape)
+      return cls.from_row_splits(values, splits)
+
+  def to_tensor(self, default_value=None, name=None):
+    """Converts this `RaggedTensor` into a `tf.Tensor`.
+
+    Example:
+
+    ```python
+    >>> rt = ragged.constant([[9, 8, 7], [], [6, 5], [4]])
+    >>> print rt.to_tensor()
+    [[9 8 7]
+     [0 0 0]
+     [6 5 0]
+     [4 0 0]]
+    ```
+
+    Args:
+      default_value: Value to set for indices not specified in `self`. Defaults
+        to zero.  `default_value` must be broadcastable to
+        `self.shape[self.ragged_rank + 1:]`.
+      name: A name prefix for the returned tensors (optional).
+
+    Returns:
+      A `Tensor` with shape `ragged.bounding_shape(self)` and the
+      values specified by the non-empty values in `self`.  Empty values are
+      assigned `default_value`.
+    """
+    with ops.name_scope(name, "RaggedToTensor", [self, default_value]):
+      if default_value is not None:
+        default_value = ops.convert_to_tensor(
+            default_value, name="default_value", dtype=self.dtype)
+
+      # If ragged_rank > 1, then recursively convert the ragged values into a
+      # `Tensor` before we proceed.
+      values = self.values
+      if is_ragged(values):
+        values = values.to_tensor(default_value)
+
+      # Tile the default value, if necessary.
+      if default_value is not None:
+        if values.shape.ndims is not None:
+          default_value.shape.with_rank_at_most(values.shape.ndims - 1)
+        if (values.shape.ndims is None or default_value.shape.ndims is None or
+            values.shape.ndims != default_value.shape.ndims + 1):
+          value_shape = array_ops.shape(values)[1:]
+          default_value = array_ops.broadcast_to(default_value, value_shape)
+        default_value.shape.assert_is_compatible_with(values.shape[1:])
+
+      # Get the expected dense shape ([nrows, ncols] + value_shape).
+      rt_row_lengths = [self.row_splits[1:] - self.row_splits[:-1]]
+      nrows = array_ops.shape(self.row_splits, out_type=dtypes.int64)[0] - 1
+      ncols = math_ops.maximum(math_ops.reduce_max(rt_row_lengths), 0)
+      values_shape = array_ops.shape(values, out_type=dtypes.int64)
+      value_shape = values_shape[1:]
+      nvals = values_shape[0]
+
+      # Build a default value if none was supplied.
+      if default_value is None:
+        default_value = array_ops.zeros(value_shape, dtype=values.dtype)
+      default_value.shape.assert_is_compatible_with(values.shape[1:])
+      default_value.set_shape(values.shape[1:])
+
+      # Get the row start indices, and expand to shape=[nrows, 1].
+      starts = array_ops.expand_dims(self.row_splits[:-1], 1)
+
+      # Get the row limit indices, and expand to shape=[nrows, 1].
+      limits = array_ops.expand_dims(self.row_splits[1:], 1)
+
+      # Get the column indices, and expand to shape=[1, ncols].
+      columns = array_ops.expand_dims(math_ops.range(0, ncols), 0)
+
+      # Build a list containing the values plus the default value.  We will use
+      # tf.gather to collect values from this list for the `Tensor` (using
+      # nvals as the index for the default value).
+      values_and_default = array_ops.concat(
+          [values, array_ops.stack([default_value])], axis=0)
+
+      # Construct a matrix "indices" pointing into values_and_default.  I.e.,
+      # output[r, c] = values_and_default[indices[r, c].
+      nondefault_index = starts + columns
+      has_value = nondefault_index < limits
+      default_index = array_ops.fill(array_ops.stack([nrows, ncols]), nvals)
+      indices = array_ops.where(has_value, nondefault_index, default_index)
+
+      # Gather the results into a `Tensor`.
+      return array_ops.gather(values_and_default, indices)
+
+  @classmethod
+  def from_sparse(cls, st_input, name=None):
+    """Converts a 2D `tf.SparseTensor` to a `RaggedTensor`.
+
+    Each row of the `output` `RaggedTensor` will contain the explicit values
+    from the same row in `st_input`.  `st_input` must be ragged-right.  If not
+    it is not ragged-right, then an error will be generated.
+
+    Example:
+
+    ```python
+    >>> st = SparseTensor(indices=[[0, 1], [0, 2], [0, 3], [1, 0], [3, 0]],
+    ...                   values=[1, 2, 3, 4, 5],
+    ...                   dense_shape=[4, 3])
+    >>> rt.RaggedTensor.from_sparse(st).eval().tolist()
+    [[1, 2, 3], [4], [], [5]]
+    ```
+
+    Currently, only two-dimensional `SparseTensors` are supported.
+
+    Args:
+      st_input: The sparse tensor to convert.  Must have rank 2.
+      name: A name prefix for the returned tensors (optional).
+
+    Returns:
+      A `RaggedTensor` with the same values as `st_input`.
+      `output.ragged_rank = rank(st_input) - 1`.
+      `output.shape = [st_input.dense_shape[0], None]`.
+    Raises:
+      ValueError: If the number of dimensions in `st_input` is not known
+        statically, or is not two.
+    """
+    if not sparse_tensor.is_sparse(st_input):
+      raise TypeError("Expected SparseTensor, got %s" % type(st_input).__name__)
+    with ops.name_scope(name, "RaggedFromSparse", [st_input]):
+      st_input = sparse_tensor.convert_to_tensor_or_sparse_tensor(
+          st_input, name="st_input")
+
+      if st_input.dense_shape.shape.ndims is None:
+        static_rank_from_dense_shape = None
+      else:
+        static_rank_from_dense_shape = st_input.dense_shape.shape.dims[0].value
+
+      if st_input.indices.shape.ndims is None:
+        static_rank_from_indices = None
+      else:
+        static_rank_from_indices = st_input.indices.shape.dims[1].value
+
+      if static_rank_from_dense_shape != 2 and static_rank_from_indices != 2:
+        raise ValueError("rank(st_input) must be 2")
+
+      with ops.control_dependencies(
+          _assert_sparse_indices_are_ragged_right(st_input.indices)):
+        # Treat sparse row indices as segment ids to generate a splits tensor
+        # thta we can pair with the sparse tensor values.  (Ignore sparse column
+        # indices.)
+        segment_ids = st_input.indices[:, 0]
+        num_segments = st_input.dense_shape[0]
+        return cls.from_value_rowids(st_input.values, segment_ids, num_segments)
+
+  def to_sparse(self, name=None):
+    """Converts this `RaggedTensor` into a `tf.SparseTensor`.
+
+    Example:
+
+    ```python
+    >>> rt = ragged.constant([[1, 2, 3], [4], [], [5, 6]])
+    >>> rt.to_sparse().eval()
+    SparseTensorValue(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [3, 1]],
+                      values=[1, 2, 3, 4, 5, 6],
+                      dense_shape=[4, 3])
+    ```
+
+    Args:
+      name: A name prefix for the returned tensors (optional).
+
+    Returns:
+      A SparseTensor with the same values as `self`.
+    """
+    with ops.name_scope(name, "RaggedToSparse", [self]):
+      result = gen_ragged_conversion_ops.ragged_tensor_to_sparse(
+          self.nested_row_splits, self.flat_values, name=name)
+      return sparse_tensor.SparseTensor(result.sparse_indices,
+                                        result.sparse_values,
+                                        result.sparse_dense_shape)
 
   #=============================================================================
   # String Encoding
   #=============================================================================
   def __str__(self):
     if self._is_eager():
-      return "RaggedTensor(%s)" % self.tolist()
+      return "<tf.RaggedTensor %s>" % self.to_list()
     else:
       return self.__repr__()
 
   def __repr__(self):
-    return "RaggedTensor(values=%s, row_splits=%s)" % (self._values,
-                                                       self._row_splits)
+    return "tf.RaggedTensor(values=%s, row_splits=%s)" % (self._values,
+                                                          self._row_splits)
 
   #=============================================================================
   # Eager Execution Mode
   #=============================================================================
 
-  def tolist(self):
+  def to_list(self):
     """Returns a nested Python `list` with the values for this `RaggedTensor`.
 
-    If a `RaggedTensor` `rt` was constructed in graph execution mode, then
-    `rt.tolist()` is equivalent to `rt.eval().tolist()`.
-
-    If a `RaggedTensor` `rt` was constructed in eager execution mode, then
-    `rt.tolist()` builds the Python list based on `rt`'s `EagerTensor`
-    components.
+    Requires that `rt` was constructed in eager execution mode.
 
     Returns:
       A nested Python `list`.
     """
     if self._is_eager():
-      return self._eager_value().tolist()
+      return self._eager_value().to_list()
     else:
-      return self.eval().tolist()
+      raise ValueError("RaggedTensor.to_list() is only supported in eager "
+                       "mode; in graph mode, evaluate the RaggedTensor first "
+                       "and then use RaggedTensorValue.to_list().")
 
   def _eager_value(self):
     """Returns a RaggedTensorValue for self.  Requires self._is_eager()=true."""
-    value = self.inner_values.numpy()
+    value = self.flat_values.numpy()
     for row_splits in reversed(self.nested_row_splits):
       value = ragged_tensor_value.RaggedTensorValue(value, row_splits.numpy())
     return value
@@ -562,24 +1409,6 @@ class RaggedTensor(object):
       rt = rt.values
     return isinstance(rt, ops.EagerTensor)
 
-  #=============================================================================
-  # Evaluation
-  #=============================================================================
-  def eval(self, feed_dict=None, session=None):  # pylint: disable=redefined-outer-name
-    """Evaluates this ragged tensor in a `Session`.
-
-    Args:
-      feed_dict: A dictionary that maps `Tensor` objects to feed values. See
-        `tf.Session.run` for a description of the valid feed values.
-      session: The `Session` to be used to evaluate this ragged tensor. If none,
-        the default session will be used.
-
-    Returns:
-      A `RaggedTensorValue` object.
-    """
-    return _eval_using_default_session(self, feed_dict,
-                                       self._as_graph_element().graph, session)
-
   #=============================================================================
   # Indexing & Slicing
   #=============================================================================
@@ -613,6 +1442,53 @@ def is_ragged(value):
                     (RaggedTensor, ragged_tensor_value.RaggedTensorValue))
 
 
+#===============================================================================
+# Convert value -> tensor
+#===============================================================================
+def convert_to_tensor_or_ragged_tensor(value,
+                                       dtype=None,
+                                       preferred_dtype=None,
+                                       name=None):
+  """Converts value to a `RaggedTensor` or `Tensor`.
+
+  * If `value` is a `RaggedTensor`, then return it as-is.
+  * If `value` is a `RaggedTensorValue`, return a corresponding constant
+    `RaggedTensor`.
+  * Otherwise, use `convert_to_tensor` to convert `value` to a `Tensor`.
+
+  Args:
+    value: A `RaggedTensor`, a `RaggedTensorValue`, or an object whose type has
+      a registered `Tensor` conversion function.
+    dtype: Optional element type for the returned tensor.  If missing the type
+      is inferred from the type of `value`.
+    preferred_dtype: Optional element type for the returned tensor, used when
+      dtype is None.  This argument has no effect if `value` is already a
+      tensor, or when conversion is not possible.
+    name: Optional name to use if a new `Tensor` is created.
+
+  Returns:
+    A `Tensor` or `RaggedTensor`.
+  """
+  if isinstance(value, RaggedTensor):
+    if dtype and not dtype.is_compatible_with(value.dtype):
+      raise ValueError("Tensor conversion requested dtype %s for "
+                       "RaggedTensor with dtype %s: %r" %
+                       (dtype.name, value.dtype.name, value))
+    return value
+  elif isinstance(value, ragged_tensor_value.RaggedTensorValue):
+    with ops.name_scope(name, "ConvertToTensorOrRaggedTensor", []):
+      flat_values = ops.convert_to_tensor(
+          value=value.flat_values,
+          dtype=dtype,
+          preferred_dtype=preferred_dtype,
+          name="flat_values")
+      return RaggedTensor.from_nested_row_splits(flat_values,
+                                                 value.nested_row_splits)
+  else:
+    return ops.convert_to_tensor(
+        value=value, dtype=dtype, preferred_dtype=preferred_dtype, name=name)
+
+
 #===============================================================================
 # Register RaggedTensor for use with session.run.
 #===============================================================================
@@ -625,18 +1501,18 @@ def _ragged_tensor_value_from_components(components):
 
 
 def _ragged_tensor_session_fetch(rt):
-  components = rt.nested_row_splits + (rt.inner_values,)
+  components = rt.nested_row_splits + (rt.flat_values,)
   return (components, _ragged_tensor_value_from_components)
 
 
 def _ragged_tensor_session_feed(feed_key, feed_val):
-  key_components = feed_key.nested_row_splits + (feed_key.inner_values,)
-  val_components = feed_val.nested_row_splits + (feed_val.inner_values,)
+  key_components = feed_key.nested_row_splits + (feed_key.flat_values,)
+  val_components = feed_val.nested_row_splits + (feed_val.flat_values,)
   return zip(key_components, val_components)
 
 
 def _ragged_tensor_session_feed_for_partial_run(feed_key):
-  return feed_key.nested_row_splits + (feed_key.inner_values,)
+  return feed_key.nested_row_splits + (feed_key.flat_values,)
 
 
 session.register_session_run_conversion_functions(
@@ -644,6 +1520,9 @@ session.register_session_run_conversion_functions(
     _ragged_tensor_session_feed_for_partial_run)
 
 
+#===============================================================================
+# RaggedTensorType
+#===============================================================================
 class RaggedTensorType(object):
   """Encoding of a static type for a `RaggedTensor`.
 
@@ -663,3 +1542,67 @@ class RaggedTensorType(object):
 
   dtype = property(lambda self: self._dtype)
   ragged_rank = property(lambda self: self._ragged_rank)
+
+
+#===============================================================================
+# Helper Functions
+#===============================================================================
+def _assert_sparse_indices_are_ragged_right(indices):
+  """Checks that the given SparseTensor.indices tensor is ragged-right.
+
+  Example: `indices = [[0, 0], [0, 1], [2, 0], [3, 1]]` is not ragged right
+  because the entry `[3, 1]` skips a cell.
+
+  Args:
+    indices: The SparseTensor indices to check.
+
+  Returns:
+    A list of control dependency op tensors.
+  """
+  index_prefix = indices[:, :-1]
+  index_suffix = indices[:, -1]
+
+  # Check whether each index is starting a new row in the innermost dimension
+  # (prefix[i] != prefix[i-1]) or continuing a row (prefix[i] == prefix[i-1]).
+  # (Note: this skips the first index; we will check that separately below.)
+  index_prefix_changed = math_ops.reduce_any(
+      math_ops.not_equal(index_prefix[1:], index_prefix[:-1]), axis=1)
+
+  # Check two cases:
+  #   * For indices that start a new row: index_suffix[i] must be zero.
+  #   * For indices that continue a row: index_suffix[i] must be equal to
+  #     index_suffix[i-1]+1.
+  index_ok = array_ops.where(
+      index_prefix_changed, math_ops.equal(index_suffix[1:], 0),
+      math_ops.equal(index_suffix[1:], index_suffix[:-1] + 1))
+
+  # Also check that the very first index didn't skip any cells.  The first
+  # index starts a new row (by definition), so its suffix should be zero.
+  sparse_indices_are_ragged_right = math_ops.logical_and(
+      math_ops.reduce_all(math_ops.equal(index_suffix[:1], 0)),
+      math_ops.reduce_all(index_ok))
+
+  message = [
+      "SparseTensor is not right-ragged", "SparseTensor.indices =", indices
+  ]
+  return [control_flow_ops.Assert(sparse_indices_are_ragged_right, message)]
+
+
+@ops.RegisterGradient("RaggedTensorToSparse")
+def _ragged_tensor_to_sparse_gradient(op, unused_sparse_indices_grad,
+                                      sparse_values_grad,
+                                      unused_sparse_shape_grad):
+  """Gradient for RaggedTensorToSparse."""
+  op_inputs_nested_row_splits = op.inputs[:-1]
+  op_inputs_flat_values = op.inputs[-1]
+
+  # No gradient for the RaggedTensor's nested_row_splits.
+  nested_row_splits_gradient = [None] * len(op_inputs_nested_row_splits)
+
+  # Gradient for the RaggedTensor's flat_values is formed by reshaping
+  # the gradient for the SparseTensor's values.
+  flat_values_shape = array_ops.shape(op_inputs_flat_values)
+  flat_values_gradient = array_ops.reshape(sparse_values_grad,
+                                           flat_values_shape)
+
+  return nested_row_splits_gradient + [flat_values_gradient]
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py b/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
index befe30f0e1..4e6ebdf332 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
@@ -20,47 +20,42 @@ from __future__ import print_function
 
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedTensorBoundingShapeOp(test_util.TensorFlowTestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedTensorBoundingShapeOp(ragged_test_util.RaggedTensorTestCase):
 
   def testDocStringExample(self):
     # This is the example from ragged.bounding_shape.__doc__.
     rt = ragged.constant([[1, 2, 3, 4], [5], [], [6, 7, 8, 9], [10]])
-    self.assertEqual(self.evaluate(ragged.bounding_shape(rt)).tolist(), [5, 4])
+    self.assertRaggedEqual(rt.bounding_shape(), [5, 4])
 
   def test2DRaggedTensorWithOneRaggedDimension(self):
     values = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
-    rt1 = ragged.from_row_splits(values, [0, 2, 5, 6, 6, 7])
-    rt2 = ragged.from_row_splits(values, [0, 7])
-    rt3 = ragged.from_row_splits(values, [0, 0, 7, 7])
-    self.assertEqual(self.evaluate(ragged.bounding_shape(rt1)).tolist(), [5, 3])
-    self.assertEqual(self.evaluate(ragged.bounding_shape(rt2)).tolist(), [1, 7])
-    self.assertEqual(self.evaluate(ragged.bounding_shape(rt3)).tolist(), [3, 7])
+    rt1 = ragged.RaggedTensor.from_row_splits(values, [0, 2, 5, 6, 6, 7])
+    rt2 = ragged.RaggedTensor.from_row_splits(values, [0, 7])
+    rt3 = ragged.RaggedTensor.from_row_splits(values, [0, 0, 7, 7])
+    self.assertRaggedEqual(rt1.bounding_shape(), [5, 3])
+    self.assertRaggedEqual(rt2.bounding_shape(), [1, 7])
+    self.assertRaggedEqual(rt3.bounding_shape(), [3, 7])
 
   def test3DRaggedTensorWithOneRaggedDimension(self):
     values = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]]
-    rt1 = ragged.from_row_splits(values, [0, 2, 5, 6, 6, 7])
-    rt2 = ragged.from_row_splits(values, [0, 7])
-    rt3 = ragged.from_row_splits(values, [0, 0, 7, 7])
-    self.assertEqual(
-        self.evaluate(ragged.bounding_shape(rt1)).tolist(), [5, 3, 2])
-    self.assertEqual(
-        self.evaluate(ragged.bounding_shape(rt2)).tolist(), [1, 7, 2])
-    self.assertEqual(
-        self.evaluate(ragged.bounding_shape(rt3)).tolist(), [3, 7, 2])
-
-  def testNonRaggedTensor(self):
-    dt = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
-    self.assertEqual(self.evaluate(ragged.bounding_shape(dt)).tolist(), [4, 3])
+    rt1 = ragged.RaggedTensor.from_row_splits(values, [0, 2, 5, 6, 6, 7])
+    rt2 = ragged.RaggedTensor.from_row_splits(values, [0, 7])
+    rt3 = ragged.RaggedTensor.from_row_splits(values, [0, 0, 7, 7])
+    self.assertRaggedEqual(rt1.bounding_shape(), [5, 3, 2])
+    self.assertRaggedEqual(rt2.bounding_shape(), [1, 7, 2])
+    self.assertRaggedEqual(rt3.bounding_shape(), [3, 7, 2])
 
   def testExplicitAxisOptimizations(self):
-    rt = ragged.from_row_splits(b'a b c d e f g'.split(), [0, 2, 5, 6, 6, 7])
-    self.assertEqual(self.evaluate(ragged.bounding_shape(rt, 0)).tolist(), 5)
-    self.assertEqual(self.evaluate(ragged.bounding_shape(rt, 1)).tolist(), 3)
-    self.assertEqual(
-        self.evaluate(ragged.bounding_shape(rt, [1, 0])).tolist(), [3, 5])
+    rt = ragged.RaggedTensor.from_row_splits(b'a b c d e f g'.split(),
+                                             [0, 2, 5, 6, 6, 7])
+    self.assertRaggedEqual(rt.bounding_shape(0), 5)
+    self.assertRaggedEqual(rt.bounding_shape(1), 3)
+    self.assertRaggedEqual(rt.bounding_shape([1, 0]), [3, 5])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_shape.py b/tensorflow/python/ops/ragged/ragged_tensor_shape.py
index 9129b4b10b..706881da74 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_shape.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_shape.py
@@ -21,13 +21,13 @@ from __future__ import print_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_array_ops
 from tensorflow.python.ops.ragged import ragged_conversion_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_util
 
@@ -55,7 +55,7 @@ class RaggedTensorDynamicShape(object):
       be ragged.
 
     * "Inner dimensions" are dimensions that are encoded using a
-      `RaggedTensor`'s `inner_values`.  Inner dimensions are always uniform.
+      `RaggedTensor`'s `flat_values`.  Inner dimensions are always uniform.
 
   The sizes of partitioned dimensions are recorded using `partitioned_dim_sizes`
   and `inner_dim_sizes`:
@@ -161,15 +161,15 @@ class RaggedTensorDynamicShape(object):
   def from_tensor(cls, rt_input):
     """Constructs a ragged shape for a potentially ragged tensor."""
     with ops.name_scope(None, 'RaggedTensorDynamicShapeFromTensor', [rt_input]):
-      rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(rt_input)
+      rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt_input)
       if not ragged_tensor.is_ragged(rt_input):
         return cls([], array_ops.shape(rt_input))
       else:
-        partitioned_dim_sizes = ((ragged_array_ops.nrows(rt_input),) +
-                                 ragged_array_ops.nested_row_lengths(rt_input))
+        partitioned_dim_sizes = (
+            (rt_input.nrows(),) + rt_input.nested_row_lengths())
         return RaggedTensorDynamicShape(
             partitioned_dim_sizes,
-            array_ops.shape(rt_input.inner_values)[1:])
+            array_ops.shape(rt_input.flat_values)[1:])
 
   def dimension_size(self, axis):
     """Returns the size of slices across the specified dimension."""
@@ -197,7 +197,7 @@ class RaggedTensorDynamicShape(object):
   @property
   def rank(self):
     """The number of dimensions in this shape, or None if unknown."""
-    inner_ndims = self._inner_dim_sizes.shape[0].value
+    inner_ndims = tensor_shape.dimension_value(self._inner_dim_sizes.shape[0])
     if inner_ndims is None:
       return None
     else:
@@ -229,7 +229,7 @@ class RaggedTensorDynamicShape(object):
   @property
   def num_inner_dimensions(self):
     """The number of inner dimensions, or `None` if not statically known."""
-    return self._inner_dim_sizes.shape[0].value
+    return tensor_shape.dimension_value(self._inner_dim_sizes.shape[0])
 
   def broadcast_to_rank(self, rank):
     """Adds leading size-1 dimensions to broadcast `self` to the given rank.
@@ -456,7 +456,7 @@ def broadcast_to(rt_input, shape, broadcast_inner_dimensions=True):
   """
   if not isinstance(shape, RaggedTensorDynamicShape):
     raise TypeError('shape must be a RaggedTensorDynamicShape')
-  rt_input = ragged_factory_ops.convert_to_tensor_or_ragged_tensor(rt_input)
+  rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(rt_input)
 
   # Broadcasting to a uniform shape.
   if shape.num_partitioned_dimensions == 0:
@@ -497,17 +497,20 @@ def _broadcast_to_ragged_shape(rt_input, dst_shape, broadcast_inner_dimensions):
       rt_input = array_ops.reshape(
           rt_input, array_ops.concat([[-1], dst_shape.inner_dim_sizes], axis=0))
     for _ in range(dst_shape.rank - rt_input.shape.ndims):
-      rt_input = ragged_factory_ops.from_row_lengths(
-          rt_input, [ragged_array_ops.nrows(rt_input)])
+      if ragged_tensor.is_ragged(rt_input):
+        nrows = rt_input.nrows()
+      else:
+        nrows = array_ops.shape(rt_input, out_type=dtypes.int64)[0]
+      rt_input = ragged_tensor.RaggedTensor.from_row_lengths(rt_input, [nrows])
 
   # Add ragged dimensions to match dst_shape.
   if ragged_tensor.is_ragged(rt_input):
     inner_rank_diff = (
-        rt_input.inner_values.shape.ndims - 1 - dst_shape.num_inner_dimensions)
+        rt_input.flat_values.shape.ndims - 1 - dst_shape.num_inner_dimensions)
     if inner_rank_diff > 0:
-      rt_input = rt_input.with_inner_values(
+      rt_input = rt_input.with_flat_values(
           ragged_conversion_ops.from_tensor(
-              rt_input.inner_values, ragged_rank=inner_rank_diff))
+              rt_input.flat_values, ragged_rank=inner_rank_diff))
   else:
     rt_input = ragged_conversion_ops.from_tensor(
         rt_input, ragged_rank=dst_shape.num_partitioned_dimensions - 1)
@@ -528,9 +531,9 @@ def _broadcast_to_ragged_shape(rt_input, dst_shape, broadcast_inner_dimensions):
     rt_input = ragged_array_ops.tile(rt_input, multiples)
 
   if broadcast_inner_dimensions:
-    rt_input = rt_input.with_inner_values(
+    rt_input = rt_input.with_flat_values(
         array_ops.reshape(
-            rt_input.inner_values,
+            rt_input.flat_values,
             array_ops.concat([[-1], dst_shape.inner_dim_sizes], axis=0)))
 
   # Do broadcasting for dimensions that become ragged.  We must do these from
@@ -555,7 +558,7 @@ def _ragged_tile_axis(rt_input, axis, repeats):
         _ragged_tile_axis(rt_input.values, axis - 1, repeats))
   else:
     src_row_splits = rt_input.nested_row_splits
-    src_row_lengths = ragged_array_ops.nested_row_lengths(rt_input)
+    src_row_lengths = rt_input.nested_row_lengths()
     splits = src_row_splits[0]
 
     dst_row_lengths = [repeats]
@@ -563,8 +566,7 @@ def _ragged_tile_axis(rt_input, axis, repeats):
       dst_row_lengths.append(
           ragged_util.repeat_ranges(src_row_lengths[i], splits, repeats))
       splits = array_ops.gather(src_row_splits[i], splits)
-    dst_values = ragged_util.repeat_ranges(rt_input.inner_values, splits,
+    dst_values = ragged_util.repeat_ranges(rt_input.flat_values, splits,
                                            repeats)
-    return ragged_factory_ops.from_nested_row_lengths(dst_values,
-                                                      dst_row_lengths)
-
+    return ragged_tensor.RaggedTensor.from_nested_row_lengths(
+        dst_values, dst_row_lengths)
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py b/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
index 699dcc2bdb..ec06aeaea5 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
@@ -24,26 +24,27 @@ import numpy as np
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
-                            parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedTensorBoundingShapeOp(ragged_test_util.RaggedTensorTestCase,
+                                  parameterized.TestCase):
 
   def assertShapeEq(self, x, y):
     assert isinstance(x, ragged.RaggedTensorDynamicShape)
     assert isinstance(y, ragged.RaggedTensorDynamicShape)
     x_partitioned_dim_sizes = [
-        splits.eval().tolist()  #
+        self.eval_to_list(splits)  #
         for splits in x.partitioned_dim_sizes
     ]
     y_partitioned_dim_sizes = [
-        splits.eval().tolist()  #
+        self.eval_to_list(splits)  #
         for splits in y.partitioned_dim_sizes
     ]
     self.assertEqual(x_partitioned_dim_sizes, y_partitioned_dim_sizes)
-    self.assertEqual(x.inner_dim_sizes.eval().tolist(),
-                     y.inner_dim_sizes.eval().tolist())
+    self.assertAllEqual(x.inner_dim_sizes, y.inner_dim_sizes)
 
   @parameterized.parameters([
       dict(value='x', expected_dim_sizes=[]),
@@ -82,13 +83,11 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
           value=ragged.constant_value([[[1, 2], [3]], [[4, 5]]]),
           expected_dim_sizes=[2, [2, 1], [2, 1, 2]]),
   ])
-  @test_util.run_v1_only('b/120545219')
   def testFromTensor(self, value, expected_dim_sizes):
     shape = ragged.RaggedTensorDynamicShape.from_tensor(value)
     expected = ragged.RaggedTensorDynamicShape.from_dim_sizes(
         expected_dim_sizes)
-    with self.cached_session():
-      self.assertShapeEq(shape, expected)
+    self.assertShapeEq(shape, expected)
 
   @parameterized.parameters([
       dict(dim_sizes=[], rank=0, expected_dim_sizes=[]),
@@ -106,15 +105,13 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
           rank=5,
           expected_dim_sizes=[1, 3, [3, 2, 4], 2, 3]),
   ])
-  @test_util.run_v1_only('b/120545219')
   def testBroadcastToRank(self, dim_sizes, rank, expected_dim_sizes):
     shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(dim_sizes)
     expected = ragged.RaggedTensorDynamicShape.from_dim_sizes(
         expected_dim_sizes)
     broadcasted_shape = shape.broadcast_to_rank(rank)
-    with self.cached_session():
-      self.assertShapeEq(broadcasted_shape, expected)
-      self.assertEqual(broadcasted_shape.rank, rank)
+    self.assertShapeEq(broadcasted_shape, expected)
+    self.assertEqual(broadcasted_shape.rank, rank)
 
   @parameterized.parameters([
       #=========================================================================
@@ -283,7 +280,6 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
            original_dim_sizes=[2, (2, 1), 2, 1],
            broadcast_dim_sizes=[2, (2, 1), 2, (2, 1, 2, 1, 2, 1)]),
   ])  # pyformat: disable
-  @test_util.run_v1_only('b/120545219')
   def testBroadcastDimension(self, axis, row_length, original_dim_sizes,
                              broadcast_dim_sizes):
     """Tests for the broadcast_dimension method.
@@ -306,17 +302,16 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
     broadcast_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(
         broadcast_dim_sizes)
     self.assertEqual(original_shape.rank, broadcast_shape.rank)
-    with self.cached_session():
-      # shape[axis].value == 1 and row_length > 1:
-      bcast1 = original_shape.broadcast_dimension(axis, row_length)
-      # shape[axis].value > 1 and row_length == shape[axis].value:
-      bcast2 = broadcast_shape.broadcast_dimension(axis, row_length)
-      # shape[axis].value > 1 and row_length == 1:
-      bcast3 = broadcast_shape.broadcast_dimension(axis, 1)
-
-      self.assertShapeEq(bcast1, broadcast_shape)
-      self.assertShapeEq(bcast2, broadcast_shape)
-      self.assertShapeEq(bcast3, broadcast_shape)
+    # shape[axis].value == 1 and row_length > 1:
+    bcast1 = original_shape.broadcast_dimension(axis, row_length)
+    # shape[axis].value > 1 and row_length == shape[axis].value:
+    bcast2 = broadcast_shape.broadcast_dimension(axis, row_length)
+    # shape[axis].value > 1 and row_length == 1:
+    bcast3 = broadcast_shape.broadcast_dimension(axis, 1)
+
+    self.assertShapeEq(bcast1, broadcast_shape)
+    self.assertShapeEq(bcast2, broadcast_shape)
+    self.assertShapeEq(bcast3, broadcast_shape)
 
   @parameterized.parameters(
       [
@@ -373,16 +368,14 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
               y_dims=[1, 1, 2, (2, 1)],
               expected_dims=[2, (2, 1), 2, (2, 1, 2, 1, 2, 1)]),
       ])
-  @test_util.run_v1_only('b/120545219')
   def testBroadcastDynamicShape(self, x_dims, y_dims, expected_dims):
     x_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(x_dims)
     y_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(y_dims)
     expected = ragged.RaggedTensorDynamicShape.from_dim_sizes(expected_dims)
     result1 = ragged.broadcast_dynamic_shape(x_shape, y_shape)
     result2 = ragged.broadcast_dynamic_shape(y_shape, x_shape)
-    with self.cached_session():
-      self.assertShapeEq(expected, result1)
-      self.assertShapeEq(expected, result2)
+    self.assertShapeEq(expected, result1)
+    self.assertShapeEq(expected, result2)
 
   def testRepr(self):
     shape = ragged.RaggedTensorDynamicShape.from_dim_sizes([2, (2, 1), 2, 1])
@@ -420,17 +413,12 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
           dim_sizes=[3, [3, 0, 2]],
           expected=ragged.constant_value([[10, 10, 10], [], [10, 10]])),
   ])
-  @test_util.run_v1_only('b/120545219')
   def testRaggedBroadcastTo(self, x, dim_sizes, expected):
     shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(dim_sizes)
     result = ragged.broadcast_to(x, shape)
-    with self.cached_session():
-      self.assertEqual(
-          getattr(result, 'ragged_rank', 0), getattr(expected, 'ragged_rank',
-                                                     0))
-      if hasattr(expected, 'tolist'):
-        expected = expected.tolist()
-      self.assertEqual(result.eval().tolist(), expected)
+    self.assertEqual(
+        getattr(result, 'ragged_rank', 0), getattr(expected, 'ragged_rank', 0))
+    self.assertRaggedEqual(result, expected)
 
   @parameterized.parameters([
       dict(
@@ -475,7 +463,6 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
               [[[[11, 21], [32]], [[13, 23], [34]]],
                [[[15, 25], [36]]]])),
   ])
-  @test_util.run_v1_only('b/120545219')
   def testRaggedAddWithBroadcasting(self, x, y, expected, doc):
     expected_rrank = getattr(expected, 'ragged_rank', 0)
     x = ragged.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
@@ -485,8 +472,7 @@ class RaggedTensorShapeTest(test_util.TensorFlowTestCase,
     self.assertEqual(expected_rrank, result_rrank)
     if hasattr(expected, 'tolist'):
       expected = expected.tolist()
-    with self.cached_session():
-      self.assertEqual(result.eval().tolist(), expected)
+    self.assertRaggedEqual(result, expected)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_test.py b/tensorflow/python/ops/ragged/ragged_tensor_test.py
index e86676f70a..b8f1d97137 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_test.py
@@ -19,17 +19,20 @@ from __future__ import division
 from __future__ import print_function
 
 import re
-import sys
 
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
+from tensorflow.python.ops.ragged import RaggedTensor
 from tensorflow.python.platform import googletest
 
 
@@ -103,63 +106,62 @@ EXAMPLE_RAGGED_TENSOR_4D = [
 EXAMPLE_RAGGED_TENSOR_4D_SPLITS1 = [0, 2, 2, 3, 4]
 EXAMPLE_RAGGED_TENSOR_4D_SPLITS2 = [0, 3, 6, 9, 10]
 EXAMPLE_RAGGED_TENSOR_4D_VALUES = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
-                                   [11, 12], [13, 14], [15, 16], [17,
-                                                                  18], [19, 20]]
+                                   [11, 12], [13, 14], [15, 16], [17, 18],
+                                   [19, 20]]
 
 
-class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
+                       parameterized.TestCase):
   longMessage = True  # Property in unittest.Testcase. pylint: disable=invalid-name
 
   #=============================================================================
   # RaggedTensor class docstring examples
   #=============================================================================
 
-  @test_util.run_deprecated_v1
   def testClassDocStringExamples(self):
     # From section: "Component Tensors"
-    rt = ragged.from_row_splits(
+    rt = RaggedTensor.from_row_splits(
         values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8])
-    self.assertEqual(
-        self.evaluate(rt).tolist(), [[3, 1, 4, 1], [], [5, 9, 2], [6], []])
+    self.assertRaggedEqual(rt, [[3, 1, 4, 1], [], [5, 9, 2], [6], []])
     del rt
 
     # From section: "Alternative Row-Partitioning Schemes"
     values = [3, 1, 4, 1, 5, 9, 2, 6]
-    rt1 = ragged.from_row_splits(values, row_splits=[0, 4, 4, 7, 8, 8])
-    rt2 = ragged.from_row_lengths(values, row_lengths=[4, 0, 3, 1, 0])
-    rt3 = ragged.from_value_rowids(
+    rt1 = RaggedTensor.from_row_splits(values, row_splits=[0, 4, 4, 7, 8, 8])
+    rt2 = RaggedTensor.from_row_lengths(values, row_lengths=[4, 0, 3, 1, 0])
+    rt3 = RaggedTensor.from_value_rowids(
         values, value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], nrows=5)
-    rt4 = ragged.from_row_starts(values, row_starts=[0, 4, 4, 7, 8])
-    rt5 = ragged.from_row_limits(values, row_limits=[4, 4, 7, 8, 8])
+    rt4 = RaggedTensor.from_row_starts(values, row_starts=[0, 4, 4, 7, 8])
+    rt5 = RaggedTensor.from_row_limits(values, row_limits=[4, 4, 7, 8, 8])
     for rt in (rt1, rt2, rt3, rt4, rt5):
-      self.assertEqual(
-          self.evaluate(rt).tolist(), [[3, 1, 4, 1], [], [5, 9, 2], [6], []])
+      self.assertRaggedEqual(rt, [[3, 1, 4, 1], [], [5, 9, 2], [6], []])
     del rt1, rt2, rt3, rt4, rt5
 
     # From section: "Multiple Ragged Dimensions"
-    inner_rt = ragged.from_row_splits(
+    inner_rt = RaggedTensor.from_row_splits(
         values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8])
-    outer_rt = ragged.from_row_splits(values=inner_rt, row_splits=[0, 3, 3, 5])
+    outer_rt = RaggedTensor.from_row_splits(
+        values=inner_rt, row_splits=[0, 3, 3, 5])
     self.assertEqual(outer_rt.ragged_rank, 2)
     self.assertEqual(
-        self.evaluate(outer_rt).tolist(),
+        self.eval_to_list(outer_rt),
         [[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]])
     del inner_rt, outer_rt
 
     # From section: "Multiple Ragged Dimensions"
-    rt = ragged.from_nested_row_splits(
-        inner_values=[3, 1, 4, 1, 5, 9, 2, 6],
+    rt = RaggedTensor.from_nested_row_splits(
+        flat_values=[3, 1, 4, 1, 5, 9, 2, 6],
         nested_row_splits=([0, 3, 3, 5], [0, 4, 4, 7, 8, 8]))
     self.assertEqual(
-        self.evaluate(rt).tolist(),
-        [[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]])
+        self.eval_to_list(rt), [[[3, 1, 4, 1], [], [5, 9, 2]], [], [[6], []]])
     del rt
 
     # From section: "Uniform Inner Dimensions"
-    rt = ragged.from_row_splits(
+    rt = RaggedTensor.from_row_splits(
         values=array_ops.ones([5, 3]), row_splits=[0, 2, 5])
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1], [1, 1, 1]]])
     self.assertEqual(rt.shape.as_list(), [2, None, 3])
     del rt
@@ -181,7 +183,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertAllEqual(splits, rt_value.row_splits)
     self.assertAllEqual(values, rt_value.values)
     self.assertAllEqual(splits, rt_value.nested_row_splits[0])
-    self.assertAllEqual(values, rt_value.inner_values)
+    self.assertAllEqual(values, rt_value.flat_values)
 
     # Test construction of a RaggedTensorValue with ragged_rank=2.
     rt_value = ragged.RaggedTensorValue(
@@ -194,21 +196,19 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertAllEqual(splits2, rt_value.nested_row_splits[0])
     self.assertAllEqual(splits, rt_value.nested_row_splits[1])
     self.assertAllEqual(values, rt_value.values.values)
-    self.assertAllEqual(values, rt_value.inner_values)
+    self.assertAllEqual(values, rt_value.flat_values)
 
   #=============================================================================
   # RaggedTensor Constructor (private)
   #=============================================================================
 
-  @test_util.run_deprecated_v1
   def testRaggedTensorConstruction(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     row_splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)
-    rt = ragged.RaggedTensor(
-        values=values, row_splits=row_splits, internal=True)
+    rt = RaggedTensor(values=values, row_splits=row_splits, internal=True)
 
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
   def testRaggedTensorConstructionErrors(self):
@@ -217,117 +217,118 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
     with self.assertRaisesRegexp(ValueError,
                                  'RaggedTensor constructor is private'):
-      ragged.RaggedTensor(values=values, row_splits=row_splits)
+      RaggedTensor(values=values, row_splits=row_splits)
 
     with self.assertRaisesRegexp(TypeError,
                                  'values must be a Tensor or RaggedTensor'):
-      ragged.RaggedTensor(values=range(7), row_splits=row_splits, internal=True)
+      RaggedTensor(values=range(7), row_splits=row_splits, internal=True)
 
     with self.assertRaisesRegexp(TypeError,
                                  'Row-partitioning argument must be a Tensor'):
-      ragged.RaggedTensor(
-          values=values, row_splits=[0, 2, 2, 5, 6, 7], internal=True)
+      RaggedTensor(values=values, row_splits=[0, 2, 2, 5, 6, 7], internal=True)
 
     with self.assertRaisesRegexp(ValueError,
                                  r'Shape \(6, 1\) must have rank 1'):
-      ragged.RaggedTensor(
+      RaggedTensor(
           values=values,
           row_splits=array_ops.expand_dims(row_splits, 1),
           internal=True)
 
     with self.assertRaisesRegexp(TypeError,
                                  'Cached value must be a Tensor or None.'):
-      ragged.RaggedTensor(values=values, row_splits=row_splits,
-                          cached_row_lengths=[2, 3, 4], internal=True)
+      RaggedTensor(
+          values=values,
+          row_splits=row_splits,
+          cached_row_lengths=[2, 3, 4],
+          internal=True)
 
 
 #=============================================================================
 # RaggedTensor Factory Ops
 #=============================================================================
 
-  @test_util.run_deprecated_v1
   def testFromValueRowIdsWithDerivedNRows(self):
     # nrows is known at graph creation time.
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     value_rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
 
-    rt = ragged.from_value_rowids(values, value_rowids)
+    rt = RaggedTensor.from_value_rowids(values, value_rowids)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [5, None])
     self.assertEqual(rt.ragged_rank, 1)
 
     rt_values = rt.values
-    rt_value_rowids = ragged.value_rowids(rt)
-    rt_nrows = ragged.nrows(rt)
+    rt_value_rowids = rt.value_rowids()
+    rt_nrows = rt.nrows()
 
     self.assertIs(rt_values, values)
     self.assertIs(rt_value_rowids, value_rowids)  # cached_value_rowids
     self.assertAllEqual(rt_value_rowids, value_rowids)
-    self.assertEqual(self.evaluate(rt_nrows), 5)
+    self.assertEqual(self.eval_to_list(rt_nrows), 5)
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
-  @test_util.run_deprecated_v1
   def testFromValueRowIdsWithDerivedNRowsDynamic(self):
     # nrows is not known at graph creation time.
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     value_rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
     value_rowids = array_ops.placeholder_with_default(value_rowids, shape=None)
 
-    rt = ragged.from_value_rowids(values, value_rowids)
+    rt = RaggedTensor.from_value_rowids(values, value_rowids)
     self.assertEqual(rt.dtype, dtypes.string)
-    self.assertEqual(rt.shape.as_list(), [None, None])
+    if context.executing_eagerly():
+      self.assertEqual(rt.shape.as_list(), [5, None])
+    else:
+      self.assertEqual(rt.shape.as_list(), [None, None])
     self.assertEqual(rt.ragged_rank, 1)
 
     rt_values = rt.values
-    rt_value_rowids = ragged.value_rowids(rt)
-    rt_nrows = ragged.nrows(rt)
+    rt_value_rowids = rt.value_rowids()
+    rt_nrows = rt.nrows()
 
     self.assertIs(rt_values, values)
     self.assertIs(rt_value_rowids, value_rowids)  # cached_value_rowids
     self.assertAllEqual(rt_value_rowids, value_rowids)
-    self.assertEqual(self.evaluate(rt_nrows), 5)
+    self.assertEqual(self.eval_to_list(rt_nrows), 5)
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
-  @test_util.run_deprecated_v1
   def testFromValueRowIdsWithExplicitNRows(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     value_rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
     nrows = constant_op.constant(7, dtypes.int64)
 
-    rt = ragged.from_value_rowids(values, value_rowids, nrows)
+    rt = RaggedTensor.from_value_rowids(values, value_rowids, nrows)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [7, None])
     self.assertEqual(rt.ragged_rank, 1)
 
     rt_values = rt.values
-    rt_value_rowids = ragged.value_rowids(rt)
-    rt_nrows = ragged.nrows(rt)
+    rt_value_rowids = rt.value_rowids()
+    rt_nrows = rt.nrows()
 
     self.assertIs(rt_values, values)
     self.assertIs(rt_value_rowids, value_rowids)  # cached_value_rowids
     self.assertIs(rt_nrows, nrows)  # cached_nrows
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g'], [], []])
 
-  @test_util.run_deprecated_v1
   def testFromValueRowIdsWithExplicitNRowsEqualToDefault(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     value_rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
     nrows = constant_op.constant(5, dtypes.int64)
 
-    rt = ragged.from_value_rowids(values, value_rowids, nrows)
+    rt = RaggedTensor.from_value_rowids(values, value_rowids, nrows)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [5, None])
     self.assertEqual(rt.ragged_rank, 1)
 
     rt_values = rt.values
-    rt_value_rowids = ragged.value_rowids(rt)
-    rt_nrows = ragged.nrows(rt)
+    rt_value_rowids = rt.value_rowids()
+    rt_nrows = rt.nrows()
 
     self.assertIs(rt_values, values)
     self.assertIs(rt_value_rowids, value_rowids)  # cached_value_rowids
@@ -335,112 +336,106 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertAllEqual(rt_value_rowids, value_rowids)
     self.assertAllEqual(rt_nrows, nrows)
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
-  @test_util.run_deprecated_v1
   def testFromValueRowIdsWithEmptyValues(self):
-    rt = ragged.from_value_rowids([], [])
-    rt_nrows = ragged.nrows(rt)
+    rt = RaggedTensor.from_value_rowids([], [])
+    rt_nrows = rt.nrows()
     self.assertEqual(rt.dtype, dtypes.float32)
     self.assertEqual(rt.shape.as_list(), [0, None])
     self.assertEqual(rt.ragged_rank, 1)
     self.assertEqual(rt.values.shape.as_list(), [0])
-    self.assertEqual(ragged.value_rowids(rt).shape.as_list(), [0])
-    self.assertEqual(self.evaluate(rt_nrows).tolist(), 0)
-    self.assertEqual(self.evaluate(rt).tolist(), [])
+    self.assertEqual(rt.value_rowids().shape.as_list(), [0])
+    self.assertEqual(self.eval_to_list(rt_nrows), 0)
+    self.assertEqual(self.eval_to_list(rt), [])
 
-  @test_util.run_deprecated_v1
   def testFromRowSplits(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     row_splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)
 
-    rt = ragged.from_row_splits(values, row_splits)
+    rt = RaggedTensor.from_row_splits(values, row_splits)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [5, None])
     self.assertEqual(rt.ragged_rank, 1)
 
     rt_values = rt.values
     rt_row_splits = rt.row_splits
-    rt_nrows = ragged.nrows(rt)
+    rt_nrows = rt.nrows()
 
     self.assertIs(rt_values, values)
     self.assertIs(rt_row_splits, row_splits)
-    self.assertEqual(self.evaluate(rt_nrows), 5)
+    self.assertEqual(self.eval_to_list(rt_nrows), 5)
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
   def testFromRowSplitsWithEmptySplits(self):
     err_msg = 'row_splits tensor may not be empty'
     with self.assertRaisesRegexp(ValueError, err_msg):
-      ragged.from_row_splits([], [])
+      RaggedTensor.from_row_splits([], [])
 
-  @test_util.run_deprecated_v1
   def testFromRowStarts(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     row_starts = constant_op.constant([0, 2, 2, 5, 6], dtypes.int64)
 
-    rt = ragged.from_row_starts(values, row_starts)
+    rt = RaggedTensor.from_row_starts(values, row_starts)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [5, None])
     self.assertEqual(rt.ragged_rank, 1)
 
     rt_values = rt.values
-    rt_row_starts = ragged.row_starts(rt)
-    rt_nrows = ragged.nrows(rt)
+    rt_row_starts = rt.row_starts()
+    rt_nrows = rt.nrows()
 
     self.assertIs(rt_values, values)
-    self.assertEqual(self.evaluate(rt_nrows), 5)
+    self.assertEqual(self.eval_to_list(rt_nrows), 5)
     self.assertAllEqual(rt_row_starts, row_starts)
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
-  @test_util.run_deprecated_v1
   def testFromRowLimits(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     row_limits = constant_op.constant([2, 2, 5, 6, 7], dtypes.int64)
 
-    rt = ragged.from_row_limits(values, row_limits)
+    rt = RaggedTensor.from_row_limits(values, row_limits)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [5, None])
     self.assertEqual(rt.ragged_rank, 1)
 
     rt_values = rt.values
-    rt_row_limits = ragged.row_limits(rt)
-    rt_nrows = ragged.nrows(rt)
+    rt_row_limits = rt.row_limits()
+    rt_nrows = rt.nrows()
 
     self.assertIs(rt_values, values)
-    self.assertEqual(self.evaluate(rt_nrows), 5)
+    self.assertEqual(self.eval_to_list(rt_nrows), 5)
     self.assertAllEqual(rt_row_limits, row_limits)
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
-  @test_util.run_deprecated_v1
   def testFromRowLengths(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     row_lengths = constant_op.constant([2, 0, 3, 1, 1], dtypes.int64)
 
-    rt = ragged.from_row_lengths(values, row_lengths)
+    rt = RaggedTensor.from_row_lengths(values, row_lengths)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [5, None])
     self.assertEqual(rt.ragged_rank, 1)
 
     rt_values = rt.values
-    rt_row_lengths = ragged.row_lengths(rt)
-    rt_nrows = ragged.nrows(rt)
+    rt_row_lengths = rt.row_lengths()
+    rt_nrows = rt.nrows()
 
     self.assertIs(rt_values, values)
     self.assertIs(rt_row_lengths, row_lengths)  # cached_nrows
-    self.assertEqual(self.evaluate(rt_nrows), 5)
+    self.assertEqual(self.eval_to_list(rt_nrows), 5)
     self.assertAllEqual(rt_row_lengths, row_lengths)
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
 
-  @test_util.run_deprecated_v1
   def testFromNestedValueRowIdsWithDerivedNRows(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     nested_value_rowids = [
@@ -448,24 +443,23 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
     ]
 
-    rt = ragged.from_nested_value_rowids(values, nested_value_rowids)
+    rt = RaggedTensor.from_nested_value_rowids(values, nested_value_rowids)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [4, None, None])
     self.assertEqual(rt.ragged_rank, 2)
 
     rt_values = rt.values
-    rt_value_rowids = ragged.value_rowids(rt)
+    rt_value_rowids = rt.value_rowids()
     rt_values_values = rt_values.values
-    rt_values_value_rowids = ragged.value_rowids(rt_values)
+    rt_values_value_rowids = rt_values.value_rowids()
 
     self.assertIs(rt_values_values, values)
     self.assertAllEqual(rt_value_rowids, nested_value_rowids[0])
     self.assertAllEqual(rt_values_value_rowids, nested_value_rowids[1])
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[[b'a', b'b'], []], [[b'c', b'd', b'e']], [], [[b'f'], [b'g']]])
 
-  @test_util.run_deprecated_v1
   def testFromNestedValueRowIdsWithExplicitNRows(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     nested_value_rowids = [
@@ -477,17 +471,18 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         constant_op.constant(6, dtypes.int64)
     ]
 
-    rt = ragged.from_nested_value_rowids(values, nested_value_rowids, nrows)
+    rt = RaggedTensor.from_nested_value_rowids(values, nested_value_rowids,
+                                               nrows)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [6, None, None])
     self.assertEqual(rt.ragged_rank, 2)
 
     rt_values = rt.values
-    rt_value_rowids = ragged.value_rowids(rt)
-    rt_nrows = ragged.nrows(rt)
+    rt_value_rowids = rt.value_rowids()
+    rt_nrows = rt.nrows()
     rt_values_values = rt_values.values
-    rt_values_value_rowids = ragged.value_rowids(rt_values)
-    rt_values_nrows = ragged.nrows(rt_values)
+    rt_values_value_rowids = rt_values.value_rowids()
+    rt_values_nrows = rt_values.nrows()
 
     self.assertIs(rt_values_values, values)
     self.assertAllEqual(rt_value_rowids, nested_value_rowids[0])
@@ -495,9 +490,8 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertAllEqual(rt_nrows, nrows[0])
     self.assertAllEqual(rt_values_nrows, nrows[1])
     self.assertEqual(
-        self.evaluate(rt).tolist(),
-        [[[b'a', b'b'], []], [[b'c', b'd', b'e']], [], [[b'f'], [b'g'], []], [],
-         []])
+        self.eval_to_list(rt), [[[b'a', b'b'], []], [[b'c', b'd', b'e']], [],
+                                [[b'f'], [b'g'], []], [], []])
 
   def testFromNestedValueRowIdsWithExplicitNRowsMismatch(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
@@ -509,28 +503,26 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     with self.assertRaisesRegexp(
         ValueError, 'nested_nrows must have the same '
         'length as nested_value_rowids'):
-      ragged.from_nested_value_rowids(values, nested_value_rowids, nrows)
+      RaggedTensor.from_nested_value_rowids(values, nested_value_rowids, nrows)
 
   def testFromNestedValueRowIdsWithNonListInput(self):
     with self.assertRaisesRegexp(
         TypeError, 'nested_value_rowids must be a list of Tensors'):
-      ragged.from_nested_value_rowids([1, 2, 3],
-                                      constant_op.constant(
-                                          [[0, 1, 2], [0, 1, 2]], dtypes.int64))
+      RaggedTensor.from_nested_value_rowids(
+          [1, 2, 3], constant_op.constant([[0, 1, 2], [0, 1, 2]], dtypes.int64))
     with self.assertRaisesRegexp(TypeError,
                                  'nested_nrows must be a list of Tensors'):
-      ragged.from_nested_value_rowids([1, 2, 3], [[0, 1, 2], [0, 1, 2]],
-                                      constant_op.constant([3, 3]))
+      RaggedTensor.from_nested_value_rowids([1, 2, 3], [[0, 1, 2], [0, 1, 2]],
+                                            constant_op.constant([3, 3]))
 
-  @test_util.run_deprecated_v1
   def testFromNestedRowSplits(self):
-    inner_values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
+    flat_values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     nested_row_splits = [
         constant_op.constant([0, 2, 3, 3, 5], dtypes.int64),
         constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)
     ]
 
-    rt = ragged.from_nested_row_splits(inner_values, nested_row_splits)
+    rt = RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits)
     self.assertEqual(rt.dtype, dtypes.string)
     self.assertEqual(rt.shape.as_list(), [4, None, None])
     self.assertEqual(rt.ragged_rank, 2)
@@ -540,19 +532,18 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     rt_values_values = rt_values.values
     rt_values_row_splits = rt_values.row_splits
 
-    self.assertIs(rt_values_values, inner_values)
+    self.assertIs(rt_values_values, flat_values)
     self.assertIs(rt_row_splits, nested_row_splits[0])
     self.assertIs(rt_values_row_splits, nested_row_splits[1])
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[[b'a', b'b'], []], [[b'c', b'd', b'e']], [], [[b'f'], [b'g']]])
 
   def testFromNestedRowSplitsWithNonListInput(self):
     with self.assertRaisesRegexp(TypeError,
                                  'nested_row_splits must be a list of Tensors'):
-      ragged.from_nested_row_splits([1, 2],
-                                    constant_op.constant([[0, 1, 2], [0, 1, 2]],
-                                                         dtypes.int64))
+      RaggedTensor.from_nested_row_splits(
+          [1, 2], constant_op.constant([[0, 1, 2], [0, 1, 2]], dtypes.int64))
 
   def testFromValueRowIdsWithBadNRows(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
@@ -560,7 +551,7 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     nrows = constant_op.constant(5, dtypes.int64)
 
     with self.assertRaisesRegexp(ValueError, r'Expected nrows >= 0; got -2'):
-      ragged.from_value_rowids(
+      RaggedTensor.from_value_rowids(
           values=values,
           value_rowids=array_ops.placeholder_with_default(value_rowids, None),
           nrows=-2)
@@ -568,113 +559,94 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     with self.assertRaisesRegexp(
         ValueError, r'Expected nrows >= value_rowids\[-1\] \+ 1; got nrows=2, '
         r'value_rowids\[-1\]=4'):
-      ragged.from_value_rowids(
+      RaggedTensor.from_value_rowids(
           values=values, value_rowids=value_rowids, nrows=2)
 
     with self.assertRaisesRegexp(
         ValueError, r'Expected nrows >= value_rowids\[-1\] \+ 1; got nrows=4, '
         r'value_rowids\[-1\]=4'):
-      ragged.from_value_rowids(
+      RaggedTensor.from_value_rowids(
           values=values, value_rowids=value_rowids, nrows=4)
 
     with self.assertRaisesRegexp(ValueError,
                                  r'Shape \(7, 1\) must have rank 1'):
-      ragged.from_value_rowids(
+      RaggedTensor.from_value_rowids(
           values=values,
           value_rowids=array_ops.expand_dims(value_rowids, 1),
           nrows=nrows)
 
     with self.assertRaisesRegexp(ValueError, r'Shape \(1,\) must have rank 0'):
-      ragged.from_value_rowids(
+      RaggedTensor.from_value_rowids(
           values=values,
           value_rowids=value_rowids,
           nrows=array_ops.expand_dims(nrows, 0))
 
-  @test_util.run_deprecated_v1
   def testGraphMismatch(self):
-    with ops.Graph().as_default():
-      values = constant_op.constant([1, 2, 3])
-    with ops.Graph().as_default():
-      splits = constant_op.constant([0, 2, 3])
-    self.assertRaisesRegexp(ValueError, '.* must be from the same graph as .*',
-                            ragged.from_row_splits, values, splits)
+    if not context.executing_eagerly():
+      with ops.Graph().as_default():
+        values = constant_op.constant([1, 2, 3], dtypes.int64)
+      with ops.Graph().as_default():
+        splits = constant_op.constant([0, 2, 3], dtypes.int64)
+      self.assertRaisesRegexp(ValueError,
+                              '.* must be from the same graph as .*',
+                              RaggedTensor.from_row_splits, values, splits)
 
   #=============================================================================
   # Ragged Value & Row-Partitioning Tensor Accessors
   #=============================================================================
 
-  @test_util.run_deprecated_v1
   def testRaggedTensorAccessors_2d(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     row_splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)
     value_rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
-    rt1 = ragged.from_row_splits(values, row_splits)
-    rt2 = ragged.from_value_rowids(values, value_rowids)
+    rt1 = RaggedTensor.from_row_splits(values, row_splits)
+    rt2 = RaggedTensor.from_value_rowids(values, value_rowids)
 
     for rt in [rt1, rt2]:
-      self.assertEqual(
-          self.evaluate(rt).tolist(),
-          [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
-      self.assertEqual(
-          self.evaluate(rt.values).tolist(),
-          [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
+      self.assertRaggedEqual(
+          rt, [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
+      self.assertAllEqual(rt.values, [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
       self.assertEqual(rt.values.shape.dims[0].value, 7)
-      self.assertEqual(
-          self.evaluate(ragged.value_rowids(rt)).tolist(),
-          [0, 0, 2, 2, 2, 3, 4])
-      self.assertEqual(self.evaluate(ragged.nrows(rt)).tolist(), 5)
-      self.assertEqual(
-          self.evaluate(rt.row_splits).tolist(), [0, 2, 2, 5, 6, 7])
-      self.assertEqual(
-          self.evaluate(ragged.row_starts(rt)).tolist(), [0, 2, 2, 5, 6])
-      self.assertEqual(
-          self.evaluate(ragged.row_limits(rt)).tolist(), [2, 2, 5, 6, 7])
-      self.assertEqual(
-          self.evaluate(ragged.row_lengths(rt)).tolist(), [2, 0, 3, 1, 1])
-      self.assertEqual(
-          self.evaluate(rt.inner_values).tolist(),
-          [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
-      self.assertEqual(
-          [self.evaluate(s).tolist() for s in rt.nested_row_splits],
-          [[0, 2, 2, 5, 6, 7]])
+      self.assertAllEqual(rt.value_rowids(), [0, 0, 2, 2, 2, 3, 4])
+      self.assertAllEqual(rt.nrows(), 5)
+      self.assertAllEqual(rt.row_splits, [0, 2, 2, 5, 6, 7])
+      self.assertAllEqual(rt.row_starts(), [0, 2, 2, 5, 6])
+      self.assertAllEqual(rt.row_limits(), [2, 2, 5, 6, 7])
+      self.assertAllEqual(rt.row_lengths(), [2, 0, 3, 1, 1])
+      self.assertAllEqual(rt.flat_values,
+                          [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
+      self.assertLen(rt.nested_row_splits, 1)
+      self.assertAllEqual(rt.nested_row_splits[0], [0, 2, 2, 5, 6, 7])
 
-  @test_util.run_deprecated_v1
   def testRaggedTensorAccessors_3d_with_ragged_rank_1(self):
     values = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]]
     row_splits = constant_op.constant([0, 2, 2, 5, 6, 7], dtypes.int64)
     value_rowids = constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
-    rt1 = ragged.from_row_splits(values, row_splits)
-    rt2 = ragged.from_value_rowids(values, value_rowids)
+    rt1 = RaggedTensor.from_row_splits(values, row_splits)
+    rt2 = RaggedTensor.from_value_rowids(values, value_rowids)
 
     for rt in [rt1, rt2]:
       self.assertEqual(
-          self.evaluate(rt).tolist(),
+          self.eval_to_list(rt),
           [[[0, 1], [2, 3]], [], [[4, 5], [6, 7], [8, 9]], [[10, 11]],
            [[12, 13]]])
       self.assertEqual(
-          self.evaluate(rt.values).tolist(),
+          self.eval_to_list(rt.values),
           [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]])
       self.assertEqual(rt.values.shape.dims[0].value, 7)
       self.assertEqual(
-          self.evaluate(ragged.value_rowids(rt)).tolist(),
-          [0, 0, 2, 2, 2, 3, 4])
-      self.assertEqual(self.evaluate(ragged.nrows(rt)).tolist(), 5)
-      self.assertEqual(
-          self.evaluate(rt.row_splits).tolist(), [0, 2, 2, 5, 6, 7])
-      self.assertEqual(
-          self.evaluate(ragged.row_starts(rt)).tolist(), [0, 2, 2, 5, 6])
-      self.assertEqual(
-          self.evaluate(ragged.row_limits(rt)).tolist(), [2, 2, 5, 6, 7])
+          self.eval_to_list(rt.value_rowids()), [0, 0, 2, 2, 2, 3, 4])
+      self.assertEqual(self.eval_to_list(rt.nrows()), 5)
+      self.assertEqual(self.eval_to_list(rt.row_splits), [0, 2, 2, 5, 6, 7])
+      self.assertEqual(self.eval_to_list(rt.row_starts()), [0, 2, 2, 5, 6])
+      self.assertEqual(self.eval_to_list(rt.row_limits()), [2, 2, 5, 6, 7])
+      self.assertEqual(self.eval_to_list(rt.row_lengths()), [2, 0, 3, 1, 1])
       self.assertEqual(
-          self.evaluate(ragged.row_lengths(rt)).tolist(), [2, 0, 3, 1, 1])
-      self.assertEqual(
-          self.evaluate(rt.inner_values).tolist(),
+          self.eval_to_list(rt.flat_values),
           [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]])
-      self.assertEqual(
-          [self.evaluate(s).tolist() for s in rt.nested_row_splits],
-          [[0, 2, 2, 5, 6, 7]])
+      self.assertEqual([self.eval_to_list(s) for s in rt.nested_row_splits],
+                       [[0, 2, 2, 5, 6, 7]])
 
-  @test_util.run_deprecated_v1
   def testRaggedTensorAccessors_3d_with_ragged_rank_2(self):
     values = constant_op.constant(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
     nested_row_splits = [
@@ -685,73 +657,59 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         constant_op.constant([0, 0, 1, 3, 3], dtypes.int64),
         constant_op.constant([0, 0, 2, 2, 2, 3, 4], dtypes.int64)
     ]
-    rt1 = ragged.from_nested_row_splits(values, nested_row_splits)
-    rt2 = ragged.from_nested_value_rowids(values, nested_value_rowids)
+    rt1 = RaggedTensor.from_nested_row_splits(values, nested_row_splits)
+    rt2 = RaggedTensor.from_nested_value_rowids(values, nested_value_rowids)
 
     for rt in [rt1, rt2]:
       self.assertEqual(
-          self.evaluate(rt).tolist(),
+          self.eval_to_list(rt),
           [[[b'a', b'b'], []], [[b'c', b'd', b'e']], [], [[b'f'], [b'g']]])
       self.assertEqual(
-          self.evaluate(rt.values).tolist(),
+          self.eval_to_list(rt.values),
           [[b'a', b'b'], [], [b'c', b'd', b'e'], [b'f'], [b'g']])
       self.assertEqual(rt.values.shape.dims[0].value, 5)
+      self.assertEqual(self.eval_to_list(rt.value_rowids()), [0, 0, 1, 3, 3])
+      self.assertEqual(self.eval_to_list(rt.nrows()), 4)
+      self.assertEqual(self.eval_to_list(rt.row_splits), [0, 2, 3, 3, 5])
+      self.assertEqual(self.eval_to_list(rt.row_starts()), [0, 2, 3, 3])
+      self.assertEqual(self.eval_to_list(rt.row_limits()), [2, 3, 3, 5])
+      self.assertEqual(self.eval_to_list(rt.row_lengths()), [2, 1, 0, 2])
       self.assertEqual(
-          self.evaluate(ragged.value_rowids(rt)).tolist(), [0, 0, 1, 3, 3])
-      self.assertEqual(self.evaluate(ragged.nrows(rt)).tolist(), 4)
-      self.assertEqual(self.evaluate(rt.row_splits).tolist(), [0, 2, 3, 3, 5])
-      self.assertEqual(
-          self.evaluate(ragged.row_starts(rt)).tolist(), [0, 2, 3, 3])
-      self.assertEqual(
-          self.evaluate(ragged.row_limits(rt)).tolist(), [2, 3, 3, 5])
-      self.assertEqual(
-          self.evaluate(ragged.row_lengths(rt)).tolist(), [2, 1, 0, 2])
-      self.assertEqual(
-          self.evaluate(rt.inner_values).tolist(),
+          self.eval_to_list(rt.flat_values),
           [b'a', b'b', b'c', b'd', b'e', b'f', b'g'])
-      self.assertEqual(
-          [self.evaluate(s).tolist() for s in rt.nested_row_splits],
-          [[0, 2, 3, 3, 5], [0, 2, 2, 5, 6, 7]])
-
-  def testNRowsWithTensorInput(self):
-    dt = constant_op.constant([[1, 2, 3], [4, 5, 6]])
-    nrows = ragged.nrows(dt)
-    self.assertEqual(self.evaluate(nrows), 2)
-
-  def testRowLengthsWithTensorInput(self):
-    dt = constant_op.constant([[1, 2, 3], [4, 5, 6]])
-    row_lengths = ragged.row_lengths(dt)
-    self.assertEqual(self.evaluate(row_lengths).tolist(), [3, 3])
+      self.assertEqual([self.eval_to_list(s) for s in rt.nested_row_splits],
+                       [[0, 2, 3, 3, 5], [0, 2, 2, 5, 6, 7]])
 
   #=============================================================================
   # RaggedTensor.shape
   #=============================================================================
 
-  @test_util.run_deprecated_v1
   def testShape(self):
     """Tests for RaggedTensor.shape."""
-    rt1 = ragged.from_row_splits(b'a b c d e f g'.split(), [0, 2, 5, 6, 6, 7])
+    rt1 = RaggedTensor.from_row_splits(b'a b c d e f g'.split(),
+                                       [0, 2, 5, 6, 6, 7])
     self.assertEqual(rt1.shape.as_list(), [5, None])
 
-    rt2 = ragged.from_row_splits(
+    rt2 = RaggedTensor.from_row_splits(
         [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14]],
         [0, 2, 5, 6, 6, 7])
     self.assertEqual(rt2.shape.as_list(), [5, None, 2])
 
-    rt3 = ragged.from_row_splits(
+    rt3 = RaggedTensor.from_row_splits(
         [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]], [0, 2, 2, 3])
     self.assertEqual(rt3.shape.as_list(), [3, None, 2, 2])
 
-    rt4 = ragged.from_row_splits(rt3, [0, 1, 3, 3])
+    rt4 = RaggedTensor.from_row_splits(rt3, [0, 1, 3, 3])
     self.assertEqual(rt4.shape.as_list(), [3, None, None, 2, 2])
 
-    rt5 = ragged.from_row_splits(
-        array_ops.placeholder(dtype=dtypes.string), [0, 2, 3, 5])
-    self.assertEqual(rt5.shape.ndims, None)
+    if not context.executing_eagerly():
+      rt5 = RaggedTensor.from_row_splits(
+          array_ops.placeholder(dtype=dtypes.string), [0, 2, 3, 5])
+      self.assertEqual(rt5.shape.ndims, None)
 
-    rt6 = ragged.from_row_splits([1, 2, 3],
-                                 array_ops.placeholder(dtype=dtypes.int64))
-    self.assertEqual(rt6.shape.as_list(), [None, None])
+      rt6 = RaggedTensor.from_row_splits(
+          [1, 2, 3], array_ops.placeholder(dtype=dtypes.int64))
+      self.assertEqual(rt6.shape.as_list(), [None, None])
 
   #=============================================================================
   # RaggedTensor.__getitem__
@@ -777,15 +735,9 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     """
     tensor_slice_spec1 = _make_tensor_slice_spec(slice_spec, True)
     tensor_slice_spec2 = _make_tensor_slice_spec(slice_spec, False)
-    value1 = self.evaluate(rt.__getitem__(slice_spec))
-    value2 = self.evaluate(rt.__getitem__(tensor_slice_spec1))
-    value3 = self.evaluate(rt.__getitem__(tensor_slice_spec2))
-    if hasattr(value1, 'tolist'):
-      value1 = value1.tolist()
-    if hasattr(value2, 'tolist'):
-      value2 = value2.tolist()
-    if hasattr(value3, 'tolist'):
-      value3 = value3.tolist()
+    value1 = self.eval_to_list(rt.__getitem__(slice_spec))
+    value2 = self.eval_to_list(rt.__getitem__(tensor_slice_spec1))
+    value3 = self.eval_to_list(rt.__getitem__(tensor_slice_spec2))
     self.assertEqual(value1, expected, 'slice_spec=%s' % (slice_spec,))
     self.assertEqual(value2, expected, 'slice_spec=%s' % (slice_spec,))
     self.assertEqual(value3, expected, 'slice_spec=%s' % (slice_spec,))
@@ -861,23 +813,26 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       (SLICE_BUILDER[:, -2:], [row[-2:] for row in EXAMPLE_RAGGED_TENSOR_2D]),
       # TODO(edloper): Add tests for strided slices, once support is added.
   )
-  @test_util.run_v1_only('b/120545219')
   def testRaggedTensorGetItemWithRaggedRank1(self, slice_spec, expected):
     """Test that rt.__getitem__(slice_spec) == expected."""
     # Ragged tensor
-    rt = ragged.from_row_splits(EXAMPLE_RAGGED_TENSOR_2D_VALUES,
-                                EXAMPLE_RAGGED_TENSOR_2D_SPLITS)
+    rt = RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_2D_VALUES,
+                                      EXAMPLE_RAGGED_TENSOR_2D_SPLITS)
 
-    self.assertEqual(self.evaluate(rt).tolist(), EXAMPLE_RAGGED_TENSOR_2D)
+    self.assertEqual(self.eval_to_list(rt), EXAMPLE_RAGGED_TENSOR_2D)
     self._TestGetItem(rt, slice_spec, expected)
 
   # pylint: disable=invalid-slice-index
   @parameterized.parameters(
       # Tests for out-of-bound errors
-      (SLICE_BUILDER[5], ValueError, '.*out of bounds.*'),
-      (SLICE_BUILDER[-6], ValueError, '.*out of bounds.*'),
-      (SLICE_BUILDER[0, 2], ValueError, '.*out of bounds.*'),
-      (SLICE_BUILDER[3, 0], ValueError, '.*out of bounds.*'),
+      (SLICE_BUILDER[5],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
+      (SLICE_BUILDER[-6],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
+      (SLICE_BUILDER[0, 2],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
+      (SLICE_BUILDER[3, 0],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
 
       # Indexing into an inner ragged dimension
       (SLICE_BUILDER[:, 3], ValueError,
@@ -889,8 +844,8 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
       # Tests for type errors
       (SLICE_BUILDER[0.5], TypeError, re.escape(array_ops._SLICE_TYPE_ERROR)),
-      (SLICE_BUILDER[1:3:0.5], TypeError,
-       re.escape(array_ops._SLICE_TYPE_ERROR)),
+      (SLICE_BUILDER[1:3:0.5], TypeError, re.escape(
+          array_ops._SLICE_TYPE_ERROR)),
       (SLICE_BUILDER[:, 1:3:0.5], TypeError,
        'slice strides must be integers or None'),
       (SLICE_BUILDER[:, 0.5:1.5], TypeError,
@@ -903,17 +858,14 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       (SLICE_BUILDER[..., 0, 0, 0], IndexError,
        'Too many indices for RaggedTensor'),
   )
-  @test_util.run_v1_only('b/120545219')
   def testRaggedTensorGetItemErrorsWithRaggedRank1(self, slice_spec, expected,
                                                    message):
     """Test that rt.__getitem__(slice_spec) == expected."""
     # Ragged tensor
-    rt = ragged.from_row_splits(EXAMPLE_RAGGED_TENSOR_2D_VALUES,
-                                EXAMPLE_RAGGED_TENSOR_2D_SPLITS)
-    # if sys.version_info[0] == 3:
-    #   message = 'must be str, not int'
+    rt = RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_2D_VALUES,
+                                      EXAMPLE_RAGGED_TENSOR_2D_SPLITS)
 
-    self.assertEqual(self.evaluate(rt).tolist(), EXAMPLE_RAGGED_TENSOR_2D)
+    self.assertEqual(self.eval_to_list(rt), EXAMPLE_RAGGED_TENSOR_2D)
     self._TestGetItemException(rt, slice_spec, expected, message)
 
   @parameterized.parameters(
@@ -982,13 +934,12 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       # TODO(edloper): Add tests slicing inner ragged dimensions, one support
       # is added.
   )
-  @test_util.run_v1_only('b/120545219')
   def testRaggedTensorGetItemWithRaggedRank2(self, slice_spec, expected):
     """Test that rt.__getitem__(slice_spec) == expected."""
-    rt = ragged.from_nested_row_splits(
+    rt = RaggedTensor.from_nested_row_splits(
         EXAMPLE_RAGGED_TENSOR_4D_VALUES,
         [EXAMPLE_RAGGED_TENSOR_4D_SPLITS1, EXAMPLE_RAGGED_TENSOR_4D_SPLITS2])
-    self.assertEqual(self.evaluate(rt).tolist(), EXAMPLE_RAGGED_TENSOR_4D)
+    self.assertEqual(self.eval_to_list(rt), EXAMPLE_RAGGED_TENSOR_4D)
     self._TestGetItem(rt, slice_spec, expected)
 
   @parameterized.parameters(
@@ -999,19 +950,22 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
        'Cannot index into an inner ragged dimension.'),
 
       # Test for out-of-bounds errors.
-      (SLICE_BUILDER[1, 0], ValueError, '.*out of bounds.*'),
-      (SLICE_BUILDER[0, 0, 3], ValueError, '.*out of bounds.*'),
-      (SLICE_BUILDER[5], ValueError, '.*out of bounds.*'),
-      (SLICE_BUILDER[0, 5], ValueError, '.*out of bounds.*'),
+      (SLICE_BUILDER[1, 0],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
+      (SLICE_BUILDER[0, 0, 3],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
+      (SLICE_BUILDER[5],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
+      (SLICE_BUILDER[0, 5],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
   )
-  @test_util.run_deprecated_v1
   def testRaggedTensorGetItemErrorsWithRaggedRank2(self, slice_spec, expected,
                                                    message):
     """Test that rt.__getitem__(slice_spec) == expected."""
-    rt = ragged.from_nested_row_splits(
+    rt = RaggedTensor.from_nested_row_splits(
         EXAMPLE_RAGGED_TENSOR_4D_VALUES,
         [EXAMPLE_RAGGED_TENSOR_4D_SPLITS1, EXAMPLE_RAGGED_TENSOR_4D_SPLITS2])
-    self.assertEqual(self.evaluate(rt).tolist(), EXAMPLE_RAGGED_TENSOR_4D)
+    self.assertEqual(self.eval_to_list(rt), EXAMPLE_RAGGED_TENSOR_4D)
     self._TestGetItemException(rt, slice_spec, expected, message)
 
   @parameterized.parameters(
@@ -1019,21 +973,21 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       (SLICE_BUILDER[2:], []),
       (SLICE_BUILDER[:-3], []),
   )
-  @test_util.run_deprecated_v1
   def testRaggedTensorGetItemWithEmptyTensor(self, slice_spec, expected):
     """Test that rt.__getitem__(slice_spec) == expected."""
-    rt = ragged.from_row_splits([], [0])
+    rt = RaggedTensor.from_row_splits([], [0])
     self._TestGetItem(rt, slice_spec, expected)
 
   @parameterized.parameters(
-      (SLICE_BUILDER[0], ValueError, '.*out of bounds.*'),
-      (SLICE_BUILDER[-1], ValueError, '.*out of bounds.*'),
+      (SLICE_BUILDER[0],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
+      (SLICE_BUILDER[-1],
+       (ValueError, errors.InvalidArgumentError), '.*out of bounds.*'),
   )
-  @test_util.run_deprecated_v1
   def testRaggedTensorGetItemErrorsWithEmptyTensor(self, slice_spec, expected,
                                                    message):
     """Test that rt.__getitem__(slice_spec) == expected."""
-    rt = ragged.from_row_splits([], [0])
+    rt = RaggedTensor.from_row_splits([], [0])
     self._TestGetItemException(rt, slice_spec, expected, message)
 
   @parameterized.parameters(
@@ -1045,7 +999,6 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       (SLICE_BUILDER[0, 1], EXAMPLE_RAGGED_TENSOR_2D[0][1]),
       (SLICE_BUILDER[-3, 0], EXAMPLE_RAGGED_TENSOR_2D[-3][0]),
   )
-  @test_util.run_deprecated_v1
   def testRaggedTensorGetItemWithPlaceholderShapes(self, slice_spec, expected):
     """Test that rt.__getitem__(slice_spec) == expected."""
     # Intentionally use an unknown shape for `splits`, to force the code path
@@ -1053,29 +1006,28 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     splits = constant_op.constant(
         EXAMPLE_RAGGED_TENSOR_2D_SPLITS, dtype=dtypes.int64)
     splits = array_ops.placeholder_with_default(splits, None)
-    rt = ragged.from_row_splits(EXAMPLE_RAGGED_TENSOR_2D_VALUES, splits)
-    self.assertEqual(self.evaluate(rt).tolist(), EXAMPLE_RAGGED_TENSOR_2D)
+    rt = RaggedTensor.from_row_splits(EXAMPLE_RAGGED_TENSOR_2D_VALUES, splits)
+    self.assertEqual(self.eval_to_list(rt), EXAMPLE_RAGGED_TENSOR_2D)
     self._TestGetItem(rt, slice_spec, expected)
 
   @parameterized.parameters(
       (SLICE_BUILDER[..., 2], ValueError,
        'Ellipsis not supported for unknown shape RaggedTensors'),)
-  @test_util.run_deprecated_v1
   def testRaggedTensorGetItemErrorsWithPlaceholderShapes(
       self, slice_spec, expected, message):
     """Test that rt.__getitem__(slice_spec) == expected."""
-    # Intentionally use an unknown shape for `values`.
-    values = array_ops.placeholder_with_default([0], None)
-    rt = ragged.from_row_splits(values, [0, 1])
-    self._TestGetItemException(rt, slice_spec, expected, message)
+    if not context.executing_eagerly():
+      # Intentionally use an unknown shape for `values`.
+      values = array_ops.placeholder_with_default([0], None)
+      rt = RaggedTensor.from_row_splits(values, [0, 1])
+      self._TestGetItemException(rt, slice_spec, expected, message)
 
-  @test_util.run_v1_only('b/120545219')
   def testGetItemNewAxis(self):
     # rt: [[[['a', 'b'], ['c', 'd']], [], [['e', 'f']]], []]
     splits1 = [0, 3, 3]
     splits2 = [0, 2, 2, 3]
     values = constant_op.constant([['a', 'b'], ['c', 'd'], ['e', 'f']])
-    rt = ragged.from_nested_row_splits(values, [splits1, splits2])
+    rt = RaggedTensor.from_nested_row_splits(values, [splits1, splits2])
     rt_newaxis0 = rt[array_ops.newaxis]
     rt_newaxis1 = rt[:, array_ops.newaxis]
     rt_newaxis2 = rt[:, :, array_ops.newaxis]
@@ -1083,22 +1035,22 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     rt_newaxis4 = rt[:, :, :, :, array_ops.newaxis]
 
     self.assertEqual(
-        self.evaluate(rt).tolist(),
+        self.eval_to_list(rt),
         [[[[b'a', b'b'], [b'c', b'd']], [], [[b'e', b'f']]], []])
     self.assertEqual(
-        self.evaluate(rt_newaxis0).tolist(),
+        self.eval_to_list(rt_newaxis0),
         [[[[[b'a', b'b'], [b'c', b'd']], [], [[b'e', b'f']]], []]])
     self.assertEqual(
-        self.evaluate(rt_newaxis1).tolist(),
+        self.eval_to_list(rt_newaxis1),
         [[[[[b'a', b'b'], [b'c', b'd']], [], [[b'e', b'f']]]], [[]]])
     self.assertEqual(
-        self.evaluate(rt_newaxis2).tolist(),
+        self.eval_to_list(rt_newaxis2),
         [[[[[b'a', b'b'], [b'c', b'd']]], [[]], [[[b'e', b'f']]]], []])
     self.assertEqual(
-        self.evaluate(rt_newaxis3).tolist(),
+        self.eval_to_list(rt_newaxis3),
         [[[[[b'a', b'b']], [[b'c', b'd']]], [], [[[b'e', b'f']]]], []])
     self.assertEqual(
-        self.evaluate(rt_newaxis4).tolist(),
+        self.eval_to_list(rt_newaxis4),
         [[[[[b'a'], [b'b']], [[b'c'], [b'd']]], [], [[[b'e'], [b'f']]]], []])
 
     self.assertEqual(rt.ragged_rank, 2)
@@ -1117,104 +1069,118 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
   #=============================================================================
   # RaggedTensor.__str__
   #=============================================================================
-  @test_util.run_deprecated_v1
   def testRaggedTensorStr(self):
-    rt1 = ragged.from_row_splits(b'a b c d e f g'.split(), [0, 2, 5, 6, 6, 7])
-    expected1 = ('RaggedTensor(values=Tensor("RaggedFromRowSplits/values:0", '
-                 'shape=(7,), dtype=string), row_splits='
-                 'Tensor("RaggedFromRowSplits/row_splits:0", '
-                 'shape=(6,), dtype=int64))')
-    self.assertEqual(str(rt1), expected1)
-    self.assertEqual(repr(rt1), expected1)
+    values = [b'a', b'b', b'c', b'd', b'e', b'f', b'g']
+    row_splits = [0, 2, 5, 6, 6, 7]
+    rt = RaggedTensor.from_row_splits(values, row_splits)
+    if context.executing_eagerly():
+      expected_str = '<tf.RaggedTensor {}>'.format([[b'a', b'b'],
+                                                    [b'c', b'd', b'e'], [b'f'],
+                                                    [], [b'g']])
+      expected_repr = (
+          'tf.RaggedTensor(values=tf.Tensor([{}], shape=(7,), dtype=string), '
+          'row_splits=tf.Tensor([{}], shape=(6,), dtype=int64))'.format(
+              ' '.join(repr(x) for x in values), ' '.join(
+                  repr(x) for x in row_splits)))
+      self.assertEqual(str(rt), expected_str)
+      self.assertEqual(repr(rt), expected_repr)
+    else:
+      expected_repr = (
+          'tf.RaggedTensor(values=Tensor("RaggedFromRowSplits/values:0", '
+          'shape=(7,), dtype=string), row_splits='
+          'Tensor("RaggedFromRowSplits/row_splits:0", '
+          'shape=(6,), dtype=int64))')
+      self.assertEqual(repr(rt), expected_repr)
+      self.assertEqual(str(rt), expected_repr)
 
   def testRaggedTensorValueStr(self):
+    values = [b'a', b'b', b'c', b'd', b'e', b'f', b'g']
+    row_splits = [0, 2, 5, 6, 6, 7]
     rt = ragged.RaggedTensorValue(
-        values=np.array(b'a b c d e f g'.split()),
-        row_splits=np.array([0, 2, 5, 6, 6, 7], dtype=np.int64))
-    if sys.version_info[0] == 2:
-      self.assertEqual(' '.join(str(rt).split()),
-                       (r"<RaggedTensorValue [['a', 'b'], ['c', 'd', 'e'], "
-                        "['f'], [], ['g']]>"))
-      self.assertEqual(
-          ' '.join(repr(rt).split()),
-          (r"RaggedTensorValue(values=array(['a', 'b', 'c', 'd', "
-           "'e', 'f', 'g'], dtype='|S1'), row_splits=array([0, 2, 5,"
-           ' 6, 6, 7]))'))
-    else:
-      self.assertEqual(
-          ' '.join(str(rt).split()),
-          (r"<RaggedTensorValue [[b'a', b'b'], [b'c', b'd', b'e'], "
-           "[b'f'], [], [b'g']]>"))
-      self.assertEqual(
-          ' '.join(repr(rt).split()),
-          (r"RaggedTensorValue(values=array([b'a', b'b', b'c', b'd', "
-           "b'e', b'f', b'g'], dtype='|S1'), row_splits=array([0, 2, 5,"
-           ' 6, 6, 7]))'))
+        np.array(values), np.array(row_splits, dtype=np.int64))
+    expected_str = '<tf.RaggedTensorValue {}>'.format([[b'a', b'b'],
+                                                       [b'c', b'd', b'e'],
+                                                       [b'f'], [], [b'g']])
+    expected_repr = ("tf.RaggedTensorValue(values=array({}, dtype='|S1'), "
+                     'row_splits=array({}))'.format(values, row_splits))
+    self.assertEqual(' '.join(str(rt).split()), expected_str)
+    self.assertEqual(' '.join(repr(rt).split()), expected_repr)
 
   #=============================================================================
-  # RaggedTensor.with_values() and RaggedTensor.with_inner_values().
+  # RaggedTensor.with_values() and RaggedTensor.with_flat_values().
   #=============================================================================
 
-  @test_util.run_v1_only('b/120545219')
   def testWithValues(self):
     rt1 = ragged.constant([[1, 2], [3, 4, 5], [6], [], [7]])
     rt2 = ragged.constant([[[1, 2], [3, 4, 5]], [[6]], [], [[], [7]]])
 
     rt1_plus_10 = rt1.with_values(rt1.values + 10)
-    rt2_times_10 = rt2.with_inner_values(rt2.inner_values * 10)
+    rt2_times_10 = rt2.with_flat_values(rt2.flat_values * 10)
     rt1_expanded = rt1.with_values(array_ops.expand_dims(rt1.values, axis=1))
 
     self.assertEqual(
-        self.evaluate(rt1_plus_10).tolist(),
+        self.eval_to_list(rt1_plus_10),
         [[11, 12], [13, 14, 15], [16], [], [17]])
     self.assertEqual(
-        self.evaluate(rt2_times_10).tolist(),
+        self.eval_to_list(rt2_times_10),
         [[[10, 20], [30, 40, 50]], [[60]], [], [[], [70]]])
     self.assertEqual(
-        self.evaluate(rt1_expanded).tolist(),
+        self.eval_to_list(rt1_expanded),
         [[[1], [2]], [[3], [4], [5]], [[6]], [], [[7]]])
 
   #=============================================================================
   # Session.run
   #=============================================================================
-  @test_util.run_deprecated_v1
   def testSessionRun(self):
+    if context.executing_eagerly():
+      return
+
     rt1 = ragged.constant([[1, 2, 3], [4]])
     rt2 = ragged.constant([[[], [1, 2]], [[3]]])
     with self.test_session() as session:
       result = session.run({'rt1': rt1, 'rt2': rt2})
       self.assertCountEqual(sorted(result.keys()), ['rt1', 'rt2'])
-      self.assertEqual(result['rt1'].tolist(), [[1, 2, 3], [4]])
-      self.assertEqual(result['rt2'].tolist(), [[[], [1, 2]], [[3]]])
+      self.assertEqual(result['rt1'].to_list(), [[1, 2, 3], [4]])
+      self.assertEqual(result['rt2'].to_list(), [[[], [1, 2]], [[3]]])
 
-  @test_util.run_deprecated_v1
   def testSessionRunFeed(self):
-    rt1 = ragged.from_row_splits(
+    if context.executing_eagerly():
+      return
+
+    rt1 = RaggedTensor.from_row_splits(
         array_ops.placeholder(dtypes.int32),
         array_ops.placeholder(dtypes.int64))
-    rt2 = ragged.from_nested_row_splits(
-        array_ops.placeholder(dtypes.int32),
-        [array_ops.placeholder(dtypes.int64),
-         array_ops.placeholder(dtypes.int64)])
+    rt2 = RaggedTensor.from_nested_row_splits(
+        array_ops.placeholder(dtypes.int32), [
+            array_ops.placeholder(dtypes.int64),
+            array_ops.placeholder(dtypes.int64)
+        ])
 
     rt1_feed_val = ragged.constant_value([[1, 2, 3], [4]])
     rt2_feed_val = ragged.constant_value([[[], [1, 2]], [[3]]])
 
     with self.test_session() as session:
-      result = session.run({'rt1': rt1, 'rt2': rt2},
-                           feed_dict={rt1: rt1_feed_val,
-                                      rt2: rt2_feed_val})
+      result = session.run({
+          'rt1': rt1,
+          'rt2': rt2
+      },
+                           feed_dict={
+                               rt1: rt1_feed_val,
+                               rt2: rt2_feed_val
+                           })
       self.assertCountEqual(sorted(result.keys()), ['rt1', 'rt2'])
-      self.assertEqual(result['rt1'].tolist(), [[1, 2, 3], [4]])
-      self.assertEqual(result['rt2'].tolist(), [[[], [1, 2]], [[3]]])
+      self.assertEqual(result['rt1'].to_list(), [[1, 2, 3], [4]])
+      self.assertEqual(result['rt2'].to_list(), [[[], [1, 2]], [[3]]])
 
-  @test_util.run_v1_only('b/120545219')
   def testSessionPartialRunFeed(self):
+    if context.executing_eagerly():
+      return
+
     # Placeholder inputs.
-    a = ragged.from_row_splits(
+    a = RaggedTensor.from_row_splits(
         array_ops.placeholder(dtypes.int32, shape=[None], name='a.values'),
         array_ops.placeholder(dtypes.int64, name='a.row_splits'))
-    b = ragged.from_row_splits(
+    b = RaggedTensor.from_row_splits(
         array_ops.placeholder(dtypes.int32, shape=[None], name='b.values'),
         array_ops.placeholder(dtypes.int64, name='b.row_splits'))
     c = array_ops.placeholder(dtypes.int32, shape=[], name='c')
@@ -1232,11 +1198,10 @@ class RaggedTensorTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       handle = session.partial_run_setup([r1, r2], [a, b, c])
 
       res1 = session.partial_run(handle, r1, feed_dict={a: a_val, b: b_val})
-      self.assertEqual(res1.tolist(), [22, 8])
+      self.assertAllEqual(res1, [22, 8])
 
       res2 = session.partial_run(handle, r2, feed_dict={c: c_val})
-      self.assertEqual(res2.tolist(), [15, 7])
-
+      self.assertAllEqual(res2, [15, 7])
 
 if __name__ == '__main__':
   googletest.main()
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_value.py b/tensorflow/python/ops/ragged/ragged_tensor_value.py
index 39d3249c99..bf0ac4482a 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_value.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_value.py
@@ -53,7 +53,7 @@ class RaggedTensorValue(object):
       doc="""The numpy dtype of values in this tensor.""")
 
   @property
-  def inner_values(self):
+  def flat_values(self):
     """The innermost `values` array for this ragged tensor value."""
     rt_values = self.values
     while isinstance(rt_values, RaggedTensorValue):
@@ -82,15 +82,18 @@ class RaggedTensorValue(object):
     return (self._row_splits.shape[0] - 1,) + (None,) + self._values.shape[1:]
 
   def __str__(self):
-    return "<RaggedTensorValue %s>" % self.tolist()
+    return "<tf.RaggedTensorValue %s>" % self.to_list()
 
   def __repr__(self):
-    return "RaggedTensorValue(values=%r, row_splits=%r)" % (self._values,
-                                                            self._row_splits)
+    return "tf.RaggedTensorValue(values=%r, row_splits=%r)" % (self._values,
+                                                               self._row_splits)
 
-  def tolist(self):
+  def to_list(self):
     """Returns this ragged tensor value as a nested Python list."""
-    values_as_list = self._values.tolist()
+    if isinstance(self._values, RaggedTensorValue):
+      values_as_list = self._values.to_list()
+    else:
+      values_as_list = self._values.tolist()
     return [
         values_as_list[self._row_splits[i]:self._row_splits[i + 1]]
         for i in range(len(self._row_splits) - 1)
diff --git a/tensorflow/python/ops/ragged/ragged_test_util.py b/tensorflow/python/ops/ragged/ragged_test_util.py
new file mode 100644
index 0000000000..027417664d
--- /dev/null
+++ b/tensorflow/python/ops/ragged/ragged_test_util.py
@@ -0,0 +1,95 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# pylint: disable=invalid-name
+"""Test utils for tensorflow RaggedTensors."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import ragged
+
+
+class RaggedTensorTestCase(test_util.TensorFlowTestCase):
+  """Base class for RaggedTensor test cases."""
+
+  def _GetPyList(self, a):
+    """Converts a to a nested python list."""
+    if isinstance(a, ragged.RaggedTensor):
+      return self.evaluate(a).to_list()
+    elif isinstance(a, ops.Tensor):
+      a = self.evaluate(a)
+      return a.tolist() if isinstance(a, np.ndarray) else a
+    elif isinstance(a, np.ndarray):
+      return a.tolist()
+    elif isinstance(a, ragged.RaggedTensorValue):
+      return a.to_list()
+    else:
+      return np.array(a).tolist()
+
+  def assertRaggedEqual(self, a, b):
+    """Asserts that two potentially ragged tensors are equal."""
+    a_list = self._GetPyList(a)
+    b_list = self._GetPyList(b)
+    self.assertEqual(a_list, b_list)
+
+    if not (isinstance(a, (list, tuple)) or isinstance(b, (list, tuple))):
+      a_ragged_rank = a.ragged_rank if ragged.is_ragged(a) else 0
+      b_ragged_rank = b.ragged_rank if ragged.is_ragged(b) else 0
+      self.assertEqual(a_ragged_rank, b_ragged_rank)
+
+  def assertRaggedAlmostEqual(self, a, b, places=7):
+    a_list = self._GetPyList(a)
+    b_list = self._GetPyList(b)
+    self.assertNestedListAlmostEqual(a_list, b_list, places, context='value')
+
+    if not (isinstance(a, (list, tuple)) or isinstance(b, (list, tuple))):
+      a_ragged_rank = a.ragged_rank if ragged.is_ragged(a) else 0
+      b_ragged_rank = b.ragged_rank if ragged.is_ragged(b) else 0
+      self.assertEqual(a_ragged_rank, b_ragged_rank)
+
+  def assertNestedListAlmostEqual(self, a, b, places=7, context='value'):
+    self.assertEqual(type(a), type(b))
+    if isinstance(a, (list, tuple)):
+      self.assertLen(a, len(b), 'Length differs for %s' % context)
+      for i in range(len(a)):
+        self.assertNestedListAlmostEqual(a[i], b[i], places,
+                                         '%s[%s]' % (context, i))
+    else:
+      self.assertAlmostEqual(
+          a, b, places,
+          '%s != %s within %s places at %s' % (a, b, places, context))
+
+  def eval_to_list(self, tensor):
+    value = self.evaluate(tensor)
+    if ragged.is_ragged(value):
+      return value.to_list()
+    elif isinstance(value, np.ndarray):
+      return value.tolist()
+    else:
+      return value
+
+  def _eval_tensor(self, tensor):
+    if ragged.is_ragged(tensor):
+      return ragged.RaggedTensorValue(
+          self._eval_tensor(tensor.values),
+          self._eval_tensor(tensor.row_splits))
+    else:
+      return test_util.TensorFlowTestCase._eval_tensor(self, tensor)
diff --git a/tensorflow/python/ops/ragged/ragged_tile_op_test.py b/tensorflow/python/ops/ragged/ragged_tile_op_test.py
index f335b15dd1..d3445571bf 100644
--- a/tensorflow/python/ops/ragged/ragged_tile_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tile_op_test.py
@@ -24,12 +24,14 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops.ragged import ragged_array_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedTileOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedTileOpTest(ragged_test_util.RaggedTensorTestCase,
+                       parameterized.TestCase):
 
   @parameterized.parameters([
       #=========================================================================
@@ -181,14 +183,13 @@ class RaggedTileOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
                     [[[5], [6]]]]),
 
   ])  # pyformat: disable
-  @test_util.run_deprecated_v1
   def testRaggedTile(self,
                      descr,
                      rt_input,
                      multiples,
                      expected,
                      ragged_rank=None):
-    rt = ragged_factory_ops.constant(rt_input, ragged_rank)
+    rt = ragged.constant(rt_input, ragged_rank)
 
     expected_shape = [
         None if dim is None else dim * multiple
@@ -202,24 +203,21 @@ class RaggedTileOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         const_multiples, shape=[len(multiples)])
 
     for multiples_tensor in (const_multiples, non_const_multiples):
-      tiled = ragged_array_ops.tile(rt, multiples_tensor)
+      tiled = ragged.tile(rt, multiples_tensor)
       self.assertEqual(tiled.ragged_rank, rt.ragged_rank)
       self.assertEqual(tiled.shape.ndims, rt.shape.ndims)
       if multiples_tensor is const_multiples:
         self.assertEqual(tiled.shape.as_list(), expected_shape)
-      with self.test_session():
-        self.assertEqual(tiled.eval().tolist(), expected)
+      self.assertRaggedEqual(tiled, expected)
 
-  @test_util.run_deprecated_v1
   def testRaggedTileWithTensorInput(self):
     # When the input is a `Tensor`, ragged_tile just delegates to tf.tile.
     dt = constant_op.constant([[1, 2], [3, 4]])
-    tiled = ragged_array_ops.tile(dt, [3, 2])
+    tiled = ragged.tile(dt, [3, 2])
     expected = [[1, 2, 1, 2], [3, 4, 3, 4],
                 [1, 2, 1, 2], [3, 4, 3, 4],
                 [1, 2, 1, 2], [3, 4, 3, 4]]  # pyformat: disable
-    with self.test_session():
-      self.assertEqual(tiled.eval().tolist(), expected)
+    self.assertRaggedEqual(tiled, expected)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py b/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
index 9863e3b583..46d7a56a7c 100644
--- a/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
@@ -25,176 +26,168 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedTensorToSparseOpTest(test_util.TensorFlowTestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
 
-  @test_util.run_deprecated_v1
   def testDocStringExample(self):
     rt = ragged.constant([[1, 2, 3], [4], [], [5, 6]])
-    st = ragged.to_sparse(rt)
-    expected = ('SparseTensorValue(indices='
-                'array([[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [3, 1]]), '
-                'values=array([1, 2, 3, 4, 5, 6], dtype=int32), '
-                'dense_shape=array([4, 3]))')
-    with self.test_session():
-      self.assertEqual(' '.join(repr(st.eval()).split()), expected)
-
-  @test_util.run_deprecated_v1
+    st = self.evaluate(rt.to_sparse())
+    self.assertAllEqual(st.indices,
+                        [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [3, 1]])
+    self.assertAllEqual(st.values, [1, 2, 3, 4, 5, 6])
+    self.assertAllEqual(st.dense_shape, [4, 3])
+
   def test2DRaggedTensorWithOneRaggedDimension(self):
     rt = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
-    with self.test_session():
-      st = ragged.to_sparse(rt).eval()
-      self.assertAllEqual(
-          st.indices, [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0], [4, 0]])
-      self.assertAllEqual(st.values, b'a b c d e f g'.split())
-      self.assertAllEqual(st.dense_shape, [5, 3])
-
-  @test_util.run_deprecated_v1
+    st = self.evaluate(rt.to_sparse())
+    self.assertAllEqual(
+        st.indices, [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0], [4, 0]])
+    self.assertAllEqual(st.values, b'a b c d e f g'.split())
+    self.assertAllEqual(st.dense_shape, [5, 3])
+
   def test3DRaggedTensorWithOneRaggedDimension(self):
     rt = ragged.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8], [9, 10]],
                           [[11, 12]], [], [[13, 14]]],
                          ragged_rank=1)
-    with self.test_session():
-      st = ragged.to_sparse(rt).eval()
-      self.assertAllEqual(
-          st.indices, [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 0],
-                       [1, 0, 1], [1, 1, 0], [1, 1, 1], [1, 2, 0], [1, 2, 1],
-                       [2, 0, 0], [2, 0, 1], [4, 0, 0], [4, 0, 1]])
-      self.assertAllEqual(st.values,
-                          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
-      self.assertAllEqual(st.dense_shape, [5, 3, 2])
-
-  @test_util.run_deprecated_v1
+    st = self.evaluate(rt.to_sparse())
+    self.assertAllEqual(st.indices,
+                        [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 0],
+                         [1, 0, 1], [1, 1, 0], [1, 1, 1], [1, 2, 0], [1, 2, 1],
+                         [2, 0, 0], [2, 0, 1], [4, 0, 0], [4, 0, 1]])
+    self.assertAllEqual(st.values,
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+    self.assertAllEqual(st.dense_shape, [5, 3, 2])
+
   def test4DRaggedTensorWithOneRaggedDimension(self):
     rt = ragged.constant(
         [[[[1, 2], [3, 4]], [[5, 6], [7, 8]]], [], [[[9, 10], [11, 12]]]],
         ragged_rank=1)
-    with self.test_session():
-      st = ragged.to_sparse(rt).eval()
-      self.assertAllEqual(st.values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
-      self.assertAllEqual(
-          st.indices,
-          [
-              [0, 0, 0, 0],  # index for value=1
-              [0, 0, 0, 1],  # index for value=2
-              [0, 0, 1, 0],  # index for value=3
-              [0, 0, 1, 1],  # index for value=4
-              [0, 1, 0, 0],  # index for value=5
-              [0, 1, 0, 1],  # index for value=6
-              [0, 1, 1, 0],  # index for value=7
-              [0, 1, 1, 1],  # index for value=8
-              [2, 0, 0, 0],  # index for value=9
-              [2, 0, 0, 1],  # index for value=10
-              [2, 0, 1, 0],  # index for value=11
-              [2, 0, 1, 1],  # index for value=12
-          ])
-      self.assertAllEqual(st.dense_shape, [3, 2, 2, 2])
-
-  @test_util.run_deprecated_v1
+    st = self.evaluate(rt.to_sparse())
+    self.assertAllEqual(st.values, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
+    self.assertAllEqual(
+        st.indices,
+        [
+            [0, 0, 0, 0],  # index for value=1
+            [0, 0, 0, 1],  # index for value=2
+            [0, 0, 1, 0],  # index for value=3
+            [0, 0, 1, 1],  # index for value=4
+            [0, 1, 0, 0],  # index for value=5
+            [0, 1, 0, 1],  # index for value=6
+            [0, 1, 1, 0],  # index for value=7
+            [0, 1, 1, 1],  # index for value=8
+            [2, 0, 0, 0],  # index for value=9
+            [2, 0, 0, 1],  # index for value=10
+            [2, 0, 1, 0],  # index for value=11
+            [2, 0, 1, 1],  # index for value=12
+        ])
+    self.assertAllEqual(st.dense_shape, [3, 2, 2, 2])
+
   def test4DRaggedTensorWithTwoRaggedDimensions(self):
     rt = ragged.constant([[[[1, 2], [3, 4]], [[5, 6], [7, 8], [9, 10]]],
                           [[[11, 12]], [], [[13, 14]]], []],
                          ragged_rank=2)
-    with self.test_session():
-      st = ragged.to_sparse(rt).eval()
-      self.assertAllEqual(
-          st.indices,
-          [
-              [0, 0, 0, 0],  # index for value=1
-              [0, 0, 0, 1],  # index for value=2
-              [0, 0, 1, 0],  # index for value=3
-              [0, 0, 1, 1],  # index for value=4
-              [0, 1, 0, 0],  # index for value=5
-              [0, 1, 0, 1],  # index for value=6
-              [0, 1, 1, 0],  # index for value=7
-              [0, 1, 1, 1],  # index for value=8
-              [0, 1, 2, 0],  # index for value=9
-              [0, 1, 2, 1],  # index for value=10
-              [1, 0, 0, 0],  # index for value=11
-              [1, 0, 0, 1],  # index for value=12
-              [1, 2, 0, 0],  # index for value=13
-              [1, 2, 0, 1],  # index for value=14
-          ])
-      self.assertAllEqual(st.values,
-                          [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
-      self.assertAllEqual(st.dense_shape, [3, 3, 3, 2])
+    st = self.evaluate(rt.to_sparse())
+    self.assertAllEqual(
+        st.indices,
+        [
+            [0, 0, 0, 0],  # index for value=1
+            [0, 0, 0, 1],  # index for value=2
+            [0, 0, 1, 0],  # index for value=3
+            [0, 0, 1, 1],  # index for value=4
+            [0, 1, 0, 0],  # index for value=5
+            [0, 1, 0, 1],  # index for value=6
+            [0, 1, 1, 0],  # index for value=7
+            [0, 1, 1, 1],  # index for value=8
+            [0, 1, 2, 0],  # index for value=9
+            [0, 1, 2, 1],  # index for value=10
+            [1, 0, 0, 0],  # index for value=11
+            [1, 0, 0, 1],  # index for value=12
+            [1, 2, 0, 0],  # index for value=13
+            [1, 2, 0, 1],  # index for value=14
+        ])
+    self.assertAllEqual(st.values,
+                        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
+    self.assertAllEqual(st.dense_shape, [3, 3, 3, 2])
 
   def testShape(self):
     rt = ragged.constant([[1, 2], [3, 4, 5], [6], [], [7]])
-    st = ragged.to_sparse(rt)
+    st = rt.to_sparse()
     self.assertEqual(st.indices.shape.as_list(), [7, 2])
     self.assertEqual(st.values.shape.as_list(), [7])
     self.assertEqual(st.dense_shape.shape.as_list(), [2])
 
     rt = ragged.constant([[[1, 2]], [], [[3, 4]], []], ragged_rank=1)
-    st = ragged.to_sparse(rt)
+    st = rt.to_sparse()
     self.assertEqual(st.indices.shape.as_list(), [4, 3])
     self.assertEqual(st.values.shape.as_list(), [4])
     self.assertEqual(st.dense_shape.shape.as_list(), [3])
 
     rt = ragged.constant([[[1], [2, 3, 4, 5, 6, 7]], [[]]])
-    st = ragged.to_sparse(rt)
+    st = rt.to_sparse()
     self.assertEqual(st.indices.shape.as_list(), [7, 3])
     self.assertEqual(st.values.shape.as_list(), [7])
     self.assertEqual(st.dense_shape.shape.as_list(), [3])
 
-  @test_util.run_v1_only('b/120545219')
   def testKernelErrors(self):
     # An empty vector, defined using a placeholder to ensure that we can't
     # determine that it's invalid at graph-construction time.
     empty_vector = array_ops.placeholder_with_default(
         array_ops.zeros([0], dtypes.int64), shape=None)
 
-    bad_rt1 = ragged.from_row_splits(row_splits=[2, 3], values=[1, 2, 3])
-    with self.test_session():
-      bad_split0_error = r'First value of ragged splits must be 0.*'
-      self.assertRaisesRegexp(errors.InvalidArgumentError, bad_split0_error,
-                              ragged.to_sparse(bad_rt1).eval)
+    bad_rt1 = ragged.RaggedTensor.from_row_splits(
+        row_splits=[2, 3], values=[1, 2, 3])
+    bad_split0 = r'First value of ragged splits must be 0.*'
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, bad_split0):
+      self.evaluate(bad_rt1.to_sparse())
 
-    bad_rt2 = ragged.from_row_splits(row_splits=[0, 5], values=empty_vector)
-    bad_rt3 = ragged.from_row_splits(
+    bad_rt2 = ragged.RaggedTensor.from_row_splits(
+        row_splits=[0, 5], values=empty_vector)
+    bad_rt3 = ragged.RaggedTensor.from_row_splits(
         row_splits=[0, 1],
-        values=ragged.from_row_splits(row_splits=[0, 5], values=empty_vector))
-    with self.test_session():
-      split_mismatch1_error = r'Final value of ragged splits must match.*'
-      for rt in [bad_rt2, bad_rt3]:
-        self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                split_mismatch1_error,
-                                ragged.to_sparse(rt).eval)
-
-    bad_rt4 = ragged.from_row_splits(
+        values=ragged.RaggedTensor.from_row_splits(
+            row_splits=[0, 5], values=empty_vector))
+    split_mismatch1_error = r'Final value of ragged splits must match.*'
+    for rt in [bad_rt2, bad_rt3]:
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   split_mismatch1_error):
+        self.evaluate(rt.to_sparse())
+
+    bad_rt4 = ragged.RaggedTensor.from_row_splits(
         row_splits=[0, 5],
-        values=ragged.from_row_splits(row_splits=[0], values=empty_vector))
-    with self.test_session():
-      split_mismatch2_error = r'Final value of ragged splits must match.*'
-      self.assertRaisesRegexp(errors.InvalidArgumentError,
-                              split_mismatch2_error,
-                              ragged.to_sparse(bad_rt4).eval)
-
-    bad_rt5 = ragged.from_row_splits(row_splits=empty_vector, values=[])
-    with self.test_session():
-      empty_splits_error = (r'ragged splits may not be empty.*')
-      self.assertRaisesRegexp(errors.InvalidArgumentError, empty_splits_error,
-                              ragged.to_sparse(bad_rt5).eval)
-
-  @test_util.run_v1_only('b/120545219')
+        values=ragged.RaggedTensor.from_row_splits(
+            row_splits=[0], values=empty_vector))
+    split_mismatch2_error = r'Final value of ragged splits must match.*'
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 split_mismatch2_error):
+      self.evaluate(bad_rt4.to_sparse())
+
+    bad_rt5 = ragged.RaggedTensor.from_row_splits(
+        row_splits=empty_vector, values=[])
+    empty_splits_error = (r'ragged splits may not be empty.*')
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 empty_splits_error):
+      self.evaluate(bad_rt5.to_sparse())
+
   def testGradient(self):
+    if context.executing_eagerly():
+      return
     # rt1.shape == rt2.shape == [2, (D2), (D3), 2].
     rt1 = ragged.constant([[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0]]]],
                           ragged_rank=2)
     rt2 = ragged.constant([[[[9.0, 8.0], [7.0, 6.0]], [[5.0, 4.0]]]],
                           ragged_rank=2)
-    rt = ragged.map_inner_values(math_ops.add, rt1, rt2 * 2.0)
-    st = ragged.to_sparse(rt)
+    rt = ragged.map_flat_values(math_ops.add, rt1, rt2 * 2.0)
+    st = rt.to_sparse()
 
-    g1, g2 = gradients_impl.gradients(st.values, [rt1.inner_values,
-                                                  rt2.inner_values])
+    g1, g2 = gradients_impl.gradients(st.values,
+                                      [rt1.flat_values, rt2.flat_values])
     print(g1, g2)
-    with self.test_session():
-      self.assertEqual(g1.eval().tolist(), [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]])
-      self.assertEqual(g2.eval().tolist(), [[2.0, 2.0], [2.0, 2.0], [2.0, 2.0]])
+    self.assertRaggedEqual(g1, [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0]])
+    self.assertRaggedEqual(g2, [[2.0, 2.0], [2.0, 2.0], [2.0, 2.0]])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_to_tensor_op_test.py b/tensorflow/python/ops/ragged/ragged_to_tensor_op_test.py
index 77499b9cb3..ffcc2be52e 100644
--- a/tensorflow/python/ops/ragged/ragged_to_tensor_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_to_tensor_op_test.py
@@ -24,23 +24,19 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedTensorToTensorOpTest(test_util.TensorFlowTestCase,
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedTensorToTensorOpTest(ragged_test_util.RaggedTensorTestCase,
                                  parameterized.TestCase):
 
-  @test_util.run_deprecated_v1
   def testDocStringExamples(self):
     """Example from ragged_to_tensor.__doc__."""
     rt = ragged.constant([[9, 8, 7], [], [6, 5], [4]])
-    dt = ragged.to_tensor(rt)
-    with self.test_session():
-      self.assertEqual(str(dt.eval()),
-                       '[[9 8 7]\n'
-                       ' [0 0 0]\n'
-                       ' [6 5 0]\n'
-                       ' [4 0 0]]')  # pyformat: disable
+    dt = rt.to_tensor()
+    self.assertAllEqual(dt, [[9, 8, 7], [0, 0, 0], [6, 5, 0], [4, 0, 0]])
 
   @parameterized.parameters(
       {
@@ -98,7 +94,6 @@ class RaggedTensorToTensorOpTest(test_util.TensorFlowTestCase,
           'expected': [[[[1], [2]], [[9], [9]], [[3], [9]]]],
       },
   )
-  @test_util.run_deprecated_v1
   def testRaggedTensorToTensor(self,
                                rt_input,
                                expected,
@@ -106,15 +101,14 @@ class RaggedTensorToTensorOpTest(test_util.TensorFlowTestCase,
                                default=None,
                                expected_shape=None):
     rt = ragged.constant(rt_input, ragged_rank=ragged_rank)
-    dt = ragged.to_tensor(rt, default)
-    self.assertEqual(type(dt), ops.Tensor)
+    dt = rt.to_tensor(default)
+    self.assertIsInstance(dt, ops.Tensor)
     self.assertEqual(rt.dtype, dt.dtype)
     self.assertTrue(dt.shape.is_compatible_with(rt.shape))
-    with self.test_session():
-      self.assertEqual(dt.eval().tolist(), expected)
-      if expected_shape is not None:
-        dt_shape = array_ops.shape(dt)
-        self.assertEqual(dt_shape.eval().tolist(), expected_shape)
+    self.assertAllEqual(self.eval_to_list(dt), expected)
+    if expected_shape is not None:
+      dt_shape = array_ops.shape(dt)
+      self.assertAllEqual(dt_shape, expected_shape)
 
   @parameterized.parameters(
       {
@@ -131,14 +125,13 @@ class RaggedTensorToTensorOpTest(test_util.TensorFlowTestCase,
       {
           'rt_input': [[1, 2, 3]],
           'default': 'a',
-          'error': (TypeError, "Expected int32, got 'a' of type 'str' instead"),
+          'error': (TypeError, '.*'),
       },
   )
-  @test_util.run_deprecated_v1
   def testError(self, rt_input, default, error, ragged_rank=None):
     rt = ragged.constant(rt_input, ragged_rank=ragged_rank)
     with self.assertRaisesRegexp(error[0], error[1]):
-      ragged.to_tensor(rt, default)
+      rt.to_tensor(default)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_util_test.py b/tensorflow/python/ops/ragged/ragged_util_test.py
index 69c605dbf9..72a4155930 100644
--- a/tensorflow/python/ops/ragged/ragged_util_test.py
+++ b/tensorflow/python/ops/ragged/ragged_util_test.py
@@ -24,6 +24,7 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.ops.ragged import ragged_util
 from tensorflow.python.platform import googletest
 
@@ -41,7 +42,9 @@ TENSOR_4D = [[[[('%d%d%d%d' % (i, j, k, l)).encode('utf-8')
              for i in range(4)]
 
 
-class RaggedRepeatTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedUtilTest(ragged_test_util.RaggedTensorTestCase,
+                     parameterized.TestCase):
 
   @parameterized.parameters([
       # Docstring examples
@@ -87,11 +90,10 @@ class RaggedRepeatTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       # Empty tensor
       dict(data=[], repeats=[], axis=0, expected=[]),
   ])
-  @test_util.run_v1_only('b/120545219')
   def testRepeat(self, data, repeats, expected, axis=None):
     result = ragged_util.repeat(data, repeats, axis)
     with self.test_session():
-      self.assertEqual(result.eval().tolist(), expected)
+      self.assertAllEqual(result, expected)
 
   @parameterized.parameters([
       dict(mode=mode, **args)
@@ -136,7 +138,6 @@ class RaggedRepeatTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           dict(data=TENSOR_4D, repeats=[1, 3, 0, 0, 2], axis=3),
       ]
   ])
-  @test_util.run_v1_only('b/120545219')
   def testValuesMatchesNumpy(self, mode, data, repeats, axis):
     # Exception: we can't handle negative axis if data.ndims is unknown.
     if axis < 0 and mode == 'unknown_shape':
@@ -158,7 +159,7 @@ class RaggedRepeatTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
     result = ragged_util.repeat(data, repeats, axis)
     with self.test_session():
-      self.assertEqual(result.eval().tolist(), expected.tolist())
+      self.assertAllEqual(result, expected)
 
   @parameterized.parameters([
       dict(
diff --git a/tensorflow/python/ops/ragged/ragged_where_op_test.py b/tensorflow/python/ops/ragged/ragged_where_op_test.py
index 6d645eefac..b3cd5a2deb 100644
--- a/tensorflow/python/ops/ragged/ragged_where_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_where_op_test.py
@@ -22,10 +22,13 @@ from absl.testing import parameterized
 
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
 
-class RaggedWhereOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+@test_util.run_all_in_graph_and_eager_modes
+class RaggedWhereOpTest(ragged_test_util.RaggedTensorTestCase,
+                        parameterized.TestCase):
 
   @parameterized.parameters([
       #=========================================================================
@@ -165,18 +168,9 @@ class RaggedWhereOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           y=ragged.constant_value([[[['a']]], [[['b']]]]),
           expected=ragged.constant_value([[[[], [b'A']]], [[[b'b']]]])),
   ])   # pyformat: disable
-  @test_util.run_v1_only('b/120545219')
   def testRaggedWhere(self, condition, expected, x=None, y=None):
     result = ragged.where(condition, x, y)
-    self.assertEqual(
-        getattr(result, 'ragged_rank', 0), getattr(expected, 'ragged_rank', 0))
-    with self.test_session():
-      result_value = self.evaluate(result)
-      if hasattr(result_value, 'tolist'):
-        result_value = result_value.tolist()
-      if hasattr(expected, 'tolist'):
-        expected = expected.tolist()
-      self.assertEqual(result_value, expected)
+    self.assertRaggedEqual(result, expected)
 
   @parameterized.parameters([
       dict(
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index f34ac4c3e8..baacb87239 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -113,6 +113,7 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python/eager:eager_pip",
     "//tensorflow/python/kernel_tests/signal:test_util",
     "//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files",
+    "//tensorflow/python/ops/ragged:ragged_test_util",
     "//tensorflow/python/saved_model:saved_model",
     "//tensorflow/python/tools:tools_pip",
     "//tensorflow/python/tools/api/generator:create_python_api",
-- 
GitLab


From 49123b01d3c18ad82cb9f754b8e130baa691a409 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Fri, 7 Dec 2018 22:47:27 -0800
Subject: [PATCH 265/873] Fix flexbuffer byte type and annotation.

PiperOrigin-RevId: 224623431
---
 tensorflow/lite/schema/schema.fbs         |  2 +-
 tensorflow/lite/schema/schema_generated.h | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs
index 6436167303..980f13b19b 100644
--- a/tensorflow/lite/schema/schema.fbs
+++ b/tensorflow/lite/schema/schema.fbs
@@ -45,7 +45,7 @@ enum TensorType : byte {
 // Custom quantization parameters for experimenting with new quantization
 // techniques.
 table CustomQuantization {
-  custom:[byte];
+  custom:[ubyte] (force_align: 16);
 }
 
 // Represents a specific quantization technique's parameters.
diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h
index af8b143364..637cbafabd 100755
--- a/tensorflow/lite/schema/schema_generated.h
+++ b/tensorflow/lite/schema/schema_generated.h
@@ -2247,7 +2247,7 @@ inline const char *EnumNameCustomOptionsFormat(CustomOptionsFormat e) {
 
 struct CustomQuantizationT : public flatbuffers::NativeTable {
   typedef CustomQuantization TableType;
-  std::vector<int8_t> custom;
+  std::vector<uint8_t> custom;
   CustomQuantizationT() {
   }
 };
@@ -2257,8 +2257,8 @@ struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   enum {
     VT_CUSTOM = 4
   };
-  const flatbuffers::Vector<int8_t> *custom() const {
-    return GetPointer<const flatbuffers::Vector<int8_t> *>(VT_CUSTOM);
+  const flatbuffers::Vector<uint8_t> *custom() const {
+    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM);
   }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
@@ -2274,7 +2274,7 @@ struct CustomQuantization FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
 struct CustomQuantizationBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
-  void add_custom(flatbuffers::Offset<flatbuffers::Vector<int8_t>> custom) {
+  void add_custom(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom) {
     fbb_.AddOffset(CustomQuantization::VT_CUSTOM, custom);
   }
   explicit CustomQuantizationBuilder(flatbuffers::FlatBufferBuilder &_fbb)
@@ -2291,7 +2291,7 @@ struct CustomQuantizationBuilder {
 
 inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(
     flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<int8_t>> custom = 0) {
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> custom = 0) {
   CustomQuantizationBuilder builder_(_fbb);
   builder_.add_custom(custom);
   return builder_.Finish();
@@ -2299,10 +2299,10 @@ inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(
 
 inline flatbuffers::Offset<CustomQuantization> CreateCustomQuantizationDirect(
     flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<int8_t> *custom = nullptr) {
+    const std::vector<uint8_t> *custom = nullptr) {
   return tflite::CreateCustomQuantization(
       _fbb,
-      custom ? _fbb.CreateVector<int8_t>(*custom) : 0);
+      custom ? _fbb.CreateVector<uint8_t>(*custom) : 0);
 }
 
 flatbuffers::Offset<CustomQuantization> CreateCustomQuantization(flatbuffers::FlatBufferBuilder &_fbb, const CustomQuantizationT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-- 
GitLab


From 8ca891f53de9145113d889f876ab0cc9efee3c34 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 7 Dec 2018 23:09:51 -0800
Subject: [PATCH 266/873] Deflake optimize_dataset_test in tsan mode.

PiperOrigin-RevId: 224624656
---
 .../python/data/experimental/kernel_tests/optimization/BUILD    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
index 2fc243aa13..bf868ebe79 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/BUILD
@@ -286,7 +286,7 @@ py_test(
 
 py_test(
     name = "optimize_dataset_test",
-    size = "small",
+    size = "medium",
     srcs = ["optimize_dataset_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-- 
GitLab


From d7fd24856d77b14021524b5cd3833cfd322e665d Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Fri, 7 Dec 2018 23:10:18 -0800
Subject: [PATCH 267/873] Add tf.estimator as a valid v1 api prefix in
 testAllAPIV1 check in tf_upgrade_v2_test.py.

PiperOrigin-RevId: 224624681
---
 tensorflow/tools/compatibility/tf_upgrade_v2_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index 63aa5f0c6b..0fc7a18734 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -198,6 +198,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
             _, _, _, text = self._upgrade("tf." + name)
             if (text and
                 not text.startswith("tf.compat.v1") and
+                not text.startswith("tf.estimator") and
                 text not in v1_symbols):
               self.assertFalse(
                   True, "Symbol %s generated from %s not in v1 API" % (
-- 
GitLab


From a3f24855d6697c37f1842e097a4563d6613d906b Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Fri, 7 Dec 2018 23:37:13 -0800
Subject: [PATCH 268/873] Allow TensorFlow demo app to switch between devices

Please approve this CL. It will be submitted automatically, and its GitHub pull request will be marked as merged.

- ListView gui for model (float and quant)
- ListView gui for devices (optional gpu, nnapi and cpu)
- Add a few AAR options.

PiperOrigin-RevId: 224626109
---
 tensorflow/lite/java/demo/app/build.gradle    |  12 ++
 .../Camera2BasicFragment.java                 | 164 ++++++++++++++----
 .../tflitecamerademo/GpuDelegateHelper.java   |  49 ++++++
 .../tflitecamerademo/ImageClassifier.java     |  14 ++
 .../ImageClassifierFloatMobileNet.java        |  94 ++++++++++
 .../src/main/res/drawable/item_selector.xml   |  11 ++
 .../res/layout-v26/fragment_camera2_basic.xml |  98 +++++++----
 .../res/layout/fragment_camera2_basic.xml     |  88 +++++++---
 .../app/src/main/res/layout/listview_row.xml  |  17 ++
 .../demo/app/src/main/res/values/colors.xml   |   3 +
 .../demo/app/src/main/res/values/strings.xml  |   6 +
 11 files changed, 463 insertions(+), 93 deletions(-)
 create mode 100644 tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/GpuDelegateHelper.java
 create mode 100644 tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatMobileNet.java
 create mode 100644 tensorflow/lite/java/demo/app/src/main/res/drawable/item_selector.xml
 create mode 100644 tensorflow/lite/java/demo/app/src/main/res/layout/listview_row.xml

diff --git a/tensorflow/lite/java/demo/app/build.gradle b/tensorflow/lite/java/demo/app/build.gradle
index 5e50ed4b94..b8fc282cb1 100644
--- a/tensorflow/lite/java/demo/app/build.gradle
+++ b/tensorflow/lite/java/demo/app/build.gradle
@@ -40,6 +40,15 @@ repositories {
         url 'https://google.bintray.com/tensorflow'
     }
 }
+allprojects {
+    repositories {
+        // Uncomment if you want to use a local repo.
+        // mavenLocal()
+        jcenter()
+    }
+}
+
+
 
 dependencies {
     compile fileTree(dir: 'libs', include: ['*.jar'])
@@ -49,7 +58,10 @@ dependencies {
     compile 'com.android.support:support-annotations:25.3.1'
     compile 'com.android.support:support-v13:25.2.0'
 
+    // Build off of nightly TensorFlow Lite
     compile 'org.tensorflow:tensorflow-lite:0.0.0-nightly'
+    // Use local TensorFlow library
+    // compile 'org.tensorflow:tensorflow-lite-local:0.0.0'
 }
 
 def targetFolder = "src/main/assets"
diff --git a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
index 20e96f586a..165d335101 100644
--- a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
+++ b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java
@@ -56,8 +56,10 @@ import android.view.Surface;
 import android.view.TextureView;
 import android.view.View;
 import android.view.ViewGroup;
+import android.widget.AdapterView;
+import android.widget.ArrayAdapter;
+import android.widget.ListView;
 import android.widget.NumberPicker;
-import android.widget.RadioButton;
 import android.widget.TextView;
 import android.widget.Toast;
 import java.io.IOException;
@@ -69,6 +71,7 @@ import java.util.List;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
 
+
 /** Basic fragments for the Camera. */
 public class Camera2BasicFragment extends Fragment
     implements FragmentCompat.OnRequestPermissionsResultCallback {
@@ -88,9 +91,9 @@ public class Camera2BasicFragment extends Fragment
   private TextView textView;
   private NumberPicker np;
   private ImageClassifier classifier;
+  private ListView deviceView;
+  private ListView modelView;
 
-  enum InferenceEngine { CPU, NNAPI };
-  private InferenceEngine inferenceEngine = InferenceEngine.CPU;
 
   /** Max preview width that is guaranteed by Camera2 API */
   private static final int MAX_PREVIEW_WIDTH = 1920;
@@ -124,6 +127,15 @@ public class Camera2BasicFragment extends Fragment
         public void onSurfaceTextureUpdated(SurfaceTexture texture) {}
       };
 
+  // Model parameter constants.
+  private String gpu;
+  private String cpu;
+  private String nnApi;
+  private String mobilenetV1Quant;
+  private String mobilenetV1Float;
+
+
+
   /** ID of the current {@link CameraDevice}. */
   private String cameraId;
 
@@ -170,6 +182,14 @@ public class Camera2BasicFragment extends Fragment
         }
       };
 
+  private ArrayList<String> deviceStrings = new ArrayList<String>();
+  private ArrayList<String> modelStrings = new ArrayList<String>();
+
+  /** Current indices of device and model. */
+  int currentDevice = -1;
+
+  int currentModel = -1;
+
   /** An additional thread for running tasks that shouldn't block the UI. */
   private HandlerThread backgroundThread;
 
@@ -299,11 +319,115 @@ public class Camera2BasicFragment extends Fragment
     return inflater.inflate(R.layout.fragment_camera2_basic, container, false);
   }
 
+  private void updateActiveModel() {
+    // Get UI information before delegating to background
+    final int modelIndex = modelView.getCheckedItemPosition();
+    final int deviceIndex = deviceView.getCheckedItemPosition();
+
+    backgroundHandler.post(() -> {
+      if (modelIndex == currentModel && deviceIndex == currentDevice) {
+        return;
+      }
+      currentModel = modelIndex;
+      currentDevice = deviceIndex;
+
+      // Disable classifier while updating
+      if (classifier != null) {
+        classifier.close();
+        classifier = null;
+      }
+
+      // Lookup names of parameters.
+      String model = modelStrings.get(modelIndex);
+      String device = deviceStrings.get(deviceIndex);
+
+      Log.i(TAG, "Changing model to " + model + " device " + device);
+
+      // Try to load model.
+      try {
+        if (model.equals(mobilenetV1Quant)) {
+          classifier = new ImageClassifierQuantizedMobileNet(getActivity());
+        } else if (model.equals(mobilenetV1Float)) {
+          classifier = new ImageClassifierFloatMobileNet(getActivity());
+        } else {
+          showToast("Failed to load model");
+        }
+      } catch (IOException e) {
+        Log.d(TAG, "Failed to load", e);
+        classifier = null;
+      }
+
+      // Customzie the interpreter to the type of device we want to use.
+      if (device.equals(cpu)) {
+      } else if (device.equals(gpu)) {
+        if (!GpuDelegateHelper.isGpuDelegateAvailable()) {
+          showToast("gpu not in this build.");
+          classifier = null;
+        } else if (model.equals(mobilenetV1Quant)) {
+          showToast("gpu requires float model.");
+          classifier = null;
+        } else {
+          classifier.useGpu();
+        }
+      } else if (device.equals(nnApi)) {
+        classifier.useNNAPI();
+      }
+    });
+  }
+
   /** Connect the buttons to their event handler. */
   @Override
   public void onViewCreated(final View view, Bundle savedInstanceState) {
+    gpu = getString(R.string.gpu);
+    cpu = getString(R.string.cpu);
+    nnApi = getString(R.string.nnapi);
+    mobilenetV1Quant = getString(R.string.mobilenetV1Quant);
+    mobilenetV1Float = getString(R.string.mobilenetV1Float);
+
+    // Get references to widgets.
     textureView = (AutoFitTextureView) view.findViewById(R.id.texture);
     textView = (TextView) view.findViewById(R.id.text);
+    deviceView = (ListView) view.findViewById(R.id.device);
+    modelView = (ListView) view.findViewById(R.id.model);
+
+    // Build list of models
+    modelStrings.add(mobilenetV1Quant);
+    modelStrings.add(mobilenetV1Float);
+
+    // Build list of devices
+    int defaultModelIndex = 0;
+    deviceStrings.add(cpu);
+    if (GpuDelegateHelper.isGpuDelegateAvailable()) {
+      deviceStrings.add(gpu);
+    }
+    deviceStrings.add(nnApi);
+
+    deviceView.setAdapter(
+        new ArrayAdapter<String>(
+            getContext(), R.layout.listview_row, R.id.listview_row_text, deviceStrings));
+    deviceView.setChoiceMode(ListView.CHOICE_MODE_SINGLE);
+    deviceView.setOnItemClickListener(
+        new AdapterView.OnItemClickListener() {
+          @Override
+          public void onItemClick(AdapterView<?> parent, View view, int position, long id) {
+            updateActiveModel();
+          }
+        });
+    deviceView.setItemChecked(0, true);
+
+    modelView.setChoiceMode(ListView.CHOICE_MODE_SINGLE);
+    ArrayAdapter<String> modelAdapter =
+        new ArrayAdapter<>(
+            getContext(), R.layout.listview_row, R.id.listview_row_text, modelStrings);
+    modelView.setAdapter(modelAdapter);
+    modelView.setItemChecked(defaultModelIndex, true);
+    modelView.setOnItemClickListener(
+        new AdapterView.OnItemClickListener() {
+          @Override
+          public void onItemClick(AdapterView<?> parent, View view, int position, long id) {
+            updateActiveModel();
+          }
+        });
 
     np = (NumberPicker) view.findViewById(R.id.np);
     np.setMinValue(1);
@@ -317,43 +441,13 @@ public class Camera2BasicFragment extends Fragment
           }
         });
 
-    RadioButton cpuButton = (RadioButton) view.findViewById(R.id.radio_cpu);
-    cpuButton.setChecked(true);  // TFLite runs on CPU by default.
-    cpuButton.setOnClickListener(
-        new View.OnClickListener() {
-          @Override
-          public void onClick(View view) {
-            if (inferenceEngine == InferenceEngine.CPU) {
-              return;
-            }
-            inferenceEngine = InferenceEngine.CPU;
-            backgroundHandler.post(() -> classifier.useCPU());
-          }
-        });
-
-    ((RadioButton) view.findViewById(R.id.radio_nnapi)).setOnClickListener(
-        new View.OnClickListener() {
-          @Override
-          public void onClick(View view) {
-            if (inferenceEngine == InferenceEngine.NNAPI) {
-              return;
-            }
-            inferenceEngine = InferenceEngine.NNAPI;
-            backgroundHandler.post(() -> classifier.useNNAPI());
-          }
-        });
+    // Start initial model.
   }
 
   /** Load the model and labels. */
   @Override
   public void onActivityCreated(Bundle savedInstanceState) {
     super.onActivityCreated(savedInstanceState);
-    try {
-      // create either a new ImageClassifierQuantizedMobileNet or an ImageClassifierFloatInception
-      classifier = new ImageClassifierQuantizedMobileNet(getActivity());
-    } catch (IOException e) {
-      Log.e(TAG, "Failed to initialize an image classifier.", e);
-    }
     startBackgroundThread();
   }
 
@@ -581,10 +675,12 @@ public class Camera2BasicFragment extends Fragment
     backgroundThread = new HandlerThread(HANDLE_THREAD_NAME);
     backgroundThread.start();
     backgroundHandler = new Handler(backgroundThread.getLooper());
+    // Start the classification train & load an initial model.
     synchronized (lock) {
       runClassifier = true;
     }
     backgroundHandler.post(periodicClassify);
+    updateActiveModel();
   }
 
   /** Stops the background thread and its {@link Handler}. */
diff --git a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/GpuDelegateHelper.java b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/GpuDelegateHelper.java
new file mode 100644
index 0000000000..8dca17744e
--- /dev/null
+++ b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/GpuDelegateHelper.java
@@ -0,0 +1,49 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package com.example.android.tflitecamerademo;
+
+import org.tensorflow.lite.Delegate;
+
+/**
+ * Helper class for {@code GpuDelegate}.
+ *
+ * <p>WARNING: This is an experimental API and subject to change.
+ */
+public class GpuDelegateHelper {
+  private GpuDelegateHelper() {}
+
+  /** Checks whether {@code GpuDelegate} is available. */
+  public static boolean isGpuDelegateAvailable() {
+    try {
+      Class.forName("org.tensorflow.lite.experimental.GpuDelegate");
+      return true;
+    } catch (Exception e) {
+      return false;
+    }
+  }
+
+  /** Returns an instance of {@code GpuDelegate} if available. */
+  public static Delegate createGpuDelegate() {
+    try {
+      return Class.forName("org.tensorflow.lite.experimental.GpuDelegate")
+          .asSubclass(Delegate.class)
+          .getDeclaredConstructor()
+          .newInstance();
+    } catch (Exception e) {
+      throw new IllegalStateException(e);
+    }
+  }
+}
diff --git a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
index 700efc1c1a..512f8b64db 100644
--- a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
+++ b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java
@@ -38,6 +38,7 @@ import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
 import java.util.PriorityQueue;
+import org.tensorflow.lite.Delegate;
 import org.tensorflow.lite.Interpreter;
 
 /**
@@ -93,6 +94,9 @@ public abstract class ImageClassifier {
             }
           });
 
+  /** holds a gpu delegate */
+  Delegate gpuDelegate = null;
+
   /** Initializes an {@code ImageClassifier}. */
   ImageClassifier(Activity activity) throws IOException {
     tfliteModel = loadModelFile(activity);
@@ -159,10 +163,20 @@ public abstract class ImageClassifier {
   private void recreateInterpreter() {
     if (tflite != null) {
       tflite.close();
+      // TODO(b/120679982)
+      // gpuDelegate.close();
       tflite = new Interpreter(tfliteModel, tfliteOptions);
     }
   }
 
+  public void useGpu() {
+    if (gpuDelegate == null && GpuDelegateHelper.isGpuDelegateAvailable()) {
+      gpuDelegate = GpuDelegateHelper.createGpuDelegate();
+      tfliteOptions.addDelegate(gpuDelegate);
+      recreateInterpreter();
+    }
+  }
+
   public void useCPU() {
     tfliteOptions.setUseNNAPI(false);
     recreateInterpreter();
diff --git a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatMobileNet.java b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatMobileNet.java
new file mode 100644
index 0000000000..c87ffff8f6
--- /dev/null
+++ b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatMobileNet.java
@@ -0,0 +1,94 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+package com.example.android.tflitecamerademo;
+
+import android.app.Activity;
+import java.io.IOException;
+
+/** This classifier works with the float MobileNet model. */
+public class ImageClassifierFloatMobileNet extends ImageClassifier {
+
+  /**
+   * An array to hold inference results, to be feed into Tensorflow Lite as outputs. This isn't part
+   * of the super class, because we need a primitive array here.
+   */
+  private float[][] labelProbArray = null;
+
+  /**
+   * Initializes an {@code ImageClassifierFloatMobileNet}.
+   *
+   * @param activity
+   */
+  ImageClassifierFloatMobileNet(Activity activity) throws IOException {
+    super(activity);
+    labelProbArray = new float[1][getNumLabels()];
+  }
+
+  @Override
+  protected String getModelPath() {
+    // you can download this file from
+    // see build.gradle for where to obtain this file. It should be auto
+    // downloaded into assets.
+    return "mobilenet_v1_1.0_224.tflite";
+  }
+
+  @Override
+  protected String getLabelPath() {
+    return "labels_mobilenet_quant_v1_224.txt";
+  }
+
+  @Override
+  protected int getImageSizeX() {
+    return 224;
+  }
+
+  @Override
+  protected int getImageSizeY() {
+    return 224;
+  }
+
+  @Override
+  protected int getNumBytesPerChannel() {
+    return 4; // Float.SIZE / Byte.SIZE;
+  }
+
+  @Override
+  protected void addPixelValue(int pixelValue) {
+    imgData.putFloat(((pixelValue >> 16) & 0xFF) / 255.f);
+    imgData.putFloat(((pixelValue >> 8) & 0xFF) / 255.f);
+    imgData.putFloat((pixelValue & 0xFF) / 255.f);
+  }
+
+  @Override
+  protected float getProbability(int labelIndex) {
+    return labelProbArray[0][labelIndex];
+  }
+
+  @Override
+  protected void setProbability(int labelIndex, Number value) {
+    labelProbArray[0][labelIndex] = value.floatValue();
+  }
+
+  @Override
+  protected float getNormalizedProbability(int labelIndex) {
+    return labelProbArray[0][labelIndex];
+  }
+
+  @Override
+  protected void runInference() {
+    tflite.run(imgData, labelProbArray);
+  }
+}
diff --git a/tensorflow/lite/java/demo/app/src/main/res/drawable/item_selector.xml b/tensorflow/lite/java/demo/app/src/main/res/drawable/item_selector.xml
new file mode 100644
index 0000000000..202c900769
--- /dev/null
+++ b/tensorflow/lite/java/demo/app/src/main/res/drawable/item_selector.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="utf-8"?>
+<selector xmlns:android="http://schemas.android.com/apk/res/android">
+
+    <!-- pressed -->
+    <item android:drawable="@color/selection_highlight" android:state_pressed="true" />
+    <!-- focused -->
+    <item android:drawable="@color/selection_focus" android:state_activated="true" />
+    <!-- default -->
+    <item android:drawable="@color/item_normal" />
+
+</selector>
diff --git a/tensorflow/lite/java/demo/app/src/main/res/layout-v26/fragment_camera2_basic.xml b/tensorflow/lite/java/demo/app/src/main/res/layout-v26/fragment_camera2_basic.xml
index 19e0a9bab4..70eedfdd02 100644
--- a/tensorflow/lite/java/demo/app/src/main/res/layout-v26/fragment_camera2_basic.xml
+++ b/tensorflow/lite/java/demo/app/src/main/res/layout-v26/fragment_camera2_basic.xml
@@ -57,49 +57,83 @@
             android:textStyle="bold" />
 
     </LinearLayout>
-    <LinearLayout
-        android:orientation="horizontal"
-        android:background="#513400"
-        android:layout_alignParentBottom="true"
 
-        android:layout_width="match_parent"
+    <LinearLayout
         android:id="@+id/bottom_info_view"
+        android:layout_width="match_parent"
+        android:layout_height="200dp"
+
+        android:layout_alignParentBottom="true"
         android:layout_marginBottom="10dp"
-        android:layout_height="50dp">
-        <TextView
-            android:layout_width="wrap_content"
-            android:layout_height="match_parent"
-            android:textColor="@android:color/white"
-            android:textAlignment="center"
-            android:gravity="center"
-            android:text="Threads:"/>
-        <NumberPicker
-            android:id="@+id/np"
+        android:background="#513400"
+        android:orientation="horizontal">
+
+        <LinearLayout
             android:layout_width="wrap_content"
-            android:layout_height="wrap_content"
-            android:layout_marginLeft="10dp"
-            android:theme="@style/AppTheme.Picker"
-            android:visibility="visible" />
-        <RadioGroup
-            android:gravity="center"
-            android:layout_width="match_parent"
             android:layout_height="match_parent"
-            android:orientation="horizontal">
-            <RadioButton
-                android:id="@+id/radio_cpu"
-                android:background="#0000000f"
+            android:orientation="vertical">
+
+            <TextView
                 android:layout_width="wrap_content"
                 android:layout_height="wrap_content"
-                android:text="@string/cpu"
+                android:gravity="center"
+                android:text="Threads"
+                android:textAlignment="center"
                 android:textColor="@android:color/white" />
-            <RadioButton
-                android:id="@+id/radio_nnapi"
-                android:background="#0000000f"
+
+            <NumberPicker
+                android:id="@+id/np"
                 android:layout_width="wrap_content"
                 android:layout_height="wrap_content"
-                android:text="@string/nnapi"
+                android:layout_marginLeft="10dp"
+                android:theme="@style/AppTheme.Picker"
+                android:visibility="visible" />
+
+        </LinearLayout>
+
+        <LinearLayout
+            android:id="@+id/modelLayout"
+            android:layout_width="150dp"
+            android:layout_height="match_parent"
+            android:orientation="vertical">
+
+            <TextView
+                android:id="@+id/textView"
+                android:layout_width="match_parent"
+                android:layout_height="20dp"
+                android:text="@string/modelLabel"
+                android:textAlignment="center"
                 android:textColor="@android:color/white" />
-        </RadioGroup>
+
+            <ListView
+                android:id="@+id/model"
+                android:layout_width="match_parent"
+                android:layout_height="180dp">
+
+            </ListView>
+        </LinearLayout>
+
+        <LinearLayout
+            android:id="@+id/deviceLayout"
+            android:layout_width="140dp"
+            android:layout_height="match_parent"
+            android:orientation="vertical">
+
+            <TextView
+                android:id="@+id/textView2"
+                android:layout_width="match_parent"
+                android:layout_height="20dp"
+                android:text="@string/deviceLabel"
+                android:textAlignment="center"
+                android:textColor="@android:color/white" />
+
+            <ListView
+                android:id="@+id/device"
+                android:layout_width="match_parent"
+                android:layout_height="180dp" />
+
+        </LinearLayout>
+
     </LinearLayout>
 
 
diff --git a/tensorflow/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml b/tensorflow/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml
index be66eeac75..f8312cc0f7 100644
--- a/tensorflow/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml
+++ b/tensorflow/lite/java/demo/app/src/main/res/layout/fragment_camera2_basic.xml
@@ -57,22 +57,30 @@
             android:textStyle="bold" />
 
     </LinearLayout>
-    <LinearLayout
-        android:orientation="horizontal"
-        android:background="#aa7700"
-        android:layout_alignParentBottom="true"
 
-        android:layout_width="match_parent"
+    <LinearLayout
         android:id="@+id/bottom_info_view"
+        android:layout_width="match_parent"
+        android:layout_height="200dp"
+
+        android:layout_alignParentBottom="true"
         android:layout_marginBottom="10dp"
-        android:layout_height="50dp">
-        <TextView
+        android:background="#513400"
+        android:orientation="horizontal">
+
+      <LinearLayout
             android:layout_width="wrap_content"
             android:layout_height="match_parent"
-            android:textColor="@android:color/white"
-            android:textAlignment="center"
+            android:orientation="vertical">
+
+        <TextView
+            android:layout_width="wrap_content"
+                android:layout_height="wrap_content"
             android:gravity="center"
-            android:text="@string/threads" />
+                android:text="Threads"
+            android:textAlignment="center"
+            android:textColor="@android:color/white" />
+
         <NumberPicker
             android:id="@+id/np"
             android:layout_width="wrap_content"
@@ -80,25 +88,51 @@
             android:layout_marginLeft="10dp"
             android:theme="@style/AppTheme.Picker"
             android:visibility="visible" />
-        <RadioGroup
-            android:gravity="center"
-            android:layout_width="match_parent"
+
+        </LinearLayout>
+
+        <LinearLayout
+            android:id="@+id/modelLayout"
+            android:layout_width="150dp"
             android:layout_height="match_parent"
-            android:orientation="horizontal">
-            <RadioButton
-                android:id="@+id/radio_cpu"
-                android:background="#0000000f"
-                android:layout_width="wrap_content"
-                android:layout_height="wrap_content"
-                android:text="@string/cpu"
+            android:orientation="vertical">
+
+            <TextView
+                android:id="@+id/textView"
+                android:layout_width="match_parent"
+                android:layout_height="20dp"
+                android:text="@string/modelLabel"
+                android:textAlignment="center"
                 android:textColor="@android:color/white" />
-            <RadioButton
-                android:id="@+id/radio_nnapi"
-                android:background="#0000000f"
-                android:layout_width="wrap_content"
-                android:layout_height="wrap_content"
-                android:text="@string/nnapi"
+
+            <ListView
+                android:id="@+id/model"
+                android:layout_width="match_parent"
+                android:layout_height="180dp">
+
+            </ListView>
+        </LinearLayout>
+
+        <LinearLayout
+            android:id="@+id/deviceLayout"
+            android:layout_width="140dp"
+            android:layout_height="match_parent"
+            android:orientation="vertical">
+
+            <TextView
+                android:id="@+id/textView2"
+                android:layout_width="match_parent"
+                android:layout_height="20dp"
+                android:text="@string/deviceLabel"
+                android:textAlignment="center"
                 android:textColor="@android:color/white" />
-        </RadioGroup>
+
+            <ListView
+                android:id="@+id/device"
+                android:layout_width="match_parent"
+                android:layout_height="180dp" />
+
+        </LinearLayout>
+
     </LinearLayout>
 </RelativeLayout>
diff --git a/tensorflow/lite/java/demo/app/src/main/res/layout/listview_row.xml b/tensorflow/lite/java/demo/app/src/main/res/layout/listview_row.xml
new file mode 100644
index 0000000000..349b0f63b4
--- /dev/null
+++ b/tensorflow/lite/java/demo/app/src/main/res/layout/listview_row.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="utf-8"?>
+<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
+    android:layout_width="match_parent"
+    android:layout_height="match_parent">
+
+
+    <TextView
+        android:id="@+id/listview_row_text"
+        android:layout_width="match_parent"
+        android:layout_height="match_parent"
+        android:layout_marginRight="2dp"
+        android:background="@drawable/item_selector"
+        android:padding="10dp"
+        android:textSize="18sp"
+        android:textStyle="bold" />
+
+</LinearLayout>
\ No newline at end of file
diff --git a/tensorflow/lite/java/demo/app/src/main/res/values/colors.xml b/tensorflow/lite/java/demo/app/src/main/res/values/colors.xml
index 4b75d2b2bd..c30f1dc3ac 100644
--- a/tensorflow/lite/java/demo/app/src/main/res/values/colors.xml
+++ b/tensorflow/lite/java/demo/app/src/main/res/values/colors.xml
@@ -16,4 +16,7 @@
 -->
 <resources>
     <color name="control_background">#cc4285f4</color>
+    <color name="selection_highlight">#aaaaaa</color>
+    <color name="selection_focus">#eeaa55</color>
+    <color name="item_normal">#eeeeee</color>
 </resources>
diff --git a/tensorflow/lite/java/demo/app/src/main/res/values/strings.xml b/tensorflow/lite/java/demo/app/src/main/res/values/strings.xml
index 45b12850e5..8cc88f2565 100644
--- a/tensorflow/lite/java/demo/app/src/main/res/values/strings.xml
+++ b/tensorflow/lite/java/demo/app/src/main/res/values/strings.xml
@@ -23,5 +23,11 @@
     <string name="toggle">Use NNAPI</string>
     <string name="tflite">tflite</string>
     <string name="nnapi">NNAPI</string>
+    <string name="gpu">GPU</string>
     <string name="cpu">CPU</string>
+    <string name="modelLabel">Model</string>
+    <string name="deviceLabel">Device</string>
+    <string name="mobilenetV1Quant">mobilenet v1 quant</string>;
+    <string name="mobilenetV1Float">mobilenet v1 float</string>;;
+
 </resources>
-- 
GitLab


From ee0f5b839d47ef35b1c42321ffb7ddecc3c8a4ba Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 8 Dec 2018 01:02:37 -0800
Subject: [PATCH 269/873] compat: Update forward compatibility horizon to
 2018-12-08

PiperOrigin-RevId: 224631588
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index e66c29ae39..1fcd0501d7 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 7)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 8)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From e5bb37762ea4597b1d855e89009041b94dcc30dc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 8 Dec 2018 04:40:13 -0800
Subject: [PATCH 270/873] Internal Change

PiperOrigin-RevId: 224642874
---
 tensorflow/python/ops/ragged/ragged_tensor_value.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/python/ops/ragged/ragged_tensor_value.py b/tensorflow/python/ops/ragged/ragged_tensor_value.py
index bf0ac4482a..e94ca4afac 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_value.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_value.py
@@ -98,3 +98,10 @@ class RaggedTensorValue(object):
         values_as_list[self._row_splits[i]:self._row_splits[i + 1]]
         for i in range(len(self._row_splits) - 1)
     ]
+
+  def value_rowids(self, name=None):
+    del name
+    row_lengths = self._row_splits[1:] - self._row_splits[:-1]
+    nrows = self._row_splits.shape[-1] - 1
+    indices = np.arange(nrows)
+    return np.repeat(indices, repeats=row_lengths, axis=0)
-- 
GitLab


From 01ecd5b53121be09de84f9e3e0a09f033bd2a3ca Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Sat, 8 Dec 2018 10:59:06 -0800
Subject: [PATCH 271/873] Export tf.train.* hooks or hooks-related classes to
 tf.estimator.* (exporting to both v1 and v2). Keep the existing only in v1.

PiperOrigin-RevId: 224658867
---
 .../training/basic_session_run_hooks.py       | 18 +++----
 ...ow.estimator.-checkpoint-saver-hook.pbtxt} |  2 +-
 ...estimator.-checkpoint-saver-listener.pbtxt | 24 +++++++++
 .../tensorflow.estimator.-feed-fn-hook.pbtxt} |  2 +-
 ...ensorflow.estimator.-final-ops-hook.pbtxt} |  2 +-
 ....estimator.-global-step-waiter-hook.pbtxt} |  2 +-
 ...flow.estimator.-logging-tensor-hook.pbtxt} |  2 +-
 ...ator.-nan-loss-during-training-error.pbtxt | 12 +++++
 ...nsorflow.estimator.-nan-tensor-hook.pbtxt} |  2 +-
 .../tensorflow.estimator.-profiler-hook.pbtxt | 30 +++++++++++
 ...flow.estimator.-second-or-step-timer.pbtxt | 26 ++++++++++
 ...orflow.estimator.-step-counter-hook.pbtxt} |  2 +-
 ...orflow.estimator.-stop-at-step-hook.pbtxt} |  2 +-
 ...rflow.estimator.-summary-saver-hook.pbtxt} |  2 +-
 ...perimental.-in-memory-evaluator-hook.pbtxt |  2 +-
 .../api/golden/v1/tensorflow.estimator.pbtxt  | 52 +++++++++++++++++++
 ...low.estimator.-checkpoint-saver-hook.pbtxt | 30 +++++++++++
 ...estimator.-checkpoint-saver-listener.pbtxt | 24 +++++++++
 .../tensorflow.estimator.-feed-fn-hook.pbtxt  | 30 +++++++++++
 ...tensorflow.estimator.-final-ops-hook.pbtxt | 34 ++++++++++++
 ...w.estimator.-global-step-waiter-hook.pbtxt | 30 +++++++++++
 ...rflow.estimator.-logging-tensor-hook.pbtxt | 30 +++++++++++
 ...ator.-nan-loss-during-training-error.pbtxt | 12 +++++
 ...ensorflow.estimator.-nan-tensor-hook.pbtxt | 30 +++++++++++
 .../tensorflow.estimator.-profiler-hook.pbtxt | 30 +++++++++++
 ...flow.estimator.-second-or-step-timer.pbtxt | 26 ++++++++++
 ...sorflow.estimator.-step-counter-hook.pbtxt | 30 +++++++++++
 ...sorflow.estimator.-stop-at-step-hook.pbtxt | 30 +++++++++++
 ...orflow.estimator.-summary-saver-hook.pbtxt | 30 +++++++++++
 ...perimental.-in-memory-evaluator-hook.pbtxt |  2 +-
 .../api/golden/v2/tensorflow.estimator.pbtxt  | 52 +++++++++++++++++++
 .../api/golden/v2/tensorflow.train.pbtxt      | 36 -------------
 .../api/lib/python_object_to_proto_visitor.py |  3 ++
 tensorflow/tools/compatibility/renames_v2.py  | 17 ++++--
 34 files changed, 598 insertions(+), 60 deletions(-)
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-checkpoint-saver-hook.pbtxt => v1/tensorflow.estimator.-checkpoint-saver-hook.pbtxt} (96%)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.estimator.-checkpoint-saver-listener.pbtxt
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-feed-fn-hook.pbtxt => v1/tensorflow.estimator.-feed-fn-hook.pbtxt} (96%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-final-ops-hook.pbtxt => v1/tensorflow.estimator.-final-ops-hook.pbtxt} (96%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-global-step-waiter-hook.pbtxt => v1/tensorflow.estimator.-global-step-waiter-hook.pbtxt} (95%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-logging-tensor-hook.pbtxt => v1/tensorflow.estimator.-logging-tensor-hook.pbtxt} (96%)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.estimator.-nan-loss-during-training-error.pbtxt
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-nan-tensor-hook.pbtxt => v1/tensorflow.estimator.-nan-tensor-hook.pbtxt} (96%)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.estimator.-profiler-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.estimator.-second-or-step-timer.pbtxt
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-step-counter-hook.pbtxt => v1/tensorflow.estimator.-step-counter-hook.pbtxt} (96%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-stop-at-step-hook.pbtxt => v1/tensorflow.estimator.-stop-at-step-hook.pbtxt} (96%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-summary-saver-hook.pbtxt => v1/tensorflow.estimator.-summary-saver-hook.pbtxt} (96%)
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-checkpoint-saver-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-checkpoint-saver-listener.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-feed-fn-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-ops-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-global-step-waiter-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-logging-tensor-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-nan-loss-during-training-error.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-nan-tensor-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-profiler-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-second-or-step-timer.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-step-counter-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-stop-at-step-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-summary-saver-hook.pbtxt

diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py
index b64c7ada62..86718ab45f 100644
--- a/tensorflow/python/training/basic_session_run_hooks.py
+++ b/tensorflow/python/training/basic_session_run_hooks.py
@@ -163,7 +163,7 @@ class NeverTriggerTimer(_HookTimer):
     return None
 
 
-@tf_export("train.LoggingTensorHook")
+@tf_export(v1=["train.LoggingTensorHook"])
 class LoggingTensorHook(session_run_hook.SessionRunHook):
   """Prints the given tensors every N local steps, every N seconds, or at end.
 
@@ -373,7 +373,7 @@ class _MultiStepStopAtStepHook(session_run_hook.SessionRunHook):
       self._update_steps_per_run_variable(global_step, run_context.session)
 
 
-@tf_export("train.StopAtStepHook")
+@tf_export(v1=["train.StopAtStepHook"])
 class StopAtStepHook(session_run_hook.SessionRunHook):
   """Hook that requests stop at a specified step."""
 
@@ -495,7 +495,7 @@ class CheckpointSaverListener(object):
     pass
 
 
-@tf_export("train.CheckpointSaverHook")
+@tf_export(v1=["train.CheckpointSaverHook"])
 class CheckpointSaverHook(session_run_hook.SessionRunHook):
   """Saves checkpoints every N steps or seconds."""
 
@@ -634,7 +634,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook):
     return savers[0]
 
 
-@tf_export("train.StepCounterHook")
+@tf_export(v1=["train.StepCounterHook"])
 class StepCounterHook(session_run_hook.SessionRunHook):
   """Hook that counts steps per second."""
 
@@ -725,7 +725,7 @@ class NanLossDuringTrainingError(RuntimeError):
     return "NaN loss during training."
 
 
-@tf_export("train.NanTensorHook")
+@tf_export(v1=["train.NanTensorHook"])
 class NanTensorHook(session_run_hook.SessionRunHook):
   """Monitors the loss tensor and stops training if loss is NaN.
 
@@ -757,7 +757,7 @@ class NanTensorHook(session_run_hook.SessionRunHook):
         run_context.request_stop()
 
 
-@tf_export("train.SummarySaverHook")
+@tf_export(v1=["train.SummarySaverHook"])
 class SummarySaverHook(session_run_hook.SessionRunHook):
   """Saves summaries every N steps."""
 
@@ -866,7 +866,7 @@ class SummarySaverHook(session_run_hook.SessionRunHook):
     return summary_op
 
 
-@tf_export("train.GlobalStepWaiterHook")
+@tf_export(v1=["train.GlobalStepWaiterHook"])
 class GlobalStepWaiterHook(session_run_hook.SessionRunHook):
   """Delays execution until global step reaches `wait_until_step`.
 
@@ -914,7 +914,7 @@ class GlobalStepWaiterHook(session_run_hook.SessionRunHook):
       time.sleep(0.5)
 
 
-@tf_export("train.FinalOpsHook")
+@tf_export(v1=["train.FinalOpsHook"])
 class FinalOpsHook(session_run_hook.SessionRunHook):
   """A hook which evaluates `Tensors` at the end of a session."""
 
@@ -958,7 +958,7 @@ class FinalOpsHook(session_run_hook.SessionRunHook):
         raise e
 
 
-@tf_export("train.FeedFnHook")
+@tf_export(v1=["train.FeedFnHook"])
 class FeedFnHook(session_run_hook.SessionRunHook):
   """Runs `feed_fn` and sets the `feed_dict` accordingly."""
 
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-checkpoint-saver-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-checkpoint-saver-hook.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-checkpoint-saver-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-checkpoint-saver-hook.pbtxt
index c3037baa8c..f9e1504b49 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-checkpoint-saver-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-checkpoint-saver-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.CheckpointSaverHook"
+path: "tensorflow.estimator.CheckpointSaverHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.CheckpointSaverHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-checkpoint-saver-listener.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-checkpoint-saver-listener.pbtxt
new file mode 100644
index 0000000000..111b7583f2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-checkpoint-saver-listener.pbtxt
@@ -0,0 +1,24 @@
+path: "tensorflow.estimator.CheckpointSaverListener"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.CheckpointSaverListener\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "after_save"
+    argspec: "args=[\'self\', \'session\', \'global_step_value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_save"
+    argspec: "args=[\'self\', \'session\', \'global_step_value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\', \'global_step_value\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-feed-fn-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-feed-fn-hook.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-feed-fn-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-feed-fn-hook.pbtxt
index 7bec4d032c..f24de493f2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-feed-fn-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-feed-fn-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.FeedFnHook"
+path: "tensorflow.estimator.FeedFnHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.FeedFnHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-final-ops-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-ops-hook.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-final-ops-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-ops-hook.pbtxt
index 31cf9aaeb2..6651170ba3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-final-ops-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-final-ops-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.FinalOpsHook"
+path: "tensorflow.estimator.FinalOpsHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.FinalOpsHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-global-step-waiter-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-global-step-waiter-hook.pbtxt
similarity index 95%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-global-step-waiter-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-global-step-waiter-hook.pbtxt
index 147448618e..37db48bc64 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-global-step-waiter-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-global-step-waiter-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.GlobalStepWaiterHook"
+path: "tensorflow.estimator.GlobalStepWaiterHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.GlobalStepWaiterHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-logging-tensor-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-logging-tensor-hook.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-logging-tensor-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-logging-tensor-hook.pbtxt
index 9801c05df1..425f0167a1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-logging-tensor-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-logging-tensor-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.LoggingTensorHook"
+path: "tensorflow.estimator.LoggingTensorHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.LoggingTensorHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-nan-loss-during-training-error.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-nan-loss-during-training-error.pbtxt
new file mode 100644
index 0000000000..6cf6e17e43
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-nan-loss-during-training-error.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.estimator.NanLossDuringTrainingError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.NanLossDuringTrainingError\'>"
+  is_instance: "<type \'exceptions.RuntimeError\'>"
+  member {
+    name: "args"
+    mtype: "<type \'getset_descriptor\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-nan-tensor-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-nan-tensor-hook.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-nan-tensor-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-nan-tensor-hook.pbtxt
index 7d1c89f9b3..82293c2c0c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-nan-tensor-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-nan-tensor-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.NanTensorHook"
+path: "tensorflow.estimator.NanTensorHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.NanTensorHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-profiler-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-profiler-hook.pbtxt
new file mode 100644
index 0000000000..65b5fb16b0
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-profiler-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.ProfilerHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.ProfilerHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'save_steps\', \'save_secs\', \'output_dir\', \'show_dataflow\', \'show_memory\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'\', \'True\', \'False\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-second-or-step-timer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-second-or-step-timer.pbtxt
new file mode 100644
index 0000000000..64051d2bd6
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-second-or-step-timer.pbtxt
@@ -0,0 +1,26 @@
+path: "tensorflow.estimator.SecondOrStepTimer"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.SecondOrStepTimer\'>"
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks._HookTimer\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'every_secs\', \'every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "last_triggered_step"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "should_trigger_for_step"
+    argspec: "args=[\'self\', \'step\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_last_triggered_step"
+    argspec: "args=[\'self\', \'step\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-step-counter-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-step-counter-hook.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-step-counter-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-step-counter-hook.pbtxt
index 13261f6dde..4368e04df3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-step-counter-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-step-counter-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.StepCounterHook"
+path: "tensorflow.estimator.StepCounterHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.StepCounterHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-stop-at-step-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-stop-at-step-hook.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-stop-at-step-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-stop-at-step-hook.pbtxt
index e388599b0b..938b189a8c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-stop-at-step-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-stop-at-step-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.StopAtStepHook"
+path: "tensorflow.estimator.StopAtStepHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.StopAtStepHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-summary-saver-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-summary-saver-hook.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-summary-saver-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-summary-saver-hook.pbtxt
index 697c3667b0..104157315f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-summary-saver-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-summary-saver-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.SummarySaverHook"
+path: "tensorflow.estimator.SummarySaverHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.SummarySaverHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.-in-memory-evaluator-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.-in-memory-evaluator-hook.pbtxt
index aba120218c..5a2a01cd53 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.-in-memory-evaluator-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.-in-memory-evaluator-hook.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.experimental.InMemoryEvaluatorHook"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.hooks.InMemoryEvaluatorHook\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.hooks.hooks.InMemoryEvaluatorHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
   is_instance: "<type \'object\'>"
   member_method {
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.pbtxt
index c5b0085b8d..d3656ae045 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.pbtxt
@@ -24,6 +24,14 @@ tf_module {
     name: "BoostedTreesRegressor"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "CheckpointSaverHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CheckpointSaverListener"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "DNNClassifier"
     mtype: "<type \'type\'>"
@@ -64,10 +72,22 @@ tf_module {
     name: "Exporter"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "FeedFnHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "FinalExporter"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "FinalOpsHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "GlobalStepWaiterHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "LatestExporter"
     mtype: "<type \'type\'>"
@@ -84,14 +104,46 @@ tf_module {
     name: "LinearRegressor"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "LoggingTensorHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ModeKeys"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "NanLossDuringTrainingError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "NanTensorHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "ProfilerHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "RunConfig"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SecondOrStepTimer"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "StepCounterHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "StopAtStepHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SummarySaverHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TrainSpec"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-checkpoint-saver-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-checkpoint-saver-hook.pbtxt
new file mode 100644
index 0000000000..f9e1504b49
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-checkpoint-saver-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.CheckpointSaverHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.CheckpointSaverHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'checkpoint_dir\', \'save_secs\', \'save_steps\', \'saver\', \'checkpoint_basename\', \'scaffold\', \'listeners\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'model.ckpt\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-checkpoint-saver-listener.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-checkpoint-saver-listener.pbtxt
new file mode 100644
index 0000000000..111b7583f2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-checkpoint-saver-listener.pbtxt
@@ -0,0 +1,24 @@
+path: "tensorflow.estimator.CheckpointSaverListener"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.CheckpointSaverListener\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "after_save"
+    argspec: "args=[\'self\', \'session\', \'global_step_value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_save"
+    argspec: "args=[\'self\', \'session\', \'global_step_value\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\', \'global_step_value\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-feed-fn-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-feed-fn-hook.pbtxt
new file mode 100644
index 0000000000..f24de493f2
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-feed-fn-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.FeedFnHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.FeedFnHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'feed_fn\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-ops-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-ops-hook.pbtxt
new file mode 100644
index 0000000000..6651170ba3
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-final-ops-hook.pbtxt
@@ -0,0 +1,34 @@
+path: "tensorflow.estimator.FinalOpsHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.FinalOpsHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "final_ops_values"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'final_ops\', \'final_ops_feed_dict\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-global-step-waiter-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-global-step-waiter-hook.pbtxt
new file mode 100644
index 0000000000..37db48bc64
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-global-step-waiter-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.GlobalStepWaiterHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.GlobalStepWaiterHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'wait_until_step\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-logging-tensor-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-logging-tensor-hook.pbtxt
new file mode 100644
index 0000000000..425f0167a1
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-logging-tensor-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.LoggingTensorHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.LoggingTensorHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'tensors\', \'every_n_iter\', \'every_n_secs\', \'at_end\', \'formatter\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-nan-loss-during-training-error.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-nan-loss-during-training-error.pbtxt
new file mode 100644
index 0000000000..6cf6e17e43
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-nan-loss-during-training-error.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.estimator.NanLossDuringTrainingError"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.NanLossDuringTrainingError\'>"
+  is_instance: "<type \'exceptions.RuntimeError\'>"
+  member {
+    name: "args"
+    mtype: "<type \'getset_descriptor\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-nan-tensor-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-nan-tensor-hook.pbtxt
new file mode 100644
index 0000000000..82293c2c0c
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-nan-tensor-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.NanTensorHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.NanTensorHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'loss_tensor\', \'fail_on_nan_loss\'], varargs=None, keywords=None, defaults=[\'True\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-profiler-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-profiler-hook.pbtxt
new file mode 100644
index 0000000000..65b5fb16b0
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-profiler-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.ProfilerHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.ProfilerHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'save_steps\', \'save_secs\', \'output_dir\', \'show_dataflow\', \'show_memory\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'\', \'True\', \'False\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-second-or-step-timer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-second-or-step-timer.pbtxt
new file mode 100644
index 0000000000..64051d2bd6
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-second-or-step-timer.pbtxt
@@ -0,0 +1,26 @@
+path: "tensorflow.estimator.SecondOrStepTimer"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.SecondOrStepTimer\'>"
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks._HookTimer\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'every_secs\', \'every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "last_triggered_step"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "reset"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "should_trigger_for_step"
+    argspec: "args=[\'self\', \'step\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "update_last_triggered_step"
+    argspec: "args=[\'self\', \'step\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-step-counter-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-step-counter-hook.pbtxt
new file mode 100644
index 0000000000..4368e04df3
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-step-counter-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.StepCounterHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.StepCounterHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'every_n_steps\', \'every_n_secs\', \'output_dir\', \'summary_writer\'], varargs=None, keywords=None, defaults=[\'100\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-stop-at-step-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-stop-at-step-hook.pbtxt
new file mode 100644
index 0000000000..938b189a8c
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-stop-at-step-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.StopAtStepHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.StopAtStepHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'num_steps\', \'last_step\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-summary-saver-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-summary-saver-hook.pbtxt
new file mode 100644
index 0000000000..104157315f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-summary-saver-hook.pbtxt
@@ -0,0 +1,30 @@
+path: "tensorflow.estimator.SummarySaverHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.basic_session_run_hooks.SummarySaverHook\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'save_steps\', \'save_secs\', \'output_dir\', \'summary_writer\', \'scaffold\', \'summary_op\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.-in-memory-evaluator-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.-in-memory-evaluator-hook.pbtxt
index aba120218c..5a2a01cd53 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.-in-memory-evaluator-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.-in-memory-evaluator-hook.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.experimental.InMemoryEvaluatorHook"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.hooks.InMemoryEvaluatorHook\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.hooks.hooks.InMemoryEvaluatorHook\'>"
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
   is_instance: "<type \'object\'>"
   member_method {
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
index c5b0085b8d..d3656ae045 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
@@ -24,6 +24,14 @@ tf_module {
     name: "BoostedTreesRegressor"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "CheckpointSaverHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "CheckpointSaverListener"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "DNNClassifier"
     mtype: "<type \'type\'>"
@@ -64,10 +72,22 @@ tf_module {
     name: "Exporter"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "FeedFnHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "FinalExporter"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "FinalOpsHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "GlobalStepWaiterHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "LatestExporter"
     mtype: "<type \'type\'>"
@@ -84,14 +104,46 @@ tf_module {
     name: "LinearRegressor"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "LoggingTensorHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "ModeKeys"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "NanLossDuringTrainingError"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "NanTensorHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "ProfilerHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "RunConfig"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SecondOrStepTimer"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "StepCounterHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "StopAtStepHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SummarySaverHook"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "TrainSpec"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
index 3ff4b69d39..8c327f88f3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
@@ -12,10 +12,6 @@ tf_module {
     name: "CheckpointManager"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "CheckpointSaverHook"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "ClusterDef"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
@@ -52,22 +48,10 @@ tf_module {
     name: "Features"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "FeedFnHook"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "FinalOpsHook"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "FloatList"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "GlobalStepWaiterHook"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "Int64List"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
@@ -76,14 +60,6 @@ tf_module {
     name: "JobDef"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "LoggingTensorHook"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "NanTensorHook"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "ProximalGradientDescentOptimizer"
     mtype: "<type \'type\'>"
@@ -104,18 +80,6 @@ tf_module {
     name: "SessionRunHook"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "StepCounterHook"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "StopAtStepHook"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "SummarySaverHook"
-    mtype: "<type \'type\'>"
-  }
   member_method {
     name: "cosine_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
index 70df38ba8b..5102066730 100644
--- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
+++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py
@@ -37,6 +37,9 @@ _CORNER_CASES = {
     'train.NanLossDuringTrainingError': {
         'message': {}
     },
+    'estimator.NanLossDuringTrainingError': {
+        'message': {}
+    },
 }
 
 # Python 2 vs. 3 differences
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index e4ef8eb528..b757ad4647 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -590,31 +590,40 @@ renames = {
     'tf.train.AdagradDAOptimizer': 'tf.compat.v1.train.AdagradDAOptimizer',
     'tf.train.AdagradOptimizer': 'tf.compat.v1.train.AdagradOptimizer',
     'tf.train.AdamOptimizer': 'tf.compat.v1.train.AdamOptimizer',
-    'tf.train.CheckpointSaverListener': 'tf.compat.v1.train.CheckpointSaverListener',
+    'tf.train.CheckpointSaverHook': 'tf.estimator.CheckpointSaverHook',
+    'tf.train.CheckpointSaverListener': 'tf.estimator.CheckpointSaverListener',
     'tf.train.ChiefSessionCreator': 'tf.compat.v1.train.ChiefSessionCreator',
+    'tf.train.FeedFnHook': 'tf.estimator.FeedFnHook',
+    'tf.train.FinalOpsHook': 'tf.estimator.FinalOpsHook',
     'tf.train.FtrlOptimizer': 'tf.compat.v1.train.FtrlOptimizer',
+    'tf.train.GlobalStepWaiterHook': 'tf.estimator.GlobalStepWaiterHook',
     'tf.train.GradientDescentOptimizer': 'tf.compat.v1.train.GradientDescentOptimizer',
+    'tf.train.LoggingTensorHook': 'tf.estimator.LoggingTensorHook',
     'tf.train.LooperThread': 'tf.compat.v1.train.LooperThread',
     'tf.train.MomentumOptimizer': 'tf.compat.v1.train.MomentumOptimizer',
     'tf.train.MonitoredSession': 'tf.compat.v1.train.MonitoredSession',
     'tf.train.MonitoredTrainingSession': 'tf.compat.v1.train.MonitoredTrainingSession',
-    'tf.train.NanLossDuringTrainingError': 'tf.compat.v1.train.NanLossDuringTrainingError',
+    'tf.train.NanLossDuringTrainingError': 'tf.estimator.NanLossDuringTrainingError',
+    'tf.train.NanTensorHook': 'tf.estimator.NanTensorHook',
     'tf.train.NewCheckpointReader': 'tf.compat.v1.train.NewCheckpointReader',
     'tf.train.Optimizer': 'tf.compat.v1.train.Optimizer',
-    'tf.train.ProfilerHook': 'tf.compat.v1.train.ProfilerHook',
+    'tf.train.ProfilerHook': 'tf.estimator.ProfilerHook',
     'tf.train.ProximalAdagradOptimizer': 'tf.compat.v1.train.ProximalAdagradOptimizer',
     'tf.train.QueueRunner': 'tf.compat.v1.train.QueueRunner',
     'tf.train.RMSPropOptimizer': 'tf.compat.v1.train.RMSPropOptimizer',
     'tf.train.Saver': 'tf.compat.v1.train.Saver',
     'tf.train.SaverDef': 'tf.compat.v1.train.SaverDef',
     'tf.train.Scaffold': 'tf.compat.v1.train.Scaffold',
-    'tf.train.SecondOrStepTimer': 'tf.compat.v1.train.SecondOrStepTimer',
+    'tf.train.SecondOrStepTimer': 'tf.estimator.SecondOrStepTimer',
     'tf.train.SessionCreator': 'tf.compat.v1.train.SessionCreator',
     'tf.train.SessionManager': 'tf.compat.v1.train.SessionManager',
     'tf.train.SessionRunArgs': 'tf.compat.v1.train.SessionRunArgs',
     'tf.train.SessionRunContext': 'tf.compat.v1.train.SessionRunContext',
     'tf.train.SessionRunValues': 'tf.compat.v1.train.SessionRunValues',
     'tf.train.SingularMonitoredSession': 'tf.compat.v1.train.SingularMonitoredSession',
+    'tf.train.StepCounterHook': 'tf.estimator.StepCounterHook',
+    'tf.train.StopAtStepHook': 'tf.estimator.StopAtStepHook',
+    'tf.train.SummarySaverHook': 'tf.estimator.SummarySaverHook',
     'tf.train.Supervisor': 'tf.compat.v1.train.Supervisor',
     'tf.train.SyncReplicasOptimizer': 'tf.compat.v1.train.SyncReplicasOptimizer',
     'tf.train.VocabInfo': 'tf.estimator.VocabInfo',
-- 
GitLab


From 7f88af511429354e67318b80d8478e6eddf9bfd1 Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Sat, 8 Dec 2018 15:12:52 -0800
Subject: [PATCH 272/873] Addressing review comments - v2

---
 tensorflow/core/kernels/mkl_fused_ops_test.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc
index 4beb70f74f..1214711edc 100644
--- a/tensorflow/core/kernels/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc
@@ -295,8 +295,10 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
   this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
 }
 
-REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest, OneByOneConvolution,
-                           SpatialConvolution, OneByOneConvolutionAndRelu,
+REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest,
+                           OneByOneConvolution,    //
+                           SpatialConvolution,     //
+                           OneByOneConvolutionAndRelu,   //
                            SpatialConvolutionAndRelu);
 
 using MklFusedBiasAddDataTypes = ::testing::Types<float>;
-- 
GitLab


From 766eb63f2f3e43cd8b23c1cbb05fe63dd918ffa3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 9 Dec 2018 01:02:35 -0800
Subject: [PATCH 273/873] compat: Update forward compatibility horizon to
 2018-12-09

PiperOrigin-RevId: 224696976
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 1fcd0501d7..51cd68436a 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 8)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 9)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 38ac3c811b346117a854be9875d7ea21f94d9eae Mon Sep 17 00:00:00 2001
From: jcf94 <xff252595680@gmail.com>
Date: Sun, 9 Dec 2018 22:49:19 +0800
Subject: [PATCH 274/873] Bug Fix for verbs

---
 tensorflow/contrib/verbs/rdma_mgr.cc          | 27 ++++++++++++++++---
 .../core/common_runtime/process_state.cc      |  9 +++++++
 .../core/common_runtime/process_state.h       |  3 +++
 3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 2784bf124c..7e821d6be2 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/contrib/verbs/grpc_verbs_client.h"
 #include "tensorflow/contrib/verbs/verbs_service.pb.h"
+#include "tensorflow/core/common_runtime/bfc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/pool_allocator.h"
@@ -256,6 +257,21 @@ void MRDeleter(ibv_mr* mr) {
   }
 }
 
+// TODO: This is to fix the bug of "local protection error when doing rdma send"
+//       Bug caused by commit 33170cc. The new design of Allocator/SubAllocator is
+//       good but not working correctly with this part.
+//       Waiting to migrate all the "cpu_allocator()" to "ProcessState::singleton()",
+//       and this patch will nolonger be needed.
+class BFCRdmaAllocatorFactory : public AllocatorFactory {
+ public:
+  Allocator* CreateAllocator() { return ProcessState::singleton()->GetCPUAllocator(port::kNUMANoAffinity); }
+
+  SubAllocator* CreateSubAllocator(int numa_node) {
+    return new BasicCPUAllocator(numa_node, ProcessState::singleton()->GetCPUAllocatorVisitor(), ProcessState::singleton()->GetCPUFreeVisitor());
+  }
+};
+REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory);
+
 void RdmaMgr::InitAllocators() {
   static std::once_flag flag;
   std::call_once(
@@ -277,9 +293,15 @@ void RdmaMgr::InitAllocators() {
   ProcessState::singleton()->AddCPUFreeVisitor(free_visitor);
 
 #if GOOGLE_CUDA
+  GPUProcessState::singleton()->AddCUDAHostAllocVisitor(0, alloc_visitor);
+  GPUProcessState::singleton()->AddCUDAHostFreeVisitor(0, free_visitor);
   if (IsGDRAvailable()) {
     // Note we don't free allocated GPU memory so there is no free visitor
-    int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
+
+    // TODO: This is to fix the 'invalid use of member in static member function bug'.
+    //       Waiting for better implementation.
+    // int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
+    int32_t bus_id = 0;
 
     SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
                                                   size_t num_bytes) {
@@ -288,9 +310,6 @@ void RdmaMgr::InitAllocators() {
     };
     GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id,
                                                      cuda_alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id,
-                                                          alloc_visitor);
-    GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor);
     LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id;
   }
 #endif  // GOOGLE_CUDA
diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc
index 3d8ac9b134..a33d6e0830 100644
--- a/tensorflow/core/common_runtime/process_state.cc
+++ b/tensorflow/core/common_runtime/process_state.cc
@@ -147,4 +147,13 @@ void ProcessState::TestOnlyReset() {
   gtl::STLDeleteElements(&cpu_al_);
 }
 
+const std::vector<SubAllocator::Visitor>& ProcessState::GetCPUAllocatorVisitor()
+{
+  return cpu_alloc_visitors_;
+}
+const std::vector<SubAllocator::Visitor>& ProcessState::GetCPUFreeVisitor()
+{
+  return cpu_free_visitors_;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h
index 6849d305b3..fc9d60bdc3 100644
--- a/tensorflow/core/common_runtime/process_state.h
+++ b/tensorflow/core/common_runtime/process_state.h
@@ -74,6 +74,9 @@ class ProcessState {
   // REQUIRES: must be called before GetCPUAllocator.
   void AddCPUFreeVisitor(SubAllocator::Visitor v);
 
+  const std::vector<SubAllocator::Visitor>& GetCPUAllocatorVisitor();
+  const std::vector<SubAllocator::Visitor>& GetCPUFreeVisitor();
+
   typedef std::unordered_map<const void*, MemDesc> MDMap;
 
  protected:
-- 
GitLab


From 36999ac8c0d3854e1637381e689c7c0016c11364 Mon Sep 17 00:00:00 2001
From: Zhenyu Tan <tanzheny@google.com>
Date: Sun, 9 Dec 2018 19:52:58 -0800
Subject: [PATCH 275/873] Remove some op test.

PiperOrigin-RevId: 224750894
---
 tensorflow/core/kernels/training_ops_test.cc | 34 --------------------
 1 file changed, 34 deletions(-)

diff --git a/tensorflow/core/kernels/training_ops_test.cc b/tensorflow/core/kernels/training_ops_test.cc
index 1ec57b4522..09804f95dc 100644
--- a/tensorflow/core/kernels/training_ops_test.cc
+++ b/tensorflow/core/kernels/training_ops_test.cc
@@ -151,40 +151,6 @@ static void BM_Momentum(int iters, int params) {
 }
 BENCHMARK(BM_Momentum)->Arg(128 << 10)->Arg(256 << 10);
 
-static void KerasMomentum(int32 n, Graph** init_g, Graph** train_g) {
-  TensorShape shape({n});
-  {
-    Graph* g = new Graph(OpRegistry::Global());
-    auto var = Var(g, n);
-    auto accum = Var(g, n);
-    auto zero = Zeros(g, n);
-    test::graph::Assign(g, var, zero);
-    test::graph::Assign(g, accum, zero);
-    *init_g = g;
-  }
-  {
-    Graph* g = new Graph(OpRegistry::Global());
-    auto var = Var(g, n);
-    auto accum = Var(g, n);
-    auto lr = Scalar(g, 0.01);
-    auto grad = Random(g, n);
-    auto mom = Scalar(g, 0.01);
-    test::graph::Multi(g, "ApplyKerasMomentum", {var, accum, lr, grad, mom});
-    *train_g = g;
-  }
-}
-
-static void BM_KerasMomentum(int iters, int params) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
-  Graph* init;
-  Graph* train;
-  KerasMomentum(params, &init, &train);
-  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
-}
-BENCHMARK(BM_KerasMomentum)->Arg(128 << 10)->Arg(256 << 10);
-
 static void Adam(int32 n, Graph** init_g, Graph** train_g) {
   TensorShape shape({n});
   {
-- 
GitLab


From 54b110ae4369f86518f3950f11be749df2507c29 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 9 Dec 2018 22:01:10 -0800
Subject: [PATCH 276/873] Internal Change

PiperOrigin-RevId: 224757952
---
 tensorflow/python/ops/ragged/ragged_tensor_value.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/tensorflow/python/ops/ragged/ragged_tensor_value.py b/tensorflow/python/ops/ragged/ragged_tensor_value.py
index e94ca4afac..bf0ac4482a 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_value.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_value.py
@@ -98,10 +98,3 @@ class RaggedTensorValue(object):
         values_as_list[self._row_splits[i]:self._row_splits[i + 1]]
         for i in range(len(self._row_splits) - 1)
     ]
-
-  def value_rowids(self, name=None):
-    del name
-    row_lengths = self._row_splits[1:] - self._row_splits[:-1]
-    nrows = self._row_splits.shape[-1] - 1
-    indices = np.arange(nrows)
-    return np.repeat(indices, repeats=row_lengths, axis=0)
-- 
GitLab


From 2ae0b450a741c37959a9fb9322f79e4ad476e8b7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 01:02:37 -0800
Subject: [PATCH 277/873] compat: Update forward compatibility horizon to
 2018-12-10

PiperOrigin-RevId: 224771346
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 51cd68436a..f11e97b211 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 9)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 10)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 18e98e57a7b9db07017d2f4f953e3b820b2e01e6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 04:37:56 -0800
Subject: [PATCH 278/873] Continue conversion of opensource-only files to
 opensource_only.files.

PiperOrigin-RevId: 224792124
---
 tensorflow/opensource_only.files | 35 +++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 688a837dac..347dc9fc6b 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -1,3 +1,31 @@
+tensorflow/contrib/tpu/profiler/pip_package/BUILD
+tensorflow/contrib/tpu/profiler/pip_package/setup.py
+tensorflow/contrib/tpu/profiler/pip_package/README
+tensorflow/contrib/tpu/profiler/pip_package/build_pip_package.sh
+tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
+tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/__init__.py
+tensorflow/contrib/mpi/BUILD
+tensorflow/tools/ci_build/remote/BUILD
+tensorflow/tools/pip_package/README
+tensorflow/tools/pip_package/MANIFEST.in
+tensorflow/tools/pip_package/simple_console.py
+tensorflow/tools/pip_package/build_pip_package.sh
+tensorflow/tools/pip_package/check_load_py_test.py
+tensorflow/tools/pip_package/pip_smoke_test.py
+tensorflow/tools/pip_package/simple_console_for_windows.py
+tensorflow/tools/pip_package/setup.py
+tensorflow/tools/pip_package/BUILD
+tensorflow/tools/lib_package/concat_licenses.sh
+tensorflow/tools/lib_package/libtensorflow_test.c
+tensorflow/tools/lib_package/LibTensorFlowTest.java
+tensorflow/tools/lib_package/BUILD
+tensorflow/tools/lib_package/libtensorflow_test.sh
+tensorflow/tools/lib_package/README.md
+tensorflow/tools/lib_package/libtensorflow_java_test.sh
+tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
+tensorflow/tools/def_file_filter/BUILD
+tensorflow/tools/def_file_filter/BUILD.tpl
+tensorflow/tools/def_file_filter/def_file_filter.py.tpl
 tensorflow/third_party/mkl/MKL_LICENSE
 tensorflow/third_party/mkl/LICENSE
 tensorflow/third_party/mkl/BUILD
@@ -207,4 +235,9 @@ tensorflow/third_party/jsoncpp.BUILD
 tensorflow/third_party/tflite_ovic_testdata.BUILD
 tensorflow/third_party/libxsmm.BUILD
 tensorflow/third_party/zlib.BUILD
-tensorflow/third_party/eigen.BUILD
\ No newline at end of file
+tensorflow/third_party/eigen.BUILD
+tensorflow/stream_executor/BUILD
+tensorflow/api_template_v1.__init__.py
+tensorflow/compat_template_v1.__init__.py
+tensorflow/api_template.__init__.py
+tensorflow/__init__.py
\ No newline at end of file
-- 
GitLab


From 8855358cff24b8b29296a01eabf3bf9bbff3509c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 06:09:33 -0800
Subject: [PATCH 279/873] Put arm_compiler.BUILD into the right spot.

PiperOrigin-RevId: 224800160
---
 .../opensource_only/arm_compiler.BUILD => arm_compiler.BUILD      | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tensorflow/opensource_only/arm_compiler.BUILD => arm_compiler.BUILD (100%)

diff --git a/tensorflow/opensource_only/arm_compiler.BUILD b/arm_compiler.BUILD
similarity index 100%
rename from tensorflow/opensource_only/arm_compiler.BUILD
rename to arm_compiler.BUILD
-- 
GitLab


From 41cfa5da49577fb04908997c70946a4881c85430 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 10 Dec 2018 09:30:43 -0800
Subject: [PATCH 280/873] Actually restrict parse_expression to expression
 nodes.

PiperOrigin-RevId: 224826621
---
 tensorflow/python/autograph/pyct/parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/autograph/pyct/parser.py b/tensorflow/python/autograph/pyct/parser.py
index 39fc1a7ed0..d04a40157e 100644
--- a/tensorflow/python/autograph/pyct/parser.py
+++ b/tensorflow/python/autograph/pyct/parser.py
@@ -117,7 +117,7 @@ def parse_expression(src):
   """
   node = parse_str(src)
   assert isinstance(node, gast.Module)
-  if len(node.body) != 1 and not isinstance(node.body[0], gast.Expr):
+  if len(node.body) != 1 or not isinstance(node.body[0], gast.Expr):
     raise ValueError(
         'Expected a single expression, found instead %s' % node.body)
   return node.body[0].value
-- 
GitLab


From e6432106f41facbca0cf2d51a2bf6dec72ad8961 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 10 Dec 2018 09:32:43 -0800
Subject: [PATCH 281/873] Strengthen the checks in side_effect_guards a bit.
 This is still not fully robust, but the converter is about to be deprecated
 anyway.

PiperOrigin-RevId: 224827008
---
 .../converters/side_effect_guards.py          | 25 +++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/autograph/converters/side_effect_guards.py b/tensorflow/python/autograph/converters/side_effect_guards.py
index 98e29ec8e1..d7c0951fcc 100644
--- a/tensorflow/python/autograph/converters/side_effect_guards.py
+++ b/tensorflow/python/autograph/converters/side_effect_guards.py
@@ -85,11 +85,26 @@ class SideEffectGuardTransformer(converter.Base):
         new_alias_map.update(alias_map)
         alias_map = new_alias_map
         current_dest = new_dest
-    if reindent_requested and not current_dest:
-      # TODO(mdan): There may still be something that could be done.
-      raise ValueError('Unable to insert statement into the computation flow: '
-                       'it is not followed by any computation which '
-                       'the statement could gate.')
+
+    if reindent_requested:
+      no_controls_to_gate = False
+      if not current_dest:
+        no_controls_to_gate = True
+      if len(current_dest) == 1:
+        if ast_util.matches(current_dest[0], 'return'):
+          no_controls_to_gate = True
+        if ast_util.matches(current_dest[0], 'return ()'):
+          no_controls_to_gate = True
+        if ast_util.matches(current_dest[0], 'return []'):
+          no_controls_to_gate = True
+        if ast_util.matches(current_dest[0], 'return {}'):
+          no_controls_to_gate = True
+      if no_controls_to_gate:
+        # TODO(mdan): There may still be something that could be done.
+        raise ValueError(
+            'Unable to insert statement into the computation flow: it is not'
+            ' followed by any computation which the statement could gate.')
+
     return new_nodes
 
   def visit_FunctionDef(self, node):
-- 
GitLab


From 1d850778ac31dfe4a6fdb0846739f3294e47b8c4 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 10 Dec 2018 09:38:06 -0800
Subject: [PATCH 282/873] Allow non-expressions in the pattern matcher.

PiperOrigin-RevId: 224828066
---
 tensorflow/python/autograph/converters/call_trees.py | 4 ++--
 tensorflow/python/autograph/pyct/ast_util.py         | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index 9b85fc8367..3e0b40290f 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -323,12 +323,12 @@ class CallTreeTransformer(converter.Base):
 
       # 1. super() calls - these are preserved. The class conversion mechanism
       # will ensure that they return the correct value.
-      if ast_util.matches(node, 'super(_)'):
+      if ast_util.matches(node, parser.parse_expression('super(_)')):
         return node
 
       # 2. super().method calls - these are preserved as well, when the
       # conversion processes the entire class.
-      if (ast_util.matches(node, 'super(_)._(_)') and
+      if (ast_util.matches(node, parser.parse_expression('super(_)._(_)')) and
           self.ctx.info.owner_type is not None):
         return node
 
diff --git a/tensorflow/python/autograph/pyct/ast_util.py b/tensorflow/python/autograph/pyct/ast_util.py
index ea7eca6463..3dc10cf349 100644
--- a/tensorflow/python/autograph/pyct/ast_util.py
+++ b/tensorflow/python/autograph/pyct/ast_util.py
@@ -200,7 +200,8 @@ def matches(node, pattern):
     bool
   """
   if isinstance(pattern, str):
-    pattern = parser.parse_expression(pattern)
+    pattern, = parser.parse_str(pattern).body
+
   matcher = PatternMatcher(pattern)
   matcher.visit(node)
   return matcher.matches
-- 
GitLab


From e88a87c89195a820c62433784bcde063ab568cbc Mon Sep 17 00:00:00 2001
From: Pooya Davoodi <pdavoodi@nvidia.com>
Date: Mon, 10 Dec 2018 09:56:11 -0800
Subject: [PATCH 283/873] Update README.md

Tensorflow -> TensorFlow

uring -> using
---
 tensorflow/contrib/tensorrt/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md
index dedac2c748..1310b3cd27 100644
--- a/tensorflow/contrib/tensorrt/README.md
+++ b/tensorflow/contrib/tensorrt/README.md
@@ -6,13 +6,13 @@ This module is under active development.
 
 ## Installing TF-TRT
 
-Currently Tensorflow nightly builds include TF-TRT by default,
+Currently TensorFlow nightly builds include TF-TRT by default,
 which means you don't need to install TF-TRT separately.
 You can pull the latest TF containers from docker hub or
 install the latest TF pip package to get access to the latest TF-TRT.
 
 If you want to use TF-TRT on NVIDIA Jetson platform, you can find
-the download links for the relevant Tensorflow pip packages here:
+the download links for the relevant TensorFlow pip packages here:
 https://docs.nvidia.com/deeplearning/dgx/index.html#installing-frameworks-for-jetson
 
 ## Installing TensorRT
@@ -42,7 +42,7 @@ and verified models, explains best practices with troubleshooting guides.
 
 TF-TRT includes both Python tests and C++ unit tests.
 Most of Python tests are located in the test directory
-and they can be executed uring `bazel test` or directly
+and they can be executed using `bazel test` or directly
 with the Python command. Most of the C++ unit tests are
 used to test the conversion functions that convert each TF op to
 a number of TensorRT layers.
-- 
GitLab


From 97164413d009aa6506f269eff7fb78411419146d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 09:54:09 -0800
Subject: [PATCH 284/873] Internal Change

PiperOrigin-RevId: 224830708
---
 tensorflow/python/ops/array_ops.py            |   2 +
 .../python/ops/ragged/ragged_array_ops.py     |   2 +-
 .../python/ops/ragged/ragged_dispatch.py      |  20 ++-
 .../python/ops/ragged/ragged_dispatch_test.py | 133 ++++++++++++++++++
 .../python/ops/ragged/ragged_math_ops.py      |  22 +--
 5 files changed, 167 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index 9dabbffb13..e10d9036cd 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -87,6 +87,7 @@ def identity(input, name=None):  # pylint: disable=redefined-builtin
 
 # pylint: disable=redefined-builtin,protected-access
 @tf_export(v1=["expand_dims"])
+@dispatch.add_dispatch_support
 @deprecation.deprecated_args(None, "Use the `axis` argument instead", "dim")
 def expand_dims(input, axis=None, name=None, dim=None):
   """Inserts a dimension of 1 into a tensor's shape.
@@ -3256,6 +3257,7 @@ reverse_sequence_v2.__doc__ = deprecation.rewrite_argument_docstring(
 
 
 @tf_export(v1=["gather"])
+@dispatch.add_dispatch_support
 def gather(params, indices, validate_indices=None, name=None, axis=0):
   del validate_indices
   if axis != 0:
diff --git a/tensorflow/python/ops/ragged/ragged_array_ops.py b/tensorflow/python/ops/ragged/ragged_array_ops.py
index b5917bc4ee..dfa9790cd8 100644
--- a/tensorflow/python/ops/ragged/ragged_array_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_array_ops.py
@@ -587,7 +587,7 @@ def concat(values, axis, name=None):
     return _ragged_stack_concat_helper(values, axis, stack_values=False)
 
 
-def stack(values, axis, name=None):
+def stack(values, axis=0, name=None):
   """Stacks potentially ragged tensors along one dimension.
 
   Given a list of tensors with the same rank `K` (`K >= axis`), returns a
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch.py b/tensorflow/python/ops/ragged/ragged_dispatch.py
index 7c74f7be62..f334f1fc8e 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch.py
@@ -374,15 +374,31 @@ _BINARY_ELEMENTWISE_OPS = [
     math_ops.truncatemod,
 ]
 
+
+def _ragged_gather_v1(params, indices, validate_indices=None, name=None,
+                      axis=0):
+  return ragged_array_ops.gather(params=params, indices=indices,
+                                 validate_indices=validate_indices,
+                                 axis=axis, name=name)
+
+
+def _ragged_expand_dims_v1(input, axis=None, name=None, dim=None):  # pylint: disable=redefined-builtin
+  if dim is not None:
+    axis = dim
+  return ragged_array_ops.expand_dims(input=input, axis=axis, name=name)
+
+
 # (original_op, ragged_op, ragged_args)
 _RAGGED_DISPATCH_OPS = [
     (array_ops.batch_gather, ragged_array_ops.batch_gather,
      ['params', 'indices']),
-    (array_ops.concat, ragged_array_ops.concat, ['values']),
+    (array_ops.concat, ragged_array_ops.concat, ['[values]']),
+    (array_ops.expand_dims, _ragged_expand_dims_v1, ['input']),
     (array_ops.expand_dims_v2, ragged_array_ops.expand_dims, ['input']),
+    (array_ops.gather, _ragged_gather_v1, ['params', 'indices']),
     (array_ops.gather_v2, ragged_array_ops.gather, ['params', 'indices']),
     (array_ops.gather_nd, ragged_array_ops.gather_nd, ['params', 'indices']),
-    (array_ops.stack, ragged_array_ops.stack, ['values']),
+    (array_ops.stack, ragged_array_ops.stack, ['[values]']),
     (array_ops.tile, ragged_array_ops.tile, ['input']),
     (array_ops.where, ragged_array_ops.where, ['condition', 'x', 'y']),
     (math_ops.unsorted_segment_sum, ragged_math_ops.segment_sum,
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
index 82827aa2aa..9d63dcf7c4 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
@@ -446,6 +446,139 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
     with self.assertRaises((TypeError, ValueError)):
       self.evaluate(math_ops.add_n([x, y]))
 
+  @parameterized.parameters([
+      dict(
+          op=array_ops.batch_gather,
+          args=(ragged.constant_value([[5, 6, 7], [8, 9]]),
+                ragged.constant_value([[2, 1, 0], [1]])),
+          expected=ragged.constant_value([[7, 6, 5], [9]])),
+      dict(
+          op=array_ops.concat,
+          args=([ragged.constant_value([[1, 2, 3], [4]], dtype=np.int32),
+                 np.array([[5, 6]], dtype=np.int32)],),
+          kwargs={'axis': 0},
+          expected=ragged.constant_value([[1, 2, 3], [4], [5, 6]])),
+      dict(
+          op=array_ops.expand_dims,
+          kwargs={'input': ragged.constant_value([[1, 2], [3]]),
+                  'axis': 0},
+          expected=ragged.constant_value([[[1, 2], [3]]])),
+      dict(
+          op=array_ops.expand_dims_v2,
+          kwargs={'input': ragged.constant_value([[1, 2], [3]]),
+                  'axis': -1},
+          expected=ragged.constant_value([[[1], [2]], [[3]]],
+                                         ragged_rank=1),),
+      dict(
+          op=array_ops.gather,
+          kwargs={'params': ragged.constant_value([[1, 2], [3]]),
+                  'indices': [1, 0, 1]},
+          expected=ragged.constant_value([[3], [1, 2], [3]])),
+      dict(
+          op=array_ops.gather_v2,
+          kwargs={'params': ragged.constant_value([[1, 2], [3]]),
+                  'indices': ragged.constant_value([[1, 0], [1]])},
+          expected=ragged.constant_value([[[3], [1, 2]], [[3]]])),
+      dict(
+          op=array_ops.gather_nd,
+          kwargs={'params': ragged.constant_value([[7, 8], [9]]),
+                  'indices': [[0, 1], [1, 0], [0, 0]]},
+          expected=ragged.constant_value([8, 9, 7])),
+      dict(
+          op=array_ops.stack,
+          args=([ragged.constant_value([[1, 2, 3], [4]], dtype=np.int32),
+                 np.array([[5, 6]], dtype=np.int32)],),
+          expected=ragged.constant_value([[[1, 2, 3], [4]], [[5, 6]]])),
+      dict(
+          op=array_ops.tile,
+          args=([ragged.constant_value([[1, 2], [3]], dtype=np.int32), [2, 3]]),
+          expected=ragged.constant_value([[1, 2, 1, 2, 1, 2], [3, 3, 3],
+                                          [1, 2, 1, 2, 1, 2], [3, 3, 3]])),
+      dict(
+          op=array_ops.where,
+          args=(ragged.constant_value([[True, False], [True]]),
+                ragged.constant_value([[b'A', b'B'], [b'C']]),
+                ragged.constant_value([[b'a', b'b'], [b'c']])),
+          expected=ragged.constant_value([[b'A', b'b'], [b'C']])),
+      dict(
+          op=math_ops.unsorted_segment_sum,
+          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
+                  'segment_ids': ragged.constant_value([[0, 2], [0]]),
+                  'num_segments': 3},
+          expected=[4, 0, 2]),
+      dict(
+          op=math_ops.unsorted_segment_prod,
+          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
+                  'segment_ids': ragged.constant_value([[0, 2], [0]]),
+                  'num_segments': 3},
+          expected=[3, 1, 2]),
+      dict(
+          op=math_ops.unsorted_segment_min,
+          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
+                  'segment_ids': ragged.constant_value([[0, 1], [0]]),
+                  'num_segments': 2},
+          expected=[1, 2]),
+      dict(
+          op=math_ops.unsorted_segment_max,
+          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
+                  'segment_ids': ragged.constant_value([[0, 1], [0]]),
+                  'num_segments': 2},
+          expected=[3, 2]),
+      dict(
+          op=math_ops.unsorted_segment_mean,
+          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
+                  'segment_ids': ragged.constant_value([[0, 1], [0]]),
+                  'num_segments': 2},
+          expected=[2, 2]),
+      dict(
+          op=math_ops.unsorted_segment_sqrt_n,
+          kwargs={'data': ragged.constant_value([[1.0, 2.0], [3.0, 4.0, 6.0]]),
+                  'segment_ids': ragged.constant_value([[0, 1], [0, 0, 0]]),
+                  'num_segments': 2},
+          expected=[7.0, 2.0]),
+      dict(
+          op=math_ops.reduce_sum,
+          kwargs={'input_tensor': ragged.constant_value([[1, 2], [3, 4, 5]]),
+                  'axis': 1},
+          expected=[3, 12]),
+      dict(
+          op=math_ops.reduce_prod,
+          kwargs={'input_tensor': ragged.constant_value([[1, 2], [3, 4, 5]]),
+                  'axis': 1},
+          expected=[2, 60]),
+      dict(
+          op=math_ops.reduce_min,
+          kwargs={'input_tensor': ragged.constant_value([[1, 2], [3, 4, 5]]),
+                  'axis': 1},
+          expected=[1, 3]),
+      dict(
+          op=math_ops.reduce_max,
+          kwargs={'input_tensor': ragged.constant_value([[1, 2], [3, 4, 5]]),
+                  'axis': 1},
+          expected=[2, 5]),
+      dict(
+          op=math_ops.reduce_mean,
+          kwargs={'input_tensor': ragged.constant_value([[1, 3], [3, 4, 5]]),
+                  'axis': 1},
+          expected=[2, 4]),
+      dict(
+          op=math_ops.reduce_any,
+          kwargs={'input_tensor': ragged.constant_value([[True, False],
+                                                         [True, True, True]]),
+                  'axis': 1},
+          expected=[True, True]),
+      dict(
+          op=math_ops.reduce_all,
+          kwargs={'input_tensor': ragged.constant_value([[True, False],
+                                                         [True, True, True]]),
+                  'axis': 1},
+          expected=[False, True]),
+  ])
+  def testRaggedDispatch(self, op, expected, args=(), kwargs=None):
+    if kwargs is None: kwargs = {}
+    result = op(*args, **kwargs)
+    self.assertRaggedEqual(result, expected)
+
 
 if __name__ == '__main__':
   googletest.main()
diff --git a/tensorflow/python/ops/ragged/ragged_math_ops.py b/tensorflow/python/ops/ragged/ragged_math_ops.py
index 92f82be84a..f774c1eb58 100644
--- a/tensorflow/python/ops/ragged/ragged_math_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_math_ops.py
@@ -269,28 +269,32 @@ def segment_max(data, segment_ids, num_segments, name=None):
 
 
 def segment_mean(data, segment_ids, num_segments, name=None):
-  # For docs, see: _RAGGED_SEGMENT_DOCSTRING
+  """For docs, see: _RAGGED_SEGMENT_DOCSTRING."""
   with ops.name_scope(name, 'RaggedSegmentMean',
                       [data, segment_ids, num_segments]):
     total = segment_sum(data, segment_ids, num_segments)
     ones = ragged_tensor.RaggedTensor.from_nested_row_splits(
         array_ops.ones_like(data.flat_values), data.nested_row_splits)
     count = segment_sum(ones, segment_ids, num_segments)
-    return ragged_tensor.RaggedTensor.from_nested_row_splits(
-        total.flat_values / count.flat_values, total.nested_row_splits)
+    if ragged_tensor.is_ragged(total):
+      return total.with_flat_values(total.flat_values / count.flat_values)
+    else:
+      return total / count
 
 
 def segment_sqrt_n(data, segment_ids, num_segments, name=None):
-  # For docs, see: _RAGGED_SEGMENT_DOCSTRING
+  """For docs, see: _RAGGED_SEGMENT_DOCSTRING."""
   with ops.name_scope(name, 'RaggedSegmentSqrtN',
                       [data, segment_ids, num_segments]):
     total = segment_sum(data, segment_ids, num_segments)
     ones = ragged_tensor.RaggedTensor.from_nested_row_splits(
         array_ops.ones_like(data.flat_values), data.nested_row_splits)
     count = segment_sum(ones, segment_ids, num_segments)
-    return ragged_tensor.RaggedTensor.from_nested_row_splits(
-        total.flat_values / math_ops.sqrt(count.flat_values),
-        total.nested_row_splits)
+    if ragged_tensor.is_ragged(total):
+      return total.with_flat_values(
+          total.flat_values / math_ops.sqrt(count.flat_values))
+    else:
+      return total / math_ops.sqrt(count)
 
 
 def _set_ragged_segment_docstring(func, combination, combined):
@@ -465,11 +469,11 @@ def _ragged_reduce_aggregate(reduce_op,
         return _ragged_reduce_aggregate(reduce_op, unsorted_segment_op,
                                         inner_reduced, axis[:-1], keepdims)
 
-    axis = ragged_util.get_positive_axis(axis, rt_input.shape.ndims)
-
     rt_input = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         rt_input, name='rt_input')
 
+    axis = ragged_util.get_positive_axis(axis, rt_input.shape.ndims)
+
     if axis == 0:
       # out[i_1, i_2, ..., i_N] = sum_{j} rt_input[j, i_1, i_2, ..., i_N]
       row_lengths = rt_input.row_splits[1:] - rt_input.row_splits[:-1]
-- 
GitLab


From 95b5a2c831fbe71e6a1202a7b8585f74bb74ee0c Mon Sep 17 00:00:00 2001
From: Pooya Davoodi <pdavoodi@nvidia.com>
Date: Mon, 10 Dec 2018 10:01:30 -0800
Subject: [PATCH 285/873] Update README.md

Remove NVIDIA link encouraging users to start from https://www.tensorflow.org/install/gpu for installing TensorRT to reduce the confusion about which TensorRT version to install.
---
 tensorflow/contrib/tensorrt/README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md
index 1310b3cd27..cb827c35d1 100644
--- a/tensorflow/contrib/tensorrt/README.md
+++ b/tensorflow/contrib/tensorrt/README.md
@@ -18,9 +18,8 @@ https://docs.nvidia.com/deeplearning/dgx/index.html#installing-frameworks-for-je
 ## Installing TensorRT
 
 In order to make use of TF-TRT, you will need a local installation
-of TensorRT from the
-[NVIDIA Developer website](https://developer.nvidia.com/tensorrt).
-Installation instructions for compatibility with TensorFlow are provided on the
+of TensorRT. Installation instructions for compatibility with TensorFlow
+are provided on the
 [TensorFlow GPU support](https://www.tensorflow.org/install/gpu) guide.
 
 ## Examples
-- 
GitLab


From c07297759059a953351f1d5e531b6e6af878365c Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 10 Dec 2018 10:04:47 -0800
Subject: [PATCH 286/873] [XLA:CPU] Add missing intrinsics on Mac OS X.

Fixes crashes seen in JAX test suite on Mac OS.

PiperOrigin-RevId: 224832861
---
 tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index efccadedf2..bd6868d397 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -296,6 +296,9 @@ bool RegisterKnownJITSymbols() {
   REGISTER_LIBM_SYMBOL(sin, double (*)(double));
 #ifdef __APPLE__
   REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
+  registry->Register("__sincosf_stret",
+                     reinterpret_cast<void*>(__sincosf_stret));
+  registry->Register("__sincos_stret", reinterpret_cast<void*>(__sincos_stret));
 #else
   REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
 #endif
@@ -311,6 +314,12 @@ bool RegisterKnownJITSymbols() {
   registry->Register("memcpy", reinterpret_cast<void*>(memcpy));
   registry->Register("memmove", reinterpret_cast<void*>(memmove));
   registry->Register("memset", reinterpret_cast<void*>(memset));
+
+#ifdef __APPLE__
+  registry->Register("memset_pattern16",
+                     reinterpret_cast<void*>(memset_pattern16));
+#endif
+
   return true;
 }
 
-- 
GitLab


From a73776b102701792d1464042ec0c61f5142e9c18 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Mon, 10 Dec 2018 10:09:27 -0800
Subject: [PATCH 287/873] Annotate additional tests with @run_v1_only

PiperOrigin-RevId: 224833857
---
 tensorflow/python/eager/ops_test.py          | 1 +
 tensorflow/python/keras/layers/core_test.py  | 2 ++
 tensorflow/python/keras/layers/local_test.py | 2 +-
 tensorflow/python/layers/core_test.py        | 7 +------
 4 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py
index 17a090d526..91d0d5c6f0 100644
--- a/tensorflow/python/eager/ops_test.py
+++ b/tensorflow/python/eager/ops_test.py
@@ -330,6 +330,7 @@ class OpsTest(test_util.TensorFlowTestCase):
     self.assertEquals(t, dtypes.string)
     self.assertEquals(r[0].dtype, dtypes.string)
 
+  @test_util.run_v1_only('b/120545219')
   def testFlattenLayer(self):
     flatten_layer = core.Flatten()
     x = constant_op.constant([[[-10, -20], [-30, -40]], [[10, 20], [30, 40]]])
diff --git a/tensorflow/python/keras/layers/core_test.py b/tensorflow/python/keras/layers/core_test.py
index f138adf760..b8def07190 100644
--- a/tensorflow/python/keras/layers/core_test.py
+++ b/tensorflow/python/keras/layers/core_test.py
@@ -135,6 +135,7 @@ class CoreLayersTest(test.TestCase):
           kwargs={'dims': (1, 4, 2)}, input_shape=(3, 2, 4))
 
   @tf_test_util.run_in_graph_and_eager_modes
+  @tf_test_util.run_v1_only('b/120545219')
   def test_flatten(self):
     testing_utils.layer_test(
         keras.layers.Flatten, kwargs={}, input_shape=(3, 2, 4))
@@ -150,6 +151,7 @@ class CoreLayersTest(test.TestCase):
     self.assertAllClose(outputs, target_outputs)
 
   @tf_test_util.run_in_graph_and_eager_modes
+  @tf_test_util.run_v1_only('b/120545219')
   def test_flatten_scalar_channels(self):
     testing_utils.layer_test(
         keras.layers.Flatten, kwargs={}, input_shape=(3,))
diff --git a/tensorflow/python/keras/layers/local_test.py b/tensorflow/python/keras/layers/local_test.py
index e4f4d0a639..6db5bf385e 100644
--- a/tensorflow/python/keras/layers/local_test.py
+++ b/tensorflow/python/keras/layers/local_test.py
@@ -235,7 +235,7 @@ class LocallyConnected2DLayersTest(test.TestCase):
 
 class LocallyConnectedImplementationModeTest(test.TestCase):
 
-  @tf_test_util.run_deprecated_v1
+  @tf_test_util.run_v1_only('b/120545219')
   def test_locallyconnected_implementation(self):
     with self.cached_session():
       num_samples = 4
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index b40a268238..3338e55f82 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -463,9 +463,9 @@ class DropoutTest(test.TestCase):
       self.assertAllClose(np.ones((5, 5)), np_output)
 
 
+@test_util.run_v1_only('b/120545219')
 class FlattenTest(test.TestCase):
 
-  @test_util.run_deprecated_v1
   def testCreateFlatten(self):
     with self.cached_session() as sess:
       x = array_ops.placeholder(shape=(None, 2, 3), dtype='float32')
@@ -490,7 +490,6 @@ class FlattenTest(test.TestCase):
     shape = core_layers.Flatten().compute_output_shape((None, 3, None))
     self.assertEqual(shape.as_list(), [None, None])
 
-  @test_util.run_deprecated_v1
   def testDataFormat5d(self):
     np_input_channels_last = np.arange(
         120, dtype='float32').reshape([1, 5, 4, 3, 2])
@@ -508,7 +507,6 @@ class FlattenTest(test.TestCase):
 
       self.assertAllEqual(np_output_cl, np_output_cf)
 
-  @test_util.run_deprecated_v1
   def testDataFormat4d(self):
     np_input_channels_last = np.arange(
         24, dtype='float32').reshape([1, 4, 3, 2])
@@ -526,13 +524,11 @@ class FlattenTest(test.TestCase):
 
       self.assertAllEqual(np_output_cl, np_output_cf)
 
-  @test_util.run_deprecated_v1
   def testFunctionalFlatten(self):
     x = array_ops.placeholder(shape=(None, 2, 3), dtype='float32')
     y = core_layers.flatten(x, name='flatten')
     self.assertEqual(y.get_shape().as_list(), [None, 6])
 
-  @test_util.run_deprecated_v1
   def testFlatten0D(self):
     x = array_ops.placeholder(shape=(None,), dtype='float32')
     y = core_layers.Flatten()(x)
@@ -541,7 +537,6 @@ class FlattenTest(test.TestCase):
     self.assertEqual(list(np_output.shape), [5, 1])
     self.assertEqual(y.shape.as_list(), [None, 1])
 
-  @test_util.run_deprecated_v1
   def testFlattenUnknownAxes(self):
     with self.cached_session() as sess:
       x = array_ops.placeholder(shape=(5, None, None), dtype='float32')
-- 
GitLab


From 929ae05b8c98d1885ceff2f6cf07db66d1bdb737 Mon Sep 17 00:00:00 2001
From: Nupur Garg <nupurgarg@google.com>
Date: Mon, 10 Dec 2018 10:09:39 -0800
Subject: [PATCH 288/873] Internal change.

PiperOrigin-RevId: 224833908
---
 tensorflow/lite/toco/python/BUILD | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tensorflow/lite/toco/python/BUILD b/tensorflow/lite/toco/python/BUILD
index 07056f66c3..8a6e82ec46 100644
--- a/tensorflow/lite/toco/python/BUILD
+++ b/tensorflow/lite/toco/python/BUILD
@@ -1,4 +1,8 @@
-package(default_visibility = ["//visibility:public"])
+package(default_visibility = [
+    "//tensorflow/contrib/lite:__subpackages__",
+    "//tensorflow/lite:__subpackages__",
+    "//tensorflow/tools/pip_package:__subpackages__",
+])
 
 licenses(["notice"])  # Apache 2.0
 
@@ -9,7 +13,10 @@ load("//tensorflow:tensorflow.bzl", "py_binary")
 config_setting(
     name = "tflite_convert_with_select_tf_ops",
     define_values = {"tflite_convert_with_select_tf_ops": "true"},
-    visibility = ["//visibility:public"],
+    visibility = [
+        "//tensorflow/contrib/lite:__subpackages__",
+        "//tensorflow/lite:__subpackages__",
+    ],
 )
 
 cc_library(
@@ -37,6 +44,12 @@ cc_library(
 tf_py_wrap_cc(
     name = "tensorflow_wrap_toco",
     srcs = ["toco.i"],
+    visibility = [
+        "//learning/expander/pod/deep_pod/utils:__subpackages__",
+        "//research/handwriting/converters/tflite:__subpackages__",
+        "//tensorflow/contrib/lite:__subpackages__",
+        "//tensorflow/lite:__subpackages__",
+    ],
     deps = [
         ":toco_python_api",
         "//tensorflow/lite/toco:model_flags_proto_cc",
-- 
GitLab


From 7250f8531a1f35cd22899fa3b124bcbe252281c5 Mon Sep 17 00:00:00 2001
From: Niranjan Hasabnis <niranjan.hasabnis@intel.com>
Date: Mon, 10 Dec 2018 10:25:19 -0800
Subject: [PATCH 289/873] Addressing review comments - V3

---
 tensorflow/core/kernels/mkl_fused_ops_test.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc
index 1214711edc..1003fa5e4f 100644
--- a/tensorflow/core/kernels/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc
@@ -32,7 +32,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Helper class for converting MKL tesnors to TF tensors and comparing to
+// Helper class for converting MKL tensors to TF tensors and comparing to
 // expected values
 
 static const uint8 dummy_tensor[] = {0, 0, 0, 0, 0, 0, 0, 0};
@@ -295,10 +295,10 @@ TYPED_TEST_P(MklFusedConv2DWithBiasOpTest, SpatialConvolutionAndRelu) {
   this->VerifyConv2DWithBiasAndRelu(filter_size, filter_count);
 }
 
-REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest,
-                           OneByOneConvolution,    //
-                           SpatialConvolution,     //
-                           OneByOneConvolutionAndRelu,   //
+REGISTER_TYPED_TEST_CASE_P(MklFusedConv2DWithBiasOpTest,  //
+                           OneByOneConvolution,           //
+                           SpatialConvolution,            //
+                           OneByOneConvolutionAndRelu,    //
                            SpatialConvolutionAndRelu);
 
 using MklFusedBiasAddDataTypes = ::testing::Types<float>;
-- 
GitLab


From 3d5b131ab82e0ea065ea2705b1aa251711850562 Mon Sep 17 00:00:00 2001
From: Pooya Davoodi <pdavoodi@nvidia.com>
Date: Mon, 10 Dec 2018 10:31:08 -0800
Subject: [PATCH 290/873] Update README.md

documentaion --> documentation
---
 tensorflow/contrib/tensorrt/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md
index cb827c35d1..79b4886cce 100644
--- a/tensorflow/contrib/tensorrt/README.md
+++ b/tensorflow/contrib/tensorrt/README.md
@@ -33,7 +33,7 @@ performance of TF-TRT. For more information see
 
 ## Documentation
 
-[TF-TRT documentaion](https://docs.nvidia.com/deeplearning/dgx/integrate-tf-trt/index.html)
+[TF-TRT documentation](https://docs.nvidia.com/deeplearning/dgx/integrate-tf-trt/index.html)
 gives an overview of the supported functionalities, provides tutorials
 and verified models, explains best practices with troubleshooting guides.
 
-- 
GitLab


From c9d6b87d824378c0076542ab337beeb33d9dff50 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Mon, 10 Dec 2018 10:44:47 -0800
Subject: [PATCH 291/873] Build `node_name_to_cost_id_map_` after graph
 optimization passes.

Graph optimization passes might overwrite feed/fetch nodes.

PiperOrigin-RevId: 224840744
---
 tensorflow/core/common_runtime/graph_execution_state.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc
index 880806f120..04d658f047 100644
--- a/tensorflow/core/common_runtime/graph_execution_state.cc
+++ b/tensorflow/core/common_runtime/graph_execution_state.cc
@@ -546,10 +546,6 @@ Status GraphExecutionState::InitBaseGraph(const BuildGraphOptions& options) {
   std::unique_ptr<Graph> new_graph(new Graph(OpRegistry::Global()));
   GraphConstructorOptions opts;
   TF_RETURN_IF_ERROR(ConvertGraphDefToGraph(opts, *graph_def, new_graph.get()));
-  for (const Node* n : new_graph->nodes()) {
-    VLOG(2) << "Mapping " << n->name() << " to " << n->cost_id();
-    node_name_to_cost_id_map_[n->name()] = n->cost_id();
-  }
   if (session_options_ &&
       session_options_->config.graph_options().place_pruned_graph()) {
     // Rewrite the graph before placement.
@@ -578,6 +574,11 @@ Status GraphExecutionState::InitBaseGraph(const BuildGraphOptions& options) {
   TF_RETURN_IF_ERROR(OptimizationPassRegistry::Global()->RunGrouping(
       OptimizationPassRegistry::POST_PLACEMENT, optimization_options));
 
+  for (const Node* n : new_graph->nodes()) {
+    VLOG(2) << "Mapping " << n->name() << " to " << n->cost_id();
+    node_name_to_cost_id_map_[n->name()] = n->cost_id();
+  }
+
   SaveStatefulNodes(new_graph.get());
   graph_ = new_graph.release();
   return Status::OK();
-- 
GitLab


From c5fe1e476b651877022b6d43a851f0ad9ed6880a Mon Sep 17 00:00:00 2001
From: Tom Hennigan <tomhennigan@google.com>
Date: Mon, 10 Dec 2018 10:47:29 -0800
Subject: [PATCH 292/873] Make execution callback an enum.

PiperOrigin-RevId: 224841335
---
 tensorflow/contrib/eager/python/tfe.py        |  4 +
 .../python/eager/execution_callbacks.py       | 77 +++++++++----------
 .../python/eager/execution_callbacks_test.py  | 11 ++-
 tensorflow/python/ops/math_grad_test.py       |  4 +-
 4 files changed, 52 insertions(+), 44 deletions(-)

diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py
index 33c988fd90..8882a863c3 100644
--- a/tensorflow/contrib/eager/python/tfe.py
+++ b/tensorflow/contrib/eager/python/tfe.py
@@ -41,6 +41,8 @@ To use, at program startup, call `tf.enable_eager_execution()`.
 
 @@add_execution_callback
 @@clear_execution_callbacks
+@@errstate
+@@ExecutionCallback
 @@inf_callback
 @@inf_nan_callback
 @@nan_callback
@@ -119,6 +121,8 @@ from tensorflow.python.eager.context import set_server_def
 from tensorflow.python.eager.def_function import function
 from tensorflow.python.eager.execution_callbacks import add_execution_callback
 from tensorflow.python.eager.execution_callbacks import clear_execution_callbacks
+from tensorflow.python.eager.execution_callbacks import errstate
+from tensorflow.python.eager.execution_callbacks import ExecutionCallback
 from tensorflow.python.eager.execution_callbacks import inf_callback
 from tensorflow.python.eager.execution_callbacks import inf_nan_callback
 from tensorflow.python.eager.execution_callbacks import nan_callback
diff --git a/tensorflow/python/eager/execution_callbacks.py b/tensorflow/python/eager/execution_callbacks.py
index 28b6b84a82..af1afa3454 100644
--- a/tensorflow/python/eager/execution_callbacks.py
+++ b/tensorflow/python/eager/execution_callbacks.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import contextlib
 import functools
+import enum  # pylint: disable=g-bad-import-order
 
 import numpy as np
 
@@ -29,13 +30,25 @@ from tensorflow.python.eager import core
 from tensorflow.python.eager import execute
 from tensorflow.python.platform import tf_logging as logging
 
-IGNORE = "ignore"
-PRINT = "print"
-RAISE = "raise"
-WARN = "warn"
 
-_DEFAULT_CALLBACK_ACTION = RAISE
-_VALID_CALLBACK_ACTIONS = (None, IGNORE, PRINT, RAISE, WARN)
+class ExecutionCallback(enum.Enum):
+  """Valid callback actions.
+
+  These can be passed to `seterr` or `errstate` to create callbacks when
+  specific events occur (e.g. an operation produces `NaN`s).
+
+  IGNORE: take no action.
+  PRINT:  print a warning to `stdout`.
+  RAISE:  raise an error (e.g. `InfOrNanError`).
+  WARN:   print a warning using `tf.logging.warn`.
+  """
+
+  IGNORE = "ignore"
+  PRINT = "print"
+  RAISE = "raise"
+  WARN = "warn"
+
+_DEFAULT_CALLBACK_ACTION = ExecutionCallback.RAISE
 
 
 # TODO(cais): Consider moving this exception class to errors_impl.py.
@@ -139,11 +152,8 @@ def inf_nan_callback(op_type,
       the output tensor values.
     check_nan: (`bool`) Whether this callback should check for `nan` values in
       the output tensor values.
-    action: (`str`) Action to be taken by the callback when `inf` or `nan`
-      values are detected. Possible values {"raise", "warn", "print"}
-      `"raise"`: Raise a `InfOrNanError`.
-      `"warn"`: Log a warning using `tf.logging.warn`.
-      `"print"`: Print a message to `sys.stdout`.
+    action: (`ExecutionCallback`) Action to be taken by the callback when
+      `inf` or `nan` values are detected.
 
   Raises:
     InfOrNanError: iff `inf` or `nan` values are seen in any of `outputs` and
@@ -152,6 +162,7 @@ def inf_nan_callback(op_type,
   """
   del attrs, inputs  # Not used.
 
+  action = ExecutionCallback(action)
   ctx = context.context()
 
   for index, output in enumerate(outputs):
@@ -180,16 +191,16 @@ def inf_nan_callback(op_type,
           continue
 
         error = InfOrNanError(op_type, op_name, index, len(outputs), value)
-        if action == "print":
+        if action == ExecutionCallback.PRINT:
           print("Warning: %s" % str(error))
-        elif action == "warn":
+        elif action == ExecutionCallback.WARN:
           logging.warn(str(error))
-        elif action == "raise":
+        elif action == ExecutionCallback.RAISE:
           raise error
         else:
           raise ValueError(
               "Invalid action for inf_nan_callback: %s. Valid actions are: "
-              "{print | warn | raise}" % action)
+              "{PRINT | WARN | RAISE}" % action)
 
 
 def inf_callback(op_type,
@@ -282,7 +293,7 @@ def seterr(inf_or_nan=None):
 
   Example:
   ```python
-  tfe.seterr(inf_or_nan="raise")
+  tfe.seterr(inf_or_nan=ExecutionCallback.RAISE)
   a = tf.constant(10.0)
   b = tf.constant(0.0)
   try:
@@ -290,18 +301,14 @@ def seterr(inf_or_nan=None):
   except Exception as e:
     print("Caught Exception: %s" % e)
 
-  tfe.seterr(inf_or_nan="ignore")
+  tfe.seterr(inf_or_nan=ExecutionCallback.IGNORE)
   c = a / b  # <-- Does NOT raise exception anymore.
   ```
 
   Args:
-    inf_or_nan: Set action for infinity (`inf`) and NaN (`nan`) values.
-      Possible values: `{"ignore", "print", "raise", "warn"}`.
-      `"ignore"`: take no action when `inf` values appear.
-      `"print"`: print a warning to `stdout`.
-      `"raise"`: raise an `InfOrNanError`.
-      `"warn"`: print a warning using `tf.logging.warn`.
-      A value of `None` leads to no change in the action of the condition.
+    inf_or_nan: An `ExecutionCallback` determining the action for infinity
+      (`inf`) and NaN (`nan`) values. A value of `None` leads to no change in
+      the action of the condition.
 
   Returns:
     A dictionary of old actions.
@@ -309,12 +316,8 @@ def seterr(inf_or_nan=None):
   Raises:
     ValueError: If the value of any keyword arguments is invalid.
   """
-  if inf_or_nan not in _VALID_CALLBACK_ACTIONS:
-    raise ValueError(
-        "Invalid action value for inf_or_nan: %s. "
-        "Valid actions are %s." % (inf_or_nan, _VALID_CALLBACK_ACTIONS))
-
-  old_settings = {"inf_or_nan": "ignore"}
+  inf_or_nan = ExecutionCallback(inf_or_nan) if inf_or_nan is not None else None
+  old_settings = {"inf_or_nan": ExecutionCallback.IGNORE}
   default_context = context.context()
 
   carryover_callbacks = []
@@ -336,7 +339,7 @@ def seterr(inf_or_nan=None):
     default_context.clear_post_execution_callbacks()
     for callback in carryover_callbacks:
       default_context.add_post_execution_callback(callback)
-    if inf_or_nan != "ignore":
+    if inf_or_nan != ExecutionCallback.IGNORE:
       default_context.add_post_execution_callback(
           functools.partial(inf_nan_callback, action=inf_or_nan))
 
@@ -351,18 +354,14 @@ def errstate(inf_or_nan=None):
   ```
   c = tf.log(0.)  # -inf
 
-  with errstate(inf_or_nan="raise"):
+  with errstate(inf_or_nan=ExecutionCallback.RAISE):
     tf.log(0.)  # <-- Raises InfOrNanError.
   ```
 
   Args:
-    inf_or_nan: Set action for infinity (`inf`) and NaN (`nan`) values.
-      Possible values: `{IGNORE, PRINT, RAISE, WARN}`.
-      `IGNORE`: take no action when `inf` values appear.
-      `PRINT`: print a warning to `stdout`.
-      `RAISE`: raise an `InfOrNanError`.
-      `WARN`: print a warning using `tf.logging.warn`.
-      A value of `None` leads to no change in the action of the condition.
+    inf_or_nan: An `ExecutionCallback` determining the action for infinity
+      (`inf`) and NaN (`nan`) values. A value of `None` leads to no change in
+      the action of the condition.
 
   Yields:
     None.
diff --git a/tensorflow/python/eager/execution_callbacks_test.py b/tensorflow/python/eager/execution_callbacks_test.py
index 5594ab5f12..b8b786ad2e 100644
--- a/tensorflow/python/eager/execution_callbacks_test.py
+++ b/tensorflow/python/eager/execution_callbacks_test.py
@@ -24,6 +24,9 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
+RAISE = execution_callbacks.ExecutionCallback.RAISE
+IGNORE = execution_callbacks.ExecutionCallback.IGNORE
+
 
 def log_zero():
   """Computes `log(0.0)`."""
@@ -33,17 +36,17 @@ def log_zero():
 class ExecutionCallbacksTest(test.TestCase):
 
   def test_errstate_inf_raise(self):
-    with execution_callbacks.errstate(inf_or_nan=execution_callbacks.RAISE):
+    with execution_callbacks.errstate(inf_or_nan=RAISE):
       with self.assertRaises(execution_callbacks.InfOrNanError):
         log_zero()
 
   def test_errstate_inf_ignore(self):
-    with execution_callbacks.errstate(inf_or_nan=execution_callbacks.IGNORE):
+    with execution_callbacks.errstate(inf_or_nan=IGNORE):
       self.assertEqual(-float("inf"), log_zero().numpy())
 
   def test_errstate_nesting(self):
-    with execution_callbacks.errstate(inf_or_nan=execution_callbacks.RAISE):
-      with execution_callbacks.errstate(inf_or_nan=execution_callbacks.IGNORE):
+    with execution_callbacks.errstate(inf_or_nan=RAISE):
+      with execution_callbacks.errstate(inf_or_nan=IGNORE):
         self.assertEqual(-float("inf"), log_zero().numpy())
 
       with self.assertRaises(execution_callbacks.InfOrNanError):
diff --git a/tensorflow/python/ops/math_grad_test.py b/tensorflow/python/ops/math_grad_test.py
index 822f89768c..f415e65787 100644
--- a/tensorflow/python/ops/math_grad_test.py
+++ b/tensorflow/python/ops/math_grad_test.py
@@ -33,6 +33,8 @@ from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
+RAISE = execution_callbacks.ExecutionCallback.RAISE
+
 
 class SquaredDifferenceOpTest(test.TestCase):
 
@@ -385,7 +387,7 @@ class PowGradTest(test.TestCase):
     self.assertAllClose([-2., 0., 2.], g)
 
   def test_zero_grad_tape(self):
-    with execution_callbacks.errstate(inf_or_nan=execution_callbacks.RAISE):
+    with execution_callbacks.errstate(inf_or_nan=RAISE):
       x = constant_op.constant([-1, 0., 1.])
       with backprop.GradientTape() as tape:
         tape.watch(x)
-- 
GitLab


From 8eb8217c58edd2f6e7b7bd398ce6495ec29099af Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Mon, 10 Dec 2018 10:53:48 -0800
Subject: [PATCH 293/873] Optimize gemm_pack_rhs for row stride != 1

PiperOrigin-RevId: 224842783
---
 tensorflow/core/kernels/eigen_spatial_convolutions.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h
index 25c735d080..86d8c98ee6 100644
--- a/tensorflow/core/kernels/eigen_spatial_convolutions.h
+++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h
@@ -871,11 +871,9 @@ struct gemm_pack_rhs<
             const bool pad_col2 = dm2.padCol(c);
             const bool pad_col3 = dm3.padCol(c);
 
-            // We can squeeze reads along the `row` and `depth` dimensions if
-            // the row stride is `1`, which means that `row` and `depth`
-            // dimensions are contiguous (two innermost dimensions).
-            if (rhs.rowStride() == 1 &&                                //
-                !pad_col0 && !pad_col1 && !pad_col2 && !pad_col3 &&    //
+            // Check if we can squeeze reads along the `row` and `depth`
+            // dimensions (two innermost dimensions).
+            if (!pad_col0 && !pad_col1 && !pad_col2 && !pad_col3 &&    //
                 !dm0.padRow(start_row) && !dm0.padRow(max_row - 1) &&  //
                 !dm1.padRow(start_row) && !dm1.padRow(max_row - 1) &&  //
                 !dm2.padRow(start_row) && !dm2.padRow(max_row - 1) &&  //
-- 
GitLab


From ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8 Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Mon, 10 Dec 2018 10:58:15 -0800
Subject: [PATCH 294/873] Unify num_accelerators for all Cluster Resolvers

PiperOrigin-RevId: 224843723
---
 .../cluster_resolver/cluster_resolver.py      | 15 +++++-
 .../cluster_resolver/cluster_resolver_test.py | 53 +++++++++++++++++++
 .../cluster_resolver/gce_cluster_resolver.py  | 13 -----
 .../kubernetes_cluster_resolver.py            | 14 -----
 .../tfconfig_cluster_resolver.py              | 19 +------
 .../tfconfig_cluster_resolver_test.py         |  4 +-
 .../cluster_resolver/tpu_cluster_resolver.py  | 14 +++--
 7 files changed, 74 insertions(+), 58 deletions(-)

diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py
index ca40e60a55..73188bd7ca 100644
--- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver.py
@@ -22,6 +22,8 @@ import abc
 
 import six
 
+from tensorflow.python.client import session
+from tensorflow.python.framework import ops
 from tensorflow.python.training.server_lib import ClusterSpec
 
 
@@ -32,6 +34,14 @@ def format_master_url(master, rpc_layer=None):
     return master
 
 
+def get_accelerator_devices(master, config_proto):
+  # TODO(frankchn): Add support for eager mode as well as graph mode.
+  with ops.Graph().as_default():
+    with session.Session(master, config=config_proto) as s:
+      devices = s.list_devices()
+  return devices
+
+
 @six.add_metaclass(abc.ABCMeta)
 class ClusterResolver(object):
   """Abstract class for all implementations of ClusterResolvers.
@@ -91,7 +101,6 @@ class ClusterResolver(object):
     """
     raise NotImplementedError()
 
-  @abc.abstractmethod
   def num_accelerators(self,
                        task_type=None,
                        task_index=None,
@@ -119,7 +128,9 @@ class ClusterResolver(object):
       config_proto: (Optional) Configuration for starting a new session to
         query how many accelerator cores it has.
     """
-    raise NotImplementedError()
+    master = self.master(task_type, task_index)
+    devices = get_accelerator_devices(master, config_proto)
+    return sum(1 for d in devices if d.device_type == accelerator_type)
 
   @abc.abstractproperty
   def environment(self):
diff --git a/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py
index 3f7b469727..0ff6b6be62 100644
--- a/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py
+++ b/tensorflow/python/distribute/cluster_resolver/cluster_resolver_test.py
@@ -18,11 +18,64 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.client import session
+from tensorflow.python.distribute.cluster_resolver import ClusterResolver
 from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver
 from tensorflow.python.distribute.cluster_resolver import UnionClusterResolver
 from tensorflow.python.platform import test
 from tensorflow.python.training import server_lib
 
+mock = test.mock
+
+
+class MockBaseClusterResolver(ClusterResolver):
+
+  def cluster_spec(self):
+    return None
+
+  def master(self, task_type=None, task_index=None, rpc_layer=None):
+    return ""
+
+  def environment(self):
+    return ""
+
+
+class BaseClusterResolverTest(test.TestCase):
+
+  @mock.patch.object(session.BaseSession, "list_devices")
+  def testNumAcceleratorsSuccess(self, mock_list_devices):
+    device_names = [
+        "/job:worker/task:0/device:GPU:0",
+        "/job:worker/task:0/device:GPU:1",
+        "/job:worker/task:0/device:GPU:2",
+        "/job:worker/task:0/device:GPU:3",
+    ]
+    device_list = [
+        session._DeviceAttributes(
+            name, "GPU", 1024, 0) for name in device_names
+    ]
+    mock_list_devices.return_value = device_list
+
+    resolver = MockBaseClusterResolver()
+    self.assertEqual(resolver.num_accelerators(), 4)
+
+  @mock.patch.object(session.BaseSession, "list_devices")
+  def testNumAcceleratorsFilterSuccess(self, mock_list_devices):
+    device_names = [
+        "/job:worker/task:0/device:TPU:0",
+        "/job:worker/task:0/device:TPU:1",
+        "/job:worker/task:0/device:TPU:2",
+        "/job:worker/task:0/device:TPU:3",
+    ]
+    device_list = [
+        session._DeviceAttributes(
+            name, "TPU", 1024, 0) for name in device_names
+    ]
+    mock_list_devices.return_value = device_list
+
+    resolver = MockBaseClusterResolver()
+    self.assertEqual(resolver.num_accelerators(), 0)
+
 
 class UnionClusterResolverTest(test.TestCase):
   # TODO(frankchn): Transform to parameterized test after it is included in the
diff --git a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
index 2412f6dad0..06512613cb 100644
--- a/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/gce_cluster_resolver.py
@@ -51,7 +51,6 @@ class GceClusterResolver(ClusterResolver):
                task_type='worker',
                task_index=0,
                rpc_layer='grpc',
-               num_accelerators=0,
                credentials='default',
                service=None):
     """Creates a new GceClusterResolver object.
@@ -73,8 +72,6 @@ class GceClusterResolver(ClusterResolver):
         can be distinguished from each other.
       rpc_layer: The RPC layer TensorFlow should use to communicate across
         instances.
-      num_accelerators: Number of accelerators (GPUs) present per
-        instance.
       credentials: GCE Credentials. If nothing is specified, this defaults to
         GoogleCredentials.get_application_default().
       service: The GCE API object returned by the googleapiclient.discovery
@@ -90,7 +87,6 @@ class GceClusterResolver(ClusterResolver):
     self._task_type = task_type
     self._task_index = task_index
     self._rpc_layer = rpc_layer
-    self._num_accelerators = num_accelerators
     self._port = port
     self._credentials = credentials
 
@@ -201,12 +197,3 @@ class GceClusterResolver(ClusterResolver):
   @rpc_layer.setter
   def rpc_layer(self, rpc_layer):
     self._rpc_layer = rpc_layer
-
-  def num_accelerators(self,
-                       task_type=None,
-                       task_index=None,
-                       accelerator_type='GPU',
-                       config_proto=None):
-    # Unused
-    del task_type, task_index, accelerator_type, config_proto
-    return self._num_accelerators
diff --git a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
index b21c3676be..88625a5542 100644
--- a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.client import device_lib
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
 from tensorflow.python.training import server_lib
@@ -167,16 +166,3 @@ class KubernetesClusterResolver(ClusterResolver):
     on internal systems.
     """
     return ''
-
-  def num_accelerators(self,
-                       task_type=None,
-                       task_index=None,
-                       accelerator_type='GPU',
-                       config_proto=None):
-    # TODO(frankchn): Make querying non-local accelerators work
-    if task_type is not None or task_index is not None:
-      raise NotImplementedError('Querying non-local accelerators is not yet'
-                                'implemented.')
-
-    local_devices = device_lib.list_local_devices(config_proto)
-    return sum(d.device_type == accelerator_type for d in local_devices)
diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
index b4465714b2..8d530cc15a 100644
--- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver.py
@@ -54,8 +54,7 @@ class TFConfigClusterResolver(ClusterResolver):
                task_type=None,
                task_index=None,
                rpc_layer=None,
-               environment=None,
-               num_accelerators=0):
+               environment=None):
     """Creates a new TFConfigClusterResolver.
 
     Args:
@@ -66,17 +65,11 @@ class TFConfigClusterResolver(ClusterResolver):
       rpc_layer: (String, optional) Overrides the rpc layer TensorFlow uses.
       environment: (String, optional) Overrides the environment TensorFlow
         operates in.
-      num_accelerators: (Integer, optional) Specifies the number of
-        accelerators (e.g. GPUs, TPUs, others) that each node has.
     """
-    # TODO(frankchn): num_accelerators is a stop-gap and will be removed
-    # in favor of autodetection of devices soon.
-
     self._task_type = task_type
     self._task_index = task_index
     self._rpc_layer = rpc_layer
     self._environment = environment
-    self._num_accelerators = num_accelerators
 
   @property
   def task_type(self):
@@ -117,16 +110,6 @@ class TFConfigClusterResolver(ClusterResolver):
   def rpc_layer(self, rpc_layer):
     self._rpc_layer = rpc_layer
 
-  def num_accelerators(self,
-                       task_type=None,
-                       task_index=None,
-                       accelerator_type='GPU',
-                       config_proto=None):
-    # TODO(frankchn): Connect to server (w/ session_config) in the future.
-    # Unused, we do not connect to another server here right now.
-    del task_type, task_index, accelerator_type, config_proto
-    return self._num_accelerators
-
   def cluster_spec(self):
     """Returns a ClusterSpec based on the TF_CONFIG environment variable.
 
diff --git a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
index 197eba1739..36b3bb9c1e 100644
--- a/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
+++ b/tensorflow/python/distribute/cluster_resolver/tfconfig_cluster_resolver_test.py
@@ -168,13 +168,11 @@ class TFConfigClusterResolverTest(test.TestCase):
     }
     """
 
-    cluster_resolver = TFConfigClusterResolver(task_type='ps', task_index=0,
-                                               num_accelerators=8)
+    cluster_resolver = TFConfigClusterResolver(task_type='ps', task_index=0)
 
     self.assertEqual('grpc://ps0:2222', cluster_resolver.master())
     self.assertEqual('ps', cluster_resolver.task_type)
     self.assertEqual(0, cluster_resolver.task_index)
-    self.assertEqual(8, cluster_resolver.num_accelerators())
 
     cluster_resolver.task_type = 'worker'
     cluster_resolver.task_index = 1
diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
index e907d6fde4..72a27b915c 100644
--- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
@@ -25,11 +25,10 @@ import re
 from six.moves.urllib.request import Request
 from six.moves.urllib.request import urlopen
 
-from tensorflow.python.client import session
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
+from tensorflow.python.distribute.cluster_resolver.cluster_resolver import get_accelerator_devices
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
@@ -451,17 +450,16 @@ class TPUClusterResolver(ClusterResolver):
         retrieve the system metadata.
 
     Raises:
-      RuntimeError: If this is used with a non-TPU accelerator_type.
+      RuntimeError: If we cannot talk to a TPU worker after retrying or if the
+        number of TPU devices per host is different.
     """
     retry_count = 1
     # TODO(b/120564445): Replace with standard library for retries.
     while True:
       try:
-        with ops.Graph().as_default():
-          with session.Session(self.master(), config=config_proto) as s:
-            devices = s.list_devices()
-            device_details = _get_device_dict_and_cores(devices)
-            break
+        device_details = _get_device_dict_and_cores(
+            get_accelerator_devices(self.master(), config_proto=config_proto))
+        break
       except errors.DeadlineExceededError:
         error_message = ('Failed to connect to master. The TPU might not be '
                          'ready (e.g. still scheduling) or the master '
-- 
GitLab


From a9c129a66c6ec4328f16aac6a66f0d3d31f88581 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 11:16:09 -0800
Subject: [PATCH 295/873] Automated rollback of commit
 3640da49c3731807a3dbc27d813e8ab68a86328a

PiperOrigin-RevId: 224847522
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 96b9556e13..84816d70d0 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -2234,7 +2234,7 @@ class TPUEstimator(estimator_lib.Estimator):
     def computation():
       """Compute tpu tensors used in export_outputs.
 
-      Passed to rewrite so that model_fn will be called under
+      Passed to rewrite_for_inference so that model_fn will be called under
       the rewriting contexts. Only tpu tensors are returned, but export_outputs
       and scaffold are captured.
 
@@ -2243,7 +2243,7 @@ class TPUEstimator(estimator_lib.Estimator):
          outside_compilation.
       """
       # We should only call model fn once and it should be inside `computation`
-      # so that building the graph will happen under `rewrite`.
+      # so that building the graph will happen under `rewrite_for_inference`.
       mode = model_fn_lib.ModeKeys.PREDICT
       estimator_spec = self._call_model_fn(features, labels, mode, config)
 
@@ -2260,7 +2260,7 @@ class TPUEstimator(estimator_lib.Estimator):
       capture.capture((estimator_spec, tensors_dict, tensors))
       return tpu_tensors
 
-    tpu_tensors_on_cpu = tpu.rewrite(computation)
+    tpu_tensors_on_cpu = tpu.rewrite_for_inference(computation)
     estimator_spec, tensors_dict, tensors = capture.get()
 
     # Reconstruct `tensors`, but with `tpu_tensors` replaced with
-- 
GitLab


From 16bd4eb5f2f58111a55f8b223f161f0ce1c07be5 Mon Sep 17 00:00:00 2001
From: Pete Warden <petewarden@google.com>
Date: Mon, 10 Dec 2018 11:42:52 -0800
Subject: [PATCH 296/873] Add extra tests to the speech example's makefile

PiperOrigin-RevId: 224852926
---
 .../micro/examples/micro_speech/Makefile.inc  | 153 ++++++++++++++++++
 .../experimental/micro/tools/make/Makefile    |  80 +--------
 2 files changed, 156 insertions(+), 77 deletions(-)
 create mode 100644 tensorflow/lite/experimental/micro/examples/micro_speech/Makefile.inc

diff --git a/tensorflow/lite/experimental/micro/examples/micro_speech/Makefile.inc b/tensorflow/lite/experimental/micro/examples/micro_speech/Makefile.inc
new file mode 100644
index 0000000000..0e42329cad
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/examples/micro_speech/Makefile.inc
@@ -0,0 +1,153 @@
+
+# Tests loading and running a speech model.
+MICRO_SPEECH_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
+ALL_SRCS += $(MICRO_SPEECH_TEST_SRCS)
+MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_TEST_SRCS))))
+MICRO_SPEECH_TEST_BINARY := $(BINDIR)micro_speech_test
+ALL_BINARIES += $(MICRO_SPEECH_TEST_BINARY)
+$(MICRO_SPEECH_TEST_BINARY): $(MICRO_SPEECH_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(MICRO_SPEECH_TEST_BINARY) $(MICRO_SPEECH_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+micro_speech_test: $(MICRO_SPEECH_TEST_BINARY)
+micro_speech_test_bin: $(MICRO_SPEECH_TEST_BINARY).bin
+test_micro_speech: $(MICRO_SPEECH_TEST_BINARY)
+	$(TEST_SCRIPT) $(MICRO_SPEECH_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+
+# Source files that are used by multiple preprocessor tests.
+PREPROCESSOR_TEST_SHARED_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc
+
+# Test the float reference code for feature generation.
+PREPROCESSOR_REFERENCE_TEST_SRCS = \
+$(PREPROCESSOR_TEST_SHARED_SRCS) \
+tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc
+ALL_SRCS += $(PREPROCESSOR_REFERENCE_TEST_SRCS)
+PREPROCESSOR_REFERENCE_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_REFERENCE_TEST_SRCS))))
+PREPROCESSOR_REFERENCE_TEST_BINARY := $(BINDIR)preprocessor_reference_test
+ALL_BINARIES += $(PREPROCESSOR_REFERENCE_TEST_BINARY)
+$(PREPROCESSOR_REFERENCE_TEST_BINARY): $(PREPROCESSOR_REFERENCE_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(PREPROCESSOR_REFERENCE_TEST_BINARY) $(PREPROCESSOR_REFERENCE_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+preprocessor_reference_test: $(PREPROCESSOR_REFERENCE_TEST_BINARY)
+preprocessor_reference_test_bin: $(PREPROCESSOR_REFERENCE_TEST_BINARY).bin
+test_preprocessor_reference: $(PREPROCESSOR_REFERENCE_TEST_BINARY)
+	$(TEST_SCRIPT) $(PREPROCESSOR_REFERENCE_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+
+# Test the fixed point reference code for feature generation.
+PREPROCESSOR_FIXED_TEST_SRCS = \
+$(PREPROCESSOR_TEST_SHARED_SRCS) \
+tensorflow/lite/experimental/micro/examples/micro_speech/fixed_point/preprocessor.cc
+ALL_SRCS += $(PREPROCESSOR_FIXED_TEST_SRCS)
+PREPROCESSOR_FIXED_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_FIXED_TEST_SRCS))))
+PREPROCESSOR_FIXED_TEST_BINARY := $(BINDIR)preprocessor_fixed_test
+ALL_BINARIES += $(PREPROCESSOR_FIXED_TEST_BINARY)
+$(PREPROCESSOR_FIXED_TEST_BINARY): $(PREPROCESSOR_FIXED_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(PREPROCESSOR_FIXED_TEST_BINARY) $(PREPROCESSOR_FIXED_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+preprocessor_fixed_test: $(PREPROCESSOR_FIXED_TEST_BINARY)
+preprocessor_fixed_test_bin: $(PREPROCESSOR_FIXED_TEST_BINARY).bin
+test_preprocessor_fixed: $(PREPROCESSOR_FIXED_TEST_BINARY)
+	$(TEST_SCRIPT) $(PREPROCESSOR_FIXED_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+
+# Tests the audio provider module.
+AUDIO_PROVIDER_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc
+ALL_SRCS += $(AUDIO_PROVIDER_TEST_SRCS)
+AUDIO_PROVIDER_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(AUDIO_PROVIDER_TEST_SRCS))))
+AUDIO_PROVIDER_TEST_BINARY := $(BINDIR)audio_provider_test
+ALL_BINARIES += $(AUDIO_PROVIDER_TEST_BINARY)
+$(AUDIO_PROVIDER_TEST_BINARY): $(AUDIO_PROVIDER_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(AUDIO_PROVIDER_TEST_BINARY) $(AUDIO_PROVIDER_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+audio_provider_test: $(AUDIO_PROVIDER_TEST_BINARY)
+audio_provider_test_bin: $(AUDIO_PROVIDER_TEST_BINARY).bin
+test_audio_provider: $(AUDIO_PROVIDER_TEST_BINARY)
+	$(TEST_SCRIPT) $(AUDIO_PROVIDER_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+
+# Tests the feature provider module.
+FEATURE_PROVIDER_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/timer.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc
+ALL_SRCS += $(FEATURE_PROVIDER_TEST_SRCS)
+FEATURE_PROVIDER_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(FEATURE_PROVIDER_TEST_SRCS))))
+FEATURE_PROVIDER_TEST_BINARY := $(BINDIR)feature_provider_test
+ALL_BINARIES += $(FEATURE_PROVIDER_TEST_BINARY)
+$(FEATURE_PROVIDER_TEST_BINARY): $(FEATURE_PROVIDER_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(FEATURE_PROVIDER_TEST_BINARY) $(FEATURE_PROVIDER_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+feature_provider_test: $(FEATURE_PROVIDER_TEST_BINARY)
+feature_provider_test_bin: $(FEATURE_PROVIDER_TEST_BINARY).bin
+test_feature_provider: $(FEATURE_PROVIDER_TEST_BINARY)
+	$(TEST_SCRIPT) $(FEATURE_PROVIDER_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+
+# Tests the timer module.
+TIMER_TEST_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/timer_test.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/timer.cc
+ALL_SRCS += $(TIMER_TEST_SRCS)
+TIMER_TEST_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(TIMER_TEST_SRCS))))
+TIMER_TEST_BINARY := $(BINDIR)timer_test
+ALL_BINARIES += $(TIMER_TEST_BINARY)
+$(TIMER_TEST_BINARY): $(TIMER_TEST_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(TIMER_TEST_BINARY) $(TIMER_TEST_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+timer_test: $(TIMER_TEST_BINARY)
+timer_test_bin: $(TIMER_TEST_BINARY).bin
+test_timer: $(TIMER_TEST_BINARY)
+	$(TEST_SCRIPT) $(TIMER_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
+
+# Builds a standalone speech command recognizer binary.
+MICRO_SPEECH_SRCS := \
+tensorflow/lite/experimental/micro/examples/micro_speech/main.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/model_settings.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/audio_provider.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/feature_provider.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/timer.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc \
+tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc
+ALL_SRCS += $(MICRO_SPEECH_SRCS)
+MICRO_SPEECH_OBJS := $(addprefix $(OBJDIR), \
+$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_SRCS))))
+MICRO_SPEECH_BINARY := $(BINDIR)micro_speech
+ALL_BINARIES += $(MICRO_SPEECH_BINARY)
+$(MICRO_SPEECH_BINARY): $(MICRO_SPEECH_OBJS) $(MICROLITE_LIB_PATH)
+	@mkdir -p $(dir $@)
+	$(CXX) $(CXXFLAGS) $(INCLUDES) \
+	-o $(MICRO_SPEECH_BINARY) $(MICRO_SPEECH_OBJS) \
+	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
+micro_speech: $(MICRO_SPEECH_BINARY)
+micro_speech_bin: $(MICRO_SPEECH_BINARY).bin
diff --git a/tensorflow/lite/experimental/micro/tools/make/Makefile b/tensorflow/lite/experimental/micro/tools/make/Makefile
index 0caf0ca099..20307e2b21 100644
--- a/tensorflow/lite/experimental/micro/tools/make/Makefile
+++ b/tensorflow/lite/experimental/micro/tools/make/Makefile
@@ -52,29 +52,6 @@ CC_PREFIX :=
 # runtime that can be linked in to other programs.
 MICROLITE_LIB_NAME := libtensorflow-microlite.a
 
-# Test binary for the microcontroller speech model.
-MICRO_SPEECH_TEST_SRCS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_features_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_features_data.cc
-
-# Test binary for the microcontroller speech model.
-PREPROCESSOR_TEST_SRCS := \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor_test.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_30ms_sample_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_30ms_sample_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/no_power_spectrum_data.cc \
-tensorflow/lite/experimental/micro/examples/micro_speech/yes_power_spectrum_data.cc
-
-PREPROCESSOR_REFERENCE_TEST_SRCS = \
-$(PREPROCESSOR_TEST_SRCS) \
-tensorflow/lite/experimental/micro/examples/micro_speech/preprocessor.cc
-
-PREPROCESSOR_FIXED_TEST_SRCS += \
-$(PREPROCESSOR_TEST_SRCS) \
-tensorflow/lite/experimental/micro/examples/micro_speech/fixed_point/preprocessor.cc
-
 MICROLITE_TEST_SRCS := \
 $(wildcard tensorflow/lite/experimental/micro/*test.cc) \
 $(wildcard tensorflow/lite/experimental/micro/kernels/*test.cc)
@@ -97,9 +74,6 @@ MICROLITE_CC_SRCS := $(filter-out $(MICROLITE_TEST_SRCS), $(MICROLITE_CC_BASE_SR
 include $(wildcard $(MAKEFILE_DIR)/targets/*_makefile.inc)
 
 ALL_SRCS := \
-	$(MICRO_SPEECH_TEST_SRCS) \
-	$(PREPROCESSOR_REFERENCE_TEST_SRCS) \
-	$(PREPROCESSOR_FIXED_TEST_SRCS) \
 	$(MICROLITE_CC_SRCS) \
 	$(MICROLITE_TEST_SRCS)
 
@@ -111,22 +85,12 @@ LIBDIR := $(GENDIR)lib/
 
 MICROLITE_LIB_PATH := $(LIBDIR)$(MICROLITE_LIB_NAME)
 
-MICRO_SPEECH_TEST_BINARY := $(BINDIR)micro_speech_test
-PREPROCESSOR_REFERENCE_TEST_BINARY := $(BINDIR)preprocessor_reference_test
-PREPROCESSOR_FIXED_TEST_BINARY := $(BINDIR)preprocessor_fixed_test
-
 CXX := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}g++
 CC := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}gcc
 AR := $(CC_PREFIX)${TARGET_TOOLCHAIN_PREFIX}ar
 
-MICRO_SPEECH_TEST_OBJS := $(addprefix $(OBJDIR), \
-$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICRO_SPEECH_TEST_SRCS))))
-
-PREPROCESSOR_REFERENCE_TEST_OBJS := $(addprefix $(OBJDIR), \
-$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_REFERENCE_TEST_SRCS))))
-
-PREPROCESSOR_FIXED_TEST_OBJS := $(addprefix $(OBJDIR), \
-$(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(PREPROCESSOR_FIXED_TEST_SRCS))))
+# Load the examples.
+include $(wildcard tensorflow/lite/experimental/micro/examples/*/Makefile.inc)
 
 MICROLITE_LIB_OBJS := $(addprefix $(OBJDIR), \
 $(patsubst %.cc,%.o,$(patsubst %.c,%.o,$(MICROLITE_CC_SRCS))))
@@ -145,7 +109,7 @@ $(OBJDIR)%.o: %.c
 	$(CC) $(CCFLAGS) $(INCLUDES) -c $< -o $@
 
 # The target that's compiled if there's no command-line arguments.
-all: $(MICROLITE_LIB_PATH) $(MICRO_SPEECH_TEST_BINARY) $(PREPROCESSOR_TEST_BINARY)
+all: $(MICROLITE_LIB_PATH) $(ALL_BINARIES)
 
 microlite: $(MICROLITE_LIB_PATH)
 
@@ -158,42 +122,6 @@ $(MICROLITE_LIB_PATH): tensorflow/lite/schema/schema_generated.h $(MICROLITE_LIB
 	@mkdir -p $(dir $@)
 	$(AR) $(ARFLAGS) $(MICROLITE_LIB_PATH) $(MICROLITE_LIB_OBJS)
 
-$(MICRO_SPEECH_TEST_BINARY): $(MICRO_SPEECH_TEST_OBJS) $(MICROLITE_LIB_PATH)
-	@mkdir -p $(dir $@)
-	$(CXX) $(CXXFLAGS) $(INCLUDES) \
-	-o $(MICRO_SPEECH_TEST_BINARY) $(MICRO_SPEECH_TEST_OBJS) \
-	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
-
-micro_speech_test: $(MICRO_SPEECH_TEST_BINARY)
-micro_speech_test_bin: $(MICRO_SPEECH_TEST_BINARY).bin
-
-test_micro_speech: $(MICRO_SPEECH_TEST_BINARY)
-	$(TEST_SCRIPT) $(MICRO_SPEECH_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
-
-$(PREPROCESSOR_REFERENCE_TEST_BINARY): $(PREPROCESSOR_REFERENCE_TEST_OBJS) $(MICROLITE_LIB_PATH)
-	@mkdir -p $(dir $@)
-	$(CXX) $(CXXFLAGS) $(INCLUDES) \
-	-o $(PREPROCESSOR_REFERENCE_TEST_BINARY) $(PREPROCESSOR_REFERENCE_TEST_OBJS) \
-	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
-
-preprocessor_reference_test: $(PREPROCESSOR_REFERENCE_TEST_BINARY)
-preprocessor_reference_test_bin: $(PREPROCESSOR_REFERENCE_TEST_BINARY).bin
-
-test_preprocessor_reference: $(PREPROCESSOR_REFERENCE_TEST_BINARY)
-	$(TEST_SCRIPT) $(PREPROCESSOR_REFERENCE_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
-
-$(PREPROCESSOR_FIXED_TEST_BINARY): $(PREPROCESSOR_FIXED_TEST_OBJS) $(MICROLITE_LIB_PATH)
-	@mkdir -p $(dir $@)
-	$(CXX) $(CXXFLAGS) $(INCLUDES) \
-	-o $(PREPROCESSOR_FIXED_TEST_BINARY) $(PREPROCESSOR_FIXED_TEST_OBJS) \
-	$(LIBFLAGS) $(MICROLITE_LIB_PATH) $(LDFLAGS) $(MICROLITE_LIBS)
-
-preprocessor_fixed_test: $(PREPROCESSOR_FIXED_TEST_BINARY)
-preprocessor_fixed_test_bin: $(PREPROCESSOR_FIXED_TEST_BINARY).bin
-
-test_preprocessor_fixed: $(PREPROCESSOR_FIXED_TEST_BINARY)
-	$(TEST_SCRIPT) $(PREPROCESSOR_FIXED_TEST_BINARY) '~~~ALL TESTS PASSED~~~'
-
 $(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
 	@mkdir -p $(dir $@)
 	$(CXX) $(CXXFLAGS) $(INCLUDES) \
@@ -203,8 +131,6 @@ $(BINDIR)%_test : $(OBJDIR)%_test.o $(MICROLITE_LIB_PATH)
 $(BINDIR)%.test_target: $(BINDIR)%_test
 	$(TEST_SCRIPT) $< '~~~ALL TESTS PASSED~~~'
 
-$(info $(MICROLITE_TEST_TARGETS))
-
 test: test_micro_speech $(MICROLITE_TEST_TARGETS)
 
 # Gets rid of all generated files.
-- 
GitLab


From 6c4622385b762da1537a83e21b67d135c2890640 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 11:48:32 -0800
Subject: [PATCH 297/873] Treat a threshold of None as invalid in Keras
 metrics._assert_thresholds_range.

PiperOrigin-RevId: 224853938
---
 tensorflow/python/keras/metrics.py      | 26 ++++++++++++-------------
 tensorflow/python/keras/metrics_test.py |  6 ++++++
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 331a8636d1..1d1f3b4586 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -171,8 +171,8 @@ class _ConfusionMatrix(Enum):
 
 
 def _assert_thresholds_range(thresholds):
-  invalid_thresholds = [t for t in thresholds if t < 0 or t > 1]
-  if any(invalid_thresholds):
+  invalid_thresholds = [t for t in thresholds if t is None or t < 0 or t > 1]
+  if invalid_thresholds:
     raise ValueError('Threshold values must be in [0, 1]. Invalid values: {}'
                      .format(invalid_thresholds))
 
@@ -870,11 +870,11 @@ class _ConfusionMatrixConditionCount(Metric):
     super(_ConfusionMatrixConditionCount, self).__init__(name=name, dtype=dtype)
     self._confusion_matrix_cond = confusion_matrix_cond
     self.thresholds = 0.5 if thresholds is None else thresholds
-    thresholds = to_list(thresholds)
-    _assert_thresholds_range(thresholds)
+    thresholds_list = to_list(self.thresholds)
+    _assert_thresholds_range(thresholds_list)
     self.accumulator = self.add_weight(
         'accumulator',
-        shape=(len(thresholds),),
+        shape=(len(thresholds_list),),
         initializer=init_ops.zeros_initializer)
 
   def update_state(self, y_true, y_pred, sample_weight=None):
@@ -1153,15 +1153,15 @@ class Precision(Metric):
     """
     super(Precision, self).__init__(name=name, dtype=dtype)
     self.thresholds = 0.5 if thresholds is None else thresholds
-    thresholds = to_list(thresholds)
-    _assert_thresholds_range(thresholds)
+    thresholds_list = to_list(self.thresholds)
+    _assert_thresholds_range(thresholds_list)
     self.tp = self.add_weight(
         'true_positives',
-        shape=(len(thresholds),),
+        shape=(len(thresholds_list),),
         initializer=init_ops.zeros_initializer)
     self.fp = self.add_weight(
         'false_positives',
-        shape=(len(thresholds),),
+        shape=(len(thresholds_list),),
         initializer=init_ops.zeros_initializer)
 
   def update_state(self, y_true, y_pred, sample_weight=None):
@@ -1238,15 +1238,15 @@ class Recall(Metric):
     """
     super(Recall, self).__init__(name=name, dtype=dtype)
     self.thresholds = 0.5 if thresholds is None else thresholds
-    thresholds = to_list(thresholds)
-    _assert_thresholds_range(thresholds)
+    thresholds_list = to_list(self.thresholds)
+    _assert_thresholds_range(thresholds_list)
     self.tp = self.add_weight(
         'true_positives',
-        shape=(len(thresholds),),
+        shape=(len(thresholds_list),),
         initializer=init_ops.zeros_initializer)
     self.fn = self.add_weight(
         'false_negatives',
-        shape=(len(thresholds),),
+        shape=(len(thresholds_list),),
         initializer=init_ops.zeros_initializer)
 
   def update_state(self, y_true, y_pred, sample_weight=None):
diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 92398acd8e..9cad948966 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -369,6 +369,12 @@ class KerasMetricsTest(test.TestCase):
     result = self.evaluate(result_t)
     self.assertAlmostEqual(result, 0.93, 2)  # 2.5/2.7
 
+  def test_assert_thresholds_range(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        r'Threshold values must be in \[0, 1\]. Invalid values: \[None\]'):
+      metrics._assert_thresholds_range([None, 0.5])
+
 
 def _get_simple_sequential_model(compile_metrics):
   model = Sequential()
-- 
GitLab


From 4e7564ef05c456a7961e37eb0a6a77a04ca028c5 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Mon, 10 Dec 2018 11:52:35 -0800
Subject: [PATCH 298/873] Add new flag to GrapplerItem::AllowedOptimizations

PiperOrigin-RevId: 224854657
---
 tensorflow/core/grappler/grappler_item.h            | 13 ++++++-------
 .../core/grappler/optimizers/function_optimizer.cc  |  2 +-
 .../core/grappler/optimizers/meta_optimizer.cc      |  7 ++++---
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tensorflow/core/grappler/grappler_item.h b/tensorflow/core/grappler/grappler_item.h
index 9051542988..1ae551f5ac 100644
--- a/tensorflow/core/grappler/grappler_item.h
+++ b/tensorflow/core/grappler/grappler_item.h
@@ -86,13 +86,12 @@ struct GrapplerItem {
     // Is it allowed to add nodes to the graph that do not have registered
     // gradient function.
     bool non_differentiable_rewrites = true;
-    // By default we are not allowed to inline ops with side effects into the
-    // main graph, because we can't guarantee that after pruning these ops will
-    // be executed. However if we are optimizing a function library (see
-    // meta_optimizer.cc) and a graph was instantiated by a function definition,
-    // we can do that, because functions guarantee that all side effects will be
-    // executed (see function_optimizer.cc for details).
-    bool inline_ops_with_side_effects = false;
+
+    // By default we are allowed to prune ops with side-effects from the main
+    // graph if they are not in transitive fanin of the fetch nodes. If we are
+    // optimizing a graph that was instantiated by a function definition, we
+    // must keep all side effects intact.
+    bool prune_ops_with_side_effects = true;
   };
 
   const std::unordered_set<string>& devices() const;
diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc
index 8beebb9049..7069e5ea20 100644
--- a/tensorflow/core/grappler/optimizers/function_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc
@@ -1472,7 +1472,7 @@ Status InlineIndirectFunctionCall(const NodeDef& func_node,
       // for the function body, because functions have strict semantics.
 
       if (num_fanouts == 0 && happens_after.empty() &&
-          !ctx->allowed_optimizations().inline_ops_with_side_effects) {
+          ctx->allowed_optimizations().prune_ops_with_side_effects) {
         return errors::Internal(
             "Can't inline a function with a side-effectful op with empty "
             "fanouts and empty output control edge set. Function body node: ",
diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
index 572cc41d76..7b788c613c 100644
--- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc
@@ -533,9 +533,10 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
         VLOG(3) << added_devices.error_message();
       }
 
-      // We can safely inline nested function calls with side-effectful ops into
-      // the function body (see function_optimizer.cc for details).
-      func_item.allowed_optimizations().inline_ops_with_side_effects = true;
+      // We are not allowed to prune side effects from the graph instantiated
+      // by the function definition, because we must guarantee function
+      // execution semantics wrt side effects (see function_optimizer.cc).
+      func_item.allowed_optimizations().prune_ops_with_side_effects = false;
 
       // Optimize function body graph.
       GraphDef optimized_func_graph;
-- 
GitLab


From 95358d2da35254bd0bcef84faf5094522178f4ea Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 10 Dec 2018 11:59:26 -0800
Subject: [PATCH 299/873] Changing the copy-on-write semantics of resource
 variables.

A variable now has a bit which can be turned on which, when turned on,
makes that variable act as copy-on-read instead of copy-on-write. This
allows sparse writes to happen concurrently while only holding a shared
lock, mimicking the use_locking behavior of ref variables.

PiperOrigin-RevId: 224855851
---
 tensorflow/compiler/jit/xla_device_context.cc |   7 +
 tensorflow/compiler/jit/xla_device_context.h  |   3 +
 .../gpu/gpu_util_platform_specific.cc         |   8 +
 .../core/common_runtime/gpu_device_context.h  |   4 +
 tensorflow/core/framework/device_base.h       |   7 +
 tensorflow/core/framework/rendezvous_test.cc  |   6 +
 tensorflow/core/framework/resource_var.h      |  50 ++-
 tensorflow/core/framework/tensor.h            |   8 +-
 tensorflow/core/kernels/BUILD                 |   1 +
 .../core/kernels/resource_variable_ops.cc     |  93 +++++-
 tensorflow/core/kernels/scatter_nd_op.cc      |   2 +-
 tensorflow/core/kernels/strided_slice_op.cc   |   4 +-
 .../core/kernels/training_op_helpers.cc       |  64 ----
 tensorflow/core/kernels/training_op_helpers.h | 178 +++++++++--
 tensorflow/core/kernels/training_ops.cc       | 292 ++++++++++--------
 .../resource_variable_ops_test.py             |  14 +
 16 files changed, 505 insertions(+), 236 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index 6e6532731e..1f3afe8822 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -79,6 +79,13 @@ XlaDeviceContext::XlaDeviceContext(
   }
 }
 
+void XlaDeviceContext::CopyTensorInSameDevice(const Tensor* input_tensor,
+                                              Device* device,
+                                              Tensor* output_tensor,
+                                              StatusCallback done) const {
+  done(errors::Unimplemented("XLA->XLA same-device copies not implemented."));
+}
+
 void XlaDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
                                              Device* device,
                                              Tensor* device_tensor,
diff --git a/tensorflow/compiler/jit/xla_device_context.h b/tensorflow/compiler/jit/xla_device_context.h
index 1e18df197a..e45db989fa 100644
--- a/tensorflow/compiler/jit/xla_device_context.h
+++ b/tensorflow/compiler/jit/xla_device_context.h
@@ -62,6 +62,9 @@ class XlaDeviceContext : public DeviceContext {
   void CopyDeviceTensorToCPU(const Tensor* device_tensor,
                              absl::string_view tensor_name, Device* device,
                              Tensor* cpu_tensor, StatusCallback done) override;
+  void CopyTensorInSameDevice(const Tensor* input_tensor, Device* device,
+                              Tensor* output_tensor,
+                              StatusCallback done) const override;
 
   xla::LocalClient* client() const { return client_; }
   se::Stream* stream() const { return stream_.get(); }
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc b/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
index 4bc88ffc8c..0ef39fb3d7 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util_platform_specific.cc
@@ -37,6 +37,14 @@ void GPUDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
   GPUUtil::CopyGPUTensorToCPU(device, this, device_tensor, cpu_tensor, done);
 }
 
+void GPUDeviceContext::CopyTensorInSameDevice(const Tensor* input_tensor,
+                                              Device* device,
+                                              Tensor* output_tensor,
+                                              StatusCallback done) const {
+  GPUUtil::CopyGPUTensorToSameGPU(device, this, input_tensor, output_tensor,
+                                  done);
+}
+
 Status GPUDeviceContext::ThenExecute(Device* device, se::Stream* stream,
                                      std::function<void()> func) {
   const DeviceBase::GpuDeviceInfo* gpu_info =
diff --git a/tensorflow/core/common_runtime/gpu_device_context.h b/tensorflow/core/common_runtime/gpu_device_context.h
index 3603808152..f513526724 100644
--- a/tensorflow/core/common_runtime/gpu_device_context.h
+++ b/tensorflow/core/common_runtime/gpu_device_context.h
@@ -57,6 +57,10 @@ class GPUDeviceContext : public DeviceContext {
                              Device* device, Tensor* cpu_tensor,
                              StatusCallback done) override;
 
+  void CopyTensorInSameDevice(const Tensor* input_tensor, Device* device,
+                              Tensor* output_tensor,
+                              StatusCallback done) const override;
+
   void MaintainLifetimeOnStream(const Tensor* t,
                                 se::Stream* stream) const override {}
 
diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h
index 446c31b17f..321947aca8 100644
--- a/tensorflow/core/framework/device_base.h
+++ b/tensorflow/core/framework/device_base.h
@@ -82,6 +82,13 @@ class DeviceContext : public core::RefCounted {
     done(errors::Internal("Unrecognized device type in CPU-to-device Copy"));
   }
 
+  // Copies a tensor in this device.
+  virtual void CopyTensorInSameDevice(const Tensor* input_tensor,
+                                      Device* device, Tensor* output_tensor,
+                                      StatusCallback done) const {
+    done(errors::Unimplemented("Copy in same device not implemented."));
+  }
+
   // "device_tensor" is a tensor on a non-CPU device.  Copies
   // device_tensor into "cpu_tensor".  "cpu_tensor" must be allocated
   // to be of the same size as "device_tensor".
diff --git a/tensorflow/core/framework/rendezvous_test.cc b/tensorflow/core/framework/rendezvous_test.cc
index de148f0bd3..7a777f064c 100644
--- a/tensorflow/core/framework/rendezvous_test.cc
+++ b/tensorflow/core/framework/rendezvous_test.cc
@@ -278,6 +278,12 @@ class DummyDeviceContext : public DeviceContext {
   ~DummyDeviceContext() override {}
   int stream_id() const { return stream_id_; }
 
+  void CopyTensorInSameDevice(const Tensor* input_tensor, Device* device,
+                              Tensor* output_tensor,
+                              StatusCallback done) const override {
+    done(Status::OK());
+  }
+
  private:
   const int stream_id_;
 };
diff --git a/tensorflow/core/framework/resource_var.h b/tensorflow/core/framework/resource_var.h
index ff7b3e78a7..f5de5dba88 100644
--- a/tensorflow/core/framework/resource_var.h
+++ b/tensorflow/core/framework/resource_var.h
@@ -20,14 +20,46 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Resource stored by variables in the resource manager
-// (new, resource-style version).
+// Resource stored by variables in the resource manager (new, resource-style
+// version).
+//
+// These variables have a mixed access mode: they can operate on copy-on-write
+// mode (the default) or copy-on-read mode (used only for sparse access).
+//
+// When copy-on-write mode is enabled reading the value of the variable involves
+// grabbing its mutex in shared mode and aliasing the internal tensor as the
+// output of the read operation, increasing its reference count. Writing,
+// conversely, works by, under an exclusive lock, detecting whether there are
+// outstanding aliases of the tensor, using the reference count, copying the
+// tensor if they exist, and writing to either the original or a copy with no
+// outstanding aliases. Sparse operations are not supported in copy-on-write
+// mode.
+//
+// When a variable is accessed sparsely it switches to copy-on-read mode. To
+// switch we need to grab an exclusive lock and might (if there are aliases)
+// need to copy the entire tensor. Once copy-on-read mode is enabled, no tensor
+// is allowed to alias the variable's internal tensor. This means dense reads
+// must return a copy of the variable, done while holding a shared lock. Dense
+// writes do not need to check whether aliases exist, and can always write
+// directly to the buffer without making a copy, while holding an exclusive
+// lock. Sparse reads and sparse writes, on the other hand, can be done under a
+// shared or exclusive mutex (the damage from writes under a shared mutex is
+// limited since no other buffer is allowed to alias the variable's
+// buffer). Using an exclusive mutex disallows concurrent writes and concurrent
+// sparse reads, providing some extra safety at the expense of performance,
+// while shared mutex allow for "hogwild" behavior. Doing sparse writes under a
+// shared mutex prevents them from overlapping with dense writes, which is
+// necessary as dense writes can change the shape the of the tensor.
+//
+// Transitioning a variable from copy-on-read mode to copy-on-write mode is
+// currently not supported. To upgrade a variable from copy-on-write to
+// copy-on-read use `EnsureSparseVariableAccess()`, and then grab the variable's
+// mutex as desired. To access the variable in dense mode grab the mutex either
+// directly or via `MaybeLockVariableInputMutexesInOrder` on all variables being
+// modified and then call `PrepareToUpdateVariable` on them in any order.
 class Var : public ResourceBase {
  public:
   explicit Var(DataType dtype) : tensor_(dtype) {}
-  // Not copyable or movable.
-  Var(const Var&) = delete;
-  Var& operator=(const Var&) = delete;
 
   // When locking multiple variables, the locks must be acquired in order of
   // increasing mu() address.
@@ -48,11 +80,19 @@ class Var : public ResourceBase {
   bool is_initialized = false;  // GUARDED_BY(mu_) but annotalysis doesn't like
                                 // it.
 
+  // Also fake-guarded by mu_. Should be set to True whenever any sparse
+  // operation uses the variable. Once this is true no tensor is allowed to
+  // alias the memory of the variable, and we always copy the variable on
+  // reads. This allows sparse operations to happen with only a shared lock if
+  // so desired.
+  std::atomic<bool> copy_on_read_mode{false};
+
  private:
   mutex mu_;
   Tensor tensor_;
 
   ~Var() override {}
+  TF_DISALLOW_COPY_AND_ASSIGN(Var);
 };
 
 }  //  end namespace tensorflow
diff --git a/tensorflow/core/framework/tensor.h b/tensorflow/core/framework/tensor.h
index 6e03cf9f6f..009dd0846d 100644
--- a/tensorflow/core/framework/tensor.h
+++ b/tensorflow/core/framework/tensor.h
@@ -45,6 +45,7 @@ class TensorBuffer;
 class TensorCApi;
 class TensorDescription;
 class TensorProto;
+class Var;
 
 namespace batch_util {
 Status CopyElementToSlice(Tensor element, Tensor* parent, int64 index);
@@ -581,11 +582,16 @@ class Tensor {
   friend class XlaTensor;             // For access to RefCountIsOne().
   friend class XlaTensorBuffer;  // For access to the private constructor taking
                                  // the buffer
+  friend class Var;
   template <typename Device, typename T>
   friend class AssignVariableOp;  // For access to RefCountIsOne().
   template <typename Device, typename T>
   friend Status PrepareToUpdateVariable(
-      OpKernelContext* ctx, Tensor* tensor);  // For access to RefCountIsOne().
+      OpKernelContext* ctx, Tensor* tensor,
+      bool copy_on_read_mode);  // For access to RefCountIsOne().
+  template <typename Device, typename T>
+  friend Status EnsureSparseVariableAccess(
+      OpKernelContext* ctx, Var* var);  // For access to RefCountIsOne().
   friend Status batch_util::CopyElementToSlice(
       Tensor element, Tensor* parent,
       int64 index);                // For access to RefCountIsOne().
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 0e5d8d765a..e8b1dd270f 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -2196,6 +2196,7 @@ tf_kernel_library(
         ":state",
         ":training_op_helpers",
         ":variable_ops",
+        "//tensorflow/core:core_cpu_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index 170b08b4b7..4167b60051 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -55,6 +55,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/strings/str_join.h"
+#include "tensorflow/core/common_runtime/device.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/resource_mgr.h"
@@ -84,6 +85,47 @@ ReadVariableOp::ReadVariableOp(OpKernelConstruction* c) : OpKernel(c) {
   OP_REQUIRES_OK(c, c->GetAttr("dtype", &dtype_));
 }
 
+namespace {
+Status CopyVariable(int output_idx, OpKernelContext* ctx, const Tensor* t) {
+  Tensor* output;
+  Notification n;
+  Status status;
+  AllocatorAttributes attr;
+  if (t->dtype() == DT_VARIANT) {
+    attr.set_on_host(true);
+  }
+  TF_RETURN_IF_ERROR(
+      ctx->allocate_output(output_idx, t->shape(), &output, attr));
+  if (t->dtype() == DT_VARIANT) {
+    output->flat<Variant>() = t->flat<Variant>();
+  } else if (ctx->op_device_context() != nullptr) {
+    // TODO(apassos): remove the down_cast by just returning Device* from
+    // OpKernelContext
+    Device* device = static_cast<Device*>(ctx->device());
+    ctx->op_device_context()->CopyTensorInSameDevice(
+        t, device, output, [&n, &status](const Status& s) {
+          status = s;
+          n.Notify();
+        });
+    n.WaitForNotification();
+    return status;
+  } else {
+    switch (t->dtype()) {
+#define HANDLER(type)                       \
+  case DataTypeToEnum<type>::value:         \
+    output->flat<type>() = t->flat<type>(); \
+    break;
+      TF_CALL_ALL_TYPES(HANDLER);
+#undef HANDLER
+      default:
+        return errors::Internal("Unsupported dtype", t->dtype());
+    }
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
 void ReadVariableOp::Compute(OpKernelContext* ctx) {
   Var* variable = nullptr;
   const ResourceHandle& handle = HandleFromInput(ctx, 0);
@@ -100,12 +142,16 @@ void ReadVariableOp::Compute(OpKernelContext* ctx) {
   // holding a shared lock to guarantee ordering of reads and
   // writes.
   tf_shared_lock ml(*variable->mu());
-  const Tensor& t = *variable->tensor();
-  OP_REQUIRES(ctx, dtype_ == t.dtype(),
+  const Tensor* t = variable->tensor();
+  OP_REQUIRES(ctx, dtype_ == t->dtype(),
               errors::InvalidArgument(
                   "Trying to read variable with wrong dtype. Expected ",
-                  DataTypeString(dtype_), " got ", DataTypeString(t.dtype())));
-  ctx->set_output(0, t);
+                  DataTypeString(dtype_), " got ", DataTypeString(t->dtype())));
+  if (variable->copy_on_read_mode.load()) {
+    OP_REQUIRES_OK(ctx, CopyVariable(0, ctx, t));
+  } else {
+    ctx->set_output(0, *t);
+  }
 }
 
 ReadVariablesOp::ReadVariablesOp(OpKernelConstruction* c) : OpKernel(c) {
@@ -146,14 +192,18 @@ void ReadVariablesOp::Compute(OpKernelContext* ctx) {
     // holding a shared lock to guarantee ordering of reads and
     // writes.
     tf_shared_lock ml(*variables[i]->mu());
-    const Tensor& t = *variables[i]->tensor();
-    OP_REQUIRES(ctx, dtypes_[i] == t.dtype(),
+    OP_REQUIRES(ctx, dtypes_[i] == variables[i]->tensor()->dtype(),
                 errors::InvalidArgument(
                     "Trying to read variable ", handles[i]->name(),
                     " from Container: ", handles[i]->container(),
                     " with wrong dtype. Expected ", DataTypeString(dtypes_[i]),
-                    " got ", DataTypeString(t.dtype())));
-    ctx->set_output(i, t);
+                    " got ", DataTypeString(variables[i]->tensor()->dtype())));
+    if (variables[i]->copy_on_read_mode.load()) {
+      OP_REQUIRES_OK(ctx, CopyVariable(i, ctx, variables[i]->tensor()));
+    } else {
+      const Tensor& t = *variables[i]->tensor();
+      ctx->set_output(i, t);
+    }
   }
 }
 
@@ -308,8 +358,23 @@ class AssignVariableOp : public OpKernel {
                     "Trying to assign variable with wrong dtype. Expected ",
                     DataTypeString(variable->tensor()->dtype()), " got ",
                     DataTypeString(dtype_)));
+    if (variable->copy_on_read_mode.load()) {
+      PersistentTensor unused;
+      Tensor* tmp;
+      AllocatorAttributes attr;
+      attr.set_gpu_compatible(true);
+      attr.set_nic_compatible(true);
+      OP_REQUIRES_OK(context,
+                     context->allocate_persistent(value.dtype(), value.shape(),
+                                                  &unused, &tmp, attr));
+      functor::DenseUpdate<Device, T, ASSIGN> copy_functor;
+      copy_functor(context->eigen_device<Device>(), tmp->flat<T>(),
+                   value.flat<T>());
+      *variable->tensor() = *tmp;
+    } else {
+      *variable->tensor() = value;
+    }
     variable->is_initialized = true;
-    *variable->tensor() = value;
   }
 
  private:
@@ -442,8 +507,9 @@ class AssignUpdateVariableOp : public OpKernel {
                                         " using a Tensor with shape ",
                                         value.shape().DebugString(),
                                         ", shapes must be equal."));
-    OP_REQUIRES_OK(context,
-                   PrepareToUpdateVariable<Device, T>(context, var_tensor));
+    OP_REQUIRES_OK(
+        context, PrepareToUpdateVariable<Device, T>(
+                     context, var_tensor, variable->copy_on_read_mode.load()));
     functor::DenseUpdate<Device, T, Op> update_functor;
     update_functor(context->eigen_device<Device>(), var_tensor->flat<T>(),
                    value.flat<T>());
@@ -524,6 +590,7 @@ class ResourceGatherOp : public OpKernel {
     Var* v = nullptr;
     OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
     core::ScopedUnref su(v);
+    OP_REQUIRES_OK(c, EnsureSparseVariableAccess<Device, T>(c, v));
     // NOTE: We hold the lock for the whole gather operation instead
     // of increasing the reference count of v->tensor() to avoid a
     // situation where a write to the same variable will see a
@@ -639,9 +706,9 @@ class ResourceScatterUpdateOp : public OpKernel {
     Var* v = nullptr;
     OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
     core::ScopedUnref unref_v(v);
-    mutex_lock ml(*v->mu());
+    OP_REQUIRES_OK(c, EnsureSparseVariableAccess<Device, T>(c, v));
+    tf_shared_lock ml(*v->mu());
     Tensor* params = v->tensor();
-    OP_REQUIRES_OK(c, PrepareToUpdateVariable<Device, T>(c, params));
     const Tensor& indices = c->input(1);
     const Tensor& updates = c->input(2);
 
diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc
index 63bb793fdc..b466e57249 100644
--- a/tensorflow/core/kernels/scatter_nd_op.cc
+++ b/tensorflow/core/kernels/scatter_nd_op.cc
@@ -231,6 +231,7 @@ class ScatterNdUpdateOp : public OpKernel {
       Var* v;
       OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
       core::ScopedUnref scoped_unref(v);
+      OP_REQUIRES_OK(c, EnsureSparseVariableAccess<Device, T>(c, v));
       mutex_lock m(*v->mu());
       DoCompute(c);
     } else if (use_exclusive_lock_) {
@@ -258,7 +259,6 @@ class ScatterNdUpdateOp : public OpKernel {
       Var* v;
       OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
       Tensor* t = v->tensor();
-      OP_REQUIRES_OK(c, PrepareToUpdateVariable<Device, T>(c, t));
       params = *t;
       params_shape = params.shape();
     } else if (IsRefType(c->input_dtype(0))) {
diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc
index 70a7ddbd06..6db68f937d 100644
--- a/tensorflow/core/kernels/strided_slice_op.cc
+++ b/tensorflow/core/kernels/strided_slice_op.cc
@@ -307,9 +307,9 @@ class StridedSliceAssignOp : public OpKernel {
       OP_REQUIRES_OK(context,
                      LookupResource(context, HandleFromInput(context, 0), &v));
       core::ScopedUnref scoped_unref(v);
-      mutex_lock ml(*v->mu());
       OP_REQUIRES_OK(context,
-                     PrepareToUpdateVariable<Device, T>(context, v->tensor()));
+                     EnsureSparseVariableAccess<Device, T>(context, v));
+      mutex_lock ml(*v->mu());
       old_lhs = v->tensor();
       OP_REQUIRES(context, old_lhs->dtype() == DataTypeToEnum<T>::value,
                   errors::InvalidArgument(
diff --git a/tensorflow/core/kernels/training_op_helpers.cc b/tensorflow/core/kernels/training_op_helpers.cc
index 4262a5404b..20c08cf8fb 100644
--- a/tensorflow/core/kernels/training_op_helpers.cc
+++ b/tensorflow/core/kernels/training_op_helpers.cc
@@ -19,70 +19,6 @@ limitations under the License.
 
 namespace tensorflow {
 
-mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input,
-                                Var** maybe_resource) {
-  *maybe_resource = nullptr;
-  if (ctx->input_dtype(input) == DT_RESOURCE) {
-    if (LookupResource(ctx, HandleFromInput(ctx, input), maybe_resource).ok()) {
-      return (*maybe_resource)->mu();
-    } else {
-      ctx->CtxFailureWithWarning(
-          errors::Internal("Invalid variable reference."));
-      return nullptr;
-    }
-  }
-  return ctx->input_ref_mutex(input);
-}
-
-// MaybeLockVariableInputMutexesInOrder is a helper function to acquire mutexes
-// in address order to mitigate deadlock.  Returns a structure that, when
-// deleted, will release the acquired mutexes. Safe to pass duplicates - will
-// only lock each distinct mutex once.  If do_lock is false, returns
-// immediately.  Note that this silently doesn't lock mutexes for invalid
-// variable references; in all usages this is followed by GetInputTensor which
-// will signal a failure.
-VariableInputLockHolder MaybeLockVariableInputMutexesInOrder(
-    OpKernelContext* ctx, bool do_lock, const std::vector<int>& input_ids) {
-  bool any_resource = false;
-  for (auto i : input_ids) {
-    if (ctx->input_dtype(i) == DT_RESOURCE) {
-      any_resource = true;
-      break;
-    }
-  }
-  if (!do_lock && !any_resource) {
-    return VariableInputLockHolder({}, {});
-  }
-  std::vector<Var*> vars;
-  std::vector<mutex*> mutexes;
-  std::vector<int> acquire_order;
-  for (auto input : input_ids) {
-    Var* var;
-    mutex* mutex = GetTrainingVariableMutex(ctx, input, &var);
-    if (var) vars.push_back(var);
-    // Only lock each mutex once if duplicates exist (n^2 but n is 2 or 3).
-    if (std::find(mutexes.begin(), mutexes.end(), mutex) == mutexes.end()) {
-      acquire_order.push_back(mutexes.size());
-      mutexes.push_back(mutex);
-    }
-  }
-  std::sort(acquire_order.begin(), acquire_order.end(),
-            [&mutexes](int a, int b) { return mutexes[a] < mutexes[b]; });
-
-  std::unique_ptr<std::vector<mutex_lock>> locks =
-      MakeUnique<std::vector<mutex_lock>>();
-  locks->reserve(acquire_order.size());
-
-  for (auto input : acquire_order) {
-    Var* var;
-    mutex* mu = GetTrainingVariableMutex(ctx, input, &var);
-    core::ScopedUnref scoped_unref(var);
-    if (mu != nullptr) {
-      locks->emplace_back(*mu);
-    }
-  }
-  return VariableInputLockHolder(std::move(vars), std::move(locks));
-}
 
 void MaybeForwardRefInputToRefOutput(OpKernelContext* ctx, int input,
                                      int output) {
diff --git a/tensorflow/core/kernels/training_op_helpers.h b/tensorflow/core/kernels/training_op_helpers.h
index 9f173a80f7..e96cd023fc 100644
--- a/tensorflow/core/kernels/training_op_helpers.h
+++ b/tensorflow/core/kernels/training_op_helpers.h
@@ -17,30 +17,72 @@ limitations under the License.
 #define TENSORFLOW_CORE_KERNELS_TRAINING_OP_HELPERS_H_
 
 #include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/variant_op_registry.h"
 #include "tensorflow/core/kernels/dense_update_functor.h"
 #include "tensorflow/core/kernels/variable_ops.h"
 
 namespace tensorflow {
 
-// Returns a borrowed pointer to the mutex for the variable `input` in `ctx`.
-//
-// If `input` corresponds to a `DT_RESOURCE`-type variable input,
-// `*maybe_resource` will be updated to contain the underlying resource, and the
-// caller will be responsible for calling `Unref()` on that resource.
-mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input,
-                                Var** maybe_resource);
+// Must be called before performing a sparse operation on a variable. Ensures
+// that no concurrent dense operations can happen while holding the variable's
+// lock.
+template <typename Device, typename T>
+Status EnsureSparseVariableAccess(OpKernelContext* ctx, Var* var) {
+  if (var->copy_on_read_mode.load()) {
+    return Status::OK();
+  }
+  mutex_lock ml(*var->mu());
+  // Once copy-on-read mode is True the refcount is guaranteed to be 1. This can
+  // also happen if there are no concurrent reads of the variable and
+  // copy-on-read mode is false.
+  if (var->tensor()->RefCountIsOne()) {
+    var->copy_on_read_mode.store(true);
+    return Status::OK();
+  }
+  PersistentTensor unused;
+  Tensor* tmp;
+  if (std::is_same<T, Variant>::value) {
+    AllocatorAttributes attr;
+    attr.set_on_host(true);
+    TF_RETURN_IF_ERROR(ctx->allocate_persistent(
+        var->tensor()->dtype(), var->tensor()->shape(), &unused, &tmp, attr));
+
+    const auto elements_in = var->tensor()->flat<Variant>();
+    auto elements_out = tmp->flat<Variant>();
+    for (int64 i = 0; i < elements_in.size(); ++i) {
+      elements_out(i) = elements_in(i);
+    }
+  } else {
+    AllocatorAttributes attr;
+    attr.set_gpu_compatible(true);
+    attr.set_nic_compatible(true);
+    TF_RETURN_IF_ERROR(ctx->allocate_persistent(
+        var->tensor()->dtype(), var->tensor()->shape(), &unused, &tmp, attr));
+    functor::DenseUpdate<Device, T, ASSIGN> copy_functor;
+    copy_functor(ctx->eigen_device<Device>(), tmp->flat<T>(),
+                 const_cast<const Tensor*>(var->tensor())->flat<T>());
+  }
+  *var->tensor() = *tmp;
+  var->copy_on_read_mode.store(true);
+  return Status::OK();
+}
 
 // Utility structure that releases a sequence of borrowed mutexes when it is
 // deleted.
 struct VariableInputLockHolder {
  public:
-  VariableInputLockHolder(std::vector<Var*> vars,
-                          std::unique_ptr<std::vector<mutex_lock>> locks)
-      : vars_(std::move(vars)), locks_(std::move(locks)) {}
+  VariableInputLockHolder(
+      std::vector<Var*> vars, std::unique_ptr<std::vector<mutex_lock>> locks,
+      std::unique_ptr<std::vector<tf_shared_lock>> shared_locks)
+      : vars_(std::move(vars)),
+        locks_(std::move(locks)),
+        shared_locks_(std::move(shared_locks)) {}
 
   VariableInputLockHolder(VariableInputLockHolder&& other)
-      : vars_(std::move(other.vars_)), locks_(std::move(other.locks_)) {}
+      : vars_(std::move(other.vars_)),
+        locks_(std::move(other.locks_)),
+        shared_locks_(std::move(other.shared_locks_)) {}
 
   ~VariableInputLockHolder() {
     // Release the locks before unreffing the Vars, because each lock
@@ -56,10 +98,95 @@ struct VariableInputLockHolder {
   // NOTE: Use a `std::unique_ptr` instead of moving in a vector directly,
   // because a `std::vector<mutex_lock>` is not movable on all platforms.
   std::unique_ptr<std::vector<mutex_lock>> locks_;
+  std::unique_ptr<std::vector<tf_shared_lock>> shared_locks_;
 };
 
+// Returns a borrowed pointer to the mutex for the variable `input` in `ctx`.
+//
+// If `input` corresponds to a `DT_RESOURCE`-type variable input,
+// `*maybe_resource` will be updated to contain the underlying resource, and the
+// caller will be responsible for calling `Unref()` on that resource.
+template <typename Device, typename T>
+mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input, bool sparse,
+                                Var** maybe_resource) {
+  *maybe_resource = nullptr;
+  if (ctx->input_dtype(input) == DT_RESOURCE) {
+    if (LookupResource(ctx, HandleFromInput(ctx, input), maybe_resource).ok()) {
+      if (sparse) {
+        EnsureSparseVariableAccess<Device, T>(ctx, *maybe_resource);
+      }
+      return (*maybe_resource)->mu();
+    } else {
+      ctx->CtxFailureWithWarning(
+          errors::Internal("Invalid variable reference."));
+      return nullptr;
+    }
+  }
+  return ctx->input_ref_mutex(input);
+}
+
+// MaybeLockVariableInputMutexesInOrder is a helper function to acquire mutexes
+// in address order to mitigate deadlock.  Returns a structure that, when
+// deleted, will release the acquired mutexes. Safe to pass duplicates - will
+// only lock each distinct mutex once. If sparse is true will ensure the
+// variable gets switched to copy-on-read mode before trying to acquire the
+// locks. If do_lock is false, returns immediately for reference variables. For
+// resource variables in copy-on-read-mode it will grab a shared lock if do_lock
+// is false, exclusive lock otherwise.  Note that this silently doesn't lock
+// mutexes for invalid variable references; in all usages this is followed by
+// GetInputTensor which will signal a failure.
+template <typename Device, typename T>
 VariableInputLockHolder MaybeLockVariableInputMutexesInOrder(
-    OpKernelContext* ctx, bool do_lock, const std::vector<int>& input_ids);
+    OpKernelContext* ctx, bool do_lock, bool sparse,
+    const std::vector<int>& input_ids) {
+  bool any_resource = false;
+  for (auto i : input_ids) {
+    if (ctx->input_dtype(i) == DT_RESOURCE) {
+      any_resource = true;
+      break;
+    }
+  }
+  if (!do_lock && !any_resource) {
+    return VariableInputLockHolder({}, {}, {});
+  }
+  std::vector<Var*> vars;
+  std::vector<mutex*> mutexes;
+  std::vector<int> acquire_order;
+  for (auto input : input_ids) {
+    Var* var;
+    mutex* mutex =
+        GetTrainingVariableMutex<Device, T>(ctx, input, sparse, &var);
+    if (var) vars.push_back(var);
+    // Only lock each mutex once if duplicates exist (n^2 but n is 2 or 3).
+    if (std::find(mutexes.begin(), mutexes.end(), mutex) == mutexes.end()) {
+      acquire_order.push_back(mutexes.size());
+      mutexes.push_back(mutex);
+    }
+  }
+  std::sort(acquire_order.begin(), acquire_order.end(),
+            [&mutexes](int a, int b) { return mutexes[a] < mutexes[b]; });
+
+  std::unique_ptr<std::vector<mutex_lock>> locks =
+      absl::make_unique<std::vector<mutex_lock>>();
+  std::unique_ptr<std::vector<tf_shared_lock>> shared_locks =
+      absl::make_unique<std::vector<tf_shared_lock>>();
+  locks->reserve(acquire_order.size());
+
+  for (auto input : acquire_order) {
+    Var* var;
+    mutex* mu = GetTrainingVariableMutex<Device, T>(ctx, input, sparse, &var);
+    core::ScopedUnref scoped_unref(var);
+    if (mu != nullptr) {
+      if (do_lock) {
+        locks->emplace_back(*mu);
+      } else {
+        shared_locks->emplace_back(*mu);
+      }
+    }
+  }
+  return VariableInputLockHolder(std::move(vars), std::move(locks),
+                                 std::move(shared_locks));
+}
 
 void MaybeForwardRefInputToRefOutput(OpKernelContext* ctx, int input,
                                      int output);
@@ -68,8 +195,9 @@ void MaybeForwardRefInputToRefOutput(OpKernelContext* ctx, int input,
 // reference count of 1 before you update it.
 // REQUIRES: If you pass in variable->tensor(), *variable->mu() must be held.
 template <typename Device, typename T>
-Status PrepareToUpdateVariable(OpKernelContext* ctx, Tensor* tensor) {
-  if (!tensor->RefCountIsOne()) {
+Status PrepareToUpdateVariable(OpKernelContext* ctx, Tensor* tensor,
+                               bool copy_on_read_mode) {
+  if (copy_on_read_mode || !tensor->RefCountIsOne()) {
     // Tensor's buffer is in use by some read, so we need to copy before
     // updating.
     PersistentTensor unused;
@@ -100,12 +228,14 @@ Status PrepareToUpdateVariable(OpKernelContext* ctx, Tensor* tensor) {
   return Status::OK();
 }
 
-// This gives you `*out`, a tensor you can update, corresponding to a
-// variable passed as input index `input`.  This handles the
-// differences between reference and resource variables.  For resource
-// variables, we ensure `*out` has a reference count of 1 (using
-// PrepareToUpdateVariable() to copy if necessary) unless
-// sparse && !lock_held, in which case it never copies.
+// This gives you `*out`, a tensor you can update, corresponding to a variable
+// passed as input index `input`.  This handles the differences between
+// reference and resource variables. For reference variables we can just grab
+// the tensor, grabbing the lock if lock_held is False.
+//
+// For resource variables we, if sparse is true, ensure it's in copy-on-read
+// mode, and then, regardless of the value of sparse, ensure its refcount is 1
+// (by potentially copying its contents). In this case lock_held is ignored.
 template <typename Device, typename T>
 Status GetInputTensorFromVariable(OpKernelContext* ctx, int input,
                                   bool lock_held, bool sparse, Tensor* out) {
@@ -113,7 +243,13 @@ Status GetInputTensorFromVariable(OpKernelContext* ctx, int input,
     Var* var;
     TF_RETURN_IF_ERROR(LookupResource(ctx, HandleFromInput(ctx, input), &var));
     core::ScopedUnref unref_var(var);
-    TF_RETURN_IF_ERROR(PrepareToUpdateVariable<Device, T>(ctx, var->tensor()));
+    if (sparse) {
+      TF_RETURN_IF_ERROR(EnsureSparseVariableAccess<Device, T>(ctx, var));
+      *out = *var->tensor();
+      return Status::OK();
+    }
+    TF_RETURN_IF_ERROR(PrepareToUpdateVariable<Device, T>(
+        ctx, var->tensor(), var->copy_on_read_mode.load()));
     *out = *var->tensor();
     return Status::OK();
   }
diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc
index 6504ad1b09..b2239ab5c3 100644
--- a/tensorflow/core/kernels/training_ops.cc
+++ b/tensorflow/core/kernels/training_ops.cc
@@ -465,11 +465,12 @@ class ApplyGradientDescentOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
 
     OP_REQUIRES(
         ctx, var.IsInitialized(),
@@ -506,11 +507,12 @@ class ApplyGradientDescentOp<SYCLDevice, T> : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<SYCLDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<SYCLDevice, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
 
     OP_REQUIRES(
         ctx, var.IsInitialized(),
@@ -600,7 +602,8 @@ class ApplyAdadeltaOp : public OpKernel {
 
   void Compute(OpKernelContext* ctx) override {
     Var* resource;
-    mutex* mu = GetTrainingVariableMutex(ctx, 0, &resource);
+    const bool sparse = false;
+    mutex* mu = GetTrainingVariableMutex<Device, T>(ctx, 0, sparse, &resource);
     core::ScopedUnref scoped_unref(resource);
     if (use_exclusive_lock_ && mu != nullptr) {
       mutex_lock l1(*mu);
@@ -624,14 +627,16 @@ class ApplyAdadeltaOp : public OpKernel {
 
   void DoValidate(OpKernelContext* ctx) {
     Tensor var;
+    const bool sparse = false;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     Tensor accum_update;
-    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, false, &accum_update));
+    OP_REQUIRES_OK(
+        ctx, GetInputTensorFromVariable<Device, T>(ctx, 2, use_exclusive_lock_,
+                                                   sparse, &accum_update));
 
     OP_REQUIRES(
         ctx, var.IsInitialized(),
@@ -678,14 +683,16 @@ class ApplyAdadeltaOp : public OpKernel {
   void DoCompute(OpKernelContext* ctx) {
     const Device& device = ctx->template eigen_device<Device>();
     Tensor var;
+    const bool sparse = false;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     Tensor accum_update;
-    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, false, &accum_update));
+    OP_REQUIRES_OK(
+        ctx, GetInputTensorFromVariable<Device, T>(ctx, 2, use_exclusive_lock_,
+                                                   sparse, &accum_update));
 
     const Tensor& lr = ctx->input(3);
     const Tensor& rho = ctx->input(4);
@@ -751,7 +758,8 @@ class SparseApplyAdadeltaOp : public OpKernel {
 
   void Compute(OpKernelContext* ctx) override {
     Var* var;
-    mutex* mu = GetTrainingVariableMutex(ctx, 0, &var);
+    const bool sparse = true;
+    mutex* mu = GetTrainingVariableMutex<CPUDevice, T>(ctx, 0, sparse, &var);
     core::ScopedUnref scoped_unref(var);
     // mu_accum is actually the same mutex as mu_var since currently we use a
     // global mutex.
@@ -767,14 +775,16 @@ class SparseApplyAdadeltaOp : public OpKernel {
 
   void DoCompute(OpKernelContext* ctx) {
     Tensor var;
+    const bool sparse = true;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum_grad;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 1, use_exclusive_lock_, true, &accum_grad));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum_grad));
     Tensor accum_update;
-    OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 2, use_exclusive_lock_, true, &accum_update));
+    OP_REQUIRES_OK(ctx,
+                   GetInputTensorFromVariable<CPUDevice, T>(
+                       ctx, 2, use_exclusive_lock_, sparse, &accum_update));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -907,11 +917,12 @@ class ApplyProximalGradientDescentOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
 
     OP_REQUIRES(
         ctx, var.IsInitialized(),
@@ -976,11 +987,12 @@ class SparseApplyProximalGradientDescentOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<CPUDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     OP_REQUIRES(ctx, TensorShapeUtils::IsVectorOrHigher(var.shape()),
                 errors::InvalidArgument("var must be at least 1 dimensional"));
 
@@ -1121,14 +1133,15 @@ class ApplyAdagradOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -1214,14 +1227,15 @@ class ApplyProximalAdagradOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -1316,14 +1330,15 @@ class SparseApplyAdagradOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<CPUDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 1, use_exclusive_lock_, true, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -1456,14 +1471,15 @@ class SparseApplyProximalAdagradOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<CPUDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 1, use_exclusive_lock_, true, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -1628,19 +1644,20 @@ class ApplyAdagradDAOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor gradient_accum;
     OP_REQUIRES_OK(
         ctx, GetInputTensorFromVariable<Device, T>(ctx, 1, use_exclusive_lock_,
-                                                   false, &gradient_accum));
+                                                   sparse, &gradient_accum));
     Tensor gradient_squared_accum;
     OP_REQUIRES_OK(
         ctx, GetInputTensorFromVariable<Device, T>(
-                 ctx, 2, use_exclusive_lock_, false, &gradient_squared_accum));
+                 ctx, 2, use_exclusive_lock_, sparse, &gradient_squared_accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -1729,19 +1746,20 @@ class SparseApplyAdagradDAOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<CPUDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor gradient_accum;
     OP_REQUIRES_OK(ctx,
                    GetInputTensorFromVariable<CPUDevice, T>(
-                       ctx, 1, use_exclusive_lock_, true, &gradient_accum));
+                       ctx, 1, use_exclusive_lock_, sparse, &gradient_accum));
     Tensor gradient_squared_accum;
     OP_REQUIRES_OK(
         ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                 ctx, 2, use_exclusive_lock_, true, &gradient_squared_accum));
+                 ctx, 2, use_exclusive_lock_, sparse, &gradient_squared_accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -1927,18 +1945,19 @@ class ApplyFtrlOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     Tensor linear;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, false, &linear));
+                            ctx, 2, use_exclusive_lock_, sparse, &linear));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -2079,17 +2098,18 @@ class SparseApplyFtrlOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, true, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     Tensor linear;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, true, &linear));
+                            ctx, 2, use_exclusive_lock_, sparse, &linear));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -2353,15 +2373,16 @@ class ApplyMomentumOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -2454,15 +2475,16 @@ class SparseApplyMomentumOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<CPUDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 1, use_exclusive_lock_, true, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -2572,15 +2594,16 @@ class ApplyKerasMomentumOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -2671,15 +2694,16 @@ class SparseApplyKerasMomentumOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<CPUDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor accum;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 1, use_exclusive_lock_, true, &accum));
+                            ctx, 1, use_exclusive_lock_, sparse, &accum));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -2783,18 +2807,19 @@ class ApplyAdamOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor m;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &m));
+                            ctx, 1, use_exclusive_lock_, sparse, &m));
     Tensor v;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, false, &v));
+                            ctx, 2, use_exclusive_lock_, sparse, &v));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -2873,18 +2898,19 @@ class ApplyAdamOp<SYCLDevice, T> : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<SYCLDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<SYCLDevice, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor m;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<SYCLDevice, T>(
-                            ctx, 1, use_exclusive_lock_, false, &m));
+                            ctx, 1, use_exclusive_lock_, sparse, &m));
     Tensor v;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<SYCLDevice, T>(
-                            ctx, 2, use_exclusive_lock_, false, &v));
+                            ctx, 2, use_exclusive_lock_, sparse, &v));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -3043,21 +3069,22 @@ class ApplyAdamWithAmsgradOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor m;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &m));
+                            ctx, 1, use_exclusive_lock_, sparse, &m));
     Tensor v;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, false, &v));
+                            ctx, 2, use_exclusive_lock_, sparse, &v));
     Tensor vhat;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 3, use_exclusive_lock_, false, &vhat));
+                            ctx, 3, use_exclusive_lock_, sparse, &vhat));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -3184,18 +3211,19 @@ class ApplyAdaMaxOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor m;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &m));
+                            ctx, 1, use_exclusive_lock_, sparse, &m));
     Tensor v;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, false, &v));
+                            ctx, 2, use_exclusive_lock_, sparse, &v));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -3312,18 +3340,19 @@ class ApplyRMSPropOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor ms;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &ms));
+                            ctx, 1, use_exclusive_lock_, sparse, &ms));
     Tensor mom;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, false, &mom));
+                            ctx, 2, use_exclusive_lock_, sparse, &mom));
 
     OP_REQUIRES(
         ctx, var.IsInitialized(),
@@ -3394,21 +3423,22 @@ class ApplyCenteredRMSPropOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2, 3});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2, 3});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor mg;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &mg));
+                            ctx, 1, use_exclusive_lock_, sparse, &mg));
     Tensor ms;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 2, use_exclusive_lock_, false, &ms));
+                            ctx, 2, use_exclusive_lock_, sparse, &ms));
     Tensor mom;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 3, use_exclusive_lock_, false, &mom));
+                            ctx, 3, use_exclusive_lock_, sparse, &mom));
 
     OP_REQUIRES(
         ctx, var.IsInitialized(),
@@ -3553,18 +3583,19 @@ class SparseApplyRMSPropOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<CPUDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor ms;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 1, use_exclusive_lock_, true, &ms));
+                            ctx, 1, use_exclusive_lock_, sparse, &ms));
     Tensor mom;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 2, use_exclusive_lock_, true, &mom));
+                            ctx, 2, use_exclusive_lock_, sparse, &mom));
 
     OP_REQUIRES(
         ctx, var.IsInitialized(),
@@ -3682,21 +3713,22 @@ class SparseApplyCenteredRMSPropOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override NO_THREAD_SAFETY_ANALYSIS {
-    auto locks = MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_,
-                                                      {0, 1, 2, 3});
+    const bool sparse = true;
+    auto locks = MaybeLockVariableInputMutexesInOrder<CPUDevice, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1, 2, 3});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 0, use_exclusive_lock_, true, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor mg;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 1, use_exclusive_lock_, true, &mg));
+                            ctx, 1, use_exclusive_lock_, sparse, &mg));
     Tensor ms;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 2, use_exclusive_lock_, true, &ms));
+                            ctx, 2, use_exclusive_lock_, sparse, &ms));
     Tensor mom;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<CPUDevice, T>(
-                            ctx, 3, use_exclusive_lock_, true, &mom));
+                            ctx, 3, use_exclusive_lock_, sparse, &mom));
 
     OP_REQUIRES(
         ctx, var.IsInitialized(),
@@ -3852,15 +3884,16 @@ class ApplyAddSignOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor m;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &m));
+                            ctx, 1, use_exclusive_lock_, sparse, &m));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
@@ -3958,15 +3991,16 @@ class ApplyPowerSignOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* ctx) override {
-    auto locks =
-        MaybeLockVariableInputMutexesInOrder(ctx, use_exclusive_lock_, {0, 1});
+    const bool sparse = false;
+    auto locks = MaybeLockVariableInputMutexesInOrder<Device, T>(
+        ctx, use_exclusive_lock_, sparse, {0, 1});
 
     Tensor var;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 0, use_exclusive_lock_, false, &var));
+                            ctx, 0, use_exclusive_lock_, sparse, &var));
     Tensor m;
     OP_REQUIRES_OK(ctx, GetInputTensorFromVariable<Device, T>(
-                            ctx, 1, use_exclusive_lock_, false, &m));
+                            ctx, 1, use_exclusive_lock_, sparse, &m));
     OP_REQUIRES(
         ctx, var.IsInitialized(),
         errors::FailedPrecondition(
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 433957fd1d..1dabcbb5c3 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import custom_gradient
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import list_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
@@ -953,6 +954,19 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       state_ops.scatter_sub(v, [1], [3])
       self.assertAllEqual([1.0, -1.0], v.numpy())
 
+  def testScatterUpdateVariant(self):
+    with context.eager_mode():
+      v = resource_variable_ops.ResourceVariable([
+          list_ops.empty_tensor_list(
+              element_dtype=dtypes.float32, element_shape=[])
+      ])
+      v.scatter_update(
+          ops.IndexedSlices(
+              list_ops.tensor_list_from_tensor([1., 2.], element_shape=[]), 0))
+      self.assertAllEqual(
+          list_ops.tensor_list_get_item(v[0], 0, element_dtype=dtypes.float32),
+          1.)
+
   def testScatterNdAddStateOps(self):
     with context.eager_mode():
       v = resource_variable_ops.ResourceVariable(
-- 
GitLab


From 841f5d9fc9fac4433ea57ee61fc4b4286cec5c2b Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Mon, 10 Dec 2018 12:02:47 -0800
Subject: [PATCH 300/873] Do not fail PartitionedCallOp kernel if Grappler
 failed

PiperOrigin-RevId: 224856604
---
 tensorflow/core/kernels/partitioned_function_ops.cc | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc
index ba51db219e..fbecd909be 100644
--- a/tensorflow/core/kernels/partitioned_function_ops.cc
+++ b/tensorflow/core/kernels/partitioned_function_ops.cc
@@ -191,10 +191,12 @@ class PartitionedCallOp : public AsyncOpKernel {
 
         // Run grappler passes on the graph. It is possible that these are
         // optimized by the graph executor already.
-        OP_REQUIRES_OK_ASYNC(ctx,
-                             OptimizeGraph(ctx, fbody->ret_nodes, overlay_lib,
-                                           device_set, cpu_device, &graph),
-                             done);
+        Status optimized = OptimizeGraph(ctx, fbody->ret_nodes, overlay_lib,
+                                         device_set, cpu_device, &graph);
+        if (!optimized.ok()) {
+          LOG(WARNING) << "Grappler optimization failed. Error: "
+                       << optimized.error_message();
+        }
 
         OP_REQUIRES_OK_ASYNC(
             ctx,
-- 
GitLab


From ee418c8ee26a4e816e6acf1954748aac4418e558 Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Mon, 10 Dec 2018 12:28:38 -0800
Subject: [PATCH 301/873] Add attribute to Keras model which generates an
 exportable tf.function. SaveModel save now looks for this attribute when
 searching for a function to export.

PiperOrigin-RevId: 224861089
---
 tensorflow/python/eager/def_function.py       |   4 +
 tensorflow/python/keras/BUILD                 |   1 +
 tensorflow/python/keras/engine/training.py    |   7 +-
 .../python/keras/engine/training_utils.py     |  60 +++++++
 .../keras/engine/training_utils_test.py       | 157 ++++++++++++++++++
 tensorflow/python/saved_model/save.py         |  26 +--
 tensorflow/python/saved_model/save_test.py    |  82 +++------
 7 files changed, 254 insertions(+), 83 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 6bacd7a962..3663d72999 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -342,6 +342,10 @@ class PolymorphicFunction(object):
     """The python function wrapped in this tf.function."""
     return self._python_function
 
+  @property
+  def input_signature(self):
+    return self._input_signature
+
   def get_initialization_function(self, *args, **kwargs):
     """Returns a `Function` object which initializes this function's variables.
 
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 36fea36389..faf58e0d93 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -848,6 +848,7 @@ py_test(
     deps = [
         ":keras",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python/saved_model:save_test",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
     ],
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 462694fda6..fe44bc20a1 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -1539,8 +1539,7 @@ class Model(Network):
 
     outputs = nest.flatten(outputs)
     self.outputs = outputs
-    self.output_names = [
-        'output_%d' % (i + 1) for i in range(len(self.outputs))]
+    self.output_names = training_utils.generic_output_names(outputs)
     self.built = True
 
   def fit(self,
@@ -2580,6 +2579,10 @@ class Model(Network):
       batch_size = 32
     return batch_size
 
+  @property
+  def _default_save_signature(self):
+    return training_utils.trace_model_call(self)
+
 
 class DistributedCallbackModel(Model):
   """Model that is used for callbacks with DistributionStrategy."""
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 01a09eb031..ec6b39704a 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -27,9 +27,11 @@ import six
 
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import callbacks as cbks
@@ -1191,3 +1193,61 @@ def get_static_batch_size(layer):
   if batch_input_shape is not None:
     return tensor_shape.as_dimension(batch_input_shape[0]).value
   return None
+
+
+def generic_output_names(outputs_list):
+  return ['output_%d' % (i + 1) for i in range(len(outputs_list))]
+
+
+def trace_model_call(model, input_signature=None):
+  """Trace the model call to create a tf.function for exporting a Keras model.
+
+  Args:
+    model: A Keras model.
+    input_signature: optional, a list of tf.TensorSpec objects specifying the
+      inputs to the model.
+
+  Returns:
+    A tf.function wrapping the model's call function with input signatures set.
+
+  Raises:
+    ValueError: if input signature cannot be inferred from the model.
+  """
+  if input_signature is None:
+    if isinstance(model.call, def_function.PolymorphicFunction):
+      input_signature = model.call.input_signature
+
+  if input_signature is None:
+    try:
+      inputs = model.inputs
+      input_names = model.input_names
+    except AttributeError:
+      raise ValueError(
+          'Model {} cannot be saved because the input shapes have not been '
+          'set. Usually, input shapes are automatically determined from calling'
+          ' .fit() or .predict(). To manually set the shapes, call '
+          'model._set_inputs(inputs).'.format(model))
+    input_specs = []
+    for input_tensor, input_name in zip(inputs, input_names):
+      input_specs.append(tensor_spec.TensorSpec(
+          shape=input_tensor.shape, dtype=input_tensor.dtype,
+          name=input_name))
+    # The input signature of the call function is a list with one element, since
+    # all tensor inputs must be passed in as the first argument.
+    input_signature = [input_specs] if len(input_specs) > 1 else input_specs
+
+  @def_function.function(input_signature=input_signature)
+  def _wrapped_model(*args):
+    """A concrete tf.function that wraps the model's call function."""
+    # When given a single input, Keras models will call the model on the tensor
+    # rather than a list consisting of the single tensor.
+    inputs = args[0] if len(input_signature) == 1 else list(args)
+    outputs_list = nest.flatten(model(inputs=inputs))
+    try:
+      output_names = model.output_names
+    except AttributeError:
+      output_names = generic_output_names(outputs_list)
+    return {name: output for name, output in zip(output_names, outputs_list)}
+
+  return _wrapped_model
+
diff --git a/tensorflow/python/keras/engine/training_utils_test.py b/tensorflow/python/keras/engine/training_utils_test.py
index 44ea23998f..0250e60426 100644
--- a/tensorflow/python/keras/engine/training_utils_test.py
+++ b/tensorflow/python/keras/engine/training_utils_test.py
@@ -18,13 +18,25 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 import numpy as np
 
+from tensorflow.python import keras
 from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.utils import tf_utils
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
+from tensorflow.python.saved_model import save as save_lib
+from tensorflow.python.saved_model import save_test
 
 
 class ModelInputsTest(test.TestCase):
@@ -85,5 +97,150 @@ class ModelInputsTest(test.TestCase):
       self.assertTrue(tf_utils.is_symbolic_tensor(vals['b']))
 
 
+class TraceModelCallTest(keras_parameterized.TestCase):
+
+  def _assert_all_close(self, expected, actual):
+    if not context.executing_eagerly():
+      with self.cached_session() as sess:
+        K._initialize_variables(sess)
+        self.assertAllClose(expected, actual)
+    else:
+      self.assertAllClose(expected, actual)
+
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes
+  def test_trace_model_outputs(self):
+    input_dim = 5 if testing_utils.get_model_type() == 'functional' else None
+    model = testing_utils.get_small_mlp(10, 3, input_dim)
+    inputs = array_ops.ones((8, 5))
+
+    if input_dim is None:
+      with self.assertRaisesRegexp(ValueError,
+                                   'input shapes have not been set'):
+        training_utils.trace_model_call(model)
+      model._set_inputs(inputs)
+
+    fn = training_utils.trace_model_call(model)
+    signature_outputs = fn(inputs)
+    expected_outputs = {model.output_names[0]: model(inputs)}
+
+    self._assert_all_close(expected_outputs, signature_outputs)
+
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes
+  def test_trace_model_outputs_after_fitting(self):
+    input_dim = 5 if testing_utils.get_model_type() == 'functional' else None
+    model = testing_utils.get_small_mlp(10, 3, input_dim)
+    model.compile(optimizer='sgd', loss='mse')
+    model.fit(x=np.random.random((8, 5)),
+              y=np.random.random((8, 3)), epochs=2)
+
+    inputs = array_ops.ones((8, 5))
+
+    fn = training_utils.trace_model_call(model)
+    signature_outputs = fn(inputs)
+    expected_outputs = {model.output_names[0]: model(inputs)}
+
+    self._assert_all_close(expected_outputs, signature_outputs)
+
+  @keras_parameterized.run_with_all_model_types(exclude_models='sequential')
+  @keras_parameterized.run_all_keras_modes
+  def test_trace_multi_io_model_outputs(self):
+    input_dim = 5
+    num_classes = 3
+    num_classes_b = 4
+    input_a = keras.layers.Input(shape=(input_dim,), name='input_a')
+    input_b = keras.layers.Input(shape=(input_dim,), name='input_b')
+
+    dense = keras.layers.Dense(num_classes, name='dense')
+    dense2 = keras.layers.Dense(num_classes_b, name='dense2')
+    dropout = keras.layers.Dropout(0.5, name='dropout')
+    branch_a = [input_a, dense]
+    branch_b = [input_b, dense, dense2, dropout]
+
+    model = testing_utils.get_multi_io_model(branch_a, branch_b)
+
+    input_a_np = np.random.random((10, input_dim)).astype(np.float32)
+    input_b_np = np.random.random((10, input_dim)).astype(np.float32)
+
+    if testing_utils.get_model_type() == 'subclass':
+      with self.assertRaisesRegexp(ValueError,
+                                   'input shapes have not been set'):
+        training_utils.trace_model_call(model)
+
+    model.compile(optimizer='sgd', loss='mse')
+    model.fit(x=[np.random.random((8, input_dim)).astype(np.float32),
+                 np.random.random((8, input_dim)).astype(np.float32)],
+              y=[np.random.random((8, num_classes)).astype(np.float32),
+                 np.random.random((8, num_classes_b)).astype(np.float32)],
+              epochs=2)
+
+    fn = training_utils.trace_model_call(model)
+    signature_outputs = fn([input_a_np, input_b_np])
+    outputs = model([input_a_np, input_b_np])
+    expected_outputs = {model.output_names[0]: outputs[0],
+                        model.output_names[1]: outputs[1]}
+
+    self._assert_all_close(expected_outputs, signature_outputs)
+
+  @keras_parameterized.run_all_keras_modes
+  def test_specify_input_signature(self):
+    model = testing_utils.get_small_sequential_mlp(10, 3, None)
+    inputs = array_ops.ones((8, 5))
+
+    with self.assertRaisesRegexp(ValueError, 'input shapes have not been set'):
+      training_utils.trace_model_call(model)
+
+    fn = training_utils.trace_model_call(
+        model, [tensor_spec.TensorSpec(shape=[None, 5], dtype=dtypes.float32)])
+    signature_outputs = fn(inputs)
+    expected_outputs = {model.output_names[0]: model(inputs)}
+    self._assert_all_close(expected_outputs, signature_outputs)
+
+  @keras_parameterized.run_all_keras_modes
+  def test_subclassed_model_with_input_signature(self):
+
+    class Model(keras.Model):
+
+      def __init__(self):
+        super(Model, self).__init__()
+        self.dense = keras.layers.Dense(3, name='dense')
+
+      @def_function.function(
+          input_signature=[[tensor_spec.TensorSpec([None, 5], dtypes.float32),
+                            tensor_spec.TensorSpec([None], dtypes.float32)]],)
+      def call(self, inputs, *args):
+        x, y = inputs
+        return self.dense(x) + y
+
+    model = Model()
+    fn = training_utils.trace_model_call(model)
+    x = array_ops.ones((8, 5), dtype=dtypes.float32)
+    y = array_ops.ones((3,), dtype=dtypes.float32)
+    expected_outputs = {'output_1': model([x, y])}
+    signature_outputs = fn([x, y])
+    self._assert_all_close(expected_outputs, signature_outputs)
+
+
+class ModelSaveTest(keras_parameterized.TestCase):
+
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
+  def test_model_save(self):
+    input_dim = 5
+    model = testing_utils.get_small_mlp(10, 3, input_dim)
+    inputs = array_ops.ones((8, 5))
+
+    if testing_utils.get_model_type() == 'subclass':
+      model._set_inputs(inputs)
+
+    save_dir = os.path.join(self.get_temp_dir(), 'saved_model')
+    save_lib.save(model, save_dir)
+
+    self.assertAllClose(
+        {model.output_names[0]: model.predict_on_batch(inputs)},
+        save_test._import_and_infer(save_dir,
+                                    {model.input_names[0]: np.ones((8, 5))}))
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index ab6fcb7196..e2726087a5 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -31,7 +31,6 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_spec
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -50,28 +49,7 @@ from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
-
-def _check_for_functional_keras_model(root):
-  """Makes an export signature for `root` if it's a functional Keras Model."""
-  # If nothing is decorated yet but this is a functional Keras Model (duck
-  # typed), we'll try to make a signature ourselves.
-  try:
-    inputs = root.inputs
-    input_names = root.input_names
-  except AttributeError:
-    return None
-  input_signature = []
-  for input_tensor, input_name in zip(inputs, input_names):
-    input_signature.append(tensor_spec.TensorSpec(
-        shape=input_tensor.shape, dtype=input_tensor.dtype,
-        name=input_name))
-
-  @def_function.function(input_signature=input_signature)
-  def _wrapped_model(*args):
-    outputs_list = nest.flatten(root(inputs=list(args)))
-    return {name: output for name, output
-            in zip(root.output_names, outputs_list)}
-  return _wrapped_model
+DEFAULT_SIGNATURE_ATTR = "_default_save_signature"
 
 
 def _find_function_to_export(root):
@@ -93,7 +71,7 @@ def _find_function_to_export(root):
       exported_function = attribute_value
       previous_attribute_name = attribute_name
   if exported_function is None:
-    exported_function = _check_for_functional_keras_model(root)
+    exported_function = getattr(root, DEFAULT_SIGNATURE_ATTR, None)
   if exported_function is None:
     raise ValueError(
         ("Exporting an object with no tf.saved_model.save(..., signatures=...) "
diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py
index 97218a98ea..1c6eb1b538 100644
--- a/tensorflow/python/saved_model/save_test.py
+++ b/tensorflow/python/saved_model/save_test.py
@@ -21,8 +21,6 @@ from __future__ import print_function
 import os
 import sys
 
-import numpy
-
 from tensorflow.python.client import session as session_lib
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import def_function
@@ -32,12 +30,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
-from tensorflow.python.keras.engine import input_layer
-from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.layers import core
-from tensorflow.python.keras.layers import merge
 from tensorflow.python.lib.io import file_io
-from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import lookup_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
@@ -50,10 +44,9 @@ from tensorflow.python.training.checkpointable import tracking
 from tensorflow.python.training.checkpointable import util
 
 
-class _ModelWithOptimizer(training.Model):
+class _ModelWithOptimizer(util.Checkpoint):
 
   def __init__(self):
-    super(_ModelWithOptimizer, self).__init__()
     self.dense = core.Dense(1)
     self.optimizer = adam.AdamOptimizer(0.01)
 
@@ -63,7 +56,7 @@ class _ModelWithOptimizer(training.Model):
   def call(self, x, y):
     with backprop.GradientTape() as tape:
       loss = math_ops.reduce_mean((self.dense(x) - y) ** 2.)
-    trainable_variables = self.trainable_variables
+    trainable_variables = self.dense.trainable_variables
     gradients = tape.gradient(loss, trainable_variables)
     self.optimizer.apply_gradients(zip(gradients, trainable_variables))
     return {"loss": loss}
@@ -179,10 +172,10 @@ class SaveTest(test.TestCase):
     x = constant_op.constant([[3., 4.]])
     y = constant_op.constant([2.])
     model = _ModelWithOptimizer()
-    first_loss = model(x, y)
+    first_loss = model.call(x, y)
     save_dir = os.path.join(self.get_temp_dir(), "saved_model")
     save.save(model, save_dir, model.call)
-    second_loss = model(x, y)
+    second_loss = model.call(x, y)
     self.assertNotEqual(first_loss, second_loss)
     self.assertAllClose(
         second_loss,
@@ -197,7 +190,7 @@ class SaveTest(test.TestCase):
     model = _ModelWithOptimizer()
     x = constant_op.constant([[3., 4.]])
     y = constant_op.constant([2.])
-    model(x, y)
+    model.call(x, y)
     save_dir = os.path.join(self.get_temp_dir(), "saved_model")
     save.save(model, save_dir)
     self.assertIn("loss",
@@ -217,25 +210,40 @@ class SaveTest(test.TestCase):
     model = _ModelWithOptimizer()
     x = constant_op.constant([[3., 4.]])
     y = constant_op.constant([2.])
-    model(x, y)
+    model.call(x, y)
     model.second_function = def_function.function(lambda: 1.)
     save_dir = os.path.join(self.get_temp_dir(), "saved_model")
     with self.assertRaisesRegexp(ValueError, "call.*second_function"):
       save.save(model, save_dir)
 
-  def test_subclassed_no_signature(self):
+  def test_no_signature(self):
 
-    class Subclassed(training.Model):
+    class Model(util.Checkpoint):
 
       def call(self, inputs):
         return inputs * 2.
 
     save_dir = os.path.join(self.get_temp_dir(), "saved_model")
-    model = Subclassed()
+    model = Model()
     with self.assertRaisesRegexp(
         ValueError, "no @tf.function-decorated methods"):
       save.save(model, save_dir)
 
+  def test_find_default_save_function(self):
+
+    class ObjWithDefaultSignature(util.Checkpoint):
+
+      @def_function.function(input_signature=[tensor_spec.TensorSpec(
+          shape=None, dtype=dtypes.float32)])
+      def _default_save_signature(self, x):
+        return x + x + 1
+
+    obj = ObjWithDefaultSignature()
+    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
+    save.save(obj, save_dir)
+    self.assertAllClose(
+        {"output_0": 7.}, _import_and_infer(save_dir, {"x": 3.}))
+
   def test_docstring(self):
 
     class Adder(util.Checkpoint):
@@ -276,46 +284,6 @@ class SaveTest(test.TestCase):
       self.assertNotIn("T", complex_node.attr)
       self.assertNotIn("Tout", complex_node.attr)
 
-  def test_export_functional_keras_model(self):
-    x = input_layer.Input((4,), name="x")
-    y = core.Dense(4, name="out")(x)
-    model = training.Model(x, y)
-    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
-    save.save(model, save_dir)
-    self.assertAllClose(
-        {"out": model(array_ops.ones([1, 4]))},
-        _import_and_infer(save_dir, {"x": [[1., 1., 1., 1.]]}))
-
-  @test_util.run_v1_only("b/120545219")
-  def test_export_functional_keras_model_after_fit(self):
-    x = input_layer.Input((1,))
-    y = core.Dense(1, name="y")(x)
-    model = training.Model(x, y)
-    model.compile(optimizer="sgd", loss="mse")
-    model.fit(x=numpy.array([[1.]]),
-              y=numpy.array([2.]), epochs=2)
-    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
-    save.save(model, save_dir)
-    self.assertAllClose(
-        {"y": model(constant_op.constant([[1.], [2.]]))},
-        _import_and_infer(save_dir, {"input_1": [[1.], [2.]]}))
-
-  def test_export_multi_input_functional_keras_model(self):
-    x1 = input_layer.Input((2,), name="x1")
-    x2 = input_layer.Input((2,), name="x2")
-    y1 = core.Dense(4)(merge.Add()([x1, x2]))
-    y2 = core.Dense(4)(merge.Multiply()([x1, x2]))
-    model = training.Model([x1, x2], [y1, y2])
-    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
-    save.save(model, save_dir)
-    outputs = model([array_ops.ones([1, 2]), 2. * array_ops.ones([1, 2])])
-    self.assertAllClose(
-        {"dense": outputs[0], "dense_1": outputs[1]},
-        _import_and_infer(
-            save_dir,
-            {"x1": [[1., 1.]],
-             "x2": [[2., 2.]]}))
-
 
 class AssetTests(test.TestCase):
 
@@ -376,7 +344,7 @@ class MemoryTests(test.TestCase):
   def test_no_reference_cycles(self):
     x = constant_op.constant([[3., 4.]])
     y = constant_op.constant([2.])
-    self._model(x, y)
+    self._model.call(x, y)
     if sys.version_info[0] < 3:
       # TODO(allenl): debug reference cycles in Python 2.x
       self.skipTest("This test only works in Python 3+. Reference cycles are "
-- 
GitLab


From 1d54cbf4a2252215c5d2ce9accb5e498a7c2a704 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 10 Dec 2018 12:35:24 -0800
Subject: [PATCH 302/873] Introduce consolidated ENABLE_CONTROL_FLOW_V2 flag.

The new toggle replaces ENABLE_COND_V2, ENABLE_WHILE_V2, and
ENABLE_TENSOR_ARRAY_V2. This means that these can't be toggled
independently anymore, notably that v1 TensorArrays can only be run
with v1 loops, and v2 TensorArrays with v2 loops.

This also introduces a corresponding environment variable
TF_ENABLE_CONTROL_FLOW_V2. I kept the old env vars as well in case
people are using them. They all flip the new single toggle now.

In addition, this change removes some while_v2 code for dealing with
v1 TensorArrays, since this is no longer a supported configuration.

PiperOrigin-RevId: 224862245
---
 .../kernel_tests/map_and_batch_test.py        |  7 ++-
 tensorflow/python/framework/test_util.py      | 41 ++-----------
 .../kernel_tests/control_flow_ops_py_test.py  | 22 +++----
 .../kernel_tests/control_flow_util_v2_test.py | 10 ++--
 .../kernel_tests/tensor_array_ops_test.py     | 42 +++++---------
 tensorflow/python/ops/control_flow_ops.py     |  9 +--
 .../python/ops/control_flow_ops_benchmark.py  | 25 ++++----
 tensorflow/python/ops/control_flow_util.py    |  8 +++
 tensorflow/python/ops/tensor_array_ops.py     |  9 +--
 tensorflow/python/ops/while_v2.py             | 57 ++++---------------
 10 files changed, 75 insertions(+), 155 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index 5c115f7ae3..a8a65dde13 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -32,6 +32,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
@@ -500,10 +501,10 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testMapAndBatchControlFlow(self, numa_aware):
 
     def map_fn(x):
-      previous_cond_v2_value = control_flow_ops.ENABLE_COND_V2
-      control_flow_ops.ENABLE_COND_V2 = True
+      previous_control_flow_v2_value = control_flow_util.ENABLE_CONTROL_FLOW_V2
+      control_flow_util.ENABLE_CONTROL_FLOW_V2 = True
       return_value = control_flow_ops.cond(x < 50, lambda: x + 1, lambda: x * x)
-      control_flow_ops.ENABLE_COND_V2 = previous_cond_v2_value
+      control_flow_util.ENABLE_CONTROL_FLOW_V2 = previous_control_flow_v2_value
       return return_value
 
     dataset = dataset_ops.Dataset.range(100).apply(
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index df3cebd2e0..0e48d3c875 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -67,9 +67,8 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import versions
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import script_ops
-from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import googletest
 from tensorflow.python.platform import tf_logging as logging
@@ -409,42 +408,12 @@ def enable_control_flow_v2(fn):
   """
 
   def wrapper(*args, **kwargs):
-    enable_cond_v2_old = control_flow_ops.ENABLE_COND_V2
-    enable_while_v2_old = control_flow_ops.ENABLE_WHILE_V2
-    enable_tensor_array_v2_old = tensor_array_ops.ENABLE_TENSOR_ARRAY_V2
-    control_flow_ops.ENABLE_COND_V2 = True
-    control_flow_ops.ENABLE_WHILE_V2 = True
-    tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 = True
+    enable_control_flow_v2_old = control_flow_util.ENABLE_CONTROL_FLOW_V2
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = True
     try:
       fn(*args, **kwargs)
     finally:
-      control_flow_ops.ENABLE_COND_V2 = enable_cond_v2_old
-      control_flow_ops.ENABLE_WHILE_V2 = enable_while_v2_old
-      tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 = enable_tensor_array_v2_old
-
-  return wrapper
-
-
-def enable_tensor_array_v2(fn):
-  """Decorator for enabling _GraphTensorArrayV2 on a test.
-
-  Note this enables _GraphTensorArrayV2 after running the test class's
-  setup/teardown methods.
-
-  Args:
-    fn: the function to be wrapped
-
-  Returns:
-    The wrapped function
-  """
-
-  def wrapper(*args, **kwargs):
-    enable_tensor_array_v2_old = tensor_array_ops.ENABLE_TENSOR_ARRAY_V2
-    tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 = True
-    try:
-      fn(*args, **kwargs)
-    finally:
-      tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 = enable_tensor_array_v2_old
+      control_flow_util.ENABLE_CONTROL_FLOW_V2 = enable_control_flow_v2_old
 
   return wrapper
 
@@ -493,7 +462,7 @@ def with_control_flow_v2(cls):
   Returns:
     cls with new test methods added
   """
-  if control_flow_ops.ENABLE_WHILE_V2 and control_flow_ops.ENABLE_COND_V2:
+  if control_flow_util.ENABLE_CONTROL_FLOW_V2:
     return cls
 
   for name, value in cls.__dict__.copy().items():
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 0fd293ebba..21ded25a11 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -43,6 +43,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gen_array_ops
@@ -700,7 +701,8 @@ class ControlFlowTest(test.TestCase):
       v1_msg = "The two structures don't have the same nested structure"
       v2_msg = "Outputs of true_fn and false_fn must have the same structure"
       with self.assertRaisesRegexp(
-          ValueError, v2_msg if control_flow_ops.ENABLE_COND_V2 else v1_msg):
+          ValueError,
+          v2_msg if control_flow_util.ENABLE_CONTROL_FLOW_V2 else v1_msg):
         r = control_flow_ops.cond(pred, fn1, fn2)
         self.evaluate(r)
 
@@ -859,7 +861,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(sess.run(grad, {pred: False, x: 1.0, y: 2.0}), 0.0)
 
       # v1 control flow gets None second derivative for some reason.
-      if not control_flow_ops.ENABLE_COND_V2:
+      if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
         self.assertIsNone(grad_grad)
         return
 
@@ -949,7 +951,7 @@ class ControlFlowTest(test.TestCase):
 
     # In defuns, all prints should execute in program order.
     # This doesn't work with legacy control flow.
-    if control_flow_ops.ENABLE_COND_V2:
+    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
 
       @eager_function.defun
       def cond():
@@ -1003,7 +1005,7 @@ class ControlFlowTest(test.TestCase):
 
     # In defuns, all prints should execute in program order.
     # This doesn't work with legacy control flow.
-    if control_flow_ops.ENABLE_WHILE_V2:
+    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
 
       @eager_function.defun
       def while_loop():
@@ -1161,7 +1163,7 @@ class ControlFlowTest(test.TestCase):
     gs = gradients_impl.gradients(loop_no_xla, v)
     self.evaluate(gs)  # This should execute without error.
 
-    if control_flow_ops.ENABLE_WHILE_V2:
+    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
       xla_context = control_flow_ops.XLAControlFlowContext()
       xla_context.Enter()
       with self.assertRaisesRegexp(
@@ -1219,7 +1221,7 @@ class ControlFlowTest(test.TestCase):
           lambda i, x: (i + 1, v * x), (0, 1.0),
           maximum_iterations=max_iter_holder[0])
 
-    if control_flow_ops.ENABLE_WHILE_V2:
+    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
       xla_context = control_flow_ops.XLAControlFlowContext()
       xla_context.Enter()
       with self.assertRaisesRegexp(
@@ -1863,7 +1865,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(sess.run(grad, {pred: True}), 8.0)
       self.assertEqual(sess.run(grad, {pred: False}), 0.0)
 
-      if not control_flow_ops.ENABLE_WHILE_V2:
+      if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
         return
 
       self.assertEqual(sess.run(grad_grad, {pred: True}), 0.0)
@@ -2399,7 +2401,7 @@ class ControlFlowTest(test.TestCase):
     #   outer_loop(x) = g(g(x)) = 4x + 81
     #   outer_loop'(x) = 4
     # Note that v1 control flow gets 4.0 as well if the cond is removed.
-    if control_flow_ops.ENABLE_WHILE_V2 and control_flow_ops.ENABLE_COND_V2:
+    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
       self.assertEqual(grad, 4.0)
 
   def testWhile_NestedInput(self):
@@ -2982,7 +2984,7 @@ class ControlFlowTest(test.TestCase):
 
     result = functional_ops.scan(fn, np.array([1., 2., 3.], dtype=np.float32))
     grad_theta = gradients_impl.gradients(result, theta)
-    if not control_flow_ops.ENABLE_WHILE_V2:
+    if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
       with self.assertRaisesRegexp(TypeError, "Second-order gradient"):
         gradients_impl.gradients(grad_theta, theta)
     grad_theta_stopped = array_ops.stop_gradient(grad_theta)
@@ -3514,7 +3516,7 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(r[1].eval(), 65536.0)
       self.assertEqual(grad.eval(), 524288.0)
       # while_v2 does not have stacks.
-      if not control_flow_ops.ENABLE_WHILE_V2:
+      if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
         self.assertEqual(
             len([op for op in x.graph.get_operations() if op.type == "StackV2"
                 ]), 1)
diff --git a/tensorflow/python/kernel_tests/control_flow_util_v2_test.py b/tensorflow/python/kernel_tests/control_flow_util_v2_test.py
index d0374a7700..08d3214e28 100644
--- a/tensorflow/python/kernel_tests/control_flow_util_v2_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_util_v2_test.py
@@ -23,6 +23,7 @@ from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import control_flow_util_v2
 from tensorflow.python.platform import test
 
@@ -30,14 +31,11 @@ from tensorflow.python.platform import test
 class ControlFlowUtilV2Test(test.TestCase):
 
   def setUp(self):
-    self._enable_cond_v2_old = control_flow_ops.ENABLE_COND_V2
-    self._enable_while_v2_old = control_flow_ops.ENABLE_WHILE_V2
-    control_flow_ops.ENABLE_COND_V2 = True
-    control_flow_ops.ENABLE_WHILE_V2 = True
+    self._enable_control_flow_v2_old = control_flow_util.ENABLE_CONTROL_FLOW_V2
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = True
 
   def tearDown(self):
-    control_flow_ops.ENABLE_COND_V2 = self._enable_cond_v2_old
-    control_flow_ops.ENABLE_WHILE_V2 = self._enable_while_v2_old
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = self._enable_control_flow_v2_old
 
   def _create_control_flow(self, expect_in_defun):
     """Helper method for testInDefun."""
diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index 88625841bc..6d8e3e8356 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -32,6 +32,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import gen_data_flow_ops
 from tensorflow.python.ops import gradients_impl
@@ -345,7 +346,7 @@ class TensorArrayTest(test.TestCase):
 
   @test_util.run_deprecated_v1
   def testSkipEagerTensorArrayGradGrad(self):
-    if not tensor_array_ops.ENABLE_TENSOR_ARRAY_V2:
+    if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
       self.skipTest("Legacy TensorArray does not support double derivatives.")
     with self.test_session(use_gpu=True) as session:
       x = constant_op.constant(4.0)
@@ -429,7 +430,7 @@ class TensorArrayTest(test.TestCase):
     with self.session(use_gpu=True):
       ta = _make_ta(3, "foo", dtype=dtypes.float32)
       # Test writing the wrong datatype
-      if (tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 and
+      if (control_flow_util.ENABLE_CONTROL_FLOW_V2 and
           not context.executing_eagerly()):
         error_msg = ("Invalid data types; op elements string but list elements "
                      "float")
@@ -440,7 +441,7 @@ class TensorArrayTest(test.TestCase):
       with self.assertRaisesOpError(error_msg):
         self.evaluate(ta.write(0, "wrong_type_scalar").flow)
 
-      if (tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 and
+      if (control_flow_util.ENABLE_CONTROL_FLOW_V2 and
           not context.executing_eagerly()):
         error_msg = "Trying to modify element -1 in a list with 3 elements."
       else:
@@ -448,7 +449,7 @@ class TensorArrayTest(test.TestCase):
       with self.assertRaisesOpError(error_msg):
         self.evaluate(ta.write(-1, 3.0).flow)
 
-      if (tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 and
+      if (control_flow_util.ENABLE_CONTROL_FLOW_V2 and
           not context.executing_eagerly()):
         error_msg = "Trying to modify element 3 in a list with 3 elements"
       else:
@@ -467,14 +468,14 @@ class TensorArrayTest(test.TestCase):
 
       # Test reading wrong datatype (only possible when constructing graphs).
       if (not context.executing_eagerly() and
-          not tensor_array_ops.ENABLE_TENSOR_ARRAY_V2):
+          not control_flow_util.ENABLE_CONTROL_FLOW_V2):
         r0_bad = gen_data_flow_ops.tensor_array_read_v3(
             handle=w0.handle, index=0, dtype=dtypes.float64, flow_in=w0.flow)
         with self.assertRaisesOpError(
             "TensorArray dtype is float but Op requested dtype double."):
           self.evaluate(r0_bad)
 
-      if (tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 and
+      if (control_flow_util.ENABLE_CONTROL_FLOW_V2 and
           not context.executing_eagerly()):
         error_msg = "Trying to access element -1 in a list with 3 elements."
       else:
@@ -483,7 +484,7 @@ class TensorArrayTest(test.TestCase):
       with self.assertRaisesOpError(error_msg):
         self.evaluate(ta.read(-1))
 
-      if (tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 and
+      if (control_flow_util.ENABLE_CONTROL_FLOW_V2 and
           not context.executing_eagerly()):
         error_msg = "Trying to access element 3 in a list with 3 elements."
       else:
@@ -550,7 +551,7 @@ class TensorArrayTest(test.TestCase):
           ta.split([1.0, 2.0, 3.0], lengths).flow.eval(feed_dict={lengths: 1})
 
       error_msg = ("Unused values in tensor. Length of tensor: 3 Values used: 1"
-                   if tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 and
+                   if control_flow_util.ENABLE_CONTROL_FLOW_V2 and
                    not in_eager_mode else
                    r"Expected sum of lengths to be equal to values.shape\[0\], "
                    r"but sum of lengths is 1 and value's shape is: \[3\]")
@@ -558,7 +559,7 @@ class TensorArrayTest(test.TestCase):
         self.evaluate(ta.split([1.0, 2.0, 3.0], [1]).flow)
 
       ta = _make_ta(1, "baz")
-      if tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 and not in_eager_mode:
+      if control_flow_util.ENABLE_CONTROL_FLOW_V2 and not in_eager_mode:
         with self.assertRaisesRegexp(
             ValueError, "Shape must be at least rank 1 but is rank 0"):
           self.evaluate(ta.split(1.0, [1]).flow)
@@ -568,7 +569,7 @@ class TensorArrayTest(test.TestCase):
         ):
           self.evaluate(ta.split(1.0, [1]).flow)
 
-      if not tensor_array_ops.ENABLE_TENSOR_ARRAY_V2 or in_eager_mode:
+      if not control_flow_util.ENABLE_CONTROL_FLOW_V2 or in_eager_mode:
         ta = _make_ta(2, "buz")
         with self.assertRaisesOpError(
             r"TensorArray's size is not equal to the size of lengths "
@@ -1003,21 +1004,6 @@ class TensorArrayTest(test.TestCase):
     # self._testWhileLoopWritePackGradients(
     #     dynamic_size=False, dtype=tf.int64)
 
-  @test_util.disable_control_flow_v2("Testing v1 while_loop with v2 TA")
-  @test_util.enable_tensor_array_v2
-  def testWhileLoopV1WithTensorArrayV2(self):
-    size = 3
-    ta = tensor_array_ops.TensorArray(
-        dtype=dtypes.int32, size=size, element_shape=tensor_shape.scalar())
-
-    def Body(counter, ta):
-      return counter + 1, ta.write(counter, counter)
-
-    _, ta = control_flow_ops.while_loop(lambda i, _: i < size, Body, [0, ta])
-
-    for i in range(size):
-      self.assertEqual(self.evaluate(ta.read(i)), i)
-
   @test_util.disable_control_flow_v2("b/117943489 (dynamic_size)")
   @test_util.run_v1_only("b/117943489")
   def testSkipEagerWhileLoopDynamicWritePackGradients(self):
@@ -1270,7 +1256,7 @@ class TensorArrayTest(test.TestCase):
         self.assertEqual((2, 2), w0.read(1).get_shape())
       else:
         self.assertEqual(r0.get_shape().ndims, None)
-        if not tensor_array_ops.ENABLE_TENSOR_ARRAY_V2:
+        if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
           self.assertEqual(
               tensor_shape.TensorShape(
                   ta1.handle.op.get_attr("element_shape")).ndims, None)
@@ -1347,8 +1333,8 @@ class TensorArrayTest(test.TestCase):
           "TensorArray has size zero, but element shape <unknown> is not "
           "fully defined. Currently only static shapes are supported when "
           "packing zero-size TensorArrays.")
-      with self.assertRaisesOpError(v2_msg if tensor_array_ops
-                                    .ENABLE_TENSOR_ARRAY_V2 else v1_msg):
+      with self.assertRaisesOpError(
+          v2_msg if control_flow_util.ENABLE_CONTROL_FLOW_V2 else v1_msg):
         ta.stack().eval()
 
   @test_util.run_v1_only("b/120545219")
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index b7e50c1dae..99216d7fb1 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -24,13 +24,11 @@ from __future__ import print_function
 import abc
 import collections
 import functools
-import os
 
 import six
 
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.protobuf import control_flow_pb2
-from tensorflow.python import tf2
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -71,9 +69,6 @@ cond_v2 = LazyLoader("cond_v2", globals(),
 while_v2 = LazyLoader("while_v2", globals(),
                       "tensorflow.python.ops.while_v2")
 
-ENABLE_COND_V2 = tf2.enabled() or os.getenv("TF_ENABLE_COND_V2", "0") != "0"
-ENABLE_WHILE_V2 = tf2.enabled() or os.getenv("TF_ENABLE_WHILE_V2", "0") != "0"
-
 # We override the 'tuple' for a control flow op, so we keep python's
 # existing 'tuple' for later use in this module.
 _basetuple = tuple
@@ -2052,7 +2047,7 @@ def cond(pred,
   ```
 
   """
-  if ENABLE_COND_V2 and not context.executing_eagerly():
+  if util.ENABLE_CONTROL_FLOW_V2 and not context.executing_eagerly():
     return cond_v2.cond_v2(pred, true_fn, false_fn, name)
 
   # We needed to make true_fn/false_fn keyword arguments for
@@ -3487,7 +3482,7 @@ def while_loop(cond,
   ```
 
   """
-  if ENABLE_WHILE_V2 and not context.executing_eagerly():
+  if util.ENABLE_CONTROL_FLOW_V2 and not context.executing_eagerly():
     return while_v2.while_loop(
         cond,
         body,
diff --git a/tensorflow/python/ops/control_flow_ops_benchmark.py b/tensorflow/python/ops/control_flow_ops_benchmark.py
index 9ba5ff2c0f..9dd1e6673b 100644
--- a/tensorflow/python/ops/control_flow_ops_benchmark.py
+++ b/tensorflow/python/ops/control_flow_ops_benchmark.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.platform import test
@@ -94,28 +95,28 @@ class CondWithManyIntermediatesBenchmark(test.Benchmark):
               iters=self.NUM_ITERS)
 
   def benchmark_cond_v1_defun(self):
-    old_val = control_flow_ops.ENABLE_COND_V2
-    control_flow_ops.ENABLE_COND_V2 = False
+    old_val = control_flow_util.ENABLE_CONTROL_FLOW_V2
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = False
     self._benchmark_defun()
-    control_flow_ops.ENABLE_COND_V2 = old_val
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = old_val
 
   def benchmark_cond_v2_defun(self):
-    old_val = control_flow_ops.ENABLE_COND_V2
-    control_flow_ops.ENABLE_COND_V2 = True
+    old_val = control_flow_util.ENABLE_CONTROL_FLOW_V2
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = True
     self._benchmark_defun()
-    control_flow_ops.ENABLE_COND_V2 = old_val
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = old_val
 
   def benchmark_cond_v1_graph(self):
-    old_val = control_flow_ops.ENABLE_COND_V2
-    control_flow_ops.ENABLE_COND_V2 = False
+    old_val = control_flow_util.ENABLE_CONTROL_FLOW_V2
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = False
     self._benchmark_graph()
-    control_flow_ops.ENABLE_COND_V2 = old_val
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = old_val
 
   def benchmark_cond_v2_graph(self):
-    old_val = control_flow_ops.ENABLE_COND_V2
-    control_flow_ops.ENABLE_COND_V2 = True
+    old_val = control_flow_util.ENABLE_CONTROL_FLOW_V2
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = True
     self._benchmark_graph()
-    control_flow_ops.ENABLE_COND_V2 = old_val
+    control_flow_util.ENABLE_CONTROL_FLOW_V2 = old_val
 
 if __name__ == "__main__":
   ops.enable_eager_execution()
diff --git a/tensorflow/python/ops/control_flow_util.py b/tensorflow/python/ops/control_flow_util.py
index cb628f4aa6..1747f06109 100644
--- a/tensorflow/python/ops/control_flow_util.py
+++ b/tensorflow/python/ops/control_flow_util.py
@@ -23,10 +23,18 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 import traceback
 
+from tensorflow.python import tf2
 from tensorflow.python.platform import tf_logging as logging
 
+ENABLE_CONTROL_FLOW_V2 = (tf2.enabled() or
+                          os.getenv("TF_ENABLE_CONTROL_FLOW_V2", "0") != "0" or
+                          os.getenv("TF_ENABLE_COND_V2", "0") != "0" or
+                          os.getenv("TF_ENABLE_WHILE_V2", "0") != "0" or
+                          os.getenv("TF_ENABLE_TENSOR_ARRAY_V2", "0") != "0")
+
 
 def IsInXLAContext(op):
   try:
diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py
index d151694951..85333ee6b5 100644
--- a/tensorflow/python/ops/tensor_array_ops.py
+++ b/tensorflow/python/ops/tensor_array_ops.py
@@ -20,10 +20,8 @@ from __future__ import division
 from __future__ import print_function
 
 import contextlib
-import os
 import weakref
 
-from tensorflow.python import tf2
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -32,6 +30,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import gen_control_flow_ops
 from tensorflow.python.ops import gen_data_flow_ops
 from tensorflow.python.ops import list_ops
@@ -40,10 +39,6 @@ from tensorflow.python.util import tf_should_use
 from tensorflow.python.util.tf_export import tf_export
 
 
-ENABLE_TENSOR_ARRAY_V2 = (
-    tf2.enabled() or os.getenv("TF_ENABLE_TENSOR_ARRAY_V2") is not None)
-
-
 # _GraphTensorArray accesses many of the hidden generated ops, but is in
 # fact built to wrap these methods.
 # pylint: disable=protected-access
@@ -1013,7 +1008,7 @@ class TensorArray(object):
     if context.executing_eagerly():
       implementation = _EagerTensorArray
     else:
-      if ENABLE_TENSOR_ARRAY_V2:
+      if control_flow_util.ENABLE_CONTROL_FLOW_V2:
         implementation = _GraphTensorArrayV2
       else:
         implementation = _GraphTensorArray
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index d00c158d15..f7566bac9b 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -52,13 +52,6 @@ from tensorflow.python.util import nest
 # to them and then pass those in as data inputs. This should probably be
 # handled in the CapturingGraph itself.
 
-# Op types that output a resource tensor representing a TensorArray handle.
-TENSOR_ARRAY_HANDLE_OPS = (
-    "TensorArrayV3",
-    "TensorArrayGradV3",
-    "TensorArrayGradWithShape",
-)
-
 
 def while_loop(cond,
                body,
@@ -257,24 +250,19 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
       "_maximum_iterations") if _is_in_xla_context() else None
   assert not _is_in_xla_context() or maximum_iterations is not None
 
-  # Set the incoming gradient of TensorArray handles to None. The gradient
-  # implementation currently assumes all resource tensors correspond to float32
-  # ResourceVariables, which can lead to runtime shape errors when used with a
-  # TensorArray. This is a workaround until TensorArrays are reimplemented with
-  # TensorLists instead of resources.
-  # Also set the incoming gradient of non-trainable inputs to None. It is
-  # possible that we receive non-None gradients for non-trainable types in
-  # nested while loops because we accumulate outputs of the inner while as
-  # variant tensors which are trainable and hence receive zeros_like tensors in
-  # the gradient pass. The non-trainable tensors then receive the popped zeros
-  # tensor from this zeros variant. The gradient for the loop vars corresponding
-  # to these tensors is None or zeros (this happens only if the loop var is
-  # accumulated as well) in _grad_fn so we reset these.
+  # Set the incoming gradient of non-trainable inputs to None. It is possible
+  # that we receive non-None gradients for non-trainable types in nested while
+  # loops because we accumulate outputs of the inner while as variant tensors
+  # which are trainable and hence receive zeros_like tensors in the gradient
+  # pass. The non-trainable tensors then receive the popped zeros tensor from
+  # this zeros variant. The gradient for the loop vars corresponding to these
+  # tensors is None or zeros (this happens only if the loop var is accumulated
+  # as well) in _grad_fn so we reset these.
   # TODO(b/118712257): Remove the IsTrainable filter once we can handle None
   # output grads in _grad_fn.
   grads = [
-      None if _is_tensor_array_handle(output) or not _is_trainable(output)
-      else grad for grad, output in zip(grads, body_graph.outputs)
+      None if not _is_trainable(output) else grad
+      for grad, output in zip(grads, body_graph.outputs)
   ]
 
   # Ensure that all non-resource trainable outputs have incoming gradients.
@@ -339,8 +327,7 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
   # See comment in while_loop.
   outputs = [array_ops.identity(t) for t in outputs]
 
-  # Set None as the output gradient for tensors with None input gradient
-  # e.g. TensorArray handles.
+  # Set None as the output gradient for tensors with None input gradient.
   # outputs[0] is the loop counter.
   # outputs[1] is the total number of loop iterations.
   index = 2
@@ -853,28 +840,6 @@ def _graph_name(graph):
   return "Base"
 
 
-def _is_tensor_array_handle(tensor):
-  """Returns whether tensor is a TensorArray handle."""
-  if tensor.dtype != dtypes.resource:
-    return False
-
-  if tensor.op.type == "While":
-    # We assume that any resource outputs of a While op correspond to a captured
-    # resource input (as opposed to a loop variable specified by the user).
-    # NOTE(skyewm): we could actually check this, but I can't think of when you
-    # would have a resource loop variable.
-    tensor = tensor.op.inputs[tensor.value_index]
-
-  # TODO(b/118452219): add test coverage for this.
-  tensor = func_graph_module.maybe_captured(tensor)
-
-  if isinstance(tensor, ops.EagerTensor):
-    # Eager execution doesn't quite support legacy tensorarray
-    return False
-
-  return tensor.op.type in TENSOR_ARRAY_HANDLE_OPS
-
-
 def _pack_sequence_as(structure_with_tas, loop_vars):
   """Like `nest.pack_sequence_as` but also replaces flows with TensorArrays."""
 
-- 
GitLab


From b51d81f87f5de3c26b2db59ae6ec6b5f963acd7d Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Mon, 10 Dec 2018 12:42:26 -0800
Subject: [PATCH 303/873] Update the default activation function for unified
 LSTM to 'sigmoid'.

I believe for historical reason, the activation function for LSTM is hard_sigmoid because it is faster compare to sigmoid. With the new LSTM, the performance issue should be fixed with grappler swapping the backend.

PiperOrigin-RevId: 224863406
---
 tensorflow/python/keras/layers/recurrent.py   | 17 ++++++------
 .../python/keras/layers/unified_lstm_test.py  | 27 ++++++++++---------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 86a69e45d9..fb4c1736b1 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -2546,13 +2546,11 @@ class UnifiedLSTM(LSTM):
   Arguments:
     units: Positive integer, dimensionality of the output space.
     activation: Activation function to use.
-        Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
-          is applied
-        (ie. "linear" activation: `a(x) = x`).
+      Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
+      is applied (ie. "linear" activation: `a(x) = x`).
     recurrent_activation: Activation function to use for the recurrent step.
-        Default: hard sigmoid (`hard_sigmoid`). If you pass `None`, no
-          activation is applied
-        (ie. "linear" activation: `a(x) = x`).
+      Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
+      applied (ie. "linear" activation: `a(x) = x`).
     use_bias: Boolean, whether the layer uses a bias vector.
     kernel_initializer: Initializer for the `kernel` weights matrix, used for
       the linear transformation of the inputs..
@@ -2602,7 +2600,7 @@ class UnifiedLSTM(LSTM):
   def __init__(self,
                units,
                activation='tanh',
-               recurrent_activation='hard_sigmoid',
+               recurrent_activation='sigmoid',
                use_bias=True,
                kernel_initializer='glorot_uniform',
                recurrent_initializer='orthogonal',
@@ -2663,8 +2661,9 @@ class UnifiedLSTM(LSTM):
     self._num_inputs = None
     self._dropout_mask = None
     self.could_use_cudnn = (
-        activation == 'tanh' and recurrent_dropout == 0 and
-        not unroll and use_bias and bias_regularizer is None)
+        activation == 'tanh' and recurrent_activation == 'sigmoid' and
+        recurrent_dropout == 0 and not unroll and use_bias and
+        bias_regularizer is None)
 
   def call(self, inputs, mask=None, training=None, initial_state=None):
     # LSTM does not support constants. Ignore it during process.
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 932b2d331d..a2b523b00e 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -161,17 +161,20 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
         existing_loss = loss_value
 
   @parameterized.named_parameters(
-      ('_non_tan_activation', 'relu', 0, False, True, None),
-      ('_use_recurrent_dropout', 'tanh', 0.1, False, True, None),
-      ('_unroll', 'tanh', 0, True, True, None),
-      ('_not_use_bias', 'tanh', 0, False, False, None),
-      ('_use_bias_regularizer', 'tanh', 0, False, True, 'l2')
+      ('non_tan_activation', 'relu', 'sigmoid', 0, False, True, None),
+      ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True, None),
+      ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True, None),
+      ('unroll', 'tanh', 'sigmoid', 0, True, True, None),
+      ('not_use_bias', 'tanh', 'sigmoid', 0, False, False, None),
+      ('use_bias_regularizer', 'tanh', 'sigmoid', 0, False, True, 'l2')
   )
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_could_use_defun_backend(self, activation, recurrent_dropout,
-                                   unroll, use_bias, bias_regularizer):
+  def test_could_use_defun_backend(self, activation, recurrent_activation,
+                                   recurrent_dropout, unroll, use_bias,
+                                   bias_regularizer):
     layer = UnifiedLSTM(1,
                         activation=activation,
+                        recurrent_activation=recurrent_activation,
                         recurrent_dropout=recurrent_dropout,
                         unroll=unroll,
                         use_bias=use_bias,
@@ -270,22 +273,22 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     inputs = keras.layers.Input(
         shape=[timestep, input_shape], dtype=dtypes.float32)
     with test_util.device(use_gpu=False):
-      # Note that CuDNN use 'sigmoid' as activation. Force the CPU
-      # implementation to use 'sigmoid' so that it will generate same output as
-      # CuDNN implementation.
-      layer = UnifiedLSTM(rnn_state_size, recurrent_activation='sigmoid')
+      layer = UnifiedLSTM(rnn_state_size)
       output = layer(inputs)
       cpu_model = keras.models.Model(inputs, output)
       weights = cpu_model.get_weights()
       y_1 = cpu_model.predict(x_train)
 
     with test_util.device(use_gpu=True):
-      layer = UnifiedLSTM(rnn_state_size, recurrent_activation='sigmoid')
+      layer = UnifiedLSTM(rnn_state_size)
       output = layer(inputs)
       gpu_model = keras.models.Model(inputs, output)
       gpu_model.set_weights(weights)
       y_2 = gpu_model.predict(x_train)
 
+    # Note that CuDNN uses 'sigmoid' as activation, so the unified LSTM uses
+    # 'sigmoid' as default. Construct the canonical LSTM with sigmoid to achieve
+    # the same output.
     with test_util.device(use_gpu=True):
       layer = keras.layers.LSTM(rnn_state_size, recurrent_activation='sigmoid')
       output = layer(inputs)
-- 
GitLab


From 4bc66cd75aa040b05f744a3ed805afe6032f1848 Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Mon, 10 Dec 2018 12:44:23 -0800
Subject: [PATCH 304/873] Add TF_DefaultThreadOptions, TF_StartThread and
 TF_JoinThread.

PiperOrigin-RevId: 224863771
---
 tensorflow/c/env.cc      | 22 ++++++++++++++++++++++
 tensorflow/c/env.h       | 37 +++++++++++++++++++++++++++++++++++++
 tensorflow/c/env_test.cc | 27 +++++++++++++++++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/tensorflow/c/env.cc b/tensorflow/c/env.cc
index 07b9e8b940..1c35ff9001 100644
--- a/tensorflow/c/env.cc
+++ b/tensorflow/c/env.cc
@@ -159,3 +159,25 @@ TF_CAPI_EXPORT extern uint64_t TF_NowMicros(void) {
 TF_CAPI_EXPORT extern uint64_t TF_NowSeconds(void) {
   return ::tensorflow::Env::Default()->NowSeconds();
 }
+
+void TF_DefaultThreadOptions(TF_ThreadOptions* options) {
+  options->stack_size = 0;
+  options->guard_size = 0;
+  options->numa_node = -1;
+}
+
+TF_Thread* TF_StartThread(const TF_ThreadOptions* options,
+                          const char* thread_name, void (*work_func)(void*),
+                          void* param) {
+  ::tensorflow::ThreadOptions cc_options;
+  cc_options.stack_size = options->stack_size;
+  cc_options.guard_size = options->guard_size;
+  cc_options.numa_node = options->numa_node;
+  return reinterpret_cast<TF_Thread*>(::tensorflow::Env::Default()->StartThread(
+      cc_options, thread_name, [=]() { (*work_func)(param); }));
+}
+
+void TF_JoinThread(TF_Thread* thread) {
+  // ::tensorflow::Thread joins on destruction
+  delete reinterpret_cast<::tensorflow::Thread*>(thread);
+}
diff --git a/tensorflow/c/env.h b/tensorflow/c/env.h
index 9d27c5da37..15652353cd 100644
--- a/tensorflow/c/env.h
+++ b/tensorflow/c/env.h
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <stddef.h>
+#include <stdint.h>
+
 #ifndef TENSORFLOW_C_ENV_H_
 #define TENSORFLOW_C_ENV_H_
 
@@ -23,6 +26,7 @@ limitations under the License.
 
 struct TF_WritableFileHandle;
 struct TF_StringStream;
+struct TF_Thread;
 
 #ifdef __cplusplus
 extern "C" {
@@ -37,6 +41,20 @@ typedef struct TF_FileStatistics {
   bool is_directory;
 } TF_FileStatistics;
 
+typedef struct TF_ThreadOptions {
+  // Thread stack size to use (in bytes), zero implies that the system default
+  // will be used.
+  size_t stack_size;
+
+  // Guard area size to use near thread stacks to use (in bytes), zero implies
+  // that the system default will be used.
+  size_t guard_size;
+
+  // The NUMA node to use, -1 implies that there should be no NUMA affinity for
+  // this thread.
+  int numa_node;
+} TF_ThreadOptions;
+
 // Creates the specified directory. Typical status code are:
 //  * TF_OK - successfully created the directory
 //  * TF_ALREADY_EXISTS - directory already exists
@@ -150,6 +168,25 @@ TF_CAPI_EXPORT extern uint64_t TF_NowMicros(void);
 // Returns the number of seconds since the Unix epoch.
 TF_CAPI_EXPORT extern uint64_t TF_NowSeconds(void);
 
+// Populates a TF_ThreadOptions struct with system-default values.
+TF_CAPI_EXPORT extern void TF_DefaultThreadOptions(TF_ThreadOptions* options);
+
+// Returns a new thread that is running work_func and is identified
+// (for debugging/performance-analysis) by thread_name.
+//
+// The given param (which may be null) is passed to work_func when the thread
+// starts. In this way, data may be passed from the thread back to the caller.
+//
+// Caller takes ownership of the result and must call TF_JoinThread on it
+// eventually.
+TF_CAPI_EXPORT extern TF_Thread* TF_StartThread(const TF_ThreadOptions* options,
+                                                const char* thread_name,
+                                                void (*work_func)(void*),
+                                                void* param);
+
+// Waits for the given thread to finish execution, then deletes it.
+TF_CAPI_EXPORT extern void TF_JoinThread(TF_Thread* thread);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/tensorflow/c/env_test.cc b/tensorflow/c/env_test.cc
index e2206c6bef..687ad02413 100644
--- a/tensorflow/c/env_test.cc
+++ b/tensorflow/c/env_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "tensorflow/c/c_api.h"
 #include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
 
@@ -98,3 +99,29 @@ TEST(TestEnv, TestTimeFunctions) {
   ASSERT_GE(TF_NowMicros(), 946684800 * 1e6);
   ASSERT_GE(TF_NowNanos(), 946684800 * 1e9);
 }
+
+namespace {
+
+struct SomeThreadData {
+  ::tensorflow::mutex mu;
+  bool did_work = false;
+};
+
+void SomeThreadFunc(void* data) {
+  auto* real_data = static_cast<SomeThreadData*>(data);
+  ::tensorflow::mutex_lock l(real_data->mu);
+  real_data->did_work = true;
+}
+
+}  // namespace
+
+TEST(TestEnv, TestThreads) {
+  TF_ThreadOptions options;
+  TF_DefaultThreadOptions(&options);
+  SomeThreadData data;
+  TF_Thread* thread =
+      TF_StartThread(&options, "SomeThreadName", &SomeThreadFunc, &data);
+  TF_JoinThread(thread);
+  ::tensorflow::mutex_lock l(data.mu);
+  ASSERT_TRUE(data.did_work);
+}
-- 
GitLab


From 51a86aae7cd98e6b09cf548ce4e57406d7e3314c Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Mon, 10 Dec 2018 12:53:22 -0800
Subject: [PATCH 305/873] Remaining core kernel tests coverage.

PiperOrigin-RevId: 224865488
---
 .../data/kernel_tests/from_generator_test.py  | 358 +++-----
 .../python/data/kernel_tests/map_test.py      | 803 ++++++++----------
 2 files changed, 473 insertions(+), 688 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/from_generator_test.py b/tensorflow/python/data/kernel_tests/from_generator_test.py
index a6625534e7..11919bdaee 100644
--- a/tensorflow/python/data/kernel_tests/from_generator_test.py
+++ b/tensorflow/python/data/kernel_tests/from_generator_test.py
@@ -21,7 +21,6 @@ import threading
 
 import numpy as np
 
-from tensorflow.python.client import session
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -32,43 +31,27 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
-class FromGeneratorTest(test_base.DatasetTestBase):
+@test_util.run_all_in_graph_and_eager_modes
+class DatasetConstructorTest(test_base.DatasetTestBase):
 
   def _testFromGenerator(self, generator, elem_sequence, num_repeats,
                          output_types=None):
     if output_types is None:
       output_types = dtypes.int64
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_generator(generator, output_types=output_types)
-        .repeat(num_repeats)
-        .prefetch(5))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(2):  # Run twice to test reinitialization.
-        sess.run(init_op)
-        for _ in range(num_repeats):
-          for elem in elem_sequence:
-            self.assertAllEqual(elem, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=output_types).repeat(num_repeats).prefetch(5)
+    self.assertDatasetProduces(
+        dataset,
+        elem_sequence * num_repeats,
+        requires_initialization=True,
+        num_test_iterations=2)
 
   def _testFromGeneratorOneShot(self, generator, elem_sequence, num_repeats):
-    iterator = dataset_ops.make_one_shot_iterator(
-        dataset_ops.Dataset.from_generator(generator, output_types=dtypes.int64)
-        .repeat(num_repeats)
-        .prefetch(5))
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for _ in range(num_repeats):
-        for elem in elem_sequence:
-          self.assertAllEqual(elem, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=dtypes.int64).repeat(num_repeats).prefetch(5)
+    self.assertDatasetProduces(
+        dataset, elem_sequence * num_repeats, num_test_iterations=2)
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorUsingFunction(self):
     def generator():
       for i in range(1, 100):
@@ -79,21 +62,18 @@ class FromGeneratorTest(test_base.DatasetTestBase):
     self._testFromGeneratorOneShot(generator, elem_sequence, 1)
     self._testFromGeneratorOneShot(generator, elem_sequence, 5)
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorUsingList(self):
     generator = lambda: [[i] * i for i in range(1, 100)]
     elem_sequence = list(generator())
     self._testFromGenerator(generator, elem_sequence, 1)
     self._testFromGenerator(generator, elem_sequence, 5)
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorUsingNdarray(self):
     generator = lambda: np.arange(100, dtype=np.int64)
     elem_sequence = list(generator())
     self._testFromGenerator(generator, elem_sequence, 1, output_types=np.int64)
     self._testFromGenerator(generator, elem_sequence, 5, output_types=np.int64)
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorUsingGeneratorExpression(self):
     # NOTE(mrry): Generator *expressions* are not repeatable (or in
     # general reusable), because they eagerly evaluate the `for`
@@ -105,7 +85,6 @@ class FromGeneratorTest(test_base.DatasetTestBase):
     self._testFromGenerator(generator, elem_sequence, 1)
     self._testFromGenerator(generator, elem_sequence, 5)
 
-  @test_util.run_deprecated_v1
   def testFromMultipleConcurrentGenerators(self):
     num_inner_repeats = 5
     num_outer_repeats = 100
@@ -128,22 +107,16 @@ class FromGeneratorTest(test_base.DatasetTestBase):
           output_shapes=([None], [3]))
               .repeat(num_inner_repeats).prefetch(5))
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(num_outer_repeats)
-        .interleave(interleave_fn, cycle_length=10,
-                    block_length=len(input_list)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(num_inner_repeats * num_outer_repeats):
-        for elem in input_list:
-          val0, val1 = sess.run(get_next)
-          self.assertAllEqual(elem[0], val0)
-          self.assertAllEqual(elem[1], val1)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.range(num_outer_repeats).interleave(
+        interleave_fn, cycle_length=10, block_length=len(input_list))
+    get_next = self.getNext(dataset)
+    for _ in range(num_inner_repeats * num_outer_repeats):
+      for elem in input_list:
+        val0, val1 = self.evaluate(get_next())
+        self.assertAllEqual(elem[0], val0)
+        self.assertAllEqual(elem[1], val1)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # TODO(b/67868766): Reenable this when the source of flakiness is discovered.
   def _testFromGeneratorsRunningInParallel(self):
@@ -186,22 +159,16 @@ class FromGeneratorTest(test_base.DatasetTestBase):
       return dataset_ops.Dataset.from_generator(
           generator, output_types=dtypes.int64, output_shapes=[]).prefetch(2)
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(num_parallel_iterators)
-        .interleave(
-            interleave_fn, cycle_length=num_parallel_iterators, block_length=1))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for elem in [0, 1]:
-        for _ in range(num_parallel_iterators):
-          self.assertAllEqual(elem, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.range(num_parallel_iterators).interleave(
+        interleave_fn, cycle_length=num_parallel_iterators, block_length=1)
+    get_next = self.getNext(dataset)
+
+    for elem in [0, 1]:
+      for _ in range(num_parallel_iterators):
+        self.assertAllEqual(elem, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorImplicitConversion(self):
     def generator():
       yield [1]
@@ -209,45 +176,28 @@ class FromGeneratorTest(test_base.DatasetTestBase):
       yield [3]
 
     for dtype in [dtypes.int8, dtypes.int32, dtypes.int64]:
-      iterator = dataset_ops.make_initializable_iterator(
-          dataset_ops.Dataset.from_generator(
-              generator, output_types=dtype, output_shapes=[1]))
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-
-      self.assertEqual(dtype, get_next.dtype)
-
-      with self.cached_session() as sess:
-        sess.run(init_op)
-        for expected in [[1], [2], [3]]:
-          next_val = sess.run(get_next)
-          self.assertEqual(dtype.as_numpy_dtype, next_val.dtype)
-          self.assertAllEqual(expected, next_val)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-  @test_util.run_deprecated_v1
+      dataset = dataset_ops.Dataset.from_generator(
+          generator, output_types=dtype, output_shapes=[1])
+      get_next = self.getNext(dataset)
+
+      for expected in [[1], [2], [3]]:
+        next_val = self.evaluate(get_next())
+        self.assertEqual(dtype.as_numpy_dtype, next_val.dtype)
+        self.assertAllEqual(expected, next_val)
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
+
   def testFromGeneratorString(self):
     def generator():
       yield "foo"
       yield b"bar"
       yield u"baz"
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_generator(
-            generator, output_types=dtypes.string, output_shapes=[]))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for expected in [b"foo", b"bar", b"baz"]:
-        next_val = sess.run(get_next)
-        self.assertAllEqual(expected, next_val)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=dtypes.string, output_shapes=[])
+    self.assertDatasetProduces(
+        dataset, expected_output=[b"foo", b"bar", b"baz"])
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorTypeError(self):
     def generator():
       yield np.array([1, 2, 3], dtype=np.int64)
@@ -255,23 +205,19 @@ class FromGeneratorTest(test_base.DatasetTestBase):
       yield "ERROR"
       yield np.array([7, 8, 9], dtype=np.int64)
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_generator(
-            generator, output_types=dtypes.int64, output_shapes=[3]))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual([1, 2, 3], sess.run(get_next))
-      self.assertAllEqual([4, 5, 6], sess.run(get_next))
-      with self.assertRaisesOpError("The expected type was int64"):
-        sess.run(get_next)
-      self.assertAllEqual([7, 8, 9], sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=dtypes.int64, output_shapes=[3])
+
+    get_next = self.getNext(dataset)
+
+    self.assertAllEqual([1, 2, 3], self.evaluate(get_next()))
+    self.assertAllEqual([4, 5, 6], self.evaluate(get_next()))
+    with self.assertRaisesOpError("The expected type was int64"):
+      self.evaluate(get_next())
+    self.assertAllEqual([7, 8, 9], self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorShapeError(self):
     def generator():
       yield np.array([1, 2, 3], dtype=np.int64)
@@ -279,23 +225,18 @@ class FromGeneratorTest(test_base.DatasetTestBase):
       yield np.array([7, 8, 9, 10], dtype=np.int64)
       yield np.array([11, 12, 13], dtype=np.int64)
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_generator(
-            generator, output_types=dtypes.int64, output_shapes=[3]))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual([1, 2, 3], sess.run(get_next))
-      self.assertAllEqual([4, 5, 6], sess.run(get_next))
-      with self.assertRaisesOpError(r"element of shape \(3,\) was expected"):
-        sess.run(get_next)
-      self.assertAllEqual([11, 12, 13], sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=dtypes.int64, output_shapes=[3])
+    get_next = self.getNext(dataset)
+
+    self.assertAllEqual([1, 2, 3], self.evaluate(get_next()))
+    self.assertAllEqual([4, 5, 6], self.evaluate(get_next()))
+    with self.assertRaisesOpError(r"element of shape \(3,\) was expected"):
+      self.evaluate(get_next())
+    self.assertAllEqual([11, 12, 13], self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorStructureError(self):
     def generator():
       yield 1, 2
@@ -304,46 +245,31 @@ class FromGeneratorTest(test_base.DatasetTestBase):
       yield 6, 7, 8
       yield 9, 10
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_generator(
-            generator, output_types=(dtypes.int64, dtypes.int64)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertEqual((1, 2), sess.run(get_next))
-      self.assertEqual((3, 4), sess.run(get_next))
-      with self.assertRaisesOpError(
-          r"The expected structure was \(tf\.int64, tf\.int64\)"):
-        sess.run(get_next)
-      with self.assertRaisesOpError(
-          r"The expected structure was \(tf\.int64, tf\.int64\)"):
-        sess.run(get_next)
-      self.assertEqual((9, 10), sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=(dtypes.int64, dtypes.int64))
+    get_next = self.getNext(dataset)
+
+    self.assertEqual((1, 2), self.evaluate(get_next()))
+    self.assertEqual((3, 4), self.evaluate(get_next()))
+    with self.assertRaisesOpError(
+        r"The expected structure was \(tf\.int64, tf\.int64\)"):
+      self.evaluate(get_next())
+    with self.assertRaisesOpError(
+        r"The expected structure was \(tf\.int64, tf\.int64\)"):
+      self.evaluate(get_next())
+    self.assertEqual((9, 10), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorHeterogeneous(self):
     def generator():
       yield 1
       yield [2, 3]
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_generator(
-            generator, output_types=dtypes.int64))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=dtypes.int64)
+    self.assertDatasetProduces(dataset, expected_output=[1, [2, 3]])
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual(1, sess.run(get_next))
-      self.assertAllEqual([2, 3], sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-  @test_util.run_deprecated_v1
   def testFromGeneratorStopShort(self):
 
     def generator():
@@ -351,18 +277,12 @@ class FromGeneratorTest(test_base.DatasetTestBase):
       yield 1
       yield 2
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_generator(
-            generator, output_types=dtypes.int64))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual(0, sess.run(get_next))
-      self.assertAllEqual(1, sess.run(get_next))
+    dataset = dataset_ops.Dataset.from_generator(
+        generator, output_types=dtypes.int64)
+    get_next = self.getNext(dataset)
+    self.assertAllEqual(0, self.evaluate(get_next()))
+    self.assertAllEqual(1, self.evaluate(get_next()))
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorDestructorCalled(self):
     # Use an `Event` to signal that the generator has been deleted.
     event = threading.Event()
@@ -381,23 +301,18 @@ class FromGeneratorTest(test_base.DatasetTestBase):
       def __del__(self):
         event.set()
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_generator(
-            GeneratorWrapper, output_types=dtypes.int64).take(2))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_generator(
+        GeneratorWrapper, output_types=dtypes.int64).take(2)
+    get_next = self.getNext(dataset)
 
-    with session.Session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual(42, sess.run(get_next))
-      self.assertAllEqual(42, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-      # Test that `GeneratorWrapper` object is destroyed when the
-      # iterator terminates (and the generator iterator is deleted).
-      self.assertTrue(event.is_set())
+    self.assertAllEqual(42, self.evaluate(get_next()))
+    self.assertAllEqual(42, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+    # Test that `GeneratorWrapper` object is destroyed when the
+    # iterator terminates (and the generator iterator is deleted).
+    self.assertTrue(event.is_set())
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorWithArgs(self):
 
     def flat_map_fn(elem):
@@ -410,20 +325,10 @@ class FromGeneratorTest(test_base.DatasetTestBase):
           generator_with_arg, output_types=dtypes.int64, output_shapes=(),
           args=(elem,))
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(5).flat_map(flat_map_fn))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      expected = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
-      for x in expected:
-        self.assertEqual(x, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.range(5).flat_map(flat_map_fn)
+    self.assertDatasetProduces(
+        dataset, expected_output=[1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
 
-  @test_util.run_deprecated_v1
   def testFromGeneratorWithTwoArgs(self):
 
     def flat_map_fn(elem, message):
@@ -436,26 +341,17 @@ class FromGeneratorTest(test_base.DatasetTestBase):
           generator_with_arg, output_types=(dtypes.int64, dtypes.string),
           output_shapes=((), ()), args=(elem, message))
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.zip(
-            (dataset_ops.Dataset.range(5),
-             dataset_ops.Dataset.from_tensors("Hi!").repeat(None)))
-        .flat_map(flat_map_fn))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      expected = [(0, b"Hi!"),
-                  (0, b"Hi!"), (1, b"Hi!"),
-                  (0, b"Hi!"), (1, b"Hi!"), (2, b"Hi!"),
-                  (0, b"Hi!"), (1, b"Hi!"), (2, b"Hi!"), (3, b"Hi!")]
-      for x in expected:
-        self.assertEqual(x, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.zip(
+        (dataset_ops.Dataset.range(5),
+         dataset_ops.Dataset.from_tensors("Hi!").repeat(None)
+        )).flat_map(flat_map_fn)
+
+    self.assertDatasetProduces(
+        dataset,
+        expected_output=[(0, b"Hi!"), (0, b"Hi!"), (1, b"Hi!"), (0, b"Hi!"),
+                         (1, b"Hi!"), (2, b"Hi!"), (0, b"Hi!"), (1, b"Hi!"),
+                         (2, b"Hi!"), (3, b"Hi!")])
 
-  @test_util.run_deprecated_v1
   def testGeneratorDatasetFinalizeFunctionCalled(self):
     # NOTE(mrry): This test tests the internal `_GeneratorDataset`,
     # which affords more control over what the finalize function can do than
@@ -472,19 +368,15 @@ class FromGeneratorTest(test_base.DatasetTestBase):
                                 stateful=True)
 
     dummy = constant_op.constant(37)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops._GeneratorDataset(
-            dummy, lambda x: x, lambda x: x, finalize_fn).take(2))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual(37, sess.run(get_next))
-      self.assertAllEqual(37, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-        self.assertTrue(event.is_set())
+    dataset = dataset_ops._GeneratorDataset(dummy, lambda x: x, lambda x: x,
+                                            finalize_fn).take(2)
+    get_next = self.getNext(dataset)
+
+    self.assertAllEqual(37, self.evaluate(get_next()))
+    self.assertAllEqual(37, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+      self.assertTrue(event.is_set())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/map_test.py b/tensorflow/python/data/kernel_tests/map_test.py
index e07706413d..67ef98f9fe 100644
--- a/tensorflow/python/data/kernel_tests/map_test.py
+++ b/tensorflow/python/data/kernel_tests/map_test.py
@@ -28,6 +28,7 @@ from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -46,6 +47,7 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import string_ops
 from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -83,14 +85,19 @@ def _make_coordinated_sloppy_dataset(num_elements, num_parallel_calls):
   return next_element, coordination_events
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_all_in_graph_and_eager_modes
 class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _buildMapDataset(self, components, count):
+
     def _map_fn(x, y, z):
       return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-    return (dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn)
-            .repeat(count))
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).map(
+        _map_fn).repeat(count)
+    self.assertEqual([c.shape[1:] for c in components],
+                     [shape for shape in dataset.output_shapes])
+    return dataset
 
   def testMapDataset(self):
     """Test an dataset that maps a TF function across its input elements."""
@@ -99,34 +106,32 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     components = (np.arange(7),
                   np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                   np.array(37.0) * np.arange(7))
-    count = array_ops.placeholder(dtypes.int64, shape=[])
 
-    dataset = self._buildMapDataset(components, count)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    self.assertEqual([c.shape[1:] for c in components],
-                     [t.shape for t in get_next])
+    # Test single-threaded access to the iterator.
+    get_next = self.getNext(self._buildMapDataset(components, 14))
+    for _ in range(14):
+      for i in range(7):
+        result = self.evaluate(get_next())
+        for component, result_component in zip(components, result):
+          self.assertAllEqual(component[i]**2, result_component)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
+  # TODO(b/117581999): add eager coverage, different threads run in graph
+  # context.
+  @test_util.run_v1_only("b/120545219")
+  def testSkipEagerMapDatasetMultithreaded(self):
+    # Test multi-threaded access to the same iterator.
+    components = (np.arange(7),
+                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
+                  np.array(37.0) * np.arange(7))
+    get_next = self.getNext(self._buildMapDataset(components, 18))
+    results = []
     with self.cached_session() as sess:
-      # Test single-threaded access to the iterator.
-      sess.run(init_op, feed_dict={count: 14})
-      for _ in range(14):
-        for i in range(7):
-          result = sess.run(get_next)
-          for component, result_component in zip(components, result):
-            self.assertAllEqual(component[i]**2, result_component)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-
-      # Test multi-threaded access to the same iterator.
-      sess.run(init_op, feed_dict={count: 18})
-      results = []
       def iterator_thread():
         while True:
           try:
-            results.append(sess.run(get_next))
+            results.append(sess.run(get_next()))
           except errors.OutOfRangeError:
             return
       threads = [self.checkedThread(target=iterator_thread) for _ in range(8)]
@@ -148,59 +153,66 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   def _buildParallelMapDataset(self, components, count, num_parallel_calls,
                                output_buffer_size):
+
     def _map_fn(x, y, z):
       return math_ops.square(x), math_ops.square(y), math_ops.square(z)
-    return (dataset_ops.Dataset.from_tensor_slices(components)
-            .map(_map_fn, num_parallel_calls=num_parallel_calls)
-            .prefetch(output_buffer_size)
-            .repeat(count))
+
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).map(
+        _map_fn, num_parallel_calls=num_parallel_calls).prefetch(
+            output_buffer_size).repeat(count)
+
+    self.assertEqual([c.shape[1:] for c in components],
+                     [shape for shape in dataset.output_shapes])
+    return dataset
 
   def testParallelMapDataset(self):
     """Test an dataset that maps a TF function across its input elements."""
+
     # The pipeline is TensorSliceDataset -> ParallelMapDataset(square_3) ->
     # RepeatDataset(count).
-    components = (np.arange(7),
-                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
-                  np.array(37.0) * np.arange(7))
-    count = array_ops.placeholder(dtypes.int64, shape=[])
-    num_parallel_calls = array_ops.placeholder(dtypes.int32, shape=[])
-    output_buffer_size = array_ops.placeholder(dtypes.int64, shape=[])
+    def do_test(num_parallel_calls, output_buffer_size):
+
+      components = (np.arange(7),
+                    np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
+                    np.array(37.0) * np.arange(7))
+      # Test single-threaded access to the iterator.
+      get_next = self.getNext(
+          self._buildParallelMapDataset(components, 14, num_parallel_calls,
+                                        output_buffer_size))
+      for _ in range(14):
+        for i in range(7):
+          result = self.evaluate(get_next())
+          for component, result_component in zip(components, result):
+            self.assertAllEqual(component[i]**2, result_component)
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
 
-    dataset = self._buildParallelMapDataset(
-        components, count, num_parallel_calls, output_buffer_size)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    for num_parallel_calls_val, output_buffer_size_val in [(1, 1), (1, 2), (2,
+                                                                            2),
+                                                           (2, 4), (8, 8),
+                                                           (8, 16)]:
+      do_test(num_parallel_calls_val, output_buffer_size_val)
 
-    self.assertEqual([c.shape[1:] for c in components],
-                     [t.shape for t in get_next])
+  # TODO(b/117581999): add eager coverage, different threads run in graph
+  # context.
+  @test_util.run_v1_only("b/120545219")
+  def testSkipEagerParallelMapDatasetMultithreaded(self):
 
-    with self.cached_session() as sess:
+    def do_test(num_parallel_calls, output_buffer_size):
+      # Test multi-threaded access to the same iterator.
+      components = (np.arange(7),
+                    np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
+                    np.array(37.0) * np.arange(7))
+      get_next = self.getNext(
+          self._buildParallelMapDataset(components, 18, num_parallel_calls,
+                                        output_buffer_size))
+      results = []
+      with self.cached_session() as sess:
 
-      def do_test(num_parallel_calls_val, output_buffer_size_val):
-        # Test single-threaded access to the iterator.
-        sess.run(init_op, feed_dict={
-            count: 14,
-            num_parallel_calls: num_parallel_calls_val,
-            output_buffer_size: output_buffer_size_val})
-        for _ in range(14):
-          for i in range(7):
-            result = sess.run(get_next)
-            for component, result_component in zip(components, result):
-              self.assertAllEqual(component[i]**2, result_component)
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-        # Test multi-threaded access to the same iterator.
-        sess.run(init_op, feed_dict={
-            count: 18,
-            num_parallel_calls: num_parallel_calls_val,
-            output_buffer_size: output_buffer_size_val})
-        results = []
         def iterator_thread():
           while True:
             try:
-              results.append(sess.run(get_next))
+              results.append(sess.run(get_next()))
             except errors.OutOfRangeError:
               return
         threads = [self.checkedThread(target=iterator_thread)
@@ -237,14 +249,10 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = self._buildParallelMapDataset(components, 1000, 100, 100)
     # NOTE(mrry): Also test that the prefetching thread is cancelled correctly.
     dataset = dataset.prefetch(100)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(3):
-        sess.run(get_next)
+    for _ in range(3):
+      self.evaluate(get_next())
 
   def testParallelMapUnspecifiedOutputSize(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
@@ -252,14 +260,10 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = (dataset_ops.Dataset.from_tensor_slices(components)
                .map(lambda x: array_ops.check_numerics(x, "message"),
                     num_parallel_calls=2))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(3):
-        sess.run(get_next)
+    for _ in range(3):
+      self.evaluate(get_next())
 
   def testParallelMapError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
@@ -267,20 +271,16 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = (dataset_ops.Dataset.from_tensor_slices(components)
                .map(lambda x: array_ops.check_numerics(x, "message"),
                     num_parallel_calls=2))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(3):
-        sess.run(get_next)
-      # The 4th element is NaN, so `array_ops.check_numerics()` should fail.
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-      sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    for _ in range(3):
+      self.evaluate(get_next())
+    # The 4th element is NaN, so `array_ops.check_numerics()` should fail.
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(get_next())
+    self.evaluate(get_next())
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   def testPrefetchError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
@@ -288,20 +288,17 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = (dataset_ops.Dataset.from_tensor_slices(components)
                .map(lambda x: array_ops.check_numerics(x, "message"))
                .prefetch(2))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for _ in range(3):
-        sess.run(get_next)
-      # The 4th element is NaN, so `array_ops.check_numerics()` should fail.
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(get_next)
-      sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    get_next = self.getNext(dataset)
+
+    for _ in range(3):
+      self.evaluate(get_next())
+    # The 4th element is NaN, so `array_ops.check_numerics()` should fail.
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(get_next())
+    self.evaluate(get_next())
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   def testCaptureIterator(self):
 
@@ -314,23 +311,22 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       return dataset_ops.Dataset.range(10).map(_map_fn)
 
     def _build_graph():
-      captured_iterator = dataset_ops.make_initializable_iterator(
-          dataset_ops.Dataset.range(10))
+      if context.executing_eagerly():
+        captured_iterator = iter(dataset_ops.Dataset.range(10))
+      else:
+        captured_iterator = dataset_ops.Dataset.range(
+            10).make_initializable_iterator()
       ds = _build_ds(captured_iterator)
-      iterator = ds.make_initializable_iterator()
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      return captured_iterator.initializer, init_op, get_next
-
-    with ops.Graph().as_default() as g:
-      captured_init_op, init_op, get_next = _build_graph()
-      with self.session(graph=g) as sess:
-        sess.run(captured_init_op)
-        sess.run(init_op)
-        for i in range(10):
-          self.assertEqual(i * i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+      return captured_iterator, ds
+
+    captured_iter, ds = _build_graph()
+    if not context.executing_eagerly():
+      self.evaluate(captured_iter.initializer)
+    get_next = self.getNext(ds, requires_initialization=True)
+    for i in range(10):
+      self.assertEqual(i * i, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   def testCaptureHashTable(self):
     # NOTE(mrry): We must use the V2 variants of `HashTable`
@@ -345,41 +341,37 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     input_sentences = dataset_ops.Dataset.from_tensor_slices(
         ["brain brain tank salad surgery", "surgery brain"])
 
-    iterator = dataset_ops.make_initializable_iterator(
-        input_sentences
-        .map(lambda x: string_ops.string_split([x]).values).map(table.lookup))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = input_sentences.map(lambda x: string_ops.string_split([x]).values
+                                 ).map(table.lookup)
 
-    with self.cached_session() as sess:
-      sess.run(table.initializer)
-      sess.run(init_op)
-      sess.run(get_next)
-      sess.run(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    get_next = self.getNext(dataset, requires_initialization=True)
+
+    self.evaluate(table.initializer)
+    self.evaluate(get_next())
+    self.evaluate(get_next())
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   def testCaptureQueue(self):
     elements = np.random.randint(100, size=[200])
     queue = data_flow_ops.FIFOQueue(200, dtypes.int64, shapes=[])
     enqueue_op = queue.enqueue_many(elements)
     close_op = queue.close()
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(0).repeat(-1)
-        .map(lambda _: queue.dequeue()))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_tensors(0).repeat(
+        -1).map(lambda _: queue.dequeue())
 
-    with self.cached_session() as sess:
-      sess.run(enqueue_op)
-      sess.run(close_op)
-      sess.run(init_op)
-      for element in elements:
-        self.assertEqual(element, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    get_next = self.getNext(dataset, requires_initialization=True)
+    self.evaluate(enqueue_op)
+    self.evaluate(close_op)
+
+    for element in elements:
+      self.assertEqual(element, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
-  def testCaptureSameResourceMultipleTimes(self):
+  # TODO(b/117581999): Possible deadlock in eager mode, debug.
+  @test_util.run_v1_only("b/120545219")
+  def testSkipEagerCaptureSameResourceMultipleTimes(self):
     elements = np.random.randint(100, size=[200])
     queue = data_flow_ops.FIFOQueue(
         200, dtypes.int64, shapes=[], shared_name="shared_queue")
@@ -389,101 +381,84 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     enqueue_op = queue.enqueue_many(elements)
     close_op = queue.close()
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(0).repeat(-1)
-        .map(lambda _: (queue.dequeue(), queue_2.dequeue())))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_tensors(0).repeat(
+        -1).map(lambda _: (queue.dequeue(), queue_2.dequeue()))
 
-    with self.cached_session() as sess:
-      sess.run(enqueue_op)
-      sess.run(close_op)
-      sess.run(init_op)
-      for i in range(100):
-        self.assertEqual(sorted([elements[i * 2], elements[i * 2 + 1]]),
-                         sorted(sess.run(get_next)))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    self.evaluate(enqueue_op)
+    self.evaluate(close_op)
+    get_next = self.getNext(dataset, requires_initialization=True)
+    for i in range(100):
+      self.assertCountEqual([elements[i * 2], elements[i * 2 + 1]],
+                            self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   def testCaptureVariable(self):
     counter_var = variable_scope.get_variable(
         "counter", (), dtypes.int32, use_resource=True)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(0).repeat(10)
-        .map(lambda _: counter_var.assign_add(1)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_tensors(0).repeat(
+        10).map(lambda _: counter_var.assign_add(1))
+    get_next = self.getNext(dataset, requires_initialization=True)
 
-    with self.cached_session() as sess:
-      sess.run(counter_var.initializer)
-      sess.run(init_op)
-      for i in range(10):
-        self.assertEqual(i, sess.run(counter_var))
-        self.assertEqual(i + 1, sess.run(get_next))
-      self.assertEqual(10, sess.run(counter_var))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-      self.assertEqual(10, sess.run(counter_var))
+    self.evaluate(counter_var.initializer)
 
-  def testCaptureUninitializedVariableError(self):
+    for i in range(10):
+      self.assertEqual(i, self.evaluate(counter_var))
+      self.assertEqual(i + 1, self.evaluate(get_next()))
+    self.assertEqual(10, self.evaluate(counter_var))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+    self.assertEqual(10, self.evaluate(counter_var))
+
+  # TODO(b/117581999): error not captured for eager mode, debug.
+  @test_util.run_v1_only("b/120545219")
+  def testSkipEagerCaptureUninitializedVariableError(self):
     counter_var = variable_scope.get_variable(
         "counter", (), dtypes.int32, use_resource=True)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(0).repeat(10)
-        .map(lambda _: counter_var.assign_add(1)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_tensors(0).repeat(
+        10).map(lambda _: counter_var.assign_add(1))
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      with self.assertRaises(errors.NotFoundError):
-        sess.run(get_next)
-
-  def testSeededStatefulOperatorIsProperlyStateful(self):
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(0).repeat(10)
-        .map(lambda _: random_ops.random_uniform((), seed=11)).batch(2))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    get_next = self.getNext(dataset, requires_initialization=True)
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      random_values = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          random_values.extend(sess.run(get_next))
-      self.assertEqual(10, len(random_values))
-      self.assertGreater(np.abs(np.diff(random_values)).max(), 1e-6)
-      sess.run(init_op)
-      random_values_2 = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          random_values_2.extend(sess.run(get_next))
+    with self.assertRaises(errors.NotFoundError):
+      self.evaluate(get_next())
 
-      # Randomness is repeatable given same seed
-      self.assertAllClose(random_values, random_values_2)
+  def testSeededStatefulOperatorIsProperlyStateful(self):
+    dataset = dataset_ops.Dataset.from_tensors(0).repeat(
+        10).map(lambda _: random_ops.random_uniform((), seed=11)).batch(2)
+
+    get_next = self.getNext(dataset, requires_initialization=True)
+    random_values = []
+    with self.assertRaises(errors.OutOfRangeError):
+      while True:
+        random_values.extend(self.evaluate(get_next()))
+    self.assertLen(random_values, 10)
+    self.assertGreater(np.abs(np.diff(random_values)).max(), 1e-6)
+
+    get_next = self.getNext(dataset, requires_initialization=True)
+    random_values_2 = []
+    with self.assertRaises(errors.OutOfRangeError):
+      while True:
+        random_values_2.extend(self.evaluate(get_next()))
+
+    # Randomness is repeatable given same seed
+    self.assertAllClose(random_values, random_values_2)
 
   def testStatefulMapKeepsStateAcrossIterators(self):
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(0).repeat(10)
-        .map(lambda _: random_ops.random_uniform((), seed=11))
-        .repeat(1000)
-        .batch(10))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_tensors(0).repeat(10).map(
+        lambda _: random_ops.random_uniform((), seed=11)).repeat(1000).batch(10)
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      random_values = sess.run(get_next)
-
-      # Assert that one of the next 99 batches yielded by the iterator is
-      # different from the first.
-      i = 0
-      while i < 99:
-        if np.any(random_values != sess.run(get_next)):
-          break
-        i += 1
-      self.assertLess(i, 99)
+    get_next = self.getNext(dataset)
+    random_values = self.evaluate(get_next())
+
+    # Assert that one of the next 99 batches yielded by the iterator is
+    # different from the first.
+    i = 0
+    while i < 99:
+      if np.any(random_values != self.evaluate(get_next())):
+        break
+      i += 1
+    self.assertLess(i, 99)
 
   def testStatefulOperationInShortCircuit(self):
     counter_var = variable_scope.get_variable(
@@ -493,36 +468,25 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       counter_var.assign_add(1)
       return x
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(10).map(increment_fn))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.range(10).map(increment_fn)
 
-    with self.cached_session() as sess:
-      sess.run(counter_var.initializer)
-      sess.run(init_op)
-      for i in range(10):
-        self.assertEqual(i, sess.run(counter_var))
-        self.assertEqual(i, sess.run(get_next))
-      self.assertEqual(10, sess.run(counter_var))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
-      self.assertEqual(10, sess.run(counter_var))
+    get_next = self.getNext(dataset, requires_initialization=True)
 
-  def testMapDict(self):
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(10)
-        .map(lambda x: {"foo": x * 2, "bar": x ** 2})
-        .map(lambda d: d["foo"] + d["bar"]))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    self.evaluate(counter_var.initializer)
+    for i in range(10):
+      self.assertEqual(i, self.evaluate(counter_var))
+      self.assertEqual(i, self.evaluate(get_next()))
+    self.assertEqual(10, self.evaluate(counter_var))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+    self.assertEqual(10, self.evaluate(counter_var))
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(10):
-        self.assertEqual(i * 2 + i**2, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+  def testMapDict(self):
+    dataset = dataset_ops.Dataset.range(10).map(
+        lambda x: {"foo": x * 2, "bar": x**2}).map(
+            lambda d: d["foo"] + d["bar"])
+    self.assertDatasetProduces(
+        dataset, expected_output=[i * 2 + i**2 for i in range(10)])
 
   def testMapNamedtuple(self, count=10):
     # construct dataset of tuples
@@ -545,33 +509,23 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset_tuple = dataset_tuple.map(preprocess_tuple)
     dataset_namedtuple = dataset_namedtuple.map(preprocess_namedtuple)
 
-    next_tuple = dataset_ops.make_one_shot_iterator(dataset_tuple).get_next()
-    next_namedtuple = dataset_ops.make_one_shot_iterator(
-        dataset_namedtuple).get_next()
+    next_tuple = self.getNext(dataset_tuple)
+    next_namedtuple = self.getNext(dataset_namedtuple)
 
     # make sure both datasets contain the same data
-    with self.cached_session() as sess:
-      for i in range(count):
-        tuple_, namedtuple_ = sess.run([next_tuple, next_namedtuple])
-        self.assertEqual(tuple_, namedtuple_)
-        self.assertEqual(tuple_, (i, -2 * i))
+    for i in range(count):
+      tuple_, namedtuple_ = self.evaluate([next_tuple(), next_namedtuple()])
+      self.assertEqual(tuple_, namedtuple_)
+      self.assertEqual(tuple_, (i, -2 * i))
 
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_namedtuple)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_namedtuple())
 
   def testUseStepContainerInMap(self):
     row = np.arange(6)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(row)
-        .map(lambda elems: functional_ops.map_fn(lambda x: x * x, elems)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual(row**2, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.from_tensors(
+        row).map(lambda elems: functional_ops.map_fn(lambda x: x * x, elems))
+    self.assertDatasetProduces(dataset, expected_output=[row**2])
 
   def testCaseAndCondInMap(self):
 
@@ -599,24 +553,19 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
           pred_fn_pairs, default=multiply, exclusive=True)
 
     def build_dataset(row, num):
-      iterator = dataset_ops.make_initializable_iterator(
-          dataset_ops.Dataset.from_tensor_slices(row).map(
-              lambda x: control_map_fn(x, num)))
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      return init_op, get_next
+      dataset = dataset_ops.Dataset.from_tensor_slices(
+          row).map(lambda x: control_map_fn(x, num))
+      return self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      row = np.arange(6)
-      for num in [2, 3, 4]:
-        init_op, get_next = build_dataset(row, num)
-        sess.run(init_op)
-        for i in range(6):
-          self.assertEqual(
-              (i // 2 if i % 2 else i * 2) if (num == 2 or num == 3) else i * 2,
-              sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+    row = np.arange(6)
+    for num in [2, 3, 4]:
+      get_next = build_dataset(row, num)
+      for i in range(6):
+        self.assertEqual(
+            (i // 2 if i % 2 else i * 2) if (num == 2 or num == 3) else i * 2,
+            self.evaluate(get_next()))
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
 
   def testCaseInWhileInMap(self):
 
@@ -638,24 +587,19 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     def build_dataset(row, num):
       # pylint: disable=g-long-lambda
-      iterator = dataset_ops.make_initializable_iterator(
-          dataset_ops.Dataset.from_tensors(row).map(
-              lambda elems: functional_ops.map_fn(
-                  lambda x: control_map_fn(x, num), elems)))
-      init_op = iterator.initializer
-      get_next = iterator.get_next()
-      return init_op, get_next
+      dataset = dataset_ops.Dataset.from_tensors(
+          row).map(lambda elems: functional_ops.map_fn(
+              lambda x: control_map_fn(x, num), elems))
+      return self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      row = np.arange(6)
-      for num in [2, 3, 4]:
-        init_op, get_next = build_dataset(row, num)
-        sess.run(init_op)
-        self.assertAllEqual(
-            [x // 2 if (num == 2 or num == 3) else x * 2 for x in row],
-            sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+    row = np.arange(6)
+    for num in [2, 3, 4]:
+      get_next = build_dataset(row, num)
+      self.assertAllEqual(
+          [x // 2 if (num == 2 or num == 3) else x * 2 for x in row],
+          self.evaluate(get_next()))
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
 
   def testCaseAndCondInWhileInMap(self):
 
@@ -685,21 +629,17 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     row = np.arange(6)
     num = 2
     # pylint: disable=g-long-lambda
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(row).map(
-            lambda elems: functional_ops.map_fn(
-                lambda x: control_map_fn(x, num), elems)))
+    dataset = dataset_ops.Dataset.from_tensors(
+        row).map(lambda elems: functional_ops.map_fn(
+            lambda x: control_map_fn(x, num), elems))
     # pylint: enable=g-long-lambda
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      self.assertAllEqual([(x // 2 if x % 2 else x * 2) if
-                           (num == 2 or num == 3) else x * 2 for x in row],
-                          sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    self.assertAllEqual([(x // 2 if x % 2 else x * 2) if
+                         (num == 2 or num == 3) else x * 2 for x in row],
+                        self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   def testPrefetch(self):
     # We will use this event to test that `_map_py_func()` has been
@@ -717,58 +657,54 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     def _map_fn(x):
       return script_ops.py_func(_map_py_func, [x], x.dtype)
 
-    buffer_size_placeholder = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(100)
-        .map(_map_fn)
-        .prefetch(buffer_size_placeholder))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    def do_test(buffer_size):
+      dataset = dataset_ops.Dataset.range(100).map(_map_fn).prefetch(
+          buffer_size)
 
-    with self.cached_session() as sess:
+      get_next = self.getNext(dataset)
       # Simple test that prefetch yields the expected values in the
       # expected order.
-      for buffer_size in [1, 10, 100, 1000]:
-        sess.run(init_op, feed_dict={buffer_size_placeholder: buffer_size})
-        for i in range(100):
-          self.assertEqual(i * i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
-
-      # We can indirectly observe that varying the buffer size has the
-      # intended effect by observing when `ev` is set (on the 6th
-      # invocation of `_map_py_func()`).
-      # NOTE(mrry): We do not test with `buffer_size ==
-      # set_event_during_invocation`, because we must consume at least
-      # one element to start the prefetching.
-      for buffer_size in range(1, set_event_during_invocation):
-        event_will_be_set_after_consuming = (
-            set_event_during_invocation - buffer_size + 1)
-
-        ev.clear()
-        sess.run(init_op, feed_dict={buffer_size_placeholder: buffer_size})
-        for i in range(event_will_be_set_after_consuming):
-          self.assertFalse(ev.is_set())
-          self.assertEqual(i * i, sess.run(get_next))
-        ev.wait()
-        for i in range(event_will_be_set_after_consuming, 100):
-          self.assertEqual(i * i, sess.run(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          sess.run(get_next)
+      for i in range(100):
+        self.assertEqual(i * i, self.evaluate(get_next()))
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
+
+    for buffer_size in [1, 10, 100, 1000]:
+      do_test(buffer_size)
+
+    # We can indirectly observe that varying the buffer size has the
+    # intended effect by observing when `ev` is set (on the 6th
+    # invocation of `_map_py_func()`).
+    # NOTE(mrry): We do not test with `buffer_size ==
+    # set_event_during_invocation`, because we must consume at least
+    # one element to start the prefetching.
+    def do_test_ev(buffer_size):
+      dataset = dataset_ops.Dataset.range(100).map(_map_fn).prefetch(
+          buffer_size)
+
+      get_next = self.getNext(dataset)
+
+      event_will_be_set_after_consuming = (
+          set_event_during_invocation - buffer_size + 1)
+
+      ev.clear()
+      for i in range(event_will_be_set_after_consuming):
+        self.assertFalse(ev.is_set())
+        self.assertEqual(i * i, self.evaluate(get_next()))
+      ev.wait()
+      for i in range(event_will_be_set_after_consuming, 100):
+        self.assertEqual(i * i, self.evaluate(get_next()))
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
 
-  def testReturnList(self):
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(10)
-        .map(lambda x: [x, constant_op.constant(37.0)]))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    for buffer_size in range(1, set_event_during_invocation):
+      do_test_ev(buffer_size)
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(10):
-        self.assertEqual((i, 37.0), sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+  def testReturnList(self):
+    dataset = dataset_ops.Dataset.range(
+        10).map(lambda x: [x, constant_op.constant(37.0)])
+    self.assertDatasetProduces(
+        dataset, expected_output=[(i, 37.0) for i in range(10)])
 
   def testMultiOutputPyFunc(self):
     # The `tf.py_func()` op returns a list of tensors for its outputs.
@@ -778,17 +714,9 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       return script_ops.py_func(
           _map_py_func, [x_tensor], [dtypes.int64, dtypes.float64])
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(10).map(_map_fn))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(10):
-        self.assertEqual((i, 37.0), sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.range(10).map(_map_fn)
+    self.assertDatasetProduces(
+        dataset, expected_output=[(i, 37.0) for i in range(10)])
 
   def testSparse(self):
 
@@ -798,19 +726,9 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
           values=(i * np.array([1])),
           dense_shape=np.array([1, 1]))
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(10).map(_sparse))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(10):
-        actual = sess.run(get_next)
-        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
-        self.assertSparseValuesEqual(actual, _sparse(i))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.range(10).map(_sparse)
+    self.assertDatasetProduces(
+        dataset, expected_output=[_sparse(i) for i in range(10)])
 
   def testSparseChain(self):
 
@@ -824,19 +742,11 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertTrue(sparse_tensor.is_sparse(i))
       return sparse_ops.sparse_concat(0, [i, i])
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(10).map(_sparse).map(_check))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.range(10).map(_sparse).map(_check)
 
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(10):
-        actual = sess.run(get_next)
-        self.assertIsInstance(actual, sparse_tensor.SparseTensorValue)
-        self.assertSparseValuesEqual(actual, _check(_sparse(i)).eval())
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    self.assertDatasetProduces(
+        dataset,
+        expected_output=[self.evaluate(_check(_sparse(i))) for i in range(10)])
 
   def testParallelMapOutOfRangeError(self):
     def raising_py_func(i):
@@ -845,32 +755,18 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       else:
         return i
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(105)
-        .map(lambda x: script_ops.py_func(raising_py_func, [x], dtypes.int64),
-             num_parallel_calls=2))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(100):
-        self.assertEqual(i, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.range(105).map(
+        lambda x: script_ops.py_func(raising_py_func, [x], dtypes.int64),
+        num_parallel_calls=2)
+    get_next = self.getNext(dataset)
+    for i in range(100):
+      self.assertEqual(i, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   def testConstantOutput(self):
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(10).map(lambda x: [x, "hello", 10]))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(init_op)
-      for i in range(10):
-        self.assertEqual((i, b"hello", 10), sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    dataset = dataset_ops.Dataset.range(10).map(lambda x: [x, "hello", 10])
+    self.assertDatasetProduces(dataset, [(i, b"hello", 10) for i in range(10)])
 
   def testWarnOnLookupTable(self):
     def collecting_function(x):
@@ -899,7 +795,7 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
         dataset_ops.Dataset.from_tensor_slices).map(
             lambda ds: ds.batch(3)).flat_map(lambda x: x)
 
-    self.assertDatasetProduces(dataset, [[1.0, 2.0, 3.0]])
+    self.assertDatasetProduces(dataset, expected_output=[[1.0, 2.0, 3.0]])
 
   def testReturnValueError(self):
     dataset = dataset_ops.Dataset.from_tensors([1.0, 2.0, 3.0])
@@ -932,11 +828,8 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       return const_tensor
 
     dataset = dataset.map(broken_function)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-
-    with self.cached_session() as sess:
-      with self.assertRaisesRegexp(errors.InvalidArgumentError, "BrokenConst"):
-        sess.run(iterator.initializer)
+    self.assertDatasetProduces(
+        dataset, expected_error=(errors.InvalidArgumentError, "BrokenConst"))
 
 # pylint: disable=g-long-lambda
   @parameterized.named_parameters(
@@ -959,12 +852,10 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       return tids
 
     dataset = make_dataset_fn(dataset, _map_fn)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-    get_next = iterator.get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      tids = sess.run(get_next)
-      self.assertTrue(all(tids[0] == tid for tid in tids))
+    tids = self.evaluate(get_next())
+    self.assertTrue(all(tids[0] == tid for tid in tids))
 # pylint: enable=g-long-lambda
 
   @parameterized.named_parameters(
@@ -980,30 +871,28 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testShortCircuit(self, structure, map_fn, num_parallel_calls):
     dataset = self.structuredDataset(structure).repeat().map(
         map_fn, num_parallel_calls=num_parallel_calls)
-    get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      if isinstance(structure, tuple):
-        expected = map_fn(*sess.run(self.structuredElement(structure)))
-      else:
-        expected = map_fn(sess.run(self.structuredElement(structure)))
-      self.assertEqual(expected, sess.run(get_next))
+    if isinstance(structure, tuple):
+      expected = map_fn(*self.evaluate(self.structuredElement(structure)))
+    else:
+      expected = map_fn(self.evaluate(self.structuredElement(structure)))
+    self.assertEqual(expected, self.evaluate(get_next()))
 
   @parameterized.named_parameters(
       ("Sequential", None),
       ("Parallel", 10),
   )
   def testShortCircuitCapturedInput(self, num_parallel_calls):
-    captured_t = array_ops.placeholder(dtypes.int64, shape=[])
+    captured_t = variables.Variable(42)
     dataset = self.structuredDataset(None).repeat().map(
         lambda x: captured_t, num_parallel_calls=num_parallel_calls)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    get_next = iterator.get_next()
+    self.evaluate(variables.global_variables_initializer())
+    get_next = self.getNext(dataset, requires_initialization=True)
 
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer, feed_dict={captured_t: 42})
-      self.assertEqual(42, sess.run(get_next))
+    self.assertEqual(42, self.evaluate(get_next()))
 
+  # TODO(b/117581999): Add eager coverage.
   @parameterized.named_parameters(
       ("1", 1, 1),
       ("2", 10, 1),
@@ -1012,7 +901,9 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("5", 100, 10),
       ("6", 100, 100),
   )
-  def testSloppyInterleaveInOrder(self, num_elements, num_parallel_calls):
+  @test_util.run_v1_only("b/120545219")
+  def testSkipEagerSloppyInterleaveInOrder(self, num_elements,
+                                           num_parallel_calls):
     get_next, coordination_events = _make_coordinated_sloppy_dataset(
         num_elements, num_parallel_calls)
     config = config_pb2.ConfigProto(
@@ -1025,12 +916,15 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  # TODO(b/117581999): Add eager coverage.
   @parameterized.named_parameters(
       ("1", 10, 10),
       ("2", 100, 10),
       ("3", 100, 100),
   )
-  def testSloppyInterleaveOutOfOrder(self, num_elements, num_parallel_calls):
+  @test_util.run_v1_only("b/120545219")
+  def testSkipEagerSloppyInterleaveOutOfOrder(self, num_elements,
+                                              num_parallel_calls):
     get_next, coordination_events = _make_coordinated_sloppy_dataset(
         num_elements, num_parallel_calls)
     config = config_pb2.ConfigProto(
@@ -1064,6 +958,5 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.evaluate(get_next())
 
 
-
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 542ec6d4282b9b43f8a1468b466e672bc8f7e32c Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Mon, 10 Dec 2018 13:13:12 -0800
Subject: [PATCH 306/873] [TF optimizers (v1)] Non-slot variables are
 ResourceVariables iff the input vars are.

This fixes a bug where Adam beta*_power variables were always created as RefVars
even if the optimizer acts on ResourceVars.  This broke certain defun + Adam
use cases.

Also fixed the unit tests, which *always* created ResourceVariables
(ever since variables.Variable() constructor became aliased to ResourceVariables).

PiperOrigin-RevId: 224869338
---
 tensorflow/python/training/adam_test.py | 19 +++++++++++++++----
 tensorflow/python/training/optimizer.py |  5 ++++-
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py
index b0bae27577..15958112bd 100644
--- a/tensorflow/python/training/adam_test.py
+++ b/tensorflow/python/training/adam_test.py
@@ -68,8 +68,8 @@ class AdamOptimizerTest(test.TestCase):
           var0 = resource_variable_ops.ResourceVariable(var0_np)
           var1 = resource_variable_ops.ResourceVariable(var1_np)
         else:
-          var0 = variables.Variable(var0_np)
-          var1 = variables.Variable(var1_np)
+          var0 = variables.RefVariable(var0_np)
+          var1 = variables.RefVariable(var1_np)
         grads0_np_indices = np.array([0, 1], dtype=np.int32)
         grads0 = ops.IndexedSlices(
             constant_op.constant(grads0_np),
@@ -156,6 +156,9 @@ class AdamOptimizerTest(test.TestCase):
                               self.evaluate(repeated_index_update_var))
 
   def doTestBasic(self, use_resource=False, use_callable_params=False):
+    if context.executing_eagerly() and not use_resource:
+      self.skipTest(
+          "Skipping test with use_resource=False and executing eagerly.")
     for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
       with self.session(graph=ops.Graph()):
         # Initialize variables for numpy implementation.
@@ -171,8 +174,8 @@ class AdamOptimizerTest(test.TestCase):
           var1 = resource_variable_ops.ResourceVariable(
               var1_np, name="var1_%d" % i)
         else:
-          var0 = variables.Variable(var0_np)
-          var1 = variables.Variable(var1_np)
+          var0 = variables.RefVariable(var0_np)
+          var1 = variables.RefVariable(var1_np)
         grads0 = constant_op.constant(grads0_np)
         grads1 = constant_op.constant(grads1_np)
 
@@ -194,6 +197,14 @@ class AdamOptimizerTest(test.TestCase):
         self.assertTrue(beta2_power is not None)
         self.assertIn(beta1_power, opt_variables)
         self.assertIn(beta2_power, opt_variables)
+        # Ensure that non-slot variables are the same type as the requested
+        # variables.
+        self.assertEqual(
+            use_resource,
+            resource_variable_ops.is_resource_variable(beta1_power))
+        self.assertEqual(
+            use_resource,
+            resource_variable_ops.is_resource_variable(beta2_power))
 
         if not context.executing_eagerly():
           with ops.Graph().as_default():
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index d9ebdcad1f..eaa563e84a 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -822,7 +822,10 @@ class Optimizer(
               name=name, shape=None)
           if restored_initial_value is not None:
             initial_value = restored_initial_value
-        v = variable_scope.variable(initial_value, name=name, trainable=False)
+        v = variable_scope.variable(
+            initial_value, name=name, trainable=False,
+            use_resource=resource_variable_ops.is_resource_variable(
+                colocate_with))
       # Restore this variable by name if necessary, but don't add a
       # Checkpointable dependency. Optimizers return the current graph's
       # non-slot variables from _checkpoint_dependencies explicitly rather
-- 
GitLab


From d9ab4a8fedd67b5b4944a4033acfdee5f5001492 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 10 Dec 2018 13:20:49 -0800
Subject: [PATCH 307/873] Internal change.

PiperOrigin-RevId: 224870669
---
 tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 2 +-
 tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 62e1eaa366..4c4e8ba1ca 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -133,7 +133,7 @@ fi
 # Create a python test directory to avoid package name conflict
 create_python_test_dir "${PY_TEST_DIR}"
 
-./bazel-bin/tensorflow/tools/pip_package/build_pip_package "$PWD/${PY_TEST_DIR}" "${EXTRA_PIP_FLAGS}"
+./bazel-bin/tensorflow/tools/pip_package/build_pip_package "$PWD/${PY_TEST_DIR}" ${EXTRA_PIP_FLAGS}
 
 if [[ "$TF_NIGHTLY" == 1 ]]; then
   exit 0
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index acafd9ebce..070235fcb2 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -136,7 +136,7 @@ fi
 create_python_test_dir "${PY_TEST_DIR}"
 
 ./bazel-bin/tensorflow/tools/pip_package/build_pip_package "$PWD/${PY_TEST_DIR}" \
-  --gpu "${EXTRA_PIP_FLAGS}"
+  --gpu ${EXTRA_PIP_FLAGS}
 
 if [[ "$TF_NIGHTLY" == 1 ]]; then
   exit 0
-- 
GitLab


From 2d86af34dede8ff45fbec5373e991f1259f5f447 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 13:43:23 -0800
Subject: [PATCH 308/873] Map backprop filter convolutions to depthwise
 convolutions in cases where the filter is depthwise.

PiperOrigin-RevId: 224874845
---
 .../compiler/tests/depthwise_conv_op_test.py  |   4 +-
 .../tf2xla/kernels/conv_op_helpers.cc         | 149 ++++++++++++++----
 2 files changed, 118 insertions(+), 35 deletions(-)

diff --git a/tensorflow/compiler/tests/depthwise_conv_op_test.py b/tensorflow/compiler/tests/depthwise_conv_op_test.py
index 174bfa9efb..6183d3ed5b 100644
--- a/tensorflow/compiler/tests/depthwise_conv_op_test.py
+++ b/tensorflow/compiler/tests/depthwise_conv_op_test.py
@@ -379,8 +379,8 @@ class DepthwiseConv2DTest(xla_test.XLATestCase):
     for index, (input_size, filter_size, output_size, stride,
                 padding) in enumerate(ConfigsToTest()):
       print("Testing DepthwiseConv2DFilterGradCompare,", index, "th config:",
-            input_size, "*", filter_size, "stride:", stride, "padding:",
-            padding)
+            input_size, "*", filter_size, "producing output", output_size,
+            "stride:", stride, "padding:", padding)
       self._CompareBackpropFilter(input_size, filter_size, output_size,
                                   stride, padding)
 
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
index 641fefafb3..399e6e1187 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
@@ -392,23 +392,31 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
                       builder->GetShape(activations));
   TF_ASSIGN_OR_RETURN(xla::Shape out_backprop_shape,
                       builder->GetShape(gradients));
+  xla::XlaOp filter_backprop;
+
+  xla::Shape input_shape = activations_shape;
+  xla::Shape output_shape = out_backprop_shape;
+
+  TensorShape input_tensor_shape, filter_tensor_shape, output_tensor_shape;
+  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(filter_shape, &filter_tensor_shape));
+  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(input_shape, &input_tensor_shape));
+  TF_RETURN_IF_ERROR(XLAShapeToTensorShape(output_shape, &output_tensor_shape));
+
   const xla::Shape expanded_filter_shape =
       attrs.depthwise ? ExpandedFilterShapeForDepthwiseConvolution(filter_shape)
                       : filter_shape;
-
   // Reuse dimension computation logic from conv_grad_ops.cc.
   ConvBackpropDimensions dims;
-  TF_RETURN_IF_ERROR(ConvBackpropComputeDimensionsV2XlaShapes(
-      type_string, attrs.num_spatial_dims, activations_shape,
-      expanded_filter_shape, out_backprop_shape, attrs.dilations, attrs.strides,
-      attrs.padding, attrs.data_format, &dims));
-
   // The filter gradients are computed by a convolution of the input
   // activations and the output gradients, with some appropriate padding.
   // See the comment at the top of conv_grad_ops.h for details.
-
   xla::ConvolutionDimensionNumbers dnums;
 
+  TF_RETURN_IF_ERROR(ConvBackpropComputeDimensionsV2XlaShapes(
+      type_string, attrs.num_spatial_dims, activations_shape,
+      expanded_filter_shape, out_backprop_shape, attrs.dilations, attrs.strides,
+      attrs.padding, attrs.data_format, &dims));
+
   // The activations (inputs) form the LHS of the convolution.
   // Activations have shape: [batch, in_rows, in_cols, ..., in_depth]
   // For the gradient computation, we flip the roles of the batch and
@@ -420,29 +428,97 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
   int n_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format);
   int c_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format);
 
-  // Swap n_dim and c_dim in the activations.
-  dnums.set_input_batch_dimension(c_dim);
-  dnums.set_input_feature_dimension(n_dim);
+  int64 total_spatial_size = 1;
+  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
+    total_spatial_size *= dims.input_size(i);
+  }
 
-  // The gradients become the RHS of the convolution.
-  // The gradients have shape [batch, out_rows, out_cols, ..., out_depth]
-  // where the batch becomes the input feature for the convolution.
-  dnums.set_kernel_input_feature_dimension(n_dim);
-  dnums.set_kernel_output_feature_dimension(c_dim);
+  // We use this approach only for depthwise convolutions where feature counts
+  // are large but space dimensions are small.
+  bool should_perform_depthwise_conv =
+      (total_spatial_size < dims.in_depth) &&
+      filter_tensor_shape.dim_size(num_dims - 1) == 1 && attrs.depthwise;
+
+  int64 num_spatial_dims =
+      attrs.num_spatial_dims + (should_perform_depthwise_conv ? 1 : 0);
+
+  std::vector<std::pair<int64, int64>> padding(num_spatial_dims);
+  std::vector<int64> rhs_dilation(num_spatial_dims);
+  std::vector<int64> window_strides(num_spatial_dims);
+  std::vector<int64> ones(num_spatial_dims, 1);
+
+  if (should_perform_depthwise_conv) {
+    // This approach is similar to handling of grouped convolutions in
+    // the convolution_feature_group_converter.cc. Please refer to it for
+    // details.
+
+    // Add spatial dimension to the activation, and reshape.
+    std::vector<int64> activations_reshape_sizes, gradients_reshape_sizes;
+
+    activations_reshape_sizes.push_back(dims.batch_size);
+    gradients_reshape_sizes.push_back(dims.batch_size);
+    for (int i = 0; i < attrs.num_spatial_dims; i++) {
+      activations_reshape_sizes.push_back(dims.input_size(i));
+      gradients_reshape_sizes.push_back(dims.output_size(i));
+    }
+    activations_reshape_sizes.push_back(dims.in_depth);
+    activations_reshape_sizes.push_back(1);
+    gradients_reshape_sizes.push_back(dims.out_depth);
+    gradients_reshape_sizes.push_back(1);
+
+    activations = xla::Reshape(activations, activations_reshape_sizes);
+    gradients = xla::Reshape(gradients, gradients_reshape_sizes);
+
+    int64 new_spatial_dim = activations_reshape_sizes.size() - 1;
+
+    // Set the newly added dimension to be the batch.
+    dnums.set_input_batch_dimension(new_spatial_dim);
+    dnums.set_input_feature_dimension(c_dim);
+
+    // The gradients become the RHS of the convolution.
+    // The gradients have shape [batch, out_rows, out_cols, ..., out_depth, 1]
+    // where the batch becomes a spatial dimension, and 1 becomes
+    // the input feature for the convolution.
+    dnums.set_kernel_input_feature_dimension(new_spatial_dim);
+    dnums.set_kernel_output_feature_dimension(c_dim);
+
+    // Treat original batch dimension as a spatial dimension.
+    dnums.add_input_spatial_dimensions(n_dim);
+    dnums.add_kernel_spatial_dimensions(n_dim);
+  } else {
+    // The activations (inputs) form the LHS of the convolution.
+    // Activations have shape: [batch, in_rows, in_cols, ..., in_depth]
+    // For the gradient computation, we flip the roles of the batch and
+    // feature dimensions.
+    // Each spatial entry has size in_depth * batch
+
+    // Swap n_dim and c_dim in the activations.
+    dnums.set_input_batch_dimension(c_dim);
+    dnums.set_input_feature_dimension(n_dim);
+
+    // The gradients become the RHS of the convolution.
+    // The gradients have shape [batch, out_rows, out_cols, ..., out_depth]
+    // where the batch becomes the input feature for the convolution.
+    dnums.set_kernel_input_feature_dimension(n_dim);
+    dnums.set_kernel_output_feature_dimension(c_dim);
+  }
 
-  std::vector<std::pair<int64, int64>> padding(attrs.num_spatial_dims);
-  std::vector<int64> rhs_dilation(attrs.num_spatial_dims);
-  std::vector<int64> window_strides(attrs.num_spatial_dims);
-  std::vector<int64> ones(attrs.num_spatial_dims, 1);
+  dnums.set_output_batch_dimension(num_spatial_dims);
+  dnums.set_output_feature_dimension(num_spatial_dims + 1);
 
   // Tensorflow filter shape is [ H, W, ..., inC, outC ].
-  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
+  for (int i = 0; i < num_spatial_dims; ++i) {
     dnums.add_output_spatial_dimensions(i);
   }
-  dnums.set_output_batch_dimension(attrs.num_spatial_dims);
-  dnums.set_output_feature_dimension(attrs.num_spatial_dims + 1);
 
-  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
+  if (should_perform_depthwise_conv) {
+    // Set the right parameters for the newly created spatial dimension.
+    padding[0] = {0, 0};
+    rhs_dilation[0] = 1;
+    window_strides[0] = 1;
+  }
+
+  for (int64 i = 0; i < attrs.num_spatial_dims; ++i) {
     int64 dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i);
     dnums.add_input_spatial_dimensions(dim);
     dnums.add_kernel_spatial_dimensions(dim);
@@ -483,9 +559,10 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
     const int64 pad_before =
         attrs.padding == Padding::SAME ? std::max<int64>(pad_total / 2, 0) : 0;
 
-    padding[i] = {pad_before, pad_total - pad_before};
-    rhs_dilation[i] = dims.spatial_dims[i].stride;
-    window_strides[i] = attrs.dilations[dim];
+    int64 dim_being_operated = should_perform_depthwise_conv ? i + 1 : i;
+    padding[dim_being_operated] = {pad_before, pad_total - pad_before};
+    rhs_dilation[dim_being_operated] = dims.spatial_dims[i].stride;
+    window_strides[dim_being_operated] = attrs.dilations[dim];
   }
 
   // Besides padding the input, we will also expand output_rows to
@@ -496,13 +573,19 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
   //
   // This is done by specifying the window dilation factors in the
   // convolution HLO below.
-  auto filter_backprop =
-      xla::ConvGeneralDilated(activations, gradients, window_strides, padding,
-                              /*lhs_dilation=*/ones, rhs_dilation, dnums);
-
-  if (attrs.depthwise) {
-    filter_backprop = ContractFilterForDepthwiseBackprop(
-        filter_shape, filter_backprop, activations.builder());
+  filter_backprop = xla::ConvGeneralDilated(
+      activations, gradients, window_strides, padding,
+      /*lhs_dilation=*/ones, rhs_dilation, dnums,
+      /*feature_group_count=*/
+      should_perform_depthwise_conv ? dims.in_depth : 1);
+
+  if (should_perform_depthwise_conv) {
+    filter_backprop = xla::Reshape(filter_backprop, filter_shape.dimensions());
+  } else {
+    if (attrs.depthwise) {
+      filter_backprop = ContractFilterForDepthwiseBackprop(
+          filter_shape, filter_backprop, activations.builder());
+    }
   }
 
   return filter_backprop;
-- 
GitLab


From fddc04ba0780c7fb9211e6b4b9e7b0de3f7de957 Mon Sep 17 00:00:00 2001
From: Daniel Ingram <ingramds@appstate.edu>
Date: Mon, 10 Dec 2018 16:50:55 -0500
Subject: [PATCH 309/873] Add raise statement before NotImplementedError

---
 tensorflow/python/keras/engine/training_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index ec6b39704a..39b5b85ece 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -68,7 +68,7 @@ class Aggregator(object):
     Arguments:
       batch_outs: A list of batch-level outputs.
     """
-    NotImplementedError('Must be implemented in subclasses.')
+    raise NotImplementedError('Must be implemented in subclasses.')
 
   @abc.abstractmethod
   def aggregate(self, batch_outs, batch_start=None, batch_end=None):
@@ -81,12 +81,12 @@ class Aggregator(object):
       batch_end: The end index of this batch. Always `None` if `use_steps` is
         `True`.
     """
-    NotImplementedError('Must be implemented in subclasses.')
+    raise NotImplementedError('Must be implemented in subclasses.')
 
   @abc.abstractmethod
   def finalize(self):
     """Prepares the total results to be returned."""
-    NotImplementedError('Must be implemented in subclasses.')
+    raise NotImplementedError('Must be implemented in subclasses.')
 
 
 class MetricsAggregator(Aggregator):
-- 
GitLab


From a404d2edf57ac71034e93665454d238045786ae9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 13:49:07 -0800
Subject: [PATCH 310/873] Internal Change

PiperOrigin-RevId: 224875931
---
 tensorflow/python/framework/test_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 0e48d3c875..d06e1f574b 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1866,7 +1866,7 @@ class TensorFlowTestCase(googletest.TestCase):
     # If a is a tensor then convert it to ndarray
     if isinstance(a, ops.Tensor):
       if isinstance(a, ops._EagerTensorBase):
-        return a.numpy()
+        a = a.numpy()
       else:
         a = self.evaluate(a)
     if not isinstance(a, np.ndarray):
-- 
GitLab


From d19f1e45fcb7418fe07333fc99d102214129be3e Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 10 Dec 2018 13:49:51 -0800
Subject: [PATCH 311/873] Allow completely stateless(i.e., with no outputs)
 loops. Simplify the handling of stateless conditionals. This change will
 still not support stateless loops pre-v2 until we add auto deps. However, it
 works properly in tf.function.

PiperOrigin-RevId: 224876064
---
 .../autograph/converters/control_flow.py      | 143 +++++++++++-------
 .../autograph/operators/control_flow.py       |  19 ++-
 tensorflow/python/autograph/pyct/templates.py |   3 +
 3 files changed, 107 insertions(+), 58 deletions(-)

diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py
index bef6cae1bb..a39a0b0cdb 100644
--- a/tensorflow/python/autograph/converters/control_flow.py
+++ b/tensorflow/python/autograph/converters/control_flow.py
@@ -49,7 +49,13 @@ class ControlFlowTransformer(converter.Base):
 
   def _create_cond_branch(self, body_name, aliased_orig_names,
                           aliased_new_names, body, returns):
-    if len(returns) == 1:
+    if not returns:
+      # TODO(b/110167197): Replace with a plain return.
+      template = """
+        return 1
+      """
+      return_stmt = templates.replace(template)
+    elif len(returns) == 1:
       template = """
         return retval
       """
@@ -220,7 +226,7 @@ class ControlFlowTransformer(converter.Base):
       # branch functions will return a dummy value that ensures cond
       # actually has some return value as well.
       cond_results = None
-      # TODO(mdan): This doesn't belong here; it's specific to the operator.
+      # TODO(mdan): Replace with None once side_effect_guards is retired.
       returned_from_body = (templates.replace_as_expression(
           'ag__.match_staging_level(1, cond_var_name)',
           cond_var_name=cond_var_name),)
@@ -278,14 +284,6 @@ class ControlFlowTransformer(converter.Base):
           ' these symbols before the loop'.format(
               self._fmt_symbols(live_defs_in_loop)))
 
-    if not loop_state:
-      # TODO(mdan): Implement this properly.
-      # We need to check whether any variable created inside the body scope
-      # is used before being modified outside the scope. This should be done
-      # during activity analysis, and in general should cover the case where
-      # variables may not be initialized.
-      raise ValueError('cannot convert loop: no outputs')
-
     return loop_state, reserved_symbols
 
   def _state_constructs(self, loop_state, reserved_symbols):
@@ -337,26 +335,44 @@ class ControlFlowTransformer(converter.Base):
     node_body = ast_util.rename_symbols(node.body, ssf_map)
     test = ast_util.rename_symbols(node.test, ssf_map)
 
-    template = """
-      def test_name(state_ssf):
-        return test
-      def body_name(state_ssf):
-        body
-        return state_ssf,
-      state_ast_tuple = ag__.while_stmt(
-          test_name, body_name, (state,), (extra_deps,))
-    """
-    node = templates.replace(
-        template,
-        state=loop_state,
-        state_ssf=state_ssf,
-        state_ast_tuple=state_ast_tuple,
-        test_name=self.ctx.namer.new_symbol('loop_test', reserved_symbols),
-        test=test,
-        body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
-        body=node_body,
-        extra_deps=tuple(s.ast() for s in cond_closure),
-    )
+    if loop_state:
+      template = """
+        def test_name(state_ssf):
+          return test
+        def body_name(state_ssf):
+          body
+          return state_ssf,
+        state_ast_tuple = ag__.while_stmt(
+            test_name, body_name, (state,), (extra_deps,))
+      """
+      node = templates.replace(
+          template,
+          state=loop_state,
+          state_ssf=state_ssf,
+          state_ast_tuple=state_ast_tuple,
+          test_name=self.ctx.namer.new_symbol('loop_test', reserved_symbols),
+          test=test,
+          body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
+          body=node_body,
+          extra_deps=tuple(s.ast() for s in cond_closure),
+      )
+    else:
+      template = """
+        def test_name():
+          return test
+        def body_name():
+          body
+          return ()
+        ag__.while_stmt(test_name, body_name, (), (extra_deps,))
+      """
+      node = templates.replace(
+          template,
+          test_name=self.ctx.namer.new_symbol('loop_test', reserved_symbols),
+          test=test,
+          body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
+          body=node_body,
+          extra_deps=tuple(s.ast() for s in cond_closure),
+      )
 
     return node
 
@@ -373,29 +389,50 @@ class ControlFlowTransformer(converter.Base):
     else:
       extra_test = parser.parse_expression('True')
 
-    template = """
-      def extra_test_name(state_ssf):
-        return extra_test_expr
-      def body_name(loop_vars, state_ssf):
-        # Workaround for PEP-3113
-        iterate = loop_vars
-        body
-        return state_ssf,
-      state_ast_tuple = ag__.for_stmt(
-          iter_, extra_test_name, body_name, (state,))
-    """
-    node = templates.replace(
-        template,
-        state=loop_state,
-        state_ssf=state_ssf,
-        state_ast_tuple=state_ast_tuple,
-        iter_=node.iter,
-        iterate=node.target,
-        extra_test_name=self.ctx.namer.new_symbol('extra_test',
-                                                  reserved_symbols),
-        extra_test_expr=extra_test,
-        body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
-        body=node_body)
+    if loop_state:
+      template = """
+        def extra_test_name(state_ssf):
+          return extra_test_expr
+        def body_name(loop_vars, state_ssf):
+          # Workaround for PEP-3113
+          iterate = loop_vars
+          body
+          return state_ssf,
+        state_ast_tuple = ag__.for_stmt(
+            iter_, extra_test_name, body_name, (state,))
+      """
+      node = templates.replace(
+          template,
+          state=loop_state,
+          state_ssf=state_ssf,
+          state_ast_tuple=state_ast_tuple,
+          iter_=node.iter,
+          iterate=node.target,
+          extra_test_name=self.ctx.namer.new_symbol('extra_test',
+                                                    reserved_symbols),
+          extra_test_expr=extra_test,
+          body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
+          body=node_body)
+    else:
+      template = """
+        def extra_test_name():
+          return extra_test_expr
+        def body_name(loop_vars):
+          # Workaround for PEP-3113
+          iterate = loop_vars
+          body
+          return ()
+        ag__.for_stmt(iter_, extra_test_name, body_name, ())
+      """
+      node = templates.replace(
+          template,
+          iter_=node.iter,
+          iterate=node.target,
+          extra_test_name=self.ctx.namer.new_symbol('extra_test',
+                                                    reserved_symbols),
+          extra_test_expr=extra_test,
+          body_name=self.ctx.namer.new_symbol('loop_body', reserved_symbols),
+          body=node_body)
 
     return node
 
diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py
index 89f7b8522f..afa3787d42 100644
--- a/tensorflow/python/autograph/operators/control_flow.py
+++ b/tensorflow/python/autograph/operators/control_flow.py
@@ -87,7 +87,10 @@ def _known_len_for_stmt(iter_, extra_test, body, init_state):
   def while_body(iterate_index, *state):
     iterate = iter_[iterate_index]
     new_state = body(iterate, *state)
-    return (iterate_index + 1,) + new_state
+    if new_state:
+      return (iterate_index + 1,) + new_state
+    else:
+      return iterate_index + 1
 
   def while_cond(iterate_index, *state):
     return gen_math_ops.logical_and(iterate_index < n, extra_test(*state))
@@ -98,13 +101,19 @@ def _known_len_for_stmt(iter_, extra_test, body, init_state):
       init_state=(0,) + init_state,
       extra_deps=(iter_,),
       opts=dict(maximum_iterations=n))
+
   # Dropping the iteration index because it's not syntactically visible.
   # TODO(mdan): Don't.
-  results = results[1:]
+  if isinstance(results, (tuple, list)):
+    assert len(results) >= 1  # Has at least the iterate.
+    if len(results) > 1:
+      results = results[1:]
+    if len(results) == 1:
+      # TODO(mdan): Remove this special case.
+      results, = results
+  else:
+    results = ()
 
-  # TODO(mdan): Remove this special case.
-  if len(results) == 1:
-    return results[0]
   return results
 
 
diff --git a/tensorflow/python/autograph/pyct/templates.py b/tensorflow/python/autograph/pyct/templates.py
index 2272ea4208..43279b3ca0 100644
--- a/tensorflow/python/autograph/pyct/templates.py
+++ b/tensorflow/python/autograph/pyct/templates.py
@@ -184,6 +184,9 @@ class ReplaceTransformer(gast.NodeTransformer):
 
     new_nodes = self._prepare_replacement(node, node.id)
 
+    if not new_nodes:
+      return new_nodes
+
     # Preserve the target context.
     adjuster = ContextAdjuster(type(node.ctx))
     for n in new_nodes:
-- 
GitLab


From 341452772c51cf66fc8785081437cddc38ce1081 Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Mon, 10 Dec 2018 13:57:40 -0800
Subject: [PATCH 312/873] Use format_master_url for Kubernetes and Slurm
 Cluster Resolvers

PiperOrigin-RevId: 224877586
---
 .../cluster_resolver/kubernetes_cluster_resolver.py    |  8 +++-----
 .../cluster_resolver/slurm_cluster_resolver.py         | 10 +++++++---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
index 88625a5542..7ff6ec0f2d 100644
--- a/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/kubernetes_cluster_resolver.py
@@ -107,16 +107,14 @@ class KubernetesClusterResolver(ClusterResolver):
     Returns:
       The name or URL of the session master.
     """
+    task_type = task_type if task_type is not None else self.task_type
+    task_index = task_index if task_index is not None else self.task_index
+
     if task_type is not None and task_index is not None:
       return format_master_url(
           self.cluster_spec().task_address(task_type, task_index),
           rpc_layer or self.rpc_layer)
 
-    if self.task_type is not None and self.task_index is not None:
-      return format_master_url(
-          self.cluster_spec().task_address(self.task_type, self.task_index),
-          rpc_layer or self.rpc_layer)
-
     return ''
 
   def cluster_spec(self):
diff --git a/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py
index 1ab81731b7..9dbe25b613 100644
--- a/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/slurm_cluster_resolver.py
@@ -23,6 +23,7 @@ import os
 import subprocess
 
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
+from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
 from tensorflow.python.training.server_lib import ClusterSpec
 
 
@@ -206,10 +207,13 @@ class SlurmClusterResolver(ClusterResolver):
     """
     task_type = task_type if task_type is not None else self.task_type
     task_index = task_index if task_index is not None else self.task_index
-    rpc_layer = rpc_layer or self.rpc_layer
-    master = self.cluster_spec().task_address(task_type, task_index)
 
-    return '%s://%s' % (rpc_layer, master) if rpc_layer else master
+    if task_type is not None and task_index is not None:
+      return format_master_url(
+          self.cluster_spec().task_address(task_type, task_index),
+          rpc_layer or self.rpc_layer)
+
+    return ''
 
   @property
   def environment(self):
-- 
GitLab


From c25282f9e610479586c36b8435c984ceb2530d87 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 14:43:34 -0800
Subject: [PATCH 313/873] Adds support for arbitrarily nested `inputs` and
 `outputs` in `keras.backend.function`.

PiperOrigin-RevId: 224886577
---
 tensorflow/python/keras/backend.py      | 32 +++++++++---------------
 tensorflow/python/keras/backend_test.py | 33 +++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 420c457a0c..381e0ae3e3 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -2926,17 +2926,12 @@ class GraphExecutionFunction(object):
   def __init__(self, inputs, outputs, updates=None, name=None,
                **session_kwargs):
     updates = updates or []
-    if not isinstance(inputs, (list, tuple)):
-      raise TypeError('`inputs` to a Keras backend function '
-                      'should be a list or tuple.')
-    if not isinstance(outputs, (list, tuple)):
-      raise TypeError('`outputs` of a Keras backend function '
-                      'should be a list or tuple.')
     if not isinstance(updates, (list, tuple)):
       raise TypeError('`updates` in a Keras backend function '
                       'should be a list or tuple.')
-    self.inputs = list(inputs)
-    self.outputs = list(outputs)
+    self.inputs = nest.flatten(inputs)
+    self._outputs_structure = outputs
+    self.outputs = nest.flatten(outputs)
     with ops.control_dependencies(self.outputs):
       updates_ops = []
       for update in updates:
@@ -3033,8 +3028,7 @@ class GraphExecutionFunction(object):
         self.fetch_callbacks[fetch](output)
 
   def __call__(self, inputs):
-    if not isinstance(inputs, (list, tuple)):
-      raise TypeError('`inputs` should be a list or tuple.')
+    inputs = nest.flatten(inputs)
 
     session = get_session()
     feed_arrays = []
@@ -3077,7 +3071,8 @@ class GraphExecutionFunction(object):
     fetched = self._callable_fn(*array_vals,
                                 run_metadata=self.run_metadata)
     self._call_fetch_callbacks(fetched[-len(self._fetches):])
-    return fetched[:len(self.outputs)]
+    return nest.pack_sequence_as(self._outputs_structure,
+                                 fetched[:len(self.outputs)])
 
 
 class EagerExecutionFunction(object):
@@ -3093,17 +3088,12 @@ class EagerExecutionFunction(object):
 
   def __init__(self, inputs, outputs, updates=None, name=None):
     updates = updates or []
-    if not isinstance(inputs, (list, tuple)):
-      raise TypeError('`inputs` to a Keras backend function '
-                      'should be a list or tuple.')
-    if not isinstance(outputs, (list, tuple)):
-      raise TypeError('`outputs` of a Keras backend function '
-                      'should be a list or tuple.')
     if not isinstance(updates, (list, tuple)):
       raise TypeError('`updates` in a Keras backend function '
                       'should be a list or tuple.')
-    self.inputs = list(inputs)
-    self.outputs = list(outputs)
+    self.inputs = nest.flatten(inputs)
+    self._outputs_structure = outputs
+    self.outputs = nest.flatten(outputs)
     self.name = name
 
     graph = get_graph()
@@ -3153,6 +3143,7 @@ class EagerExecutionFunction(object):
               x.op.inputs[0])
 
   def __call__(self, inputs):
+    inputs = nest.flatten(inputs)
     converted_inputs = []
     for tensor, value in zip(self.inputs, inputs):
       if value is None:
@@ -3169,7 +3160,8 @@ class EagerExecutionFunction(object):
         value = math_ops.cast(value, tensor.dtype)
       converted_inputs.append(value)
     outputs = self._graph_fn(*converted_inputs)
-    return [x.numpy() for x in outputs]
+    return nest.pack_sequence_as(self._outputs_structure,
+                                 [x.numpy() for x in outputs])
 
 
 @tf_export('keras.backend.function')
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py
index af01b46fa9..4b83f0bf66 100644
--- a/tensorflow/python/keras/backend_test.py
+++ b/tensorflow/python/keras/backend_test.py
@@ -1695,6 +1695,39 @@ class BackendGraphTests(test.TestCase):
       self.assertEqual(callback.times_called, 1)
       self.assertEqual(callback.callback_result, 200)
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_function_dict_outputs(self):
+    x_ph = keras.backend.placeholder(shape=(), name='x')
+    y_ph = keras.backend.placeholder(shape=(), name='y')
+    outputs = {'x*y': y_ph * x_ph, 'x*x': x_ph * x_ph}
+
+    f = keras.backend.function(inputs=[x_ph, y_ph], outputs=outputs)
+    x, y = 2., 5.
+    results = f([x, y])
+
+    self.assertEqual(results['x*y'], 10.)
+    self.assertEqual(results['x*x'], 4)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_function_dict_inputs(self):
+    placeholders = {
+        'x': keras.backend.placeholder(shape=()),
+        'y': keras.backend.placeholder(shape=())
+    }
+    outputs = [placeholders['x'] * placeholders['y']]
+
+    f = keras.backend.function(inputs=placeholders, outputs=outputs)
+    results = f({'x': 2., 'y': 3.})
+    self.assertEqual(results[0], 6.)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_function_single_input_output(self):
+    x_ph = keras.backend.placeholder(shape=(), name='x')
+    output = x_ph * x_ph
+    f = keras.backend.function(x_ph, output)
+    result = f(2.)
+    self.assertEqual(result, 4.)
+
   def test_placeholder(self):
     x = keras.backend.placeholder(shape=(3, 4))
     self.assertEqual(x.get_shape().as_list(), [3, 4])
-- 
GitLab


From a571aba264f9cc2e8273a4411b193229efce34cb Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 15:07:05 -0800
Subject: [PATCH 314/873] Internal Change

PiperOrigin-RevId: 224891138
---
 .../ops/ragged/ragged_map_flat_values_op_test.py  | 15 ++++++---------
 tensorflow/python/ops/ragged/ragged_util_test.py  |  6 ++----
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py b/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
index 8b28cac99d..45e60ff492 100644
--- a/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
@@ -39,8 +39,7 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
                                         kwargs=None):
     kwargs = kwargs or {}
     result = ragged.map_flat_values(op, *args, **kwargs)
-    with self.test_session():
-      self.assertRaggedEqual(result, expected)
+    self.assertRaggedEqual(result, expected)
 
   def testDocStringExamples(self):
     """Test the examples in apply_op_to_ragged_values.__doc__."""
@@ -48,10 +47,9 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
     v1 = ragged.map_flat_values(array_ops.ones_like, rt)
     v2 = ragged.map_flat_values(math_ops.multiply, rt, rt)
     v3 = ragged.map_flat_values(math_ops.add, rt, 5)
-    with self.test_session():
-      self.assertRaggedEqual(v1, [[1, 1, 1], [], [1, 1], [1]])
-      self.assertRaggedEqual(v2, [[1, 4, 9], [], [16, 25], [36]])
-      self.assertRaggedEqual(v3, [[6, 7, 8], [], [9, 10], [11]])
+    self.assertRaggedEqual(v1, [[1, 1, 1], [], [1, 1], [1]])
+    self.assertRaggedEqual(v2, [[1, 4, 9], [], [16, 25], [36]])
+    self.assertRaggedEqual(v3, [[6, 7, 8], [], [9, 10], [11]])
 
   def testOpWithSingleRaggedTensorArg(self):
     tensor = ragged.constant([[1, 2, 3], [], [4, 5]])
@@ -122,9 +120,8 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
     # ragged_rank=0
     x0 = [3, 1, 4, 1, 5, 9, 2, 6, 5]
     y0 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
-    with self.test_session():
-      self.assertRaggedEqual(
-          math_ops.multiply(x0, y0), [3, 2, 12, 4, 25, 54, 14, 48, 45])
+    self.assertRaggedEqual(
+        math_ops.multiply(x0, y0), [3, 2, 12, 4, 25, 54, 14, 48, 45])
 
     # ragged_rank=1
     x1 = ragged.constant([[3, 1, 4], [], [1, 5], [9, 2], [6, 5]])
diff --git a/tensorflow/python/ops/ragged/ragged_util_test.py b/tensorflow/python/ops/ragged/ragged_util_test.py
index 72a4155930..ab5436a91c 100644
--- a/tensorflow/python/ops/ragged/ragged_util_test.py
+++ b/tensorflow/python/ops/ragged/ragged_util_test.py
@@ -92,8 +92,7 @@ class RaggedUtilTest(ragged_test_util.RaggedTensorTestCase,
   ])
   def testRepeat(self, data, repeats, expected, axis=None):
     result = ragged_util.repeat(data, repeats, axis)
-    with self.test_session():
-      self.assertAllEqual(result, expected)
+    self.assertAllEqual(result, expected)
 
   @parameterized.parameters([
       dict(mode=mode, **args)
@@ -158,8 +157,7 @@ class RaggedUtilTest(ragged_test_util.RaggedTensorTestCase,
       repeats = array_ops.placeholder_with_default(repeats, None)
 
     result = ragged_util.repeat(data, repeats, axis)
-    with self.test_session():
-      self.assertAllEqual(result, expected)
+    self.assertAllEqual(result, expected)
 
   @parameterized.parameters([
       dict(
-- 
GitLab


From f1ad9aa9a1a01130577190611f73f23478f6563a Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 10 Dec 2018 15:21:57 -0800
Subject: [PATCH 315/873] Fixes build broken on mac compilers

PiperOrigin-RevId: 224893836
---
 tensorflow/core/kernels/training_op_helpers.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/training_op_helpers.h b/tensorflow/core/kernels/training_op_helpers.h
index e96cd023fc..98e2b3c0f2 100644
--- a/tensorflow/core/kernels/training_op_helpers.h
+++ b/tensorflow/core/kernels/training_op_helpers.h
@@ -113,7 +113,8 @@ mutex* GetTrainingVariableMutex(OpKernelContext* ctx, int input, bool sparse,
   if (ctx->input_dtype(input) == DT_RESOURCE) {
     if (LookupResource(ctx, HandleFromInput(ctx, input), maybe_resource).ok()) {
       if (sparse) {
-        EnsureSparseVariableAccess<Device, T>(ctx, *maybe_resource);
+        EnsureSparseVariableAccess<Device, T>(ctx, *maybe_resource)
+            .IgnoreError();
       }
       return (*maybe_resource)->mu();
     } else {
-- 
GitLab


From 512f0fa92d146b712df9551c7ab507c488abd033 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 10 Dec 2018 15:23:13 -0800
Subject: [PATCH 316/873] Add bfloat16 support to TileOp.

PiperOrigin-RevId: 224894043
---
 tensorflow/core/kernels/tile_ops.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc
index d714876bda..b9b37612ad 100644
--- a/tensorflow/core/kernels/tile_ops.cc
+++ b/tensorflow/core/kernels/tile_ops.cc
@@ -325,6 +325,7 @@ class TileGradientOp : public OpKernel {
     TF_CALL_int16(HANDLE_TYPE_NAME);
     TF_CALL_int64(HANDLE_TYPE_NAME);
     TF_CALL_half(HANDLE_TYPE_NAME);
+    TF_CALL_bfloat16(HANDLE_TYPE_NAME);
     TF_CALL_complex64(HANDLE_TYPE_NAME);
     TF_CALL_complex128(HANDLE_TYPE_NAME);
 
-- 
GitLab


From e943b2a6b2bffc925db1d37217696793da6131b2 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Mon, 10 Dec 2018 15:28:19 -0800
Subject: [PATCH 317/873] Automated rollback of commit
 df74b804064bd16e1fe4aed2940c5f536c993dfc

PiperOrigin-RevId: 224894987
---
 tensorflow/python/eager/function_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 50d1b4b6f7..8d1f8c21d9 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -544,7 +544,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertIsInstance(
         self.v, resource_variable_ops.ResourceVariable)
 
-  def disabled_testRunMetadata(self):
+  def testRunMetadata(self):
 
     @def_function.function
     def f(x):
-- 
GitLab


From 7ad28a7ee82f93ff1dd53b60798e603125ae541a Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Mon, 10 Dec 2018 15:40:06 -0800
Subject: [PATCH 318/873] Add validators for pooling, BN, Conv, Pad, Concat ops

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 107 ++++++++++++------
 1 file changed, 72 insertions(+), 35 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 777a80bbc4..18e8599a01 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1533,6 +1533,24 @@ enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV };
 tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) {
   const auto& inputs = params->inputs;
   const auto& node_def = params->node_def;
+  if (inputs.at(0).is_weights()) {
+    return tensorflow::errors::Unimplemented(
+        node_def.op(), " is only implemented for tensors, not weights, at ",
+        node_def.name());
+  }
+  if (inputs.at(1).is_tensor()) {
+    return tensorflow::errors::Unimplemented(
+        "Kernel for ", node_def.op(), " must be constant weights, at ",
+        node_def.name());
+  }
+  TRT_ShapedWeights weights_rsck = inputs.at(1).weights();
+  VLOG(2) << "weight shape: " << weights_rsck.DebugString();
+  if (weights_rsck.shape_.nbDims != 4) {
+    return tensorflow::errors::Internal(
+        "Conv2D expects kernel of dimension 4, at: " + node_def.name());
+  }
+  if (params->validation_only) return tensorflow::Status::OK();
+
   const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
   TFAttrs attrs(node_def);
 
@@ -1554,12 +1572,6 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) {
   if (num_groups == 0) num_groups = tensor_dim.d[0];  // depthwise convolution
   VLOG(2) << "groups count: " << num_groups;
 
-  TRT_ShapedWeights weights_rsck = inputs.at(1).weights();
-  VLOG(2) << "weight shape: " << weights_rsck.DebugString();
-  if (weights_rsck.shape_.nbDims != 4) {
-    return tensorflow::errors::Internal(
-        "Conv2D expects kernel of dimension 4, at: " + node_def.name());
-  }
   if (params->converter->precision_mode() == FP16MODE) {
     weights_rsck =
         ConvertFP32ToFP16(params->weight_store, inputs.at(1).weights());
@@ -2027,9 +2039,31 @@ tensorflow::Status ConvertConv2DDepthwise(OpConverterParams* params) {
 tensorflow::Status ConvertPool(OpConverterParams* params) {
   const auto& inputs = params->inputs;
   const auto& node_def = params->node_def;
-  const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
+  if (inputs.at(0).is_weights()) {
+    return tensorflow::errors::Unimplemented(
+        node_def.op(), " is only implemented for tensors, not weights, at ",
+        node_def.name());
+  }
+  nvinfer1::PoolingType type;
+  if (node_def.op() == "MaxPool") {
+    type = nvinfer1::PoolingType::kMAX;
+  } else if (node_def.op() == "AvgPool") {
+    type = nvinfer1::PoolingType::kAVERAGE;
+  } else {
+    return tensorflow::errors::Unimplemented("Unsupported pooling type: ",
+                                             node_def.op(), ", at ",
+                                             node_def.name());
+  }
   TFAttrs attrs(node_def);
+  const string padding_type = attrs.get<string>("padding");
+  if ((padding_type != "SAME") && (padding_type != "VALID")) {
+    return tensorflow::errors::Unimplemented("Unsupported padding type: ",
+                                             padding_type, ", at ",
+                                             node_def.name());
+  }
+  if (params->validation_only) return Status::OK();
 
+  const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
   int h_index = 2;
   int w_index = 3;
   const auto data_format = attrs.get<string>("data_format");
@@ -2040,16 +2074,6 @@ tensorflow::Status ConvertPool(OpConverterParams* params) {
         const_cast<nvinfer1::ITensor*>(tensor), {0, 3, 1, 2}, &tensor));
   }
 
-  nvinfer1::PoolingType type;
-  if (node_def.op() == "MaxPool") {
-    type = nvinfer1::PoolingType::kMAX;
-  } else if (node_def.op() == "AvgPool") {
-    type = nvinfer1::PoolingType::kAVERAGE;
-  } else {
-    return tensorflow::errors::Unimplemented("Unsupported pool type: ",
-                                             node_def.op());
-  }
-
   const auto tf_stride = attrs.get<std::vector<int>>("strides");
   const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
 
@@ -2058,7 +2082,6 @@ tensorflow::Status ConvertPool(OpConverterParams* params) {
 
   auto tensor_dim = tensor->getDimensions();
   std::vector<std::pair<int, int>> padding;
-  const string padding_type = attrs.get<string>("padding");
   if (padding_type == "SAME") {
     // This is NCHW tensor with no batch dimension.
     //  1 -> h
@@ -2068,9 +2091,6 @@ tensorflow::Status ConvertPool(OpConverterParams* params) {
         {static_cast<int>(tensor_dim.d[1]), static_cast<int>(tensor_dim.d[2])});
   } else if (padding_type == "VALID") {
     padding = {{0, 0}, {0, 0}};
-  } else {
-    return tensorflow::errors::Unimplemented("Unsupported padding type: ",
-                                             padding_type);
   }
 
   if (padding[0].first != padding[0].second ||
@@ -2837,6 +2857,7 @@ tensorflow::Status ConvertPad(OpConverterParams* params) {
     return tensorflow::errors::Unimplemented(
         "Padding layer does not support padding on dimension 1 and 3 yet");
   }
+  if (params->validation_only) return Status::OK();
 
   bool legit_pad = true;
   nvinfer1::DimsHW pre_padding(0, 0);
@@ -2940,6 +2961,7 @@ tensorflow::Status ConvertConcat(OpConverterParams* params) {
 
     inputs_vec.push_back(tensor_i);
   }
+  if (params->validation_only) return tensorflow::Status::OK();
 
   // nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
   nvinfer1::IConcatenationLayer* layer =
@@ -2961,12 +2983,26 @@ tensorflow::Status ConvertFusedBatchNorm(OpConverterParams* params) {
   auto data_format = attrs.get<string>("data_format");
   if (data_format != "NCHW") {
     return tensorflow::errors::Unimplemented(
-        "only data_format=NCHW is supported, at " + node_def.name());
+        node_def.op(), " only supports data_format=NCHW, at ", node_def.name());
   }
   bool is_training = attrs.get<bool>("is_training");
   if (is_training) {
     return tensorflow::errors::Unimplemented(
-        "only is_training=false is supported, at " + node_def.name());
+        node_def.op(), " only supports is_training=false. If you are using "
+        "Keras, please use keras.backend.set_learning_phase(0). At ",
+        node_def.name());
+  }
+  if (inputs.at(0).is_weights()) {
+    return tensorflow::errors::Unimplemented(
+        node_def.op(), " is only implemented for tensor inputs, not weights, "
+        "at ", node_def.name());
+  }
+  for (int i = 1; i < 5; i++) {
+    if (inputs.at(i).is_tensor()) {
+      return tensorflow::errors::Unimplemented(
+          node_def.op(), " must have constant inputs for scale, offset, mean "
+          "and variance, at ", node_def.name());
+    }
   }
   nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
 
@@ -2981,7 +3017,7 @@ tensorflow::Status ConvertFusedBatchNorm(OpConverterParams* params) {
   for (int i = 1; i < 5; i++) {
     if (inputs.at(i).weights().type_ != parameter_type) {
       return tensorflow::errors::Unimplemented(
-          "Inconsistent parameter type for batchnormis not supported, at: " +
+          "Inconsistent parameter type for batchnorm is not supported, at: " +
           node_def.name());
     }
   }
@@ -3001,6 +3037,8 @@ tensorflow::Status ConvertFusedBatchNorm(OpConverterParams* params) {
           "Inconsistent batchnorm parameter count, at: " + node_def.name());
     }
   }
+  if (params->validation_only) return Status::OK();
+
   //  We could technically have two weights with different shape.
   //  that requires two addScale op, arguably less performant
   TRT_ShapedWeights combined_scale_weights =
@@ -3286,10 +3324,14 @@ static void RegisterValidatableOpConverters(
     std::unordered_map<string, OpConverter>* registration) {
   // TODO(laigd): support all op types.
   (*registration)["BiasAdd"] = ConvertBiasAdd;
+  (*registration)["ConcatV2"] = ConvertConcat;
   (*registration)["Const"] = ConvertConst;
+  (*registration)["Conv2D"] = ConvertConv2D;
+  (*registration)["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
   (*registration)["Transpose"] = ConvertTranspose;
   (*registration)["Reshape"] = ConvertReshape;
   (*registration)["MatMul"] = ConvertMatMul;
+  (*registration)["Pad"] = ConvertPad;
   (*registration)["Relu6"] = ConvertRelu6;
   (*registration)["Square"] = ConvertSquare;
   (*registration)["ExpandDims"] = ConvertExpandDims;
@@ -3307,6 +3349,12 @@ static void RegisterValidatableOpConverters(
   for (auto activation_op_type : {"Relu", "Sigmoid", "Tanh"}) {
     (*registration)[activation_op_type] = ConvertActivation;
   }
+  for (auto pool_op_type : {"AvgPool", "MaxPool"}) {
+    (*registration)[pool_op_type] = ConvertPool;
+  }
+  for (auto normalization_op_type : {"FusedBatchNorm", "FusedBatchNormV2"}) {
+    (*registration)[normalization_op_type] = ConvertFusedBatchNorm;
+  }
 }
 
 void TrtNodeValidator::RegisterOpValidators() {
@@ -3315,21 +3363,10 @@ void TrtNodeValidator::RegisterOpValidators() {
 
 void Converter::RegisterOpConverters() {
   RegisterValidatableOpConverters(&op_registry_);
-
-  op_registry_["Conv2D"] = ConvertConv2D;
-  op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
-  op_registry_["MaxPool"] = ConvertPool;
-  op_registry_["AvgPool"] = ConvertPool;
   // TODO(ben,jie): this is a temp hack.
   op_registry_["Identity"] = ConvertIdentity;  // Identity should be removed
   op_registry_["Snapshot"] = ConvertIdentity;  // Snapshot should be removed
 
-  op_registry_["Pad"] = ConvertPad;
-
-  op_registry_["ConcatV2"] = ConvertConcat;
-  op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm;
-  op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm;
-
   op_registry_["Rsqrt"] = ConvertUnary;
   op_registry_["Reciprocal"] = ConvertUnary;
   op_registry_["Exp"] = ConvertUnary;
-- 
GitLab


From e330f959df527156a40c86360151ec555c08f4ba Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Mon, 10 Dec 2018 15:40:27 -0800
Subject: [PATCH 319/873] Fix test

---
 tensorflow/contrib/tensorrt/test/quantization_mnist_test.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/test/quantization_mnist_test.py b/tensorflow/contrib/tensorrt/test/quantization_mnist_test.py
index 31cbef89e2..b96d965bad 100644
--- a/tensorflow/contrib/tensorrt/test/quantization_mnist_test.py
+++ b/tensorflow/contrib/tensorrt/test/quantization_mnist_test.py
@@ -24,6 +24,7 @@ from tensorflow.contrib.tensorrt.python.ops import trt_engine_op
 # pylint: enable=unused-import
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python import data
+#from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python import keras
 from tensorflow.python.estimator.estimator import Estimator
 from tensorflow.python.estimator.model_fn import EstimatorSpec
@@ -191,7 +192,7 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase):
               batch_size=batch_size,
               num_parallel_calls=8))
       dataset = dataset.repeat(count=1)
-      iterator = data.make_one_shot_iterator(dataset)
+      iterator = dataset.make_one_shot_iterator()
       features, labels = iterator.get_next()
       return features, labels
 
@@ -205,7 +206,7 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase):
               batch_size=batch_size,
               num_parallel_calls=8))
       dataset = dataset.repeat(count=num_epochs)
-      iterator = data.make_one_shot_iterator(dataset)
+      iterator = dataset.make_one_shot_iterator()
       features, labels = iterator.get_next()
       return features, labels
 
-- 
GitLab


From 2ab06b48d160fcd41d4d51d38b7d2cf7902790bc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 15:39:17 -0800
Subject: [PATCH 320/873] Fix GitHub link.

PiperOrigin-RevId: 224897071
---
 tensorflow/lite/g3doc/convert/cmdline_examples.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow/lite/g3doc/convert/cmdline_examples.md b/tensorflow/lite/g3doc/convert/cmdline_examples.md
index de81e2cfdd..169f2d91d8 100644
--- a/tensorflow/lite/g3doc/convert/cmdline_examples.md
+++ b/tensorflow/lite/g3doc/convert/cmdline_examples.md
@@ -95,11 +95,11 @@ tflite_convert \
 
 The TensorFlow Lite Converter is compatible with fixed point quantization models
 described
-[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/q
-uantize/README.md). These are float models with `FakeQuant*` ops inserted at the
-boundaries of fused layers to record min-max range information. This generates a
-quantized inference workload that reproduces the quantization behavior that was
-used during training.
+[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/quantize/README.md).
+These are float models with `FakeQuant*` ops inserted at the boundaries of fused
+layers to record min-max range information. This generates a quantized inference
+workload that reproduces the quantization behavior that was used during
+training.
 
 The following command generates a quantized TensorFlow Lite FlatBuffer from a
 "quantized" TensorFlow GraphDef.
-- 
GitLab


From cb3cb1ef838ddea1fcfc259b51b3702e80743277 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 10 Dec 2018 15:39:48 -0800
Subject: [PATCH 321/873] Rename {For|If}ReturnVoid to {For|If}

Returning void is more common than returning Status so pick the longer name for
the less common variant.

PiperOrigin-RevId: 224897169
---
 .../xla/service/cpu/dot_op_emitter.cc         |  57 +++--
 .../xla/service/gpu/ir_emitter_unnested.cc    |  74 +++----
 .../service/llvm_ir/kernel_support_library.cc |  23 +-
 .../service/llvm_ir/kernel_support_library.h  | 205 +++++++++---------
 .../compiler/xla/service/llvm_ir/sort_util.cc |  25 +--
 5 files changed, 193 insertions(+), 191 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index 97f9b85a60..a33035ad10 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -323,11 +323,11 @@ void ColumnMajorMatrixVectorProductEmitter::Emit() {
   int64 column_remainder = k() % tile_cols();
   int64 column_limit = k() - column_remainder;
 
-  ksl_.ForReturnVoid("dot.outer.tiled",
-                     /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols(),
-                     [&](llvm::Value* column, bool is_first_column) {
-                       EmitOuterLoopBody(column, tile_cols(), is_first_column);
-                     });
+  ksl_.For("dot.outer.tiled",
+           /*start=*/0, /*end=*/column_limit, /*step=*/tile_cols(),
+           [&](llvm::Value* column, bool is_first_column) {
+             EmitOuterLoopBody(column, tile_cols(), is_first_column);
+           });
 
   if (column_remainder != 0) {
     EmitOuterLoopBody(b_->getInt64(column_limit), column_remainder,
@@ -340,7 +340,7 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopTiled(
     int64 columns, bool is_first_column) {
   int64 row_limit = m() - (m() % tile_rows());
 
-  ksl_.ForReturnVoid(
+  ksl_.For(
       "dot.inner.tiled", /*start=*/0, /*end=*/row_limit,
       /*step=*/tile_rows(), [&](llvm::Value* row) {
         std::vector<llvm::Value*> lhs_tile =
@@ -372,7 +372,7 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
   //     // initialized.
   //   }
 
-  ksl_.ForReturnVoid(
+  ksl_.For(
       "dot.inner.epilg.outer", /*start=*/current_tile_col,
       /*end=*/b_->CreateAdd(columns_llvm, current_tile_col),
       /*step=*/1, /*peel_first_iteration=*/false,
@@ -381,14 +381,14 @@ void ColumnMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
         llvm::Value* total_offset = b_->CreateMul(col, b_->getInt64(m()));
         llvm::Value* lhs_base_pointer =
             vsl_.ComputeOffsetPointer(lhs_, total_offset);
-        ksl_.ForReturnVoid(
+        ksl_.For(
             "dot.inner.epilg.inner", /*start=*/row_start, /*end=*/m(),
             /*step=*/1, [&](llvm::Value* scalar_row) {
               llvm::Value* product = vsl_.Mul(
                   vsl_.LoadScalar(lhs_base_pointer, scalar_row), rhs_element);
               llvm::Value* setting_result_first_time = b_->CreateAnd(
                   is_first_scalar_col, b_->getInt1(is_first_tiled_column));
-              ksl_.IfReturnVoid(
+              ksl_.If(
                   setting_result_first_time,
                   /*true_block_generator=*/
                   [&]() {
@@ -568,10 +568,9 @@ void RowMajorMatrixVectorProductEmitter::Emit() {
   int64 row_remainder = m() % tile_rows();
   int64 row_limit = m() - row_remainder;
 
-  ksl_.ForReturnVoid(
-      "dot.outer.tiled",
-      /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows(),
-      [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows()); });
+  ksl_.For("dot.outer.tiled",
+           /*start=*/0, /*end=*/row_limit, /*step=*/tile_rows(),
+           [&](llvm::Value* row) { EmitOuterLoopBody(row, tile_rows()); });
 
   if (row_remainder != 0) {
     EmitOuterLoopBody(b_->getInt64(row_limit), row_remainder);
@@ -583,17 +582,17 @@ void RowMajorMatrixVectorProductEmitter::EmitInnerLoopTiled(
     std::vector<VectorVariable>* vector_accumulators) {
   int64 column_limit = k() - (k() % tile_cols());
 
-  ksl_.ForReturnVoid("dot.inner.tiled", /*start=*/0, /*end=*/column_limit,
-                     /*step=*/tile_cols(), [&](llvm::Value* col) {
-                       std::vector<llvm::Value*> lhs_tile =
-                           lhs_memory_tile->LoadTile(/*minor_dim_offset=*/col);
-                       llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col);
-                       for (int i = 0; i < rows; i++) {
-                         llvm::Value* old_sum = (*vector_accumulators)[i].Get();
-                         (*vector_accumulators)[i].Set(vsl_.Add(
-                             old_sum, vsl_.Mul(rhs_value, lhs_tile[i])));
-                       }
-                     });
+  ksl_.For("dot.inner.tiled", /*start=*/0, /*end=*/column_limit,
+           /*step=*/tile_cols(), [&](llvm::Value* col) {
+             std::vector<llvm::Value*> lhs_tile =
+                 lhs_memory_tile->LoadTile(/*minor_dim_offset=*/col);
+             llvm::Value* rhs_value = vsl_.LoadVector(rhs_, col);
+             for (int i = 0; i < rows; i++) {
+               llvm::Value* old_sum = (*vector_accumulators)[i].Get();
+               (*vector_accumulators)[i].Set(
+                   vsl_.Add(old_sum, vsl_.Mul(rhs_value, lhs_tile[i])));
+             }
+           });
 }
 
 void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
@@ -609,7 +608,7 @@ void RowMajorMatrixVectorProductEmitter::EmitInnerLoopEpilogue(
         b_->CreateAdd(b_->getInt64(r), current_tile_row), b_->getInt64(k()));
     llvm::Value* lhs_base_pointer =
         vsl_.ComputeOffsetPointer(lhs_, total_offset);
-    ksl_.ForReturnVoid(
+    ksl_.For(
         "dot.inner.epilg.inner", /*start=*/column_start, /*end=*/k(),
         /*step=*/1, [&](llvm::Value* scalar_col) {
           llvm::Value* product =
@@ -813,7 +812,7 @@ void TiledSmallGemmEmitter::HandleResiduesOnN() {
 
   if (n_start != dims().n()) {
     VectorSupportLibrary vsl(scalar_type(), 1, b_, "gemm");
-    ksl_.ForReturnVoid("epi.n", n_start, dims().n(), 1, [&](llvm::Value* n_i) {
+    ksl_.For("epi.n", n_start, dims().n(), 1, [&](llvm::Value* n_i) {
       llvm::Value* n_i_next = b_->CreateAdd(n_i, b_->getInt64(1));
       HandleResiduesOnK(&vsl, n_i, n_i_next);
     });
@@ -924,7 +923,7 @@ void TiledSmallGemmEmitter::EmitTiledGemm(
     VectorSupportLibrary* vsl, int64 tile_size_k, llvm::Value* k_start,
     llvm::Value* k_end, llvm::Value* n_start, llvm::Value* n_end,
     int64 tile_size_m, llvm::Value* m_start, llvm::Value* m_end) {
-  ksl_.ForReturnVoid(
+  ksl_.For(
       "dot.m", m_start, m_end, tile_size_m, [&](llvm::Value* m_i) {
         MemoryTile result_memory_tile(
             vsl, b_, /*matrix=*/result_,
@@ -935,11 +934,11 @@ void TiledSmallGemmEmitter::EmitTiledGemm(
                                    /*matrix_size_along_minor_dim=*/dims().k(),
                                    /*major_dim_offset=*/m_i,
                                    /*tile_size_along_major_dim=*/tile_size_m);
-        ksl_.ForReturnVoid(
+        ksl_.For(
             "dot.n", n_start, n_end, vsl->vector_size(), [&](llvm::Value* n_i) {
               TileVariable result_tile_var(vsl,
                                            result_memory_tile.LoadTile(n_i));
-              ksl_.ForReturnVoid(
+              ksl_.For(
                   "dot.k", k_start, k_end, tile_size_k, [&](llvm::Value* k_i) {
                     MemoryTile rhs_memory_tile(vsl, b_, rhs_, dims().n(), k_i,
                                                tile_size_k);
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index fb040aff30..c8b5343e61 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -1389,7 +1389,7 @@ Status IrEmitterUnnested::EmitRowReduction(
       auto emit_z_tile_element_loop = [&](llvm::Value* z_indvar) -> Status {
         llvm::Value* z =
             NSWAdd(z_indvar, NSWMul(index_typed_constant(z_tile_size), z_tile));
-        TF_RETURN_IF_ERROR(ksl.For(
+        TF_RETURN_IF_ERROR(ksl.ForWithStatus(
             "x_tile",
             /*start=*/index_typed_constant(0),
             /*end=*/index_typed_constant(x_tile_loop_bound),
@@ -1461,29 +1461,29 @@ Status IrEmitterUnnested::EmitRowReduction(
         return Status::OK();
       };
 
-      return ksl.For("z_tile",
-                     /*start=*/index_typed_constant(0),
-                     /*end=*/index_typed_constant(z_tile_size),
-                     /*step=*/1, emit_z_tile_element_loop);
+      return ksl.ForWithStatus("z_tile",
+                               /*start=*/index_typed_constant(0),
+                               /*end=*/index_typed_constant(z_tile_size),
+                               /*step=*/1, emit_z_tile_element_loop);
     };
 
     llvm::Value* tile_in_bounds =
         Or(b_.getInt1(width % (x_tile_size * kWarpSize) == 0),
            ICmpULT(last_x, index_typed_constant(width)));
 
-    TF_RETURN_IF_ERROR(
-        ksl.If(tile_in_bounds,
-               /*true_block_generator=*/
-               [&]() -> Status {
-                 return emit_z_x_tile_element_loop(/*x_tile_in_bounds=*/true,
-                                                   x_tile_size);
-               },
-               /*false_block_generator=*/
-               [&]() -> Status {
-                 return emit_z_x_tile_element_loop(
-                     /*x_tile_in_bounds=*/false,
-                     CeilOfRatio(width % (x_tile_size * kWarpSize), kWarpSize));
-               }));
+    TF_RETURN_IF_ERROR(ksl.IfWithStatus(
+        tile_in_bounds,
+        /*true_block_generator=*/
+        [&]() -> Status {
+          return emit_z_x_tile_element_loop(/*x_tile_in_bounds=*/true,
+                                            x_tile_size);
+        },
+        /*false_block_generator=*/
+        [&]() -> Status {
+          return emit_z_x_tile_element_loop(
+              /*x_tile_in_bounds=*/false,
+              CeilOfRatio(width % (x_tile_size * kWarpSize), kWarpSize));
+        }));
 
     // After accumulating the elements of the z_x_tile, emit calls to
     // shfl_down that accumulate the partial reduction results of all
@@ -3121,11 +3121,9 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk(
   // pressure, since we touch threadIdx.x and blockIdx.x at the beginning of the
   // kernel *anyway*.
   std::vector<IrArray> output_arrays = ConstructIrArrayForOutputs(hlo);
-  TF_RETURN_IF_ERROR(
-      KernelSupportLibrary(&b_).If("emit_mof_tuple", IsBlock0Thread0(&b_), [&] {
-        llvm_ir::EmitTuple(GetIrArray(hlo, hlo), output_arrays, &b_, module_);
-        return Status::OK();
-      }));
+  KernelSupportLibrary{&b_}.If("emit_mof_tuple", IsBlock0Thread0(&b_), [&] {
+    llvm_ir::EmitTuple(GetIrArray(hlo, hlo), output_arrays, &b_, module_);
+  });
 
   // For multioutput fusion, we need to emit each operand and the root.
   TF_RETURN_IF_ERROR(
@@ -3241,7 +3239,7 @@ void EmitPartialTile(
     llvm::Value* x_loc =
         builder->CreateAdd(llvm::ConstantInt::get(index_ty, j), x);
 
-    ksl->IfReturnVoid(
+    ksl->If(
         "x_in_tile", builder->CreateICmpULT(x_loc, tile_width), [&] {
           // tile_height_bound =
           //   ceil(tile_height / num_threads_y) * num_threads_y
@@ -3252,13 +3250,13 @@ void EmitPartialTile(
           llvm::Value* tile_height_bound = builder->CreateMul(
               ceiling_of_ratio,
               llvm::ConstantInt::get(index_ty, num_threads_y));
-          ksl->ForReturnVoid(
+          ksl->For(
               loop_name, /*start=*/llvm::ConstantInt::get(index_ty, 0),
               /*end=*/tile_height_bound,
               /*step=*/llvm::ConstantInt::get(index_ty, num_threads_y),
               [&](llvm::Value* y_indvar) {
                 llvm::Value* y_loc = builder->CreateAdd(y_indvar, y);
-                ksl->IfReturnVoid(
+                ksl->If(
                     "y_in_tile", builder->CreateICmpULT(y_loc, tile_height),
                     [&] {
                       emit_elem_function(
@@ -3290,7 +3288,7 @@ void EmitTiledElementalCodeWithBoundsCheck(
   int64 tile_size_y = mapping_scheme->GetTileSizeForDimensionY();
   llvm::Type* index_ty = tile_width->getType();
 
-  ksl->IfReturnVoid(
+  ksl->If(
       "full_tile",
       builder->CreateAnd(
           builder->CreateICmpEQ(llvm::ConstantInt::get(index_ty, tile_size_x),
@@ -3419,15 +3417,14 @@ void IrEmitterUnnested::EmitBlock(const TileGenerator& emit_one_tile,
               Select(ICmpEQ(last_block_for_dim, block_id_for_dim),
                      last_block_size_for_dim, block_size_for_dim);
 
-          ksl.ForReturnVoid(
-              loop_name,
-              /*start=*/index_typed_constant(0),
-              /*end=*/num_tiles_in_block,
-              /*step=*/1, [&](llvm::Value* block_dim_induction_var) {
-                IrArray::Index tile_index = starting_tile.AddOffsetToDim(
-                    block_dim_induction_var, dim_id, &b_);
-                emit_next_block_dim(tile_index);
-              });
+          ksl.For(loop_name,
+                  /*start=*/index_typed_constant(0),
+                  /*end=*/num_tiles_in_block,
+                  /*step=*/1, [&](llvm::Value* block_dim_induction_var) {
+                    IrArray::Index tile_index = starting_tile.AddOffsetToDim(
+                        block_dim_induction_var, dim_id, &b_);
+                    emit_next_block_dim(tile_index);
+                  });
         }
       };
 
@@ -3524,13 +3521,12 @@ LaunchDimensions IrEmitterUnnested::EmitKernel(
   // since we touch threadIdx.x and blockIdx.x at the beginning of the kernel
   // *anyway*.
   if (unnested_hlo->IsMultiOutputFusion()) {
-    TF_CHECK_OK(KernelSupportLibrary(&b_).If(
+    KernelSupportLibrary{&b_}.If(
         "emit_mof_tuple", IsBlock0Thread0(&b_), [&] {
           llvm_ir::EmitTuple(GetIrArray(*unnested_hlo, *unnested_hlo),
                              ConstructIrArrayForOutputs(*unnested_hlo), &b_,
                              module_);
-          return Status::OK();
-        }));
+        });
   }
 
   // For each tiled parameter, cast its input IrArray to the corresponding
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
index bd0139f85b..5eeb29c478 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.cc
@@ -18,28 +18,29 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
 
 namespace xla {
-Status KernelSupportLibrary::For(
+Status KernelSupportLibrary::ForWithStatus(
     absl::string_view name, llvm::Value* start, llvm::Value* end,
     llvm::Value* step,
     const std::function<Status(llvm::Value*, bool)>& for_body_generator) {
-  return If(b_->CreateICmpSLT(start, end), [&]() -> Status {
+  return IfWithStatus(b_->CreateICmpSLT(start, end), [&]() -> Status {
     TF_RETURN_IF_ERROR(for_body_generator(start, /*is_first_iteration=*/true));
-    return For(name, b_->CreateAdd(start, step), end, step,
-               [&](llvm::Value* iv) { return for_body_generator(iv, false); });
+    return ForWithStatus(
+        name, b_->CreateAdd(start, step), end, step,
+        [&](llvm::Value* iv) { return for_body_generator(iv, false); });
   });
 }
 
-Status KernelSupportLibrary::For(
+Status KernelSupportLibrary::ForWithStatus(
     absl::string_view name, llvm::Value* start, llvm::Value* end,
     llvm::Value* step, bool peel_first_iteration,
     const std::function<Status(llvm::Value*, llvm::Value*)>&
         for_body_generator) {
   if (peel_first_iteration) {
-    return For(name, start, end, step, true,
-               [&](llvm::Value* indvar, bool is_first_iteration) -> Status {
-                 return for_body_generator(indvar,
-                                           b_->getInt1(is_first_iteration));
-               });
+    return ForWithStatus(
+        name, start, end, step, true,
+        [&](llvm::Value* indvar, bool is_first_iteration) -> Status {
+          return for_body_generator(indvar, b_->getInt1(is_first_iteration));
+        });
   } else {
     std::unique_ptr<llvm_ir::ForLoop> loop = llvm_ir::ForLoop::EmitForLoop(
         name, start, end, step, b_,
@@ -55,7 +56,7 @@ Status KernelSupportLibrary::For(
   }
 }
 
-Status KernelSupportLibrary::If(
+Status KernelSupportLibrary::IfWithStatus(
     absl::string_view name, llvm::Value* condition,
     const std::function<Status()>& true_block_generator,
     const std::function<Status()>& false_block_generator) {
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
index 43fec311f1..612b839cfa 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h
@@ -48,41 +48,42 @@ class KernelSupportLibrary {
   //     for (i64 i = `start` + `step`; i s< `end`; i += `step`)
   //       `for_body_generator(/*ind_var=*/,i, /*is_first_iteration=*/false)`;
   //   }
-  Status For(
+  Status ForWithStatus(
       absl::string_view name, llvm::Value* start, llvm::Value* end,
       llvm::Value* step,
       const std::function<Status(llvm::Value* ind_var,
                                  bool is_first_iteration)>& for_body_generator);
 
-  void ForReturnVoid(
+  void For(
       absl::string_view name, llvm::Value* start, llvm::Value* end,
       llvm::Value* step,
       const std::function<void(llvm::Value* ind_var, bool is_first_iteration)>&
           for_body_generator) {
     CHECK_EQ(Status::OK(),
-             For(name, start, end, step,
+             ForWithStatus(
+                 name, start, end, step,
                  [&](llvm::Value* ind_var, bool is_first_iteration) -> Status {
                    for_body_generator(ind_var, is_first_iteration);
                    return Status::OK();
                  }));
   }
 
-  Status For(absl::string_view name, int64 start, int64 end, int64 step,
-             const std::function<Status(llvm::Value* ind_var,
-                                        bool is_first_iteration)>&
-                 for_body_generator) {
-    return For(name, /*start=*/b_->getInt64(start),
-               /*end=*/b_->getInt64(end),
-               /*step=*/b_->getInt64(step), for_body_generator);
+  Status ForWithStatus(
+      absl::string_view name, int64 start, int64 end, int64 step,
+      const std::function<Status(
+          llvm::Value* ind_var, bool is_first_iteration)>& for_body_generator) {
+    return ForWithStatus(name, /*start=*/b_->getInt64(start),
+                         /*end=*/b_->getInt64(end),
+                         /*step=*/b_->getInt64(step), for_body_generator);
   }
 
-  void ForReturnVoid(
+  void For(
       absl::string_view name, int64 start, int64 end, int64 step,
       const std::function<void(llvm::Value* ind_var, bool is_first_iteration)>&
           for_body_generator) {
-    ForReturnVoid(name, /*start=*/b_->getInt64(start),
-                  /*end=*/b_->getInt64(end),
-                  /*step=*/b_->getInt64(step), for_body_generator);
+    For(name, /*start=*/b_->getInt64(start),
+        /*end=*/b_->getInt64(end),
+        /*step=*/b_->getInt64(step), for_body_generator);
   }
 
   // Generates the following control flow structure if `peel_first_iteration` is
@@ -99,19 +100,19 @@ class KernelSupportLibrary {
   //   for (i64 i = `start`; i s< `end`; i += `step`)
   //     `for_body_generator(/*ind_var=*/,i,
   //                         /*is_first_iteration=*/,(i != `start`))`;
-  Status For(absl::string_view name, llvm::Value* start, llvm::Value* end,
-             llvm::Value* step, bool peel_first_iteration,
-             const std::function<Status(llvm::Value* ind_var,
-                                        llvm::Value* is_first_iteration)>&
-                 for_body_generator);
+  Status ForWithStatus(
+      absl::string_view name, llvm::Value* start, llvm::Value* end,
+      llvm::Value* step, bool peel_first_iteration,
+      const std::function<Status(llvm::Value* ind_var,
+                                 llvm::Value* is_first_iteration)>&
+          for_body_generator);
 
-  void ForReturnVoid(absl::string_view name, llvm::Value* start,
-                     llvm::Value* end, llvm::Value* step,
-                     bool peel_first_iteration,
-                     const std::function<void(llvm::Value* ind_var,
-                                              llvm::Value* is_first_iteration)>&
-                         for_body_generator) {
-    TF_CHECK_OK(For(
+  void For(absl::string_view name, llvm::Value* start, llvm::Value* end,
+           llvm::Value* step, bool peel_first_iteration,
+           const std::function<void(llvm::Value* ind_var,
+                                    llvm::Value* is_first_iteration)>&
+               for_body_generator) {
+    TF_CHECK_OK(ForWithStatus(
         name, start, end, step, peel_first_iteration,
         [&](llvm::Value* ind_var, llvm::Value* is_first_iteration) -> Status {
           for_body_generator(ind_var, is_first_iteration);
@@ -119,80 +120,81 @@ class KernelSupportLibrary {
         }));
   }
 
-  Status For(absl::string_view name, llvm::Value* start, llvm::Value* end,
-             int64 step, bool peel_first_iteration,
-             const std::function<Status(llvm::Value* ind_var,
-                                        llvm::Value* is_first_iteration)>&
-                 for_body_generator) {
-    return For(name, /*start=*/start, /*end=*/end,
-               /*step=*/llvm::ConstantInt::get(start->getType(), step),
-               peel_first_iteration, for_body_generator);
+  Status ForWithStatus(
+      absl::string_view name, llvm::Value* start, llvm::Value* end, int64 step,
+      bool peel_first_iteration,
+      const std::function<Status(llvm::Value* ind_var,
+                                 llvm::Value* is_first_iteration)>&
+          for_body_generator) {
+    return ForWithStatus(
+        name, /*start=*/start, /*end=*/end,
+        /*step=*/llvm::ConstantInt::get(start->getType(), step),
+        peel_first_iteration, for_body_generator);
   }
 
-  void ForReturnVoid(absl::string_view name, llvm::Value* start,
-                     llvm::Value* end, int64 step, bool peel_first_iteration,
-                     const std::function<void(llvm::Value* ind_var,
-                                              llvm::Value* is_first_iteration)>&
-                         for_body_generator) {
-    ForReturnVoid(name, /*start=*/start, /*end=*/end,
-                  /*step=*/llvm::ConstantInt::get(start->getType(), step),
-                  peel_first_iteration, for_body_generator);
+  void For(absl::string_view name, llvm::Value* start, llvm::Value* end,
+           int64 step, bool peel_first_iteration,
+           const std::function<void(llvm::Value* ind_var,
+                                    llvm::Value* is_first_iteration)>&
+               for_body_generator) {
+    For(name, /*start=*/start, /*end=*/end,
+        /*step=*/llvm::ConstantInt::get(start->getType(), step),
+        peel_first_iteration, for_body_generator);
   }
 
-  Status For(
+  Status ForWithStatus(
       absl::string_view name, llvm::Value* start, llvm::Value* end,
       llvm::Value* step,
       const std::function<Status(llvm::Value* ind_var)>& for_body_generator) {
-    return For(name, start, end, step,
-               /*peel_first_iteration=*/false,
-               [&](llvm::Value* indvar, llvm::Value*) -> Status {
-                 return for_body_generator(indvar);
-               });
+    return ForWithStatus(name, start, end, step,
+                         /*peel_first_iteration=*/false,
+                         [&](llvm::Value* indvar, llvm::Value*) -> Status {
+                           return for_body_generator(indvar);
+                         });
   }
 
-  void ForReturnVoid(
+  void For(
       absl::string_view name, llvm::Value* start, llvm::Value* end,
       llvm::Value* step,
       const std::function<void(llvm::Value* ind_var)>& for_body_generator) {
-    ForReturnVoid(name, start, end, step,
-                  /*peel_first_iteration=*/false,
-                  [&](llvm::Value* indvar, llvm::Value*) {
-                    return for_body_generator(indvar);
-                  });
+    For(name, start, end, step,
+        /*peel_first_iteration=*/false, [&](llvm::Value* indvar, llvm::Value*) {
+          return for_body_generator(indvar);
+        });
   }
 
-  Status For(
+  Status ForWithStatus(
       absl::string_view name, llvm::Value* start, llvm::Value* end, int64 step,
       const std::function<Status(llvm::Value* ind_var)>& for_body_generator) {
-    return For(name, start, end, llvm::ConstantInt::get(start->getType(), step),
-               /*peel_first_iteration=*/false,
-               [&](llvm::Value* indvar, llvm::Value*) -> Status {
-                 return for_body_generator(indvar);
-               });
+    return ForWithStatus(name, start, end,
+                         llvm::ConstantInt::get(start->getType(), step),
+                         /*peel_first_iteration=*/false,
+                         [&](llvm::Value* indvar, llvm::Value*) -> Status {
+                           return for_body_generator(indvar);
+                         });
   }
 
-  void ForReturnVoid(
+  void For(
       absl::string_view name, llvm::Value* start, llvm::Value* end, int64 step,
       const std::function<void(llvm::Value* ind_var)>& for_body_generator) {
-    ForReturnVoid(name, start, end,
-                  llvm::ConstantInt::get(start->getType(), step),
-                  for_body_generator);
+    For(name, start, end, llvm::ConstantInt::get(start->getType(), step),
+        for_body_generator);
   }
 
-  Status For(
+  Status ForWithStatus(
       absl::string_view name, int64 start, int64 end, int64 step,
       const std::function<Status(llvm::Value* ind_var)>& for_body_generator) {
-    return For(name, /*start=*/b_->getInt64(start),
-               /*end=*/b_->getInt64(end),
-               /*step=*/b_->getInt64(step), for_body_generator);
+    return ForWithStatus(name, /*start=*/b_->getInt64(start),
+                         /*end=*/b_->getInt64(end),
+                         /*step=*/b_->getInt64(step), for_body_generator);
   }
 
-  void ForReturnVoid(
+  void For(
       absl::string_view name, int64 start, int64 end, int64 step,
       const std::function<void(llvm::Value* ind_var)>& for_body_generator) {
-    ForReturnVoid(name, /*start=*/b_->getInt64(start),
-                  /*end=*/b_->getInt64(end),
-                  /*step=*/b_->getInt64(step), for_body_generator);
+    For(name, /*start=*/b_->getInt64(start),
+        /*end=*/b_->getInt64(end),
+        /*step=*/b_->getInt64(step), for_body_generator);
   }
 
   // Generates the following control flow structure:
@@ -201,38 +203,43 @@ class KernelSupportLibrary {
   //     `true_block_generator()`;
   //   else
   //      `false_block_generator()`;
-  Status If(absl::string_view name, llvm::Value* condition,
-            const std::function<Status()>& true_block_generator,
-            const std::function<Status()>& false_block_generator =
-                []() -> Status { return Status::OK(); });
+  Status IfWithStatus(
+      absl::string_view name, llvm::Value* condition,
+      const std::function<Status()>& true_block_generator,
+      const std::function<Status()>& false_block_generator = []() -> Status {
+        return Status::OK();
+      });
 
-  Status If(llvm::Value* condition,
-            const std::function<Status()>& true_block_generator,
-            const std::function<Status()>& false_block_generator =
-                []() -> Status { return Status::OK(); }) {
-    return If("", condition, true_block_generator, false_block_generator);
+  Status IfWithStatus(
+      llvm::Value* condition,
+      const std::function<Status()>& true_block_generator,
+      const std::function<Status()>& false_block_generator = []() -> Status {
+        return Status::OK();
+      }) {
+    return IfWithStatus("", condition, true_block_generator,
+                        false_block_generator);
   }
 
-  void IfReturnVoid(llvm::Value* condition,
-                    const std::function<void()>& true_block_generator,
-                    const std::function<void()>& false_block_generator = []() {
-                    }) {
-    IfReturnVoid("", condition, true_block_generator, false_block_generator);
+  void If(
+      llvm::Value* condition, const std::function<void()>& true_block_generator,
+      const std::function<void()>& false_block_generator = []() {}) {
+    If("", condition, true_block_generator, false_block_generator);
   }
 
-  void IfReturnVoid(absl::string_view name, llvm::Value* condition,
-                    const std::function<void()>& true_block_generator,
-                    const std::function<void()>& false_block_generator = []() {
-                    }) {
-    TF_CHECK_OK(If(name, condition,
-                   [&]() {
-                     true_block_generator();
-                     return Status::OK();
-                   },
-                   [&]() {
-                     false_block_generator();
-                     return Status::OK();
-                   }));
+  void If(
+      absl::string_view name, llvm::Value* condition,
+      const std::function<void()>& true_block_generator,
+      const std::function<void()>& false_block_generator = []() {}) {
+    TF_CHECK_OK(IfWithStatus(
+        name, condition,
+        [&]() {
+          true_block_generator();
+          return Status::OK();
+        },
+        [&]() {
+          false_block_generator();
+          return Status::OK();
+        }));
   }
 
   using ArgumentVector = absl::Span<llvm::Value* const>;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
index e22c2173c2..6a9406bfeb 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
@@ -108,7 +108,7 @@ void EmitCompareLoopBody(
 
   // if (is_smaller_index && index_is_inbounds)
   KernelSupportLibrary ksl(b);
-  ksl.IfReturnVoid("smaller_comparison_index", do_comparison, [&]() {
+  ksl.If("smaller_comparison_index", do_comparison, [&]() {
     auto key1 = read_element(0, current_keys_index);
     auto key2 = read_element(0, compare_keys_index);
     auto compare_key1 = key1;
@@ -155,7 +155,7 @@ void EmitCompareLoopBody(
       is_smaller_than = b->CreateOr(
           is_smaller_than, b->CreateAnd(keys_equal, index_is_smaller_than));
     }
-    ksl.IfReturnVoid("is_smaller_than", is_smaller_than, [&]() {
+    ksl.If("is_smaller_than", is_smaller_than, [&]() {
       // Swap key1 with key2.
       write_element(0, current_keys_index, key2);
       write_element(0, compare_keys_index, key1);
@@ -192,7 +192,7 @@ void EmitTiledCompareLoop(
             b->CreateShl(tiled_keys_index[dimension_to_sort], value_one);
         // We want to copy two adjacent elements. We first check whether the
         // first index position is within bounds.
-        ksl.IfReturnVoid(
+        ksl.If(
             "smaller_keys_index",
             b->CreateICmpSLT(current_keys_index,
                              tiled_keys_index.GetConstantWithIndexType(
@@ -203,15 +203,14 @@ void EmitTiledCompareLoop(
               // Increment to go the next index position.
               current_keys_index = b->CreateAdd(current_keys_index, value_one);
               // Here we check whether the next index position is within bounds.
-              ksl.IfReturnVoid(
-                  "inner_smaller_keys_index",
-                  b->CreateICmpSLT(current_keys_index,
-                                   tiled_keys_index.GetConstantWithIndexType(
-                                       dimension_to_sort_bound)),
-                  [&]() {
-                    cache_index = b->CreateAdd(cache_index, value_one);
-                    read_or_write(cache_index, current_keys_index);
-                  });
+              ksl.If("inner_smaller_keys_index",
+                     b->CreateICmpSLT(current_keys_index,
+                                      tiled_keys_index.GetConstantWithIndexType(
+                                          dimension_to_sort_bound)),
+                     [&]() {
+                       cache_index = b->CreateAdd(cache_index, value_one);
+                       read_or_write(cache_index, current_keys_index);
+                     });
             });
       };
 
@@ -253,7 +252,7 @@ void EmitTiledCompareLoop(
     if (dimension_to_sort_bound % tile_size) {
       // Otherwise we need a bounds check for the last tile. The last tile has
       // size 'dimension_to_sort_bound' % 'tile_size'.
-      ksl.IfReturnVoid(
+      ksl.If(
           "is_last_tile",
           b->CreateICmpUGE(
               b->CreateMul(tiled_keys_index[dimension_to_sort],
-- 
GitLab


From 97ea1e6c5e9c1881edc7f2c1aa25d4f66ea46be9 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Mon, 10 Dec 2018 15:58:18 -0800
Subject: [PATCH 322/873] Make UnifiedLSTM the default LSTM layer in tf 2.0.

Also stop exporting CuDNNLSTM since its all covered by unified LSTM.

PiperOrigin-RevId: 224900214
---
 .../python/keras/layers/cudnn_recurrent.py    |   2 +-
 tensorflow/python/keras/layers/recurrent.py   |   4 +-
 .../python/keras/layers/unified_lstm_test.py  |   3 +-
 ...rflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt | 197 ------------------
 .../v2/tensorflow.keras.layers.-l-s-t-m.pbtxt |   3 +-
 .../golden/v2/tensorflow.keras.layers.pbtxt   |   4 -
 tensorflow/tools/compatibility/renames_v2.py  |   1 +
 7 files changed, 7 insertions(+), 207 deletions(-)
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt

diff --git a/tensorflow/python/keras/layers/cudnn_recurrent.py b/tensorflow/python/keras/layers/cudnn_recurrent.py
index 16692753af..e695a68b60 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent.py
@@ -335,7 +335,7 @@ class CuDNNGRU(_CuDNNRNN):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.CuDNNLSTM')
+@tf_export(v1=['keras.layers.CuDNNLSTM'])
 class CuDNNLSTM(_CuDNNRNN):
   """Fast LSTM implementation backed by cuDNN.
 
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index fb4c1736b1..a39db7e8b1 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -2274,7 +2274,7 @@ class PeepholeLSTMCell(LSTMCell):
     return c, o
 
 
-@tf_export('keras.layers.LSTM')
+@tf_export(v1=['keras.layers.LSTM'])
 class LSTM(RNN):
   """Long Short-Term Memory layer - Hochreiter 1997.
 
@@ -2532,7 +2532,7 @@ class LSTM(RNN):
       config['implementation'] = 1
     return cls(**config)
 
-
+@tf_export('keras.layers.LSTM', v1=[])
 class UnifiedLSTM(LSTM):
   """Long Short-Term Memory layer - Hochreiter 1997.
 
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index a2b523b00e..0219e5e426 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -209,8 +209,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
       y_2 = lstm_model.predict(x_train)
 
       with test_util.device(use_gpu=True):
-        cudnn_layer = keras.layers.UnifiedLSTM(rnn_state_size,
-                                               recurrent_activation='sigmoid')
+        cudnn_layer = keras.layers.UnifiedLSTM(rnn_state_size)
         cudnn_model = keras.models.Model(inputs, cudnn_layer(inputs))
       cudnn_model.set_weights(weights)
       y_3 = cudnn_model.predict(x_train)
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
deleted file mode 100644
index 7c463ff125..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
+++ /dev/null
@@ -1,197 +0,0 @@
-path: "tensorflow.keras.layers.CuDNNLSTM"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.layers.cudnn_recurrent.CuDNNLSTM\'>"
-  is_instance: "<class \'tensorflow.python.keras.layers.cudnn_recurrent._CuDNNRNN\'>"
-  is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "cell"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "states"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'units\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\'], varargs=None, keywords=kwargs, defaults=[\'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'False\', \'False\', \'False\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_initial_state"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
index 529c750f98..9144a5b103 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
@@ -1,5 +1,6 @@
 path: "tensorflow.keras.layers.LSTM"
 tf_class {
+  is_instance: "<class \'tensorflow.python.keras.layers.recurrent.UnifiedLSTM\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.LSTM\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
@@ -155,7 +156,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
+    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'unit_forget_bias\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'time_major\', \'unroll\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt
index 3b4724ef10..10ac3a7520 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt
@@ -116,10 +116,6 @@ tf_module {
     name: "CuDNNGRU"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "CuDNNLSTM"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "Dense"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index b757ad4647..3ab5a0d0d6 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -238,6 +238,7 @@ renames = {
     'tf.is_strictly_increasing': 'tf.math.is_strictly_increasing',
     'tf.is_variable_initialized': 'tf.compat.v1.is_variable_initialized',
     'tf.keras.backend.get_session': 'tf.compat.v1.keras.backend.get_session',
+    'tf.keras.layers.CuDNNLSTM': 'tf.compat.v1.keras.layers.CuDNNLSTM',
     'tf.layers.AveragePooling1D': 'tf.compat.v1.layers.AveragePooling1D',
     'tf.layers.AveragePooling2D': 'tf.compat.v1.layers.AveragePooling2D',
     'tf.layers.AveragePooling3D': 'tf.compat.v1.layers.AveragePooling3D',
-- 
GitLab


From ce3a9e8eeca81a76e2f0ebb98418885fa5d75325 Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Mon, 10 Dec 2018 16:28:47 -0800
Subject: [PATCH 323/873] [XLA] Enable compare for float16 in HloEvaluator.

PiperOrigin-RevId: 224905468
---
 tensorflow/compiler/xla/service/hlo_evaluator.cc | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index 3a7652a8dc..e98fc0a5de 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -629,8 +629,11 @@ Status HloEvaluator::HandleCompare(HloInstruction* compare) {
           evaluated_[compare],
           Compare<int64>(compare->shape(), opcode, lhs_literal, rhs_literal));
     } break;
-    case F16:
-      return Unimplemented("unhandled primitive type: F16.");
+    case F16: {
+      TF_ASSIGN_OR_RETURN(
+          evaluated_[compare],
+          Compare<half>(compare->shape(), opcode, lhs_literal, rhs_literal));
+    } break;
     case BF16: {
       TF_ASSIGN_OR_RETURN(evaluated_[compare],
                           Compare<bfloat16>(compare->shape(), opcode,
-- 
GitLab


From 08feaa53d2f4c3cae623eb3ea9f8cce60c9eeca7 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Mon, 10 Dec 2018 17:07:10 -0800
Subject: [PATCH 324/873] Do not run examples_test on windows. it is a bash
 test, and language filters do not work properly on windows.

PiperOrigin-RevId: 224912071
---
 tensorflow/python/debug/BUILD | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index c6abd476d9..1dcdb880f5 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -1132,4 +1132,7 @@ sh_test(
         ":debug_tflearn_iris",
         ":offline_analyzer",
     ],
+    tags = [
+        "no_windows",
+    ],
 )
-- 
GitLab


From c136aa8255c2abfc068db18fff7c043e9da324db Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Mon, 10 Dec 2018 17:14:16 -0800
Subject: [PATCH 325/873] Tune keepalive timeouts for Tensorflow/GRPC

This disables the keepalive watchdog for TF/GRPC channels.  The watchdog ping timer is intended to monitor channels in case they have gone "stale".  If this occurs, any pending RPCs are marked failed.  This interacts poorly with large TF models, where we can saturate the network exchanging tensors, causing the watchdog ping to be delayed.

The timer is not essential (normal deadline processing and socket termination is still respected), so we can disable it with minimal risk here.

PiperOrigin-RevId: 224913045
---
 tensorflow/core/distributed_runtime/rpc/grpc_channel.cc    | 1 +
 tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc | 5 +++++
 tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h  | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
index 781b7d65cd..1420589f82 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
@@ -60,6 +60,7 @@ Status ValidateHostPortPair(const string& host_port) {
   // TODO(mrry): Implement secure channels.
   ::grpc::ChannelArguments args;
   args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH, std::numeric_limits<int32>::max());
+  args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, std::numeric_limits<int>::max());
   // NOTE(mrry): Some versions of gRPC use a 20-second minimum backoff
   // on connection failure, which makes our tests time out.
   args.SetInt("grpc.testing.fixed_reconnect_backoff_ms", 1000);
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
index cbd5cd927e..33ff8e1ac4 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
@@ -110,6 +110,11 @@ GrpcServer::~GrpcServer() {
   // - worker_env_.compute_pool
 }
 
+void GrpcServer::MaybeMutateBuilder(::grpc::ServerBuilder* builder) {
+  builder->AddChannelArgument(GRPC_ARG_KEEPALIVE_TIME_MS,
+                              std::numeric_limits<int>::max());
+}
+
 Status GrpcServer::Init(
     ServiceInitFunction service_func,
     const RendezvousMgrCreationFunction& rendezvous_mgr_func,
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h
index c1395abdde..c7f543e5bf 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h
@@ -62,7 +62,7 @@ class GrpcServer : public ServerInterface {
   GrpcServer(const ServerDef& server_def, Env* env);
   // Allow children classes to override this and provide custom args to the
   // server before it is constructed. Default behavior is to do nothing.
-  virtual void MaybeMutateBuilder(::grpc::ServerBuilder* builder) {}
+  virtual void MaybeMutateBuilder(::grpc::ServerBuilder* builder);
 
  public:
   static Status Create(const ServerDef& server_def, Env* env,
-- 
GitLab


From 662053a4d35942d1c1b6800df98829e1046b1679 Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Mon, 10 Dec 2018 17:16:26 -0800
Subject: [PATCH 326/873] Fix kokoro tests by removing dependency on save_test.

PiperOrigin-RevId: 224913339
---
 tensorflow/python/keras/BUILD                 |  1 -
 .../keras/engine/training_utils_test.py       | 29 +++++++++++++++++--
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index faf58e0d93..36fea36389 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -848,7 +848,6 @@ py_test(
     deps = [
         ":keras",
         "//tensorflow/python:client_testlib",
-        "//tensorflow/python/saved_model:save_test",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
     ],
diff --git a/tensorflow/python/keras/engine/training_utils_test.py b/tensorflow/python/keras/engine/training_utils_test.py
index 0250e60426..d8acec32cb 100644
--- a/tensorflow/python/keras/engine/training_utils_test.py
+++ b/tensorflow/python/keras/engine/training_utils_test.py
@@ -22,10 +22,13 @@ import os
 
 import numpy as np
 
+
+from tensorflow.python.client import session as session_lib
 from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.eager import def_function
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend as K
@@ -35,8 +38,10 @@ from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
+from tensorflow.python.saved_model import loader
 from tensorflow.python.saved_model import save as save_lib
-from tensorflow.python.saved_model import save_test
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import tag_constants
 
 
 class ModelInputsTest(test.TestCase):
@@ -222,6 +227,25 @@ class TraceModelCallTest(keras_parameterized.TestCase):
     self._assert_all_close(expected_outputs, signature_outputs)
 
 
+def _import_and_infer(save_dir, inputs):
+  """Import a SavedModel into a TF 1.x-style graph and run `signature_key`."""
+  graph = ops.Graph()
+  with graph.as_default(), session_lib.Session() as session:
+    model = loader.load(session, [tag_constants.SERVING], save_dir)
+    signature = model.signature_def[
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
+    assert set(inputs.keys()) == set(signature.inputs.keys())
+    feed_dict = {}
+    for arg_name in inputs.keys():
+      feed_dict[graph.get_tensor_by_name(signature.inputs[arg_name].name)] = (
+          inputs[arg_name])
+    output_dict = {}
+    for output_name, output_tensor_info in signature.outputs.items():
+      output_dict[output_name] = graph.get_tensor_by_name(
+          output_tensor_info.name)
+    return session.run(output_dict, feed_dict=feed_dict)
+
+
 class ModelSaveTest(keras_parameterized.TestCase):
 
   @keras_parameterized.run_with_all_model_types
@@ -239,8 +263,7 @@ class ModelSaveTest(keras_parameterized.TestCase):
 
     self.assertAllClose(
         {model.output_names[0]: model.predict_on_batch(inputs)},
-        save_test._import_and_infer(save_dir,
-                                    {model.input_names[0]: np.ones((8, 5))}))
+        _import_and_infer(save_dir, {model.input_names[0]: np.ones((8, 5))}))
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 137638999f20055f1da45067f7191117ba640449 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Mon, 10 Dec 2018 17:22:41 -0800
Subject: [PATCH 327/873] Check if it's allowed to prune side effects in
 model_pruner

PiperOrigin-RevId: 224914276
---
 tensorflow/core/grappler/grappler_item.cc        |  9 +++++++++
 tensorflow/core/grappler/utils/functions.cc      | 16 ++++++++--------
 tensorflow/core/grappler/utils/functions_test.cc |  3 +--
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/grappler/grappler_item.cc b/tensorflow/core/grappler/grappler_item.cc
index 74bde67f19..2d71ac54cc 100644
--- a/tensorflow/core/grappler/grappler_item.cc
+++ b/tensorflow/core/grappler/grappler_item.cc
@@ -114,6 +114,15 @@ std::unordered_set<string> GrapplerItem::NodesToPreserve() const {
       result.insert(NodeName(queue_runner.cancel_op_name()));
     }
   }
+
+  if (!allowed_optimizations_.prune_ops_with_side_effects) {
+    for (const NodeDef& node : graph.node()) {
+      if (!IsFreeOfSideEffect(node)) {
+        result.insert(node.name());
+      }
+    }
+  }
+
   return result;
 }
 
diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc
index 57863a71f3..f2894a942b 100644
--- a/tensorflow/core/grappler/utils/functions.cc
+++ b/tensorflow/core/grappler/utils/functions.cc
@@ -347,6 +347,10 @@ GrapplerFunctionItem::GrapplerFunctionItem(
       fetch.push_back(output_tensor);
     }
   }
+
+  // It's unsafe to prune side-effectful ops from the graph instantiated from a
+  // function definition (see inlining in function_optimizer.cc).
+  allowed_optimizations().prune_ops_with_side_effects = false;
 }
 
 const string& GrapplerFunctionItem::description() const { return description_; }
@@ -561,7 +565,6 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
     inputs.push_back(std::move(input_expansion));
   }
 
-  std::vector<string> keep_nodes;
   // Add all function nodes to the function body
   for (const NodeDef& func_def_node : func.node_def()) {
     NodeDef* new_node = function_body.add_node();
@@ -577,11 +580,6 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
     // Register node output range in a function connectivity.
     TF_RETURN_IF_ERROR(RegisterFunctionBodyOutputs(*registration, func_def_node,
                                                    &connectivity));
-
-    // Ops with side effects must be preserved in a function body.
-    if (!IsFreeOfSideEffect(func_def_node)) {
-      keep_nodes.push_back(func_def_node.name());
-    }
   }
 
   // Rewrite inputs to use GraphDef format
@@ -612,12 +610,14 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
     outputs.push_back(std::move(output));
   }
 
+  std::vector<string> keep_ops;
   bool is_stateful = signature.is_stateful();
 
   *item = GrapplerFunctionItem(
-      /*func_name=*/signature.name(), /*description=*/signature.description(),
+      /*func_name=*/signature.name(),
+      /*description=*/signature.description(),
       /*func_attr=*/AttrSlice(&func.attr()), std::move(inputs),
-      std::move(outputs), std::move(keep_nodes), graph_def_version, is_stateful,
+      std::move(outputs), std::move(keep_ops), graph_def_version, is_stateful,
       std::move(function_body));
   return Status::OK();
 }
diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc
index 8639dec05a..5923850eca 100644
--- a/tensorflow/core/grappler/utils/functions_test.cc
+++ b/tensorflow/core/grappler/utils/functions_test.cc
@@ -599,8 +599,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithSideEffectfulOps) {
   EXPECT_EQ(3, item.function_body().node_size());
   EXPECT_EQ(1, item.input_size());
   EXPECT_EQ(0, item.output_size());
-  ASSERT_EQ(1, item.keep_ops.size());
-  EXPECT_EQ("update", item.keep_ops[0]);
+  EXPECT_EQ(false, item.allowed_optimizations().prune_ops_with_side_effects);
 }
 
 TEST_F(FunctionsTest, MakeFunctionDef) {
-- 
GitLab


From 3ef97d1ed381117282f72acd19582a729bf4b821 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 17:28:57 -0800
Subject: [PATCH 328/873] Generate informative error when attempting to execute
 SummaryImageOp with trivial dimensions.

This bypasses a nullptr error that appears downstream due to referencing a length-zero array in a temporary buffer.

PiperOrigin-RevId: 224915050
---
 tensorflow/core/kernels/summary_image_op.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/core/kernels/summary_image_op.cc b/tensorflow/core/kernels/summary_image_op.cc
index 29b21ee735..68f17c2e78 100644
--- a/tensorflow/core/kernels/summary_image_op.cc
+++ b/tensorflow/core/kernels/summary_image_op.cc
@@ -78,6 +78,11 @@ class SummaryImageOp : public OpKernel {
     const int hw = h * w;  // Compact these two dims for simplicity
     const int depth = static_cast<int>(tensor.dim_size(3));
 
+    OP_REQUIRES(c, hw > 0 && depth > 0,
+                errors::InvalidArgument(
+                    "input tensor must have non-zero dims. Found: [",
+                    batch_size, ", ", h, ", ", w, ", ", depth, "]."));
+
     Summary s;
     if (tensor.dtype() == DT_UINT8) {
       // For uint8 input, no normalization is necessary
-- 
GitLab


From 9aa32a6eacd0e8f507d1c57f0658d6c3ecaecaba Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Mon, 10 Dec 2018 17:32:56 -0800
Subject: [PATCH 329/873] Enable mixing value tensors (eager tensors or numpy
 arrays) and Keras symbolic tensors when building Keras graphs-of-layers in an
 eager scope. In these cases, the value tensors are treated as symbolic
 constants.

This enables the following pattern to work in the same way in both V1 and V2:

```
lstm = LSTM(2)
inputs = keras.Input((None, 3))
outputs = lstm(inputs, initial_state=tf.ones(shape))
```

(without this change, the above code works in V1 but fails in V2 with an artificial exception).

Known issue: in case a random tensor is used, there is a (usually harmless) behavior discrepancy remaining between V1 and V2, which is that in V2 we'd be using the same random value every time, whereas in V1 we'd be drawing new random values (since the tensor would be treated as a random op and not as a constant). We think this is not a problem because in V2 users should have the mental model "tensors are values" and thus would be expecting a random tensor to behave like a constant value and not like a random generator.

PiperOrigin-RevId: 224915621
---
 tensorflow/python/eager/execute.py            |  6 -----
 .../python/keras/engine/base_layer_test.py    | 25 ++++++++++++-------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/eager/execute.py b/tensorflow/python/eager/execute.py
index 6f8c780170..7415a0ae22 100644
--- a/tensorflow/python/eager/execute.py
+++ b/tensorflow/python/eager/execute.py
@@ -66,12 +66,6 @@ def quick_execute(op_name, num_outputs, inputs, attrs, ctx, name=None):
     six.raise_from(core._status_to_exception(e.code, message), None)
   except TypeError as e:
     if any(ops._is_keras_symbolic_tensor(x) for x in inputs):
-      if any(isinstance(x, ops.EagerTensor) for x in inputs):
-        raise TypeError("You are attempting to mix computation of symbolic "
-                        "Tensors (computation rooted at tf.keras.Input()) "
-                        "and concrete values. This is not supported. "
-                        "If you need this support, file an issue on the "
-                        "TensorFlow GitHub repository.")
       raise core._SymbolicException
     raise e
   # pylint: enable=protected-access
diff --git a/tensorflow/python/keras/engine/base_layer_test.py b/tensorflow/python/keras/engine/base_layer_test.py
index 798775b6a5..fa4eb48d56 100644
--- a/tensorflow/python/keras/engine/base_layer_test.py
+++ b/tensorflow/python/keras/engine/base_layer_test.py
@@ -167,19 +167,26 @@ class BaseLayerTest(test.TestCase):
   def test_mixing_keras_symbolic_tensors_and_eager_tensors(self):
     x1 = keras.Input((3,))
     x2 = array_ops.ones((3, 3))
-    with self.assertRaisesRegexp(
-        TypeError,
-        'mix computation of symbolic Tensors'):
-      math_ops.matmul(x1, x2)
+    y = math_ops.matmul(x1, x2)
+    self.assertEqual(y.graph, keras.backend.get_graph())
+    fn = keras.backend.function(inputs=[x1], outputs=[y])
+    x_val = np.random.random((3, 3))
+    y_val = np.ones((3, 3))
+    self.assertAllClose(fn([x_val])[0],
+                        np.matmul(x_val, y_val),
+                        atol=1e-5)
 
   def test_mixing_keras_symbolic_tensors_and_numpy_arrays(self):
-    # For the time being we treat Numpy arrays as EagerTensors when mixing both.
     x1 = keras.Input((3,))
     x2 = np.ones((3, 3), dtype='float32')
-    with self.assertRaisesRegexp(
-        TypeError,
-        'mix computation of symbolic Tensors'):
-      math_ops.matmul(x1, x2)
+    y = math_ops.matmul(x1, x2)
+    self.assertEqual(y.graph, keras.backend.get_graph())
+    fn = keras.backend.function(inputs=[x1], outputs=[y])
+    x_val = np.random.random((3, 3))
+    y_val = np.ones((3, 3))
+    self.assertAllClose(fn([x_val])[0],
+                        np.matmul(x_val, y_val),
+                        atol=1e-5)
 
 
 if __name__ == '__main__':
-- 
GitLab


From e5165302eb1593b8f52eb15c8668e1c81cc771ae Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 10 Dec 2018 18:15:31 -0800
Subject: [PATCH 330/873] Expose a `tensorflow.autograph` namespace, with a
 minimal core API under experimental. Clean the documentation for public
 symbols.

PiperOrigin-RevId: 224921147
---
 tensorflow/python/autograph/__init__.py       |  15 +-
 tensorflow/python/autograph/core/converter.py |  57 ++++---
 tensorflow/python/autograph/impl/api.py       | 148 ++++++++++++------
 tensorflow/python/ops/standard_ops.py         |   2 +
 .../tools/api/generator/api_init_files.bzl    |   2 +
 .../tools/api/generator/api_init_files_v1.bzl |   2 +
 ...flow.autograph.experimental.-feature.pbtxt |  28 ++++
 ...ow.autograph.experimental.-verbosity.pbtxt |  12 ++
 .../tensorflow.autograph.experimental.pbtxt   |  11 ++
 .../api/golden/v1/tensorflow.autograph.pbtxt  |  15 ++
 .../tools/api/golden/v1/tensorflow.pbtxt      |   4 +
 ...flow.autograph.experimental.-feature.pbtxt |  28 ++++
 ...ow.autograph.experimental.-verbosity.pbtxt |  12 ++
 .../tensorflow.autograph.experimental.pbtxt   |  11 ++
 .../api/golden/v2/tensorflow.autograph.pbtxt  |  15 ++
 .../tools/api/golden/v2/tensorflow.pbtxt      |   4 +
 16 files changed, 290 insertions(+), 76 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.-verbosity.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.autograph.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.-feature.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.-verbosity.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.autograph.pbtxt

diff --git a/tensorflow/python/autograph/__init__.py b/tensorflow/python/autograph/__init__.py
index 7252e0d9bf..6faeb01607 100644
--- a/tensorflow/python/autograph/__init__.py
+++ b/tensorflow/python/autograph/__init__.py
@@ -12,10 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Autograph compiles Python code into equivalent TensorFlow code.
+"""Conversion of plain Python into TensorFlow graph code.
 
-Equivalent here means that they have the same effect when executed.
+NOTE: In TensorFlow 2.0, AutoGraph is automatically applied when using
+`tf.function`. This module contains lower-level APIs for advanced use.
+
+For more information, see the
+[AutoGraph guide](https://www.tensorflow.org/guide/autograph).
+
+By equivalent graph code we mean code that generates a TensorFlow graph when
+run. The generated graph has the same effects as the original code when executed
+(for example with `tf.function` or `tf.compat.v1.Session.run`). In other words,
+using AutoGraph can be thought of as running Python in TensorFlow.
 """
+# TODO(b/119833526): Link to the new tf.function + autograph tutorial.
 
 from __future__ import absolute_import
 from __future__ import division
@@ -43,6 +53,7 @@ from tensorflow.python.autograph.lang.special_functions import tensor_list
 from tensorflow.python.autograph.pyct.transformer import AutographParseError
 from tensorflow.python.util.all_util import remove_undocumented
 
+# TODO(mdan): Revisit this list once we finalize the generated code mechanism.
 _allowed_symbols = [
     # Main API
     'ConversionOptions',
diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index e88c4674ee..eea2621056 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -63,8 +63,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from enum import Enum
-from enum import IntEnum
+import enum
 
 from tensorflow.python.autograph.core import config
 from tensorflow.python.autograph.core import naming
@@ -83,6 +82,7 @@ from tensorflow.python.autograph.pyct.static_analysis import liveness
 from tensorflow.python.autograph.pyct.static_analysis import reaching_definitions
 from tensorflow.python.autograph.pyct.static_analysis import type_info
 from tensorflow.python.eager import function
+from tensorflow.python.util.tf_export import tf_export
 
 # TODO(mdan): These contexts can be refactored into first class objects.
 # For example, we could define Program and Entity abstractions that hold on
@@ -91,37 +91,42 @@ from tensorflow.python.eager import function
 # TODO(mdan): Add a test specific to this converter.
 
 
-class Verbosity(IntEnum):
-  """Different levels of verbosity for printing errors.
+@tf_export('autograph.experimental.Verbosity')
+class Verbosity(enum.IntEnum):
+  """Represents conversion verbosity levels.
 
   Attributes:
-   * BRIEF: No logging, minimal error messages.
-   * VERBOSE: Detailed logging of generated code, detailed error messages.
+    BRIEF: No logging, minimal error messages.
+    VERBOSE: Detailed logging of generated code, detailed error messages.
   """
+
   BRIEF = 0
   VERBOSE = 1
 
 
-class Feature(Enum):
-  """Constants to use when selecting AutoGraph features."""
+@tf_export('autograph.experimental.Feature')
+class Feature(enum.Enum):
+  """Represents conversion options that can be toggled on or off.
 
-  ALL = 'Enable all features.'
+  Attributes:
+    ALL: Enable all features.
+    AUTO_CONTROL_DEPS: Insert of control dependencies in the generated code.
+    DECORATORS: Allow decorators in local functions. Note that special
+      decorators, like `tf.function`, are allowed regardless of this toggle.
+    ERROR_REWRITING: Rewrite errors that occur in the generated code to
+      indicate the source code to which the failing code corresponds.
+    LISTS: Convert list idioms, like initializers, slices, append, etc.
+    NAME_SCOPES: Insert name scopes that name ops according to context, like the
+      function they were defined in.
+  """
 
-  AUTO_CONTROL_DEPS = (
-      'Insert of control dependencies in the generated code.')
-  DECORATORS = (
-      'Allow decorators in local functions. Note that special decorators,'
-      ' like ag.convert or tf.function are allowed regardless of this toggle.')
-  ERROR_REWRITING = (
-      'Rewrite errors that occur in the generated code to indicate the source'
-      ' code to which the failing code corresponds.')
-  LISTS = 'Convert list idioms, like initializers, slices, append, etc.'
-  NAME_SCOPES = (
-      'Insert name scopes that name ops according to context, like the'
-      ' function they were defined in.')
+  ALL = 'ALL'
 
-  def __repr__(self):
-    return self.name
+  AUTO_CONTROL_DEPS = 'AUTO_CONTROL_DEPS'
+  DECORATORS = 'DECORATORS'
+  ERROR_REWRITING = 'ERROR_REWRITING'
+  LISTS = 'LISTS'
+  NAME_SCOPES = 'NAME_SCOPES'
 
 
 class ConversionOptions(object):
@@ -157,7 +162,9 @@ class ConversionOptions(object):
     # TODO(mdan): Rename to conversion_recursion_depth?
     self.internal_convert_user_code = internal_convert_user_code
 
-    if isinstance(optional_features, Feature):
+    if optional_features is None:
+      optional_features = ()
+    elif isinstance(optional_features, Feature):
       optional_features = (optional_features,)
     optional_features = frozenset(optional_features)
     self.optional_features = optional_features
@@ -419,7 +426,7 @@ class AnnotatedDef(reaching_definitions.Definition):
     self.directives = {}
 
 
-class AgAnno(Enum):
+class AgAnno(enum.Enum):
   """Annotation labels specific to AutoGraph. See anno.py."""
 
   DIRECTIVES = 'User directives associated with the annotated statement.'
diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index f7774888c8..54b46b1efd 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -40,6 +40,7 @@ from tensorflow.python.framework import tensor_util
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
+from tensorflow.python.util.tf_export import tf_export
 
 # TODO(mdan): Properly document the type hints.
 # TODO(mdan): Reduce the type hint information to (module, type).
@@ -157,7 +158,6 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None):
   return decorator
 
 
-# TODO(mdan): Move to a private, undocumented module.
 def converted_call(f, owner, options, *args, **kwargs):
   """Compiles a function call inline. For internal use only."""
   if options.verbose >= converter.Verbosity.VERBOSE:
@@ -202,7 +202,7 @@ def converted_call(f, owner, options, *args, **kwargs):
     return f(*args, **kwargs)
 
   # Unwrap functools.partial objects
-  # TODO(allenl, mdan): Consider sharing unwrapping logic with tf_inspect.
+  # TODO(mdan): Consider sharing unwrapping logic with tf_inspect.
   while isinstance(f, functools.partial):
     args = f.args + args
     new_kwargs = {}
@@ -283,9 +283,9 @@ def converted_call(f, owner, options, *args, **kwargs):
       verbose=options.verbose,
       arg_values=arg_values,
       arg_types=arg_types,
-      partial_types=partial_types,
       strip_decorators=options.strip_decorators,
-      optional_features=options.optional_features)
+      optional_features=options.optional_features,
+      experimental_partial_types=partial_types)
 
   result = converted_f(*effective_args, **kwargs)
 
@@ -314,44 +314,81 @@ def _is_not_callable(obj):
   return False
 
 
-# TODO(mdan): Rename: to_ops?
-# TODO(mdan): Look into overloading as function and decorator, like tfe.defun?
-# TODO(mdan): Remove partial_types.
-def to_graph(e,
+@tf_export('autograph.to_graph')
+def to_graph(entity,
              recursive=True,
              verbose=converter.Verbosity.VERBOSE,
              arg_values=None,
              arg_types=None,
-             partial_types=None,
              strip_decorators=None,
-             optional_features=converter.Feature.ALL):
-  """Converts a Python entity into equivalent code that uses TensorFlow ops.
+             optional_features=converter.Feature.ALL,
+             experimental_partial_types=None):
+  """Converts a Python entity into a TensorFlow graph.
+
+  Also see: `tf.autograph.to_code`, `tf.function`.
+
+  Unlike `tf.function`, `to_graph` is a low-level transpiler that converts
+  Python code to TensorFlow graph code. It does not implement any caching,
+  variable management or create any actual ops, and is best used where greater
+  control over the generated TensorFlow graph is desired. Another difference
+  from `tf.function` is that `to_graph` will not wrap the graph into a
+  TensorFlow function or a Python callable. Internally, `tf.function` uses
+  `to_graph`.
+
+  _Example Usage_
+
+  ```python
+    def foo(x):
+      if x > 0:
+        y = x * x
+      else:
+        y = -x
+      return y
+
+    converted_foo = to_graph(foo)
+
+    x = tf.constant(1)
+    y = converted_foo(x)  # converted_foo is a TensorFlow Op-like.
+    assert is_tensor(y)
+  ```
 
   Supported Python entities include:
     * functions
     * classes
+    * object methods
+
+  Functions are converted into new functions with converted code.
 
-  Classes are converted by converting all their methods into a new class.
+  Classes are converted by generating a new class whose methods use converted
+  code.
+
+  Methods are converted into unbound function that have an additional first
+  argument called `self`.
 
   Args:
-    e: Union[Callable, Type], the Python entity to convert.
-    recursive: bool, whether to recursively convert any functions that the
+    entity: Python callable or class to convert.
+    recursive: Whether to recursively convert any functions that the
       converted function may call.
-    verbose: converter.Verbosity, the level of printing verbosity to use.
-    arg_values: Optional[Dict[Text, Any]], value hints for symbols including
-      function arguments.
-    arg_types: Optional[Dict[Text, Type]], type hints for symbols including
-      function arguments.
-    partial_types: Set[Type], reserved for internal use.
-    strip_decorators: Tuple[Callable], same as
-      ConversionOptions.strip_decorators.
-    optional_features: Union[Feature, Set[Feature]], same as
-      ConversionOptions.optional_features.
+    verbose: The level of printing verbosity to use, as a
+      `tf.autograph.experimental.Verbosity` value.
+    arg_values: Optional dict of value hints for symbols including
+      function arguments mapping string names to actual values. For example,
+      `arg_values={'a': 1}` will map the variable `a` to the value `1`.
+    arg_types: Optional dict of type hints for symbols including function
+      arguments. Type hints allow specifying just the type of a variable, rather
+      than a specific value.
+    strip_decorators: A tuple specifying decorators that should be
+      excluded from the compiled output. By default, when converting a function
+      before the decorators are applied, the compiled output will include those
+      decorators.
+    optional_features: `None`, a tuple of, or a single
+      `tf.autograph.experimental.Feature` value. Controls the use of
+      optional features in the conversion process.
+    experimental_partial_types: A `set` of `type` values, reserved for internal
+      use.
 
   Returns:
-    Union[Callable, Type], the converted entity, which is the same kind as e
-    (that is, a function is e is a function, a class if e is a class, etc.) but
-    its code has been converted to use TF ops.
+    Same as `entity`, the converted Python function or class.
 
   Raises:
     ValueError: If the entity could not be converted.
@@ -366,11 +403,11 @@ def to_graph(e,
           verbose=verbose,
           strip_decorators=strip_decorators,
           optional_features=optional_features),
-      partial_types=partial_types,
+      partial_types=experimental_partial_types,
       autograph_module=tf_inspect.getmodule(to_graph),
       uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
-  _, name, namespace = conversion.entity_to_graph(e, program_ctx, arg_values,
-                                                  arg_types)
+  _, name, namespace = conversion.entity_to_graph(entity, program_ctx,
+                                                  arg_values, arg_types)
 
   nodes = []
   for dep in reversed(program_ctx.conversion_order):
@@ -389,8 +426,8 @@ def to_graph(e,
       compiled_module.__dict__[key] = val
   compiled = getattr(compiled_module, name)
 
-  if tf_inspect.isfunction(e):
-    compiled.__defaults__ = e.__defaults__
+  if tf_inspect.isfunction(entity):
+    compiled.__defaults__ = entity.__defaults__
 
   if hasattr(compiled, '__globals__'):
     # Remove self to avoid circular references. This will probably only work
@@ -415,38 +452,51 @@ def to_graph(e,
   return compiled
 
 
-def to_code(e,
+@tf_export('autograph.to_code')
+def to_code(entity,
             recursive=True,
             arg_values=None,
             arg_types=None,
-            partial_types=None,
-            indentation='  '):
-  """Returns the equivalent code that uses TensorFlow ops.
+            indentation='  ',
+            optional_features=converter.Feature.ALL,
+            experimental_partial_types=None):
+  """Similar to `to_graph`, but returns Python source code as a string.
+
+  Also see: `tf.autograph.to_graph`.
 
-  Also see: `to_graph`, `convert`
+  `to_graph` returns the Python source code that can be used to generate a
+  TensorFlow graph that is functionally identical to the input Python code.
 
   Args:
-    e: Union[Callable, Type], the Python entity to convert.
-    recursive: bool, whether to recursively convert any functions that the
+    entity: Python callable or class to convert.
+    recursive: Whether to recursively convert any functions that the
       converted function may call.
-    arg_values: Optional[Dict[Text, Any]], value hints for symbols including
-      function arguments.
-    arg_types: Optional[Dict[Text, Type]], type hints for symbols including
-      function arguments.
-    partial_types: Set[Type], reserved for internal use.
-    indentation: Text, when to use for each level of indentation.
+    arg_values: Optional dict of value hints for symbols including
+      function arguments mapping string names to actual values. For example,
+      `arg_values={'a': 1}` will map the variable `a` to the value `1`.
+    arg_types: Optional dict of type hints for symbols including function
+      arguments. Type hints allow specifying just the type of a variable, rather
+      than a specific value.
+    indentation: The string to use for indenting. Typically two or four spaces,
+      or just the tab character.
+    optional_features: `None`, a tuple of, or a single
+      `tf.autograph.experimental.Feature` value. Controls the use of
+      optional features in the conversion process.
+    experimental_partial_types: A `set` of `type` values, reserved for internal
+      use.
 
   Returns:
-    Text, the converted code.
+    The converted code as string.
   """
   program_ctx = converter.ProgramContext(
       options=converter.ConversionOptions(
           recursive=recursive,
-          strip_decorators=(convert, do_not_convert, converted_call)),
-      partial_types=partial_types,
+          strip_decorators=(convert, do_not_convert, converted_call),
+          optional_features=optional_features),
+      partial_types=experimental_partial_types,
       autograph_module=tf_inspect.getmodule(to_graph),
       uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
-  conversion.entity_to_graph(e, program_ctx, arg_values, arg_types)
+  conversion.entity_to_graph(entity, program_ctx, arg_values, arg_types)
 
   code = '\n'.join(
       compiler.ast_to_source(program_ctx.dependency_cache[dep], indentation)
diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py
index c614d072ba..8ef0fe8070 100644
--- a/tensorflow/python/ops/standard_ops.py
+++ b/tensorflow/python/ops/standard_ops.py
@@ -22,6 +22,8 @@ from __future__ import print_function
 
 import sys as _sys
 
+from tensorflow.python import autograph
+
 # pylint: disable=g-bad-import-order
 # Imports the following modules so that @RegisterGradient get executed.
 from tensorflow.python.ops import array_grad
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 0245ac50a6..58913b3208 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -4,6 +4,8 @@
 TENSORFLOW_API_INIT_FILES = [
     # BEGIN GENERATED FILES
     "__init__.py",
+    "autograph/__init__.py",
+    "autograph/experimental/__init__.py",
     "bitwise/__init__.py",
     "compat/__init__.py",
     "data/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index e35b9c4374..0937f98e75 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -5,6 +5,8 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     # BEGIN GENERATED FILES
     "__init__.py",
     "app/__init__.py",
+    "autograph/__init__.py",
+    "autograph/experimental/__init__.py",
     "bitwise/__init__.py",
     "compat/__init__.py",
     "data/__init__.py",
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.-feature.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.-feature.pbtxt
new file mode 100644
index 0000000000..a71da113b4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.-feature.pbtxt
@@ -0,0 +1,28 @@
+path: "tensorflow.autograph.experimental.Feature"
+tf_class {
+  is_instance: "<enum \'Feature\'>"
+  member {
+    name: "ALL"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "AUTO_CONTROL_DEPS"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "DECORATORS"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "ERROR_REWRITING"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "LISTS"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "NAME_SCOPES"
+    mtype: "<enum \'Feature\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.-verbosity.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.-verbosity.pbtxt
new file mode 100644
index 0000000000..c4d5b77c07
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.-verbosity.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.autograph.experimental.Verbosity"
+tf_class {
+  is_instance: "<enum \'Verbosity\'>"
+  member {
+    name: "BRIEF"
+    mtype: "<enum \'Verbosity\'>"
+  }
+  member {
+    name: "VERBOSE"
+    mtype: "<enum \'Verbosity\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.pbtxt
new file mode 100644
index 0000000000..5747dac7ab
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.autograph.experimental.pbtxt
@@ -0,0 +1,11 @@
+path: "tensorflow.autograph.experimental"
+tf_module {
+  member {
+    name: "Feature"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
+  member {
+    name: "Verbosity"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.autograph.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.autograph.pbtxt
new file mode 100644
index 0000000000..34bdab95ff
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.autograph.pbtxt
@@ -0,0 +1,15 @@
+path: "tensorflow.autograph"
+tf_module {
+  member {
+    name: "experimental"
+    mtype: "<type \'module\'>"
+  }
+  member_method {
+    name: "to_code"
+    argspec: "args=[\'entity\', \'recursive\', \'arg_values\', \'arg_types\', \'indentation\', \'optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'None\', \'None\', \'  \', \'Feature.ALL\', \'None\'], "
+  }
+  member_method {
+    name: "to_graph"
+    argspec: "args=[\'entity\', \'recursive\', \'verbose\', \'arg_values\', \'arg_types\', \'strip_decorators\', \'optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'Verbosity.VERBOSE\', \'None\', \'None\', \'None\', \'Feature.ALL\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 584c74f99d..60ff59196b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -288,6 +288,10 @@ tf_module {
     name: "app"
     mtype: "<type \'module\'>"
   }
+  member {
+    name: "autograph"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "bfloat16"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.-feature.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.-feature.pbtxt
new file mode 100644
index 0000000000..a71da113b4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.-feature.pbtxt
@@ -0,0 +1,28 @@
+path: "tensorflow.autograph.experimental.Feature"
+tf_class {
+  is_instance: "<enum \'Feature\'>"
+  member {
+    name: "ALL"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "AUTO_CONTROL_DEPS"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "DECORATORS"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "ERROR_REWRITING"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "LISTS"
+    mtype: "<enum \'Feature\'>"
+  }
+  member {
+    name: "NAME_SCOPES"
+    mtype: "<enum \'Feature\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.-verbosity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.-verbosity.pbtxt
new file mode 100644
index 0000000000..c4d5b77c07
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.-verbosity.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.autograph.experimental.Verbosity"
+tf_class {
+  is_instance: "<enum \'Verbosity\'>"
+  member {
+    name: "BRIEF"
+    mtype: "<enum \'Verbosity\'>"
+  }
+  member {
+    name: "VERBOSE"
+    mtype: "<enum \'Verbosity\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.pbtxt
new file mode 100644
index 0000000000..5747dac7ab
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.autograph.experimental.pbtxt
@@ -0,0 +1,11 @@
+path: "tensorflow.autograph.experimental"
+tf_module {
+  member {
+    name: "Feature"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
+  member {
+    name: "Verbosity"
+    mtype: "<class \'enum.EnumMeta\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.autograph.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.autograph.pbtxt
new file mode 100644
index 0000000000..34bdab95ff
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.autograph.pbtxt
@@ -0,0 +1,15 @@
+path: "tensorflow.autograph"
+tf_module {
+  member {
+    name: "experimental"
+    mtype: "<type \'module\'>"
+  }
+  member_method {
+    name: "to_code"
+    argspec: "args=[\'entity\', \'recursive\', \'arg_values\', \'arg_types\', \'indentation\', \'optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'None\', \'None\', \'  \', \'Feature.ALL\', \'None\'], "
+  }
+  member_method {
+    name: "to_graph"
+    argspec: "args=[\'entity\', \'recursive\', \'verbose\', \'arg_values\', \'arg_types\', \'strip_decorators\', \'optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'Verbosity.VERBOSE\', \'None\', \'None\', \'None\', \'Feature.ALL\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 4432cae53b..0f11107dc3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -80,6 +80,10 @@ tf_module {
     name: "VariableSynchronization"
     mtype: "<class \'enum.EnumMeta\'>"
   }
+  member {
+    name: "autograph"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "bfloat16"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
-- 
GitLab


From 60f89ee911649a94b2483f71363e5dad6dda5901 Mon Sep 17 00:00:00 2001
From: "Meng, Peng" <peng.meng@intel.com>
Date: Tue, 11 Dec 2018 10:46:03 +0800
Subject: [PATCH 331/873] add comments about layout

Change-Id: Ie1e9f61046501d9e02586f96d232b748c77e0dd4
---
 tensorflow/core/kernels/mkl_softmax_op.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc
index c35bdd5487..b84fd79d75 100644
--- a/tensorflow/core/kernels/mkl_softmax_op.cc
+++ b/tensorflow/core/kernels/mkl_softmax_op.cc
@@ -76,8 +76,10 @@ class MklSoftmaxOp : public OpKernel {
       // "nc" for 2 dim tensor, "tnc" for 3 dim tensor, "nchw" for 4 dim tensor,
       // and "ncdhw" for 5 dim tensor. Each of the symbols has the following
       // meaning: n = batch, c = channels, t = sequence length, h = height, w =
-      // width, d = depth. When src tensor is MKL, layout_type here is only used 
-      // for setting TF layout type of output tensor.
+      // width, d = depth. When src tensor is MKL, layout_type here is only used
+      // for setting TF layout type of output tensor. When input is TF Tensor,
+      // layout here is no special sense. We use axis to define on which
+      // dimension to do softmax.
       switch (input_dims) {
         case 1:
           layout_type = memory::format::x;
-- 
GitLab


From c2ade32503f4109e4b8fcbd689f39a6e8cd96273 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 10 Dec 2018 18:54:49 -0800
Subject: [PATCH 332/873] [XLA] Add DefaultDebugOptionsIgnoringFlags()
 function.

This gets a DebugOptions struct with all the defaults filled in as though
XLA_FLAGS were empty.  This is useful when you want to run an XLA computation
and explicitly ignore any XLA_FLAGS passed to the binary.

PiperOrigin-RevId: 224925335
---
 .../compiler/xla/debug_options_flags.cc       | 51 +++++++++----------
 tensorflow/compiler/xla/debug_options_flags.h |  5 +-
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc
index 20609cad58..e77d0ba63b 100644
--- a/tensorflow/compiler/xla/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/debug_options_flags.cc
@@ -22,49 +22,48 @@ limitations under the License.
 #include "tensorflow/compiler/xla/parse_flags_from_env.h"
 
 namespace xla {
-namespace {
 
-DebugOptions* flag_values;
-std::vector<tensorflow::Flag>* flag_objects;
-std::once_flag flags_init;
-
-void SetDebugOptionsDefaults(DebugOptions* flags) {
-  flags->set_xla_llvm_enable_alias_scope_metadata(true);
-  flags->set_xla_llvm_enable_noalias_metadata(true);
-  flags->set_xla_llvm_enable_invariant_load_metadata(true);
-  flags->set_xla_llvm_disable_expensive_passes(false);
-  flags->set_xla_backend_optimization_level(3);
-  flags->set_xla_cpu_multi_thread_eigen(true);
-  flags->set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
-  flags->set_xla_eliminate_hlo_implicit_broadcast(true);
+DebugOptions DefaultDebugOptionsIgnoringFlags() {
+  DebugOptions opts;
+  opts.set_xla_llvm_enable_alias_scope_metadata(true);
+  opts.set_xla_llvm_enable_noalias_metadata(true);
+  opts.set_xla_llvm_enable_invariant_load_metadata(true);
+  opts.set_xla_llvm_disable_expensive_passes(false);
+  opts.set_xla_backend_optimization_level(3);
+  opts.set_xla_cpu_multi_thread_eigen(true);
+  opts.set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
+  opts.set_xla_eliminate_hlo_implicit_broadcast(true);
 #ifdef INTEL_MKL
-  flags->set_xla_cpu_use_mkl_dnn(true);
+  opts.set_xla_cpu_use_mkl_dnn(true);
 #endif  // INTEL_MKL
-  flags->set_xla_gpu_max_kernel_unroll_factor(4);
+  opts.set_xla_gpu_max_kernel_unroll_factor(4);
   // Set cudnn batchnorm off by default; it does not provide a performance win
   // on average.
-  flags->set_xla_gpu_use_cudnn_batchnorm(false);
+  opts.set_xla_gpu_use_cudnn_batchnorm(false);
 
   // Run all GPU work on one stream by default.  Using multiple streams
   // increases memory usage and we lack strong motivating benchmarks for tuning
   // the heuristics needed to decide when to run on multiple streams.  See
   // b/77879207.
-  flags->set_xla_gpu_disable_multi_streaming(true);
+  opts.set_xla_gpu_disable_multi_streaming(true);
 
   // TODO(jlebar): Disable fastmath once doing so is not a performance
   // regression.
-  flags->set_xla_cpu_enable_fast_math(true);
-  flags->set_xla_gpu_enable_fast_min_max(true);
+  opts.set_xla_cpu_enable_fast_math(true);
+  opts.set_xla_gpu_enable_fast_min_max(true);
 
-  flags->set_xla_force_host_platform_device_count(1);
+  opts.set_xla_force_host_platform_device_count(1);
+  return opts;
 }
 
+static DebugOptions* flag_values;
+static std::vector<tensorflow::Flag>* flag_objects;
+static std::once_flag flags_init;
+
 // Allocates flag_values and flag_objects; this function must not be called more
 // than once - its call done via call_once.
-void AllocateFlags() {
-  flag_values = new DebugOptions;
-
-  SetDebugOptionsDefaults(flag_values);
+static void AllocateFlags() {
+  flag_values = new DebugOptions(DefaultDebugOptionsIgnoringFlags());
 
   // Returns a lambda that calls "member_setter" on "flag_values" with the
   // argument passed in to the lambda.
@@ -344,8 +343,6 @@ void AllocateFlags() {
   ParseFlagsFromEnvAndDieIfUnknown("XLA_FLAGS", *flag_objects);
 }
 
-}  // namespace
-
 void AppendDebugOptionsFlags(std::vector<tensorflow::Flag>* flag_list) {
   std::call_once(flags_init, &AllocateFlags);
   flag_list->insert(flag_list->end(), flag_objects->begin(),
diff --git a/tensorflow/compiler/xla/debug_options_flags.h b/tensorflow/compiler/xla/debug_options_flags.h
index 60e59abc2a..dbf86a40f0 100644
--- a/tensorflow/compiler/xla/debug_options_flags.h
+++ b/tensorflow/compiler/xla/debug_options_flags.h
@@ -29,7 +29,10 @@ void AppendDebugOptionsFlags(std::vector<tensorflow::Flag>* flag_list);
 // Fetches a DebugOptions proto message from flags provided to the program.
 // Flags must be registered with the flags parser using AppendDebugOptionsFlags
 // first.
-xla::DebugOptions GetDebugOptionsFromFlags();
+DebugOptions GetDebugOptionsFromFlags();
+
+// Gets a DebugOptions proto that reflects the defaults as if no flags were set.
+DebugOptions DefaultDebugOptionsIgnoringFlags();
 
 }  // namespace xla
 
-- 
GitLab


From 5478c41e32d7ee455741fdee9473e60fa8e40a21 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 10 Dec 2018 19:13:04 -0800
Subject: [PATCH 333/873] [XLA] Don't pass XLA_FLAGS down to fake computations
 created by replay_computation.

When you pass XLA_FLAGS to replay_computation, you very likely want that only
to apply to the actual computation(s) being run, not to the XLA computations
that replay_computation synthesizes to generate fake data for the "real" ones'
arguments.

PiperOrigin-RevId: 224927003
---
 tensorflow/compiler/xla/client/lib/testing.cc | 20 +++++++++++--------
 tensorflow/compiler/xla/client/lib/testing.h  | 11 +++++++---
 .../compiler/xla/tools/replay_computation.cc  |  7 ++++++-
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc
index a95bbf2c8c..5db9d10dff 100644
--- a/tensorflow/compiler/xla/client/lib/testing.cc
+++ b/tensorflow/compiler/xla/client/lib/testing.cc
@@ -59,22 +59,25 @@ XlaOp BuildFakeDataOpOnDevice(const Shape& shape, XlaBuilder* builder) {
   return Tuple(builder, parts);
 }
 
-std::unique_ptr<GlobalData> MakeFakeDataViaDeviceOrDie(const Shape& shape,
-                                                       Client* client) {
+std::unique_ptr<GlobalData> MakeFakeDataViaDeviceOrDie(
+    const Shape& shape, Client* client, DebugOptions* debug_opts) {
   XlaBuilder b(absl::StrCat("make_fake_", ShapeUtil::HumanString(shape)));
   BuildFakeDataOpOnDevice(shape, &b);
   XlaComputation computation = b.Build().ConsumeValueOrDie();
 
   auto execution_options = CreateDefaultExecutionOptions();
   *execution_options.mutable_shape_with_output_layout() = shape.ToProto();
+  if (debug_opts) {
+    *execution_options.mutable_debug_options() = *debug_opts;
+  }
   return client->Execute(computation, /*arguments=*/{}, &execution_options)
       .ConsumeValueOrDie();
 }
 
 }  // namespace
 
-std::unique_ptr<GlobalData> MakeFakeDataOrDie(const Shape& shape,
-                                              Client* client) {
+std::unique_ptr<GlobalData> MakeFakeDataOrDie(
+    const Shape& shape, Client* client, DebugOptions* debug_opts /*=nullptr*/) {
   if (DataSizeOfShape(shape) < (1LL << 20)) {
     StatusOr<Literal> literal_status = MakeFakeLiteral(shape);
     if (!literal_status.ok()) {
@@ -82,24 +85,25 @@ std::unique_ptr<GlobalData> MakeFakeDataOrDie(const Shape& shape,
       // an on-device computation.
       CHECK_EQ(literal_status.status().code(),
                tensorflow::error::UNIMPLEMENTED);
-      return MakeFakeDataViaDeviceOrDie(shape, client);
+      return MakeFakeDataViaDeviceOrDie(shape, client, debug_opts);
     }
     return client->TransferToServer(literal_status.ValueOrDie()).ValueOrDie();
   }
 
   // If the data is large, generate it on-device.
-  return MakeFakeDataViaDeviceOrDie(shape, client);
+  return MakeFakeDataViaDeviceOrDie(shape, client, debug_opts);
 }
 
 std::vector<std::unique_ptr<GlobalData>> MakeFakeArgumentsOrDie(
-    const XlaComputation& computation, Client* client) {
+    const XlaComputation& computation, Client* client,
+    DebugOptions* debug_opts /*=nullptr*/) {
   CHECK(computation.proto().has_host_program_shape())
       << "Computation should have progran shape.";
   auto program_shape = computation.proto().host_program_shape();
 
   std::vector<std::unique_ptr<GlobalData>> results;
   for (const ShapeProto& shape : program_shape.parameters()) {
-    results.push_back(MakeFakeDataOrDie(Shape(shape), client));
+    results.push_back(MakeFakeDataOrDie(Shape(shape), client, debug_opts));
   }
   return results;
 }
diff --git a/tensorflow/compiler/xla/client/lib/testing.h b/tensorflow/compiler/xla/client/lib/testing.h
index 03695ce2a3..428fa3e93d 100644
--- a/tensorflow/compiler/xla/client/lib/testing.h
+++ b/tensorflow/compiler/xla/client/lib/testing.h
@@ -29,14 +29,19 @@ namespace xla {
 // Generates fake data of the given shape on the device or dies. The fake data
 // is created by performing a computation on the device rather than transferring
 // data from the host to the device.
-std::unique_ptr<GlobalData> MakeFakeDataOrDie(const Shape& shape,
-                                              Client* client);
+//
+// The optional DebugOptions are used when generating fake data on the device.
+std::unique_ptr<GlobalData> MakeFakeDataOrDie(
+    const Shape& shape, Client* client, DebugOptions* debug_opts = nullptr);
 
 // Returns vector of GlobalData handles of fake data (created using
 // MakeFakeDataOrDie) that are correctly shaped arguments for the given
 // xla computation.
+//
+// The optional DebugOptions are used when generating fake data on the device.
 std::vector<std::unique_ptr<GlobalData>> MakeFakeArgumentsOrDie(
-    const XlaComputation& computation, Client* client);
+    const XlaComputation& computation, Client* client,
+    DebugOptions* debug_opts = nullptr);
 
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc
index ff2c339992..1a51303148 100644
--- a/tensorflow/compiler/xla/tools/replay_computation.cc
+++ b/tensorflow/compiler/xla/tools/replay_computation.cc
@@ -118,7 +118,12 @@ StatusOr<Literal> ReplayComputation(const HloSnapshot& module,
   std::vector<std::unique_ptr<GlobalData>> global_data_arguments;
   std::vector<const ShapedBuffer*> argument_ptrs;
   if (opts.use_fake_data) {
-    global_data_arguments = MakeFakeArgumentsOrDie(computation, client);
+    // Run fake computations with debug options ignoring XLA_FLAGS.  Users very
+    // likely want XLA_FLAGS only to apply to the "real" computation being run,
+    // not to the fake computations we use for generating arguments.
+    auto debug_opts = DefaultDebugOptionsIgnoringFlags();
+    global_data_arguments =
+        MakeFakeArgumentsOrDie(computation, client, &debug_opts);
     for (const auto& data : global_data_arguments) {
       argument_ptrs.push_back(
           client->GlobalDataToShapedBuffer(data->handle(), /*device_ordinal=*/0)
-- 
GitLab


From ce6087616869670e0331cd4c873a0eb3d2296e0e Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Mon, 10 Dec 2018 19:27:48 -0800
Subject: [PATCH 334/873] [XLA] Add --xla_disable_all_hlo_passes flag.

Previously we only had a flag for disabling specific passes.  But being able to
disable all passes is helpful if you have some already-optimized HLO that you
just want to run.

PiperOrigin-RevId: 224928095
---
 tensorflow/compiler/xla/debug_options_flags.cc       | 10 ++++++++++
 tensorflow/compiler/xla/service/hlo_pass_pipeline.cc |  5 +++++
 tensorflow/compiler/xla/xla.proto                    | 10 ++++++++++
 3 files changed, 25 insertions(+)

diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc
index e77d0ba63b..c55ebcd066 100644
--- a/tensorflow/compiler/xla/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/debug_options_flags.cc
@@ -201,6 +201,16 @@ static void AllocateFlags() {
           "Comma-separated list of hlo passes to be disabled. These names "
           "must exactly match the passes' names; no whitespace around "
           "commas."),
+      tensorflow::Flag(
+          "xla_disable_all_hlo_passes",
+          bool_setter_for(&DebugOptions::set_xla_disable_all_hlo_passes), false,
+          "Disables all HLO passes.  Notes that some passes are necessary for "
+          "correctness and the invariants that must be satisfied by 'fully "
+          "optimized' HLO are different for different devices and may change "
+          "over time.  The only 'guarantee', such as it is, is that if you "
+          "compile XLA and dump the optimized HLO for some graph, you should "
+          "be able to run it again on the same device with the same build of "
+          "XLA."),
       tensorflow::Flag(
           "xla_embed_ir_in_executable",
           bool_setter_for(&DebugOptions::set_xla_embed_ir_in_executable),
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 51177f24f5..33ce7e23a8 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -77,6 +77,11 @@ std::vector<HloPassInterface*> HloPassPipeline::GetEnabledPasses(
   auto repeated_field = debug_options.xla_disable_hlo_passes();
   absl::flat_hash_set<string> disabled_pass_names(repeated_field.begin(),
                                                   repeated_field.end());
+  if (debug_options.xla_disable_all_hlo_passes()) {
+    VLOG(1) << "*All* passes disabled by --xla_disable_all_hlo_passes.";
+    return {};
+  }
+
   if (!disabled_pass_names.empty()) {
     VLOG(1) << "Passes disabled by --xla_disable_hlo_passes: "
             << absl::StrJoin(disabled_pass_names, ", ");
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index a37eac7fe4..32b51c104c 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -100,6 +100,14 @@ message DebugOptions {
   // names as specified by the HloPassInterface::name() method.
   repeated string xla_disable_hlo_passes = 30;
 
+  // Disables all HLO passes.  Notes that some passes are necessary for
+  // correctness and the invariants that must be satisfied by "fully optimized"
+  // HLO are different for different devices and may change over time.  The only
+  // "guarantee", such as it is, is that if you compile XLA and dump the
+  // optimized HLO for some graph, you should be able to run it again on the
+  // same device with the same build of XLA.
+  bool xla_disable_all_hlo_passes = 104;
+
   // Numerical optimization level for the XLA compiler backend; the specific
   // interpretation of this value is left to the backends.
   int32 xla_backend_optimization_level = 31;
@@ -216,6 +224,8 @@ message DebugOptions {
   // If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3).
   bool xla_gpu_disable_ptxas_optimizations = 103;
 
+  // Next id: 105
+
   // Extra options to pass to the compilation backend (e.g. LLVM); specific
   // interpretation of these values is left to the backend.
   map<string, string> xla_backend_extra_options = 500;
-- 
GitLab


From 0d822c01e54126dd7e38e9c5bb186039b736121b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 19:52:31 -0800
Subject: [PATCH 335/873] Fix so we preserve the value of
 `executing_eagerly_outside_functions()` in the specific case of: * Eager
 execution enabled * Inside a FuncGraph, inside a graph * In a replica context
 (such as in a call to   `tf.distribute.Strategy.call_for_each_replica()`).

PiperOrigin-RevId: 224930182
---
 .../python/mirrored_strategy_multigpu_test.py | 28 +++++++++++++++++++
 .../python/distribute/mirrored_strategy.py    | 21 ++++++++------
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index 36be5c83f8..337a86b342 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -183,6 +183,34 @@ class MirroredStrategyVariableCreatorStackTest(
       expected = ("main_thread:thread_0", "main_thread:thread_1")
       self.assertEqual(expected, result)
 
+@combinations.generate(combinations.combine(
+    distribution=[
+        combinations.mirrored_strategy_with_gpu_and_cpu,
+        combinations.core_mirrored_strategy_with_gpu_and_cpu],
+    mode=["graph", "eager"]))
+class MirroredStrategyCallForEachReplicaTest(test.TestCase):
+
+  def testExecutingEagerlyOutsideFunction(self, distribution):
+    """Verify we preserve the value of executing_eagerly_outside_functions()."""
+    def model_fn():
+      return ops.executing_eagerly_outside_functions()
+
+    originally = ops.executing_eagerly_outside_functions()
+    with distribution.scope():
+      in_scope = ops.executing_eagerly_outside_functions()
+      in_model_fn = distribution.extended.call_for_each_replica(model_fn)
+      unwrapped = distribution.unwrap(in_model_fn)
+      self.assertEqual(in_scope, unwrapped[0])
+      self.assertEqual(in_scope, originally)
+
+    # Verify this all again, but this time in a FuncGraph.
+    with func_graph.FuncGraph("fg").as_default(), distribution.scope():
+      in_scope = ops.executing_eagerly_outside_functions()
+      in_model_fn = distribution.extended.call_for_each_replica(model_fn)
+      unwrapped = distribution.unwrap(in_model_fn)
+      self.assertEqual(in_scope, unwrapped[0])
+      self.assertEqual(in_scope, originally)
+
 
 @combinations.generate(combinations.combine(
     distribution=[
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index cb94dfcfbd..9692c88dfc 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -50,8 +50,8 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 @contextlib.contextmanager
-def _enter_graph(g):
-  if context.executing_eagerly():
+def _enter_graph(g, eager):
+  if eager:
     with g.as_default(), context.eager_mode():
       yield
   else:
@@ -839,14 +839,19 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
       self.has_paused = threading.Event()
       # These fields have to do with inheriting various contexts from the
       # parent thread:
+      ctx = context.context()
+      self.in_eager = ctx.executing_eagerly()
       # pylint: disable=protected-access
-      self.context_mode = context.context()._eager_context.mode
-      if not context.context()._context_handle:
-        context.context()._initialize_handle_and_devices()
+      if not ctx._context_handle:
+        ctx._initialize_handle_and_devices()
       self.context_device_policy = (
           pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy(
-              context.context()._context_handle))
+              ctx._context_handle))
       self.graph = ops.get_default_graph()
+      with ops.init_scope():
+        self._init_in_eager = context.executing_eagerly()
+        self._init_graph = ops.get_default_graph()
+
       self._variable_creator_stack = self.graph._variable_creator_stack[:]
       self._captured_var_scope = variable_scope.get_variable_scope()
       # Adding a "/" at end lets us re-enter this scope later.
@@ -867,9 +872,9 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
         if self.coord.should_stop():
           return
         with self.coord.stop_on_exception(), \
-            context.context()._mode(self.context_mode), \
+            _enter_graph(self._init_graph, self._init_in_eager), \
+            _enter_graph(self.graph, self.in_eager), \
             context.context().device_policy(self.context_device_policy), \
-            _enter_graph(self.graph), \
             MirroredReplicaContext(self.distribution, constant_op.constant(
                 self.replica_id, dtypes.int32)), \
             ops.device(self.device), \
-- 
GitLab


From 10cab63fa54ee4c66c249b2c5427e080a625a8c7 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Mon, 10 Dec 2018 20:35:35 -0800
Subject: [PATCH 336/873] Outside compilation in "If" and "While".

PiperOrigin-RevId: 224933587
---
 tensorflow/compiler/jit/BUILD                 |   3 +
 .../jit/encapsulate_subgraphs_pass_test.cc    | 201 +++--
 .../jit/extract_outside_compilation_pass.cc   | 760 ++++++++++++++++--
 .../jit/extract_outside_compilation_pass.h    |   5 +-
 .../extract_outside_compilation_pass_test.cc  | 409 +++++++++-
 .../compiler/tf2xla/kernels/while_op.cc       |  22 +-
 .../compiler/tf2xla/side_effect_util.cc       |   2 +
 tensorflow/compiler/tf2xla/side_effect_util.h |   3 +
 tensorflow/compiler/tf2xla/tf2xla_util.cc     |   9 +
 9 files changed, 1239 insertions(+), 175 deletions(-)

diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 15dcbb2641..d8c88a9fca 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -515,6 +515,7 @@ cc_library(
         "//tensorflow/compiler/jit/ops:xla_ops",
         "//tensorflow/compiler/tf2xla:dump_graph",
         "//tensorflow/compiler/tf2xla:resource_operation_table",
+        "//tensorflow/compiler/tf2xla:side_effect_util",
         "//tensorflow/compiler/tf2xla:tf2xla_util",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/cc:xla_jit_ops",
@@ -613,6 +614,7 @@ tf_cc_test(
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:cc_ops_internal",
         "//tensorflow/cc:function_ops",
+        "//tensorflow/cc:functional_ops",
         "//tensorflow/cc:ops",
         "//tensorflow/cc:resource_variable_ops",
         "//tensorflow/cc:scope",
@@ -625,6 +627,7 @@ tf_cc_test(
         "//tensorflow/compiler/tf2xla/cc:xla_ops",
         "//tensorflow/compiler/tf2xla/kernels:xla_dummy_ops",
         "//tensorflow/compiler/tf2xla/kernels:xla_ops",
+        "//tensorflow/compiler/xla:test",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:framework_internal",
diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
index de89be9a35..7476d1dc51 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
@@ -299,7 +299,7 @@ REGISTER_OP("XlaHostCompute")
     .Attr("Toutputs: list(type) >= 0")
     .Attr("ancestors: list(string) >= 0")
     .Attr("key: string")
-    .Attr("shape_inference_graph: string = ''")
+    .Attr("shape_inference_graph: func")
     .Attr("shapes: list(shape) >= 0")
     .SetShapeFn(::tensorflow::shape_inference::UnknownShape);
 
@@ -901,18 +901,22 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) {
   {
     GraphDefBuilder shape(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape.opts());
-    Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                            {DT_FLOAT, DT_FLOAT}, shape.opts());
+    Node* recv = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT, DT_FLOAT},
+        shape.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1),
                      shape.opts()
                          .WithName("E")
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
-    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape.opts());
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     TF_EXPECT_OK(
         AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected));
   }
 
+  NameAttrList shape_inference_graph;
+  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = test::function::XTimesTwo();
   *library_expected.add_function() = FunctionDefHelper::Create(
       "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval_retval:float"}, {},
@@ -931,8 +935,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph",
-             "_outside_compilation_shape_inference_F1_O1"},
+            {"shape_inference_graph", shape_inference_graph},
             {"shapes", absl::Span<const DataType>({})},
             {"_outside_compilation_subgraph", "O1"}},
            {"c"}},
@@ -948,8 +951,9 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) {
 
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                            {DT_FLOAT, DT_FLOAT}, b2.opts());
+    Node* recv = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT, DT_FLOAT},
+        b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1),
                      b2.opts()
                          .WithName("E")
@@ -957,7 +961,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) {
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
     Node* send = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
-                              b2.opts().WithControlInput(e));
+                              b2.opts().WithControlInput(e).WithAttr(
+                                  kXlaHasHostTransferAttrName, true));
 
     Node* s = Sequencer(
         b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}),
@@ -1022,14 +1027,16 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
   {
     GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape1.opts());
-    Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                            {DT_FLOAT, DT_FLOAT}, shape1.opts());
+    Node* recv = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT, DT_FLOAT},
+        shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1),
                      shape1.opts()
                          .WithName("E")
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
-    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape1.opts());
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     TF_EXPECT_OK(
         AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected));
   }
@@ -1037,25 +1044,31 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
   {
     GraphDefBuilder shape2(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape2.opts());
-    Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                             {DT_FLOAT, DT_FLOAT}, shape2.opts());
+    Node* recv1 = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT, DT_FLOAT},
+        shape2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1),
                      shape2.opts()
                          .WithName("E")
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
-    Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2",
-                             {DT_FLOAT, DT_FLOAT}, shape2.opts());
+    Node* recv2 = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O2", {DT_FLOAT, DT_FLOAT},
+        shape2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* h = Binary(ops::NodeOut(recv2, 1), e,
                      shape2.opts()
                          .WithName("H")
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O2"));
-    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {h}, shape2.opts());
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {h},
+                 shape2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     TF_EXPECT_OK(
         AddGraphDefToFunctionLibrary(shape2, "F1_O2", &library_expected));
   }
 
+  NameAttrList shape_inference_graph1, shape_inference_graph2;
+  shape_inference_graph1.set_name("_outside_compilation_shape_inference_F1_O1");
+  shape_inference_graph2.set_name("_outside_compilation_shape_inference_F1_O2");
   *library_expected.add_function() = FunctionDefHelper::Create(
       "F1", {"a_0_arg:float", "b_0_arg:float"}, {"i_0_retval_retval:float"}, {},
       {
@@ -1076,8 +1089,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O2"},
-            {"shape_inference_graph",
-             "_outside_compilation_shape_inference_F1_O2"},
+            {"shape_inference_graph", shape_inference_graph2},
             {"shapes", absl::Span<const DataType>({})},
             {"_outside_compilation_subgraph", "O2"}},
            {"F"}},
@@ -1088,8 +1100,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph",
-             "_outside_compilation_shape_inference_F1_O1"},
+            {"shape_inference_graph", shape_inference_graph1},
             {"shapes", absl::Span<const DataType>({})},
             {"_outside_compilation_subgraph", "O1"}},
            {"D"}},
@@ -1105,8 +1116,9 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
 
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                             {DT_FLOAT, DT_FLOAT}, b2.opts());
+    Node* recv1 = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT, DT_FLOAT},
+        b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1),
                      b2.opts()
                          .WithName("E")
@@ -1114,10 +1126,12 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
     Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
-                               b2.opts().WithControlInput(e));
+                               b2.opts().WithControlInput(e).WithAttr(
+                                   kXlaHasHostTransferAttrName, true));
 
-    Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2",
-                             {DT_FLOAT, DT_FLOAT}, b2.opts());
+    Node* recv2 = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O2", {DT_FLOAT, DT_FLOAT},
+        b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* g = Binary(e, ops::NodeOut(recv2, 0),
                      b2.opts()
                          .WithName("G")
@@ -1130,7 +1144,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O2"));
     Node* send2 =
-        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {h}, b2.opts());
+        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {h},
+                     b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
 
     Node* s = Sequencer(b2.opts()
                             .WithName("F1_sequencer")
@@ -1212,7 +1227,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph", ""},
+            {"shape_inference_graph", NameAttrList()},
             {"shapes",
              absl::Span<const TensorShapeProto>({shape_proto_expected})},
             {"_outside_compilation_subgraph", "O1"}},
@@ -1235,7 +1250,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F2_O1"},
-            {"shape_inference_graph", ""},
+            {"shape_inference_graph", NameAttrList()},
             {"shapes",
              absl::Span<const TensorShapeProto>({shape_proto_expected})},
             {"_outside_compilation_subgraph", "O1"}}},
@@ -1251,8 +1266,9 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
 
     Node* key_constant1 =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv1 = RecvAtHost(ops::NodeOut(key_constant1, 0), "F1", "O1",
-                             {DT_FLOAT, DT_FLOAT}, b2.opts());
+    Node* recv1 = RecvAtHost(
+        ops::NodeOut(key_constant1, 0), "F1", "O1", {DT_FLOAT, DT_FLOAT},
+        b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1),
                      b2.opts()
                          .WithName("E")
@@ -1260,7 +1276,8 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
     Node* send1 = SendFromHost(ops::NodeOut(key_constant1, 0), "F1", "O1", {e},
-                               b2.opts().WithControlInput(e));
+                               b2.opts().WithControlInput(e).WithAttr(
+                                   kXlaHasHostTransferAttrName, true));
     Node* s1 = Sequencer(
         b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}),
         "F1");
@@ -1272,15 +1289,17 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
 
     Node* key_constant2 =
         KeyPlaceholder("F2", b2.opts().WithName("F2_key_placeholder"));
-    Node* recv2 = RecvAtHost(ops::NodeOut(key_constant2, 0), "F2", "O1",
-                             {DT_FLOAT}, b2.opts());
+    Node* recv2 =
+        RecvAtHost(ops::NodeOut(key_constant2, 0), "F2", "O1", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* h = Binary(ops::NodeOut(call1, 1), recv2,
                      b2.opts()
                          .WithName("H")
                          .WithAttr("_encapsulate", "F2")
                          .WithAttr("_outside", "O1"));
-    Node* send2 = SendFromHost(ops::NodeOut(key_constant2, 0), "F2", "O1", {h},
-                               b2.opts());
+    Node* send2 =
+        SendFromHost(ops::NodeOut(key_constant2, 0), "F2", "O1", {h},
+                     b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
 
     Node* s2 = Sequencer(
         b2.opts().WithName("F2_sequencer").WithControlInputs({recv2, send2}),
@@ -1358,7 +1377,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph", ""},
+            {"shape_inference_graph", NameAttrList()},
             {"shapes",
              absl::Span<const TensorShapeProto>({shape_proto_expected})},
             {"_outside_compilation_subgraph", "O1"}},
@@ -1380,7 +1399,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F2_O1"},
-            {"shape_inference_graph", ""},
+            {"shape_inference_graph", NameAttrList()},
             {"shapes",
              absl::Span<const TensorShapeProto>({shape_proto_expected})},
             {"_outside_compilation_subgraph", "O1"}}},
@@ -1489,7 +1508,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph", ""},
+            {"shape_inference_graph", NameAttrList()},
             {"shapes",
              absl::Span<const TensorShapeProto>({shape_proto_expected})},
             {"_outside_compilation_subgraph", "O1"}}},
@@ -1574,7 +1593,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph", ""},
+            {"shape_inference_graph", NameAttrList()},
             {"shapes",
              absl::Span<const TensorShapeProto>({shape_proto_expected})},
             {"_outside_compilation_subgraph", "O1"}},
@@ -1657,7 +1676,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) {
             {"Toutputs", absl::Span<const DataType>({})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph", ""},
+            {"shape_inference_graph", NameAttrList()},
             {"shapes", absl::Span<const TensorShapeProto>({})},
             {"_outside_compilation_subgraph", "O1"}}},
       },
@@ -1739,7 +1758,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) {
             {"Toutputs", absl::Span<const DataType>({})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph", ""},
+            {"shape_inference_graph", NameAttrList()},
             {"shapes", absl::Span<const TensorShapeProto>({})},
             {"_outside_compilation_subgraph", "O1"}}},
       },
@@ -1816,17 +1835,21 @@ TEST(EncapsulateSubgraphsTest,
   {
     GraphDefBuilder shape2(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape2.opts());
-    Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2",
-                             {DT_FLOAT}, shape2.opts());
+    Node* recv2 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", {DT_FLOAT},
+                   shape2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* g = Unary(ops::NodeOut(recv2, 0), shape2.opts()
                                                 .WithName("G")
                                                 .WithAttr("_encapsulate", "F1")
                                                 .WithAttr("_outside", "O2"));
-    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g}, shape2.opts());
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g},
+                 shape2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     TF_EXPECT_OK(
         AddGraphDefToFunctionLibrary(shape2, "F1_O2", &library_expected));
   }
 
+  NameAttrList shape_inference_graph;
+  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O2");
   *library_expected.add_function() = FunctionDefHelper::Create(
       "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval_retval:float"}, {},
       {
@@ -1843,8 +1866,7 @@ TEST(EncapsulateSubgraphsTest,
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O2"},
-            {"shape_inference_graph",
-             "_outside_compilation_shape_inference_F1_O2"},
+            {"shape_inference_graph", shape_inference_graph},
             {"shapes", absl::Span<const TensorShapeProto>({})},
             {"_outside_compilation_subgraph", "O2"}}},
       },
@@ -1863,15 +1885,17 @@ TEST(EncapsulateSubgraphsTest,
                            .WithAttr("_outside", "O1"));
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2",
-                            {DT_FLOAT}, b2.opts());
+    Node* recv =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* g = Unary(recv, b2.opts()
                               .WithName("G")
                               .WithAttr("_encapsulate", "F1")
                               .WithAttr("_outside", "O2")
                               .WithControlInput(e));
     Node* send =
-        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g}, b2.opts());
+        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g},
+                     b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* s1 = Sequencer(
         b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}),
         "F1");
@@ -1925,17 +1949,21 @@ TEST(EncapsulateSubgraphsTest,
   {
     GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape1.opts());
-    Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                             {DT_FLOAT}, shape1.opts());
+    Node* recv2 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Unary(ops::NodeOut(recv2, 0), shape1.opts()
                                                 .WithName("E")
                                                 .WithAttr("_encapsulate", "F1")
                                                 .WithAttr("_outside", "O1"));
-    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape1.opts());
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     TF_EXPECT_OK(
         AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected));
   }
 
+  NameAttrList shape_inference_graph;
+  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = FunctionDefHelper::Create(
       "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval_retval:float"}, {},
       {
@@ -1952,8 +1980,7 @@ TEST(EncapsulateSubgraphsTest,
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph",
-             "_outside_compilation_shape_inference_F1_O1"},
+            {"shape_inference_graph", shape_inference_graph},
             {"shapes", absl::Span<const TensorShapeProto>({})},
             {"_outside_compilation_subgraph", "O1"}}},
       },
@@ -1968,14 +1995,16 @@ TEST(EncapsulateSubgraphsTest,
 
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                            {DT_FLOAT}, b2.opts());
+    Node* recv =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Unary(recv, b2.opts()
                               .WithName("E")
                               .WithAttr("_encapsulate", "F1")
                               .WithAttr("_outside", "O1"));
     Node* send =
-        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts());
+        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                     b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     /*Node* g =*/Unary(a, b2.opts()
                               .WithName("G")
                               .WithAttr("_encapsulate", "F1")
@@ -2039,17 +2068,21 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) {
   {
     GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape1.opts());
-    Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                             {DT_FLOAT}, shape1.opts());
+    Node* recv2 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Unary(ops::NodeOut(recv2, 0), shape1.opts()
                                                 .WithName("E")
                                                 .WithAttr("_encapsulate", "F1")
                                                 .WithAttr("_outside", "O1"));
-    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape1.opts());
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     TF_EXPECT_OK(
         AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected));
   }
 
+  NameAttrList shape_inference_graph;
+  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = FunctionDefHelper::Create(
       "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval_retval:float"}, {},
       {{{"C"}, "UnaryTest", {"a_0_arg"}},
@@ -2063,8 +2096,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) {
          {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
          {"ancestors", absl::Span<const string>({})},
          {"key", "host_compute_channel_F1_O1"},
-         {"shape_inference_graph",
-          "_outside_compilation_shape_inference_F1_O1"},
+         {"shape_inference_graph", shape_inference_graph},
          {"shapes", absl::Span<const TensorShapeProto>({})},
          {"_outside_compilation_subgraph", "O1"}}},
        {{"outside_compilation_O2_host_compute"},
@@ -2074,7 +2106,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) {
          {"Toutputs", absl::Span<const DataType>({})},
          {"ancestors", absl::Span<const string>({})},
          {"key", "host_compute_channel_F1_O2"},
-         {"shape_inference_graph", ""},
+         {"shape_inference_graph", NameAttrList()},
          {"shapes", absl::Span<const TensorShapeProto>({})},
          {"_outside_compilation_subgraph", "O2"}},
         {}},
@@ -2085,7 +2117,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) {
          {"Toutputs", absl::Span<const DataType>({})},
          {"ancestors", absl::Span<const string>({})},
          {"key", "host_compute_channel_F1_O3"},
-         {"shape_inference_graph", ""},
+         {"shape_inference_graph", NameAttrList()},
          {"shapes", absl::Span<const TensorShapeProto>({})},
          {"_outside_compilation_subgraph", "O3"}},
         {}}},
@@ -2100,23 +2132,27 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) {
 
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                             {DT_FLOAT}, b2.opts());
+    Node* recv1 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = Unary(recv1, b2.opts()
                                .WithName("E")
                                .WithAttr("_encapsulate", "F1")
                                .WithAttr("_outside", "O1"));
     Node* send =
-        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts());
-    Node* recv2 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2",
-                             {DT_FLOAT}, b2.opts());
+        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                     b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* recv2 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* g = Unary(recv2, b2.opts()
                                .WithName("G")
                                .WithAttr("_encapsulate", "F1")
                                .WithAttr("_outside", "O2")
                                .WithControlInput(e));
-    Node* recv3 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O3",
-                             {DT_FLOAT}, b2.opts());
+    Node* recv3 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O3", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     /*Node* i =*/Binary(recv3, e,
                         b2.opts()
                             .WithName("I")
@@ -2236,8 +2272,9 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
   {
     GraphDefBuilder shape(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape.opts());
-    Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                            {DT_FLOAT}, shape.opts());
+    Node* recv =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   shape.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* a = InputShaped(shape.opts().WithName("A"));
     Node* c = Unary(a, shape.opts().WithName("C"));
     Node* e = BinaryUnknownShape(c, recv,
@@ -2245,11 +2282,14 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
                                      .WithName("E")
                                      .WithAttr("_encapsulate", "F1")
                                      .WithAttr("_outside", "O1"));
-    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, shape.opts());
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     TF_EXPECT_OK(
         AddGraphDefToFunctionLibrary(shape, "F1_O1", &library_expected));
   }
 
+  NameAttrList shape_inference_graph;
+  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = test::function::XTimesTwo();
   *library_expected.add_function() = FunctionDefHelper::Create(
       "F1", {"b_0_arg:float", "c_0_arg:float"}, {"f_0_retval_retval:float"}, {},
@@ -2267,8 +2307,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph",
-             "_outside_compilation_shape_inference_F1_O1"},
+            {"shape_inference_graph", shape_inference_graph},
             {"shapes", absl::Span<const DataType>({})},
             {"_outside_compilation_subgraph", "O1"}},
            {"c"}},
@@ -2285,8 +2324,9 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
 
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
-                            {DT_FLOAT}, b2.opts());
+    Node* recv =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     Node* e = BinaryUnknownShape(c, ops::NodeOut(recv, 0),
                                  b2.opts()
                                      .WithName("E")
@@ -2294,7 +2334,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
                                      .WithAttr("_encapsulate", "F1")
                                      .WithAttr("_outside", "O1"));
     Node* send = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
-                              b2.opts().WithControlInput(e));
+                              b2.opts().WithControlInput(e).WithAttr(
+                                  kXlaHasHostTransferAttrName, true));
 
     Node* s = Sequencer(
         b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}),
diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
index e3c7e2f89b..feac983884 100644
--- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
+++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
@@ -20,8 +20,10 @@ limitations under the License.
 #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h"
 #include "tensorflow/compiler/jit/encapsulate_util.h"
 #include "tensorflow/compiler/tf2xla/dump_graph.h"
+#include "tensorflow/compiler/tf2xla/side_effect_util.h"
 #include "tensorflow/compiler/tf2xla/tf2xla_util.h"
 #include "tensorflow/core/common_runtime/function.h"
+#include "tensorflow/core/framework/function.h"
 #include "tensorflow/core/framework/graph_to_functiondef.h"
 #include "tensorflow/core/framework/node_def_builder.h"
 #include "tensorflow/core/framework/node_def_util.h"
@@ -98,9 +100,12 @@ xla::StatusOr<Node*> BuildRecvAtHostNode(
   recv_at_host_builder.Attr("Toutputs", recv_at_host_dtypes);
   // The correct device_ordinal will be inserted during replication in a
   // subsequent rewrite.
-  recv_at_host_builder.Attr("device_ordinal", 0);
+  AttrValue device_ordinal_value;
+  device_ordinal_value.set_placeholder("device_ordinal");
+  recv_at_host_builder.Attr("device_ordinal", device_ordinal_value);
   recv_at_host_builder.Attr(
       "key", absl::StrCat("host_compute_channel_", oc_cluster_name));
+  recv_at_host_builder.Attr(kXlaHasHostTransferAttrName, true);
   recv_at_host_builder.Input(key_placeholder->name(), 0, DT_STRING);
   TF_RETURN_IF_ERROR(recv_at_host_builder.Finalize(&recv_at_host_def));
   Status s;
@@ -197,9 +202,12 @@ xla::StatusOr<Node*> BuildSendFromHostNode(
   send_from_host_builder.Attr("Tinputs", send_from_host_dtypes);
   // The correct device_ordinal will be inserted during replication in a
   // subsequent rewrite.
-  send_from_host_builder.Attr("device_ordinal", 0);
+  AttrValue device_ordinal_value;
+  device_ordinal_value.set_placeholder("device_ordinal");
+  send_from_host_builder.Attr("device_ordinal", device_ordinal_value);
   send_from_host_builder.Attr(
       "key", absl::StrCat("host_compute_channel_", oc_cluster_name));
+  send_from_host_builder.Attr(kXlaHasHostTransferAttrName, true);
   std::vector<NodeDefBuilder::NodeOut> inputs(send_from_host_dtypes.size());
   for (auto* n : ret_nodes) {
     int index;
@@ -357,6 +365,47 @@ Status ReplaceOrRemoveOutsideCompilationCallNode(
   return Status::OK();
 }
 
+// Resets "device_ordinal" attr to placeholder value for related nodes
+// (XlaRecvAtHost nodes; XlaSendFromHost nodes; If nodes containing
+// XlaRecvAtHost/XlaSendFromHost).
+Status ResetDeviceOrdinalToPlaceholderValue(Graph* g) {
+  AttrValue device_ordinal_value;
+  device_ordinal_value.set_placeholder("device_ordinal");
+  for (Node* n : g->nodes()) {
+    if (!HasNodeAttr(n->def(), kXlaHasHostTransferAttrName)) {
+      continue;
+    }
+
+    if (n->type_string() == "_XlaRecvAtHost" ||
+        n->type_string() == "_XlaSendFromHost") {
+      n->ClearAttr("device_ordinal");
+      n->AddAttr("device_ordinal", device_ordinal_value);
+    } else if (n->type_string() == "If") {
+      for (const string& attr_name :
+           std::vector<string>{"then_branch", "else_branch"}) {
+        NameAttrList branch_func;
+        TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), attr_name, &branch_func));
+        (*branch_func.mutable_attr())["device_ordinal"] = device_ordinal_value;
+        n->ClearAttr(attr_name);
+        n->AddAttr(attr_name, branch_func);
+      }
+    } else if (n->type_string() == "While") {
+      for (const string& attr_name : std::vector<string>{"cond", "body"}) {
+        NameAttrList branch_func;
+        TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), attr_name, &branch_func));
+        (*branch_func.mutable_attr())["device_ordinal"] = device_ordinal_value;
+        n->ClearAttr(attr_name);
+        n->AddAttr(attr_name, branch_func);
+      }
+    } else {
+      return errors::Internal("Unknown node marked with ",
+                              kXlaHasHostTransferAttrName, ": ",
+                              n->DebugString());
+    }
+  }
+  return Status::OK();
+}
+
 // For an XLA computation, builds host side graph given all outside compilation
 // graphs inside it. The host side graph contains:
 // 1) a "sequencer" node (we will add control edge between XlaRecvAtHost and
@@ -368,8 +417,8 @@ Status ReplaceOrRemoveOutsideCompilationCallNode(
 Status ConstructHostGraph(
     const string& xla_cluster_name, const string& outside_compilation_attr_name,
     const std::vector<string>& outside_compilation_host_graphs,
-    FunctionLibraryDefinition* fld, std::unique_ptr<Graph>* host_graph) {
-  host_graph->reset(new Graph(fld));
+    FunctionLibraryDefinition* fld, const string& host_graph_func_name) {
+  Graph host_graph(fld);
 
   // Create sequencer node in host graph.
   NodeDefBuilder sequencer_builder(absl::StrCat(xla_cluster_name, "_sequencer"),
@@ -378,24 +427,34 @@ Status ConstructHostGraph(
   NodeDef sequencer_def;
   TF_RETURN_IF_ERROR(sequencer_builder.Finalize(&sequencer_def));
   Status s;
-  Node* sequencer = (*host_graph)->AddNode(sequencer_def, &s);
+  Node* sequencer = host_graph.AddNode(sequencer_def, &s);
   TF_RETURN_IF_ERROR(s);
 
   // Create key placeholder in host graph.
   TF_ASSIGN_OR_RETURN(
       Node * key_placeholder,
-      AddHostComputeKeyPlaceholder(xla_cluster_name, host_graph->get()));
+      AddHostComputeKeyPlaceholder(xla_cluster_name, &host_graph));
 
   // For each outside compilation graph, copy them to host graph with the
   // following changes:
   // a) Use key_placeholder in host graph instead of its own.
-  // b) Add control edge from RecvAtHost/SendFromHost to sequencer.
+  // b) Add control edge from host transfer nodes (XlaRecvAtHost,
+  //    XlaSendFromHost, If/While nodes containing
+  //    XlaRecvAtHost/XlaSendFromHost) to sequencer node.
   // c) Clear node_def.device(), so device placer won't get confused.
   for (const string& host_func : outside_compilation_host_graphs) {
     VLOG(4) << "Expanding host graph " << host_func;
+    // Temporarily use "0" as "device_ordinal". It will be reset to placeholder
+    // value after we expanded all host graphs. We cannot just use placeholder
+    // value here because FunctionDef instantiation does not allow placeholder
+    // value for attributes.
+    AttrValue device_ordinal_attr;
+    device_ordinal_attr.set_i(0);
+    protobuf::Map<string, AttrValue> attrs;
+    attrs["device_ordinal"] = device_ordinal_attr;
     FunctionBody* host_fbody = nullptr;
     TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(
-        *fld->Find(host_func), AttrSlice(), fld,
+        *fld->Find(host_func), AttrSlice(&attrs), fld,
         [&](const string& op, const OpDef** sig) {
           return fld->LookUpOpDef(op, sig);
         },
@@ -408,8 +467,8 @@ Status ConstructHostGraph(
     FixupSourceAndSinkEdges(host_fbody->graph);
 
     std::map<const Node*, Node*> node_map;
-    node_map[host_fbody->graph->source_node()] = (*host_graph)->source_node();
-    node_map[host_fbody->graph->sink_node()] = (*host_graph)->sink_node();
+    node_map[host_fbody->graph->source_node()] = host_graph.source_node();
+    node_map[host_fbody->graph->sink_node()] = host_graph.sink_node();
     Status s;
     ReverseDFS(
         *host_fbody->graph, /*enter=*/nullptr,
@@ -431,7 +490,7 @@ Status ConstructHostGraph(
             NodeDef copy_def = n->def();
             // Change c).
             copy_def.clear_device();
-            copy = (*host_graph)->AddNode(copy_def, &s);
+            copy = host_graph.AddNode(copy_def, &s);
             if (!s.ok()) {
               return;
             }
@@ -446,22 +505,23 @@ Status ConstructHostGraph(
                                    e->src()->DebugString());
               return;
             }
-            (*host_graph)
-                ->AddEdge(node_map[e->src()], e->src_output(), copy,
-                          e->dst_input());
+            host_graph.AddEdge(node_map[e->src()], e->src_output(), copy,
+                               e->dst_input());
           }
 
           // Change b).
-          if (copy->type_string() == "_XlaRecvAtHost" ||
-              copy->type_string() == "_XlaSendFromHost") {
-            (*host_graph)->AddControlEdge(copy, sequencer);
+          if (HasNodeAttr(copy->def(), kXlaHasHostTransferAttrName)) {
+            host_graph.AddControlEdge(copy, sequencer);
           }
         },
         NodeComparatorID());
+
     if (!s.ok()) {
       return s;
     }
   }
+  // Reset "device_ordinal" to placeholder value.
+  TF_RETURN_IF_ERROR(ResetDeviceOrdinalToPlaceholderValue(&host_graph));
 
   // sequencer and key_placeholder might be dead nodes. Prune them if necessary.
   // - sequencer should be pruned iff it has no input control edges from
@@ -470,21 +530,30 @@ Status ConstructHostGraph(
   // - key_placeholder should be pruned iff there's no RecvAtHost/SendFromHost.
   //   We don't need to do anything special.
   if (!sequencer->in_edges().empty()) {
-    (*host_graph)->AddControlEdge(sequencer, (*host_graph)->sink_node());
+    host_graph.AddControlEdge(sequencer, host_graph.sink_node());
   }
   PruneForReverseReachability(
-      host_graph->get(),
-      std::unordered_set<const Node*>{(*host_graph)->sink_node()});
+      &host_graph, std::unordered_set<const Node*>{host_graph.sink_node()});
 
   // Postprocess edges between different outside compilations.
   TF_RETURN_IF_ERROR(PostprocessEdgesBetweenOutsideCompilations(
-      host_graph->get(), outside_compilation_attr_name));
+      &host_graph, outside_compilation_attr_name));
 
   if (VLOG_IS_ON(4)) {
     dump_graph::DumpGraphToFile(
         absl::StrCat("extract_outside_compilation_host_graph_for_",
                      xla_cluster_name),
-        **host_graph, fld);
+        host_graph, fld);
+  }
+
+  FunctionDef host_graph_fdef;
+  TF_RETURN_IF_ERROR(
+      GraphToFunctionDef(host_graph, host_graph_func_name, &host_graph_fdef));
+  if (fld->Find(host_graph_func_name)) {
+    TF_RETURN_IF_ERROR(
+        fld->ReplaceFunction(host_graph_func_name, host_graph_fdef));
+  } else {
+    TF_RETURN_IF_ERROR(fld->AddFunctionDef(host_graph_fdef));
   }
 
   return Status::OK();
@@ -492,8 +561,28 @@ Status ConstructHostGraph(
 
 // Expand XLA computation's outside compilation host side graph into main graph.
 // Add a control edge between sequencer node and the XLA computation node.
-Status ExpandHostGraphIntoMainGraph(Graph* main_graph, Graph* host_graph,
+Status ExpandHostGraphIntoMainGraph(Graph* main_graph,
+                                    FunctionLibraryDefinition* fld,
+                                    const string& host_graph_func_name,
                                     Node* xla_computation_node) {
+  // Temporarily use "0" as "device_ordinal". It will be rewritten with the
+  // correct value in a later pass. We cannot just use placeholder value here
+  // because FunctionDef instantiation does not allow placeholder value for
+  // attributes.
+  AttrValue device_ordinal_attr;
+  device_ordinal_attr.set_i(0);
+  protobuf::Map<string, AttrValue> attrs;
+  attrs["device_ordinal"] = device_ordinal_attr;
+  FunctionBody* fbody = nullptr;
+  TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(
+      *fld->Find(host_graph_func_name), AttrSlice(&attrs), fld,
+      [&](const string& op, const OpDef** sig) {
+        return fld->LookUpOpDef(op, sig);
+      },
+      &fbody));
+  std::unique_ptr<FunctionBody> fbody_deleter(fbody);
+  Graph* host_graph = fbody->graph;
+
   // We use ReverseDFS() to copy nodes. Make sure all nodes are reverse
   // reachable from sink node so all nodes will be copied.
   // TODO(b/77601805): consolidate copy graph functions.
@@ -559,9 +648,14 @@ Status ExpandHostGraphIntoMainGraph(Graph* main_graph, Graph* host_graph,
 Status RewriteShapeInferenceGraph(const string& shape_inference_graph_name,
                                   Graph* host_graph,
                                   FunctionLibraryDefinition* fld) {
+  // Use "0" as "device_ordinal". It does not matter for shape inference.
+  AttrValue device_ordinal_attr;
+  device_ordinal_attr.set_i(0);
+  protobuf::Map<string, AttrValue> attrs;
+  attrs["device_ordinal"] = device_ordinal_attr;
   FunctionBody* fbody = nullptr;
   TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(
-      *fld->Find(shape_inference_graph_name), AttrSlice(), fld,
+      *fld->Find(shape_inference_graph_name), AttrSlice(&attrs), fld,
       [&](const string& op, const OpDef** sig) {
         return fld->LookUpOpDef(op, sig);
       },
@@ -669,6 +763,567 @@ Status RewriteShapeInferenceGraph(const string& shape_inference_graph_name,
   return Status::OK();
 }
 
+// Builds XlaSendToHost node which sends cond predicate to host.
+xla::StatusOr<Node*> BuildSendIfPredNode(const string& name,
+                                         const string& host_transfer_key,
+                                         Node* pred_node, Graph* g) {
+  NodeDefBuilder send_pred_builder(name, "XlaSendToHost");
+  send_pred_builder.Attr("Tinput", DT_BOOL);
+  send_pred_builder.Attr("key", absl::StrCat(host_transfer_key, "_dtoh_0"));
+  send_pred_builder.Attr(kXlaTokenInputNodesAttrName,
+                         std::vector<string>{kXlaTokenArgNodeName});
+  send_pred_builder.Input(pred_node->name(), 0, DT_BOOL);
+  NodeDef send_pred_def;
+  TF_RETURN_IF_ERROR(send_pred_builder.Finalize(&send_pred_def));
+  Status s;
+  Node* send_pred_node = g->AddNode(send_pred_def, &s);
+  TF_RETURN_IF_ERROR(s);
+  g->AddEdge(pred_node, 0, send_pred_node, 0);
+  return send_pred_node;
+}
+
+// Replaces key placeholder node with an _Arg node.
+Status ReplaceKeyPlaceholderWithArgNode(const string& xla_cluster_name,
+                                        const string& func_name,
+                                        FunctionLibraryDefinition* fld) {
+  // Temporarily use "0" as "device_ordinal". It will be reset to placeholder
+  // value after rewriting.
+  AttrValue device_ordinal_attr;
+  device_ordinal_attr.set_i(0);
+  protobuf::Map<string, AttrValue> attrs;
+  attrs["device_ordinal"] = device_ordinal_attr;
+  FunctionBody* fbody = nullptr;
+  TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(
+      *fld->Find(func_name), AttrSlice(&attrs), fld,
+      [&](const string& op, const OpDef** sig) {
+        return fld->LookUpOpDef(op, sig);
+      },
+      &fbody));
+  std::unique_ptr<FunctionBody> fbody_deleter(fbody);
+  Graph* g = fbody->graph;
+
+  // Find or create the key placeholder node.
+  Node* key_placeholder = nullptr;
+  for (Node* n : g->nodes()) {
+    if (IsKeyPlaceholderNode(*n)) {
+      key_placeholder = n;
+      break;
+    }
+  }
+  if (!key_placeholder) {
+    TF_ASSIGN_OR_RETURN(key_placeholder,
+                        AddHostComputeKeyPlaceholder(xla_cluster_name, g));
+  }
+
+  // Build the _Arg node, and replace key placeholder node with it.
+  NodeDefBuilder arg_builder("key_arg", FunctionLibraryDefinition::kArgOp);
+  arg_builder.Attr("T", DT_STRING);
+  arg_builder.Attr("index", 0);
+  NodeDef arg_def;
+  TF_RETURN_IF_ERROR(arg_builder.Finalize(&arg_def));
+  TF_RETURN_IF_ERROR(ReplaceNode(g, key_placeholder, arg_def).status());
+
+  // Reset "device_ordinal" to placeholder value.
+  TF_RETURN_IF_ERROR(ResetDeviceOrdinalToPlaceholderValue(g));
+
+  FunctionDef replace_fdef;
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(*g, func_name, &replace_fdef));
+  TF_RETURN_IF_ERROR(fld->ReplaceFunction(func_name, replace_fdef));
+  return Status::OK();
+}
+
+// Builds host side graph for If node.
+Status BuildHostGraphForIfNode(const string& xla_cluster_attr_name,
+                               const string& outside_compilation_attr_name,
+                               const string& xla_cluster_name,
+                               const string& if_node_name,
+                               const string& host_transfer_key,
+                               const string& host_graph_func_name,
+                               FunctionLibraryDefinition* fld,
+                               const string& then_branch_host_func_name,
+                               const string& else_branch_host_func_name) {
+  Graph host_graph(fld);
+  string outside_compilation_name = absl::StrCat("oc_if_", if_node_name);
+  AttrValue device_ordinal_value;
+  device_ordinal_value.set_placeholder("device_ordinal");
+
+  // Step 1: add key placeholder node.
+  TF_ASSIGN_OR_RETURN(
+      Node * key_placeholder,
+      AddHostComputeKeyPlaceholder(xla_cluster_name, &host_graph));
+
+  // Step 2: build XlaRecvAtHost node to recv predicate.
+  NodeDefBuilder recv_pred_builder(
+      absl::StrCat("recv_oc_if_pred_", if_node_name), "_XlaRecvAtHost");
+  recv_pred_builder.Attr("Toutputs", std::vector<DataType>{DT_BOOL});
+  recv_pred_builder.Attr("key", host_transfer_key);
+  recv_pred_builder.Attr("device_ordinal", device_ordinal_value);
+  recv_pred_builder.Attr(xla_cluster_attr_name, xla_cluster_name);
+  recv_pred_builder.Attr(outside_compilation_attr_name,
+                         outside_compilation_name);
+  recv_pred_builder.Attr(kXlaHasHostTransferAttrName, true);
+  recv_pred_builder.Input(key_placeholder->name(), 0, DT_STRING);
+  NodeDef recv_pred_def;
+  TF_RETURN_IF_ERROR(recv_pred_builder.Finalize(&recv_pred_def));
+  Status s;
+  Node* recv_pred_node = host_graph.AddNode(recv_pred_def, &s);
+  TF_RETURN_IF_ERROR(s);
+  host_graph.AddEdge(key_placeholder, 0, recv_pred_node, 0);
+
+  // Step 3: rewrite `{then, else}_branch_host_func_name`, replace key
+  // placeholder with an _Arg node.
+  TF_RETURN_IF_ERROR(ReplaceKeyPlaceholderWithArgNode(
+      xla_cluster_name, then_branch_host_func_name, fld));
+  TF_RETURN_IF_ERROR(ReplaceKeyPlaceholderWithArgNode(
+      xla_cluster_name, else_branch_host_func_name, fld));
+
+  // Step 4: build If node to choose between `{then, else}_branch_host_graph`.
+  NodeDefBuilder if_builder(absl::StrCat("oc_if_", if_node_name), "If");
+  if_builder.Attr("Tcond", DT_BOOL);
+  if_builder.Attr("Tin", std::vector<DataType>{DT_STRING});
+  if_builder.Attr("Tout", std::vector<DataType>{});
+  NameAttrList host_then_branch, host_else_branch;
+  host_then_branch.set_name(then_branch_host_func_name);
+  (*host_then_branch.mutable_attr())["device_ordinal"] = device_ordinal_value;
+  host_else_branch.set_name(else_branch_host_func_name);
+  (*host_else_branch.mutable_attr())["device_ordinal"] = device_ordinal_value;
+  if_builder.Attr("then_branch", host_then_branch);
+  if_builder.Attr("else_branch", host_else_branch);
+  if_builder.Attr(kXlaHasHostTransferAttrName, true);
+  if_builder.Attr(xla_cluster_attr_name, xla_cluster_name);
+  if_builder.Attr(outside_compilation_attr_name, outside_compilation_name);
+  if_builder.Input(recv_pred_node->name(), 0, DT_BOOL);
+  std::vector<NodeDefBuilder::NodeOut> if_inputs{
+      {key_placeholder->name(), 0, DT_STRING}};
+  if_builder.Input(if_inputs);
+  NodeDef if_def;
+  TF_RETURN_IF_ERROR(if_builder.Finalize(&if_def));
+  Node* if_node = host_graph.AddNode(if_def, &s);
+  TF_RETURN_IF_ERROR(s);
+  host_graph.AddEdge(recv_pred_node, 0, if_node, 0);
+  host_graph.AddEdge(key_placeholder, 0, if_node, 1);
+
+  // Convert `host_graph` to function, and add a "device_ordinal" attr.
+  FunctionDef oc_host_graph_fdef;
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(host_graph, host_graph_func_name,
+                                        &oc_host_graph_fdef));
+  if (fld->Find(host_graph_func_name)) {
+    TF_RETURN_IF_ERROR(
+        fld->ReplaceFunction(host_graph_func_name, oc_host_graph_fdef));
+  } else {
+    TF_RETURN_IF_ERROR(fld->AddFunctionDef(oc_host_graph_fdef));
+  }
+
+  return Status::OK();
+}
+
+// Rewrites loop cond to add a node which sends loop cond to host.
+Status AddSendLoopPredToLoopCond(FunctionLibraryDefinition* fld,
+                                 const NameAttrList& loop_cond_func,
+                                 const string& while_node_name,
+                                 const string& host_transfer_key) {
+  // Instantiate the loop cond function.
+  FunctionBody* fbody = nullptr;
+  TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(
+      *fld->Find(loop_cond_func.name()), AttrSlice(&loop_cond_func.attr()), fld,
+      [&](const string& op, const OpDef** sig) {
+        return fld->LookUpOpDef(op, sig);
+      },
+      &fbody));
+  std::unique_ptr<FunctionBody> fbody_deleter(fbody);
+  Graph* g = fbody->graph;
+
+  // Find the _Retval node and the loop cond node.
+  Node* ret_node = nullptr;
+  for (Node* n : g->nodes()) {
+    if (n->type_string() == "_Retval") {
+      if (ret_node) {
+        return errors::Internal("Multiple return node for loop cond function ",
+                                loop_cond_func.name(), ": ",
+                                ret_node->DebugString(), " and ",
+                                n->DebugString());
+      } else {
+        ret_node = n;
+      }
+    }
+  }
+  if (!ret_node) {
+    return errors::Internal("No _Retval node for loop cond function ",
+                            loop_cond_func.name());
+  }
+  Node* loop_cond;
+  TF_RETURN_IF_ERROR(ret_node->input_node(0, &loop_cond));
+
+  // Build the XlaSendToHost node.
+  NodeDefBuilder send_loop_cond_builder(
+      absl::StrCat("send_oc_while_cond_", while_node_name), "XlaSendToHost");
+  send_loop_cond_builder.Attr("Tinput", DT_BOOL);
+  send_loop_cond_builder.Attr("key",
+                              absl::StrCat(host_transfer_key, "_dtoh_0"));
+  send_loop_cond_builder.Attr(kXlaTokenInputNodesAttrName,
+                              std::vector<string>{kXlaTokenArgNodeName});
+  send_loop_cond_builder.Input(loop_cond->name(), 0, DT_BOOL);
+  NodeDef send_loop_cond_def;
+  TF_RETURN_IF_ERROR(send_loop_cond_builder.Finalize(&send_loop_cond_def));
+  Status s;
+  Node* send_loop_cond_node = g->AddNode(send_loop_cond_def, &s);
+  TF_RETURN_IF_ERROR(s);
+  g->AddEdge(loop_cond, 0, send_loop_cond_node, 0);
+
+  // Replace original function.
+  FunctionDef replace_fdef;
+  TF_RETURN_IF_ERROR(
+      GraphToFunctionDef(*g, loop_cond_func.name(), &replace_fdef));
+  TF_RETURN_IF_ERROR(fld->ReplaceFunction(loop_cond_func.name(), replace_fdef));
+
+  return Status::OK();
+}
+
+// Rewrites while loop cond function for host.
+Status RewriteHostWhileLoopCond(
+    const string& cond_host_func_name, const string& while_node_name,
+    const string& host_transfer_key, const string& xla_cluster_attr_name,
+    const string& xla_cluster_name, const string& outside_compilation_attr_name,
+    const string& outside_compilation_name, FunctionLibraryDefinition* fld) {
+  // Replace key placeholder node with _Arg node.
+  TF_RETURN_IF_ERROR(ReplaceKeyPlaceholderWithArgNode(
+      xla_cluster_name, cond_host_func_name, fld));
+
+  // Instantiate cond function.
+  AttrValue device_ordinal_temp_value;
+  device_ordinal_temp_value.set_i(0);
+  protobuf::Map<string, AttrValue> attrs;
+  attrs["device_ordinal"] = device_ordinal_temp_value;
+  FunctionBody* cond_fbody = nullptr;
+  TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(
+      *fld->Find(cond_host_func_name), AttrSlice(&attrs), fld,
+      [&](const string& op, const OpDef** sig) {
+        return fld->LookUpOpDef(op, sig);
+      },
+      &cond_fbody));
+  std::unique_ptr<FunctionBody> cond_fbody_deleter(cond_fbody);
+  Graph* cond_graph = cond_fbody->graph;
+  Node* key_arg = nullptr;
+  for (Node* n : cond_graph->nodes()) {
+    if (n->type_string() == "_Arg") {
+      key_arg = n;
+    }
+  }
+  if (!key_arg) {
+    return errors::Internal(
+        "No _Arg node found for host compute key in function ",
+        cond_host_func_name);
+  }
+
+  // Add an XlaRecvAtHost node to use as cond function return value.
+  // We don't need to set kXlaHasHostTransferAttrName for this node, because
+  // it's already added for the "While" node on the host.
+  NodeDefBuilder recv_pred_builder(
+      absl::StrCat("recv_oc_while_cond_", while_node_name), "_XlaRecvAtHost");
+  recv_pred_builder.Attr("Toutputs", std::vector<DataType>{DT_BOOL});
+  recv_pred_builder.Attr("key", host_transfer_key);
+  AttrValue device_ordinal_value;
+  device_ordinal_value.set_placeholder("device_ordinal");
+  recv_pred_builder.Attr("device_ordinal", device_ordinal_value);
+  recv_pred_builder.Attr(xla_cluster_attr_name, xla_cluster_name);
+  recv_pred_builder.Attr(outside_compilation_attr_name,
+                         outside_compilation_name);
+  recv_pred_builder.Input(key_arg->name(), 0, DT_STRING);
+  NodeDef recv_pred_def;
+  TF_RETURN_IF_ERROR(recv_pred_builder.Finalize(&recv_pred_def));
+  Status s;
+  Node* recv_pred_node = cond_graph->AddNode(recv_pred_def, &s);
+  TF_RETURN_IF_ERROR(s);
+  cond_graph->AddEdge(key_arg, 0, recv_pred_node, 0);
+  NodeDefBuilder ret_builder(
+      absl::StrCat("recv_oc_while_cond_ret_", while_node_name), "_Retval");
+  ret_builder.Attr("T", DT_BOOL);
+  ret_builder.Attr("index", 0);
+  ret_builder.Input(recv_pred_node->name(), 0, DT_BOOL);
+  NodeDef ret_def;
+  TF_RETURN_IF_ERROR(ret_builder.Finalize(&ret_def));
+  Node* ret_node = cond_graph->AddNode(ret_def, &s);
+  TF_RETURN_IF_ERROR(s);
+  cond_graph->AddEdge(recv_pred_node, 0, ret_node, 0);
+
+  // Reset device_ordinal to placeholder value.
+  TF_RETURN_IF_ERROR(ResetDeviceOrdinalToPlaceholderValue(cond_graph));
+
+  // Replace original function.
+  FunctionDef cond_replace_fdef;
+  TF_RETURN_IF_ERROR(
+      GraphToFunctionDef(*cond_graph, cond_host_func_name, &cond_replace_fdef));
+  TF_RETURN_IF_ERROR(
+      fld->ReplaceFunction(cond_host_func_name, cond_replace_fdef));
+
+  return Status::OK();
+}
+
+// Rewrites while loop body function for host.
+Status RewriteHostWhileLoopBody(
+    const string& body_host_func_name, const string& while_node_name,
+    const string& host_transfer_key, const string& xla_cluster_attr_name,
+    const string& xla_cluster_name, const string& outside_compilation_attr_name,
+    const string& outside_compilation_name, FunctionLibraryDefinition* fld) {
+  // Replace key placeholder node with _Arg node.
+  TF_RETURN_IF_ERROR(ReplaceKeyPlaceholderWithArgNode(
+      xla_cluster_name, body_host_func_name, fld));
+
+  // Instantiate body function.
+  AttrValue device_ordinal_temp_value;
+  device_ordinal_temp_value.set_i(0);
+  protobuf::Map<string, AttrValue> attrs;
+  attrs["device_ordinal"] = device_ordinal_temp_value;
+  FunctionBody* body_fbody = nullptr;
+  TF_RETURN_IF_ERROR(FunctionDefToBodyHelper(
+      *fld->Find(body_host_func_name), AttrSlice(&attrs), fld,
+      [&](const string& op, const OpDef** sig) {
+        return fld->LookUpOpDef(op, sig);
+      },
+      &body_fbody));
+  std::unique_ptr<FunctionBody> body_fbody_deleter(body_fbody);
+  Graph* body_graph = body_fbody->graph;
+  Node* key_arg = nullptr;
+  for (Node* n : body_graph->nodes()) {
+    if (n->type_string() == "_Arg") {
+      key_arg = n;
+    }
+  }
+  if (!key_arg) {
+    return errors::Internal(
+        "No _Arg node found for host compute key in function ",
+        body_host_func_name);
+  }
+
+  // Add a _Retval node to loop body.
+  NodeDefBuilder ret_builder(
+      absl::StrCat("recv_oc_while_body_ret_", while_node_name), "_Retval");
+  ret_builder.Attr("T", DT_STRING);
+  ret_builder.Attr("index", 0);
+  ret_builder.Input(key_arg->name(), 0, DT_STRING);
+  NodeDef ret_def;
+  TF_RETURN_IF_ERROR(ret_builder.Finalize(&ret_def));
+  Status s;
+  Node* ret_node = body_graph->AddNode(ret_def, &s);
+  TF_RETURN_IF_ERROR(s);
+  body_graph->AddEdge(key_arg, 0, ret_node, 0);
+
+  // Reset device_ordinal to placeholder value.
+  TF_RETURN_IF_ERROR(ResetDeviceOrdinalToPlaceholderValue(body_graph));
+
+  // Replace original function.
+  FunctionDef body_replace_fdef;
+  TF_RETURN_IF_ERROR(
+      GraphToFunctionDef(*body_graph, body_host_func_name, &body_replace_fdef));
+  TF_RETURN_IF_ERROR(
+      fld->ReplaceFunction(body_host_func_name, body_replace_fdef));
+
+  return Status::OK();
+}
+
+// Builds host side graph for while node.
+Status BuildHostGraphForWhileNode(
+    const string& xla_cluster_attr_name,
+    const string& outside_compilation_attr_name, const string& xla_cluster_name,
+    const string& while_node_name, const string& host_transfer_key,
+    const string& host_graph_func_name, FunctionLibraryDefinition* fld,
+    const string& cond_host_func_name, const string& body_host_func_name) {
+  Graph host_graph(fld);
+  string outside_compilation_name = absl::StrCat("oc_while_", while_node_name);
+
+  // Step 1: add key placeholder node.
+  TF_ASSIGN_OR_RETURN(
+      Node * key_placeholder,
+      AddHostComputeKeyPlaceholder(xla_cluster_name, &host_graph));
+
+  // Step 2: rewrite cond function.
+  TF_RETURN_IF_ERROR(RewriteHostWhileLoopCond(
+      cond_host_func_name, while_node_name, host_transfer_key,
+      xla_cluster_attr_name, xla_cluster_name, outside_compilation_attr_name,
+      outside_compilation_name, fld));
+
+  // Step 3: rewrite body function.
+  TF_RETURN_IF_ERROR(RewriteHostWhileLoopBody(
+      body_host_func_name, while_node_name, host_transfer_key,
+      xla_cluster_attr_name, xla_cluster_name, outside_compilation_attr_name,
+      outside_compilation_name, fld));
+
+  // Step 4: build While node.
+  NodeDefBuilder while_builder(absl::StrCat("oc_while_", while_node_name),
+                               "While");
+  while_builder.Attr("T", std::vector<DataType>{DT_STRING});
+  NameAttrList func;
+  AttrValue device_ordinal_value;
+  device_ordinal_value.set_placeholder("device_ordinal");
+  (*func.mutable_attr())["device_ordinal"] = device_ordinal_value;
+  func.set_name(cond_host_func_name);
+  while_builder.Attr("cond", func);
+  func.set_name(body_host_func_name);
+  while_builder.Attr("body", func);
+  while_builder.Attr(kXlaHasHostTransferAttrName, true);
+  while_builder.Attr(xla_cluster_attr_name, xla_cluster_name);
+  while_builder.Attr(outside_compilation_attr_name, outside_compilation_name);
+  std::vector<NodeDefBuilder::NodeOut> while_inputs{
+      {key_placeholder->name(), 0, DT_STRING}};
+  while_builder.Input(while_inputs);
+  NodeDef while_def;
+  TF_RETURN_IF_ERROR(while_builder.Finalize(&while_def));
+  Status s;
+  Node* while_node = host_graph.AddNode(while_def, &s);
+  TF_RETURN_IF_ERROR(s);
+  host_graph.AddEdge(key_placeholder, 0, while_node, 0);
+
+  // Convert `host_graph` to function.
+  FunctionDef oc_host_graph_fdef;
+  TF_RETURN_IF_ERROR(GraphToFunctionDef(host_graph, host_graph_func_name,
+                                        &oc_host_graph_fdef));
+  if (fld->Find(host_graph_func_name)) {
+    TF_RETURN_IF_ERROR(
+        fld->ReplaceFunction(host_graph_func_name, oc_host_graph_fdef));
+  } else {
+    TF_RETURN_IF_ERROR(fld->AddFunctionDef(oc_host_graph_fdef));
+  }
+
+  return Status::OK();
+}
+
+Status ExtractOutsideCompilationForNodesWithAssociatedFunctions(
+    Graph* g, const string& xla_cluster_attr_name,
+    const string& outside_compilation_attr_name, const string& xla_cluster_name,
+    const std::map<string, int>& host_compute_core,
+    FunctionLibraryDefinition* fld, std::vector<string>* host_graphs,
+    std::vector<string>* shape_inference_graphs,
+    bool* has_outside_compilation) {
+  std::vector<Node*> if_nodes, while_nodes;
+  for (Node* n : g->nodes()) {
+    if (n->type_string() == "If") {
+      if_nodes.push_back(n);
+    } else if (n->type_string() == "While") {
+      while_nodes.push_back(n);
+    }
+  }
+
+  for (Node* n : if_nodes) {
+    // Instantiate "then_branch" and "else_branch".
+    NameAttrList then_branch, else_branch;
+    TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "then_branch", &then_branch));
+    TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "else_branch", &else_branch));
+
+    // Extract outside compilation for then_branch and else_branch.
+    bool then_branch_has_outside_compilation = false;
+    bool else_branch_has_outside_compilation = false;
+    string then_branch_host_func_name =
+               absl::StrCat("oc_then_branch_host_if_", n->name()),
+           else_branch_host_func_name =
+               absl::StrCat("oc_else_branch_host_if_", n->name());
+    string then_branch_xla_func_name = absl::StrCat(then_branch.name(), "_oc"),
+           else_branch_xla_func_name = absl::StrCat(else_branch.name(), "_oc");
+    TF_RETURN_IF_ERROR(ExtractOutsideCompilationForFunction(
+        xla_cluster_attr_name, outside_compilation_attr_name, xla_cluster_name,
+        then_branch, then_branch_xla_func_name, then_branch_host_func_name,
+        host_compute_core, fld, shape_inference_graphs,
+        &then_branch_has_outside_compilation));
+    TF_RETURN_IF_ERROR(ExtractOutsideCompilationForFunction(
+        xla_cluster_attr_name, outside_compilation_attr_name, xla_cluster_name,
+        else_branch, else_branch_xla_func_name, else_branch_host_func_name,
+        host_compute_core, fld, shape_inference_graphs,
+        &else_branch_has_outside_compilation));
+
+    // If then/else branch do not have outside compilation, nothing to do.
+    if (!then_branch_has_outside_compilation &&
+        !else_branch_has_outside_compilation) {
+      continue;
+    }
+
+    *has_outside_compilation = true;
+
+    // Change If node to call the new functions.
+    then_branch.set_name(then_branch_xla_func_name);
+    n->ClearAttr("then_branch");
+    n->AddAttr("then_branch", then_branch);
+    else_branch.set_name(else_branch_xla_func_name);
+    n->ClearAttr("else_branch");
+    n->AddAttr("else_branch", else_branch);
+
+    string host_transfer_key = absl::StrCat("oc_if_pred_", n->name());
+
+    // XLA computation: add a SendToHost node to send cond predicate.
+    Node* pred_node;
+    TF_RETURN_IF_ERROR(n->input_node(0, &pred_node));
+    TF_ASSIGN_OR_RETURN(
+        Node * send_pred_node,
+        BuildSendIfPredNode(absl::StrCat("send_oc_if_pred_", n->name()),
+                            host_transfer_key, pred_node, g));
+    n->AddAttr(kXlaTokenInputNodesAttrName,
+               std::vector<string>{send_pred_node->name()});
+
+    // Build host side graph for the "If" node.
+    string oc_host_graph_name = absl::StrCat("oc_if_host_graph_", n->name());
+    TF_RETURN_IF_ERROR(BuildHostGraphForIfNode(
+        xla_cluster_attr_name, outside_compilation_attr_name, xla_cluster_name,
+        n->name(), host_transfer_key, oc_host_graph_name, fld,
+        then_branch_host_func_name, else_branch_host_func_name));
+    host_graphs->push_back(oc_host_graph_name);
+  }
+
+  for (Node* n : while_nodes) {
+    // Instantiate "cond" and "body".
+    NameAttrList cond, body;
+    TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "cond", &cond));
+    TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "body", &body));
+
+    // Extract outside compilation for cond and body.
+    bool cond_has_outside_compilation = false;
+    bool body_has_outside_compilation = false;
+    string cond_host_func_name = absl::StrCat("oc_cond_host_while_", n->name()),
+           body_host_func_name = absl::StrCat("oc_body_host_while_", n->name());
+    string cond_xla_func_name = absl::StrCat(cond.name(), "_oc"),
+           body_xla_func_name = absl::StrCat(body.name(), "_oc");
+    TF_RETURN_IF_ERROR(ExtractOutsideCompilationForFunction(
+        xla_cluster_attr_name, outside_compilation_attr_name, xla_cluster_name,
+        cond, cond_xla_func_name, cond_host_func_name, host_compute_core, fld,
+        shape_inference_graphs, &cond_has_outside_compilation));
+    TF_RETURN_IF_ERROR(ExtractOutsideCompilationForFunction(
+        xla_cluster_attr_name, outside_compilation_attr_name, xla_cluster_name,
+        body, body_xla_func_name, body_host_func_name, host_compute_core, fld,
+        shape_inference_graphs, &body_has_outside_compilation));
+
+    // If cond/body do not have outside compilation, nothing to do.
+    if (!cond_has_outside_compilation && !body_has_outside_compilation) {
+      continue;
+    }
+
+    *has_outside_compilation = true;
+
+    // Change While node to call the new functions.
+    cond.set_name(cond_xla_func_name);
+    n->ClearAttr("cond");
+    n->AddAttr("cond", cond);
+    body.set_name(body_xla_func_name);
+    n->ClearAttr("body");
+    n->AddAttr("body", body);
+
+    string host_transfer_key = absl::StrCat("oc_while_pred_", n->name());
+
+    // XLA computation: rewrite cond function to add a SendToHost node to send
+    // loop predicate.
+    TF_RETURN_IF_ERROR(
+        AddSendLoopPredToLoopCond(fld, cond, n->name(), host_transfer_key));
+    n->AddAttr(kXlaTokenInputNodesAttrName,
+               std::vector<string>{kXlaTokenArgNodeName});
+
+    // Build host side graph for the "While" node.
+    string oc_host_graph_name = absl::StrCat("oc_while_host_graph_", n->name());
+    TF_RETURN_IF_ERROR(BuildHostGraphForWhileNode(
+        xla_cluster_attr_name, outside_compilation_attr_name, xla_cluster_name,
+        n->name(), host_transfer_key, oc_host_graph_name, fld,
+        cond_host_func_name, body_host_func_name));
+    host_graphs->push_back(oc_host_graph_name);
+  }
+
+  return Status::OK();
+}
+
 }  // namespace
 
 Status RewriteOutsideCompilationSubgraphFn::operator()(
@@ -755,12 +1410,15 @@ Status RewriteOutsideCompilationSubgraphFn::operator()(
   // it with HostCompute node later.
   AddNodeAttr("_outside_compilation_subgraph", old_name, node_def);
   if (shapes) {
-    AddNodeAttr("shape_inference_graph", "", node_def);
+    NameAttrList shape_inference_graph;
+    AddNodeAttr("shape_inference_graph", shape_inference_graph, node_def);
     AddNodeAttr("shapes", *shapes, node_def);
   } else {
     string shape_inference_func_name =
         absl::StrCat("_outside_compilation_shape_inference_", new_name);
-    AddNodeAttr("shape_inference_graph", shape_inference_func_name, node_def);
+    NameAttrList shape_inference_graph;
+    shape_inference_graph.set_name(shape_inference_func_name);
+    AddNodeAttr("shape_inference_graph", shape_inference_graph, node_def);
     AddNodeAttr("shapes", std::vector<TensorShapeProto>{}, node_def);
   }
   AddNodeAttr("ancestors", std::vector<string>{}, node_def);
@@ -775,11 +1433,10 @@ Status ExtractOutsideCompilationForFunction(
     const string& xla_cluster_attr_name,
     const string& outside_compilation_attr_name, const string& xla_cluster_name,
     const NameAttrList& func_name_attrs, const string& new_func_name,
+    const string& host_graph_func_name,
     const std::map<string, int>& host_compute_core,
-    FunctionLibraryDefinition* fld, std::unique_ptr<Graph>* host_graph,
-    std::vector<string>* shape_inference_graphs,
+    FunctionLibraryDefinition* fld, std::vector<string>* shape_inference_graphs,
     bool* has_outside_compilation) {
-  // Early return if function does not have any outside compilation nodes.
   const string& func_name = func_name_attrs.name();
   const FunctionDef* fdef = fld->Find(func_name);
   if (!fdef) {
@@ -792,9 +1449,8 @@ Status ExtractOutsideCompilationForFunction(
       break;
     }
   }
-  if (!has_outside_compilation) {
-    return Status::OK();
-  }
+  // We cannot early return here, because we might have outside compilation in
+  // If/While function body.
 
   // Convert the function to graph.
   FunctionBody* fbody = nullptr;
@@ -835,11 +1491,11 @@ Status ExtractOutsideCompilationForFunction(
       // If we could not infer shapes for XlaSendFromHost inputs statically, we
       // will set the "shape_inference_graph" attribute. In that case, copy
       // outside compilation subgraph as shape inference graph in `fld`.
-      string shape_inference_graph;
+      NameAttrList shape_inference_graph;
       TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(), "shape_inference_graph",
                                      &shape_inference_graph));
-      if (!shape_inference_graph.empty()) {
-        shape_inference_graphs->push_back(shape_inference_graph);
+      if (!shape_inference_graph.name().empty()) {
+        shape_inference_graphs->push_back(shape_inference_graph.name());
 
         const FunctionDef* xla_fdef = fld->Find(n->name());
         if (!xla_fdef) {
@@ -847,9 +1503,9 @@ Status ExtractOutsideCompilationForFunction(
         }
         FunctionDef shape_inference_fdef = *xla_fdef;
         shape_inference_fdef.mutable_signature()->set_name(
-            shape_inference_graph);
-        if (fld->Find(shape_inference_graph)) {
-          TF_RETURN_IF_ERROR(fld->ReplaceFunction(shape_inference_graph,
+            shape_inference_graph.name());
+        if (fld->Find(shape_inference_graph.name())) {
+          TF_RETURN_IF_ERROR(fld->ReplaceFunction(shape_inference_graph.name(),
                                                   shape_inference_fdef));
         } else {
           TF_RETURN_IF_ERROR(fld->AddFunctionDef(shape_inference_fdef));
@@ -867,12 +1523,17 @@ Status ExtractOutsideCompilationForFunction(
         *graph_out, fld);
   }
 
+  // Handle nodes with associated functions.
+  TF_RETURN_IF_ERROR(ExtractOutsideCompilationForNodesWithAssociatedFunctions(
+      graph_out.get(), xla_cluster_attr_name, outside_compilation_attr_name,
+      xla_cluster_name, host_compute_core, fld,
+      &outside_compilation_host_graphs, shape_inference_graphs,
+      has_outside_compilation));
+
   // Construct host graph.
-  if (!outside_compilation_host_graphs.empty()) {
-    TF_RETURN_IF_ERROR(
-        ConstructHostGraph(xla_cluster_name, outside_compilation_attr_name,
-                           outside_compilation_host_graphs, fld, host_graph));
-  }
+  TF_RETURN_IF_ERROR(ConstructHostGraph(
+      xla_cluster_name, outside_compilation_attr_name,
+      outside_compilation_host_graphs, fld, host_graph_func_name));
 
   // Remove the outside compilation graphs from function library.
   for (const string& func : outside_compilation_host_graphs) {
@@ -909,14 +1570,15 @@ Status ExtractOutsideCompilation(
     auto const& host_compute_core = iter.second.host_compute_core;
 
     bool has_outside_compilation;
-    std::unique_ptr<Graph> host_graph;
+    string host_graph_func_name = absl::StrCat("oc_host_graph_", n->name());
     TF_RETURN_IF_ERROR(ExtractOutsideCompilationForFunction(
         xla_cluster_attr_name, outside_compilation_attr_name, xla_cluster_name,
-        func_name_attrs, func_name_attrs.name(), host_compute_core, fld,
-        &host_graph, &shape_inference_graphs, &has_outside_compilation));
-    if (host_graph) {
-      TF_RETURN_IF_ERROR(ExpandHostGraphIntoMainGraph(g, host_graph.get(), n));
-    }
+        func_name_attrs, func_name_attrs.name(), host_graph_func_name,
+        host_compute_core, fld, &shape_inference_graphs,
+        &has_outside_compilation));
+    TF_RETURN_IF_ERROR(
+        ExpandHostGraphIntoMainGraph(g, fld, host_graph_func_name, n));
+    TF_RETURN_IF_ERROR(fld->RemoveFunction(host_graph_func_name));
   }
 
   if (VLOG_IS_ON(4)) {
diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.h b/tensorflow/compiler/jit/extract_outside_compilation_pass.h
index 2a4f07cca2..e07e7c5dd0 100644
--- a/tensorflow/compiler/jit/extract_outside_compilation_pass.h
+++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.h
@@ -88,9 +88,10 @@ Status ExtractOutsideCompilationForFunction(
     const string& xla_cluster_attr_name,
     const string& outside_compilation_attr_name, const string& xla_cluster_name,
     const NameAttrList& func_name_attrs, const string& new_func_name,
+    const string& host_graph_func_name,
     const std::map<string, int>& host_compute_core,
-    FunctionLibraryDefinition* fld, std::unique_ptr<Graph>* host_graph,
-    std::vector<string>* shape_inference_graphs, bool* has_outside_compilation);
+    FunctionLibraryDefinition* fld, std::vector<string>* shape_inference_graphs,
+    bool* has_outside_compilation);
 
 // Rewrites XLA computation in `clusters` to replace outside compilation nodes
 // with XlaHostCompute, and moves those outside compilations into `g`. If shapes
diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc
index bff956100d..0887fbcde9 100644
--- a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc
+++ b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc
@@ -19,8 +19,10 @@ limitations under the License.
 #include "tensorflow/cc/framework/scope.h"
 #include "tensorflow/cc/ops/array_ops.h"
 #include "tensorflow/cc/ops/function_ops.h"
+#include "tensorflow/cc/ops/functional_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/compiler/jit/encapsulate_util.h"
+#include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/core/common_runtime/function.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/function.h"
@@ -109,10 +111,10 @@ TEST(RewriteOutsideCompilationSubgraphFnTest, Basic) {
   }
   EXPECT_TRUE(has_control_edge_to_send_from_host);
   // Verify step 7: necessary attrs added to call_node_def.
-  string shape_inference_graph;
+  NameAttrList shape_inference_graph;
   TF_CHECK_OK(GetNodeAttr(AttrSlice(&call_node_def.attr()),
                           "shape_inference_graph", &shape_inference_graph));
-  EXPECT_EQ(shape_inference_graph,
+  EXPECT_EQ(shape_inference_graph.name(),
             "_outside_compilation_shape_inference_cluster_0");
 }
 
@@ -249,27 +251,26 @@ TEST(ExtractOutsideCompilationForFunctionTest, Basic) {
 
   protobuf::Map<string, tensorflow::AttrValue> attrs;
   std::map<string, int> host_compute_core = {{"0", 1}, {"1", 0}};
-  std::unique_ptr<Graph> host_graph;
   std::vector<string> shape_inference_graphs;
   bool has_outside_compilation;
   NameAttrList name_attrs;
   name_attrs.set_name("cluster");
   *name_attrs.mutable_attr() = attrs;
   TF_CHECK_OK(ExtractOutsideCompilationForFunction(
-      "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten",
-      host_compute_core, &fld, &host_graph, &shape_inference_graphs,
+      "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph",
+      host_compute_core, &fld, &shape_inference_graphs,
       &has_outside_compilation));
 
   // Get rewritten XLA computation function.
-  FunctionBody *fbody = nullptr;
-  TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"),
-                                      AttrSlice(), &fld,
-                                      [&](const string &op, const OpDef **sig) {
-                                        return fld.LookUpOpDef(op, sig);
-                                      },
-                                      &fbody));
-  std::unique_ptr<FunctionBody> fbody_deleter(fbody);
-  auto node_name_index = fbody->graph->BuildNodeNameIndex();
+  FunctionBody *xla_fbody = nullptr;
+  TF_CHECK_OK(FunctionDefToBodyHelper(
+      *fld.Find("cluster_rewritten"), AttrSlice(), &fld,
+      [&](const string &op, const OpDef **sig) {
+        return fld.LookUpOpDef(op, sig);
+      },
+      &xla_fbody));
+  std::unique_ptr<FunctionBody> xla_fbody_deleter(xla_fbody);
+  auto node_name_index = xla_fbody->graph->BuildNodeNameIndex();
 
   // Check XlaHostCompute nodes.
   Node *host_compute_0 = node_name_index["outside_compilation_0_host_compute"];
@@ -292,18 +293,31 @@ TEST(ExtractOutsideCompilationForFunctionTest, Basic) {
   EXPECT_EQ(shapes[0].dim_size(), 1);
   // Check XlaHostCompute nodes' "shape_inference_graph" attr. Both should have
   // empty values.
-  string shape_inference_graph;
+  NameAttrList shape_inference_graph;
   TF_CHECK_OK(GetNodeAttr(host_compute_0->attrs(), "shape_inference_graph",
                           &shape_inference_graph));
-  EXPECT_EQ(shape_inference_graph, "");
+  EXPECT_EQ(shape_inference_graph.name(), "");
   TF_CHECK_OK(GetNodeAttr(host_compute_1->attrs(), "shape_inference_graph",
                           &shape_inference_graph));
-  EXPECT_EQ(shape_inference_graph, "");
+  EXPECT_EQ(shape_inference_graph.name(), "");
 
   // Check `shape_inference_graphs`.
   EXPECT_EQ(shape_inference_graphs.size(), 0);
 
-  // Check `host_graph`: verify we have key placeholder and sequencer.
+  // Check host graph: verify we have key placeholder and sequencer.
+  FunctionBody *host_fbody = nullptr;
+  AttrValue device_ordinal_temp_value;
+  device_ordinal_temp_value.set_i(0);
+  protobuf::Map<string, AttrValue> host_func_attrs;
+  host_func_attrs["device_ordinal"] = device_ordinal_temp_value;
+  TF_CHECK_OK(FunctionDefToBodyHelper(
+      *fld.Find("host_graph"), AttrSlice(&host_func_attrs), &fld,
+      [&](const string &op, const OpDef **sig) {
+        return fld.LookUpOpDef(op, sig);
+      },
+      &host_fbody));
+  std::unique_ptr<FunctionBody> host_fbody_deleter(host_fbody);
+  Graph *host_graph = host_fbody->graph;
   Node *key_placeholder = nullptr, *sequencer = nullptr;
   for (Node *n : host_graph->nodes()) {
     if (n->type_string() == "Placeholder" &&
@@ -365,25 +379,37 @@ TEST(ExtractOutsideCompilationForFunctionTest, NoHostGraph) {
 
   protobuf::Map<string, tensorflow::AttrValue> attrs;
   std::map<string, int> host_compute_core = {{"0", 1}, {"1", 0}};
-  std::unique_ptr<Graph> host_graph;
   std::vector<string> shape_inference_graphs;
   bool has_outside_compilation;
   NameAttrList name_attrs;
   name_attrs.set_name("cluster");
   *name_attrs.mutable_attr() = attrs;
   TF_CHECK_OK(ExtractOutsideCompilationForFunction(
-      "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten",
-      host_compute_core, &fld, &host_graph, &shape_inference_graphs,
+      "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph",
+      host_compute_core, &fld, &shape_inference_graphs,
       &has_outside_compilation));
 
-  // Check `host_graph` is empty.
-  EXPECT_FALSE(host_graph);
+  // Check host graph is empty.
+  FunctionBody *host_fbody = nullptr;
+  AttrValue device_ordinal_temp_value;
+  device_ordinal_temp_value.set_i(0);
+  protobuf::Map<string, AttrValue> host_func_attrs;
+  host_func_attrs["device_ordinal"] = device_ordinal_temp_value;
+  TF_CHECK_OK(FunctionDefToBodyHelper(
+      *fld.Find("host_graph"), AttrSlice(&host_func_attrs), &fld,
+      [&](const string &op, const OpDef **sig) {
+        return fld.LookUpOpDef(op, sig);
+      },
+      &host_fbody));
+  std::unique_ptr<FunctionBody> host_fbody_deleter(host_fbody);
+  Graph *host_graph = host_fbody->graph;
+  EXPECT_EQ(host_graph->num_nodes(), 2);
 }
 
 TEST(ExtractOutsideCompilationForFunctionTest, XlaHostComputeRemoved) {
   // Build the XLA computation func.
   // "const0"
-  // "const1" (outside compilation clsuter "0")
+  // "const1" (outside compilation cluster "0")
   FunctionDefLibrary fdl;
   {
     tensorflow::Scope s = tensorflow::Scope::NewRootScope();
@@ -401,31 +427,43 @@ TEST(ExtractOutsideCompilationForFunctionTest, XlaHostComputeRemoved) {
 
   protobuf::Map<string, tensorflow::AttrValue> attrs;
   std::map<string, int> host_compute_core = {{"0", 1}, {"1", 0}};
-  std::unique_ptr<Graph> host_graph;
   std::vector<string> shape_inference_graphs;
   bool has_outside_compilation;
   NameAttrList name_attrs;
   name_attrs.set_name("cluster");
   *name_attrs.mutable_attr() = attrs;
   TF_CHECK_OK(ExtractOutsideCompilationForFunction(
-      "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten",
-      host_compute_core, &fld, &host_graph, &shape_inference_graphs,
+      "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph",
+      host_compute_core, &fld, &shape_inference_graphs,
       &has_outside_compilation));
 
   // Check rewritten XLA graph: verify that we have no XlaHostCompute.
-  FunctionBody *fbody = nullptr;
-  TF_CHECK_OK(FunctionDefToBodyHelper(*fld.Find("cluster_rewritten"),
-                                      AttrSlice(), &fld,
-                                      [&](const string &op, const OpDef **sig) {
-                                        return fld.LookUpOpDef(op, sig);
-                                      },
-                                      &fbody));
-  std::unique_ptr<FunctionBody> fbody_deleter(fbody);
-  for (Node *n : fbody->graph->nodes()) {
+  FunctionBody *xla_fbody = nullptr;
+  TF_CHECK_OK(FunctionDefToBodyHelper(
+      *fld.Find("cluster_rewritten"), AttrSlice(), &fld,
+      [&](const string &op, const OpDef **sig) {
+        return fld.LookUpOpDef(op, sig);
+      },
+      &xla_fbody));
+  std::unique_ptr<FunctionBody> xla_fbody_deleter(xla_fbody);
+  for (Node *n : xla_fbody->graph->nodes()) {
     EXPECT_NE(n->type_string(), "XlaHostCompute");
   }
 
-  // Check `host_graph`: verify we have no placeholder, but we have "const1".
+  // Check host graph: verify we have no placeholder, but we have "const1".
+  FunctionBody *host_fbody = nullptr;
+  AttrValue device_ordinal_temp_value;
+  device_ordinal_temp_value.set_i(0);
+  protobuf::Map<string, AttrValue> host_func_attrs;
+  host_func_attrs["device_ordinal"] = device_ordinal_temp_value;
+  TF_CHECK_OK(FunctionDefToBodyHelper(
+      *fld.Find("host_graph"), AttrSlice(&host_func_attrs), &fld,
+      [&](const string &op, const OpDef **sig) {
+        return fld.LookUpOpDef(op, sig);
+      },
+      &host_fbody));
+  std::unique_ptr<FunctionBody> host_fbody_deleter(host_fbody);
+  Graph *host_graph = host_fbody->graph;
   int num_key_placeholders = 0;
   for (Node *n : host_graph->nodes()) {
     if (n->type_string() == "Placeholder" &&
@@ -438,4 +476,301 @@ TEST(ExtractOutsideCompilationForFunctionTest, XlaHostComputeRemoved) {
   EXPECT_NE(node_name_index.find("const1"), node_name_index.end());
 }
 
+REGISTER_OP("XlaSendToHost")
+    .Input("input: Tinput")
+    .Attr("Tinput: type")
+    .Attr("key: string")
+    .SetIsStateful();
+
+REGISTER_OP("XlaRecvFromHost")
+    .Output("output: Toutput")
+    .Attr("Toutput: type")
+    .Attr("shape: shape")
+    .Attr("key: string")
+    .SetIsStateful();
+
+TEST(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) {
+  // Build the XLA computation func.
+  // "const0" (bool)
+  // "const1" (int32)
+  // "if0" (pred = "const0", input = "const1", then_branch = "true_fn",
+  //        else_branch = "false_fn")
+  FunctionDefLibrary fdl;
+  {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output arg = ops::_Arg(s.WithOpName("arg"), DT_INT32, 0);
+    Output identity = ops::Identity(s.WithOpName("identity_true_fn"), arg);
+    ops::_Retval retval(s.WithOpName("retval"), identity, 0);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+    auto node_name_image = g->BuildNodeNameIndex();
+    node_name_image["identity_true_fn"]->AddAttr("_oc", "0");
+    PartialTensorShape shape({2});
+    node_name_image["identity_true_fn"]->AddAttr(
+        kXlaInferredShapesAttrName, std::vector<PartialTensorShape>{shape});
+
+    FunctionDef *true_fn_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, "true_fn", true_fn_fdef));
+  }
+  {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output arg = ops::_Arg(s.WithOpName("arg"), DT_INT32, 0);
+    Output identity = ops::Identity(s.WithOpName("identity_false_fn"), arg);
+    ops::_Retval retval(s.WithOpName("retval"), identity, 0);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+    auto node_name_image = g->BuildNodeNameIndex();
+    node_name_image["identity_false_fn"]->AddAttr("_oc", "0");
+    PartialTensorShape shape({2});
+    node_name_image["identity_false_fn"]->AddAttr(
+        kXlaInferredShapesAttrName, std::vector<PartialTensorShape>{shape});
+
+    FunctionDef *false_fn_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, "false_fn", false_fn_fdef));
+  }
+  {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output cond = ops::Const(s.WithOpName("const0"), true, {2});
+    Output input = ops::Const(s.WithOpName("const1"), 1, {2});
+    NameAttrList true_fn;
+    true_fn.set_name("true_fn");
+    NameAttrList false_fn;
+    false_fn.set_name("false_fn");
+    auto if_op = ops::If(s.WithOpName("if"), cond,
+                         std::initializer_list<Input>{cond, input}, {DT_INT32},
+                         true_fn, false_fn);
+    ops::_Retval retval(s.WithOpName("retval"), if_op.output[0], 0);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+
+    FunctionDef *xla_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef));
+  }
+  FunctionLibraryDefinition fld(OpRegistry::Global(), fdl);
+
+  protobuf::Map<string, tensorflow::AttrValue> attrs;
+  std::map<string, int> host_compute_core;
+  std::vector<string> shape_inference_graphs;
+  bool has_outside_compilation;
+  NameAttrList name_attrs;
+  name_attrs.set_name("cluster");
+  *name_attrs.mutable_attr() = attrs;
+  TF_CHECK_OK(ExtractOutsideCompilationForFunction(
+      "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph",
+      host_compute_core, &fld, &shape_inference_graphs,
+      &has_outside_compilation));
+
+  // Check host graph.
+  {
+    FunctionBody *host_fbody = nullptr;
+    AttrValue device_ordinal_temp_value;
+    device_ordinal_temp_value.set_i(0);
+    protobuf::Map<string, AttrValue> host_func_attrs;
+    host_func_attrs["device_ordinal"] = device_ordinal_temp_value;
+    TF_CHECK_OK(FunctionDefToBodyHelper(
+        *fld.Find("host_graph"), AttrSlice(&host_func_attrs), &fld,
+        [&](const string &op, const OpDef **sig) {
+          return fld.LookUpOpDef(op, sig);
+        },
+        &host_fbody));
+    std::unique_ptr<FunctionBody> host_fbody_deleter(host_fbody);
+    Graph *host_graph = host_fbody->graph;
+    auto node_name_index = host_graph->BuildNodeNameIndex();
+
+    // Verify we have XlaRecvAtHost to receive "If" predicate.
+    Node *recv_if_pred_node = node_name_index["recv_oc_if_pred_if"];
+    EXPECT_NE(recv_if_pred_node, nullptr);
+
+    // Verify we have an "If" to choose outside compilation between then_branch
+    // and else_branch, and it has `recv_if_pred_node` as cond input.
+    Node *if_oc_node = node_name_index["oc_if_if"];
+    EXPECT_NE(if_oc_node, nullptr);
+    Node *if_oc_node_cond_input;
+    TF_CHECK_OK(if_oc_node->input_node(0, &if_oc_node_cond_input));
+    EXPECT_EQ(if_oc_node_cond_input, recv_if_pred_node);
+
+    // Check that then_branch outside compilation has node "identity_true_fn".
+    const FunctionDef *true_def = fld.Find("oc_then_branch_host_if_if");
+    EXPECT_NE(true_def, nullptr);
+    bool has_identity_true_fn_node = false;
+    for (const auto &node_def : true_def->node_def()) {
+      if (node_def.name() == "identity_true_fn") {
+        has_identity_true_fn_node = true;
+        break;
+      }
+    }
+    EXPECT_TRUE(has_identity_true_fn_node);
+
+    // Check that else_branch outside compilation has node "identity_false_fn".
+    const FunctionDef *false_def = fld.Find("oc_else_branch_host_if_if");
+    EXPECT_NE(false_def, nullptr);
+    bool has_identity_false_fn_node = false;
+    for (const auto &node_def : false_def->node_def()) {
+      if (node_def.name() == "identity_false_fn") {
+        has_identity_false_fn_node = true;
+        break;
+      }
+    }
+    EXPECT_TRUE(has_identity_false_fn_node);
+  }
+
+  // Check XLA graph.
+  {
+    FunctionBody *xla_fbody = nullptr;
+    TF_CHECK_OK(FunctionDefToBodyHelper(
+        *fld.Find("cluster_rewritten"), AttrSlice(), &fld,
+        [&](const string &op, const OpDef **sig) {
+          return fld.LookUpOpDef(op, sig);
+        },
+        &xla_fbody));
+    std::unique_ptr<FunctionBody> xla_fbody_deleter(xla_fbody);
+    Graph *xla_graph = xla_fbody->graph;
+    auto node_name_index = xla_graph->BuildNodeNameIndex();
+
+    // Check that we have XlaSendToHost to send cond predicate to host.
+    Node *send_if_pred_node = node_name_index["send_oc_if_pred_if"];
+    EXPECT_NE(send_if_pred_node, nullptr);
+
+    // Check that the "If" node now has `send_if_pred_node` as attribute
+    // _xla_token_input_nodes.
+    Node *if_node = node_name_index["if"];
+    EXPECT_NE(if_node, nullptr);
+    std::vector<string> token_inputs;
+    TF_CHECK_OK(
+        GetNodeAttr(if_node->def(), "_xla_token_input_nodes", &token_inputs));
+    EXPECT_THAT(token_inputs, ::testing::ElementsAre("send_oc_if_pred_if"));
+  }
+}
+
+TEST(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInWhile) {
+  // Build the XLA computation func.
+  // "const0" (bool)
+  // "while0" (input = "const0", cond = "cond_fn", body = "body_fn")
+  FunctionDefLibrary fdl;
+  {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output arg = ops::_Arg(s.WithOpName("arg"), DT_BOOL, 0);
+    Output identity = ops::Identity(s.WithOpName("identity_cond_fn"), arg);
+    ops::_Retval retval(s.WithOpName("retval"), identity, 0);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+    auto node_name_image = g->BuildNodeNameIndex();
+    node_name_image["identity_cond_fn"]->AddAttr("_oc", "0");
+    PartialTensorShape shape({2});
+    node_name_image["identity_cond_fn"]->AddAttr(
+        kXlaInferredShapesAttrName, std::vector<PartialTensorShape>{shape});
+
+    FunctionDef *cond_fn_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, "cond_fn", cond_fn_fdef));
+  }
+  {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output arg = ops::_Arg(s.WithOpName("arg"), DT_BOOL, 0);
+    Output identity = ops::Identity(s.WithOpName("identity_body_fn"), arg);
+    ops::_Retval retval(s.WithOpName("retval"), identity, 0);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+    auto node_name_image = g->BuildNodeNameIndex();
+    node_name_image["identity_body_fn"]->AddAttr("_oc", "0");
+    PartialTensorShape shape({2});
+    node_name_image["identity_body_fn"]->AddAttr(
+        kXlaInferredShapesAttrName, std::vector<PartialTensorShape>{shape});
+
+    FunctionDef *body_fn_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, "body_fn", body_fn_fdef));
+  }
+  {
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output input = ops::Const(s.WithOpName("const0"), true, {2});
+    NameAttrList cond_fn;
+    cond_fn.set_name("cond_fn");
+    NameAttrList body_fn;
+    body_fn.set_name("body_fn");
+    auto while_op =
+        ops::While(s.WithOpName("while"), std::initializer_list<Input>{input},
+                   cond_fn, body_fn);
+    ops::_Retval retval(s.WithOpName("retval"), while_op.output[0], 0);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+
+    FunctionDef *xla_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, "cluster", xla_fdef));
+  }
+  FunctionLibraryDefinition fld(OpRegistry::Global(), fdl);
+
+  protobuf::Map<string, tensorflow::AttrValue> attrs;
+  std::map<string, int> host_compute_core;
+  std::vector<string> shape_inference_graphs;
+  bool has_outside_compilation;
+  NameAttrList name_attrs;
+  name_attrs.set_name("cluster");
+  *name_attrs.mutable_attr() = attrs;
+  TF_CHECK_OK(ExtractOutsideCompilationForFunction(
+      "_xla", "_oc", "cluster", name_attrs, "cluster_rewritten", "host_graph",
+      host_compute_core, &fld, &shape_inference_graphs,
+      &has_outside_compilation));
+
+  // Check host graph.
+  {
+    FunctionBody *host_fbody = nullptr;
+    AttrValue device_ordinal_temp_value;
+    device_ordinal_temp_value.set_i(0);
+    protobuf::Map<string, AttrValue> host_func_attrs;
+    host_func_attrs["device_ordinal"] = device_ordinal_temp_value;
+    TF_CHECK_OK(FunctionDefToBodyHelper(
+        *fld.Find("host_graph"), AttrSlice(&host_func_attrs), &fld,
+        [&](const string &op, const OpDef **sig) {
+          return fld.LookUpOpDef(op, sig);
+        },
+        &host_fbody));
+    std::unique_ptr<FunctionBody> host_fbody_deleter(host_fbody);
+    Graph *host_graph = host_fbody->graph;
+    auto node_name_index = host_graph->BuildNodeNameIndex();
+
+    // Verify we have an "While" to execute outside compilation.
+    Node *while_oc_node = node_name_index["oc_while_while"];
+    EXPECT_NE(while_oc_node, nullptr);
+
+    // Check that cond outside compilation has node "identity_cond_fn".
+    const FunctionDef *cond_def = fld.Find("oc_cond_host_while_while");
+    EXPECT_NE(cond_def, nullptr);
+    bool has_identity_cond_fn_node = false;
+    for (const auto &node_def : cond_def->node_def()) {
+      if (node_def.name() == "identity_cond_fn") {
+        has_identity_cond_fn_node = true;
+        break;
+      }
+    }
+    EXPECT_TRUE(has_identity_cond_fn_node);
+
+    // Check that body outside compilation has node "identity_body_fn".
+    const FunctionDef *body_def = fld.Find("oc_body_host_while_while");
+    EXPECT_NE(body_def, nullptr);
+    bool has_identity_body_fn_node = false;
+    for (const auto &node_def : body_def->node_def()) {
+      if (node_def.name() == "identity_body_fn") {
+        has_identity_body_fn_node = true;
+        break;
+      }
+    }
+    EXPECT_TRUE(has_identity_body_fn_node);
+  }
+
+  // Check XLA graph.
+  {
+    // Verify that rewritten cond fn has XlaSendToHost to send loop predicate to
+    // host.
+    const FunctionDef *cond_def = fld.Find("cond_fn_oc");
+    EXPECT_NE(cond_def, nullptr);
+    bool has_send_oc_while_cond_node = false;
+    for (const auto &node_def : cond_def->node_def()) {
+      if (node_def.name() == "send_oc_while_cond_while") {
+        has_send_oc_while_cond_node = true;
+        break;
+      }
+    }
+    EXPECT_TRUE(has_send_oc_while_cond_node);
+  }
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/kernels/while_op.cc b/tensorflow/compiler/tf2xla/kernels/while_op.cc
index ce007fc04a..89b577bfc0 100644
--- a/tensorflow/compiler/tf2xla/kernels/while_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/while_op.cc
@@ -41,8 +41,7 @@ Status MakeXlaCompilerArgumentsFromInputs(
   *has_uninitialized_vars = false;
   *has_tensor_arrays = false;
   for (int i = 0; i < ctx->num_inputs(); ++i) {
-    VLOG(2) << " Input " << i
-            << " type: " << DataTypeString(ctx->input_type(i))
+    VLOG(2) << " Input " << i << " type: " << DataTypeString(ctx->input_type(i))
             << " shape: " << ctx->InputShape(i).DebugString();
     XlaCompiler::Argument& arg = (*args)[i];
     DataType type = ctx->input_type(i);
@@ -233,13 +232,22 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
           xla::ShapeUtil::HumanString(body_input_shape), " vs. ",
           xla::ShapeUtil::HumanString(body.xla_output_shape)));
 
-  xla::Shape expected_cond_output_shape = xla::ShapeUtil::MakeTupleShape(
-      {xla::ShapeUtil::MakeShape(xla::PRED, {})});
+  xla::Shape expected_cond_output_shape_without_side_effect =
+      xla::ShapeUtil::MakeTupleShape(
+          {xla::ShapeUtil::MakeShape(xla::PRED, {})});
+  xla::Shape expected_cond_output_shape_with_side_effect =
+      xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::PRED, {}),
+                                      xla::ShapeUtil::MakeTokenShape()});
   OP_REQUIRES(ctx,
-              xla::ShapeUtil::Compatible(cond.xla_output_shape,
-                                         expected_cond_output_shape),
+              xla::ShapeUtil::Compatible(
+                  cond.xla_output_shape,
+                  expected_cond_output_shape_without_side_effect) ||
+                  xla::ShapeUtil::Compatible(
+                      cond.xla_output_shape,
+                      expected_cond_output_shape_with_side_effect),
               errors::InvalidArgument(
-                  "Output shape of loop condition should be (pred[]), got: ",
+                  "Output shape of loop condition should be (pred[]) or "
+                  "(pred[], token[]), got: ",
                   xla::ShapeUtil::HumanString(cond.xla_output_shape)));
 
   int num_inputs = body.input_mapping.size();
diff --git a/tensorflow/compiler/tf2xla/side_effect_util.cc b/tensorflow/compiler/tf2xla/side_effect_util.cc
index b233e6b2c2..b62f8e9115 100644
--- a/tensorflow/compiler/tf2xla/side_effect_util.cc
+++ b/tensorflow/compiler/tf2xla/side_effect_util.cc
@@ -24,6 +24,8 @@ const char kXlaTokenInputNodesAttrName[] = "_xla_token_input_nodes";
 
 const char kXlaTokenArgNodeName[] = "_xla_token_arg_node";
 
+const char kXlaHasHostTransferAttrName[] = "_xla_has_host_transfer";
+
 std::set<std::string> CalculateTokenInputsForOutputToken(const Graph& g) {
   std::set<std::string> results;
   Node* first_side_effecting_node_on_path = nullptr;
diff --git a/tensorflow/compiler/tf2xla/side_effect_util.h b/tensorflow/compiler/tf2xla/side_effect_util.h
index f22ddb2f58..7081b362c3 100644
--- a/tensorflow/compiler/tf2xla/side_effect_util.h
+++ b/tensorflow/compiler/tf2xla/side_effect_util.h
@@ -35,6 +35,9 @@ extern const char kXlaTokenInputNodesAttrName[];
 // node has side-effect dependency on current graph's token input.
 extern const char kXlaTokenArgNodeName[];
 
+// This node have XlaRecvAtHost/XlaSendFromHost in its associated functions.
+extern const char kXlaHasHostTransferAttrName[];
+
 // Calculates side-effect dependencies for the graph's token output.
 // Returns a set of node names representing these dependencies.
 std::set<std::string> CalculateTokenInputsForOutputToken(const Graph& g);
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc
index cc81772e8c..6cc8ae3afd 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc
@@ -557,6 +557,12 @@ bool HasAssociatedFunction(const NodeDef& node_def,
     return true;
   }
 
+  if (node_def.op() == "XlaHostCompute") {
+    // XlaHostCompute has "shape_inference_graph" func attr, but that's not
+    // related to graph execution.
+    return false;
+  }
+
   for (const auto& iter : node_def.attr()) {
     if (iter.second.has_func()) {
       return true;
@@ -578,6 +584,9 @@ std::vector<AssociatedFunctionInfo> GetAssociatedFunctions(
     // This is a SymbolicGradient op.
     AttrValueMap attrs(node.attrs().begin(), node.attrs().end());
     results.emplace_back(AssociatedFunctionInfo::SymbolicGradient(op, attrs));
+  } else if (node.type_string() == "XlaHostCompute") {
+    // XlaHostCompute has "shape_inference_graph" func attr, but that's not
+    // related to graph execution.
   } else {
     // Collect all function attrs for the node.
     for (auto& iter : node.attrs()) {
-- 
GitLab


From c07721a4aca474c09d2f07a667e0edeb4e826957 Mon Sep 17 00:00:00 2001
From: Taylor Robie <taylorrobie@google.com>
Date: Mon, 10 Dec 2018 21:17:21 -0800
Subject: [PATCH 337/873] Apply string compat function to address when creating
 a coordinator.

PiperOrigin-RevId: 224936924
---
 .../python/distribute/cluster_resolver/tpu_cluster_resolver.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
index 72a27b915c..52ac07d7ea 100644
--- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
@@ -481,7 +481,8 @@ class TPUClusterResolver(ClusterResolver):
     return self._environment
 
   def _start_local_server(self):
-    address = self._requestComputeMetadata('instance/network-interfaces/0/ip')
+    address = compat.as_text(self._requestComputeMetadata(
+        'instance/network-interfaces/0/ip'))
     self._server = server_lib.Server(
         {
             'local': ['0.0.0.0:0']
-- 
GitLab


From 68834966daf6bd27add401f6d9402b5a0e3da5ec Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 10 Dec 2018 21:20:16 -0800
Subject: [PATCH 338/873] Export ragged ops.

PiperOrigin-RevId: 224937131
---
 tensorflow/python/ops/ragged/BUILD            |  12 +-
 .../python/ops/ragged/ragged_factory_ops.py   |   3 +
 .../ops/ragged/ragged_functional_ops.py       |   2 +
 .../python/ops/ragged/ragged_math_ops.py      |   2 +
 tensorflow/python/ops/ragged/ragged_tensor.py |   2 +
 .../python/ops/ragged/ragged_tensor_value.py  |   3 +
 .../python/ops/ragged/segment_id_ops.py       |   3 +
 .../tools/api/generator/api_init_files.bzl    |   1 +
 .../tools/api/generator/api_init_files_v1.bzl |   1 +
 .../python/tools/api/generator/doc_srcs.py    |   1 +
 .../golden/v1/tensorflow.-ragged-tensor.pbtxt | 125 ++++++++++++++++++
 .../tools/api/golden/v1/tensorflow.pbtxt      |   8 ++
 ...nsorflow.ragged.-ragged-tensor-value.pbtxt |  41 ++++++
 .../api/golden/v1/tensorflow.ragged.pbtxt     |  31 +++++
 .../golden/v2/tensorflow.-ragged-tensor.pbtxt | 125 ++++++++++++++++++
 .../tools/api/golden/v2/tensorflow.pbtxt      |   8 ++
 .../api/golden/v2/tensorflow.ragged.pbtxt     |  23 ++++
 tensorflow/tools/compatibility/renames_v2.py  |   3 +
 18 files changed, 393 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.ragged.-ragged-tensor-value.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.ragged.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.ragged.pbtxt

diff --git a/tensorflow/python/ops/ragged/BUILD b/tensorflow/python/ops/ragged/BUILD
index d88543c400..c0db8bfbb5 100644
--- a/tensorflow/python/ops/ragged/BUILD
+++ b/tensorflow/python/ops/ragged/BUILD
@@ -62,6 +62,7 @@ py_library(
         "//tensorflow/python:ragged_array_ops_gen",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:tensor_util",
+        "//tensorflow/python:util",
     ],
 )
 
@@ -82,6 +83,7 @@ py_library(
         "//tensorflow/python:ragged_conversion_ops_gen",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
     ],
 )
 
@@ -95,6 +97,7 @@ py_library(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:tensor_util",
+        "//tensorflow/python:util",
         "//tensorflow/python/ops/ragged:ragged_tensor",
         "//tensorflow/python/ops/ragged:ragged_tensor_value",
         "//third_party/py/numpy",
@@ -110,6 +113,7 @@ py_library(
         ":ragged_tensor",
         ":ragged_util",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:util",
     ],
 )
 
@@ -147,6 +151,7 @@ py_library(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:ragged_math_ops_gen",
         "//tensorflow/python:tensor_util",
+        "//tensorflow/python:util",
     ],
 )
 
@@ -189,6 +194,7 @@ py_library(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:session",
         "//tensorflow/python:tensor_shape",
+        "//tensorflow/python:util",
     ],
 )
 
@@ -216,7 +222,10 @@ py_library(
     name = "ragged_tensor_value",
     srcs = ["ragged_tensor_value.py"],
     srcs_version = "PY2AND3",
-    deps = ["//third_party/py/numpy"],
+    deps = [
+        "//tensorflow/python:util",
+        "//third_party/py/numpy",
+    ],
 )
 
 py_library(
@@ -245,6 +254,7 @@ py_library(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:tensor_util",
+        "//tensorflow/python:util",
     ],
 )
 
diff --git a/tensorflow/python/ops/ragged/ragged_factory_ops.py b/tensorflow/python/ops/ragged/ragged_factory_ops.py
index 2c63e1c799..695accc652 100644
--- a/tensorflow/python/ops/ragged/ragged_factory_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_factory_ops.py
@@ -24,11 +24,13 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_tensor_value
+from tensorflow.python.util.tf_export import tf_export
 
 
 #===============================================================================
 # Op to construct a constant RaggedTensor from a nested Python list.
 #===============================================================================
+@tf_export("ragged.constant")
 def constant(pylist, dtype=None, ragged_rank=None, inner_shape=None, name=None):
   """Constructs a constant RaggedTensor from a nested Python list.
 
@@ -74,6 +76,7 @@ def constant(pylist, dtype=None, ragged_rank=None, inner_shape=None, name=None):
                            inner_shape)
 
 
+@tf_export(v1=["ragged.constant_value"])
 def constant_value(pylist, dtype=None, ragged_rank=None, inner_shape=None):
   """Constructs a RaggedTensorValue from a nested Python list.
 
diff --git a/tensorflow/python/ops/ragged/ragged_functional_ops.py b/tensorflow/python/ops/ragged/ragged_functional_ops.py
index 751f2c7359..7344c96465 100644
--- a/tensorflow/python/ops/ragged/ragged_functional_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_functional_ops.py
@@ -21,8 +21,10 @@ from __future__ import print_function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_util
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export("ragged.map_flat_values")
 def map_flat_values(op, *args, **kwargs):
   """Applies `op` to the inner values of one or more RaggedTensors.
 
diff --git a/tensorflow/python/ops/ragged/ragged_math_ops.py b/tensorflow/python/ops/ragged/ragged_math_ops.py
index f774c1eb58..02e927b699 100644
--- a/tensorflow/python/ops/ragged/ragged_math_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_math_ops.py
@@ -31,12 +31,14 @@ from tensorflow.python.ops.ragged import ragged_functional_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_util
 from tensorflow.python.ops.ragged import segment_id_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
 #===============================================================================
 # ragged.range
 #===============================================================================
 # pylint: disable=redefined-builtin
+@tf_export('ragged.range')
 def range(starts, limits=None, deltas=1, dtype=None, name=None):
   """Returns a `RaggedTensor` containing the specified sequences of numbers.
 
diff --git a/tensorflow/python/ops/ragged/ragged_tensor.py b/tensorflow/python/ops/ragged/ragged_tensor.py
index 567c50203a..acf3a3841d 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor.py
@@ -32,6 +32,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_tensor_value
 from tensorflow.python.ops.ragged import ragged_util
 from tensorflow.python.ops.ragged import segment_id_ops
+from tensorflow.python.util.tf_export import tf_export
 
 # pylint: disable=protected-access
 _eval_using_default_session = ops._eval_using_default_session
@@ -43,6 +44,7 @@ _eval_using_default_session = ops._eval_using_default_session
 #===============================================================================
 
 
+@tf_export("RaggedTensor")
 class RaggedTensor(object):
   """Represents a ragged tensor (go/ragged).
 
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_value.py b/tensorflow/python/ops/ragged/ragged_tensor_value.py
index bf0ac4482a..1162487f0f 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_value.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_value.py
@@ -20,7 +20,10 @@ from __future__ import print_function
 
 import numpy as np
 
+from tensorflow.python.util.tf_export import tf_export
 
+
+@tf_export(v1=["ragged.RaggedTensorValue"])
 class RaggedTensorValue(object):
   """Represents the value of a `RaggedTensor`.
 
diff --git a/tensorflow/python/ops/ragged/segment_id_ops.py b/tensorflow/python/ops/ragged/segment_id_ops.py
index fa2970c3e7..ee17e4d636 100644
--- a/tensorflow/python/ops/ragged/segment_id_ops.py
+++ b/tensorflow/python/ops/ragged/segment_id_ops.py
@@ -25,10 +25,12 @@ from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_util
+from tensorflow.python.util.tf_export import tf_export
 
 
 # For background on "segments" and "segment ids", see:
 # https://www.tensorflow.org/api_guides/python/math_ops#Segmentation
+@tf_export("ragged.row_splits_to_segment_ids")
 def row_splits_to_segment_ids(splits, name=None):
   """Generates the segmentation corresponding to a RaggedTensor `splits` vector.
 
@@ -63,6 +65,7 @@ def row_splits_to_segment_ids(splits, name=None):
 
 # For background on "segments" and "segment ids", see:
 # https://www.tensorflow.org/api_guides/python/math_ops#Segmentation
+@tf_export("ragged.segment_ids_to_row_splits")
 def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
   """Generates the RaggedTensor `splits` vector corresponding to a segmentation.
 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 58913b3208..25d0c0f75c 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -69,6 +69,7 @@ TENSORFLOW_API_INIT_FILES = [
     "nn/__init__.py",
     "nn/rnn_cell/__init__.py",
     "quantization/__init__.py",
+    "ragged/__init__.py",
     "random/__init__.py",
     "saved_model/__init__.py",
     "sets/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index 0937f98e75..99c8495ce5 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -79,6 +79,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "profiler/__init__.py",
     "python_io/__init__.py",
     "quantization/__init__.py",
+    "ragged/__init__.py",
     "random/__init__.py",
     "resource_loader/__init__.py",
     "strings/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/doc_srcs.py b/tensorflow/python/tools/api/generator/doc_srcs.py
index abb5886deb..b567eead3d 100644
--- a/tensorflow/python/tools/api/generator/doc_srcs.py
+++ b/tensorflow/python/tools/api/generator/doc_srcs.py
@@ -54,6 +54,7 @@ _TENSORFLOW_DOC_SOURCES = {
     'nn': DocSource(docstring_module_name='ops.nn_ops'),
     'nn.rnn_cell': DocSource(docstring_module_name='ops.rnn_cell'),
     'python_io': DocSource(docstring_module_name='lib.io.python_io'),
+    'ragged': DocSource(docstring_module_name='ops.ragged'),
     'resource_loader': DocSource(
         docstring_module_name='platform.resource_loader'),
     'sets': DocSource(docstring_module_name='ops.sets'),
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt
new file mode 100644
index 0000000000..c0ed956535
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-ragged-tensor.pbtxt
@@ -0,0 +1,125 @@
+path: "tensorflow.RaggedTensor"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.ragged.ragged_tensor.RaggedTensor\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "flat_values"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "nested_row_splits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "ragged_rank"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "row_splits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "values"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'values\', \'row_splits\', \'cached_row_lengths\', \'cached_value_rowids\', \'cached_nrows\', \'internal\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\'], "
+  }
+  member_method {
+    name: "bounding_shape"
+    argspec: "args=[\'self\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_nested_row_lengths"
+    argspec: "args=[\'cls\', \'flat_values\', \'nested_row_lengths\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_nested_row_splits"
+    argspec: "args=[\'cls\', \'flat_values\', \'nested_row_splits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_nested_value_rowids"
+    argspec: "args=[\'cls\', \'flat_values\', \'nested_value_rowids\', \'nested_nrows\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_row_lengths"
+    argspec: "args=[\'cls\', \'values\', \'row_lengths\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_row_limits"
+    argspec: "args=[\'cls\', \'values\', \'row_limits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_row_splits"
+    argspec: "args=[\'cls\', \'values\', \'row_splits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_row_starts"
+    argspec: "args=[\'cls\', \'values\', \'row_starts\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_sparse"
+    argspec: "args=[\'cls\', \'st_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_tensor"
+    argspec: "args=[\'cls\', \'tensor\', \'lengths\', \'padding\', \'ragged_rank\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'1\', \'None\'], "
+  }
+  member_method {
+    name: "from_value_rowids"
+    argspec: "args=[\'cls\', \'values\', \'value_rowids\', \'nrows\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "nested_row_lengths"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "nrows"
+    argspec: "args=[\'self\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int64\'>\", \'None\'], "
+  }
+  member_method {
+    name: "row_lengths"
+    argspec: "args=[\'self\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "row_limits"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "row_starts"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "to_list"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "to_sparse"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "to_tensor"
+    argspec: "args=[\'self\', \'default_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "value_rowids"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "with_flat_values"
+    argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "with_values"
+    argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 60ff59196b..5592a4c59d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -172,6 +172,10 @@ tf_module {
     name: "QueueBase"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "RaggedTensor"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "RandomShuffleQueue"
     mtype: "<type \'type\'>"
@@ -516,6 +520,10 @@ tf_module {
     name: "quint8"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "ragged"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "random"
     mtype: "<type \'module\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.ragged.-ragged-tensor-value.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.ragged.-ragged-tensor-value.pbtxt
new file mode 100644
index 0000000000..96c895e0a4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.ragged.-ragged-tensor-value.pbtxt
@@ -0,0 +1,41 @@
+path: "tensorflow.ragged.RaggedTensorValue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.ragged.ragged_tensor_value.RaggedTensorValue\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "flat_values"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "nested_row_splits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "ragged_rank"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "row_splits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "values"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'values\', \'row_splits\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "to_list"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.ragged.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.ragged.pbtxt
new file mode 100644
index 0000000000..22ca7e931f
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.ragged.pbtxt
@@ -0,0 +1,31 @@
+path: "tensorflow.ragged"
+tf_module {
+  member {
+    name: "RaggedTensorValue"
+    mtype: "<type \'type\'>"
+  }
+  member_method {
+    name: "constant"
+    argspec: "args=[\'pylist\', \'dtype\', \'ragged_rank\', \'inner_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "constant_value"
+    argspec: "args=[\'pylist\', \'dtype\', \'ragged_rank\', \'inner_shape\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "map_flat_values"
+    argspec: "args=[\'op\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[\'starts\', \'limits\', \'deltas\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "row_splits_to_segment_ids"
+    argspec: "args=[\'splits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "segment_ids_to_row_splits"
+    argspec: "args=[\'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt
new file mode 100644
index 0000000000..c0ed956535
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-ragged-tensor.pbtxt
@@ -0,0 +1,125 @@
+path: "tensorflow.RaggedTensor"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.ragged.ragged_tensor.RaggedTensor\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtype"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "flat_values"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "nested_row_splits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "ragged_rank"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "row_splits"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shape"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "values"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'values\', \'row_splits\', \'cached_row_lengths\', \'cached_value_rowids\', \'cached_nrows\', \'internal\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\'], "
+  }
+  member_method {
+    name: "bounding_shape"
+    argspec: "args=[\'self\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_nested_row_lengths"
+    argspec: "args=[\'cls\', \'flat_values\', \'nested_row_lengths\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_nested_row_splits"
+    argspec: "args=[\'cls\', \'flat_values\', \'nested_row_splits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_nested_value_rowids"
+    argspec: "args=[\'cls\', \'flat_values\', \'nested_value_rowids\', \'nested_nrows\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "from_row_lengths"
+    argspec: "args=[\'cls\', \'values\', \'row_lengths\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_row_limits"
+    argspec: "args=[\'cls\', \'values\', \'row_limits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_row_splits"
+    argspec: "args=[\'cls\', \'values\', \'row_splits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_row_starts"
+    argspec: "args=[\'cls\', \'values\', \'row_starts\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_sparse"
+    argspec: "args=[\'cls\', \'st_input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_tensor"
+    argspec: "args=[\'cls\', \'tensor\', \'lengths\', \'padding\', \'ragged_rank\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'1\', \'None\'], "
+  }
+  member_method {
+    name: "from_value_rowids"
+    argspec: "args=[\'cls\', \'values\', \'value_rowids\', \'nrows\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "nested_row_lengths"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "nrows"
+    argspec: "args=[\'self\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'int64\'>\", \'None\'], "
+  }
+  member_method {
+    name: "row_lengths"
+    argspec: "args=[\'self\', \'axis\', \'name\'], varargs=None, keywords=None, defaults=[\'1\', \'None\'], "
+  }
+  member_method {
+    name: "row_limits"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "row_starts"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "to_list"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "to_sparse"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "to_tensor"
+    argspec: "args=[\'self\', \'default_value\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+  member_method {
+    name: "value_rowids"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "with_flat_values"
+    argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "with_values"
+    argspec: "args=[\'self\', \'new_values\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 0f11107dc3..5f31d27480 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -32,6 +32,10 @@ tf_module {
     name: "Operation"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "RaggedTensor"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "RegisterGradient"
     mtype: "<type \'type\'>"
@@ -260,6 +264,10 @@ tf_module {
     name: "quint8"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "ragged"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "random"
     mtype: "<type \'module\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.ragged.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.ragged.pbtxt
new file mode 100644
index 0000000000..5fde488ffd
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.ragged.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.ragged"
+tf_module {
+  member_method {
+    name: "constant"
+    argspec: "args=[\'pylist\', \'dtype\', \'ragged_rank\', \'inner_shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "map_flat_values"
+    argspec: "args=[\'op\'], varargs=args, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "range"
+    argspec: "args=[\'starts\', \'limits\', \'deltas\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'1\', \'None\', \'None\'], "
+  }
+  member_method {
+    name: "row_splits_to_segment_ids"
+    argspec: "args=[\'splits\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "segment_ids_to_row_splits"
+    argspec: "args=[\'segment_ids\', \'num_segments\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
+}
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index 3ab5a0d0d6..9a3f4460f7 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -421,6 +421,9 @@ renames = {
     'tf.qr': 'tf.linalg.qr',
     'tf.quantize': 'tf.quantization.quantize',
     'tf.quantized_concat': 'tf.quantization.quantized_concat',
+    'tf.ragged.constant_value': 'tf.compat.v1.ragged.constant_value',
+    'tf.ragged.convert_to_tensor_or_ragged_tensor': 'tf.compat.v1.ragged.convert_to_tensor_or_ragged_tensor',
+    'tf.ragged.RaggedTensorValue': 'tf.compat.v1.ragged.RaggedTensorValue',
     'tf.random.get_seed': 'tf.compat.v1.random.get_seed',
     'tf.random.set_random_seed': 'tf.compat.v1.random.set_random_seed',
     'tf.random_crop': 'tf.image.random_crop',
-- 
GitLab


From c2255b0f32991813a4bfbcc3e1ee178a5b5eeecd Mon Sep 17 00:00:00 2001
From: Jing Li <jingli@google.com>
Date: Mon, 10 Dec 2018 23:35:19 -0800
Subject: [PATCH 339/873] Rewrite Adam and LazyAdam optimizer to take global
 step for computing beta1 and beta2 accumulators, instead of having the
 optimizer instance to keep its own independent beta1 and beta2 accumulators
 as non-slot variables.

PiperOrigin-RevId: 224948020
---
 tensorflow/contrib/opt/BUILD                  |  36 ++
 tensorflow/contrib/opt/__init__.py            |   4 +
 .../opt/python/training/adam_gs_optimizer.py  | 217 ++++++++++
 .../python/training/adam_gs_optimizer_test.py | 382 +++++++++++++++++
 .../python/training/lazy_adam_gs_optimizer.py | 114 +++++
 .../training/lazy_adam_gs_optimizer_test.py   | 402 ++++++++++++++++++
 6 files changed, 1155 insertions(+)
 create mode 100644 tensorflow/contrib/opt/python/training/adam_gs_optimizer.py
 create mode 100644 tensorflow/contrib/opt/python/training/adam_gs_optimizer_test.py
 create mode 100644 tensorflow/contrib/opt/python/training/lazy_adam_gs_optimizer.py
 create mode 100644 tensorflow/contrib/opt/python/training/lazy_adam_gs_optimizer_test.py

diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD
index f4ac70eb1a..0446e823d9 100644
--- a/tensorflow/contrib/opt/BUILD
+++ b/tensorflow/contrib/opt/BUILD
@@ -14,6 +14,7 @@ py_library(
     name = "opt_py",
     srcs = [
         "__init__.py",
+        "python/training/adam_gs_optimizer.py",
         "python/training/adamax.py",
         "python/training/addsign.py",
         "python/training/agn_optimizer.py",
@@ -22,6 +23,7 @@ py_library(
         "python/training/external_optimizer.py",
         "python/training/ggt.py",
         "python/training/lars_optimizer.py",
+        "python/training/lazy_adam_gs_optimizer.py",
         "python/training/lazy_adam_optimizer.py",
         "python/training/matrix_functions.py",
         "python/training/model_average_optimizer.py",
@@ -60,6 +62,21 @@ py_library(
     ],
 )
 
+py_test(
+    name = "adam_gs_optimizer_test",
+    srcs = ["python/training/adam_gs_optimizer_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":opt_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:training",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "adamax_test",
     srcs = ["python/training/adamax_test.py"],
@@ -148,6 +165,25 @@ py_test(
     ],
 )
 
+py_test(
+    name = "lazy_adam_gs_optimizer_test",
+    srcs = ["python/training/lazy_adam_gs_optimizer_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":opt_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:variables",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
 py_test(
     name = "lazy_adam_optimizer_test",
     srcs = ["python/training/lazy_adam_optimizer_test.py"],
diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py
index c7ea68efa9..e8fc52342c 100644
--- a/tensorflow/contrib/opt/__init__.py
+++ b/tensorflow/contrib/opt/__init__.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=wildcard-import
+from tensorflow.contrib.opt.python.training.adam_gs_optimizer import *
 from tensorflow.contrib.opt.python.training.adamax import *
 from tensorflow.contrib.opt.python.training.addsign import *
 from tensorflow.contrib.opt.python.training.agn_optimizer import *
@@ -28,6 +29,7 @@ from tensorflow.contrib.opt.python.training.external_optimizer import *
 from tensorflow.contrib.opt.python.training.lars_optimizer import *
 from tensorflow.contrib.opt.python.training.ggt import *
 from tensorflow.contrib.opt.python.training.lazy_adam_optimizer import *
+from tensorflow.contrib.opt.python.training.lazy_adam_gs_optimizer import *
 from tensorflow.contrib.opt.python.training.model_average_optimizer import *
 from tensorflow.contrib.opt.python.training.moving_average_optimizer import *
 from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
@@ -44,12 +46,14 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
     'AdaMaxOptimizer',
+    'AdamGSOptimizer',
     'PowerSignOptimizer',
     'AddSignOptimizer',
     'DelayCompensatedGradientDescentOptimizer',
     'DropStaleGradientOptimizer',
     'ExternalOptimizerInterface',
     'LARSOptimizer',
+    'LazyAdamGSOptimizer',
     'LazyAdamOptimizer',
     'NadamOptimizer',
     'MovingAverageOptimizer',
diff --git a/tensorflow/contrib/opt/python/training/adam_gs_optimizer.py b/tensorflow/contrib/opt/python/training/adam_gs_optimizer.py
new file mode 100644
index 0000000000..3fb649ea82
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/adam_gs_optimizer.py
@@ -0,0 +1,217 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Adam rewrite to use global step for computing beta1 & beta2 accumulation."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.training import optimizer
+from tensorflow.python.training import training_ops
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export("train.AdamOptimizer")
+class AdamGSOptimizer(optimizer.Optimizer):
+  """Optimizer that implements the Adam algorithm.
+
+  See [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
+  ([pdf](http://arxiv.org/pdf/1412.6980.pdf)).
+  """
+
+  def __init__(self, global_step=0, learning_rate=0.001,
+               beta1=0.9, beta2=0.999, epsilon=1e-8,
+               use_locking=False, name="Adam"):
+    """Construct a new Adam optimizer.
+
+    Branched from tf.train.AdamOptimizer. The only difference is to pass
+    global step for computing beta1 and beta2 accumulators, instead of having
+    optimizer keep its own independent beta1 and beta2 accumulators as non-slot
+    variables.
+
+    Initialization:
+
+    $$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$
+    $$v_0 := 0 \text{(Initialize initial 2nd moment vector)}$$
+    $$t := 0 \text{(Initialize timestep)}$$
+
+    The update rule for `variable` with gradient `g` uses an optimization
+    described at the end of section2 of the paper:
+
+    $$t := t + 1$$
+    $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
+
+    $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
+    $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$
+    $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$
+
+    The default value of 1e-8 for epsilon might not be a good default in
+    general. For example, when training an Inception network on ImageNet a
+    current good choice is 1.0 or 0.1. Note that since AdamOptimizer uses the
+    formulation just before Section 2.1 of the Kingma and Ba paper rather than
+    the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon
+    hat" in the paper.
+
+    The sparse implementation of this algorithm (used when the gradient is an
+    IndexedSlices object, typically because of `tf.gather` or an embedding
+    lookup in the forward pass) does apply momentum to variable slices even if
+    they were not used in the forward pass (meaning they have a gradient equal
+    to zero). Momentum decay (beta1) is also applied to the entire momentum
+    accumulator. This means that the sparse behavior is equivalent to the dense
+    behavior (in contrast to some momentum implementations which ignore momentum
+    unless a variable slice was actually used).
+
+    Args:
+      global_step: tensorflow variable indicating the step.
+      learning_rate: A Tensor or a floating point value.  The learning rate.
+      beta1: A float value or a constant float tensor.
+        The exponential decay rate for the 1st moment estimates.
+      beta2: A float value or a constant float tensor.
+        The exponential decay rate for the 2nd moment estimates.
+      epsilon: A small constant for numerical stability. This epsilon is
+        "epsilon hat" in the Kingma and Ba paper (in the formula just before
+        Section 2.1), not the epsilon in Algorithm 1 of the paper.
+      use_locking: If True use locks for update operations.
+      name: Optional name for the operations created when applying gradients.
+        Defaults to "Adam".
+
+    @compatibility(eager)
+    When eager execution is enabled, `learning_rate`, `beta1`, `beta2`, and
+    `epsilon` can each be a callable that takes no arguments and returns the
+    actual value to use. This can be useful for changing these values across
+    different invocations of optimizer functions.
+    @end_compatibility
+    """
+    super(AdamGSOptimizer, self).__init__(use_locking, name)
+    self._lr = learning_rate
+    self._beta1 = beta1
+    self._beta2 = beta2
+    self._epsilon = epsilon
+    self._global_step = global_step
+    self._global_step_on_worker = None
+
+    # Tensor versions of the constructor arguments, created in _prepare().
+    self._lr_t = None
+    self._beta1_t = None
+    self._beta2_t = None
+    self._epsilon_t = None
+
+    # Created in SparseApply if needed.
+    self._updated_lr = None
+
+  def _get_beta_accumulators(self):
+    return (math_ops.pow(self._beta1_t, self._global_step_on_worker),
+            math_ops.pow(self._beta2_t, self._global_step_on_worker))
+
+  def _create_slots(self, var_list):
+    # Create slots for the first and second moments.
+    for v in var_list:
+      self._zeros_slot(v, "m", self._name)
+      self._zeros_slot(v, "v", self._name)
+
+  def _prepare(self):
+    lr = self._call_if_callable(self._lr)
+    beta1 = self._call_if_callable(self._beta1)
+    beta2 = self._call_if_callable(self._beta2)
+    epsilon = self._call_if_callable(self._epsilon)
+
+    self._lr_t = ops.convert_to_tensor(lr, name="learning_rate")
+    self._beta1_t = ops.convert_to_tensor(beta1, name="beta1")
+    self._beta2_t = ops.convert_to_tensor(beta2, name="beta2")
+    self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon")
+
+    # Performance optimization so that worker creates a copy of the global step
+    # to avoid overloading the parameter server holding the global step.
+    self._global_step_on_worker = math_ops.cast(
+        array_ops.identity(self._global_step) + 1, dtypes.float32)
+
+  def _apply_dense(self, grad, var):
+    m = self.get_slot(var, "m")
+    v = self.get_slot(var, "v")
+    beta1_power, beta2_power = self._get_beta_accumulators()
+    return training_ops.apply_adam(
+        var, m, v,
+        math_ops.cast(beta1_power, var.dtype.base_dtype),
+        math_ops.cast(beta2_power, var.dtype.base_dtype),
+        math_ops.cast(self._lr_t, var.dtype.base_dtype),
+        math_ops.cast(self._beta1_t, var.dtype.base_dtype),
+        math_ops.cast(self._beta2_t, var.dtype.base_dtype),
+        math_ops.cast(self._epsilon_t, var.dtype.base_dtype),
+        grad, use_locking=self._use_locking).op
+
+  def _resource_apply_dense(self, grad, var):
+    m = self.get_slot(var, "m")
+    v = self.get_slot(var, "v")
+    beta1_power, beta2_power = self._get_beta_accumulators()
+    return training_ops.resource_apply_adam(
+        var.handle, m.handle, v.handle,
+        math_ops.cast(beta1_power, grad.dtype.base_dtype),
+        math_ops.cast(beta2_power, grad.dtype.base_dtype),
+        math_ops.cast(self._lr_t, grad.dtype.base_dtype),
+        math_ops.cast(self._beta1_t, grad.dtype.base_dtype),
+        math_ops.cast(self._beta2_t, grad.dtype.base_dtype),
+        math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
+        grad, use_locking=self._use_locking)
+
+  def _apply_sparse_shared(self, grad, var, indices, scatter_add):
+    beta1_power, beta2_power = self._get_beta_accumulators()
+    beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
+    beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
+    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
+    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
+    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
+    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
+    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
+    # m_t = beta1 * m + (1 - beta1) * g_t
+    m = self.get_slot(var, "m")
+    m_scaled_g_values = grad * (1 - beta1_t)
+    m_t = state_ops.assign(m, m * beta1_t,
+                           use_locking=self._use_locking)
+    with ops.control_dependencies([m_t]):
+      m_t = scatter_add(m, indices, m_scaled_g_values)
+    # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
+    v = self.get_slot(var, "v")
+    v_scaled_g_values = (grad * grad) * (1 - beta2_t)
+    v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
+    with ops.control_dependencies([v_t]):
+      v_t = scatter_add(v, indices, v_scaled_g_values)
+    v_sqrt = math_ops.sqrt(v_t)
+    var_update = state_ops.assign_sub(var,
+                                      lr * m_t / (v_sqrt + epsilon_t),
+                                      use_locking=self._use_locking)
+    return control_flow_ops.group(*[var_update, m_t, v_t])
+
+  def _apply_sparse(self, grad, var):
+    return self._apply_sparse_shared(
+        grad.values, var, grad.indices,
+        lambda x, i, v: state_ops.scatter_add(  # pylint: disable=g-long-lambda
+            x, i, v, use_locking=self._use_locking))
+
+  def _resource_scatter_add(self, x, i, v):
+    with ops.control_dependencies(
+        [resource_variable_ops.resource_scatter_add(
+            x.handle, i, v)]):
+      return x.value()
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    return self._apply_sparse_shared(
+        grad, var, indices, self._resource_scatter_add)
diff --git a/tensorflow/contrib/opt/python/training/adam_gs_optimizer_test.py b/tensorflow/contrib/opt/python/training/adam_gs_optimizer_test.py
new file mode 100644
index 0000000000..c68c965aef
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/adam_gs_optimizer_test.py
@@ -0,0 +1,382 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for AdamGS."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.opt.python.training import adam_gs_optimizer
+from tensorflow.python.client import session
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+def adam_update_numpy(param,
+                      g_t,
+                      t,
+                      m,
+                      v,
+                      alpha=0.001,
+                      beta1=0.9,
+                      beta2=0.999,
+                      epsilon=1e-8):
+  alpha_t = alpha * np.sqrt(1 - beta2**t) / (1 - beta1**t)
+
+  m_t = beta1 * m + (1 - beta1) * g_t
+  v_t = beta2 * v + (1 - beta2) * g_t * g_t
+
+  param_t = param - alpha_t * m_t / (np.sqrt(v_t) + epsilon)
+  return param_t, m_t, v_t
+
+
+class AdamGSOptimizerTest(test.TestCase):
+
+  def doTestSparse(self, use_resource=False):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          global_step = resource_variable_ops.ResourceVariable(
+              array_ops.zeros([], dtypes.int64))
+          var0 = resource_variable_ops.ResourceVariable(var0_np)
+          var1 = resource_variable_ops.ResourceVariable(var1_np)
+        else:
+          global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+        grads0_np_indices = np.array([0, 1], dtype=np.int32)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(grads0_np),
+            constant_op.constant(grads0_np_indices), constant_op.constant([2]))
+        grads1_np_indices = np.array([0, 1], dtype=np.int32)
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(grads1_np),
+            constant_op.constant(grads1_np_indices), constant_op.constant([2]))
+        opt = adam_gs_optimizer.AdamGSOptimizer(global_step=global_step)
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, self.evaluate(beta1_power))
+          self.assertAllCloseAccordingToType(0.999**t,
+                                             self.evaluate(beta2_power))
+          update.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+
+  def testSparse(self):
+    self.doTestSparse(use_resource=False)
+
+  def testResourceSparse(self):
+    self.doTestSparse(use_resource=True)
+
+  def testSparseDevicePlacement(self):
+    for index_dtype in [dtypes.int32, dtypes.int64]:
+      with self.cached_session(force_gpu=test.is_gpu_available()):
+        # If a GPU is available, tests that all optimizer ops can be placed on
+        # it (i.e. they have GPU kernels).
+        var = variables.Variable([[1.0], [2.0]])
+        indices = constant_op.constant([0, 1], dtype=index_dtype)
+        gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices))
+        optimizer = adam_gs_optimizer.AdamGSOptimizer(3.0)
+        minimize_op = optimizer.minimize(gathered_sum)
+        variables.global_variables_initializer().run()
+        minimize_op.run()
+
+  def testSparseRepeatedIndices(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        repeated_index_global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64))
+        aggregated_global_step = variables.Variable(
+            array_ops.zeros([], dtypes.int64))
+        repeated_index_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        aggregated_update_var = variables.Variable(
+            [[1.0], [2.0]], dtype=dtype)
+        grad_repeated_index = ops.IndexedSlices(
+            constant_op.constant(
+                [0.1, 0.1], shape=[2, 1], dtype=dtype),
+            constant_op.constant([1, 1]),
+            constant_op.constant([2, 1]))
+        grad_aggregated = ops.IndexedSlices(
+            constant_op.constant(
+                [0.2], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([2, 1]))
+        repeated_update = adam_gs_optimizer.AdamGSOptimizer(
+            global_step=repeated_index_global_step).apply_gradients(
+                [(grad_repeated_index, repeated_index_update_var)],
+                global_step=repeated_index_global_step)
+        aggregated_update = adam_gs_optimizer.AdamGSOptimizer(
+            global_step=aggregated_global_step).apply_gradients(
+                [(grad_aggregated, aggregated_update_var)],
+                global_step=aggregated_global_step)
+        variables.global_variables_initializer().run()
+        self.assertAllClose(aggregated_update_var.eval(),
+                            self.evaluate(repeated_index_update_var))
+        for _ in range(3):
+          repeated_update.run()
+          aggregated_update.run()
+          self.assertAllClose(aggregated_update_var.eval(),
+                              self.evaluate(repeated_index_update_var))
+
+  def doTestBasic(self, use_resource=False, use_callable_params=False):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      with self.session(graph=ops.Graph()):
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          global_step = resource_variable_ops.ResourceVariable(
+              array_ops.zeros([], dtypes.int64), name="global_step_%d" % i)
+          var0 = resource_variable_ops.ResourceVariable(
+              var0_np, name="var0_%d" % i)
+          var1 = resource_variable_ops.ResourceVariable(
+              var1_np, name="var1_%d" % i)
+        else:
+          global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+
+        learning_rate = lambda: 0.001
+        beta1 = lambda: 0.9
+        beta2 = lambda: 0.999
+        epsilon = lambda: 1e-8
+        if not use_callable_params:
+          learning_rate = learning_rate()
+          beta1 = beta1()
+          beta2 = beta2()
+          epsilon = epsilon()
+
+        opt = adam_gs_optimizer.AdamGSOptimizer(global_step=global_step,
+                                                learning_rate=learning_rate)
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        opt_variables = opt.variables()
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+        self.assertTrue(beta1_power is not None)
+        self.assertTrue(beta2_power is not None)
+        self.assertNotIn(beta1_power, opt_variables)
+        self.assertNotIn(beta2_power, opt_variables)
+
+        if not context.executing_eagerly():
+          with ops.Graph().as_default():
+            # Shouldn't return non-slot variables from other graphs.
+            self.assertEqual(0, len(opt.variables()))
+          self.evaluate(variables.global_variables_initializer())
+          # Fetch params to validate initial values
+          self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+          self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          if not context.executing_eagerly():
+            self.evaluate(update)
+            self.assertAllCloseAccordingToType(
+                0.9**(t + 1), self.evaluate(beta1_power))
+            self.assertAllCloseAccordingToType(
+                0.999**(t + 1), self.evaluate(beta2_power))
+          else:
+            if t > 1:
+              opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                  global_step=global_step)
+              beta1_power, beta2_power = opt._get_beta_accumulators()
+              self.assertAllCloseAccordingToType(
+                  0.9**t, self.evaluate(beta1_power))
+              self.assertAllCloseAccordingToType(
+                  0.999**t, self.evaluate(beta2_power))
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+          if use_resource:
+            self.assertEqual("var0_%d/Adam:0" % (i,),
+                             opt.get_slot(var=var0, name="m").name)
+
+  def testBasic(self):
+    with self.cached_session():
+      self.doTestBasic(use_resource=False)
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testResourceBasic(self):
+    self.doTestBasic(use_resource=True)
+
+  def testBasicCallableParams(self):
+    with context.eager_mode():
+      self.doTestBasic(use_resource=True, use_callable_params=True)
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+        opt = adam_gs_optimizer.AdamGSOptimizer(
+            global_step=global_step, learning_rate=constant_op.constant(0.001))
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, self.evaluate(beta1_power))
+          self.assertAllCloseAccordingToType(0.999**t,
+                                             self.evaluate(beta2_power))
+          update.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+
+  def testSharing(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+        opt = adam_gs_optimizer.AdamGSOptimizer(global_step=global_step)
+        update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                      global_step=global_step)
+        update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                      global_step=global_step)
+        variables.global_variables_initializer().run()
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        # Run 3 steps of intertwined Adam1 and Adam2.
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, self.evaluate(beta1_power))
+          self.assertAllCloseAccordingToType(0.999**t,
+                                             self.evaluate(beta2_power))
+          if t % 2 == 0:
+            update1.run()
+          else:
+            update2.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+
+  def testTwoSessions(self):
+    optimizer = adam_gs_optimizer.AdamGSOptimizer()
+
+    with context.eager_mode():
+      var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+      grads0 = constant_op.constant(np.array([0.1, 0.1]))
+      optimizer.apply_gradients([(grads0, var0)])
+
+    g = ops.Graph()
+    with g.as_default():
+      with session.Session():
+        var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+        grads0 = constant_op.constant(np.array([0.1, 0.1]))
+        optimizer.apply_gradients([(grads0, var0)])
+
+    gg = ops.Graph()
+    with gg.as_default():
+      with session.Session():
+        var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+        grads0 = constant_op.constant(np.array([0.1, 0.1]))
+
+        # If the optimizer saves any state not keyed by graph the following line
+        # fails.
+        optimizer.apply_gradients([(grads0, var0)])
+
+  def testSlotsUniqueEager(self):
+    with context.eager_mode():
+      v1 = resource_variable_ops.ResourceVariable(1.)
+      v2 = resource_variable_ops.ResourceVariable(1.)
+      opt = adam_gs_optimizer.AdamGSOptimizer(1.)
+      opt.minimize(lambda: v1 + v2)
+      # There should be two unique slot variables for v1 and v2 respectively.
+      self.assertEqual(4, len(set(opt.variables())))
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/contrib/opt/python/training/lazy_adam_gs_optimizer.py b/tensorflow/contrib/opt/python/training/lazy_adam_gs_optimizer.py
new file mode 100644
index 0000000000..8827007e4d
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/lazy_adam_gs_optimizer.py
@@ -0,0 +1,114 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""LazyAdam rewrite to use global step for computing beta1 & beta2 accumulation.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.opt.python.training import adam_gs_optimizer
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+
+
+class LazyAdamGSOptimizer(adam_gs_optimizer.AdamGSOptimizer):
+  """Variant of the Adam optimizer that handles sparse updates more efficiently.
+
+  Branched from tf.contrib.opt.LazyAdamGSOptimizer. The only difference is to
+  pass global step for computing beta1 and beta2 accumulators, instead of having
+  optimizer keep its own independent beta1 and beta2 accumulators as non-slot
+  variables.
+
+  The original Adam algorithm maintains two moving-average accumulators for
+  each trainable variable; the accumulators are updated at every step.
+  This class provides lazier handling of gradient updates for sparse variables.
+  It only updates moving-average accumulators for sparse variable indices that
+  appear in the current batch, rather than updating the accumulators for all
+  indices. Compared with the original Adam optimizer, it can provide large
+  improvements in model training throughput for some applications. However, it
+  provides slightly different semantics than the original Adam algorithm, and
+  may lead to different empirical results.
+  """
+
+  def _apply_sparse(self, grad, var):
+    beta1_power, beta2_power = self._get_beta_accumulators()
+    beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
+    beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
+    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
+    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
+    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
+    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
+    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
+
+    # \\(m := beta1 * m + (1 - beta1) * g_t\\)
+    m = self.get_slot(var, "m")
+    m_t = state_ops.scatter_update(m, grad.indices,
+                                   beta1_t * array_ops.gather(m, grad.indices) +
+                                   (1 - beta1_t) * grad.values,
+                                   use_locking=self._use_locking)
+
+    # \\(v := beta2 * v + (1 - beta2) * (g_t * g_t)\\)
+    v = self.get_slot(var, "v")
+    v_t = state_ops.scatter_update(v, grad.indices,
+                                   beta2_t * array_ops.gather(v, grad.indices) +
+                                   (1 - beta2_t) * math_ops.square(grad.values),
+                                   use_locking=self._use_locking)
+
+    # \\(variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))\\)
+    m_t_slice = array_ops.gather(m_t, grad.indices)
+    v_t_slice = array_ops.gather(v_t, grad.indices)
+    denominator_slice = math_ops.sqrt(v_t_slice) + epsilon_t
+    var_update = state_ops.scatter_sub(var, grad.indices,
+                                       lr * m_t_slice / denominator_slice,
+                                       use_locking=self._use_locking)
+    return control_flow_ops.group(var_update, m_t, v_t)
+
+  def _resource_apply_sparse(self, grad, var, indices):
+    beta1_power, beta2_power = self._get_beta_accumulators()
+    beta1_power = math_ops.cast(beta1_power, var.dtype.base_dtype)
+    beta2_power = math_ops.cast(beta2_power, var.dtype.base_dtype)
+    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
+    beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
+    beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
+    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
+    lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
+
+    # \\(m := beta1 * m + (1 - beta1) * g_t\\)
+    m = self.get_slot(var, "m")
+    m_t_slice = beta1_t * array_ops.gather(m, indices) + (1 - beta1_t) * grad
+    m_update_op = resource_variable_ops.resource_scatter_update(m.handle,
+                                                                indices,
+                                                                m_t_slice)
+
+    # \\(v := beta2 * v + (1 - beta2) * (g_t * g_t)\\)
+    v = self.get_slot(var, "v")
+    v_t_slice = (beta2_t * array_ops.gather(v, indices) +
+                 (1 - beta2_t) * math_ops.square(grad))
+    v_update_op = resource_variable_ops.resource_scatter_update(v.handle,
+                                                                indices,
+                                                                v_t_slice)
+
+    # \\(variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))\\)
+    var_slice = lr * m_t_slice / (math_ops.sqrt(v_t_slice) + epsilon_t)
+    var_update_op = resource_variable_ops.resource_scatter_sub(var.handle,
+                                                               indices,
+                                                               var_slice)
+
+    return control_flow_ops.group(var_update_op, m_update_op, v_update_op)
diff --git a/tensorflow/contrib/opt/python/training/lazy_adam_gs_optimizer_test.py b/tensorflow/contrib/opt/python/training/lazy_adam_gs_optimizer_test.py
new file mode 100644
index 0000000000..bdc9a02a54
--- /dev/null
+++ b/tensorflow/contrib/opt/python/training/lazy_adam_gs_optimizer_test.py
@@ -0,0 +1,402 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for LazyAdamGSOptimizer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.contrib.opt.python.training import lazy_adam_gs_optimizer
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test
+
+
+def adam_update_numpy(param,
+                      g_t,
+                      t,
+                      m,
+                      v,
+                      alpha=0.001,
+                      beta1=0.9,
+                      beta2=0.999,
+                      epsilon=1e-8):
+  alpha_t = alpha * np.sqrt(1 - beta2**t) / (1 - beta1**t)
+
+  m_t = beta1 * m + (1 - beta1) * g_t
+  v_t = beta2 * v + (1 - beta2) * g_t * g_t
+
+  param_t = param - alpha_t * m_t / (np.sqrt(v_t) + epsilon)
+  return param_t, m_t, v_t
+
+
+class LazyAdamGSOptimizerTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.parameters([False, True])
+  def testSparse(self, use_resource):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          global_step = resource_variable_ops.ResourceVariable(
+              array_ops.zeros([], dtypes.int64))
+          var0 = resource_variable_ops.ResourceVariable(var0_np)
+          var1 = resource_variable_ops.ResourceVariable(var1_np)
+        else:
+          global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+
+        grads0_np_indices = np.array([0, 1], dtype=np.int32)
+        grads0 = ops.IndexedSlices(
+            constant_op.constant(grads0_np),
+            constant_op.constant(grads0_np_indices), constant_op.constant([2]))
+        grads1_np_indices = np.array([0, 1], dtype=np.int32)
+        grads1 = ops.IndexedSlices(
+            constant_op.constant(grads1_np),
+            constant_op.constant(grads1_np_indices), constant_op.constant([2]))
+        opt = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(
+            global_step=global_step)
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          update.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  @parameterized.parameters([False, True])
+  def testSparseDevicePlacement(self, use_resource):
+    for index_dtype in [dtypes.int32, dtypes.int64]:
+      with self.cached_session(force_gpu=test.is_gpu_available()):
+        # If a GPU is available, tests that all optimizer ops can be placed on
+        # it (i.e. they have GPU kernels).
+        if use_resource:
+          global_step = resource_variable_ops.ResourceVariable(
+              array_ops.zeros([], dtypes.int64))
+          var = resource_variable_ops.ResourceVariable([[1.0], [2.0]])
+        else:
+          global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+          var = variables.Variable([[1.0], [2.0]])
+
+        indices = constant_op.constant([0, 1], dtype=index_dtype)
+        gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices))
+        optimizer = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(
+            global_step=global_step, learning_rate=3.0)
+        minimize_op = optimizer.minimize(gathered_sum, global_step=global_step)
+        variables.global_variables_initializer().run()
+        minimize_op.run()
+
+  @parameterized.parameters([False, True])
+  def testSparseRepeatedIndices(self, use_resource):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        if use_resource:
+          repeated_index_global_step = resource_variable_ops.ResourceVariable(
+              array_ops.zeros([], dtypes.int64))
+          aggregated_global_step = resource_variable_ops.ResourceVariable(
+              array_ops.zeros([], dtypes.int64))
+          repeated_index_update_var = resource_variable_ops.ResourceVariable(
+              [[1.0], [2.0]], dtype=dtype)
+          aggregated_update_var = resource_variable_ops.ResourceVariable(
+              [[1.0], [2.0]], dtype=dtype)
+        else:
+          repeated_index_global_step = variables.Variable(
+              array_ops.zeros([], dtypes.int64))
+          aggregated_global_step = variables.Variable(
+              array_ops.zeros([], dtypes.int64))
+          repeated_index_update_var = variables.Variable(
+              [[1.0], [2.0]], dtype=dtype)
+          aggregated_update_var = variables.Variable(
+              [[1.0], [2.0]], dtype=dtype)
+
+        grad_repeated_index = ops.IndexedSlices(
+            constant_op.constant(
+                [0.1, 0.1], shape=[2, 1], dtype=dtype),
+            constant_op.constant([1, 1]),
+            constant_op.constant([2, 1]))
+        grad_aggregated = ops.IndexedSlices(
+            constant_op.constant(
+                [0.2], shape=[1, 1], dtype=dtype),
+            constant_op.constant([1]),
+            constant_op.constant([2, 1]))
+        repeated_update_opt = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(
+            global_step=repeated_index_global_step)
+        repeated_update = repeated_update_opt.apply_gradients(
+            [(grad_repeated_index, repeated_index_update_var)],
+            global_step=repeated_index_global_step)
+        aggregated_update_opt = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(
+            global_step=aggregated_global_step)
+        aggregated_update = aggregated_update_opt.apply_gradients(
+            [(grad_aggregated, aggregated_update_var)],
+            global_step=aggregated_global_step)
+        variables.global_variables_initializer().run()
+        self.assertAllClose(aggregated_update_var.eval(),
+                            repeated_index_update_var.eval())
+        for _ in range(3):
+          repeated_update.run()
+          aggregated_update.run()
+          self.assertAllClose(aggregated_update_var.eval(),
+                              repeated_index_update_var.eval())
+
+  def doTestBasic(self, use_resource=False, use_callable_params=False):
+    for i, dtype in enumerate([dtypes.half, dtypes.float32, dtypes.float64]):
+      with self.session(graph=ops.Graph()):
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        if use_resource:
+          global_step = resource_variable_ops.ResourceVariable(
+              array_ops.zeros([], dtypes.int64), name="global_step_%d" % i)
+          var0 = resource_variable_ops.ResourceVariable(
+              var0_np, name="var0_%d" % i)
+          var1 = resource_variable_ops.ResourceVariable(
+              var1_np, name="var1_%d" % i)
+        else:
+          global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+          var0 = variables.Variable(var0_np)
+          var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+
+        learning_rate = lambda: 0.001
+        beta1 = lambda: 0.9
+        beta2 = lambda: 0.999
+        epsilon = lambda: 1e-8
+        if not use_callable_params:
+          learning_rate = learning_rate()
+          beta1 = beta1()
+          beta2 = beta2()
+          epsilon = epsilon()
+
+        opt = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(
+            global_step=global_step, learning_rate=learning_rate)
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        opt_variables = opt.variables()
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+        self.assertIsNotNone(beta1_power)
+        self.assertIsNotNone(beta2_power is not None)
+        self.assertNotIn(beta1_power, opt_variables)
+        self.assertNotIn(beta2_power, opt_variables)
+
+        if not context.executing_eagerly():
+          with ops.Graph().as_default():
+            # Shouldn't return non-slot variables from other graphs.
+            self.assertEqual(0, len(opt.variables()))
+          self.evaluate(variables.global_variables_initializer())
+          # Fetch params to validate initial values
+          self.assertAllClose([1.0, 2.0], self.evaluate(var0))
+          self.assertAllClose([3.0, 4.0], self.evaluate(var1))
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          if not context.executing_eagerly():
+            self.evaluate(update)
+            self.assertAllCloseAccordingToType(
+                0.9**(t + 1), self.evaluate(beta1_power))
+            self.assertAllCloseAccordingToType(
+                0.999**(t + 1), self.evaluate(beta2_power))
+          else:
+            if t > 1:
+              opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                  global_step=global_step)
+              beta1_power, beta2_power = opt._get_beta_accumulators()
+              self.assertAllCloseAccordingToType(
+                  0.9**t, self.evaluate(beta1_power))
+              self.assertAllCloseAccordingToType(
+                  0.999**t, self.evaluate(beta2_power))
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
+          self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
+          if use_resource:
+            self.assertEqual("var0_%d/Adam:0" % (i,),
+                             opt.get_slot(var=var0, name="m").name)
+
+  def testBasic(self):
+    with self.cached_session():
+      self.doTestBasic(use_resource=False)
+
+  @test_util.run_in_graph_and_eager_modes(reset_test=True)
+  def testResourceBasic(self):
+    self.doTestBasic(use_resource=True)
+
+  def testBasicCallableParams(self):
+    with context.eager_mode():
+      self.doTestBasic(use_resource=True, use_callable_params=True)
+
+  def testTensorLearningRate(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+        opt = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(
+            global_step=global_step, learning_rate=constant_op.constant(0.001))
+        update = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                     global_step=global_step)
+        variables.global_variables_initializer().run()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Run 3 steps of Adam
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          update.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testSharing(self):
+    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+      with self.cached_session():
+        global_step = variables.Variable(array_ops.zeros([], dtypes.int64))
+        # Initialize variables for numpy implementation.
+        m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0
+        var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype)
+        grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype)
+        var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
+        grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype)
+
+        var0 = variables.Variable(var0_np)
+        var1 = variables.Variable(var1_np)
+        grads0 = constant_op.constant(grads0_np)
+        grads1 = constant_op.constant(grads1_np)
+        opt = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(
+            global_step=global_step)
+        update1 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                      global_step=global_step)
+        update2 = opt.apply_gradients(zip([grads0, grads1], [var0, var1]),
+                                      global_step=global_step)
+        variables.global_variables_initializer().run()
+
+        beta1_power, beta2_power = opt._get_beta_accumulators()
+
+        # Fetch params to validate initial values
+        self.assertAllClose([1.0, 2.0], var0.eval())
+        self.assertAllClose([3.0, 4.0], var1.eval())
+
+        # Run 3 steps of intertwined Adam1 and Adam2.
+        for t in range(1, 4):
+          self.assertAllCloseAccordingToType(0.9**t, beta1_power.eval())
+          self.assertAllCloseAccordingToType(0.999**t, beta2_power.eval())
+          if t % 2 == 0:
+            update1.run()
+          else:
+            update2.run()
+
+          var0_np, m0, v0 = adam_update_numpy(var0_np, grads0_np, t, m0, v0)
+          var1_np, m1, v1 = adam_update_numpy(var1_np, grads1_np, t, m1, v1)
+
+          # Validate updated params
+          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(var1_np, var1.eval())
+
+  def testTwoSessions(self):
+    optimizer = lazy_adam_gs_optimizer.LazyAdamGSOptimizer()
+
+    with context.eager_mode():
+      var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+      grads0 = constant_op.constant(np.array([0.1, 0.1]))
+      optimizer.apply_gradients([(grads0, var0)])
+
+    g = ops.Graph()
+    with g.as_default():
+      with self.session(graph=g):
+        var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+        grads0 = constant_op.constant(np.array([0.1, 0.1]))
+        optimizer.apply_gradients([(grads0, var0)])
+
+    gg = ops.Graph()
+    with gg.as_default():
+      with self.session(graph=gg):
+        var0 = variables.Variable(np.array([1.0, 2.0]), name="v0")
+        grads0 = constant_op.constant(np.array([0.1, 0.1]))
+
+        # If the optimizer saves any state not keyed by graph the following line
+        # fails.
+        optimizer.apply_gradients([(grads0, var0)])
+
+  def testSlotsUniqueEager(self):
+    with context.eager_mode():
+      v1 = resource_variable_ops.ResourceVariable(1.)
+      v2 = resource_variable_ops.ResourceVariable(1.)
+      opt = lazy_adam_gs_optimizer.LazyAdamGSOptimizer(1.)
+      opt.minimize(lambda: v1 + v2)
+      # There should be two non-slot variables, and two unique slot variables
+      # for v1 and v2 respectively.
+      self.assertLen(set(opt.variables()), 4)
+
+
+if __name__ == "__main__":
+  test.main()
-- 
GitLab


From f5aed4f8f10fdd3c3910bdb544c882a0dc96ba14 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 01:02:44 -0800
Subject: [PATCH 340/873] compat: Update forward compatibility horizon to
 2018-12-11

PiperOrigin-RevId: 224956744
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index f11e97b211..679dcf9696 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 10)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 11)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 221f4d23c6cffa2ad5fb492a300fafda2a640cd8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 02:10:55 -0800
Subject: [PATCH 341/873] Switch to mounting the current source in the remote
 config docker.

Add workaround for the latest toolchain repository not supporting older bazel
versions; only load it conditionally.

PiperOrigin-RevId: 224965872
---
 WORKSPACE                                     | 35 +++++-----
 tensorflow/opensource_only.files              |  1 +
 tensorflow/version_check.bzl                  | 66 ++++++++++---------
 .../preconfig/generate/archives.bzl           | 25 +++++++
 .../preconfig/generate/generate.bzl           |  4 +-
 .../toolchains/preconfig/generate/generate.sh |  2 +-
 6 files changed, 79 insertions(+), 54 deletions(-)
 create mode 100644 third_party/toolchains/preconfig/generate/archives.bzl

diff --git a/WORKSPACE b/WORKSPACE
index 7cc08e0164..99d368ff91 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -16,30 +16,27 @@ load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
 
 closure_repositories()
 
-http_archive(
-    name = "base_images_docker",
-    sha256 = "e2b1b7254270bb7605e814a9dbf6d1e4ae04a11136ff1714fbfdabe3f87f7cf9",
-    strip_prefix = "base-images-docker-12801524f867e657fbb5d1a74f31618aff181ac6",
-    urls = ["https://github.com/GoogleCloudPlatform/base-images-docker/archive/12801524f867e657fbb5d1a74f31618aff181ac6.tar.gz"],
-)
+load("//third_party/toolchains/preconfig/generate:archives.bzl",
+     "bazel_toolchains_archive")
 
-http_archive(
-    name = "bazel_toolchains",
-    sha256 = "15b5858b1b5541ec44df31b94c3b8672815b31d71215a98398761ea9f4c4eedb",
-    strip_prefix = "bazel-toolchains-6200b238c9c2d137c0d9a7262c80cc71d98e692b",
-    urls = [
-        "https://github.com/bazelbuild/bazel-toolchains/archive/6200b238c9c2d137c0d9a7262c80cc71d98e692b.tar.gz",
-    ],
+bazel_toolchains_archive()
+
+load(
+    "@bazel_toolchains//repositories:repositories.bzl",
+    bazel_toolchains_repositories = "repositories",
 )
 
-http_archive(
-    name = "io_bazel_rules_docker",
-    sha256 = "29d109605e0d6f9c892584f07275b8c9260803bf0c6fcb7de2623b2bedc910bd",
-    strip_prefix = "rules_docker-0.5.1",
-    urls = ["https://github.com/bazelbuild/rules_docker/archive/v0.5.1.tar.gz"],
+bazel_toolchains_repositories()
+
+load(
+    "@io_bazel_rules_docker//container:container.bzl",
+    container_repositories = "repositories",
 )
 
-load("//third_party/toolchains/preconfig/generate:workspace.bzl", "remote_config_workspace")
+container_repositories()
+
+load("//third_party/toolchains/preconfig/generate:workspace.bzl",
+     "remote_config_workspace")
 
 remote_config_workspace()
 
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 347dc9fc6b..418ef1a369 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -49,6 +49,7 @@ tensorflow/third_party/toolchains/preconfig/ubuntu14.04/nccl2/BUILD
 tensorflow/third_party/toolchains/preconfig/generate/workspace.bzl
 tensorflow/third_party/toolchains/preconfig/generate/containers.bzl
 tensorflow/third_party/toolchains/preconfig/generate/generate.bzl
+tensorflow/third_party/toolchains/preconfig/generate/archives.bzl
 tensorflow/third_party/toolchains/preconfig/generate/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/dummy_toolchain.bzl
diff --git a/tensorflow/version_check.bzl b/tensorflow/version_check.bzl
index 79e721dab4..74feaa19ff 100644
--- a/tensorflow/version_check.bzl
+++ b/tensorflow/version_check.bzl
@@ -1,48 +1,52 @@
 """ Helpers to check minimum version of bazel."""
 
 def _extract_version_number(bazel_version):
-  """Extracts the semantic version number from a version string
+    """Extracts the semantic version number from a version string
 
-  Args:
-    bazel_version: the version string that begins with the semantic version
-      e.g. "1.2.3rc1 abc1234" where "abc1234" is a commit hash.
+    Args:
+      bazel_version: the version string that begins with the semantic version
+        e.g. "1.2.3rc1 abc1234" where "abc1234" is a commit hash.
 
-  Returns:
-    The semantic version string, like "1.2.3".
-  """
-  for i in range(len(bazel_version)):
-    c = bazel_version[i]
-    if not (c.isdigit() or c == "."):
-      return bazel_version[:i]
-  return bazel_version
+    Returns:
+      The semantic version string, like "1.2.3".
+    """
+    for i in range(len(bazel_version)):
+        c = bazel_version[i]
+        if not (c.isdigit() or c == "."):
+            return bazel_version[:i]
+    return bazel_version
 
 # Parse the bazel version string from `native.bazel_version`.
 # e.g.
 # "0.10.0rc1 abc123d" => (0, 10, 0)
 # "0.3.0" => (0, 3, 0)
 def _parse_bazel_version(bazel_version):
-  """Parses a version string into a 3-tuple of ints
+    """Parses a version string into a 3-tuple of ints
 
-  int tuples can be compared directly using binary operators (<, >).
+    int tuples can be compared directly using binary operators (<, >).
 
-  Args:
-    bazel_version: the Bazel version string
+    Args:
+      bazel_version: the Bazel version string
 
-  Returns:
-    An int 3-tuple of a (major, minor, patch) version.
-  """
+    Returns:
+      An int 3-tuple of a (major, minor, patch) version.
+    """
 
-  version = _extract_version_number(bazel_version)
-  return tuple([int(n) for n in version.split(".")])
+    version = _extract_version_number(bazel_version)
+    return tuple([int(n) for n in version.split(".")])
 
 def check_bazel_version_at_least(minimum_bazel_version):
-  if "bazel_version" not in dir(native):
-    fail("\nCurrent Bazel version is lower than 0.2.1, expected at least %s\n" % minimum_bazel_version)
-  elif not native.bazel_version:
-    print("\nCurrent Bazel is not a release version, cannot check for compatibility.")
-    print("Make sure that you are running at least Bazel %s.\n" % minimum_bazel_version)
-    return
-
-  if _parse_bazel_version(native.bazel_version) < _parse_bazel_version(minimum_bazel_version):
-    fail("\nCurrent Bazel version is {}, expected at least {}\n".format(
-        native.bazel_version, minimum_bazel_version))
+    if "bazel_version" not in dir(native):
+        fail("\nCurrent Bazel version is lower than 0.2.1, expected at least %s\n" % minimum_bazel_version)
+    elif not native.bazel_version:
+        print("\nCurrent Bazel is not a release version, cannot check for compatibility.")
+        print("Make sure that you are running at least Bazel %s.\n" % minimum_bazel_version)
+        return
+
+    if _parse_bazel_version(native.bazel_version) < _parse_bazel_version(minimum_bazel_version):
+        fail("\nCurrent Bazel version is {}, expected at least {}\n".format(
+            native.bazel_version,
+            minimum_bazel_version,
+        ))
+
+parse_bazel_version = _parse_bazel_version
diff --git a/third_party/toolchains/preconfig/generate/archives.bzl b/third_party/toolchains/preconfig/generate/archives.bzl
new file mode 100644
index 0000000000..086b75b62e
--- /dev/null
+++ b/third_party/toolchains/preconfig/generate/archives.bzl
@@ -0,0 +1,25 @@
+load("//tensorflow:version_check.bzl", "parse_bazel_version")
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+def bazel_toolchains_archive():
+  if parse_bazel_version(native.bazel_version) >= parse_bazel_version("0.19"):
+    # This version of the toolchains repo is incompatible with older bazel
+    # versions - we can remove this once TensorFlow drops support for bazel
+    # before 0.19.
+    http_archive(
+        name = "bazel_toolchains",
+        sha256 = "41c48a189be489e2d15dec40e0057ea15b95ee5b39cc2a7e6cf663e31432c75e",
+        strip_prefix = "bazel-toolchains-3f8c58fe530fedc446de04673bc1e32985887dea",
+        urls = [
+            "https://github.com/nlopezgi/bazel-toolchains/archive/3f8c58fe530fedc446de04673bc1e32985887dea.tar.gz",
+        ],
+    )
+  else:
+    http_archive(
+        name = "bazel_toolchains",
+        sha256 = "15b5858b1b5541ec44df31b94c3b8672815b31d71215a98398761ea9f4c4eedb",
+        strip_prefix = "bazel-toolchains-6200b238c9c2d137c0d9a7262c80cc71d98e692b",
+        urls = [
+            "https://github.com/bazelbuild/bazel-toolchains/archive/6200b238c9c2d137c0d9a7262c80cc71d98e692b.tar.gz",
+        ],
+    )
diff --git a/third_party/toolchains/preconfig/generate/generate.bzl b/third_party/toolchains/preconfig/generate/generate.bzl
index 2fb3a94cdc..fb2af02a53 100644
--- a/third_party/toolchains/preconfig/generate/generate.bzl
+++ b/third_party/toolchains/preconfig/generate/generate.bzl
@@ -36,9 +36,7 @@ def _tensorflow_rbe_config(name, cuda_version, cudnn_version, python_version, co
             "TF_NCCL_VERSION": "2",
             "CUDNN_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
         },
-        # TODO(klimek): We should use the sources that we currently work on, not
-        # just the latest snapshot of tensorflow that is checked in.
-        git_repo = "https://github.com/tensorflow/tensorflow",
+        mount_project = "$(mount_project)",
         tags = ["manual"],
         incompatible_changes_off = True,
     )
diff --git a/third_party/toolchains/preconfig/generate/generate.sh b/third_party/toolchains/preconfig/generate/generate.sh
index 37c5211278..1f39fcdf6d 100755
--- a/third_party/toolchains/preconfig/generate/generate.sh
+++ b/third_party/toolchains/preconfig/generate/generate.sh
@@ -46,7 +46,7 @@ echo "CUDA: ${CUDA_VERSION}"
 echo "CUDNN: ${CUDNN_VERSION}"
 echo "NCCL: ${NCCL_VERSION}"
 
-bazel build "${PKG}/generate:${TARGET}"
+bazel build --define=mount_project="${PWD}" "${PKG}/generate:${TARGET}"
 cd "${TEMPDIR}"
 tar xvf "${ROOT}/bazel-bin/${PKG}/generate/${TARGET}_outputs.tar"
 
-- 
GitLab


From 62e8e1fa7ed38b76870ed851121d56df524c7287 Mon Sep 17 00:00:00 2001
From: hyunyoung <gusdud1500@gmail.com>
Date: Tue, 11 Dec 2018 21:37:25 +0900
Subject: [PATCH 342/873] fix typo in _InsertQuantOp docstring

---
 tensorflow/contrib/quantize/python/quantize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py
index 21d1b12130..7c973fe597 100644
--- a/tensorflow/contrib/quantize/python/quantize.py
+++ b/tensorflow/contrib/quantize/python/quantize.py
@@ -685,7 +685,7 @@ def _InsertQuantOp(context,
       [1; 2^bits - 1] or wide range [0; 2^bits - 1].
     producer_scope: The restriction of producer scope. If not None, the new op
       will be inserted only when the producer is in this scope.
-    consumer_scope: The restriction of producer scope. If not None, the new op
+    consumer_scope: The restriction of consumer scope. If not None, the new op
       will be inserted only when all the consumers are in this scope.
   Raises:
     ValueError: When producer operation is not directly connected to the
-- 
GitLab


From 0f2e0d1037be7f8423700e1d8dd455ef969cfbec Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <slebedev@google.com>
Date: Tue, 11 Dec 2018 05:20:26 -0800
Subject: [PATCH 343/873] Deprecated `Variable.count_up_to` and
 `tf.count_up_to`.

`count_up_to` is currently implemented as a variable-specific
op with independent implementations for `RefVariable` and
`ResourceVariable`. While it can be implemented in a more
generic way in terms of `Variable.assign_add`, a better solution
is to use `Dataset.range` for counting.

PiperOrigin-RevId: 224984695
---
 tensorflow/python/ops/resource_variable_ops.py | 2 ++
 tensorflow/python/ops/state_ops.py             | 2 ++
 tensorflow/python/ops/variables.py             | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 1066b357b4..dc53fb8e92 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -43,6 +43,7 @@ from tensorflow.python.ops.gen_resource_variable_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import compat
+from tensorflow.python.util.deprecation import deprecated
 
 
 def get_resource_handle_data(graph_op):
@@ -685,6 +686,7 @@ class ResourceVariable(variables.RefVariable):
     raise NotImplementedError(
         "numpy() is only available when eager execution is enabled.")
 
+  @deprecated(None, "Prefer Dataset.range instead.")
   def count_up_to(self, limit):
     """Increments this variable until it reaches `limit`.
 
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index 3ac69c1c20..71aaceee27 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -33,6 +33,7 @@ from tensorflow.python.ops import gen_state_ops
 from tensorflow.python.ops.gen_state_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import deprecation
+from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -224,6 +225,7 @@ def assign(ref, value, validate_shape=None, use_locking=None, name=None):
 
 
 @tf_export(v1=["count_up_to"])
+@deprecated(None, "Prefer Dataset.range instead.")
 def count_up_to(ref, limit, name=None):
   r"""Increments 'ref' until it reaches 'limit'.
 
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index a31ce65518..e231343825 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -837,6 +837,7 @@ class Variable(six.with_metaclass(VariableMetaclass,
     """
     raise NotImplementedError
 
+  @deprecated(None, "Prefer Dataset.range instead.")
   def count_up_to(self, limit):
     """Increments this variable until it reaches `limit`.
 
@@ -2117,6 +2118,7 @@ class RefVariable(VariableV1):
                                               new_axis_mask=new_axis_mask,
                                               shrink_axis_mask=shrink_axis_mask)
 
+  @deprecated(None, "Prefer Dataset.range instead.")
   def count_up_to(self, limit):
     """Increments this variable until it reaches `limit`.
 
-- 
GitLab


From dba64a3f5a7998166b36e4b9287504ed506e9379 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 08:18:58 -0800
Subject: [PATCH 344/873] Reset XRT memory allocations at
 ConfigureDistributedTPU time. Using XRTAllocate to register device memory, a
 user gets back int64 handles which needs to be explicitly deleted in order to
 avoid memory leaks. If a client crashes (or has bugs in its handle release
 logic), a remote TF server will be leaking memory with no possibility of
 recover. Since clients always run a ConfigureDistributedTPU at boot time, we
 clear the XRT allocated resource manager container at that time. Also add a
 new XRTReleaseAllAllocations operation, to clear all the XRT memory on the
 target host.

PiperOrigin-RevId: 225006277
---
 .../compiler/xrt/kernels/xrt_state_ops.cc     |  5 +++
 .../compiler/xrt/kernels/xrt_state_ops.h      | 20 ++++++++++++
 tensorflow/compiler/xrt/ops/xrt_state_ops.cc  |  7 +++++
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 31 +++++++++++++++++++
 tensorflow/compiler/xrt/xrt_state.cc          |  5 +++
 tensorflow/compiler/xrt/xrt_state.h           |  4 +++
 6 files changed, 72 insertions(+)

diff --git a/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc
index 3258286c10..1a5bfac337 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc
+++ b/tensorflow/compiler/xrt/kernels/xrt_state_ops.cc
@@ -120,4 +120,9 @@ REGISTER_KERNEL_BUILDER(Name("XRTReleaseAllocationHandle")
                             .HostMemory("handle"),
                         XRTReleaseAllocationOp<XRTGenericDeviceAccessor>);
 
+REGISTER_KERNEL_BUILDER(Name("XRTReleaseAllAllocations").Device(DEVICE_XLA_GPU),
+                        XRTReleaseAllAllocationsOp<XRTGenericDeviceAccessor>);
+REGISTER_KERNEL_BUILDER(Name("XRTReleaseAllAllocations").Device(DEVICE_XLA_CPU),
+                        XRTReleaseAllAllocationsOp<XRTGenericDeviceAccessor>);
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/xrt/kernels/xrt_state_ops.h b/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
index 26a58fa42d..e3b292e790 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
+++ b/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
@@ -469,6 +469,26 @@ class XRTReleaseAllocationOp : public OpKernel {
   }
 };
 
+// Op that discards a handle to device memory.
+template <class DeviceAccessor>
+class XRTReleaseAllAllocationsOp : public OpKernel {
+ public:
+  explicit XRTReleaseAllAllocationsOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx) {}
+  ~XRTReleaseAllAllocationsOp() override = default;
+  XRTReleaseAllAllocationsOp(const XRTReleaseAllAllocationsOp&) = delete;
+  XRTReleaseAllAllocationsOp& operator=(const XRTReleaseAllAllocationsOp&) =
+      delete;
+
+  void Compute(OpKernelContext* ctx) override {
+    VLOG(1) << "XRTReleaseAllAllocationsOp::Compute";
+
+    ResourceMgr* rm;
+    OP_REQUIRES_OK(ctx, DeviceAccessor::GetResourceManager(ctx, &rm));
+    OP_REQUIRES_OK(ctx, XRTTupleAllocation::ReleaseAllAllocations(rm));
+  }
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_XRT_KERNELS_XRT_STATE_OPS_H_
diff --git a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc
index a3d63106fa..fe6bee0dac 100644
--- a/tensorflow/compiler/xrt/ops/xrt_state_ops.cc
+++ b/tensorflow/compiler/xrt/ops/xrt_state_ops.cc
@@ -133,4 +133,11 @@ used.
 'handle' is the id returned from the Op that produced the on-device allocation.
 )");
 
+REGISTER_OP("XRTReleaseAllAllocations")
+    .SetShapeFn(tensorflow::shape_inference::NoOutputs)
+    .Doc(
+        R"(
+Discards all the XRT allocations. All the client held handles will be invalid.
+)");
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index abaa17e50e..730a227167 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -265,6 +265,37 @@ TEST(RawApiTest, AllocAndRewrite) {
                            &outputs));
 }
 
+TEST(RawApiTest, AllocAndClearAll) {
+  xrt::XLAAllocation alloc;
+  alloc.set_device_ordinal(0);
+  *alloc.mutable_value() =
+      xla::LiteralUtil::CreateR2({{4, 5}, {6, 7}}).ToProto();
+
+  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
+  auto value =
+      ops::Const(root.WithDevice("/device:CPU:0"), alloc.SerializeAsString());
+  auto handle = ops::XRTAllocate(root, value);
+  TF_ASSERT_OK(root.status());
+
+  tensorflow::ClientSession session(root);
+  std::vector<tensorflow::Tensor> outputs;
+  TF_EXPECT_OK(session.Run({handle}, &outputs));
+  EXPECT_EQ(outputs.size(), 1);
+
+  int64 allocation_handle = outputs[0].scalar<int64>()();
+
+  auto clear_all = ops::XRTReleaseAllAllocations(root);
+
+  outputs.clear();
+  TF_EXPECT_OK(session.Run(tensorflow::ClientSession::FeedType(), {},
+                           {clear_all}, &outputs));
+  EXPECT_EQ(outputs.size(), 0);
+
+  auto read_after_clear = ops::XRTReadLiteral(root, Input(allocation_handle));
+  EXPECT_EQ(session.Run({read_after_clear}, &outputs).code(),
+            tensorflow::error::Code::NOT_FOUND);
+}
+
 TEST(RawApiTest, ReadAndWriteState) {
   xrt::XLAAllocation alloc;
   alloc.set_device_ordinal(0);
diff --git a/tensorflow/compiler/xrt/xrt_state.cc b/tensorflow/compiler/xrt/xrt_state.cc
index 31603e044d..343460ff10 100644
--- a/tensorflow/compiler/xrt/xrt_state.cc
+++ b/tensorflow/compiler/xrt/xrt_state.cc
@@ -272,6 +272,11 @@ const se::DeviceMemoryBase& XRTTupleAllocation::root_allocation() {
   return rm->Delete<XRTTupleAllocation>(kTupleContainer, key_string);
 }
 
+/* static */ Status XRTTupleAllocation::ReleaseAllAllocations(ResourceMgr* rm) {
+  VLOG(1) << "Releasing all XRT held device memory";
+  return rm->Cleanup(kTupleContainer);
+}
+
 // Helper typedef to make ShapeTree ForEach helper lambda signatures more
 // readable. They need a type of const T& where in this case T is the
 // following pointer.
diff --git a/tensorflow/compiler/xrt/xrt_state.h b/tensorflow/compiler/xrt/xrt_state.h
index 3664c0cd4e..3e3d502412 100644
--- a/tensorflow/compiler/xrt/xrt_state.h
+++ b/tensorflow/compiler/xrt/xrt_state.h
@@ -129,6 +129,10 @@ class XRTTupleAllocation : public ResourceBase {
   // Deletes the reference in the rm to an allocation interned under key.
   static Status DeleteFromResourceManager(ResourceMgr* rm, int64 key);
 
+  // Releases all the device memory allocated by XRT within the resource
+  // manager.
+  static Status ReleaseAllAllocations(ResourceMgr* rm);
+
   // Adds the allocation to a ResourceMgr and returns the key that will be used
   // to retrieve it. Transfers a reference on *this to rm.
   Status Intern(ResourceMgr* rm, int64* key);
-- 
GitLab


From b7e2c36719dd290308ecb5ff604276fd8c059aae Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 08:54:36 -0800
Subject: [PATCH 345/873] Fix erroneous dimension .value call

PiperOrigin-RevId: 225011350
---
 tensorflow/python/keras/layers/core.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py
index 854774c569..1b406677d9 100644
--- a/tensorflow/python/keras/layers/core.py
+++ b/tensorflow/python/keras/layers/core.py
@@ -549,7 +549,8 @@ class Flatten(Layer):
       inputs = array_ops.transpose(inputs, perm=permutation)
 
     outputs = array_ops.reshape(
-        inputs, (inputs.shape[0].value or array_ops.shape(inputs)[0], -1))
+        inputs, (tensor_shape.dimension_value(inputs.shape[0])
+                 or array_ops.shape(inputs)[0], -1))
     if not context.executing_eagerly():
       outputs.set_shape(self.compute_output_shape(inputs.get_shape()))
     return outputs
-- 
GitLab


From f7a9503c9ce346ae1a442fe6aa6551d9475a931f Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Tue, 11 Dec 2018 09:05:05 -0800
Subject: [PATCH 346/873] [XLA:GPU] Convert the reduction implementation to the
 kernel mapping scheme.

Convert the implementation of scalar reduction, row reduction and column
reduction to use EmitTiledKernel, which is a more general kernel tiling
implementation that is based on the information defined by an object of
KernelMappingScheme. For scalar reduction and row reduction, the new
implementation should generate the similar optimized code as the old
implementation.

For column reduction, the new implementation is not exactly the same as the old
implementation for a few reasons. First, in the old implementation, routine
IrEmitterUnnested::EmitColumnReduction uses kTileWidth to control the number
of output elements for which each thread computes a partial result and set the
value of kTileWidth to 2. The new implementation is equivalent to the old
implementation with kTileWidth=1. Supporting kTileWidth=1 in the new
implementation will complicate the implementation and our experiment didn't
show much benefit of kTileWidth=2. Second, the old implementation tries to
maximize the hardware thread blocks. The new implementation currently only uses
one hardware thread block to precess one block of tiles because it uses the
hardware block ID as the index for the block of tiles and uses the hardware
thread ID as the index for the elements within a tile.
PiperOrigin-RevId: 225013188
---
 .../xla/service/gpu/ir_emitter_unnested.cc    | 1834 +++++++----------
 .../xla/service/gpu/ir_emitter_unnested.h     |  109 +-
 .../xla/service/gpu/partition_assignment.cc   |   35 +-
 .../xla/service/gpu/partition_assignment.h    |    3 +
 .../xla/service/llvm_ir/kernel_tiling.cc      |   18 +-
 .../xla/service/llvm_ir/kernel_tiling.h       |   19 +-
 6 files changed, 795 insertions(+), 1223 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index c8b5343e61..87d16c0afc 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h"
 
 #include "absl/algorithm/container.h"
-#include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/types/optional.h"
@@ -548,91 +547,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
           // TODO(b/112040122): Support variadic reduce.
           return Unimplemented("Variadic reduce is not supported on GPU");
         }
-        VLOG(3) << "Emitting fused reduction to vector: " << fusion->ToString();
-        std::vector<std::unique_ptr<Thunk>> thunks;
-        absl::Span<HloInstruction* const> output_instructions =
-            root->opcode() == HloOpcode::kTuple
-                ? root->operands()
-                : absl::Span<HloInstruction* const>(&root, 1);
-
-        // For multi-output fusion emit an initializer for each tuple element.
-        // Otherwise it's sufficient to just initialize the single output.
-        HloInstruction* first_reduce = nullptr;
-        for (int i = 0, e = output_instructions.size(); i != e; ++i) {
-          if (output_instructions[i]->opcode() == HloOpcode::kReduce) {
-            TF_ASSIGN_OR_RETURN(
-                std::unique_ptr<Thunk> initializer_thunk,
-                BuildInitializerThunk(fusion, output_instructions[i] == root
-                                                  ? ShapeIndex()
-                                                  : ShapeIndex({i})));
-            thunks.push_back(std::move(initializer_thunk));
-            first_reduce =
-                first_reduce == nullptr ? output_instructions[i] : first_reduce;
-          }
-        }
-        CHECK(first_reduce != nullptr);
-        std::unique_ptr<KernelThunk> kernel_thunk =
-            BuildKernelThunk(fusion, /*implements_whole_instruction=*/false);
-        GpuElementalIrEmitter elemental_emitter(
-            hlo_module_config_, ir_emitter_context_->llvm_module(), &b_,
-            GetNestedComputer());
-        FusedIrEmitter fused_emitter(GetGeneratorForOperandIrArrays(fusion),
-                                     &elemental_emitter);
-        TF_RETURN_IF_ERROR(root->Accept(&fused_emitter));
-
-        // For multi-output fusion CHECK the constraints and feed all the
-        // reduces into a single loop code generator. Single-output reduce
-        // fusion is a special case of that.
-        InlinedVector<llvm_ir::ElementGenerator, 1> input_gens;
-        InlinedVector<llvm_ir::ElementGenerator, 1> init_value_gens;
-        std::vector<std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-            extra_output_gens;
-        InlinedVector<HloComputation*, 1> reducers;
-        InlinedVector<ShapeIndex, 1> reduce_output_shapes;
-        for (int i = 0, e = output_instructions.size(); i != e; ++i) {
-          const HloInstruction* inst = output_instructions[i];
-          ShapeIndex output_shape_index;
-          if (root->opcode() == HloOpcode::kTuple) {
-            output_shape_index = {i};
-          }
-          if (inst->opcode() == HloOpcode::kReduce) {
-            CHECK(IsReductionToVector(*inst))
-                << "Only reductions to vector are supported";
-            // Shapes, layouts and dimensions must be the same for all reduces
-            // inside of this fusion.
-            CHECK(ShapeUtil::Equal(first_reduce->shape(), inst->shape()));
-            CHECK(ShapeUtil::Equal(first_reduce->operand(0)->shape(),
-                                   inst->operand(0)->shape()));
-            CHECK(ShapeUtil::Equal(first_reduce->operand(1)->shape(),
-                                   inst->operand(1)->shape()));
-            CHECK(first_reduce->dimensions() == inst->dimensions());
-            input_gens.push_back(fused_emitter.GetGenerator(inst->operand(0)));
-            init_value_gens.push_back(
-                fused_emitter.GetGenerator(inst->operand(1)));
-            reducers.push_back(inst->to_apply());
-            reduce_output_shapes.push_back(std::move(output_shape_index));
-          } else {
-            // For extra outputs we can relax shape equality to allow different
-            // types (with the same number of elements). Layouts still have to
-            // match.
-            CHECK(ShapeUtil::CompatibleIgnoringElementType(
-                first_reduce->operand(0)->shape(), inst->shape()));
-            CHECK(LayoutUtil::Equal(first_reduce->operand(0)->shape().layout(),
-                                    inst->shape().layout()));
-            extra_output_gens.emplace_back(fused_emitter.GetGenerator(inst),
-                                           std::move(output_shape_index));
-          }
-        }
-        const Shape& input_shape = first_reduce->operand(0)->shape();
-        TF_CHECK_OK(EmitReductionToVector(
-            kernel_thunk.get(), first_reduce, input_shape, input_gens,
-            init_value_gens, first_reduce->dimensions(), reducers,
-            reduce_output_shapes, extra_output_gens));
-        thunks.push_back(std::move(kernel_thunk));
-        std::unique_ptr<SequentialThunk> sequential_thunk =
-            absl::make_unique<SequentialThunk>(std::move(thunks), fusion);
-        AddThunkToThunkSequence(std::move(sequential_thunk));
-        return Status::OK();
+        return EmitReductionToVector(fusion);
       }
       default:
         LOG(FATAL) << "Bad opcode for input fusion: "
@@ -702,13 +617,12 @@ Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) {
 }
 
 Status IrEmitterUnnested::EmitExtraOutputsForReduce(
-    const HloInstruction* reduce, const IrArray::Index& index,
+    const HloInstruction* unnested_hlo, const IrArray::Index& index,
     absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
         extra_output_gens) {
   for (int i = 0; i != extra_output_gens.size(); ++i) {
-    const HloInstruction* output = reduce->parent()->FusionInstruction();
     llvm::Value* extra_output_address =
-        GetIrArray(*output, *output, extra_output_gens[i].second)
+        GetIrArray(*unnested_hlo, *unnested_hlo, extra_output_gens[i].second)
             .EmitArrayElementAddress(index, &b_,
                                      "extra_output_element_address");
     TF_ASSIGN_OR_RETURN(llvm::Value* const extra_output_ir_value,
@@ -718,984 +632,13 @@ Status IrEmitterUnnested::EmitExtraOutputsForReduce(
   return Status::OK();
 }
 
-Status IrEmitterUnnested::EmitReductionToScalar(
-    KernelThunk* kernel_thunk, HloInstruction* reduce, const Shape& input_shape,
-    absl::Span<const llvm_ir::ElementGenerator> input_gens,
-    absl::Span<const llvm_ir::ElementGenerator> init_value_gens,
-    absl::Span<HloComputation* const> reducers,
-    absl::Span<const ShapeIndex> reduce_output_shapes,
-    absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-        extra_output_gens) {
-  // Number of elements processed by a single thread.
-  constexpr int64 kTileSize = 16;
-  int64 num_elems = ShapeUtil::ElementsIn(input_shape);
-
-  // Round up the number of tiles to a multiple of the warp size.  This is
-  // necessary for correctness.  We launch one thread per tile, and if the
-  // number of threads isn't a multiple of the number of the warp size, our
-  // shuffles will read from inactive threads, producing undefined values.
-  int64 num_tiles =
-      RoundUpToNearest(CeilOfRatio(num_elems, kTileSize), kWarpSize);
-
-  Shape tiled_input_shape = ShapeUtil::MakeShapeWithLayout(
-      reduce->shape().element_type(), {num_tiles}, {0});
-  LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
-      tiled_input_shape, ir_emitter_context_->device_description());
-
-  llvm::Type* index_ty =
-      GetIndexTypeForKernel(reduce, launch_dimensions.launch_bound(), &b_);
-
-  auto index_typed_constant = [&](uint64 c) -> llvm::Constant* {
-    return llvm::ConstantInt::get(index_ty, c);
-  };
-
-  // Check whether every thread will process a full tile's worth of elements
-  // without reading outside the bounds of the input.  If this is true, we can
-  // skip some bounds checks in the final algorithm.
-  bool all_threads_in_bounds = num_tiles * kTileSize == num_elems;
-
-  // __global__ void full_reduce_kernel() {
-  //   x_in_tiles = threadIdx.x + blockIdx.x * blockDim.x;
-  //   x = x_in_tiles * kTileSize;
-  //
-  //   partial_result = init_value;
-  //   if (all_threads_in_bounds || x + kTileSize <= num_elems) {
-  //     for (i = 0; i < kTileSize; ++i) {
-  //       partial_result = Reducer(partial_result, input[x + i]);
-  //     }
-  //   } else {
-  //     for (i = 0; i < kTileSize; ++i) {
-  //       if (x + i < num_elems) {
-  //         partial_result = Reducer(partial_result, input[x + i]);
-  //       }
-  //     }
-  //   }
-  //   for (i = warpSize / 2; i > 0; i /= 2) {
-  //     partial_result = Reducer(partial_result,
-  //                              __shfl_down(partial_result, i));
-  //   }
-  //   if (lane_id == 0) {
-  //     AtomicReducer(&output[y], partial_result);
-  //   }
-  // }
-  //
-  // // Choose num_blocks and threads_per_block such that:
-  // //
-  // //   num_blocks * threads_per_block =
-  // //     RoundUpToNextMultipleOf(Ceil(num_elems / kTileSize), warpSize),
-  // //
-  // // and threads_per_block is a multiple of warpSize.
-  // reduce_kernel  //
-  auto loop_body_emitter = [=](const IrArray::Index& tile_index) -> Status {
-    const int num_reduces = reducers.size();
-    llvm::Type* element_ir_type =
-        llvm_ir::PrimitiveTypeToIrType(input_shape.element_type(), module_);
-    std::vector<llvm::Value*> partial_reduction_result_addresses;
-    for (int i = 0; i != num_reduces; ++i) {
-      llvm::Value* partial_reduction_result_address =
-          Alloca(element_ir_type, /*ArraySize=*/nullptr,
-                 "partial_reduction_result." + llvm::Twine(i));
-      TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value,
-                          init_value_gens[i](IrArray::Index(index_ty)));
-      Store(init_ir_value, partial_reduction_result_address);
-      partial_reduction_result_addresses.push_back(
-          partial_reduction_result_address);
-    }
-
-    llvm::Value* x_in_tiles = tile_index[0];
-    x_in_tiles = ZExtOrTrunc(x_in_tiles, index_ty);
-
-    // Emit an inner for-loop that reduces the elements in the tile.
-    auto emit_tile_element_loop = [=](bool tile_in_bounds) -> Status {
-      std::unique_ptr<llvm_ir::ForLoop> tile_element_loop =
-          llvm_ir::ForLoop::EmitForLoop(
-              "element_id_in_tile", index_typed_constant(0),
-              index_typed_constant(kTileSize), index_typed_constant(1), &b_);
-
-      // Emit the body of the partial reduction loop.
-      llvm_ir::SetToFirstInsertPoint(tile_element_loop->GetBodyBasicBlock(),
-                                     &b_);
-      llvm::Value* x =
-          NSWAdd(NSWMul(x_in_tiles, index_typed_constant(kTileSize)),
-                 tile_element_loop->GetIndVarValue());
-      // Unless we know the tile is entirely in bounds, we have to emit a
-      // x-in-bounds check before reading from the input.
-      if (!tile_in_bounds) {
-        llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
-            ICmpULT(x, index_typed_constant(num_elems)), "x_in_bounds", &b_);
-
-        // Emit code that reads the input element and accumulates it to
-        // the partial reduction result.
-        llvm_ir::SetToFirstInsertPoint(if_data.true_block, &b_);
-      }
-
-      IrArray::Index input_index(
-          /*linear=*/x, input_shape, &b_);
-      llvm::Value* input_address = Alloca(element_ir_type);
-      for (int i = 0; i != num_reduces; ++i) {
-        TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value,
-                            input_gens[i](input_index));
-        Store(input_ir_value, input_address);
-        TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
-            *reducers[i],
-            {partial_reduction_result_addresses[i], input_address},
-            partial_reduction_result_addresses[i]));
-      }
-      return EmitExtraOutputsForReduce(reduce, input_index, extra_output_gens);
-    };
-
-    // x_end = kTileSize + x_in_tiles * kTileSize, i.e., the location that's
-    // immediately beyond the tile.
-    llvm::Value* x_end =
-        NSWAdd(index_typed_constant(kTileSize),
-               NSWMul(x_in_tiles, index_typed_constant(kTileSize)));
-    // The tile is entirely in bound if all_threads_in_bounds or
-    // x_end <= num_elems.
-    llvm::Value* tile_in_bounds =
-        Or(ICmpULE(x_end, index_typed_constant(num_elems)),
-           b_.getInt1(all_threads_in_bounds));
-    llvm_ir::LlvmIfData if_tile_in_bounds_data =
-        llvm_ir::EmitIfThenElse(tile_in_bounds, "tile_in_bounds", &b_);
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.true_block, &b_);
-    TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_bounds=*/true));
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.false_block, &b_);
-    TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_bounds=*/false));
-
-    // After the if-then-else statement on tile_in_bounds, emit calls to
-    // shfl_down that accumulate the partial reduction results of all threads
-    // from the warp.
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_bounds_data.after_block, &b_);
-    int bit_width = llvm_ir::GetSizeInBits(element_ir_type);
-    // bitcast cannot be applied to aggregate types (even packed ones), so we
-    // instead bitcast addresses of load/store to intN* of the same bit-width.
-    llvm::Type* shuffle_ir_type = element_ir_type->isStructTy()
-                                      ? b_.getIntNTy(bit_width)
-                                      : element_ir_type;
-    for (int shuffle_distance = kWarpSize / 2; shuffle_distance >= 1;
-         shuffle_distance /= 2) {
-      llvm::Value* result_from_other_lane =
-          Alloca(element_ir_type, nullptr, "result_from_other_lane");
-      for (int i = 0; i != num_reduces; ++i) {
-        llvm::Value* partial_reduction_result =
-            Load(BitCast(partial_reduction_result_addresses[i],
-                         shuffle_ir_type->getPointerTo()),
-                 "partial_reduction_result");
-        CHECK_EQ(launch_dimensions.threads_per_block() % kWarpSize, 0)
-            << "Requires block size a multiple of the warp size, otherwise we "
-               "will read undefined elements.";
-        Store(EmitFullWarpShuffleDown(partial_reduction_result,
-                                      b_.getInt32(shuffle_distance), &b_),
-              BitCast(result_from_other_lane, shuffle_ir_type->getPointerTo()));
-        TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
-            *reducers[i],
-            {partial_reduction_result_addresses[i], result_from_other_lane},
-            partial_reduction_result_addresses[i]));
-      }
-    }
-
-    const HloInstruction* output =
-        reduce->IsFused() ? reduce->parent()->FusionInstruction() : reduce;
-
-    // Emit an atomic operation that accumulates the partial reduction result of
-    // lane 0 (which holds the partially accumulated result for its warp) to the
-    // output element.
-    llvm::Value* lane_id =
-        URem(x_in_tiles, index_typed_constant(kWarpSize), "lane_id");
-    llvm_ir::LlvmIfData if_lane_id_is_zero_data = llvm_ir::EmitIfThenElse(
-        ICmpEQ(lane_id, index_typed_constant(0)), "lane_id_is_zero", &b_);
-    llvm_ir::SetToFirstInsertPoint(if_lane_id_is_zero_data.true_block, &b_);
-
-    for (int i = 0; i != num_reduces; ++i) {
-      llvm::Value* output_address =
-          GetIrArray(*output, *output, reduce_output_shapes[i])
-              .EmitArrayElementAddress(
-                  IrArray::Index(
-                      /*linear=*/b_.getInt64(0),
-                      ShapeUtil::GetSubshape(output->shape(),
-                                             reduce_output_shapes[i]),
-                      &b_),
-                  &b_, "output_element_address");
-      TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation(
-          *reducers[i], output_address, partial_reduction_result_addresses[i]));
-    }
-    return Status::OK();
-  };
-
-  // Emit a parallel loop that iterates through all input tiles, one per thread.
-  UpdateLaunchDimensions(launch_dimensions, kernel_thunk,
-                         ir_emitter_context_->llvm_module());
-  return ParallelLoopEmitter(loop_body_emitter, tiled_input_shape,
-                             launch_dimensions, &b_)
-      .EmitLoop(IrName(reduce), index_ty);
-}
-
-Status IrEmitterUnnested::EmitColumnReduction(
-    KernelThunk* kernel_thunk, int64 height, int64 width,
-    HloInstruction* reduce, const Shape& input_shape,
-    absl::Span<const llvm_ir::ElementGenerator> input_gens,
-    absl::Span<const llvm_ir::ElementGenerator> init_value_gens,
-    absl::Span<HloComputation* const> reducers,
-    absl::Span<const ShapeIndex> reduce_output_shapes,
-    absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-        extra_output_gens) {
-  // Divide the input matrix into tiles of size KxL. For example, when the
-  // input matrix is 4x4, K=2, and L=1 the tiled matrix looks like
-  //
-  //   0123
-  //   0123
-  //   4567
-  //   4567  // Numbers indicate tile IDs.
-  //
-  // Each tile is first partially reduced to a scalar by a thread, and then the
-  // scalar is accumulated to the output vector using atomic operations.
-  //
-  // We choose 128 as the tile size based on empirical evidence. It's big enough
-  // to reduce the amount of atomic adds in the end, maximizing the memory
-  // bandwidth. A tile width of 2 allows for high memory bandwidth utilization
-  // on 16b input data.
-  constexpr int64 kTileHeight = 128;
-  constexpr int64 kTileWidth = 2;
-
-  // If the height is not a multiple of kTileHeight, we pad the bottom of the
-  // input matrix.
-  const int64 height_in_tiles = CeilOfRatio(height, kTileHeight);
-  // If width is not a multiple of kTileWidth the rightmost thread will process
-  // fewer input elements.
-  const int64 width_in_tiles = CeilOfRatio(width, kTileWidth);
-  Shape tiled_input_shape =
-      ShapeUtil::MakeShapeWithLayout(reduce->shape().element_type(),
-                                     {height_in_tiles, width_in_tiles}, {1, 0});
-  LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
-      tiled_input_shape, ir_emitter_context_->device_description());
-
-  // TODO(b/110211620): Convert to use i32 index_type when it is possible.
-  llvm::Type* index_ty = b_.getInt64Ty();
-
-  auto index_typed_constant = [&](uint64 c) -> llvm::Constant* {
-    return llvm::ConstantInt::get(index_ty, c);
-  };
-
-  // for (linear_index = threadIdx.x + blockIdx.x * blockDim.x;
-  //      linear_index < height_in_tiles * width_in_tiles;
-  //      linear_index += blockDim.x * gridDim.x) {
-  //   y_in_tiles = linear_index / width_in_tiles;
-  //   x_in_tiles = linear_index % width_in_tiles;
-  //
-  //   partial_results[kTileWidth] = init_values;
-  //   tile_in_y_bounds = height % kTileHeight == 0 ||
-  //       y_in_tiles * kTileHeight + kTileHeight <= height;
-  //   tile_in_x_bounds = width % kTileWidth == 0 ||
-  //       x_in_tiles * kTileWidth + kTileWidth <= width;
-  //   // The implementation handles y and x bound checks separately.
-  //   if (tile_in_y_bounds && tile_in_x_bounds) {
-  //     for (y_offset : range(kTileHeight)) {
-  //       y = y_in_tiles * kTileHeight + y_offset;
-  //       for (x_offset : range(kTileWidth)) {
-  //         x = x_in_tiles * kTileWidth + x_offset;
-  //         partial_result = Reducer(partial_result[x_offset], input[y][x]);
-  //       }
-  //     }
-  //   } else {
-  //     for (y_offset : range(kTileHeight)) {
-  //       y = y_in_tiles * kTileHeight + y_offset;
-  //       for (y_offset : range(kTileHeight)) {
-  //         x = x_in_tiles * kTileWidth + x_offset;
-  //         if (y < height && x < width) {
-  //           partial_result = Reducer(partial_result, input[y][x]);
-  //         }
-  //       }
-  //     }
-  //   }
-  //   for (x_offset : range(kTileWidth)) {
-  //     AtomicReducer(&output[x + x_offset], partial_result[x_offset]);
-  //   }
-  // }
-  auto loop_body_emitter = [=](const IrArray::Index& tile_index) -> Status {
-    const int num_reduces = reducers.size();
-    // Emit the loop body that reduces one tile.
-    llvm::Type* element_ir_type =
-        llvm_ir::PrimitiveTypeToIrType(input_shape.element_type(), module_);
-    std::vector<llvm::Value*> partial_reduction_result_addresses;
-    for (int i = 0; i != num_reduces; ++i) {
-      for (int x_offset = 0; x_offset < kTileWidth; ++x_offset) {
-        llvm::Value* partial_reduction_result_address =
-            Alloca(element_ir_type, /*ArraySize=*/nullptr,
-                   "partial_reduction_result." +
-                       llvm::Twine(i * kTileWidth + x_offset));
-        TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value,
-                            init_value_gens[i](IrArray::Index(index_ty)));
-        Store(init_ir_value, partial_reduction_result_address);
-        partial_reduction_result_addresses.push_back(
-            partial_reduction_result_address);
-      }
-    }
-
-    // Emit an inner for-loop that partially reduces the elements in the given
-    // tile.
-    llvm::Value* y_in_tiles = tile_index[0];
-    llvm::Value* x_in_tiles = tile_index[1];
-
-    y_in_tiles = ZExtOrTrunc(y_in_tiles, index_ty);
-    x_in_tiles = ZExtOrTrunc(x_in_tiles, index_ty);
-
-    auto emit_tile_element_loop = [=](bool tile_in_y_bounds,
-                                      bool tile_in_x_bounds) -> Status {
-      std::unique_ptr<llvm_ir::ForLoop> tile_element_loop =
-          llvm_ir::ForLoop::EmitForLoop(
-              "element_id_in_tile", index_typed_constant(0),
-              index_typed_constant(kTileHeight), index_typed_constant(1), &b_);
-
-      // Emit the body of the partial reduction loop.
-      llvm_ir::SetToFirstInsertPoint(tile_element_loop->GetBodyBasicBlock(),
-                                     &b_);
-      llvm::Value* y =
-          NSWAdd(NSWMul(y_in_tiles, index_typed_constant(kTileHeight)),
-                 tile_element_loop->GetIndVarValue());
-
-      // Unless we know that y is in bounds, we have to emit a check before
-      // reading from the input.
-      if (!tile_in_y_bounds) {
-        llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
-            ICmpULT(y, index_typed_constant(height)), "y_in_bounds", &b_);
-
-        // Emit code that reads the input element and accumulates it to
-        // the partial reduction result.
-        llvm_ir::SetToFirstInsertPoint(if_data.true_block, &b_);
-      }
-      for (int x_offset = 0; x_offset < kTileWidth; ++x_offset) {
-        llvm::Value* x =
-            NSWAdd(NSWMul(x_in_tiles, index_typed_constant(kTileWidth)),
-                   index_typed_constant(x_offset));
-        // Unless we know that x is in bounds, we have to emit a check before
-        // reading from the input.
-        if (!tile_in_x_bounds) {
-          llvm_ir::LlvmIfData if_data = llvm_ir::EmitIfThenElse(
-              ICmpULT(x, index_typed_constant(width)), "x_in_bounds", &b_);
-          llvm_ir::SetToFirstInsertPoint(if_data.true_block, &b_);
-        }
-        llvm::Value* input_address = Alloca(element_ir_type);
-        // {y,x} is an index to input_matrix_shape [height,width]. We need to
-        // convert that to an index to input_shape (the shape of the operand of
-        // "reduce"). This conversion is composed of a transposition from
-        // input_shape to normalized_input_shape and a reshape from
-        // normalized_input_shape to input_matrix_shape.
-        const Shape normalized_input_shape =
-            ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
-                input_shape);
-        auto input_shape_min2maj = LayoutUtil::MinorToMajor(input_shape);
-        const std::vector<int64> transpose_dimension_mapping(
-            input_shape_min2maj.rbegin(), input_shape_min2maj.rend());
-
-        const Shape input_matrix_shape =
-            ShapeUtil::MakeShapeWithDescendingLayout(input_shape.element_type(),
-                                                     {height, width});
-        const IrArray::Index input_matrix_index({y, x}, input_matrix_shape,
-                                                &b_);
-        const IrArray::Index input_index =
-            input_matrix_index
-                .SourceIndexOfReshape(input_matrix_shape,
-                                      normalized_input_shape, &b_)
-                .SourceIndexOfTranspose(normalized_input_shape, input_shape,
-                                        transpose_dimension_mapping, &b_);
-        for (int i = 0; i != num_reduces; ++i) {
-          TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value,
-                              input_gens[i](input_index));
-          Store(input_ir_value, input_address);
-          TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
-              *reducers[i],
-              {partial_reduction_result_addresses[i * kTileWidth + x_offset],
-               input_address},
-              partial_reduction_result_addresses[i * kTileWidth + x_offset]));
-          TF_RETURN_IF_ERROR(EmitExtraOutputsForReduce(reduce, input_index,
-                                                       extra_output_gens));
-        }
-      }
-      return Status::OK();
-    };
-
-    // y_end = kTileHeight + y_in_tiles * kTileHeight, i.e., the y location
-    // that's immediately beyond the tile.
-    llvm::Value* y_end =
-        NSWAdd(index_typed_constant(kTileHeight),
-               NSWMul(y_in_tiles, index_typed_constant(kTileHeight)));
-    // x_end = kTileWidth + x_in_tiles * kTileWidth, i.e., the x location
-    // that's immediately beyond the tile.
-    llvm::Value* x_end =
-        NSWAdd(index_typed_constant(kTileWidth),
-               NSWMul(x_in_tiles, index_typed_constant(kTileWidth)));
-    llvm::Value* tile_in_y_bounds =
-        Or(ICmpULE(y_end, index_typed_constant(height)),
-           b_.getInt1(height % kTileHeight == 0));
-    llvm::Value* tile_in_x_bounds =
-        Or(ICmpULE(x_end, index_typed_constant(width)),
-           b_.getInt1(width % kTileWidth == 0));
-    // The tile is in y bounds if "height" is a multiple of kTileHeight or
-    // y_end <= height.
-    llvm_ir::LlvmIfData if_tile_in_y_bounds_data =
-        llvm_ir::EmitIfThenElse(tile_in_y_bounds, "tile_in_y_bounds", &b_);
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_y_bounds_data.true_block, &b_);
-    // The tile is in x bounds if "width" is a multiple of kTileWidth or
-    // x_end <= width.
-    llvm_ir::LlvmIfData if_tile_in_x_bounds_data =
-        llvm_ir::EmitIfThenElse(tile_in_x_bounds, "tile_in_x_bounds", &b_);
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_x_bounds_data.true_block, &b_);
-    TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_y_bounds=*/true,
-                                              /*tile_in_x_bounds=*/true));
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_x_bounds_data.false_block, &b_);
-    TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_y_bounds=*/true,
-                                              /*tile_in_x_bounds=*/false));
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_y_bounds_data.false_block, &b_);
-    if_tile_in_x_bounds_data =
-        llvm_ir::EmitIfThenElse(tile_in_x_bounds, "tile_in_x_bounds", &b_);
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_x_bounds_data.true_block, &b_);
-    TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_y_bounds=*/false,
-                                              /*tile_in_x_bounds=*/true));
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_x_bounds_data.false_block, &b_);
-    TF_RETURN_IF_ERROR(emit_tile_element_loop(/*tile_in_y_bounds=*/false,
-                                              /*tile_in_x_bounds=*/false));
-
-    // After the nested if-then-else statement on tile_in_y_bounds and
-    // tile_in_x_bounds, emit atomic operations to accumulate the partial
-    // reduction result to the output element.
-    llvm_ir::SetToFirstInsertPoint(if_tile_in_y_bounds_data.after_block, &b_);
-    const HloInstruction* output =
-        reduce->IsFused() ? reduce->parent()->FusionInstruction() : reduce;
-    for (int i = 0; i != num_reduces; ++i) {
-      for (int x_offset = 0; x_offset < kTileWidth; ++x_offset) {
-        llvm::Value* x =
-            NSWAdd(NSWMul(x_in_tiles, index_typed_constant(kTileWidth)),
-                   index_typed_constant(x_offset));
-        llvm::Value* output_address =
-            GetIrArray(*output, *output, reduce_output_shapes[i])
-                .EmitArrayElementAddress(
-                    IrArray::Index(
-                        x,
-                        ShapeUtil::GetSubshape(output->shape(),
-                                               reduce_output_shapes[i]),
-                        &b_),
-                    &b_, "output_element_address");
-        TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation(
-            *reducers[i], output_address,
-            partial_reduction_result_addresses[i * kTileWidth + x_offset]));
-      }
-    }
-    return Status::OK();
-  };
-
-  // Emit a parallel loop that iterate through all input tiles.
-  UpdateLaunchDimensions(launch_dimensions, kernel_thunk,
-                         ir_emitter_context_->llvm_module());
-  return ParallelLoopEmitter(loop_body_emitter, tiled_input_shape,
-                             launch_dimensions, &b_)
-      .EmitLoop(IrName(reduce), index_ty);
-}
-
-static std::pair<int64, int64> ComputeKernelMappingSchemeForReduction(
-    int64 depth, int64 width, int64 kWarpSize) {
-  constexpr int64 kTargetNumElementsPerThread = 64;
-  int64 x_tile_size = kTargetNumElementsPerThread;
-  int64 z_tile_size = 1;
-
-  // Only tile along the x dimension with tile size kTargetNumElementsPerThread
-  // if doing so doesn't require a slow version of loop with bound check on each
-  // dimension. A more sophisticated heuristics is to enable tile along the
-  // x dimension with tile size kTargetNumElementsPerThread when either width is
-  // a factor of (kWarpSize * kTargetNumElementsPerThread) or width is big
-  // enough so that only a small fraction of the threads execute the slow
-  // version of loop with bound check.
-  if (width % (kWarpSize * kTargetNumElementsPerThread) != 0) {
-    x_tile_size = 8;
-    z_tile_size = 8;
-    while (depth % z_tile_size != 0) {
-      z_tile_size -= 1;
-    }
-  }
-
-  return std::pair<int64, int64>(x_tile_size, z_tile_size);
-}
-
-Status IrEmitterUnnested::EmitRowReduction(
-    KernelThunk* kernel_thunk, int64 depth, int64 height, int64 width,
-    HloInstruction* reduce, const Shape& input_shape,
-    absl::Span<const llvm_ir::ElementGenerator> input_gens,
-    absl::Span<const llvm_ir::ElementGenerator> init_value_gens,
-    absl::Span<HloComputation* const> reducers,
-    absl::Span<const ShapeIndex> reduce_output_shapes,
-    absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-        extra_output_gens) {
-  // A naive algorithm is:
-  // 1. Divide the x dimension of the input tensor into tiles of size 1x1xX.
-  // 2. Partially reduces each tile to a scalar using one thread.
-  // 3. Accumulates that scalar to the output vector using atomic operations.
-  //
-  // for (linear_index = threadIdx.x + blockIdx.x * blockDim.x;
-  //      linear_index < depth * height * width_in_tiles;
-  //      linear_index += blockDim.x * gridDim.x) {
-  //   int x_in_tiles = linear_index % width_in_tiles;
-  //   int y = linear_index / width_in_tiles % height;
-  //   int z = linear_index / (height * width_in_tiles);
-  //   float partial_result = 0;
-  //   for (element_id_in_tile : range(x_tile_size)) {
-  //     int x = x_in_tiles * x_tile_size + element_id_in_tile;
-  //     if (x < width)
-  //       partial_result = reducer(partial_result, input[z][y][x]);
-  //   }
-  //   AtomicReducer(&output[y], partial_result);
-  // }
-  //
-  // Four optimizations are performed.
-  //
-  // 1. To coalesce global memory accesses, dilate the tile with a factor of 32
-  // (i.e. the warp size). For example, suppose the width is 8x32=256. Instead
-  // of making each tile consecutive, we let make tile 0 column
-  // [0,32,64,...,224], tile 1 column [1,33,65,...,225], and so on. This ensures
-  // that threads in a warp access consecutive memory in one iteration (i.e.
-  // coalesced). In the above example, the warp that contains thread 0-31
-  // accesses column 0-31 in the first iteration, and 32-63 in the second
-  // iteration, and so on.
-  //
-  // 2. Partially accumulate partial reduced results computed by threads in the
-  // same warp using shfl_down. Using shfl_down is faster than directly using
-  // atomic operations because shfl_down transfers the data between threads
-  // using shared memory and threads in the same warp run in lock step (thus no
-  // extra synchronization needed). See
-  // https://devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/
-  // for details. The downside is, to produce correct results when using
-  // shfl_down, we need to guarantee threads in the same warp work on input
-  // elements with the same y, so the number of tiles in each row must be a
-  // multiple of 32.
-  //
-  // 3. Specialize the case that the entire tile is in bounds. When that is
-  // true, we don't need to emit "if(x<width)" inside the loop on
-  // element_id_in_tile, which makes the code more friendly to optimizations
-  // such as LICM.
-  //
-  // 4. When the width is too small and x_tile_size is less than the target
-  //    number of elements per thread and use a small factor of depth as
-  //    z_tile_size to increase the number of elements calculated by each
-  //    partial sum. This can reduce the needed number of dynamic shfl_down and
-  //    atomic operations.
-  //
-  // for (linear_index = threadIdx.x + blockIdx.x * blockDim.x;
-  //      linear_index < depth * height * width_in_tiles;
-  //      linear_index += blockDim.x * gridDim.x) {
-  //   int x_in_tiles = linear_index % width_in_tiles;
-  //   int y = linear_index / width_in_tiles % height;
-  //   int z_in_tiles = linear_index / (height * width_in_tiles);
-  //   int warp_id = x_in_tiles / warpSize;
-  //   int lane_id = x_in_tiles % warpSize;
-  //   float partial_result = 0;
-  //   int x = warp_id * kTileSize * warpSize + lane_id;
-  //   if (width % (x_tile_size * warpSize) == 0 ||
-  //       x + (x_tile_size - 1) * warpSize < width) {
-  //     // The entire x_tile is in bounds.
-  //     for (int element_id_in_z_tile = 0; element_id_in_z_tile < z_tile_size;
-  //          ++element_id_in_z_tile) {
-  //       z = z_in_tiles * z_tile_size + element_id_in_z_tile;
-  //       int tx = x;
-  //       for (int element_id_in_x_tile = 0;
-  //            element_id_in_x_tile < x_tile_size;
-  //            ++element_id_in_x_tile, tx += warpSize) {
-  //         partial_result = Reducer(partial_result, input[z][y][tx]);
-  //       }
-  //     }
-  //   } else {
-  //     // The tile is partially in bounds.
-  //     for (int element_id_in_z_tile = 0; element_id_in_z_tile < z_tile_size;
-  //          ++element_id_in_z_tile) {
-  //       z = z_in_tiles * z_tile_size + element_id_in_z_tile;
-  //       int tx = x;
-  //       for (int element_id_in_x_tile = 0; element_id_in_x_tile <
-  //            x_tile_size; ++element_id_in_tile, tx += warpSize) {
-  //         if (tx < width)
-  //           partial_result = Reducer(partial_result, input[z][y][tx]);
-  //       }
-  //     }
-  //   }
-  //   for (shuffle_distance = 16; shuffle_distance > 0; shuffle_distance /= 2)
-  //     partial_result = Reducer(
-  //         partial_result,
-  //         __shfl_down_sync(CUDA_WARP_ALL, partial_result, shuffle_distance));
-  //   if (lane_id == 0)
-  //     AtomicReducer(&output[y], partial_result);
-  // }
-  //
-
-  int64 x_tile_size;
-  int64 z_tile_size;
-  std::tie(x_tile_size, z_tile_size) =
-      ComputeKernelMappingSchemeForReduction(depth, width, kWarpSize);
-
-  // Round the width in tiles up to the nearest multiple of kWarpSize, so that
-  // the use of shfl_down is valid.
-  const int64 width_in_tiles =
-      RoundUpToNearest(CeilOfRatio(width, x_tile_size), kWarpSize);
-  Shape tiled_input_shape = ShapeUtil::MakeShapeWithLayout(
-      reduce->shape().element_type(),
-      {depth / z_tile_size, height, width_in_tiles}, {2, 1, 0});
-  LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
-      tiled_input_shape, ir_emitter_context_->device_description());
-  llvm::Type* index_ty =
-      GetIndexTypeForKernel(reduce, launch_dimensions.launch_bound(), &b_);
-
-  auto index_typed_constant = [&](uint64 c) -> llvm::Constant* {
-    return llvm::ConstantInt::get(index_ty, c);
-  };
-
-  auto loop_body_emitter = [=](const IrArray::Index& tile_index) {
-    const int num_reduces = reducers.size();
-    llvm::Type* element_ir_type = llvm_ir::PrimitiveTypeToIrType(
-        input_shape.element_type(), ir_emitter_context_->llvm_module());
-    std::vector<llvm::Value*> partial_reduction_result_addresses;
-    for (int i = 0; i != num_reduces; ++i) {
-      llvm::Value* partial_reduction_result_address =
-          Alloca(element_ir_type, /*ArraySize=*/nullptr,
-                 "partial_reduction_result." + llvm::Twine(i));
-      TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value,
-                          init_value_gens[i](IrArray::Index(index_ty)));
-      Store(init_ir_value, partial_reduction_result_address);
-      partial_reduction_result_addresses.push_back(
-          partial_reduction_result_address);
-    }
-
-    llvm::Value* z_tile = tile_index[0];
-    llvm::Value* y = tile_index[1];
-    llvm::Value* x_tile = tile_index[2];
-
-    x_tile = ZExtOrTrunc(x_tile, index_ty);
-
-    llvm::Value* warp_id =
-        UDiv(x_tile, index_typed_constant(kWarpSize), "warp_id");
-    llvm::Value* lane_id =
-        URem(x_tile, index_typed_constant(kWarpSize), "lane_id");
-
-    // The x-location of the last element in this z-x-tile.
-    // last_x = lane_id + warpSize * (x_tile_size - 1 + warp_id * x_tile_size);
-    llvm::Value* last_x = NSWAdd(
-        lane_id,
-        NSWMul(index_typed_constant(kWarpSize),
-               NSWAdd(index_typed_constant(x_tile_size - 1),
-                      NSWMul(warp_id, index_typed_constant(x_tile_size)))));
-
-    KernelSupportLibrary ksl(
-        &b_,
-        /*unroll_mode=*/xla::llvm_ir::UnrollMode::kFullyUnroll,
-        /*prevent_vectorization=*/false);
-
-    // Emit a for-loop that partially reduces the elements in the given
-    // z-x-tile.
-    auto emit_z_x_tile_element_loop = [&](bool x_tile_in_bounds,
-                                          int64 x_tile_loop_bound) -> Status {
-      auto emit_z_tile_element_loop = [&](llvm::Value* z_indvar) -> Status {
-        llvm::Value* z =
-            NSWAdd(z_indvar, NSWMul(index_typed_constant(z_tile_size), z_tile));
-        TF_RETURN_IF_ERROR(ksl.ForWithStatus(
-            "x_tile",
-            /*start=*/index_typed_constant(0),
-            /*end=*/index_typed_constant(x_tile_loop_bound),
-            /*step=*/1, [&](llvm::Value* x_indvar) -> Status {
-              // x = lane_id +
-              //     warpSize * (element_id_in_x_tile + warp_id * x_tile_size);
-              llvm::Value* x = NSWAdd(
-                  lane_id,
-                  NSWMul(index_typed_constant(kWarpSize),
-                         NSWAdd(x_indvar,
-                                NSWMul(warp_id, llvm::ConstantInt::get(
-                                                    index_ty, x_tile_size)))));
-
-              // Unless we know the x-tile is entirely in bounds, we have to
-              // emit a x-in-bounds check before reading from the input.
-              if (!x_tile_in_bounds) {
-                llvm_ir::LlvmIfData if_x_in_bounds_data =
-                    llvm_ir::EmitIfThenElse(
-                        ICmpULT(x, index_typed_constant(width)), "x_in_bounds",
-                        &b_);
-                // Points b_ to the then-block.
-                llvm_ir::SetToFirstInsertPoint(if_x_in_bounds_data.true_block,
-                                               &b_);
-              }
-
-              // Emit code that reads the input element and accumulates it
-              // to the partial reduction result.
-              llvm::Value* input_address = Alloca(element_ir_type);
-              {
-                // {z,y,x} is an index to input_3d_tensor_shape
-                // [depth,height,width]. We need to convert that to an index
-                // to input_shape (the shape of the operand of "reduce").
-                // This conversion is composed of a transposition from
-                // input_shape to normalized_input_shape and a reshape from
-                // normalized_input_shape to input_3d_tensor_shape.
-                const Shape normalized_input_shape = ShapeUtil::
-                    MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
-                        input_shape);
-                auto input_shape_min2maj =
-                    LayoutUtil::MinorToMajor(input_shape);
-                const std::vector<int64> transpose_dimension_mapping(
-                    input_shape_min2maj.rbegin(), input_shape_min2maj.rend());
-                const Shape input_3d_tensor_shape =
-                    ShapeUtil::MakeShapeWithDescendingLayout(
-                        input_shape.element_type(), {depth, height, width});
-                const IrArray::Index input_3d_tensor_index(
-                    {z, y, x}, input_3d_tensor_shape, &b_);
-                const IrArray::Index input_index =
-                    input_3d_tensor_index
-                        .SourceIndexOfReshape(input_3d_tensor_shape,
-                                              normalized_input_shape, &b_)
-                        .SourceIndexOfTranspose(
-                            normalized_input_shape, input_shape,
-                            transpose_dimension_mapping, &b_);
-
-                for (int i = 0; i != num_reduces; ++i) {
-                  TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value,
-                                      input_gens[i](input_index));
-                  Store(input_ir_value, input_address);
-                  TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
-                      *reducers[i],
-                      {partial_reduction_result_addresses[i], input_address},
-                      partial_reduction_result_addresses[i]));
-                }
-                return EmitExtraOutputsForReduce(reduce, input_index,
-                                                 extra_output_gens);
-              }
-            }));
-        return Status::OK();
-      };
-
-      return ksl.ForWithStatus("z_tile",
-                               /*start=*/index_typed_constant(0),
-                               /*end=*/index_typed_constant(z_tile_size),
-                               /*step=*/1, emit_z_tile_element_loop);
-    };
-
-    llvm::Value* tile_in_bounds =
-        Or(b_.getInt1(width % (x_tile_size * kWarpSize) == 0),
-           ICmpULT(last_x, index_typed_constant(width)));
-
-    TF_RETURN_IF_ERROR(ksl.IfWithStatus(
-        tile_in_bounds,
-        /*true_block_generator=*/
-        [&]() -> Status {
-          return emit_z_x_tile_element_loop(/*x_tile_in_bounds=*/true,
-                                            x_tile_size);
-        },
-        /*false_block_generator=*/
-        [&]() -> Status {
-          return emit_z_x_tile_element_loop(
-              /*x_tile_in_bounds=*/false,
-              CeilOfRatio(width % (x_tile_size * kWarpSize), kWarpSize));
-        }));
-
-    // After accumulating the elements of the z_x_tile, emit calls to
-    // shfl_down that accumulate the partial reduction results of all
-    // threads in a warp.
-    int bit_width = llvm_ir::GetSizeInBits(element_ir_type);
-    // bitcast cannot be applied to aggregate types (even packed ones), so we
-    // instead bitcast addresses of load/store to intN* of the same bit-width.
-    llvm::Type* shuffle_ir_type = element_ir_type->isStructTy()
-                                      ? b_.getIntNTy(bit_width)
-                                      : element_ir_type;
-    for (int shuffle_distance = 16; shuffle_distance >= 1;
-         shuffle_distance /= 2) {
-      llvm::Value* result_from_other_lane =
-          Alloca(element_ir_type, nullptr, "result_from_other_lane");
-      for (int i = 0; i != num_reduces; ++i) {
-        llvm::Value* partial_reduction_result =
-            Load(BitCast(partial_reduction_result_addresses[i],
-                         shuffle_ir_type->getPointerTo()),
-                 "partial_reduction_result");
-        CHECK_EQ(launch_dimensions.threads_per_block() % kWarpSize, 0)
-            << "Requires block size a multiple of the warp size, otherwise we "
-               "will read undefined elements.";
-        Store(EmitFullWarpShuffleDown(partial_reduction_result,
-                                      b_.getInt32(shuffle_distance), &b_),
-              BitCast(result_from_other_lane, shuffle_ir_type->getPointerTo()));
-        TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
-            *reducers[i],
-            {partial_reduction_result_addresses[i], result_from_other_lane},
-            partial_reduction_result_addresses[i]));
-      }
-    }
-
-    const HloInstruction* output =
-        reduce->IsFused() ? reduce->parent()->FusionInstruction() : reduce;
-
-    // Emit an atomic operation that accumulates the partial reduction result of
-    // lane 0 (which holds the partially accumulated result for its warp) to the
-    // output element.
-    llvm_ir::LlvmIfData if_lane_id_is_zero_data = llvm_ir::EmitIfThenElse(
-        ICmpEQ(lane_id, index_typed_constant(0)), "lane_id_is_zero", &b_);
-    llvm_ir::SetToFirstInsertPoint(if_lane_id_is_zero_data.true_block, &b_);
-    for (int i = 0; i != num_reduces; ++i) {
-      llvm::Value* output_address =
-          GetIrArray(*output, *output, reduce_output_shapes[i])
-              .EmitArrayElementAddress(
-                  IrArray::Index(y,
-                                 ShapeUtil::GetSubshape(
-                                     output->shape(), reduce_output_shapes[i]),
-                                 &b_),
-                  &b_, "output_element_address");
-      // We don't need to emit atomic operations if there is only one tile of
-      // results. 'depth' is the z dimension, 'width' is the x dimension.
-      if (z_tile_size >= depth && x_tile_size >= width) {
-        TF_RETURN_IF_ERROR(EmitCallToNestedComputation(
-            *reducers[i],
-            {output_address, partial_reduction_result_addresses[i]},
-            output_address));
-      } else {
-        TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation(
-            *reducers[i], output_address,
-            partial_reduction_result_addresses[i]));
-      }
-    }
-    return Status::OK();
-  };
-
-  // Emit a parallel loop that iterates through every input tiles.
-  UpdateLaunchDimensions(launch_dimensions, kernel_thunk,
-                         ir_emitter_context_->llvm_module());
-  return ParallelLoopEmitter(loop_body_emitter, tiled_input_shape,
-                             launch_dimensions, &b_)
-      .EmitLoop(IrName(reduce), index_ty);
-}
-
-// Figures out whether `reduce` is a row or column reduction, and which
-// dimensions to reduce, and calls either `EmitRowReduction` or
-// `EmitColumnReduction` as appropriate.
-// Prerequisite: all the dimensions to keep are contiguous in the input layout
-//               and, if `reduce` is fused, the fused subgraph is pure
-//               elementwise.
-Status IrEmitterUnnested::EmitReductionToVector(
-    KernelThunk* kernel_thunk, HloInstruction* reduce, const Shape& input_shape,
-    absl::Span<const llvm_ir::ElementGenerator> input_gens,
-    absl::Span<const llvm_ir::ElementGenerator> init_value_gens,
-    absl::Span<const int64> dimensions_to_reduce,
-    absl::Span<HloComputation* const> reducers,
-    absl::Span<const ShapeIndex> reduce_output_shapes,
-    absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-        extra_output_gens) {
-  // This emission requires "reduce" to have an input layout. It is either set
-  // by LayoutAssignment (for a top-level kReduce) or by InstructionFusion (for
-  // a fused kReduce).
-  CHECK(input_shape.has_layout()) << "LayoutAssignment or InstructionFusion "
-                                     "doesn't set the input layout of "
-                                  << reduce->ToString();
-
-  // Specialize multi-dimensional-array-to-vector reduction.
-  std::vector<int64> input_dims_to_keep;
-  for (int64 input_dim = 0; input_dim < ShapeUtil::Rank(input_shape);
-       ++input_dim) {
-    if (std::find(dimensions_to_reduce.begin(), dimensions_to_reduce.end(),
-                  input_dim) == dimensions_to_reduce.end()) {
-      input_dims_to_keep.push_back(input_dim);
-    }
-  }
-
-  // Sort the dimensions to keep from minor to major, to facilitate checking
-  // whether another dimension is major or minor of them.
-  std::sort(input_dims_to_keep.begin(), input_dims_to_keep.end(),
-            [&input_shape](int64 dim_a, int64 dim_b) {
-              return PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
-                                         dim_a) <
-                     PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
-                                         dim_b);
-            });
-  // Now, if output rank is at least 1, `input_dims_to_keep.front()` is
-  // minormost and `input_dims_to_keep.back()` is majormost.
-
-  // If the dimensions to keep are minormost, emit a column reduction. As all
-  // the dimensions to keep are contiguous, by prerequisite of
-  // `EmitReductionToVector`, we only need to check whether the minormost
-  // dimension of the input is to keep.
-  if (ShapeUtil::IsEffectiveScalar(reduce->shape())) {
-    return EmitReductionToScalar(kernel_thunk, reduce, input_shape, input_gens,
-                                 init_value_gens, reducers,
-                                 reduce_output_shapes, extra_output_gens);
-  } else if (input_dims_to_keep.front() ==
-             LayoutUtil::Minor(input_shape.layout(), 0)) {
-    // Column reduction. Treat the result of "input" as a matrix whose width
-    // is the most minor dimension and height the product of other dimensions,
-    // and treat "reduce" as a column reduction of the input matrix.
-    const int64 width = ShapeUtil::ElementsIn(reduce->shape());
-    // "width" can be zero, so don't do
-    //   height = ShapeUtil::ElementsIn(input_shape) / width;
-    int64 height = 1;
-    for (int64 input_dim = 0; input_dim < ShapeUtil::Rank(input_shape);
-         ++input_dim) {
-      if (!std::count(input_dims_to_keep.begin(), input_dims_to_keep.end(),
-                      input_dim)) {
-        height *= input_shape.dimensions(input_dim);
-      }
-    }
-    return EmitColumnReduction(kernel_thunk, height, width, reduce, input_shape,
-                               input_gens, init_value_gens, reducers,
-                               reduce_output_shapes, extra_output_gens);
-  } else {
-    // Reduce the row dimension of a matrix or reduce dimension 0 and 2 in a
-    // 3D tensor. The size of dimension 1 (the height) is the size of the
-    // dimension to keep, the size of dimension 0 (the depth) is the product
-    // of dimensions that are more major than the dimension to keep, and the
-    // size of dimension 2 (the width) is the product of more minor
-    // dimensions.
-    int64 depth = 1;
-    int64 width = 1;
-    for (int64 input_dim = 0; input_dim < ShapeUtil::Rank(input_shape);
-         ++input_dim) {
-      if (PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
-                              input_dim) >
-          PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
-                              input_dims_to_keep.back())) {
-        depth *= input_shape.dimensions(input_dim);
-      } else if (PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
-                                     input_dim) <
-                 PositionInContainer(LayoutUtil::MinorToMajor(input_shape),
-                                     input_dims_to_keep.front())) {
-        width *= input_shape.dimensions(input_dim);
-      }
-    }
-    const int64 height = ShapeUtil::ElementsIn(reduce->shape());
-    return EmitRowReduction(kernel_thunk, depth, height, width, reduce,
-                            input_shape, input_gens, init_value_gens, reducers,
-                            reduce_output_shapes, extra_output_gens);
-  }
-}
-
 Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) {
   // TODO(b/112040122): Support multi-output reduce.
   if (!ShapeUtil::IsArray(reduce->shape())) {
     return Unimplemented("Multi-output reduce is not supported on GPU");
   }
-  auto input = reduce->operand(0);
-  auto init_value = reduce->operand(1);
-  absl::Span<const int64> dimensions_to_reduce(reduce->dimensions());
-  HloComputation* reducer = reduce->to_apply();
-  // HandleReduce specializes reduction from a multi-dimensional array to a 1D
-  // array. The specialized version requires an initializer thunk that
-  // initializes the output array to the initial value of the reduce.
   if (IsReductionToVector(*reduce)) {
-    TF_ASSIGN_OR_RETURN(std::unique_ptr<Thunk> initializer_thunk,
-                        BuildInitializerThunk(reduce));
-    std::vector<std::unique_ptr<Thunk>> thunks;
-    thunks.push_back(std::move(initializer_thunk));
-    std::unique_ptr<KernelThunk> kernel_thunk =
-        BuildKernelThunk(reduce, /*implements_whole_instruction=*/false);
-
-    TF_CHECK_OK(EmitReductionToVector(
-        kernel_thunk.get(), reduce, input->shape(),
-        {[&](const IrArray::Index& index) {
-          return GetIrArray(*input, *reduce).EmitReadArrayElement(index, &b_);
-        }},
-        {[&](const IrArray::Index& index) {
-          return GetIrArray(*init_value, *reduce)
-              .EmitReadArrayElement(index, &b_);
-        }},
-        dimensions_to_reduce, {reducer}, {{}}, {}));
-
-    thunks.push_back(std::move(kernel_thunk));
-
-    std::unique_ptr<SequentialThunk> sequential_thunk =
-        absl::make_unique<SequentialThunk>(std::move(thunks), reduce);
-    AddThunkToThunkSequence(std::move(sequential_thunk));
-    return Status::OK();
+    return EmitReductionToVector(reduce);
   }
 
   return IrEmitter::HandleReduce(reduce);
@@ -1820,7 +763,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter(
     // Create the inner loop to iterate over the window.
     llvm_ir::ForLoopNest window_loops(IrName(select_and_scatter, "inner"), &b_,
                                       index_type);
-    std::vector<int64> window_size;
+    DimensionVector window_size;
     for (const auto& dim : window.dimensions()) {
       window_size.push_back(dim.size());
       CHECK_GT(dim.size(), 0);
@@ -3193,34 +2136,36 @@ int IrEmitterUnnested::ConstructInputReducedShapeAndCastInputIrArrayToShape(
 
 namespace {
 
-void EmitFullTile(const KernelMappingScheme* mapping_scheme,
-                  const IrArray::Index& tile_origin_index,
-                  llvm::IRBuilder<>* builder, llvm::Value* y, llvm::Value* x,
-                  llvm::Type* index_ty,
-                  const std::function<void(const IrArray::Index&, llvm::Value*,
-                                           llvm::Value*)>& emit_elem_function) {
+void EmitFullElementalTile(
+    const KernelMappingScheme* mapping_scheme,
+    const IrArray::Index& tile_origin_index, const string& loop_name,
+    KernelSupportLibrary* ksl, llvm::IRBuilder<>* builder, llvm::Value* y,
+    llvm::Value* x, llvm::Type* index_ty,
+    const std::function<void(const IrArray::Index&, llvm::Value*,
+                             llvm::Value*)>& emit_elem_function) {
   int64 num_threads_x = mapping_scheme->GetNumberOfThreadsForDimensionX();
   int64 num_threads_y = mapping_scheme->GetNumberOfThreadsForDimensionY();
   int64 tile_size_x = mapping_scheme->GetTileSizeForDimensionX();
   int64 tile_size_y = mapping_scheme->GetTileSizeForDimensionY();
-  for (int64 i = 0; i < tile_size_y; i += num_threads_y) {
-    IrArray::Index source_idx_y =
-        tile_origin_index.AddOffsetToDim(llvm::ConstantInt::get(index_ty, i),
-                                         KernelMappingScheme::DimY, builder);
-    llvm::Value* y_loc =
-        builder->CreateAdd(llvm::ConstantInt::get(index_ty, i), y);
-    for (int64 j = 0; j < tile_size_x; j += num_threads_x) {
-      IrArray::Index source_idx =
-          source_idx_y.AddOffsetToDim(llvm::ConstantInt::get(index_ty, j),
-                                      KernelMappingScheme::DimX, builder);
-      llvm::Value* x_loc =
-          builder->CreateAdd(llvm::ConstantInt::get(index_ty, j), x);
-      emit_elem_function(source_idx, y_loc, x_loc);
-    }
-  }
-}
-
-void EmitPartialTile(
+  ksl->For(loop_name + "_y", /*start=*/llvm::ConstantInt::get(index_ty, 0),
+           /*end=*/llvm::ConstantInt::get(index_ty, tile_size_y),
+           /*step=*/llvm::ConstantInt::get(index_ty, num_threads_y),
+           [&](llvm::Value* y_indvar) {
+             IrArray::Index source_idx_y = tile_origin_index.AddOffsetToDim(
+                 y_indvar, KernelMappingScheme::DimY, builder);
+             llvm::Value* y_loc = builder->CreateAdd(y_indvar, y);
+             for (int64 j = 0; j < tile_size_x; j += num_threads_x) {
+               IrArray::Index source_idx = source_idx_y.AddOffsetToDim(
+                   llvm::ConstantInt::get(index_ty, j),
+                   KernelMappingScheme::DimX, builder);
+               llvm::Value* x_loc =
+                   builder->CreateAdd(llvm::ConstantInt::get(index_ty, j), x);
+               emit_elem_function(source_idx, y_loc, x_loc);
+             }
+           });
+}
+
+void EmitPartialElementalTile(
     const KernelMappingScheme* mapping_scheme,
     const IrArray::Index& tile_origin_index, const string& loop_name,
     KernelSupportLibrary* ksl, llvm::IRBuilder<>* builder, llvm::Value* y,
@@ -3240,7 +2185,8 @@ void EmitPartialTile(
         builder->CreateAdd(llvm::ConstantInt::get(index_ty, j), x);
 
     ksl->If(
-        "x_in_tile", builder->CreateICmpULT(x_loc, tile_width), [&] {
+        loop_name + "_x_in_tile", builder->CreateICmpULT(x_loc, tile_width),
+        [&] {
           // tile_height_bound =
           //   ceil(tile_height / num_threads_y) * num_threads_y
           llvm::Value* ceiling_of_ratio = builder->CreateUDiv(
@@ -3257,8 +2203,8 @@ void EmitPartialTile(
               [&](llvm::Value* y_indvar) {
                 llvm::Value* y_loc = builder->CreateAdd(y_indvar, y);
                 ksl->If(
-                    "y_in_tile", builder->CreateICmpULT(y_loc, tile_height),
-                    [&] {
+                    loop_name + "_y_in_tile",
+                    builder->CreateICmpULT(y_loc, tile_height), [&] {
                       emit_elem_function(
                           source_idx.AddOffsetToDim(
                               y_indvar, KernelMappingScheme::DimY, builder),
@@ -3289,20 +2235,20 @@ void EmitTiledElementalCodeWithBoundsCheck(
   llvm::Type* index_ty = tile_width->getType();
 
   ksl->If(
-      "full_tile",
+      loop_name + "_full_tile",
       builder->CreateAnd(
           builder->CreateICmpEQ(llvm::ConstantInt::get(index_ty, tile_size_x),
                                 tile_width),
           builder->CreateICmpEQ(llvm::ConstantInt::get(index_ty, tile_size_y),
                                 tile_height)),
       [&] {
-        EmitFullTile(mapping_scheme, tile_origin_index, builder, y, x, index_ty,
-                     emit_elem_function);
+        EmitFullElementalTile(mapping_scheme, tile_origin_index, loop_name, ksl,
+                              builder, y, x, index_ty, emit_elem_function);
       },
       [&] {
-        EmitPartialTile(mapping_scheme, tile_origin_index, loop_name, ksl,
-                        builder, y, x, tile_height, tile_width, index_ty,
-                        emit_elem_function);
+        EmitPartialElementalTile(mapping_scheme, tile_origin_index, loop_name,
+                                 ksl, builder, y, x, tile_height, tile_width,
+                                 index_ty, emit_elem_function);
       });
 }
 }  // namespace
@@ -3380,7 +2326,395 @@ void IrEmitterUnnested::EmitTileElementForFusion(
   }
 }
 
-// Emits a block of tiles, given a function object to emit one tile.
+// Information to support the code generation for a tiled reduction kernel.
+using AddressVector = InlinedVector<llvm::AllocaInst*, 1>;
+class ReductionCodegenInfo : public IrEmitterUnnested::KernelCodegenInfo {
+ public:
+  explicit ReductionCodegenInfo(llvm_ir::KernelMappingScheme* mapping_scheme,
+                                bool is_row_reduction)
+      : KernelCodegenInfo(mapping_scheme),
+        current_output_linear_index_address_(nullptr),
+        current_output_inbound_address_(nullptr),
+        is_row_reduction_(is_row_reduction) {}
+
+  void SetCurrentOutputLinearIndexAddress(llvm::AllocaInst* a) {
+    current_output_linear_index_address_ = a;
+  }
+  // Returns the address of the memory that stores the linear index of the
+  // current output. Since we are processing reduction to contiguous physical
+  // dimensions, this linear index is the linear index of the 1D output array.
+  llvm::AllocaInst* GetCurrentOutputLinearIndexAddress() const {
+    return current_output_linear_index_address_;
+  }
+
+  void SetCurrentOutputInboundAddress(llvm::AllocaInst* a) {
+    current_output_inbound_address_ = a;
+  }
+
+  llvm::AllocaInst* GetCurrentOutputInboundAddress() const {
+    return current_output_inbound_address_;
+  }
+
+  AddressVector* GetMutablePartialResultAddresses() {
+    return &partial_result_addresses_;
+  }
+  absl::Span<llvm::AllocaInst* const> GetPartialResultAddresses() const {
+    return partial_result_addresses_;
+  }
+
+  AddressVector* GetMutableReductionInputAddresses() {
+    return &reduction_input_addresses_;
+  }
+  absl::Span<llvm::AllocaInst* const> GetReductionInputAddresses() const {
+    return reduction_input_addresses_;
+  }
+
+  InlinedVector<HloComputation*, 1>* GetMutableReducers() { return &reducers_; }
+  const InlinedVector<HloComputation*, 1>& GetReducers() const {
+    return reducers_;
+  }
+  int GetNumberOfReduces() const { return reducers_.size(); }
+
+  InlinedVector<ShapeIndex, 1>* GetMutableReductionOutputShapeIndices() {
+    return &reduction_output_shape_indices_;
+  }
+  absl::Span<const ShapeIndex> GetReductionOutputShapeIndices() const {
+    return reduction_output_shape_indices_;
+  }
+
+  bool IsRowReduction() const { return is_row_reduction_; }
+
+  // Return the dimension that is being reduced between DimX and DimY.
+  int GetReducedDimensionEnum() const {
+    return IsRowReduction() ? llvm_ir::KernelMappingScheme::DimX
+                            : llvm_ir::KernelMappingScheme::DimY;
+  }
+
+  // Return the dimension that is being ketp between DimX and DimY.
+  int GetKeptDimensionEnum() const {
+    return IsRowReduction() ? llvm_ir::KernelMappingScheme::DimY
+                            : llvm_ir::KernelMappingScheme::DimX;
+  }
+
+ private:
+  AddressVector partial_result_addresses_;
+  AddressVector reduction_input_addresses_;
+  InlinedVector<HloComputation*, 1> reducers_;
+  InlinedVector<ShapeIndex, 1> reduction_output_shape_indices_;
+  llvm::AllocaInst* current_output_linear_index_address_;
+  llvm::AllocaInst* current_output_inbound_address_;
+  bool is_row_reduction_;
+};
+
+namespace {
+// Returns a group of instructions that generate the output for the kernel
+// containing the given HLO instruction. The result may be an unnested kReduce
+// HLO, a nested kReduce HLO of a kInput fusion, or the operands of the tuple
+// for a multiple output fusion.
+absl::Span<HloInstruction* const> GetOutputInstructions(
+    HloInstruction* const* reduce_or_tuple_pointer) {
+  HloOpcode opcode = (*reduce_or_tuple_pointer)->opcode();
+  CHECK(opcode == HloOpcode::kReduce || opcode == HloOpcode::kTuple);
+  return opcode == HloOpcode::kTuple
+             ? (*reduce_or_tuple_pointer)->operands()
+             : absl::Span<HloInstruction* const>(reduce_or_tuple_pointer, 1);
+}
+
+const HloInstruction* GetFirstReduceInstruction(
+    absl::Span<HloInstruction* const> instructions) {
+  auto first_reduce_iter =
+      absl::c_find_if(instructions, [](const HloInstruction* inst) {
+        return inst->opcode() == HloOpcode::kReduce;
+      });
+  CHECK_NE(first_reduce_iter, instructions.end());
+  return *first_reduce_iter;
+}
+
+};  // namespace
+
+void IrEmitterUnnested::EmitPrologueForOneReduction(
+    HloInstruction* unnested_hlo, HloInstruction* reduce_inst, int reduce_idx,
+    KernelCodegenInfo* kernel_info, GpuElementalIrEmitter* elemental_emitter,
+    ShapeIndex output_shape_index) {
+  ReductionCodegenInfo* reduction_info =
+      static_cast<ReductionCodegenInfo*>(kernel_info);
+
+  InlinedVector<HloComputation*, 1>* reducers =
+      reduction_info->GetMutableReducers();
+  CHECK(IsReductionToVector(*reduce_inst));
+  reducers->push_back(reduce_inst->to_apply());
+
+  InlinedVector<ShapeIndex, 1>* reduction_output_shape_indices =
+      reduction_info->GetMutableReductionOutputShapeIndices();
+  reduction_output_shape_indices->push_back(std::move(output_shape_index));
+
+  AddressVector* reduction_input_addresses =
+      reduction_info->GetMutableReductionInputAddresses();
+  llvm::Type* element_type = llvm_ir::PrimitiveTypeToIrType(
+      reduce_inst->shape().element_type(), ir_emitter_context_->llvm_module());
+  llvm::AllocaInst* reduction_input_address = Alloca(element_type);
+  reduction_input_addresses->push_back(reduction_input_address);
+
+  AddressVector* partial_result_addresses =
+      reduction_info->GetMutablePartialResultAddresses();
+  llvm::AllocaInst* partial_result_address =
+      Alloca(element_type, /*ArraySize=*/nullptr,
+             "partial_reduction_result." + llvm::Twine(reduce_idx));
+  partial_result_addresses->push_back(partial_result_address);
+
+  // Initialize the partial result with the initial value of the reduction.
+  llvm::Value* init_ir_value;
+  if (unnested_hlo->opcode() == HloOpcode::kFusion) {
+    HloInstruction* init_value_operand = reduce_inst->mutable_operand(1);
+    FusedIrEmitter fused_emitter(GetGeneratorForOperandIrArrays(unnested_hlo),
+                                 elemental_emitter);
+
+    TF_CHECK_OK(init_value_operand->Accept(&fused_emitter));
+    init_ir_value =
+        fused_emitter
+            .GetGenerator(init_value_operand)(IrArray::Index(b_.getInt32Ty()))
+            .ValueOrDie();
+  } else {
+    const HloInstruction* init_value = unnested_hlo->operand(1);
+    init_ir_value =
+        GetIrArray(*init_value, *unnested_hlo)
+            .EmitReadArrayElement(IrArray::Index(b_.getInt32Ty()), &b_);
+  }
+
+  Store(init_ir_value, partial_result_address);
+}
+
+void IrEmitterUnnested::EmitPrologueForReduction(
+    HloInstruction* unnested_hlo, KernelCodegenInfo* kernel_info) {
+  VLOG(10) << "Emit prologue for reduction " << unnested_hlo->ToString();
+  // Find the unnested kReduce or the tuple that contains a list of kReduce.
+  HloInstruction* reduce_or_tuple = unnested_hlo->opcode() == HloOpcode::kFusion
+                                        ? unnested_hlo->fused_expression_root()
+                                        : unnested_hlo;
+  absl::Span<HloInstruction* const> output_instructions =
+      GetOutputInstructions(&reduce_or_tuple);
+  ReductionCodegenInfo* reduction_info =
+      static_cast<ReductionCodegenInfo*>(kernel_info);
+  GpuElementalIrEmitter elemental_emitter(hlo_module_config_,
+                                          ir_emitter_context_->llvm_module(),
+                                          &b_, GetNestedComputer());
+  const HloInstruction* first_reduce = nullptr;
+  for (int i = 0, e = output_instructions.size(); i != e; ++i) {
+    if (output_instructions[i]->opcode() != HloOpcode::kReduce) {
+      continue;
+    }
+    HloInstruction* reduce_inst = output_instructions[i];
+    if (first_reduce == nullptr) {
+      first_reduce = reduce_inst;
+    } else {
+      CHECK(first_reduce->dimensions() == reduce_inst->dimensions());
+    }
+    ShapeIndex output_shape_index;
+    if (reduce_or_tuple->opcode() == HloOpcode::kTuple) {
+      output_shape_index = {i};
+    }
+
+    EmitPrologueForOneReduction(unnested_hlo, reduce_inst, i, kernel_info,
+                                &elemental_emitter,
+                                std::move(output_shape_index));
+  }
+
+  // Allocate stack storage to store the current output linear index and record
+  // the address of the storage.
+  reduction_info->SetCurrentOutputLinearIndexAddress(
+      Alloca(reduction_info->GetIndexType()));
+
+  if (!reduction_info->IsRowReduction()) {
+    llvm::Type* bool_ty = b_.getInt1Ty();
+    llvm::AllocaInst* output_inbound_addr = Alloca(bool_ty);
+    Store(llvm::ConstantInt::get(bool_ty, 0), output_inbound_addr);
+    reduction_info->SetCurrentOutputInboundAddress(output_inbound_addr);
+  }
+}
+
+void IrEmitterUnnested::EmitFullWarpShuffleDownLoopForAllReduces(
+    absl::Span<HloComputation* const> reducers,
+    absl::Span<llvm::AllocaInst* const> partial_result_addresses) {
+  for (int distance = 16; distance >= 1; distance /= 2) {
+    for (int i = 0; i != reducers.size(); ++i) {
+      llvm::Type* element_type =
+          partial_result_addresses[i]->getType()->getElementType();
+      int bit_width = llvm_ir::GetSizeInBits(element_type);
+      llvm::Value* result_from_other_lane = Alloca(
+          element_type, nullptr, "result_from_other_lane" + llvm::Twine(i));
+      // Bitcast cannot be applied to aggregate types (even packed ones), so
+      // we bitcast addresses of load/store to intN* of the same bit-width.
+      llvm::Type* shuffled_value_type =
+          element_type->isStructTy() ? b_.getIntNTy(bit_width) : element_type;
+      auto convert_pointer_for_shuffle = [&](llvm::Value* ptr) {
+        return BitCast(ptr, shuffled_value_type->getPointerTo());
+      };
+      llvm::Value* partial_result =
+          Load(convert_pointer_for_shuffle(partial_result_addresses[i]),
+               "partial_reduction_result");
+      Store(EmitFullWarpShuffleDown(partial_result, b_.getInt32(distance), &b_),
+            convert_pointer_for_shuffle(result_from_other_lane));
+      TF_CHECK_OK(EmitCallToNestedComputation(
+          *reducers[i], {partial_result_addresses[i], result_from_other_lane},
+          partial_result_addresses[i]));
+    }
+  }
+}
+
+void IrEmitterUnnested::EmitEpilogueForReduction(
+    HloInstruction* unnested_hlo, KernelCodegenInfo* kernel_info) {
+  ReductionCodegenInfo* reduction_info =
+      static_cast<ReductionCodegenInfo*>(kernel_info);
+  int num_reduces = reduction_info->GetNumberOfReduces();
+  absl::Span<llvm::AllocaInst* const> partial_result_addresses =
+      reduction_info->GetPartialResultAddresses();
+  const InlinedVector<HloComputation*, 1>& reducers =
+      reduction_info->GetReducers();
+  absl::Span<const ShapeIndex> reduction_output_shape_indices =
+      reduction_info->GetReductionOutputShapeIndices();
+
+  if (reduction_info->IsRowReduction()) {
+    EmitFullWarpShuffleDownLoopForAllReduces(reducers,
+                                             partial_result_addresses);
+    llvm::Value* lane_id = reduction_info->GetLaneId();
+    llvm_ir::LlvmIfData if_lane_id_is_zero_data = llvm_ir::EmitIfThenElse(
+        ICmpEQ(lane_id, llvm::ConstantInt::get(lane_id->getType(), 0)),
+        "lane_id_is_zero", &b_);
+    llvm_ir::SetToFirstInsertPoint(if_lane_id_is_zero_data.true_block, &b_);
+  } else {
+    llvm::Value* output_inbound_addr =
+        reduction_info->GetCurrentOutputInboundAddress();
+    llvm::Value* output_inbound = Load(output_inbound_addr);
+    llvm_ir::LlvmIfData if_output_inbound_data = llvm_ir::EmitIfThenElse(
+        ICmpEQ(output_inbound,
+               llvm::ConstantInt::get(output_inbound->getType(), 1)),
+        "output_inbound", &b_);
+    llvm_ir::SetToFirstInsertPoint(if_output_inbound_data.true_block, &b_);
+  }
+
+  // Emit an atomic operation that accumulates the partial reduction to the
+  // output element. For row reduction, this is only for lane 0 due to the
+  // if-statement emitted above.
+  for (int i = 0; i != num_reduces; ++i) {
+    IrArray::Index element_index(
+        /*linear=*/Load(reduction_info->GetCurrentOutputLinearIndexAddress(),
+                        "output_linear_addr"),
+        ShapeUtil::GetSubshape(unnested_hlo->shape(),
+                               reduction_output_shape_indices[i]),
+        &b_);
+    llvm::Value* output_address =
+        GetIrArray(*unnested_hlo, *unnested_hlo,
+                   reduction_output_shape_indices[i])
+            .EmitArrayElementAddress(element_index, &b_,
+                                     "output_element_address");
+    // Do not emit atomic operations if each element in the reduction result is
+    // computed by one block, that is the dimension being reduced has only one
+    // block.
+    const llvm_ir::KernelMappingScheme* mapping_scheme =
+        reduction_info->GetKernelMappingScheme();
+    if (mapping_scheme->GetTileBlockSizeForDimension(
+            llvm_ir::KernelMappingScheme::DimZ) == 1 &&
+        mapping_scheme->GetTileBlockSizeForDimension(
+            reduction_info->GetReducedDimensionEnum()) == 1) {
+      TF_CHECK_OK(EmitCallToNestedComputation(
+          *reducers[i], {output_address, partial_result_addresses[i]},
+          output_address));
+    } else {
+      TF_CHECK_OK(EmitAtomicOperationForNestedComputation(
+          *reducers[i], output_address, partial_result_addresses[i]));
+    }
+  }
+}
+
+void IrEmitterUnnested::EmitTileElementForReduction(
+    HloInstruction* unnested_hlo, const llvm_ir::IrArray::Index& index,
+    const KernelCodegenInfo* kernel_info, llvm::Value* y_loc,
+    llvm::Value* x_loc) {
+  VLOG(10) << "Emit tile element for reduce " << unnested_hlo->ToString();
+  HloInstruction* reduce_or_tuple = unnested_hlo->opcode() == HloOpcode::kFusion
+                                        ? unnested_hlo->fused_expression_root()
+                                        : unnested_hlo;
+  llvm_ir::TiledParameterInfo* tiled_param_info =
+      kernel_info->GetTiledParameterInfo();
+  tiled_param_info->set_y(y_loc);
+  tiled_param_info->set_x(x_loc);
+
+  // Record the linear address for the current reduction.
+  const ReductionCodegenInfo* reduction_info =
+      dynamic_cast<const ReductionCodegenInfo*>(kernel_info);
+  Store(index[reduction_info->GetKeptDimensionEnum()],
+        reduction_info->GetCurrentOutputLinearIndexAddress());
+  if (!reduction_info->IsRowReduction()) {
+    llvm::Type* bool_ty = b_.getInt1Ty();
+    llvm::AllocaInst* output_inbound_addr =
+        reduction_info->GetCurrentOutputInboundAddress();
+    Store(llvm::ConstantInt::get(bool_ty, 1), output_inbound_addr);
+  }
+
+  InlinedVector<llvm_ir::ElementGenerator, 1> input_gens;
+  std::vector<std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
+      extra_output_gens;
+  GpuElementalIrEmitter elem_emitter(hlo_module_config_, module_, &b_,
+                                     GetNestedComputer());
+  FusedIrEmitter fused_emitter(GetGeneratorForOperandIrArrays(unnested_hlo),
+                               &elem_emitter);
+  absl::Span<HloInstruction* const> output_instructions =
+      GetOutputInstructions(&reduce_or_tuple);
+  // Construct the ElementGenerator for each reduction and extra output in the
+  // the group of output instructions.
+  if (unnested_hlo->opcode() == HloOpcode::kFusion) {
+    fused_emitter.SetTiledParameterInfo(tiled_param_info);
+    TF_CHECK_OK(unnested_hlo->fused_expression_root()->Accept(&fused_emitter));
+
+    for (int i = 0, e = output_instructions.size(); i != e; ++i) {
+      const HloInstruction* inst = output_instructions[i];
+      ShapeIndex output_shape_index;
+      if (reduce_or_tuple->opcode() == HloOpcode::kTuple) {
+        output_shape_index = {i};
+      }
+      if (inst->opcode() == HloOpcode::kReduce) {
+        input_gens.push_back(fused_emitter.GetGenerator(inst->operand(0)));
+      } else {
+        extra_output_gens.emplace_back(fused_emitter.GetGenerator(inst),
+                                       std::move(output_shape_index));
+      }
+    }
+  } else {
+    input_gens.push_back([&](const IrArray::Index& index) {
+      return GetIrArray(*unnested_hlo->operand(0), *unnested_hlo)
+          .EmitReadArrayElement(index, &b_);
+    });
+  }
+
+  IrArray::Index input_index =
+      reduction_info->GetKernelMappingScheme()->GetUnnormalizedIndex(
+          index,
+          GetFirstReduceInstruction(output_instructions)->operand(0)->shape());
+  absl::Span<llvm::AllocaInst* const> partial_reduction_result_addresses =
+      reduction_info->GetPartialResultAddresses();
+  absl::Span<llvm::AllocaInst* const> reduction_input_addresses =
+      reduction_info->GetReductionInputAddresses();
+  const InlinedVector<HloComputation*, 1>& reducers =
+      reduction_info->GetReducers();
+
+  // Emit code to generate the input and perform the reduction computation for
+  // each reduction instruction.
+  for (int i = 0; i != reducers.size(); ++i) {
+    llvm::Value* const input_ir_value = input_gens[i](input_index).ValueOrDie();
+    Store(input_ir_value, reduction_input_addresses[i]);
+    TF_CHECK_OK(EmitCallToNestedComputation(
+        *reducers[i],
+        {partial_reduction_result_addresses[i], reduction_input_addresses[i]},
+        partial_reduction_result_addresses[i]));
+  }
+
+  // Emit code to generate the output for the non-reduction instructions in the
+  // fusion, if any.
+  TF_CHECK_OK(
+      EmitExtraOutputsForReduce(unnested_hlo, input_index, extra_output_gens));
+}
+
+// Emits a kernel for the hlo instruction using the given tiling scheme.
 void IrEmitterUnnested::EmitBlock(const TileGenerator& emit_one_tile,
                                   const KernelCodegenInfo* kernel_info,
                                   KernelSupportLibrary& ksl,
@@ -3506,11 +2840,22 @@ LaunchDimensions IrEmitterUnnested::EmitKernel(
             << llvm_ir::DumpToString(*param_shmem_buffers[id]);
   }
 
-  CHECK_EQ(mapping_scheme->GetThreadsPerTile() % kWarpSize, 0);
-  LaunchDimensions launch_dimensions = LaunchDimensions(
-      mapping_scheme->GetNumberOfBlocks(), mapping_scheme->GetThreadsPerTile());
-  llvm::Type* index_ty = GetIndexTypeForKernel(
-      unnested_hlo, launch_dimensions.launch_bound(), &b_);
+  const ReductionCodegenInfo* reduction_info =
+      dynamic_cast<const ReductionCodegenInfo*>(kernel_info);
+  bool is_column_reduction =
+      (reduction_info && !reduction_info->IsRowReduction());
+
+  LaunchDimensions launch_dimensions =
+      LaunchDimensions(mapping_scheme->GetNumberOfBlocks(),
+                       mapping_scheme->GetThreadsPerBlock());
+
+  // TODO(b/110211620): Enable int32 index type for column reduction.
+  llvm::Type* index_ty =
+      is_column_reduction
+          ? b_.getInt64Ty()
+          : GetIndexTypeForKernel(unnested_hlo,
+                                  launch_dimensions.launch_bound(), &b_);
+
   auto index_typed_constant = [&](uint64 c) -> llvm::Constant* {
     return llvm::ConstantInt::get(index_ty, c);
   };
@@ -3520,7 +2865,7 @@ LaunchDimensions IrEmitterUnnested::EmitKernel(
   // but we do it at the beginning in the hopes of reducing register pressure,
   // since we touch threadIdx.x and blockIdx.x at the beginning of the kernel
   // *anyway*.
-  if (unnested_hlo->IsMultiOutputFusion()) {
+  if (!reduction_info && unnested_hlo->IsMultiOutputFusion()) {
     KernelSupportLibrary{&b_}.If(
         "emit_mof_tuple", IsBlock0Thread0(&b_), [&] {
           llvm_ir::EmitTuple(GetIrArray(*unnested_hlo, *unnested_hlo),
@@ -3549,6 +2894,7 @@ LaunchDimensions IrEmitterUnnested::EmitKernel(
   kernel_info->SetLaneId(
       mapping_scheme->GetNumberOfThreadsForDimensionX() == kWarpSize ? x
                                                                      : nullptr);
+  kernel_info->SetIndexType(index_ty);
 
   KernelSupportLibrary ksl(&b_, llvm_ir::UnrollMode::kDefaultUnroll);
   // Curry a few parameters to EmitTiledElementalCodeWithBoundsCheck.
@@ -3573,29 +2919,31 @@ LaunchDimensions IrEmitterUnnested::EmitKernel(
         input_tile_origin.AddOffsetToDim(x, KernelMappingScheme::DimX, &b_)
             .AddOffsetToDim(y, KernelMappingScheme::DimY, &b_);
 
-    // Copy input parameter values to shared memory buffers:
-    // tile[y, x] = input[index]
-    // Note that tile_width and tile_height are flipped here because we are
-    // reading a transposed tile.
-    emit_tiled_elemental_code_with_bounds_check(
-        input_index, "input", output_tile_bounds[2], output_tile_bounds[1],
-        [&](const IrArray::Index& index, llvm::Value* y_loc,
-            llvm::Value* x_loc) {
-          for (int64 id : tiled_param_ids) {
-            IrArray& input_in_logical_shape = param_in_reduced_shape_arrays[id];
-            llvm::Value* shmem_buffer = param_shmem_buffers[id];
-            // TODO(jlebar): Add AA metadata to this store.  Tile buffers are
-            // global variables, so LLVM can't infer much about it.
-            Store(input_in_logical_shape.EmitReadArrayElement(index, &b_,
-                                                              "input_element"),
-                  GEP(shmem_buffer, {index_typed_constant(0), y_loc, x_loc}));
-          }
-        });
-
     // If shared memory transpose is needed, wait for all threads to reach this
     // point, lest we copy a value from tile to output before the other thread
     // copies it from input to tile. This is `__syncthreads` in CUDA.
     if (!tiled_param_ids.empty()) {
+      // Copy input parameter values to shared memory buffers:
+      // tile[y, x] = input[index]
+      // Note that tile_width and tile_height are flipped here because we are
+      // reading a transposed tile.
+      emit_tiled_elemental_code_with_bounds_check(
+          input_index, "input", output_tile_bounds[2], output_tile_bounds[1],
+          [&](const IrArray::Index& index, llvm::Value* y_loc,
+              llvm::Value* x_loc) {
+            for (int64 id : tiled_param_ids) {
+              IrArray& input_in_logical_shape =
+                  param_in_reduced_shape_arrays[id];
+              llvm::Value* shmem_buffer = param_shmem_buffers[id];
+              // TODO(jlebar): Add AA metadata to this store.  Tile buffers are
+              // global variables, so LLVM can't infer much about it.
+              Store(input_in_logical_shape.EmitReadArrayElement(
+                        index, &b_, "input_element"),
+                    GEP(shmem_buffer, {index_typed_constant(0), y_loc, x_loc}));
+            }
+          });
+
+      // Wait for all threads to reach this point using `__syncthreads` in CUDA.
       llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::nvvm_barrier0, {}, {}, &b_);
     }
 
@@ -3615,6 +2963,7 @@ LaunchDimensions IrEmitterUnnested::EmitKernel(
           kernel_generator.GetTileElementGenerator()(unnested_hlo, index,
                                                      kernel_info, y_loc, x_loc);
         });
+
     // If a tile block contains multiple tiles and shared memory buffers are
     // used, we need to wait for all threads to finish using the shared memory
     // buffer for the current tile before we move on to process the next tile
@@ -3810,6 +3159,249 @@ bool IrEmitterUnnested::CheckAndEmitHloWithTile021(HloInstruction* hlo) {
   return true;
 }
 
+namespace {
+// Checks that the outputs of a fusion with reduction are consistent.
+Status AreFusedReductionOutputsConsistent(
+    absl::Span<HloInstruction* const> output_instructions,
+    const HloInstruction* first_reduce) {
+  for (const HloInstruction* inst : output_instructions) {
+    if (inst->opcode() == HloOpcode::kReduce) {
+      // Shapes, layouts and dimensions must be the same for all reduces
+      // inside of this fusion.
+      TF_RET_CHECK(ShapeUtil::Equal(first_reduce->shape(), inst->shape()));
+      TF_RET_CHECK(ShapeUtil::Equal(first_reduce->operand(0)->shape(),
+                                    inst->operand(0)->shape()));
+      TF_RET_CHECK(ShapeUtil::Equal(first_reduce->operand(1)->shape(),
+                                    inst->operand(1)->shape()));
+      TF_RET_CHECK(first_reduce->dimensions() == inst->dimensions());
+    } else {
+      // For extra outputs we can relax shape equality to allow different
+      // types (with the same number of elements). Layouts still have to
+      // match.
+      TF_RET_CHECK(ShapeUtil::CompatibleIgnoringElementType(
+          first_reduce->operand(0)->shape(), inst->shape()));
+      TF_RET_CHECK(LayoutUtil::Equal(first_reduce->operand(0)->shape().layout(),
+                                     inst->shape().layout()));
+    }
+  }
+  return Status::OK();
+}
+
+// Finds the dimensions to keep for the reduction, sorts and returns the
+// dimensions from minor to major.
+DimensionVector GetDimensionsToKeepMinorToMajor(
+    const Shape& input_shape, absl::Span<const int64> dims_to_reduce) {
+  DimensionVector input_dims(ShapeUtil::Rank(input_shape), 0);
+  absl::c_iota(input_dims, 0);
+  DimensionVector input_dims_to_keep;
+  for (int input_dim : input_dims) {
+    auto it = absl::c_find_if(dims_to_reduce, [&](int64 dim_to_reduce) {
+      return dim_to_reduce == input_dim;
+    });
+    if (it == dims_to_reduce.end()) {
+      input_dims_to_keep.push_back(input_dim);
+    }
+  }
+
+  // Sort the dimensions to keep from minor to major.
+  absl::c_sort(input_dims_to_keep, [&input_shape](int64 dim_a, int64 dim_b) {
+    return PositionInContainer(LayoutUtil::MinorToMajor(input_shape), dim_a) <
+           PositionInContainer(LayoutUtil::MinorToMajor(input_shape), dim_b);
+  });
+
+  VLOG(10) << "dims to keep minor to major"
+           << absl::StrJoin(input_dims_to_keep, ",");
+  return input_dims_to_keep;
+}
+
+// Given the input shape and dimensions to reduce for the reduction to vector,
+// returns <num_reduced_major, num_kept, num_reduced_minor>:
+// num_kept: the number of elements in the contiguous dimensions to keep.
+// num_reduced_major: the number of elements in the dimensions to reduce that
+//   are more major than the dimensions to keep.
+// num_reduced_minor: the number of elements in the dimensions to reduce that
+//   are more minor than the dimensions to kept.
+std::tuple<int64, int64, int64> GetReductionToVectorDimensions(
+    const Shape& input_shape, absl::Span<const int64> dims_to_reduce) {
+  DimensionVector input_dims_to_keep_minor_to_major =
+      GetDimensionsToKeepMinorToMajor(input_shape, dims_to_reduce);
+  CHECK(LayoutUtil::AreDimensionsConsecutive(
+      input_shape.layout(), input_dims_to_keep_minor_to_major));
+  int num_reduced_major = 1, num_kept = 1, num_reduced_minor = 1;
+  if (input_dims_to_keep_minor_to_major.empty()) {
+    return std::make_tuple(num_reduced_major, num_kept, num_reduced_minor);
+  }
+  DimensionVector input_dims(ShapeUtil::Rank(input_shape), 0);
+  absl::c_iota(input_dims, 0);
+  absl::Span<const int64> minor_to_major =
+      LayoutUtil::MinorToMajor(input_shape);
+  for (int input_dim : input_dims) {
+    int64 curr_dim_size = input_shape.dimensions(input_dim);
+    if (PositionInContainer(minor_to_major, input_dim) >
+        PositionInContainer(minor_to_major,
+                            input_dims_to_keep_minor_to_major.back())) {
+      num_reduced_major *= curr_dim_size;
+    } else if (PositionInContainer(minor_to_major, input_dim) <
+               PositionInContainer(minor_to_major,
+                                   input_dims_to_keep_minor_to_major.front())) {
+      num_reduced_minor *= curr_dim_size;
+    } else {
+      num_kept *= curr_dim_size;
+    }
+  }
+
+  return std::make_tuple(num_reduced_major, num_kept, num_reduced_minor);
+}
+
+}  // namespace
+
+std::tuple<KernelMappingScheme, bool>
+IrEmitterUnnested::ComputeMappingSchemeAndReductionKind(
+    const HloInstruction* first_reduce) {
+  int64 depth = 1;
+  int64 height = 1;
+  int64 width = 1;
+  bool is_row_reduction = true;
+  int64 tile_size_x = 1;
+  int64 tile_size_y = 1;
+  int64 block_size_z = 1;
+  int64 num_threads_x = 1;
+  int64 num_threads_y = 1;
+  const Shape& input_shape = first_reduce->operand(0)->shape();
+  int64 num_input_elems = ShapeUtil::ElementsIn(input_shape);
+  int64 num_output_elems = ShapeUtil::ElementsIn(first_reduce->shape());
+  int64 num_reduced_major, num_kept, num_reduced_minor;
+  std::tie(num_reduced_major, num_kept, num_reduced_minor) =
+      GetReductionToVectorDimensions(input_shape, first_reduce->dimensions());
+  CHECK_EQ(num_output_elems, num_kept);
+
+  if (num_kept == 1) {
+    // Scalar reduction is a special row reduction with depth = height = 1.
+    width = num_input_elems;
+    tile_size_x = kWarpSize * 16;
+    num_threads_x = kWarpSize;
+  } else if (num_reduced_minor == 1) {
+    // Column reduction reduces inputs with dimension [height, width], where
+    // width is the minor dimension, to dimension [width].
+    height = num_reduced_major;
+    width = num_kept;
+    is_row_reduction = false;
+    // Column reduction without transpose doesn't require communication among
+    // threads processing elements in the same tile. The current implementation
+    // only support the use of on hardware thread block to process one block of
+    // tiles in the KernelMappingScheme. We try to maximize the values of
+    // num_threads_x and tile_size_x to allow a bigger hardware thread block.
+    int64 hw_threads_per_block_limit =
+        ThreadsPerBlockLimit(ir_emitter_context_->device_description());
+    tile_size_x = std::min(hw_threads_per_block_limit, num_kept);
+    num_threads_x = tile_size_x;
+    int64 kNumElementsPerPartialSum = 128;
+    tile_size_y = kNumElementsPerPartialSum;
+  } else {
+    // Row reduction reduces inputs with dimension [depth, height, width],
+    // where width is the most minor dimension, to dimension [height] .
+    depth = num_reduced_major;
+    height = num_kept;
+    width = num_reduced_minor;
+    num_threads_x = kWarpSize;
+    if (width % (kWarpSize * 64) == 0) {
+      tile_size_x = kWarpSize * 64;
+    } else {
+      tile_size_x = kWarpSize * 8;
+      block_size_z = 8;
+      while (depth % block_size_z != 0) {
+        block_size_z -= 1;
+      }
+    }
+  }
+  DCHECK_EQ(depth * height * width, num_input_elems);
+  VLOG(10) << "is_row_reduction " << is_row_reduction << depth << " " << height
+           << " " << width;
+
+  DimensionVector dims_in_elem{depth, height, width};
+  DimensionVector req_block_sizes{block_size_z, 1, 1};
+  llvm_ir::KernelMappingScheme mapping_scheme(
+      dims_in_elem, tile_size_y, tile_size_x, req_block_sizes, num_threads_y,
+      num_threads_x, &b_);
+  return std::make_tuple(mapping_scheme, is_row_reduction);
+}
+
+Status IrEmitterUnnested::EmitReductionToVector(HloInstruction* unnested_hlo) {
+  VLOG(10) << "Emitting reduction to vector " << unnested_hlo->ToString();
+
+  HloInstruction* reduce_or_tuple = unnested_hlo->opcode() == HloOpcode::kFusion
+                                        ? unnested_hlo->fused_expression_root()
+                                        : unnested_hlo;
+  absl::Span<HloInstruction* const> output_instructions =
+      GetOutputInstructions(&reduce_or_tuple);
+  const HloInstruction* first_reduce =
+      GetFirstReduceInstruction(output_instructions);
+
+  if (output_instructions.size() > 1) {
+    TF_RETURN_IF_ERROR(
+        AreFusedReductionOutputsConsistent(output_instructions, first_reduce));
+  }
+
+  // Build an initializer thunk to initialize each reduction output.
+  std::vector<std::unique_ptr<Thunk>> thunks;
+  for (int i = 0, e = output_instructions.size(); i != e; ++i) {
+    if (output_instructions[i]->opcode() != HloOpcode::kReduce) {
+      continue;
+    }
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<Thunk> initializer_thunk,
+        BuildInitializerThunk(unnested_hlo,
+                              (output_instructions[i] == reduce_or_tuple)
+                                  ? ShapeIndex()
+                                  : ShapeIndex({i})));
+    thunks.push_back(std::move(initializer_thunk));
+  }
+
+  // Build a kernel thunk to compute all the outputs.
+  std::unique_ptr<KernelThunk> kernel_thunk =
+      BuildKernelThunk(unnested_hlo, /*implements_whole_instruction=*/false);
+
+  const Shape& input_shape = first_reduce->operand(0)->shape();
+  // The layout of a reduction input is either set by LayoutAssignment for
+  // unnested kReduce or by InstructionFusion for fused kReduce.
+  CHECK(input_shape.has_layout()) << "LayoutAssignment or InstructionFusion "
+                                     "doesn't set the input layout of "
+                                  << first_reduce->ToString();
+
+  bool is_row_reduction;
+  llvm_ir::KernelMappingScheme mapping_scheme;
+  std::tie(mapping_scheme, is_row_reduction) =
+      ComputeMappingSchemeAndReductionKind(first_reduce);
+  ReductionCodegenInfo reduction_info(&mapping_scheme, is_row_reduction);
+  KernelCodeGenerator kernel_generator(
+      /*tile_element_generator=*/
+      [&](HloInstruction* hlo, const llvm_ir::IrArray::Index& index,
+          const KernelCodegenInfo* kernel_info, llvm::Value* y_loc,
+          llvm::Value* x_loc) {
+        EmitTileElementForReduction(hlo, index, kernel_info, y_loc, x_loc);
+      },
+      /*block_prologue_generator=*/
+      [&](HloInstruction* hlo, KernelCodegenInfo* kernel_info) {
+        EmitPrologueForReduction(hlo, kernel_info);
+      },
+      /*block_epilogue_generator*/
+      [&](HloInstruction* hlo, KernelCodegenInfo* kernel_info) {
+        EmitEpilogueForReduction(hlo, kernel_info);
+      });
+
+  LaunchDimensions launch_dimensions =
+      EmitKernel(unnested_hlo, {}, kernel_generator, &reduction_info);
+  UpdateLaunchDimensions(launch_dimensions, kernel_thunk.get(),
+                         ir_emitter_context_->llvm_module());
+
+  thunks.push_back(std::move(kernel_thunk));
+  std::unique_ptr<SequentialThunk> sequential_thunk =
+      absl::make_unique<SequentialThunk>(std::move(thunks), unnested_hlo);
+  AddThunkToThunkSequence(std::move(sequential_thunk));
+
+  return Status::OK();
+}
+
 Status IrEmitterUnnested::EmitConstantGlobals() {
   for (const BufferAllocation& allocation :
        ir_emitter_context_->buffer_assignment().Allocations()) {
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index e09ed657a8..1ebea7ab48 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_UNNESTED_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_IR_EMITTER_UNNESTED_H_
 
+#include "absl/container/inlined_vector.h"
 #include "tensorflow/compiler/xla/service/gpu/ir_emitter.h"
 #include "tensorflow/compiler/xla/service/gpu/sequential_thunk.h"
 #include "tensorflow/compiler/xla/service/gpu/thunk.h"
@@ -68,9 +69,12 @@ class IrEmitterUnnested : public IrEmitter {
     explicit KernelCodegenInfo(llvm_ir::KernelMappingScheme* mapping_scheme)
         : mapping_scheme_(mapping_scheme),
           tiled_param_info_(nullptr),
-          lane_id_(nullptr) {}
+          lane_id_(nullptr),
+          index_ty_(nullptr) {}
+    virtual ~KernelCodegenInfo() {}
 
     void SetLaneId(llvm::Value* v) { lane_id_ = v; }
+    void SetIndexType(llvm::Type* t) { index_ty_ = t; }
     void SetTiledParamInfo(llvm_ir::TiledParameterInfo* tiled_param_info) {
       CHECK_EQ(tiled_param_info_, nullptr);
       tiled_param_info_ = tiled_param_info;
@@ -83,11 +87,13 @@ class IrEmitterUnnested : public IrEmitter {
     llvm_ir::TiledParameterInfo* GetTiledParameterInfo() const {
       return tiled_param_info_;
     }
+    llvm::Type* GetIndexType() const { return index_ty_; }
 
    private:
     llvm_ir::KernelMappingScheme* mapping_scheme_;
     llvm_ir::TiledParameterInfo* tiled_param_info_;
     llvm::Value* lane_id_;
+    llvm::Type* index_ty_;
   };
 
   // A function object to prepare for the code generation for a tile block.
@@ -200,82 +206,19 @@ class IrEmitterUnnested : public IrEmitter {
 
   // Helper for writing extra outputs from inside a reduce kernel.
   Status EmitExtraOutputsForReduce(
-      const HloInstruction* reduce, const llvm_ir::IrArray::Index& index,
+      const HloInstruction* unnested_hlo, const llvm_ir::IrArray::Index& index,
       absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
           extra_output_gens);
 
-  // EmitColumnReduction and EmitRowReduction emit code for column and row
-  // reduction of a matrix and/or 3D tensor. Row and column reduction have
-  // different memory access pattern, so for performance their implementations
-  // are significantly different.
+  // Generates code for reduction to contiguous dimensions.
   //
-  // Emits code that reduces a matrix of shape [height x width] to a vector of
-  // [width]. Other parameters have the same meaning as those of
-  // `EmitReductionToVector`. Note that input shape might not be
-  // [height x width], but can be bitcast to [height x width] with "height"
-  // being the major dimension.
-  Status EmitColumnReduction(
-      KernelThunk* kernel_thunk, int64 height, int64 width,
-      HloInstruction* reduce, const Shape& input_shape,
-      absl::Span<const llvm_ir::ElementGenerator> input_gens,
-      absl::Span<const llvm_ir::ElementGenerator> init_value_gens,
-      absl::Span<HloComputation* const> reducers,
-      absl::Span<const ShapeIndex> reduce_output_shapes,
-      absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-          extra_output_gens);
-
-  // Emits code that reduces a 3D tensor of shape [depth x height x width] to a
-  // vector of shape [height]. Other parameters have the same meaning as those
-  // of `EmitReductionToVector`. Note that input shape might not be
-  // [depth x height x width], but can be bitcast to [depth x height x width]
-  // with "depth" being the most major dimension.
-  Status EmitRowReduction(
-      KernelThunk* kernel_thunk, int64 depth, int64 height, int64 width,
-      HloInstruction* reduce, const Shape& input_shape,
-      absl::Span<const llvm_ir::ElementGenerator> input_gens,
-      absl::Span<const llvm_ir::ElementGenerator> init_value_gens,
-      absl::Span<HloComputation* const> reducers,
-      absl::Span<const ShapeIndex> reduce_output_shapes,
-      absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-          extra_output_gens);
-
-  // Emits code that reduces a tensor of arbitrary rank to a scalar.
-  Status EmitReductionToScalar(
-      KernelThunk* kernel_thunk, HloInstruction* reduce,
-      const Shape& input_shape,
-      absl::Span<const llvm_ir::ElementGenerator> input_gens,
-      absl::Span<const llvm_ir::ElementGenerator> init_value_gens,
-      absl::Span<HloComputation* const> reducers,
-      absl::Span<const ShapeIndex> reduce_output_shapes,
-      absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-          extra_output_gens);
+  // Prerequisite: `IsReductionToVector(*unnested_hlo)`
+  Status EmitReductionToVector(HloInstruction* unnested_hlo);
 
-  // Figures out whether `reduce` is a row or column reduction, and which
-  // dimensions to reduce, and calls either `EmitRowReduction` or
-  // `EmitColumnReduction` as appropriate. `input_shape` is the shape of the
-  // input array, which is the operand of the Reduce instruction if unfused or
-  // of the Fusion instruction if fused. `input_gen` and `init_value_gen`
-  // generate elements of the input and the initial value. Other parameters mean
-  // the same as for `HandleReduce`.
-  //
-  // Multiple reduces can be emitted in the same loop, assuming they have the
-  // same input and output shapes, and the same reduce dimensions.
-  //
-  // extra_output_gens can contain extra generators for intermediate outputs.
-  // These must have the same shape as the reduce input as they are computed
-  // when the reduce inputs are being read.
-  //
-  // Prerequisite: `IsReductionToVector(*reduce)`
-  Status EmitReductionToVector(
-      KernelThunk* kernel_thunk, HloInstruction* reduce,
-      const Shape& input_shape,
-      absl::Span<const llvm_ir::ElementGenerator> input_gens,
-      absl::Span<const llvm_ir::ElementGenerator> init_value_gens,
-      absl::Span<const int64> dimensions_to_reduce,
-      absl::Span<HloComputation* const> reducers,
-      absl::Span<const ShapeIndex> reduce_output_shapes,
-      absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
-          extra_output_gens);
+  // Computes the KernelMappingScheme for the reduce HLO and indicates whether
+  // the reduction is a row reduction.
+  std::tuple<llvm_ir::KernelMappingScheme, bool>
+  ComputeMappingSchemeAndReductionKind(const HloInstruction* first_reduce);
 
   // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in
   // the process. `scatter` may be fused, scatter indices are taken from
@@ -314,6 +257,28 @@ class IrEmitterUnnested : public IrEmitter {
                                 const llvm_ir::IrArray::Index& index,
                                 const KernelCodegenInfo* kernel_info,
                                 llvm::Value* y_loc, llvm::Value* x_loc);
+  // Emits code to process a tensor element in a tile for the given input hlo
+  // that is either a unnested kReduce or a kInput fusion.
+  void EmitTileElementForReduction(HloInstruction* unnested_hlo,
+                                   const llvm_ir::IrArray::Index& index,
+                                   const KernelCodegenInfo* kernel_info,
+                                   llvm::Value* y_loc, llvm::Value* x_loc);
+  // Prepares for the code generation for a tile block of a reduction kernel.
+  void EmitPrologueForReduction(HloInstruction* unnested_hlo,
+                                KernelCodegenInfo* kernel_info);
+  void EmitPrologueForOneReduction(HloInstruction* unnested_hlo,
+                                   HloInstruction* reduce_inst, int reduce_idx,
+                                   KernelCodegenInfo* kernel_info,
+                                   GpuElementalIrEmitter* elemental_emitter,
+                                   ShapeIndex output_shape_index);
+  // Wraps up the code generation for a tile block of a reduction kernel.
+  void EmitEpilogueForReduction(HloInstruction* unnested_hlo,
+                                KernelCodegenInfo* kernel_info);
+  // For each reducer, emits the shuffle-down loop to accumulate the partial
+  // result to the global result.
+  void EmitFullWarpShuffleDownLoopForAllReduces(
+      absl::Span<HloComputation* const> reducers,
+      absl::Span<llvm::AllocaInst* const> partial_result_addresses);
 
   // Generates the IrArray for each input of an hlo and returns a vector that
   // constains such IrArrays.
diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
index 375f68a159..bfed4f5230 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc
@@ -39,6 +39,25 @@ std::ostream& operator<<(std::ostream& out,
   return out;
 }
 
+int64 ThreadsPerBlockLimit(const se::DeviceDescription& device_desc) {
+  int64 threads_per_block = device_desc.threads_per_block_limit();
+  if (threads_per_block == 0) {
+    static std::atomic<int64> log_count{0};
+    if (log_count.fetch_add(1) < 8) {
+      LOG(WARNING) << "Attempting to calculate launch dimensions for GPU "
+                      "without full information about its capabilities.  "
+                      "StreamExecutor's PopulateDeviceDescription should be "
+                      "updated for this device.";
+    }
+    threads_per_block = device_desc.threads_per_warp();
+    if (threads_per_block == 0) {
+      // Fall back to *something* if we can't even get num threads per warp.
+      threads_per_block = 32;
+    }
+  }
+  return threads_per_block;
+}
+
 // Calculates the launch dimensions used to invoke `hlo`.
 LaunchDimensions CalculateLaunchDimensions(
     const Shape& shape, const se::DeviceDescription& device_desc,
@@ -62,21 +81,7 @@ LaunchDimensions CalculateLaunchDimensions(
   //
   //   <num threads per block> * <max blocks per core> = <max threads per core>
 
-  int64 threads_per_block = device_desc.threads_per_block_limit();
-  if (threads_per_block == 0) {
-    static std::atomic<int64> log_count{0};
-    if (log_count.fetch_add(1) < 8) {
-      LOG(WARNING) << "Attempting to calculate launch dimensions for GPU "
-                      "without full information about its capabilities.  "
-                      "StreamExecutor's PopulateDeviceDescription should be "
-                      "updated for this device.";
-    }
-    threads_per_block = device_desc.threads_per_warp();
-    if (threads_per_block == 0) {
-      // Fall back to *something* if we can't even get num threads per warp.
-      threads_per_block = 32;
-    }
-  }
+  int64 threads_per_block = ThreadsPerBlockLimit(device_desc);
 
   if (num_elements < threads_per_block) {
     threads_per_block = num_elements;
diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.h b/tensorflow/compiler/xla/service/gpu/partition_assignment.h
index 02471129e0..eb41dcccb9 100644
--- a/tensorflow/compiler/xla/service/gpu/partition_assignment.h
+++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.h
@@ -57,6 +57,9 @@ class LaunchDimensions {
 std::ostream& operator<<(std::ostream& out,
                          const LaunchDimensions& launch_dims);
 
+// Returns the maximum number of threads per block allowed by the device.
+int64 ThreadsPerBlockLimit(const se::DeviceDescription& device_desc);
+
 LaunchDimensions CalculateLaunchDimensions(
     const Shape& shape, const se::DeviceDescription& device_desc,
     int unroll_factor = 1);
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.cc
index c26711e526..cebbc42901 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.cc
@@ -120,7 +120,7 @@ KernelMappingScheme::KernelMappingScheme(
     absl::Span<const int64> req_block_sizes, int64 num_threads_y,
     int64 num_threads_x, llvm::IRBuilder<>* b)
     : b_(b),
-      dims_in_elems_(dims_in_elems),
+      dims_in_elems_(dims_in_elems.begin(), dims_in_elems.end()),
       tile_sizes_{1, tile_size_y, tile_size_x},
       num_threads_x_(num_threads_x),
       num_threads_y_(num_threads_y) {
@@ -170,14 +170,16 @@ IrArray::Index KernelMappingScheme::EmitBlockIndex(llvm::Type* index_ty) {
 
 IrArray::Index KernelMappingScheme::GetTileIndexForBlockOrigin(
     const IrArray::Index& block_index) {
-  IrArray::Index tile_index = block_index;
+  DCHECK_EQ(block_index.size(), block_sizes_.size());
+  std::vector<llvm::Value*> multidim;
+  multidim.reserve(block_sizes_.size());
   for (int i = 0; i < block_sizes_.size(); ++i) {
-    tile_index[i] = b_->CreateMul(
+    multidim.push_back(b_->CreateMul(
         block_index[i],
         llvm::ConstantInt::get(block_index[i]->getType(), block_sizes_[i]),
-        "block_origin." + std::to_string(i));
+        "block_origin." + std::to_string(i)));
   }
-  return tile_index;
+  return IrArray::Index(multidim, block_index[0]->getType());
 }
 
 IrArray::Index KernelMappingScheme::GetElementIndexForTileOrigin(
@@ -217,14 +219,14 @@ KernelMappingScheme::EmitThreadYXCoordinate(llvm::Type* index_ty) {
   // defined by (num_thread_y, num_thread_x) from thread_id.
   llvm::CallInst* thread_id_raw = llvm_ir::EmitCallToIntrinsic(
       llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x, {}, {}, b_);
-  llvm_ir::AddRangeMetadata(0, GetThreadsPerTile(), thread_id_raw);
+  llvm_ir::AddRangeMetadata(0, GetThreadsPerBlock(), thread_id_raw);
   llvm::Value* thread_id_int =
       b_->CreateIntCast(thread_id_raw, index_ty,
                         /*isSigned=*/true, "thread.id.x");
   llvm::Value* num_thread_x =
       llvm::ConstantInt::get(index_ty, GetNumberOfThreadsForDimensionX());
-  llvm::Value* x = b_->CreateURem(thread_id_int, num_thread_x);
-  llvm::Value* y = b_->CreateUDiv(thread_id_int, num_thread_x);
+  llvm::Value* x = b_->CreateURem(thread_id_int, num_thread_x, "thread.x");
+  llvm::Value* y = b_->CreateUDiv(thread_id_int, num_thread_x, "thread.y");
   return std::make_tuple(y, x);
 }
 
diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.h
index 06002d57b0..fb633b12e6 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.h
@@ -90,15 +90,16 @@ class KernelMappingScheme {
   enum { DimZ = 0, DimY, DimX, DimTot };
 
  public:
+  KernelMappingScheme() {}
   // dims_in_elems: the normalized tensor dimensions.
   // req_block_sizes: the requested block size in number of tiles for each
   //   dimension. The actual block size is set to min(req_block_size,
   //   dims_in_number_of_blocks).
-  explicit KernelMappingScheme(absl::Span<const int64> dims_in_elems,
-                               int64 tile_size_y, int64 tile_size_x,
-                               absl::Span<const int64> req_block_sizes,
-                               int64 num_threads_y, int64 num_threads_x,
-                               llvm::IRBuilder<>* b);
+  KernelMappingScheme(absl::Span<const int64> dims_in_elems, int64 tile_size_y,
+                      int64 tile_size_x,
+                      absl::Span<const int64> req_block_sizes,
+                      int64 num_threads_y, int64 num_threads_x,
+                      llvm::IRBuilder<>* b);
 
   absl::Span<const int64> GetDimensionsInElements() const {
     return dims_in_elems_;
@@ -133,11 +134,15 @@ class KernelMappingScheme {
   }
 
   absl::Span<const int64> GetBlockSizes() const { return block_sizes_; }
+  int64 GetTileBlockSizeForDimension(int d) const {
+    DCHECK(d >= DimZ && d <= DimX);
+    return dims_in_blocks_[d];
+  }
 
   int64 GetNumberOfThreadsForDimensionX() const { return num_threads_x_; }
   int64 GetNumberOfThreadsForDimensionY() const { return num_threads_y_; }
 
-  int64 GetThreadsPerTile() const {
+  int64 GetThreadsPerBlock() const {
     return GetNumberOfThreadsForDimensionX() *
            GetNumberOfThreadsForDimensionY();
   }
@@ -163,7 +168,7 @@ class KernelMappingScheme {
  private:
   llvm::IRBuilder<>* b_;
   // The number of elements in each dimension.
-  absl::Span<const int64> dims_in_elems_;
+  std::vector<int64> dims_in_elems_;
 
   // The number of elements for each dimension of a tile.
   std::vector<int64> tile_sizes_;
-- 
GitLab


From 15c5a3bc95931a5540669b09ab9fe56d139de420 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Tue, 11 Dec 2018 09:36:27 -0800
Subject: [PATCH 347/873] Reduce flakiness of testScanCapturesVariables

PiperOrigin-RevId: 225017976
---
 tensorflow/python/kernel_tests/ctc_loss_op_test.py | 2 +-
 tensorflow/python/ops/ctc_ops.py                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/kernel_tests/ctc_loss_op_test.py b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
index e24f304c1b..39a637d831 100644
--- a/tensorflow/python/kernel_tests/ctc_loss_op_test.py
+++ b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
@@ -811,7 +811,7 @@ class CTCLossTestV2(test.TestCase):
       x = random_ops.random_uniform([])
       fn = lambda accum, elem: accum + x * elem
       out = ctc_ops._scan(fn, constant_op.constant([0.0, 1.0, 2.0]), 23.0)
-      self.assertAllEqual(*sess.run([
+      self.assertAllClose(*sess.run([
           [23.0 + x * 0.0, 23.0 + x * 1.0, 23.0 + x * 3.0], out
       ]))
 
diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py
index 3a7eb9355a..db7f9d2378 100644
--- a/tensorflow/python/ops/ctc_ops.py
+++ b/tensorflow/python/ops/ctc_ops.py
@@ -1029,7 +1029,7 @@ def _scan(fn, elems, initial, reverse=False, inclusive=False, final_only=False):
   for the forward backward use case.
 
   Examples:
-    scan(lambda a, e: a + e, [1.0, 2.0, 3.0], 1.0) => [2.0, 3.0, 4.0]
+    scan(lambda a, e: a + e, [1.0, 2.0, 3.0], 1.0) => [2.0, 4.0, 7.0]
 
     Multiple accumulators:
       scan(lambda a, e: (a[0] + e, a[1] * e), [1.0, 2.0, 3.0], (0.0, 1.0))
-- 
GitLab


From 73b5a64e38c0fb03eb1b860464ea48f5eb03e288 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 09:41:03 -0800
Subject: [PATCH 348/873] Update Google Cloud Bigtable C++ Client to the v0.4.0
 release.

PiperOrigin-RevId: 225018765
---
 .../kernels/test_kernels/bigtable_test_client.cc      | 11 +++++++++++
 .../kernels/test_kernels/bigtable_test_client.h       |  7 +++++++
 tensorflow/workspace.bzl                              |  8 ++++----
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.cc b/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.cc
index e95dc57718..3fe71a2ea7 100644
--- a/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.cc
+++ b/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.cc
@@ -399,6 +399,17 @@ BigtableTestClient::AsyncMutateRows(
   return nullptr;
 }
 
+std::unique_ptr<grpc::ClientAsyncResponseReaderInterface<
+    google::bigtable::v2::CheckAndMutateRowResponse>>
+BigtableTestClient::AsyncCheckAndMutateRow(
+    grpc::ClientContext* context,
+    const google::bigtable::v2::CheckAndMutateRowRequest& request,
+    grpc::CompletionQueue* cq) {
+  LOG(WARNING) << "Call to InMemoryDataClient::" << __func__
+               << "(); this will likely cause a crash!";
+  return nullptr;
+}
+
 std::shared_ptr<grpc::Channel> BigtableTestClient::Channel() {
   LOG(WARNING) << "Call to InMemoryDataClient::Channel(); this will likely "
                   "cause a crash!";
diff --git a/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.h b/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.h
index c4a1f06bc5..8570590457 100644
--- a/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.h
+++ b/tensorflow/contrib/bigtable/kernels/test_kernels/bigtable_test_client.h
@@ -80,6 +80,13 @@ class BigtableTestClient : public ::google::cloud::bigtable::DataClient {
                   const ::google::bigtable::v2::MutateRowsRequest& request,
                   ::grpc::CompletionQueue* cq, void* tag) override;
 
+  std::unique_ptr<grpc::ClientAsyncResponseReaderInterface<
+      google::bigtable::v2::CheckAndMutateRowResponse>>
+  AsyncCheckAndMutateRow(
+      grpc::ClientContext* context,
+      const google::bigtable::v2::CheckAndMutateRowRequest& request,
+      grpc::CompletionQueue* cq) override;
+
   std::shared_ptr<grpc::Channel> Channel() override;
 
  private:
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 60dcca3207..5210df240d 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -179,15 +179,15 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
 
     tf_http_archive(
         name = "com_github_googlecloudplatform_google_cloud_cpp",
-        sha256 = "3ade2072e6588ff56c0434abe6c63aa5f3f2d56be15a299bafc7e9cdf0a12c17",
-        strip_prefix = "google-cloud-cpp-0.3.0",
+        sha256 = "44eee8bd47cbd5ff192e895b45f9f913e2e117f10fdb9af0fd3b1a87a7b53bc3",
+        strip_prefix = "google-cloud-cpp-0.4.0",
         system_build_file = clean_dep("//third_party/systemlibs:google_cloud_cpp.BUILD"),
         system_link_files = {
             "//third_party/systemlibs:google_cloud_cpp.google.cloud.bigtable.BUILD": "google/cloud/bigtable/BUILD",
         },
         urls = [
-            "https://mirror.bazel.build/github.com/GoogleCloudPlatform/google-cloud-cpp/archive/v0.3.0.tar.gz",
-            "https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/v0.3.0.tar.gz",
+            "https://mirror.bazel.build/github.com/GoogleCloudPlatform/google-cloud-cpp/archive/v0.4.0.tar.gz",
+            "https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/v0.4.0.tar.gz",
         ],
     )
 
-- 
GitLab


From dd80d3f78710c6ebb4bfd8cad9c5cc01a1acf51e Mon Sep 17 00:00:00 2001
From: "Wen-Heng (Jack) Chung" <whchung@gmail.com>
Date: Tue, 11 Dec 2018 18:01:34 +0000
Subject: [PATCH 349/873] Rename CudnnScratchAllocator to DnnScratchAllocator

Rename CudnnScratchAllocator as the logic is applicable for not only Cudnn, but
also other DNN algorithm libraries such as MIOpen.
---
 .../kernels/fused_conv2d_bias_activation_op.cc       |  6 +++---
 tensorflow/core/kernels/conv_grad_filter_ops.cc      |  6 +++---
 tensorflow/core/kernels/conv_grad_input_ops.cc       |  6 +++---
 tensorflow/core/kernels/conv_grad_ops_3d.cc          | 12 ++++++------
 tensorflow/core/kernels/conv_ops.cc                  |  8 ++++----
 tensorflow/core/kernels/conv_ops_3d.cc               |  6 +++---
 tensorflow/core/kernels/conv_ops_gpu.h               | 10 +++++-----
 7 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index 93b1aaa85e..c541c71f99 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -522,7 +522,7 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
   auto bias_ptr = AsDeviceMemory(bias.template flat<BiasType>().data(),
                                  bias.template flat<BiasType>().size());
 
-  static int64 ConvolveScratchSize = GetCudnnWorkspaceLimit(
+  static int64 ConvolveScratchSize = GetDnnWorkspaceLimit(
       // default value is in bytes despite the name of the environment variable
       "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB
   );
@@ -570,7 +570,7 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
     for (auto profile_algorithm : algorithms) {
       // TODO(zhengxq): profile each algorithm multiple times to better
       // accuracy.
-      CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
+      DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
       dnn::ProfileResult profile_result;
       bool cudnn_launch_status =
           stream
@@ -609,7 +609,7 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
                                                       algorithm_config);
   }
 
-  CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
+  DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
   bool cudnn_launch_status =
       stream
           ->ThenFusedConvolveWithAlgorithm(
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
index bc30da4099..efd8772226 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -903,7 +903,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
   auto input_ptr = AsDeviceMemory(transformed_input.template flat<T>().data(),
                                   transformed_input.template flat<T>().size());
 
-  static int64 ConvolveBackwardFilterScratchSize = GetCudnnWorkspaceLimit(
+  static int64 ConvolveBackwardFilterScratchSize = GetDnnWorkspaceLimit(
       "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB by default
   );
   int device_id = stream->parent()->device_ordinal();
@@ -939,7 +939,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
     for (auto profile_algorithm : algorithms) {
       // TODO(zhengxq): profile each algorithm multiple times to better
       // accuracy.
-      CudnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
+      DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
                                               ctx);
       ProfileResult profile_result;
       bool cudnn_launch_status =
@@ -977,7 +977,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
     AutoTuneConvBwdFilter::GetInstance()->Insert(conv_parameters,
                                                  algorithm_config);
   }
-  CudnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
+  DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
                                           ctx);
   bool cudnn_launch_status =
       stream
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index e06af15f2f..7339fb736f 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -951,10 +951,10 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
       AsDeviceMemory(pre_transformed_in_backprop.template flat<T>().data(),
                      pre_transformed_in_backprop.template flat<T>().size());
 
-  static int64 ConvolveBackwardDataScratchSize = GetCudnnWorkspaceLimit(
+  static int64 ConvolveBackwardDataScratchSize = GetDnnWorkspaceLimit(
       "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB by default
   );
-  CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, ctx);
+  DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, ctx);
   int device_id = stream->parent()->device_ordinal();
   DataType dtype = out_backprop.dtype();
   ConvParameters conv_parameters = {
@@ -988,7 +988,7 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
     for (auto profile_algorithm : algorithms) {
       // TODO(zhengxq): profile each algorithm multiple times to better
       // accuracy.
-      CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
+      DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
                                               ctx);
       ProfileResult profile_result;
       bool cudnn_launch_status =
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index e4c49efea0..a518fcc874 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -1333,7 +1333,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
         AsDeviceMemory(pre_transformed_in_backprop.template flat<T>().data(),
                        pre_transformed_in_backprop.template flat<T>().size());
 
-    static int64 ConvolveBackwardDataScratchSize = GetCudnnWorkspaceLimit(
+    static int64 ConvolveBackwardDataScratchSize = GetDnnWorkspaceLimit(
         "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32);  // 4GB by default
 
     const int device_id = stream->parent()->device_ordinal();
@@ -1368,7 +1368,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
       for (auto profile_algorithm : algorithms) {
         // TODO(zhengxq): profile each algorithm multiple times to better
         // accuracy.
-        CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
+        DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
                                                 context);
         ProfileResult profile_result;
         bool cudnn_launch_status =
@@ -1405,7 +1405,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
       AutoTuneConv3dBwdData::GetInstance()->Insert(conv_parameters,
                                                    algorithm_config);
     }
-    CudnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
+    DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
                                             context);
     bool cudnn_launch_status =
         stream
@@ -1739,7 +1739,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
         AsDeviceMemory(transformed_input.template flat<T>().data(),
                        transformed_input.template flat<T>().size());
 
-    static int64 ConvolveBackwardFilterScratchSize = GetCudnnWorkspaceLimit(
+    static int64 ConvolveBackwardFilterScratchSize = GetDnnWorkspaceLimit(
         "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32);  // 4GB by default
 
     const int device_id = stream->parent()->device_ordinal();
@@ -1774,7 +1774,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
       for (auto profile_algorithm : algorithms) {
         // TODO(zhengxq): profile each algorithm multiple times to better
         // accuracy.
-        CudnnScratchAllocator scratch_allocator(
+        DnnScratchAllocator scratch_allocator(
             ConvolveBackwardFilterScratchSize, context);
         ProfileResult profile_result;
         bool cudnn_launch_status =
@@ -1812,7 +1812,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
       AutoTuneConv3dBwdFilter::GetInstance()->Insert(conv_parameters,
                                                      algorithm_config);
     }
-    CudnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
+    DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
                                             context);
     bool cudnn_launch_status =
         stream
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index 74857fc207..8c2deeed0e 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -521,7 +521,7 @@ template struct LaunchConv2DOp<CPUDevice, float>;
 template struct LaunchConv2DOp<CPUDevice, double>;
 
 #if GOOGLE_CUDA
-int64 GetCudnnWorkspaceLimit(const string& envvar_in_mb,
+int64 GetDnnWorkspaceLimit(const string& envvar_in_mb,
                              int64 default_value_in_bytes) {
   const char* workspace_limit_in_mb_str = getenv(envvar_in_mb.c_str());
   if (workspace_limit_in_mb_str != nullptr &&
@@ -759,7 +759,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
       AsDeviceMemory(transformed_output.template flat<T>().data(),
                      transformed_output.template flat<T>().size());
 
-  static int64 ConvolveScratchSize = GetCudnnWorkspaceLimit(
+  static int64 ConvolveScratchSize = GetDnnWorkspaceLimit(
       // default value is in bytes despite the name of the environment variable
       "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB
   );
@@ -803,7 +803,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
     for (auto profile_algorithm : algorithms) {
       // TODO(zhengxq): profile each algorithm multiple times to better
       // accuracy.
-      CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
+      DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
       ProfileResult profile_result;
       bool cudnn_launch_status =
           stream
@@ -841,7 +841,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
     AutoTuneConv::GetInstance()->Insert(conv_parameters, algorithm_config);
   }
 
-  CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
+  DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
   bool cudnn_launch_status =
       stream
           ->ThenConvolveWithAlgorithm(input_desc, input_ptr, filter_desc,
diff --git a/tensorflow/core/kernels/conv_ops_3d.cc b/tensorflow/core/kernels/conv_ops_3d.cc
index f20ac93b5a..5a59e20cc2 100644
--- a/tensorflow/core/kernels/conv_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_ops_3d.cc
@@ -407,7 +407,7 @@ struct LaunchConvOp<GPUDevice, T> {
         AsDeviceMemory(transformed_output.template flat<T>().data(),
                        transformed_output.template flat<T>().size());
 
-    static int64 ConvolveScratchSize = GetCudnnWorkspaceLimit(
+    static int64 ConvolveScratchSize = GetDnnWorkspaceLimit(
         "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32);  // 4GB by default
 
     int device_id = stream->parent()->device_ordinal();
@@ -450,7 +450,7 @@ struct LaunchConvOp<GPUDevice, T> {
       for (auto profile_algorithm : algorithms) {
         // TODO(zhengxq): profile each algorithm multiple times to better
         // accuracy.
-        CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
+        DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
         ProfileResult profile_result;
         bool cudnn_launch_status =
             stream
@@ -486,7 +486,7 @@ struct LaunchConvOp<GPUDevice, T> {
       AutoTuneConv3d::GetInstance()->Insert(conv_parameters, algorithm_config);
     }
 
-    CudnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
+    DnnScratchAllocator scratch_allocator(ConvolveScratchSize, ctx);
     bool cudnn_launch_status =
         stream
             ->ThenConvolveWithAlgorithm(input_desc, input_ptr, filter_desc,
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index 21d135decd..19fc45b756 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -27,19 +27,19 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Get the Cudnn workspace limit from the environment variable, which is in MB.
+// Get the Dnn workspace limit from the environment variable, which is in MB.
 // Return the workspace memory limit in bytes. If no value is set, return the
 // default value.
-int64 GetCudnnWorkspaceLimit(const string& envvar_in_mb,
+int64 GetDnnWorkspaceLimit(const string& envvar_in_mb,
                              int64 default_value_in_bytes);
 
 // A class to provide scratch-space allocator for Stream-Executor Cudnn
 // callback. TensorFlow is responsible for releasing the temporary buffers after
 // the kernel finishes.
-class CudnnScratchAllocator : public se::ScratchAllocator {
+class DnnScratchAllocator : public se::ScratchAllocator {
  public:
-  virtual ~CudnnScratchAllocator() {}
-  CudnnScratchAllocator(int64 memory_limit, OpKernelContext* context)
+  virtual ~DnnScratchAllocator() {}
+  DnnScratchAllocator(int64 memory_limit, OpKernelContext* context)
       : memory_limit_(memory_limit), total_byte_size_(0), context_(context) {}
   int64 GetMemoryLimitInBytes(se::Stream* stream) override {
     return memory_limit_;
-- 
GitLab


From c6129ba7abc2245f8b05ce16aea95aed954985a1 Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Tue, 11 Dec 2018 10:04:00 -0800
Subject: [PATCH 350/873] Fix comment in Eager C API.

Somehow this comment seems to have been repeated. I delete the second copy and re-flow the existing one.

PiperOrigin-RevId: 225022682
---
 tensorflow/c/eager/c_api.h | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index f80ae5a6d0..120748ab76 100755
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -170,23 +170,11 @@ TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h,
                                                   int dim_index,
                                                   TF_Status* status);
 
-// Returns the device of the operation that produced `h`.
-// If `h` was produced by a copy, returns the destination device of
-// the copy. Note that returned device name is not always the device
-// holding the tensor handle's memory. If you want the latter, use
-// TFE_TensorHandleBackingDeviceName.
-// This function will block till the operation that produces `h` has completed.
-//
-// Device on which the kernel of the operation that produced `h` ran.
-//
-// If `h` was produced by a copy, returns the destination device of
-// the copy.
-//
-// Note that returned device name is not always the device that owns the memory
-// that backs the tensor handle. For the latter see
-// TFE_TensorHandleBackingDeviceName.
-//
-// This function will block till the operation that produces `h` has completed.
+// Returns the device of the operation that produced `h`. If `h` was produced by
+// a copy, returns the destination device of the copy. Note that the returned
+// device name is not always the device holding the tensor handle's memory. If
+// you want the latter, use TFE_TensorHandleBackingDeviceName. This function
+// will block till the operation that produces `h` has completed.
 TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceName(
     TFE_TensorHandle* h, TF_Status* status);
 
-- 
GitLab


From 7d1c9e739453d3a4d082a6b63ce05ee9048538aa Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Tue, 11 Dec 2018 10:22:05 -0800
Subject: [PATCH 351/873] Move some class symbols from tf 2.0. -
 io.PaddingFIFOQueue -> queue.PaddingFIFOQueue - io.PriorityQueue ->
 queue.PriorityQueue - io.QueueBase -> queue.QueueBase - io.RandomShuffleQueue
 -> queue.RandomShuffleQueue - FIFOQueue -> queue.FIFOQueue - train.Server ->
 distribute.Server

PiperOrigin-RevId: 225025906
---
 tensorflow/python/ops/data_flow_ops.py        | 25 ++++---
 .../tools/api/generator/api_init_files.bzl    |  1 +
 .../tools/api/generator/api_init_files_v1.bzl |  1 +
 tensorflow/python/training/server_lib.py      |  4 +-
 .../tensorflow.distribute.-server.pbtxt}      |  2 +-
 .../api/golden/v1/tensorflow.distribute.pbtxt |  4 ++
 .../tools/api/golden/v1/tensorflow.pbtxt      |  4 ++
 .../tensorflow.queue.-f-i-f-o-queue.pbtxt}    |  2 +-
 ...orflow.queue.-padding-f-i-f-o-queue.pbtxt} |  2 +-
 .../tensorflow.queue.-priority-queue.pbtxt}   |  2 +-
 .../tensorflow.queue.-queue-base.pbtxt}       |  2 +-
 ...sorflow.queue.-random-shuffle-queue.pbtxt} |  2 +-
 .../api/golden/v1/tensorflow.queue.pbtxt      | 23 +++++++
 .../v2/tensorflow.distribute.-server.pbtxt    | 29 ++++++++
 .../api/golden/v2/tensorflow.distribute.pbtxt |  4 ++
 .../tools/api/golden/v2/tensorflow.io.pbtxt   | 16 -----
 .../tools/api/golden/v2/tensorflow.pbtxt      |  8 +--
 .../v2/tensorflow.queue.-f-i-f-o-queue.pbtxt  | 66 +++++++++++++++++++
 ...sorflow.queue.-padding-f-i-f-o-queue.pbtxt | 66 +++++++++++++++++++
 .../v2/tensorflow.queue.-priority-queue.pbtxt | 66 +++++++++++++++++++
 .../v2/tensorflow.queue.-queue-base.pbtxt     | 65 ++++++++++++++++++
 ...nsorflow.queue.-random-shuffle-queue.pbtxt | 66 +++++++++++++++++++
 .../api/golden/v2/tensorflow.queue.pbtxt      | 23 +++++++
 .../api/golden/v2/tensorflow.train.pbtxt      |  4 --
 tensorflow/tools/compatibility/renames_v2.py  | 16 +++--
 25 files changed, 457 insertions(+), 46 deletions(-)
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-server.pbtxt => v1/tensorflow.distribute.-server.pbtxt} (96%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.-f-i-f-o-queue.pbtxt => v1/tensorflow.queue.-f-i-f-o-queue.pbtxt} (98%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt => v1/tensorflow.queue.-padding-f-i-f-o-queue.pbtxt} (98%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.io.-priority-queue.pbtxt => v1/tensorflow.queue.-priority-queue.pbtxt} (98%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.io.-queue-base.pbtxt => v1/tensorflow.queue.-queue-base.pbtxt} (98%)
 rename tensorflow/tools/api/golden/{v2/tensorflow.io.-random-shuffle-queue.pbtxt => v1/tensorflow.queue.-random-shuffle-queue.pbtxt} (97%)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.distribute.-server.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.queue.-f-i-f-o-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.queue.-padding-f-i-f-o-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.queue.-priority-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.queue.-queue-base.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.queue.-random-shuffle-queue.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.queue.pbtxt

diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py
index 2030332e4e..1557bdf0ed 100644
--- a/tensorflow/python/ops/data_flow_ops.py
+++ b/tensorflow/python/ops/data_flow_ops.py
@@ -113,8 +113,9 @@ def _shape_common(s1, s2):
 
 
 # pylint: disable=protected-access
-@tf_export("io.QueueBase", v1=["io.QueueBase", "QueueBase"])
-@deprecation.deprecated_endpoints("QueueBase")
+@tf_export("queue.QueueBase",
+           v1=["queue.QueueBase", "io.QueueBase", "QueueBase"])
+@deprecation.deprecated_endpoints(["io.QueueBase", "QueueBase"])
 class QueueBase(object):
   """Base class for queue implementations.
 
@@ -616,8 +617,11 @@ def _shared_name(shared_name):
 
 
 @tf_export(
-    "io.RandomShuffleQueue", v1=["io.RandomShuffleQueue", "RandomShuffleQueue"])
-@deprecation.deprecated_endpoints("RandomShuffleQueue")
+    "queue.RandomShuffleQueue",
+    v1=["queue.RandomShuffleQueue",
+        "io.RandomShuffleQueue", "RandomShuffleQueue"])
+@deprecation.deprecated_endpoints(
+    ["io.RandomShuffleQueue", "RandomShuffleQueue"])
 class RandomShuffleQueue(QueueBase):
   """A queue implementation that dequeues elements in a random order.
 
@@ -702,7 +706,8 @@ class RandomShuffleQueue(QueueBase):
     super(RandomShuffleQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
 
-@tf_export("FIFOQueue")
+@tf_export("queue.FIFOQueue", v1=["queue.FIFOQueue", "FIFOQueue"])
+@deprecation.deprecated_endpoints("FIFOQueue")
 class FIFOQueue(QueueBase):
   """A queue implementation that dequeues elements in first-in first-out order.
 
@@ -760,8 +765,9 @@ class FIFOQueue(QueueBase):
 
 
 @tf_export(
-    "io.PaddingFIFOQueue", v1=["io.PaddingFIFOQueue", "PaddingFIFOQueue"])
-@deprecation.deprecated_endpoints("PaddingFIFOQueue")
+    "queue.PaddingFIFOQueue",
+    v1=["queue.PaddingFIFOQueue", "io.PaddingFIFOQueue", "PaddingFIFOQueue"])
+@deprecation.deprecated_endpoints(["io.PaddingFIFOQueue", "PaddingFIFOQueue"])
 class PaddingFIFOQueue(QueueBase):
   """A FIFOQueue that supports batching variable-sized tensors by padding.
 
@@ -835,8 +841,9 @@ class PaddingFIFOQueue(QueueBase):
     super(PaddingFIFOQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
 
-@tf_export("io.PriorityQueue", v1=["io.PriorityQueue", "PriorityQueue"])
-@deprecation.deprecated_endpoints("PriorityQueue")
+@tf_export("queue.PriorityQueue",
+           v1=["queue.PriorityQueue", "io.PriorityQueue", "PriorityQueue"])
+@deprecation.deprecated_endpoints(["io.PriorityQueue", "PriorityQueue"])
 class PriorityQueue(QueueBase):
   """A queue implementation that dequeues elements in prioritized order.
 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 25d0c0f75c..5fee9c5eaf 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -20,6 +20,7 @@ TENSORFLOW_API_INIT_FILES = [
     "graph_util/__init__.py",
     "image/__init__.py",
     "io/__init__.py",
+    "queue/__init__.py",
     "initializers/__init__.py",
     "keras/__init__.py",
     "keras/activations/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index 99c8495ce5..8d3b86bf26 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -23,6 +23,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "graph_util/__init__.py",
     "image/__init__.py",
     "io/__init__.py",
+    "queue/__init__.py",
     "initializers/__init__.py",
     "keras/__init__.py",
     "keras/activations/__init__.py",
diff --git a/tensorflow/python/training/server_lib.py b/tensorflow/python/training/server_lib.py
index 302ca2dd44..b3c21d5337 100644
--- a/tensorflow/python/training/server_lib.py
+++ b/tensorflow/python/training/server_lib.py
@@ -23,6 +23,7 @@ from tensorflow.core.protobuf import tensorflow_server_pb2
 from tensorflow.python import pywrap_tensorflow as c_api
 from tensorflow.python.framework import errors
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -93,7 +94,8 @@ def _make_server_def(server_or_cluster_def, job_name, task_index, protocol,
   return server_def
 
 
-@tf_export("train.Server")
+@tf_export("distribute.Server", v1=["distribute.Server", "train.Server"])
+@deprecation.deprecated_endpoints("train.Server")
 class Server(object):
   """An in-process TensorFlow server, for use in distributed training.
 
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-server.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-server.pbtxt
similarity index 96%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-server.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.distribute.-server.pbtxt
index 9b8f185f5b..6c39bf4fc4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-server.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-server.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.Server"
+path: "tensorflow.distribute.Server"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.server_lib.Server\'>"
   is_instance: "<type \'object\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt
index b0dd73ca1d..31dc6e0716 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.pbtxt
@@ -20,6 +20,10 @@ tf_module {
     name: "ReplicaContext"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Server"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "Strategy"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 5592a4c59d..4ed4deea13 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -512,6 +512,10 @@ tf_module {
     name: "quantization"
     mtype: "<type \'module\'>"
   }
+  member {
+    name: "queue"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "quint16"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.queue.-f-i-f-o-queue.pbtxt
similarity index 98%
rename from tensorflow/tools/api/golden/v2/tensorflow.-f-i-f-o-queue.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.queue.-f-i-f-o-queue.pbtxt
index a095616c00..724ab5fe82 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-f-i-f-o-queue.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.queue.-f-i-f-o-queue.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.FIFOQueue"
+path: "tensorflow.queue.FIFOQueue"
 tf_class {
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.FIFOQueue\'>"
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.queue.-padding-f-i-f-o-queue.pbtxt
similarity index 98%
rename from tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.queue.-padding-f-i-f-o-queue.pbtxt
index 85306fdcac..9ef0a4d9eb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.-padding-f-i-f-o-queue.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.queue.-padding-f-i-f-o-queue.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.io.PaddingFIFOQueue"
+path: "tensorflow.queue.PaddingFIFOQueue"
 tf_class {
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PaddingFIFOQueue\'>"
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.queue.-priority-queue.pbtxt
similarity index 98%
rename from tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.queue.-priority-queue.pbtxt
index 02d8037b34..bb66beb13a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.-priority-queue.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.queue.-priority-queue.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.io.PriorityQueue"
+path: "tensorflow.queue.PriorityQueue"
 tf_class {
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PriorityQueue\'>"
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.queue.-queue-base.pbtxt
similarity index 98%
rename from tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.queue.-queue-base.pbtxt
index a30481a0ea..8faaad22af 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.-queue-base.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.queue.-queue-base.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.io.QueueBase"
+path: "tensorflow.queue.QueueBase"
 tf_class {
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
   is_instance: "<type \'object\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.queue.-random-shuffle-queue.pbtxt
similarity index 97%
rename from tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.queue.-random-shuffle-queue.pbtxt
index 82cbf9884f..31cd503b13 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.-random-shuffle-queue.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.queue.-random-shuffle-queue.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.io.RandomShuffleQueue"
+path: "tensorflow.queue.RandomShuffleQueue"
 tf_class {
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.RandomShuffleQueue\'>"
   is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.queue.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.queue.pbtxt
new file mode 100644
index 0000000000..c16e95e211
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.queue.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.queue"
+tf_module {
+  member {
+    name: "FIFOQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PaddingFIFOQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PriorityQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "QueueBase"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomShuffleQueue"
+    mtype: "<type \'type\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-server.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-server.pbtxt
new file mode 100644
index 0000000000..6c39bf4fc4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-server.pbtxt
@@ -0,0 +1,29 @@
+path: "tensorflow.distribute.Server"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.server_lib.Server\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "server_def"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "target"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'server_or_cluster_def\', \'job_name\', \'task_index\', \'protocol\', \'config\', \'start\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'True\'], "
+  }
+  member_method {
+    name: "create_local_server"
+    argspec: "args=[\'config\', \'start\'], varargs=None, keywords=None, defaults=[\'None\', \'True\'], "
+  }
+  member_method {
+    name: "join"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "start"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt
index b0dd73ca1d..31dc6e0716 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.pbtxt
@@ -20,6 +20,10 @@ tf_module {
     name: "ReplicaContext"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "Server"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "Strategy"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
index 8906329742..2d9c759e3c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.io.pbtxt
@@ -8,22 +8,6 @@ tf_module {
     name: "FixedLenSequenceFeature"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "PaddingFIFOQueue"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "PriorityQueue"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "QueueBase"
-    mtype: "<type \'type\'>"
-  }
-  member {
-    name: "RandomShuffleQueue"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "SparseFeature"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 5f31d27480..ee81e86fd5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -12,10 +12,6 @@ tf_module {
     name: "Event"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "FIFOQueue"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "GradientTape"
     mtype: "<type \'type\'>"
@@ -256,6 +252,10 @@ tf_module {
     name: "quantization"
     mtype: "<type \'module\'>"
   }
+  member {
+    name: "queue"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "quint16"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.queue.-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.queue.-f-i-f-o-queue.pbtxt
new file mode 100644
index 0000000000..724ab5fe82
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.queue.-f-i-f-o-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.queue.FIFOQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.FIFOQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'dtypes\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'fifo_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.queue.-padding-f-i-f-o-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.queue.-padding-f-i-f-o-queue.pbtxt
new file mode 100644
index 0000000000..9ef0a4d9eb
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.queue.-padding-f-i-f-o-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.queue.PaddingFIFOQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PaddingFIFOQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'dtypes\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'padding_fifo_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.queue.-priority-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.queue.-priority-queue.pbtxt
new file mode 100644
index 0000000000..bb66beb13a
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.queue.-priority-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.queue.PriorityQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.PriorityQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'types\', \'shapes\', \'names\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'priority_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.queue.-queue-base.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.queue.-queue-base.pbtxt
new file mode 100644
index 0000000000..8faaad22af
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.queue.-queue-base.pbtxt
@@ -0,0 +1,65 @@
+path: "tensorflow.queue.QueueBase"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'dtypes\', \'shapes\', \'names\', \'queue_ref\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.queue.-random-shuffle-queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.queue.-random-shuffle-queue.pbtxt
new file mode 100644
index 0000000000..31cd503b13
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.queue.-random-shuffle-queue.pbtxt
@@ -0,0 +1,66 @@
+path: "tensorflow.queue.RandomShuffleQueue"
+tf_class {
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.RandomShuffleQueue\'>"
+  is_instance: "<class \'tensorflow.python.ops.data_flow_ops.QueueBase\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "dtypes"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "name"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "names"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "queue_ref"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "shapes"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'capacity\', \'min_after_dequeue\', \'dtypes\', \'shapes\', \'names\', \'seed\', \'shared_name\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'random_shuffle_queue\'], "
+  }
+  member_method {
+    name: "close"
+    argspec: "args=[\'self\', \'cancel_pending_enqueues\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], "
+  }
+  member_method {
+    name: "dequeue"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_many"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "dequeue_up_to"
+    argspec: "args=[\'self\', \'n\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "enqueue_many"
+    argspec: "args=[\'self\', \'vals\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "from_list"
+    argspec: "args=[\'index\', \'queues\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "is_closed"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "size"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.queue.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.queue.pbtxt
new file mode 100644
index 0000000000..c16e95e211
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.queue.pbtxt
@@ -0,0 +1,23 @@
+path: "tensorflow.queue"
+tf_module {
+  member {
+    name: "FIFOQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PaddingFIFOQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "PriorityQueue"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "QueueBase"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "RandomShuffleQueue"
+    mtype: "<type \'type\'>"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
index 8c327f88f3..cc63a7fd82 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
@@ -68,10 +68,6 @@ tf_module {
     name: "SequenceExample"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "Server"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "ServerDef"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index 9a3f4460f7..ad4c3d2750 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -34,6 +34,7 @@ renames = {
     'tf.ConfigProto': 'tf.compat.v1.ConfigProto',
     'tf.DeviceSpec': 'tf.compat.v1.DeviceSpec',
     'tf.Dimension': 'tf.compat.v1.Dimension',
+    'tf.FIFOQueue': 'tf.queue.FIFOQueue',
     'tf.FixedLenFeature': 'tf.io.FixedLenFeature',
     'tf.FixedLenSequenceFeature': 'tf.io.FixedLenSequenceFeature',
     'tf.FixedLengthRecordReader': 'tf.compat.v1.FixedLengthRecordReader',
@@ -58,12 +59,12 @@ renames = {
     'tf.NotDifferentiable': 'tf.no_gradient',
     'tf.OpError': 'tf.errors.OpError',
     'tf.OptimizerOptions': 'tf.compat.v1.OptimizerOptions',
-    'tf.PaddingFIFOQueue': 'tf.io.PaddingFIFOQueue',
+    'tf.PaddingFIFOQueue': 'tf.queue.PaddingFIFOQueue',
     'tf.Print': 'tf.compat.v1.Print',
-    'tf.PriorityQueue': 'tf.io.PriorityQueue',
+    'tf.PriorityQueue': 'tf.queue.PriorityQueue',
     'tf.QUANTIZED_DTYPES': 'tf.dtypes.QUANTIZED_DTYPES',
-    'tf.QueueBase': 'tf.io.QueueBase',
-    'tf.RandomShuffleQueue': 'tf.io.RandomShuffleQueue',
+    'tf.QueueBase': 'tf.queue.QueueBase',
+    'tf.RandomShuffleQueue': 'tf.queue.RandomShuffleQueue',
     'tf.ReaderBase': 'tf.compat.v1.ReaderBase',
     'tf.RunMetadata': 'tf.compat.v1.RunMetadata',
     'tf.RunOptions': 'tf.compat.v1.RunOptions',
@@ -229,6 +230,10 @@ renames = {
     'tf.initializers.tables_initializer': 'tf.compat.v1.initializers.tables_initializer',
     'tf.initializers.variables': 'tf.compat.v1.initializers.variables',
     'tf.invert_permutation': 'tf.math.invert_permutation',
+    'tf.io.PaddingFIFOQueue': 'tf.queue.PaddingFIFOQueue',
+    'tf.io.PriorityQueue': 'tf.queue.PriorityQueue',
+    'tf.io.QueueBase': 'tf.queue.QueueBase',
+    'tf.io.RandomShuffleQueue': 'tf.queue.RandomShuffleQueue',
     'tf.io.tf_record_iterator': 'tf.compat.v1.io.tf_record_iterator',
     'tf.is_finite': 'tf.math.is_finite',
     'tf.is_inf': 'tf.math.is_inf',
@@ -527,9 +532,7 @@ renames = {
     'tf.sparse_merge': 'tf.compat.v1.sparse_merge',
     'tf.sparse_minimum': 'tf.sparse.minimum',
     'tf.sparse_placeholder': 'tf.compat.v1.sparse_placeholder',
-    'tf.sparse_reduce_max': 'tf.compat.v1.sparse_reduce_max',
     'tf.sparse_reduce_max_sparse': 'tf.compat.v1.sparse_reduce_max_sparse',
-    'tf.sparse_reduce_sum': 'tf.compat.v1.sparse_reduce_sum',
     'tf.sparse_reduce_sum_sparse': 'tf.compat.v1.sparse_reduce_sum_sparse',
     'tf.sparse_reorder': 'tf.sparse.reorder',
     'tf.sparse_reset_shape': 'tf.sparse.reset_shape',
@@ -619,6 +622,7 @@ renames = {
     'tf.train.SaverDef': 'tf.compat.v1.train.SaverDef',
     'tf.train.Scaffold': 'tf.compat.v1.train.Scaffold',
     'tf.train.SecondOrStepTimer': 'tf.estimator.SecondOrStepTimer',
+    'tf.train.Server': 'tf.distribute.Server',
     'tf.train.SessionCreator': 'tf.compat.v1.train.SessionCreator',
     'tf.train.SessionManager': 'tf.compat.v1.train.SessionManager',
     'tf.train.SessionRunArgs': 'tf.compat.v1.train.SessionRunArgs',
-- 
GitLab


From 932f281c6467865654e55f2e7f139d9fed2a349a Mon Sep 17 00:00:00 2001
From: "Wen-Heng (Jack) Chung" <whchung@gmail.com>
Date: Tue, 11 Dec 2018 18:14:05 +0000
Subject: [PATCH 352/873] Address clang-format checks

---
 .../kernels/fused_conv2d_bias_activation_op.cc         |  2 +-
 tensorflow/core/kernels/conv_grad_filter_ops.cc        |  7 +++----
 tensorflow/core/kernels/conv_grad_input_ops.cc         |  4 ++--
 tensorflow/core/kernels/conv_grad_ops_3d.cc            | 10 +++++-----
 tensorflow/core/kernels/conv_ops.cc                    |  4 ++--
 tensorflow/core/kernels/conv_ops_gpu.h                 |  2 +-
 6 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
index c541c71f99..1c40b6a414 100644
--- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
+++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc
@@ -525,7 +525,7 @@ void LaunchFusedConv2DBiasActivationOp<GPUDevice, T, BiasType, ScaleType>::
   static int64 ConvolveScratchSize = GetDnnWorkspaceLimit(
       // default value is in bytes despite the name of the environment variable
       "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB
-  );
+      );
 
   int device_id = stream->parent()->device_ordinal();
   FusedConvParameters fused_conv_parameters = {
diff --git a/tensorflow/core/kernels/conv_grad_filter_ops.cc b/tensorflow/core/kernels/conv_grad_filter_ops.cc
index efd8772226..58a4f6ba86 100644
--- a/tensorflow/core/kernels/conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_filter_ops.cc
@@ -905,7 +905,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
 
   static int64 ConvolveBackwardFilterScratchSize = GetDnnWorkspaceLimit(
       "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB by default
-  );
+      );
   int device_id = stream->parent()->device_ordinal();
   DataType dtype = input.dtype();
   ConvParameters conv_parameters = {
@@ -940,7 +940,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
       // TODO(zhengxq): profile each algorithm multiple times to better
       // accuracy.
       DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
-                                              ctx);
+                                            ctx);
       ProfileResult profile_result;
       bool cudnn_launch_status =
           stream
@@ -977,8 +977,7 @@ void LaunchConv2DBackpropFilterOp<Eigen::GpuDevice, T>::operator()(
     AutoTuneConvBwdFilter::GetInstance()->Insert(conv_parameters,
                                                  algorithm_config);
   }
-  DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
-                                          ctx);
+  DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize, ctx);
   bool cudnn_launch_status =
       stream
           ->ThenConvolveBackwardFilterWithAlgorithm(
diff --git a/tensorflow/core/kernels/conv_grad_input_ops.cc b/tensorflow/core/kernels/conv_grad_input_ops.cc
index 7339fb736f..e799016852 100644
--- a/tensorflow/core/kernels/conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/conv_grad_input_ops.cc
@@ -953,7 +953,7 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
 
   static int64 ConvolveBackwardDataScratchSize = GetDnnWorkspaceLimit(
       "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB by default
-  );
+      );
   DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize, ctx);
   int device_id = stream->parent()->device_ordinal();
   DataType dtype = out_backprop.dtype();
@@ -989,7 +989,7 @@ void LaunchConv2DBackpropInputOp<GPUDevice, T>::operator()(
       // TODO(zhengxq): profile each algorithm multiple times to better
       // accuracy.
       DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
-                                              ctx);
+                                            ctx);
       ProfileResult profile_result;
       bool cudnn_launch_status =
           stream
diff --git a/tensorflow/core/kernels/conv_grad_ops_3d.cc b/tensorflow/core/kernels/conv_grad_ops_3d.cc
index a518fcc874..562a9c8aed 100644
--- a/tensorflow/core/kernels/conv_grad_ops_3d.cc
+++ b/tensorflow/core/kernels/conv_grad_ops_3d.cc
@@ -1369,7 +1369,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
         // TODO(zhengxq): profile each algorithm multiple times to better
         // accuracy.
         DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
-                                                context);
+                                              context);
         ProfileResult profile_result;
         bool cudnn_launch_status =
             stream
@@ -1406,7 +1406,7 @@ class Conv3DBackpropInputOp<GPUDevice, T> : public OpKernel {
                                                    algorithm_config);
     }
     DnnScratchAllocator scratch_allocator(ConvolveBackwardDataScratchSize,
-                                            context);
+                                          context);
     bool cudnn_launch_status =
         stream
             ->ThenConvolveBackwardDataWithAlgorithm(
@@ -1774,8 +1774,8 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
       for (auto profile_algorithm : algorithms) {
         // TODO(zhengxq): profile each algorithm multiple times to better
         // accuracy.
-        DnnScratchAllocator scratch_allocator(
-            ConvolveBackwardFilterScratchSize, context);
+        DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
+                                              context);
         ProfileResult profile_result;
         bool cudnn_launch_status =
             stream
@@ -1813,7 +1813,7 @@ class Conv3DBackpropFilterOp<GPUDevice, T> : public OpKernel {
                                                      algorithm_config);
     }
     DnnScratchAllocator scratch_allocator(ConvolveBackwardFilterScratchSize,
-                                            context);
+                                          context);
     bool cudnn_launch_status =
         stream
             ->ThenConvolveBackwardFilterWithAlgorithm(
diff --git a/tensorflow/core/kernels/conv_ops.cc b/tensorflow/core/kernels/conv_ops.cc
index 8c2deeed0e..a1917862e7 100644
--- a/tensorflow/core/kernels/conv_ops.cc
+++ b/tensorflow/core/kernels/conv_ops.cc
@@ -522,7 +522,7 @@ template struct LaunchConv2DOp<CPUDevice, double>;
 
 #if GOOGLE_CUDA
 int64 GetDnnWorkspaceLimit(const string& envvar_in_mb,
-                             int64 default_value_in_bytes) {
+                           int64 default_value_in_bytes) {
   const char* workspace_limit_in_mb_str = getenv(envvar_in_mb.c_str());
   if (workspace_limit_in_mb_str != nullptr &&
       strcmp(workspace_limit_in_mb_str, "") != 0) {
@@ -762,7 +762,7 @@ void LaunchConv2DOp<GPUDevice, T>::operator()(
   static int64 ConvolveScratchSize = GetDnnWorkspaceLimit(
       // default value is in bytes despite the name of the environment variable
       "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB
-  );
+      );
 
   int device_id = stream->parent()->device_ordinal();
   DataType dtype = input.dtype();
diff --git a/tensorflow/core/kernels/conv_ops_gpu.h b/tensorflow/core/kernels/conv_ops_gpu.h
index 19fc45b756..7a67658c4d 100644
--- a/tensorflow/core/kernels/conv_ops_gpu.h
+++ b/tensorflow/core/kernels/conv_ops_gpu.h
@@ -31,7 +31,7 @@ namespace tensorflow {
 // Return the workspace memory limit in bytes. If no value is set, return the
 // default value.
 int64 GetDnnWorkspaceLimit(const string& envvar_in_mb,
-                             int64 default_value_in_bytes);
+                           int64 default_value_in_bytes);
 
 // A class to provide scratch-space allocator for Stream-Executor Cudnn
 // callback. TensorFlow is responsible for releasing the temporary buffers after
-- 
GitLab


From 4143d8d30b1f7d2737426c8c181c88bcd8dba5d5 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Tue, 11 Dec 2018 10:32:54 -0800
Subject: [PATCH 353/873] Support F16 and BF16 for iota HLO in evaluator. Also
 clean up type error reporting in evaluator.

PiperOrigin-RevId: 225028144
---
 .../compiler/xla/g3doc/operation_semantics.md | 320 +++++++++---------
 .../xla/service/hlo_evaluator_typed_visitor.h |  94 +++--
 tensorflow/compiler/xla/tests/iota_test.cc    |   2 +-
 3 files changed, 219 insertions(+), 197 deletions(-)

diff --git a/tensorflow/compiler/xla/g3doc/operation_semantics.md b/tensorflow/compiler/xla/g3doc/operation_semantics.md
index d888b1f23f..002ebc31b9 100644
--- a/tensorflow/compiler/xla/g3doc/operation_semantics.md
+++ b/tensorflow/compiler/xla/g3doc/operation_semantics.md
@@ -38,25 +38,25 @@ Alltoall is a collective operation that sends data from all cores to all cores.
 It has two phases:
 
 1.  the scatter phase. On each core, the operand is split into `split_count`
-    number of blocks along the `split_dimensions`, and the blocks are scattered
-    to all cores, e.g., the ith block is send to the ith core.
+number of blocks along the `split_dimensions`, and the blocks are scattered
+to all cores, e.g., the ith block is send to the ith core.
 2.  the gather phase. Each core concatenates the received blocks along the
-    `concat_dimension`.
+`concat_dimension`.
 
 The participating cores can be configured by:
 
 -   `replica_groups`: each ReplicaGroup contains a list of replica id. If empty,
-    all replicas belong to one group in the order of 0 - (n-1). Alltoall will be
-    applied within subgroups in the specified order. For example, replica
-    groups = {{1,2,3},{4,5,0}} means, an Alltoall will be applied within replica
-    1, 2, 3, and in the gather phase, the received blocks will be concatenated
-    in the order of 1, 2, 3; another Alltoall will be applied within replica 4,
-    5, 0, and the concatenation order is 4, 5, 0.
+all replicas belong to one group in the order of 0 - (n-1). Alltoall will be
+applied within subgroups in the specified order. For example, replica
+groups = {{1,2,3},{4,5,0}} means, an Alltoall will be applied within replica
+1, 2, 3, and in the gather phase, the received blocks will be concatenated
+in the order of 1, 2, 3; another Alltoall will be applied within replica 4,
+5, 0, and the concatenation order is 4, 5, 0.
 
 Prerequisites:
 
 -   The dimension size of the operand on the split_dimension is divisible by
-    split_count.
+split_count.
 -   The operand's shape is not tuple.
 
 <b> `AllToAll(operand, split_dimension, concat_dimension, split_count,
@@ -93,7 +93,7 @@ AllToAll(x, /*split_dimension=*/1, /*concat_dimension=*/0, /*split_count=*/4);
 ```
 
 <div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="./images/ops_alltoall.png">
+<img style="width:100%" src="./images/ops_alltoall.png">
 </div>
 
 In this example, there are 4 cores participating the Alltoall. On each core, the
@@ -387,34 +387,34 @@ For example, let v be an array of 24 elements:
 
 ```
 let v = f32[4x2x3] {{{10, 11, 12},  {15, 16, 17}},
-                    {{20, 21, 22},  {25, 26, 27}},
-                    {{30, 31, 32},  {35, 36, 37}},
-                    {{40, 41, 42},  {45, 46, 47}}};
+{{20, 21, 22},  {25, 26, 27}},
+{{30, 31, 32},  {35, 36, 37}},
+{{40, 41, 42},  {45, 46, 47}}};
 
 // Collapse to a single dimension, leaving one dimension.
 let v012 = Collapse(v, {0,1,2});
 then v012 == f32[24] {10, 11, 12, 15, 16, 17,
-                      20, 21, 22, 25, 26, 27,
-                      30, 31, 32, 35, 36, 37,
-                      40, 41, 42, 45, 46, 47};
+20, 21, 22, 25, 26, 27,
+30, 31, 32, 35, 36, 37,
+40, 41, 42, 45, 46, 47};
 
 // Collapse the two lower dimensions, leaving two dimensions.
 let v01 = Collapse(v, {0,1});
 then v01 == f32[4x6] {{10, 11, 12, 15, 16, 17},
-                      {20, 21, 22, 25, 26, 27},
-                      {30, 31, 32, 35, 36, 37},
-                      {40, 41, 42, 45, 46, 47}};
+{20, 21, 22, 25, 26, 27},
+{30, 31, 32, 35, 36, 37},
+{40, 41, 42, 45, 46, 47}};
 
 // Collapse the two higher dimensions, leaving two dimensions.
 let v12 = Collapse(v, {1,2});
 then v12 == f32[8x3] {{10, 11, 12},
-                      {15, 16, 17},
-                      {20, 21, 22},
-                      {25, 26, 27},
-                      {30, 31, 32},
-                      {35, 36, 37},
-                      {40, 41, 42},
-                      {45, 46, 47}};
+{15, 16, 17},
+{20, 21, 22},
+{25, 26, 27},
+{30, 31, 32},
+{35, 36, 37},
+{40, 41, 42},
+{45, 46, 47}};
 
 ```
 
@@ -441,9 +441,9 @@ replicas.
 Note that there are the following restrictions on the `source_target_pair`:
 
 -   Any two pairs should not have the same target replica id, and they should
-    not have the same source replica id.
+not have the same source replica id.
 -   If a replica id is not a target in any pair, then the output on that replica
-    is a tensor consists of 0(s) with the same shape as the input.
+is a tensor consists of 0(s) with the same shape as the input.
 
 ## Concatenate
 
@@ -480,25 +480,25 @@ Concat({{2, 3}, {4, 5}, {6, 7}}, 0)
 
 ```
 let a = {
-  {1, 2},
-  {3, 4},
-  {5, 6},
+{1, 2},
+{3, 4},
+{5, 6},
 };
 let b = {
-  {7, 8},
+{7, 8},
 };
 Concat({a, b}, 0)
 >>> {
-  {1, 2},
-  {3, 4},
-  {5, 6},
-  {7, 8},
+{1, 2},
+{3, 4},
+{5, 6},
+{7, 8},
 }
 ```
 
 Diagram:
 <div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="./images/ops_concatenate.png">
+<img style="width:100%" src="./images/ops_concatenate.png">
 </div>
 
 ## Conditional
@@ -566,20 +566,20 @@ the rhs is also an input. In a neural network, these are the input activations.
 The n+2 dimensions are, in this order:
 
 *   `batch`: Each coordinate in this dimension represents an independent input
-    for which convolution is carried out.
+for which convolution is carried out.
 *   `z/depth/features`: Each (y,x) position in the base area has a vector
-    associated to it, which goes into this dimension.
+associated to it, which goes into this dimension.
 *   `spatial_dims`: Describes the `n` spatial dimensions that define the base
-    area that the window moves across.
+area that the window moves across.
 
 The `rhs` argument is a rank n+2 array describing the convolutional
 filter/kernel/window. The dimensions are, in this order:
 
 *   `output-z`: The `z` dimension of the output.
 *   `input-z`: The size of this dimension times `feature_group_count` should
-    equal the size of the `z` dimension in lhs.
+equal the size of the `z` dimension in lhs.
 *   `spatial_dims`: Describes the `n` spatial dimensions that define the n-d
-    window that moves across the base area.
+window that moves across the base area.
 
 The `window_strides` argument specifies the stride of the convolutional window
 in the spatial dimensions. For example, if the stride in the first spatial
@@ -633,7 +633,7 @@ The output shape has these dimensions, in this order:
 *   `batch`: Same size as `batch` on the input (`lhs`).
 *   `z`: Same size as `output-z` on the kernel (`rhs`).
 *   `spatial_dims`: One value for each valid placement of the convolutional
-    window.
+window.
 
 The valid placements of the convolutional window are determined by the strides
 and the size of the base area after padding.
@@ -658,15 +658,15 @@ Here is pseudo-code for a 2d convolution with padding and striding:
 
 ```
 for (b, oz, oy, ox) {  // output coordinates
-  value = 0;
-  for (iz, ky, kx) {  // kernel coordinates and input z
-    iy = oy*stride_y + ky - pad_low_y;
-    ix = ox*stride_x + kx - pad_low_x;
-    if ((iy, ix) inside the base area considered without padding) {
-      value += input(b, iz, iy, ix) * kernel(oz, iz, ky, kx);
-    }
-  }
-  output(b, oz, oy, ox) = value;
+value = 0;
+for (iz, ky, kx) {  // kernel coordinates and input z
+iy = oy*stride_y + ky - pad_low_y;
+ix = ox*stride_x + kx - pad_low_x;
+if ((iy, ix) inside the base area considered without padding) {
+value += input(b, iz, iy, ix) * kernel(oz, iz, ky, kx);
+}
+}
+output(b, oz, oy, ox) = value;
 }
 ```
 
@@ -777,19 +777,19 @@ Here is an example of an implementation of `myfunc`:
 
 ```
 extern "C" void myfunc(void* out, void** in) {
-  float (&x)[2] = *static_cast<float(*)[2]>(in[0]);
-  float (&y)[2][3] = *static_cast<float(*)[2][3]>(in[1]);
-  EXPECT_EQ(1, x[0]);
-  EXPECT_EQ(2, x[1]);
-  EXPECT_EQ(10, y[0][0]);
-  EXPECT_EQ(20, y[0][1]);
-  EXPECT_EQ(30, y[0][2]);
-  EXPECT_EQ(40, y[1][0]);
-  EXPECT_EQ(50, y[1][1]);
-  EXPECT_EQ(60, y[1][2]);
-  float (&z)[3][3] = *static_cast<float(*)[3][3]>(out);
-  z[0][0] = x[1] + y[1][0];
-  // ...
+float (&x)[2] = *static_cast<float(*)[2]>(in[0]);
+float (&y)[2][3] = *static_cast<float(*)[2][3]>(in[1]);
+EXPECT_EQ(1, x[0]);
+EXPECT_EQ(2, x[1]);
+EXPECT_EQ(10, y[0][0]);
+EXPECT_EQ(20, y[0][1]);
+EXPECT_EQ(30, y[0][2]);
+EXPECT_EQ(40, y[1][0]);
+EXPECT_EQ(50, y[1][1]);
+EXPECT_EQ(60, y[1][2]);
+float (&z)[3][3] = *static_cast<float(*)[3][3]>(out);
+z[0][0] = x[1] + y[1][0];
+// ...
 }
 ```
 
@@ -864,17 +864,17 @@ Example with contracting dimension numbers:
 
 ```
 lhs = { {1.0, 2.0, 3.0},
-        {4.0, 5.0, 6.0} }
+{4.0, 5.0, 6.0} }
 
 rhs = { {1.0, 1.0, 1.0},
-        {2.0, 2.0, 2.0} }
+{2.0, 2.0, 2.0} }
 
 DotDimensionNumbers dnums;
 dnums.add_lhs_contracting_dimensions(1);
 dnums.add_rhs_contracting_dimensions(1);
 
 DotGeneral(lhs, rhs, dnums) -> { {6.0, 12.0},
-                                 {15.0, 30.0} }
+{15.0, 30.0} }
 ```
 
 Associated batch dimension numbers from the 'lhs' and 'rhs' must have the same
@@ -886,14 +886,14 @@ Example with batch dimension numbers (batch size 2, 2x2 matrices):
 
 ```
 lhs = { { {1.0, 2.0},
-          {3.0, 4.0} },
-        { {5.0, 6.0},
-          {7.0, 8.0} } }
+{3.0, 4.0} },
+{ {5.0, 6.0},
+{7.0, 8.0} } }
 
 rhs = { { {1.0, 0.0},
-          {0.0, 1.0} },
-        { {1.0, 0.0},
-          {0.0, 1.0} } }
+{0.0, 1.0} },
+{ {1.0, 0.0},
+{0.0, 1.0} } }
 
 DotDimensionNumbers dnums;
 dnums.add_lhs_contracting_dimensions(2);
@@ -902,9 +902,9 @@ dnums.add_lhs_batch_dimensions(0);
 dnums.add_rhs_batch_dimensions(0);
 
 DotGeneral(lhs, rhs, dnums) -> { { {1.0, 2.0},
-                                   {3.0, 4.0} },
-                                 { {5.0, 6.0},
-                                   {7.0, 8.0} } }
+{3.0, 4.0} },
+{ {5.0, 6.0},
+{7.0, 8.0} } }
 ```
 
 | Input                               | Output            | Semantics        |
@@ -963,22 +963,22 @@ let a = {0.0, 1.0, 2.0, 3.0, 4.0}
 let s = {2}
 
 DynamicSlice(a, s, {2}) produces:
-  {2.0, 3.0}
+{2.0, 3.0}
 ```
 
 2-dimensional example:
 
 ```
 let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
+{ {0.0,  1.0,  2.0},
+{3.0,  4.0,  5.0},
+{6.0,  7.0,  8.0},
+{9.0, 10.0, 11.0} }
 let s = {2, 1}
 
 DynamicSlice(b, s, {2, 2}) produces:
-  { { 7.0,  8.0},
-    {10.0, 11.0} }
+{ { 7.0,  8.0},
+{10.0, 11.0} }
 ```
 ## DynamicUpdateSlice
 
@@ -1027,29 +1027,29 @@ let u = {5.0, 6.0}
 let s = {2}
 
 DynamicUpdateSlice(a, u, s) produces:
-  {0.0, 1.0, 5.0, 6.0, 4.0}
+{0.0, 1.0, 5.0, 6.0, 4.0}
 ```
 
 2-dimensional example:
 
 ```
 let b =
- { {0.0,  1.0,  2.0},
-   {3.0,  4.0,  5.0},
-   {6.0,  7.0,  8.0},
-   {9.0, 10.0, 11.0} }
+{ {0.0,  1.0,  2.0},
+{3.0,  4.0,  5.0},
+{6.0,  7.0,  8.0},
+{9.0, 10.0, 11.0} }
 let u =
- { {12.0,  13.0},
-   {14.0,  15.0},
-   {16.0,  17.0} }
+{ {12.0,  13.0},
+{14.0,  15.0},
+{16.0,  17.0} }
 
 let s = {1, 1}
 
 DynamicUpdateSlice(b, u, s) produces:
- { {0.0,  1.0,  2.0},
-   {3.0, 12.0, 13.0},
-   {6.0, 14.0, 15.0},
-   {9.0, 16.0, 17.0} }
+{ {0.0,  1.0,  2.0},
+{3.0, 12.0, 13.0},
+{6.0, 14.0, 15.0},
+{9.0, 16.0, 17.0} }
 ```
 
 ## Element-wise binary arithmetic operations
@@ -1235,42 +1235,42 @@ shape of `start_indices` to be `[6,7,1]`).
 
 The bounds for the output array along dimension `i` is computed as follows:
 
-  1. If `i` is present in `batch_dims` (i.e. is equal to `batch_dims[k]` for
-     some `k`) then we pick the corresponding dimension bounds out of
-     `start_indices.shape`, skipping `index_vector_dim` (i.e. pick
-     `start_indices.shape.dims`[`k`] if `k` < `index_vector_dim` and
-     `start_indices.shape.dims`[`k`+`1`] otherwise).
+1. If `i` is present in `batch_dims` (i.e. is equal to `batch_dims[k]` for
+some `k`) then we pick the corresponding dimension bounds out of
+`start_indices.shape`, skipping `index_vector_dim` (i.e. pick
+`start_indices.shape.dims`[`k`] if `k` < `index_vector_dim` and
+`start_indices.shape.dims`[`k`+`1`] otherwise).
 
-  2. If `i` is present in `offset_dims` (i.e. equal to `offset_dims`[`k`] for
-     some `k`) then we pick the corresponding bound out of `slice_sizes` after
-     accounting for `collapsed_slice_dims` (i.e. we pick
-     `adjusted_slice_sizes`[`k`] where `adjusted_slice_sizes` is `slice_sizes`
-     with the bounds at indices `collapsed_slice_dims` removed).
+2. If `i` is present in `offset_dims` (i.e. equal to `offset_dims`[`k`] for
+some `k`) then we pick the corresponding bound out of `slice_sizes` after
+accounting for `collapsed_slice_dims` (i.e. we pick
+`adjusted_slice_sizes`[`k`] where `adjusted_slice_sizes` is `slice_sizes`
+with the bounds at indices `collapsed_slice_dims` removed).
 
 Formally, the operand index `In` corresponding to an output index `Out` is
 computed as follows:
 
-  1. Let `G` = { `Out`[`k`] for `k` in `batch_dims` }.  Use `G` to slice out
-     vector `S` such that `S`[`i`] = `start_indices`[Combine(`G`, `i`)] where
-     Combine(A, b) inserts b at position `index_vector_dim` into A.  Note that
-     this is well defined even if `G` is empty -- if `G` is empty then `S` =
-     `start_indices`.
-
-  2. Create a starting index, `S`<sub>`in`</sub>, into `operand` using `S` by
-     scattering `S` using `start_index_map`.  More precisely:
-       1. `S`<sub>`in`</sub>[`start_index_map`[`k`]] = `S`[`k`] if `k` <
-          `start_index_map.size`.
-       2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
-
-  3. Create an index `O`<sub>`in`</sub> into `operand` by scattering the indices
-     at the offset dimensions in `Out` according to the `collapsed_slice_dims`
-     set.  More precisely:
-       1. `O`<sub>`in`</sub>[`expand_offset_dims`(`k`)] =
-          `Out`[`offset_dims`[`k`]] if `k` < `offset_dims.size`
-          (`expand_offset_dims` is defined below).
-       2. `O`<sub>`in`</sub>[`_`] = `0` otherwise.
-  4. `In` is `O`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
-     addition.
+1. Let `G` = { `Out`[`k`] for `k` in `batch_dims` }.  Use `G` to slice out
+vector `S` such that `S`[`i`] = `start_indices`[Combine(`G`, `i`)] where
+Combine(A, b) inserts b at position `index_vector_dim` into A.  Note that
+this is well defined even if `G` is empty -- if `G` is empty then `S` =
+`start_indices`.
+
+2. Create a starting index, `S`<sub>`in`</sub>, into `operand` using `S` by
+scattering `S` using `start_index_map`.  More precisely:
+1. `S`<sub>`in`</sub>[`start_index_map`[`k`]] = `S`[`k`] if `k` <
+`start_index_map.size`.
+2. `S`<sub>`in`</sub>[`_`] = `0` otherwise.
+
+3. Create an index `O`<sub>`in`</sub> into `operand` by scattering the indices
+at the offset dimensions in `Out` according to the `collapsed_slice_dims`
+set.  More precisely:
+1. `O`<sub>`in`</sub>[`expand_offset_dims`(`k`)] =
+`Out`[`offset_dims`[`k`]] if `k` < `offset_dims.size`
+(`expand_offset_dims` is defined below).
+2. `O`<sub>`in`</sub>[`_`] = `0` otherwise.
+4. `In` is `O`<sub>`in`</sub> + `S`<sub>`in`</sub> where + is element-wise
+addition.
 
 `expand_offset_dims` is the monotonic function with domain [`0`, `offset.size`)
 and range [`0`, `operand.rank`) \ `collapsed_slice_dims`.  So if, e.g.,
@@ -1282,21 +1282,21 @@ and range [`0`, `operand.rank`) \ `collapsed_slice_dims`.  So if, e.g.,
 Informally, every index `Out` in the output array corresponds to an element `E`
 in the operand array, computed as follows:
 
-  - We use the batch dimensions in `Out` to look up a starting index from
-    `start_indices`.
+- We use the batch dimensions in `Out` to look up a starting index from
+`start_indices`.
 
-  - We use `start_index_map` to map the starting index (which may have size less
-    than operand.rank) to a "full" starting index into operand.
+- We use `start_index_map` to map the starting index (which may have size less
+than operand.rank) to a "full" starting index into operand.
 
-  - We dynamic-slice out a slice with size `slice_sizes` using the full starting
-    index.
+- We dynamic-slice out a slice with size `slice_sizes` using the full starting
+index.
 
-  - We reshape the slice by collapsing the `collapsed_slice_dims` dimensions.
-    Since all collapsed slice dimensions have to have bound 1 this reshape is
-    always legal.
+- We reshape the slice by collapsing the `collapsed_slice_dims` dimensions.
+Since all collapsed slice dimensions have to have bound 1 this reshape is
+always legal.
 
-  - We use the offset dimensions in `Out` to index into this slice to get the
-    input element, `E`, corresponding to output index `Out`.
+- We use the offset dimensions in `Out` to index into this slice to get the
+input element, `E`, corresponding to output index `Out`.
 
 `index_vector_dim` is set to `start_indices.rank` - `1` in all of the
 examples that follow.  More interesting values for `index_vector_dim` does not
@@ -1315,7 +1315,7 @@ the output shape, and maps it to an element in the input array in the following
 way:
 
 <div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="./images/ops_xla_gather_0.svg">
+<img style="width:100%" src="./images/ops_xla_gather_0.svg">
 </div>
 
 We first select an (`X`,`Y`) vector from the gather indices array using `G`.
@@ -1334,7 +1334,7 @@ version of the example above using a "gather indices" array of shape `[4,5,2]`
 would translate indices like this:
 
 <div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="./images/ops_xla_gather_1.svg">
+<img style="width:100%" src="./images/ops_xla_gather_1.svg">
 </div>
 
 Again, this acts as a batch dynamic slice `G`<sub>`0`</sub> and
@@ -1343,27 +1343,27 @@ Again, this acts as a batch dynamic slice `G`<sub>`0`</sub> and
 The gather operation in XLA generalizes the informal semantics outlined above in
 the following ways:
 
- 1. We can configure which dimensions in the output shape are the offset
-    dimensions (dimensions containing `O`<sub>`0`</sub>, `O`<sub>`1`</sub> in
-    the last example).  The output batch dimensions (dimensions containing
-    `G`<sub>`0`</sub>, `G`<sub>`1`</sub> in the last example) are defined to be
-    the output dimensions that are not offset dimensions.
+1. We can configure which dimensions in the output shape are the offset
+dimensions (dimensions containing `O`<sub>`0`</sub>, `O`<sub>`1`</sub> in
+the last example).  The output batch dimensions (dimensions containing
+`G`<sub>`0`</sub>, `G`<sub>`1`</sub> in the last example) are defined to be
+the output dimensions that are not offset dimensions.
 
- 2. The number of output offset dimensions explicitly present in the output
-    shape may be smaller than the input rank.  These "missing" dimensions, which
-    are listed explicitly as `collapsed_slice_dims`, must have a slice size of
-    `1`.  Since they have a slice size of `1` the only valid index for them is
-    `0` and eliding them does not introduce ambiguity.
+2. The number of output offset dimensions explicitly present in the output
+shape may be smaller than the input rank.  These "missing" dimensions, which
+are listed explicitly as `collapsed_slice_dims`, must have a slice size of
+`1`.  Since they have a slice size of `1` the only valid index for them is
+`0` and eliding them does not introduce ambiguity.
 
- 3. The slice extracted from the "Gather Indices" array ((`X`, `Y`) in the last
-    example) may have fewer elements than the input array rank, and an explicit
-    mapping dictates how the index should be expanded to have the same rank as
-    the input.
+3. The slice extracted from the "Gather Indices" array ((`X`, `Y`) in the last
+example) may have fewer elements than the input array rank, and an explicit
+mapping dictates how the index should be expanded to have the same rank as
+the input.
 
 As a final example, we use (2) and (3) to implement `tf.gather_nd`:
 
 <div style="width:95%; margin:auto; margin-bottom:10px; margin-top:20px;">
-  <img style="width:100%" src="./images/ops_xla_gather_2.svg">
+<img style="width:100%" src="./images/ops_xla_gather_2.svg">
 </div>
 
 `G`<sub>`0`</sub> and `G`<sub>`1`</sub> are used to slice out a starting index
@@ -1442,11 +1442,11 @@ dependency between the while loops.
 
 ```
 result1 = while (condition, init = init_value) {
-  Infeed(shape)
+Infeed(shape)
 }
 
 result2 = while (condition, init = result1) {
-  Infeed(shape)
+Infeed(shape)
 }
 ```
 
@@ -1464,7 +1464,9 @@ Infeed of the device.
 
 Builds a constant literal on device rather than a potentially large host
 transfer. Creates a rank 1 array of values starting at zero and incrementing by
-one.
+one. For floating-point types, the produced array is equivalent to
+`ConvertElementType(Iota(...))` where the `Iota` is of integral type and the
+conversion is to the floating-point type.
 
 Arguments        | Type            | Semantics
 ---------------- | --------------- | ------------------------------------
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index b87fc3e340..cd79117cbe 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -105,6 +105,12 @@ bool SafeLess(const NativeT& a, const NativeT& b) {
 template <typename ReturnT, typename ElementwiseT = ReturnT>
 class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
  private:
+  Status UnsupportedTypeError(HloInstruction* instruction) {
+    return InvalidArgument(
+        "Unsupported type for %s: %s", HloOpcodeString(instruction->opcode()),
+        PrimitiveType_Name(instruction->shape().element_type()));
+  }
+
   // Get the value in the given literal static_cast as a double.
   template <
       typename NativeT,
@@ -224,7 +230,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleRound(HloInstruction* round) {
-    return InvalidArgument("Unsupported type for Round");
+    return UnsupportedTypeError(round);
   }
 
   Status HandleRound(HloInstruction* round) override {
@@ -246,7 +252,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleCeil(HloInstruction* ceil) {
-    return InvalidArgument("Unsupported type for Ceil");
+    return UnsupportedTypeError(ceil);
   }
 
   Status HandleCeil(HloInstruction* ceil) override {
@@ -297,8 +303,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   template <
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
-  Status HandleExpm1(HloInstruction* floor) {
-    return InvalidArgument("Unsupported type for Expm1");
+  Status HandleExpm1(HloInstruction* expm1) {
+    return UnsupportedTypeError(expm1);
   }
 
   Status HandleExpm1(HloInstruction* floor) override {
@@ -321,7 +327,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleFloor(HloInstruction* floor) {
-    return InvalidArgument("Unsupported type for Floor");
+    return UnsupportedTypeError(floor);
   }
 
   Status HandleFloor(HloInstruction* floor) override {
@@ -351,12 +357,12 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   template <
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
-  Status HandleLog1p(HloInstruction* floor) {
-    return InvalidArgument("Unsupported type for Log1p");
+  Status HandleLog1p(HloInstruction* log1p) {
+    return UnsupportedTypeError(log1p);
   }
 
-  Status HandleLog1p(HloInstruction* floor) override {
-    return HandleLog1p<ReturnT>(floor);
+  Status HandleLog1p(HloInstruction* log1p) override {
+    return HandleLog1p<ReturnT>(log1p);
   }
 
   template <typename NativeT,
@@ -396,7 +402,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleNot(HloInstruction* not_) {
-    return InvalidArgument("Unsupported type for Not");
+    return UnsupportedTypeError(not_);
   }
 
   Status HandleNot(HloInstruction* not_) override {
@@ -476,7 +482,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   template <typename NativeT, typename std::enable_if<!std::is_floating_point<
                                   NativeT>::value>::type* = nullptr>
   Status HandleAtan2(HloInstruction* atan2) {
-    return InvalidArgument("Unsupported type for Atan2");
+    return UnsupportedTypeError(atan2);
   }
 
   Status HandleAtan2(HloInstruction* atan2) override {
@@ -624,7 +630,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleMaximum(HloInstruction* maximum) {
-    return InvalidArgument("Unsupported type for Maximum");
+    return UnsupportedTypeError(maximum);
   }
 
   Status HandleMaximum(HloInstruction* maximum) override {
@@ -659,7 +665,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleMinimum(HloInstruction* minimum) {
-    return InvalidArgument("Unsupported type for Minimum");
+    return UnsupportedTypeError(minimum);
   }
 
   Status HandleMinimum(HloInstruction* minimum) override {
@@ -724,7 +730,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleRemainder(HloInstruction* remainder) {
-    return InvalidArgument("Unsupported type for Remainder");
+    return UnsupportedTypeError(remainder);
   }
 
   Status HandleRemainder(HloInstruction* remainder) override {
@@ -746,14 +752,14 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   template <typename NativeT, typename std::enable_if<std::is_floating_point<
                                   NativeT>::value>::type* = nullptr>
   Status HandleAnd(HloInstruction* and_) {
-    return InvalidArgument("Unsupported type for And");
+    return UnsupportedTypeError(and_);
   }
 
   template <
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleAnd(HloInstruction* and_) {
-    return InvalidArgument("Unsupported type for And");
+    return UnsupportedTypeError(and_);
   }
 
   Status HandleAnd(HloInstruction* and_) override {
@@ -775,7 +781,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   template <typename NativeT, typename std::enable_if<std::is_floating_point<
                                   NativeT>::value>::type* = nullptr>
   Status HandleOr(HloInstruction* or_) {
-    return InvalidArgument("Unsupported type for Or");
+    return UnsupportedTypeError(or_);
   }
 
   template <
@@ -804,14 +810,14 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   template <typename NativeT, typename std::enable_if<std::is_floating_point<
                                   NativeT>::value>::type* = nullptr>
   Status HandleXor(HloInstruction* xor_) {
-    return InvalidArgument("Unsupported type for Xor");
+    return UnsupportedTypeError(xor_);
   }
 
   template <
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleXor(HloInstruction* xor_) {
-    return InvalidArgument("Unsupported type for Xor");
+    return UnsupportedTypeError(xor_);
   }
 
   Status HandleXor(HloInstruction* xor_) override {
@@ -836,8 +842,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
             typename std::enable_if<!std::is_integral<NativeT>::value ||
                                     std::is_same<NativeT, bool>::value>::type* =
                 nullptr>
-  Status HandleShiftLeft(HloInstruction*) {
-    return InvalidArgument("Unsupported type for ShiftLeft");
+  Status HandleShiftLeft(HloInstruction* shift) {
+    return UnsupportedTypeError(shift);
   }
 
   Status HandleShiftLeft(HloInstruction* shl) override {
@@ -866,8 +872,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
             typename std::enable_if<!std::is_integral<NativeT>::value ||
                                     std::is_same<NativeT, bool>::value>::type* =
                 nullptr>
-  Status HandleShiftRightArithmetic(HloInstruction*) {
-    return InvalidArgument("Unsupported type for ShiftRightArithmetic");
+  Status HandleShiftRightArithmetic(HloInstruction* shift) {
+    return UnsupportedTypeError(shift);
   }
 
   Status HandleShiftRightArithmetic(HloInstruction* shra) override {
@@ -897,8 +903,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
             typename std::enable_if<!std::is_integral<NativeT>::value ||
                                     std::is_same<NativeT, bool>::value>::type* =
                 nullptr>
-  Status HandleShiftRightLogical(HloInstruction*) {
-    return InvalidArgument("Unsupported type for ShiftRightLogical");
+  Status HandleShiftRightLogical(HloInstruction* shift) {
+    return UnsupportedTypeError(shift);
   }
 
   Status HandleShiftRightLogical(HloInstruction* shrl) override {
@@ -923,8 +929,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   template <
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
-  Status HandleClamp(HloInstruction*) {
-    return InvalidArgument("Unsupported type for Clamp");
+  Status HandleClamp(HloInstruction* clamp) {
+    return UnsupportedTypeError(clamp);
   }
 
   Status HandleClamp(HloInstruction* clamp) override {
@@ -1578,7 +1584,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
                                     std::is_same<NativeT, bool>::value>::type* =
                 nullptr>
   Status HandleSort(HloInstruction* sort) {
-    return InvalidArgument("Unsupported type for Sort");
+    return UnsupportedTypeError(sort);
   }
 
   Status HandleSort(HloInstruction* sort) override {
@@ -2357,7 +2363,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
             std::is_same<NativeT, int64>::value ||
             std::is_same<NativeT, uint64>::value)>::type* = nullptr>
   Status HandleClz(HloInstruction* clz) {
-    return InvalidArgument("Unsupported type for Clz");
+    return UnsupportedTypeError(clz);
   }
 
   template <typename NativeT,
@@ -2403,7 +2409,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<std::is_integral<NativeT>::value ||
                               is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleSin(HloInstruction* sin) {
-    return InvalidArgument("Unsupported type for Sin");
+    return UnsupportedTypeError(sin);
   }
 
   Status HandleSin(HloInstruction* sin) override {
@@ -2425,7 +2431,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<std::is_integral<NativeT>::value ||
                               is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleCos(HloInstruction* cos) {
-    return InvalidArgument("Unsupported type for Cos");
+    return UnsupportedTypeError(cos);
   }
 
   Status HandleCos(HloInstruction* cos) override {
@@ -2534,7 +2540,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
       typename std::enable_if<std::is_integral<NativeT>::value ||
                               is_complex_t<NativeT>::value>::type* = nullptr>
   Status HandleReducePrecision(HloInstruction* reduce_precision) {
-    return InvalidArgument("Unsupported type for reduce precision");
+    return UnsupportedTypeError(reduce_precision);
   }
 
   Status HandleReducePrecision(HloInstruction* reduce_precision) override {
@@ -2543,15 +2549,27 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
 
   template <typename NativeT,
             typename std::enable_if<
+                std::is_same<NativeT, bfloat16>::value ||
+                std::is_same<NativeT, Eigen::half>::value ||
                 std::is_integral<NativeT>::value ||
                 std::is_floating_point<NativeT>::value>::type* = nullptr>
   Status HandleIota(HloInstruction* instruction) {
     auto* iota = Cast<HloIotaInstruction>(instruction);
+    const int64 iota_size = iota->shape().dimensions(iota->iota_dimension());
     // Avoid using std::vector since std::vector<bool> does not convert to
     // absl::Span<bool>.
-    absl::InlinedVector<NativeT, 1> data(
-        iota->shape().dimensions(iota->iota_dimension()));
-    std::iota(data.begin(), data.end(), 0);
+    absl::InlinedVector<NativeT, 1> data(iota_size);
+    // We don't use std::iota for two reasons:
+    //
+    // (1) std:iota does not support bfloat16 and float16.
+    //
+    // (2) std::iota saturates for floating point types when the value is not
+    //     representable, but the definition of HLO iota is the value as a
+    //     64-bit integer cast to the native type.
+    for (int64 i = 0; i < iota_size; ++i) {
+      // static_cast is required for Eigen::half (F16).
+      data[i] = static_cast<NativeT>(i);
+    }
     auto result = LiteralUtil::CreateR1<NativeT>(data);
 
     if (ShapeUtil::Rank(iota->shape()) > 1) {
@@ -2567,10 +2585,12 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   }
   template <typename NativeT,
             typename std::enable_if<
-                !(std::is_integral<NativeT>::value ||
+                !(std::is_same<NativeT, bfloat16>::value ||
+                  std::is_same<NativeT, Eigen::half>::value ||
+                  std::is_integral<NativeT>::value ||
                   std::is_floating_point<NativeT>::value)>::type* = nullptr>
   Status HandleIota(HloInstruction* iota) {
-    return InvalidArgument("Unsupported type for iota");
+    return UnsupportedTypeError(iota);
   }
   Status HandleIota(HloInstruction* iota) override {
     return HandleIota<ReturnT>(iota);
diff --git a/tensorflow/compiler/xla/tests/iota_test.cc b/tensorflow/compiler/xla/tests/iota_test.cc
index 65205f53dd..37b2c635ee 100644
--- a/tensorflow/compiler/xla/tests/iota_test.cc
+++ b/tensorflow/compiler/xla/tests/iota_test.cc
@@ -80,7 +80,7 @@ TEST_P(IotaR2Test, DoIt) {
 }
 
 INSTANTIATE_TEST_CASE_P(IotaR2TestInstantiation, IotaR2Test,
-                        ::testing::Combine(::testing::Values(F32, S32),
+                        ::testing::Combine(::testing::Values(F32, S32, BF16),
                                            ::testing::Range(/*start=*/10,
                                                             /*end=*/1001,
                                                             /*step=*/10),
-- 
GitLab


From 5741f4b94090a33b01875c2ae42c42644fe4b46d Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Tue, 11 Dec 2018 10:36:04 -0800
Subject: [PATCH 354/873] Fix GRU cell breakage when reset_after=True in eager
 mode.

Also added unit test to cover that.

PiperOrigin-RevId: 225028823
---
 tensorflow/python/keras/layers/gru_test.py  | 23 ++++++++++++++++
 tensorflow/python/keras/layers/recurrent.py | 30 ++++++++++-----------
 2 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/keras/layers/gru_test.py b/tensorflow/python/keras/layers/gru_test.py
index 9988c9fae5..1b2881a26b 100644
--- a/tensorflow/python/keras/layers/gru_test.py
+++ b/tensorflow/python/keras/layers/gru_test.py
@@ -81,6 +81,29 @@ class GRULayerTest(test.TestCase):
                   'implementation': mode},
           input_shape=(num_samples, timesteps, embedding_dim))
 
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_reset_after_GRU(self):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+
+    (x_train, y_train), _ = testing_utils.get_test_data(
+        train_samples=num_samples,
+        test_samples=0,
+        input_shape=(timesteps, embedding_dim),
+        num_classes=units)
+    y_train = keras.utils.to_categorical(y_train, units)
+
+    inputs = keras.layers.Input(shape=[timesteps, embedding_dim])
+    gru_layer = keras.layers.GRU(units,
+                                 reset_after=True)
+    output = gru_layer(inputs)
+    gru_model = keras.models.Model(inputs, output)
+    gru_model.compile('rmsprop', 'mse')
+    gru_model.fit(x_train, y_train)
+    gru_model.predict(x_train)
+
   def test_statefulness_GRU(self):
     num_samples = 2
     timesteps = 3
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index a39db7e8b1..1c6f2bd3f8 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -1497,12 +1497,6 @@ class GRUCell(Layer):
                                   initializer=self.bias_initializer,
                                   regularizer=self.bias_regularizer,
                                   constraint=self.bias_constraint)
-      if not self.reset_after:
-        self.input_bias, self.recurrent_bias = self.bias, None
-      else:
-        self.input_bias = K.flatten(self.bias[0])
-        self.recurrent_bias = K.flatten(self.bias[1])
-
     else:
       self.bias = None
     self.built = True
@@ -1529,6 +1523,12 @@ class GRUCell(Layer):
     # dropout matrices for recurrent units
     rec_dp_mask = self._recurrent_dropout_mask
 
+    if self.use_bias:
+      if not self.reset_after:
+        input_bias, recurrent_bias = self.bias, None
+      else:
+        input_bias, recurrent_bias = array_ops.unstack(self.bias)
+
     if self.implementation == 1:
       if 0. < self.dropout < 1.:
         inputs_z = inputs * dp_mask[0]
@@ -1544,9 +1544,9 @@ class GRUCell(Layer):
       x_h = K.dot(inputs_h, self.kernel[:, self.units * 2:])
 
       if self.use_bias:
-        x_z = K.bias_add(x_z, self.input_bias[:self.units])
-        x_r = K.bias_add(x_r, self.input_bias[self.units: self.units * 2])
-        x_h = K.bias_add(x_h, self.input_bias[self.units * 2:])
+        x_z = K.bias_add(x_z, input_bias[:self.units])
+        x_r = K.bias_add(x_r, input_bias[self.units: self.units * 2])
+        x_h = K.bias_add(x_h, input_bias[self.units * 2:])
 
       if 0. < self.recurrent_dropout < 1.:
         h_tm1_z = h_tm1 * rec_dp_mask[0]
@@ -1561,10 +1561,9 @@ class GRUCell(Layer):
       recurrent_r = K.dot(h_tm1_r,
                           self.recurrent_kernel[:, self.units:self.units * 2])
       if self.reset_after and self.use_bias:
-        recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias[:self.units])
+        recurrent_z = K.bias_add(recurrent_z, recurrent_bias[:self.units])
         recurrent_r = K.bias_add(recurrent_r,
-                                 self.recurrent_bias[self.units:
-                                                     self.units * 2])
+                                 recurrent_bias[self.units:self.units * 2])
 
       z = self.recurrent_activation(x_z + recurrent_z)
       r = self.recurrent_activation(x_r + recurrent_r)
@@ -1573,8 +1572,7 @@ class GRUCell(Layer):
       if self.reset_after:
         recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel[:, self.units * 2:])
         if self.use_bias:
-          recurrent_h = K.bias_add(recurrent_h,
-                                   self.recurrent_bias[self.units * 2:])
+          recurrent_h = K.bias_add(recurrent_h, recurrent_bias[self.units * 2:])
         recurrent_h = r * recurrent_h
       else:
         recurrent_h = K.dot(r * h_tm1_h,
@@ -1589,7 +1587,7 @@ class GRUCell(Layer):
       matrix_x = K.dot(inputs, self.kernel)
       if self.use_bias:
         # biases: bias_z_i, bias_r_i, bias_h_i
-        matrix_x = K.bias_add(matrix_x, self.input_bias)
+        matrix_x = K.bias_add(matrix_x, input_bias)
 
       x_z = matrix_x[:, :self.units]
       x_r = matrix_x[:, self.units: 2 * self.units]
@@ -1602,7 +1600,7 @@ class GRUCell(Layer):
         # hidden state projected by all gate matrices at once
         matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
         if self.use_bias:
-          matrix_inner = K.bias_add(matrix_inner, self.recurrent_bias)
+          matrix_inner = K.bias_add(matrix_inner, recurrent_bias)
       else:
         # hidden state projected separately for update/reset and new
         matrix_inner = K.dot(h_tm1, self.recurrent_kernel[:, :2 * self.units])
-- 
GitLab


From 90a840fbcb0d5db6049de261061c48061d345678 Mon Sep 17 00:00:00 2001
From: Peter Buchlovsky <petebu@google.com>
Date: Tue, 11 Dec 2018 10:37:05 -0800
Subject: [PATCH 355/873] Add the run function from the revised Distribution
 Strategy proposal.

PiperOrigin-RevId: 225028975
---
 .../python/distribute/distribute_lib.py       | 36 +++++++++++++++++++
 ...orflow.distribute.-mirrored-strategy.pbtxt |  4 +++
 .../v1/tensorflow.distribute.-strategy.pbtxt  |  4 +++
 ...orflow.distribute.-mirrored-strategy.pbtxt |  4 +++
 .../v2/tensorflow.distribute.-strategy.pbtxt  |  4 +++
 5 files changed, 52 insertions(+)

diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py
index 87bf510ec5..60bb75ded0 100644
--- a/tensorflow/python/distribute/distribute_lib.py
+++ b/tensorflow/python/distribute/distribute_lib.py
@@ -422,6 +422,42 @@ class DistributionStrategy(object):
     return self.extended._make_input_fn_iterator(  # pylint: disable=protected-access
         input_fn, replication_mode=replication_mode)
 
+  def experimental_run(self, fn, input_iterator=None):
+    """Runs ops in `fn` on each replica, with inputs from `input_iterator`.
+
+    When eager execution is enabled, executes ops specified by `fn` on each
+    replica.  Otherwise, builds a graph to execute the ops on each replica.
+
+    Each replica will take a single, different input from the inputs provided by
+    one `get_next` call on the input iterator.
+
+    `fn` may call `tf.distribute.get_replica_context()` to access members such
+    as `replica_id_in_sync_group`.
+
+    IMPORTANT: Depending on the `DistributionStrategy` being used, and whether
+    eager execution is enabled, `fn` may be called one or more times (once for
+    each replica).
+
+    Args:
+      fn: function to run. The inputs to the function must match the outputs of
+        `input_iterator.get_next()`. The output must be a `tf.nest` of
+        `Tensor`s.
+      input_iterator: (Optional) input iterator from which the inputs are taken.
+
+    Returns:
+      Merged return value of `fn` across replicas. The structure of the return
+      value is the same as the return value from `fn`. Each element in the
+      structure can either be `PerReplica` (if the values are unsynchronized),
+      `Mirrored` (if the values are kept in sync), or `Tensor` (if running on a
+      single replica).
+    """
+    with self.scope():
+      if input_iterator is None:
+        return self._extended.call_for_each_replica(fn)
+      else:
+        inputs = input_iterator.get_next()
+        return self._extended.call_for_each_replica(fn, args=(inputs,))
+
   @doc_controls.do_not_generate_docs  # DEPRECATED, moving to `extended`
   def broadcast(self, tensor, destinations=None):
     """DEPRECATED: use extended.broadcast_to() instead."""
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt
index a613e2d3d1..81224f00a4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt
@@ -75,6 +75,10 @@ tf_class {
     name: "experimental_initialize"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "experimental_run"
+    argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "finalize"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt
index 9eb73d2c0d..63b6584caf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt
@@ -74,6 +74,10 @@ tf_class {
     name: "experimental_initialize"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "experimental_run"
+    argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "finalize"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
index a613e2d3d1..81224f00a4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
@@ -75,6 +75,10 @@ tf_class {
     name: "experimental_initialize"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "experimental_run"
+    argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "finalize"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
index 9eb73d2c0d..63b6584caf 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
@@ -74,6 +74,10 @@ tf_class {
     name: "experimental_initialize"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "experimental_run"
+    argspec: "args=[\'self\', \'fn\', \'input_iterator\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "finalize"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-- 
GitLab


From bd312687ad05ad36b5ed0589b0303df848bea266 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Tue, 11 Dec 2018 10:52:07 -0800
Subject: [PATCH 356/873] Remove unneeded import

---
 tensorflow/contrib/tensorrt/test/quantization_mnist_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/test/quantization_mnist_test.py b/tensorflow/contrib/tensorrt/test/quantization_mnist_test.py
index b96d965bad..e7d6ec4ad3 100644
--- a/tensorflow/contrib/tensorrt/test/quantization_mnist_test.py
+++ b/tensorflow/contrib/tensorrt/test/quantization_mnist_test.py
@@ -24,7 +24,6 @@ from tensorflow.contrib.tensorrt.python.ops import trt_engine_op
 # pylint: enable=unused-import
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python import data
-#from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python import keras
 from tensorflow.python.estimator.estimator import Estimator
 from tensorflow.python.estimator.model_fn import EstimatorSpec
-- 
GitLab


From 10aba412d7db31ac7bce1e46c967c979b5d85ca1 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Tue, 11 Dec 2018 10:58:18 -0800
Subject: [PATCH 357/873] Fix clang-format

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 32 ++++++++++---------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 18e8599a01..5fe284c042 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -1539,9 +1539,9 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) {
         node_def.name());
   }
   if (inputs.at(1).is_tensor()) {
-    return tensorflow::errors::Unimplemented(
-        "Kernel for ", node_def.op(), " must be constant weights, at ",
-        node_def.name());
+    return tensorflow::errors::Unimplemented("Kernel for ", node_def.op(),
+                                             " must be constant weights, at ",
+                                             node_def.name());
   }
   TRT_ShapedWeights weights_rsck = inputs.at(1).weights();
   VLOG(2) << "weight shape: " << weights_rsck.DebugString();
@@ -1658,7 +1658,7 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params,
     case ConvolutionType::DEPTHWISE_CONV:
       return ConvertConv2DHelper(params, 0);
   }
-  return tensorflow::errors::Unimplemented("unsupported convolution type at, " +
+  return tensorflow::errors::Unimplemented("Unsupported convolution type, at ",
                                            params->node_def.name());
 }
 
@@ -2050,16 +2050,14 @@ tensorflow::Status ConvertPool(OpConverterParams* params) {
   } else if (node_def.op() == "AvgPool") {
     type = nvinfer1::PoolingType::kAVERAGE;
   } else {
-    return tensorflow::errors::Unimplemented("Unsupported pooling type: ",
-                                             node_def.op(), ", at ",
-                                             node_def.name());
+    return tensorflow::errors::Unimplemented(
+        "Unsupported pooling type: ", node_def.op(), ", at ", node_def.name());
   }
   TFAttrs attrs(node_def);
   const string padding_type = attrs.get<string>("padding");
   if ((padding_type != "SAME") && (padding_type != "VALID")) {
-    return tensorflow::errors::Unimplemented("Unsupported padding type: ",
-                                             padding_type, ", at ",
-                                             node_def.name());
+    return tensorflow::errors::Unimplemented(
+        "Unsupported padding type: ", padding_type, ", at ", node_def.name());
   }
   if (params->validation_only) return Status::OK();
 
@@ -2988,20 +2986,24 @@ tensorflow::Status ConvertFusedBatchNorm(OpConverterParams* params) {
   bool is_training = attrs.get<bool>("is_training");
   if (is_training) {
     return tensorflow::errors::Unimplemented(
-        node_def.op(), " only supports is_training=false. If you are using "
+        node_def.op(),
+        " only supports is_training=false. If you are using "
         "Keras, please use keras.backend.set_learning_phase(0). At ",
         node_def.name());
   }
   if (inputs.at(0).is_weights()) {
     return tensorflow::errors::Unimplemented(
-        node_def.op(), " is only implemented for tensor inputs, not weights, "
-        "at ", node_def.name());
+        node_def.op(),
+        " is only implemented for tensor inputs, not weights, at ",
+        node_def.name());
   }
   for (int i = 1; i < 5; i++) {
     if (inputs.at(i).is_tensor()) {
       return tensorflow::errors::Unimplemented(
-          node_def.op(), " must have constant inputs for scale, offset, mean "
-          "and variance, at ", node_def.name());
+          node_def.op(),
+          " must have constant inputs for scale, offset, mean and variance, "
+          "at ",
+           node_def.name());
     }
   }
   nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
-- 
GitLab


From c99ecfa992eaa09a799e841fcdcfadd60b98f0c2 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Tue, 11 Dec 2018 11:01:27 -0800
Subject: [PATCH 358/873] [XLA] Split out HloDynamicUpdateSliceInstruction

This doesn't have any benefit in terms of sizeof(HloInstruction), but it's awkward to have a sublcass for DS and not DUS. Also adds an intermediate class in the hierarchy that avoids having to hard-code the index operand's number.

PiperOrigin-RevId: 225033893
---
 .../compiler/xla/service/hlo_instruction.cc   |  8 ++-----
 .../compiler/xla/service/hlo_instructions.cc  | 11 +++++++++-
 .../compiler/xla/service/hlo_instructions.h   | 21 ++++++++++++++++++-
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 21b1dbc167..5c1f1a61cc 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -914,12 +914,8 @@ HloInstruction::CreateDynamicUpdateSlice(const Shape& shape,
                                          HloInstruction* operand,
                                          HloInstruction* update,
                                          HloInstruction* start_indices) {
-  auto instruction = absl::WrapUnique(
-      new HloInstruction(HloOpcode::kDynamicUpdateSlice, shape));
-  instruction->AppendOperand(operand);
-  instruction->AppendOperand(update);
-  instruction->AppendOperand(start_indices);
-  return instruction;
+  return absl::make_unique<HloDynamicUpdateSliceInstruction>(
+      shape, operand, update, start_indices);
 }
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateConcatenate(
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 1ea02cf9c0..2fe6395efe 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1994,12 +1994,21 @@ std::unique_ptr<HloInstruction> HloPadInstruction::CloneWithNewOperandsImpl(
 HloDynamicSliceInstruction::HloDynamicSliceInstruction(
     const Shape& shape, HloInstruction* operand, HloInstruction* start_indices,
     absl::Span<const int64> slice_sizes)
-    : HloInstruction(HloOpcode::kDynamicSlice, shape),
+    : HloDynamicIndexInstruction(HloOpcode::kDynamicSlice, shape),
       dynamic_slice_sizes_(slice_sizes.begin(), slice_sizes.end()) {
   AppendOperand(operand);
   AppendOperand(start_indices);
 }
 
+HloDynamicUpdateSliceInstruction::HloDynamicUpdateSliceInstruction(
+    const Shape& shape, HloInstruction* operand, HloInstruction* update,
+    HloInstruction* start_indices)
+    : HloDynamicIndexInstruction(HloOpcode::kDynamicUpdateSlice, shape) {
+  AppendOperand(operand);
+  AppendOperand(update);
+  AppendOperand(start_indices);
+}
+
 HloInstructionProto HloDynamicSliceInstruction::ToProto() const {
   HloInstructionProto proto = HloInstruction::ToProto();
   for (int64 slice_size : dynamic_slice_sizes_) {
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index b5c28137a1..5420d4ce11 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -1171,7 +1171,14 @@ class HloPadInstruction : public HloInstruction {
   PaddingConfig padding_config_;
 };
 
-class HloDynamicSliceInstruction : public HloInstruction {
+class HloDynamicIndexInstruction : public HloInstruction {
+ public:
+  explicit HloDynamicIndexInstruction(HloOpcode opcode, const Shape& shape)
+      : HloInstruction(opcode, shape) {}
+  virtual int64 index_operand_number() const = 0;
+};
+
+class HloDynamicSliceInstruction : public HloDynamicIndexInstruction {
  public:
   explicit HloDynamicSliceInstruction(const Shape& shape,
                                       HloInstruction* operand,
@@ -1189,6 +1196,8 @@ class HloDynamicSliceInstruction : public HloInstruction {
   // Returns a serialized representation of this instruction.
   HloInstructionProto ToProto() const override;
 
+  int64 index_operand_number() const override { return 1; }
+
  private:
   std::vector<string> ExtraAttributesToStringImpl(
       const HloPrintOptions& options) const override;
@@ -1206,6 +1215,16 @@ class HloDynamicSliceInstruction : public HloInstruction {
   std::vector<int64> dynamic_slice_sizes_;
 };
 
+class HloDynamicUpdateSliceInstruction : public HloDynamicIndexInstruction {
+ public:
+  explicit HloDynamicUpdateSliceInstruction(const Shape& shape,
+                                            HloInstruction* operand,
+                                            HloInstruction* update,
+                                            HloInstruction* start_indices);
+
+  int64 index_operand_number() const override { return 2; }
+};
+
 class HloGatherInstruction : public HloInstruction {
  public:
   explicit HloGatherInstruction(
-- 
GitLab


From 9b964193d9e9cc2b082f634010102b320daf70e2 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 11 Dec 2018 11:15:58 -0800
Subject: [PATCH 359/873] [XLA] [TF:XLA] Move Cholesky decomposition into
 xla/client/lib/cholesky.*

Move loop helpers used by Cholesky decomposition into xla/client/lib/loops.*.

PiperOrigin-RevId: 225037112
---
 tensorflow/compiler/tf2xla/kernels/BUILD      |  13 +-
 .../compiler/tf2xla/kernels/cholesky_op.cc    |   4 +-
 .../compiler/tf2xla/kernels/gather_op.cc      |   1 -
 .../compiler/tf2xla/kernels/image_ops.cc      |   8 +-
 .../compiler/tf2xla/kernels/random_ops.cc     |   6 +-
 tensorflow/compiler/tf2xla/lib/BUILD          |  42 +----
 tensorflow/compiler/tf2xla/lib/qr.cc          |  10 +-
 tensorflow/compiler/tf2xla/lib/scatter.cc     |   1 -
 tensorflow/compiler/xla/client/lib/BUILD      |  63 ++++++-
 .../{tf2xla => xla/client}/lib/cholesky.cc    |  98 +++++------
 .../{tf2xla => xla/client}/lib/cholesky.h     |  10 +-
 .../compiler/xla/client/lib/cholesky_test.cc  | 166 ++++++++++++++++++
 .../while_loop.cc => xla/client/lib/loops.cc} |  90 +++++-----
 .../while_loop.h => xla/client/lib/loops.h}   |  43 +++--
 14 files changed, 361 insertions(+), 194 deletions(-)
 rename tensorflow/compiler/{tf2xla => xla/client}/lib/cholesky.cc (68%)
 rename tensorflow/compiler/{tf2xla => xla/client}/lib/cholesky.h (87%)
 create mode 100644 tensorflow/compiler/xla/client/lib/cholesky_test.cc
 rename tensorflow/compiler/{tf2xla/lib/while_loop.cc => xla/client/lib/loops.cc} (50%)
 rename tensorflow/compiler/{tf2xla/lib/while_loop.h => xla/client/lib/loops.h} (62%)

diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 8bc3292296..901b97736b 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -1,16 +1,11 @@
+load("//tensorflow:tensorflow.bzl", "tf_copts", "tf_kernel_library")
+
 licenses(["notice"])  # Apache 2.0
 
 package(
     default_visibility = ["//tensorflow/compiler/tf2xla:internal"],
 )
 
-load("//tensorflow:tensorflow.bzl", "tf_copts")
-load("//tensorflow:tensorflow.bzl", "tf_kernel_library")
-load(
-    "//third_party/mkl:build_defs.bzl",
-    "if_mkl",
-)
-
 tf_kernel_library(
     name = "xla_ops",
     srcs = [
@@ -122,12 +117,10 @@ tf_kernel_library(
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/lib:broadcast",
-        "//tensorflow/compiler/tf2xla/lib:cholesky",
         "//tensorflow/compiler/tf2xla/lib:qr",
         "//tensorflow/compiler/tf2xla/lib:random",
         "//tensorflow/compiler/tf2xla/lib:scatter",
         "//tensorflow/compiler/tf2xla/lib:util",
-        "//tensorflow/compiler/tf2xla/lib:while_loop",
         "//tensorflow/compiler/tf2xla/ops:xla_ops",
         "//tensorflow/compiler/xla:array4d",
         "//tensorflow/compiler/xla:literal",
@@ -140,7 +133,9 @@ tf_kernel_library(
         "//tensorflow/compiler/xla/client:xla_builder",
         "//tensorflow/compiler/xla/client:xla_computation",
         "//tensorflow/compiler/xla/client/lib:arithmetic",
+        "//tensorflow/compiler/xla/client/lib:cholesky",
         "//tensorflow/compiler/xla/client/lib:constants",
+        "//tensorflow/compiler/xla/client/lib:loops",
         "//tensorflow/compiler/xla/client/lib:math",
         "//tensorflow/compiler/xla/client/lib:matrix",
         "//tensorflow/compiler/xla/client/lib:pooling",
diff --git a/tensorflow/compiler/tf2xla/kernels/cholesky_op.cc b/tensorflow/compiler/tf2xla/kernels/cholesky_op.cc
index 9fcbc86adc..0ed3044efa 100644
--- a/tensorflow/compiler/tf2xla/kernels/cholesky_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/cholesky_op.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/tf2xla/lib/cholesky.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/cholesky.h"
 
 namespace tensorflow {
 namespace {
@@ -24,7 +24,7 @@ class CholeskyOp : public XlaOpKernel {
  public:
   explicit CholeskyOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
   void Compile(XlaOpKernelContext* ctx) override {
-    ctx->SetOutput(0, Cholesky(ctx->Input(0)));
+    ctx->SetOutput(0, xla::Cholesky(ctx->Input(0)));
   }
 };
 
diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op.cc b/tensorflow/compiler/tf2xla/kernels/gather_op.cc
index 20b0de193d..41c31d0ed5 100644
--- a/tensorflow/compiler/tf2xla/kernels/gather_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/gather_op.cc
@@ -14,7 +14,6 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h"
-#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/type_util.h"
 #include "tensorflow/compiler/tf2xla/xla_context.h"
diff --git a/tensorflow/compiler/tf2xla/kernels/image_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
index e9bb0a77e9..96ddd42e2a 100644
--- a/tensorflow/compiler/tf2xla/kernels/image_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/image_ops.cc
@@ -15,12 +15,12 @@ limitations under the License.
 
 #include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h"
 #include "tensorflow/compiler/tf2xla/lib/util.h"
-#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
 #include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/loops.h"
 #include "tensorflow/compiler/xla/client/lib/sorting.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/shape_util.h"
@@ -505,9 +505,9 @@ class NonMaxSuppressionOp : public XlaOpKernel {
     init_values.push_back(included_iou);
 
     auto suppress_loop_result =
-        XlaWhileLoop(WhileCondFn(num_boxes, output_size),
-                     SuppressBodyFn(num_boxes), init_values, "suppress_loop",
-                     builder)
+        xla::WhileLoopHelper(WhileCondFn(num_boxes, output_size),
+                             SuppressBodyFn(num_boxes), init_values,
+                             "suppress_loop", builder)
             .ValueOrDie();
 
     xla::XlaOp included_score =
diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc
index 8822e29f7e..2d92056e4f 100644
--- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/random_ops.cc
@@ -20,12 +20,12 @@ limitations under the License.
 #include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h"
 #include "tensorflow/compiler/tf2xla/lib/random.h"
 #include "tensorflow/compiler/tf2xla/lib/util.h"
-#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
 #include "tensorflow/compiler/tf2xla/shape_util.h"
 #include "tensorflow/compiler/tf2xla/xla_helpers.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/lib/loops.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -175,8 +175,8 @@ class RandomShuffleOp : public XlaOpKernel {
     };
     // for i in range(n):
     auto swap_loop_result =
-        XlaForEachIndex(n, xla::S32, swap_body_fn, {swaps, indices},
-                        "indices_swap_loop", builder)
+        xla::ForEachIndex(n, xla::S32, swap_body_fn, {swaps, indices},
+                          "indices_swap_loop", builder)
             .ValueOrDie();
     auto swapped_indices = swap_loop_result[1];
 
diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD
index 3e7a761120..9ec9e9bdc0 100644
--- a/tensorflow/compiler/tf2xla/lib/BUILD
+++ b/tensorflow/compiler/tf2xla/lib/BUILD
@@ -15,8 +15,6 @@ filegroup(
     ]),
 )
 
-load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test")
-
 cc_library(
     name = "broadcast",
     srcs = ["broadcast.cc"],
@@ -33,27 +31,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "cholesky",
-    srcs = ["cholesky.cc"],
-    hdrs = ["cholesky.h"],
-    deps = [
-        ":util",
-        ":while_loop",
-        "//tensorflow/compiler/xla:literal",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:status_macros",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/client:xla_builder",
-        "//tensorflow/compiler/xla/client/lib:constants",
-        "//tensorflow/compiler/xla/client/lib:matrix",
-        "//tensorflow/compiler/xla/client/lib:slicing",
-        "//tensorflow/compiler/xla/client/lib:triangular_solve",
-        "//tensorflow/core:lib",
-    ],
-)
-
 cc_library(
     name = "random",
     srcs = ["random.cc"],
@@ -75,7 +52,6 @@ cc_library(
     hdrs = ["qr.h"],
     deps = [
         ":util",
-        ":while_loop",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status_macros",
@@ -84,6 +60,7 @@ cc_library(
         "//tensorflow/compiler/xla/client:xla_builder",
         "//tensorflow/compiler/xla/client/lib:arithmetic",
         "//tensorflow/compiler/xla/client/lib:constants",
+        "//tensorflow/compiler/xla/client/lib:loops",
         "//tensorflow/compiler/xla/client/lib:math",
         "//tensorflow/compiler/xla/client/lib:matrix",
         "//tensorflow/compiler/xla/client/lib:slicing",
@@ -97,7 +74,6 @@ cc_library(
     hdrs = ["scatter.h"],
     deps = [
         ":util",
-        ":while_loop",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:status_macros",
@@ -128,19 +104,3 @@ cc_library(
         "@com_google_absl//absl/types:span",
     ],
 )
-
-cc_library(
-    name = "while_loop",
-    srcs = ["while_loop.cc"],
-    hdrs = ["while_loop.h"],
-    deps = [
-        ":util",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:status_macros",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla/client:xla_builder",
-        "//tensorflow/compiler/xla/client:xla_computation",
-        "@com_google_absl//absl/strings",
-        "@com_google_absl//absl/types:span",
-    ],
-)
diff --git a/tensorflow/compiler/tf2xla/lib/qr.cc b/tensorflow/compiler/tf2xla/lib/qr.cc
index d600774860..057045fc0c 100644
--- a/tensorflow/compiler/tf2xla/lib/qr.cc
+++ b/tensorflow/compiler/tf2xla/lib/qr.cc
@@ -19,9 +19,9 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/compiler/tf2xla/lib/util.h"
-#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
 #include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/loops.h"
 #include "tensorflow/compiler/xla/client/lib/math.h"
 #include "tensorflow/compiler/xla/client/lib/matrix.h"
 #include "tensorflow/compiler/xla/client/lib/slicing.h"
@@ -225,8 +225,8 @@ xla::StatusOr<QRBlockResult> QRBlock(
       builder, xla::ShapeUtil::MakeShape(type, ConcatVectors(batch_dims, {n})));
 
   TF_ASSIGN_OR_RETURN(auto values,
-                      XlaForEachIndex(std::min(m, n), xla::S32, qr_body_fn,
-                                      {a, vs, taus}, "qr", builder));
+                      xla::ForEachIndex(std::min(m, n), xla::S32, qr_body_fn,
+                                        {a, vs, taus}, "qr", builder));
 
   QRBlockResult result;
   result.r = values[0];
@@ -301,8 +301,8 @@ xla::StatusOr<xla::XlaOp> ComputeWYRepresentation(
   w = UpdateSliceInMinorDims(w, bv, {0});
 
   TF_ASSIGN_OR_RETURN(
-      auto values, XlaForEachIndex(n - 1, xla::S32, body_fn, {w, y, vs, taus},
-                                   "wy", builder));
+      auto values, xla::ForEachIndex(n - 1, xla::S32, body_fn, {w, y, vs, taus},
+                                     "wy", builder));
   return values[0];
 }
 
diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc b/tensorflow/compiler/tf2xla/lib/scatter.cc
index 2b1c2ced92..688056791f 100644
--- a/tensorflow/compiler/tf2xla/lib/scatter.cc
+++ b/tensorflow/compiler/tf2xla/lib/scatter.cc
@@ -20,7 +20,6 @@ limitations under the License.
 
 #include "absl/types/span.h"
 #include "tensorflow/compiler/tf2xla/lib/util.h"
-#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/literal.h"
diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD
index 41db8de29f..bf21b267c5 100644
--- a/tensorflow/compiler/xla/client/lib/BUILD
+++ b/tensorflow/compiler/xla/client/lib/BUILD
@@ -1,5 +1,7 @@
 # Common computation builders for XLA.
 
+load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_suites", "xla_test")
+
 licenses(["notice"])  # Apache 2.0
 
 package(default_visibility = ["//tensorflow/compiler/xla/client:friends"])
@@ -13,9 +15,6 @@ filegroup(
     ]),
 )
 
-load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test")
-load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_suites")
-
 # Generate test_suites for all backends, named "${backend}_tests".
 generate_backend_suites()
 
@@ -35,6 +34,48 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "cholesky",
+    srcs = ["cholesky.cc"],
+    hdrs = ["cholesky.h"],
+    deps = [
+        ":math",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/client/lib:constants",
+        "//tensorflow/compiler/xla/client/lib:loops",
+        "//tensorflow/compiler/xla/client/lib:matrix",
+        "//tensorflow/compiler/xla/client/lib:slicing",
+        "//tensorflow/compiler/xla/client/lib:triangular_solve",
+        "//tensorflow/core:lib",
+    ],
+)
+
+xla_test(
+    name = "cholesky_test",
+    srcs = ["cholesky_test.cc"],
+    tags = ["optonly"],
+    deps = [
+        ":arithmetic",
+        ":cholesky",
+        ":matrix",
+        "//tensorflow/compiler/xla:array2d",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+    ],
+)
+
 cc_library(
     name = "constants",
     srcs = ["constants.cc"],
@@ -75,6 +116,22 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "loops",
+    srcs = ["loops.cc"],
+    hdrs = ["loops.h"],
+    deps = [
+        ":constants",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/client:xla_computation",
+        "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
 cc_library(
     name = "math",
     srcs = ["math.cc"],
diff --git a/tensorflow/compiler/tf2xla/lib/cholesky.cc b/tensorflow/compiler/xla/client/lib/cholesky.cc
similarity index 68%
rename from tensorflow/compiler/tf2xla/lib/cholesky.cc
rename to tensorflow/compiler/xla/client/lib/cholesky.cc
index 550ab5b056..fd98049968 100644
--- a/tensorflow/compiler/tf2xla/lib/cholesky.cc
+++ b/tensorflow/compiler/xla/client/lib/cholesky.cc
@@ -13,14 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/tf2xla/lib/cholesky.h"
+#include "tensorflow/compiler/xla/client/lib/cholesky.h"
 
 #include <memory>
 #include <vector>
 
-#include "tensorflow/compiler/tf2xla/lib/util.h"
-#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
 #include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/lib/loops.h"
+#include "tensorflow/compiler/xla/client/lib/math.h"
 #include "tensorflow/compiler/xla/client/lib/matrix.h"
 #include "tensorflow/compiler/xla/client/lib/slicing.h"
 #include "tensorflow/compiler/xla/client/lib/triangular_solve.h"
@@ -31,7 +31,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/errors.h"
 
-namespace tensorflow {
+namespace xla {
 
 namespace {
 
@@ -50,26 +50,25 @@ namespace {
 //     l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j], row_t)) /
 //                       l[..., j, j]
 //   return l
-xla::XlaOp CholeskyUnblocked(xla::XlaOp a,
-                             xla::PrecisionConfig::Precision precision) {
-  xla::XlaBuilder* builder = a.builder();
-  return builder->ReportErrorOrReturn([&]() -> xla::StatusOr<xla::XlaOp> {
-    TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a));
-    const int n_dims = xla::ShapeUtil::Rank(a_shape);
-    const int64 n = xla::ShapeUtil::GetDimension(a_shape, -1);
-    auto major_dims = xla::AsInt64Slice(a_shape.dimensions())
+XlaOp CholeskyUnblocked(XlaOp a, PrecisionConfig::Precision precision) {
+  XlaBuilder* builder = a.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
+    const int n_dims = ShapeUtil::Rank(a_shape);
+    const int64 n = ShapeUtil::GetDimension(a_shape, -1);
+    auto major_dims = AsInt64Slice(a_shape.dimensions())
                           .subspan(
                               /*pos=*/0,
                               /*len=*/n_dims - 2);
 
-    xla::XlaOp l = xla::ZerosLike(a);
+    XlaOp l = ZerosLike(a);
 
     // Construct the for loop body to iterate over rows.
-    auto body_fn = [&](xla::XlaOp i, absl::Span<const xla::XlaOp> loop_vars,
-                       xla::XlaBuilder* body_builder)
-        -> xla::StatusOr<std::vector<xla::XlaOp>> {
-      xla::Shape col_shape;
-      xla::Shape row_shape;
+    auto body_fn =
+        [&](XlaOp i, absl::Span<const XlaOp> loop_vars,
+            XlaBuilder* body_builder) -> StatusOr<std::vector<XlaOp>> {
+      Shape col_shape;
+      Shape row_shape;
       for (int64 d : major_dims) {
         row_shape.add_dimensions(d);
         col_shape.add_dimensions(d);
@@ -77,43 +76,40 @@ xla::XlaOp CholeskyUnblocked(xla::XlaOp a,
       row_shape.add_dimensions(1);
       row_shape.add_dimensions(n);
       row_shape.set_element_type(a_shape.element_type());
-      auto mask_zeros_row = xla::Zeros(body_builder, row_shape);
+      auto mask_zeros_row = Zeros(body_builder, row_shape);
 
       col_shape.add_dimensions(n);
       col_shape.add_dimensions(1);
       col_shape.set_element_type(a_shape.element_type());
-      auto mask_zeros_col = xla::Zeros(body_builder, col_shape);
+      auto mask_zeros_col = Zeros(body_builder, col_shape);
 
       std::vector<int32> mask_vector(n);
       std::iota(mask_vector.begin(), mask_vector.end(), 0);
-      auto mask_range = xla::ConstantR1<int32>(body_builder, mask_vector);
+      auto mask_range = ConstantR1<int32>(body_builder, mask_vector);
       auto mask_range_row =
-          xla::Broadcast(xla::Reshape(mask_range, {0}, {1, n}), major_dims);
+          Broadcast(Reshape(mask_range, {0}, {1, n}), major_dims);
       auto mask_range_col =
-          xla::Broadcast(xla::Reshape(mask_range, {0}, {n, 1}), major_dims);
+          Broadcast(Reshape(mask_range, {0}, {n, 1}), major_dims);
       auto body_a = loop_vars[0];
       auto body_l = loop_vars[1];
 
       // row = l[..., i, :i]
       // select the whole i-th row, then mask out all columns past i-1
-      auto zero = xla::ConstantR0<int32>(body_builder, 0);
+      auto zero = ConstantR0<int32>(body_builder, 0);
       auto l_i = DynamicSliceInMinorDims(body_l, {i, zero}, {1, n});
-      auto row = xla::Select(xla::Ge(mask_range_row, i), mask_zeros_row, l_i);
+      auto row = Select(Ge(mask_range_row, i), mask_zeros_row, l_i);
       // a[..., i, i]
       auto a_ii = DynamicSliceInMinorDims(body_a, {i, i}, {1, 1});
       // np.dot(row, np.swapaxes(row, -1, -2))
       auto diag_dot = BatchDot(row, TransposeInMinorDims(row), precision);
       // l[..., i, i] = np.sqrt(a[..., i, i] - np.dot(row,
       //                                              np.swapaxes(row, -1, -2)))
-      auto l_ii =
-          xla::Pow(a_ii - diag_dot,
-                   FloatLiteral(body_builder, a_shape.element_type(), 0.5));
+      auto l_ii = Sqrt(a_ii - diag_dot);
 
       // a[..., i+1:, i]
       // select the whole i-th column, then mask out all rows above i+1
       auto a_0i = DynamicSliceInMinorDims(body_a, {i}, {1});
-      auto a_ip1i =
-          xla::Select(xla::Le(mask_range_col, i), mask_zeros_col, a_0i);
+      auto a_ip1i = Select(Le(mask_range_col, i), mask_zeros_col, a_0i);
 
       // l[..., i+1:, i] = (a[..., i+1:, i] - np.dot(l[..., i+1:, :i], r.T)) /
       //                   l[..., i, i]
@@ -122,8 +118,7 @@ xla::XlaOp CholeskyUnblocked(xla::XlaOp a,
       // r.T)
       auto dot = BatchDot(body_l, TransposeInMinorDims(row), precision);
       // np.dot(l[..., i+1:, :i], r.T)
-      auto dot_ip1 =
-          xla::Select(xla::Le(mask_range_col, i), mask_zeros_col, dot);
+      auto dot_ip1 = Select(Le(mask_range_col, i), mask_zeros_col, dot);
 
       body_l =
           DynamicUpdateSliceInMinorDims(body_l, (a_ip1i - dot_ip1) / l_ii, {i});
@@ -131,12 +126,12 @@ xla::XlaOp CholeskyUnblocked(xla::XlaOp a,
       // column assign will wrap around and overwrite the diagonal assign.
       body_l = DynamicUpdateSliceInMinorDims(body_l, l_ii, {i, i});
 
-      return std::vector<xla::XlaOp>{body_a, body_l};
+      return std::vector<XlaOp>{body_a, body_l};
     };
 
     TF_ASSIGN_OR_RETURN(
         auto cholesky_while,
-        XlaForEachIndex(n, xla::S32, body_fn, {a, l}, "unblocked", builder));
+        ForEachIndex(n, S32, body_fn, {a, l}, "unblocked", builder));
 
     return cholesky_while[1];
   });
@@ -144,34 +139,35 @@ xla::XlaOp CholeskyUnblocked(xla::XlaOp a,
 
 }  // namespace
 
-xla::XlaOp Cholesky(xla::XlaOp a, int64 block_size,
-                    xla::PrecisionConfig::Precision precision) {
-  xla::XlaBuilder* builder = a.builder();
-  return builder->ReportErrorOrReturn([&]() -> xla::StatusOr<xla::XlaOp> {
-    TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a));
-    const int ndims = xla::ShapeUtil::Rank(a_shape);
+XlaOp Cholesky(XlaOp a, int64 block_size,
+               PrecisionConfig::Precision precision) {
+  XlaBuilder* builder = a.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
+    const int ndims = ShapeUtil::Rank(a_shape);
     if (ndims < 2) {
-      return errors::InvalidArgument(
-          "Arguments to Cholesky must have rank >= 2: ", ndims);
+      return InvalidArgument(
+          "Argument to Cholesky must have rank >= 2; shape was %s",
+          a_shape.ToString());
     }
 
-    const int64 n = xla::ShapeUtil::GetDimension(a_shape, -1);
-    if (n != xla::ShapeUtil::GetDimension(a_shape, -2)) {
-      return errors::InvalidArgument(
-          "Arguments to Cholesky must be square matrices: ",
-          xla::ShapeUtil::HumanString(a_shape));
+    const int64 n = ShapeUtil::GetDimension(a_shape, -1);
+    if (n != ShapeUtil::GetDimension(a_shape, -2)) {
+      return InvalidArgument(
+          "Argument to Cholesky must be batched square matrices; got shape %s",
+          ShapeUtil::HumanString(a_shape));
     }
 
     if (block_size < 1) {
-      return errors::InvalidArgument(
-          "block_size argument to Cholesky must be >= 1; got ", block_size);
+      return InvalidArgument(
+          "block_size argument to Cholesky must be >= 1; got %d", block_size);
     }
 
     // Blocked left-looking Cholesky factorization.
     // Algorithm 1 from
     // Haidar, Azzam, et al. "High-performance Cholesky factorization for
     // GPU-only execution." Proceedings of General Purpose GPUs. ACM, 2017.
-    xla::XlaOp l = xla::ZerosLike(a);
+    XlaOp l = ZerosLike(a);
     for (int64 i = 0; i < n; i += block_size) {
       int64 k = std::min(block_size, n - i);
       if (i > 0) {
@@ -207,4 +203,4 @@ xla::XlaOp Cholesky(xla::XlaOp a, int64 block_size,
   });
 }
 
-}  // namespace tensorflow
+}  // namespace xla
diff --git a/tensorflow/compiler/tf2xla/lib/cholesky.h b/tensorflow/compiler/xla/client/lib/cholesky.h
similarity index 87%
rename from tensorflow/compiler/tf2xla/lib/cholesky.h
rename to tensorflow/compiler/xla/client/lib/cholesky.h
index 9a561c34b9..0bae26837c 100644
--- a/tensorflow/compiler/tf2xla/lib/cholesky.h
+++ b/tensorflow/compiler/xla/client/lib/cholesky.h
@@ -13,13 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_CHOLESKY_H_
-#define TENSORFLOW_COMPILER_TF2XLA_LIB_CHOLESKY_H_
+#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_CHOLESKY_H_
+#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_CHOLESKY_H_
 
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 
-namespace tensorflow {
+namespace xla {
 
 // Computes the Cholesky decompositions of a batch of symmetric positive
 // definite matrices.
@@ -34,6 +34,6 @@ xla::XlaOp Cholesky(
     xla::XlaOp a, int64 block_size = 256,
     xla::PrecisionConfig::Precision precision = xla::PrecisionConfig::HIGHEST);
 
-}  // namespace tensorflow
+}  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_TF2XLA_LIB_CHOLESKY_H_
+#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_CHOLESKY_H_
diff --git a/tensorflow/compiler/xla/client/lib/cholesky_test.cc b/tensorflow/compiler/xla/client/lib/cholesky_test.cc
new file mode 100644
index 0000000000..ba9580a3d3
--- /dev/null
+++ b/tensorflow/compiler/xla/client/lib/cholesky_test.cc
@@ -0,0 +1,166 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/lib/cholesky.h"
+
+#include <memory>
+#include <numeric>
+#include <vector>
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/client/lib/arithmetic.h"
+#include "tensorflow/compiler/xla/client/lib/matrix.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace {
+
+using xla::int64;
+
+using CholeskyTest = xla::ClientLibraryTestBase;
+
+XLA_TEST_F(CholeskyTest, Simple) {
+  xla::XlaBuilder builder(TestName());
+
+  xla::Array2D<float> a_vals({
+      {4, 6, 8, 10},
+      {6, 45, 54, 63},
+      {8, 54, 146, 166},
+      {10, 63, 166, 310},
+  });
+
+  xla::XlaOp a;
+  auto a_data = CreateR2Parameter<float>(a_vals, 0, "a", &builder, &a);
+  xla::Cholesky(a, /*block_size=*/2);
+
+  xla::Array2D<float> expected({
+      {2, 0, 0, 0},
+      {3, 6, 0, 0},
+      {4, 7, 9, 0},
+      {5, 8, 10, 11},
+  });
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get()},
+                             xla::ErrorSpec(1e-4, 1e-4));
+}
+
+XLA_TEST_F(CholeskyTest, Simple2) {
+  xla::XlaBuilder builder(TestName());
+
+  xla::Array2D<float> a_vals({
+      {16, 24, 8, 12},
+      {24, 61, 82, 48},
+      {8, 82, 456, 106},
+      {12, 48, 106, 62},
+  });
+
+  xla::XlaOp a;
+  auto a_data = CreateR2Parameter<float>(a_vals, 0, "a", &builder, &a);
+  xla::Cholesky(a);
+
+  xla::Array2D<float> expected(
+      {{4, 0, 0, 0}, {6, 5, 0, 0}, {2, 14, 16, 0}, {3, 6, 1, 4}});
+
+  ComputeAndCompareR2<float>(&builder, expected, {a_data.get()},
+                             xla::ErrorSpec(1e-4, 1e-4));
+}
+
+XLA_TEST_F(CholeskyTest, SimpleBatched) {
+  xla::XlaBuilder builder(TestName());
+
+  xla::Array3D<float> a_vals({
+      {
+          {4, 6, 8, 10},
+          {6, 45, 54, 63},
+          {8, 54, 146, 166},
+          {10, 63, 166, 310},
+      },
+      {
+          {16, 24, 8, 12},
+          {24, 61, 82, 48},
+          {8, 82, 456, 106},
+          {12, 48, 106, 62},
+      },
+  });
+
+  xla::XlaOp a;
+  auto a_data = CreateR3Parameter<float>(a_vals, 0, "a", &builder, &a);
+  xla::Cholesky(a);
+
+  xla::Array3D<float> expected({
+      {
+          {2, 0, 0, 0},
+          {3, 6, 0, 0},
+          {4, 7, 9, 0},
+          {5, 8, 10, 11},
+      },
+      {{4, 0, 0, 0}, {6, 5, 0, 0}, {2, 14, 16, 0}, {3, 6, 1, 4}},
+  });
+
+  ComputeAndCompareR3<float>(&builder, expected, {a_data.get()},
+                             xla::ErrorSpec(1e-4, 1e-4));
+}
+
+using CholeskyTestCase = std::tuple<int64, int64>;
+
+class RandomCholeskyTest
+    : public xla::ClientLibraryTestBase,
+      public ::testing::WithParamInterface<CholeskyTestCase> {};
+
+XLA_TEST_P(RandomCholeskyTest, Random) {
+  xla::XlaBuilder builder(TestName());
+
+  auto test_params = GetParam();
+  std::vector<int64> dimensions = {std::get<0>(test_params),
+                                   std::get<1>(test_params),
+                                   std::get<1>(test_params)};
+  xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, dimensions);
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto literal,
+      xla::LiteralUtil::CreateRandomLiteral<xla::F32>(shape, 0.0, 1.0));
+
+  auto input = xla::Parameter(&builder, 0, shape, "input");
+  // Form a random positive definite matrix.
+  auto matrix = xla::BatchDot(input, TransposeInMinorDims(input),
+                              xla::PrecisionConfig::HIGHEST);
+
+  auto cholesky = xla::Cholesky(matrix, /*block_size=*/4);
+
+  // Verify that ||matrix - cholesky * cholesky_t||_2 ~= 0
+  auto verification = xla::BatchDot(cholesky, TransposeInMinorDims(cholesky),
+                                    xla::PrecisionConfig::HIGHEST);
+  auto delta = matrix - verification;
+  xla::Reduce(delta * delta, xla::ConstantR0<float>(&builder, 0.0),
+              CreateScalarAddComputation(xla::F32, &builder), {0, 1, 2});
+
+  TF_ASSERT_OK_AND_ASSIGN(auto input_data, client_->TransferToServer(literal));
+  ComputeAndCompareR0<float>(&builder, 0.0, {input_data.get()},
+                             xla::ErrorSpec(1e-4, 1e-4));
+}
+
+INSTANTIATE_TEST_CASE_P(RandomCholeskyTestInstance, RandomCholeskyTest,
+                        ::testing::Values(CholeskyTestCase{1, 1},
+                                          CholeskyTestCase{1, 2},
+                                          CholeskyTestCase{10, 5},
+                                          CholeskyTestCase{2, 20}));
+
+}  // namespace
diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.cc b/tensorflow/compiler/xla/client/lib/loops.cc
similarity index 50%
rename from tensorflow/compiler/tf2xla/lib/while_loop.cc
rename to tensorflow/compiler/xla/client/lib/loops.cc
index 594ab1dfd0..721f987628 100644
--- a/tensorflow/compiler/tf2xla/lib/while_loop.cc
+++ b/tensorflow/compiler/xla/client/lib/loops.cc
@@ -13,44 +13,43 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/tf2xla/lib/while_loop.h"
-#include "tensorflow/compiler/tf2xla/lib/util.h"
+#include "tensorflow/compiler/xla/client/lib/loops.h"
+
+#include "tensorflow/compiler/xla/client/lib/constants.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 
-namespace tensorflow {
+namespace xla {
 
-xla::StatusOr<std::vector<xla::XlaOp>> XlaWhileLoop(
-    const LoopConditionFunction& condition_function,
-    const LoopBodyFunction& body_function,
-    absl::Span<const xla::XlaOp> initial_values, absl::string_view name,
-    xla::XlaBuilder* builder) {
+StatusOr<std::vector<XlaOp>> WhileLoopHelper(
+    const WhileLoopHelperConditionFunction& condition_function,
+    const WhileLoopHelperBodyFunction& body_function,
+    absl::Span<const XlaOp> initial_values, absl::string_view name,
+    XlaBuilder* builder) {
   int arity = initial_values.size();
-  std::vector<xla::Shape> var_shapes;
+  std::vector<Shape> var_shapes;
   var_shapes.reserve(arity);
-  for (const xla::XlaOp& input : initial_values) {
+  for (const XlaOp& input : initial_values) {
     TF_ASSIGN_OR_RETURN(auto shape, builder->GetShape(input));
     var_shapes.push_back(std::move(shape));
   }
-  xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(var_shapes);
+  Shape tuple_shape = ShapeUtil::MakeTupleShape(var_shapes);
 
   // Unpacks a tuple into its component parts.
-  auto unpack_tuple = [](xla::XlaOp tuple, int arity,
-                         xla::XlaBuilder* builder) {
-    std::vector<xla::XlaOp> elements(arity);
+  auto unpack_tuple = [](XlaOp tuple, int arity, XlaBuilder* builder) {
+    std::vector<XlaOp> elements(arity);
     for (int i = 0; i < arity; ++i) {
-      elements[i] = xla::GetTupleElement(tuple, i);
+      elements[i] = GetTupleElement(tuple, i);
     }
     return elements;
   };
 
   // Build the condition.
-  std::unique_ptr<xla::XlaBuilder> cond_builder =
+  std::unique_ptr<XlaBuilder> cond_builder =
       builder->CreateSubBuilder(absl::StrCat(name, "_condition"));
   {
-    auto parameter =
-        xla::Parameter(cond_builder.get(), 0, tuple_shape, "parameter");
+    auto parameter = Parameter(cond_builder.get(), 0, tuple_shape, "parameter");
 
     TF_RETURN_IF_ERROR(
         condition_function(unpack_tuple(parameter, arity, cond_builder.get()),
@@ -60,11 +59,10 @@ xla::StatusOr<std::vector<xla::XlaOp>> XlaWhileLoop(
   TF_ASSIGN_OR_RETURN(auto cond, cond_builder->Build());
 
   // Build the body.
-  std::unique_ptr<xla::XlaBuilder> body_builder =
+  std::unique_ptr<XlaBuilder> body_builder =
       builder->CreateSubBuilder(absl::StrCat(name, "_body"));
   {
-    auto parameter =
-        xla::Parameter(body_builder.get(), 0, tuple_shape, "parameter");
+    auto parameter = Parameter(body_builder.get(), 0, tuple_shape, "parameter");
 
     TF_ASSIGN_OR_RETURN(
         auto result,
@@ -72,56 +70,54 @@ xla::StatusOr<std::vector<xla::XlaOp>> XlaWhileLoop(
                       body_builder.get()));
 
     TF_RET_CHECK(result.size() == initial_values.size());
-    xla::Tuple(body_builder.get(), result);
+    Tuple(body_builder.get(), result);
   }
   TF_ASSIGN_OR_RETURN(auto body, body_builder->Build());
 
-  auto outputs = xla::While(cond, body, xla::Tuple(builder, initial_values));
+  auto outputs = While(cond, body, Tuple(builder, initial_values));
 
   return unpack_tuple(outputs, arity, builder);
 }
 
-xla::StatusOr<std::vector<xla::XlaOp>> XlaForEachIndex(
-    int64 num_iterations, xla::PrimitiveType num_iterations_type,
+StatusOr<std::vector<XlaOp>> ForEachIndex(
+    int64 num_iterations, PrimitiveType num_iterations_type,
     const ForEachIndexBodyFunction& body_function,
-    absl::Span<const xla::XlaOp> initial_values, absl::string_view name,
-    xla::XlaBuilder* builder) {
-  auto while_cond_fn =
-      [&](absl::Span<const xla::XlaOp> values,
-          xla::XlaBuilder* cond_builder) -> xla::StatusOr<xla::XlaOp> {
-    return xla::Lt(values[0], IntegerLiteral(cond_builder, num_iterations_type,
-                                             num_iterations));
+    absl::Span<const XlaOp> initial_values, absl::string_view name,
+    XlaBuilder* builder) {
+  auto while_cond_fn = [&](absl::Span<const XlaOp> values,
+                           XlaBuilder* cond_builder) -> StatusOr<XlaOp> {
+    return Lt(values[0], ConstantR0WithType(cond_builder, num_iterations_type,
+                                            num_iterations));
   };
-  auto while_body_fn = [&](absl::Span<const xla::XlaOp> values,
-                           xla::XlaBuilder* body_builder)
-      -> xla::StatusOr<std::vector<xla::XlaOp>> {
-    xla::XlaOp iteration = values[0];
+  auto while_body_fn =
+      [&](absl::Span<const XlaOp> values,
+          XlaBuilder* body_builder) -> StatusOr<std::vector<XlaOp>> {
+    XlaOp iteration = values[0];
 
-    std::vector<xla::XlaOp> updated_values;
+    std::vector<XlaOp> updated_values;
     updated_values.reserve(values.size());
-    updated_values.push_back(xla::Add(
+    updated_values.push_back(Add(
         iteration,
-        xla::ConstantLiteral(body_builder,
-                             xla::LiteralUtil::One(num_iterations_type))));
+        ConstantLiteral(body_builder, LiteralUtil::One(num_iterations_type))));
 
     values.remove_prefix(1);
-    TF_ASSIGN_OR_RETURN(std::vector<xla::XlaOp> body_outputs,
+    TF_ASSIGN_OR_RETURN(std::vector<XlaOp> body_outputs,
                         body_function(iteration, values, body_builder));
     updated_values.insert(updated_values.end(), body_outputs.begin(),
                           body_outputs.end());
     return updated_values;
   };
 
-  std::vector<xla::XlaOp> values;
+  std::vector<XlaOp> values;
   values.reserve(initial_values.size() + 1);
-  values.push_back(xla::ConstantLiteral(
-      builder, xla::LiteralUtil::Zero(num_iterations_type)));
+  values.push_back(
+      ConstantLiteral(builder, LiteralUtil::Zero(num_iterations_type)));
   values.insert(values.end(), initial_values.begin(), initial_values.end());
 
-  TF_ASSIGN_OR_RETURN(values, XlaWhileLoop(while_cond_fn, while_body_fn, values,
-                                           name, builder));
+  TF_ASSIGN_OR_RETURN(values, WhileLoopHelper(while_cond_fn, while_body_fn,
+                                              values, name, builder));
   values.erase(values.begin(), values.begin() + 1);
   return values;
 }
 
-}  // namespace tensorflow
+}  // namespace xla
diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.h b/tensorflow/compiler/xla/client/lib/loops.h
similarity index 62%
rename from tensorflow/compiler/tf2xla/lib/while_loop.h
rename to tensorflow/compiler/xla/client/lib/loops.h
index f2134bb449..e11de59493 100644
--- a/tensorflow/compiler/tf2xla/lib/while_loop.h
+++ b/tensorflow/compiler/xla/client/lib/loops.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_WHILE_LOOP_H_
-#define TENSORFLOW_COMPILER_TF2XLA_LIB_WHILE_LOOP_H_
+#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_LOOPS_H_
+#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_LOOPS_H_
 
 #include <functional>
 #include <vector>
@@ -25,19 +25,18 @@ limitations under the License.
 #include "tensorflow/compiler/xla/client/xla_computation.h"
 #include "tensorflow/compiler/xla/statusor.h"
 
-namespace tensorflow {
+namespace xla {
 
 // Function that builds a loop condition. Takes as input a sequence of input
 // values, and returns a boolean value representing if the condition succeeds.
-typedef std::function<xla::StatusOr<xla::XlaOp>(absl::Span<const xla::XlaOp>,
-                                                xla::XlaBuilder*)>
-    LoopConditionFunction;
+typedef std::function<StatusOr<XlaOp>(absl::Span<const XlaOp>, XlaBuilder*)>
+    WhileLoopHelperConditionFunction;
 
 // Function that builds a loop body. Takes as input a sequence of input values
 // and returns a sequence of output values.
-typedef std::function<xla::StatusOr<std::vector<xla::XlaOp>>(
-    absl::Span<const xla::XlaOp>, xla::XlaBuilder*)>
-    LoopBodyFunction;
+typedef std::function<StatusOr<std::vector<XlaOp>>(absl::Span<const XlaOp>,
+                                                   XlaBuilder*)>
+    WhileLoopHelperBodyFunction;
 
 // Helper function for building an XLA while loop, where the values carried by
 // the loop are a tuple of values, e.g., (a, b, c):
@@ -47,27 +46,27 @@ typedef std::function<xla::StatusOr<std::vector<xla::XlaOp>>(
 //   init: (a, b, c)
 // )
 // 'name' is a descriptive name for the loop.
-xla::StatusOr<std::vector<xla::XlaOp>> XlaWhileLoop(
-    const LoopConditionFunction& condition_function,
-    const LoopBodyFunction& body_function,
-    absl::Span<const xla::XlaOp> initial_values, absl::string_view name,
-    xla::XlaBuilder* builder);
+StatusOr<std::vector<XlaOp>> WhileLoopHelper(
+    const WhileLoopHelperConditionFunction& condition_function,
+    const WhileLoopHelperBodyFunction& body_function,
+    absl::Span<const XlaOp> initial_values, absl::string_view name,
+    XlaBuilder* builder);
 
 // Builds an XLA loop that repeats a computation `num_iterations` times.
 //
 // The body function (ForEachIndexBodyFunction) takes as input a pair of
 // (current iteration number, loop-carried values), and returns an updated
 // vector of the loop-carried values.
-typedef std::function<xla::StatusOr<std::vector<xla::XlaOp>>(
-    xla::XlaOp, absl::Span<const xla::XlaOp>, xla::XlaBuilder*)>
+typedef std::function<StatusOr<std::vector<XlaOp>>(
+    XlaOp, absl::Span<const XlaOp>, XlaBuilder*)>
     ForEachIndexBodyFunction;
 
-xla::StatusOr<std::vector<xla::XlaOp>> XlaForEachIndex(
-    int64 num_iterations, xla::PrimitiveType num_iterations_type,
+StatusOr<std::vector<XlaOp>> ForEachIndex(
+    int64 num_iterations, PrimitiveType num_iterations_type,
     const ForEachIndexBodyFunction& body_function,
-    absl::Span<const xla::XlaOp> initial_values, absl::string_view name,
-    xla::XlaBuilder* builder);
+    absl::Span<const XlaOp> initial_values, absl::string_view name,
+    XlaBuilder* builder);
 
-}  // namespace tensorflow
+}  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_TF2XLA_LIB_WHILE_LOOP_H_
+#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_LOOPS_H_
-- 
GitLab


From 74ba3593214d5efd173ac91ed2c2f2bc3d58232e Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Tue, 11 Dec 2018 11:38:37 -0800
Subject: [PATCH 360/873] Move importing Estimator to after API_PLACEHOLDER
 text in template.

PiperOrigin-RevId: 225041387
---
 tensorflow/api_template.__init__.py       | 4 ++--
 tensorflow/api_template_v1.__init__.py    | 4 ++--
 tensorflow/compat_template_v1.__init__.py | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index d81cf067eb..4eba763129 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -20,14 +20,14 @@ from __future__ import print_function as _print_function
 
 import os as _os
 
+# API IMPORTS PLACEHOLDER
+
 # pylint: disable=g-bad-import-order
 from tensorflow.python.tools import component_api_helper as _component_api_helper
 _component_api_helper.package_hook(
     parent_package_str=__name__,
     child_package_str=('tensorflow_estimator.python.estimator.api.estimator'))
 
-# API IMPORTS PLACEHOLDER
-
 # Make sure directory containing top level submodules is in
 # the __path__ so that "from tensorflow.foo import bar" works.
 # We're using bitwise, but there's nothing special about that.
diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index 65bdb6cb1b..21b5277614 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -23,13 +23,13 @@ import os as _os
 # pylint: disable=g-bad-import-order
 from tensorflow.python import pywrap_tensorflow  # pylint: disable=unused-import
 
+# API IMPORTS PLACEHOLDER
+
 from tensorflow.python.tools import component_api_helper as _component_api_helper
 _component_api_helper.package_hook(
     parent_package_str=__name__,
     child_package_str=('tensorflow_estimator.python.estimator.api.estimator'))
 
-# API IMPORTS PLACEHOLDER
-
 from tensorflow.python.util.lazy_loader import LazyLoader  # pylint: disable=g-import-not-at-top
 contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib')
 del LazyLoader
diff --git a/tensorflow/compat_template_v1.__init__.py b/tensorflow/compat_template_v1.__init__.py
index 7df80ec012..d58acde09f 100644
--- a/tensorflow/compat_template_v1.__init__.py
+++ b/tensorflow/compat_template_v1.__init__.py
@@ -23,12 +23,12 @@ import os as _os
 # pylint: disable=g-bad-import-order
 from tensorflow.python import pywrap_tensorflow  # pylint: disable=unused-import
 
+# API IMPORTS PLACEHOLDER
+
 from tensorflow.python.tools import component_api_helper as _component_api_helper
 _component_api_helper.package_hook(
     parent_package_str=__name__,
     child_package_str=('tensorflow_estimator.python.estimator.api.estimator'))
 
-# API IMPORTS PLACEHOLDER
-
 from tensorflow.python.platform import flags  # pylint: disable=g-import-not-at-top
 app.flags = flags  # pylint: disable=undefined-variable
-- 
GitLab


From a3ad14bbd2fdb941b8dcf076b27389000e1ee17e Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Tue, 11 Dec 2018 11:55:47 -0800
Subject: [PATCH 361/873] [XLA] Verify instruction IDs don't over/under-flow
 int.

The proto field is int64, but the class field is int.

PiperOrigin-RevId: 225044350
---
 tensorflow/compiler/xla/service/hlo_instruction.cc | 5 +++++
 tensorflow/compiler/xla/service/hlo_proto_util.cc  | 1 +
 2 files changed, 6 insertions(+)

diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 5c1f1a61cc..152a451c18 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -569,6 +569,11 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
   instruction->SetAndSanitizeName(proto.name());
   instruction->metadata_ = proto.metadata();
   instruction->backend_config_ = proto.backend_config();
+
+  TF_RET_CHECK(proto.id() >= 0)
+      << "Instruction with negative id: " << proto.id();
+  TF_RET_CHECK(proto.id() <= INT_MAX)
+      << "Instruction with id > INT_MAX: " << proto.id();
   instruction->unique_id_ = proto.id();
 
   if (proto.has_sharding()) {
diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc
index 981d06ce10..3a9ee57e55 100644
--- a/tensorflow/compiler/xla/service/hlo_proto_util.cc
+++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc
@@ -39,6 +39,7 @@ HloProto MakeHloProto(const HloModule& module) {
 
 StatusOr<std::unique_ptr<HloModule>> CreateModuleFromProto(
     const HloModuleProto& proto, const HloModuleConfig& module_config) {
+  VLOG(4) << proto.ShortDebugString();
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
                       HloModule::CreateFromProto(proto, module_config));
   TF_RETURN_IF_ERROR(
-- 
GitLab


From 1390ba8f7877af2d673413ac7ef7cb2500e96c27 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 11 Dec 2018 11:57:09 -0800
Subject: [PATCH 362/873] [XLA] Move QR decomposition out of TF2XLA and into
 xla/client/lib.

Add a couple of simple C++ tests.

PiperOrigin-RevId: 225044584
---
 tensorflow/compiler/tf2xla/kernels/BUILD      |   2 +-
 tensorflow/compiler/tf2xla/kernels/qr_op.cc   |   4 +-
 tensorflow/compiler/tf2xla/lib/BUILD          |  22 --
 tensorflow/compiler/xla/client/lib/BUILD      |  42 ++++
 .../compiler/{tf2xla => xla/client}/lib/qr.cc | 207 +++++++++---------
 .../compiler/{tf2xla => xla/client}/lib/qr.h  |  20 +-
 tensorflow/compiler/xla/client/lib/qr_test.cc |  93 ++++++++
 7 files changed, 250 insertions(+), 140 deletions(-)
 rename tensorflow/compiler/{tf2xla => xla/client}/lib/qr.cc (62%)
 rename tensorflow/compiler/{tf2xla => xla/client}/lib/qr.h (74%)
 create mode 100644 tensorflow/compiler/xla/client/lib/qr_test.cc

diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index 901b97736b..a18a4e92d6 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -117,7 +117,6 @@ tf_kernel_library(
         "//tensorflow/compiler/tf2xla:common",
         "//tensorflow/compiler/tf2xla:xla_compiler",
         "//tensorflow/compiler/tf2xla/lib:broadcast",
-        "//tensorflow/compiler/tf2xla/lib:qr",
         "//tensorflow/compiler/tf2xla/lib:random",
         "//tensorflow/compiler/tf2xla/lib:scatter",
         "//tensorflow/compiler/tf2xla/lib:util",
@@ -140,6 +139,7 @@ tf_kernel_library(
         "//tensorflow/compiler/xla/client/lib:matrix",
         "//tensorflow/compiler/xla/client/lib:pooling",
         "//tensorflow/compiler/xla/client/lib:prng",
+        "//tensorflow/compiler/xla/client/lib:qr",
         "//tensorflow/compiler/xla/client/lib:sorting",
         "//tensorflow/compiler/xla/client/lib:triangular_solve",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/tf2xla/kernels/qr_op.cc b/tensorflow/compiler/tf2xla/kernels/qr_op.cc
index 7ea0afc1f5..66ec40a946 100644
--- a/tensorflow/compiler/tf2xla/kernels/qr_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/qr_op.cc
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/tf2xla/lib/qr.h"
 #include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
 #include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/qr.h"
 
 namespace tensorflow {
 namespace {
@@ -26,7 +26,7 @@ class QROp : public XlaOpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr("full_matrices", &full_matrices_));
   }
   void Compile(XlaOpKernelContext* ctx) override {
-    auto result = QRDecomposition(ctx->Input(0), full_matrices_);
+    auto result = xla::QRDecomposition(ctx->Input(0), full_matrices_);
     if (!result.ok()) {
       ctx->SetStatus(result.status());
       return;
diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD
index 9ec9e9bdc0..3d7b0bc959 100644
--- a/tensorflow/compiler/tf2xla/lib/BUILD
+++ b/tensorflow/compiler/tf2xla/lib/BUILD
@@ -46,28 +46,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "qr",
-    srcs = ["qr.cc"],
-    hdrs = ["qr.h"],
-    deps = [
-        ":util",
-        "//tensorflow/compiler/xla:literal_util",
-        "//tensorflow/compiler/xla:shape_util",
-        "//tensorflow/compiler/xla:status_macros",
-        "//tensorflow/compiler/xla:statusor",
-        "//tensorflow/compiler/xla:xla_data_proto",
-        "//tensorflow/compiler/xla/client:xla_builder",
-        "//tensorflow/compiler/xla/client/lib:arithmetic",
-        "//tensorflow/compiler/xla/client/lib:constants",
-        "//tensorflow/compiler/xla/client/lib:loops",
-        "//tensorflow/compiler/xla/client/lib:math",
-        "//tensorflow/compiler/xla/client/lib:matrix",
-        "//tensorflow/compiler/xla/client/lib:slicing",
-        "//tensorflow/core:lib",
-    ],
-)
-
 cc_library(
     name = "scatter",
     srcs = ["scatter.cc"],
diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD
index bf21b267c5..8fc221ee2b 100644
--- a/tensorflow/compiler/xla/client/lib/BUILD
+++ b/tensorflow/compiler/xla/client/lib/BUILD
@@ -234,6 +234,48 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "qr",
+    srcs = ["qr.cc"],
+    hdrs = ["qr.h"],
+    deps = [
+        ":arithmetic",
+        ":constants",
+        ":loops",
+        ":math",
+        ":matrix",
+        ":slicing",
+        "//tensorflow/compiler/xla:literal_util",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status_macros",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/core:lib",
+    ],
+)
+
+xla_test(
+    name = "qr_test",
+    srcs = ["qr_test.cc"],
+    tags = ["optonly"],
+    deps = [
+        ":matrix",
+        ":qr",
+        "//tensorflow/compiler/xla:array2d",
+        "//tensorflow/compiler/xla:array3d",
+        "//tensorflow/compiler/xla:literal",
+        "//tensorflow/compiler/xla:statusor",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:literal_test_util",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/core:test",
+    ],
+)
+
 cc_library(
     name = "slicing",
     srcs = ["slicing.cc"],
diff --git a/tensorflow/compiler/tf2xla/lib/qr.cc b/tensorflow/compiler/xla/client/lib/qr.cc
similarity index 62%
rename from tensorflow/compiler/tf2xla/lib/qr.cc
rename to tensorflow/compiler/xla/client/lib/qr.cc
index 057045fc0c..72ca653173 100644
--- a/tensorflow/compiler/tf2xla/lib/qr.cc
+++ b/tensorflow/compiler/xla/client/lib/qr.cc
@@ -13,12 +13,11 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/tf2xla/lib/qr.h"
+#include "tensorflow/compiler/xla/client/lib/qr.h"
 
 #include <memory>
 #include <vector>
 
-#include "tensorflow/compiler/tf2xla/lib/util.h"
 #include "tensorflow/compiler/xla/client/lib/arithmetic.h"
 #include "tensorflow/compiler/xla/client/lib/constants.h"
 #include "tensorflow/compiler/xla/client/lib/loops.h"
@@ -32,10 +31,18 @@ limitations under the License.
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/core/lib/core/errors.h"
 
-namespace tensorflow {
+namespace xla {
 
 namespace {
 
+std::vector<int64> ConcatVectors(absl::Span<const int64> xs,
+                                 absl::Span<const int64> ys) {
+  std::vector<int64> output(xs.size() + ys.size());
+  std::copy(xs.begin(), xs.end(), output.begin());
+  std::copy(ys.begin(), ys.end(), output.begin() + xs.size());
+  return output;
+}
+
 // Computes a Householder reflection of the form:
 // H = I - tau v v.T.
 // such that
@@ -65,52 +72,47 @@ namespace {
 //   return (v, tau, beta)
 // TODO(phawkins): LAPACK's xLARFG implementation has code for handling
 // overflows in the norm/beta calculations. Perhaps do the same here.
-xla::Status House(xla::XlaOp x, xla::XlaOp k,
-                  absl::Span<const int64> batch_dims, const int64 m,
-                  xla::XlaOp* v, xla::XlaOp* tau, xla::XlaOp* beta) {
-  xla::XlaBuilder* const builder = x.builder();
-  TF_ASSIGN_OR_RETURN(xla::Shape x_shape, builder->GetShape(x));
-  const xla::PrimitiveType type = x_shape.element_type();
+Status House(XlaOp x, XlaOp k, absl::Span<const int64> batch_dims,
+             const int64 m, XlaOp* v, XlaOp* tau, XlaOp* beta) {
+  XlaBuilder* const builder = x.builder();
+  TF_ASSIGN_OR_RETURN(Shape x_shape, builder->GetShape(x));
+  const PrimitiveType type = x_shape.element_type();
 
   std::vector<int64> batch_dim_ids(batch_dims.size());
   std::iota(batch_dim_ids.begin(), batch_dim_ids.end(), 0);
   const int64 minor_dim = batch_dims.size();
 
-  xla::XlaOp zero = xla::ScalarLike(x, 0.0);
-  xla::XlaOp one = xla::ScalarLike(x, 1.0);
+  XlaOp zero = ScalarLike(x, 0.0);
+  XlaOp one = ScalarLike(x, 1.0);
 
   // alpha = x[k]
-  xla::XlaOp alpha =
-      xla::Reshape(DynamicSliceInMinorDims(x, {k}, {1}), batch_dims);
+  XlaOp alpha = Reshape(DynamicSliceInMinorDims(x, {k}, {1}), batch_dims);
 
   // Compute x[k+1:] (padded with zeros in elements 0..k)
-  xla::XlaOp iota = xla::Iota(builder, xla::S32, m);
-  xla::XlaOp x_after_k =
-      xla::Mul(x, xla::ConvertElementType(xla::Gt(iota, k), type),
-               /*broadcast_dimensions=*/{minor_dim});
+  XlaOp iota = Iota(builder, S32, m);
+  XlaOp x_after_k = Mul(x, ConvertElementType(Gt(iota, k), type),
+                        /*broadcast_dimensions=*/{minor_dim});
 
   // sigma = np.dot(x[k+1:], x[k+1:])
-  auto sigma =
-      xla::Reduce(x_after_k * x_after_k, zero,
-                  xla::CreateScalarAddComputation(type, builder), {minor_dim});
+  auto sigma = Reduce(x_after_k * x_after_k, zero,
+                      CreateScalarAddComputation(type, builder), {minor_dim});
   // mu = np.sqrt(x[k]*x[k] + sigma)
-  auto mu = xla::Sqrt(xla::Square(alpha) + sigma);
+  auto mu = Sqrt(Square(alpha) + sigma);
 
-  auto sigma_is_zero = xla::Eq(sigma, zero);
+  auto sigma_is_zero = Eq(sigma, zero);
 
-  *beta = xla::Select(sigma_is_zero, alpha, -xla::Sign(alpha) * mu);
-  *tau = xla::Select(sigma_is_zero, xla::Broadcast(zero, batch_dims),
-                     (*beta - alpha) / *beta);
-  auto divisor = xla::Select(sigma_is_zero, xla::Broadcast(one, batch_dims),
-                             alpha - *beta);
+  *beta = Select(sigma_is_zero, alpha, -Sign(alpha) * mu);
+  *tau = Select(sigma_is_zero, Broadcast(zero, batch_dims),
+                (*beta - alpha) / *beta);
+  auto divisor =
+      Select(sigma_is_zero, Broadcast(one, batch_dims), alpha - *beta);
 
-  auto e_k = xla::Broadcast(xla::ConvertElementType(xla::Eq(iota, k), type),
-                            std::vector<int64>(batch_dims.size(), 1));
+  auto e_k = Broadcast(ConvertElementType(Eq(iota, k), type),
+                       std::vector<int64>(batch_dims.size(), 1));
 
   // Form v as [0, 0, ..., 1] ++ x[k+1:] / divisor
   // If sigma is zero, x[k+1:] is zero, so use any non-zero divisor.
-  *v = e_k +
-       xla::Div(x_after_k, divisor, /*broadcast_dimensions=*/batch_dim_ids);
+  *v = e_k + Div(x_after_k, divisor, /*broadcast_dimensions=*/batch_dim_ids);
   return Status::OK();
 }
 
@@ -143,90 +145,86 @@ xla::Status House(xla::XlaOp x, xla::XlaOp k,
 //   return (q, vs, taus)
 struct QRBlockResult {
   // The factored R value
-  xla::XlaOp r;
+  XlaOp r;
 
   // Representation of the Householder matrices I - beta v v.T
-  xla::XlaOp taus;  // Shape: [..., n]
-  xla::XlaOp vs;    // Shape: [..., m, n]
+  XlaOp taus;  // Shape: [..., n]
+  XlaOp vs;    // Shape: [..., m, n]
 };
-xla::StatusOr<QRBlockResult> QRBlock(
-    xla::XlaOp a, xla::PrecisionConfig::Precision precision) {
-  xla::XlaBuilder* builder = a.builder();
-  TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a));
-  const int num_dims = xla::ShapeUtil::Rank(a_shape);
+StatusOr<QRBlockResult> QRBlock(XlaOp a, PrecisionConfig::Precision precision) {
+  XlaBuilder* builder = a.builder();
+  TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
+  const int num_dims = ShapeUtil::Rank(a_shape);
   if (num_dims < 2) {
-    return errors::InvalidArgument("Arguments to QR must have rank >= 2: ",
-                                   num_dims);
+    return InvalidArgument("Argument to QR must have rank >= 2; got shape %s",
+                           a_shape.ToString());
   }
-  xla::PrimitiveType type = a_shape.element_type();
+  PrimitiveType type = a_shape.element_type();
 
-  const int64 m = xla::ShapeUtil::GetDimension(a_shape, -2);
-  const int64 n = xla::ShapeUtil::GetDimension(a_shape, -1);
+  const int64 m = ShapeUtil::GetDimension(a_shape, -2);
+  const int64 n = ShapeUtil::GetDimension(a_shape, -1);
 
   const int64 num_batch_dims = num_dims - 2;
   std::vector<int64> batch_dims(num_batch_dims);
   for (int i = 0; i < num_batch_dims; ++i) {
-    batch_dims[i] = xla::ShapeUtil::GetDimension(a_shape, i);
+    batch_dims[i] = ShapeUtil::GetDimension(a_shape, i);
   }
 
   std::vector<int64> batch_dim_indices(num_batch_dims);
   std::iota(batch_dim_indices.begin(), batch_dim_indices.end(), 0);
 
-  auto qr_body_fn =
-      [&](xla::XlaOp j, absl::Span<const xla::XlaOp> values,
-          xla::XlaBuilder* builder) -> xla::StatusOr<std::vector<xla::XlaOp>> {
+  auto qr_body_fn = [&](XlaOp j, absl::Span<const XlaOp> values,
+                        XlaBuilder* builder) -> StatusOr<std::vector<XlaOp>> {
     auto a = values[0];
     auto vs = values[1];
     auto taus = values[2];
 
     // v, beta = house(a[:, j], j)
     auto x = DynamicSliceInMinorDims(a, {j}, {1});
-    xla::XlaOp v, tau, beta;
-    TF_RETURN_IF_ERROR(House(xla::Collapse(x, {num_dims - 2, num_dims - 1}), j,
+    XlaOp v, tau, beta;
+    TF_RETURN_IF_ERROR(House(Collapse(x, {num_dims - 2, num_dims - 1}), j,
                              batch_dims, m, &v, &tau, &beta));
 
     std::vector<int64> shape = batch_dims;
     shape.push_back(1);
     shape.push_back(m);
-    auto v_broadcast = xla::Reshape(v, shape);
+    auto v_broadcast = Reshape(v, shape);
     // a[:, :] -= tau * np.dot(v[:, np.newaxis],
     //                          np.dot(v[np.newaxis, :], a[:, :]))
     auto vva = BatchDot(v_broadcast, a, precision);
     vva = BatchDot(TransposeInMinorDims(v_broadcast), vva, precision);
-    a = a - xla::Mul(tau, vva,
-                     /*broadcast_dimensions=*/batch_dim_indices);
+    a = a - Mul(tau, vva,
+                /*broadcast_dimensions=*/batch_dim_indices);
 
     // It is more precise to populate column 'k' explicitly, rather than
     // computing it implicitly by applying the Householder transformation.
     // a[k,k] = beta
     // a[k+1:,k] = np.zeros([m-k-1], dtype=a.dtype)
-    auto iota = xla::Reshape(xla::Iota(a.builder(), xla::S32, m), {m, 1});
-    auto predecessor_mask = xla::ConvertElementType(xla::Lt(iota, j), type);
-    auto mask = xla::Broadcast(xla::ConvertElementType(xla::Eq(iota, j), type),
-                               std::vector<int64>(batch_dims.size(), 1));
-    auto new_x =
-        xla::Mul(x, predecessor_mask,
-                 /*broadcast_dimensions=*/{num_dims - 2, num_dims - 1}) +
-        xla::Mul(beta, mask, /*broadcast_dimensions=*/batch_dim_indices);
+    auto iota = Reshape(Iota(a.builder(), S32, m), {m, 1});
+    auto predecessor_mask = ConvertElementType(Lt(iota, j), type);
+    auto mask = Broadcast(ConvertElementType(Eq(iota, j), type),
+                          std::vector<int64>(batch_dims.size(), 1));
+    auto new_x = Mul(x, predecessor_mask,
+                     /*broadcast_dimensions=*/{num_dims - 2, num_dims - 1}) +
+                 Mul(beta, mask, /*broadcast_dimensions=*/batch_dim_indices);
     a = DynamicUpdateSliceInMinorDims(a, new_x, {j});
 
     // vs[:, j] = v
     vs = DynamicUpdateSliceInMinorDims(
-        vs, xla::Reshape(v, ConcatVectors(batch_dims, {m, 1})), {j});
+        vs, Reshape(v, ConcatVectors(batch_dims, {m, 1})), {j});
     // taus[j] = tau
     taus = DynamicUpdateSliceInMinorDims(
-        taus, xla::Reshape(tau, ConcatVectors(batch_dims, {1})), {j});
-    return std::vector<xla::XlaOp>{a, vs, taus};
+        taus, Reshape(tau, ConcatVectors(batch_dims, {1})), {j});
+    return std::vector<XlaOp>{a, vs, taus};
   };
 
-  auto vs = xla::Zeros(builder, xla::ShapeUtil::MakeShape(
-                                    type, ConcatVectors(batch_dims, {m, n})));
-  auto taus = xla::Zeros(
-      builder, xla::ShapeUtil::MakeShape(type, ConcatVectors(batch_dims, {n})));
+  auto vs = Zeros(
+      builder, ShapeUtil::MakeShape(type, ConcatVectors(batch_dims, {m, n})));
+  auto taus = Zeros(builder,
+                    ShapeUtil::MakeShape(type, ConcatVectors(batch_dims, {n})));
 
-  TF_ASSIGN_OR_RETURN(auto values,
-                      xla::ForEachIndex(std::min(m, n), xla::S32, qr_body_fn,
-                                        {a, vs, taus}, "qr", builder));
+  TF_ASSIGN_OR_RETURN(auto values, ForEachIndex(std::min(m, n), S32, qr_body_fn,
+                                                {a, vs, taus}, "qr", builder));
 
   QRBlockResult result;
   result.r = values[0];
@@ -250,24 +248,23 @@ xla::StatusOr<QRBlockResult> QRBlock(
 // return W
 // There is no need to return Y since at termination of the loop it is equal to
 // vs.
-xla::StatusOr<xla::XlaOp> ComputeWYRepresentation(
-    xla::PrimitiveType type, absl::Span<const int64> batch_dims, xla::XlaOp vs,
-    xla::XlaOp taus, int64 m, int64 n,
-    xla::PrecisionConfig::Precision precision) {
+StatusOr<XlaOp> ComputeWYRepresentation(PrimitiveType type,
+                                        absl::Span<const int64> batch_dims,
+                                        XlaOp vs, XlaOp taus, int64 m, int64 n,
+                                        PrecisionConfig::Precision precision) {
   std::vector<int64> batch_dim_indices(batch_dims.size());
   std::iota(batch_dim_indices.begin(), batch_dim_indices.end(), 0);
   int64 n_index = batch_dims.size() + 1;
 
-  auto body_fn =
-      [&](xla::XlaOp j, absl::Span<const xla::XlaOp> values,
-          xla::XlaBuilder* builder) -> xla::StatusOr<std::vector<xla::XlaOp>> {
+  auto body_fn = [&](XlaOp j, absl::Span<const XlaOp> values,
+                     XlaBuilder* builder) -> StatusOr<std::vector<XlaOp>> {
     auto w = values[0];
     auto y = values[1];
     const auto vs = values[2];
     const auto taus = values[3];
 
     // Want j values in range [1, ... n).
-    j = j + xla::ConstantR0<int32>(builder, 1);
+    j = j + ConstantR0<int32>(builder, 1);
     // vs has shape [..., m, 1]
     auto v = DynamicSliceInMinorDims(vs, {j}, {1});
     // beta has shape [..., 1]
@@ -278,31 +275,31 @@ xla::StatusOr<xla::XlaOp> ComputeWYRepresentation(
     // wyv has shape [..., m, 1]
     auto wyv = BatchDot(w, yv, precision);
 
-    auto z = xla::Mul(
+    auto z = Mul(
         -beta, v + wyv,
         /*broadcast_dimensions=*/ConcatVectors(batch_dim_indices, {n_index}));
 
     w = DynamicUpdateSliceInMinorDims(w, z, {j});
     y = DynamicUpdateSliceInMinorDims(y, v, {j});
 
-    return std::vector<xla::XlaOp>{w, y, vs, taus};
+    return std::vector<XlaOp>{w, y, vs, taus};
   };
 
-  xla::XlaBuilder* builder = vs.builder();
-  auto w = xla::Zeros(builder, xla::ShapeUtil::MakeShape(
-                                   type, ConcatVectors(batch_dims, {m, n})));
+  XlaBuilder* builder = vs.builder();
+  auto w = Zeros(builder,
+                 ShapeUtil::MakeShape(type, ConcatVectors(batch_dims, {m, n})));
   auto y = w;
   auto v = SliceInMinorDims(vs, {0}, {1});
   auto beta = SliceInMinorDims(taus, {0}, {1});
   y = UpdateSliceInMinorDims(y, v, {0});
-  auto bv = xla::Mul(
-      -beta, v,
-      /*broadcast_dimensions=*/ConcatVectors(batch_dim_indices, {n_index}));
+  auto bv =
+      Mul(-beta, v,
+          /*broadcast_dimensions=*/ConcatVectors(batch_dim_indices, {n_index}));
   w = UpdateSliceInMinorDims(w, bv, {0});
 
   TF_ASSIGN_OR_RETURN(
-      auto values, xla::ForEachIndex(n - 1, xla::S32, body_fn, {w, y, vs, taus},
-                                     "wy", builder));
+      auto values,
+      ForEachIndex(n - 1, S32, body_fn, {w, y, vs, taus}, "wy", builder));
   return values[0];
 }
 
@@ -323,34 +320,34 @@ xla::StatusOr<xla::XlaOp> ComputeWYRepresentation(
 //   return (q, a)
 // TODO(phawkins): consider using UT transformations (in the form I - V U V')
 // rather than WY transformations.
-xla::StatusOr<QRDecompositionResult> QRDecomposition(
-    xla::XlaOp a, bool full_matrices, int64 block_size,
-    xla::PrecisionConfig::Precision precision) {
-  xla::XlaBuilder* builder = a.builder();
-  TF_ASSIGN_OR_RETURN(xla::Shape a_shape, builder->GetShape(a));
-  const int num_dims = xla::ShapeUtil::Rank(a_shape);
+StatusOr<QRDecompositionResult> QRDecomposition(
+    XlaOp a, bool full_matrices, int64 block_size,
+    PrecisionConfig::Precision precision) {
+  XlaBuilder* builder = a.builder();
+  TF_ASSIGN_OR_RETURN(Shape a_shape, builder->GetShape(a));
+  const int num_dims = ShapeUtil::Rank(a_shape);
   if (num_dims < 2) {
-    return errors::InvalidArgument("Arguments to QR must have rank >= 2: ",
-                                   num_dims);
+    return InvalidArgument("Arguments to QR must have rank >= 2: got shape %s",
+                           a_shape.ToString());
   }
-  xla::PrimitiveType type = a_shape.element_type();
+  PrimitiveType type = a_shape.element_type();
 
-  const int64 m = xla::ShapeUtil::GetDimension(a_shape, -2);
-  const int64 n = xla::ShapeUtil::GetDimension(a_shape, -1);
+  const int64 m = ShapeUtil::GetDimension(a_shape, -2);
+  const int64 n = ShapeUtil::GetDimension(a_shape, -1);
   const int64 p = std::min(m, n);
 
   if (block_size < 1) {
-    return errors::InvalidArgument(
-        "block_size argument to QR must be >= 1; got ", block_size);
+    return InvalidArgument("block_size argument to QR must be >= 1; got %d",
+                           block_size);
   }
 
   const int64 num_batch_dims = num_dims - 2;
   std::vector<int64> batch_dims(num_batch_dims);
   for (int i = 0; i < num_batch_dims; ++i) {
-    batch_dims[i] = xla::ShapeUtil::GetDimension(a_shape, i);
+    batch_dims[i] = ShapeUtil::GetDimension(a_shape, i);
   }
 
-  auto q = xla::Broadcast(xla::IdentityMatrix(builder, type, m, m), batch_dims);
+  auto q = Broadcast(IdentityMatrix(builder, type, m, m), batch_dims);
   for (int64 i = 0; i < p; i += block_size) {
     int64 k = std::min(block_size, p - i);
 
@@ -393,4 +390,4 @@ xla::StatusOr<QRDecompositionResult> QRDecomposition(
   return result;
 }
 
-}  // namespace tensorflow
+}  // namespace xla
diff --git a/tensorflow/compiler/tf2xla/lib/qr.h b/tensorflow/compiler/xla/client/lib/qr.h
similarity index 74%
rename from tensorflow/compiler/tf2xla/lib/qr.h
rename to tensorflow/compiler/xla/client/lib/qr.h
index 24b537ac8b..827c8eeca0 100644
--- a/tensorflow/compiler/tf2xla/lib/qr.h
+++ b/tensorflow/compiler/xla/client/lib/qr.h
@@ -13,13 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_QR_H_
-#define TENSORFLOW_COMPILER_TF2XLA_LIB_QR_H_
+#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_QR_H_
+#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_QR_H_
 
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 
-namespace tensorflow {
+namespace xla {
 
 // Computes the QR decompositions of a batch of matrices. That is,
 // given a (batched) matrix a, computes an orthonormal matrix Q and an
@@ -29,14 +29,14 @@ namespace tensorflow {
 // the block size to use.
 // TODO(phawkins): handle the complex case.
 struct QRDecompositionResult {
-  xla::XlaOp q;
-  xla::XlaOp r;
+  XlaOp q;
+  XlaOp r;
 };
 
-xla::StatusOr<QRDecompositionResult> QRDecomposition(
-    xla::XlaOp a, bool full_matrices, int64 block_size = 128,
-    xla::PrecisionConfig::Precision precision = xla::PrecisionConfig::HIGHEST);
+StatusOr<QRDecompositionResult> QRDecomposition(
+    XlaOp a, bool full_matrices, int64 block_size = 128,
+    PrecisionConfig::Precision precision = PrecisionConfig::HIGHEST);
 
-}  // namespace tensorflow
+}  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_TF2XLA_LIB_QR_H_
+#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_QR_H_
diff --git a/tensorflow/compiler/xla/client/lib/qr_test.cc b/tensorflow/compiler/xla/client/lib/qr_test.cc
new file mode 100644
index 0000000000..b27d364b62
--- /dev/null
+++ b/tensorflow/compiler/xla/client/lib/qr_test.cc
@@ -0,0 +1,93 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/lib/qr.h"
+
+#include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/array3d.h"
+#include "tensorflow/compiler/xla/client/lib/matrix.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+
+namespace {
+
+using QrTest = xla::ClientLibraryTestBase;
+
+XLA_TEST_F(QrTest, Simple) {
+  xla::XlaBuilder builder(TestName());
+
+  xla::Array2D<float> a_vals({
+      {4, 6, 8, 10},
+      {6, 45, 54, 63},
+      {8, 54, 146, 166},
+      {10, 63, 166, 310},
+  });
+
+  xla::XlaOp a;
+  auto a_data = CreateR2Parameter<float>(a_vals, 0, "a", &builder, &a);
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto result,
+      xla::QRDecomposition(a, /*full_matrices=*/true, /*block_size=*/2));
+
+  // Verifies that the decomposition composes back to the original matrix.
+  //
+  // This isn't a terribly demanding test, (e.g., we should verify that Q is
+  // orthonormal and R is upper-triangular) but it's awkward to write such tests
+  // without more linear algebra libraries. It's easier to test the numerics
+  // from Python, anyway, where we have access to numpy and scipy.
+  xla::BatchDot(result.q, result.r, xla::PrecisionConfig::HIGHEST);
+
+  ComputeAndCompareR2<float>(&builder, a_vals, {a_data.get()},
+                             xla::ErrorSpec(1e-4, 1e-4));
+}
+
+XLA_TEST_F(QrTest, SimpleBatched) {
+  xla::XlaBuilder builder(TestName());
+
+  xla::Array3D<float> a_vals({
+      {
+          {4, 6, 8, 10},
+          {6, 45, 54, 63},
+          {8, 54, 146, 166},
+          {10, 63, 166, 310},
+      },
+      {
+          {16, 24, 8, 12},
+          {24, 61, 82, 48},
+          {8, 82, 456, 106},
+          {12, 48, 106, 62},
+      },
+  });
+
+  xla::XlaOp a;
+  auto a_data = CreateR3Parameter<float>(a_vals, 0, "a", &builder, &a);
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto result,
+      xla::QRDecomposition(a, /*full_matrices=*/true, /*block_size=*/2));
+
+  xla::BatchDot(result.q, result.r, xla::PrecisionConfig::HIGHEST);
+
+  ComputeAndCompareR3<float>(&builder, a_vals, {a_data.get()},
+                             xla::ErrorSpec(1e-4, 1e-4));
+}
+
+}  // namespace
-- 
GitLab


From 06c60fb179befb6011ad85cf8632315c70ddcba1 Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Tue, 11 Dec 2018 12:01:56 -0800
Subject: [PATCH 363/873] TF Lite models page skeleton

PiperOrigin-RevId: 225045442
---
 tensorflow/lite/g3doc/_book.yaml              |  26 ++++
 tensorflow/lite/g3doc/models/_index.yaml      | 125 ++++++++++++++++++
 .../lite/g3doc/models/image/label/android.md  |   3 +
 .../lite/g3doc/models/image/label/ios.md      |   3 +
 .../lite/g3doc/models/image/label/overview.md |   8 ++
 5 files changed, 165 insertions(+)
 create mode 100644 tensorflow/lite/g3doc/models/_index.yaml
 create mode 100644 tensorflow/lite/g3doc/models/image/label/android.md
 create mode 100644 tensorflow/lite/g3doc/models/image/label/ios.md
 create mode 100644 tensorflow/lite/g3doc/models/image/label/overview.md

diff --git a/tensorflow/lite/g3doc/_book.yaml b/tensorflow/lite/g3doc/_book.yaml
index 36bf4f4618..0c79e79fdd 100644
--- a/tensorflow/lite/g3doc/_book.yaml
+++ b/tensorflow/lite/g3doc/_book.yaml
@@ -77,6 +77,32 @@ upper_tabs:
         - title: Optimizing for mobile
           path: /lite/tfmobile/optimizing
 
+    # - name: Models
+    #   contents:
+    #   - title: Overview
+    #     path: /lite/models/
+    #   - heading: Beginner
+    #     style: divider
+    #   - title: Image labeling
+    #     section:
+    #     - title: Overview
+    #       path: /lite/models/image/label/overview
+    #     - title: Android
+    #       path: /lite/models/image/label/android
+    #     - title: iOS
+    #       path: /lite/models/image/label/ios
+    #   - heading: Advanced
+    #     style: divider
+    #   - heading: Image
+    #   - title: Image classification
+    #     path: /lite/models/image/classification/
+    #   - heading: Audio
+    #   - title: Hot word detection
+    #     path: /lite/models/audio/hot_word/
+    #   - heading: Text
+    #   - title: Text classification
+    #     path: /lite/models/text/classification/
+
     - name: API
       skip_translation: true
       contents:
diff --git a/tensorflow/lite/g3doc/models/_index.yaml b/tensorflow/lite/g3doc/models/_index.yaml
new file mode 100644
index 0000000000..f4d8bc40a9
--- /dev/null
+++ b/tensorflow/lite/g3doc/models/_index.yaml
@@ -0,0 +1,125 @@
+project_path: /lite/_project.yaml
+book_path: /lite/_book.yaml
+description: <!--no description-->
+landing_page:
+  body_class: tfo-hide-page-nav
+  custom_css_path: /site-assets/css/style.css
+  show_side_navs: true
+  rows:
+
+  # Hero
+  - classname: >
+      devsite-landing-row-50
+      devsite-landing-row-large-headings
+      devsite-landing-row-no-image-background
+    foreground: theme
+    items:
+    - heading: Models marketplace
+      description: >
+        The TensorFlow Lite models marketplace, your neighborhood model shoppe.
+      image_path: /resources/images/tflite-card-16x9.png
+
+  # Features
+  - background: grey
+    items:
+    - heading: Optimized for mobile
+      description: >
+        Machine learning can make your apps more engaging, personalized, and
+        helpful, and provides solutions that are optimized to run on-device.
+    - heading: Built with Google expertise
+      description: >
+        Models offer the technologies that have long powered Google's own
+        experiences on mobile.
+    - heading: Approachable and comprehensive
+      description: >
+        Use out-of-the-box solutions (base APIs) or custom models, running
+        on-device or in the Cloud, depending on your specific needs.
+
+  # Beginner models
+  - classname: devsite-landing-row-100
+    heading: "Build machine learning into your apps"
+    items:
+    - heading: >
+        Image labeling
+      description: >
+        Identify objects, locations, activities, animal species, products, and
+        more
+      icon:
+        path: ../images/landing-page/assistant_logo.png
+      path: /lite/image/labeling/
+    - heading: >
+        Text recognition (OCR)
+      description: >
+        Recognize and extract text from images
+      icon:
+        path: ../images/landing-page/assistant_logo.png
+      path: /lite/image/labeling/
+    - heading: >
+        Face detection
+      description: >
+        Detect faces and facial landmarks
+      icon:
+        path: ../images/landing-page/assistant_logo.png
+      path: /lite/image/labeling/
+
+  - items:
+    - heading: >
+        Barcode scanning
+      description: >
+        Scan and process barcodes
+      icon:
+        path: ../images/landing-page/assistant_logo.png
+      path: /lite/image/labeling/
+    - heading: >
+        Landmark detection
+      description: >
+        Identify popular landmarks in an image
+      icon:
+        path: ../images/landing-page/assistant_logo.png
+      path: /lite/image/labeling/
+    - heading: >
+        Smart reply
+      description: >
+        Provide suggested text snippet that fits context
+      icon:
+        path: ../images/landing-page/assistant_logo.png
+      path: /lite/image/labeling/
+
+  # Custom models
+  - classname: >
+      devsite-landing-row-no-image-background
+      devsite-landing-row-50
+      devsite-landing-row-large-headings
+    foreground: theme
+    background: grey
+    items:
+    - heading: Custom models
+      description: >
+        <p>If models don’t cover your use cases, you can always
+        bring your own existing TensorFlow Lite models. Just upload your model,
+        and we’ll take care of hosting and serving it to your app.</p>
+
+        <p>Models acts as an API layer to your custom model, making it easy to
+        run and use. In addition to deploying your models, we are releasing an
+        experimental model compression flow that aims to reduce model size (up
+        to orders of magnitudes) while maintaining similar accuracy. Sign up at
+        <a href="https://g.co/firebase/signup">g.co/firebase/signup</a></p>
+
+        <p>And if you’re new to machine learning and want more information on
+        custom models for mobile, you can <a
+        href="//www.tensorflow.org/lite/">learn more about TensorFlow
+        Lite.</a></p>
+      image_path: /resources/images/tflite-card-16x9.png
+      image_left: true
+  - classname: devsite-landing-row-large-headings
+    foreground: theme
+    items:
+    - heading: Just the beginning
+      description: >
+        Our ultimate goal is to reduce idea–to–implementation cycles and make AI
+        an essential and intuitive part of a developer's toolkit. We will do so
+        by continuing to add new Base APIs that leverage Google’s machine
+        learning expertise. Base APIs will ultimately cover significantly more
+        use cases in the vision, speech, and text fields. We will also continue
+        to simplify use of custom models, adding tools to deploy, compress, and
+        create them.
diff --git a/tensorflow/lite/g3doc/models/image/label/android.md b/tensorflow/lite/g3doc/models/image/label/android.md
new file mode 100644
index 0000000000..9cd54aad1e
--- /dev/null
+++ b/tensorflow/lite/g3doc/models/image/label/android.md
@@ -0,0 +1,3 @@
+# Android
+
+lorem
diff --git a/tensorflow/lite/g3doc/models/image/label/ios.md b/tensorflow/lite/g3doc/models/image/label/ios.md
new file mode 100644
index 0000000000..904c6450ac
--- /dev/null
+++ b/tensorflow/lite/g3doc/models/image/label/ios.md
@@ -0,0 +1,3 @@
+# iOS
+
+lorem
diff --git a/tensorflow/lite/g3doc/models/image/label/overview.md b/tensorflow/lite/g3doc/models/image/label/overview.md
new file mode 100644
index 0000000000..b3d9133bb2
--- /dev/null
+++ b/tensorflow/lite/g3doc/models/image/label/overview.md
@@ -0,0 +1,8 @@
+# Overview
+
+Image labeling gives you insight into the content of images. When you use the
+API, you get a list of the entities that were recognized: people, things,
+places, activities, and so on. Each label found comes with a score that
+indicates the confidence the ML model has in its relevance. With this
+information, you can perform tasks such as automatic metadata generation
+and content moderation.
-- 
GitLab


From cf9878d6a691c1ee8277c83a94f86adcd5fedc65 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 11 Dec 2018 12:14:29 -0800
Subject: [PATCH 364/873] [XLA:Python] Add Cholesky, QR, and TriangularSolve to
 the XLA Python API.

This allows non-TF Python clients to reuse the TensorFlow implementations of these ops (and any future improvements to be shared between users).

PiperOrigin-RevId: 225047881
---
 tensorflow/compiler/xla/python/BUILD          |  3 ++
 .../xla/python/local_computation_builder.cc   | 24 +++++++++++
 .../xla/python/local_computation_builder.h    |  7 ++++
 .../xla/python/local_computation_builder.i    |  3 ++
 tensorflow/compiler/xla/python/xla_client.py  | 14 +++++++
 .../compiler/xla/python/xla_client_test.py    | 41 +++++++++++++++++--
 6 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD
index 63ac1c6649..4a57b1051e 100644
--- a/tensorflow/compiler/xla/python/BUILD
+++ b/tensorflow/compiler/xla/python/BUILD
@@ -66,7 +66,10 @@ cc_library(
         "//tensorflow/compiler/xla/client:local_client",
         "//tensorflow/compiler/xla/client:xla_builder",
         "//tensorflow/compiler/xla/client:xla_computation",
+        "//tensorflow/compiler/xla/client/lib:cholesky",
         "//tensorflow/compiler/xla/client/lib:math",
+        "//tensorflow/compiler/xla/client/lib:qr",
+        "//tensorflow/compiler/xla/client/lib:triangular_solve",
         "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/compiler/xla/service:shaped_buffer",
         "//tensorflow/compiler/xrt:xrt_proto",
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index 6e2ee86632..d4d31fb8c0 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -24,7 +24,10 @@ limitations under the License.
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope.h"
 #include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/compiler/xla/client/lib/cholesky.h"
 #include "tensorflow/compiler/xla/client/lib/math.h"
+#include "tensorflow/compiler/xla/client/lib/qr.h"
+#include "tensorflow/compiler/xla/client/lib/triangular_solve.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/client/xla_computation.h"
 #include "tensorflow/compiler/xla/executable_run_options.h"
@@ -865,6 +868,27 @@ LocalOp LocalComputationBuilder::SortKeyVal(const LocalOp& keys,
   return xla::Sort(keys.op(), {values.op()}, dimension);
 }
 
+LocalOp LocalComputationBuilder::Cholesky(const LocalOp& a) {
+  return xla::Cholesky(a.op());
+}
+
+LocalOp LocalComputationBuilder::QR(const LocalOp& a, bool full_matrices) {
+  XlaBuilder* builder = a.op().builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(auto qr, xla::QRDecomposition(a.op(), full_matrices));
+    return xla::Tuple(builder, {qr.q, qr.r});
+  });
+}
+
+LocalOp LocalComputationBuilder::TriangularSolve(const LocalOp& a,
+                                                 const LocalOp& b,
+                                                 bool left_side, bool lower,
+                                                 bool transpose_a,
+                                                 bool conjugate_a) {
+  return xla::TriangularSolve(a.op(), b.op(), left_side, lower, transpose_a,
+                              conjugate_a);
+}
+
 StatusOr<LocalComputation*> LocalComputationBuilder::BuildConstantSubGraph(
     const LocalOp& operand) {
   TF_ASSIGN_OR_RETURN(XlaComputation computation,
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
index 149e44570d..7647ef44ad 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.h
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -394,6 +394,13 @@ class LocalComputationBuilder {
   LocalOp SortKeyVal(const LocalOp& keys, const LocalOp& values,
                      int64 dimension);
 
+  LocalOp QR(const LocalOp& a, bool full_matrices);
+
+  LocalOp Cholesky(const LocalOp& a);
+
+  LocalOp TriangularSolve(const LocalOp& a, const LocalOp& b, bool left_side,
+                          bool lower, bool transpose_a, bool conjugate_a);
+
   StatusOr<LocalComputation*> BuildConstantSubGraph(const LocalOp& operand);
 
 #define _FORWARD(method_name, return_sig, args_sig) \
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
index d23d693c1e..82d25304f0 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.i
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -1144,6 +1144,9 @@ tensorflow::ImportNumpy();
 %unignore xla::swig::LocalComputationBuilder::Imag;
 %unignore xla::swig::LocalComputationBuilder::Conj;
 %unignore xla::swig::LocalComputationBuilder::Complex;
+%unignore xla::swig::LocalComputationBuilder::Cholesky;
+%unignore xla::swig::LocalComputationBuilder::QR;
+%unignore xla::swig::LocalComputationBuilder::TriangularSolve;
 %unignore xla::swig::DeleteLocalComputation;
 %unignore xla::swig::DestructureLocalShapedBufferTuple;
 %unignore xla::swig::DestructureXrtAllocationTuple;
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
index c91a2aaf56..3366a83543 100644
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -1411,6 +1411,20 @@ class ComputationBuilder(object):
     """Enqueues a key-value sort operation onto the computation."""
     return self._client.SortKeyVal(keys, values, dimension)
 
+  def Cholesky(self, a):
+    """Enqueues a Cholesky decomposition onto the computation."""
+    return self._client.Cholesky(a)
+
+  def QR(self, a, full_matrices=True):
+    """Enqueues a QR decomposition onto the computation."""
+    return self._client.QR(a, full_matrices)
+
+  def TriangularSolve(self, a, b, left_side=False, lower=False,
+                      transpose_a=False, conjugate_a=False):
+    """Enqueues a triangular-solve operation onto the computation."""
+    return self._client.TriangularSolve(
+        a, b, left_side, lower, transpose_a, conjugate_a)
+
 
 def _forward_methods_to_local_builder():
   """Forward remaining ComputationBuilder methods to the C API.
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
index 21b5c93b61..a4c615846e 100644
--- a/tensorflow/compiler/xla/python/xla_client_test.py
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
 import itertools
 import threading
 
@@ -51,9 +52,11 @@ class LocalComputationTest(unittest.TestCase):
   def _ExecuteAndCompareExact(self, c, arguments=(), expected=None):
     self._ExecuteAndAssertWith(np.testing.assert_equal, c, arguments, expected)
 
-  def _ExecuteAndCompareClose(self, c, arguments=(), expected=None):
-    self._ExecuteAndAssertWith(np.testing.assert_allclose, c, arguments,
-                               expected)
+  def _ExecuteAndCompareClose(self, c, arguments=(), expected=None, rtol=1e-7,
+                              atol=0):
+    self._ExecuteAndAssertWith(
+        functools.partial(np.testing.assert_allclose, rtol=rtol, atol=atol),
+        c, arguments, expected)
 
 
 def NumpyArrayF32(*args, **kwargs):
@@ -1057,6 +1060,38 @@ class SingleOpTest(LocalComputationTest):
     self.assertTrue(np.all(lo <= result))
     self.assertTrue(np.all(result < hi))
 
+  def testCholesky(self):
+    l = np.array([[4, 0, 0, 0], [6, 5, 0, 0], [2, 14, 16, 0], [3, 6, 1, 4]],
+                 dtype=np.float32)
+    c = self._NewComputation()
+    c.Cholesky(c.Constant(np.dot(l, l.T)))
+    self._ExecuteAndCompareClose(c, expected=l, rtol=1e-4)
+
+  def testQR(self):
+    a = np.array(
+        [[4, 6, 8, 10], [6, 45, 54, 63], [8, 54, 146, 166], [10, 63, 166, 310]],
+        dtype=np.float32)
+    c = self._NewComputation()
+    c.QR(c.Constant(a), full_matrices=True)
+    q, r = self._Execute(c, ())
+    np.testing.assert_allclose(np.dot(q, r), a, rtol=1e-4)
+
+  def testTriangularSolve(self):
+    a_vals = np.array(
+        [[2, 0, 0, 0], [3, 6, 0, 0], [4, 7, 9, 0], [5, 8, 10, 11]],
+        dtype=np.float32)
+    b_vals = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
+                      dtype=np.float32)
+
+    c = self._NewComputation()
+    c.TriangularSolve(c.Constant(a_vals), c.Constant(b_vals), left_side=False,
+                      lower=True, transpose_a=True)
+    self._ExecuteAndCompareClose(c, expected=np.array([
+        [0.5, 0.08333334, 0.04629629, 0.03367003],
+        [2.5, -0.25, -0.1388889, -0.1010101],
+        [4.5, -0.58333331, -0.32407406, -0.23569024],
+    ], dtype=np.float32), rtol=1e-4)
+
   def testIsConstant(self):
     c = self._NewComputation()
     a = c.ConstantS32Scalar(3)
-- 
GitLab


From 316660063aaaaeb95b63d08a54e746934de659c0 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 11 Dec 2018 12:15:01 -0800
Subject: [PATCH 365/873] Remove/avoid deprecation warnings in 2.x saving
 utilities

Having the warning on little-used 1.x utilities isn't super important, but the 2.x utilities shouldn't print them during normal use for sure.

PiperOrigin-RevId: 225047956
---
 tensorflow/python/training/checkpoint_management.py | 4 ----
 tensorflow/python/training/checkpointable/util.py   | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/tensorflow/python/training/checkpoint_management.py b/tensorflow/python/training/checkpoint_management.py
index f745ab4824..a7ad1f70e5 100644
--- a/tensorflow/python/training/checkpoint_management.py
+++ b/tensorflow/python/training/checkpoint_management.py
@@ -56,10 +56,6 @@ def _GetCheckpointFilename(save_dir, latest_filename):
   return os.path.join(save_dir, latest_filename)
 
 
-@deprecation.deprecated(
-    date=None,
-    instructions=("Use tf.train.CheckpointManager to manage checkpoints rather "
-                  "than editing the Checkpoint proto manually."))
 @tf_export(v1=["train.generate_checkpoint_state_proto"])
 def generate_checkpoint_state_proto(save_dir,
                                     model_checkpoint_path,
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
index a54f41a54f..ce1b9c6fc5 100644
--- a/tensorflow/python/training/checkpointable/util.py
+++ b/tensorflow/python/training/checkpointable/util.py
@@ -1863,7 +1863,7 @@ class Checkpoint(tracking.Checkpointable):
       checkpoint_number = assign_op.numpy()
     file_path = self.write("%s-%d" % (file_prefix, checkpoint_number),
                            session=session)
-    checkpoint_management.update_checkpoint_state(
+    checkpoint_management.update_checkpoint_state_internal(
         save_dir=os.path.dirname(file_prefix),
         model_checkpoint_path=file_path,
         all_model_checkpoint_paths=[file_path])
-- 
GitLab


From 4fe05f35cfab9324caedc4fc8da3c16b0f412d27 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 11 Dec 2018 12:15:39 -0800
Subject: [PATCH 366/873] [XLA:CPU] Add support for CustomCall targets that
 return tuples.

Populate the tuple index table of the return value; the callee cannot do this since it does not know the buffer assignments.

Explicitly enable custom_call_test only for cpu in the BUILD file, rather than disabling it on non-CPU backends. These tests would not work on any non-CPU backend.

PiperOrigin-RevId: 225048065
---
 .../compiler/xla/service/cpu/ir_emitter.cc    | 16 ++++++++
 tensorflow/compiler/xla/tests/BUILD           | 19 ++++-----
 .../compiler/xla/tests/custom_call_test.cc    | 39 ++++++++++++++++---
 3 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 4032c2da2f..38ab5b78d2 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -2271,6 +2271,22 @@ Status IrEmitter::HandleCustomCall(HloInstruction* custom_call) {
               /*isVarArg=*/false)));
 
   TF_RETURN_IF_ERROR(EmitTargetAddressForOp(custom_call));
+  // Write the tuple table if the output is a tuple.
+  if (ShapeUtil::IsTuple(custom_call->shape())) {
+    std::vector<llvm::Value*> base_ptrs;
+    for (int i = 0; i < ShapeUtil::TupleElementCount(custom_call->shape());
+         ++i) {
+      const Shape& elem_shape =
+          ShapeUtil::GetTupleElementShape(custom_call->shape(), i);
+      TF_RET_CHECK(!ShapeUtil::IsTuple(elem_shape))
+          << "Nested tuples not implemented";
+      TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice slice,
+                          assignment_.GetUniqueSlice(custom_call, {i}));
+      llvm::Value* addr = EmitBufferPointer(slice, elem_shape);
+      base_ptrs.push_back(addr);
+    }
+    llvm_ir::EmitTuple(GetIrArrayFor(custom_call), base_ptrs, &b_, module_);
+  }
   auto* output_address_arg =
       PointerCast(GetEmittedValueFor(custom_call), i8_ptr_type);
 
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 5a7a4faa7e..0300b64ed5 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -1,6 +1,13 @@
 # Description:
 #   Base testing infrastructure for XLA.
 
+load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_suites", "generate_backend_test_macros", "xla_test", "xla_test_library")
+load(
+    "//tensorflow/core:platform/default/build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
+load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test")
+
 licenses(["notice"])  # Apache 2.0
 
 package(
@@ -23,17 +30,6 @@ filegroup(
     ]),
 )
 
-load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test")
-load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test_library")
-load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_suites")
-load("//tensorflow/compiler/xla/tests:build_defs.bzl", "generate_backend_test_macros")
-load("//tensorflow:tensorflow.bzl", "tf_cc_binary")
-load("//tensorflow:tensorflow.bzl", "tf_cc_test")
-load(
-    "//tensorflow/core:platform/default/build_config_root.bzl",
-    "tf_cuda_tests_tags",
-)
-
 # Generate test_suites for all backends, named "${backend}_tests".
 generate_backend_suites()
 
@@ -1348,6 +1344,7 @@ xla_test(
 xla_test(
     name = "custom_call_test",
     srcs = ["custom_call_test.cc"],
+    backends = ["cpu"],
     deps = [
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc
index 738b644235..cad43d1b55 100644
--- a/tensorflow/compiler/xla/tests/custom_call_test.cc
+++ b/tensorflow/compiler/xla/tests/custom_call_test.cc
@@ -54,11 +54,20 @@ void Add1ToValues(float* out, float** in) {
   out[2] = array[2] + 1;
   out[3] = array[3] + 1;
 }
+
+void F32TupleSwap(float** out, float** in) {
+  TF_ANNOTATE_MEMORY_IS_INITIALIZED(in[0], sizeof(float));
+  TF_ANNOTATE_MEMORY_IS_INITIALIZED(in[1], sizeof(float));
+  *out[0] = *in[1];
+  *out[1] = *in[0];
+}
+
 }  // namespace
 
 REGISTER_CUSTOM_CALL_TARGET(R0F32Add2);
 REGISTER_CUSTOM_CALL_TARGET(R2F32ReduceSum);
 REGISTER_CUSTOM_CALL_TARGET(Add1ToValues);
+REGISTER_CUSTOM_CALL_TARGET(F32TupleSwap);
 
 namespace xla {
 namespace {
@@ -69,7 +78,7 @@ class CustomCallTest : public HloTestBase {
   Shape r2f32_ = ShapeUtil::MakeShape(F32, {2, 2});
 };
 
-XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR0F32Add2)) {
+XLA_TEST_F(CustomCallTest, CustomCallR0F32Add2) {
   auto module = CreateNewUnverifiedModule();
   auto builder = HloComputation::Builder(TestName());
 
@@ -84,7 +93,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR0F32Add2)) {
   LiteralTestUtil::ExpectR0Near<float>(44.0f, result, error_spec_);
 }
 
-XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR2F32Reduce)) {
+XLA_TEST_F(CustomCallTest, CustomCallR2F32Reduce) {
   auto module = CreateNewUnverifiedModule();
   auto builder = HloComputation::Builder(TestName());
 
@@ -105,7 +114,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR2F32Reduce)) {
   LiteralTestUtil::ExpectR0Near<float>(10.0f, result, error_spec_);
 }
 
-XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(UsedInOtherComputations)) {
+XLA_TEST_F(CustomCallTest, UsedInOtherComputations) {
   auto module = CreateNewUnverifiedModule();
   auto b = HloComputation::Builder(TestName());
 
@@ -129,7 +138,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(UsedInOtherComputations)) {
       Array3D<float>{{{2, 3}, {4, 5}}, {{3, 4}, {5, 6}}}, result);
 }
 
-XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(InputAndOutputLayoutDiffer)) {
+XLA_TEST_F(CustomCallTest, InputAndOutputLayoutDiffer) {
   auto module = CreateNewUnverifiedModule();
   auto b = HloComputation::Builder(TestName());
 
@@ -151,7 +160,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(InputAndOutputLayoutDiffer)) {
   LiteralTestUtil::ExpectR2Equal<float>({{2.f, 4.f}, {3.f, 5.f}}, result);
 }
 
-XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(LayoutConstrained)) {
+XLA_TEST_F(CustomCallTest, LayoutConstrained) {
   // The argument and result of the computation are set to different layouts,
   // but the custom call is layout constrained to a fixed operand and result
   // layout, so the correct result should be produced.
@@ -176,6 +185,26 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(LayoutConstrained)) {
   LiteralTestUtil::ExpectR2Equal<float>({{2.f, 3.f}, {4.f, 5.f}}, result);
 }
 
+XLA_TEST_F(CustomCallTest, TupleOutput) {
+  const char* kModuleStr = R"(
+    HloModule m
+    test {
+      p0 = f32[] parameter(0)
+      p1 = f32[] parameter(1)
+      ROOT %custom-call = (f32[], f32[]) custom-call(f32[] %p0, f32[] %p1), custom_call_target="F32TupleSwap", operand_layout_constraints={f32[], f32[]}
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(kModuleStr));
+
+  Literal arg0 = LiteralUtil::CreateR0<float>(7.f);
+  Literal arg1 = LiteralUtil::CreateR0<float>(42.f);
+
+  Literal expected = LiteralUtil::MakeTuple({&arg1, &arg0});
+  Literal result = ExecuteAndTransfer(std::move(module), {&arg0, &arg1});
+  EXPECT_EQ(result, expected);
+}
+
 class CustomCallClientAPITest : public ClientLibraryTestBase {};
 
 // When using the client API, CustomCall targets can't begin with '$' -- these
-- 
GitLab


From 39b6e1924ebfbc439e8cbb9b66c70d68bad37077 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Tue, 11 Dec 2018 12:19:24 -0800
Subject: [PATCH 367/873] Fix documentation formatting for OneHot op.

PiperOrigin-RevId: 225048682
---
 .../api_def/base_api/api_def_OneHot.pbtxt     | 57 +++++++++----------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/tensorflow/core/api_def/base_api/api_def_OneHot.pbtxt b/tensorflow/core/api_def/base_api/api_def_OneHot.pbtxt
index 807b8ae310..b325df1c8c 100644
--- a/tensorflow/core/api_def/base_api/api_def_OneHot.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_OneHot.pbtxt
@@ -66,7 +66,6 @@ Examples
 =========
 
 Suppose that
-
 ```
   indices = [0, 2, -1, 1]
   depth = 3
@@ -76,16 +75,15 @@ Suppose that
 ```
 
 Then output is `[4 x 3]`:
-
-    ```output =
-      [5.0 0.0 0.0]  // one_hot(0)
-      [0.0 0.0 5.0]  // one_hot(2)
-      [0.0 0.0 0.0]  // one_hot(-1)
-      [0.0 5.0 0.0]  // one_hot(1)
-    ```
+```
+output =
+  [5.0 0.0 0.0]  // one_hot(0)
+  [0.0 0.0 5.0]  // one_hot(2)
+  [0.0 0.0 0.0]  // one_hot(-1)
+  [0.0 5.0 0.0]  // one_hot(1)
+```
 
 Suppose that
-
 ```
   indices = [0, 2, -1, 1]
   depth = 3
@@ -95,19 +93,19 @@ Suppose that
 ```
 
 Then output is `[3 x 4]`:
+```
+output =
+  [0.0 3.0 3.0 3.0]
+  [3.0 3.0 3.0 0.0]
+  [3.0 3.0 3.0 3.0]
+  [3.0 0.0 3.0 3.0]
+//  ^                one_hot(0)
+//      ^            one_hot(2)
+//          ^        one_hot(-1)
+//              ^    one_hot(1)
+```
 
-    ```output =
-      [0.0 3.0 3.0 3.0]
-      [3.0 3.0 3.0 0.0]
-      [3.0 3.0 3.0 3.0]
-      [3.0 0.0 3.0 3.0]
-    //  ^                one_hot(0)
-    //      ^            one_hot(2)
-    //          ^        one_hot(-1)
-    //              ^    one_hot(1)
-    ```
 Suppose that
-
 ```
   indices = [[0, 2], [1, -1]]
   depth = 3
@@ -117,14 +115,15 @@ Suppose that
 ```
 
 Then output is `[2 x 2 x 3]`:
-
-    ```output =
-      [
-        [1.0, 0.0, 0.0]  // one_hot(0)
-        [0.0, 0.0, 1.0]  // one_hot(2)
-      ][
-        [0.0, 1.0, 0.0]  // one_hot(1)
-        [0.0, 0.0, 0.0]  // one_hot(-1)
-      ]```
+```
+output =
+  [
+    [1.0, 0.0, 0.0]  // one_hot(0)
+    [0.0, 0.0, 1.0]  // one_hot(2)
+  ][
+    [0.0, 1.0, 0.0]  // one_hot(1)
+    [0.0, 0.0, 0.0]  // one_hot(-1)
+  ]
+```
 END
 }
-- 
GitLab


From 6756eee557e6a6b14ebb6c3dcb738951c44ff295 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Tue, 11 Dec 2018 12:23:07 -0800
Subject: [PATCH 368/873] Fix up tests to work with TensorShapeV2

PiperOrigin-RevId: 225049315
---
 tensorflow/lite/python/convert_saved_model_test.py  |  7 +++----
 tensorflow/python/eager/backprop.py                 |  9 +++++++--
 tensorflow/python/eager/backprop_test.py            |  9 ---------
 tensorflow/python/eager/ops_test.py                 |  1 -
 tensorflow/python/keras/integration_test.py         |  1 -
 tensorflow/python/keras/layers/core.py              |  4 ++--
 tensorflow/python/keras/layers/core_test.py         |  2 --
 .../python/kernel_tests/control_flow_ops_py_test.py | 13 ++++++-------
 tensorflow/python/kernel_tests/ctc_loss_op_test.py  | 10 ----------
 .../kernel_tests/linalg/linear_operator_test.py     |  6 +++---
 tensorflow/python/layers/core_test.py               |  7 ++++++-
 tensorflow/python/ops/ctc_ops.py                    |  4 +++-
 tensorflow/python/ops/linalg/linear_operator.py     |  5 ++++-
 13 files changed, 34 insertions(+), 44 deletions(-)

diff --git a/tensorflow/lite/python/convert_saved_model_test.py b/tensorflow/lite/python/convert_saved_model_test.py
index 11bfcdc795..fdcbc79ee9 100644
--- a/tensorflow/lite/python/convert_saved_model_test.py
+++ b/tensorflow/lite/python/convert_saved_model_test.py
@@ -93,7 +93,7 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase):
         str(error.exception))
     self.assertEqual([None, 3, 5], tensor.shape.as_list())
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testSetTensorShapeDimensionInvalid(self):
     # Tests set_tensor_shape where the shape passed in is incompatiable.
     tensor = array_ops.placeholder(shape=[None, 3, 5], dtype=dtypes.float32)
@@ -102,9 +102,8 @@ class TensorFunctionsTest(test_util.TensorFlowTestCase):
     with self.assertRaises(ValueError) as error:
       convert_saved_model.set_tensor_shapes([tensor],
                                             {"Placeholder": [1, 5, 5]})
-    self.assertIn(
-        "The shape of tensor 'Placeholder' cannot be changed from "
-        "(?, 3, 5) to [1, 5, 5].", str(error.exception))
+    self.assertIn("The shape of tensor 'Placeholder' cannot be changed",
+                  str(error.exception))
     self.assertEqual([None, 3, 5], tensor.shape.as_list())
 
   @test_util.run_v1_only("b/120545219")
diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 29f9b2cda3..481f680f56 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -1104,8 +1104,13 @@ class GradientTape(object):
         dimension of `target` and `source` do not match.
     """
     target_shape = target.shape
-    if not target_shape.with_rank_at_least(2)[0].is_compatible_with(
-        source.shape.with_rank_at_least(2)[0]):
+    if target_shape.rank is None:
+      dim = Dimension(None)
+    else:
+      dim = target_shape.dims[0]
+    if not (target_shape.with_rank_at_least(2) and
+            source.shape.with_rank_at_least(2) and
+            dim.is_compatible_with(source.shape[0])):
       raise ValueError(
           "Need first dimension of target shape (%s) and "
           "source shape (%s) to match." % (target.shape, source.shape))
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 61c47a29fd..477d18e214 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -1338,17 +1338,14 @@ class BatchJacobianTest(test.TestCase):
                               array_ops.diag(2 * x[1] * y[1])])
     return batch_jacobian, answer
 
-  @test_util.run_v1_only('b/120545219')
   def testPfor(self):
     batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=True)
     self.assertAllEqual(answer, batch_jacobian)
 
-  @test_util.run_v1_only('b/120545219')
   def testWhileLoop(self):
     batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=False)
     self.assertAllEqual(answer, batch_jacobian)
 
-  @test_util.run_v1_only('b/120545219')
   def testPforDefun(self):
 
     @function.defun
@@ -1358,7 +1355,6 @@ class BatchJacobianTest(test.TestCase):
     batch_jacobian, answer = _f()
     self.assertAllEqual(answer, batch_jacobian)
 
-  @test_util.run_v1_only('b/120545219')
   def testWhileLoopDefun(self):
 
     @function.defun
@@ -1368,7 +1364,6 @@ class BatchJacobianTest(test.TestCase):
     batch_jacobian, answer = _f()
     self.assertAllEqual(answer, batch_jacobian)
 
-  @test_util.run_v1_only('b/120545219')
   def testPersistentTape(self):
     if not context.executing_eagerly():
       return
@@ -1379,7 +1374,6 @@ class BatchJacobianTest(test.TestCase):
     with self.assertRaisesRegexp(RuntimeError, 'persistent'):
       g.batch_jacobian(y, x, experimental_use_pfor=False)
 
-  @test_util.run_v1_only('b/120545219')
   def testBadShape(self):
     x = random_ops.random_uniform([2, 3])
     with backprop.GradientTape() as g:
@@ -1387,7 +1381,6 @@ class BatchJacobianTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'Need first dimension'):
       g.batch_jacobian(y, x)
 
-  @test_util.run_v1_only('b/120545219')
   def testBadInputRank(self):
     x = random_ops.random_uniform([2])
     with backprop.GradientTape() as g:
@@ -1402,7 +1395,6 @@ class BatchJacobianTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'must have rank at least 2'):
       g.batch_jacobian(y, x)
 
-  @test_util.run_v1_only('b/120545219')
   def testPforException(self):
     var = variables.Variable([1.])
 
@@ -1423,7 +1415,6 @@ class BatchJacobianTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'No converter'):
       g.batch_jacobian(y, x, experimental_use_pfor=True)
 
-  @test_util.run_v1_only('b/120545219')
   def test_parallel_iterations(self):
     with backprop.GradientTape(persistent=True) as g:
       x = constant_op.constant([[1., 2], [3, 4]])
diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py
index 91d0d5c6f0..17a090d526 100644
--- a/tensorflow/python/eager/ops_test.py
+++ b/tensorflow/python/eager/ops_test.py
@@ -330,7 +330,6 @@ class OpsTest(test_util.TensorFlowTestCase):
     self.assertEquals(t, dtypes.string)
     self.assertEquals(r[0].dtype, dtypes.string)
 
-  @test_util.run_v1_only('b/120545219')
   def testFlattenLayer(self):
     flatten_layer = core.Flatten()
     x = constant_op.constant([[[-10, -20], [-30, -40]], [[10, 20], [30, 40]]])
diff --git a/tensorflow/python/keras/integration_test.py b/tensorflow/python/keras/integration_test.py
index c516514f63..8d65f63aba 100644
--- a/tensorflow/python/keras/integration_test.py
+++ b/tensorflow/python/keras/integration_test.py
@@ -134,7 +134,6 @@ class KerasIntegrationTest(test.TestCase):
                           verbose=2)
       self.assertGreater(history.history['val_acc'][-1], 0.7)
 
-  @test_util.run_v1_only('b/120545219')
   def test_image_classification_sequential(self):
     with self.cached_session():
       np.random.seed(1337)
diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py
index 1b406677d9..39bcb82c72 100644
--- a/tensorflow/python/keras/layers/core.py
+++ b/tensorflow/python/keras/layers/core.py
@@ -549,8 +549,8 @@ class Flatten(Layer):
       inputs = array_ops.transpose(inputs, perm=permutation)
 
     outputs = array_ops.reshape(
-        inputs, (tensor_shape.dimension_value(inputs.shape[0])
-                 or array_ops.shape(inputs)[0], -1))
+        inputs, (tensor_shape.dimension_value(inputs.shape[0]) or
+                 array_ops.shape(inputs)[0], -1))
     if not context.executing_eagerly():
       outputs.set_shape(self.compute_output_shape(inputs.get_shape()))
     return outputs
diff --git a/tensorflow/python/keras/layers/core_test.py b/tensorflow/python/keras/layers/core_test.py
index b8def07190..f138adf760 100644
--- a/tensorflow/python/keras/layers/core_test.py
+++ b/tensorflow/python/keras/layers/core_test.py
@@ -135,7 +135,6 @@ class CoreLayersTest(test.TestCase):
           kwargs={'dims': (1, 4, 2)}, input_shape=(3, 2, 4))
 
   @tf_test_util.run_in_graph_and_eager_modes
-  @tf_test_util.run_v1_only('b/120545219')
   def test_flatten(self):
     testing_utils.layer_test(
         keras.layers.Flatten, kwargs={}, input_shape=(3, 2, 4))
@@ -151,7 +150,6 @@ class CoreLayersTest(test.TestCase):
     self.assertAllClose(outputs, target_outputs)
 
   @tf_test_util.run_in_graph_and_eager_modes
-  @tf_test_util.run_v1_only('b/120545219')
   def test_flatten_scalar_channels(self):
     testing_utils.layer_test(
         keras.layers.Flatten, kwargs={}, input_shape=(3,))
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 21ded25a11..f4a7d5bec9 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -1516,12 +1516,12 @@ class ControlFlowTest(test.TestCase):
         ]
 
       _, r = control_flow_ops.while_loop(c, b, [i, x])
-      self.assertEqual(r.dense_shape.get_shape()[0].value, 1)
+      self.assertEqual(r.dense_shape.get_shape()[0], 1)
 
       _, r = control_flow_ops.while_loop(
           c, b, [i, x],
           [i.get_shape(), tensor_shape.TensorShape([None])])
-      self.assertTrue(r.dense_shape.get_shape()[0].value is None)
+      self.assertEqual(r.dense_shape.get_shape().as_list(), [None])
 
       with self.assertRaisesRegexp(ValueError, "is not compatible with"):
         _, r = control_flow_ops.while_loop(
@@ -1548,15 +1548,14 @@ class ControlFlowTest(test.TestCase):
         ]
 
       _, r = control_flow_ops.while_loop(c, b, [i, x])
-      self.assertEqual(r.dense_shape.get_shape()[0].value, 2)
+      self.assertEqual(r.dense_shape.get_shape()[0], 2)
       self.assertEqual(r.values.get_shape(), tensor_shape.TensorShape([2, 2]))
 
       _, r = control_flow_ops.while_loop(
           c, b, [i, x],
           [i.get_shape(), tensor_shape.TensorShape([None, 2])])
-      self.assertEqual(r.dense_shape.get_shape()[0].value, 2)
-      self.assertTrue(r.values.get_shape()[0].value is None)
-      self.assertEqual(r.values.get_shape()[1].value, 2)
+      self.assertEqual(r.dense_shape.get_shape()[0], 2)
+      self.assertEqual(r.values.get_shape().as_list(), [None, 2])
 
       with self.assertRaisesRegexp(ValueError, "is not compatible with"):
         _, r = control_flow_ops.while_loop(
@@ -1925,7 +1924,7 @@ class ControlFlowTest(test.TestCase):
       self.assertAllClose(np.array([10.0, 10.0, 10.0]), result2)
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only("b/120545219")
   def testWhileUpdateVariable_3(self):
     with self.cached_session():
       select = variables.Variable([3.0, 4.0, 5.0])
diff --git a/tensorflow/python/kernel_tests/ctc_loss_op_test.py b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
index 39a637d831..352dedea4a 100644
--- a/tensorflow/python/kernel_tests/ctc_loss_op_test.py
+++ b/tensorflow/python/kernel_tests/ctc_loss_op_test.py
@@ -242,7 +242,6 @@ class CTCLossTest(test.TestCase):
 
     self._testCTCLoss(inputs, seq_lens, labels, loss_truth, grad_truth)
 
-  @test_util.run_v1_only("b/120545219")
   def test_time_major(self):
     """Testing time_major param.
 
@@ -565,7 +564,6 @@ class CTCLossTestV2(test.TestCase):
               rtol=2e-06,
               atol=2e-06)
 
-  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeated(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=[[1, 3, 3, 3, 0],
@@ -579,7 +577,6 @@ class CTCLossTestV2(test.TestCase):
          [1, 4, 0, 0],
          [4, 2, 9, 4]])
 
-  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeatedPreservesDtypes(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=constant_op.constant(
@@ -597,7 +594,6 @@ class CTCLossTestV2(test.TestCase):
          [1, 4, 0, 0],
          [4, 2, 9, 4]])
 
-  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeatedExtraPadding(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=[[1, 3, 3, 3, 0, 0, 0],
@@ -611,7 +607,6 @@ class CTCLossTestV2(test.TestCase):
          [1, 4, 0, 0],
          [4, 2, 9, 4]])
 
-  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeatedFrontRepeats(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=[[1, 1, 1, 2, 2],
@@ -625,7 +620,6 @@ class CTCLossTestV2(test.TestCase):
          [1, 2],
          [1, 0]])
 
-  @test_util.run_v1_only("b/120545219")
   def testCollapseRepeatedAllLabelsTheSame(self):
     collapsed, new_seq_lengths = ctc_ops.collapse_repeated(
         labels=[[1, 1, 1, 1, 1],
@@ -658,7 +652,6 @@ class CTCLossTestV2(test.TestCase):
 
     self.assertAllEqual(padded_dense, new_dense)
 
-  @test_util.run_v1_only("b/120545219")
   def testUnique(self):
     labels = [
         [3, 4, 4, 3],
@@ -674,7 +667,6 @@ class CTCLossTestV2(test.TestCase):
         [0, 0, 0, 1],
     ], idx)
 
-  @test_util.run_v1_only("b/120545219")
   def testSumStates(self):
     idx = [
         [0, 1, 0, 1],
@@ -694,7 +686,6 @@ class CTCLossTestV2(test.TestCase):
          [1.8, 0.8, 0.0, 0.0]]
     ], sum_of_states)
 
-  @test_util.run_v1_only("b/120545219")
   def testStateToOlabel(self):
     labels = [
         [3, 4, 3, 4],
@@ -733,7 +724,6 @@ class CTCLossTestV2(test.TestCase):
          [22.0 + 23.0 + 24.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]],
     ])
 
-  @test_util.run_v1_only("b/120545219")
   def testStateToOlabelUnique(self):
     labels = [
         [3, 4, 3, 4],
diff --git a/tensorflow/python/kernel_tests/linalg/linear_operator_test.py b/tensorflow/python/kernel_tests/linalg/linear_operator_test.py
index 18e13a76a0..8f8b15e8ed 100644
--- a/tensorflow/python/kernel_tests/linalg/linear_operator_test.py
+++ b/tensorflow/python/kernel_tests/linalg/linear_operator_test.py
@@ -214,7 +214,7 @@ class LinearOperatorTest(test.TestCase):
     operator = LinearOperatorMatmulSolve(matrix, is_square=True)
     self.assertTrue(operator.is_square)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def test_linear_operator_matmul_hints_closed(self):
     matrix = array_ops.placeholder(dtypes.float32)
     operator1 = LinearOperatorMatmulSolve(matrix)
@@ -241,7 +241,7 @@ class LinearOperatorTest(test.TestCase):
     self.assertTrue(operator_matmul.is_self_adjoint)
     self.assertEqual(None, operator_matmul.is_positive_definite)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def test_linear_operator_matmul_hints_false(self):
     matrix = array_ops.placeholder(dtypes.float32)
     operator1 = LinearOperatorMatmulSolve(
@@ -274,7 +274,7 @@ class LinearOperatorTest(test.TestCase):
     self.assertEqual(None, operator_matmul.is_self_adjoint)
     self.assertEqual(None, operator_matmul.is_positive_definite)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def test_linear_operator_matmul_hint_infer_square(self):
     matrix1 = array_ops.placeholder(shape=[2, 3], dtype=dtypes.float32)
     matrix2 = array_ops.placeholder(shape=[3, 2], dtype=dtypes.float32)
diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py
index 3338e55f82..b40a268238 100644
--- a/tensorflow/python/layers/core_test.py
+++ b/tensorflow/python/layers/core_test.py
@@ -463,9 +463,9 @@ class DropoutTest(test.TestCase):
       self.assertAllClose(np.ones((5, 5)), np_output)
 
 
-@test_util.run_v1_only('b/120545219')
 class FlattenTest(test.TestCase):
 
+  @test_util.run_deprecated_v1
   def testCreateFlatten(self):
     with self.cached_session() as sess:
       x = array_ops.placeholder(shape=(None, 2, 3), dtype='float32')
@@ -490,6 +490,7 @@ class FlattenTest(test.TestCase):
     shape = core_layers.Flatten().compute_output_shape((None, 3, None))
     self.assertEqual(shape.as_list(), [None, None])
 
+  @test_util.run_deprecated_v1
   def testDataFormat5d(self):
     np_input_channels_last = np.arange(
         120, dtype='float32').reshape([1, 5, 4, 3, 2])
@@ -507,6 +508,7 @@ class FlattenTest(test.TestCase):
 
       self.assertAllEqual(np_output_cl, np_output_cf)
 
+  @test_util.run_deprecated_v1
   def testDataFormat4d(self):
     np_input_channels_last = np.arange(
         24, dtype='float32').reshape([1, 4, 3, 2])
@@ -524,11 +526,13 @@ class FlattenTest(test.TestCase):
 
       self.assertAllEqual(np_output_cl, np_output_cf)
 
+  @test_util.run_deprecated_v1
   def testFunctionalFlatten(self):
     x = array_ops.placeholder(shape=(None, 2, 3), dtype='float32')
     y = core_layers.flatten(x, name='flatten')
     self.assertEqual(y.get_shape().as_list(), [None, 6])
 
+  @test_util.run_deprecated_v1
   def testFlatten0D(self):
     x = array_ops.placeholder(shape=(None,), dtype='float32')
     y = core_layers.Flatten()(x)
@@ -537,6 +541,7 @@ class FlattenTest(test.TestCase):
     self.assertEqual(list(np_output.shape), [5, 1])
     self.assertEqual(y.shape.as_list(), [None, 1])
 
+  @test_util.run_deprecated_v1
   def testFlattenUnknownAxes(self):
     with self.cached_session() as sess:
       x = array_ops.placeholder(shape=(5, None, None), dtype='float32')
diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py
index db7f9d2378..45286f7c18 100644
--- a/tensorflow/python/ops/ctc_ops.py
+++ b/tensorflow/python/ops/ctc_ops.py
@@ -24,6 +24,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_shape
 
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import functional_ops
@@ -1127,4 +1128,5 @@ def _scan(fn, elems, initial, reverse=False, inclusive=False, final_only=False):
 
 def _get_dim(tensor, i):
   """Get value of tensor shape[i] preferring static value if available."""
-  return tensor.shape[i].value or array_ops.shape(tensor)[i]
+  return tensor_shape.dimension_value(
+      tensor.shape[i]) or array_ops.shape(tensor)[i]
diff --git a/tensorflow/python/ops/linalg/linear_operator.py b/tensorflow/python/ops/linalg/linear_operator.py
index 8efafda3a1..6be81f4b34 100644
--- a/tensorflow/python/ops/linalg/linear_operator.py
+++ b/tensorflow/python/ops/linalg/linear_operator.py
@@ -381,7 +381,10 @@ class LinearOperator(object):
       `Dimension` object.
     """
     # Derived classes get this "for free" once .shape is implemented.
-    return self.shape[-1]
+    if self.shape.rank is None:
+      return tensor_shape.Dimension(None)
+    else:
+      return self.shape.dims[-1]
 
   def domain_dimension_tensor(self, name="domain_dimension_tensor"):
     """Dimension (in the sense of vector spaces) of the domain of this operator.
-- 
GitLab


From 806ccc2cf778407edacfc78bb864a3be01033f06 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Tue, 11 Dec 2018 12:28:45 -0800
Subject: [PATCH 369/873] Fixes race condition.

PiperOrigin-RevId: 225050185
---
 tensorflow/core/kernels/training_op_helpers.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/training_op_helpers.h b/tensorflow/core/kernels/training_op_helpers.h
index 98e2b3c0f2..715dd8af7d 100644
--- a/tensorflow/core/kernels/training_op_helpers.h
+++ b/tensorflow/core/kernels/training_op_helpers.h
@@ -178,7 +178,7 @@ VariableInputLockHolder MaybeLockVariableInputMutexesInOrder(
     mutex* mu = GetTrainingVariableMutex<Device, T>(ctx, input, sparse, &var);
     core::ScopedUnref scoped_unref(var);
     if (mu != nullptr) {
-      if (do_lock) {
+      if (!sparse || do_lock) {
         locks->emplace_back(*mu);
       } else {
         shared_locks->emplace_back(*mu);
-- 
GitLab


From e3d751c2a85a74b74c5eacf038721f2c67eb2da5 Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <slebedev@google.com>
Date: Tue, 11 Dec 2018 12:30:48 -0800
Subject: [PATCH 370/873] IS_IN_GRAPH_MODE should not force-init the eager
 context.

This caused hard to diagnose failures in enable_eager_execution calls.

PiperOrigin-RevId: 225050519
---
 tensorflow/python/eager/context.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index cbbe5cf49e..848b300eba 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -923,6 +923,10 @@ def add_function(fdef):
 # but they do all import this file.  Note that IS_IN_GRAPH_MODE and
 # in_graph_mode are both parameterless functions.
 def _tmp_in_graph_mode():
+  if context_safe() is None:
+    # Context not yet initialized. Assume graph mode following the
+    # default implementation in `is_in_graph_mode`.
+    return True
   return not executing_eagerly()
 
 
-- 
GitLab


From d6a46850353acfe26625c5ab1ffe7bd5c5a4aaf0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 12:39:12 -0800
Subject: [PATCH 371/873] Improve build rules to compile NCCL from source, in
 particular for clang.

PiperOrigin-RevId: 225051897
---
 third_party/nccl/archive.BUILD      | 154 ++++-----
 third_party/nccl/build_defs.bzl.tpl | 467 ++++++++++++++++++----------
 2 files changed, 351 insertions(+), 270 deletions(-)

diff --git a/third_party/nccl/archive.BUILD b/third_party/nccl/archive.BUILD
index 7a08f97ef3..22b9728017 100644
--- a/third_party/nccl/archive.BUILD
+++ b/third_party/nccl/archive.BUILD
@@ -1,157 +1,110 @@
 # NVIDIA NCCL 2
 # A package of optimized primitives for collective multi-GPU communication.
 
-licenses(["restricted"])
+licenses(["notice"])
 
 exports_files(["LICENSE.txt"])
 
 load(
     "@local_config_nccl//:build_defs.bzl",
-    "gen_nccl_h",
-    "nccl_library",
-    "rdc_copts",
-    "rdc_library",
-)
-load(
-    "@local_config_cuda//cuda:build_defs.bzl",
-    "cuda_default_copts",
+    "cuda_rdc_library",
+    "gen_device_srcs",
+    "process_srcs",
 )
+load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cuda_library")
 
-# Generate the nccl.h header file.
-gen_nccl_h(
-    name = "nccl_h",
-    output = "src/nccl.h",
-    template = "src/nccl.h.in",
+process_srcs(
+    name = "process_srcs",
+    srcs = glob([
+        "**/*.cc",
+        "**/*.h",
+    ]),
 )
 
-nccl_library(
+cc_library(
     name = "src_hdrs",
     hdrs = [
-        "src/nccl.h",
-        # src/include/common_coll.h #includes "collectives/collectives.h".
-        # All other #includes of collectives.h are patched in process_srcs.
         "src/collectives/collectives.h",
+        "src/nccl.h",
     ],
+    data = [":process_srcs"],
     strip_include_prefix = "src",
 )
 
-nccl_library(
+cc_library(
     name = "include_hdrs",
     hdrs = glob(["src/include/*.h"]),
+    data = [":process_srcs"],
     strip_include_prefix = "src/include",
 )
 
-filegroup(
+cc_library(
     name = "device_hdrs",
-    srcs = glob(["src/collectives/device/*.h"]),
+    hdrs = glob(["src/collectives/device/*.h"]),
+    strip_include_prefix = "src/collectives/device",
 )
 
 filegroup(
     name = "device_srcs",
     srcs = [
-        "src/collectives/device/all_gather.cu",
-        "src/collectives/device/all_reduce.cu",
-        "src/collectives/device/broadcast.cu",
-        "src/collectives/device/reduce.cu",
-        "src/collectives/device/reduce_scatter.cu",
+        "src/collectives/device/all_gather.cu.cc",
+        "src/collectives/device/all_reduce.cu.cc",
+        "src/collectives/device/broadcast.cu.cc",
+        "src/collectives/device/reduce.cu.cc",
+        "src/collectives/device/reduce_scatter.cu.cc",
     ],
 )
 
-nccl_library(
+# NCCL compiles the same source files with different NCCL_OP defines. RDC
+# compilation requires that each compiled module has a unique ID. Clang derives
+# the module ID from the path only so we need to rename the files to get
+# different IDs for different parts of compilation. NVCC does not have that
+# problem because it generates IDs based on preprocessed content.
+gen_device_srcs(
     name = "sum",
-    srcs = [
-        ":device_hdrs",
-        ":device_srcs",
-    ],
-    copts = ["-DNCCL_OP=0"] + rdc_copts(),
-    linkstatic = True,
-    prefix = "sum_",
-    deps = [
-        ":include_hdrs",
-        ":src_hdrs",
-        "@local_config_cuda//cuda:cuda_headers",
-    ],
+    srcs = [":device_srcs"],
+    NCCL_OP = 0,
 )
 
-nccl_library(
+gen_device_srcs(
     name = "prod",
-    srcs = [
-        ":device_hdrs",
-        ":device_srcs",
-    ],
-    copts = ["-DNCCL_OP=1"] + rdc_copts(),
-    linkstatic = True,
-    prefix = "_prod",
-    deps = [
-        ":include_hdrs",
-        ":src_hdrs",
-        "@local_config_cuda//cuda:cuda_headers",
-    ],
+    srcs = [":device_srcs"],
+    NCCL_OP = 1,
 )
 
-nccl_library(
+gen_device_srcs(
     name = "min",
-    srcs = [
-        ":device_hdrs",
-        ":device_srcs",
-    ],
-    copts = ["-DNCCL_OP=2"] + rdc_copts(),
-    linkstatic = True,
-    prefix = "min_",
-    deps = [
-        ":include_hdrs",
-        ":src_hdrs",
-        "@local_config_cuda//cuda:cuda_headers",
-    ],
+    srcs = [":device_srcs"],
+    NCCL_OP = 2,
 )
 
-nccl_library(
+gen_device_srcs(
     name = "max",
-    srcs = [
-        ":device_hdrs",
-        ":device_srcs",
-    ],
-    copts = ["-DNCCL_OP=3"] + rdc_copts(),
-    linkstatic = True,
-    prefix = "max_",
-    deps = [
-        ":include_hdrs",
-        ":src_hdrs",
-        "@local_config_cuda//cuda:cuda_headers",
-    ],
+    srcs = [":device_srcs"],
+    NCCL_OP = 3,
 )
 
-nccl_library(
-    name = "functions",
+cuda_rdc_library(
+    name = "device",
     srcs = [
-        "src/collectives/device/functions.cu",
-        ":device_hdrs",
-    ],
-    copts = rdc_copts(),
-    linkstatic = True,
-    deps = [
-        ":include_hdrs",
-        ":src_hdrs",
-        "@local_config_cuda//cuda:cuda_headers",
-    ],
-)
-
-rdc_library(
-    name = "device_code",
-    deps = [
-        ":functions",
+        "src/collectives/device/functions.cu.cc",
         ":max",
         ":min",
         ":prod",
         ":sum",
     ],
+    deps = [
+        ":device_hdrs",
+        ":include_hdrs",
+        ":src_hdrs",
+    ],
 )
 
 # Primary NCCL target.
-nccl_library(
+tf_cuda_library(
     name = "nccl",
     srcs = glob(
-        include = ["src/**/*.cu"],
+        include = ["src/**/*.cu.cc"],
         # Exclude device-library code.
         exclude = ["src/collectives/device/**"],
     ) + [
@@ -162,13 +115,14 @@ nccl_library(
         "src/nccl.h",
     ],
     hdrs = ["src/nccl.h"],
-    copts = cuda_default_copts(),
+    copts = ["-Wno-vla"],
     include_prefix = "third_party/nccl",
     strip_include_prefix = "src",
     visibility = ["//visibility:public"],
     deps = [
-        ":device_code",
+        ":device",
         ":include_hdrs",
         ":src_hdrs",
+        "@local_config_cuda//cuda:cudart_static",
     ],
 )
diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
index 42de79c411..fe16f10432 100644
--- a/third_party/nccl/build_defs.bzl.tpl
+++ b/third_party/nccl/build_defs.bzl.tpl
@@ -1,87 +1,86 @@
 """Repository rule for NCCL."""
 
-load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts")
-
-def _gen_nccl_h_impl(ctx):
-    """Creates nccl.h from a template."""
-    ctx.actions.expand_template(
-        output = ctx.outputs.output,
-        template = ctx.file.template,
-        substitutions = {
-            "${nccl:Major}": "2",
-            "${nccl:Minor}": "3",
-            "${nccl:Patch}": "5",
-            "${nccl:Suffix}": "",
-            "${nccl:Version}": "2305",
-        },
-    )
-
-gen_nccl_h = rule(
-    implementation = _gen_nccl_h_impl,
-    attrs = {
-        "template": attr.label(allow_single_file = True),
-        "output": attr.output(),
-    },
-)
-"""Creates the NCCL header file."""
+load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cuda_library")
+load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")
 
 def _process_srcs_impl(ctx):
     """Appends .cc to .cu files, patches include directives."""
     files = []
     for src in ctx.files.srcs:
-        if not src.is_source:
-            # Process only once, specifically "src/nccl.h".
-            files.append(src)
-            continue
+        substitutions = {
+            "\"collectives.h": "\"collectives/collectives.h",
+            "\"../collectives.h": "\"collectives/collectives.h",
+            # Clang does not define __CUDACC_VER_*__, use CUDA_VERSION instead.
+            # TODO(csigg): Apply substitutions upstream and remove here.
+            "#if __CUDACC_VER_MAJOR__ >= 10 || (__CUDACC_VER_MAJOR__ >= 9 && __CUDACC_VER_MINOR__ >= 2)": "#if CUDA_VERSION >= 9200",
+            "#if __CUDACC_VER_MAJOR__ >= 10": "#if CUDA_VERSION >= 10000",
+            "#if __CUDACC_VER_MAJOR__ >= 9": "#if CUDA_VERSION >= 9000",
+            "#if __CUDACC_VER_MAJOR__ < 9": "#if CUDA_VERSION < 9000",
+            "nullptr_t": "std::nullptr_t",
+        }
         name = src.basename
+        if name == "nccl.in.h":
+            name = "nccl.h"
+            substitutions.update({
+                "${nccl:Major}": "2",
+                "${nccl:Minor}": "3",
+                "${nccl:Patch}": "5",
+                "${nccl:Suffix}": "",
+                "${nccl:Version}": "2305",
+            })
+        if name == "functions.cu":
+            # Don't try to initialize the host shadow copy of this device-side
+            # global variable. There is no host pointer to a device-side
+            # function, which confuses clang.
+            # TODO(csigg): remove when fixed in clang.
+            substitutions.update({
+                "NCCL_FUNCS2B(ncclBroadcast),": "#if __CUDA_ARCH__\nNCCL_FUNCS2B(ncclBroadcast),",
+                "NCCL_FUNCS2A(ncclAllReduce)": "NCCL_FUNCS2A(ncclAllReduce)\n#endif",
+            })
         if src.extension == "cu":
-            name = ctx.attr.prefix + name + ".cc"
+            name += ".cc"
         file = ctx.actions.declare_file(name, sibling = src)
         ctx.actions.expand_template(
             output = file,
             template = src,
-            substitutions = {
-                "\"collectives.h": "\"collectives/collectives.h",
-                "\"../collectives.h": "\"collectives/collectives.h",
-                "#if __CUDACC_VER_MAJOR__": "#if defined __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__",
-                # Substitutions are applied in order.
-                "std::nullptr_t": "nullptr_t",
-                "nullptr_t": "std::nullptr_t",
-            },
+            substitutions = substitutions,
         )
         files.append(file)
     return [DefaultInfo(files = depset(files))]
 
-_process_srcs = rule(
+process_srcs = rule(
     implementation = _process_srcs_impl,
     attrs = {
         "srcs": attr.label_list(allow_files = True),
-        "prefix": attr.string(default = ""),
     },
 )
 """Processes the NCCL srcs so they can be compiled with bazel and clang."""
 
-def nccl_library(name, srcs = None, hdrs = None, prefix = None, **kwargs):
-    """Processes the srcs and hdrs and creates a cc_library."""
-
-    _process_srcs(
-        name = name + "_srcs",
-        srcs = srcs,
-        prefix = prefix,
-    )
-    _process_srcs(
-        name = name + "_hdrs",
-        srcs = hdrs,
-    )
+def _gen_device_srcs_impl(ctx):
+    files = []
+    for src in ctx.files.srcs:
+        name = "%s_%s" % (ctx.attr.name, src.basename)
+        file = ctx.actions.declare_file(name, sibling = src)
+        ctx.actions.expand_template(
+            output = file,
+            template = src,
+            substitutions = {
+                "#define UNROLL 4": "#define UNROLL 4\n#define NCCL_OP %d" % ctx.attr.NCCL_OP,
+            },
+        )
+        files.append(file)
+    return [DefaultInfo(files = depset(files))]
 
-    native.cc_library(
-        name = name,
-        srcs = [name + "_srcs"] if srcs else [],
-        hdrs = [name + "_hdrs"] if hdrs else [],
-        **kwargs
-    )
+gen_device_srcs = rule(
+    implementation = _gen_device_srcs_impl,
+    attrs = {
+        "srcs": attr.label_list(allow_files = True),
+        "NCCL_OP": attr.int(),
+    },
+)
+"""Adds prefix to each file name in srcs and adds #define NCCL_OP."""
 
-def rdc_copts():
+def _rdc_copts():
     """Returns copts for compiling relocatable device code."""
 
     # The global functions can not have a lower register count than the
@@ -89,7 +88,7 @@ def rdc_copts():
     # https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48
     maxrregcount = "-maxrregcount=96"
 
-    return cuda_default_copts() + select({
+    return select({
         "@local_config_cuda//cuda:using_nvcc": [
             "-nvcc_options",
             "relocatable-device-code=true",
@@ -100,118 +99,255 @@ def rdc_copts():
             "-fcuda-rdc",
             "-Xcuda-ptxas",
             maxrregcount,
+            # Work around for clang bug (fixed in r348662), declaring
+            # '__device__ operator delete(void*, std::size_t)' non-inline.
+            # TODO(csigg): Only add this option for older clang versions.
+            "-std=gnu++11",
         ],
         "//conditions:default": [],
-    }) + ["-fvisibility=hidden"]
+    })
 
-def _filter_impl(ctx):
-    suffix = ctx.attr.suffix
-    files = [src for src in ctx.files.srcs if src.path.endswith(suffix)]
-    return [DefaultInfo(files = depset(files))]
+def _lookup_file(filegroup, path):
+    """Extracts file at (relative) path in filegroup."""
+    for file in filegroup.files:
+        if file.path.endswith(path):
+            return file
+    return None
 
-_filter = rule(
-    implementation = _filter_impl,
-    attrs = {
-        "srcs": attr.label_list(allow_files = True),
-        "suffix": attr.string(),
-    },
-)
-"""Filters the srcs to the ones ending with suffix."""
+def _pic_only(files):
+    """Returns the PIC files if there are any in 'files', otherwise 'files'."""
+    pic_only = [f for f in files if f.basename.find(".pic.") >= 0]
+    return pic_only if pic_only else files
+
+def _device_link_impl(ctx):
+    if not ctx.attr.gpu_archs:
+        fail("No GPU architecture specified. NCCL requires --config=cuda or similar.")
+
+    inputs = []
+    for dep in ctx.attr.deps:
+        inputs += dep.files.to_list()
+    inputs = _pic_only(inputs)
 
-def _gen_link_src_impl(ctx):
+    # Device-link to cubins for each architecture.
+    name = ctx.attr.name
+    register_h = None
+    cubins = []
+    images = []
+    for arch in ctx.attr.gpu_archs:
+        cubin = ctx.actions.declare_file("%s_%s.cubin" % (name, arch))
+        register_h = ctx.actions.declare_file("%s_register_%s.h" % (name, arch))
+        ctx.actions.run(
+            outputs = [register_h, cubin],
+            inputs = inputs,
+            executable = ctx.file._nvlink,
+            arguments = ctx.attr.nvlink_args + [
+                "--arch=%s" % arch,
+                "--register-link-binaries=%s" % register_h.path,
+                "--output-file=%s" % cubin.path,
+            ] + [file.path for file in inputs],
+            mnemonic = "nvlink",
+        )
+        cubins.append(cubin)
+        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
+
+    # Generate fatbin header from all cubins.
+    tmp_fatbin = ctx.actions.declare_file("%s.fatbin" % name)
+    fatbin_h = ctx.actions.declare_file("%s_fatbin.h" % name)
+    bin2c = ctx.file._bin2c
+    ctx.actions.run(
+        outputs = [tmp_fatbin, fatbin_h],
+        inputs = cubins,
+        executable = ctx.file._fatbinary,
+        arguments = [
+            "-64",
+            "--cmdline=--compile-only",
+            "--link",
+            "--compress-all",
+            "--bin2c-path=%s" % bin2c.dirname,
+            "--create=%s" % tmp_fatbin.path,
+            "--embedded-fatbin=%s" % fatbin_h.path,
+        ] + images,
+        tools = [bin2c],
+        mnemonic = "fatbinary",
+    )
+
+    # Generate the source file #including the headers generated above.
     ctx.actions.expand_template(
-        output = ctx.outputs.output,
-        template = ctx.file.template,
+        output = ctx.outputs.out,
+        template = ctx.file._link_stub,
         substitutions = {
-            "REGISTERLINKBINARYFILE": '"%s"' % ctx.file.register_hdr.short_path,
-            "FATBINFILE": '"%s"' % ctx.file.fatbin_hdr.short_path,
+            "REGISTERLINKBINARYFILE": '"%s"' % register_h.short_path,
+            "FATBINFILE": '"%s"' % fatbin_h.short_path,
         },
     )
 
-_gen_link_src = rule(
-    implementation = _gen_link_src_impl,
+    return [DefaultInfo(files = depset([register_h, fatbin_h]))]
+
+_device_link = rule(
+    implementation = _device_link_impl,
     attrs = {
-        "register_hdr": attr.label(allow_single_file = True),
-        "fatbin_hdr": attr.label(allow_single_file = True),
-        "template": attr.label(allow_single_file = True),
-        "output": attr.output(),
+        "deps": attr.label_list(),
+        "out": attr.output(mandatory = True),
+        "gpu_archs": attr.string_list(),
+        "nvlink_args": attr.string_list(),
+        "_nvlink": attr.label(
+            default = Label("@local_config_nccl//:nvlink"),
+            allow_single_file = True,
+            executable = True,
+            cfg = "host",
+        ),
+        "_fatbinary": attr.label(
+            default = Label("@local_config_nccl//:cuda/bin/fatbinary"),
+            allow_single_file = True,
+            executable = True,
+            cfg = "host",
+        ),
+        "_bin2c": attr.label(
+            default = Label("@local_config_nccl//:cuda/bin/bin2c"),
+            allow_single_file = True,
+            executable = True,
+            cfg = "host",
+        ),
+        "_link_stub": attr.label(
+            default = Label("@local_config_nccl//:cuda/bin/crt/link.stub"),
+            allow_single_file = True,
+        ),
     },
 )
-"""Patches the include directives for the link.stub file."""
-
-def rdc_library(name, deps):
-    """Produces a cc_library from deps containing relocatable device code."""
-
-    # From .a and .pic.a archives, just use the latter. Otherwise we get
-    # multiply defined symbols.
-    # TODO(csigg): C++ Sandwich once available should allow passing this target
-    # to a cc_library dependency, which would avoid the linking order issue.
-    _filter(
-        name = name + "_deps_a",
-        srcs = deps,
-        suffix = ".pic.a",
+"""Links device code and generates source code for kernel registration."""
+
+def _merge_archive_impl(ctx):
+    # Generate an mri script to the merge archives in srcs and pass it to 'ar'.
+    # See https://stackoverflow.com/a/23621751.
+    files = _pic_only(ctx.files.srcs)
+    mri_script = "create " + ctx.outputs.out.path
+    for f in files:
+        mri_script += "\\naddlib " + f.path
+    mri_script += "\\nsave\\nend"
+
+    cc_toolchain = find_cpp_toolchain(ctx)
+    ctx.actions.run_shell(
+        inputs = ctx.files.srcs,  # + ctx.files._crosstool,
+        outputs = [ctx.outputs.out],
+        command = ("printf \"%s\" " % mri_script +
+                   "| %s -M" % cc_toolchain.ar_executable),
     )
 
-    # Device-link to cubins for each architecture.
-    images = []
-    cubins = []
-    for arch in %{gpu_architectures}:
-        cubin = "%s_%s.cubin" % (name, arch)
-        register_hdr = "%s_%s.h" % (name, arch)
-        nvlink = "@local_config_nccl//:nvlink"
-        cmd = ("$(location %s) " % nvlink +
-               select({
-                   # NCCL is only supported on Linux.
-                   "@org_tensorflow//tensorflow:linux_x86_64": "--cpu-arch=X86_64 ",
-                   "@org_tensorflow//tensorflow:linux_ppc64le": "--cpu-arch=PPC64LE ",
-                   "//conditions:default": "",
-               }) +
-               "--arch=%s $(SRCS) " % arch +
-               "--register-link-binaries=$(location %s) " % register_hdr +
-               "--output-file=$(location %s)" % cubin)
-        native.genrule(
-            name = "%s_%s" % (name, arch),
-            outs = [register_hdr, cubin],
-            srcs = [name + "_deps_a"],
-            cmd = cmd,
-            tools = [nvlink],
-        )
-        images.append("--image=profile=%s,file=$(location %s)" % (arch, cubin))
-        cubins.append(cubin)
+_merge_archive = rule(
+    implementation = _merge_archive_impl,
+    attrs = {
+        "srcs": attr.label_list(mandatory = True, allow_files = True),
+        "_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"),
+        # "_crosstool": attr.label_list(cfg = "host", default = ["@bazel_tools//tools/cpp:crosstool"]),
+    },
+    outputs = {"out": "lib%{name}.a"},
+)
+"""Merges srcs into a single archive."""
 
-    # Generate fatbin header from all cubins.
-    fatbin_hdr = name + ".fatbin.h"
-    fatbinary = "@local_config_nccl//:cuda/bin/fatbinary"
-    bin2c = "@local_config_nccl//:cuda/bin/bin2c"
-    cmd = ("$(location %s) -64 --cmdline=--compile-only " % fatbinary +
-           "--link --bin2c-path $$(dirname $(location %s)) " % bin2c +
-           "--compress-all %s --create=%%{name}.fatbin " % " ".join(images) +
-           "--embedded-fatbin=$@")
-    native.genrule(
-        name = name + "_fatbin_h",
-        outs = [fatbin_hdr],
-        srcs = cubins,
-        cmd = cmd,
-        tools = [fatbinary, bin2c],
+def cuda_rdc_library(name, hdrs = None, copts = None, linkstatic = True, **kwargs):
+    """Produces a cuda_library using separate compilation and linking.
+
+    CUDA separate compilation and linking allows device function calls across
+    translation units. This is different from the normal whole program
+    compilation where each translation unit contains all device code. For more
+    background, see
+    https://devblogs.nvidia.com/separate-compilation-linking-cuda-device-code/,
+    https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-options-for-separate-compilation
+
+    During separate compilation, the different CUDA source files are compiled
+    to 'relocatable device code' (RDC) and embedded in the host object files.
+    When using nvcc, linking the device code for each supported GPU
+    architecture and generating kernel registration code for the CUDA runtime
+    is handled automatically. Clang supports generating relocatable device
+    code, but it can't link it. We therefore rely on tools provided by the CUDA
+    SDK to link the device code and generate the host code to register the
+    kernels.
+
+    The nvlink tool extracts the RDC code from the object files and links it
+    into cubin files, one per GPU architecture. It also produces a header file
+    with a list of kernel names to register. The cubins are merged into a
+    binary blob using the fatbinary tool, and converted to a C header file with
+    the help of the bin2c tool. The registration header file, the fatbinary
+    header file, and the link.stub file (shipped with the CUDA SDK) are
+    compiled as ordinary host code.
+
+    Here is a diagram of the CUDA separate compilation trajectory:
+
+     x.cu.cc    y.cu.cc
+           \    /            cc_library (compile RDC and archive)
+            xy.a
+           /    \            * nvlink
+    register.h  xy.cubin
+          :      |           * fatbinary and bin2c
+          :     xy.fatbin.h
+          :      :           * #include
+          dlink.cc           * Expanded from crt/dlink.stub template
+             |               cc_library (host compile and archive)
+          dlink.a
+
+    The steps marked with '*' are implemented in the _device_link rule.
+
+    The object files in both xy.a and dlink.a reference symbols defined in the
+    other archive. The separate archives are a side effect of using two
+    cc_library targets to implement a single compilation trajectory. We could
+    fix this once bazel supports C++ sandwich. For now, we just merge the two
+    archives to avoid unresolved symbols:
+
+    xy.a      dlink.a
+        \    /           merge archive
+      xy_dlink.a
+           |             cc_library (or alternatively, cc_import)
+     final target
+
+    Another complication is that cc_library produces (depending on the
+    configuration) both PIC and non-PIC archives, but the distinction
+    is hidden from Starlark until C++ sandwich becomes available. We work
+    around this by dropping the non-PIC files if PIC files are available.
+
+    Args:
+      name: Target name.
+      hdrs: Header files.
+      copts: Compiler options.
+      linkstatic: Must be true.
+      **kwargs: Any other arguments.
+    """
+
+    if not hdrs:
+        hdrs = []
+    if not copts:
+        copts = []
+
+    # Compile host and device code into library.
+    lib = name + "_lib"
+    tf_cuda_library(
+        name = lib,
+        hdrs = hdrs,
+        copts = _rdc_copts() + copts,
+        linkstatic = linkstatic,
+        **kwargs
     )
 
-    # Generate the source file #including the headers generated above.
-    _gen_link_src(
-        name = name + "_dlink_src",
-        # Include just the last one, they are equivalent.
-        register_hdr = register_hdr,
-        fatbin_hdr = fatbin_hdr,
-        template = "@local_config_nccl//:cuda/bin/crt/link.stub",
-        output = name + ".cc",
+    # Generate source file containing linked device code.
+    dlink_hdrs = name + "_dlink_hdrs"
+    dlink_cc = name + "_dlink.cc"
+    _device_link(
+        name = dlink_hdrs,
+        deps = [lib],
+        out = dlink_cc,
+        gpu_archs = %{gpu_architectures},
+        nvlink_args = select({
+            "@org_tensorflow//tensorflow:linux_x86_64": ["--cpu-arch=X86_64"],
+            "@org_tensorflow//tensorflow:linux_ppc64le": ["--cpu-arch=PPC64LE"],
+            "//conditions:default": [],
+        }),
     )
 
-    # Compile the source file into the cc_library.
+    # Compile the source file into a library.
+    dlink = name + "_dlink"
     native.cc_library(
-        name = name + "_dlink_a",
-        srcs = [
-            name + "_dlink_src",
-        ],
-        textual_hdrs = [register_hdr, fatbin_hdr],
+        name = dlink,
+        srcs = [dlink_cc],
+        textual_hdrs = [dlink_hdrs],
         deps = [
             "@local_config_cuda//cuda:cuda_headers",
         ],
@@ -222,31 +358,22 @@ def rdc_library(name, deps):
             "__NV_EXTRA_INITIALIZATION=",
             "__NV_EXTRA_FINALIZATION=",
         ],
-        linkstatic = True,
+        linkstatic = linkstatic,
     )
 
-    # Repackage deps into a single archive. This avoid unresolved symbols when
-    # the archives happen to be linked in the wrong order. For more details, see
+    # Repackage the two libs into a single archive. This is required because
+    # both libs reference symbols defined in the other one. For details, see
     # https://eli.thegreenplace.net/2013/07/09/library-order-in-static-linking
-    native.genrule(
-        name = name + "_a",
-        srcs = [
-            name + "_deps_a",
-            name + "_dlink_a",
-        ],
-        outs = [name + ".a"],
-        # See https://stackoverflow.com/a/23621751
-        cmd = """
-addlibs=$$(echo $(SRCS) | sed "s/[^ ]* */\\naddlib &/g")
-printf "create $@$${addlibs}\\nsave\\nend" | $(AR) -M
-""",
+    archive = name + "_a"
+    _merge_archive(
+        name = archive,
+        srcs = [lib, dlink],
     )
 
+    # Create cc target from archive.
     native.cc_library(
         name = name,
-        srcs = [name + "_a"],
-        deps = [
-            "@local_config_cuda//cuda:cudart_static",
-        ],
-        linkstatic = True,
+        srcs = [archive],
+        hdrs = hdrs,
+        linkstatic = linkstatic,
     )
-- 
GitLab


From f9dbe98610790fff9ccec148e3ec088bc779460f Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 11 Dec 2018 12:53:13 -0800
Subject: [PATCH 372/873] Tweak logger dependencies.

PiperOrigin-RevId: 225054204
---
 tensorflow/core/BUILD | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 66714235b5..5f5ca63540 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -445,16 +445,20 @@ cc_library(
     ] + tf_additional_human_readable_json_deps(),
 )
 
+cc_library(
+    name = "logger_interface",
+    hdrs = ["platform/logger.h"],
+    copts = tf_copts(),
+    visibility = ["//visibility:public"],
+    deps = [":platform_protobuf"],
+)
+
 cc_library(
     name = "logger",
     srcs = tf_platform_srcs(["logger.cc"]),
-    hdrs = ["platform/logger.h"] + tf_platform_hdrs(["logger.h"]),
     copts = tf_copts(),
     visibility = ["//visibility:public"],
-    deps = [
-        ":lib",
-        ":lib_internal",
-    ] + tf_additional_logger_deps(),
+    deps = [":logger_interface"] + tf_additional_logger_deps(),
 )
 
 filegroup(
@@ -1619,7 +1623,6 @@ filegroup(
             "util/reporter.*",
             "platform/**/cuda_libdevice_path.*",
             "platform/**/logger.cc",
-            "platform/**/logger.h",
             "platform/default/test_benchmark.*",
             "platform/cuda.h",
             "platform/google/**/*",
-- 
GitLab


From 93439a553937e77e8877a149d13039960da59abf Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 11 Dec 2018 13:44:11 -0800
Subject: [PATCH 373/873] Use "in symbol.__dict__" instead of "hasattr" to
 check if a symbol has api names set. The former would behave correctly for
 subclasses.

Also, moving get_v1_names|constants and get_v2_names|constants functions to tf_export.py to reduce code duplication.

PiperOrigin-RevId: 225063242
---
 tensorflow/python/util/tf_export.py           | 88 +++++++++++++++++++
 tensorflow/python/util/tf_export_test.py      | 28 ++++++
 .../tools/compatibility/tf_upgrade_v2_test.py | 38 ++------
 .../update/generate_v2_renames_map.py         | 62 ++-----------
 .../update/generate_v2_reorders_map.py        | 36 +-------
 5 files changed, 128 insertions(+), 124 deletions(-)

diff --git a/tensorflow/python/util/tf_export.py b/tensorflow/python/util/tf_export.py
index ec70cae7d2..74afc3746f 100644
--- a/tensorflow/python/util/tf_export.py
+++ b/tensorflow/python/util/tf_export.py
@@ -147,6 +147,94 @@ def get_canonical_name(api_names, deprecated_api_names):
   return None
 
 
+def get_v1_names(symbol):
+  """Get a list of TF 1.* names for this symbol.
+
+  Args:
+    symbol: symbol to get API names for.
+
+  Returns:
+    List of all API names for this symbol including TensorFlow and
+    Estimator names.
+  """
+  names_v1 = []
+  tensorflow_api_attr_v1 = API_ATTRS_V1[TENSORFLOW_API_NAME].names
+  estimator_api_attr_v1 = API_ATTRS_V1[ESTIMATOR_API_NAME].names
+
+  if not hasattr(symbol, tensorflow_api_attr_v1):
+    return names_v1
+  if tensorflow_api_attr_v1 in symbol.__dict__:
+    names_v1.extend(getattr(symbol, tensorflow_api_attr_v1))
+  if estimator_api_attr_v1 in symbol.__dict__:
+    names_v1.extend(getattr(symbol, estimator_api_attr_v1))
+  return names_v1
+
+
+def get_v2_names(symbol):
+  """Get a list of TF 2.0 names for this symbol.
+
+  Args:
+    symbol: symbol to get API names for.
+
+  Returns:
+    List of all API names for this symbol including TensorFlow and
+    Estimator names.
+  """
+  names_v2 = []
+  tensorflow_api_attr = API_ATTRS[TENSORFLOW_API_NAME].names
+  estimator_api_attr = API_ATTRS[ESTIMATOR_API_NAME].names
+
+  if not hasattr(symbol, tensorflow_api_attr):
+    return names_v2
+  if tensorflow_api_attr in symbol.__dict__:
+    names_v2.extend(getattr(symbol, tensorflow_api_attr))
+  if estimator_api_attr in symbol.__dict__:
+    names_v2.extend(getattr(symbol, estimator_api_attr))
+  return names_v2
+
+
+def get_v1_constants(module):
+  """Get a list of TF 1.* constants in this module.
+
+  Args:
+    module: TensorFlow module.
+
+  Returns:
+    List of all API constants under the given module including TensorFlow and
+    Estimator constants.
+  """
+  constants_v1 = []
+  tensorflow_constants_attr_v1 = API_ATTRS_V1[TENSORFLOW_API_NAME].constants
+  estimator_constants_attr_v1 = API_ATTRS_V1[ESTIMATOR_API_NAME].constants
+
+  if hasattr(module, tensorflow_constants_attr_v1):
+    constants_v1.extend(getattr(module, tensorflow_constants_attr_v1))
+  if hasattr(module, estimator_constants_attr_v1):
+    constants_v1.extend(getattr(module, estimator_constants_attr_v1))
+  return constants_v1
+
+
+def get_v2_constants(module):
+  """Get a list of TF 2.0 constants in this module.
+
+  Args:
+    module: TensorFlow module.
+
+  Returns:
+    List of all API constants under the given module including TensorFlow and
+    Estimator constants.
+  """
+  constants_v2 = []
+  tensorflow_constants_attr = API_ATTRS[TENSORFLOW_API_NAME].constants
+  estimator_constants_attr = API_ATTRS[ESTIMATOR_API_NAME].constants
+
+  if hasattr(module, tensorflow_constants_attr):
+    constants_v2.extend(getattr(module, tensorflow_constants_attr))
+  if hasattr(module, estimator_constants_attr):
+    constants_v2.extend(getattr(module, estimator_constants_attr))
+  return constants_v2
+
+
 class api_export(object):  # pylint: disable=invalid-name
   """Provides ways to export symbols to the TensorFlow API."""
 
diff --git a/tensorflow/python/util/tf_export_test.py b/tensorflow/python/util/tf_export_test.py
index a0fac8bf36..20625792e9 100644
--- a/tensorflow/python/util/tf_export_test.py
+++ b/tensorflow/python/util/tf_export_test.py
@@ -62,6 +62,10 @@ class ValidateExportTest(test.TestCase):
         del symbol._tf_api_names
       if hasattr(symbol, '_tf_api_names_v1'):
         del symbol._tf_api_names_v1
+      if hasattr(symbol, '_estimator_api_names'):
+        del symbol._estimator_api_names
+      if hasattr(symbol, '_estimator_api_names_v1'):
+        del symbol._estimator_api_names_v1
 
   def _CreateMockModule(self, name):
     mock_module = self.MockModule(name)
@@ -74,6 +78,10 @@ class ValidateExportTest(test.TestCase):
     decorated_function = export_decorator(_test_function)
     self.assertEquals(decorated_function, _test_function)
     self.assertEquals(('nameA', 'nameB'), decorated_function._tf_api_names)
+    self.assertEquals(['nameA', 'nameB'],
+                      tf_export.get_v1_names(decorated_function))
+    self.assertEquals(['nameA', 'nameB'],
+                      tf_export.get_v2_names(decorated_function))
 
   def testExportMultipleFunctions(self):
     export_decorator1 = tf_export.tf_export('nameA', 'nameB')
@@ -95,6 +103,22 @@ class ValidateExportTest(test.TestCase):
     export_decorator_b(TestClassB)
     self.assertEquals(('TestClassA1',), TestClassA._tf_api_names)
     self.assertEquals(('TestClassB1',), TestClassB._tf_api_names)
+    self.assertEquals(['TestClassA1'], tf_export.get_v1_names(TestClassA))
+    self.assertEquals(['TestClassB1'], tf_export.get_v1_names(TestClassB))
+
+  def testExportClassInEstimator(self):
+    export_decorator_a = tf_export.tf_export('TestClassA1')
+    export_decorator_a(TestClassA)
+    self.assertEquals(('TestClassA1',), TestClassA._tf_api_names)
+
+    export_decorator_b = tf_export.estimator_export(
+        'estimator.TestClassB1')
+    export_decorator_b(TestClassB)
+    self.assertTrue('_tf_api_names' not in TestClassB.__dict__)
+    self.assertEquals(('TestClassA1',), TestClassA._tf_api_names)
+    self.assertEquals(['TestClassA1'], tf_export.get_v1_names(TestClassA))
+    self.assertEquals(['estimator.TestClassB1'],
+                      tf_export.get_v1_names(TestClassB))
 
   def testExportSingleConstant(self):
     module1 = self._CreateMockModule('module1')
@@ -103,6 +127,10 @@ class ValidateExportTest(test.TestCase):
     export_decorator.export_constant('module1', 'test_constant')
     self.assertEquals([(('NAME_A', 'NAME_B'), 'test_constant')],
                       module1._tf_api_constants)
+    self.assertEquals([(('NAME_A', 'NAME_B'), 'test_constant')],
+                      tf_export.get_v1_constants(module1))
+    self.assertEquals([(('NAME_A', 'NAME_B'), 'test_constant')],
+                      tf_export.get_v2_constants(module1))
 
   def testExportMultipleConstants(self):
     module1 = self._CreateMockModule('module1')
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index 0fc7a18734..2cc874fe7f 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -37,32 +37,6 @@ from tensorflow.tools.compatibility import ast_edits
 from tensorflow.tools.compatibility import tf_upgrade_v2
 
 
-_TENSORFLOW_API_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.TENSORFLOW_API_NAME].names)
-_TENSORFLOW_API_ATTR = tf_export.API_ATTRS[tf_export.TENSORFLOW_API_NAME].names
-_ESTIMATOR_API_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.ESTIMATOR_API_NAME].names)
-_ESTIMATOR_API_ATTR = tf_export.API_ATTRS[tf_export.ESTIMATOR_API_NAME].names
-
-
-def get_v1_names(symbol):
-  names_v1 = []
-  if hasattr(symbol, _TENSORFLOW_API_ATTR_V1):
-    names_v1.extend(getattr(symbol, _TENSORFLOW_API_ATTR_V1))
-  if hasattr(symbol, _ESTIMATOR_API_ATTR_V1):
-    names_v1.extend(getattr(symbol, _ESTIMATOR_API_ATTR_V1))
-  return names_v1
-
-
-def get_v2_names(symbol):
-  names_v2 = set()
-  if hasattr(symbol, _TENSORFLOW_API_ATTR):
-    names_v2.update(getattr(symbol, _TENSORFLOW_API_ATTR))
-  if hasattr(symbol, _ESTIMATOR_API_ATTR):
-    names_v2.update(getattr(symbol, _ESTIMATOR_API_ATTR))
-  return list(names_v2)
-
-
 def get_symbol_for_name(root, name):
   name_parts = name.split(".")
   symbol = root
@@ -118,7 +92,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
     def symbol_collector(unused_path, unused_parent, children):
       for child in children:
         _, attr = tf_decorator.unwrap(child[1])
-        api_names_v2 = get_v2_names(attr)
+        api_names_v2 = tf_export.get_v2_names(attr)
         for name in api_names_v2:
           cls.v2_symbols["tf." + name] = attr
 
@@ -166,7 +140,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
     def conversion_visitor(unused_path, unused_parent, children):
       for child in children:
         _, attr = tf_decorator.unwrap(child[1])
-        api_names = get_v1_names(attr)
+        api_names = tf_export.get_v1_names(attr)
         for name in api_names:
           _, _, _, text = self._upgrade("tf." + name)
           if (text and
@@ -190,7 +164,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
     def conversion_visitor(unused_path, unused_parent, children):
       for child in children:
         _, attr = tf_decorator.unwrap(child[1])
-        api_names = get_v1_names(attr)
+        api_names = tf_export.get_v1_names(attr)
         for name in api_names:
           if collect:
             v1_symbols.add("tf." + name)
@@ -219,7 +193,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
     def arg_test_visitor(unused_path, unused_parent, children):
       for child in children:
         _, attr = tf_decorator.unwrap(child[1])
-        names_v1 = get_v1_names(attr)
+        names_v1 = tf_export.get_v1_names(attr)
 
         for name in names_v1:
           name = "tf.%s" % name
@@ -270,7 +244,7 @@ class TestUpgrade(test_util.TensorFlowTestCase):
         _, attr = tf_decorator.unwrap(child[1])
         if not tf_inspect.isfunction(attr):
           continue
-        names_v1 = get_v1_names(attr)
+        names_v1 = tf_export.get_v1_names(attr)
         arg_names_v1 = get_args(attr)
 
         for name in names_v1:
@@ -340,7 +314,7 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
       # get other names for this function
       attr = get_symbol_for_name(tf.compat.v1, name)
       _, attr = tf_decorator.unwrap(attr)
-      v1_names = get_v1_names(attr)
+      v1_names = tf_export.get_v1_names(attr)
       self.assertTrue(v1_names)
       v1_names = ["tf.%s" % n for n in v1_names]
       # check if any other name is in
diff --git a/tensorflow/tools/compatibility/update/generate_v2_renames_map.py b/tensorflow/tools/compatibility/update/generate_v2_renames_map.py
index 19ad6c3a2a..a2c5e7cf82 100644
--- a/tensorflow/tools/compatibility/update/generate_v2_renames_map.py
+++ b/tensorflow/tools/compatibility/update/generate_v2_renames_map.py
@@ -64,58 +64,6 @@ from __future__ import print_function
 
 """
 
-_TENSORFLOW_API_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.TENSORFLOW_API_NAME].names)
-_TENSORFLOW_API_ATTR = tf_export.API_ATTRS[tf_export.TENSORFLOW_API_NAME].names
-_TENSORFLOW_CONSTANTS_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.TENSORFLOW_API_NAME].constants)
-_TENSORFLOW_CONSTANTS_ATTR = (
-    tf_export.API_ATTRS[tf_export.TENSORFLOW_API_NAME].constants)
-
-_ESTIMATOR_API_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.ESTIMATOR_API_NAME].names)
-_ESTIMATOR_API_ATTR = tf_export.API_ATTRS[tf_export.ESTIMATOR_API_NAME].names
-_ESTIMATOR_CONSTANTS_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.ESTIMATOR_API_NAME].constants)
-_ESTIMATOR_CONSTANTS_ATTR = (
-    tf_export.API_ATTRS[tf_export.ESTIMATOR_API_NAME].constants)
-
-
-def get_v1_names(symbol):
-  names_v1 = []
-  if hasattr(symbol, _TENSORFLOW_API_ATTR_V1):
-    names_v1.extend(getattr(symbol, _TENSORFLOW_API_ATTR_V1))
-  if hasattr(symbol, _ESTIMATOR_API_ATTR_V1):
-    names_v1.extend(getattr(symbol, _ESTIMATOR_API_ATTR_V1))
-  return names_v1
-
-
-def get_v2_names(symbol):
-  names_v2 = []
-  if hasattr(symbol, _TENSORFLOW_API_ATTR):
-    names_v2.extend(getattr(symbol, _TENSORFLOW_API_ATTR))
-  if hasattr(symbol, _ESTIMATOR_API_ATTR):
-    names_v2.extend(getattr(symbol, _ESTIMATOR_API_ATTR))
-  return list(names_v2)
-
-
-def get_v1_constants(module):
-  constants_v1 = []
-  if hasattr(module, _TENSORFLOW_CONSTANTS_ATTR_V1):
-    constants_v1.extend(getattr(module, _TENSORFLOW_CONSTANTS_ATTR_V1))
-  if hasattr(module, _ESTIMATOR_CONSTANTS_ATTR_V1):
-    constants_v1.extend(getattr(module, _ESTIMATOR_CONSTANTS_ATTR_V1))
-  return constants_v1
-
-
-def get_v2_constants(module):
-  constants_v2 = []
-  if hasattr(module, _TENSORFLOW_CONSTANTS_ATTR):
-    constants_v2.extend(getattr(module, _TENSORFLOW_CONSTANTS_ATTR))
-  if hasattr(module, _ESTIMATOR_CONSTANTS_ATTR):
-    constants_v2.extend(getattr(module, _ESTIMATOR_CONSTANTS_ATTR))
-  return constants_v2
-
 
 def get_canonical_name(v2_names, v1_name):
   if v2_names:
@@ -131,7 +79,7 @@ def get_all_v2_names():
     """Visitor that collects TF 2.0 names."""
     for child in children:
       _, attr = tf_decorator.unwrap(child[1])
-      api_names_v2 = get_v2_names(attr)
+      api_names_v2 = tf_export.get_v2_names(attr)
       for name in api_names_v2:
         v2_names.add(name)
 
@@ -149,8 +97,8 @@ def collect_constant_renames():
   """
   renames = set()
   for module in sys.modules.values():
-    constants_v1_list = get_v1_constants(module)
-    constants_v2_list = get_v2_constants(module)
+    constants_v1_list = tf_export.get_v1_constants(module)
+    constants_v2_list = tf_export.get_v2_constants(module)
 
     # _tf_api_constants attribute contains a list of tuples:
     # (api_names_list, constant_name)
@@ -186,8 +134,8 @@ def collect_function_renames():
     """Visitor that collects rename strings to add to rename_line_set."""
     for child in children:
       _, attr = tf_decorator.unwrap(child[1])
-      api_names_v1 = get_v1_names(attr)
-      api_names_v2 = get_v2_names(attr)
+      api_names_v1 = tf_export.get_v1_names(attr)
+      api_names_v2 = tf_export.get_v2_names(attr)
       deprecated_api_names = set(api_names_v1) - set(api_names_v2)
       for name in deprecated_api_names:
         renames.add((name, get_canonical_name(api_names_v2, name)))
diff --git a/tensorflow/tools/compatibility/update/generate_v2_reorders_map.py b/tensorflow/tools/compatibility/update/generate_v2_reorders_map.py
index 63541771bf..0eb942d396 100644
--- a/tensorflow/tools/compatibility/update/generate_v2_reorders_map.py
+++ b/tensorflow/tools/compatibility/update/generate_v2_reorders_map.py
@@ -64,40 +64,6 @@ from __future__ import print_function
 
 """
 
-_TENSORFLOW_API_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.TENSORFLOW_API_NAME].names)
-_TENSORFLOW_API_ATTR = tf_export.API_ATTRS[tf_export.TENSORFLOW_API_NAME].names
-_TENSORFLOW_CONSTANTS_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.TENSORFLOW_API_NAME].constants)
-_TENSORFLOW_CONSTANTS_ATTR = (
-    tf_export.API_ATTRS[tf_export.TENSORFLOW_API_NAME].constants)
-
-_ESTIMATOR_API_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.ESTIMATOR_API_NAME].names)
-_ESTIMATOR_API_ATTR = tf_export.API_ATTRS[tf_export.ESTIMATOR_API_NAME].names
-_ESTIMATOR_CONSTANTS_ATTR_V1 = (
-    tf_export.API_ATTRS_V1[tf_export.ESTIMATOR_API_NAME].constants)
-_ESTIMATOR_CONSTANTS_ATTR = (
-    tf_export.API_ATTRS[tf_export.ESTIMATOR_API_NAME].constants)
-
-
-def get_v1_names(symbol):
-  names_v1 = []
-  if hasattr(symbol, _TENSORFLOW_API_ATTR_V1):
-    names_v1.extend(getattr(symbol, _TENSORFLOW_API_ATTR_V1))
-  if hasattr(symbol, _ESTIMATOR_API_ATTR_V1):
-    names_v1.extend(getattr(symbol, _ESTIMATOR_API_ATTR_V1))
-  return names_v1
-
-
-def get_v2_names(symbol):
-  names_v2 = []
-  if hasattr(symbol, _TENSORFLOW_API_ATTR):
-    names_v2.extend(getattr(symbol, _TENSORFLOW_API_ATTR))
-  if hasattr(symbol, _ESTIMATOR_API_ATTR):
-    names_v2.extend(getattr(symbol, _ESTIMATOR_API_ATTR))
-  return list(names_v2)
-
 
 def collect_function_arg_names(function_names):
   """Determines argument names for reordered function signatures.
@@ -115,7 +81,7 @@ def collect_function_arg_names(function_names):
     """Visitor that collects arguments for reordered functions."""
     for child in children:
       _, attr = tf_decorator.unwrap(child[1])
-      api_names_v1 = get_v1_names(attr)
+      api_names_v1 = tf_export.get_v1_names(attr)
       api_names_v1 = ['tf.%s' % name for name in api_names_v1]
       matches_function_names = any(
           name in function_names for name in api_names_v1)
-- 
GitLab


From 795f16f1fbf7b2018475c81c43e1050a1f87ce8e Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Tue, 11 Dec 2018 13:52:38 -0800
Subject: [PATCH 374/873] remove `global data`

---
 tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index bbcfc32098..77889effc8 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -125,7 +125,6 @@ data_index = 0
 
 # Step 3: Function to generate a training batch for the skip-gram model.
 def generate_batch(batch_size, num_skips, skip_window):
-  global data
   global data_index
   assert batch_size % num_skips == 0
   assert num_skips <= 2 * skip_window
-- 
GitLab


From ec6df8e4fb10b25737295bcb49791842eb478400 Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Tue, 11 Dec 2018 13:47:51 -0800
Subject: [PATCH 375/873] Fix TF_TensorFromTensor not setting status on success

PiperOrigin-RevId: 225063980
---
 tensorflow/c/c_api.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 94d18eb8b0..9580215a31 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -488,6 +488,7 @@ static TF_Tensor* EmptyTensor(TF_DataType dtype, const TensorShape& shape) {
 // Non-static for testing.
 TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
                                TF_Status* status) {
+  TF_SetStatus(status, TF_OK, "");
   if (!src.IsInitialized()) {
     status->status = FailedPrecondition(
         "attempt to use a tensor with an uninitialized value");
-- 
GitLab


From 5dd912f2d734342441d9649a7b5259150d197f23 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 13:50:53 -0800
Subject: [PATCH 376/873] Automated rollback of commit
 d09435e0cc8b21e5b10eb0f9750e7a24c2031e85

PiperOrigin-RevId: 225064608
---
 tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
index ef35e84ba5..b4b06a40a2 100644
--- a/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc
@@ -98,7 +98,7 @@ Status DumpOpProfileToLogDirectory(StringPiece run_dir,
   if (!status.ok()) {
     return errors::Internal(
         "Failed to convert op profile to json. Skipping... ",
-        string(status.message()));
+        string(status.error_message()));
   }
   TF_RETURN_IF_ERROR(WriteStringToFile(Env::Default(), path, json));
   if (os) {
-- 
GitLab


From 24b6319fdf70e6b2b35fd804dccdfa3cc07b2537 Mon Sep 17 00:00:00 2001
From: Goldie Gadde <ggadde@google.com>
Date: Tue, 11 Dec 2018 14:19:51 -0800
Subject: [PATCH 377/873] Updated

---
 tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h | 8 --------
 tensorflow/python/ops/confusion_matrix.py                 | 1 +
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index 164be226b7..85a0e5328c 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -215,14 +215,6 @@ class IrEmitterUnnested : public IrEmitter {
   // Prerequisite: `IsReductionToVector(*unnested_hlo)`
   Status EmitReductionToVector(HloInstruction* unnested_hlo);
 
-  // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in
-  // the process. `scatter` may be fused, scatter indices are taken from
-  // `scatter_indices_gen`, updates from`updates_gen`. The output buffer is
-  // expected to have the operand values in it already.
-  Status EmitScatter(Thunk* thunk, HloInstruction* scatter,
-                     const llvm_ir::ElementGenerator& scatter_indices_gen,
-                     const llvm_ir::ElementGenerator& updates_gen);
-
   // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in
   // the process. `scatter` may be fused, scatter indices are taken from
   // `scatter_indices_gen`, updates from`updates_gen`. The output buffer is
diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py
index fb584cc6f8..ccfe3b65c2 100644
--- a/tensorflow/python/ops/confusion_matrix.py
+++ b/tensorflow/python/ops/confusion_matrix.py
@@ -26,6 +26,7 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sparse_ops
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
-- 
GitLab


From 40345bd2c3cfdcb095f8cdd7595f4a1eb9698f8f Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Tue, 11 Dec 2018 14:18:26 -0800
Subject: [PATCH 378/873] Re-submit the coordinator change.

PiperOrigin-RevId: 225069740
---
 .../python/estimator_training_test.py         |  7 ++--
 .../python/multi_worker_test_base.py          | 10 ++++--
 .../distribute/distribute_coordinator.py      | 35 +++++++++++++------
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/estimator_training_test.py b/tensorflow/contrib/distribute/python/estimator_training_test.py
index b369a7fefe..3f55a8a1c8 100644
--- a/tensorflow/contrib/distribute/python/estimator_training_test.py
+++ b/tensorflow/contrib/distribute/python/estimator_training_test.py
@@ -375,11 +375,13 @@ class DistributeCoordinatorIntegrationTest(
     threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                  cluster_spec, train_distribute,
                                                  eval_distribute)
+    threads_to_join = []
     for task_type, ts in threads.items():
       if task_type == PS:
         continue
       for t in ts:
-        t.join()
+        threads_to_join.append(t)
+    self.join_independent_workers(threads_to_join)
 
     estimator = self._get_estimator(train_distribute, eval_distribute)
     self._inspect_train_and_eval_events(estimator)
@@ -413,8 +415,7 @@ class DistributeCoordinatorIntegrationTest(
     threads = self.run_multiple_tasks_in_threads(self._independent_worker_fn,
                                                  cluster_spec, train_distribute,
                                                  eval_distribute)
-    threads[WORKER][0].join()
-    threads[EVALUATOR][0].join()
+    self.join_independent_workers([threads[WORKER][0], threads[EVALUATOR][0]])
 
     estimator = self._get_estimator(train_distribute, eval_distribute)
     self._inspect_train_and_eval_events(estimator)
diff --git a/tensorflow/contrib/distribute/python/multi_worker_test_base.py b/tensorflow/contrib/distribute/python/multi_worker_test_base.py
index 147c9b83f8..b05aac431f 100644
--- a/tensorflow/contrib/distribute/python/multi_worker_test_base.py
+++ b/tensorflow/contrib/distribute/python/multi_worker_test_base.py
@@ -40,6 +40,7 @@ from tensorflow.python.client import session
 from tensorflow.python.estimator import run_config
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import coordinator
 from tensorflow.python.training import server_lib
 
 ASSIGNED_PORTS = set()
@@ -360,6 +361,7 @@ class IndependentWorkerTestBase(test.TestCase):
     self._mock_os_env = MockOsEnv()
     self._mock_context = test.mock.patch.object(os, 'environ',
                                                 self._mock_os_env)
+    self._coord = coordinator.Coordinator()
     super(IndependentWorkerTestBase, self).setUp()
     self._mock_context.__enter__()
 
@@ -368,8 +370,9 @@ class IndependentWorkerTestBase(test.TestCase):
     super(IndependentWorkerTestBase, self).tearDown()
 
   def _task_thread(self, task_fn, tf_config, *args, **kwargs):
-    os.environ['TF_CONFIG'] = json.dumps(tf_config)
-    task_fn(*args, **kwargs)
+    with self._coord.stop_on_exception():
+      os.environ['TF_CONFIG'] = json.dumps(tf_config)
+      task_fn(*args, **kwargs)
 
   def _run_task_in_thread(self, task_fn, cluster_spec, task_type, task_id,
                           *args, **kwargs):
@@ -403,3 +406,6 @@ class IndependentWorkerTestBase(test.TestCase):
                                      *args, **kwargs)
         threads[task_type].append(t)
     return threads
+
+  def join_independent_workers(self, worker_threads):
+    self._coord.join(worker_threads)
diff --git a/tensorflow/python/distribute/distribute_coordinator.py b/tensorflow/python/distribute/distribute_coordinator.py
index c0f9b8a1fd..78c995a578 100644
--- a/tensorflow/python/distribute/distribute_coordinator.py
+++ b/tensorflow/python/distribute/distribute_coordinator.py
@@ -29,6 +29,7 @@ from tensorflow.python.client import session
 from tensorflow.python.distribute import distribute_coordinator_context
 from tensorflow.python.distribute import multi_worker_util
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import coordinator
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import server_lib
 
@@ -328,7 +329,8 @@ def _run_single_worker(worker_fn,
                        task_id,
                        session_config,
                        rpc_layer="",
-                       worker_barrier=None):
+                       worker_barrier=None,
+                       coord=None):
   """Runs a single worker by calling `worker_fn` under context."""
   session_config = copy.deepcopy(session_config)
   strategy = copy.deepcopy(strategy)
@@ -350,7 +352,11 @@ def _run_single_worker(worker_fn,
       rpc_layer=rpc_layer,
       worker_barrier=worker_barrier)
   with context:
-    return worker_fn(strategy)
+    if coord:
+      with coord.stop_on_exception():
+        return worker_fn(strategy)
+    else:
+      return worker_fn(strategy)
 
 
 def _split_cluster_for_evaluator(cluster_spec, task_type):
@@ -423,6 +429,7 @@ def _run_std_server(cluster_spec=None,
 def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                               cluster_spec, session_config, rpc_layer):
   """Runs a standalone client for between-graph replication."""
+  coord = coordinator.Coordinator()
   eval_thread = None
   if _TaskType.EVALUATOR in cluster_spec.jobs:
     eval_thread = threading.Thread(
@@ -431,6 +438,7 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
               session_config),
         kwargs={
             "rpc_layer": rpc_layer,
+            "coord": coord,
         })
     eval_thread.start()
 
@@ -444,18 +452,18 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                 session_config),
           kwargs={
               "rpc_layer": rpc_layer,
-              "worker_barrier": worker_barrier
+              "worker_barrier": worker_barrier,
+              "coord": coord,
           })
       t.start()
       threads.append(t)
 
-  # TODO(yuefengz): wrap threads into thread coordinator?
-  for t in threads:
-    t.join()
-
-  # TODO(yuefengz): is it necessary to join eval thread?
   if eval_thread:
-    eval_thread.join()
+    # TODO(yuefengz): is it necessary to join eval thread?
+    threads_to_join = threads + [eval_thread]
+  else:
+    threads_to_join = threads
+  coord.join(threads_to_join)
 
   # TODO(yuefengz): we probably want to return results from all workers?
   return None
@@ -464,6 +472,7 @@ def _run_between_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
 def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
                          cluster_spec, session_config, rpc_layer):
   """Runs a standalone client for in-graph replication."""
+  coord = coordinator.Coordinator()
   eval_thread = None
   if _TaskType.EVALUATOR in cluster_spec.jobs:
     eval_thread = threading.Thread(
@@ -472,6 +481,7 @@ def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
               session_config),
         kwargs={
             "rpc_layer": rpc_layer,
+            "coord": coord,
         })
     eval_thread.start()
 
@@ -482,9 +492,12 @@ def _run_in_graph_client(worker_fn, strategy, eval_fn, eval_strategy,
       None,
       None,
       session_config,
-      rpc_layer=rpc_layer)
+      rpc_layer=rpc_layer,
+      coord=coord)
+
   if eval_thread:
-    eval_thread.join()
+    coord.join([eval_thread])
+
   return worker_result
 
 
-- 
GitLab


From 2a87c2df921753fb8c1cba585f78bd3ab6087be2 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 11 Dec 2018 14:36:14 -0800
Subject: [PATCH 379/873] Skeleton for PolymorphicFunction serialization

Missing things like variables, function/argument names, support for arguments that aren't a flat list of Tensors, and many other things. But it does manage to save, restore, and call a function.

Starts saving a bit of extra metadata when a new function trace is created. Since this does not have to be computed each time the function is called, I expect the performance impact to be minimal.

PiperOrigin-RevId: 225072712
---
 tensorflow/python/eager/def_function.py       | 21 ++++++
 tensorflow/python/eager/def_function_test.py  | 19 +++++
 tensorflow/python/eager/function.py           | 21 ++++++
 tensorflow/python/framework/function.py       | 22 +++---
 tensorflow/python/framework/function_test.py  | 10 +--
 tensorflow/python/framework/importer.py       |  4 +-
 tensorflow/python/saved_model/BUILD           | 35 ++++++++-
 .../saved_model/function_deserialization.py   | 46 ++++++++++++
 .../saved_model/function_serialization.py     | 71 +++++++++++++++++++
 tensorflow/python/saved_model/load.py         | 18 ++++-
 tensorflow/python/saved_model/load_test.py    |  2 +
 tensorflow/python/saved_model/save.py         |  4 ++
 .../saved_model/saved_object_graph.proto      | 11 +++
 13 files changed, 263 insertions(+), 21 deletions(-)
 create mode 100644 tensorflow/python/saved_model/function_deserialization.py
 create mode 100644 tensorflow/python/saved_model/function_serialization.py

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 3663d72999..cdbf39ddd5 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -242,6 +242,7 @@ class PolymorphicFunction(object):
       raise NotImplementedError()
     self._created_variables = None
     self._stateful_fn = None
+    self._stateless_fn = None
     self._descriptor_cache = weakref.WeakKeyDictionary()
     self._name = name
 
@@ -382,6 +383,26 @@ class PolymorphicFunction(object):
 
     return initialize_variables.get_concrete_function()
 
+  @property
+  def _cached_input_signatures(self):
+    """All input signatures used to call this PolymorphicFunction."""
+    seen = set()
+    # Preserves signature ordering rather than returning a set() so that we
+    # don't need to re-sort signatures later to work around Python 2's set
+    # nondeterminism.
+    # pylint: disable=protected-access
+    concrete_functions = []
+    if self._stateful_fn:
+      concrete_functions.extend(self._stateful_fn._function_cache.values())
+    if self._stateless_fn:
+      concrete_functions.extend(self._stateless_fn._function_cache.values())
+    for concrete_function in concrete_functions:
+      signature = concrete_function._python_call_signature
+      if signature not in seen:
+        yield signature
+        seen.add(signature)
+    # pylint: enable=protected-access
+
   def get_concrete_function(self, *args, **kwargs):
     """Returns a `Function` object specialized to inputs and execution context.
 
diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
index 4100a10044..8b4c40791a 100644
--- a/tensorflow/python/eager/def_function_test.py
+++ b/tensorflow/python/eager/def_function_test.py
@@ -238,6 +238,25 @@ class DefFunctionTest(test.TestCase):
     concrete = compute.get_concrete_function(
         tensor_spec.TensorSpec(None, dtypes.float32))
     self.assertAllClose(4., concrete(constant_op.constant(2.)))
+    input_signature, = compute._cached_input_signatures
+    self.assertEqual(
+        tuple(input_signature),
+        (tensor_spec.TensorSpec(None, dtypes.float32),))
+
+  def test_serialization_signature_cache(self):
+
+    @def_function.function
+    def f(x, y):
+      return x, y
+
+    f(constant_op.constant([[3., 4.]]), constant_op.constant([2.]))
+    f(constant_op.constant([[3, 4, 5]]), constant_op.constant([2]))
+    self.assertEqual(
+        set(f._cached_input_signatures),
+        set(((tensor_spec.TensorSpec([1, 2], dtypes.float32),
+              tensor_spec.TensorSpec([1], dtypes.float32)),
+             (tensor_spec.TensorSpec([1, 3], dtypes.int32),
+              tensor_spec.TensorSpec([1], dtypes.int32)))))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 520c85a2c2..0de0cd96ac 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -748,6 +748,19 @@ class Function(object):
     return ret
 
 
+class UnknownArgument(object):
+  """Signifies an argument which is not currently handled."""
+  pass
+
+
+def _encode_arg_for_serialization(arg):
+  """A representation for this argument, for serializing signatures."""
+  if isinstance(arg, ops.Tensor):
+    return tensor_spec.TensorSpec(arg.shape, arg.dtype)
+  else:
+    return UnknownArgument()
+
+
 pywrap_tensorflow.RegisterType("Tensor", ops.Tensor)
 pywrap_tensorflow.RegisterType("IndexedSlices", ops.IndexedSlices)
 
@@ -1163,6 +1176,14 @@ class PolymorphicFunction(object):
                 autograph=self._autograph,
                 arg_names=arg_names),
             self._function_attributes)
+        if self._input_signature:
+          python_call_signature = self._input_signature
+        else:
+          python_call_signature = tuple(
+              _encode_arg_for_serialization(arg) for arg in args)
+        # Save information about non-Tensor arguments with the concrete
+        # function. Used to serialize PolymorphicFunctions.
+        graph_function._python_call_signature = python_call_signature  # pylint: disable=protected-access
         self._function_cache[cache_key] = graph_function
       return graph_function, args, kwargs
 
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index cfdc915a1b..afc11b17bf 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -993,17 +993,18 @@ def _call(sig, *inputs, **kwargs):
   name = kwargs.pop("name", None)
   g = ops.get_default_graph()
   func_name = sig.name
+  if name is None:
+    name = func_name
   attrs = _parse_kwargs_as_attrs(func_name, **kwargs)
   output_types = [dtypes.DType(x.type) for x in sig.output_arg]
-  with ops.name_scope(name, func_name, inputs) as name:
-    op = g.create_op(
-        func_name,
-        list(inputs),
-        output_types,
-        name=name,
-        attrs=attrs,
-        op_def=sig,
-        compute_shapes=False)
+  op = g.create_op(
+      func_name,
+      list(inputs),
+      output_types,
+      name=name,
+      attrs=attrs,
+      op_def=sig,
+      compute_shapes=False)
   if op.outputs:
     if len(op.outputs) == 1:
       ret = op.outputs[0]
@@ -1046,12 +1047,13 @@ def _from_definition(fdef, grad_func=None):
   c_func = c_api.TF_FunctionImportFunctionDef(serialized)
   result._c_func = c_api_util.ScopedTFFunction(c_func)
   result._extra_inputs = []
+  result._op_def = fdef.signature
   # pylint: enable=protected-access
 
   return result
 
 
-def _from_library(lib):
+def from_library(lib):
   """Creates _DefinedFunctions initialized from a FunctionDefLibrary proto.
 
   This method handles assigning the correct gradient functions to each
diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py
index 6ec71ba8e9..7543376bcf 100644
--- a/tensorflow/python/framework/function_test.py
+++ b/tensorflow/python/framework/function_test.py
@@ -1287,7 +1287,7 @@ class FunctionsFromProtos(test.TestCase):
       gradients_impl.gradients([f1, f2, f3, f4], c)
 
     library = g.as_graph_def().library
-    new_funcs = function._from_library(library)
+    new_funcs = function.from_library(library)
 
     def CheckNewFunc(func):
       new_func = [f for f in new_funcs if f.name == func.name]
@@ -1303,7 +1303,7 @@ class FunctionsFromProtos(test.TestCase):
 
   def testFromLibraryEmptyLib(self):
     library = function_pb2.FunctionDefLibrary()
-    self.assertEqual(len(function._from_library(library)), 0)
+    self.assertEqual(len(function.from_library(library)), 0)
 
   def testFromLibraryMissingFuncDef(self):
 
@@ -1327,7 +1327,7 @@ class FunctionsFromProtos(test.TestCase):
     with self.assertRaisesRegexp(
         ValueError,
         "FunctionDefLibrary missing 'G1_[0-9a-zA-Z]{8,11}' FunctionDef"):
-      function._from_library(library)
+      function.from_library(library)
 
     # Create invalid function def that is missing F1 function def
     library = function_pb2.FunctionDefLibrary()
@@ -1337,7 +1337,7 @@ class FunctionsFromProtos(test.TestCase):
     with self.assertRaisesRegexp(
         ValueError,
         "FunctionDefLibrary missing 'F1_[0-9a-zA-Z]{8,11}' FunctionDef"):
-      function._from_library(library)
+      function.from_library(library)
 
   def testFromLibraryCyclicGradFuncs(self):
 
@@ -1366,7 +1366,7 @@ class FunctionsFromProtos(test.TestCase):
 
     with self.assertRaisesRegexp(
         ValueError, "FunctionDefLibrary contains cyclic gradient functions!"):
-      function._from_library(library)
+      function.from_library(library)
 
   def testExperimentalAttrs(self):
 
diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index 98c7aeccc4..c737bd4881 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -442,11 +442,9 @@ def import_graph_def(graph_def,
     _ProcessNewOps(graph)
 
   if graph_def.library and graph_def.library.function:
-    # pylint: disable=protected-access
-    functions = function._from_library(graph_def.library)
+    functions = function.from_library(graph_def.library)
     for f in functions:
       f.add_to_graph(graph)
-    # pylint: enable=protected-access
 
   # Treat input mappings that don't appear in the graph as an error, because
   # they are likely to be due to a typo.
diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD
index 53d0640542..71d9e34592 100644
--- a/tensorflow/python/saved_model/BUILD
+++ b/tensorflow/python/saved_model/BUILD
@@ -287,7 +287,7 @@ py_library(
     deps = [
         ":builder",
         ":constants",
-        ":loader",
+        ":function_serialization",
         ":saved_object_graph_py",
         ":signature_constants",
         ":signature_def_utils",
@@ -295,15 +295,20 @@ py_library(
         ":utils",
         "//tensorflow/core:protos_all_py",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
         "//tensorflow/python:framework",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:lib",
         "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:tensor_spec",
         "//tensorflow/python:util",
         "//tensorflow/python/eager:context",
         "//tensorflow/python/eager:def_function",
         "//tensorflow/python/eager:function",
         "//tensorflow/python/training/checkpointable:base",
+        "//tensorflow/python/training/checkpointable:tracking",
         "//tensorflow/python/training/checkpointable:util",
     ],
 )
@@ -330,8 +335,12 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":constants",
+        ":function_deserialization",
         ":loader",
         ":saved_object_graph_py",
+        ":utils",
+        "//tensorflow/python:function",
         "//tensorflow/python:lib",
         "//tensorflow/python:util",
         "//tensorflow/python/training/checkpointable:tracking",
@@ -345,10 +354,34 @@ py_test(
     deps = [
         ":load",
         ":save",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:lib",
         "//tensorflow/python:tensor_spec",
         "//tensorflow/python/eager:def_function",
         "//tensorflow/python/eager:test",
         "//tensorflow/python/training/checkpointable:tracking",
     ],
 )
+
+py_library(
+    name = "function_serialization",
+    srcs = [
+        "function_serialization.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":saved_object_graph_py",
+        "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/eager:function",
+    ],
+)
+
+py_library(
+    name = "function_deserialization",
+    srcs = [
+        "function_deserialization.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = ["//tensorflow/python/eager:def_function"],
+)
diff --git a/tensorflow/python/saved_model/function_deserialization.py b/tensorflow/python/saved_model/function_deserialization.py
new file mode 100644
index 0000000000..46bd69ad03
--- /dev/null
+++ b/tensorflow/python/saved_model/function_deserialization.py
@@ -0,0 +1,46 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tools for deserializing PolymorphicFunctions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.eager import def_function
+
+
+def recreate_polymorphic_function(
+    saved_polymorphic_function, defined_functions):
+  """Creates a PolymorphicFunction which runs restored function definitions."""
+  @def_function.function
+  def restored_function(*args):
+    """Calls a restored function."""
+    # Try calling each function, return a value from the first one whose
+    # signature matches.
+    # TODO(allenl): Consider re-populating the function cache directly.
+    # TODO(allenl): Functions saved with input_signatures should revive with
+    # input_signatures.
+    for monomorphic_function in saved_polymorphic_function.monomorphic_function:
+      try:
+        # TODO(allenl): Passing an explicit name here prevents invalid name
+        # errors. We should replace this with something based on the actual
+        # Python function name.
+        return defined_functions[monomorphic_function.concrete_function](
+            *args, name="imported_function")
+      except ValueError:
+        continue
+    raise AssertionError(
+        "Could not find matching function to call for arguments: %s" % (args,))
+  return restored_function
diff --git a/tensorflow/python/saved_model/function_serialization.py b/tensorflow/python/saved_model/function_serialization.py
new file mode 100644
index 0000000000..7cf82776bd
--- /dev/null
+++ b/tensorflow/python/saved_model/function_serialization.py
@@ -0,0 +1,71 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tools for serializing PolymorphicFunctions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.eager import def_function
+from tensorflow.python.eager import function as defun_lib
+from tensorflow.python.saved_model import saved_object_graph_pb2
+
+
+def _serialize_polymorphic_function(function):
+  """Represents a PolymorphicFunction in a SavedModel.
+
+  Adds `function`'s concrete functions to the current graph.
+
+  Args:
+    function: A `PolymorphicFunction` to serialize.
+
+  Returns:
+    An unserialized `SavedPolymorphicFunction` protocol buffer object.
+  """
+  monomorphic_functions = []
+  for signature in function._cached_input_signatures:  # pylint: disable=protected-access
+    if any(isinstance(arg, defun_lib.UnknownArgument) for arg in signature):
+      continue
+    concrete_function = function.get_concrete_function(*signature)
+    concrete_function.add_to_graph()
+    monomorphic_functions.append(
+        saved_object_graph_pb2.SavedMonomorphicFunction(
+            concrete_function=concrete_function.name))
+  return saved_object_graph_pb2.SavedPolymorphicFunction(
+      monomorphic_function=monomorphic_functions)
+
+
+def add_polymorphic_functions_to_object_graph_proto(
+    checkpointable_objects, saved_object_graph):
+  """Finds PolymorphicFunctions attached to objects and saves them."""
+  existing_objects = list(zip(checkpointable_objects, saved_object_graph.nodes))
+  for obj, obj_proto in existing_objects:
+    for attribute_name in dir(obj):
+      try:
+        attribute_value = getattr(obj, attribute_name, None)
+      except:  # pylint: disable=bare-except
+        # We really don't want to throw an exception just because some object's
+        # attribute accessor is broken.
+        attribute_value = None
+      # TODO(allenl): Consider de-duplicating functions which are referenced
+      # from multiple attributes.
+      if isinstance(attribute_value, def_function.PolymorphicFunction):
+        function_node_id = len(saved_object_graph.nodes)
+        function_node = saved_object_graph.nodes.add()
+        function_node.function.CopyFrom(
+            _serialize_polymorphic_function(attribute_value))
+        reference = obj_proto.children.add()
+        reference.node_id = function_node_id
+        reference.local_name = attribute_name
diff --git a/tensorflow/python/saved_model/load.py b/tensorflow/python/saved_model/load.py
index e3095f4ee5..28c0af2b65 100644
--- a/tensorflow/python/saved_model/load.py
+++ b/tensorflow/python/saved_model/load.py
@@ -20,8 +20,10 @@ from __future__ import print_function
 
 import os
 
+from tensorflow.python.framework import function as function_lib
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.saved_model import constants
+from tensorflow.python.saved_model import function_deserialization
 from tensorflow.python.saved_model import loader_impl
 from tensorflow.python.saved_model import saved_object_graph_pb2
 from tensorflow.python.saved_model import utils_impl as saved_model_utils
@@ -33,9 +35,17 @@ class _Loader(object):
   """Helper class to load an object-based SavedModel."""
 
   def __init__(self, object_graph_proto, saved_model_proto, export_dir):
-    self._asset_file_def = saved_model_proto.meta_graphs[0].asset_file_def
+    meta_graph = saved_model_proto.meta_graphs[0]
+    self._asset_file_def = meta_graph.asset_file_def
     self._proto = object_graph_proto
     self._export_dir = export_dir
+    self._defined_functions = {}
+    for defined_function in function_lib.from_library(
+        meta_graph.graph_def.library):
+      # TODO(allenl): Do we need to do name mapping here? Not quite sure what
+      # happens when loaded names collide with existing names.
+      defined_function.add_to_graph(None)
+      self._defined_functions[defined_function.name] = defined_function
     self._load_all()
 
   def _load_all(self):
@@ -52,6 +62,7 @@ class _Loader(object):
     factory = {
         "user_object": lambda: self._recreate_user_object(proto.user_object),
         "asset": lambda: self._recreate_asset(proto.asset),
+        "function": lambda: self._recreate_function(proto.function)
     }
     kind = proto.WhichOneof("kind")
     if kind not in factory:
@@ -68,6 +79,10 @@ class _Loader(object):
         self._asset_file_def[proto.asset_file_def_index].filename)
     return tracking.TrackableAsset(filename)
 
+  def _recreate_function(self, proto):
+    return function_deserialization.recreate_polymorphic_function(
+        proto, self._defined_functions)
+
 
 def _load_saved_object_graph_proto(filename):
   with file_io.FileIO(filename, "rb") as f:
@@ -92,5 +107,4 @@ def load(export_dir):
     raise NotImplementedError(
         "Currently only SavedModels exported with `tf.saved_model.save` may be "
         "imported. Other SavedModels may eventually be supported via load().")
-  # TODO(allenl): load functions from the SavedModel into the eager context
   return root
diff --git a/tensorflow/python/saved_model/load_test.py b/tensorflow/python/saved_model/load_test.py
index a2971101cd..6a10ac432d 100644
--- a/tensorflow/python/saved_model/load_test.py
+++ b/tensorflow/python/saved_model/load_test.py
@@ -23,6 +23,7 @@ import tempfile
 
 from tensorflow.python.eager import def_function
 from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.lib.io import file_io
@@ -47,6 +48,7 @@ class LoadTest(test.TestCase):
     imported = load.load(save_dir)
     self.assertIs(imported.dep_three, imported.dep_two.dep)
     self.assertIsNot(imported.dep_one, imported.dep_two)
+    self.assertEqual(4., imported.f(constant_op.constant(2.)).numpy())
 
   def _make_asset(self, contents):
     filename = tempfile.mktemp(prefix=self.get_temp_dir())
diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index e2726087a5..b065a5a265 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -37,6 +37,7 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.saved_model import builder_impl
 from tensorflow.python.saved_model import constants
+from tensorflow.python.saved_model import function_serialization
 from tensorflow.python.saved_model import saved_object_graph_pb2
 from tensorflow.python.saved_model import signature_constants
 from tensorflow.python.saved_model import signature_def_utils
@@ -511,6 +512,9 @@ def _write_object_graph(root, export_dir, asset_file_def_index):
   for obj, obj_proto in zip(checkpointable_objects, proto.nodes):
     _write_object_proto(obj, obj_proto, asset_file_def_index)
 
+  function_serialization.add_polymorphic_functions_to_object_graph_proto(
+      checkpointable_objects, proto)
+
   extra_asset_dir = os.path.join(
       compat.as_bytes(export_dir),
       compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY))
diff --git a/tensorflow/python/saved_model/saved_object_graph.proto b/tensorflow/python/saved_model/saved_object_graph.proto
index 3991fbede4..ed5c63935f 100644
--- a/tensorflow/python/saved_model/saved_object_graph.proto
+++ b/tensorflow/python/saved_model/saved_object_graph.proto
@@ -48,6 +48,7 @@ message SavedObject {
   oneof kind {
     SavedUserObject user_object = 4;
     SavedAsset asset = 5;
+    SavedPolymorphicFunction function = 6;
   }
 }
 
@@ -71,3 +72,13 @@ message SavedAsset {
   // `AssetFileDef.tensor_info`, MUST be ignored.
   uint32 asset_file_def_index = 1;
 }
+
+// A function with multiple signatures, possibly with non-Tensor arguments.
+message SavedPolymorphicFunction {
+  repeated SavedMonomorphicFunction monomorphic_function = 1;
+}
+
+message SavedMonomorphicFunction {
+  // A reference to a TensorFlow function in the MetaGraph's FunctionDefLibrary
+  string concrete_function = 1;
+}
-- 
GitLab


From 6ea1bab952c7e343986b3d1f894970876faa8412 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 14:37:01 -0800
Subject: [PATCH 380/873] Enhance the Tensor-inspector in the following ways:
 (1) Combine tensors from multiple replicas into a single tensor; each replica
 may have its own trace file. (2) Accept two tensor traces and report their
 difference. (3) Summarize and print the value of a tensor in terms of:       
 (a) full tensor value, (b) partial tensor value, (c) any NaN/Inf, (d) the
 vector Norm, (e) Max-absolute value across all elements. (4) Various print
 order: topological, numerical, alphabetical. (5) Many more unit tests.

PiperOrigin-RevId: 225072821
---
 tensorflow/python/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 0a3ee65bc4..8a7c001321 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -6,6 +6,7 @@
 
 visibility = [
     "//engedu/ml/tf_from_scratch:__pkg__",
+    "//third_party/cloud_tpu/convergence_tools:__subpackages__",
     "//tensorflow:internal",
     "//tensorflow/lite/toco/python:__pkg__",
     "//tensorflow_models:__subpackages__",
-- 
GitLab


From 3b94c63e1b113b8504221c635c83a5477666605b Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Tue, 11 Dec 2018 14:48:11 -0800
Subject: [PATCH 381/873] Fix filename

PiperOrigin-RevId: 225074738
---
 .../src/main/java/org/tensorflow/demo/DetectorActivity.java   | 4 ++--
 .../org/tensorflow/demo/TFLiteObjectDetectionAPIModel.java    | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/DetectorActivity.java b/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/DetectorActivity.java
index 87160f6b3f..2feca79e88 100644
--- a/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/DetectorActivity.java
+++ b/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/DetectorActivity.java
@@ -52,8 +52,8 @@ public class DetectorActivity extends CameraActivity implements OnImageAvailable
   private static final int TF_OD_API_INPUT_SIZE = 300;
   private static final boolean TF_OD_API_IS_QUANTIZED = true;
   private static final String TF_OD_API_MODEL_FILE = "detect.tflite";
-  private static final String TF_OD_API_LABELS_FILE = "file:///android_asset/coco_labels_list.txt";
-  
+  private static final String TF_OD_API_LABELS_FILE = "coco_labels_list.txt";
+
   // Which detection model to use: by default uses Tensorflow Object Detection API frozen
   // checkpoints.
   private enum DetectorMode {
diff --git a/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/TFLiteObjectDetectionAPIModel.java b/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/TFLiteObjectDetectionAPIModel.java
index 9eb21de9d0..afbf317831 100644
--- a/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/TFLiteObjectDetectionAPIModel.java
+++ b/tensorflow/lite/examples/android/app/src/main/java/org/tensorflow/demo/TFLiteObjectDetectionAPIModel.java
@@ -105,8 +105,7 @@ public class TFLiteObjectDetectionAPIModel implements Classifier {
     final TFLiteObjectDetectionAPIModel d = new TFLiteObjectDetectionAPIModel();
 
     InputStream labelsInput = null;
-    String actualFilename = labelFilename.split("file:///android_asset/")[1];
-    labelsInput = assetManager.open(actualFilename);
+    labelsInput = assetManager.open(labelFilename);
     BufferedReader br = null;
     br = new BufferedReader(new InputStreamReader(labelsInput));
     String line;
-- 
GitLab


From 5440a744940b5c773f6a4e0ae84a569cb20acac6 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Tue, 11 Dec 2018 14:58:03 -0800
Subject: [PATCH 382/873] Add warning when using batchnorm in training mode,
 since the error will now go to VLOG(1) by the segmenter. This is a very
 common problem so we want users to see the warning

---
 .../contrib/tensorrt/convert/convert_nodes.cc      | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 5fe284c042..ba1c2e80b2 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -2985,10 +2985,16 @@ tensorflow::Status ConvertFusedBatchNorm(OpConverterParams* params) {
   }
   bool is_training = attrs.get<bool>("is_training");
   if (is_training) {
+    // Trying to use batchnorm in training mode is a very common problem.
+    // Because the error message will only be printed in VLOG(1) by the
+    // segmenter, we issue a special warning so that users will actually see it.
+    LOG(WARNING) << node_def.op() << " only supports is_training=false. If you "
+                 << "are using Keras, please call "
+                 << "keras.backend.set_learning_phase(0) before constructing "
+                 << "your model. At "
+                 << node_def.name());
     return tensorflow::errors::Unimplemented(
-        node_def.op(),
-        " only supports is_training=false. If you are using "
-        "Keras, please use keras.backend.set_learning_phase(0). At ",
+        node_def.op(), " only supports is_training=false, at ",
         node_def.name());
   }
   if (inputs.at(0).is_weights()) {
@@ -3003,7 +3009,7 @@ tensorflow::Status ConvertFusedBatchNorm(OpConverterParams* params) {
           node_def.op(),
           " must have constant inputs for scale, offset, mean and variance, "
           "at ",
-           node_def.name());
+          node_def.name());
     }
   }
   nvinfer1::ITensor const* tensor = inputs.at(0).tensor();
-- 
GitLab


From ecb1d048a8b6a651c1c647038b21eae09717fb96 Mon Sep 17 00:00:00 2001
From: Pooya Davoodi <pdavoodi@nvidia.com>
Date: Tue, 11 Dec 2018 15:05:59 -0800
Subject: [PATCH 383/873] TFTRT: Change LOG(ERROR) to VLOG(1) when
 use_calibration=True with fp32/fp16

---
 tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
index c1688d4db8..d57f2300f8 100644
--- a/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
+++ b/tensorflow/contrib/tensorrt/convert/trt_optimization_pass.cc
@@ -226,8 +226,9 @@ tensorflow::Status TRTOptimizationPass::Optimize(
   tensorflow::tensorrt::convert::ConversionParams cp;
 
   if (use_calibration_ && precision_mode_ != INT8MODE) {
-    LOG(ERROR) << "Calibration with FP32 or FP16 is not implemented. "
-               << "Falling back to use_calibration = False.";
+    VLOG(1) << "Calibration with FP32 or FP16 is not implemented. "
+            << "Falling back to use_calibration = False."
+            << "Note that the default value of use_calibration is True.";
     use_calibration_ = false;
   }
 
-- 
GitLab


From 3dfe44784dcfdc8cca87e59ce8eb1a47b9d95bfd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 15:12:10 -0800
Subject: [PATCH 384/873] Small refactor of `thresholds` default value and
 validation steps. The number of thresholds is used instead of the user
 specified type of the `thresholds` kwarg to determine the output of the
 result method:

`thresholds` is a scalar or single element list/tuple -> return scalar
`thresholds` is a multi element list/tuple -> return list

This is functionally equivalent to the previous code except for cases where the user passes in a single element list for the thresholds kwarg. In the previous code, this would cause the result method to return a list whereas now it returns a scalar.

PiperOrigin-RevId: 225079221
---
 tensorflow/python/keras/metrics.py | 41 ++++++++++++++++--------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index 1d1f3b4586..c8ccb7f624 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -177,6 +177,12 @@ def _assert_thresholds_range(thresholds):
                      .format(invalid_thresholds))
 
 
+def _parse_init_thresholds(thresholds, default_threshold=0.5):
+  thresholds = to_list(default_threshold if thresholds is None else thresholds)
+  _assert_thresholds_range(thresholds)
+  return thresholds
+
+
 def _update_confusion_matrix_variables(variables_to_update,
                                        y_true,
                                        y_pred,
@@ -869,12 +875,11 @@ class _ConfusionMatrixConditionCount(Metric):
     """
     super(_ConfusionMatrixConditionCount, self).__init__(name=name, dtype=dtype)
     self._confusion_matrix_cond = confusion_matrix_cond
-    self.thresholds = 0.5 if thresholds is None else thresholds
-    thresholds_list = to_list(self.thresholds)
-    _assert_thresholds_range(thresholds_list)
+    self.thresholds = _parse_init_thresholds(
+        thresholds, default_threshold=0.5)
     self.accumulator = self.add_weight(
         'accumulator',
-        shape=(len(thresholds_list),),
+        shape=(len(self.thresholds),),
         initializer=init_ops.zeros_initializer)
 
   def update_state(self, y_true, y_pred, sample_weight=None):
@@ -895,10 +900,10 @@ class _ConfusionMatrixConditionCount(Metric):
     }, y_true, y_pred, self.thresholds, sample_weight)
 
   def result(self):
-    if isinstance(self.thresholds, (list, tuple)):
-      result = self.accumulator
-    else:
+    if len(self.thresholds) == 1:
       result = self.accumulator[0]
+    else:
+      result = self.accumulator
     return ops.convert_to_tensor(result)
 
   def reset_states(self):
@@ -1152,16 +1157,15 @@ class Precision(Metric):
       dtype: (Optional) data type of the metric result.
     """
     super(Precision, self).__init__(name=name, dtype=dtype)
-    self.thresholds = 0.5 if thresholds is None else thresholds
-    thresholds_list = to_list(self.thresholds)
-    _assert_thresholds_range(thresholds_list)
+    self.thresholds = _parse_init_thresholds(
+        thresholds, default_threshold=0.5)
     self.tp = self.add_weight(
         'true_positives',
-        shape=(len(thresholds_list),),
+        shape=(len(self.thresholds),),
         initializer=init_ops.zeros_initializer)
     self.fp = self.add_weight(
         'false_positives',
-        shape=(len(thresholds_list),),
+        shape=(len(self.thresholds),),
         initializer=init_ops.zeros_initializer)
 
   def update_state(self, y_true, y_pred, sample_weight=None):
@@ -1184,7 +1188,7 @@ class Precision(Metric):
 
   def result(self):
     result = math_ops.div_no_nan(self.tp, self.tp + self.fp)
-    return result if isinstance(self.thresholds, (list, tuple)) else result[0]
+    return result[0] if len(self.thresholds) == 1 else result
 
   def reset_states(self):
     num_thresholds = len(to_list(self.thresholds))
@@ -1237,16 +1241,15 @@ class Recall(Metric):
       dtype: (Optional) data type of the metric result.
     """
     super(Recall, self).__init__(name=name, dtype=dtype)
-    self.thresholds = 0.5 if thresholds is None else thresholds
-    thresholds_list = to_list(self.thresholds)
-    _assert_thresholds_range(thresholds_list)
+    self.thresholds = _parse_init_thresholds(
+        thresholds, default_threshold=0.5)
     self.tp = self.add_weight(
         'true_positives',
-        shape=(len(thresholds_list),),
+        shape=(len(self.thresholds),),
         initializer=init_ops.zeros_initializer)
     self.fn = self.add_weight(
         'false_negatives',
-        shape=(len(thresholds_list),),
+        shape=(len(self.thresholds),),
         initializer=init_ops.zeros_initializer)
 
   def update_state(self, y_true, y_pred, sample_weight=None):
@@ -1269,7 +1272,7 @@ class Recall(Metric):
 
   def result(self):
     result = math_ops.div_no_nan(self.tp, self.tp + self.fn)
-    return result if isinstance(self.thresholds, (list, tuple)) else result[0]
+    return result[0] if len(self.thresholds) == 1 else result
 
   def reset_states(self):
     num_thresholds = len(to_list(self.thresholds))
-- 
GitLab


From 2087bffc231c4c0c864a6933988da286e4137a4b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 15:46:04 -0800
Subject: [PATCH 385/873] Automated rollback of commit
 221f4d23c6cffa2ad5fb492a300fafda2a640cd8

PiperOrigin-RevId: 225085109
---
 WORKSPACE                                     | 35 ++++++++++---------
 tensorflow/opensource_only.files              |  1 -
 tensorflow/version_check.bzl                  |  2 --
 .../preconfig/generate/archives.bzl           | 25 -------------
 .../preconfig/generate/generate.bzl           |  4 ++-
 .../toolchains/preconfig/generate/generate.sh |  2 +-
 6 files changed, 23 insertions(+), 46 deletions(-)
 delete mode 100644 third_party/toolchains/preconfig/generate/archives.bzl

diff --git a/WORKSPACE b/WORKSPACE
index 99d368ff91..7cc08e0164 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -16,27 +16,30 @@ load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
 
 closure_repositories()
 
-load("//third_party/toolchains/preconfig/generate:archives.bzl",
-     "bazel_toolchains_archive")
-
-bazel_toolchains_archive()
-
-load(
-    "@bazel_toolchains//repositories:repositories.bzl",
-    bazel_toolchains_repositories = "repositories",
+http_archive(
+    name = "base_images_docker",
+    sha256 = "e2b1b7254270bb7605e814a9dbf6d1e4ae04a11136ff1714fbfdabe3f87f7cf9",
+    strip_prefix = "base-images-docker-12801524f867e657fbb5d1a74f31618aff181ac6",
+    urls = ["https://github.com/GoogleCloudPlatform/base-images-docker/archive/12801524f867e657fbb5d1a74f31618aff181ac6.tar.gz"],
 )
 
-bazel_toolchains_repositories()
-
-load(
-    "@io_bazel_rules_docker//container:container.bzl",
-    container_repositories = "repositories",
+http_archive(
+    name = "bazel_toolchains",
+    sha256 = "15b5858b1b5541ec44df31b94c3b8672815b31d71215a98398761ea9f4c4eedb",
+    strip_prefix = "bazel-toolchains-6200b238c9c2d137c0d9a7262c80cc71d98e692b",
+    urls = [
+        "https://github.com/bazelbuild/bazel-toolchains/archive/6200b238c9c2d137c0d9a7262c80cc71d98e692b.tar.gz",
+    ],
 )
 
-container_repositories()
+http_archive(
+    name = "io_bazel_rules_docker",
+    sha256 = "29d109605e0d6f9c892584f07275b8c9260803bf0c6fcb7de2623b2bedc910bd",
+    strip_prefix = "rules_docker-0.5.1",
+    urls = ["https://github.com/bazelbuild/rules_docker/archive/v0.5.1.tar.gz"],
+)
 
-load("//third_party/toolchains/preconfig/generate:workspace.bzl",
-     "remote_config_workspace")
+load("//third_party/toolchains/preconfig/generate:workspace.bzl", "remote_config_workspace")
 
 remote_config_workspace()
 
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 418ef1a369..347dc9fc6b 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -49,7 +49,6 @@ tensorflow/third_party/toolchains/preconfig/ubuntu14.04/nccl2/BUILD
 tensorflow/third_party/toolchains/preconfig/generate/workspace.bzl
 tensorflow/third_party/toolchains/preconfig/generate/containers.bzl
 tensorflow/third_party/toolchains/preconfig/generate/generate.bzl
-tensorflow/third_party/toolchains/preconfig/generate/archives.bzl
 tensorflow/third_party/toolchains/preconfig/generate/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/dummy_toolchain.bzl
diff --git a/tensorflow/version_check.bzl b/tensorflow/version_check.bzl
index 74feaa19ff..3b61827139 100644
--- a/tensorflow/version_check.bzl
+++ b/tensorflow/version_check.bzl
@@ -48,5 +48,3 @@ def check_bazel_version_at_least(minimum_bazel_version):
             native.bazel_version,
             minimum_bazel_version,
         ))
-
-parse_bazel_version = _parse_bazel_version
diff --git a/third_party/toolchains/preconfig/generate/archives.bzl b/third_party/toolchains/preconfig/generate/archives.bzl
deleted file mode 100644
index 086b75b62e..0000000000
--- a/third_party/toolchains/preconfig/generate/archives.bzl
+++ /dev/null
@@ -1,25 +0,0 @@
-load("//tensorflow:version_check.bzl", "parse_bazel_version")
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
-
-def bazel_toolchains_archive():
-  if parse_bazel_version(native.bazel_version) >= parse_bazel_version("0.19"):
-    # This version of the toolchains repo is incompatible with older bazel
-    # versions - we can remove this once TensorFlow drops support for bazel
-    # before 0.19.
-    http_archive(
-        name = "bazel_toolchains",
-        sha256 = "41c48a189be489e2d15dec40e0057ea15b95ee5b39cc2a7e6cf663e31432c75e",
-        strip_prefix = "bazel-toolchains-3f8c58fe530fedc446de04673bc1e32985887dea",
-        urls = [
-            "https://github.com/nlopezgi/bazel-toolchains/archive/3f8c58fe530fedc446de04673bc1e32985887dea.tar.gz",
-        ],
-    )
-  else:
-    http_archive(
-        name = "bazel_toolchains",
-        sha256 = "15b5858b1b5541ec44df31b94c3b8672815b31d71215a98398761ea9f4c4eedb",
-        strip_prefix = "bazel-toolchains-6200b238c9c2d137c0d9a7262c80cc71d98e692b",
-        urls = [
-            "https://github.com/bazelbuild/bazel-toolchains/archive/6200b238c9c2d137c0d9a7262c80cc71d98e692b.tar.gz",
-        ],
-    )
diff --git a/third_party/toolchains/preconfig/generate/generate.bzl b/third_party/toolchains/preconfig/generate/generate.bzl
index fb2af02a53..2fb3a94cdc 100644
--- a/third_party/toolchains/preconfig/generate/generate.bzl
+++ b/third_party/toolchains/preconfig/generate/generate.bzl
@@ -36,7 +36,9 @@ def _tensorflow_rbe_config(name, cuda_version, cudnn_version, python_version, co
             "TF_NCCL_VERSION": "2",
             "CUDNN_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
         },
-        mount_project = "$(mount_project)",
+        # TODO(klimek): We should use the sources that we currently work on, not
+        # just the latest snapshot of tensorflow that is checked in.
+        git_repo = "https://github.com/tensorflow/tensorflow",
         tags = ["manual"],
         incompatible_changes_off = True,
     )
diff --git a/third_party/toolchains/preconfig/generate/generate.sh b/third_party/toolchains/preconfig/generate/generate.sh
index 1f39fcdf6d..37c5211278 100755
--- a/third_party/toolchains/preconfig/generate/generate.sh
+++ b/third_party/toolchains/preconfig/generate/generate.sh
@@ -46,7 +46,7 @@ echo "CUDA: ${CUDA_VERSION}"
 echo "CUDNN: ${CUDNN_VERSION}"
 echo "NCCL: ${NCCL_VERSION}"
 
-bazel build --define=mount_project="${PWD}" "${PKG}/generate:${TARGET}"
+bazel build "${PKG}/generate:${TARGET}"
 cd "${TEMPDIR}"
 tar xvf "${ROOT}/bazel-bin/${PKG}/generate/${TARGET}_outputs.tar"
 
-- 
GitLab


From 9748092a5dbc67f59983f9361c932530bbfdfe68 Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Tue, 11 Dec 2018 16:05:17 -0800
Subject: [PATCH 386/873] [TF port] Add port::GetCurrentCPU and
 port::NumTotalCPUs.

GetCurrentCPU: returns the current CPU of the calling thread.
NumTotalCPUs: attempts to get the total number of physical cores on the system

When both return non-failing values, we expect 0 <= GetCurrentCPU < NumTotalCPUs.

PiperOrigin-RevId: 225088316
---
 tensorflow/core/platform/cpu_info.h      | 15 ++++++++-
 tensorflow/core/platform/port_test.cc    | 12 ++++++++
 tensorflow/core/platform/posix/port.cc   | 39 ++++++++++++++++++++++--
 tensorflow/core/platform/windows/port.cc | 25 +++++++++++++++
 4 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/platform/cpu_info.h b/tensorflow/core/platform/cpu_info.h
index 6eba83224a..c9208cc755 100644
--- a/tensorflow/core/platform/cpu_info.h
+++ b/tensorflow/core/platform/cpu_info.h
@@ -32,9 +32,22 @@ namespace port {
 // Returns an estimate of the number of schedulable CPUs for this
 // process.  Usually, it's constant throughout the lifetime of a
 // process, but it might change if the underlying cluster management
-// software can change it dynamically.
+// software can change it dynamically.  If the underlying call fails, a default
+// value (e.g. `4`) may be returned.
 int NumSchedulableCPUs();
 
+// Returns the total number of CPUs on the system.  This number should
+// not change even if the underlying cluster management software may
+// change the number of schedulable CPUs.  Unlike `NumSchedulableCPUs`, if the
+// underlying call fails, an invalid value of -1 will be returned;
+// the user must check for validity.
+static constexpr int kUnknownCPU = -1;
+int NumTotalCPUs();
+
+// Returns the id of the current CPU.  Returns -1 if the current CPU cannot be
+// identified.  If successful, the return value will be in [0, NumTotalCPUs()).
+int GetCurrentCPU();
+
 // Returns an estimate of the number of hyperthreads per physical core
 // on the CPU
 int NumHyperthreadsPerCore();
diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc
index 15c3cb24f0..9d144efbfd 100644
--- a/tensorflow/core/platform/port_test.cc
+++ b/tensorflow/core/platform/port_test.cc
@@ -33,6 +33,12 @@ TEST(Port, AlignedMalloc) {
   }
 }
 
+TEST(Port, GetCurrentCPU) {
+  const int cpu = GetCurrentCPU();
+  EXPECT_GE(cpu, 0);
+  EXPECT_LT(cpu, NumTotalCPUs());
+}
+
 TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
   mutex m;
   mutex_lock l(m);
@@ -78,3 +84,9 @@ TEST(TestCPUFeature, TestFeature) {
 
 }  // namespace port
 }  // namespace tensorflow
+
+int main(int argc, char** argv) {
+  // On Linux, add: FLAGS_logtostderr = true;
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index acdd7798ea..0fac8b1a88 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -25,7 +25,14 @@ limitations under the License.
 #if defined(__linux__) && !defined(__ANDROID__)
 #include <sched.h>
 #include <sys/sysinfo.h>
+#else
+#include <sys/syscall.h>
+#endif
+
+#if !defined(__APPLE__) && (__x86_64__ || __i386__)
+#include <cpuid.h>
 #endif
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -69,6 +76,34 @@ int NumSchedulableCPUs() {
   return kDefaultCores;
 }
 
+int NumTotalCPUs() {
+  int count = absl::base_internal::NumCPUs();
+  return (count == 0) ? kUnknownCPU : count;
+}
+
+int GetCurrentCPU() {
+#if defined(__linux__) && !defined(__ANDROID__)
+  return sched_getcpu();
+#elif defined(__cpuid_count)
+  // Attempt to use cpuid on all other platforms.  If that fails, perform a
+  // syscall.
+  uint32_t eax, ebx, ecx, edx;
+  __cpuid_count(/*leaf=*/1, /*subleaf=*/0, eax, ebx, ecx, edx);
+  if ((edx & (1 << 9)) != 0) {
+    // EBX bits 24-31 are APIC ID
+    return static_cast<unsigned int>(ebx >> 24);
+  }
+#elif defined(__NR_getcpu)
+  unsigned int cpu;
+  if (syscall(__NR_getcpu, &cpu, NULL, NULL) < 0) {
+    return kUnknownCPU;
+  } else {
+    return static_cast<int>(cpu);
+  }
+#endif
+  return kUnknownCPU;
+}
+
 int NumHyperthreadsPerCore() {
   static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
   return (ht_per_core > 0) ? ht_per_core : 1;
@@ -83,9 +118,7 @@ int NUMANumNodes() { return 1; }
 
 void NUMASetThreadNodeAffinity(int node) {}
 
-int NUMAGetThreadNodeAffinity() {
-  return kNUMANoAffinity;
-}
+int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
 
 void* AlignedMalloc(size_t size, int minimum_alignment) {
 #if defined(__ANDROID__)
diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc
index 911ea1902f..b902c85cdc 100644
--- a/tensorflow/core/platform/windows/port.cc
+++ b/tensorflow/core/platform/windows/port.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #endif
 
 #include <Windows.h>
+#include <processthreadsapi.h>
 #include <shlwapi.h>
 
 #include "tensorflow/core/platform/cpu_info.h"
@@ -54,6 +55,30 @@ int NumSchedulableCPUs() {
   return system_info.dwNumberOfProcessors;
 }
 
+int NumTotalCPUs() {
+  // TODO(ebrevdo): Make this more accurate.
+  //
+  // This only returns the number of processors in the current
+  // processor group; which may be undercounting if you have more than 64 cores.
+  // For that case, one needs to call
+  // GetLogicalProcessorInformationEx(RelationProcessorCore, ...) and accumulate
+  // the Size fields by iterating over the written-to buffer.  Since I can't
+  // easily test this on Windows, I'm deferring this to someone who can!
+  //
+  // If you fix this, also consider updatig GetCurrentCPU below.
+  return NumSchedulableCPUs();
+}
+
+int GetCurrentCPU() {
+  // NOTE(ebrevdo): This returns the processor number within the processor
+  // group on systems with >64 processors.  Therefore it doesn't necessarily map
+  // naturally to an index in NumSchedulableCPUs().
+  //
+  // On the plus side, this number is probably guaranteed to be within
+  // [0, NumTotalCPUs()) due to its incomplete implementation.
+  return GetCurrentProcessorNumber();
+}
+
 bool NUMAEnabled() {
   // Not yet implemented: coming soon.
   return false;
-- 
GitLab


From d846d90c5a07c8a93c35622c4f6047031a412166 Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Wed, 5 Dec 2018 21:57:38 -0800
Subject: [PATCH 387/873] Pass visible devices info down to the backend
 construction. Don't use stream executors in backend

---
 tensorflow/compiler/jit/xla_device.cc         |  6 ++-
 tensorflow/compiler/jit/xla_device.h          |  7 +++
 tensorflow/compiler/jit/xla_gpu_device.cc     | 46 +++++++++++--------
 .../compiler/xla/client/client_library.cc     | 21 +++++++--
 .../compiler/xla/client/client_library.h      | 14 +++++-
 tensorflow/compiler/xla/service/backend.cc    | 14 +++++-
 tensorflow/compiler/xla/service/backend.h     | 13 ++++++
 .../compiler/xla/service/local_service.cc     |  6 ++-
 .../compiler/xla/service/platform_util.cc     |  8 +++-
 .../compiler/xla/service/platform_util.h      |  3 +-
 tensorflow/compiler/xla/service/service.cc    | 18 +++++---
 tensorflow/compiler/xla/service/service.h     |  7 +++
 12 files changed, 125 insertions(+), 38 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc
index 4201ff91a8..77cd2f4462 100644
--- a/tensorflow/compiler/jit/xla_device.cc
+++ b/tensorflow/compiler/jit/xla_device.cc
@@ -201,7 +201,8 @@ XlaDevice::XlaDevice(const SessionOptions& session_options,
       jit_device_name_(options.compilation_device_name),
       platform_(options.platform),
       use_multiple_streams_(options.use_multiple_streams),
-      shape_representation_fn_(options.shape_representation_fn) {
+      shape_representation_fn_(options.shape_representation_fn),
+      allowed_devices_(options.allowed_devices) {
   VLOG(1) << "Created XLA device " << options.compilation_device_name << " "
           << this;
   thread_pool_.reset(new thread::ThreadPool(session_options.env, "xla_device",
@@ -234,7 +235,8 @@ xla::LocalClient* XlaDevice::client() const {
 
   // TODO(b/78468222): This can fail, at least when the backend is GPU and
   // there is no GPU on the host.
-  return xla::ClientLibrary::GetOrCreateLocalClient(platform_).ValueOrDie();
+  return xla::ClientLibrary::GetOrCreateLocalClient(platform_, allowed_devices_)
+      .ValueOrDie();
 }
 
 Allocator* XlaDevice::GetAllocator(AllocatorAttributes attr) {
diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h
index c8bb276cdb..90d544e2a1 100644
--- a/tensorflow/compiler/jit/xla_device.h
+++ b/tensorflow/compiler/jit/xla_device.h
@@ -24,6 +24,7 @@ limitations under the License.
 
 #ifndef TENSORFLOW_COMPILER_JIT_XLA_DEVICE_H_
 #define TENSORFLOW_COMPILER_JIT_XLA_DEVICE_H_
+#include <set>
 
 #include "tensorflow/compiler/jit/xla_device_context.h"
 #include "tensorflow/compiler/jit/xla_tensor.h"
@@ -123,6 +124,8 @@ class XlaDevice : public LocalDevice {
     // If padded_shape_fn is empty, a default implementation that returns
     // the logical on-device shape without padding is used.
     PaddedShapeFn padded_shape_fn;
+    // Set of allowed devices. -1 is all devices
+    std::set<int> allowed_devices = {-1};
   };
 
   // Creates a new XLA Device.
@@ -165,6 +168,7 @@ class XlaDevice : public LocalDevice {
 
   bool RequiresSyncOnCompletion() const override LOCKS_EXCLUDED(mu_);
 
+
   // A simple RAII handle. On construction the device's
   // outstanding_asynchronous_operations_ field is incremented; on destruction
   // it is decremented.
@@ -256,6 +260,9 @@ class XlaDevice : public LocalDevice {
   // completion.
   int64 outstanding_asynchronous_operations_ GUARDED_BY(mu_) = 0;
   condition_variable outstanding_asynchronous_operations_cv_;
+
+  // Set of allowed gpu devices at the time of construction.
+  std::set<int> allowed_devices_ = {-1};
 };
 
 // Builds OpKernel registrations on 'device' for the JIT operators
diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index 0191315a66..877af52dde 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -33,8 +33,34 @@ class XlaGpuDeviceFactory : public DeviceFactory {
  public:
   Status CreateDevices(const SessionOptions& options, const string& name_prefix,
                        std::vector<std::unique_ptr<Device>>* devices) override;
+  // Returns a set containing the device ids contained in visible_device_list or
+  // -1 if the string is empty.
+  static xla::StatusOr<std::set<int>> ParseVisibleDeviceList(
+      const string& visible_device_list);
 };
 
+xla::StatusOr<std::set<int>> XlaGpuDeviceFactory::ParseVisibleDeviceList(
+    const string& visible_device_list) {
+  std::set<int> gpu_ids;
+  if (visible_device_list.length() == 0) {
+    gpu_ids.insert(-1);
+    return gpu_ids;
+  }
+  const std::vector<string> visible_devices =
+      absl::StrSplit(visible_device_list, ',');
+  for (const string& platform_gpu_id_str : visible_devices) {
+    int32 platform_gpu_id;
+    if (!absl::SimpleAtoi(platform_gpu_id_str, &platform_gpu_id)) {
+      return errors::InvalidArgument(
+          "Could not parse entry in 'visible_device_list': '",
+          platform_gpu_id_str,
+          "'. visible_device_list = ", visible_device_list);
+    }
+    gpu_ids.insert(platform_gpu_id);
+  }
+  return gpu_ids;
+}
+
 Status XlaGpuDeviceFactory::CreateDevices(
     const SessionOptions& session_options, const string& name_prefix,
     std::vector<std::unique_ptr<Device>>* devices) {
@@ -64,24 +90,7 @@ Status XlaGpuDeviceFactory::CreateDevices(
       gpu_ids.insert(i);
     }
   } else {
-    // For loop below is copied from gpu/gpu_device.cc. It validates
-    // the visible_device_list and populates gpu_ids set.
-    const std::vector<string> visible_devices =
-        absl::StrSplit(allowed_gpus, ',');
-    for (const string& platform_gpu_id_str : visible_devices) {
-      int32 platform_gpu_id;
-      if (!absl::SimpleAtoi(platform_gpu_id_str, &platform_gpu_id)) {
-        return errors::InvalidArgument(
-            "Could not parse entry in 'visible_device_list': '",
-            platform_gpu_id_str, "'. visible_device_list = ", allowed_gpus);
-      }
-      if (platform_gpu_id < 0 || platform_gpu_id >= num_visible_devices) {
-        return errors::InvalidArgument(
-            "'visible_device_list' listed an invalid GPU id '", platform_gpu_id,
-            "' but visible device count is ", num_visible_devices);
-      }
-      gpu_ids.insert(platform_gpu_id);
-    }
+    gpu_ids = ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
   }
   for (int i : gpu_ids) {
     XlaDevice::Options options;
@@ -91,6 +100,7 @@ Status XlaGpuDeviceFactory::CreateDevices(
     options.device_ordinal = i;
     options.compilation_device_name = DEVICE_GPU_XLA_JIT;
     options.use_multiple_streams = true;
+    options.allowed_devices=gpu_ids;
     auto device = absl::make_unique<XlaDevice>(session_options, options);
 
     Status status = device->UseGpuDeviceInfo();
diff --git a/tensorflow/compiler/xla/client/client_library.cc b/tensorflow/compiler/xla/client/client_library.cc
index 27b7fa7b29..33d8fa9841 100644
--- a/tensorflow/compiler/xla/client/client_library.cc
+++ b/tensorflow/compiler/xla/client/client_library.cc
@@ -26,10 +26,12 @@ namespace xla {
 
 LocalClientOptions::LocalClientOptions(se::Platform* platform,
                                        int number_of_replicas,
-                                       int intra_op_parallelism_threads)
+                                       int intra_op_parallelism_threads,
+                                       std::set<int> device_set)
     : platform_(platform),
       number_of_replicas_(number_of_replicas),
-      intra_op_parallelism_threads_(intra_op_parallelism_threads) {}
+      intra_op_parallelism_threads_(intra_op_parallelism_threads),
+      allowed_devices_(device_set) {}
 
 LocalClientOptions& LocalClientOptions::set_platform(se::Platform* platform) {
   platform_ = platform;
@@ -58,6 +60,16 @@ int LocalClientOptions::intra_op_parallelism_threads() const {
   return intra_op_parallelism_threads_;
 }
 
+LocalClientOptions& LocalClientOptions::set_allowed_devices(
+    std::set<int> device_set) {
+  allowed_devices_ = device_set;
+  return *this;
+}
+
+std::set<int> LocalClientOptions::get_allowed_devices() const {
+  return allowed_devices_;
+}
+
 /* static */ ClientLibrary& ClientLibrary::Singleton() {
   static ClientLibrary* c = new ClientLibrary;
   return *c;
@@ -67,9 +79,10 @@ ClientLibrary::ClientLibrary() = default;
 ClientLibrary::~ClientLibrary() = default;
 
 /* static */ StatusOr<LocalClient*> ClientLibrary::GetOrCreateLocalClient(
-    se::Platform* platform) {
+    se::Platform* platform, const std::set<int> device_set) {
   LocalClientOptions default_options;
   default_options.set_platform(platform);
+  default_options.set_allowed_devices(device_set);
   return GetOrCreateLocalClient(default_options);
 }
 
@@ -94,7 +107,7 @@ ClientLibrary::~ClientLibrary() = default;
   service_options.set_number_of_replicas(replica_count);
   service_options.set_intra_op_parallelism_threads(
       options.intra_op_parallelism_threads());
-
+  service_options.set_allowed_devices(options.get_allowed_devices());
   auto instance = absl::make_unique<LocalInstance>();
   TF_ASSIGN_OR_RETURN(instance->service,
                       LocalService::NewService(service_options));
diff --git a/tensorflow/compiler/xla/client/client_library.h b/tensorflow/compiler/xla/client/client_library.h
index 3ad558fa53..d180c8f7ca 100644
--- a/tensorflow/compiler/xla/client/client_library.h
+++ b/tensorflow/compiler/xla/client/client_library.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include <memory>
 #include <string>
 #include <vector>
+#include <set>
 
 #include "tensorflow/compiler/xla/client/compile_only_client.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
@@ -45,7 +46,8 @@ class LocalClientOptions {
  public:
   LocalClientOptions(se::Platform* platform = nullptr,
                      int number_of_replicas = 1,
-                     int intra_op_parallelism_threads = -1);
+                     int intra_op_parallelism_threads = -1,
+                     std::set<int> device_set = {-1});
 
   // Set the platform backing the service, or nullptr for the default platform.
   LocalClientOptions& set_platform(se::Platform* platform);
@@ -60,10 +62,16 @@ class LocalClientOptions {
   LocalClientOptions& set_intra_op_parallelism_threads(int num_threads);
   int intra_op_parallelism_threads() const;
 
+  // Sets the allowed_devices set for creation of stream executors.
+  LocalClientOptions& set_allowed_devices(const std::set<int> device_set);
+
+  std::set<int> get_allowed_devices() const;
+
  private:
   se::Platform* platform_;
   int number_of_replicas_;
   int intra_op_parallelism_threads_;
+  std::set<int> allowed_devices_;
 };
 
 class ClientLibrary {
@@ -73,8 +81,10 @@ class ClientLibrary {
   //
   //   platform : The platform the underlying XLA service should target. If
   //     null then default platform is used.
+  //   device_set: Set of device IDs for which the stream executor will be created
+  //   for, for the given platform.
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
-      se::Platform* platform = nullptr);
+      se::Platform* platform = nullptr, const std::set<int> device_set = {-1});
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
       const LocalClientOptions& options);
 
diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc
index 5c180cbdd4..385c5eefa0 100644
--- a/tensorflow/compiler/xla/service/backend.cc
+++ b/tensorflow/compiler/xla/service/backend.cc
@@ -57,6 +57,15 @@ int BackendOptions::intra_op_parallelism_threads() const {
   return intra_op_parallelism_threads_;
 }
 
+BackendOptions& BackendOptions::set_allowed_devices(std::set<int> device_set) {
+  allowed_devices_ = device_set;
+  return *this;
+}
+
+std::set<int> BackendOptions::get_allowed_devices() const {
+  return allowed_devices_;
+}
+
 // Define this in .cc file to avoid having to include eigen or forward declare
 // these types in the header.
 struct Backend::EigenThreadPoolWrapper {
@@ -77,7 +86,8 @@ struct Backend::EigenThreadPoolWrapper {
   se::Platform* platform = options.platform();
   TF_ASSIGN_OR_RETURN(auto compiler, Compiler::GetForPlatform(platform));
   TF_ASSIGN_OR_RETURN(auto stream_executors,
-                      PlatformUtil::GetStreamExecutors(platform));
+                      PlatformUtil::GetStreamExecutors(
+                          platform, options.get_allowed_devices()));
   TF_ASSIGN_OR_RETURN(auto transfer_manager,
                       TransferManager::GetForPlatform(platform));
   TF_ASSIGN_OR_RETURN(auto computation_placer,
@@ -172,7 +182,7 @@ StatusOr<se::StreamExecutor*> Backend::stream_executor(
         device_ordinal, stream_executors_.back()->device_ordinal());
   }
   for (auto* executor : stream_executors_) {
-    if (executor->device_ordinal() == device_ordinal) {
+    if (executor && executor->device_ordinal() == device_ordinal) {
       return executor;
     }
   }
diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h
index a2dafbe803..26bae43c1a 100644
--- a/tensorflow/compiler/xla/service/backend.h
+++ b/tensorflow/compiler/xla/service/backend.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <memory>
 #include <string>
 #include <vector>
+#include <set>
 
 #include "absl/strings/str_cat.h"
 #include "absl/types/span.h"
@@ -53,9 +54,15 @@ class BackendOptions {
   BackendOptions& set_intra_op_parallelism_threads(int num_threads);
   int intra_op_parallelism_threads() const;
 
+  // Sets the allowed_devices set for creation of stream executors.
+  BackendOptions& set_allowed_devices(const std::set<int> device_set);
+
+  std::set<int> get_allowed_devices() const;
+
  private:
   se::Platform* platform_ = nullptr;
   int intra_op_parallelism_threads_ = -1;
+  std::set<int> allowed_devices_ = {-1};
 };
 
 // Class which encapsulates an XLA backend. It includes everything necessary
@@ -106,6 +113,12 @@ class Backend {
   // can be > 1).
   se::StreamExecutor* default_stream_executor() const {
     CHECK(!stream_executors_.empty());
+
+    for(se::StreamExecutor* e :stream_executors_){
+      if(e){
+        return e;
+      }
+    }
     return stream_executors_[0];
   }
 
diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc
index 6c89700983..4b0b8fc347 100644
--- a/tensorflow/compiler/xla/service/local_service.cc
+++ b/tensorflow/compiler/xla/service/local_service.cc
@@ -52,8 +52,10 @@ namespace xla {
   }
 
   BackendOptions backend_options;
-  backend_options.set_platform(platform).set_intra_op_parallelism_threads(
-      options.intra_op_parallelism_threads());
+  backend_options.set_platform(platform)
+      .set_intra_op_parallelism_threads(options.intra_op_parallelism_threads())
+      .set_allowed_devices(options.get_allowed_devices());
+
   TF_ASSIGN_OR_RETURN(std::unique_ptr<Backend> backend,
                       Backend::CreateBackend(backend_options));
 
diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc
index c227106511..b8fb2047e6 100644
--- a/tensorflow/compiler/xla/service/platform_util.cc
+++ b/tensorflow/compiler/xla/service/platform_util.cc
@@ -205,7 +205,8 @@ static bool IsDeviceSupported(se::StreamExecutor* executor) {
 }
 
 /* static */ StatusOr<std::vector<se::StreamExecutor*>>
-PlatformUtil::GetStreamExecutors(se::Platform* platform) {
+PlatformUtil::GetStreamExecutors(se::Platform* platform,
+                                 std::set<int> allowed_devices) {
   int device_count = platform->VisibleDeviceCount();
   if (device_count <= 0) {
     return NotFound("no %s devices found", platform->Name());
@@ -226,6 +227,11 @@ PlatformUtil::GetStreamExecutors(se::Platform* platform) {
     tensorflow::thread::ThreadPool thread_pool(
         tensorflow::Env::Default(), "device_initialization", device_count);
     for (int i = 0; i < device_count; ++i) {
+      if (allowed_devices.count(-1) == 0 && allowed_devices.count(i) == 0) {
+        VLOG(1) << "Skipping stream executor for device " << i
+                << " since it is not in the visible device list";
+        continue;
+      }
       thread_pool.Schedule([platform, i, &stream_executors]() {
         VLOG(1) << "Started device init " << i;
         se::StreamExecutorConfig config;
diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h
index 571451ba43..04af762fb6 100644
--- a/tensorflow/compiler/xla/service/platform_util.h
+++ b/tensorflow/compiler/xla/service/platform_util.h
@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <string>
 #include <vector>
+#include <set>
 
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -63,7 +64,7 @@ class PlatformUtil {
   //
   // If the platform has no visible devices, a not-found error is returned.
   static StatusOr<std::vector<se::StreamExecutor*>> GetStreamExecutors(
-      se::Platform* platform);
+      se::Platform* platform, std::set<int> allowed_devices = {-1});
 
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(PlatformUtil);
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 5ec7fe2ade..b9e5be486a 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -113,6 +113,15 @@ int ServiceOptions::intra_op_parallelism_threads() const {
   return intra_op_parallelism_threads_;
 }
 
+ServiceOptions& ServiceOptions::set_allowed_devices(std::set<int> device_set) {
+  allowed_devices_ = device_set;
+  return *this;
+}
+
+std::set<int> ServiceOptions::get_allowed_devices() const {
+  return allowed_devices_;
+}
+
 /* static */ StatusOr<std::unique_ptr<Service>> Service::NewService(
     se::Platform* platform) {
   ServiceOptions default_options;
@@ -129,6 +138,7 @@ int ServiceOptions::intra_op_parallelism_threads() const {
   }
   BackendOptions backend_options;
   backend_options.set_platform(platform);
+  backend_options.set_allowed_devices(options.get_allowed_devices());
   TF_ASSIGN_OR_RETURN(execute_backend, Backend::CreateBackend(backend_options));
 
   std::unique_ptr<Service> service(
@@ -150,17 +160,13 @@ Service::Service(const ServiceOptions& options,
     LOG(INFO) << StrFormat(
         "XLA service %p executing computations on platform %s. Devices:", this,
         execute_backend_->platform()->Name());
+    auto stream_executors=execute_backend_->stream_executors();
     for (int i = 0; i < execute_backend_->device_count(); ++i) {
-      if (execute_backend_->device_ordinal_supported(i)) {
-        se::StreamExecutor* executor =
-            execute_backend_->stream_executor(i).ValueOrDie();
+        se::StreamExecutor* executor =stream_executors.at(i);
         const auto& description = executor->GetDeviceDescription();
         LOG(INFO) << StrFormat("  StreamExecutor device (%d): %s, %s", i,
                                description.name(),
                                description.platform_version());
-      } else {
-        LOG(INFO) << StrFormat("  StreamExecutor device (%d) not supported", i);
-      }
     }
   } else {
     VLOG(1) << "XLA compile-only service constructed";
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 11e1a79552..95a504ee30 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <memory>
 #include <string>
 #include <vector>
+#include <set>
 
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/debug_options_flags.h"
@@ -61,10 +62,16 @@ class ServiceOptions {
   ServiceOptions& set_intra_op_parallelism_threads(int num_threads);
   int intra_op_parallelism_threads() const;
 
+  // Sets the allowed_devices set for creation of stream executors.
+  ServiceOptions& set_allowed_devices(const std::set<int> device_set);
+
+  std::set<int> get_allowed_devices() const;
+
  private:
   se::Platform* platform_ = nullptr;
   int number_of_replicas_ = 1;
   int intra_op_parallelism_threads_ = -1;
+  std::set<int> allowed_devices_ = {-1};
 };
 
 // The XLA service object, which is the same across all platforms. It maintains
-- 
GitLab


From 184223ec1652d0d0206e56d062fa12c4c0d9a5a2 Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Tue, 11 Dec 2018 16:20:05 -0800
Subject: [PATCH 388/873] [TF:XLA] Handle more patterns in ArCrsCombiner, and
 handle sequences of patterns.

Now, we optimize any sequence of the form:
AR [Bitcast|Transpose|Reshape|Convert|Multiply|Add|Subtract]* CRS

PiperOrigin-RevId: 225090998
---
 .../compiler/xla/service/ar_crs_combiner.cc   | 145 +++++----
 .../compiler/xla/service/ar_crs_combiner.h    |   9 +-
 .../xla/service/ar_crs_combiner_test.cc       | 306 +++++++++++++++---
 .../compiler/xla/service/hlo_instruction.cc   |   4 +
 .../compiler/xla/service/hlo_instruction.h    |   5 +-
 5 files changed, 357 insertions(+), 112 deletions(-)

diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner.cc b/tensorflow/compiler/xla/service/ar_crs_combiner.cc
index 362bc44a1c..47d2c7e357 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner.cc
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner.cc
@@ -36,24 +36,40 @@ namespace {
 
 namespace m = match;
 
-// If the argument instruction is a CRS in the sequence
-// AR -> Convert -> Add -> CRS
-// then return the AR in the sequence.
-// TODO(b/117554291): Rewrite this to recognize more general patterns,
-// not just the specific one of AR -> Add -> Convert -> CRS.
-absl::optional<HloInstruction*> MatchesArCrsPattern(
-    HloInstruction* instruction) {
-  HloInstruction *ar, *convert, *add, *crs;
-  if (Match(instruction,
-            m::CrossReplicaSum(
-                &crs, m::Add(&add, m::Op(),
-                             m::Convert(&convert,
-                                        m::CrossReplicaSum(&ar, m::Op()))))) &&
-      ar->users().size() == 1 && ar->shape().element_type() == BF16 &&
-      convert->shape().element_type() == F32 && !crs->all_reduce_id()) {
-    return ar;
+// Returns true iff the argument instruction is an AllReduce, followed by a
+// certain sequence of instructions and then a CRS. It must be possible to move
+// the AR past each instruction in the sequence.
+bool MatchesArCrsPattern(HloInstruction* instruction) {
+  auto can_ar_move_past_instruction = [](HloInstruction* instruction) -> bool {
+    if (instruction->user_count() != 1) {
+      return false;
+    }
+    auto opcode = instruction->opcode();
+    return opcode == HloOpcode::kBitcast || opcode == HloOpcode::kTranspose ||
+           opcode == HloOpcode::kReshape || opcode == HloOpcode::kConvert ||
+           opcode == HloOpcode::kAdd || opcode == HloOpcode::kSubtract ||
+           opcode == HloOpcode::kMultiply;
+  };
+
+  auto computation_is_addition = [](HloComputation* c) {
+    return c->instruction_count() == 3 &&
+           Match(c->root_instruction(), m::Add(m::Parameter(), m::Parameter()));
+  };
+
+  if (!instruction->IsCrossModuleAllReduce() ||
+      !computation_is_addition(instruction->called_computations()[0]) ||
+      instruction->user_count() != 1) {
+    return false;
   }
-  return absl::optional<HloInstruction*>();
+  auto next = instruction->users()[0];
+  while (!next->IsCrossReplicaAllReduce()) {
+    if (can_ar_move_past_instruction(next)) {
+      next = next->users()[0];
+    } else {
+      return false;
+    }
+  }
+  return computation_is_addition(next->called_computations()[0]);
 }
 
 }  // namespace
@@ -195,9 +211,8 @@ bool ArCrsCombiner::InstructionsComputeSameValue(
 void ArCrsCombiner::GroupAllReducesById(HloModule* module) {
   for (HloComputation* computation : module->MakeNonfusionComputations()) {
     for (HloInstruction* instruction : computation->instructions()) {
-      auto ar = MatchesArCrsPattern(instruction);
-      if (ar) {
-        all_reduce_map_[*((*ar)->all_reduce_id())].push_back(*ar);
+      if (MatchesArCrsPattern(instruction)) {
+        all_reduce_map_[*(instruction->all_reduce_id())].push_back(instruction);
       }
     }
   }
@@ -205,21 +220,23 @@ void ArCrsCombiner::GroupAllReducesById(HloModule* module) {
 
 void ArCrsCombiner::KeepProvablyEqualInstructionGroups() {
   for (auto it : all_reduce_map_) {
+    auto all_reduce_id = it.first;
     auto instruction_vec = it.second;
     CHECK_EQ(instruction_vec.size(), num_spatial_partitions_);
-
     auto instr_0 = instruction_vec[0];
-    auto add_0 = instr_0->users()[0]->users()[0];
-    CHECK_EQ(HloOpcode::kAdd, add_0->opcode());
-
     for (int i = 1; i < instruction_vec.size(); ++i) {
       auto instr_i = instruction_vec[i];
-      auto add_i = instr_i->users()[0]->users()[0];
-      CHECK_EQ(HloOpcode::kAdd, add_i->opcode());
+      auto next_0 = instr_0->users()[0];
+      auto next_i = instr_i->users()[0];
       absl::flat_hash_map<int64, int64> visited_pairs;
-      if (!InstructionsComputeSameValue(add_0, add_i, &visited_pairs)) {
-        all_reduce_map_.erase(it.first);
-      }
+      do {
+        if (!InstructionsComputeSameValue(next_0, next_i, &visited_pairs)) {
+          all_reduce_map_.erase(all_reduce_id);
+          break;
+        }
+        next_0 = next_0->users()[0];
+        next_i = next_i->users()[0];
+      } while (!next_0->IsCrossReplicaAllReduce());
     }
   }
 }
@@ -228,47 +245,51 @@ StatusOr<bool> ArCrsCombiner::RewriteGraph() {
   if (all_reduce_map_.empty()) {
     return false;
   }
-
-  auto computation_is_addition = [](HloComputation* c) {
-    return c->instruction_count() == 3 &&
-           Match(c->root_instruction(), m::Add(m::Parameter(), m::Parameter()));
-  };
-
   for (auto it : all_reduce_map_) {
     auto instruction_vec = it.second;
     for (auto all_reduce : instruction_vec) {
       auto parent_computation = all_reduce->parent();
-      auto convert = all_reduce->users()[0];
-      auto add = convert->users()[0];
-      auto crs = add->users()[0];
-
-      if (!computation_is_addition(all_reduce->called_computations()[0]) ||
-          !computation_is_addition(crs->called_computations()[0])) {
-        continue;
+      auto all_reduce_id = all_reduce->all_reduce_id();
+      auto prev = all_reduce->mutable_operand(0);
+      auto next = all_reduce->users()[0];
+      TF_CHECK_OK(all_reduce->ReplaceUseWith(next, prev));
+      TF_CHECK_OK(parent_computation->RemoveInstruction(all_reduce));
+      while (!next->IsCrossReplicaAllReduce()) {
+        switch (next->opcode()) {
+          case HloOpcode::kBitcast:
+          case HloOpcode::kTranspose:
+          case HloOpcode::kReshape:
+          case HloOpcode::kConvert:
+          case HloOpcode::kMultiply:
+            break;
+          case HloOpcode::kAdd:
+          case HloOpcode::kSubtract: {
+            auto other_operand = (next->operands()[0] == prev)
+                                     ? next->operands()[1]
+                                     : next->operands()[0];
+            // To move the AR past the addition/subtraction, we need to divide
+            // other_operand by the number of spatial partitions.
+            auto shape = other_operand->shape();
+            Literal lit(shape);
+            lit.PopulateWithValue<float>(num_spatial_partitions_);
+            auto divisor = parent_computation->AddInstruction(
+                HloInstruction::CreateConstant(lit.Clone()));
+            auto division =
+                parent_computation->AddInstruction(HloInstruction::CreateBinary(
+                    shape, HloOpcode::kDivide, other_operand, divisor));
+            TF_CHECK_OK(other_operand->ReplaceUseWith(next, division));
+            break;
+          }
+          default:
+            LOG(FATAL) << "Unexpected instruction: " << next->ToShortString();
+        }
+        prev = next;
+        next = next->users()[0];
       }
-      HloInstruction* other_summand = (add->operands()[0] == convert)
-                                          ? add->operands()[1]
-                                          : add->operands()[0];
-      // To move the AR past the addition, we need to divide other_summand by
-      // the number of spatial partitions.
-      CHECK_EQ(all_reduce->user_count(), 1);
-      TF_CHECK_OK(
-          all_reduce->ReplaceAllUsesWith(all_reduce->mutable_operand(0)));
-      auto shape = other_summand->shape();
-      Literal lit(shape);
-      lit.PopulateWithValue<float>(num_spatial_partitions_);
-      auto divisor = parent_computation->AddInstruction(
-          HloInstruction::CreateConstant(lit.Clone()));
-      auto division =
-          parent_computation->AddInstruction(HloInstruction::CreateBinary(
-              shape, HloOpcode::kDivide, other_summand, divisor));
-      TF_CHECK_OK(other_summand->ReplaceUseWith(add, division));
       // The AllReduce and the CRS are combined to an all-core AllReduce.
-      crs->set_all_reduce_id(all_reduce->all_reduce_id());
-      TF_CHECK_OK(parent_computation->RemoveInstruction(all_reduce));
+      next->set_all_reduce_id(all_reduce_id);
     }
   }
-
   return true;
 }
 
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner.h b/tensorflow/compiler/xla/service/ar_crs_combiner.h
index f6a7ef76ec..6be7e1002d 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner.h
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner.h
@@ -25,9 +25,12 @@ limitations under the License.
 
 namespace xla {
 
-// Combine an AllReduce and a CrossReplicaSum when they are close to each other
-// in the graph, to use an efficient CrossReplicaSum implementation that
-// fully utilizes the interconnect bandwidth.
+// When the HLO graph contains an AllReduce, followed by some simple linear
+// operations, followed by a CrossReplicaSum, we can combine the AR and the CRS,
+// to use an efficient CrossReplicaSum implementation that fully utilizes the
+// interconnect bandwidth.
+// Such sequences appear in spatially partitioned models.
+// This pass must run right after spatial partitioning.
 class ArCrsCombiner : public HloModulePass {
  public:
   ArCrsCombiner(int num_spatial_partitions)
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
index 10171835d8..2f7a53bfc8 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
@@ -326,11 +326,27 @@ ENTRY %WhileLoop () -> (f32[2,2], f32[2,2]) {
   EXPECT_FALSE(ArCrsCombiner::TestInstructionsComputeSameValue(i1, i2));
 }
 
-TEST_F(ArCrsCombinerTest, RewritePatternArConvertAddCrs) {
+void CompareReplicaGroups(const std::vector<ReplicaGroup>& groups_before,
+                          const std::vector<ReplicaGroup>& groups_after) {
+  ASSERT_EQ(groups_before.size(), groups_after.size());
+  for (int i = 0; i < groups_before.size(); ++i) {
+    // Somewhat verbose way to compare the replica_ids, because EqualsProto
+    // is not available in the open-source build.
+    auto group_before = groups_before[i];
+    std::vector<int64> ids_before(group_before.replica_ids().begin(),
+                                  group_before.replica_ids().end());
+    auto group_after = groups_after[i];
+    std::vector<int64> ids_after(group_after.replica_ids().begin(),
+                                 group_after.replica_ids().end());
+    EXPECT_EQ(ids_before, ids_after);
+  }
+}
+
+TEST_F(ArCrsCombinerTest, RewriteArConvertCrs) {
   const char* module_str = R"(
 HloModule foobar
 
-%binary_add (a: bf16[], b: bf16[]) -> bf16[] {
+%sum.bf16 (a: bf16[], b: bf16[]) -> bf16[] {
   %a = bf16[] parameter(0)
   %b = bf16[] parameter(1)
   ROOT %add = bf16[] add(%a, %b)
@@ -342,48 +358,257 @@ HloModule foobar
   ROOT %add = f32[] add(%x, %y)
 }
 
-ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
-  %p = f32[2,2] parameter(0)
-  %constant.bf16 = bf16[2,2] constant(bf16[2,2] {{1, 2}, {3, 4}})
-  %constant.f32 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
+ENTRY %entrycomp (p: bf16[]) -> (f32[], f32[]) {
+  %p = bf16[] parameter(0)
+
+  %cross-replica-sum.ar.1 = bf16[]
+      cross-replica-sum(%p),
+      replica_groups={{0},{1}},
+      all_reduce_id=1,
+      to_apply=%sum.bf16,
+      sharding={maximal device=0}
+  %convert.1 = f32[]
+      convert(%cross-replica-sum.ar.1),
+      sharding={maximal device=0}
+  %cross-replica-sum.1 = f32[]
+      cross-replica-sum(%convert.1),
+      replica_groups={{0,1}},
+      to_apply=%sum.f32,
+      sharding={maximal device=0}
+
+  %cross-replica-sum.ar.2 = bf16[]
+      cross-replica-sum(%p),
+      replica_groups={{0},{1}},
+      all_reduce_id=1,
+      to_apply=%sum.bf16,
+      sharding={maximal device=1}
+  %convert.2 = f32[]
+      convert(%cross-replica-sum.ar.2),
+      sharding={maximal device=1}
+  %cross-replica-sum.2 = f32[]
+      cross-replica-sum(%convert.2),
+      replica_groups={{0,1}},
+      to_apply=%sum.f32,
+      sharding={maximal device=1}
+
+  ROOT %tuple = (f32[], f32[])
+      tuple(%cross-replica-sum.1, %cross-replica-sum.2),
+      sharding={{maximal device=0}, {maximal device=1}}
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  auto crs_before =
+      module->entry_computation()->root_instruction()->operands()[0];
+  auto replica_groups_before = crs_before->replica_groups();
+  ArCrsCombiner combiner(2);
+  auto changed = combiner.Run(module.get()).ValueOrDie();
+  EXPECT_TRUE(changed);
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::CrossReplicaSum(op::Convert(op::Parameter())),
+                        op::CrossReplicaSum(op::Convert(op::Parameter()))));
+  auto crs_after =
+      module->entry_computation()->root_instruction()->operands()[0];
+  auto replica_groups_after = crs_after->replica_groups();
+  CompareReplicaGroups(replica_groups_before, replica_groups_after);
+}
+
+TEST_F(ArCrsCombinerTest, RewriteArBitcastCrs) {
+  const char* module_str = R"(
+HloModule foobar
+
+%sum.1 (a: f32[2,1], b: f32[2,1]) -> f32[2,1] {
+  %a = f32[2,1] parameter(0)
+  %b = f32[2,1] parameter(1)
+  ROOT %add = f32[2,1] add(%a, %b)
+}
+
+%sum.2 (x: f32[2], y: f32[2]) -> f32[2] {
+  %x = f32[2] parameter(0)
+  %y = f32[2] parameter(1)
+  ROOT %add = f32[2] add(%x, %y)
+}
+
+ENTRY %entrycomp (p: f32[2,1]) -> (f32[2], f32[2]) {
+  %p = f32[2,1] parameter(0)
+
+  %cross-replica-sum.ar.1 = f32[2,1]
+      cross-replica-sum(%p),
+      replica_groups={{0},{1}},
+      all_reduce_id=1,
+      to_apply=%sum.1,
+      sharding={maximal device=0}
+  %bitcast.1 = f32[2]{0} bitcast(f32[2,1]{1,0} %cross-replica-sum.ar.1)
+  %cross-replica-sum.1 = f32[2]
+      cross-replica-sum(%bitcast.1),
+      replica_groups={{0,1}},
+      to_apply=%sum.2,
+      sharding={maximal device=0}
+
+  %cross-replica-sum.ar.2 = f32[2,1]
+      cross-replica-sum(%p),
+      replica_groups={{0},{1}},
+      all_reduce_id=1,
+      to_apply=%sum.1,
+      sharding={maximal device=1}
+  %bitcast.2 = f32[2]{0} bitcast(f32[2,1]{1,0} %cross-replica-sum.ar.2)
+  %cross-replica-sum.2 = f32[2]
+      cross-replica-sum(%bitcast.2),
+      replica_groups={{0,1}},
+      to_apply=%sum.2,
+      sharding={maximal device=1}
+
+  ROOT %tuple = (f32[], f32[])
+      tuple(%cross-replica-sum.1, %cross-replica-sum.2),
+      sharding={{maximal device=0}, {maximal device=1}}
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  auto crs_before =
+      module->entry_computation()->root_instruction()->operands()[0];
+  auto replica_groups_before = crs_before->replica_groups();
+  ArCrsCombiner combiner(2);
+  auto changed = combiner.Run(module.get()).ValueOrDie();
+  EXPECT_TRUE(changed);
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Tuple(op::CrossReplicaSum(op::Bitcast(op::Parameter())),
+                        op::CrossReplicaSum(op::Bitcast(op::Parameter()))));
+  auto crs_after =
+      module->entry_computation()->root_instruction()->operands()[0];
+  auto replica_groups_after = crs_after->replica_groups();
+  CompareReplicaGroups(replica_groups_before, replica_groups_after);
+}
 
-  %cross-replica-sum.ar.1 = bf16[2,2]
+TEST_F(ArCrsCombinerTest, RewriteArMultiplyCrs) {
+  const char* module_str = R"(
+HloModule foobar
+
+%sum.f32 (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(%x, %y)
+}
+
+ENTRY %entrycomp (p: f32[]) -> (f32[], f32[]) {
+  %p = f32[] parameter(0)
+  %constant.f32 = f32[] constant(123)
+
+  %cross-replica-sum.ar.1 = f32[]
+      cross-replica-sum(%p),
+      replica_groups={{0},{1}},
+      all_reduce_id=1,
+      to_apply=%sum.f32,
+      sharding={maximal device=0}
+  %multiply.1 = f32[]
+      multiply(%cross-replica-sum.ar.1, %constant.f32),
+      sharding={maximal device=0}
+  %cross-replica-sum.1 = f32[]
+      cross-replica-sum(%multiply.1),
+      replica_groups={{0,1}},
+      to_apply=%sum.f32,
+      sharding={maximal device=0}
+
+  %cross-replica-sum.ar.2 = f32[]
+      cross-replica-sum(%p),
+      replica_groups={{0},{1}},
+      all_reduce_id=1,
+      to_apply=%sum.f32,
+      sharding={maximal device=1}
+  %multiply.2 = f32[]
+      multiply(%cross-replica-sum.ar.2, %constant.f32),
+      sharding={maximal device=1}
+  %cross-replica-sum.2 = f32[]
+      cross-replica-sum(%multiply.2),
+      replica_groups={{0,1}},
+      to_apply=%sum.f32,
+      sharding={maximal device=1}
+
+  ROOT %tuple = (f32[], f32[])
+      tuple(%cross-replica-sum.1, %cross-replica-sum.2),
+      sharding={{maximal device=0}, {maximal device=1}}
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
+                          ParseAndReturnVerifiedModule(module_str));
+  auto crs_before =
+      module->entry_computation()->root_instruction()->operands()[0];
+  auto replica_groups_before = crs_before->replica_groups();
+  ArCrsCombiner combiner(2);
+  auto changed = combiner.Run(module.get()).ValueOrDie();
+  EXPECT_TRUE(changed);
+  EXPECT_THAT(
+      module->entry_computation()->root_instruction(),
+      op::Tuple(
+          op::CrossReplicaSum(op::Multiply(op::Parameter(), op::Constant())),
+          op::CrossReplicaSum(op::Multiply(op::Parameter(), op::Constant()))));
+  auto crs_after =
+      module->entry_computation()->root_instruction()->operands()[0];
+  auto replica_groups_after = crs_after->replica_groups();
+  CompareReplicaGroups(replica_groups_before, replica_groups_after);
+}
+
+TEST_F(ArCrsCombinerTest, RewriteArConvertAddCrs) {
+  const char* module_str = R"(
+HloModule foobar
+
+%sum.bf16 (a: bf16[], b: bf16[]) -> bf16[] {
+  %a = bf16[] parameter(0)
+  %b = bf16[] parameter(1)
+  ROOT %add = bf16[] add(%a, %b)
+}
+
+%sum.f32 (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(%x, %y)
+}
+
+ENTRY %entrycomp (p: f32[]) -> (f32[], f32[]) {
+  %p = f32[] parameter(0)
+  %constant.bf16 = bf16[] constant(1)
+  %constant.f32 = f32[] constant(2)
+
+  %cross-replica-sum.ar.1 = bf16[]
       cross-replica-sum(%constant.bf16),
       replica_groups={{0},{1}},
       all_reduce_id=1,
-      to_apply=%binary_add,
+      to_apply=%sum.bf16,
       sharding={maximal device=0}
-  %convert.1 = f32[2,2]
+  %convert.1 = f32[]
       convert(%cross-replica-sum.ar.1),
       sharding={maximal device=0}
-  %add.1 = f32[2,2]
+  %add.1 = f32[]
       add(%constant.f32, %convert.1),
       sharding={maximal device=0}
-  %cross-replica-sum.1 = f32[2,2]
+  %cross-replica-sum.1 = f32[]
       cross-replica-sum(%add.1),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=0}
 
-  %cross-replica-sum.ar.2 = bf16[2,2]
+  %cross-replica-sum.ar.2 = bf16[]
       cross-replica-sum(%constant.bf16),
       replica_groups={{0},{1}},
       all_reduce_id=1,
-      to_apply=%binary_add,
+      to_apply=%sum.bf16,
       sharding={maximal device=1}
-  %convert.2 = f32[2,2]
+  %convert.2 = f32[]
       convert(%cross-replica-sum.ar.2),
       sharding={maximal device=1}
-  %add.2 = f32[2,2]
+  %add.2 = f32[]
       add(%constant.f32, %convert.2),
       sharding={maximal device=1}
-  %cross-replica-sum.2 = f32[2,2]
+  %cross-replica-sum.2 = f32[]
       cross-replica-sum(%add.2),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=1}
 
-  ROOT %tuple = (f32[2,2], f32[2,2])
+  ROOT %tuple = (f32[], f32[])
       tuple(%cross-replica-sum.1, %cross-replica-sum.2),
       sharding={{maximal device=0}, {maximal device=1}}
 }
@@ -407,25 +632,14 @@ ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
   auto crs_after =
       module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_after = crs_after->replica_groups();
-  ASSERT_EQ(replica_groups_before.size(), replica_groups_after.size());
-  for (int i = 0; i < replica_groups_before.size(); ++i) {
-    // Somewhat verbose way to compare the replica_ids, because EqualsProto
-    // is not available in the open-source build.
-    auto group_before = replica_groups_before[i];
-    std::vector<int64> ids_before(group_before.replica_ids().begin(),
-                                  group_before.replica_ids().end());
-    auto group_after = replica_groups_after[i];
-    std::vector<int64> ids_after(group_after.replica_ids().begin(),
-                                 group_after.replica_ids().end());
-    EXPECT_EQ(ids_before, ids_after);
-  }
+  CompareReplicaGroups(replica_groups_before, replica_groups_after);
 }
 
 TEST_F(ArCrsCombinerTest, OtherSummandNotTheSameDontRewrite) {
   const char* module_str = R"(
 HloModule foobar
 
-%binary_add (a: bf16[], b: bf16[]) -> bf16[] {
+%sum.bf16 (a: bf16[], b: bf16[]) -> bf16[] {
   %a = bf16[] parameter(0)
   %b = bf16[] parameter(1)
   ROOT %add = bf16[] add(%a, %b)
@@ -437,49 +651,49 @@ HloModule foobar
   ROOT %add = f32[] add(%x, %y)
 }
 
-ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
-  %p = f32[2,2] parameter(0)
-  %constant.bf16 = bf16[2,2] constant(bf16[2,2] {{1, 2}, {3, 4}})
-  %constant.f32.1 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
-  %constant.f32.2 = f32[2,2] constant(f32[2,2] {{3, 4}, {5, 6}})
+ENTRY %entrycomp (p: f32[]) -> (f32[], f32[]) {
+  %p = f32[] parameter(0)
+  %constant.bf16 = bf16[] constant(1)
+  %constant.f32.1 = f32[] constant(2)
+  %constant.f32.2 = f32[] constant(3)
 
-  %cross-replica-sum.ar.1 = bf16[2,2]
+  %cross-replica-sum.ar.1 = bf16[]
       cross-replica-sum(%constant.bf16),
       replica_groups={{0},{1}},
       all_reduce_id=1,
-      to_apply=%binary_add,
+      to_apply=%sum.bf16,
       sharding={maximal device=0}
-  %convert.1 = f32[2,2]
+  %convert.1 = f32[]
       convert(%cross-replica-sum.ar.1),
       sharding={maximal device=0}
-  %add.1 = f32[2,2]
+  %add.1 = f32[]
       add(%constant.f32.1, %convert.1),
       sharding={maximal device=0}
-  %cross-replica-sum.1 = f32[2,2]
+  %cross-replica-sum.1 = f32[]
       cross-replica-sum(%add.1),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=0}
 
-  %cross-replica-sum.ar.2 = bf16[2,2]
+  %cross-replica-sum.ar.2 = bf16[]
       cross-replica-sum(%constant.bf16),
       replica_groups={{0},{1}},
       all_reduce_id=1,
-      to_apply=%binary_add,
+      to_apply=%sum.bf16,
       sharding={maximal device=1}
-  %convert.2 = f32[2,2]
+  %convert.2 = f32[]
       convert(%cross-replica-sum.ar.2),
       sharding={maximal device=1}
-  %add.2 = f32[2,2]
+  %add.2 = f32[]
       add(%constant.f32.2, %convert.2),
       sharding={maximal device=1}
-  %cross-replica-sum.2 = f32[2,2]
+  %cross-replica-sum.2 = f32[]
       cross-replica-sum(%add.2),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=1}
 
-  ROOT %tuple = (f32[2,2], f32[2,2])
+  ROOT %tuple = (f32[], f32[])
       tuple(%cross-replica-sum.1, %cross-replica-sum.2),
       sharding={{maximal device=0}, {maximal device=1}}
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 152a451c18..c57d9c1e86 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2060,6 +2060,10 @@ bool HloInstruction::IsCrossModuleAllReduce() const {
   return opcode() == HloOpcode::kCrossReplicaSum && all_reduce_id();
 }
 
+bool HloInstruction::IsCrossReplicaAllReduce() const {
+  return opcode() == HloOpcode::kCrossReplicaSum && !all_reduce_id();
+}
+
 string HloInstruction::ToStringWithCanonicalNameMap(
     const HloPrintOptions& options,
     CanonicalNameMap* canonical_name_map) const {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index a54716217d..a312b6bf0d 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -1174,9 +1174,12 @@ class HloInstruction {
   // Returns true if this instruction is elementwise on all its operands.
   bool IsElementwise() const;
 
-  // Returns true if this is an cross module all-reduce instrucion.
+  // Returns true if this is a cross module all-reduce instruction.
   bool IsCrossModuleAllReduce() const;
 
+  // Returns true if this is a cross-replica all-reduce instruction.
+  bool IsCrossReplicaAllReduce() const;
+
   // Returns true if this elementwise instruction implicitly broadcasts operand
   // `operand_idx`.
   //
-- 
GitLab


From 33bc0b978858bafa56cce5679dc41f4ab408b77c Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 11 Dec 2018 16:20:16 -0800
Subject: [PATCH 389/873] Internal change.

PiperOrigin-RevId: 225091038
---
 third_party/gpus/crosstool/BUILD.tpl | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl
index c8812fab33..1260b265ab 100644
--- a/third_party/gpus/crosstool/BUILD.tpl
+++ b/third_party/gpus/crosstool/BUILD.tpl
@@ -22,6 +22,7 @@ cc_toolchain_suite(
         "local|compiler": ":cc-compiler-local",
         "darwin|compiler": ":cc-compiler-darwin",
         "x64_windows|msvc-cl": ":cc-compiler-windows",
+        "x64_windows": ":cc-compiler-windows",
     },
 )
 
@@ -41,6 +42,7 @@ cc_toolchain(
     # last on the command line and contain all shared libraries to link, so all
     # regular options will be left of them.
     supports_param_files = 1,
+    toolchain_identifier = "local_linux",
 )
 
 cc_toolchain(
@@ -55,6 +57,7 @@ cc_toolchain(
     static_runtime_libs = [":empty"],
     strip_files = ":empty",
     supports_param_files = 0,
+    toolchain_identifier = "local_darwin",
 )
 
 cc_toolchain(
@@ -69,6 +72,7 @@ cc_toolchain(
     static_runtime_libs = [":empty"],
     strip_files = ":empty",
     supports_param_files = 1,
+    toolchain_identifier = "local_windows",
 )
 
 filegroup(
-- 
GitLab


From bafb8747983fbcf186ffb063ed39dbb0a18e3c8e Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Tue, 11 Dec 2018 16:24:24 -0800
Subject: [PATCH 390/873] Improve CUDA runtime dependencies search.

tensorflow::CudaRoot() now may return multiple possible locations of the CUDA root.
PiperOrigin-RevId: 225091635
---
 .../xla/service/gpu/nvptx_compiler.cc         | 82 ++++++++++---------
 .../compiler/xla/service/gpu/nvptx_compiler.h |  2 +-
 tensorflow/core/BUILD                         | 16 +---
 .../core/platform/cuda_libdevice_path.cc      | 26 ------
 .../core/platform/cuda_libdevice_path.h       | 10 +--
 .../core/platform/cuda_libdevice_path_test.cc | 35 --------
 .../platform/default/cuda_libdevice_path.cc   |  5 +-
 7 files changed, 53 insertions(+), 123 deletions(-)
 delete mode 100644 tensorflow/core/platform/cuda_libdevice_path.cc
 delete mode 100644 tensorflow/core/platform/cuda_libdevice_path_test.cc

diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index f3e17d8882..60f2116e60 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -108,27 +108,33 @@ namespace {
 
 namespace tracing = tensorflow::tracing;
 
-// Returns the directory containing nvvm libdevice files.  config_cuda_data_dir
-// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
-// HloModule being compiled.
-string GetLibdeviceDir(const string& config_cuda_data_dir) {
-  std::vector<string> potential_libdevice_dirs;
-  if (!config_cuda_data_dir.empty()) {
-    potential_libdevice_dirs.push_back(config_cuda_data_dir);
-  }
-  potential_libdevice_dirs.push_back(tensorflow::LibdeviceRoot());
-
-  // Tries all potential libdevice directories in the order they are inserted.
-  // Returns the first directory that exists in the file system.
-  for (const string& potential_libdevice_dir : potential_libdevice_dirs) {
-    if (tensorflow::Env::Default()->IsDirectory(potential_libdevice_dir).ok()) {
-      VLOG(2) << "Found libdevice dir " << potential_libdevice_dir;
-      return potential_libdevice_dir;
+// Returns a vector of potential locations of the CUDA root directory.
+std::vector<string> GetCudaRootCandidates(
+    const HloModuleConfig& hlo_module_config) {
+  std::vector<string> potential_cuda_roots = tensorflow::CandidateCudaRoots();
+
+  // CUDA location explicitly specified by user via --xla_gpu_cuda_data_dir has
+  // highest priority.
+  string xla_gpu_cuda_data_dir =
+      hlo_module_config.debug_options().xla_gpu_cuda_data_dir();
+  if (!xla_gpu_cuda_data_dir.empty()) {
+    potential_cuda_roots.insert(potential_cuda_roots.begin(),
+                                xla_gpu_cuda_data_dir);
+  }
+  return potential_cuda_roots;
+}
+
+// Returns the directory containing nvvm libdevice files.
+string GetLibdeviceDir(const HloModuleConfig& hlo_module_config) {
+  for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
+    string libdevice_dir =
+        tensorflow::io::JoinPath(cuda_root, "nvvm", "libdevice");
+    VLOG(2) << "Looking for libdevice at " << libdevice_dir;
+    if (tensorflow::Env::Default()->IsDirectory(libdevice_dir).ok()) {
+      VLOG(2) << "Found libdevice dir " << libdevice_dir;
+      return libdevice_dir;
     }
-    VLOG(2) << "Unable to find potential libdevice dir "
-            << potential_libdevice_dir;
   }
-
   LOG(WARNING) << "Unable to find libdevice dir. Using '.'";
   // Last resort: maybe in the current folder.
   return ".";
@@ -478,14 +484,19 @@ void WarnIfBadDriverJITVersion() {
 
 // Compiles the given PTX string using ptxas and returns the resulting machine
 // code (i.e. a cubin) as a byte array.
-StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
-                                        int cc_minor,
-                                        bool disable_ptx_optimizations) {
+StatusOr<std::vector<uint8>> CompilePtx(
+    const string& ptx, int cc_major, int cc_minor,
+    const HloModuleConfig& hlo_module_config) {
   tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
-  const string ptxas_path =
-      tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
-  VLOG(2) << "Checking ptxas at " << ptxas_path;
   auto env = tensorflow::Env::Default();
+  string ptxas_path;
+  for (const string& cuda_root : GetCudaRootCandidates(hlo_module_config)) {
+    ptxas_path = tensorflow::io::JoinPath(cuda_root, "bin", "ptxas");
+    VLOG(2) << "Looking for ptxas at " << ptxas_path;
+    if (env->FileExists(ptxas_path).ok()) {
+      break;
+    }
+  }
   TF_RETURN_IF_ERROR(env->FileExists(ptxas_path));
   VLOG(2) << "Using ptxas at " << ptxas_path;
 
@@ -520,7 +531,7 @@ StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
   if (VLOG_IS_ON(2)) {
     ptxas_args.push_back("-v");
   }
-  if (disable_ptx_optimizations) {
+  if (hlo_module_config.debug_options().xla_gpu_disable_ptxas_optimizations()) {
     ptxas_args.push_back("-O0");
   }
   ptxas_info_dumper.SetProgram(ptxas_path, ptxas_args);
@@ -685,12 +696,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
     // Find the directory containing libdevice.  To avoid searching for it every
     // time, we have a one-element cache, keyed on the module's config's
     // cuda_data_dir.
-    const auto& config_cuda_data_dir =
-        module->config().debug_options().xla_gpu_cuda_data_dir();
-    if (cached_libdevice_dir_.empty() ||
-        cached_cuda_data_dir_ != config_cuda_data_dir) {
-      cached_cuda_data_dir_ = config_cuda_data_dir;
-      cached_libdevice_dir_ = GetLibdeviceDir(config_cuda_data_dir);
+    if (cached_libdevice_dir_.empty()) {
+      cached_libdevice_dir_ = GetLibdeviceDir(module->config());
     }
     libdevice_dir = cached_libdevice_dir_;
   }
@@ -743,9 +750,8 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
     }
   }
 
-  const std::vector<uint8> cubin = CompilePtxOrGetCachedResult(
-      ptx, cc_major, cc_minor,
-      module->config().debug_options().xla_gpu_disable_ptxas_optimizations());
+  const std::vector<uint8> cubin =
+      CompilePtxOrGetCachedResult(ptx, cc_major, cc_minor, module->config());
 
   auto thunk_schedule = absl::make_unique<ThunkSchedule>(
       ir_emitter.ConsumeThunkSequence(), std::move(stream_assignment),
@@ -779,7 +785,7 @@ StatusOr<std::unique_ptr<Executable>> NVPTXCompiler::RunBackend(
 
 std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
     const string& ptx, int cc_major, int cc_minor,
-    bool disable_ptx_optimizations) {
+    const HloModuleConfig& hlo_module_config) {
   XLA_SCOPED_LOGGING_TIMER("NVPTXCompiler::CompilePtxOrGetCachedResult");
   tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
   bool inserted;
@@ -807,8 +813,8 @@ std::vector<uint8> NVPTXCompiler::CompilePtxOrGetCachedResult(
     if (inserted) {
       CHECK(!cache_value->compilation_done);
       if (!ptx.empty()) {
-        StatusOr<std::vector<uint8>> maybe_cubin = CompilePtx(
-            *cache_ptx, cc_major, cc_minor, disable_ptx_optimizations);
+        StatusOr<std::vector<uint8>> maybe_cubin =
+            CompilePtx(*cache_ptx, cc_major, cc_minor, hlo_module_config);
         if (maybe_cubin.ok()) {
           cache_value->cubin_data = std::move(maybe_cubin).ValueOrDie();
           VLOG(2) << "Compiled PTX size:" << ptx.size()
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
index be5e31a501..b2077f42fd 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h
@@ -99,7 +99,7 @@ class NVPTXCompiler : public LLVMCompiler {
   // compiled cubin.  If compilation was unsuccessful, returns an empty vector.
   std::vector<uint8> CompilePtxOrGetCachedResult(
       const string& ptx, int cc_major, int cc_minor,
-      bool disable_ptx_optimizations);
+      const HloModuleConfig& hlo_module_config);
 
   // The compilation_cache_ map is a cache from {ptx string, cc_major, cc_minor}
   // -> cubin so we don't recompile the same ptx twice.  This is important for
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 5f5ca63540..d92f0ba655 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -4062,20 +4062,6 @@ tf_cuda_cc_test(
     ],
 )
 
-tf_cc_test_gpu(
-    name = "cuda_libdevice_path_test",
-    size = "small",
-    srcs = ["platform/cuda_libdevice_path_test.cc"],
-    linkstatic = tf_kernel_tests_linkstatic(),
-    tags = tf_cuda_tests_tags(),
-    deps = [
-        ":cuda_libdevice_path",
-        ":lib",
-        ":test",
-        ":test_main",
-    ],
-)
-
 tf_cuda_only_cc_test(
     name = "util_cuda_kernel_helper_test",
     srcs = [
@@ -4931,7 +4917,7 @@ filegroup(
 
 cc_library(
     name = "cuda_libdevice_path",
-    srcs = ["platform/cuda_libdevice_path.cc"] + tf_additional_libdevice_srcs(),
+    srcs = tf_additional_libdevice_srcs(),
     hdrs = ["platform/cuda_libdevice_path.h"],
     copts = tf_copts(),
     data = tf_additional_libdevice_data(),
diff --git a/tensorflow/core/platform/cuda_libdevice_path.cc b/tensorflow/core/platform/cuda_libdevice_path.cc
deleted file mode 100644
index 4d6532b983..0000000000
--- a/tensorflow/core/platform/cuda_libdevice_path.cc
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/cuda_libdevice_path.h"
-
-#include "tensorflow/core/lib/io/path.h"
-
-namespace tensorflow {
-
-string LibdeviceRoot() {
-  return tensorflow::io::JoinPath(tensorflow::CudaRoot(), "nvvm/libdevice");
-}
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/platform/cuda_libdevice_path.h b/tensorflow/core/platform/cuda_libdevice_path.h
index 6ef565ecd3..f2dbff9043 100644
--- a/tensorflow/core/platform/cuda_libdevice_path.h
+++ b/tensorflow/core/platform/cuda_libdevice_path.h
@@ -16,16 +16,14 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
 #define TENSORFLOW_CORE_PLATFORM_CUDA_LIBDEVICE_PATH_H_
 
+#include <vector>
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 
-// Returns the root directory of the CUDA SDK, which contains sub-folders such
-// as bin, lib64, and nvvm.
-string CudaRoot();
-
-// Returns the directory that contains nvvm libdevice files in the CUDA SDK.
-string LibdeviceRoot();
+// Returns, in order of preference, potential locations of the root directory of
+// the CUDA SDK, which contains sub-folders such as bin, lib64, and nvvm.
+std::vector<string> CandidateCudaRoots();
 
 }  // namespace tensorflow
 
diff --git a/tensorflow/core/platform/cuda_libdevice_path_test.cc b/tensorflow/core/platform/cuda_libdevice_path_test.cc
deleted file mode 100644
index 2d34239a99..0000000000
--- a/tensorflow/core/platform/cuda_libdevice_path_test.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/cuda_libdevice_path.h"
-
-#include "tensorflow/core/lib/core/status_test_util.h"
-#include "tensorflow/core/lib/io/path.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/test.h"
-
-namespace tensorflow {
-
-#if GOOGLE_CUDA
-TEST(CudaLibdevicePathTest, LibdevicePath) {
-  VLOG(2) << "Libdevice root = " << LibdeviceRoot();
-  std::vector<string> libdevice_files;
-  TF_EXPECT_OK(Env::Default()->GetMatchingPaths(
-      io::JoinPath(LibdeviceRoot(), "libdevice.*.bc"), &libdevice_files));
-  EXPECT_LT(0, libdevice_files.size());
-}
-#endif
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/platform/default/cuda_libdevice_path.cc b/tensorflow/core/platform/default/cuda_libdevice_path.cc
index 20ee3ad621..a8b2e7202a 100644
--- a/tensorflow/core/platform/default/cuda_libdevice_path.cc
+++ b/tensorflow/core/platform/default/cuda_libdevice_path.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/platform/cuda_libdevice_path.h"
 
 #include <stdlib.h>
+#include <vector>
 
 #if !defined(PLATFORM_GOOGLE)
 #include "cuda/cuda_config.h"
@@ -24,9 +25,9 @@ limitations under the License.
 
 namespace tensorflow {
 
-string CudaRoot() {
+std::vector<string> CandidateCudaRoots() {
   VLOG(3) << "CUDA root = " << TF_CUDA_TOOLKIT_PATH;
-  return TF_CUDA_TOOLKIT_PATH;
+  return {TF_CUDA_TOOLKIT_PATH};
 }
 
 }  // namespace tensorflow
-- 
GitLab


From b3bd83e826af4019f6d958b2f7d36853082120ca Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Tue, 11 Dec 2018 16:35:10 -0800
Subject: [PATCH 391/873] Fix clang-format issues

---
 tensorflow/compiler/jit/xla_device.h            |  1 -
 tensorflow/compiler/jit/xla_gpu_device.cc       |  2 +-
 tensorflow/compiler/xla/client/client_library.h |  6 +++---
 tensorflow/compiler/xla/service/backend.h       |  6 +++---
 tensorflow/compiler/xla/service/platform_util.h |  2 +-
 tensorflow/compiler/xla/service/service.cc      | 12 ++++++------
 tensorflow/compiler/xla/service/service.h       |  2 +-
 7 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h
index 90d544e2a1..f1bc882f2c 100644
--- a/tensorflow/compiler/jit/xla_device.h
+++ b/tensorflow/compiler/jit/xla_device.h
@@ -168,7 +168,6 @@ class XlaDevice : public LocalDevice {
 
   bool RequiresSyncOnCompletion() const override LOCKS_EXCLUDED(mu_);
 
-
   // A simple RAII handle. On construction the device's
   // outstanding_asynchronous_operations_ field is incremented; on destruction
   // it is decremented.
diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index 877af52dde..e84a784607 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -100,7 +100,7 @@ Status XlaGpuDeviceFactory::CreateDevices(
     options.device_ordinal = i;
     options.compilation_device_name = DEVICE_GPU_XLA_JIT;
     options.use_multiple_streams = true;
-    options.allowed_devices=gpu_ids;
+    options.allowed_devices = gpu_ids;
     auto device = absl::make_unique<XlaDevice>(session_options, options);
 
     Status status = device->UseGpuDeviceInfo();
diff --git a/tensorflow/compiler/xla/client/client_library.h b/tensorflow/compiler/xla/client/client_library.h
index d180c8f7ca..1e6e4c6bf3 100644
--- a/tensorflow/compiler/xla/client/client_library.h
+++ b/tensorflow/compiler/xla/client/client_library.h
@@ -23,9 +23,9 @@ limitations under the License.
 
 #include <functional>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
-#include <set>
 
 #include "tensorflow/compiler/xla/client/compile_only_client.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
@@ -81,8 +81,8 @@ class ClientLibrary {
   //
   //   platform : The platform the underlying XLA service should target. If
   //     null then default platform is used.
-  //   device_set: Set of device IDs for which the stream executor will be created
-  //   for, for the given platform.
+  //   device_set: Set of device IDs for which the stream executor will be
+  //   created for, for the given platform.
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
       se::Platform* platform = nullptr, const std::set<int> device_set = {-1});
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h
index 26bae43c1a..3fc0f8de62 100644
--- a/tensorflow/compiler/xla/service/backend.h
+++ b/tensorflow/compiler/xla/service/backend.h
@@ -18,9 +18,9 @@ limitations under the License.
 
 #include <map>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
-#include <set>
 
 #include "absl/strings/str_cat.h"
 #include "absl/types/span.h"
@@ -114,8 +114,8 @@ class Backend {
   se::StreamExecutor* default_stream_executor() const {
     CHECK(!stream_executors_.empty());
 
-    for(se::StreamExecutor* e :stream_executors_){
-      if(e){
+    for (se::StreamExecutor* e : stream_executors_) {
+      if (e) {
         return e;
       }
     }
diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h
index 04af762fb6..89291e8b74 100644
--- a/tensorflow/compiler/xla/service/platform_util.h
+++ b/tensorflow/compiler/xla/service/platform_util.h
@@ -16,9 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_PLATFORM_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_PLATFORM_UTIL_H_
 
+#include <set>
 #include <string>
 #include <vector>
-#include <set>
 
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index b9e5be486a..06a16e8c79 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -160,13 +160,13 @@ Service::Service(const ServiceOptions& options,
     LOG(INFO) << StrFormat(
         "XLA service %p executing computations on platform %s. Devices:", this,
         execute_backend_->platform()->Name());
-    auto stream_executors=execute_backend_->stream_executors();
+    auto stream_executors = execute_backend_->stream_executors();
     for (int i = 0; i < execute_backend_->device_count(); ++i) {
-        se::StreamExecutor* executor =stream_executors.at(i);
-        const auto& description = executor->GetDeviceDescription();
-        LOG(INFO) << StrFormat("  StreamExecutor device (%d): %s, %s", i,
-                               description.name(),
-                               description.platform_version());
+      se::StreamExecutor* executor = stream_executors.at(i);
+      const auto& description = executor->GetDeviceDescription();
+      LOG(INFO) << StrFormat("  StreamExecutor device (%d): %s, %s", i,
+                             description.name(),
+                             description.platform_version());
     }
   } else {
     VLOG(1) << "XLA compile-only service constructed";
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 95a504ee30..b6c0039ccc 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -18,9 +18,9 @@ limitations under the License.
 
 #include <functional>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
-#include <set>
 
 #include "absl/types/span.h"
 #include "tensorflow/compiler/xla/debug_options_flags.h"
-- 
GitLab


From fc220a61b71bd3e348aee311bff3b25117550865 Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Tue, 11 Dec 2018 16:30:04 -0800
Subject: [PATCH 392/873] Remove deprecated tf.substr

PiperOrigin-RevId: 225092500
---
 tensorflow/python/ops/string_ops.py                  | 9 +++++----
 tensorflow/tools/api/golden/v2/tensorflow.pbtxt      | 4 ----
 tensorflow/tools/compatibility/reorders_v2.py        | 1 +
 tensorflow/tools/compatibility/tf_upgrade_v2.py      | 7 +++++--
 tensorflow/tools/compatibility/tf_upgrade_v2_test.py | 7 +++++++
 5 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py
index 046459706c..9967f48060 100644
--- a/tensorflow/python/ops/string_ops.py
+++ b/tensorflow/python/ops/string_ops.py
@@ -367,7 +367,7 @@ def string_length_v2(input, unit="BYTE", name=None):
 string_length.__doc__ = gen_string_ops.string_length.__doc__
 
 
-@tf_export("substr")
+@tf_export(v1=["substr"])
 @deprecation.deprecated(None, "Use `tf.strings.substr` instead of `tf.substr`.")
 def substr_deprecated(input, pos, len, name=None, unit="BYTE"):
   return substr(input, pos, len, name=name, unit=unit)
@@ -380,14 +380,15 @@ substr_deprecated.__doc__ = gen_string_ops.substr.__doc__
 def substr(input, pos, len, name=None, unit="BYTE"):
   return gen_string_ops.substr(input, pos, len, unit=unit, name=name)
 
+substr.__doc__ = gen_string_ops.substr.__doc__
+
 
 @tf_export("strings.substr", v1=[])
 @dispatch.add_dispatch_support
 def substr_v2(input, pos, len, unit="BYTE", name=None):
-  return substr(input, pos, len, name=name, unit=unit)
-
+  return gen_string_ops.substr(input, pos, len, unit=unit, name=name)
 
-substr.__doc__ = gen_string_ops.substr.__doc__
+substr_v2.__doc__ = gen_string_ops.substr.__doc__
 
 
 ops.NotDifferentiable("RegexReplace")
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index ee81e86fd5..574b6778fa 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -980,10 +980,6 @@ tf_module {
     name: "string_split"
     argspec: "args=[\'source\', \'delimiter\', \'skip_empty\'], varargs=None, keywords=None, defaults=[\' \', \'True\'], "
   }
-  member_method {
-    name: "substr"
-    argspec: "args=[\'input\', \'pos\', \'len\', \'name\', \'unit\'], varargs=None, keywords=None, defaults=[\'None\', \'BYTE\'], "
-  }
   member_method {
     name: "subtract"
     argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/compatibility/reorders_v2.py b/tensorflow/tools/compatibility/reorders_v2.py
index 44494ac148..1c9fb92db0 100644
--- a/tensorflow/tools/compatibility/reorders_v2.py
+++ b/tensorflow/tools/compatibility/reorders_v2.py
@@ -109,6 +109,7 @@ reorders = {
     'tf.strings.length': ['input', 'name', 'unit'],
     'tf.strings.reduce_join': ['inputs', 'axis', 'keep_dims', 'separator', 'name', 'reduction_indices'],
     'tf.strings.substr': ['input', 'pos', 'len', 'name', 'unit'],
+    'tf.substr': ['input', 'pos', 'len', 'name', 'unit'],
     'tf.transpose': ['a', 'perm', 'name', 'conjugate'],
     'tf.tuple': ['tensors', 'name', 'control_inputs'],
     'tf.while_loop': ['cond', 'body', 'loop_vars', 'shape_invariants', 'parallel_iterations', 'back_prop', 'swap_memory', 'name', 'maximum_iterations', 'return_same_structure']
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py
index ea86da42f6..427e22b721 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py
@@ -492,6 +492,8 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             "tf.sparse.reduce_max",
         "tf.random.stateless_multinomial":
             "tf.random.stateless_categorical",
+        "tf.substr":
+            "tf.strings.substr",
         "tf.string_to_hash_bucket":
             "tf.strings.to_hash_bucket",
         "tf.string_to_number":
@@ -600,9 +602,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         "tf.sparse.reduce_max",
         "tf.sparse_reduce_max",
         "tf.io.decode_csv",
-        "tf.strings.substr",
-        "tf.strings.reduce_join",
         "tf.strings.length",
+        "tf.strings.reduce_join",
+        "tf.strings.substr",
+        "tf.substr",
         "tf.transpose",
         "tf.tuple",
         "tf.parse_example",
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index 2cc874fe7f..484900d000 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -443,6 +443,13 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     )
     self.assertEqual(new_text, expected_text)
 
+  def test_substr(self):
+    text = "tf.substr(input, pos, len, name, unit)\n"
+    _, unused_report, errors, new_text = self._upgrade(text)
+    self.assertEqual("tf.strings.substr(input=input, pos=pos, len=len, "
+                     "name=name, unit=unit)\n", new_text)
+    self.assertEqual(errors, [])
+
   def testColocateGradientsWithOps(self):
     text = "tf.gradients(a, foo=False)\n"
     _, unused_report, errors, new_text = self._upgrade(text)
-- 
GitLab


From 9b8005ece04fc815b84fbd032c3374ab82976360 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Tue, 11 Dec 2018 16:40:04 -0800
Subject: [PATCH 393/873] Cleanup some duplicated methods for UnifiedLSTM.

The methods in the parent class should work the same way.

PiperOrigin-RevId: 225094141
---
 tensorflow/python/keras/layers/recurrent.py | 43 +--------------------
 1 file changed, 1 insertion(+), 42 deletions(-)

diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 1c6f2bd3f8..93cb805d08 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -2530,6 +2530,7 @@ class LSTM(RNN):
       config['implementation'] = 1
     return cls(**config)
 
+
 @tf_export('keras.layers.LSTM', v1=[])
 class UnifiedLSTM(LSTM):
   """Long Short-Term Memory layer - Hochreiter 1997.
@@ -2655,8 +2656,6 @@ class UnifiedLSTM(LSTM):
     self.state_spec = [
         InputSpec(shape=(None, dim)) for dim in (self.units, self.units)
     ]
-    self._num_constants = None
-    self._num_inputs = None
     self._dropout_mask = None
     self.could_use_cudnn = (
         activation == 'tanh' and recurrent_activation == 'sigmoid' and
@@ -2775,46 +2774,6 @@ class UnifiedLSTM(LSTM):
     else:
       return output
 
-  @property
-  def trainable_weights(self):
-    if self.trainable:
-      weights = []
-      weights += self.cell.trainable_weights
-      return weights
-    return []
-
-  @property
-  def non_trainable_weights(self):
-    if not self.trainable:
-      weights = []
-      weights += self.cell.non_trainable_weights
-      return weights
-    return []
-
-  @property
-  def losses(self):
-    losses = []
-    losses += self.cell.losses
-    return losses + self._losses
-
-  @property
-  def updates(self):
-    updates = []
-    updates += self.cell.updates
-    return updates + self._updates
-
-  def get_weights(self):
-    weights = []
-    weights += self.cell.weights
-    return K.batch_get_value(weights)
-
-  def set_weights(self, weights):
-    tuples = []
-    cell_weights = weights[:len(self.cell.weights)]
-    if cell_weights:
-      tuples.append((self.cell.weights, cell_weights))
-    K.batch_set_value(tuples)
-
 
 def _canonical_to_params(weights, biases, shape, transpose_weights=False):
   """Utility function convert variable to CuDNN compatible parameter.
-- 
GitLab


From a54fd6b71313ccf22c9fe115bceb256dcef27435 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 11 Dec 2018 16:42:40 -0800
Subject: [PATCH 394/873] [TF:XLA] Bump open source abseil revision to
 455dc17ba1af9635f0b60155bc565bc572a1e722

PiperOrigin-RevId: 225094534
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 5210df240d..f8b6bd1a3f 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -123,11 +123,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "3ad76de484192b2d5afd49d90492b5ed0bc59eb1a4e8e0deecc7a2a077a90251",
-        strip_prefix = "abseil-cpp-f197d7c72a54064cfde5a2058f1513a4a0ee36fb",
+        sha256 = "be91500afe4d2768a7aeeeae616d9f7fc4fe237a1493b630883dbf8f20d4682d",
+        strip_prefix = "abseil-cpp-455dc17ba1af9635f0b60155bc565bc572a1e722",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/f197d7c72a54064cfde5a2058f1513a4a0ee36fb.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/f197d7c72a54064cfde5a2058f1513a4a0ee36fb.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/455dc17ba1af9635f0b60155bc565bc572a1e722.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/455dc17ba1af9635f0b60155bc565bc572a1e722.tar.gz",
         ],
     )
 
-- 
GitLab


From e8c65fa77fb7473d95988fa23e51c906a428b27a Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Tue, 11 Dec 2018 16:45:06 -0800
Subject: [PATCH 395/873] Small refactor to improve the readability of the
 Model class for those who use the code as documentation. General idea: most
 important methods come first, private utilities are moved to the bottom of
 the class. Also use a single method for `_standardize_user_data` (previously
 split into 2 methods that did not reflect two separate sets of actions).

PiperOrigin-RevId: 225094903
---
 tensorflow/python/keras/engine/training.py    | 4363 ++++++++---------
 .../keras/engine/training_distributed.py      |    7 +-
 2 files changed, 2158 insertions(+), 2212 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index fe44bc20a1..75d6496988 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -128,272 +128,411 @@ class Model(Network):
 
     self.run_eagerly = None
 
-  def _set_sample_weight_attributes(self, sample_weight_mode,
-                                    skip_target_weighing_indices):
-    """Sets sample weight related attributes on the model."""
-    sample_weights, sample_weight_modes = training_utils.prepare_sample_weights(
-        self.output_names, sample_weight_mode, skip_target_weighing_indices)
-    self.sample_weights = sample_weights
-    self.sample_weight_modes = sample_weight_modes
-    self._feed_sample_weight_modes = [
-        sample_weight_modes[i]
-        for i in range(len(self.outputs))
-        if i not in skip_target_weighing_indices
-    ]
-    self._feed_sample_weights = [
-        sample_weights[i]
-        for i in range(len(sample_weights))
-        if i not in skip_target_weighing_indices
-    ]
-
-  def _cache_output_metric_attributes(self, metrics, weighted_metrics):
-    """Caches metric name and function attributes for every model output."""
-    output_shapes = [
-        None if output is None else output.get_shape().as_list()
-        for output in self.outputs
-    ]
-    self._per_output_metrics = training_utils.collect_per_output_metric_info(
-        metrics, self.output_names, output_shapes, self.loss_functions)
-    self._per_output_weighted_metrics = \
-        training_utils.collect_per_output_metric_info(
-            weighted_metrics, self.output_names, output_shapes,
-            self.loss_functions, self.sample_weights)
-
-  def _add_unique_metric_name(self, metric_name, output_index):
-    """Makes the metric name unique and adds it to the model's metric name list.
-
-      If there are multiple outputs for which the metrics are calculated, the
-      metric names have to be made unique by appending an integer.
+  @checkpointable.no_automatic_dependency_tracking
+  def compile(self,
+              optimizer,
+              loss=None,
+              metrics=None,
+              loss_weights=None,
+              sample_weight_mode=None,
+              weighted_metrics=None,
+              target_tensors=None,
+              distribute=None,
+              **kwargs):
+    """Configures the model for training.
 
     Arguments:
-      metric_name: Metric name that corresponds to the metric specified by the
-          user. For example: 'acc'.
-      output_index: The index of the model output for which the metric name is
-        being added.
+        optimizer: String (name of optimizer) or optimizer instance.
+            See [optimizers](/api_docs/python/tf/keras/optimizers).
+        loss: String (name of objective function) or objective function.
+            See [losses](/api_docs/python/tf/losses).
+            If the model has multiple outputs, you can use a different loss
+            on each output by passing a dictionary or a list of losses.
+            The loss value that will be minimized by the model
+            will then be the sum of all individual losses.
+        metrics: List of metrics to be evaluated by the model
+            during training and testing.
+            Typically you will use `metrics=['accuracy']`.
+            To specify different metrics for different outputs of a
+            multi-output model, you could also pass a dictionary,
+            such as `metrics={'output_a': 'accuracy'}`.
+        loss_weights: Optional list or dictionary specifying scalar
+            coefficients (Python floats) to weight the loss contributions
+            of different model outputs.
+            The loss value that will be minimized by the model
+            will then be the *weighted sum* of all individual losses,
+            weighted by the `loss_weights` coefficients.
+            If a list, it is expected to have a 1:1 mapping
+            to the model's outputs. If a tensor, it is expected to map
+            output names (strings) to scalar coefficients.
+        sample_weight_mode: If you need to do timestep-wise
+            sample weighting (2D weights), set this to `"temporal"`.
+            `None` defaults to sample-wise weights (1D).
+            If the model has multiple outputs, you can use a different
+            `sample_weight_mode` on each output by passing a
+            dictionary or a list of modes.
+        weighted_metrics: List of metrics to be evaluated and weighted
+            by sample_weight or class_weight during training and testing.
+        target_tensors: By default, Keras will create placeholders for the
+            model's target, which will be fed with the target data during
+            training. If instead you would like to use your own
+            target tensors (in turn, Keras will not expect external
+            Numpy data for these targets at training time), you
+            can specify them via the `target_tensors` argument. It can be
+            a single tensor (for a single-output model), a list of tensors,
+            or a dict mapping output names to target tensors.
+        distribute: The DistributionStrategy instance that we want to use to
+            distribute the training of the model.
+        **kwargs: These arguments are passed to `tf.Session.run`.
 
-    Returns:
-      string, name of the model's unique metric name
+    Raises:
+        ValueError: In case of invalid arguments for
+            `optimizer`, `loss`, `metrics` or `sample_weight_mode`.
     """
-    if len(self.output_names) > 1:
-      metric_name = '%s_%s' % (self.output_names[output_index], metric_name)
-    j = 1
-    base_metric_name = metric_name
-    while metric_name in self._compile_metrics_names:
-      metric_name = '%s_%d' % (base_metric_name, j)
-      j += 1
+    run_eagerly = kwargs.pop('run_eagerly', None)
+    self._run_eagerly = run_eagerly
 
-    return metric_name
+    # Validate that arguments passed by the user to `compile` are supported by
+    # DistributionStrategy.
+    if distribute:
+      if not isinstance(
+          optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
+        raise NotImplementedError(
+            'optimizer must be an instance of '
+            'tf.train.Optimizer, not a %s' % type(optimizer))
+      if sample_weight_mode:
+        raise NotImplementedError('sample_weight_mode is not supported with '
+                                  'DistributionStrategy.')
+      if weighted_metrics:
+        raise NotImplementedError('weighted_metrics is not supported with '
+                                  'DistributionStrategy.')
+      if target_tensors:
+        raise ValueError('target_tensors is not supported with '
+                         'DistributionStrategy.')
 
-  @property
-  def metrics(self):
-    """Returns the model's metrics added using `compile`, `add_metric` APIs."""
-    metrics = []
-    if self._is_compiled:
-      metrics += self._compile_stateful_metric_functions
-    return metrics + super(Model, self).metrics
+    loss = loss or {}
+    if self.run_eagerly and not isinstance(
+        optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
+      raise ValueError(
+          'When running a model in eager execution, the optimizer must be an '
+          'instance of tf.train.Optimizer. Received: '
+          '%s' % optimizer)
 
-  @property
-  def metrics_names(self):
-    """Returns the model's display labels for all outputs."""
-    metrics_names = []
-    if self._is_compiled:
-      metrics_names += self._compile_metrics_names  # Includes names of losses.
+    self.optimizer = optimizers.get(optimizer)
+    # We've disabled automatic dependency tracking for this method, but do want
+    # to add a checkpoint dependency on the optimizer if it's checkpointable.
+    if isinstance(self.optimizer, checkpointable.CheckpointableBase):
+      self._track_checkpointable(
+          self.optimizer, name='optimizer', overwrite=True)
+    self.loss = loss
+    self._compile_metrics = metrics or []
+    self.loss_weights = loss_weights
+    self.sample_weight_mode = sample_weight_mode
+    self._compile_weighted_metrics = weighted_metrics
+    if self.run_eagerly and target_tensors is not None:
+      raise ValueError(
+          'target_tensors argument is not supported when '
+          'running a model eagerly.')
+    self.target_tensors = target_tensors
 
-    # Add metric names from layers.
-    for layer in self.layers:
-      metrics_names += [m.name for m in layer._metrics]  # pylint: disable=protected-access
-    metrics_names += [m.name for m in self._metrics]
-    return metrics_names
+    # Set DistributionStrategy specific parameters.
+    self._distribution_strategy = distribute
+    # Reset the value of grouped_model
+    self._grouped_model = None
+    if self._distribution_strategy is not None:
+      distributed_training_utils.configure_and_create_session(
+          self._distribution_strategy)
+    # Initialize model metric attributes.
+    self._init_metric_attributes()
+    if not self.built:
+      # Model is not compilable because it does not know its number of inputs
+      # and outputs, nor their shapes and names. We will compile after the first
+      # time the model gets called on training data.
+      return
+    self._is_compiled = True
 
-  @property
-  def _all_metrics_tensors(self):
-    """Returns the network's symbolic metric tensors."""
-    metrics_tensors = {}
-    if self._is_compiled:
-      metrics_tensors.update(self._compile_metrics_tensors)
-    metrics_tensors.update(super(Model, self)._all_metrics_tensors)
-    return metrics_tensors
+    # Prepare loss functions.
+    if isinstance(loss, dict):
+      for name in loss:
+        if name not in self.output_names:
+          raise ValueError(
+              'Unknown entry in loss '
+              'dictionary: "' + name + '". '
+              'Only expected the following keys: ' + str(self.output_names))
+      loss_functions = []
+      for name in self.output_names:
+        if name not in loss:
+          logging.warning(
+              'Output "' + name +
+              '" missing from loss dictionary. We assume '
+              'this was done on purpose. The fit and evaluate APIs will not be '
+              'expecting any data to be passed to "' + name + '".')
+        loss_functions.append(training_utils.get_loss_function(loss.get(name)))
+    elif isinstance(loss, list):
+      if len(loss) != len(self.outputs):
+        raise ValueError('When passing a list as loss, '
+                         'it should have one entry per model outputs. '
+                         'The model has ' + str(len(self.outputs)) +
+                         ' outputs, but you passed loss=' + str(loss))
+      loss_functions = [training_utils.get_loss_function(l) for l in loss]
+    else:
+      loss_function = training_utils.get_loss_function(loss)
+      loss_functions = [loss_function for _ in range(len(self.outputs))]
+    self.loss_functions = loss_functions
 
-  @property
-  def _all_stateful_metrics_tensors(self):
-    """Returns the network's symbolic metric tensors."""
-    metrics_tensors = {}
-    if self._is_compiled:
-      metrics_tensors.update(self._compile_stateful_metrics_tensors)
-    metrics_tensors.update(super(Model, self)._all_metrics_tensors)
-    return metrics_tensors
+    skip_target_indices = []
+    skip_target_weighing_indices = []
+    self._feed_outputs = []
+    self._feed_output_names = []
+    self._feed_output_shapes = []
+    self._feed_loss_fns = []
+    for i in range(len(loss_functions)):
+      if loss_functions[i] is None:
+        skip_target_indices.append(i)
+        skip_target_weighing_indices.append(i)
 
-  def _init_metric_attributes(self):
-    """Initialized model metric attributes."""
-    # List of all metric names in the model.
-    self._compile_metrics_names = ['loss']
-    # List of stateful metric functions. Used for resetting metric state during
-    # training/eval.
-    # This includes loss functions when there are multiple outputs.
-    self._compile_stateful_metric_functions = []
-    # Dict of all aggregated metric result tensors. This includes aggregated
-    # loss result tensors when there are multiple outputs.
-    self._compile_stateful_metrics_tensors = {}
-    # Dict of all metric result tensors (aggregated or not - based on the
-    # values given in compile.). This includes aggregated loss result tensors
-    # when there are multiple outputs.
-    self._compile_metrics_tensors = {}
+    # Prepare output masks.
+    if not self.run_eagerly:
+      masks = [getattr(x, '_keras_mask', None) for x in self.outputs]
+      if not isinstance(masks, list):
+        masks = [masks]
 
-  def _set_per_output_metric_attributes(self, metrics_dict, output_index):
-    """Sets the metric attributes on the model for the given output.
+    # Prepare loss weights.
+    if loss_weights is None:
+      loss_weights_list = [1. for _ in range(len(self.outputs))]
+    elif isinstance(loss_weights, dict):
+      for name in loss_weights:
+        if name not in self.output_names:
+          raise ValueError(
+              'Unknown entry in loss_weights '
+              'dictionary: "' + name + '". '
+              'Only expected the following keys: ' + str(self.output_names))
+      loss_weights_list = []
+      for name in self.output_names:
+        loss_weights_list.append(loss_weights.get(name, 1.))
+    elif isinstance(loss_weights, list):
+      if len(loss_weights) != len(self.outputs):
+        raise ValueError(
+            'When passing a list as loss_weights, '
+            'it should have one entry per model output. '
+            'The model has ' + str(len(self.outputs)) +
+            ' outputs, but you passed loss_weights=' + str(loss_weights))
+      loss_weights_list = loss_weights
+    else:
+      raise TypeError('Could not interpret loss_weights argument: ' +
+                      str(loss_weights) + ' - expected a list of dicts.')
+    self.loss_weights_list = loss_weights_list
 
-    Arguments:
-      metrics_dict: A dict with metric names as keys and metric fns as values.
-      output_index: The index of the model output for which the metric
-        attributes are added.
+    # Initialization for Eager mode execution.
+    if self.run_eagerly:
+      # Prepare sample weights.
+      self._set_sample_weight_attributes(sample_weight_mode,
+                                         skip_target_weighing_indices)
+      # Save all metric attributes per output of the model.
+      self._cache_output_metric_attributes(metrics, weighted_metrics)
 
-    Returns:
-      Metrics dict updated with unique metric names as keys.
-    """
-    updated_metrics_dict = collections.OrderedDict()
-    for metric_name, (metric_fn, stateful_metric_fn) in metrics_dict.items():
-      metric_name = self._add_unique_metric_name(metric_name, output_index)
-      updated_metrics_dict[metric_name] = (metric_fn, stateful_metric_fn)
-      # Keep track of metric name, function and stateful function.
-      self._compile_metrics_names.append(metric_name)
-      self._compile_stateful_metric_functions.append(stateful_metric_fn)
-    return updated_metrics_dict
+      if target_tensors is not None:
+        raise ValueError('target_tensors are not currently supported in Eager '
+                         'mode.')
+      self.total_loss = None
+      for i in range(len(self.outputs)):
+        if len(self.outputs) > 1:
+          self._compile_metrics_names.append(self.output_names[i] + '_loss')
 
-  def _set_metric_attributes(self, outputs, skip_target_indices=None):
-    """Sets the metric attributes on the model for all the model outputs."""
-    skip_target_indices = skip_target_indices or []
-    updated_per_output_metrics = []
-    updated_per_output_weighted_metrics = []
-    for i in range(len(outputs)):
-      if i in skip_target_indices:
-        updated_per_output_metrics.append(self._per_output_metrics[i])
-        updated_per_output_weighted_metrics.append(
-            self._per_output_weighted_metrics[i])
-        continue
-      updated_per_output_metrics.append(
-          self._set_per_output_metric_attributes(self._per_output_metrics[i],
-                                                 i))
-      updated_per_output_weighted_metrics.append(
-          self._set_per_output_metric_attributes(
-              self._per_output_weighted_metrics[i], i))
+      # Set metric attributes on model.
+      self._set_metric_attributes(
+          self.outputs,
+          skip_target_indices=skip_target_indices,
+      )
 
-    self._per_output_metrics = updated_per_output_metrics
-    self._per_output_weighted_metrics = updated_per_output_weighted_metrics
+      self.targets = []
+      for i in range(len(self.outputs)):
+        self._feed_output_names.append(self.output_names[i])
+      self._collected_trainable_weights = self.trainable_weights
+      return
 
-  def _handle_per_output_metrics(self,
-                                 metrics_dict,
-                                 y_true,
-                                 y_pred,
-                                 mask,
-                                 weights=None,
-                                 return_stateful_result=True):
-    """Calls metric functions for a single output.
+    with K.get_graph().as_default():
+      # Prepare targets of model.
+      self.targets = []
+      self._feed_targets = []
+      if target_tensors not in (None, []):
+        if isinstance(target_tensors, list):
+          if len(target_tensors) != len(self.outputs):
+            raise ValueError(
+                'When passing a list as `target_tensors`, '
+                'it should have one entry per model output. '
+                'The model has %s outputs, but you passed target_tensors=%s' %
+                (len(self.outputs), target_tensors))
+        elif isinstance(target_tensors, dict):
+          for name in target_tensors:
+            if name not in self.output_names:
+              raise ValueError(
+                  'Unknown entry in `target_tensors` '
+                  'dictionary: "' + name + '". '
+                  'Only expected the following keys: ' + str(self.output_names))
+          tmp_target_tensors = []
+          for name in self.output_names:
+            tmp_target_tensors.append(target_tensors.get(name, None))
+          target_tensors = tmp_target_tensors
+        elif tensor_util.is_tensor(target_tensors):
+          target_tensors = [target_tensors]
+        else:
+          raise TypeError('Expected `target_tensors` to be a list or tuple or '
+                          'dict or a single tensor, but got:', target_tensors)
 
-    Arguments:
-      metrics_dict: A dict with metric names as keys and metric fns as values.
-      y_true: Target output.
-      y_pred: Predicted output.
-      mask: Computed mask value for the current output.
-      weights: Weights to be applied on the current output.
-      return_stateful_result: Boolean, indicates whether the stateful
-        (aggregated)/stateless metric result should be returned.
+      for i in range(len(self.outputs)):
+        if i in skip_target_indices:
+          self.targets.append(None)
+        else:
+          shape = K.int_shape(self.outputs[i])
+          name = self.output_names[i]
+          if target_tensors not in (None, []):
+            target = target_tensors[i]
+          else:
+            target = None
+          if target is None or K.is_placeholder(target):
+            if target is None:
+              target_dtype = losses.LABEL_DTYPES_FOR_LOSSES.get(
+                  self.loss_functions[i],
+                  K.dtype(self.outputs[i]))
 
-    Returns:
-      A list of metric result tensors.
-    """
-    metric_results = []
-    for metric_name, (metric_fn, stateful_fn) in metrics_dict.items():
-      with K.name_scope(metric_name):
+              target = K.placeholder(
+                  ndim=len(shape),
+                  name=name + '_target',
+                  sparse=K.is_sparse(self.outputs[i]),
+                  dtype=target_dtype)
+            self._feed_targets.append(target)
+            self._feed_outputs.append(self.outputs[i])
+            self._feed_output_names.append(name)
+            self._feed_output_shapes.append(shape)
+            self._feed_loss_fns.append(self.loss_functions[i])
+          else:
+            skip_target_weighing_indices.append(i)
+          self.targets.append(target)
 
-        def _call_stateful_fn(fn):
-          return training_utils.call_metric_function(
-              fn, y_true, y_pred, weights=weights, mask=mask)
+      # Prepare sample weights.
+      self._set_sample_weight_attributes(sample_weight_mode,
+                                         skip_target_weighing_indices)
+      # Save all metric attributes per output of the model.
+      self._cache_output_metric_attributes(metrics, weighted_metrics)
 
-        def _call_stateless_fn(fn):
-          weighted_metric_fn = training_utils.weighted_masked_objective(fn)
-          return weighted_metric_fn(y_true, y_pred, weights=weights, mask=mask)
+      # Compute total loss.
+      total_loss = None
+      with K.name_scope('loss'):
+        for i in range(len(self.outputs)):
+          if i in skip_target_indices:
+            continue
+          y_true = self.targets[i]
+          y_pred = self.outputs[i]
+          loss_fn = loss_functions[i]
+          sample_weight = self.sample_weights[i]
+          mask = masks[i]
+          loss_weight = loss_weights_list[i]
+          with K.name_scope(self.output_names[i] + '_loss'):
+            if isinstance(loss_fn, losses.Loss):
+              if mask is not None:
+                mask = math_ops.cast(mask, y_pred.dtype)
+                # Update weights with mask.
+                if sample_weight is None:
+                  sample_weight = mask
+                else:
+                  # Update dimensions of weights to match with mask if possible.
+                  mask, _, sample_weight = squeeze_or_expand_dimensions(
+                      mask, None, sample_weight)
+                  sample_weight *= mask
+              output_loss = loss_fn(y_true, y_pred, sample_weight=sample_weight)
+            else:
+              weighted_loss = training_utils.weighted_masked_objective(loss_fn)
+              output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
 
-        def _track_metric_tensors(name, stateless_result, stateful_result):
-          self._compile_metrics_tensors[name] = stateless_result
-          self._compile_stateful_metrics_tensors[name] = stateful_result
+          if len(self.outputs) > 1:
+            # Keep track of the un-aggregated loss result tensor.
+            self._compile_metrics_tensors[self.output_names[i] +
+                                          '_loss'] = output_loss
 
-        if isinstance(metric_fn, metrics_module.Metric):
-          # If the given metric fn is stateful, call the fn and return result.
-          metric_result = _call_stateful_fn(metric_fn)
-          metric_results.append(metric_result)
-          if not self.run_eagerly:
-            _track_metric_tensors(metric_name, metric_result, metric_result)
-        elif self.run_eagerly:
-          # In eager mode, if the given metric fn is not stateful, we invoke the
-          # given fn or its stateful version based on the given flag.
-          if return_stateful_result:
-            metric_result = _call_stateful_fn(stateful_fn)
-          else:
-            metric_result = _call_stateless_fn(metric_fn)
-          metric_results.append(metric_result)
-        else:
-          # In graph mode, we build the sub-graph for both the stateful and the
-          # stateless fns.
-          stateful_metric_result = _call_stateful_fn(stateful_fn)
-          metric_result = _call_stateless_fn(metric_fn)
-          _track_metric_tensors(metric_name, metric_result,
-                                stateful_metric_result)
+            # Keep track of stateful result tensor and function for the loss.
+            loss_name = loss_fn.name if isinstance(
+                loss_fn, losses.Loss) else loss_fn.__name__
+            mean_wrapped_loss = metrics_module.MeanMetricWrapper(
+                loss_fn, name=loss_name)
+            result_tensor = training_utils.call_metric_function(
+                mean_wrapped_loss,
+                y_true,
+                y_pred,
+                weights=sample_weight,
+                mask=mask)
+            self._compile_stateful_metrics_tensors[self.output_names[i] +
+                                                   '_loss'] = result_tensor
+            self._compile_stateful_metric_functions.append(mean_wrapped_loss)
 
-    return metric_results
+            self._compile_metrics_names.append(self.output_names[i] + '_loss')
+          if total_loss is None:
+            total_loss = loss_weight * output_loss
+          else:
+            total_loss += loss_weight * output_loss
+        if total_loss is None:
+          if not self.losses:
+            raise ValueError('The model cannot be compiled '
+                             'because it has no loss to optimize.')
+          else:
+            total_loss = 0.
 
-  def _handle_metrics(self,
-                      outputs,
-                      skip_target_indices=None,
-                      targets=None,
-                      sample_weights=None,
-                      masks=None,
-                      return_stateful_result=True):
-    """Handles calling metric functions.
+        # Add regularization penalties
+        # and other layer-specific losses.
+        for loss_tensor in self.losses:
+          total_loss += loss_tensor
 
-    Arguments:
-      outputs: List of outputs (predictions).
-      skip_target_indices: Optional. List of target ids to skip.
-      targets: List of targets.
-      sample_weights: Optional list of sample weight arrays.
-      masks: List of computed output mask values.
-      return_stateful_result: Boolean, indicates whether the stateful
-        (aggregated)/stateless metric result should be returned.
+      # Set metric attributes on model.
+      self._set_metric_attributes(
+          self.outputs,
+          skip_target_indices=skip_target_indices,
+      )
+      # Invoke metric functions for all the outputs.
+      self._handle_metrics(
+          self.outputs,
+          masks=masks,
+          targets=self.targets,
+          skip_target_indices=skip_target_indices,
+          sample_weights=self.sample_weights)
 
-    Returns:
-      A list of metric result tensors.
-    """
-    skip_target_indices = skip_target_indices or []
-    metric_results = []
-    with K.name_scope('metrics'):
-      # Invoke all metrics added using `compile`.
-      for i in range(len(outputs)):
-        if i in skip_target_indices:
-          continue
-        output = outputs[i] if outputs else None
-        target = targets[i] if targets else None
-        output_mask = masks[i] if masks else None
-        metric_results.extend(
-            self._handle_per_output_metrics(
-                self._per_output_metrics[i],
-                target,
-                output,
-                output_mask,
-                return_stateful_result=return_stateful_result))
-        metric_results.extend(
-            self._handle_per_output_metrics(
-                self._per_output_weighted_metrics[i],
-                target,
-                output,
-                output_mask,
-                weights=sample_weights[i],
-                return_stateful_result=return_stateful_result))
+      # Prepare gradient updates and state updates.
+      self.total_loss = total_loss
 
-    # Add metric results from the `add_metric` metrics in eager mode.
-    if context.executing_eagerly():
-      for m in self.metrics:
-        if m not in self._compile_stateful_metric_functions:
-          metric_results.append(m.result())
-    return metric_results
+      # Functions for train, test and predict will
+      # be compiled lazily when required.
+      # This saves time when the user is not using all functions.
+      self._function_kwargs = kwargs
+
+      self._fit_function = None
+      self._eval_function = None
+      self.train_function = None
+      self.test_function = None
+      self.predict_function = None
+
+      # Collected trainable weights, sorted in topological order.
+      trainable_weights = self.trainable_weights
+      self._collected_trainable_weights = trainable_weights
+
+  @property
+  def metrics(self):
+    """Returns the model's metrics added using `compile`, `add_metric` APIs."""
+    metrics = []
+    if self._is_compiled:
+      metrics += self._compile_stateful_metric_functions
+    return metrics + super(Model, self).metrics
+
+  @property
+  def metrics_names(self):
+    """Returns the model's display labels for all outputs."""
+    metrics_names = []
+    if self._is_compiled:
+      metrics_names += self._compile_metrics_names  # Includes names of losses.
+
+    # Add metric names from layers.
+    for layer in self.layers:
+      metrics_names += [m.name for m in layer._metrics]  # pylint: disable=protected-access
+    metrics_names += [m.name for m in self._metrics]
+    return metrics_names
 
   @property
   def run_eagerly(self):
@@ -429,2159 +568,1969 @@ class Model(Network):
   def run_eagerly(self, value):
     self._run_eagerly = value
 
-  @checkpointable.no_automatic_dependency_tracking
-  def compile(self,
-              optimizer,
-              loss=None,
-              metrics=None,
-              loss_weights=None,
-              sample_weight_mode=None,
-              weighted_metrics=None,
-              target_tensors=None,
-              distribute=None,
-              **kwargs):
-    """Configures the model for training.
+  def fit(self,
+          x=None,
+          y=None,
+          batch_size=None,
+          epochs=1,
+          verbose=1,
+          callbacks=None,
+          validation_split=0.,
+          validation_data=None,
+          shuffle=True,
+          class_weight=None,
+          sample_weight=None,
+          initial_epoch=0,
+          steps_per_epoch=None,
+          validation_steps=None,
+          max_queue_size=10,
+          workers=1,
+          use_multiprocessing=False,
+          **kwargs):
+    """Trains the model for a fixed number of epochs (iterations on a dataset).
 
     Arguments:
-        optimizer: String (name of optimizer) or optimizer instance.
-            See [optimizers](/api_docs/python/tf/keras/optimizers).
-        loss: String (name of objective function) or objective function.
-            See [losses](/api_docs/python/tf/losses).
-            If the model has multiple outputs, you can use a different loss
-            on each output by passing a dictionary or a list of losses.
-            The loss value that will be minimized by the model
-            will then be the sum of all individual losses.
-        metrics: List of metrics to be evaluated by the model
-            during training and testing.
-            Typically you will use `metrics=['accuracy']`.
-            To specify different metrics for different outputs of a
-            multi-output model, you could also pass a dictionary,
-            such as `metrics={'output_a': 'accuracy'}`.
-        loss_weights: Optional list or dictionary specifying scalar
-            coefficients (Python floats) to weight the loss contributions
-            of different model outputs.
-            The loss value that will be minimized by the model
-            will then be the *weighted sum* of all individual losses,
-            weighted by the `loss_weights` coefficients.
-            If a list, it is expected to have a 1:1 mapping
-            to the model's outputs. If a tensor, it is expected to map
-            output names (strings) to scalar coefficients.
-        sample_weight_mode: If you need to do timestep-wise
-            sample weighting (2D weights), set this to `"temporal"`.
-            `None` defaults to sample-wise weights (1D).
-            If the model has multiple outputs, you can use a different
-            `sample_weight_mode` on each output by passing a
-            dictionary or a list of modes.
-        weighted_metrics: List of metrics to be evaluated and weighted
-            by sample_weight or class_weight during training and testing.
-        target_tensors: By default, Keras will create placeholders for the
-            model's target, which will be fed with the target data during
-            training. If instead you would like to use your own
-            target tensors (in turn, Keras will not expect external
-            Numpy data for these targets at training time), you
-            can specify them via the `target_tensors` argument. It can be
-            a single tensor (for a single-output model), a list of tensors,
-            or a dict mapping output names to target tensors.
-        distribute: The DistributionStrategy instance that we want to use to
-            distribute the training of the model.
-        **kwargs: These arguments are passed to `tf.Session.run`.
-
-    Raises:
-        ValueError: In case of invalid arguments for
-            `optimizer`, `loss`, `metrics` or `sample_weight_mode`.
-    """
-    run_eagerly = kwargs.pop('run_eagerly', None)
-    self._run_eagerly = run_eagerly
-
-    # Validate that arguments passed by the user to `compile` are supported by
-    # DistributionStrategy.
-    if distribute:
-      if not isinstance(
-          optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
-        raise NotImplementedError(
-            'optimizer must be an instance of '
-            'tf.train.Optimizer, not a %s' % type(optimizer))
-      if sample_weight_mode:
-        raise NotImplementedError('sample_weight_mode is not supported with '
-                                  'DistributionStrategy.')
-      if weighted_metrics:
-        raise NotImplementedError('weighted_metrics is not supported with '
-                                  'DistributionStrategy.')
-      if target_tensors:
-        raise ValueError('target_tensors is not supported with '
-                         'DistributionStrategy.')
-
-    loss = loss or {}
-    if self.run_eagerly and not isinstance(
-        optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
-      raise ValueError(
-          'When running a model in eager execution, the optimizer must be an '
-          'instance of tf.train.Optimizer. Received: '
-          '%s' % optimizer)
-
-    self.optimizer = optimizers.get(optimizer)
-    # We've disabled automatic dependency tracking for this method, but do want
-    # to add a checkpoint dependency on the optimizer if it's checkpointable.
-    if isinstance(self.optimizer, checkpointable.CheckpointableBase):
-      self._track_checkpointable(
-          self.optimizer, name='optimizer', overwrite=True)
-    self.loss = loss
-    self._compile_metrics = metrics or []
-    self.loss_weights = loss_weights
-    self.sample_weight_mode = sample_weight_mode
-    self._compile_weighted_metrics = weighted_metrics
-    if self.run_eagerly and target_tensors is not None:
-      raise ValueError(
-          'target_tensors argument is not supported when '
-          'running a model eagerly.')
-    self.target_tensors = target_tensors
-
-    # Set DistributionStrategy specific parameters.
-    self._distribution_strategy = distribute
-    # Reset the value of grouped_model
-    self._grouped_model = None
-    if self._distribution_strategy is not None:
-      distributed_training_utils.configure_and_create_session(
-          self._distribution_strategy)
-    # Initialize model metric attributes.
-    self._init_metric_attributes()
-    if not self.built:
-      # Model is not compilable because it does not know its number of inputs
-      # and outputs, nor their shapes and names. We will compile after the first
-      # time the model gets called on training data.
-      return
-    self._is_compiled = True
-
-    # Prepare loss functions.
-    if isinstance(loss, dict):
-      for name in loss:
-        if name not in self.output_names:
-          raise ValueError(
-              'Unknown entry in loss '
-              'dictionary: "' + name + '". '
-              'Only expected the following keys: ' + str(self.output_names))
-      loss_functions = []
-      for name in self.output_names:
-        if name not in loss:
-          logging.warning(
-              'Output "' + name +
-              '" missing from loss dictionary. We assume '
-              'this was done on purpose. The fit and evaluate APIs will not be '
-              'expecting any data to be passed to "' + name + '".')
-        loss_functions.append(training_utils.get_loss_function(loss.get(name)))
-    elif isinstance(loss, list):
-      if len(loss) != len(self.outputs):
-        raise ValueError('When passing a list as loss, '
-                         'it should have one entry per model outputs. '
-                         'The model has ' + str(len(self.outputs)) +
-                         ' outputs, but you passed loss=' + str(loss))
-      loss_functions = [training_utils.get_loss_function(l) for l in loss]
-    else:
-      loss_function = training_utils.get_loss_function(loss)
-      loss_functions = [loss_function for _ in range(len(self.outputs))]
-    self.loss_functions = loss_functions
-
-    skip_target_indices = []
-    skip_target_weighing_indices = []
-    self._feed_outputs = []
-    self._feed_output_names = []
-    self._feed_output_shapes = []
-    self._feed_loss_fns = []
-    for i in range(len(loss_functions)):
-      if loss_functions[i] is None:
-        skip_target_indices.append(i)
-        skip_target_weighing_indices.append(i)
-
-    # Prepare output masks.
-    if not self.run_eagerly:
-      masks = [getattr(x, '_keras_mask', None) for x in self.outputs]
-      if not isinstance(masks, list):
-        masks = [masks]
-
-    # Prepare loss weights.
-    if loss_weights is None:
-      loss_weights_list = [1. for _ in range(len(self.outputs))]
-    elif isinstance(loss_weights, dict):
-      for name in loss_weights:
-        if name not in self.output_names:
-          raise ValueError(
-              'Unknown entry in loss_weights '
-              'dictionary: "' + name + '". '
-              'Only expected the following keys: ' + str(self.output_names))
-      loss_weights_list = []
-      for name in self.output_names:
-        loss_weights_list.append(loss_weights.get(name, 1.))
-    elif isinstance(loss_weights, list):
-      if len(loss_weights) != len(self.outputs):
-        raise ValueError(
-            'When passing a list as loss_weights, '
-            'it should have one entry per model output. '
-            'The model has ' + str(len(self.outputs)) +
-            ' outputs, but you passed loss_weights=' + str(loss_weights))
-      loss_weights_list = loss_weights
-    else:
-      raise TypeError('Could not interpret loss_weights argument: ' +
-                      str(loss_weights) + ' - expected a list of dicts.')
-    self.loss_weights_list = loss_weights_list
-
-    # Initialization for Eager mode execution.
-    if self.run_eagerly:
-      # Prepare sample weights.
-      self._set_sample_weight_attributes(sample_weight_mode,
-                                         skip_target_weighing_indices)
-      # Save all metric attributes per output of the model.
-      self._cache_output_metric_attributes(metrics, weighted_metrics)
-
-      if target_tensors is not None:
-        raise ValueError('target_tensors are not currently supported in Eager '
-                         'mode.')
-      self.total_loss = None
-      for i in range(len(self.outputs)):
-        if len(self.outputs) > 1:
-          self._compile_metrics_names.append(self.output_names[i] + '_loss')
+        x: Input data. It could be:
+          - A Numpy array (or array-like), or a list of arrays
+            (in case the model has multiple inputs).
+          - A TensorFlow tensor, or a list of tensors
+            (in case the model has multiple inputs).
+          - A dict mapping input names to the corresponding array/tensors,
+            if the model has named inputs.
+          - A `tf.data` dataset or a dataset iterator. Should return a tuple
+            of either `(inputs, targets)` or
+            `(inputs, targets, sample_weights)`.
+          - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
+            or `(inputs, targets, sample weights)`.
+        y: Target data. Like the input data `x`,
+          it could be either Numpy array(s) or TensorFlow tensor(s).
+          It should be consistent with `x` (you cannot have Numpy inputs and
+          tensor targets, or inversely). If `x` is a dataset, dataset
+          iterator, generator, or `keras.utils.Sequence` instance, `y` should
+          not be specified (since targets will be obtained from `x`).
+        batch_size: Integer or `None`.
+            Number of samples per gradient update.
+            If unspecified, `batch_size` will default to 32.
+            Do not specify the `batch_size` if your data is in the
+            form of symbolic tensors, dataset, dataset iterators,
+            generators, or `keras.utils.Sequence` instances (since they generate
+            batches).
+        epochs: Integer. Number of epochs to train the model.
+            An epoch is an iteration over the entire `x` and `y`
+            data provided.
+            Note that in conjunction with `initial_epoch`,
+            `epochs` is to be understood as "final epoch".
+            The model is not trained for a number of iterations
+            given by `epochs`, but merely until the epoch
+            of index `epochs` is reached.
+        verbose: Integer. 0, 1, or 2. Verbosity mode.
+            0 = silent, 1 = progress bar, 2 = one line per epoch.
+        callbacks: List of `keras.callbacks.Callback` instances.
+            List of callbacks to apply during training.
+            See [callbacks](/api_docs/python/tf/keras/callbacks).
+        validation_split: Float between 0 and 1.
+            Fraction of the training data to be used as validation data.
+            The model will set apart this fraction of the training data,
+            will not train on it, and will evaluate
+            the loss and any model metrics
+            on this data at the end of each epoch.
+            The validation data is selected from the last samples
+            in the `x` and `y` data provided, before shuffling. This argument is
+            not supported when `x` is a dataset, dataset iterator, generator or
+           `keras.utils.Sequence` instance.
+        validation_data: Data on which to evaluate
+            the loss and any model metrics at the end of each epoch.
+            The model will not be trained on this data.
+            `validation_data` will override `validation_split`.
+            `validation_data` could be:
+              - tuple `(x_val, y_val)` of Numpy arrays or tensors
+              - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
+              - dataset or a dataset iterator
+            For the first two cases, `batch_size` must be provided.
+            For the last case, `validation_steps` must be provided.
+        shuffle: Boolean (whether to shuffle the training data
+            before each epoch) or str (for 'batch').
+            'batch' is a special option for dealing with the
+            limitations of HDF5 data; it shuffles in batch-sized chunks.
+            Has no effect when `steps_per_epoch` is not `None`.
+        class_weight: Optional dictionary mapping class indices (integers)
+            to a weight (float) value, used for weighting the loss function
+            (during training only).
+            This can be useful to tell the model to
+            "pay more attention" to samples from
+            an under-represented class.
+        sample_weight: Optional Numpy array of weights for
+            the training samples, used for weighting the loss function
+            (during training only). You can either pass a flat (1D)
+            Numpy array with the same length as the input samples
+            (1:1 mapping between weights and samples),
+            or in the case of temporal data,
+            you can pass a 2D array with shape
+            `(samples, sequence_length)`,
+            to apply a different weight to every timestep of every sample.
+            In this case you should make sure to specify
+            `sample_weight_mode="temporal"` in `compile()`. This argument is not
+            supported when `x` is a dataset, dataset iterator, generator, or
+           `keras.utils.Sequence` instance, instead provide the sample_weights
+            as the third element of `x`.
+        initial_epoch: Integer.
+            Epoch at which to start training
+            (useful for resuming a previous training run).
+        steps_per_epoch: Integer or `None`.
+            Total number of steps (batches of samples)
+            before declaring one epoch finished and starting the
+            next epoch. When training with input tensors such as
+            TensorFlow data tensors, the default `None` is equal to
+            the number of samples in your dataset divided by
+            the batch size, or 1 if that cannot be determined.
+        validation_steps: Only relevant if `validation_data` is provided and
+            is a dataset or dataset iterator. Total number of steps (batches of
+            samples) to draw before stopping when performing validation
+            at the end of every epoch.
+        max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
+            input only. Maximum size for the generator queue.
+            If unspecified, `max_queue_size` will default to 10.
+        workers: Integer. Used for generator or `keras.utils.Sequence` input
+            only. Maximum number of processes to spin up
+            when using process-based threading. If unspecified, `workers`
+            will default to 1. If 0, will execute the generator on the main
+            thread.
+        use_multiprocessing: Boolean. Used for generator or
+            `keras.utils.Sequence` input only. If `True`, use process-based
+            threading. If unspecified, `use_multiprocessing` will default to
+            `False`. Note that because this implementation relies on
+            multiprocessing, you should not pass non-picklable arguments to
+            the generator as they can't be passed easily to children processes.
+        **kwargs: Used for backwards compatibility.
 
-      # Set metric attributes on model.
-      self._set_metric_attributes(
-          self.outputs,
-          skip_target_indices=skip_target_indices,
-      )
+    Returns:
+        A `History` object. Its `History.history` attribute is
+        a record of training loss values and metrics values
+        at successive epochs, as well as validation loss values
+        and validation metrics values (if applicable).
 
-      self.targets = []
-      for i in range(len(self.outputs)):
-        self._feed_output_names.append(self.output_names[i])
-      self._collected_trainable_weights = self.trainable_weights
-      return
+    Raises:
+        RuntimeError: If the model was never compiled.
+        ValueError: In case of mismatch between the provided input data
+            and what the model expects.
+    """
+    # TODO(fchollet): this method may be creating reference cycles, which would
+    # lead to accumulating garbage in memory when called in a loop. Investigate.
+    if data_utils.is_generator_or_sequence(x):
+      training_utils.check_generator_arguments(y, sample_weight)
+      return self.fit_generator(
+          x,
+          steps_per_epoch=steps_per_epoch,
+          epochs=epochs,
+          verbose=verbose,
+          callbacks=callbacks,
+          validation_data=validation_data,
+          validation_steps=validation_steps,
+          class_weight=class_weight,
+          max_queue_size=max_queue_size,
+          workers=workers,
+          use_multiprocessing=use_multiprocessing,
+          shuffle=shuffle,
+          initial_epoch=initial_epoch)
 
-    with K.get_graph().as_default():
-      # Prepare targets of model.
-      self.targets = []
-      self._feed_targets = []
-      if target_tensors not in (None, []):
-        if isinstance(target_tensors, list):
-          if len(target_tensors) != len(self.outputs):
-            raise ValueError(
-                'When passing a list as `target_tensors`, '
-                'it should have one entry per model output. '
-                'The model has %s outputs, but you passed target_tensors=%s' %
-                (len(self.outputs), target_tensors))
-        elif isinstance(target_tensors, dict):
-          for name in target_tensors:
-            if name not in self.output_names:
-              raise ValueError(
-                  'Unknown entry in `target_tensors` '
-                  'dictionary: "' + name + '". '
-                  'Only expected the following keys: ' + str(self.output_names))
-          tmp_target_tensors = []
-          for name in self.output_names:
-            tmp_target_tensors.append(target_tensors.get(name, None))
-          target_tensors = tmp_target_tensors
-        elif tensor_util.is_tensor(target_tensors):
-          target_tensors = [target_tensors]
-        else:
-          raise TypeError('Expected `target_tensors` to be a list or tuple or '
-                          'dict or a single tensor, but got:', target_tensors)
+    # Legacy support
+    if 'nb_epoch' in kwargs:
+      logging.warning(
+          'The `nb_epoch` argument in `fit` '
+          'has been renamed `epochs`.')
+      epochs = kwargs.pop('nb_epoch')
+    if kwargs:
+      raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))
 
-      for i in range(len(self.outputs)):
-        if i in skip_target_indices:
-          self.targets.append(None)
-        else:
-          shape = K.int_shape(self.outputs[i])
-          name = self.output_names[i]
-          if target_tensors not in (None, []):
-            target = target_tensors[i]
-          else:
-            target = None
-          if target is None or K.is_placeholder(target):
-            if target is None:
-              target_dtype = losses.LABEL_DTYPES_FOR_LOSSES.get(
-                  self.loss_functions[i],
-                  K.dtype(self.outputs[i]))
+    # Validate and standardize user data.
+    if self._distribution_strategy:
+      distributed_training_utils.validate_callbacks(callbacks, self.optimizer,
+                                                    self._distribution_strategy)
 
-              target = K.placeholder(
-                  ndim=len(shape),
-                  name=name + '_target',
-                  sparse=K.is_sparse(self.outputs[i]),
-                  dtype=target_dtype)
-            self._feed_targets.append(target)
-            self._feed_outputs.append(self.outputs[i])
-            self._feed_output_names.append(name)
-            self._feed_output_shapes.append(shape)
-            self._feed_loss_fns.append(self.loss_functions[i])
-          else:
-            skip_target_weighing_indices.append(i)
-          self.targets.append(target)
+      distributed_training_utils.validate_inputs(
+          x, y, self._distribution_strategy)
 
-      # Prepare sample weights.
-      self._set_sample_weight_attributes(sample_weight_mode,
-                                         skip_target_weighing_indices)
-      # Save all metric attributes per output of the model.
-      self._cache_output_metric_attributes(metrics, weighted_metrics)
+      first_x_value = nest.flatten(x)[0]
+      if isinstance(first_x_value, np.ndarray):
+        steps_per_epoch, batch_size = (
+            distributed_training_utils.get_input_params(
+                self._distribution_strategy, first_x_value, steps_per_epoch,
+                batch_size, is_training=True))
 
-      # Compute total loss.
-      total_loss = None
-      with K.name_scope('loss'):
-        for i in range(len(self.outputs)):
-          if i in skip_target_indices:
-            continue
-          y_true = self.targets[i]
-          y_pred = self.outputs[i]
-          loss_fn = loss_functions[i]
-          sample_weight = self.sample_weights[i]
-          mask = masks[i]
-          loss_weight = loss_weights_list[i]
-          with K.name_scope(self.output_names[i] + '_loss'):
-            if isinstance(loss_fn, losses.Loss):
-              if mask is not None:
-                mask = math_ops.cast(mask, y_pred.dtype)
-                # Update weights with mask.
-                if sample_weight is None:
-                  sample_weight = mask
-                else:
-                  # Update dimensions of weights to match with mask if possible.
-                  mask, _, sample_weight = squeeze_or_expand_dimensions(
-                      mask, None, sample_weight)
-                  sample_weight *= mask
-              output_loss = loss_fn(y_true, y_pred, sample_weight=sample_weight)
-            else:
-              weighted_loss = training_utils.weighted_masked_objective(loss_fn)
-              output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
+    batch_size = self._validate_or_infer_batch_size(batch_size, steps_per_epoch,
+                                                    x)
 
-          if len(self.outputs) > 1:
-            # Keep track of the un-aggregated loss result tensor.
-            self._compile_metrics_tensors[self.output_names[i] +
-                                          '_loss'] = output_loss
+    x, y, sample_weights = self._standardize_user_data(
+        x,
+        y,
+        sample_weight=sample_weight,
+        class_weight=class_weight,
+        batch_size=batch_size,
+        check_steps=True,
+        steps_name='steps_per_epoch',
+        steps=steps_per_epoch,
+        validation_split=validation_split,
+        shuffle=shuffle)
 
-            # Keep track of stateful result tensor and function for the loss.
-            loss_name = loss_fn.name if isinstance(
-                loss_fn, losses.Loss) else loss_fn.__name__
-            mean_wrapped_loss = metrics_module.MeanMetricWrapper(
-                loss_fn, name=loss_name)
-            result_tensor = training_utils.call_metric_function(
-                mean_wrapped_loss,
-                y_true,
-                y_pred,
-                weights=sample_weight,
-                mask=mask)
-            self._compile_stateful_metrics_tensors[self.output_names[i] +
-                                                   '_loss'] = result_tensor
-            self._compile_stateful_metric_functions.append(mean_wrapped_loss)
+    # Prepare validation data.
+    if validation_data:
+      if (isinstance(validation_data, iterator_ops.Iterator) or
+          isinstance(validation_data, iterator_ops.EagerIterator) or
+          isinstance(validation_data, dataset_ops.DatasetV2)):
+        val_x = validation_data
+        val_y = None
+        val_sample_weight = None
+      elif len(validation_data) == 2:
+        val_x, val_y = validation_data  # pylint: disable=unpacking-non-sequence
+        val_sample_weight = None
+      elif len(validation_data) == 3:
+        val_x, val_y, val_sample_weight = validation_data  # pylint: disable=unpacking-non-sequence
+      else:
+        raise ValueError(
+            'When passing a `validation_data` argument, '
+            'it must contain either 2 items (x_val, y_val), '
+            'or 3 items (x_val, y_val, val_sample_weights), '
+            'or alternatively it could be a dataset or a '
+            'dataset or a dataset iterator. '
+            'However we received `validation_data=%s`' % validation_data)
 
-            self._compile_metrics_names.append(self.output_names[i] + '_loss')
-          if total_loss is None:
-            total_loss = loss_weight * output_loss
-          else:
-            total_loss += loss_weight * output_loss
-        if total_loss is None:
-          if not self.losses:
-            raise ValueError('The model cannot be compiled '
-                             'because it has no loss to optimize.')
-          else:
-            total_loss = 0.
+      # Validate and standardize validation data.
+      if self._distribution_strategy:
+        distributed_training_utils.validate_inputs(
+            val_x, val_y, self._distribution_strategy)
+        first_valx_value = nest.flatten(val_x)[0]
+        if isinstance(first_valx_value, np.ndarray):
+          validation_steps, _ = distributed_training_utils.get_input_params(
+              self._distribution_strategy, first_valx_value, validation_steps,
+              batch_size)
 
-        # Add regularization penalties
-        # and other layer-specific losses.
-        for loss_tensor in self.losses:
-          total_loss += loss_tensor
+      val_x, val_y, val_sample_weights = self._standardize_user_data(
+          val_x,
+          val_y,
+          sample_weight=val_sample_weight,
+          batch_size=batch_size,
+          steps=validation_steps)
 
-      # Set metric attributes on model.
-      self._set_metric_attributes(
-          self.outputs,
-          skip_target_indices=skip_target_indices,
-      )
-      # Invoke metric functions for all the outputs.
-      self._handle_metrics(
-          self.outputs,
-          masks=masks,
-          targets=self.targets,
-          skip_target_indices=skip_target_indices,
-          sample_weights=self.sample_weights)
+    elif validation_split and 0. < validation_split < 1.:
+      if training_utils.has_symbolic_tensors(x):
+        raise ValueError('If your data is in the form of symbolic tensors, '
+                         'you cannot use `validation_split`.')
+      if hasattr(x[0], 'shape'):
+        split_at = int(x[0].shape[0] * (1. - validation_split))
+      else:
+        split_at = int(len(x[0]) * (1. - validation_split))
+      x, val_x = (slice_arrays(x, 0, split_at), slice_arrays(x, split_at))
+      y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at))
+      sample_weights, val_sample_weights = (slice_arrays(
+          sample_weights, 0, split_at), slice_arrays(sample_weights, split_at))
+    elif validation_steps:
+      val_x = []
+      val_y = []
+      val_sample_weights = []
+    else:
+      val_x = None
+      val_y = None
+      val_sample_weights = None
 
-      # Prepare gradient updates and state updates.
-      self.total_loss = total_loss
+    if (self.run_eagerly or (isinstance(x, iterator_ops.EagerIterator) and
+                             not self._distribution_strategy)):
+      return training_generator.fit_generator(
+          self, (x, y, sample_weights),
+          steps_per_epoch=steps_per_epoch,
+          batch_size=batch_size,
+          epochs=epochs,
+          shuffle=shuffle,
+          verbose=verbose,
+          callbacks=callbacks,
+          validation_data=validation_data,
+          validation_steps=validation_steps,
+          workers=0,
+          initial_epoch=initial_epoch)
+    elif distributed_training_utils.is_tpu_strategy(
+        self._distribution_strategy):
+      return training_distributed.experimental_fit_loop(
+          self,
+          x,
+          epochs=epochs,
+          verbose=verbose,
+          callbacks=callbacks,
+          val_iterator=val_x,
+          initial_epoch=initial_epoch,
+          steps_per_epoch=steps_per_epoch,
+          validation_steps=validation_steps)
+    else:
+      return training_arrays.fit_loop(
+          self,
+          x,
+          y,
+          sample_weights=sample_weights,
+          batch_size=batch_size,
+          epochs=epochs,
+          verbose=verbose,
+          callbacks=callbacks,
+          val_inputs=val_x,
+          val_targets=val_y,
+          val_sample_weights=val_sample_weights,
+          shuffle=shuffle,
+          initial_epoch=initial_epoch,
+          steps_per_epoch=steps_per_epoch,
+          validation_steps=validation_steps)
 
-      # Functions for train, test and predict will
-      # be compiled lazily when required.
-      # This saves time when the user is not using all functions.
-      self._function_kwargs = kwargs
+  def evaluate(self,
+               x=None,
+               y=None,
+               batch_size=None,
+               verbose=1,
+               sample_weight=None,
+               steps=None,
+               max_queue_size=10,
+               workers=1,
+               use_multiprocessing=False):
+    """Returns the loss value & metrics values for the model in test mode.
 
-      self._fit_function = None
-      self._eval_function = None
-      self.train_function = None
-      self.test_function = None
-      self.predict_function = None
+    Computation is done in batches.
 
-      # Collected trainable weights, sorted in topological order.
-      trainable_weights = self.trainable_weights
-      self._collected_trainable_weights = trainable_weights
+    Arguments:
+        x: Input data. It could be:
+          - A Numpy array (or array-like), or a list of arrays
+            (in case the model has multiple inputs).
+          - A TensorFlow tensor, or a list of tensors
+            (in case the model has multiple inputs).
+          - A dict mapping input names to the corresponding array/tensors,
+            if the model has named inputs.
+          - A `tf.data` dataset or a dataset iterator.
+          - A generator or `keras.utils.Sequence` instance.
+        y: Target data. Like the input data `x`,
+          it could be either Numpy array(s) or TensorFlow tensor(s).
+          It should be consistent with `x` (you cannot have Numpy inputs and
+          tensor targets, or inversely).
+          If `x` is a dataset, dataset iterator, generator or
+          `keras.utils.Sequence` instance, `y` should not be specified (since
+          targets will be obtained from the iterator/dataset).
+        batch_size: Integer or `None`.
+            Number of samples per gradient update.
+            If unspecified, `batch_size` will default to 32.
+            Do not specify the `batch_size` is your data is in the
+            form of symbolic tensors, dataset, dataset iterators,
+            generators, or `keras.utils.Sequence` instances (since they generate
+            batches).
+        verbose: 0 or 1. Verbosity mode.
+            0 = silent, 1 = progress bar.
+        sample_weight: Optional Numpy array of weights for
+            the test samples, used for weighting the loss function.
+            You can either pass a flat (1D)
+            Numpy array with the same length as the input samples
+            (1:1 mapping between weights and samples),
+            or in the case of temporal data,
+            you can pass a 2D array with shape
+            `(samples, sequence_length)`,
+            to apply a different weight to every timestep of every sample.
+            In this case you should make sure to specify
+            `sample_weight_mode="temporal"` in `compile()`. This argument is not
+            supported when `x` is a dataset or a dataset iterator, instead pass
+            sample weights as the third element of `x`.
+        steps: Integer or `None`.
+            Total number of steps (batches of samples)
+            before declaring the evaluation round finished.
+            Ignored with the default value of `None`.
+        max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
+            input only. Maximum size for the generator queue.
+            If unspecified, `max_queue_size` will default to 10.
+        workers: Integer. Used for generator or `keras.utils.Sequence` input
+            only. Maximum number of processes to spin up when using
+            process-based threading. If unspecified, `workers` will default
+            to 1. If 0, will execute the generator on the main thread.
+        use_multiprocessing: Boolean. Used for generator or
+            `keras.utils.Sequence` input only. If `True`, use process-based
+            threading. If unspecified, `use_multiprocessing` will default to
+            `False`. Note that because this implementation relies on
+            multiprocessing, you should not pass non-picklable arguments to
+            the generator as they can't be passed easily to children processes.
 
-  def _check_trainable_weights_consistency(self):
-    """Check trainable weights count consistency.
+    Returns:
+        Scalar test loss (if the model has a single output and no metrics)
+        or list of scalars (if the model has multiple outputs
+        and/or metrics). The attribute `model.metrics_names` will give you
+        the display labels for the scalar outputs.
 
-    This will raise a warning if `trainable_weights` and
-    `_collected_trainable_weights` are inconsistent (i.e. have different
-    number of parameters).
-    Inconsistency will typically arise when one modifies `model.trainable`
-    without calling `model.compile` again.
+    Raises:
+        ValueError: in case of invalid arguments.
     """
-    if not hasattr(self, '_collected_trainable_weights'):
-      return
-
-    if len(self.trainable_weights) != len(self._collected_trainable_weights):
-      logging.log_first_n(
-          logging.WARN, 'Discrepancy between trainable weights and collected'
-          ' trainable weights, did you set `model.trainable`'
-          ' without calling `model.compile` after ?', 1)
-
-  def _make_train_function_helper(self, fn_name, outputs, metric_updates=None):
-    if not hasattr(self, fn_name):
-      raise RuntimeError('You must compile your model before using it.')
-    self._check_trainable_weights_consistency()
-    if getattr(self, fn_name) is None:
-      inputs = (self._feed_inputs +
-                self._feed_targets +
-                self._feed_sample_weights)
-      if not isinstance(K.symbolic_learning_phase(), int):
-        inputs += [K.symbolic_learning_phase()]
+    if data_utils.is_generator_or_sequence(x):
+      training_utils.check_generator_arguments(y, sample_weight)
+      return self.evaluate_generator(
+          x,
+          steps=steps,
+          verbose=verbose,
+          max_queue_size=max_queue_size,
+          workers=workers,
+          use_multiprocessing=use_multiprocessing)
+    # Validate and standardize user data.
+    if self._distribution_strategy:
+      distributed_training_utils.validate_inputs(
+          x, y, self._distribution_strategy)
+      first_x_value = nest.flatten(x)[0]
+      if isinstance(first_x_value, np.ndarray):
+        steps, batch_size = distributed_training_utils.get_input_params(
+            self._distribution_strategy, first_x_value, steps, batch_size)
 
-      with K.get_graph().as_default():
-        with K.name_scope('training'):
-          with K.name_scope(self.optimizer.__class__.__name__):
-            # Training updates
-            updates = self.optimizer.get_updates(
-                params=self._collected_trainable_weights, loss=self.total_loss)
-      # Unconditional updates
-      updates += self.get_updates_for(None)
-      # Conditional updates relevant to this model
-      updates += self.get_updates_for(self.inputs)
-      # Add stateful metrics updates.
-      if metric_updates is not None:
-        updates += metric_updates
+    batch_size = self._validate_or_infer_batch_size(batch_size, steps, x)
 
-      with K.name_scope('training'):
-        # Gets loss and metrics. Updates weights at each call.
-        fn = K.function(
-            inputs,
-            outputs,
-            updates=updates,
-            name='train_function',
-            **self._function_kwargs)
-        setattr(self, fn_name, fn)
+    x, y, sample_weights = self._standardize_user_data(
+        x,
+        y,
+        sample_weight=sample_weight,
+        batch_size=batch_size,
+        check_steps=True,
+        steps_name='steps',
+        steps=steps)
 
-  def _make_train_function(self):
-    metrics_tensors = [
-        self._all_metrics_tensors[m] for m in self.metrics_names[1:]
-    ]
-    self._make_train_function_helper('train_function',
-                                     [self.total_loss] + metrics_tensors)
+    if (self.run_eagerly or (isinstance(x, iterator_ops.EagerIterator) and
+                             not self._distribution_strategy)):
+      return training_generator.evaluate_generator(
+          self, (x, y, sample_weights),
+          steps=steps,
+          batch_size=batch_size,
+          verbose=verbose,
+          workers=0)
+    elif distributed_training_utils.is_tpu_strategy(
+        self._distribution_strategy):
+      return training_distributed.experimental_test_loop(
+          self, iterator=x, verbose=verbose, steps=steps)
+    else:
+      return training_arrays.test_loop(
+          self,
+          inputs=x,
+          targets=y,
+          sample_weights=sample_weights,
+          batch_size=batch_size,
+          verbose=verbose,
+          steps=steps)
 
-  def _make_fit_function(self):
-    metrics_tensors = [
-        self._all_stateful_metrics_tensors[m] for m in self.metrics_names[1:]
-    ]
-    self._make_train_function_helper(
-        '_fit_function', [self.total_loss] + metrics_tensors)
+  def predict(self,
+              x,
+              batch_size=None,
+              verbose=0,
+              steps=None,
+              max_queue_size=10,
+              workers=1,
+              use_multiprocessing=False):
+    """Generates output predictions for the input samples.
 
-  def _make_test_function_helper(self, fn_name, outputs, metric_updates=None):
-    if not hasattr(self, fn_name):
-      raise RuntimeError('You must compile your model before using it.')
-    if getattr(self, fn_name) is None:
-      inputs = (self._feed_inputs +
-                self._feed_targets +
-                self._feed_sample_weights)
+    Computation is done in batches.
 
-      with K.name_scope('evaluation'):
-        updates = self.state_updates
-        # Add stateful metrics updates.
-        if metric_updates is not None:
-          updates += metric_updates
-        # Return loss and metrics, no gradient updates.
-        # Does update the network states.
-        fn = K.function(
-            inputs,
-            outputs,
-            updates=updates,
-            name='test_function',
-            **self._function_kwargs)
-        setattr(self, fn_name, fn)
+    Arguments:
+         x: Input samples. It could be:
+          - A Numpy array (or array-like), or a list of arrays
+            (in case the model has multiple inputs).
+          - A TensorFlow tensor, or a list of tensors
+            (in case the model has multiple inputs).
+          - A `tf.data` dataset or a dataset iterator.
+          - A generator or `keras.utils.Sequence` instance.
+        batch_size: Integer or `None`.
+            Number of samples per gradient update.
+            If unspecified, `batch_size` will default to 32.
+            Do not specify the `batch_size` is your data is in the
+            form of symbolic tensors, dataset, dataset iterators,
+            generators, or `keras.utils.Sequence` instances (since they generate
+            batches).
+        verbose: Verbosity mode, 0 or 1.
+        steps: Total number of steps (batches of samples)
+            before declaring the prediction round finished.
+            Ignored with the default value of `None`.
+        max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
+            input only. Maximum size for the generator queue.
+            If unspecified, `max_queue_size` will default to 10.
+        workers: Integer. Used for generator or `keras.utils.Sequence` input
+            only. Maximum number of processes to spin up when using
+            process-based threading. If unspecified, `workers` will default
+            to 1. If 0, will execute the generator on the main thread.
+        use_multiprocessing: Boolean. Used for generator or
+            `keras.utils.Sequence` input only. If `True`, use process-based
+            threading. If unspecified, `use_multiprocessing` will default to
+            `False`. Note that because this implementation relies on
+            multiprocessing, you should not pass non-picklable arguments to
+            the generator as they can't be passed easily to children processes.
 
-  def _make_test_function(self):
-    metrics_tensors = [
-        self._all_metrics_tensors[m] for m in self.metrics_names[1:]
-    ]
-    self._make_test_function_helper('test_function',
-                                    [self.total_loss] + metrics_tensors)
 
-  def _make_eval_function(self):
-    metrics_tensors = [
-        self._all_stateful_metrics_tensors[m] for m in self.metrics_names[1:]
-    ]
-    self._make_test_function_helper(
-        '_eval_function', [self.total_loss] + metrics_tensors)
+    Returns:
+        Numpy array(s) of predictions.
 
-  def _make_predict_function(self):
-    if not hasattr(self, 'predict_function'):
-      self.predict_function = None
-    if self.predict_function is None:
-      inputs = self._feed_inputs
-      # Gets network outputs. Does not update weights.
-      # Does update the network states.
-      kwargs = getattr(self, '_function_kwargs', {})
-      with K.name_scope('predict'):
-        self.predict_function = K.function(
-            inputs,
-            self.outputs,
-            updates=self.state_updates,
-            name='predict_function',
-            **kwargs)
+    Raises:
+        ValueError: In case of mismatch between the provided
+            input data and the model's expectations,
+            or in case a stateful model receives a number of samples
+            that is not a multiple of the batch size.
+    """
+    if data_utils.is_generator_or_sequence(x):
+      return self.predict_generator(
+          x,
+          steps=steps,
+          verbose=verbose,
+          max_queue_size=max_queue_size,
+          workers=workers,
+          use_multiprocessing=use_multiprocessing)
+    if self._distribution_strategy:
+      distributed_training_utils.validate_inputs(
+          x, None, self._distribution_strategy)
+      first_x_value = nest.flatten(x)[0]
+      if isinstance(first_x_value, np.ndarray):
+        steps, batch_size = distributed_training_utils.get_input_params(
+            self._distribution_strategy, first_x_value, steps, batch_size)
 
-  def _make_execution_function(self, mode):
-    if mode == 'train':
-      self._make_fit_function()
-      return self._fit_function
-    if mode == 'test':
-      self._make_eval_function()
-      return self._eval_function
-    if mode == 'predict':
-      self._make_predict_function()
-      return self.predict_function
+    batch_size = self._validate_or_infer_batch_size(batch_size, steps, x)
 
-  def _get_iterator_get_next_tensors(self, iterator):
-    get_next_op = self._iterator_get_next.get(iterator, None)
-    if get_next_op is None:
-      get_next_op = iterator.get_next()
-      self._iterator_get_next[iterator] = get_next_op
-    return get_next_op
+    # Validate and standardize user data.
+    if self._distribution_strategy:
+      x, _, _ = self._standardize_user_data(
+          x, check_steps=True, steps_name='steps', steps=steps,
+          batch_size=batch_size)
+    else:
+      # TODO(anjalisridhar): We don't pass batch_size here for some reason. This
+      # means we need to special case distribution strategy which needs the
+      # batch size.
+      x, _, _ = self._standardize_user_data(
+          x, check_steps=True, steps_name='steps', steps=steps)
 
-  def _distribution_standardize_user_data(self,
-                                          x,
-                                          y=None,
-                                          sample_weight=None,
-                                          class_weight=None,
-                                          batch_size=None,
-                                          check_steps=False,
-                                          steps_name='steps',
-                                          steps=None,
-                                          validation_split=0,
-                                          shuffle=False):
-    """Runs validation checks on input and target data passed by the user.
+    if (self.run_eagerly or (isinstance(x, iterator_ops.EagerIterator) and
+                             not self._distribution_strategy)):
+      return training_generator.predict_generator(
+          self,
+          x,
+          steps=steps,
+          batch_size=batch_size,
+          verbose=verbose,
+          workers=0)
+    elif distributed_training_utils.is_tpu_strategy(
+        self._distribution_strategy):
+      return training_distributed.experimental_predict_loop(
+          self, x, verbose=verbose, steps=steps)
+    else:
+      return training_arrays.predict_loop(
+          self, x, batch_size=batch_size, verbose=verbose, steps=steps)
 
-    This is called when using DistributionStrategy to train, evaluate or serve
-    the model.
+  def reset_metrics(self):
+    """Resets the state of metrics."""
+    if hasattr(self, 'metrics'):
+      for m in self.metrics:
+        m.reset_states()
+      if self._distribution_strategy:
+        training_distributed._reset_metrics(self)  # pylint: disable=protected-access
 
-    Args:
-      x: Input data. A numpy array or `tf.data` dataset.
-      y: Target data. A numpy array or None if x is a `tf.data` dataset.
-      sample_weight: An optional sample-weight array passed by the user to
-        weight the importance of each sample in `x`.
-      class_weight: An optional class-weight array by the user to
-        weight the importance of samples in `x` based on the class they belong
-        to, as conveyed by `y`.
-      batch_size: Integer batch size. If provided, it is used to run additional
-        validation checks on stateful models.
-      check_steps: boolean, True if we want to check for validity of `steps` and
-        False, otherwise.
-      steps_name: The public API's parameter name for `steps`.
-      steps: Integer or `None`. Total number of steps (batches of samples) to
-        execute.
-      validation_split: Float between 0 and 1.
-        Fraction of the training data to be used as validation data.
-      shuffle: Boolean whether to shuffle the training data before each epoch.
+  def train_on_batch(self,
+                     x,
+                     y=None,
+                     sample_weight=None,
+                     class_weight=None,
+                     reset_metrics=True):
+    """Runs a single gradient update on a single batch of data.
+
+    Arguments:
+        x: Input data. It could be:
+          - A Numpy array (or array-like), or a list of arrays
+              (in case the model has multiple inputs).
+          - A TensorFlow tensor, or a list of tensors
+              (in case the model has multiple inputs).
+          - A dict mapping input names to the corresponding array/tensors,
+              if the model has named inputs.
+          - A `tf.data` dataset or a dataset iterator.
+        y: Target data. Like the input data `x`, it could be either Numpy
+          array(s) or TensorFlow tensor(s). It should be consistent with `x`
+          (you cannot have Numpy inputs and tensor targets, or inversely). If
+          `x` is a dataset or a dataset iterator, `y` should not be specified
+          (since targets will be obtained from the iterator).
+        sample_weight: Optional array of the same length as x, containing
+          weights to apply to the model's loss for each sample. In the case of
+          temporal data, you can pass a 2D array with shape (samples,
+          sequence_length), to apply a different weight to every timestep of
+          every sample. In this case you should make sure to specify
+          sample_weight_mode="temporal" in compile(). This argument is not
+          supported when `x` is a dataset or a dataset iterator.
+        class_weight: Optional dictionary mapping class indices (integers) to a
+          weight (float) to apply to the model's loss for the samples from this
+          class during training. This can be useful to tell the model to "pay
+          more attention" to samples from an under-represented class.
+        reset_metrics: If `True`, the metrics returned will be only for this
+          batch. If `False`, the metrics will be statefully accumulated across
+          batches.
 
     Returns:
-      Iterator for reading the dataset `x`.
+        Scalar training loss
+        (if the model has a single output and no metrics)
+        or list of scalars (if the model has multiple outputs
+        and/or metrics). The attribute `model.metrics_names` will give you
+        the display labels for the scalar outputs.
 
     Raises:
-      ValueError: In case of invalid user-provided data.
-      RuntimeError: If the model was never compiled.
+      ValueError: In case of invalid user-provided arguments.
     """
-    if class_weight:
-      raise NotImplementedError('`class_weight` is currently not supported '
-                                'when using DistributionStrategy.')
-
-    if (sample_weight is not None and sample_weight.all() and
-        distributed_training_utils.is_tpu_strategy(
-            self._distribution_strategy)):
-      raise NotImplementedError('`sample_weight` is currently not supported '
-                                'when using TPUStrategy.')
+    if self._distribution_strategy:
+      raise NotImplementedError('`train_on_batch` is not supported for models '
+                                'compiled with DistributionStrategy.')
+    # Validate and standardize user data.
+    x, y, sample_weights = self._standardize_user_data(
+        x, y, sample_weight=sample_weight, class_weight=class_weight)
 
-    # Validates `steps` argument right at the beginning since we use it to
-    # construct the dataset object.
-    # TODO(anjalisridhar): Remove this check once we refactor the
-    # _standardize_user_data code path. This check is already present elsewhere
-    # in the codebase.
-    if check_steps and isinstance(x, dataset_ops.DatasetV2) and steps is None:
-      raise ValueError('When using Datasets as input, '
-                       'you should specify the `{steps_name}` argument.'
-                       .format(steps_name=steps_name))
+    if self.run_eagerly:
+      outputs = training_eager.train_on_batch(
+          self, x, y, sample_weights=sample_weights)
+    else:
+      if not isinstance(K.symbolic_learning_phase(), int):
+        ins = x + y + sample_weights + [True]
+      else:
+        ins = x + y + sample_weights
 
-    first_x_value = nest.flatten(x)[0]
-    if isinstance(first_x_value, np.ndarray):
-      # We need to use the drop_remainder argument to allow for a static
-      # input shape which is required for TPUs.
-      drop_remainder = self._distribution_strategy.require_static_shapes
-      if y is not None:
-        var_x = distributed_training_utils.get_var_for_numpy(
-            self._distribution_strategy, x)
-        var_y = distributed_training_utils.get_var_for_numpy(
-            self._distribution_strategy, y)
-        if sample_weight is not None:
-          var_sample_weights = distributed_training_utils.get_var_for_numpy(
-              self._distribution_strategy, sample_weight)
+      if reset_metrics:
+        self._make_train_function()
+        outputs = self.train_function(ins)  # pylint: disable=not-callable
+      else:
+        self._make_fit_function()
+        outputs = self._fit_function(ins)  # pylint: disable=not-callable
 
-          x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y,
-                                                      var_sample_weights))
-        else:
-          x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y))
+    if reset_metrics:
+      self.reset_metrics()
 
-        x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y))
-        if shuffle:
-          # 1024 is a good buffer size since it is much larger than the average
-          # batch size provided by the user and provides sufficient randomness.
-          # One thing to keep in mind is the memory usage based on the size of
-          # each sample.
-          x = x.shuffle(1024)
-        x = x.repeat()
-        x = x.batch(batch_size, drop_remainder=drop_remainder)
-        y = None
-        sample_weight = None
-      else:
-        # This case is for the predict call where the dataset only contains
-        # inputs and no targets, i.e. it does not return a tuple
-        var_x = distributed_training_utils.get_var_for_numpy(
-            self._distribution_strategy, x)
-        x = dataset_ops.Dataset.from_tensor_slices(var_x)
-        x = x.batch(batch_size, drop_remainder=drop_remainder)
+    if len(outputs) == 1:
+      return outputs[0]
+    return outputs
 
-    assert isinstance(x, dataset_ops.DatasetV2)
+  def test_on_batch(self, x, y=None, sample_weight=None, reset_metrics=True):
+    """Test the model on a single batch of samples.
 
-    with self._distribution_strategy.scope():
-      iterator = self._distribution_strategy.make_dataset_iterator(x)
-      init_op = iterator.initialize()
-      if not context.executing_eagerly():
-        K.get_session().run(init_op)
+    Arguments:
+        x: Input data. It could be:
+          - A Numpy array (or array-like), or a list of arrays
+            (in case the model has multiple inputs).
+          - A TensorFlow tensor, or a list of tensors
+            (in case the model has multiple inputs).
+          - A dict mapping input names to the corresponding array/tensors,
+            if the model has named inputs.
+          - A `tf.data` dataset or a dataset iterator.
+        y: Target data. Like the input data `x`,
+          it could be either Numpy array(s) or TensorFlow tensor(s).
+          It should be consistent with `x` (you cannot have Numpy inputs and
+          tensor targets, or inversely). If `x` is a dataset or a
+          dataset iterator, `y` should not be specified
+          (since targets will be obtained from the iterator).
+        sample_weight: Optional array of the same length as x, containing
+            weights to apply to the model's loss for each sample.
+            In the case of temporal data, you can pass a 2D array
+            with shape (samples, sequence_length),
+            to apply a different weight to every timestep of every sample.
+            In this case you should make sure to specify
+            sample_weight_mode="temporal" in compile(). This argument is not
+            supported when `x` is a dataset or a dataset iterator.
+        reset_metrics: If `True`, the metrics returned will be only for this
+          batch. If `False`, the metrics will be statefully accumulated across
+          batches.
 
-    training_utils.validate_iterator_input(x, y, sample_weight,
-                                           validation_split)
-    return iterator
+    Returns:
+        Scalar test loss (if the model has a single output and no metrics)
+        or list of scalars (if the model has multiple outputs
+        and/or metrics). The attribute `model.metrics_names` will give you
+        the display labels for the scalar outputs.
 
-  def _standardize_user_data(self,
-                             x,
-                             y=None,
-                             sample_weight=None,
-                             class_weight=None,
-                             batch_size=None,
-                             check_steps=False,
-                             steps_name='steps',
-                             steps=None,
-                             validation_split=0,
-                             shuffle=False):
-    """Runs validation checks on input and target data passed by the user.
+    Raises:
+        ValueError: In case of invalid user-provided arguments.
+    """
+    if self._distribution_strategy:
+      raise NotImplementedError('`test_on_batch` is not supported for models '
+                                'compiled with DistributionStrategy.')
+    # Validate and standardize user data.
+    x, y, sample_weights = self._standardize_user_data(
+        x, y, sample_weight=sample_weight)
 
-    Also standardizes the data to lists of arrays, in order.
+    if self.run_eagerly:
+      outputs = training_eager.test_on_batch(
+          self, x, y, sample_weights=sample_weights)
+    else:
+      inputs = x + y + sample_weights
+      if reset_metrics:
+        self._make_test_function()
+        outputs = self.test_function(inputs)  # pylint: disable=not-callable
+      else:
+        self._make_eval_function()
+        outputs = self._eval_function(inputs)  # pylint: disable=not-callable
 
-    Also builds and compiles the model on the fly if it is a subclassed model
-    that has never been called before (and thus has no inputs/outputs).
+    if reset_metrics:
+      self.reset_metrics()
 
-    This is a purely internal method, subject to refactoring at any time.
+    if len(outputs) == 1:
+      return outputs[0]
+    return outputs
 
-    Args:
-      x: Input data. It could be:
-        - A Numpy array (or array-like), or a list of arrays
-          (in case the model has multiple inputs).
-        - A TensorFlow tensor, or a list of tensors
-          (in case the model has multiple inputs).
-        - A dict mapping input names to the corresponding array/tensors,
-          if the model has named inputs.
-        - A `tf.data` dataset or a dataset iterator.
-      y: Target data. Like the input data `x`,
-        it could be either Numpy array(s) or TensorFlow tensor(s).
-        It should be consistent with `x` (you cannot have Numpy inputs and
-        tensor targets, or inversely). If `x` is a dataset or a
-        dataset iterator, `y` should not be specified
-        (since targets will be obtained from the iterator).
-      sample_weight: An optional sample-weight array passed by the user to
-        weight the importance of each sample in `x`.
-      class_weight: An optional class-weight array by the user to
-        weight the importance of samples in `x` based on the class they belong
-        to, as conveyed by `y`.
-      batch_size: Integer batch size. If provided, it is used to run additional
-        validation checks on stateful models.
-      check_steps: boolean, True if we want to check for validity of `steps` and
-        False, otherwise. For example, when we are standardizing one batch of
-        data for train_on_batch/predict_on_batch/test_on_batch APIs, `steps`
-        value is not required and we should not check for its validity in these
-        cases.
-      steps_name: The public API's parameter name for `steps`.
-      steps: Integer or `None`. Total number of steps (batches of samples) to
-        execute.
-      validation_split: Float between 0 and 1.
-        Fraction of the training data to be used as validation data.
-      shuffle: Boolean whether to shuffle the training data before each epoch.
+  def predict_on_batch(self, x):
+    """Returns predictions for a single batch of samples.
+
+    Arguments:
+        x: Input data. It could be:
+          - A Numpy array (or array-like), or a list of arrays
+            (in case the model has multiple inputs).
+          - A TensorFlow tensor, or a list of tensors
+            (in case the model has multiple inputs).
+          - A `tf.data` dataset or a dataset iterator.
 
     Returns:
-      A tuple of 3: inputs (arrays or dicts, depending on whether `x` was a dict
-      or not), target arrays, sample-weight arrays.
-      If the model's input and targets are symbolic, these lists are empty
-      (since the model takes no user-provided data, instead the data comes
-      from the symbolic inputs/targets).
+        Numpy array(s) of predictions.
 
     Raises:
-      ValueError: In case of invalid user-provided data.
-      RuntimeError: If the model was never compiled.
+        ValueError: In case of mismatch between given number of inputs and
+          expectations of the model.
     """
     if self._distribution_strategy:
-      iterator = self._distribution_standardize_user_data(
-          x,
-          y,
-          sample_weight=sample_weight,
-          class_weight=class_weight,
-          batch_size=batch_size,
-          check_steps=check_steps,
-          steps_name=steps_name,
-          steps=steps,
-          validation_split=validation_split,
-          shuffle=shuffle)
-      return iterator, None, None
+      raise NotImplementedError('`predict_on_batch` is not supported for '
+                                'models compiled with DistributionStrategy.')
+    # Validate and standardize user data.
+    inputs, _, _ = self._standardize_user_data(x)
+    if self.run_eagerly:
+      if (isinstance(inputs, iterator_ops.EagerIterator) or
+          (isinstance(inputs, dataset_ops.DatasetV2))):
+        inputs = training_utils.cast_if_floating_dtype(inputs)
+      elif isinstance(inputs, collections.Sequence):
+        inputs = [
+            ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs]
+      return self(inputs)  # pylint: disable=not-callable
 
-    if isinstance(x, dataset_ops.DatasetV2):
-      if context.executing_eagerly():
-        x = iter(x)
-      else:
-        if x in self._dataset_iterator_cache:
-          x = self._dataset_iterator_cache[x]
-        else:
-          iterator = dataset_ops.make_initializable_iterator(x)
-          self._dataset_iterator_cache[x] = iterator
-          x = iterator
-        K.get_session().run(x.initializer)
+    self._make_predict_function()
+    outputs = self.predict_function(inputs)
 
-    # Validates `steps` argument based on x's type.
-    if check_steps:
-      training_utils.check_steps_argument(x, steps, steps_name)
+    if len(outputs) == 1:
+      return outputs[0]
+    return outputs
 
-    is_x_eager_iterator = isinstance(x, iterator_ops.EagerIterator)
-    is_x_iterator = isinstance(x, iterator_ops.Iterator)
+  def fit_generator(self,
+                    generator,
+                    steps_per_epoch=None,
+                    epochs=1,
+                    verbose=1,
+                    callbacks=None,
+                    validation_data=None,
+                    validation_steps=None,
+                    class_weight=None,
+                    max_queue_size=10,
+                    workers=1,
+                    use_multiprocessing=False,
+                    shuffle=True,
+                    initial_epoch=0):
+    """Fits the model on data yielded batch-by-batch by a Python generator.
+
+    The generator is run in parallel to the model, for efficiency.
+    For instance, this allows you to do real-time data augmentation
+    on images on CPU in parallel to training your model on GPU.
+
+    The use of `keras.utils.Sequence` guarantees the ordering
+    and guarantees the single use of every input per epoch when
+    using `use_multiprocessing=True`.
+
+    Arguments:
+        generator: A generator or an instance of `Sequence`
+          (`keras.utils.Sequence`)
+            object in order to avoid duplicate data
+            when using multiprocessing.
+            The output of the generator must be either
+            - a tuple `(inputs, targets)`
+            - a tuple `(inputs, targets, sample_weights)`.
+            This tuple (a single output of the generator) makes a single batch.
+            Therefore, all arrays in this tuple must have the same length (equal
+            to the size of this batch). Different batches may have different
+              sizes.
+            For example, the last batch of the epoch is commonly smaller than
+              the
+            others, if the size of the dataset is not divisible by the batch
+              size.
+            The generator is expected to loop over its data
+            indefinitely. An epoch finishes when `steps_per_epoch`
+            batches have been seen by the model.
+        steps_per_epoch: Total number of steps (batches of samples)
+            to yield from `generator` before declaring one epoch
+            finished and starting the next epoch. It should typically
+            be equal to the number of samples of your dataset
+            divided by the batch size.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
+        epochs: Integer, total number of iterations on the data.
+        verbose: Verbosity mode, 0, 1, or 2.
+        callbacks: List of callbacks to be called during training.
+        validation_data: This can be either
+            - a generator for the validation data
+            - a tuple (inputs, targets)
+            - a tuple (inputs, targets, sample_weights).
+        validation_steps: Only relevant if `validation_data`
+            is a generator. Total number of steps (batches of samples)
+            to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(validation_data)` as a number of steps.
+        class_weight: Dictionary mapping class indices to a weight
+            for the class.
+        max_queue_size: Integer. Maximum size for the generator queue.
+            If unspecified, `max_queue_size` will default to 10.
+        workers: Integer. Maximum number of processes to spin up
+            when using process-based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
+        use_multiprocessing: Boolean.
+            If `True`, use process-based threading.
+            If unspecified, `use_multiprocessing` will default to `False`.
+            Note that because this implementation relies on multiprocessing,
+            you should not pass non-picklable arguments to the generator
+            as they can't be passed easily to children processes.
+        shuffle: Boolean. Whether to shuffle the order of the batches at
+            the beginning of each epoch. Only used with instances
+            of `Sequence` (`keras.utils.Sequence`).
+            Has no effect when `steps_per_epoch` is not `None`.
+        initial_epoch: Epoch at which to start training
+            (useful for resuming a previous training run)
 
-    # Validate user inputs when data is given as a dataset or dataset iterator.
-    if is_x_iterator or is_x_eager_iterator:
-      training_utils.validate_iterator_input(x, y, sample_weight,
-                                             validation_split)
+    Returns:
+        A `History` object.
 
-    # For eager iterators, when we have to process multiple batches of samples,
-    # we will standardize the data when we actually loop over iterator and get
-    # the batches. For now, we just return the iterator as is.
-    if is_x_eager_iterator:
-      return x, y, sample_weight
+    Example:
 
-    # If input data is a dataset iterator in graph mode or if it is an eager
-    # iterator and only one batch of samples is required, we fetch the data
-    # tensors from the iterator and then standardize them.
-    if is_x_iterator or is_x_eager_iterator:
-      try:
-        if is_x_iterator:
-          next_element = self._get_iterator_get_next_tensors(x)
-        else:
-          next_element = x.get_next()
-      except errors.OutOfRangeError:
-        raise RuntimeError('Your dataset iterator ran out of data; '
-                           'Make sure that your dataset can generate '
-                           'required number of samples.')
+    ```python
+        def generate_arrays_from_file(path):
+            while 1:
+                f = open(path)
+                for line in f:
+                    # create numpy arrays of input data
+                    # and labels, from each line in the file
+                    x1, x2, y = process_line(line)
+                    yield ({'input_1': x1, 'input_2': x2}, {'output': y})
+                f.close()
 
-      if isinstance(next_element, (list, tuple)):
-        if len(next_element) not in [2, 3]:
-          raise ValueError(
-              'Please provide model inputs as a list or tuple of 2  or 3'
-              'elements: (input, target) or (input, target, sample_weights)'
-              'Received %s' % next_element)
-        if len(next_element) == 2:
-          x, y = next_element
-        else:
-          x, y, sample_weight = next_element
-      else:
-        x = next_element
-    x, y, sample_weights = self._standardize_weights(
-        x, y, sample_weight, class_weight, batch_size, is_x_iterator)
-    return x, y, sample_weights
+        model.fit_generator(generate_arrays_from_file('/my_file.txt'),
+                            steps_per_epoch=10000, epochs=10)
+    ```
+    Raises:
+        ValueError: In case the generator yields data in an invalid format.
+    """
+    if self._distribution_strategy:
+      raise NotImplementedError('`fit_generator` is not supported for '
+                                'models compiled with DistributionStrategy.')
+    return training_generator.fit_generator(
+        self,
+        generator,
+        steps_per_epoch=steps_per_epoch,
+        epochs=epochs,
+        verbose=verbose,
+        callbacks=callbacks,
+        validation_data=validation_data,
+        validation_steps=validation_steps,
+        class_weight=class_weight,
+        max_queue_size=max_queue_size,
+        workers=workers,
+        use_multiprocessing=use_multiprocessing,
+        shuffle=shuffle,
+        initial_epoch=initial_epoch)
 
-  def _standardize_weights(self,
-                           x,
-                           y,
-                           sample_weight=None,
-                           class_weight=None,
-                           batch_size=None,
-                           from_iterator=False):
-    """Standardize input data, target data, and weight values.
+  def evaluate_generator(self,
+                         generator,
+                         steps=None,
+                         max_queue_size=10,
+                         workers=1,
+                         use_multiprocessing=False,
+                         verbose=0):
+    """Evaluates the model on a data generator.
 
-    This method reformats all data passed to the model to an ordered list of
-    array/tensors, matching the order expected by the model. This also validates
-    the input and target data shapes.
+    The generator should return the same kind of data
+    as accepted by `test_on_batch`.
 
-    Args:
-      x: Input data. It could be:
-        - A Numpy array (or array-like), or a list of arrays
-          (in case the model has multiple inputs).
-        - A TensorFlow tensor, or a list of tensors
-          (in case the model has multiple inputs).
-        - A dict mapping input names to the corresponding array/tensors,
-          if the model has named inputs.
-        x cannot not be an iterator.
-      y: Target data. Like the input data `x`,
-        it could be either Numpy array(s) or TensorFlow tensor(s).
-        It should be consistent with `x` (you cannot have Numpy inputs and
-        tensor targets, or inversely).
-      sample_weight: An optional sample-weight array passed by the user to
-        weight the importance of each sample in `x`.
-      class_weight: An optional class-weight array by the user to
-        weight the importance of samples in `x` based on the class they belong
-        to, as conveyed by `y`.
-      batch_size: Integer batch size. If provided, it is used to run additional
-        validation checks on stateful models.
-      from_iterator: Whether x and y were obtained from an iterator.
+    Arguments:
+        generator: Generator yielding tuples (inputs, targets)
+            or (inputs, targets, sample_weights)
+            or an instance of `keras.utils.Sequence`
+            object in order to avoid duplicate data
+            when using multiprocessing.
+        steps: Total number of steps (batches of samples)
+            to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
+        max_queue_size: maximum size for the generator queue
+        workers: Integer. Maximum number of processes to spin up
+            when using process-based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
+        use_multiprocessing: Boolean.
+            If `True`, use process-based threading.
+            If unspecified, `use_multiprocessing` will default to `False`.
+            Note that because this implementation relies on multiprocessing,
+            you should not pass non-picklable arguments to the generator
+            as they can't be passed easily to children processes.
+        verbose: Verbosity mode, 0 or 1.
 
     Returns:
-      Tuple of standardized data that will be fed to the model:
-        (input data, target data, sample weights)
+        Scalar test loss (if the model has a single output and no metrics)
+        or list of scalars (if the model has multiple outputs
+        and/or metrics). The attribute `model.metrics_names` will give you
+        the display labels for the scalar outputs.
 
     Raises:
-      RuntimeError: If target data is provided, but the model has not yet been
-        compiled.
-      ValueError: If the input data, target data, and batch size have invalid
-        shapes or formats (e.g. the model expects input to be a list of three
-        tensors, but x is a list with two tensors). Error is also raised if the
-        input and target data are not both arrays or tensors.
-    """
-    # TODO(sourabhbajaj): Split input validation from weight standardization.
-    if sample_weight is not None and class_weight is not None:
-      logging.warning(
-          'Received both a `sample_weight` and `class_weight` argument. '
-          'The `class_weight` argument will be ignored.')
-    # First, we build/compile the model on the fly if necessary.
-    all_inputs = []
-    is_build_called = False
-    is_compile_called = False
-    # Whether this is a subclassed model that expects dictionary inputs
-    # rather than list inputs (e.g. FeatureColumn-based models).
-    dict_inputs = False
-    if not self.inputs:
-      # We need to use `x` to set the model inputs.
-      # We type-check that `x` and `y` are either single arrays
-      # or lists of arrays.
-      if isinstance(x, (list, tuple)):
-        if not all(isinstance(v, np.ndarray) or
-                   tensor_util.is_tensor(v) for v in x):
-          raise ValueError('Please provide as model inputs either a single '
-                           'array or a list of arrays. You passed: x=' + str(x))
-        all_inputs += list(x)
-      elif isinstance(x, dict):
-        dict_inputs = True
-        keys = sorted(x.keys())
-        all_inputs = [x[k] for k in keys]
-      else:
-        if not isinstance(x, np.ndarray) and not tensor_util.is_tensor(x):
-          raise ValueError('Please provide as model inputs either a single '
-                           'array or a list of arrays. You passed: x=' + str(x))
-        all_inputs.append(x)
-
-      # Build the model using the retrieved inputs (value or symbolic).
-      # If values or generated from a dataset, then in symbolic-mode
-      # placeholders will be created to match the value shapes.
-      if not self.inputs:
-        is_build_called = True
-        if from_iterator:
-          cast_inputs = nest.map_structure(lambda v: v.shape, x)
-        elif training_utils.has_tensors(x):
-          cast_inputs = training_utils.cast_if_floating_dtype(x)
-        else:
-          cast_inputs = x
-        self._set_inputs(cast_inputs)
-    else:
-      dict_inputs = isinstance(self.inputs, dict)
-    if dict_inputs and context.executing_eagerly():
-      # No support for graph functions when the model expects dictionary inputs
-      # (i.e. FeatureColumn-based models).
-      self.run_eagerly = True
-
-    if y is not None:
-      if not self.optimizer:
-        raise RuntimeError('You must compile a model before '
-                           'training/testing. '
-                           'Use `model.compile(optimizer, loss)`.')
-      if not self._is_compiled:
-        # On-the-fly compilation of the model.
-        # We need to use `y` to set the model targets.
-        if training_utils.has_tensors(y):
-          y = training_utils.cast_if_floating_dtype(y)
-        if isinstance(y, (list, tuple)):
-          if not all(isinstance(v, np.ndarray) or
-                     tensor_util.is_tensor(v) for v in y):
-            raise ValueError('Please provide as model targets either a single '
-                             'array or a list of arrays. '
-                             'You passed: y=' + str(y))
-          all_inputs += list(y)
-        elif isinstance(y, dict):
-          raise ValueError('Please do not pass a dictionary as model targets.')
-        else:
-          if not isinstance(y, np.ndarray) and not tensor_util.is_tensor(y):
-            raise ValueError('Please provide as model targets either a single '
-                             'array or a list of arrays. '
-                             'You passed: y=' + str(y))
-          all_inputs.append(y)
-
-        # Typecheck that all inputs are *either* value *or* symbolic.
-        # TODO(fchollet): this check could be removed in Eager mode?
-        if any(tensor_util.is_tensor(v) for v in all_inputs):
-          if not all(tensor_util.is_tensor(v) for v in all_inputs):
-            raise ValueError('Do not pass inputs that mix Numpy arrays and '
-                             'TensorFlow tensors. '
-                             'You passed: x=' + str(x) + '; y=' + str(y))
+        ValueError: in case of invalid arguments.
 
-        if self.run_eagerly or from_iterator:
-          target_tensors = None
-        else:
-          # Handle target tensors if any passed.
-          if not isinstance(y, (list, tuple)):
-            y = [y]
-          target_tensors = [v for v in y if _is_symbolic_tensor(v)]
-        is_compile_called = True
-        self.compile(
-            optimizer=self.optimizer,
-            loss=self.loss,
-            metrics=self._compile_metrics,
-            weighted_metrics=self._compile_weighted_metrics,
-            loss_weights=self.loss_weights,
-            target_tensors=target_tensors,
-            run_eagerly=self.run_eagerly)
+    Raises:
+        ValueError: In case the generator yields data in an invalid format.
+    """
+    if self._distribution_strategy:
+      raise NotImplementedError('`evaluate_generator` is not supported for '
+                                'models compiled with DistributionStrategy.')
+    return training_generator.evaluate_generator(
+        self,
+        generator,
+        steps=steps,
+        max_queue_size=max_queue_size,
+        workers=workers,
+        use_multiprocessing=use_multiprocessing,
+        verbose=verbose)
 
-    # In graph mode, if we had just set inputs and targets as symbolic tensors
-    # by invoking build and compile on the model respectively, we do not have to
-    # feed anything to the model. Model already has input and target data as
-    # part of the graph.
-    # Note: in this case, `any` and `all` are equivalent since we disallow
-    # mixed symbolic/value inputs.
-    if (not self.run_eagerly and is_build_called and is_compile_called and
-        not from_iterator and any(_is_symbolic_tensor(v) for v in all_inputs)):
-      return [], [], []
+  def predict_generator(self,
+                        generator,
+                        steps=None,
+                        max_queue_size=10,
+                        workers=1,
+                        use_multiprocessing=False,
+                        verbose=0):
+    """Generates predictions for the input samples from a data generator.
 
-    # What follows is input validation and standardization to list format,
-    # in the case where all inputs are value arrays.
+    The generator should return the same kind of data as accepted by
+    `predict_on_batch`.
 
-    if self.run_eagerly:
-      # In eager mode, do not do shape validation
-      # since the network has no input nodes (placeholders) to be fed.
-      feed_input_names = self.input_names
-      feed_input_shapes = None
-    elif not self._is_graph_network:
-      # Case: symbolic-mode subclassed network. Do not do shape validation.
-      feed_input_names = self._feed_input_names
-      feed_input_shapes = None
-    else:
-      # Case: symbolic-mode graph network.
-      # In this case, we run extensive shape validation checks.
-      feed_input_names = self._feed_input_names
-      feed_input_shapes = self._feed_input_shapes
+    Arguments:
+        generator: Generator yielding batches of input samples
+            or an instance of `keras.utils.Sequence` object in order to
+            avoid duplicate data when using multiprocessing.
+        steps: Total number of steps (batches of samples)
+            to yield from `generator` before stopping.
+            Optional for `Sequence`: if unspecified, will use
+            the `len(generator)` as a number of steps.
+        max_queue_size: Maximum size for the generator queue.
+        workers: Integer. Maximum number of processes to spin up
+            when using process-based threading.
+            If unspecified, `workers` will default to 1. If 0, will
+            execute the generator on the main thread.
+        use_multiprocessing: Boolean.
+            If `True`, use process-based threading.
+            If unspecified, `use_multiprocessing` will default to `False`.
+            Note that because this implementation relies on multiprocessing,
+            you should not pass non-picklable arguments to the generator
+            as they can't be passed easily to children processes.
+        verbose: verbosity mode, 0 or 1.
 
-    # Standardize the inputs.
-    x = training_utils.standardize_input_data(
-        x,
-        feed_input_names,
-        feed_input_shapes,
-        check_batch_axis=False,  # Don't enforce the batch size.
-        exception_prefix='input')
+    Returns:
+        Numpy array(s) of predictions.
 
-    if y is not None:
-      if not self._is_graph_network:
-        feed_output_names = self._feed_output_names
-        feed_output_shapes = None
-        # Sample weighting not supported in this case.
-        # TODO(fchollet): consider supporting it.
-        feed_sample_weight_modes = [None for _ in self.outputs]
-      else:
-        feed_output_names = self._feed_output_names
-        feed_sample_weight_modes = self._feed_sample_weight_modes
-        feed_output_shapes = []
-        for output_shape, loss_fn in zip(self._feed_output_shapes,
-                                         self._feed_loss_fns):
-          if loss_fn is losses.sparse_categorical_crossentropy:
-            if K.image_data_format() == 'channels_first':
-              feed_output_shapes.append(
-                  (output_shape[0], 1) + output_shape[2:])
-            else:
-              feed_output_shapes.append(output_shape[:-1] + (1,))
-          elif (not hasattr(loss_fn, '__name__') or
-                getattr(losses, loss_fn.__name__, None) is None):
-            # If `loss_fn` is not a function (e.g. callable class)
-            # or if it not in the `losses` module, then
-            # it is a user-defined loss and we make no assumptions
-            # about it.
-            feed_output_shapes.append(None)
-          else:
-            feed_output_shapes.append(output_shape)
+    Raises:
+        ValueError: In case the generator yields data in an invalid format.
+    """
+    if self._distribution_strategy:
+      raise NotImplementedError('`predict_generator` is not supported for '
+                                'models compiled with DistributionStrategy.')
+    return training_generator.predict_generator(
+        self,
+        generator,
+        steps=steps,
+        max_queue_size=max_queue_size,
+        workers=workers,
+        use_multiprocessing=use_multiprocessing,
+        verbose=verbose)
 
-      # Standardize the outputs.
-      y = training_utils.standardize_input_data(
-          y,
-          feed_output_names,
-          # Don't enforce target shapes to match output shapes.
-          # Precise checks will be run in `check_loss_and_target_compatibility`.
-          shapes=None,
-          check_batch_axis=False,  # Don't enforce the batch size.
-          exception_prefix='target')
+  def _get_callback_model(self):
+    """Returns the Callback Model for this Model."""
 
-      # Generate sample-wise weight values given the `sample_weight` and
-      # `class_weight` arguments.
-      sample_weights = training_utils.standardize_sample_weights(
-          sample_weight, feed_output_names)
-      class_weights = training_utils.standardize_class_weights(
-          class_weight, feed_output_names)
-      sample_weights = [
-          training_utils.standardize_weights(ref, sw, cw, mode)
-          for (ref, sw, cw, mode) in zip(y, sample_weights, class_weights,
-                                         feed_sample_weight_modes)
-      ]
-      # Check that all arrays have the same length.
-      if not self._distribution_strategy:
-        training_utils.check_array_lengths(x, y, sample_weights)
-        if self._is_graph_network and not self.run_eagerly:
-          # Additional checks to avoid users mistakenly using improper loss fns.
-          training_utils.check_loss_and_target_compatibility(
-              y, self._feed_loss_fns, feed_output_shapes)
-    else:
-      y = []
-      sample_weights = []
+    if hasattr(self, '_replicated_model') and self._replicated_model:
+      # When using training_distributed, we set the callback model
+      # to an instance of the `DistributedModel` that we create in
+      # the `compile` call. The `DistributedModel` is initialized
+      # with the first replicated model. We need to set the callback
+      # model to a DistributedModel to allow us to override saving
+      # and loading weights when we checkpoint the model during training.
+      return self._replicated_model
+    if hasattr(self, 'callback_model') and self.callback_model:
+      return self.callback_model
+    return self
 
-    if self.stateful and batch_size:
-      # Check that for stateful networks, number of samples is a multiple
-      # of the static batch size.
-      if x[0].shape[0] % batch_size != 0:
-        raise ValueError('In a stateful network, '
-                         'you should only pass inputs with '
-                         'a number of samples that can be '
-                         'divided by the batch size. Found: ' +
-                         str(x[0].shape[0]) + ' samples')
+  def _make_callback_model(self, grouped_model):
+    first_replicated_model = self._distribution_strategy.unwrap(
+        grouped_model)[0]
+    # We initialize the callback model with the first replicated model.
+    self._replicated_model = DistributedCallbackModel(first_replicated_model)
+    self._replicated_model.set_original_model(self)
 
-    # If dictionary inputs were provided, we return a dictionary as well.
-    if dict_inputs:
-      x = dict(zip(feed_input_names, x))
-    return x, y, sample_weights
+  def _validate_or_infer_batch_size(self, batch_size, steps, x):
+    """Validates that the `batch_size` provided is consistent with InputLayer.
 
-  @checkpointable.no_automatic_dependency_tracking
-  def _set_inputs(self, inputs, outputs=None, training=None):
-    """Set model's input and output specs based on the input data received.
+    It's possible that the user specified a static batch size in their
+    InputLayer. If so, this method checks the provided `batch_size` and `x`
+    arguments are consistent with this static batch size. Also, if
+    `batch_size` is `None`, this method will attempt to infer the batch size
+    from the static batch size of the InputLayer.
 
-    This is to be used for Model subclasses, which do not know at instantiation
-    time what their inputs look like.
+    Arguments:
+      batch_size: The batch_size provided as an argument to
+        fit/evaluate/predict.
+      steps: The steps provided as an argument to fit/evaluate/predict.
+      x: The data passed as `x` to fit/evaluate/predict.
 
-    Args:
-      inputs: Single array, or list of arrays. The arrays could be placeholders,
-        Numpy arrays, data tensors, or TensorShapes.
-        - if placeholders: the model is built on top of these placeholders,
-          and we expect Numpy data to be fed for them when calling `fit`/etc.
-        - if Numpy data or TensorShapes: we create placeholders matching the
-          TensorShapes or shapes of the Numpy arrays. We expect Numpy data to be
-          fed for these placeholders when calling `fit`/etc.
-        - if data tensors: the model is built on top of these tensors.
-          We do not expect any Numpy data to be provided when calling `fit`/etc.
-      outputs: None, a data tensor, or a list of tensors. If None, the
-        outputs will be determined by invoking `self.call()`, otherwise the
-        provided value will be used.
-      training: Boolean or None. Only relevant in symbolic mode. Specifies
-        whether to build the model's graph in inference mode (False), training
-        mode (True), or using the Keras learning phase (None).
-    Raises:
-      ValueError: If dict inputs are passed to a Sequential Model where the
-        first layer isn't FeatureLayer.
+    Returns:
+      The validated batch_size, auto-inferred from the first layer if not
+      provided.
     """
-    if self.inputs:
-      raise ValueError('Model inputs are already set.')
+    layers = super(Model, self).layers  # Avoids the override in Sequential.
+    if layers:
+      first_layer = layers[0]
+      static_batch_size = training_utils.get_static_batch_size(first_layer)
+      if static_batch_size is not None:
 
-    if self.__class__.__name__ == 'Sequential' and not self.built:
-      if tensor_util.is_tensor(inputs):
-        input_shape = (None,) + tuple(inputs.shape.as_list()[1:])
-      elif isinstance(inputs, tensor_shape.TensorShape):
-        input_shape = (None,) + tuple(inputs.as_list()[1:])
-      elif isinstance(inputs, dict):
-        # We assert that the first layer is a FeatureLayer.
-        if not training_utils.is_feature_layer(self.layers[0]):
-          raise ValueError('Passing a dictionary input to a Sequential Model '
-                           'which doesn\'t have FeatureLayer as the first layer'
-                           ' is an error.')
-        input_shape = (None,)
-      else:
-        input_shape = (None,) + tuple(inputs.shape[1:])
-      self._build_input_shape = input_shape
+        # Check `batch_size` argument is consistent with InputLayer.
+        if batch_size is not None and batch_size != static_batch_size:
+          raise ValueError('The `batch_size` argument value {} is incompatible '
+                           'with the specified batch size of your Input Layer: '
+                           '{}'.format(batch_size, static_batch_size))
 
-    # On-the-fly setting of symbolic model inputs (either by using the tensor
-    # provided, or by creating a placeholder if Numpy data was provided).
-    model_inputs = training_utils.ModelInputs(inputs)
-    inputs = model_inputs.get_symbolic_inputs()
-    self.inputs = model_inputs.get_symbolic_inputs(return_single_as_list=True)
-    self.input_names = model_inputs.get_input_names()
+        # Check Dataset/Iterator batch size is consistent with InputLayer.
+        if isinstance(x, (dataset_ops.DatasetV2, iterator_ops.Iterator,
+                          iterator_ops.EagerIterator)):
+          ds_batch_size = tensor_shape.as_dimension(
+              nest.flatten(x.output_shapes)[0][0]).value
+          if ds_batch_size is not None and ds_batch_size != static_batch_size:
+            raise ValueError('The batch output shape of your `Dataset` is {}, '
+                             'which is incompatible with the specified batch '
+                             'size of your Input Layer: {}'.format(
+                                 ds_batch_size, static_batch_size))
 
-    self._feed_inputs = []
-    self._feed_input_names = []
-    self._feed_input_shapes = []
+        # Set inferred batch size from the InputLayer.
+        if steps is None:
+          batch_size = static_batch_size
 
-    for k, v in model_inputs.as_dict():
-      if K.is_placeholder(v):
-        self._feed_inputs.append(v)
-        self._feed_input_names.append(k)
-        self._feed_input_shapes.append(K.int_shape(v))
+    if batch_size is None and steps is None:
+      # Backwards compatibility
+      batch_size = 32
+    return batch_size
+
+  @property
+  def _default_save_signature(self):
+    return training_utils.trace_model_call(self)
 
-    # TODO(fchollet): consider calling `_maybe_build` before calling the model.
+  def _set_sample_weight_attributes(self, sample_weight_mode,
+                                    skip_target_weighing_indices):
+    """Sets sample weight related attributes on the model."""
+    sample_weights, sample_weight_modes = training_utils.prepare_sample_weights(
+        self.output_names, sample_weight_mode, skip_target_weighing_indices)
+    self.sample_weights = sample_weights
+    self.sample_weight_modes = sample_weight_modes
+    self._feed_sample_weight_modes = [
+        sample_weight_modes[i]
+        for i in range(len(self.outputs))
+        if i not in skip_target_weighing_indices
+    ]
+    self._feed_sample_weights = [
+        sample_weights[i]
+        for i in range(len(sample_weights))
+        if i not in skip_target_weighing_indices
+    ]
 
-    if outputs is None:
-      # Obtain symbolic outputs by calling the model.
-      with K.get_graph().as_default():
-        if self._expects_training_arg:
-          outputs = self.call(inputs, training=training)
-        else:
-          outputs = self.call(inputs)
+  def _cache_output_metric_attributes(self, metrics, weighted_metrics):
+    """Caches metric name and function attributes for every model output."""
+    output_shapes = [
+        None if output is None else output.get_shape().as_list()
+        for output in self.outputs
+    ]
+    self._per_output_metrics = training_utils.collect_per_output_metric_info(
+        metrics, self.output_names, output_shapes, self.loss_functions)
+    self._per_output_weighted_metrics = \
+        training_utils.collect_per_output_metric_info(
+            weighted_metrics, self.output_names, output_shapes,
+            self.loss_functions, self.sample_weights)
 
-    outputs = nest.flatten(outputs)
-    self.outputs = outputs
-    self.output_names = training_utils.generic_output_names(outputs)
-    self.built = True
+  def _add_unique_metric_name(self, metric_name, output_index):
+    """Makes the metric name unique and adds it to the model's metric name list.
 
-  def fit(self,
-          x=None,
-          y=None,
-          batch_size=None,
-          epochs=1,
-          verbose=1,
-          callbacks=None,
-          validation_split=0.,
-          validation_data=None,
-          shuffle=True,
-          class_weight=None,
-          sample_weight=None,
-          initial_epoch=0,
-          steps_per_epoch=None,
-          validation_steps=None,
-          max_queue_size=10,
-          workers=1,
-          use_multiprocessing=False,
-          **kwargs):
-    """Trains the model for a fixed number of epochs (iterations on a dataset).
+      If there are multiple outputs for which the metrics are calculated, the
+      metric names have to be made unique by appending an integer.
 
     Arguments:
-        x: Input data. It could be:
-          - A Numpy array (or array-like), or a list of arrays
-            (in case the model has multiple inputs).
-          - A TensorFlow tensor, or a list of tensors
-            (in case the model has multiple inputs).
-          - A dict mapping input names to the corresponding array/tensors,
-            if the model has named inputs.
-          - A `tf.data` dataset or a dataset iterator. Should return a tuple
-            of either `(inputs, targets)` or
-            `(inputs, targets, sample_weights)`.
-          - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
-            or `(inputs, targets, sample weights)`.
-        y: Target data. Like the input data `x`,
-          it could be either Numpy array(s) or TensorFlow tensor(s).
-          It should be consistent with `x` (you cannot have Numpy inputs and
-          tensor targets, or inversely). If `x` is a dataset, dataset
-          iterator, generator, or `keras.utils.Sequence` instance, `y` should
-          not be specified (since targets will be obtained from `x`).
-        batch_size: Integer or `None`.
-            Number of samples per gradient update.
-            If unspecified, `batch_size` will default to 32.
-            Do not specify the `batch_size` if your data is in the
-            form of symbolic tensors, dataset, dataset iterators,
-            generators, or `keras.utils.Sequence` instances (since they generate
-            batches).
-        epochs: Integer. Number of epochs to train the model.
-            An epoch is an iteration over the entire `x` and `y`
-            data provided.
-            Note that in conjunction with `initial_epoch`,
-            `epochs` is to be understood as "final epoch".
-            The model is not trained for a number of iterations
-            given by `epochs`, but merely until the epoch
-            of index `epochs` is reached.
-        verbose: Integer. 0, 1, or 2. Verbosity mode.
-            0 = silent, 1 = progress bar, 2 = one line per epoch.
-        callbacks: List of `keras.callbacks.Callback` instances.
-            List of callbacks to apply during training.
-            See [callbacks](/api_docs/python/tf/keras/callbacks).
-        validation_split: Float between 0 and 1.
-            Fraction of the training data to be used as validation data.
-            The model will set apart this fraction of the training data,
-            will not train on it, and will evaluate
-            the loss and any model metrics
-            on this data at the end of each epoch.
-            The validation data is selected from the last samples
-            in the `x` and `y` data provided, before shuffling. This argument is
-            not supported when `x` is a dataset, dataset iterator, generator or
-           `keras.utils.Sequence` instance.
-        validation_data: Data on which to evaluate
-            the loss and any model metrics at the end of each epoch.
-            The model will not be trained on this data.
-            `validation_data` will override `validation_split`.
-            `validation_data` could be:
-              - tuple `(x_val, y_val)` of Numpy arrays or tensors
-              - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
-              - dataset or a dataset iterator
-            For the first two cases, `batch_size` must be provided.
-            For the last case, `validation_steps` must be provided.
-        shuffle: Boolean (whether to shuffle the training data
-            before each epoch) or str (for 'batch').
-            'batch' is a special option for dealing with the
-            limitations of HDF5 data; it shuffles in batch-sized chunks.
-            Has no effect when `steps_per_epoch` is not `None`.
-        class_weight: Optional dictionary mapping class indices (integers)
-            to a weight (float) value, used for weighting the loss function
-            (during training only).
-            This can be useful to tell the model to
-            "pay more attention" to samples from
-            an under-represented class.
-        sample_weight: Optional Numpy array of weights for
-            the training samples, used for weighting the loss function
-            (during training only). You can either pass a flat (1D)
-            Numpy array with the same length as the input samples
-            (1:1 mapping between weights and samples),
-            or in the case of temporal data,
-            you can pass a 2D array with shape
-            `(samples, sequence_length)`,
-            to apply a different weight to every timestep of every sample.
-            In this case you should make sure to specify
-            `sample_weight_mode="temporal"` in `compile()`. This argument is not
-            supported when `x` is a dataset, dataset iterator, generator, or
-           `keras.utils.Sequence` instance, instead provide the sample_weights
-            as the third element of `x`.
-        initial_epoch: Integer.
-            Epoch at which to start training
-            (useful for resuming a previous training run).
-        steps_per_epoch: Integer or `None`.
-            Total number of steps (batches of samples)
-            before declaring one epoch finished and starting the
-            next epoch. When training with input tensors such as
-            TensorFlow data tensors, the default `None` is equal to
-            the number of samples in your dataset divided by
-            the batch size, or 1 if that cannot be determined.
-        validation_steps: Only relevant if `validation_data` is provided and
-            is a dataset or dataset iterator. Total number of steps (batches of
-            samples) to draw before stopping when performing validation
-            at the end of every epoch.
-        max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
-            input only. Maximum size for the generator queue.
-            If unspecified, `max_queue_size` will default to 10.
-        workers: Integer. Used for generator or `keras.utils.Sequence` input
-            only. Maximum number of processes to spin up
-            when using process-based threading. If unspecified, `workers`
-            will default to 1. If 0, will execute the generator on the main
-            thread.
-        use_multiprocessing: Boolean. Used for generator or
-            `keras.utils.Sequence` input only. If `True`, use process-based
-            threading. If unspecified, `use_multiprocessing` will default to
-            `False`. Note that because this implementation relies on
-            multiprocessing, you should not pass non-picklable arguments to
-            the generator as they can't be passed easily to children processes.
-        **kwargs: Used for backwards compatibility.
+      metric_name: Metric name that corresponds to the metric specified by the
+          user. For example: 'acc'.
+      output_index: The index of the model output for which the metric name is
+        being added.
 
     Returns:
-        A `History` object. Its `History.history` attribute is
-        a record of training loss values and metrics values
-        at successive epochs, as well as validation loss values
-        and validation metrics values (if applicable).
-
-    Raises:
-        RuntimeError: If the model was never compiled.
-        ValueError: In case of mismatch between the provided input data
-            and what the model expects.
+      string, name of the model's unique metric name
     """
-    # TODO(fchollet): this method may be creating reference cycles, which would
-    # lead to accumulating garbage in memory when called in a loop. Investigate.
-    if data_utils.is_generator_or_sequence(x):
-      training_utils.check_generator_arguments(y, sample_weight)
-      return self.fit_generator(
-          x,
-          steps_per_epoch=steps_per_epoch,
-          epochs=epochs,
-          verbose=verbose,
-          callbacks=callbacks,
-          validation_data=validation_data,
-          validation_steps=validation_steps,
-          class_weight=class_weight,
-          max_queue_size=max_queue_size,
-          workers=workers,
-          use_multiprocessing=use_multiprocessing,
-          shuffle=shuffle,
-          initial_epoch=initial_epoch)
+    if len(self.output_names) > 1:
+      metric_name = '%s_%s' % (self.output_names[output_index], metric_name)
+    j = 1
+    base_metric_name = metric_name
+    while metric_name in self._compile_metrics_names:
+      metric_name = '%s_%d' % (base_metric_name, j)
+      j += 1
 
-    # Legacy support
-    if 'nb_epoch' in kwargs:
-      logging.warning(
-          'The `nb_epoch` argument in `fit` '
-          'has been renamed `epochs`.')
-      epochs = kwargs.pop('nb_epoch')
-    if kwargs:
-      raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))
+    return metric_name
 
-    # Validate and standardize user data.
-    if self._distribution_strategy:
-      distributed_training_utils.validate_callbacks(callbacks, self.optimizer,
-                                                    self._distribution_strategy)
+  @property
+  def _all_metrics_tensors(self):
+    """Returns the network's symbolic metric tensors."""
+    metrics_tensors = {}
+    if self._is_compiled:
+      metrics_tensors.update(self._compile_metrics_tensors)
+    metrics_tensors.update(super(Model, self)._all_metrics_tensors)
+    return metrics_tensors
 
-      distributed_training_utils.validate_inputs(
-          x, y, self._distribution_strategy)
+  @property
+  def _all_stateful_metrics_tensors(self):
+    """Returns the network's symbolic metric tensors."""
+    metrics_tensors = {}
+    if self._is_compiled:
+      metrics_tensors.update(self._compile_stateful_metrics_tensors)
+    metrics_tensors.update(super(Model, self)._all_metrics_tensors)
+    return metrics_tensors
 
-      first_x_value = nest.flatten(x)[0]
-      if isinstance(first_x_value, np.ndarray):
-        steps_per_epoch, batch_size = (
-            distributed_training_utils.get_input_params(
-                self._distribution_strategy, first_x_value, steps_per_epoch,
-                batch_size, is_training=True))
+  def _init_metric_attributes(self):
+    """Initialized model metric attributes."""
+    # List of all metric names in the model.
+    self._compile_metrics_names = ['loss']
+    # List of stateful metric functions. Used for resetting metric state during
+    # training/eval.
+    # This includes loss functions when there are multiple outputs.
+    self._compile_stateful_metric_functions = []
+    # Dict of all aggregated metric result tensors. This includes aggregated
+    # loss result tensors when there are multiple outputs.
+    self._compile_stateful_metrics_tensors = {}
+    # Dict of all metric result tensors (aggregated or not - based on the
+    # values given in compile.). This includes aggregated loss result tensors
+    # when there are multiple outputs.
+    self._compile_metrics_tensors = {}
 
-    batch_size = self._validate_or_infer_batch_size(batch_size, steps_per_epoch,
-                                                    x)
+  def _set_per_output_metric_attributes(self, metrics_dict, output_index):
+    """Sets the metric attributes on the model for the given output.
 
-    x, y, sample_weights = self._standardize_user_data(
-        x,
-        y,
-        sample_weight=sample_weight,
-        class_weight=class_weight,
-        batch_size=batch_size,
-        check_steps=True,
-        steps_name='steps_per_epoch',
-        steps=steps_per_epoch,
-        validation_split=validation_split,
-        shuffle=shuffle)
+    Arguments:
+      metrics_dict: A dict with metric names as keys and metric fns as values.
+      output_index: The index of the model output for which the metric
+        attributes are added.
 
-    # Prepare validation data.
-    if validation_data:
-      if (isinstance(validation_data, iterator_ops.Iterator) or
-          isinstance(validation_data, iterator_ops.EagerIterator) or
-          isinstance(validation_data, dataset_ops.DatasetV2)):
-        val_x = validation_data
-        val_y = None
-        val_sample_weight = None
-      elif len(validation_data) == 2:
-        val_x, val_y = validation_data  # pylint: disable=unpacking-non-sequence
-        val_sample_weight = None
-      elif len(validation_data) == 3:
-        val_x, val_y, val_sample_weight = validation_data  # pylint: disable=unpacking-non-sequence
-      else:
-        raise ValueError(
-            'When passing a `validation_data` argument, '
-            'it must contain either 2 items (x_val, y_val), '
-            'or 3 items (x_val, y_val, val_sample_weights), '
-            'or alternatively it could be a dataset or a '
-            'dataset or a dataset iterator. '
-            'However we received `validation_data=%s`' % validation_data)
+    Returns:
+      Metrics dict updated with unique metric names as keys.
+    """
+    updated_metrics_dict = collections.OrderedDict()
+    for metric_name, (metric_fn, stateful_metric_fn) in metrics_dict.items():
+      metric_name = self._add_unique_metric_name(metric_name, output_index)
+      updated_metrics_dict[metric_name] = (metric_fn, stateful_metric_fn)
+      # Keep track of metric name, function and stateful function.
+      self._compile_metrics_names.append(metric_name)
+      self._compile_stateful_metric_functions.append(stateful_metric_fn)
+    return updated_metrics_dict
 
-      # Validate and standardize validation data.
-      if self._distribution_strategy:
-        distributed_training_utils.validate_inputs(
-            val_x, val_y, self._distribution_strategy)
-        first_valx_value = nest.flatten(val_x)[0]
-        if isinstance(first_valx_value, np.ndarray):
-          validation_steps, _ = distributed_training_utils.get_input_params(
-              self._distribution_strategy, first_valx_value, validation_steps,
-              batch_size)
+  def _set_metric_attributes(self, outputs, skip_target_indices=None):
+    """Sets the metric attributes on the model for all the model outputs."""
+    skip_target_indices = skip_target_indices or []
+    updated_per_output_metrics = []
+    updated_per_output_weighted_metrics = []
+    for i in range(len(outputs)):
+      if i in skip_target_indices:
+        updated_per_output_metrics.append(self._per_output_metrics[i])
+        updated_per_output_weighted_metrics.append(
+            self._per_output_weighted_metrics[i])
+        continue
+      updated_per_output_metrics.append(
+          self._set_per_output_metric_attributes(self._per_output_metrics[i],
+                                                 i))
+      updated_per_output_weighted_metrics.append(
+          self._set_per_output_metric_attributes(
+              self._per_output_weighted_metrics[i], i))
 
-      val_x, val_y, val_sample_weights = self._standardize_user_data(
-          val_x,
-          val_y,
-          sample_weight=val_sample_weight,
-          batch_size=batch_size,
-          steps=validation_steps)
+    self._per_output_metrics = updated_per_output_metrics
+    self._per_output_weighted_metrics = updated_per_output_weighted_metrics
 
-    elif validation_split and 0. < validation_split < 1.:
-      if training_utils.has_symbolic_tensors(x):
-        raise ValueError('If your data is in the form of symbolic tensors, '
-                         'you cannot use `validation_split`.')
-      if hasattr(x[0], 'shape'):
-        split_at = int(x[0].shape[0] * (1. - validation_split))
-      else:
-        split_at = int(len(x[0]) * (1. - validation_split))
-      x, val_x = (slice_arrays(x, 0, split_at), slice_arrays(x, split_at))
-      y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at))
-      sample_weights, val_sample_weights = (slice_arrays(
-          sample_weights, 0, split_at), slice_arrays(sample_weights, split_at))
-    elif validation_steps:
-      val_x = []
-      val_y = []
-      val_sample_weights = []
-    else:
-      val_x = None
-      val_y = None
-      val_sample_weights = None
+  def _handle_per_output_metrics(self,
+                                 metrics_dict,
+                                 y_true,
+                                 y_pred,
+                                 mask,
+                                 weights=None,
+                                 return_stateful_result=True):
+    """Calls metric functions for a single output.
 
-    if (self.run_eagerly or (isinstance(x, iterator_ops.EagerIterator) and
-                             not self._distribution_strategy)):
-      return training_generator.fit_generator(
-          self, (x, y, sample_weights),
-          steps_per_epoch=steps_per_epoch,
-          batch_size=batch_size,
-          epochs=epochs,
-          shuffle=shuffle,
-          verbose=verbose,
-          callbacks=callbacks,
-          validation_data=validation_data,
-          validation_steps=validation_steps,
-          workers=0,
-          initial_epoch=initial_epoch)
-    elif distributed_training_utils.is_tpu_strategy(
-        self._distribution_strategy):
-      return training_distributed.experimental_fit_loop(
-          self,
-          x,
-          epochs=epochs,
-          verbose=verbose,
-          callbacks=callbacks,
-          val_iterator=val_x,
-          initial_epoch=initial_epoch,
-          steps_per_epoch=steps_per_epoch,
-          validation_steps=validation_steps)
-    else:
-      return training_arrays.fit_loop(
-          self,
-          x,
-          y,
-          sample_weights=sample_weights,
-          batch_size=batch_size,
-          epochs=epochs,
-          verbose=verbose,
-          callbacks=callbacks,
-          val_inputs=val_x,
-          val_targets=val_y,
-          val_sample_weights=val_sample_weights,
-          shuffle=shuffle,
-          initial_epoch=initial_epoch,
-          steps_per_epoch=steps_per_epoch,
-          validation_steps=validation_steps)
+    Arguments:
+      metrics_dict: A dict with metric names as keys and metric fns as values.
+      y_true: Target output.
+      y_pred: Predicted output.
+      mask: Computed mask value for the current output.
+      weights: Weights to be applied on the current output.
+      return_stateful_result: Boolean, indicates whether the stateful
+        (aggregated)/stateless metric result should be returned.
 
-  def evaluate(self,
-               x=None,
-               y=None,
-               batch_size=None,
-               verbose=1,
-               sample_weight=None,
-               steps=None,
-               max_queue_size=10,
-               workers=1,
-               use_multiprocessing=False):
-    """Returns the loss value & metrics values for the model in test mode.
+    Returns:
+      A list of metric result tensors.
+    """
+    metric_results = []
+    for metric_name, (metric_fn, stateful_fn) in metrics_dict.items():
+      with K.name_scope(metric_name):
 
-    Computation is done in batches.
+        def _call_stateful_fn(fn):
+          return training_utils.call_metric_function(
+              fn, y_true, y_pred, weights=weights, mask=mask)
 
-    Arguments:
-        x: Input data. It could be:
-          - A Numpy array (or array-like), or a list of arrays
-            (in case the model has multiple inputs).
-          - A TensorFlow tensor, or a list of tensors
-            (in case the model has multiple inputs).
-          - A dict mapping input names to the corresponding array/tensors,
-            if the model has named inputs.
-          - A `tf.data` dataset or a dataset iterator.
-          - A generator or `keras.utils.Sequence` instance.
-        y: Target data. Like the input data `x`,
-          it could be either Numpy array(s) or TensorFlow tensor(s).
-          It should be consistent with `x` (you cannot have Numpy inputs and
-          tensor targets, or inversely).
-          If `x` is a dataset, dataset iterator, generator or
-          `keras.utils.Sequence` instance, `y` should not be specified (since
-          targets will be obtained from the iterator/dataset).
-        batch_size: Integer or `None`.
-            Number of samples per gradient update.
-            If unspecified, `batch_size` will default to 32.
-            Do not specify the `batch_size` is your data is in the
-            form of symbolic tensors, dataset, dataset iterators,
-            generators, or `keras.utils.Sequence` instances (since they generate
-            batches).
-        verbose: 0 or 1. Verbosity mode.
-            0 = silent, 1 = progress bar.
-        sample_weight: Optional Numpy array of weights for
-            the test samples, used for weighting the loss function.
-            You can either pass a flat (1D)
-            Numpy array with the same length as the input samples
-            (1:1 mapping between weights and samples),
-            or in the case of temporal data,
-            you can pass a 2D array with shape
-            `(samples, sequence_length)`,
-            to apply a different weight to every timestep of every sample.
-            In this case you should make sure to specify
-            `sample_weight_mode="temporal"` in `compile()`. This argument is not
-            supported when `x` is a dataset or a dataset iterator, instead pass
-            sample weights as the third element of `x`.
-        steps: Integer or `None`.
-            Total number of steps (batches of samples)
-            before declaring the evaluation round finished.
-            Ignored with the default value of `None`.
-        max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
-            input only. Maximum size for the generator queue.
-            If unspecified, `max_queue_size` will default to 10.
-        workers: Integer. Used for generator or `keras.utils.Sequence` input
-            only. Maximum number of processes to spin up when using
-            process-based threading. If unspecified, `workers` will default
-            to 1. If 0, will execute the generator on the main thread.
-        use_multiprocessing: Boolean. Used for generator or
-            `keras.utils.Sequence` input only. If `True`, use process-based
-            threading. If unspecified, `use_multiprocessing` will default to
-            `False`. Note that because this implementation relies on
-            multiprocessing, you should not pass non-picklable arguments to
-            the generator as they can't be passed easily to children processes.
+        def _call_stateless_fn(fn):
+          weighted_metric_fn = training_utils.weighted_masked_objective(fn)
+          return weighted_metric_fn(y_true, y_pred, weights=weights, mask=mask)
+
+        def _track_metric_tensors(name, stateless_result, stateful_result):
+          self._compile_metrics_tensors[name] = stateless_result
+          self._compile_stateful_metrics_tensors[name] = stateful_result
+
+        if isinstance(metric_fn, metrics_module.Metric):
+          # If the given metric fn is stateful, call the fn and return result.
+          metric_result = _call_stateful_fn(metric_fn)
+          metric_results.append(metric_result)
+          if not self.run_eagerly:
+            _track_metric_tensors(metric_name, metric_result, metric_result)
+        elif self.run_eagerly:
+          # In eager mode, if the given metric fn is not stateful, we invoke the
+          # given fn or its stateful version based on the given flag.
+          if return_stateful_result:
+            metric_result = _call_stateful_fn(stateful_fn)
+          else:
+            metric_result = _call_stateless_fn(metric_fn)
+          metric_results.append(metric_result)
+        else:
+          # In graph mode, we build the sub-graph for both the stateful and the
+          # stateless fns.
+          stateful_metric_result = _call_stateful_fn(stateful_fn)
+          metric_result = _call_stateless_fn(metric_fn)
+          _track_metric_tensors(metric_name, metric_result,
+                                stateful_metric_result)
+
+    return metric_results
+
+  def _handle_metrics(self,
+                      outputs,
+                      skip_target_indices=None,
+                      targets=None,
+                      sample_weights=None,
+                      masks=None,
+                      return_stateful_result=True):
+    """Handles calling metric functions.
+
+    Arguments:
+      outputs: List of outputs (predictions).
+      skip_target_indices: Optional. List of target ids to skip.
+      targets: List of targets.
+      sample_weights: Optional list of sample weight arrays.
+      masks: List of computed output mask values.
+      return_stateful_result: Boolean, indicates whether the stateful
+        (aggregated)/stateless metric result should be returned.
 
     Returns:
-        Scalar test loss (if the model has a single output and no metrics)
-        or list of scalars (if the model has multiple outputs
-        and/or metrics). The attribute `model.metrics_names` will give you
-        the display labels for the scalar outputs.
+      A list of metric result tensors.
+    """
+    skip_target_indices = skip_target_indices or []
+    metric_results = []
+    with K.name_scope('metrics'):
+      # Invoke all metrics added using `compile`.
+      for i in range(len(outputs)):
+        if i in skip_target_indices:
+          continue
+        output = outputs[i] if outputs else None
+        target = targets[i] if targets else None
+        output_mask = masks[i] if masks else None
+        metric_results.extend(
+            self._handle_per_output_metrics(
+                self._per_output_metrics[i],
+                target,
+                output,
+                output_mask,
+                return_stateful_result=return_stateful_result))
+        metric_results.extend(
+            self._handle_per_output_metrics(
+                self._per_output_weighted_metrics[i],
+                target,
+                output,
+                output_mask,
+                weights=sample_weights[i],
+                return_stateful_result=return_stateful_result))
 
-    Raises:
-        ValueError: in case of invalid arguments.
+    # Add metric results from the `add_metric` metrics in eager mode.
+    if context.executing_eagerly():
+      for m in self.metrics:
+        if m not in self._compile_stateful_metric_functions:
+          metric_results.append(m.result())
+    return metric_results
+
+  def _check_trainable_weights_consistency(self):
+    """Check trainable weights count consistency.
+
+    This will raise a warning if `trainable_weights` and
+    `_collected_trainable_weights` are inconsistent (i.e. have different
+    number of parameters).
+    Inconsistency will typically arise when one modifies `model.trainable`
+    without calling `model.compile` again.
     """
-    if data_utils.is_generator_or_sequence(x):
-      training_utils.check_generator_arguments(y, sample_weight)
-      return self.evaluate_generator(
-          x,
-          steps=steps,
-          verbose=verbose,
-          max_queue_size=max_queue_size,
-          workers=workers,
-          use_multiprocessing=use_multiprocessing)
-    # Validate and standardize user data.
-    if self._distribution_strategy:
-      distributed_training_utils.validate_inputs(
-          x, y, self._distribution_strategy)
-      first_x_value = nest.flatten(x)[0]
-      if isinstance(first_x_value, np.ndarray):
-        steps, batch_size = distributed_training_utils.get_input_params(
-            self._distribution_strategy, first_x_value, steps, batch_size)
+    if not hasattr(self, '_collected_trainable_weights'):
+      return
 
-    batch_size = self._validate_or_infer_batch_size(batch_size, steps, x)
+    if len(self.trainable_weights) != len(self._collected_trainable_weights):
+      logging.log_first_n(
+          logging.WARN, 'Discrepancy between trainable weights and collected'
+          ' trainable weights, did you set `model.trainable`'
+          ' without calling `model.compile` after ?', 1)
 
-    x, y, sample_weights = self._standardize_user_data(
-        x,
-        y,
-        sample_weight=sample_weight,
-        batch_size=batch_size,
-        check_steps=True,
-        steps_name='steps',
-        steps=steps)
+  def _make_train_function_helper(self, fn_name, outputs, metric_updates=None):
+    if not hasattr(self, fn_name):
+      raise RuntimeError('You must compile your model before using it.')
+    self._check_trainable_weights_consistency()
+    if getattr(self, fn_name) is None:
+      inputs = (self._feed_inputs +
+                self._feed_targets +
+                self._feed_sample_weights)
+      if not isinstance(K.symbolic_learning_phase(), int):
+        inputs += [K.symbolic_learning_phase()]
 
-    if (self.run_eagerly or (isinstance(x, iterator_ops.EagerIterator) and
-                             not self._distribution_strategy)):
-      return training_generator.evaluate_generator(
-          self, (x, y, sample_weights),
-          steps=steps,
-          batch_size=batch_size,
-          verbose=verbose,
-          workers=0)
-    elif distributed_training_utils.is_tpu_strategy(
-        self._distribution_strategy):
-      return training_distributed.experimental_test_loop(
-          self, iterator=x, verbose=verbose, steps=steps)
-    else:
-      return training_arrays.test_loop(
-          self,
-          inputs=x,
-          targets=y,
-          sample_weights=sample_weights,
-          batch_size=batch_size,
-          verbose=verbose,
-          steps=steps)
+      with K.get_graph().as_default():
+        with K.name_scope('training'):
+          with K.name_scope(self.optimizer.__class__.__name__):
+            # Training updates
+            updates = self.optimizer.get_updates(
+                params=self._collected_trainable_weights, loss=self.total_loss)
+      # Unconditional updates
+      updates += self.get_updates_for(None)
+      # Conditional updates relevant to this model
+      updates += self.get_updates_for(self.inputs)
+      # Add stateful metrics updates.
+      if metric_updates is not None:
+        updates += metric_updates
 
-  def predict(self,
-              x,
-              batch_size=None,
-              verbose=0,
-              steps=None,
-              max_queue_size=10,
-              workers=1,
-              use_multiprocessing=False):
-    """Generates output predictions for the input samples.
+      with K.name_scope('training'):
+        # Gets loss and metrics. Updates weights at each call.
+        fn = K.function(
+            inputs,
+            outputs,
+            updates=updates,
+            name='train_function',
+            **self._function_kwargs)
+        setattr(self, fn_name, fn)
 
-    Computation is done in batches.
+  def _make_train_function(self):
+    metrics_tensors = [
+        self._all_metrics_tensors[m] for m in self.metrics_names[1:]
+    ]
+    self._make_train_function_helper('train_function',
+                                     [self.total_loss] + metrics_tensors)
 
-    Arguments:
-         x: Input samples. It could be:
-          - A Numpy array (or array-like), or a list of arrays
-            (in case the model has multiple inputs).
-          - A TensorFlow tensor, or a list of tensors
-            (in case the model has multiple inputs).
-          - A `tf.data` dataset or a dataset iterator.
-          - A generator or `keras.utils.Sequence` instance.
-        batch_size: Integer or `None`.
-            Number of samples per gradient update.
-            If unspecified, `batch_size` will default to 32.
-            Do not specify the `batch_size` is your data is in the
-            form of symbolic tensors, dataset, dataset iterators,
-            generators, or `keras.utils.Sequence` instances (since they generate
-            batches).
-        verbose: Verbosity mode, 0 or 1.
-        steps: Total number of steps (batches of samples)
-            before declaring the prediction round finished.
-            Ignored with the default value of `None`.
-        max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
-            input only. Maximum size for the generator queue.
-            If unspecified, `max_queue_size` will default to 10.
-        workers: Integer. Used for generator or `keras.utils.Sequence` input
-            only. Maximum number of processes to spin up when using
-            process-based threading. If unspecified, `workers` will default
-            to 1. If 0, will execute the generator on the main thread.
-        use_multiprocessing: Boolean. Used for generator or
-            `keras.utils.Sequence` input only. If `True`, use process-based
-            threading. If unspecified, `use_multiprocessing` will default to
-            `False`. Note that because this implementation relies on
-            multiprocessing, you should not pass non-picklable arguments to
-            the generator as they can't be passed easily to children processes.
+  def _make_fit_function(self):
+    metrics_tensors = [
+        self._all_stateful_metrics_tensors[m] for m in self.metrics_names[1:]
+    ]
+    self._make_train_function_helper(
+        '_fit_function', [self.total_loss] + metrics_tensors)
+
+  def _make_test_function_helper(self, fn_name, outputs, metric_updates=None):
+    if not hasattr(self, fn_name):
+      raise RuntimeError('You must compile your model before using it.')
+    if getattr(self, fn_name) is None:
+      inputs = (self._feed_inputs +
+                self._feed_targets +
+                self._feed_sample_weights)
 
+      with K.name_scope('evaluation'):
+        updates = self.state_updates
+        # Add stateful metrics updates.
+        if metric_updates is not None:
+          updates += metric_updates
+        # Return loss and metrics, no gradient updates.
+        # Does update the network states.
+        fn = K.function(
+            inputs,
+            outputs,
+            updates=updates,
+            name='test_function',
+            **self._function_kwargs)
+        setattr(self, fn_name, fn)
 
-    Returns:
-        Numpy array(s) of predictions.
+  def _make_test_function(self):
+    metrics_tensors = [
+        self._all_metrics_tensors[m] for m in self.metrics_names[1:]
+    ]
+    self._make_test_function_helper('test_function',
+                                    [self.total_loss] + metrics_tensors)
 
-    Raises:
-        ValueError: In case of mismatch between the provided
-            input data and the model's expectations,
-            or in case a stateful model receives a number of samples
-            that is not a multiple of the batch size.
-    """
-    if data_utils.is_generator_or_sequence(x):
-      return self.predict_generator(
-          x,
-          steps=steps,
-          verbose=verbose,
-          max_queue_size=max_queue_size,
-          workers=workers,
-          use_multiprocessing=use_multiprocessing)
-    if self._distribution_strategy:
-      distributed_training_utils.validate_inputs(
-          x, None, self._distribution_strategy)
-      first_x_value = nest.flatten(x)[0]
-      if isinstance(first_x_value, np.ndarray):
-        steps, batch_size = distributed_training_utils.get_input_params(
-            self._distribution_strategy, first_x_value, steps, batch_size)
+  def _make_eval_function(self):
+    metrics_tensors = [
+        self._all_stateful_metrics_tensors[m] for m in self.metrics_names[1:]
+    ]
+    self._make_test_function_helper(
+        '_eval_function', [self.total_loss] + metrics_tensors)
 
-    batch_size = self._validate_or_infer_batch_size(batch_size, steps, x)
+  def _make_predict_function(self):
+    if not hasattr(self, 'predict_function'):
+      self.predict_function = None
+    if self.predict_function is None:
+      inputs = self._feed_inputs
+      # Gets network outputs. Does not update weights.
+      # Does update the network states.
+      kwargs = getattr(self, '_function_kwargs', {})
+      with K.name_scope('predict'):
+        self.predict_function = K.function(
+            inputs,
+            self.outputs,
+            updates=self.state_updates,
+            name='predict_function',
+            **kwargs)
 
-    # Validate and standardize user data.
-    if self._distribution_strategy:
-      x, _, _ = self._standardize_user_data(
-          x, check_steps=True, steps_name='steps', steps=steps,
-          batch_size=batch_size)
-    else:
-      # TODO(anjalisridhar): We don't pass batch_size here for some reason. This
-      # means we need to special case distribution strategy which needs the
-      # batch size.
-      x, _, _ = self._standardize_user_data(
-          x, check_steps=True, steps_name='steps', steps=steps)
+  def _make_execution_function(self, mode):
+    if mode == 'train':
+      self._make_fit_function()
+      return self._fit_function
+    if mode == 'test':
+      self._make_eval_function()
+      return self._eval_function
+    if mode == 'predict':
+      self._make_predict_function()
+      return self.predict_function
 
-    if (self.run_eagerly or (isinstance(x, iterator_ops.EagerIterator) and
-                             not self._distribution_strategy)):
-      return training_generator.predict_generator(
-          self,
-          x,
-          steps=steps,
-          batch_size=batch_size,
-          verbose=verbose,
-          workers=0)
-    elif distributed_training_utils.is_tpu_strategy(
-        self._distribution_strategy):
-      return training_distributed.experimental_predict_loop(
-          self, x, verbose=verbose, steps=steps)
-    else:
-      return training_arrays.predict_loop(
-          self, x, batch_size=batch_size, verbose=verbose, steps=steps)
+  def _get_iterator_get_next_tensors(self, iterator):
+    get_next_op = self._iterator_get_next.get(iterator, None)
+    if get_next_op is None:
+      get_next_op = iterator.get_next()
+      self._iterator_get_next[iterator] = get_next_op
+    return get_next_op
 
-  def reset_metrics(self):
-    """Resets the state of metrics."""
-    if hasattr(self, 'metrics'):
-      for m in self.metrics:
-        m.reset_states()
-      if self._distribution_strategy:
-        training_distributed._reset_metrics(self)  # pylint: disable=protected-access
+  def _distribution_standardize_user_data(self,
+                                          x,
+                                          y=None,
+                                          sample_weight=None,
+                                          class_weight=None,
+                                          batch_size=None,
+                                          check_steps=False,
+                                          steps_name='steps',
+                                          steps=None,
+                                          validation_split=0,
+                                          shuffle=False):
+    """Runs validation checks on input and target data passed by the user.
 
-  def train_on_batch(self,
-                     x,
-                     y=None,
-                     sample_weight=None,
-                     class_weight=None,
-                     reset_metrics=True):
-    """Runs a single gradient update on a single batch of data.
+    This is called when using DistributionStrategy to train, evaluate or serve
+    the model.
 
-    Arguments:
-        x: Input data. It could be:
-          - A Numpy array (or array-like), or a list of arrays
-              (in case the model has multiple inputs).
-          - A TensorFlow tensor, or a list of tensors
-              (in case the model has multiple inputs).
-          - A dict mapping input names to the corresponding array/tensors,
-              if the model has named inputs.
-          - A `tf.data` dataset or a dataset iterator.
-        y: Target data. Like the input data `x`, it could be either Numpy
-          array(s) or TensorFlow tensor(s). It should be consistent with `x`
-          (you cannot have Numpy inputs and tensor targets, or inversely). If
-          `x` is a dataset or a dataset iterator, `y` should not be specified
-          (since targets will be obtained from the iterator).
-        sample_weight: Optional array of the same length as x, containing
-          weights to apply to the model's loss for each sample. In the case of
-          temporal data, you can pass a 2D array with shape (samples,
-          sequence_length), to apply a different weight to every timestep of
-          every sample. In this case you should make sure to specify
-          sample_weight_mode="temporal" in compile(). This argument is not
-          supported when `x` is a dataset or a dataset iterator.
-        class_weight: Optional dictionary mapping class indices (integers) to a
-          weight (float) to apply to the model's loss for the samples from this
-          class during training. This can be useful to tell the model to "pay
-          more attention" to samples from an under-represented class.
-        reset_metrics: If `True`, the metrics returned will be only for this
-          batch. If `False`, the metrics will be statefully accumulated across
-          batches.
+    Args:
+      x: Input data. A numpy array or `tf.data` dataset.
+      y: Target data. A numpy array or None if x is a `tf.data` dataset.
+      sample_weight: An optional sample-weight array passed by the user to
+        weight the importance of each sample in `x`.
+      class_weight: An optional class-weight array by the user to
+        weight the importance of samples in `x` based on the class they belong
+        to, as conveyed by `y`.
+      batch_size: Integer batch size. If provided, it is used to run additional
+        validation checks on stateful models.
+      check_steps: boolean, True if we want to check for validity of `steps` and
+        False, otherwise.
+      steps_name: The public API's parameter name for `steps`.
+      steps: Integer or `None`. Total number of steps (batches of samples) to
+        execute.
+      validation_split: Float between 0 and 1.
+        Fraction of the training data to be used as validation data.
+      shuffle: Boolean whether to shuffle the training data before each epoch.
 
     Returns:
-        Scalar training loss
-        (if the model has a single output and no metrics)
-        or list of scalars (if the model has multiple outputs
-        and/or metrics). The attribute `model.metrics_names` will give you
-        the display labels for the scalar outputs.
+      Iterator for reading the dataset `x`.
 
     Raises:
-      ValueError: In case of invalid user-provided arguments.
+      ValueError: In case of invalid user-provided data.
+      RuntimeError: If the model was never compiled.
     """
-    if self._distribution_strategy:
-      raise NotImplementedError('`train_on_batch` is not supported for models '
-                                'compiled with DistributionStrategy.')
-    # Validate and standardize user data.
-    x, y, sample_weights = self._standardize_user_data(
-        x, y, sample_weight=sample_weight, class_weight=class_weight)
+    if class_weight:
+      raise NotImplementedError('`class_weight` is currently not supported '
+                                'when using DistributionStrategy.')
 
-    if self.run_eagerly:
-      outputs = training_eager.train_on_batch(
-          self, x, y, sample_weights=sample_weights)
-    else:
-      if not isinstance(K.symbolic_learning_phase(), int):
-        ins = x + y + sample_weights + [True]
-      else:
-        ins = x + y + sample_weights
+    if (sample_weight is not None and sample_weight.all() and
+        distributed_training_utils.is_tpu_strategy(
+            self._distribution_strategy)):
+      raise NotImplementedError('`sample_weight` is currently not supported '
+                                'when using TPUStrategy.')
 
-      if reset_metrics:
-        self._make_train_function()
-        outputs = self.train_function(ins)  # pylint: disable=not-callable
-      else:
-        self._make_fit_function()
-        outputs = self._fit_function(ins)  # pylint: disable=not-callable
+    # Validates `steps` argument right at the beginning since we use it to
+    # construct the dataset object.
+    # TODO(anjalisridhar): Remove this check once we refactor the
+    # _standardize_user_data code path. This check is already present elsewhere
+    # in the codebase.
+    if check_steps and isinstance(x, dataset_ops.DatasetV2) and steps is None:
+      raise ValueError('When using Datasets as input, '
+                       'you should specify the `{steps_name}` argument.'
+                       .format(steps_name=steps_name))
 
-    if reset_metrics:
-      self.reset_metrics()
+    first_x_value = nest.flatten(x)[0]
+    if isinstance(first_x_value, np.ndarray):
+      # We need to use the drop_remainder argument to allow for a static
+      # input shape which is required for TPUs.
+      drop_remainder = self._distribution_strategy.require_static_shapes
+      if y is not None:
+        var_x = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, x)
+        var_y = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, y)
+        if sample_weight is not None:
+          var_sample_weights = distributed_training_utils.get_var_for_numpy(
+              self._distribution_strategy, sample_weight)
 
-    if len(outputs) == 1:
-      return outputs[0]
-    return outputs
+          x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y,
+                                                      var_sample_weights))
+        else:
+          x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y))
 
-  def test_on_batch(self, x, y=None, sample_weight=None, reset_metrics=True):
-    """Test the model on a single batch of samples.
+        x = dataset_ops.Dataset.from_tensor_slices((var_x, var_y))
+        if shuffle:
+          # 1024 is a good buffer size since it is much larger than the average
+          # batch size provided by the user and provides sufficient randomness.
+          # One thing to keep in mind is the memory usage based on the size of
+          # each sample.
+          x = x.shuffle(1024)
+        x = x.repeat()
+        x = x.batch(batch_size, drop_remainder=drop_remainder)
+        y = None
+        sample_weight = None
+      else:
+        # This case is for the predict call where the dataset only contains
+        # inputs and no targets, i.e. it does not return a tuple
+        var_x = distributed_training_utils.get_var_for_numpy(
+            self._distribution_strategy, x)
+        x = dataset_ops.Dataset.from_tensor_slices(var_x)
+        x = x.batch(batch_size, drop_remainder=drop_remainder)
 
-    Arguments:
-        x: Input data. It could be:
-          - A Numpy array (or array-like), or a list of arrays
-            (in case the model has multiple inputs).
-          - A TensorFlow tensor, or a list of tensors
-            (in case the model has multiple inputs).
-          - A dict mapping input names to the corresponding array/tensors,
-            if the model has named inputs.
-          - A `tf.data` dataset or a dataset iterator.
-        y: Target data. Like the input data `x`,
-          it could be either Numpy array(s) or TensorFlow tensor(s).
-          It should be consistent with `x` (you cannot have Numpy inputs and
-          tensor targets, or inversely). If `x` is a dataset or a
-          dataset iterator, `y` should not be specified
-          (since targets will be obtained from the iterator).
-        sample_weight: Optional array of the same length as x, containing
-            weights to apply to the model's loss for each sample.
-            In the case of temporal data, you can pass a 2D array
-            with shape (samples, sequence_length),
-            to apply a different weight to every timestep of every sample.
-            In this case you should make sure to specify
-            sample_weight_mode="temporal" in compile(). This argument is not
-            supported when `x` is a dataset or a dataset iterator.
-        reset_metrics: If `True`, the metrics returned will be only for this
-          batch. If `False`, the metrics will be statefully accumulated across
-          batches.
+    assert isinstance(x, dataset_ops.DatasetV2)
 
-    Returns:
-        Scalar test loss (if the model has a single output and no metrics)
-        or list of scalars (if the model has multiple outputs
-        and/or metrics). The attribute `model.metrics_names` will give you
-        the display labels for the scalar outputs.
+    with self._distribution_strategy.scope():
+      iterator = self._distribution_strategy.make_dataset_iterator(x)
+      init_op = iterator.initialize()
+      if not context.executing_eagerly():
+        K.get_session().run(init_op)
 
-    Raises:
-        ValueError: In case of invalid user-provided arguments.
-    """
-    if self._distribution_strategy:
-      raise NotImplementedError('`test_on_batch` is not supported for models '
-                                'compiled with DistributionStrategy.')
-    # Validate and standardize user data.
-    x, y, sample_weights = self._standardize_user_data(
-        x, y, sample_weight=sample_weight)
+    training_utils.validate_iterator_input(x, y, sample_weight,
+                                           validation_split)
+    return iterator
 
-    if self.run_eagerly:
-      outputs = training_eager.test_on_batch(
-          self, x, y, sample_weights=sample_weights)
-    else:
-      inputs = x + y + sample_weights
-      if reset_metrics:
-        self._make_test_function()
-        outputs = self.test_function(inputs)  # pylint: disable=not-callable
-      else:
-        self._make_eval_function()
-        outputs = self._eval_function(inputs)  # pylint: disable=not-callable
+  def _standardize_user_data(self,
+                             x,
+                             y=None,
+                             sample_weight=None,
+                             class_weight=None,
+                             batch_size=None,
+                             check_steps=False,
+                             steps_name='steps',
+                             steps=None,
+                             validation_split=0,
+                             shuffle=False):
+    """Runs validation checks on input and target data passed by the user.
 
-    if reset_metrics:
-      self.reset_metrics()
+    Also standardizes the data to lists of arrays, in order.
 
-    if len(outputs) == 1:
-      return outputs[0]
-    return outputs
+    Also builds and compiles the model on the fly if it is a subclassed model
+    that has never been called before (and thus has no inputs/outputs).
 
-  def predict_on_batch(self, x):
-    """Returns predictions for a single batch of samples.
+    This is a purely internal method, subject to refactoring at any time.
 
-    Arguments:
-        x: Input data. It could be:
-          - A Numpy array (or array-like), or a list of arrays
-            (in case the model has multiple inputs).
-          - A TensorFlow tensor, or a list of tensors
-            (in case the model has multiple inputs).
-          - A `tf.data` dataset or a dataset iterator.
+    Args:
+      x: Input data. It could be:
+        - A Numpy array (or array-like), or a list of arrays
+          (in case the model has multiple inputs).
+        - A TensorFlow tensor, or a list of tensors
+          (in case the model has multiple inputs).
+        - A dict mapping input names to the corresponding array/tensors,
+          if the model has named inputs.
+        - A `tf.data` dataset or a dataset iterator.
+      y: Target data. Like the input data `x`,
+        it could be either Numpy array(s) or TensorFlow tensor(s).
+        It should be consistent with `x` (you cannot have Numpy inputs and
+        tensor targets, or inversely). If `x` is a dataset or a
+        dataset iterator, `y` should not be specified
+        (since targets will be obtained from the iterator).
+      sample_weight: An optional sample-weight array passed by the user to
+        weight the importance of each sample in `x`.
+      class_weight: An optional class-weight array by the user to
+        weight the importance of samples in `x` based on the class they belong
+        to, as conveyed by `y`.
+      batch_size: Integer batch size. If provided, it is used to run additional
+        validation checks on stateful models.
+      check_steps: boolean, True if we want to check for validity of `steps` and
+        False, otherwise. For example, when we are standardizing one batch of
+        data for train_on_batch/predict_on_batch/test_on_batch APIs, `steps`
+        value is not required and we should not check for its validity in these
+        cases.
+      steps_name: The public API's parameter name for `steps`.
+      steps: Integer or `None`. Total number of steps (batches of samples) to
+        execute.
+      validation_split: Float between 0 and 1.
+        Fraction of the training data to be used as validation data.
+      shuffle: Boolean whether to shuffle the training data before each epoch.
 
     Returns:
-        Numpy array(s) of predictions.
+      A tuple of 3: inputs (arrays or dicts, depending on whether `x` was a dict
+      or not), target arrays, sample-weight arrays.
+      If the model's input and targets are symbolic, these lists are empty
+      (since the model takes no user-provided data, instead the data comes
+      from the symbolic inputs/targets).
 
     Raises:
-        ValueError: In case of mismatch between given number of inputs and
-          expectations of the model.
+      ValueError: In case of invalid user-provided data.
+      RuntimeError: If the model was never compiled.
     """
     if self._distribution_strategy:
-      raise NotImplementedError('`predict_on_batch` is not supported for '
-                                'models compiled with DistributionStrategy.')
-    # Validate and standardize user data.
-    inputs, _, _ = self._standardize_user_data(x)
-    if self.run_eagerly:
-      if (isinstance(inputs, iterator_ops.EagerIterator) or
-          (isinstance(inputs, dataset_ops.DatasetV2))):
-        inputs = training_utils.cast_if_floating_dtype(inputs)
-      elif isinstance(inputs, collections.Sequence):
-        inputs = [
-            ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs]
-      return self(inputs)  # pylint: disable=not-callable
-
-    self._make_predict_function()
-    outputs = self.predict_function(inputs)
-
-    if len(outputs) == 1:
-      return outputs[0]
-    return outputs
-
-  def fit_generator(self,
-                    generator,
-                    steps_per_epoch=None,
-                    epochs=1,
-                    verbose=1,
-                    callbacks=None,
-                    validation_data=None,
-                    validation_steps=None,
-                    class_weight=None,
-                    max_queue_size=10,
-                    workers=1,
-                    use_multiprocessing=False,
-                    shuffle=True,
-                    initial_epoch=0):
-    """Fits the model on data yielded batch-by-batch by a Python generator.
-
-    The generator is run in parallel to the model, for efficiency.
-    For instance, this allows you to do real-time data augmentation
-    on images on CPU in parallel to training your model on GPU.
-
-    The use of `keras.utils.Sequence` guarantees the ordering
-    and guarantees the single use of every input per epoch when
-    using `use_multiprocessing=True`.
+      iterator = self._distribution_standardize_user_data(
+          x,
+          y,
+          sample_weight=sample_weight,
+          class_weight=class_weight,
+          batch_size=batch_size,
+          check_steps=check_steps,
+          steps_name=steps_name,
+          steps=steps,
+          validation_split=validation_split,
+          shuffle=shuffle)
+      return iterator, None, None
 
-    Arguments:
-        generator: A generator or an instance of `Sequence`
-          (`keras.utils.Sequence`)
-            object in order to avoid duplicate data
-            when using multiprocessing.
-            The output of the generator must be either
-            - a tuple `(inputs, targets)`
-            - a tuple `(inputs, targets, sample_weights)`.
-            This tuple (a single output of the generator) makes a single batch.
-            Therefore, all arrays in this tuple must have the same length (equal
-            to the size of this batch). Different batches may have different
-              sizes.
-            For example, the last batch of the epoch is commonly smaller than
-              the
-            others, if the size of the dataset is not divisible by the batch
-              size.
-            The generator is expected to loop over its data
-            indefinitely. An epoch finishes when `steps_per_epoch`
-            batches have been seen by the model.
-        steps_per_epoch: Total number of steps (batches of samples)
-            to yield from `generator` before declaring one epoch
-            finished and starting the next epoch. It should typically
-            be equal to the number of samples of your dataset
-            divided by the batch size.
-            Optional for `Sequence`: if unspecified, will use
-            the `len(generator)` as a number of steps.
-        epochs: Integer, total number of iterations on the data.
-        verbose: Verbosity mode, 0, 1, or 2.
-        callbacks: List of callbacks to be called during training.
-        validation_data: This can be either
-            - a generator for the validation data
-            - a tuple (inputs, targets)
-            - a tuple (inputs, targets, sample_weights).
-        validation_steps: Only relevant if `validation_data`
-            is a generator. Total number of steps (batches of samples)
-            to yield from `generator` before stopping.
-            Optional for `Sequence`: if unspecified, will use
-            the `len(validation_data)` as a number of steps.
-        class_weight: Dictionary mapping class indices to a weight
-            for the class.
-        max_queue_size: Integer. Maximum size for the generator queue.
-            If unspecified, `max_queue_size` will default to 10.
-        workers: Integer. Maximum number of processes to spin up
-            when using process-based threading.
-            If unspecified, `workers` will default to 1. If 0, will
-            execute the generator on the main thread.
-        use_multiprocessing: Boolean.
-            If `True`, use process-based threading.
-            If unspecified, `use_multiprocessing` will default to `False`.
-            Note that because this implementation relies on multiprocessing,
-            you should not pass non-picklable arguments to the generator
-            as they can't be passed easily to children processes.
-        shuffle: Boolean. Whether to shuffle the order of the batches at
-            the beginning of each epoch. Only used with instances
-            of `Sequence` (`keras.utils.Sequence`).
-            Has no effect when `steps_per_epoch` is not `None`.
-        initial_epoch: Epoch at which to start training
-            (useful for resuming a previous training run)
+    if isinstance(x, dataset_ops.DatasetV2):
+      if context.executing_eagerly():
+        x = iter(x)
+      else:
+        if x in self._dataset_iterator_cache:
+          x = self._dataset_iterator_cache[x]
+        else:
+          iterator = dataset_ops.make_initializable_iterator(x)
+          self._dataset_iterator_cache[x] = iterator
+          x = iterator
+        K.get_session().run(x.initializer)
 
-    Returns:
-        A `History` object.
+    # Validates `steps` argument based on x's type.
+    if check_steps:
+      training_utils.check_steps_argument(x, steps, steps_name)
 
-    Example:
+    is_x_eager_iterator = isinstance(x, iterator_ops.EagerIterator)
+    is_x_iterator = isinstance(x, iterator_ops.Iterator)
 
-    ```python
-        def generate_arrays_from_file(path):
-            while 1:
-                f = open(path)
-                for line in f:
-                    # create numpy arrays of input data
-                    # and labels, from each line in the file
-                    x1, x2, y = process_line(line)
-                    yield ({'input_1': x1, 'input_2': x2}, {'output': y})
-                f.close()
+    # Validate user inputs when data is given as a dataset or dataset iterator.
+    if is_x_iterator or is_x_eager_iterator:
+      training_utils.validate_iterator_input(x, y, sample_weight,
+                                             validation_split)
 
-        model.fit_generator(generate_arrays_from_file('/my_file.txt'),
-                            steps_per_epoch=10000, epochs=10)
-    ```
-    Raises:
-        ValueError: In case the generator yields data in an invalid format.
-    """
-    if self._distribution_strategy:
-      raise NotImplementedError('`fit_generator` is not supported for '
-                                'models compiled with DistributionStrategy.')
-    return training_generator.fit_generator(
-        self,
-        generator,
-        steps_per_epoch=steps_per_epoch,
-        epochs=epochs,
-        verbose=verbose,
-        callbacks=callbacks,
-        validation_data=validation_data,
-        validation_steps=validation_steps,
-        class_weight=class_weight,
-        max_queue_size=max_queue_size,
-        workers=workers,
-        use_multiprocessing=use_multiprocessing,
-        shuffle=shuffle,
-        initial_epoch=initial_epoch)
+    # For eager iterators, when we have to process multiple batches of samples,
+    # we will standardize the data when we actually loop over iterator and get
+    # the batches. For now, we just return the iterator as is.
+    if is_x_eager_iterator:
+      return x, y, sample_weight
 
-  def evaluate_generator(self,
-                         generator,
-                         steps=None,
-                         max_queue_size=10,
-                         workers=1,
-                         use_multiprocessing=False,
-                         verbose=0):
-    """Evaluates the model on a data generator.
+    # If input data is a dataset iterator in graph mode or if it is an eager
+    # iterator and only one batch of samples is required, we fetch the data
+    # tensors from the iterator and then standardize them.
+    if is_x_iterator or is_x_eager_iterator:
+      try:
+        if is_x_iterator:
+          next_element = self._get_iterator_get_next_tensors(x)
+        else:
+          next_element = x.get_next()
+      except errors.OutOfRangeError:
+        raise RuntimeError('Your dataset iterator ran out of data; '
+                           'Make sure that your dataset can generate '
+                           'required number of samples.')
 
-    The generator should return the same kind of data
-    as accepted by `test_on_batch`.
+      if isinstance(next_element, (list, tuple)):
+        if len(next_element) not in [2, 3]:
+          raise ValueError(
+              'Please provide model inputs as a list or tuple of 2  or 3'
+              'elements: (input, target) or (input, target, sample_weights)'
+              'Received %s' % next_element)
+        if len(next_element) == 2:
+          x, y = next_element
+        else:
+          x, y, sample_weight = next_element
+      else:
+        x = next_element
 
-    Arguments:
-        generator: Generator yielding tuples (inputs, targets)
-            or (inputs, targets, sample_weights)
-            or an instance of `keras.utils.Sequence`
-            object in order to avoid duplicate data
-            when using multiprocessing.
-        steps: Total number of steps (batches of samples)
-            to yield from `generator` before stopping.
-            Optional for `Sequence`: if unspecified, will use
-            the `len(generator)` as a number of steps.
-        max_queue_size: maximum size for the generator queue
-        workers: Integer. Maximum number of processes to spin up
-            when using process-based threading.
-            If unspecified, `workers` will default to 1. If 0, will
-            execute the generator on the main thread.
-        use_multiprocessing: Boolean.
-            If `True`, use process-based threading.
-            If unspecified, `use_multiprocessing` will default to `False`.
-            Note that because this implementation relies on multiprocessing,
-            you should not pass non-picklable arguments to the generator
-            as they can't be passed easily to children processes.
-        verbose: Verbosity mode, 0 or 1.
+    if sample_weight is not None and class_weight is not None:
+      logging.warning(
+          'Received both a `sample_weight` and `class_weight` argument. '
+          'The `class_weight` argument will be ignored.')
+    # First, we build/compile the model on the fly if necessary.
+    all_inputs = []
+    is_build_called = False
+    is_compile_called = False
+    # Whether this is a subclassed model that expects dictionary inputs
+    # rather than list inputs (e.g. FeatureColumn-based models).
+    dict_inputs = False
+    if not self.inputs:
+      # We need to use `x` to set the model inputs.
+      # We type-check that `x` and `y` are either single arrays
+      # or lists of arrays.
+      if isinstance(x, (list, tuple)):
+        if not all(isinstance(v, np.ndarray) or
+                   tensor_util.is_tensor(v) for v in x):
+          raise ValueError('Please provide as model inputs either a single '
+                           'array or a list of arrays. You passed: x=' + str(x))
+        all_inputs += list(x)
+      elif isinstance(x, dict):
+        dict_inputs = True
+        keys = sorted(x.keys())
+        all_inputs = [x[k] for k in keys]
+      else:
+        if not isinstance(x, np.ndarray) and not tensor_util.is_tensor(x):
+          raise ValueError('Please provide as model inputs either a single '
+                           'array or a list of arrays. You passed: x=' + str(x))
+        all_inputs.append(x)
 
-    Returns:
-        Scalar test loss (if the model has a single output and no metrics)
-        or list of scalars (if the model has multiple outputs
-        and/or metrics). The attribute `model.metrics_names` will give you
-        the display labels for the scalar outputs.
+      # Build the model using the retrieved inputs (value or symbolic).
+      # If values or generated from a dataset, then in symbolic-mode
+      # placeholders will be created to match the value shapes.
+      if not self.inputs:
+        is_build_called = True
+        if is_x_iterator:
+          cast_inputs = nest.map_structure(lambda v: v.shape, x)
+        elif training_utils.has_tensors(x):
+          cast_inputs = training_utils.cast_if_floating_dtype(x)
+        else:
+          cast_inputs = x
+        self._set_inputs(cast_inputs)
+    else:
+      dict_inputs = isinstance(self.inputs, dict)
+    if dict_inputs and context.executing_eagerly():
+      # No support for graph functions when the model expects dictionary inputs
+      # (i.e. FeatureColumn-based models).
+      self.run_eagerly = True
 
-    Raises:
-        ValueError: in case of invalid arguments.
+    if y is not None:
+      if not self.optimizer:
+        raise RuntimeError('You must compile a model before '
+                           'training/testing. '
+                           'Use `model.compile(optimizer, loss)`.')
+      if not self._is_compiled:
+        # On-the-fly compilation of the model.
+        # We need to use `y` to set the model targets.
+        if training_utils.has_tensors(y):
+          y = training_utils.cast_if_floating_dtype(y)
+        if isinstance(y, (list, tuple)):
+          if not all(isinstance(v, np.ndarray) or
+                     tensor_util.is_tensor(v) for v in y):
+            raise ValueError('Please provide as model targets either a single '
+                             'array or a list of arrays. '
+                             'You passed: y=' + str(y))
+          all_inputs += list(y)
+        elif isinstance(y, dict):
+          raise ValueError('Please do not pass a dictionary as model targets.')
+        else:
+          if not isinstance(y, np.ndarray) and not tensor_util.is_tensor(y):
+            raise ValueError('Please provide as model targets either a single '
+                             'array or a list of arrays. '
+                             'You passed: y=' + str(y))
+          all_inputs.append(y)
 
-    Raises:
-        ValueError: In case the generator yields data in an invalid format.
-    """
-    if self._distribution_strategy:
-      raise NotImplementedError('`evaluate_generator` is not supported for '
-                                'models compiled with DistributionStrategy.')
-    return training_generator.evaluate_generator(
-        self,
-        generator,
-        steps=steps,
-        max_queue_size=max_queue_size,
-        workers=workers,
-        use_multiprocessing=use_multiprocessing,
-        verbose=verbose)
+        # Typecheck that all inputs are *either* value *or* symbolic.
+        # TODO(fchollet): this check could be removed in Eager mode?
+        if any(tensor_util.is_tensor(v) for v in all_inputs):
+          if not all(tensor_util.is_tensor(v) for v in all_inputs):
+            raise ValueError('Do not pass inputs that mix Numpy arrays and '
+                             'TensorFlow tensors. '
+                             'You passed: x=' + str(x) + '; y=' + str(y))
 
-  def predict_generator(self,
-                        generator,
-                        steps=None,
-                        max_queue_size=10,
-                        workers=1,
-                        use_multiprocessing=False,
-                        verbose=0):
-    """Generates predictions for the input samples from a data generator.
+        if self.run_eagerly or is_x_iterator:
+          target_tensors = None
+        else:
+          # Handle target tensors if any passed.
+          if not isinstance(y, (list, tuple)):
+            y = [y]
+          target_tensors = [v for v in y if _is_symbolic_tensor(v)]
+        is_compile_called = True
+        self.compile(
+            optimizer=self.optimizer,
+            loss=self.loss,
+            metrics=self._compile_metrics,
+            weighted_metrics=self._compile_weighted_metrics,
+            loss_weights=self.loss_weights,
+            target_tensors=target_tensors,
+            run_eagerly=self.run_eagerly)
+
+    # In graph mode, if we had just set inputs and targets as symbolic tensors
+    # by invoking build and compile on the model respectively, we do not have to
+    # feed anything to the model. Model already has input and target data as
+    # part of the graph.
+    # Note: in this case, `any` and `all` are equivalent since we disallow
+    # mixed symbolic/value inputs.
+    if (not self.run_eagerly and is_build_called and is_compile_called and
+        not is_x_iterator and any(_is_symbolic_tensor(v) for v in all_inputs)):
+      return [], [], []
+
+    # What follows is input validation and standardization to list format,
+    # in the case where all inputs are value arrays.
+
+    if self.run_eagerly:
+      # In eager mode, do not do shape validation
+      # since the network has no input nodes (placeholders) to be fed.
+      feed_input_names = self.input_names
+      feed_input_shapes = None
+    elif not self._is_graph_network:
+      # Case: symbolic-mode subclassed network. Do not do shape validation.
+      feed_input_names = self._feed_input_names
+      feed_input_shapes = None
+    else:
+      # Case: symbolic-mode graph network.
+      # In this case, we run extensive shape validation checks.
+      feed_input_names = self._feed_input_names
+      feed_input_shapes = self._feed_input_shapes
 
-    The generator should return the same kind of data as accepted by
-    `predict_on_batch`.
+    # Standardize the inputs.
+    x = training_utils.standardize_input_data(
+        x,
+        feed_input_names,
+        feed_input_shapes,
+        check_batch_axis=False,  # Don't enforce the batch size.
+        exception_prefix='input')
 
-    Arguments:
-        generator: Generator yielding batches of input samples
-            or an instance of `keras.utils.Sequence` object in order to
-            avoid duplicate data when using multiprocessing.
-        steps: Total number of steps (batches of samples)
-            to yield from `generator` before stopping.
-            Optional for `Sequence`: if unspecified, will use
-            the `len(generator)` as a number of steps.
-        max_queue_size: Maximum size for the generator queue.
-        workers: Integer. Maximum number of processes to spin up
-            when using process-based threading.
-            If unspecified, `workers` will default to 1. If 0, will
-            execute the generator on the main thread.
-        use_multiprocessing: Boolean.
-            If `True`, use process-based threading.
-            If unspecified, `use_multiprocessing` will default to `False`.
-            Note that because this implementation relies on multiprocessing,
-            you should not pass non-picklable arguments to the generator
-            as they can't be passed easily to children processes.
-        verbose: verbosity mode, 0 or 1.
+    if y is not None:
+      if not self._is_graph_network:
+        feed_output_names = self._feed_output_names
+        feed_output_shapes = None
+        # Sample weighting not supported in this case.
+        # TODO(fchollet): consider supporting it.
+        feed_sample_weight_modes = [None for _ in self.outputs]
+      else:
+        feed_output_names = self._feed_output_names
+        feed_sample_weight_modes = self._feed_sample_weight_modes
+        feed_output_shapes = []
+        for output_shape, loss_fn in zip(self._feed_output_shapes,
+                                         self._feed_loss_fns):
+          if loss_fn is losses.sparse_categorical_crossentropy:
+            if K.image_data_format() == 'channels_first':
+              feed_output_shapes.append(
+                  (output_shape[0], 1) + output_shape[2:])
+            else:
+              feed_output_shapes.append(output_shape[:-1] + (1,))
+          elif (not hasattr(loss_fn, '__name__') or
+                getattr(losses, loss_fn.__name__, None) is None):
+            # If `loss_fn` is not a function (e.g. callable class)
+            # or if it not in the `losses` module, then
+            # it is a user-defined loss and we make no assumptions
+            # about it.
+            feed_output_shapes.append(None)
+          else:
+            feed_output_shapes.append(output_shape)
 
-    Returns:
-        Numpy array(s) of predictions.
+      # Standardize the outputs.
+      y = training_utils.standardize_input_data(
+          y,
+          feed_output_names,
+          # Don't enforce target shapes to match output shapes.
+          # Precise checks will be run in `check_loss_and_target_compatibility`.
+          shapes=None,
+          check_batch_axis=False,  # Don't enforce the batch size.
+          exception_prefix='target')
 
-    Raises:
-        ValueError: In case the generator yields data in an invalid format.
-    """
-    if self._distribution_strategy:
-      raise NotImplementedError('`predict_generator` is not supported for '
-                                'models compiled with DistributionStrategy.')
-    return training_generator.predict_generator(
-        self,
-        generator,
-        steps=steps,
-        max_queue_size=max_queue_size,
-        workers=workers,
-        use_multiprocessing=use_multiprocessing,
-        verbose=verbose)
+      # Generate sample-wise weight values given the `sample_weight` and
+      # `class_weight` arguments.
+      sample_weights = training_utils.standardize_sample_weights(
+          sample_weight, feed_output_names)
+      class_weights = training_utils.standardize_class_weights(
+          class_weight, feed_output_names)
+      sample_weights = [
+          training_utils.standardize_weights(ref, sw, cw, mode)
+          for (ref, sw, cw, mode) in zip(y, sample_weights, class_weights,
+                                         feed_sample_weight_modes)
+      ]
+      # Check that all arrays have the same length.
+      if not self._distribution_strategy:
+        training_utils.check_array_lengths(x, y, sample_weights)
+        if self._is_graph_network and not self.run_eagerly:
+          # Additional checks to avoid users mistakenly using improper loss fns.
+          training_utils.check_loss_and_target_compatibility(
+              y, self._feed_loss_fns, feed_output_shapes)
+    else:
+      y = []
+      sample_weights = []
 
-  def _get_callback_model(self):
-    """Returns the Callback Model for this Model."""
+    if self.stateful and batch_size:
+      # Check that for stateful networks, number of samples is a multiple
+      # of the static batch size.
+      if x[0].shape[0] % batch_size != 0:
+        raise ValueError('In a stateful network, '
+                         'you should only pass inputs with '
+                         'a number of samples that can be '
+                         'divided by the batch size. Found: ' +
+                         str(x[0].shape[0]) + ' samples')
 
-    if hasattr(self, '_replicated_model') and self._replicated_model:
-      # When using training_distributed, we set the callback model
-      # to an instance of the `DistributedModel` that we create in
-      # the `compile` call. The `DistributedModel` is initialized
-      # with the first replicated model. We need to set the callback
-      # model to a DistributedModel to allow us to override saving
-      # and loading weights when we checkpoint the model during training.
-      return self._replicated_model
-    if hasattr(self, 'callback_model') and self.callback_model:
-      return self.callback_model
-    return self
+    # If dictionary inputs were provided, we return a dictionary as well.
+    if dict_inputs:
+      x = dict(zip(feed_input_names, x))
+    return x, y, sample_weights
 
-  def _make_callback_model(self, grouped_model):
-    first_replicated_model = self._distribution_strategy.unwrap(
-        grouped_model)[0]
-    # We initialize the callback model with the first replicated model.
-    self._replicated_model = DistributedCallbackModel(first_replicated_model)
-    self._replicated_model.set_original_model(self)
+  @checkpointable.no_automatic_dependency_tracking
+  def _set_inputs(self, inputs, outputs=None, training=None):
+    """Set model's input and output specs based on the input data received.
 
-  def _validate_or_infer_batch_size(self, batch_size, steps, x):
-    """Validates that the `batch_size` provided is consistent with InputLayer.
+    This is to be used for Model subclasses, which do not know at instantiation
+    time what their inputs look like.
 
-    It's possible that the user specified a static batch size in their
-    InputLayer. If so, this method checks the provided `batch_size` and `x`
-    arguments are consistent with this static batch size. Also, if
-    `batch_size` is `None`, this method will attempt to infer the batch size
-    from the static batch size of the InputLayer.
+    Args:
+      inputs: Single array, or list of arrays. The arrays could be placeholders,
+        Numpy arrays, data tensors, or TensorShapes.
+        - if placeholders: the model is built on top of these placeholders,
+          and we expect Numpy data to be fed for them when calling `fit`/etc.
+        - if Numpy data or TensorShapes: we create placeholders matching the
+          TensorShapes or shapes of the Numpy arrays. We expect Numpy data to be
+          fed for these placeholders when calling `fit`/etc.
+        - if data tensors: the model is built on top of these tensors.
+          We do not expect any Numpy data to be provided when calling `fit`/etc.
+      outputs: None, a data tensor, or a list of tensors. If None, the
+        outputs will be determined by invoking `self.call()`, otherwise the
+        provided value will be used.
+      training: Boolean or None. Only relevant in symbolic mode. Specifies
+        whether to build the model's graph in inference mode (False), training
+        mode (True), or using the Keras learning phase (None).
+    Raises:
+      ValueError: If dict inputs are passed to a Sequential Model where the
+        first layer isn't FeatureLayer.
+    """
+    if self.inputs:
+      raise ValueError('Model inputs are already set.')
 
-    Arguments:
-      batch_size: The batch_size provided as an argument to
-        fit/evaluate/predict.
-      steps: The steps provided as an argument to fit/evaluate/predict.
-      x: The data passed as `x` to fit/evaluate/predict.
+    if self.__class__.__name__ == 'Sequential' and not self.built:
+      if tensor_util.is_tensor(inputs):
+        input_shape = (None,) + tuple(inputs.shape.as_list()[1:])
+      elif isinstance(inputs, tensor_shape.TensorShape):
+        input_shape = (None,) + tuple(inputs.as_list()[1:])
+      elif isinstance(inputs, dict):
+        # We assert that the first layer is a FeatureLayer.
+        if not training_utils.is_feature_layer(self.layers[0]):
+          raise ValueError('Passing a dictionary input to a Sequential Model '
+                           'which doesn\'t have FeatureLayer as the first layer'
+                           ' is an error.')
+        input_shape = (None,)
+      else:
+        input_shape = (None,) + tuple(inputs.shape[1:])
+      self._build_input_shape = input_shape
 
-    Returns:
-      The validated batch_size, auto-inferred from the first layer if not
-      provided.
-    """
-    layers = super(Model, self).layers  # Avoids the override in Sequential.
-    if layers:
-      first_layer = layers[0]
-      static_batch_size = training_utils.get_static_batch_size(first_layer)
-      if static_batch_size is not None:
+    # On-the-fly setting of symbolic model inputs (either by using the tensor
+    # provided, or by creating a placeholder if Numpy data was provided).
+    model_inputs = training_utils.ModelInputs(inputs)
+    inputs = model_inputs.get_symbolic_inputs()
+    self.inputs = model_inputs.get_symbolic_inputs(return_single_as_list=True)
+    self.input_names = model_inputs.get_input_names()
 
-        # Check `batch_size` argument is consistent with InputLayer.
-        if batch_size is not None and batch_size != static_batch_size:
-          raise ValueError('The `batch_size` argument value {} is incompatible '
-                           'with the specified batch size of your Input Layer: '
-                           '{}'.format(batch_size, static_batch_size))
+    self._feed_inputs = []
+    self._feed_input_names = []
+    self._feed_input_shapes = []
 
-        # Check Dataset/Iterator batch size is consistent with InputLayer.
-        if isinstance(x, (dataset_ops.DatasetV2, iterator_ops.Iterator,
-                          iterator_ops.EagerIterator)):
-          ds_batch_size = tensor_shape.as_dimension(
-              nest.flatten(x.output_shapes)[0][0]).value
-          if ds_batch_size is not None and ds_batch_size != static_batch_size:
-            raise ValueError('The batch output shape of your `Dataset` is {}, '
-                             'which is incompatible with the specified batch '
-                             'size of your Input Layer: {}'.format(
-                                 ds_batch_size, static_batch_size))
+    for k, v in model_inputs.as_dict():
+      if K.is_placeholder(v):
+        self._feed_inputs.append(v)
+        self._feed_input_names.append(k)
+        self._feed_input_shapes.append(K.int_shape(v))
 
-        # Set inferred batch size from the InputLayer.
-        if steps is None:
-          batch_size = static_batch_size
+    # TODO(fchollet): consider calling `_maybe_build` before calling the model.
 
-    if batch_size is None and steps is None:
-      # Backwards compatibility
-      batch_size = 32
-    return batch_size
+    if outputs is None:
+      # Obtain symbolic outputs by calling the model.
+      with K.get_graph().as_default():
+        if self._expects_training_arg:
+          outputs = self.call(inputs, training=training)
+        else:
+          outputs = self.call(inputs)
 
-  @property
-  def _default_save_signature(self):
-    return training_utils.trace_model_call(self)
+    outputs = nest.flatten(outputs)
+    self.outputs = outputs
+    self.output_names = training_utils.generic_output_names(outputs)
+    self.built = True
 
 
 class DistributedCallbackModel(Model):
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index d20d092d8e..ffb0266911 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -570,11 +570,8 @@ def _get_input_from_iterator(iterator, model):
   # Validate that all the elements in x and y are of the same type and shape.
   # We can then pass the first element of x and y to `_standardize_weights`
   # below and be confident of the output.
-  x_values, y_values, sample_weights_values = distributed_training_utils.\
-    validate_distributed_dataset_inputs(model._distribution_strategy, x, y,
-                                        sample_weights)
-  model._standardize_weights(x_values, y_values,
-                             sample_weight=sample_weights_values)
+  distributed_training_utils.validate_distributed_dataset_inputs(
+      model._distribution_strategy, x, y, sample_weights)
   return x, y, sample_weights
 
 
-- 
GitLab


From 8e7339b501b23e128572e984a25986c308adb0ed Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Tue, 11 Dec 2018 16:57:35 -0800
Subject: [PATCH 396/873] Fix include ordering in backend.cc

---
 tensorflow/compiler/xla/service/backend.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc
index 385c5eefa0..99e963b929 100644
--- a/tensorflow/compiler/xla/service/backend.cc
+++ b/tensorflow/compiler/xla/service/backend.cc
@@ -22,7 +22,6 @@ limitations under the License.
 #include <utility>
 
 #include "absl/memory/memory.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/compiler/xla/service/compiler.h"
 #include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
@@ -37,6 +36,7 @@ limitations under the License.
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 namespace xla {
 
-- 
GitLab


From 45a6696c0ad95d6953c43da2352d297ea61916e3 Mon Sep 17 00:00:00 2001
From: Yuefeng Zhou <yuefengz@google.com>
Date: Tue, 11 Dec 2018 16:55:34 -0800
Subject: [PATCH 397/873] Fix the bug in collective_all_reduce_strategy that
 wrong cross device op is used.

PiperOrigin-RevId: 225096446
---
 .../distribute/python/collective_all_reduce_strategy.py   | 8 ++++++--
 .../python/collective_all_reduce_strategy_test.py         | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
index 5c50a20490..346513dc58 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
@@ -70,6 +70,8 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
     self._cross_device_ops = None
     self._num_gpus_per_worker = num_gpus_per_worker
     self._initialize_local_worker(num_gpus_per_worker)
+    assert isinstance(self._get_cross_device_ops(),
+                      cross_device_ops_lib.CollectiveAllReduce)
 
   def _initialize_local_worker(self, num_gpus_per_worker):
     """Initializes the object for local training."""
@@ -86,7 +88,7 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
 
     self._collective_keys = cross_device_utils.CollectiveKeys()
     self._initialize_local(local_devices)
-    self._cross_tower_ops = cross_device_ops_lib.CollectiveAllReduce(
+    self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
         num_workers=self._num_workers,
         num_gpus_per_worker=num_gpus_per_worker,
         collective_keys=self._collective_keys)
@@ -128,7 +130,7 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
 
     self._collective_keys = cross_device_utils.CollectiveKeys()
     self._initialize_local(local_devices)
-    self._cross_tower_ops = cross_device_ops_lib.CollectiveAllReduce(
+    self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
         num_workers=self._num_workers,
         num_gpus_per_worker=num_gpus_per_worker,
         collective_keys=self._collective_keys)
@@ -267,6 +269,8 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
       # already been initialized with a `cluster_spec`.
       self._initialize_multi_worker(self._num_gpus_per_worker, cluster_spec,
                                     task_type, task_id)
+      assert isinstance(self._get_cross_device_ops(),
+                        cross_device_ops_lib.CollectiveAllReduce)
 
     if session_config:
       session_config.CopyFrom(self._update_config_proto(session_config))
diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
index 8a9e583f0a..6d7cd14ed5 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
@@ -82,7 +82,7 @@ class CollectiveAllReduceStrategyTestBase(
         instance_key_with_id_start=num_gpus * 10000 +
         CollectiveAllReduceStrategyTestBase.collective_key_base)
     distribution.extended._collective_keys = collective_keys
-    distribution.extended._inferred_cross_device_ops._collective_keys = (
+    distribution.extended._cross_device_ops._collective_keys = (
         collective_keys)
     if task_type and task_id is not None:
       return distribution, 'grpc://' + self._cluster_spec[task_type][
-- 
GitLab


From db340f9efc3dee7f7a7e931db8f2f36104daa446 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 11 Dec 2018 16:57:56 -0800
Subject: [PATCH 398/873] Fix wrap_function on empty arguments

Caused by an implicit boolean check which should have been an explicit None check

PiperOrigin-RevId: 225096833
---
 tensorflow/python/eager/function.py           | 2 +-
 tensorflow/python/eager/wrap_function_test.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 0de0cd96ac..f3480ebb56 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -340,7 +340,7 @@ class Function(object):
       TypeError: For invalid positional/keyword argument combinations.
     """
     if self._arg_keywords is None or self._num_positional_args is None:
-      if self._signature:
+      if self._signature is not None:
         if kwargs:
           raise NotImplementedError(
               "Keyword arguments not supported when calling a "
diff --git a/tensorflow/python/eager/wrap_function_test.py b/tensorflow/python/eager/wrap_function_test.py
index b32b6ca426..d34e9228f3 100644
--- a/tensorflow/python/eager/wrap_function_test.py
+++ b/tensorflow/python/eager/wrap_function_test.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 
 from tensorflow.python.eager import wrap_function
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
@@ -70,6 +71,14 @@ class WrapFunctionTest(test.TestCase):
     f_pruned = f_wrapped.prune(x_in[0], [x_out[0]])
     self.assertAllEqual(f_pruned(ops.convert_to_tensor(2.0)), [4.0])
 
+  def testNoArguments(self):
+
+    def f():
+      return constant_op.constant(1.)
+
+    f_wrapped = wrap_function.wrap_function(f, [])
+    self.assertAllEqual(1.0, f_wrapped())
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
-- 
GitLab


From 5fecd1ead795ec7ddc5d9ede0f8c3b386a1ca8f0 Mon Sep 17 00:00:00 2001
From: Jian Li <jianlijianli@google.com>
Date: Tue, 11 Dec 2018 17:03:32 -0800
Subject: [PATCH 399/873] Fix build error

PiperOrigin-RevId: 225097826
---
 .../microfrontend/audio_microfrontend.cc      | 10 ++++----
 .../microfrontend/audio_microfrontend_test.cc |  9 ++++---
 .../microfrontend/lib/fft_test.cc             |  3 ++-
 .../microfrontend/lib/filterbank_io.c         |  3 ++-
 .../microfrontend/lib/filterbank_test.cc      | 24 ++++++++++++-------
 .../microfrontend/lib/frontend_test.cc        |  6 +++--
 .../microfrontend/lib/log_scale.c             |  3 ++-
 .../microfrontend/lib/log_scale_test.cc       |  6 +++--
 .../microfrontend/lib/noise_reduction_test.cc |  6 +++--
 .../microfrontend/lib/pcan_gain_control.c     |  3 ++-
 .../lib/pcan_gain_control_test.cc             |  3 ++-
 .../lib/pcan_gain_control_util.c              |  3 ++-
 .../microfrontend/lib/window_io.c             |  3 ++-
 .../microfrontend/lib/window_test.cc          | 12 ++++++----
 .../ops/audio_microfrontend_op.cc             | 10 ++++----
 15 files changed, 68 insertions(+), 36 deletions(-)

diff --git a/tensorflow/lite/experimental/microfrontend/audio_microfrontend.cc b/tensorflow/lite/experimental/microfrontend/audio_microfrontend.cc
index 4367fe74a4..84ab164d2c 100644
--- a/tensorflow/lite/experimental/microfrontend/audio_microfrontend.cc
+++ b/tensorflow/lite/experimental/microfrontend/audio_microfrontend.cc
@@ -142,7 +142,8 @@ void GenerateFeatures(TfLiteAudioMicrofrontendParams* data,
 
     if (output.values != nullptr) {
       frame_buffer[frame_index].reserve(output.size);
-      for (int i = 0; i < output.size; ++i) {
+      int i;
+      for (i = 0; i < output.size; ++i) {
         frame_buffer[frame_index].push_back(static_cast<T>(output.values[i]) /
                                             data->out_scale);
       }
@@ -152,9 +153,10 @@ void GenerateFeatures(TfLiteAudioMicrofrontendParams* data,
 
   int index = 0;
   std::vector<T> pad(data->state->filterbank.num_channels, 0);
-  for (int anchor = 0; anchor < frame_buffer.size();
-       anchor += data->frame_stride) {
-    for (int frame = anchor - data->left_context;
+  int anchor;
+  for (anchor = 0; anchor < frame_buffer.size(); anchor += data->frame_stride) {
+    int frame;
+    for (frame = anchor - data->left_context;
          frame <= anchor + data->right_context; ++frame) {
       std::vector<T>* feature;
       if (data->zero_padding && (frame < 0 || frame >= frame_buffer.size())) {
diff --git a/tensorflow/lite/experimental/microfrontend/audio_microfrontend_test.cc b/tensorflow/lite/experimental/microfrontend/audio_microfrontend_test.cc
index a9119d0183..e3a0e06f7b 100644
--- a/tensorflow/lite/experimental/microfrontend/audio_microfrontend_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/audio_microfrontend_test.cc
@@ -140,13 +140,16 @@ class BaseMicroFrontendTest : public ::testing::Test {
 
     // Mimic padding behaviour with zero_padding = true.
     std::vector<int> output_flattened;
-    for (int anchor = 0; anchor < output.size();
+    int anchor;
+    for (anchor = 0; anchor < output.size();
          anchor += micro_frontend->num_frame_stride()) {
-      for (int frame = anchor - micro_frontend->num_left_context();
+      int frame;
+      for (frame = anchor - micro_frontend->num_left_context();
            frame <= anchor + micro_frontend->num_right_context(); ++frame) {
         if (frame < 0 || frame >= output.size()) {
           // Padding with zeros.
-          for (int j = 0; j < num_frequency_per_frame; ++j) {
+          int j;
+          for (j = 0; j < num_frequency_per_frame; ++j) {
             output_flattened.push_back(0.0);
           }
         } else {
diff --git a/tensorflow/lite/experimental/microfrontend/lib/fft_test.cc b/tensorflow/lite/experimental/microfrontend/lib/fft_test.cc
index 7c1ee2d852..1b754c1b4c 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/fft_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/fft_test.cc
@@ -38,7 +38,8 @@ TEST(FftTest, CheckOutputValues) {
       {-887, 0}, {3000, 3000}, {0, -6401}, {-3000, 3000}, {886, 0}, {118, 119},
       {0, 25},   {9, -10},     {19, 0},    {9, 9},        {0, 0}};
   ASSERT_EQ(state.fft_size / 2 + 1, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i <= state.fft_size / 2; ++i) {
+  int i;
+  for (i = 0; i <= state.fft_size / 2; ++i) {
     EXPECT_EQ(state.output[i].real, expected[i].real);
     EXPECT_EQ(state.output[i].imag, expected[i].imag);
   }
diff --git a/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.c b/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.c
index 2dbb4b3bf0..6ce4c7c796 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.c
+++ b/tensorflow/lite/experimental/microfrontend/lib/filterbank_io.c
@@ -17,7 +17,8 @@ limitations under the License.
 static void PrintArray(FILE* fp, const char* name, const int16_t* values,
                        size_t size) {
   fprintf(fp, "static int16_t filterbank_%s[] = {", name);
-  for (int i = 0; i < size; ++i) {
+  int i;
+  for (i = 0; i < size; ++i) {
     fprintf(fp, "%d", values[i]);
     if (i < size - 1) {
       fprintf(fp, ", ");
diff --git a/tensorflow/lite/experimental/microfrontend/lib/filterbank_test.cc b/tensorflow/lite/experimental/microfrontend/lib/filterbank_test.cc
index 808d527186..41f0064d4f 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/filterbank_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/filterbank_test.cc
@@ -71,7 +71,8 @@ TEST_F(FilterbankTest, CheckChannelFrequencyStarts) {
 
   const int16_t expected[] = {0, 4, 8};
   ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i <= state.num_channels; ++i) {
+  int i;
+  for (i = 0; i <= state.num_channels; ++i) {
     EXPECT_EQ(state.channel_frequency_starts[i], expected[i]);
   }
 
@@ -85,7 +86,8 @@ TEST_F(FilterbankTest, CheckChannelWeightStarts) {
 
   const int16_t expected[] = {0, 8, 16};
   ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i <= state.num_channels; ++i) {
+  int i;
+  for (i = 0; i <= state.num_channels; ++i) {
     EXPECT_EQ(state.channel_weight_starts[i], expected[i]);
   }
 
@@ -99,7 +101,8 @@ TEST_F(FilterbankTest, CheckChannelWidths) {
 
   const int16_t expected[] = {8, 8, 8};
   ASSERT_EQ(state.num_channels + 1, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i <= state.num_channels; ++i) {
+  int i;
+  for (i = 0; i <= state.num_channels; ++i) {
     EXPECT_EQ(state.channel_widths[i], expected[i]);
   }
 
@@ -117,7 +120,8 @@ TEST_F(FilterbankTest, CheckWeights) {
   ASSERT_EQ(state.channel_weight_starts[state.num_channels] +
                 state.channel_widths[state.num_channels],
             sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+  int i;
+  for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
     EXPECT_EQ(state.weights[i], expected[i]);
   }
 
@@ -135,7 +139,8 @@ TEST_F(FilterbankTest, CheckUnweights) {
   ASSERT_EQ(state.channel_weight_starts[state.num_channels] +
                 state.channel_widths[state.num_channels],
             sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+  int i;
+  for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
     EXPECT_EQ(state.unweights[i], expected[i]);
   }
 
@@ -154,7 +159,8 @@ TEST_F(FilterbankTest, CheckConvertFftComplexToEnergy) {
   int32_t* energy = reinterpret_cast<int32_t*>(fake_fft);
   FilterbankConvertFftComplexToEnergy(&state, fake_fft, energy);
 
-  for (int i = state.start_index; i < state.end_index; ++i) {
+  int i;
+  for (i = state.start_index; i < state.end_index; ++i) {
     EXPECT_EQ(energy[i], kEnergy[i]);
   }
 }
@@ -167,7 +173,8 @@ TEST_F(FilterbankTest, CheckAccumulateChannels) {
   FilterbankAccumulateChannels(&state, kEnergy);
 
   ASSERT_EQ(state.num_channels + 1, sizeof(kWork) / sizeof(kWork[0]));
-  for (int i = 0; i <= state.num_channels; ++i) {
+  int i;
+  for (i = 0; i <= state.num_channels; ++i) {
     EXPECT_EQ(state.work[i], kWork[i]);
   }
 
@@ -184,7 +191,8 @@ TEST_F(FilterbankTest, CheckSqrt) {
 
   const uint32_t expected[] = {247311, 508620};
   ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < state.num_channels; ++i) {
+  int i;
+  for (i = 0; i < state.num_channels; ++i) {
     EXPECT_EQ(scaled_filterbank[i], expected[i]);
   }
 
diff --git a/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc b/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc
index 993e866cc0..a6faa1fc1f 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/frontend_test.cc
@@ -64,7 +64,8 @@ TEST_F(FrontendTest, CheckOutputValues) {
 
   const uint16_t expected[] = {479, 425};
   ASSERT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < output.size; ++i) {
+  int i;
+  for (i = 0; i < output.size; ++i) {
     EXPECT_EQ(output.values[i], expected[i]);
   }
 
@@ -86,7 +87,8 @@ TEST_F(FrontendTest, CheckConsecutiveWindow) {
 
   const int16_t expected[] = {436, 378};
   ASSERT_EQ(output.size, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < output.size; ++i) {
+  int i;
+  for (i = 0; i < output.size; ++i) {
     EXPECT_EQ(output.values[i], expected[i]);
   }
 
diff --git a/tensorflow/lite/experimental/microfrontend/lib/log_scale.c b/tensorflow/lite/experimental/microfrontend/lib/log_scale.c
index 54f370e7d9..149ec7cfba 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/log_scale.c
+++ b/tensorflow/lite/experimental/microfrontend/lib/log_scale.c
@@ -63,7 +63,8 @@ uint16_t* LogScaleApply(struct LogScaleState* state, uint32_t* signal,
   const int scale_shift = state->scale_shift;
   uint16_t* output = (uint16_t*) signal;
   uint16_t* ret = output;
-  for (int i = 0; i < signal_size; ++i) {
+  int i;
+  for (i = 0; i < signal_size; ++i) {
     uint32_t value = *signal++;
     if (state->enable_log) {
       if (correction_bits < 0) {
diff --git a/tensorflow/lite/experimental/microfrontend/lib/log_scale_test.cc b/tensorflow/lite/experimental/microfrontend/lib/log_scale_test.cc
index 91ca657e54..1ea0842ec2 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/log_scale_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/log_scale_test.cc
@@ -34,7 +34,8 @@ TEST(LogScaleTest, CheckOutputValues) {
                                    kCorrectionBits);
 
   const uint16_t expected[] = {479, 425};
-  for (int i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+  int i;
+  for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
     EXPECT_EQ(output[i], expected[i]);
   }
 }
@@ -50,7 +51,8 @@ TEST(LogScaleTest, CheckOutputValuesNoLog) {
                                    kCorrectionBits);
 
   const uint16_t expected[] = {65535, 45998};
-  for (int i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
+  int i;
+  for (i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
     EXPECT_EQ(output[i], expected[i]);
   }
 }
diff --git a/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_test.cc b/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_test.cc
index 1614056487..13d58b2476 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/noise_reduction_test.cc
@@ -44,7 +44,8 @@ TEST_F(NoiseReductionTest, TestNoiseReductionEstimate) {
 
   const uint32_t expected[] = {6321887, 31248341};
   ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < state.num_channels; ++i) {
+  int i;
+  for (i = 0; i < state.num_channels; ++i) {
     EXPECT_EQ(state.estimate[i], expected[i]);
   }
 
@@ -60,7 +61,8 @@ TEST_F(NoiseReductionTest, TestNoiseReduction) {
 
   const uint32_t expected[] = {241137, 478104};
   ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < state.num_channels; ++i) {
+  int i;
+  for (i = 0; i < state.num_channels; ++i) {
     EXPECT_EQ(signal[i], expected[i]);
   }
 
diff --git a/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c b/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c
index b49eb30137..8ccc2fde98 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c
+++ b/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control.c
@@ -47,7 +47,8 @@ uint32_t PcanShrink(const uint32_t x) {
 
 void PcanGainControlApply(struct PcanGainControlState* state,
                           uint32_t* signal) {
-  for (int i = 0; i < state->num_channels; ++i) {
+  int i;
+  for (i = 0; i < state->num_channels; ++i) {
     const uint32_t gain = WideDynamicFunction(state->noise_estimate[i],
                                               state->gain_lut);
     const uint32_t snr = ((uint64_t) signal[i] * gain) >> state->snr_shift;
diff --git a/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc b/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
index 830db89edd..7c92d2d29d 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_test.cc
@@ -49,7 +49,8 @@ TEST_F(PcanGainControlTest, TestPcanGainControl) {
 
   const uint32_t expected[] = {3578, 1533};
   ASSERT_EQ(state.num_channels, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < state.num_channels; ++i) {
+  int i;
+  for (i = 0; i < state.num_channels; ++i) {
     EXPECT_EQ(signal[i], expected[i]);
   }
 
diff --git a/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c b/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
index dbe44c494a..5201cf045b 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
+++ b/tensorflow/lite/experimental/microfrontend/lib/pcan_gain_control_util.c
@@ -62,7 +62,8 @@ int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
   state->gain_lut[0] = PcanGainLookupFunction(config, input_bits, 0);
   state->gain_lut[1] = PcanGainLookupFunction(config, input_bits, 1);
   state->gain_lut -= 6;
-  for (int interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
+  int interval;
+  for (interval = 2; interval <= kWideDynamicFunctionBits; ++interval) {
     const uint32_t x0 = (uint32_t) 1 << (interval - 1);
     const uint32_t x1 = x0 + (x0 >> 1);
     const uint32_t x2 = (interval == kWideDynamicFunctionBits)
diff --git a/tensorflow/lite/experimental/microfrontend/lib/window_io.c b/tensorflow/lite/experimental/microfrontend/lib/window_io.c
index ed4ac5eb11..d12cac2c85 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/window_io.c
+++ b/tensorflow/lite/experimental/microfrontend/lib/window_io.c
@@ -16,7 +16,8 @@ limitations under the License.
 
 void WindowWriteMemmapPreamble(FILE* fp, const struct WindowState* state) {
   fprintf(fp, "static int16_t window_coefficients[] = {\n");
-  for (int i = 0; i < state->size; ++i) {
+  int i;
+  for (i = 0; i < state->size; ++i) {
     fprintf(fp, "%d", state->coefficients[i]);
     if (i < state->size - 1) {
       fprintf(fp, ", ");
diff --git a/tensorflow/lite/experimental/microfrontend/lib/window_test.cc b/tensorflow/lite/experimental/microfrontend/lib/window_test.cc
index 8c6c19188d..60f11440f5 100644
--- a/tensorflow/lite/experimental/microfrontend/lib/window_test.cc
+++ b/tensorflow/lite/experimental/microfrontend/lib/window_test.cc
@@ -48,7 +48,8 @@ TEST_F(WindowTest, CheckCoefficients) {
                               3843, 3541, 3145, 2681, 2177, 1664, 1176,
                               743,  391,  144,  16};
   ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < state.size; ++i) {
+  int i;
+  for (i = 0; i < state.size; ++i) {
     EXPECT_EQ(state.coefficients[i], expected[i]);
   }
 
@@ -64,7 +65,8 @@ TEST_F(WindowTest, CheckResidualInput) {
       &state, kFakeAudioData,
       sizeof(kFakeAudioData) / sizeof(kFakeAudioData[0]), &num_samples_read));
 
-  for (int i = kStepSamples; i < kWindowSamples; ++i) {
+  int i;
+  for (i = kStepSamples; i < kWindowSamples; ++i) {
     EXPECT_EQ(state.input[i - kStepSamples], kFakeAudioData[i]);
   }
 
@@ -84,7 +86,8 @@ TEST_F(WindowTest, CheckOutputValues) {
       0, 1151,   0, -5944, 0, 13311,  0, -21448, 0, 28327, 0, -32256, 0, 32255,
       0, -28328, 0, 21447, 0, -13312, 0, 5943,   0, -1152, 0};
   ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < state.size; ++i) {
+  int i;
+  for (i = 0; i < state.size; ++i) {
     EXPECT_EQ(state.output[i], expected[i]);
   }
 
@@ -122,7 +125,8 @@ TEST_F(WindowTest, CheckConsecutiveWindow) {
       0, -1152, 0, 5943,   0, -13312, 0, 21447, 0, -28328, 0, 32255, 0, -32256,
       0, 28327, 0, -21448, 0, 13311,  0, -5944, 0, 1151,   0};
   ASSERT_EQ(state.size, sizeof(expected) / sizeof(expected[0]));
-  for (int i = 0; i < state.size; ++i) {
+  int i;
+  for (i = 0; i < state.size; ++i) {
     EXPECT_EQ(state.output[i], expected[i]);
   }
 
diff --git a/tensorflow/lite/experimental/microfrontend/ops/audio_microfrontend_op.cc b/tensorflow/lite/experimental/microfrontend/ops/audio_microfrontend_op.cc
index 51094a976d..9f2ea7eee6 100644
--- a/tensorflow/lite/experimental/microfrontend/ops/audio_microfrontend_op.cc
+++ b/tensorflow/lite/experimental/microfrontend/ops/audio_microfrontend_op.cc
@@ -250,7 +250,8 @@ class AudioMicrofrontendOp : public OpKernel {
 
       if (output.values != nullptr) {
         frame_buffer[frame_index].reserve(output.size);
-        for (int i = 0; i < output.size; ++i) {
+        int i;
+        for (i = 0; i < output.size; ++i) {
           frame_buffer[frame_index].push_back(static_cast<T>(output.values[i]) /
                                               out_scale_);
         }
@@ -261,9 +262,10 @@ class AudioMicrofrontendOp : public OpKernel {
 
     int index = 0;
     std::vector<T> pad(config_.filterbank.num_channels, 0);
-    for (int anchor = 0; anchor < frame_buffer.size();
-         anchor += frame_stride_) {
-      for (int frame = anchor - left_context_; frame <= anchor + right_context_;
+    int anchor;
+    for (anchor = 0; anchor < frame_buffer.size(); anchor += frame_stride_) {
+      int frame;
+      for (frame = anchor - left_context_; frame <= anchor + right_context_;
            ++frame) {
         std::vector<T>* feature;
         if (zero_padding_ && (frame < 0 || frame >= frame_buffer.size())) {
-- 
GitLab


From 00426b0db4f2a9b0cc47d76724b024d5e9183f64 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Tue, 11 Dec 2018 17:05:55 -0800
Subject: [PATCH 400/873] Minor cleanup for the import of the tests.

PiperOrigin-RevId: 225098257
---
 .../python/keras/layers/unified_lstm_test.py  | 77 ++++++++++---------
 1 file changed, 39 insertions(+), 38 deletions(-)

diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 0219e5e426..6662bb8c04 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -34,8 +34,6 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.layers.cudnn_recurrent import CuDNNLSTM
-from tensorflow.python.keras.layers.recurrent import UnifiedLSTM
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_math_ops
@@ -75,7 +73,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
           num_classes=output_shape)
       y_train = keras.utils.to_categorical(y_train, output_shape)
 
-      layer = UnifiedLSTM(rnn_state_size, return_runtime=True)
+      layer = keras.layers.UnifiedLSTM(rnn_state_size, return_runtime=True)
 
       inputs = array_ops.placeholder(
           dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
@@ -122,7 +120,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
           num_classes=output_shape)
       y_train = keras.utils.to_categorical(y_train, output_shape)
 
-      layer = UnifiedLSTM(rnn_state_size, return_runtime=True)
+      layer = keras.layers.UnifiedLSTM(rnn_state_size, return_runtime=True)
 
       inputs = array_ops.placeholder(
           dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
@@ -172,13 +170,14 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
   def test_could_use_defun_backend(self, activation, recurrent_activation,
                                    recurrent_dropout, unroll, use_bias,
                                    bias_regularizer):
-    layer = UnifiedLSTM(1,
-                        activation=activation,
-                        recurrent_activation=recurrent_activation,
-                        recurrent_dropout=recurrent_dropout,
-                        unroll=unroll,
-                        use_bias=use_bias,
-                        bias_regularizer=bias_regularizer)
+    layer = keras.layers.UnifiedLSTM(
+        1,
+        activation=activation,
+        recurrent_activation=recurrent_activation,
+        recurrent_dropout=recurrent_dropout,
+        unroll=unroll,
+        use_bias=use_bias,
+        bias_regularizer=bias_regularizer)
     self.assertFalse(layer.could_use_cudnn)
 
   def test_unified_lstm_feature_parity_with_canonical_lstm(self):
@@ -272,14 +271,14 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     inputs = keras.layers.Input(
         shape=[timestep, input_shape], dtype=dtypes.float32)
     with test_util.device(use_gpu=False):
-      layer = UnifiedLSTM(rnn_state_size)
+      layer = keras.layers.UnifiedLSTM(rnn_state_size)
       output = layer(inputs)
       cpu_model = keras.models.Model(inputs, output)
       weights = cpu_model.get_weights()
       y_1 = cpu_model.predict(x_train)
 
     with test_util.device(use_gpu=True):
-      layer = UnifiedLSTM(rnn_state_size)
+      layer = keras.layers.UnifiedLSTM(rnn_state_size)
       output = layer(inputs)
       gpu_model = keras.models.Model(inputs, output)
       gpu_model.set_weights(weights)
@@ -359,7 +358,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
         num_classes=output_shape)
     y_train = keras.utils.to_categorical(y_train, output_shape)
 
-    layer = UnifiedLSTM(rnn_state_size)
+    layer = keras.layers.UnifiedLSTM(rnn_state_size)
 
     inputs = keras.layers.Input(
         shape=[timestep, input_shape], dtype=dtypes.float32)
@@ -378,7 +377,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     embedding_dim = 4
     units = 2
     testing_utils.layer_test(
-        UnifiedLSTM,
+        keras.layers.UnifiedLSTM,
         kwargs={
             'units': units,
             'return_sequences': True
@@ -396,7 +395,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     inputs = keras.layers.Dense(
         embedding_dim, input_shape=(timesteps, embedding_dim))
     model.add(inputs)
-    layer = UnifiedLSTM(units, return_sequences=True)
+    layer = keras.layers.UnifiedLSTM(units, return_sequences=True)
     model.add(layer)
     outputs = model.layers[-1].output
     self.assertEqual(outputs.get_shape().as_list(), [None, timesteps, units])
@@ -407,7 +406,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     timesteps = 3
     embedding_dim = 4
     units = 2
-    layer = UnifiedLSTM(units, input_shape=(None, embedding_dim))
+    layer = keras.layers.UnifiedLSTM(units, input_shape=(None, embedding_dim))
     model = keras.models.Sequential()
     model.add(layer)
     model.compile(gradient_descent.GradientDescentOptimizer(0.001), 'mse')
@@ -422,7 +421,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     embedding_dim = 4
     units = 2
     testing_utils.layer_test(
-        UnifiedLSTM,
+        keras.layers.UnifiedLSTM,
         kwargs={
             'units': units,
             'dropout': 0.1,
@@ -438,7 +437,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     embedding_dim = 4
     units = 2
     testing_utils.layer_test(
-        UnifiedLSTM,
+        keras.layers.UnifiedLSTM,
         kwargs={
             'units': units,
             'implementation': implementation_mode
@@ -448,7 +447,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_constraints_LSTM(self):
     embedding_dim = 4
-    layer_class = UnifiedLSTM
+    layer_class = keras.layers.UnifiedLSTM
     k_constraint = keras.constraints.max_norm(0.01)
     r_constraint = keras.constraints.max_norm(0.01)
     b_constraint = keras.constraints.max_norm(0.01)
@@ -467,7 +466,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_with_masking_layer_LSTM(self):
-    layer_class = UnifiedLSTM
+    layer_class = keras.layers.UnifiedLSTM
     inputs = np.random.random((2, 3, 4))
     targets = np.abs(np.random.random((2, 3, 5)))
     targets /= targets.sum(axis=-1, keepdims=True)
@@ -485,8 +484,8 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     targets = np.abs(np.random.random((2, 3, 5)))
     targets /= targets.sum(axis=-1, keepdims=True)
     model = keras.models.Sequential()
-    model.add(UnifiedLSTM(10, return_sequences=True, unroll=False))
-    model.add(UnifiedLSTM(5, return_sequences=True, unroll=False))
+    model.add(keras.layers.UnifiedLSTM(10, return_sequences=True, unroll=False))
+    model.add(keras.layers.UnifiedLSTM(5, return_sequences=True, unroll=False))
     model.compile(
         loss='categorical_crossentropy',
         optimizer=gradient_descent.GradientDescentOptimizer(0.01))
@@ -499,8 +498,8 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     targets /= targets.sum(axis=-1, keepdims=True)
     model = keras.models.Sequential()
     model.add(keras.layers.Masking(input_shape=(3, 4)))
-    model.add(UnifiedLSTM(10, return_sequences=True, unroll=False))
-    model.add(UnifiedLSTM(5, return_sequences=True, unroll=False))
+    model.add(keras.layers.UnifiedLSTM(10, return_sequences=True, unroll=False))
+    model.add(keras.layers.UnifiedLSTM(5, return_sequences=True, unroll=False))
     model.compile(
         loss='categorical_crossentropy',
         optimizer=gradient_descent.GradientDescentOptimizer(0.01))
@@ -508,7 +507,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_from_config_LSTM(self):
-    layer_class = UnifiedLSTM
+    layer_class = keras.layers.UnifiedLSTM
     for stateful in (False, True):
       l1 = layer_class(units=1, stateful=stateful)
       l2 = layer_class.from_config(l1.get_config())
@@ -525,7 +524,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     # Test with Keras tensor
     inputs = keras.Input((timesteps, embedding_dim))
     initial_state = [keras.Input((units,)) for _ in range(num_states)]
-    layer = UnifiedLSTM(units)
+    layer = keras.layers.UnifiedLSTM(units)
     if len(initial_state) == 1:
       output = layer(inputs, initial_state=initial_state[0])
     else:
@@ -558,7 +557,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
         keras.backend.random_normal_variable((num_samples, units), 0, 1)
         for _ in range(num_states)
     ]
-    layer = UnifiedLSTM(units)
+    layer = keras.layers.UnifiedLSTM(units)
     output = layer(inputs, initial_state=initial_state)
 
     model = keras.models.Model(inputs, output)
@@ -578,7 +577,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     units = 3
     num_samples = 2
 
-    layer = UnifiedLSTM(units, stateful=True)
+    layer = keras.layers.UnifiedLSTM(units, stateful=True)
     layer.build((num_samples, timesteps, embedding_dim))
     layer.reset_states()
     assert len(layer.states) == num_states
@@ -612,7 +611,8 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     inputs = keras.Input((timesteps, embedding_dim))
     _ = keras.layers.Masking()(inputs)
     initial_state = [keras.Input((units,)) for _ in range(num_states)]
-    output = UnifiedLSTM(units)(inputs, initial_state=initial_state)
+    output = keras.layers.UnifiedLSTM(units)(
+        inputs, initial_state=initial_state)
 
     model = keras.models.Model([inputs] + initial_state, output)
     model.compile(
@@ -635,7 +635,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     num_samples = 2
 
     inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
-    layer = UnifiedLSTM(units, return_state=True, stateful=True)
+    layer = keras.layers.UnifiedLSTM(units, return_state=True, stateful=True)
     outputs = layer(inputs)
     state = outputs[1:]
     assert len(state) == num_states
@@ -653,10 +653,11 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     num_samples = 2
 
     inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
-    layer = UnifiedLSTM(units, return_state=True, return_sequences=True)
+    layer = keras.layers.UnifiedLSTM(
+        units, return_state=True, return_sequences=True)
     outputs = layer(inputs)
     output, state = outputs[0], outputs[1:]
-    output = UnifiedLSTM(units)(output, initial_state=state)
+    output = keras.layers.UnifiedLSTM(units)(output, initial_state=state)
     model = keras.models.Model(inputs, output)
 
     inputs = np.random.random((num_samples, timesteps, embedding_dim))
@@ -669,7 +670,7 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     units = 3
     num_samples = 2
     num_states = 2
-    layer_class = UnifiedLSTM
+    layer_class = keras.layers.UnifiedLSTM
 
     # Test with Keras tensor
     main_inputs = keras.Input((timesteps, embedding_dim))
@@ -701,7 +702,7 @@ class LSTMLayerGraphOnlyTest(test.TestCase):
     timesteps = 3
     embedding_dim = 4
     units = 2
-    layer_class = UnifiedLSTM
+    layer_class = keras.layers.UnifiedLSTM
     with self.cached_session(config=_config):
       model = keras.models.Sequential()
       model.add(
@@ -761,7 +762,7 @@ class LSTMLayerGraphOnlyTest(test.TestCase):
 
   def test_regularizers_LSTM(self):
     embedding_dim = 4
-    layer_class = UnifiedLSTM
+    layer_class = keras.layers.UnifiedLSTM
     with self.cached_session(config=_config):
       layer = layer_class(
           5,
@@ -799,7 +800,7 @@ class UnifiedLSTMPerformanceTest(test.Benchmark):
     rnn_state_size = test_config['rnn_state_size']
     timestep = test_config['timestep']
 
-    cudnn_lstm_layer = CuDNNLSTM(rnn_state_size)
+    cudnn_lstm_layer = keras.layers.CuDNNLSTM(rnn_state_size)
     inputs = keras.layers.Input(
         shape=[timestep, input_shape], dtype=dtypes.float32)
 
@@ -820,7 +821,7 @@ class UnifiedLSTMPerformanceTest(test.Benchmark):
     rnn_state_size = test_config['rnn_state_size']
     timestep = test_config['timestep']
 
-    layer = UnifiedLSTM(rnn_state_size)
+    layer = keras.layers.UnifiedLSTM(rnn_state_size)
     inputs = keras.layers.Input(
         shape=[timestep, input_shape], dtype=dtypes.float32)
 
-- 
GitLab


From 5269f8acf996052f8fcf2587f4f929d9de67b6e4 Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Tue, 11 Dec 2018 17:06:42 -0800
Subject: [PATCH 401/873] Set infinite GRPC watchdog timeout.

Under heavy network load, the GRPC watchdog timer can be "stuck" behind other pending RPCs and timeout actively running workers.  Disable this for now.

PiperOrigin-RevId: 225098378
---
 .../core/distributed_runtime/rpc/grpc_channel.cc       |  1 +
 .../core/distributed_runtime/rpc/grpc_server_lib.cc    | 10 ++++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
index 1420589f82..e5634d38bd 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
@@ -61,6 +61,7 @@ Status ValidateHostPortPair(const string& host_port) {
   ::grpc::ChannelArguments args;
   args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH, std::numeric_limits<int32>::max());
   args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, std::numeric_limits<int>::max());
+  args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, std::numeric_limits<int>::max());
   // NOTE(mrry): Some versions of gRPC use a 20-second minimum backoff
   // on connection failure, which makes our tests time out.
   args.SetInt("grpc.testing.fixed_reconnect_backoff_ms", 1000);
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
index 33ff8e1ac4..08518606f6 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
@@ -110,10 +110,7 @@ GrpcServer::~GrpcServer() {
   // - worker_env_.compute_pool
 }
 
-void GrpcServer::MaybeMutateBuilder(::grpc::ServerBuilder* builder) {
-  builder->AddChannelArgument(GRPC_ARG_KEEPALIVE_TIME_MS,
-                              std::numeric_limits<int>::max());
-}
+void GrpcServer::MaybeMutateBuilder(::grpc::ServerBuilder* builder) {}
 
 Status GrpcServer::Init(
     ServiceInitFunction service_func,
@@ -196,6 +193,11 @@ Status GrpcServer::Init(
   builder.AddListeningPort(strings::StrCat("0.0.0.0:", requested_port),
                            GetServerCredentials(server_def_), &bound_port_);
   builder.SetMaxMessageSize(std::numeric_limits<int32>::max());
+  builder.AddChannelArgument(GRPC_ARG_KEEPALIVE_TIME_MS,
+                             std::numeric_limits<int>::max());
+  builder.AddChannelArgument(GRPC_ARG_KEEPALIVE_TIMEOUT_MS,
+                             std::numeric_limits<int>::max());
+
   builder.SetOption(
       std::unique_ptr<::grpc::ServerBuilderOption>(new NoReusePortOption));
   // Allow subclasses to specify more args to pass to the gRPC server.
-- 
GitLab


From dcd966eaba1661315828bf9141512c1bdc0b827b Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Tue, 11 Dec 2018 17:41:45 -0800
Subject: [PATCH 402/873] Runtime flag to disable MKL-DNN contraction kernels

PiperOrigin-RevId: 225102856
---
 tensorflow/core/kernels/BUILD                 |  1 +
 .../core/kernels/eigen_contraction_kernel.cc  | 55 +++++++++++++++++++
 .../core/kernels/eigen_contraction_kernel.h   | 48 +++++++++++++++-
 3 files changed, 101 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/core/kernels/eigen_contraction_kernel.cc

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index e8b1dd270f..d62992233b 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -592,6 +592,7 @@ cc_library(
 #   #endif
 cc_library(
     name = "eigen_contraction_kernel",
+    srcs = ["eigen_contraction_kernel.cc"],
     hdrs = ["eigen_contraction_kernel.h"],
     defines = select({
         ":mkldnn_contraction_kernel": [
diff --git a/tensorflow/core/kernels/eigen_contraction_kernel.cc b/tensorflow/core/kernels/eigen_contraction_kernel.cc
new file mode 100644
index 0000000000..da42001781
--- /dev/null
+++ b/tensorflow/core/kernels/eigen_contraction_kernel.cc
@@ -0,0 +1,55 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/eigen_contraction_kernel.h"
+
+#include <mutex>  // NOLINT(build/c++11)
+
+// We need a pair of compile time and runtime flags to disable compilation of
+// custom contraction kernels for unsupported architectures (e.g. Android,
+// iOS, ARM and PPC CPUs, etc...), and to be able to fallback on default Eigen
+// matrix multiplication at runtime.
+//
+// It's not allowed to use absl flags library in Tensorflow, so we have to pass
+// the configuration through the environment variable.
+//
+// Example:
+//   bazel test --test_env=TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL=false //test
+
+#if defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)
+
+namespace Eigen {
+namespace internal {
+
+// TODO(ezhulenev): This is a temporary workaround for disabling custom kernels
+// at runtime in tests. We should always rely on compile time flags for that.
+// Example: ... --test_env=TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL=false //test
+bool UseCustomContractionKernels() {
+  static bool use_custom_contraction_kernel = true;
+
+  static std::once_flag initialized;
+  std::call_once(initialized, [&] {
+    char* flag = std::getenv("TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL");
+    if (flag && (strcmp(flag, "false") == 0 || strcmp(flag, "0") == 0)) {
+      use_custom_contraction_kernel = false;
+    }
+  });
+
+  return use_custom_contraction_kernel;
+}
+
+}  // namespace internal
+}  // namespace Eigen
+#endif
diff --git a/tensorflow/core/kernels/eigen_contraction_kernel.h b/tensorflow/core/kernels/eigen_contraction_kernel.h
index 66e93a83af..3d8e52ca0e 100644
--- a/tensorflow/core/kernels/eigen_contraction_kernel.h
+++ b/tensorflow/core/kernels/eigen_contraction_kernel.h
@@ -33,11 +33,20 @@ limitations under the License.
 //   #endif
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
 #include "mkldnn.h"
+#endif
 
 namespace Eigen {
 namespace internal {
 
+#if defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)
+// Returns `true` iff we can use custom contraction kernels. This is a runtime
+// check, that uses environment variables.
+bool UseCustomContractionKernels();
+#endif  // TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL
+
 // Enabled by build option: "--define tensorflow_mkldnn_contraction_kernel=1"
 #if defined(TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL)
 
@@ -170,6 +179,10 @@ class TensorContractionBlocking<float, float, float, StorageIndex,
                                                      num_threads);
     }
 
+    // If we are using default Eigen gebp kernel there is no need to adjust the
+    // block sizes for MKL-DNN.
+    if (!UseCustomContractionKernels()) return;
+
     // 2. And refine them to work well with mkldnn sgemm.
     mc_ = (std::min)(
         m, Eigen::divup(static_cast<StorageIndex>(mc_ * kScaleM), kUnrollM) *
@@ -211,23 +224,52 @@ struct TensorContractionKernel<float, float, float, StorageIndex, OutputMapper,
                                      typename RhsMapper::SubMapper, ColMajor>;
   using GemmKernel = mkldnn_gemm_kernel<Scalar, StorageIndex, OutputMapper>;
 
+  // Fallback on default Eigen pack and GEBP kernel if custom contraction
+  // kernels disabled at runtime.
+  using EigenLhsPacker =
+      gemm_pack_lhs<Scalar, StorageIndex, typename LhsMapper::SubMapper,
+                    Traits::mr, Traits::LhsProgress,
+                    typename Traits::LhsPacket4Packing, ColMajor>;
+  using EigenRhsPacker =
+      gemm_pack_rhs<Scalar, StorageIndex, typename RhsMapper::SubMapper,
+                    Traits::nr, ColMajor>;
+  using GebpKernel =
+      gebp_kernel<Scalar, Scalar, StorageIndex, OutputMapper, Traits::mr,
+                  Traits::nr,
+                  /*ConjugateLhs*/ false, /*ConjugateRhs*/ false>;
+
   EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void packLhs(
       Scalar* lhsBlock, const typename LhsMapper::SubMapper& data_mapper,
       const StorageIndex depth, const StorageIndex rows) {
-    LhsPacker()(lhsBlock, data_mapper, rows, depth);
+    if (UseCustomContractionKernels()) {
+      LhsPacker()(lhsBlock, data_mapper, rows, depth);
+    } else {
+      EigenLhsPacker()(lhsBlock, data_mapper, depth, rows, /*stride*/ 0,
+                       /*offset*/ 0);
+    }
   }
 
   EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void packRhs(
       Scalar* rhsBlock, const typename RhsMapper::SubMapper& data_mapper,
       const StorageIndex depth, const StorageIndex cols) {
-    RhsPacker()(rhsBlock, data_mapper, depth, cols);
+    if (UseCustomContractionKernels()) {
+      RhsPacker()(rhsBlock, data_mapper, depth, cols);
+    } else {
+      EigenRhsPacker()(rhsBlock, data_mapper, depth, cols);
+    }
   }
 
   EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void invoke(
       const OutputMapper& output_mapper, const Scalar* lhsBlock,
       const Scalar* rhsBlock, const StorageIndex rows, const StorageIndex depth,
       const StorageIndex cols, const Scalar alpha) {
-    GemmKernel()(output_mapper, lhsBlock, rhsBlock, rows, depth, cols, alpha);
+    if (UseCustomContractionKernels()) {
+      GemmKernel()(output_mapper, lhsBlock, rhsBlock, rows, depth, cols, alpha);
+    } else {
+      GebpKernel()(output_mapper, lhsBlock, rhsBlock, rows, depth, cols, alpha,
+                   /*strideA*/ -1, /*strideB*/ -1,
+                   /*offsetA*/ 0, /*offsetB*/ 0);
+    }
   }
 };
 
-- 
GitLab


From 0bdd941c2adca373b91b74925ccd3528a565b8bc Mon Sep 17 00:00:00 2001
From: Zhenyu Tan <tanzheny@google.com>
Date: Tue, 11 Dec 2018 17:42:38 -0800
Subject: [PATCH 403/873] expose v2 api for optimizers and migrate away from
 keras v1 optimizers.

PiperOrigin-RevId: 225102983
---
 .../compiler/tf2xla/kernels/training_ops.cc   |  59 ++++
 .../tf2xla/resource_operation_table.cc        |   1 +
 .../python/keras_optimizer_v2_test.py         | 112 +-------
 .../contrib/tpu/python/tpu/keras_support.py   |   4 +
 .../tpu/python/tpu/keras_tpu_variables.py     |   1 +
 tensorflow/python/keras/engine/training.py    |   6 +-
 .../python/keras/optimizer_v2/adadelta.py     |   4 +-
 .../keras/optimizer_v2/adadelta_test.py       |  15 +-
 .../python/keras/optimizer_v2/adagrad.py      |   4 +-
 .../python/keras/optimizer_v2/adagrad_test.py |  23 +-
 tensorflow/python/keras/optimizer_v2/adam.py  |  14 +-
 .../python/keras/optimizer_v2/adam_test.py    |  12 +-
 .../python/keras/optimizer_v2/adamax.py       |   2 +
 .../python/keras/optimizer_v2/adamax_test.py  |  12 +-
 tensorflow/python/keras/optimizer_v2/ftrl.py  |   2 +
 .../python/keras/optimizer_v2/ftrl_test.py    |   7 +-
 .../keras/optimizer_v2/gradient_descent.py    |  16 +-
 .../optimizer_v2/gradient_descent_test.py     |  34 ++-
 tensorflow/python/keras/optimizer_v2/nadam.py |   3 +
 .../python/keras/optimizer_v2/nadam_test.py   |  12 +
 .../python/keras/optimizer_v2/optimizer_v2.py | 253 ++++++++++++------
 .../keras/optimizer_v2/optimizer_v2_test.py   |  66 ++---
 .../python/keras/optimizer_v2/rmsprop.py      |  14 +-
 .../python/keras/optimizer_v2/rmsprop_test.py |  23 +-
 tensorflow/python/keras/optimizers.py         |   7 -
 ...ensorflow.keras.optimizers.-adadelta.pbtxt |  41 ++-
 ...tensorflow.keras.optimizers.-adagrad.pbtxt |  41 ++-
 .../tensorflow.keras.optimizers.-adam.pbtxt   |  41 ++-
 .../tensorflow.keras.optimizers.-adamax.pbtxt |  42 ++-
 ...nsorflow.keras.optimizers.-optimizer.pbtxt |  39 ++-
 ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt |  41 ++-
 .../tensorflow.keras.optimizers.-s-g-d.pbtxt  |  41 ++-
 ...ensorflow.keras.optimizers.-adadelta.pbtxt |  41 ++-
 ...tensorflow.keras.optimizers.-adagrad.pbtxt |  41 ++-
 .../tensorflow.keras.optimizers.-adam.pbtxt   |  41 ++-
 .../tensorflow.keras.optimizers.-adamax.pbtxt |  42 ++-
 ...nsorflow.keras.optimizers.-optimizer.pbtxt |  39 ++-
 ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt |  41 ++-
 .../tensorflow.keras.optimizers.-s-g-d.pbtxt  |  41 ++-
 39 files changed, 942 insertions(+), 336 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/training_ops.cc b/tensorflow/compiler/tf2xla/kernels/training_ops.cc
index 960c1462ce..26d4214099 100644
--- a/tensorflow/compiler/tf2xla/kernels/training_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/training_ops.cc
@@ -172,6 +172,65 @@ class ResourceApplyMomentum : public XlaOpKernel {
 REGISTER_XLA_OP(Name("ResourceApplyMomentum").TypeConstraint("T", kFloatTypes),
                 ResourceApplyMomentum);
 
+class ResourceApplyKerasMomentum : public XlaOpKernel {
+ public:
+  explicit ResourceApplyKerasMomentum(OpKernelConstruction* ctx)
+      : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr("use_nesterov", &use_nesterov_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    DataType type = ctx->input_type(2);
+
+    TensorShape var_shape, accum_shape;
+    xla::XlaOp var, accum;
+    OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(0, type, &var_shape, &var));
+    OP_REQUIRES_OK(ctx, ctx->ReadVariableInput(1, type, &accum_shape, &accum));
+
+    OP_REQUIRES(ctx, var_shape.IsSameSize(accum_shape),
+                errors::InvalidArgument(
+                    "var and accum do not have the same shape",
+                    var_shape.DebugString(), " ", accum_shape.DebugString()));
+
+    TensorShape lr_shape = ctx->InputShape(2);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(lr_shape),
+                errors::InvalidArgument("lr is not a scalar: ",
+                                        lr_shape.DebugString()));
+
+    TensorShape grad_shape = ctx->InputShape(3);
+    OP_REQUIRES(ctx, var_shape.IsSameSize(grad_shape),
+                errors::InvalidArgument(
+                    "var and grad do not have the same shape",
+                    var_shape.DebugString(), " ", grad_shape.DebugString()));
+
+    TensorShape momentum_shape = ctx->InputShape(4);
+    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(momentum_shape),
+                errors::InvalidArgument("momentum is not a scalar: ",
+                                        momentum_shape.DebugString()));
+
+    xla::XlaOp lr = ctx->Input(2);
+    xla::XlaOp grad = ctx->Input(3);
+    xla::XlaOp momentum = ctx->Input(4);
+
+    accum = accum * momentum - grad * lr;
+    if (use_nesterov_) {
+      // See https://github.com/tensorflow/tensorflow/pull/2798 for an
+      // explanation of the reparameterization used here.
+      var = var + accum * momentum - grad * lr;
+    } else {
+      var = var + accum;
+    }
+    OP_REQUIRES_OK(ctx, ctx->AssignVariable(0, type, var));
+    OP_REQUIRES_OK(ctx, ctx->AssignVariable(1, type, accum));
+  }
+
+ private:
+  bool use_nesterov_;
+};
+REGISTER_XLA_OP(
+    Name("ResourceApplyKerasMomentum").TypeConstraint("T", kFloatTypes),
+    ResourceApplyKerasMomentum);
+
 class ResourceApplyAdagrad : public XlaOpKernel {
  public:
   explicit ResourceApplyAdagrad(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
diff --git a/tensorflow/compiler/tf2xla/resource_operation_table.cc b/tensorflow/compiler/tf2xla/resource_operation_table.cc
index 72b240996f..ff9f1b9ccb 100644
--- a/tensorflow/compiler/tf2xla/resource_operation_table.cc
+++ b/tensorflow/compiler/tf2xla/resource_operation_table.cc
@@ -65,6 +65,7 @@ CreateResourceOpInfoMap() {
   add("ResourceApplyFtrlV2"                  , kReadWrite, kVariable);
   add("ResourceApplyGradientDescent"         , kReadWrite, kVariable);
   add("ResourceApplyMomentum"                , kReadWrite, kVariable);
+  add("ResourceApplyKerasMomentum"           , kReadWrite, kVariable);
   add("ResourceApplyPowerSign"               , kReadWrite, kVariable);
   add("ResourceApplyProximalAdagrad"         , kReadWrite, kVariable);
   add("ResourceApplyProximalGradientDescent" , kReadWrite, kVariable);
diff --git a/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py b/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
index 6dfd85bcc4..8c596549c4 100644
--- a/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
+++ b/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
@@ -18,24 +18,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import shutil
-import tempfile
 from absl.testing import parameterized
 import numpy as np
-import six
 
 from tensorflow.contrib.distribute.python import combinations
-from tensorflow.core.protobuf import config_pb2
 from tensorflow.python import keras
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.distribute import distribution_strategy_context as ds_context
-from tensorflow.python.estimator import run_config
-from tensorflow.python.estimator import training
-from tensorflow.python.estimator.canned import dnn_linear_combined
-from tensorflow.python.estimator.canned import prediction_keys
-from tensorflow.python.estimator.export import export
-from tensorflow.python.estimator.inputs import numpy_io
-from tensorflow.python.feature_column import feature_column_lib as feature_column
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -44,103 +32,7 @@ from tensorflow.python.keras.optimizer_v2 import gradient_descent
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
-from tensorflow.python.summary.writer import writer_cache
-
-
-class KerasOptimizerV2IntegrationTest(test.TestCase, parameterized.TestCase):
-
-  def setUp(self):
-    self._model_dir = tempfile.mkdtemp()
-
-  def dataset_input_fn(self, x, y, batch_size):
-
-    def input_fn():
-      dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
-      dataset = dataset.repeat(1).batch(batch_size)
-      return dataset
-
-    return input_fn
-
-  @combinations.generate(
-      combinations.combine(
-          mode=['graph'],
-          distribution=[
-              combinations.one_device_strategy,
-              combinations.mirrored_strategy_with_gpu_and_cpu,
-              combinations.mirrored_strategy_with_two_gpus,
-              combinations.core_mirrored_strategy_with_gpu_and_cpu,
-              combinations.core_mirrored_strategy_with_two_gpus
-          ],
-          use_train_and_evaluate=[True, False]))
-  def test_complete_flow_with_mode(self, distribution, use_train_and_evaluate):
-    label_dimension = 2
-    input_dimension = label_dimension
-    batch_size = 10
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
-    train_input_fn = self.dataset_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size // distribution.num_replicas_in_sync)
-    eval_input_fn = self.dataset_input_fn(
-        x={'x': data},
-        y=data,
-        batch_size=batch_size // distribution.num_replicas_in_sync)
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, batch_size=batch_size, shuffle=False)
-
-    linear_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))
-    ]
-    dnn_feature_columns = [
-        feature_column.numeric_column('x', shape=(input_dimension,))
-    ]
-    feature_columns = linear_feature_columns + dnn_feature_columns
-    session_config = config_pb2.ConfigProto(
-        log_device_placement=True, allow_soft_placement=True)
-    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
-        linear_feature_columns=linear_feature_columns,
-        dnn_hidden_units=(2, 2),
-        dnn_feature_columns=dnn_feature_columns,
-        label_dimension=label_dimension,
-        model_dir=self._model_dir,
-        dnn_optimizer=adam.Adam(0.001),
-        linear_optimizer=adam.Adam(0.001),
-        config=run_config.RunConfig(
-            train_distribute=distribution,
-            eval_distribute=distribution,
-            session_config=session_config))
-
-    num_steps = 2
-    if use_train_and_evaluate:
-      scores, _ = training.train_and_evaluate(
-          estimator, training.TrainSpec(train_input_fn, max_steps=num_steps),
-          training.EvalSpec(eval_input_fn))
-    else:
-      estimator.train(train_input_fn, steps=num_steps)
-      scores = estimator.evaluate(eval_input_fn)
-
-    self.assertIn('loss', six.iterkeys(scores))
-
-    predictions = np.array([
-        x[prediction_keys.PredictionKeys.PREDICTIONS]
-        for x in estimator.predict(predict_input_fn)
-    ])
-    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
-
-    feature_spec = feature_column.make_parse_example_spec(feature_columns)
-    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
-        feature_spec)
-    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
-                                             serving_input_receiver_fn)
-    self.assertTrue(gfile.Exists(export_dir))
-
-  def tearDown(self):
-    if self._model_dir:
-      writer_cache.FileWriterCache.clear()
-      shutil.rmtree(self._model_dir)
 
 
 def get_model():
@@ -162,7 +54,9 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase):
       var = variables.Variable(
           2.0, name='var', aggregation=variable_scope.VariableAggregation.SUM)
       # grad for cpu is 1, grad for gpu is 2, avg grad is 1.5.
-      loss = math_ops.cast(_replica_id() + 1, dtype=dtypes.float32) * var
+      def loss():
+        return math_ops.cast(_replica_id() + 1, dtype=dtypes.float32) * var
+
       optimizer = adam.Adam(learning_rate=0.01, beta_1=0.2, beta_2=0.2)
       train_op = optimizer.minimize(loss, var_list=[var])
       m = optimizer.get_slot(var, 'm')
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index 4ce1945903..cf9672f8d8 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -2069,6 +2069,8 @@ class KerasTPUModel(models.Model):
       # tpu_model may not be compiled, e.g., loading weights and then predict.
       return
     for k, v in six.iteritems(cpu_optimizer_config):
+      if k == 'name':
+        continue
       opt_var = getattr(self._tpu_model.optimizer, k)
       if isinstance(opt_var, variables.Variable):
         logging.info('CPU -> TPU %s: %s {%s}', k, v, K.get_value(opt_var))
@@ -2097,6 +2099,8 @@ class KerasTPUModel(models.Model):
     self._cpu_model.set_weights(tpu_weights)
     for k, v in six.iteritems(tpu_optimizer_config):
       logging.info('TPU -> CPU %s: %s', k, v)
+      if k == 'name':
+        continue
       opt_var = getattr(self.cpu_optimizer, k)
       if isinstance(opt_var, variables.Variable):
         K.get_session().run(opt_var.assign(v))
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
index 8b0b240dc7..de425626c8 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
@@ -69,6 +69,7 @@ class ReplicatedVariable(object):
   def __init__(self, name, variables):
     self._name = name
     self._primary_var = variables[0]
+    self._common_name = self._primary_var.name.split(":")[0]
     self._vars = variables
     self._cached_value = None
     self._dtype = variables[0].dtype
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 75d6496988..de929f2d3c 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -40,6 +40,7 @@ from tensorflow.python.keras.engine import training_eager
 from tensorflow.python.keras.engine import training_generator
 from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.engine.network import Network
+from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.keras.utils import data_utils
 from tensorflow.python.keras.utils.generic_utils import slice_arrays
 from tensorflow.python.keras.utils.losses_utils import squeeze_or_expand_dimensions
@@ -195,8 +196,9 @@ class Model(Network):
     # Validate that arguments passed by the user to `compile` are supported by
     # DistributionStrategy.
     if distribute:
-      if not isinstance(
-          optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
+      if not isinstance(optimizer,
+                        (tf_optimizer_module.Optimizer, optimizers.TFOptimizer,
+                         optimizer_v2.OptimizerV2)):
         raise NotImplementedError(
             'optimizer must be an instance of '
             'tf.train.Optimizer, not a %s' % type(optimizer))
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py
index 55b4eba105..8985325056 100644
--- a/tensorflow/python/keras/optimizer_v2/adadelta.py
+++ b/tensorflow/python/keras/optimizer_v2/adadelta.py
@@ -22,8 +22,10 @@ import numpy as np
 
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.training import training_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export('keras.optimizers.Adadelta')
 class Adadelta(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the Adadelta algorithm.
 
@@ -85,7 +87,7 @@ class Adadelta(optimizer_v2.OptimizerV2):
     @end_compatibility
     """
     super(Adadelta, self).__init__(name, **kwargs)
-    self._set_hyper('learning_rate', learning_rate)
+    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
     self._set_hyper('decay', self._initial_decay)
     self._set_hyper('rho', rho)
     self._set_hyper('epsilon', epsilon)
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta_test.py b/tensorflow/python/keras/optimizer_v2/adadelta_test.py
index 0fb67d0cd1..c95af6a8ad 100644
--- a/tensorflow/python/keras/optimizer_v2/adadelta_test.py
+++ b/tensorflow/python/keras/optimizer_v2/adadelta_test.py
@@ -153,8 +153,11 @@ class AdadeltaOptimizerTest(test.TestCase):
       with self.cached_session():
         var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
-        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        loss = pred * pred
+
+        def loss():
+          pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
+          return pred * pred
+
         sgd_op = adadelta.Adadelta(1.0, 1.0, 1.0).minimize(
             loss, var_list=[var0])
         variables.global_variables_initializer().run()
@@ -165,6 +168,14 @@ class AdadeltaOptimizerTest(test.TestCase):
         # Validate updated params
         self.assertAllCloseAccordingToType([[-111, -138]], self.evaluate(var0))
 
+  def testConstructAdadeltaWithLR(self):
+    opt = adadelta.Adadelta(lr=1.0, rho=0.9, epsilon=1.)
+    self.assertEqual(opt.lr, 1.0)
+    opt_2 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1., lr=1.0)
+    self.assertEqual(opt_2.lr, 1.0)
+    opt_3 = adadelta.Adadelta(learning_rate=0.1, rho=0.9, epsilon=1.)
+    self.assertEqual(opt_3.lr, 0.1)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py
index 670cad70e6..6faf7fc2f4 100644
--- a/tensorflow/python/keras/optimizer_v2/adagrad.py
+++ b/tensorflow/python/keras/optimizer_v2/adagrad.py
@@ -27,8 +27,10 @@ from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export('keras.optimizers.Adagrad')
 class Adagrad(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the Adagrad algorithm.
 
@@ -86,7 +88,7 @@ class Adagrad(optimizer_v2.OptimizerV2):
     if epsilon < 1e-7:
       raise ValueError('epsilon must be larger than 1e-7: %s' % epsilon)
     super(Adagrad, self).__init__(name, **kwargs)
-    self._set_hyper('learning_rate', learning_rate)
+    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
     self._set_hyper('decay', self._initial_decay)
     self._initial_accumulator_value = initial_accumulator_value
     self._set_hyper('epsilon', epsilon)
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad_test.py b/tensorflow/python/keras/optimizer_v2/adagrad_test.py
index b2c290178f..cf6f6a7832 100644
--- a/tensorflow/python/keras/optimizer_v2/adagrad_test.py
+++ b/tensorflow/python/keras/optimizer_v2/adagrad_test.py
@@ -167,8 +167,11 @@ class AdagradOptimizerTest(test.TestCase):
         var0 = resource_variable_ops.ResourceVariable(
             [[1.0, 2.0], [3.0, 4.0]], dtype=dtype)
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
-        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        loss = pred * pred
+
+        def loss():
+          pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
+          return pred * pred
+
         sgd_op = adagrad.Adagrad(1.0).minimize(loss, var_list=[var0])
         variables.global_variables_initializer().run()
         # Fetch params to validate initial values
@@ -297,12 +300,12 @@ class AdagradOptimizerTest(test.TestCase):
       with self.cached_session():
         var_repeated = resource_variable_ops.ResourceVariable(
             [1.0, 2.0], dtype=dtype)
-        loss_repeated = math_ops.reduce_sum(
-            embedding_ops.embedding_lookup(var_repeated, [0, 0]))
+        loss_repeated = lambda: math_ops.reduce_sum(  # pylint: disable=g-long-lambda
+            embedding_ops.embedding_lookup(var_repeated, [0, 0]))  # pylint: disable=cell-var-from-loop
         var_aggregated = resource_variable_ops.ResourceVariable(
             [1.0, 2.0], dtype=dtype)
-        loss_aggregated = 2 * math_ops.reduce_sum(
-            embedding_ops.embedding_lookup(var_aggregated, [0]))
+        loss_aggregated = lambda: 2 * math_ops.reduce_sum(  # pylint: disable=g-long-lambda
+            embedding_ops.embedding_lookup(var_aggregated, [0]))  # pylint: disable=cell-var-from-loop
         update_op_repeated = adagrad.Adagrad(2.0).minimize(
             loss_repeated, var_list=[var_repeated])
         update_op_aggregated = adagrad.Adagrad(2.0).minimize(
@@ -395,6 +398,14 @@ class AdagradOptimizerTest(test.TestCase):
         self.assertAllCloseAccordingToType(var0_np, self.evaluate(var0))
         self.assertAllCloseAccordingToType(var1_np, self.evaluate(var1))
 
+  def testConstructAdagradWithLR(self):
+    opt = adagrad.Adagrad(lr=1.0)
+    self.assertEqual(opt.lr, 1.0)
+    opt_2 = adagrad.Adagrad(learning_rate=0.1, lr=1.0)
+    self.assertEqual(opt_2.lr, 1.0)
+    opt_3 = adagrad.Adagrad(learning_rate=0.1)
+    self.assertEqual(opt_3.lr, 0.1)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
index ef3d783f89..c99468f8cf 100644
--- a/tensorflow/python/keras/optimizer_v2/adam.py
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -24,8 +24,10 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.training import training_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export('keras.optimizers.Adam')
 class Adam(optimizer_v2.OptimizerV2):
   """Optimizer that implements the Adam algorithm.
 
@@ -127,12 +129,12 @@ class Adam(optimizer_v2.OptimizerV2):
     """
 
     super(Adam, self).__init__(name, **kwargs)
-    self._set_hyper('learning_rate', learning_rate)
+    self._set_hyper('learning_rate', kwargs.get('lr', learning_rate))
     self._set_hyper('decay', self._initial_decay)
     self._set_hyper('beta_1', beta_1)
     self._set_hyper('beta_2', beta_2)
     self._set_hyper('epsilon', epsilon)
-    self._amsgrad = amsgrad
+    self.amsgrad = amsgrad
 
   def _create_slots(self, var_list):
     # Create slots for the first and second moments.
@@ -141,7 +143,7 @@ class Adam(optimizer_v2.OptimizerV2):
       self.add_slot(var, 'm')
     for var in var_list:
       self.add_slot(var, 'v')
-    if self._amsgrad:
+    if self.amsgrad:
       for var in var_list:
         self.add_slot(var, 'vhat')
 
@@ -166,7 +168,7 @@ class Adam(optimizer_v2.OptimizerV2):
     local_step = math_ops.cast(self.iterations + 1, var_dtype)
     beta_1_power = math_ops.pow(beta_1_t, local_step)
     beta_2_power = math_ops.pow(beta_2_t, local_step)
-    if not self._amsgrad:
+    if not self.amsgrad:
       return training_ops.resource_apply_adam(
           var.handle,
           m.handle,
@@ -220,7 +222,7 @@ class Adam(optimizer_v2.OptimizerV2):
     with ops.control_dependencies([v_t]):
       v_t = self._resource_scatter_add(v, indices, v_scaled_g_values)
 
-    if not self._amsgrad:
+    if not self.amsgrad:
       v_sqrt = math_ops.sqrt(v_t)
       var_update = state_ops.assign_sub(
           var, lr * m_t / (v_sqrt + epsilon_t), use_locking=self._use_locking)
@@ -251,6 +253,6 @@ class Adam(optimizer_v2.OptimizerV2):
         'beta_1': self._serialize_hyperparameter('beta_1'),
         'beta_2': self._serialize_hyperparameter('beta_2'),
         'epsilon': self._serialize_hyperparameter('epsilon'),
-        'amsgrad': self._amsgrad,
+        'amsgrad': self.amsgrad,
     })
     return config
diff --git a/tensorflow/python/keras/optimizer_v2/adam_test.py b/tensorflow/python/keras/optimizer_v2/adam_test.py
index 3bbafe12f8..49a9de41cd 100644
--- a/tensorflow/python/keras/optimizer_v2/adam_test.py
+++ b/tensorflow/python/keras/optimizer_v2/adam_test.py
@@ -162,9 +162,9 @@ class AdamOptimizerTest(test.TestCase):
         # it (i.e. they have GPU kernels).
         var = variables.Variable([[1.0], [2.0]])
         indices = constant_op.constant([0, 1], dtype=index_dtype)
-        gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices))
+        g_sum = lambda: math_ops.reduce_sum(array_ops.gather(var, indices))  # pylint: disable=cell-var-from-loop
         optimizer = adam.Adam(3.0)
-        minimize_op = optimizer.minimize(gathered_sum, var_list=[var])
+        minimize_op = optimizer.minimize(g_sum, var_list=[var])
         variables.global_variables_initializer().run()
         minimize_op.run()
 
@@ -503,6 +503,14 @@ class AdamOptimizerTest(test.TestCase):
     self.assertEqual(
         self.evaluate(keras_v1_iteration), self.evaluate(keras_v2_iteration))
 
+  def testConstructAdamWithLR(self):
+    opt = adam.Adam(lr=1.0)
+    self.assertEqual(opt.lr, 1.0)
+    opt_2 = adam.Adam(learning_rate=0.1, lr=1.0)
+    self.assertEqual(opt_2.lr, 1.0)
+    opt_3 = adam.Adam(learning_rate=0.1)
+    self.assertEqual(opt_3.lr, 0.1)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/adamax.py b/tensorflow/python/keras/optimizer_v2/adamax.py
index ddd78584f8..920a6c0fd3 100644
--- a/tensorflow/python/keras/optimizer_v2/adamax.py
+++ b/tensorflow/python/keras/optimizer_v2/adamax.py
@@ -25,8 +25,10 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.training import training_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export('keras.optimizers.Adamax')
 class Adamax(adam.Adam):
   """Optimizer that implements the Adamax algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/adamax_test.py b/tensorflow/python/keras/optimizer_v2/adamax_test.py
index baf131fbb0..339c0fe6e6 100644
--- a/tensorflow/python/keras/optimizer_v2/adamax_test.py
+++ b/tensorflow/python/keras/optimizer_v2/adamax_test.py
@@ -136,9 +136,9 @@ class AdamaxOptimizerTest(test.TestCase):
         # it (i.e. they have GPU kernels).
         var = variables.Variable([[1.0], [2.0]])
         indices = constant_op.constant([0, 1], dtype=index_dtype)
-        gathered_sum = math_ops.reduce_sum(array_ops.gather(var, indices))
+        g_sum = lambda: math_ops.reduce_sum(array_ops.gather(var, indices))  # pylint: disable=cell-var-from-loop
         optimizer = adamax.Adamax(3.0)
-        minimize_op = optimizer.minimize(gathered_sum, var_list=[var])
+        minimize_op = optimizer.minimize(g_sum, var_list=[var])
         variables.global_variables_initializer().run()
         minimize_op.run()
 
@@ -362,6 +362,14 @@ class AdamaxOptimizerTest(test.TestCase):
       # There should be iteration, and two unique slot variables for v1 and v2.
       self.assertEqual(5, len(set(opt.variables())))
 
+  def testConstructAdamaxWithLR(self):
+    opt = adamax.Adamax(lr=1.0)
+    self.assertEqual(opt.lr, 1.0)
+    opt_2 = adamax.Adamax(learning_rate=0.1, lr=1.0)
+    self.assertEqual(opt_2.lr, 1.0)
+    opt_3 = adamax.Adamax(learning_rate=0.1)
+    self.assertEqual(opt_3.lr, 0.1)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/ftrl.py b/tensorflow/python/keras/optimizer_v2/ftrl.py
index e278e352f5..365bd68220 100644
--- a/tensorflow/python/keras/optimizer_v2/ftrl.py
+++ b/tensorflow/python/keras/optimizer_v2/ftrl.py
@@ -21,8 +21,10 @@ from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.training import training_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export('keras.optimizers.Ftrl')
 class Ftrl(optimizer_v2.OptimizerV2):
   """Optimizer that implements the FTRL algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/ftrl_test.py b/tensorflow/python/keras/optimizer_v2/ftrl_test.py
index bec400e8cb..f0f07e9d03 100644
--- a/tensorflow/python/keras/optimizer_v2/ftrl_test.py
+++ b/tensorflow/python/keras/optimizer_v2/ftrl_test.py
@@ -113,8 +113,11 @@ class FtrlOptimizerTest(test.TestCase):
       with self.cached_session():
         var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
-        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        loss = pred * pred
+
+        def loss():
+          pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
+          return pred * pred
+
         sgd_op = ftrl.Ftrl(1.0).minimize(loss, var_list=[var0])
         variables.global_variables_initializer().run()
         # Fetch params to validate initial values
diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent.py b/tensorflow/python/keras/optimizer_v2/gradient_descent.py
index 2b82b5e78d..a77ae30551 100644
--- a/tensorflow/python/keras/optimizer_v2/gradient_descent.py
+++ b/tensorflow/python/keras/optimizer_v2/gradient_descent.py
@@ -1,4 +1,4 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,8 +21,10 @@ from tensorflow.python.framework import ops
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.training import training_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export("keras.optimizers.SGD")
 class SGD(optimizer_v2.OptimizerV2):
   """Stochastic gradient descent and momentum optimizer.
 
@@ -32,7 +34,7 @@ class SGD(optimizer_v2.OptimizerV2):
   gradient is evaluated at theta(t).
   ```
 
-  or Computes (if `use_nesterov = False`):
+  or Computes (if `nesterov = False`):
   ```
   v(t+1) = momentum * v(t) - learning_rate * gradient
   theta(t+1) = theta(t) + v(t+1)
@@ -75,7 +77,7 @@ class SGD(optimizer_v2.OptimizerV2):
       **kwargs: keyword arguments. Allowed to be {`decay`}
     """
     super(SGD, self).__init__(name, **kwargs)
-    self._set_hyper("learning_rate", learning_rate)
+    self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
     self._set_hyper("decay", self._initial_decay)
 
     self._momentum = False
@@ -85,7 +87,7 @@ class SGD(optimizer_v2.OptimizerV2):
       raise ValueError("`momentum` must be between [0, 1].")
     self._set_hyper("momentum", momentum)
 
-    self._nesterov = nesterov
+    self.nesterov = nesterov
 
   def _create_slots(self, var_list):
     if self._momentum:
@@ -104,7 +106,7 @@ class SGD(optimizer_v2.OptimizerV2):
           grad,
           self._get_hyper("momentum", var_dtype),
           use_locking=self._use_locking,
-          use_nesterov=self._nesterov)
+          use_nesterov=self.nesterov)
     else:
       return training_ops.resource_apply_gradient_descent(
           var.handle, lr_t, grad, use_locking=self._use_locking)
@@ -132,7 +134,7 @@ class SGD(optimizer_v2.OptimizerV2):
         indices,
         self._get_hyper("momentum", var_dtype),
         use_locking=self._use_locking,
-        use_nesterov=self._nesterov)
+        use_nesterov=self.nesterov)
 
   def get_config(self):
     config = super(SGD, self).get_config()
@@ -140,6 +142,6 @@ class SGD(optimizer_v2.OptimizerV2):
         "learning_rate": self._serialize_hyperparameter("learning_rate"),
         "decay": self._serialize_hyperparameter("decay"),
         "momentum": self._serialize_hyperparameter("momentum"),
-        "nesterov": self._nesterov,
+        "nesterov": self.nesterov,
     })
     return config
diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py
index 0c64202da8..9a4178db46 100644
--- a/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py
+++ b/tensorflow/python/keras/optimizer_v2/gradient_descent_test.py
@@ -122,8 +122,6 @@ class GradientDescentOptimizerTest(test.TestCase):
         var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
         loss = lambda: math_ops.matmul(var0, x) + var1  # pylint: disable=cell-var-from-loop
-        if not context.executing_eagerly():
-          loss = loss()
         sgd = gradient_descent.SGD(1.0)
         sgd_op = sgd.minimize(loss, [var0, var1])
         self.evaluate(variables.global_variables_initializer())
@@ -141,9 +139,12 @@ class GradientDescentOptimizerTest(test.TestCase):
         var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
         var1 = resource_variable_ops.ResourceVariable([3.0], dtype=dtype)
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
-        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        pred += var1
-        loss = pred * pred
+
+        def loss():
+          pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
+          pred += var1  # pylint: disable=cell-var-from-loop
+          return pred * pred
+
         sgd_op = gradient_descent.SGD(1.0).minimize(loss, [var0, var1])
         self.evaluate(variables.global_variables_initializer())
         # Run 1 step of sgd
@@ -181,7 +182,8 @@ class GradientDescentOptimizerTest(test.TestCase):
         opt = gradient_descent.SGD(3.0)
         values = [1.0, 3.0]
         vars_ = [variables.Variable([v], dtype=dtype) for v in values]
-        grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_)
+        loss = lambda: vars_[0] + vars_[1]  # pylint: disable=cell-var-from-loop
+        grads_and_vars = opt._compute_gradients(loss, vars_)
         self.evaluate(variables.global_variables_initializer())
         for grad, _ in grads_and_vars:
           self.assertAllCloseAccordingToType([1.0], self.evaluate(grad))
@@ -259,6 +261,14 @@ class GradientDescentOptimizerTest(test.TestCase):
       # be an EagerTensor once again, not a graph Tensor.
       self.assertEqual(float(step()), -1.0)
 
+  def testConstructSGDWithLR(self):
+    opt = gradient_descent.SGD(lr=1.0)
+    self.assertEqual(opt.lr, 1.0)
+    opt_2 = gradient_descent.SGD(learning_rate=0.1, lr=1.0)
+    self.assertEqual(opt_2.lr, 1.0)
+    opt_3 = gradient_descent.SGD(learning_rate=0.1)
+    self.assertEqual(opt_3.lr, 0.1)
+
 
 class MomentumOptimizerTest(test.TestCase):
 
@@ -346,7 +356,7 @@ class MomentumOptimizerTest(test.TestCase):
         var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype)
         accum0_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
         accum1_np = np.array([0.0, 0.0], dtype=dtype.as_numpy_dtype)
-        loss = 5 * var0 * var0 + 3 * var1
+        loss = lambda: 5 * var0 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
         mom_op = gradient_descent.SGD(
             learning_rate=2.0, momentum=0.9, nesterov=True)
         opt_op = mom_op.minimize(loss, [var0, var1])
@@ -677,12 +687,20 @@ class MomentumOptimizerTest(test.TestCase):
           opt3._get_hyper("momentum"))
       # self.assertEqual(
       #     self.evaluate(opt._get_hyper("decay")), opt3._get_hyper("decay"))
-      self.assertTrue(opt3._nesterov)
+      self.assertTrue(opt3.nesterov)
 
   def testNesterovWithoutMomentum(self):
     with self.assertRaisesRegexp(ValueError, "must be between"):
       gradient_descent.SGD(learning_rate=1.0, momentum=2.0)
 
+  def testConstructMomentumWithLR(self):
+    opt = gradient_descent.SGD(lr=1.0, momentum=0.9)
+    self.assertEqual(opt.lr, 1.0)
+    opt_2 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9, lr=1.0)
+    self.assertEqual(opt_2.lr, 1.0)
+    opt_3 = gradient_descent.SGD(learning_rate=0.1, momentum=0.9)
+    self.assertEqual(opt_3.lr, 0.1)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/nadam.py b/tensorflow/python/keras/optimizer_v2/nadam.py
index 00b095e0dc..afa74c8de3 100644
--- a/tensorflow/python/keras/optimizer_v2/nadam.py
+++ b/tensorflow/python/keras/optimizer_v2/nadam.py
@@ -74,6 +74,9 @@ class Nadam(adam.Adam):
       **kwargs: keyword arguments. Allowed to be {`decay`}
     """
 
+    # Backwards compatiblity with keras NAdam optimizer.
+    if 'schedule_decay' in kwargs:
+      kwargs['decay'] = kwargs.pop('schedule_decay')
     # pylint: disable=useless-super-delegation
     super(Nadam, self).__init__(
         learning_rate=learning_rate,
diff --git a/tensorflow/python/keras/optimizer_v2/nadam_test.py b/tensorflow/python/keras/optimizer_v2/nadam_test.py
index d991e3117c..73568e81f0 100644
--- a/tensorflow/python/keras/optimizer_v2/nadam_test.py
+++ b/tensorflow/python/keras/optimizer_v2/nadam_test.py
@@ -208,6 +208,18 @@ class NadamOptimizerTest(test.TestCase):
           self.assertAllCloseAccordingToType(var0_np, var0.eval())
           self.assertAllCloseAccordingToType(var1_np, var1.eval())
 
+  def testConstructNAdamWithLR(self):
+    opt = nadam.Nadam(lr=1.0)
+    self.assertEqual(opt.lr, 1.0)
+    opt_2 = nadam.Nadam(learning_rate=0.1, lr=1.0)
+    self.assertEqual(opt_2.lr, 1.0)
+    opt_3 = nadam.Nadam(learning_rate=0.1)
+    self.assertEqual(opt_3.lr, 0.1)
+
+  def testConstructNAdamWithScheduleDecay(self):
+    opt = nadam.Nadam(schedule_decay=0.2)
+    self.assertEqual(opt.decay, 0.2)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index 15f3009a4a..a130e1d0c3 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -28,22 +28,45 @@ from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
 from tensorflow.python.eager import backprop
-from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.keras import backend
 from tensorflow.python.keras import initializers
 from tensorflow.python.keras.engine import base_layer_utils
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.training import optimizer as optimizer_v1
+from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import tf_export
+
+
+def _deduplicate_indexed_slices(values, indices):
+  """Sums `values` associated with any non-unique `indices`.
+
+  Args:
+    values: A `Tensor` with rank >= 1.
+    indices: A one-dimensional integer `Tensor`, indexing into the first
+      dimension of `values` (as in an IndexedSlices object).
+
+  Returns:
+    A tuple of (`summed_values`, `unique_indices`) where `unique_indices` is a
+    de-duplicated version of `indices` and `summed_values` contains the sum of
+    `values` slices associated with each unique index.
+  """
+  unique_indices, new_index_positions = array_ops.unique(indices)
+  summed_values = math_ops.unsorted_segment_sum(
+      values, new_index_positions,
+      array_ops.shape(unique_indices)[0])
+  return (summed_values, unique_indices)
 
 
 @six.add_metaclass(abc.ABCMeta)
-class OptimizerV2(optimizer_v1.Optimizer):
+@tf_export("keras.optimizers.Optimizer")
+class OptimizerV2(checkpointable.CheckpointableBase):
   """Updated base class for optimizers.
 
   This class defines the API to add Ops to train a model.  You never use this
@@ -138,7 +161,7 @@ class OptimizerV2(optimizer_v1.Optimizer):
           _create_vars.
     """
     self._use_locking = True
-    super(OptimizerV2, self).__init__(self._use_locking, name)
+    self._name = name
     self._hyper = {}
     # dict: {variable name : {slot name : variable}}
     self._slots = {}
@@ -148,16 +171,11 @@ class OptimizerV2(optimizer_v1.Optimizer):
     if decay < 0.:
       raise ValueError("decay cannot be less than 0: {}".format(decay))
     self._initial_decay = decay
+    self.__dict__.update(kwargs)
 
     self._prepared = False
 
-  def minimize(self,
-               loss,
-               var_list,
-               aggregation_method=None,
-               colocate_gradients_with_ops=False,
-               name=None,
-               grad_loss=None):
+  def minimize(self, loss, var_list, grad_loss=None, name=None):
     """Add operations to minimize `loss` by updating `var_list`.
 
     This method simply combines calls `compute_gradients()` and
@@ -166,15 +184,11 @@ class OptimizerV2(optimizer_v1.Optimizer):
     of using this function.
 
     Args:
-      loss: A `Tensor` containing the value to minimize.
+      loss: A callable taking no arguments which returns the value to minimize.
       var_list: list or tuple of `Variable` objects to update to minimize
         `loss`.
-      aggregation_method: Specifies the method used to combine gradient terms.
-        Valid values are defined in the class `AggregationMethod`.
-      colocate_gradients_with_ops: If True, try colocating gradients with the
-        corresponding op.
-      name: Optional name for the returned operation.
       grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
+      name: Optional name for the returned operation.
 
     Returns:
       An Operation that updates the variables in `var_list`.  If `global_step`
@@ -186,29 +200,16 @@ class OptimizerV2(optimizer_v1.Optimizer):
     @compatibility(eager)
     When eager execution is enabled, `loss` should be a Python function that
     takes no arguments and computes the value to be minimized. Minimization (and
-    gradient computation) is done with respect to the elements of `var_list` if
-    not None, else with respect to any trainable variables created during the
-    execution of the `loss` function. `gate_gradients`, `aggregation_method`,
-    `colocate_gradients_with_ops` and `grad_loss` are ignored when eager
-    execution is enabled.
+    gradient computation) is done with respect to the elements of `var_list`.
+    `grad_loss` is ignored when eager execution is enabled.
     @end_compatibility
     """
-    grads_and_vars = self.compute_gradients(
-        loss,
-        var_list=var_list,
-        aggregation_method=aggregation_method,
-        colocate_gradients_with_ops=colocate_gradients_with_ops,
-        grad_loss=grad_loss)
+    grads_and_vars = self._compute_gradients(
+        loss, var_list=var_list, grad_loss=grad_loss)
 
     return self.apply_gradients(grads_and_vars, name=name)
 
-  def compute_gradients(self,
-                        loss,
-                        var_list,
-                        aggregation_method=None,
-                        colocate_gradients_with_ops=False,
-                        grad_loss=None,
-                        stop_gradients=None):
+  def _compute_gradients(self, loss, var_list, grad_loss=None):
     """Compute gradients of `loss` for the variables in `var_list`.
 
     This is the first part of `minimize()`.  It returns a list
@@ -218,19 +219,11 @@ class OptimizerV2(optimizer_v1.Optimizer):
     given variable.
 
     Args:
-      loss: A Tensor containing the value to minimize or a callable taking no
-        arguments which returns the value to minimize. When eager execution is
-        enabled it must be a callable.
-      var_list: Optional list or tuple of `tf.Variable` to update to minimize
+      loss: A callable taking no arguments which returns the value to minimize.
+      var_list: List or tuple of `tf.Variable` to update to minimize
         `loss`.  Defaults to the list of variables collected in the graph under
         the key `GraphKeys.TRAINABLE_VARIABLES`.
-      aggregation_method: Specifies the method used to combine gradient terms.
-        Valid values are defined in the class `AggregationMethod`.
-      colocate_gradients_with_ops: If True, try colocating gradients with the
-        corresponding op.
       grad_loss: Optional. A `Tensor` holding the gradient computed for `loss`.
-      stop_gradients: Optional. A Tensor or list of tensors not to differentiate
-        through.
 
     Returns:
       A list of (gradient, variable) pairs. Variable is always present, but
@@ -239,38 +232,22 @@ class OptimizerV2(optimizer_v1.Optimizer):
     Raises:
       TypeError: If `var_list` contains anything else than `Variable` objects.
       ValueError: If some arguments are invalid, or var_list is None.
-      RuntimeError: If called with eager execution enabled and `loss` is
-        not callable.
-
-    @compatibility(eager)
-    When eager execution is enabled, `aggregation_method`, and
-    `colocate_gradients_with_ops` are ignored.
-    @end_compatibility
     """
     var_list = nest.flatten(var_list)
     # TODO(josh11b): Test that we handle weight decay in a reasonable way.
-    if callable(loss):
-      with backprop.GradientTape() as tape:
-        tape.watch(var_list)
-        loss_value = loss()
-        loss_value = self._scale_loss(loss_value)
-      grads = tape.gradient(loss_value, var_list, grad_loss)
-    else:
-      if context.executing_eagerly():
-        raise RuntimeError("`loss` passed to Optimizer.compute_gradients "
-                           "should be a function when eager execution is "
-                           "enabled.")
-      loss = self._scale_loss(loss)
-      self._assert_valid_dtypes([loss])
-      if grad_loss is not None:
-        self._assert_valid_dtypes([grad_loss])
-      grads = gradients.gradients(
-          loss,
-          var_list,
-          grad_ys=grad_loss,
-          aggregation_method=aggregation_method,
-          colocate_gradients_with_ops=colocate_gradients_with_ops,
-          stop_gradients=stop_gradients)
+    with backprop.GradientTape() as tape:
+      tape.watch(var_list)
+      loss_value = loss()
+      loss_value = self._scale_loss(loss_value)
+    grads = tape.gradient(loss_value, var_list, grad_loss)
+
+    if hasattr(self, "clipnorm"):
+      grads = [clip_ops.clip_by_norm(g, self.clipnorm) for g in grads]
+    if hasattr(self, "clipvalue"):
+      grads = [
+          clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
+          for g in grads
+      ]
 
     grads_and_vars = list(zip(grads, var_list))
     self._assert_valid_dtypes([
@@ -289,6 +266,37 @@ class OptimizerV2(optimizer_v1.Optimizer):
         loss_value *= (1. / num_replicas)
     return loss_value
 
+  def get_gradients(self, loss, params):
+    """Returns gradients of `loss` with respect to `params`.
+
+    Arguments:
+      loss: Loss tensor.
+      params: List of variables.
+
+    Returns:
+      List of gradient tensors.
+
+    Raises:
+      ValueError: In case any gradient cannot be computed (e.g. if gradient
+        function not implemented).
+    """
+    loss = self._scale_loss(loss)
+    grads = gradients.gradients(loss, params)
+    if None in grads:
+      raise ValueError("An operation has `None` for gradient. "
+                       "Please make sure that all of your ops have a "
+                       "gradient defined (i.e. are differentiable). "
+                       "Common ops without gradient: "
+                       "K.argmax, K.round, K.eval.")
+    if hasattr(self, "clipnorm"):
+      grads = [clip_ops.clip_by_norm(g, self.clipnorm) for g in grads]
+    if hasattr(self, "clipvalue"):
+      grads = [
+          clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
+          for g in grads
+      ]
+    return grads
+
   def apply_gradients(self, grads_and_vars, name=None):
     """Apply gradients to variables.
 
@@ -351,7 +359,13 @@ class OptimizerV2(optimizer_v1.Optimizer):
       return apply_updates
 
   def get_updates(self, loss, params):
-    return [self.minimize(loss, params)]
+    grads = self.get_gradients(loss, params)
+    grads_and_vars = list(zip(grads, params))
+    self._assert_valid_dtypes([
+        v for g, v in grads_and_vars
+        if g is not None and v.dtype != dtypes.resource
+    ])
+    return [self.apply_gradients(grads_and_vars)]
 
   def _set_hyper(self, name, value):
     """set hyper `name` to value. value can be callable, tensor, numeric."""
@@ -575,6 +589,95 @@ class OptimizerV2(optimizer_v1.Optimizer):
 
     return variable
 
+  def _assert_valid_dtypes(self, tensors):
+    """Asserts tensors are all valid types (see `_valid_dtypes`).
+
+    Args:
+      tensors: Tensors to check.
+
+    Raises:
+      ValueError: If any tensor is not a valid type.
+    """
+    valid_dtypes = self._valid_dtypes()
+    for t in tensors:
+      dtype = t.dtype.base_dtype
+      if dtype not in valid_dtypes:
+        raise ValueError("Invalid type %r for %s, expected: %s." %
+                         (dtype, t.name, [v for v in valid_dtypes]))
+
+  def _valid_dtypes(self):
+    """Valid types for loss, variables and gradients.
+
+    Subclasses should override to allow other float types.
+
+    Returns:
+      Valid types for loss, variables and gradients.
+    """
+    return set(
+        [dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64])
+
+  def _call_if_callable(self, param):
+    """Call the function if param is callable."""
+    return param() if callable(param) else param
+
+  def _resource_apply_dense(self, grad, handle):
+    """Add ops to apply dense gradients to the variable `handle`.
+
+    Args:
+      grad: a `Tensor` representing the gradient.
+      handle: a `Tensor` of dtype `resource` which points to the variable to be
+        updated.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    raise NotImplementedError()
+
+  def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
+    """Add ops to apply sparse gradients to `handle`, with repeated indices.
+
+    Optimizers which override this method must deal with repeated indices. See
+    the docstring of `_apply_sparse_duplicate_indices` for details. By default
+    the correct behavior, to sum non-unique indices and their associated
+    gradients, is enforced by first pre-processing `grad` and `indices` and
+    passing them on to `_resource_apply_sparse`. Optimizers which deal correctly
+    with duplicate indices may instead override this method to avoid the
+    overhead of summing.
+
+    Args:
+      grad: a `Tensor` representing the gradient for the affected indices.
+      handle: a `Tensor` of dtype `resource` which points to the variable to be
+        updated.
+      indices: a `Tensor` of integral type representing the indices for which
+        the gradient is nonzero. Indices may be repeated.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    summed_grad, unique_indices = _deduplicate_indexed_slices(
+        values=grad, indices=indices)
+    return self._resource_apply_sparse(summed_grad, handle, unique_indices)
+
+  def _resource_apply_sparse(self, grad, handle, indices):
+    """Add ops to apply sparse gradients to the variable `handle`.
+
+    Similar to `_apply_sparse`, the `indices` argument to this method has been
+    de-duplicated. Optimizers which deal correctly with non-unique indices may
+    instead override `_resource_apply_sparse_duplicate_indices` to avoid this
+    overhead.
+
+    Args:
+      grad: a `Tensor` representing the gradient for the affected indices.
+      handle: a `Tensor` of dtype `resource` which points to the variable to be
+        updated.
+      indices: a `Tensor` of integral type representing the indices for which
+        the gradient is nonzero. Indices are unique.
+
+    Returns:
+      An `Operation` which updates the value of the variable.
+    """
+    raise NotImplementedError()
+
 
 def _filter_grads(grads_and_vars):
   """Filter out iterable with grad equal to None."""
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
index 158577fe64..8b2865e2aa 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
@@ -46,7 +46,6 @@ from tensorflow.python.keras.optimizer_v2 import gradient_descent
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
-from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
@@ -64,8 +63,6 @@ class OptimizerTest(test.TestCase):
         var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
         var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
         loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
-        if not context.executing_eagerly():
-          loss = loss()
         sgd = gradient_descent.SGD(3.0)
 
         self.evaluate(variables.global_variables_initializer())
@@ -116,33 +113,6 @@ class OptimizerTest(test.TestCase):
       # var1 = [0., 1.] - 0.5 * [3, 3]
       self.assertAllClose([-1.5, -0.5], self.evaluate(var1))
 
-  @test_util.run_in_graph_and_eager_modes
-  def testAggregationMethod(self):
-    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
-      with self.cached_session():
-        var0 = variables.Variable([1.0, 2.0], dtype=dtype)
-        var1 = variables.Variable([3.0, 4.0], dtype=dtype)
-        loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
-        if not context.executing_eagerly():
-          loss = loss()
-        sgd = gradient_descent.SGD(3.0)
-
-        self.evaluate(variables.global_variables_initializer())
-        # Fetch params to validate initial values
-        self.assertAllClose([1.0, 2.0], self.evaluate(var0))
-        self.assertAllClose([3.0, 4.0], self.evaluate(var1))
-        # Run 1 step of sgd through optimizer
-        opt_op = sgd.minimize(
-            loss,
-            var_list=[var0, var1],
-            aggregation_method=gradients_impl.AggregationMethod
-            .EXPERIMENTAL_ACCUMULATE_N)
-        self.evaluate(variables.global_variables_initializer())
-        self.evaluate(opt_op)
-        # Validate updated params
-        self.assertAllClose([-14., -13.], self.evaluate(var0))
-        self.assertAllClose([-6., -5.], self.evaluate(var1))
-
   @test_util.run_in_graph_and_eager_modes
   def testPrecomputedGradient(self):
     for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
@@ -150,8 +120,6 @@ class OptimizerTest(test.TestCase):
         var0 = variables.Variable([1.0, 2.0], dtype=dtype)
         var1 = variables.Variable([3.0, 4.0], dtype=dtype)
         loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
-        if not context.executing_eagerly():
-          loss = loss()
         grad_loss = constant_op.constant([42, -42], dtype=dtype)
         sgd = gradient_descent.SGD(3.0)
 
@@ -176,8 +144,6 @@ class OptimizerTest(test.TestCase):
         var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
         var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
         loss = lambda: 5 * var0  # pylint: disable=cell-var-from-loop
-        if not context.executing_eagerly():
-          loss = loss()
         sgd_op = gradient_descent.SGD(3.0)
         with self.assertRaisesRegexp(ValueError, 'No gradients'):
           # var1 has no gradient
@@ -190,8 +156,6 @@ class OptimizerTest(test.TestCase):
         var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
         var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
         loss = lambda: constant_op.constant(5.0)
-        if not context.executing_eagerly():
-          loss = loss()
 
         sgd_op = gradient_descent.SGD(3.0)
         with self.assertRaisesRegexp(ValueError,
@@ -216,11 +180,9 @@ class OptimizerTest(test.TestCase):
         var0 = resource_variable_ops.ResourceVariable([1.0, 2.0], dtype=dtype)
         var1 = resource_variable_ops.ResourceVariable([3.0, 4.0], dtype=dtype)
         loss = lambda: 5 * var0 + 3 * var1  # pylint: disable=cell-var-from-loop
-        if not context.executing_eagerly():
-          loss = loss()
 
         sgd = gradient_descent.SGD(3.0)
-        grads_and_vars = sgd.compute_gradients(loss, [var0, var1])
+        grads_and_vars = sgd._compute_gradients(loss, [var0, var1])
         # Convert gradients to tf.Variables
         converted_grads = [
             resource_variable_ops.ResourceVariable(
@@ -259,7 +221,7 @@ class OptimizerTest(test.TestCase):
         return x * x
 
       sgd = gradient_descent.SGD(3.0)
-      grads_and_vars = sgd.compute_gradients(f, [x])
+      grads_and_vars = sgd._compute_gradients(f, [x])
       self.assertEqual(1, len(grads_and_vars))
       grad, x_as_var = grads_and_vars[0]
       self.assertIs(x, x_as_var)
@@ -278,8 +240,6 @@ class OptimizerTest(test.TestCase):
       var1 = variables.Variable([3.0, 4.0],
                                 constraint=constraint_0)
       loss = lambda: 5 * var0 + 3 * var1
-      if not context.executing_eagerly():  # pylint: disable=cell-var-from-loop
-        loss = loss()
       sgd = gradient_descent.SGD(3.0)
 
       self.evaluate(variables.global_variables_initializer())
@@ -338,6 +298,28 @@ class OptimizerTest(test.TestCase):
           self.evaluate(opt._get_hyper('learning_rate')),
           opt3._get_hyper('learning_rate'))
 
+  @test_util.run_in_graph_and_eager_modes
+  def testGradClipValue(self):
+    with self.cached_session():
+      var = resource_variable_ops.ResourceVariable([1.0, 2.0])
+      loss = lambda: 3 * var
+      opt = gradient_descent.SGD(learning_rate=1.0, clipvalue=1.0)
+      opt_op = opt.minimize(loss, [var])
+      self.evaluate(variables.global_variables_initializer())
+      self.evaluate(opt_op)
+      self.assertAllClose([0., 1.], self.evaluate(var))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testGradClipNorm(self):
+    with self.cached_session():
+      var = resource_variable_ops.ResourceVariable([1.0])
+      loss = lambda: 3 * var
+      opt = gradient_descent.SGD(learning_rate=1.0, clipnorm=1.0)
+      opt_op = opt.minimize(loss, [var])
+      self.evaluate(variables.global_variables_initializer())
+      self.evaluate(opt_op)
+      self.assertAllClose([0.], self.evaluate(var))
+
   @test_util.run_in_graph_and_eager_modes
   def testWeights(self):
     with self.cached_session():
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py
index 6a5b334fc4..634111b470 100644
--- a/tensorflow/python/keras/optimizer_v2/rmsprop.py
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py
@@ -20,8 +20,10 @@ from __future__ import print_function
 from tensorflow.python.framework import ops
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.training import training_ops
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export("keras.optimizers.RMSprop")
 class RMSprop(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the RMSprop algorithm.
 
@@ -91,7 +93,7 @@ class RMSprop(optimizer_v2.OptimizerV2):
       **kwargs: keyword arguments. Allowed to be {`decay`}
     """
     super(RMSprop, self).__init__(name, **kwargs)
-    self._set_hyper("learning_rate", learning_rate)
+    self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))
     self._set_hyper("decay", self._initial_decay)
     self._set_hyper("rho", rho)
 
@@ -103,13 +105,13 @@ class RMSprop(optimizer_v2.OptimizerV2):
     self._set_hyper("momentum", momentum)
 
     self._set_hyper("epsilon", epsilon)
-    self._centered = centered
+    self.centered = centered
 
   def _create_slots(self, var_list):
     for var in var_list:
       self.add_slot(var, "rms")
       self.add_slot(var, "momentum")
-      if self._centered:
+      if self.centered:
         self.add_slot(var, "mg")
 
   def _resource_apply_dense(self, grad, var):
@@ -120,7 +122,7 @@ class RMSprop(optimizer_v2.OptimizerV2):
     rho = self._get_hyper("rho", var_dtype)
     momentum = self._get_hyper("momentum", var_dtype)
     epsilon = self._get_hyper("epsilon", var_dtype)
-    if self._centered:
+    if self.centered:
       mg = self.get_slot(var, "mg")
       return training_ops.resource_apply_centered_rms_prop(
           var.handle,
@@ -153,7 +155,7 @@ class RMSprop(optimizer_v2.OptimizerV2):
     rho = self._get_hyper("rho", var_dtype)
     momentum = self._get_hyper("momentum", var_dtype)
     epsilon = self._get_hyper("epsilon", var_dtype)
-    if self._centered:
+    if self.centered:
       mg = self.get_slot(var, "mg")
       return training_ops.resource_sparse_apply_centered_rms_prop(
           var.handle,
@@ -188,7 +190,7 @@ class RMSprop(optimizer_v2.OptimizerV2):
         "rho": self._serialize_hyperparameter("rho"),
         "momentum": self._serialize_hyperparameter("momentum"),
         "epsilon": self._serialize_hyperparameter("epsilon"),
-        "centered": self._centered,
+        "centered": self.centered,
     })
     return config
 
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
index a8658a8550..4d61cfbbc5 100644
--- a/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop_test.py
@@ -233,8 +233,11 @@ class RMSpropOptimizerTest(test.TestCase):
       with self.cached_session():
         var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
-        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        loss = pred * pred
+
+        def loss():
+          pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
+          return pred * pred
+
         sgd_op = rmsprop.RMSprop(
             learning_rate=1.0,
             rho=0.0,
@@ -258,8 +261,12 @@ class RMSpropOptimizerTest(test.TestCase):
       with self.cached_session():
         var0 = resource_variable_ops.ResourceVariable([[1.0, 2.0]], dtype=dtype)
         x = constant_op.constant([[4.0], [5.0]], dtype=dtype)
-        pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
-        loss = pred * pred
+
+        def loss():
+          pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)  # pylint: disable=cell-var-from-loop
+          return pred * pred
+
+        # loss = lambda: pred * pred  # pylint: disable=cell-var-from-loop
         sgd_op = rmsprop.RMSprop(
             learning_rate=1.0,
             rho=0.0,
@@ -405,6 +412,14 @@ class RMSpropOptimizerTest(test.TestCase):
                 (0.01 * 2.0 / math.sqrt(0.00001 * 0.9 + 1e-5 + 1.0))
             ]), self.evaluate(var1))
 
+  def testConstructRMSpropWithLR(self):
+    opt = rmsprop.RMSprop(lr=1.0)
+    self.assertEqual(opt.lr, 1.0)
+    opt_2 = rmsprop.RMSprop(learning_rate=0.1, lr=1.0)
+    self.assertEqual(opt_2.lr, 1.0)
+    opt_3 = rmsprop.RMSprop(learning_rate=0.1)
+    self.assertEqual(opt_3.lr, 0.1)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/keras/optimizers.py b/tensorflow/python/keras/optimizers.py
index ee6dbba5ad..a558c2532b 100644
--- a/tensorflow/python/keras/optimizers.py
+++ b/tensorflow/python/keras/optimizers.py
@@ -45,7 +45,6 @@ from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('keras.optimizers.Optimizer')
 class Optimizer(object):
   """Abstract optimizer base class.
 
@@ -159,7 +158,6 @@ class Optimizer(object):
     return cls(**config)
 
 
-@tf_export('keras.optimizers.SGD')
 class SGD(Optimizer):
   """Stochastic gradient descent optimizer.
 
@@ -224,7 +222,6 @@ class SGD(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.optimizers.RMSprop')
 class RMSprop(Optimizer):
   """RMSProp optimizer.
 
@@ -291,7 +288,6 @@ class RMSprop(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.optimizers.Adagrad')
 class Adagrad(Optimizer):
   """Adagrad optimizer.
 
@@ -358,7 +354,6 @@ class Adagrad(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.optimizers.Adadelta')
 class Adadelta(Optimizer):
   """Adadelta optimizer.
 
@@ -442,7 +437,6 @@ class Adadelta(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.optimizers.Adam')
 class Adam(Optimizer):
   """Adam optimizer.
 
@@ -539,7 +533,6 @@ class Adam(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.optimizers.Adamax')
 class Adamax(Optimizer):
   """Adamax optimizer from Adam paper's Section 7.
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
index b9ce154bdd..00cd5aca4c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.Adadelta"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Adadelta\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adadelta.Adadelta\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'rho\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'1.0\', \'0.95\', \'None\', \'0.0\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'Adadelta\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
index d0dc9e37a3..6d47fe310d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.Adagrad"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Adagrad\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adagrad.Adagrad\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'None\', \'0.0\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'Adagrad\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
index 06815fa99a..417362d211 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.Adam"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Adam\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'beta_1\', \'beta_2\', \'epsilon\', \'decay\', \'amsgrad\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'None\', \'0.0\', \'False\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'Adam\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
index 47b55fdb44..7b43abee23 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
@@ -1,15 +1,37 @@
 path: "tensorflow.keras.optimizers.Adamax"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Adamax\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adamax.Adamax\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'beta_1\', \'beta_2\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.002\', \'0.9\', \'0.999\', \'None\', \'0.0\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'Adamax\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +41,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +53,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
index 53d64dae93..a996746dac 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
@@ -1,14 +1,35 @@
 path: "tensorflow.keras.optimizers.Optimizer"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -18,6 +39,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -26,8 +51,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
index a1e9b8cceb..bfc9d67a47 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.RMSprop"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.RMSprop\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'rho\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'None\', \'0.0\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'RMSprop\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
index a67fefb1ba..3f3d57962b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.SGD"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.SGD\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.gradient_descent.SGD\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'momentum\', \'decay\', \'nesterov\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'0.0\', \'0.0\', \'False\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.0\', \'False\', \'SGD\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
index b9ce154bdd..00cd5aca4c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.Adadelta"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Adadelta\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adadelta.Adadelta\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'rho\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'1.0\', \'0.95\', \'None\', \'0.0\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'Adadelta\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
index d0dc9e37a3..6d47fe310d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.Adagrad"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Adagrad\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adagrad.Adagrad\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'None\', \'0.0\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'Adagrad\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
index 06815fa99a..417362d211 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.Adam"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Adam\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'beta_1\', \'beta_2\', \'epsilon\', \'decay\', \'amsgrad\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'None\', \'0.0\', \'False\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'Adam\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
index 47b55fdb44..7b43abee23 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
@@ -1,15 +1,37 @@
 path: "tensorflow.keras.optimizers.Adamax"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Adamax\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adamax.Adamax\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'beta_1\', \'beta_2\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.002\', \'0.9\', \'0.999\', \'None\', \'0.0\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'Adamax\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +41,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +53,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
index 53d64dae93..a996746dac 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
@@ -1,14 +1,35 @@
 path: "tensorflow.keras.optimizers.Optimizer"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=kwargs, defaults=None"
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -18,6 +39,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -26,8 +51,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
index a1e9b8cceb..bfc9d67a47 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.RMSprop"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.RMSprop\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'rho\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'None\', \'0.0\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'RMSprop\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
index a67fefb1ba..3f3d57962b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
@@ -1,15 +1,36 @@
 path: "tensorflow.keras.optimizers.SGD"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizers.SGD\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.gradient_descent.SGD\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
+  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
   is_instance: "<type \'object\'>"
+  member {
+    name: "iterations"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "weights"
+    mtype: "<type \'property\'>"
+  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'lr\', \'momentum\', \'decay\', \'nesterov\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'0.0\', \'0.0\', \'False\'], "
+    argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.0\', \'False\', \'SGD\'], "
+  }
+  member_method {
+    name: "add_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
+  }
+  member_method {
+    name: "add_weight"
+    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+  }
+  member_method {
+    name: "apply_gradients"
+    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "get_config"
@@ -19,6 +40,10 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot"
+    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -27,8 +52,16 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "minimize"
+    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "variables"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
 }
-- 
GitLab


From 9b21247408beba5325aeaf11e906f4a5e879ec12 Mon Sep 17 00:00:00 2001
From: Pan Daoxin <daoxin.pan@intel.com>
Date: Wed, 12 Dec 2018 10:03:35 +0800
Subject: [PATCH 404/873] More modifications to comments.

---
 tensorflow/core/kernels/mkl_slice_op.cc | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc
index 577aa5c8db..e2cbeec2d2 100644
--- a/tensorflow/core/kernels/mkl_slice_op.cc
+++ b/tensorflow/core/kernels/mkl_slice_op.cc
@@ -190,7 +190,10 @@ class MklSlicePrimitive : public MklPrimitive {
     context_.dst_mem->set_data_handle(sliceParams.to->get_data_handle());
     context_.slice_stream->submit(context_.slice_primitives);
 
-    // For safety guard, so that data_handle wouldn't be rewritten.
+    // We should set it back to DummyData so as to make the primitive
+    // in cache pool stateless. Otherwise, if the result for previous
+    // iteration is kept, problems of current iteration won't be
+    // thrown immediately, and wrong data would be reused.
     context_.src_mem->set_data_handle(DummyData);
     context_.dst_mem->set_data_handle(DummyData);
     return;
@@ -214,7 +217,8 @@ class MklSlicePrimitive : public MklPrimitive {
   engine cpu_engine_ = engine(engine::cpu, 0);
 
   void Setup(const MklSliceParams& sliceParams) {
-    // Just create the memory primitive, fill with dummy.
+    // Actually, this DummyData will not be used in computation,
+    // because the real data will be filled before real execution.
     context_.src_mem.reset(
         new memory({sliceParams.from->get_primitive_desc().desc(), cpu_engine_},
                    DummyData));
-- 
GitLab


From 02101df8e851aad6e9788f82aa51b4f5281b9e75 Mon Sep 17 00:00:00 2001
From: Billy Lamberta <blamb@google.com>
Date: Tue, 11 Dec 2018 18:19:05 -0800
Subject: [PATCH 405/873] Docs: Convert markdown links to backtick auto-link
 format in keras/engine.

PiperOrigin-RevId: 225107457
---
 tensorflow/python/keras/engine/training.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index de929f2d3c..65a5d00d74 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -144,12 +144,11 @@ class Model(Network):
 
     Arguments:
         optimizer: String (name of optimizer) or optimizer instance.
-            See [optimizers](/api_docs/python/tf/keras/optimizers).
+            See `tf.keras.optimizers`.
         loss: String (name of objective function) or objective function.
-            See [losses](/api_docs/python/tf/losses).
-            If the model has multiple outputs, you can use a different loss
-            on each output by passing a dictionary or a list of losses.
-            The loss value that will be minimized by the model
+            See `tf.losses`. If the model has multiple outputs, you can use a
+            different loss on each output by passing a dictionary or a list of
+            losses. The loss value that will be minimized by the model
             will then be the sum of all individual losses.
         metrics: List of metrics to be evaluated by the model
             during training and testing.
@@ -629,7 +628,7 @@ class Model(Network):
             0 = silent, 1 = progress bar, 2 = one line per epoch.
         callbacks: List of `keras.callbacks.Callback` instances.
             List of callbacks to apply during training.
-            See [callbacks](/api_docs/python/tf/keras/callbacks).
+            See `tf.keras.callbacks`.
         validation_split: Float between 0 and 1.
             Fraction of the training data to be used as validation data.
             The model will set apart this fraction of the training data,
-- 
GitLab


From ae244e6dabeb6b879c5adb9ca4c2a85cb4722dc5 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Tue, 11 Dec 2018 18:22:21 -0800
Subject: [PATCH 406/873] Update the function API doc to cover the autograph
 functionality. Minor other formatting fixes.

PiperOrigin-RevId: 225107801
---
 tensorflow/python/eager/def_function.py | 55 ++++++++++++++++++++-----
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index cdbf39ddd5..a12f9ed765 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -553,12 +553,33 @@ def function(func=None,
   assert f(x, y).numpy() == g(x, y).numpy()
 
   # Tensors and tf.Variables used by the Python function are captured in the
-  # traced graph.
+  # graph.
   @tf.function
   def h():
     return f(x, y)
 
   assert (h().numpy() == f(x, y).numpy()).all()
+
+  # Data-dependent control flow is also captured in the graph. Supported
+  # control flow statements include `if`, `for`, `break`, `continue`, `return`.
+  @tf.function
+  def g(x):
+    if tf.reduce_sum(x) > 0:
+      return x * x
+    else:
+      return -x // 2
+
+  # print and TensorFlow side effects are supported, but exercise caution when
+  # using Python side effects like mutating objects, saving to files, etc.
+  l = []
+
+  @tf.function
+  def g(x):
+    for i in x:
+      print(i)                              # Works
+      tf.assign(v, i)                       # Works
+      tf.py_func(lambda i: l.append(i))(i)  # Works
+      l.append(i)                           # Caution! Doesn't work.
   ```
 
   _Referencing `tf.Variable`s_
@@ -630,6 +651,7 @@ def function(func=None,
   ```
 
   _Input Signatures_
+
   `function` instantiates a separate graph for every unique set of input
   shapes and datatypes. For example, the following code snippet will result
   in three distinct graphs being traced, as each input has a different
@@ -663,9 +685,15 @@ def function(func=None,
   When an `input_signature` is specified, the callable will only accept `Tensor`
   (or NumPy `ndarray`) objects as arguments.
 
-  _Tracing_
-  Note that `function` only traces TensorFlow operations, all the other
-  Python code that `func` executes will shape the _construction_ of the graph.
+  _Tracing and staging_
+
+  When `autograph` is `True`, all Python code that depends on `Tensor` values is
+  staged into a TensorFlow graph. When `autograph` is `False`, the function is
+  traced and control flow is not allowed to depend on data.
+
+  Note that `function` only stages TensorFlow operations, all Python code that
+  `func` executes and does not depend on data will shape the _construction_ of
+  the graph.
   For example, consider the following:
 
   ```python
@@ -678,21 +706,26 @@ def function(func=None,
   ```
 
   `add_noise()` will return a different output every time it is invoked.
-  However, `traced` will return the same value every time it is called, since a
-  particular random value generated by the `np.random.randn` call will be
-  inserted in the traced TensorFlow graph as a constant. In this particular
-  example, replacing `np.random.randn(5, 5)` with `tf.random_normal((5, 5))`
-  will result in the same behavior for `add_noise()` and `traced()`.
+  However, `add_noise` will return the same value every time it is called,
+  since a particular random value generated by the `np.random.randn` call will
+  be inserted in the traced/staged TensorFlow graph as a constant. In this
+  particular example, replacing `np.random.randn(5, 5)` with
+  `tf.random_normal((5, 5))` will result in the same behavior for `add_noise()`
+  and `traced()`.
 
   _Python Side-Effects_
+
   A corollary of the previous discussion on tracing is the following: If a
   Python function `func` has Python side-effects, then executing `func` multiple
-  times
-  may not be semantically equivalent to executing `F = tf.function(func)`
+  times may not be semantically equivalent to executing `F = tf.function(func)`
   multiple times; this difference is due to the fact that `function` only
   captures the subgraph of TensorFlow operations that is constructed when `func`
   is invoked to trace a graph.
 
+  The same is true if code with Python side effects is used inside control flow,
+  such as a loop. If your code uses side effects that are not intended to
+  control graph construction, wrap them inside `tf.py_func`.
+
   Args:
     func: function to be compiled. If `func` is None, returns a decorator that
       can be invoked with a single argument - `func`. The end result is
-- 
GitLab


From 04e8759ee2416baac1f31f6a27cb49a8b6051e19 Mon Sep 17 00:00:00 2001
From: Andy Ly <lyandy@google.com>
Date: Tue, 11 Dec 2018 18:36:46 -0800
Subject: [PATCH 407/873] [Grappler] Add helper functions to GraphView.

PiperOrigin-RevId: 225109110
---
 tensorflow/core/grappler/graph_view.h       | 60 ++++++++++++++-------
 tensorflow/core/grappler/graph_view_test.cc | 34 ++++++++++++
 tensorflow/core/grappler/utils.cc           |  7 ++-
 tensorflow/core/grappler/utils.h            |  4 ++
 tensorflow/core/grappler/utils_test.cc      |  7 +++
 5 files changed, 92 insertions(+), 20 deletions(-)

diff --git a/tensorflow/core/grappler/graph_view.h b/tensorflow/core/grappler/graph_view.h
index 0a47b22565..16156d0f20 100644
--- a/tensorflow/core/grappler/graph_view.h
+++ b/tensorflow/core/grappler/graph_view.h
@@ -111,32 +111,37 @@ class GraphViewInternal {
 
   GraphDefT* graph() const { return graph_; }
 
-  // Find a node by name or return `nullptr` if it's not in a graph view.
+  // Finds a node by name or return `nullptr` if it's not in the graph view.
   NodeDefT* GetNode(absl::string_view node_name) const {
     return gtl::FindWithDefault(nodes_, node_name, nullptr);
   }
 
-  // Get the specified input port. Note that the special '-1' port_id can be
+  // Checks if a node by name is in the graph view.
+  bool HasNode(absl::string_view node_name) const {
+    return GetNode(node_name) != nullptr;
+  }
+
+  // Gets the specified input port. Note that the special '-1' port_id can be
   // used to access the controlling nodes (i.e. the nodes connected to node_name
   // through an incoming control dependency).
   InputPort GetInputPort(absl::string_view node_name, int port_id) const {
     return InputPort(GetNode(node_name), port_id);
   }
 
-  // Get the specified output port. Note that the special '-1' port_id can be
+  // Gets the specified output port. Note that the special '-1' port_id can be
   // used to access the controlled nodes (i.e. the nodes connected to node_name
   // through an outgoing control dependency).
   OutputPort GetOutputPort(absl::string_view node_name, int port_id) const {
     return OutputPort(GetNode(node_name), port_id);
   }
 
-  // Get the input (resp. output) port(s) in the immediate fanout (resp. fanin)
-  // of an output (resp. input) port.
+  // Gets the input port(s) in the immediate fanout of an output port.
   const absl::flat_hash_set<InputPort>& GetFanout(
       const OutputPort& port) const {
     return gtl::FindWithDefault(fanouts_, port, fanout_not_found_value_);
   }
 
+  // Gets the output port(s) in the immediate fanin of an input port.
   absl::flat_hash_set<OutputPort> GetFanin(const InputPort& port) const {
     if (port.port_id >= 0) return {GetRegularFanin(port)};
 
@@ -162,9 +167,22 @@ class GraphViewInternal {
     return GetOutputPort(tensor_id.node(), tensor_id.index());
   }
 
-  // Get all the input (resp. output) ports in the immediate fanout (resp
-  // fanin) of a node. Include the controlling nodes iff
-  // include_controlling_nodes is true.
+  // Checks if a tensor id is a fanin of the node.
+  bool HasFanin(const NodeDef& node, const TensorId& fanin) const {
+    if (fanin.index() < -1) {
+      return false;
+    }
+    string fanin_string = TensorIdToString(fanin);
+    for (int i = 0; i < node.input_size(); ++i) {
+      if (node.input(i) == fanin_string) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  // Gets all the input ports in the immediate fanout of a node. Include the
+  // controlled nodes iff include_controlled_nodes is true.
   absl::flat_hash_set<InputPort> GetFanouts(
       const NodeDef& node, bool include_controlled_nodes) const {
     absl::flat_hash_set<InputPort> result;
@@ -185,6 +203,8 @@ class GraphViewInternal {
     return result;
   }
 
+  // Gets all the output ports in the immediate fanin of a node. Include the
+  // controlling nodes iff include_controlling_nodes is true.
   absl::flat_hash_set<OutputPort> GetFanins(
       const NodeDef& node, bool include_controlling_nodes) const {
     absl::flat_hash_set<OutputPort> result;
@@ -198,7 +218,7 @@ class GraphViewInternal {
     return result;
   }
 
-  // Get the number of ports in the immediate fanin of a node. Count the
+  // Gets the number of ports in the immediate fanin of a node. Count the
   // controlling nodes iff include_controlling_nodes is true.
   int NumFanins(const NodeDef& node, bool include_controlling_nodes) const {
     int count = 0;
@@ -211,14 +231,14 @@ class GraphViewInternal {
     return count;
   }
 
-  // Get the number of ports in the immediate fanout of a node. Count the
-  // controlling nodes iff include_controlling_nodes is true.
-  int NumFanouts(const NodeDef& node, bool include_controlling_nodes) const {
+  // Gets the number of ports in the immediate fanout of a node. Count the
+  // controlled nodes iff include_controlled_nodes is true.
+  int NumFanouts(const NodeDef& node, bool include_controlled_nodes) const {
     int count = 0;
 
     OutputPort port;
     port.node = const_cast<NodeDefT*>(&node);
-    const int first_port_id = include_controlling_nodes ? -1 : 0;
+    const int first_port_id = include_controlled_nodes ? -1 : 0;
     const int last_port_id =
         gtl::FindWithDefault(max_regular_output_port_, port.node, -1);
 
@@ -231,8 +251,8 @@ class GraphViewInternal {
     return count;
   }
 
-  // Get all the edges in the immediate fanout (resp fanin) of a node.
-  // Include the control edges iff include_controlling_edges is true.
+  // Gets all the edges in the immediate fanout of a node. Include the
+  // controlled edges iff include_controlled_edges is true.
   absl::flat_hash_set<Edge> GetFanoutEdges(
       const NodeDef& node, bool include_controlled_edges) const {
     absl::flat_hash_set<Edge> result;
@@ -248,14 +268,16 @@ class GraphViewInternal {
       auto it = fanouts_.find(port);
       if (it != fanouts_.end()) {
         for (auto itr = it->second.begin(); itr != it->second.end(); ++itr) {
-          result.emplace(/*src*/ OutputPort(const_cast<NodeDefT*>(&node), i),
-                         /*dst*/ *itr);
+          result.emplace(/*src=*/OutputPort(const_cast<NodeDefT*>(&node), i),
+                         /*dst=*/*itr);
         }
       }
     }
     return result;
   }
 
+  // Gets all the edges in the immediate fanin of a node. Include the
+  // controlling edges iff include_controlling_edges is true.
   absl::flat_hash_set<Edge> GetFaninEdges(
       const NodeDef& node, bool include_controlling_edges) const {
     absl::flat_hash_set<Edge> result;
@@ -265,8 +287,8 @@ class GraphViewInternal {
 
       auto it = nodes_.find(tensor_id.node());
       if (it != nodes_.end()) {
-        result.emplace(/*src*/ OutputPort(it->second, tensor_id.index()),
-                       /*dst*/ InputPort(const_cast<NodeDefT*>(&node), i));
+        result.emplace(/*src=*/OutputPort(it->second, tensor_id.index()),
+                       /*dst=*/InputPort(const_cast<NodeDefT*>(&node), i));
       }
     }
     return result;
diff --git a/tensorflow/core/grappler/graph_view_test.cc b/tensorflow/core/grappler/graph_view_test.cc
index cbf859a4a9..404dcd30c1 100644
--- a/tensorflow/core/grappler/graph_view_test.cc
+++ b/tensorflow/core/grappler/graph_view_test.cc
@@ -230,6 +230,40 @@ TEST_F(GraphViewTest, ControlDependencies) {
   EXPECT_EQ(0, (*fanin.begin()).port_id);
 }
 
+TEST_F(GraphViewTest, HasNode) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Const(s.WithOpName("a"), 0.0f, {10, 10});
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  GraphView graph(&item.graph);
+
+  EXPECT_EQ(true, graph.HasNode("a"));
+  EXPECT_EQ(false, graph.HasNode("b"));
+}
+
+TEST_F(GraphViewTest, HasFanin) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Const(s.WithOpName("a"), 0.0f, {10, 10});
+  Output b = ops::Square(s.WithOpName("b"), {a});
+  Output c = ops::Sqrt(s.WithOpName("c"), {b});
+  Output d = ops::AddN(s.WithOpName("d").WithControlDependencies(a), {b, c});
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  GraphView graph(&item.graph);
+
+  const NodeDef* d_node = graph.GetNode("d");
+  EXPECT_NE(nullptr, d_node);
+
+  EXPECT_EQ(true, graph.HasFanin(*d_node, {"a", Graph::kControlSlot}));
+  EXPECT_EQ(false, graph.HasFanin(*d_node, {"a", 0}));
+  EXPECT_EQ(true, graph.HasFanin(*d_node, {"b", 0}));
+  EXPECT_EQ(false, graph.HasFanin(*d_node, {"b", Graph::kControlSlot}));
+  EXPECT_EQ(true, graph.HasFanin(*d_node, {"c", 0}));
+  EXPECT_EQ(false, graph.HasFanin(*d_node, {"c", Graph::kControlSlot}));
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc
index 2977544262..90ad04cf47 100644
--- a/tensorflow/core/grappler/utils.cc
+++ b/tensorflow/core/grappler/utils.cc
@@ -144,11 +144,16 @@ void NodeMap::UpdateOutput(const string& node_name,
   outputs.insert(nodes_[NodeName(new_output_name)]);
 }
 
+string TensorIdToString(const TensorId& tensor_id) {
+  return tensor_id.index() == 0 ? string(tensor_id.node())
+                                : tensor_id.ToString();
+}
+
 bool IsSameInput(const string& name1, const string& name2) {
   if (name1 == name2) return true;
   TensorId tensor1 = ParseTensorName(name1);
   TensorId tensor2 = ParseTensorName(name2);
-  return tensor1.node() == tensor2.node() && tensor1.index() == tensor2.index();
+  return tensor1 == tensor2;
 }
 
 bool IsControlInput(const string& name) {
diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h
index b1e2d4e9cb..89a87af323 100644
--- a/tensorflow/core/grappler/utils.h
+++ b/tensorflow/core/grappler/utils.h
@@ -100,6 +100,10 @@ class SetVector {
   std::vector<T> vector_;
 };
 
+// Returns formatted string from TensorId specific to grappler. Specifically,
+// for the 0 port (first output), only the node name is returned.
+string TensorIdToString(const TensorId& tensor_id);
+
 // True iff 'name' refers to a control inputs, i.e. a node name prefixed with
 // the ^ character.
 bool IsControlInput(const string& name);
diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc
index e993391b51..f5ae39867a 100644
--- a/tensorflow/core/grappler/utils_test.cc
+++ b/tensorflow/core/grappler/utils_test.cc
@@ -464,6 +464,13 @@ TEST_F(UtilsTest, SetTensorValueBFloat16IntMin) {
       Tensor(bfloat16(std::numeric_limits<int>::min())), t);
 }
 
+TEST_F(UtilsTest, TensorIdToString) {
+  EXPECT_EQ("^foo", TensorIdToString({"foo", -1}));
+  EXPECT_EQ("foo", TensorIdToString({"foo", 0}));
+  EXPECT_EQ("foo:1", TensorIdToString({"foo", 1}));
+  EXPECT_EQ("foo:2", TensorIdToString({"foo", 2}));
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From e4e9409b3de9a8d12a56fc0e2fa7270bffd0d41a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 18:56:53 -0800
Subject: [PATCH 408/873] Fix internal type mismatch in ragged.map_fn

PiperOrigin-RevId: 225110815
---
 tensorflow/python/ops/ragged/BUILD                  |  4 ++--
 .../python/ops/ragged/ragged_map_fn_op_test.py      | 13 +++++++++++++
 tensorflow/python/ops/ragged/ragged_map_ops.py      |  4 +++-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/ops/ragged/BUILD b/tensorflow/python/ops/ragged/BUILD
index c0db8bfbb5..440d9db824 100644
--- a/tensorflow/python/ops/ragged/BUILD
+++ b/tensorflow/python/ops/ragged/BUILD
@@ -263,17 +263,17 @@ py_library(
     srcs = ["ragged_map_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged_array_ops",
-        ":ragged_factory_ops",
         ":ragged_tensor",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:tensor_array_ops",
+        "//tensorflow/python:tensor_shape",
         "//tensorflow/python:util",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python/eager:context",
diff --git a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
index 49c0996b24..171cb347de 100644
--- a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
@@ -21,6 +21,7 @@ from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import backend
 from tensorflow.python.ops import array_ops
@@ -270,6 +271,18 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
           elems,
           dtype=ragged.RaggedTensorType(dtype=dtypes.int64, ragged_rank=10))
 
+  def testMapOnSparseTensor(self):
+    s = sparse_tensor.SparseTensor(
+        indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
+        values=[0, 5, 0, 4],
+        dense_shape=[2, 2],
+    )
+    t2 = ragged.RaggedTensor.from_sparse(s)
+    id_t2 = ragged.map_fn(
+        lambda x: x, t2,
+    )
+    self.assertRaggedEqual(id_t2, [[0, 5], [0, 4]])
+
 
 if __name__ == '__main__':
   googletest.main()
diff --git a/tensorflow/python/ops/ragged/ragged_map_ops.py b/tensorflow/python/ops/ragged/ragged_map_ops.py
index af40352b1d..fbe188bd1a 100644
--- a/tensorflow/python/ops/ragged/ragged_map_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_map_ops.py
@@ -30,6 +30,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.ops.ragged import ragged_tensor
@@ -238,6 +239,7 @@ def map_fn(fn,
       n = (tensor_shape.dimension_value(static_shape[0]) or
            array_ops.shape(elems_flat[0])[0])
 
+    n = math_ops.cast(n, dtype=dtypes.int32)
     # Create a flat list of TAs.
 
     # Flatten the dtype structure to a list.
@@ -254,7 +256,7 @@ def map_fn(fn,
         for t in dtype_components_flat
     ]
 
-    i = constant_op.constant(0)
+    i = constant_op.constant(0, dtype=dtypes.int32)
 
     def compute(i, tas):
       """The loop body of map_fn.
-- 
GitLab


From 4b974cf1c1c072338f9c420b9149840780907443 Mon Sep 17 00:00:00 2001
From: Yifei Feng <yifeif@google.com>
Date: Tue, 11 Dec 2018 18:59:00 -0800
Subject: [PATCH 409/873] Remove some extra cuda header includes.

PiperOrigin-RevId: 225110993
---
 tensorflow/core/grappler/costs/BUILD           | 3 ---
 tensorflow/core/grappler/costs/utils.cc        | 6 ------
 tensorflow/core/util/cuda_launch_config.h      | 1 -
 tensorflow/core/util/port.cc                   | 3 ---
 tensorflow/stream_executor/cuda/cuda_helpers.h | 1 -
 5 files changed, 14 deletions(-)

diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index 5090e62b2c..f8af1232f7 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -132,9 +132,6 @@ tf_cuda_library(
     name = "utils",
     srcs = ["utils.cc"],
     hdrs = ["utils.h"],
-    cuda_deps = [
-        "@local_config_cuda//cuda:cudnn_header",
-    ],
     visibility = ["//visibility:public"],
     deps = [
         "//third_party/eigen3",
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index 7d868a3679..d45bb14e07 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -20,12 +20,6 @@ limitations under the License.
 
 #include "third_party/eigen3/Eigen/Core"
 
-#if GOOGLE_CUDA
-#include "cuda/include/cuda.h"
-#include "cuda/include/cuda_runtime_api.h"
-#include "cuda/include/cudnn.h"
-#endif
-
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"
diff --git a/tensorflow/core/util/cuda_launch_config.h b/tensorflow/core/util/cuda_launch_config.h
index 080d4067ce..c0ae6349f7 100644
--- a/tensorflow/core/util/cuda_launch_config.h
+++ b/tensorflow/core/util/cuda_launch_config.h
@@ -21,7 +21,6 @@ limitations under the License.
 #include <algorithm>
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "cuda/include/cuda.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/stream_executor.h"
diff --git a/tensorflow/core/util/port.cc b/tensorflow/core/util/port.cc
index e01058dff6..7dc8ddda06 100644
--- a/tensorflow/core/util/port.cc
+++ b/tensorflow/core/util/port.cc
@@ -15,9 +15,6 @@ limitations under the License.
 
 #include "tensorflow/core/util/port.h"
 
-#if GOOGLE_CUDA
-#include "cuda/include/cuda.h"
-#endif
 
 namespace tensorflow {
 
diff --git a/tensorflow/stream_executor/cuda/cuda_helpers.h b/tensorflow/stream_executor/cuda/cuda_helpers.h
index d55706c66a..dc0dc694cd 100644
--- a/tensorflow/stream_executor/cuda/cuda_helpers.h
+++ b/tensorflow/stream_executor/cuda/cuda_helpers.h
@@ -25,7 +25,6 @@ limitations under the License.
 #include <complex>
 
 #include "cuda/include/cuComplex.h"
-#include "cuda/include/cuda.h"
 
 namespace stream_executor {
 
-- 
GitLab


From ab3db8c3001002592044c95dfa1ad042f2286149 Mon Sep 17 00:00:00 2001
From: Smit Hinsu <1990079+smit-hinsu@users.noreply.github.com>
Date: Tue, 11 Dec 2018 19:08:24 -0800
Subject: [PATCH 410/873] Update
 tensorflow/contrib/tensorrt/convert/convert_nodes.cc

Co-Authored-By: trevor-m <tmorris@nvidia.com>
---
 tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index ba1c2e80b2..6e411a21f8 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -2992,7 +2992,7 @@ tensorflow::Status ConvertFusedBatchNorm(OpConverterParams* params) {
                  << "are using Keras, please call "
                  << "keras.backend.set_learning_phase(0) before constructing "
                  << "your model. At "
-                 << node_def.name());
+                 << node_def.name();
     return tensorflow::errors::Unimplemented(
         node_def.op(), " only supports is_training=false, at ",
         node_def.name());
-- 
GitLab


From 8ac99aa0ec18f65f9976af0eb0e3fc2fef6536c4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 19:09:14 -0800
Subject: [PATCH 411/873] Enhance the Tensor-Tracer in the following ways: (1)
 Able to trace tensors when the model is executed on the CPU.     (previously,
 it could only trace when the model is executed on TPU) (2) Allow the user to
 specify the op-names and op-types to be excluded or included for tracing via
 regular expressions. (3) Two new trace modes: (1) tracing the vector norm of
 the tensor and (2) tracing the maximum of the absolute values of all elements
 in the tensor. (4) Attach the replica-ID to a traced tensor value so that the
 post-processing tool (Tensor-Inspector) can reconstruct the whole tensor from
 all replicas. (5) An API to trace tensors programmatically. (6) Allow writing
 the trace to stdout (previously, it must be written to a file).

PiperOrigin-RevId: 225112219
---
 .../contrib/tpu/python/tpu/tensor_tracer.py   | 553 +++++++++++++++---
 .../contrib/tpu/python/tpu/tpu_estimator.py   |  10 +
 2 files changed, 486 insertions(+), 77 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py b/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py
index 70baea203c..a1494e3660 100644
--- a/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py
+++ b/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py
@@ -21,44 +21,56 @@ from __future__ import print_function
 import os
 import os.path
 import re
+import sys
 
 from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import tpu
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import logging_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 
 _TRACER_LOG_PREFIX = ' [>>>TT>>>]'
 _DEVICE_TYPE_TPU = 'tpu'
 _DEVICE_TYPE_CPU = 'cpu'
-_GLOBAL_STEP_OP_NAME = 'GLOBAL-STEP'
 _TRACE_MODE_NAN_INF = 'nan-inf'
 _TRACE_MODE_PART_TENSOR = 'part-tensor'
 _TRACE_MODE_PART_TENSOR_SIZE = 3
 _TRACE_MODE_FULL_TENSOR = 'full-tensor'
-_RECORD_OUTSIDE_OP_RANGE = 'not-traced-outside-op-range'
-_RECORD_SHOULD_NOT_TRACE = 'not-traced-should-not-trace'
-_RECORD_FILTERED_OUT = 'not-traced-filtered-out'
-_RECORD_SCALAR = 'not-traced-scalar'
-_RECORD_DYNAMIC_SHAPE = 'not-traced-dynamic-shape'
-_RECORD_GET_TRACED = 'get-traced'
+_TRACE_MODE_NORM = 'norm'
+_TRACE_MODE_MAX_ABS = 'max-abs'
+_REASON_OUTSIDE_OP_RANGE = 'not-traced-outside-op-range'
+_REASON_UNSAFE_OP = 'not-traced-unsafe-op'
+_REASON_UNSAFE_SCALAR = 'not-traced-unsafe-scalar'
+_REASON_LESS_INTERESTING_OP = 'not-traced-less-interesting-op'
+_REASON_DEVICE_MISMATCH = 'not-traced-device-mismatch'
+_REASON_DYNAMIC_SHAPE = 'not-traced-dynamic-shape'
+_REASON_SCALAR_GET_TRACED = 'traced-scalar'
+_REASON_TENSOR_GET_TRACED = 'traced-tensor'
+_REASON_USER_INCLUDED = 'traced-user-included'
+_REASON_USER_EXCLUDED = 'not-traced-user-excluded'
+_REASON_NON_NUMERIC_TENSOR = 'not-traced-non-numeric-tensor'
 _MARKER_SECTION_BEGIN = '!!!!!!! section-begin:'
 _MARKER_SECTION_END = '!!!!!!! section-end:'
 _SECTION_NAME_CONFIG = 'configuration'
 _SECTION_NAME_REASON = 'reason'
 _SECTION_NAME_OP_LIST = 'op-list'
+_SECTION_NAME_TENSOR_LIST = 'tensor-list'
 _SECTION_NAME_GRAPH = 'graph'
 _FIELD_NAME_VERSION = 'version:'
 _FIELD_NAME_DEVICE = 'device:'
 _FIELD_NAME_TRACE_MODE = 'trace-mode:'
 _FIELD_NAME_NUM_REPLICAS = 'num-replicas:'
 _FIELD_NAME_NUM_OPS = 'number-of-ops:'
+_FIELD_NAME_NUM_TENSORS = 'number-of-tensors:'
 _FIELD_NAME_TOPOLOGICAL_SORT_SUCCEED = 'topological-sort-succeed:'
 _FLAGS_ENV_VAR = 'TENSOR_TRACER_FLAGS'
 _FLAG_SINGLE_QUOTE_PAT = re.compile(r"\s*--([^=]+)='([^']*)'")
@@ -66,13 +78,72 @@ _FLAG_DOUBLE_QUOTE_PAT = re.compile(r'\s*--([^=]+)="([^"]*)"')
 _FLAG_NO_QUOTE_PAT = re.compile(r'\s*--([^=]+)=(\S*)')
 _FLAG_NAME_ENABLE = 'enable'
 _FLAG_NAME_TRACE_MODE = 'trace_mode'
-_FLAG_NAME_INTERESTING_OPS = 'interesting_ops'
+_FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS = 'include_less_interesting_ops'
+_FLAG_NAME_EXCLUDED_OPNAMES = 'excluded_opnames'
+_FLAG_NAME_EXCLUDED_OPTYPES = 'excluded_optypes'
+_FLAG_NAME_INCLUDED_OPNAMES = 'included_opnames'
+_FLAG_NAME_INCLUDED_OPTYPES = 'included_optypes'
 _FLAG_NAME_TRACE_FILE = 'trace_file_path'
+_FLAG_NAME_REPORT_FILE = 'report_file_path'
 _FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR = 'use_test_undeclared_outputs_dir'
 _FLAG_NAME_OP_RANGE = 'op_range'
 _OP_RANGE_PAT = re.compile(r'(\d+):(\d+)')
 _OUTPUT_STREAM_ESCAPE = 'file://'
 _TEST_UNDECLARED_OUTPUTS_DIR_ENV_VAR = 'TEST_UNDECLARED_OUTPUTS_DIR'
+_TENSOR_TRACER_COLLECTION = 'tensor_tracer_variables'
+_TENSOR_TRACER_CHECKPOINT = 'tensor_tracer_checkpoint'
+
+
+def tensor_checkpoint(tensor, checkpoint_name):
+  """Adds a checkpoint with the given checkpoint name for the given tensor.
+
+  The tensor will be added to the list of tensors that will be traced by the
+  tensor tracer.
+
+  Args:
+     tensor: the tensor object for which the tracing is requested.
+     checkpoint_name: a string name for the checkpoint. This name has to be a
+     unique name if used within model comparison. The tensors that have the same
+     checkpoint identifier is compared in model comparison.
+  Returns:
+    The provided tensor.
+  """
+
+  tensor.graph.get_collection(_TENSOR_TRACER_COLLECTION)
+  tensor.graph.add_to_collection(_TENSOR_TRACER_COLLECTION,
+                                 (tensor, checkpoint_name))
+  return tensor
+
+
+def keras_layer_checkpoint(layer, checkpoint_name):
+  """An interface for adding the tensor outputs of a keras layer.
+
+  Encapsulates tensor_checkpoint.
+
+  Args:
+     layer: A keras layer.
+     checkpoint_name: a string name for the checkpoint. This name has to be a
+     unique name if used within model comparison. The tensors that have the same
+     checkpoint identifier is compared in model comparison.
+
+  Returns:
+    The provided layer.
+  """
+  try:
+    outputs = layer.output
+    if tensor_util.is_tensor(outputs):
+      tensor_checkpoint(outputs, '%s' % (checkpoint_name))
+    else:
+      idx = 0
+      for output_tensor in outputs:
+        if tensor_util.is_tensor(outputs):
+          tensor_checkpoint(output_tensor, '%s_%d' % (checkpoint_name, idx))
+        idx += 1
+  except AttributeError:
+    pass
+  except RuntimeError:
+    pass
+  return layer
 
 
 class TensorTracer(object):
@@ -105,6 +176,34 @@ class TensorTracer(object):
     match = _FLAG_NO_QUOTE_PAT.match(flags, pos)
     return match
 
+  @staticmethod
+  def validate_flag_names():
+    """Validates if the TensorTrace flags passed are valid."""
+    valid_flag_names = [_FLAG_NAME_ENABLE, _FLAG_NAME_TRACE_MODE,
+                        _FLAG_NAME_EXCLUDED_OPNAMES,
+                        _FLAG_NAME_EXCLUDED_OPTYPES,
+                        _FLAG_NAME_INCLUDED_OPNAMES,
+                        _FLAG_NAME_INCLUDED_OPTYPES,
+                        _FLAG_NAME_TRACE_FILE, _FLAG_NAME_REPORT_FILE,
+                        _FLAG_NAME_USE_TEST_UNDECLARED_OUTPUTS_DIR,
+                        _FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS,
+                        _FLAG_NAME_OP_RANGE]
+    tensor_tracer_flags = os.environ.get(_FLAGS_ENV_VAR)
+    if not tensor_tracer_flags:
+      return
+    pos = 0
+    while True:
+      match = TensorTracer._match_next_flag(tensor_tracer_flags, pos)
+      if not match:
+        break
+      flag_name = match.group(1)
+      if flag_name not in valid_flag_names:
+        raise ValueError(
+            'The flag name "%s" passed via the environment variable "%s" '
+            'is invalid. Valid flag names are:'
+            '\n%s'%(flag_name, _FLAGS_ENV_VAR, valid_flag_names))
+      pos = match.end()
+
   @staticmethod
   def print_flag_values():
     """Prints all TensorTracer flags passed via environment variables."""
@@ -146,6 +245,20 @@ class TensorTracer(object):
       pos = match.end()
     return ''
 
+  @staticmethod
+  def flag_value_to_re_list(flag_name):
+    """Converts list of strings to compiled RE."""
+
+    re_list = []
+    flag_value = TensorTracer.get_flag_value(flag_name)
+    if not flag_value:
+      return re_list
+    list_of_values = flag_value.split()
+    for v in list_of_values:
+      r = re.compile(v)
+      re_list.append(r)
+    return re_list
+
   @staticmethod
   def is_enabled():
     """Returns True if TensorTracer is enabled."""
@@ -186,29 +299,67 @@ class TensorTracer(object):
     """Checks if the given trace mode is valid."""
 
     valid_trace_modes = [_TRACE_MODE_NAN_INF, _TRACE_MODE_PART_TENSOR,
-                         _TRACE_MODE_FULL_TENSOR]
+                         _TRACE_MODE_FULL_TENSOR, _TRACE_MODE_NORM,
+                         _TRACE_MODE_MAX_ABS]
     if trace_mode not in valid_trace_modes:
       raise ValueError('Invalid trace mode "%s" given to the Tensor_Tracer.'
                        'Valid trace modes are: %s'%(trace_mode,
                                                     valid_trace_modes))
 
   @staticmethod
-  def should_trace(device_type, op):
-    """Returns True if the given Op should be traced."""
+  def unsafe_op(op):
+    """Returns True if this op is not safe to be traced."""
 
-    if device_type != _DEVICE_TYPE_TPU:
-      raise ValueError('Non TPU device type is not supported')
     if control_flow_util.IsInCond(op):
+      return True
+    # Reasons for not including following op types:
+    #    Assign: cause incorrect result with CPU tracing.
+    #    others: compilation problems.
+    if op.type in ['Assign', 'Pack', 'Shape', 'Reshape', 'ArgMin', 'ArgMax']:
+      return True
+    return False
+
+  @staticmethod
+  def device_mismatch(device_type, op):
+    if device_type == _DEVICE_TYPE_TPU:
+      # pylint: disable=protected-access
+      return tpu._TPU_REPLICATE_ATTR not in op.node_def.attr
+      # pylint: enable=protected-access
+    return False
+
+  @staticmethod
+  def unsafe_scalar_trace(op):
+    """Return true if scalar output tensor from Op is not safe to be traced."""
+
+    # Tracing the following causes cycle in the graph on TPU.
+    if op.type in ['LoopCond', 'Enter', 'Merge', 'Const',
+                   'Switch', 'Less', 'ReadVariableOp']:
+      return True
+    # Tracing the following will cause casting-issue
+    # with the norm tracing mode or other compilation issues on CPU.
+    if op.type in ['VarHandleOp', 'IteratorToStringHandle',
+                   'IteratorGetNext', 'OneShotIterator',
+                   'IteratorV2', 'MakeIterator',
+                   'BatchDatasetV2', 'MapDataset',
+                   'FixedLengthRecordDataset', 'TakeDataset', 'ZipDataset',
+                   'Placeholder', 'PlaceholderWithDefault', 'StridedSlice']:
+      return True
+    return False
+
+  @staticmethod
+  def less_interesting_op(op):
+    """Returns True if the given Op is not an interesting one to be traced."""
+
+    include_less_interesting = TensorTracer.get_flag_value(
+        _FLAG_NAME_INCLUDE_LESS_INTERESTING_OPS)
+    if include_less_interesting:
       return False
-    if op.type in ['Reshape', 'ArgMin', 'ArgMax']:
-      return False
-    # pylint: disable=protected-access
-    return tpu._TPU_REPLICATE_ATTR in op.node_def.attr
-    # pylint: enable=protected-access
+    return op.type in ['Const', 'Identity', 'Cast', 'Shape']
 
   @staticmethod
   def reason(op_idx, details):
-    """Returns why the Op at op_idx is traced or not."""
+    """Returns reason why the Op at op_idx is traced or not."""
+
     return '%d %s'%(op_idx, details)
 
   @staticmethod
@@ -274,6 +425,33 @@ class TensorTracer(object):
       assert len(unsorted_ops) == len(sorted_ops)
       return (True, sorted_ops)
 
+  @staticmethod
+  def _make_op_and_tensor_maps(op_list):
+    """Creates various maps and lists from op_list.
+
+    Args:
+       op_list: a list of Ops
+
+    Returns:
+       opname_idx_map: a map from Op's name to its index in op_list.
+       tensor_list: a list of output tensors of the Ops in op_list.
+       tensorname_idx_map: a map from output tensor name to its index
+                           in tensor_list.
+    """
+
+    opname_idx_map = {}
+    tensor_list = []
+    tensorname_idx_map = {}
+    for op_id, op in enumerate(op_list):
+      if op.name in opname_idx_map:
+        raise ValueError('Duplicated Op name: %s'%op.name)
+      opname_idx_map[op.name] = op_id
+      for output_tensor in op.outputs:
+        if output_tensor.name not in tensorname_idx_map:
+          tensor_list.append(output_tensor)
+          tensorname_idx_map[output_tensor.name] = len(tensor_list)-1
+    return (opname_idx_map, tensor_list, tensorname_idx_map)
+
   def __init__(self):
     """Initializes a TensorTracer.
 
@@ -281,16 +459,20 @@ class TensorTracer(object):
     """
     self._version = 'use-outside-compilation'
     self._device_type = None
+    TensorTracer.validate_flag_names()
     self._trace_mode = TensorTracer.get_flag_value(_FLAG_NAME_TRACE_MODE)
     if not self._trace_mode:
       self._trace_mode = _TRACE_MODE_NAN_INF
     TensorTracer.check_trace_mode(self._trace_mode)
     self._part_tensor_size = _TRACE_MODE_PART_TENSOR_SIZE
     self._instrument_records = {}
-    interesting_ops = TensorTracer.get_flag_value(_FLAG_NAME_INTERESTING_OPS)
-    self._selected_ops = interesting_ops.split()
     self._set_trace_file_path()
+    self._set_report_file()
     self._set_op_range()
+    self._set_excluded_opnames()
+    self._set_excluded_optypes()
+    self._set_included_opnames()
+    self._set_included_optypes()
     self._num_replicas = None
     self._replica_id = None
 
@@ -318,10 +500,7 @@ class TensorTracer(object):
     """Sets the path of the output trace file."""
 
     self._trace_file_path = TensorTracer.get_flag_value(_FLAG_NAME_TRACE_FILE)
-    if not self._trace_file_path:
-      raise ValueError('--%s is not set in the environment variable %s'
-                       %(_FLAG_NAME_TRACE_FILE, _FLAGS_ENV_VAR))
-    elif TensorTracer.use_test_undeclared_outputs_dir():
+    if self._trace_file_path and TensorTracer.use_test_undeclared_outputs_dir():
       if os.path.isabs(self._trace_file_path):
         raise ValueError('If use_test_undeclared_outputs_dir is set,'
                          'trace_file_path cannot be an absolute path (%s)'
@@ -330,6 +509,22 @@ class TensorTracer(object):
       self._trace_file_path = os.path.join(outputs_dir,
                                            self._trace_file_path)
 
+  def _set_report_file(self):
+    """Sets the path of the output report file."""
+
+    self._report_file_path = TensorTracer.get_flag_value(_FLAG_NAME_REPORT_FILE)
+    if not self._report_file_path:
+      self._report_file = None
+      return
+    try:
+      self._report_file = gfile.Open(self._report_file_path, 'w')
+    except IOError as e:
+      raise e
+
+  def _close_report_file(self):
+    if self._report_file:
+      self._report_file.close()
+
   def _set_op_range(self):
     """Sets the index range of the Ops that we will consider tracing."""
 
@@ -350,19 +545,48 @@ class TensorTracer(object):
       return False
     return self._op_range[1] < 0 or idx <= self._op_range[1]
 
-  def _write_report(self, content):
-    """Writes the given content to the report."""
+  def _set_excluded_opnames(self):
+    self._excluded_opname_re_list = TensorTracer.flag_value_to_re_list(
+        _FLAG_NAME_EXCLUDED_OPNAMES)
+
+  def _set_excluded_optypes(self):
+    self._excluded_optype_re_list = TensorTracer.flag_value_to_re_list(
+        _FLAG_NAME_EXCLUDED_OPTYPES)
+
+  def _set_included_opnames(self):
+    self._included_opname_re_list = TensorTracer.flag_value_to_re_list(
+        _FLAG_NAME_INCLUDED_OPNAMES)
+
+  def _set_included_optypes(self):
+    self._included_optype_re_list = TensorTracer.flag_value_to_re_list(
+        _FLAG_NAME_INCLUDED_OPTYPES)
+
+  def _is_user_included_op(self, op):
+    for opname_re in self._included_opname_re_list:
+      if opname_re.match(op.name):
+        return True
+    for optype_re in self._included_optype_re_list:
+      if optype_re.match(op.type):
+        return True
+    return False
 
-    logging.info('%s %s'%(_TRACER_LOG_PREFIX, content))
+  def _is_user_excluded_op(self, op):
+    for opname_re in self._excluded_opname_re_list:
+      if opname_re.match(op.name):
+        return True
+    for optype_re in self._excluded_optype_re_list:
+      if optype_re.match(op.type):
+        return True
+    return False
 
-  def _is_selected_op(self, op_name):
-    """Returns True if the Op with op_name is selected to be traced."""
+  def _write_report(self, content):
+    """Writes the given content to the report."""
 
-    if not self._selected_ops:
-      return True
-    if op_name in self._selected_ops:
-      return True
-    return False
+    line = '%s %s'%(_TRACER_LOG_PREFIX, content)
+    if self._report_file:
+      self._report_file.write(line)
+    else:
+      logging.info(line)
 
   def _write_config_section(self):
     """Writes the config section of the report."""
@@ -382,15 +606,42 @@ class TensorTracer(object):
       self._write_report('"%s" %s\n'%(key, self._instrument_records[key]))
     self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_REASON))
 
-  def _write_op_list_section(self, op_list):
+  def _write_op_list_section(self, op_list, tensorname_idx_map):
     """Writes the Op-list section of the report."""
 
     self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN, _SECTION_NAME_OP_LIST))
     self._write_report('%s %d\n'%(_FIELD_NAME_NUM_OPS, len(op_list)))
     for i in range(0, len(op_list)):
-      self._write_report('%d "%s" %s\n'%(i, op_list[i].name, op_list[i].type))
+      op = op_list[i]
+      line = '%d "%s" %s'%(i, op.name, op.type)
+      for out_tensor in op.outputs:
+        if out_tensor.name not in tensorname_idx_map:
+          raise ValueError(
+              'out_tensor %s is not in tensorname_idx_map'%out_tensor.name)
+        line += ' %d'%tensorname_idx_map[out_tensor.name]
+      line += '\n'
+      self._write_report(line)
     self._write_report('%s %s\n'%(_MARKER_SECTION_END, _SECTION_NAME_OP_LIST))
 
+  def _write_tensor_list_section(self, tensor_list, opname_idx_map):
+    """Writes the tensor-list section of the report."""
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN,
+                                  _SECTION_NAME_TENSOR_LIST))
+    self._write_report('%s %d\n'%(_FIELD_NAME_NUM_TENSORS, len(tensor_list)))
+    for i in range(0, len(tensor_list)):
+      tensor = tensor_list[i]
+      line = '%d "%s"'%(i, tensor.name)
+      for consumer_op in tensor.consumers():
+        if consumer_op.name not in opname_idx_map:
+          raise ValueError(
+              'consumer_op %s is not in opname_idx_map'%consumer_op.name)
+        line += ' %d'%opname_idx_map[consumer_op.name]
+      line += '\n'
+      self._write_report(line)
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END,
+                                  _SECTION_NAME_TENSOR_LIST))
+
   def _write_graph_section(self, succeed, sorted_or_cycle):
     """Writes the graph section of the report."""
 
@@ -422,7 +673,7 @@ class TensorTracer(object):
       Args:
         op_name: the name of the Op that outputs the tensor to be printed.
         output_idx: which output of the Op it is (0 means the first output).
-        num_elements: number of elements to print.
+        num_elements: number of elements to print (-1 means print all).
         tensor: the tensor needs to be returned.
         output_tensor: the tensor needs to be printed.
 
@@ -430,10 +681,13 @@ class TensorTracer(object):
         The same tensor passed via the "tensor" argument.
       """
       msg = '"%s:%d" '%(op_name, output_idx)
-      output_stream = _OUTPUT_STREAM_ESCAPE + self._trace_file_path
+      if self._trace_file_path:
+        output_stream = _OUTPUT_STREAM_ESCAPE + self._trace_file_path
+      else:
+        output_stream = sys.stderr
       print_op = logging_ops.print_v2(msg, array_ops.shape(output_tensor),
                                       ' @', self._replica_id,
-                                      '\n', output_tensor,
+                                      '\n', output_tensor, '\n',
                                       summarize=num_elements,
                                       output_stream=output_stream)
       with ops.control_dependencies([print_op]):
@@ -442,7 +696,8 @@ class TensorTracer(object):
     def _detect_nan_inf(tensor):
       """Trace function for detecting any NaN/Inf in the tensor."""
 
-      if tensor.dtype.is_floating:
+      if tensor.dtype.__eq__(dtypes.bfloat16) or tensor.dtype.__eq__(
+          dtypes.float16):
         # Since host can't handle bf16, always convert tensor to f32.
         tensor = math_ops.cast(tensor, dtypes.float32)
         output_tensor = math_ops.reduce_any(
@@ -450,12 +705,19 @@ class TensorTracer(object):
                                     gen_math_ops.is_inf(tensor)))
       else:
         output_tensor = constant_op.constant(0)
-      return _print_tensor(op_name, output_idx, 1, tensor, output_tensor)
+      return _print_tensor(op_name, output_idx, -1, tensor, output_tensor)
 
-    def _show_global_step(tensor):
-      """Trace function for printing the global step count."""
+    def _show_norm(tensor):
+      tensor = math_ops.cast(tensor, dtypes.float64)
+      output_tensor = linalg_ops.norm(tensor)
+      return _print_tensor(op_name, output_idx, -1, tensor, output_tensor)
 
-      return _print_tensor(op_name, output_idx, 1, tensor, tensor)
+    def _show_max_abs(tensor):
+      output_tensor = math_ops.cast(math_ops.reduce_max(math_ops.abs(tensor)),
+                                    dtypes.float64)
+      zero = constant_op.constant(0, dtypes.float64)
+      output_tensor = gen_math_ops.maximum(zero, output_tensor)
+      return _print_tensor(op_name, output_idx, -1, tensor, output_tensor)
 
     def _show_part_tensor(tensor):
       """Trace function for printing part of the tensor."""
@@ -468,23 +730,139 @@ class TensorTracer(object):
 
       return _print_tensor(op_name, output_idx, -1, tensor, tensor)
 
-    if op_name == _GLOBAL_STEP_OP_NAME:
-      return _show_global_step
     if self._trace_mode == _TRACE_MODE_NAN_INF:
       return _detect_nan_inf
     if self._trace_mode == _TRACE_MODE_PART_TENSOR:
       return _show_part_tensor
     if self._trace_mode == _TRACE_MODE_FULL_TENSOR:
       return _show_full_tensor
+    if self._trace_mode == _TRACE_MODE_NORM:
+      return _show_norm
+    if self._trace_mode == _TRACE_MODE_MAX_ABS:
+      return _show_max_abs
 
     raise RuntimeError('Tensor trace fun for %s is not yet implemented'
                        %self._trace_mode)
 
+  def _skip_op(self, op_id, op, user_included, user_excluded):
+    """Returns True if we should not trace Op."""
+
+    if user_included:
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_USER_INCLUDED)
+      return False
+    if user_excluded:
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_USER_EXCLUDED)
+      return True
+    if not self._inside_op_range(op_id):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_OUTSIDE_OP_RANGE)
+      return True
+    if TensorTracer.unsafe_op(op):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_UNSAFE_OP)
+      return True
+    if TensorTracer.device_mismatch(self._device_type, op):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_DEVICE_MISMATCH)
+      return True
+    if TensorTracer.less_interesting_op(op):
+      self._instrument_records[op.name] = TensorTracer.reason(
+          op_id, _REASON_LESS_INTERESTING_OP)
+      return True
+    return False
+
+  def _skip_tensor(self, op_id, out_tensor, user_included,
+                   user_excluded):
+    """Returns True if we should not trace out_tensor."""
+
+    # Skips a tensor if the tensor has a non-numeric type.
+    #   Note: we cannot use check_ops.is_numeric_tensor(out_tensor)
+    #         because it also excludes tensors with dtypes, bool, and
+    #         float32_ref, which we actually want to trace.
+    non_numeric_tensor_types = set([dtypes.variant, dtypes.resource,
+                                    dtypes.string])
+    if out_tensor.dtype in non_numeric_tensor_types:
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_NON_NUMERIC_TENSOR)
+      return True
+
+    if user_included:
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_USER_INCLUDED)
+      return False
+    if user_excluded:
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_USER_EXCLUDED)
+      return True
+    if not out_tensor.get_shape().is_fully_defined():
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_DYNAMIC_SHAPE)
+      return True
+    rank = len(out_tensor.shape)
+    if rank < 1:
+      # scalar
+      if TensorTracer.unsafe_scalar_trace(out_tensor.op):
+        self._instrument_records[out_tensor.name] = TensorTracer.reason(
+            op_id, _REASON_UNSAFE_SCALAR)
+        return True
+      else:
+        self._instrument_records[out_tensor.name] = TensorTracer.reason(
+            op_id, _REASON_SCALAR_GET_TRACED)
+        return False
+    else:
+      # tensor
+      self._instrument_records[out_tensor.name] = TensorTracer.reason(
+          op_id, _REASON_TENSOR_GET_TRACED)
+      return False
+
+  def _pre_tracing(self, graph):
+    """Work needs to be done prior to TPU or CPU tracing."""
+
+    operations = graph.get_operations()
+    (opname_idx_map, tensor_list, tensorname_idx_map) = (
+        TensorTracer._make_op_and_tensor_maps(operations))
+    self._write_config_section()
+    self._write_op_list_section(operations, tensorname_idx_map)
+    self._write_tensor_list_section(tensor_list, opname_idx_map)
+    # Does the topological sort before adding any nodes to the graph.
+    (succeed, sorted_or_cycle) = TensorTracer.topological_sort(graph)
+    return (operations, succeed, sorted_or_cycle)
+
+  def _post_tracing(self, succeed, sorted_or_cycle):
+    """Work needs to be done after TPU or CPU tracing."""
+
+    self._write_reason_section()
+    self._write_graph_section(succeed, sorted_or_cycle)
+    self._close_report_file()
+
+  def _get_checkpoints(self, graph):
+    """Returns the list of Ops that produce the tensors traced with API.
+
+    Args:
+      graph: the graph of Ops.
+
+    Returns:
+      A set of operation names which should be traced.
+    """
+
+    self._write_report('%s %s\n'%(_MARKER_SECTION_BEGIN,
+                                  _TENSOR_TRACER_CHECKPOINT))
+    checkpoint_operations = set()
+    tensor_tracer_variables = graph.get_collection(_TENSOR_TRACER_COLLECTION)
+    for (tensor, checkpoint_name) in tensor_tracer_variables:
+      self._write_report('%s %s\n'%(tensor.name, checkpoint_name))
+      checkpoint_operations.add(tensor.op.name)
+    self._write_report('%s %s\n'%(_MARKER_SECTION_END,
+                                  _TENSOR_TRACER_CHECKPOINT))
+    return checkpoint_operations
+
   def trace_tpu(self, graph, result_tensor, num_replicas=None):
     """Traces the tensors generated by TPU Ops in a TF graph.
 
     Args:
-      graph: the graph of Ops.
+      graph: the graph of Ops executed on the TPU.
       result_tensor: a result tensor of evaluating the graph.
       num_replicas: number of replicas used on the TPU.
 
@@ -502,38 +880,22 @@ class TensorTracer(object):
     TensorTracer.check_device_type(self._device_type)
     result_tensor_copy = self._add_replica_id_to_graph(num_replicas,
                                                        result_tensor)
-    self._write_config_section()
+    (operations, succeed, sorted_or_cycle) = self._pre_tracing(graph)
     tracing_ops = []
-    operations = graph.get_operations()
-    self._write_op_list_section(operations)
-    # Does the topological sort before adding any nodes to the graph.
-    (succeed, sorted_or_cycle) = TensorTracer.topological_sort(graph)
+    checkpoint_operations = self._get_checkpoints(graph)
+
     for op_id, op in enumerate(operations):
-      if not self._inside_op_range(op_id):
-        self._instrument_records[op.name] = TensorTracer.reason(
-            op_id, _RECORD_OUTSIDE_OP_RANGE)
+      if checkpoint_operations and op.name not in checkpoint_operations:
         continue
-      if not TensorTracer.should_trace(self._device_type, op):
-        self._instrument_records[op.name] = TensorTracer.reason(
-            op_id, _RECORD_SHOULD_NOT_TRACE)
-        continue
-      if not self._is_selected_op(op.name):
-        self._instrument_records[op.name] = TensorTracer.reason(
-            op_id, _RECORD_FILTERED_OUT)
+      user_included = self._is_user_included_op(op)
+      user_excluded = self._is_user_excluded_op(op)
+      if self._skip_op(op_id, op, user_included, user_excluded):
         continue
       for i in range(len(op.outputs)):
         out_tensor = op.outputs[i]
-        if not out_tensor.get_shape().is_fully_defined():
-          self._instrument_records[out_tensor.name] = TensorTracer.reason(
-              op_id, _RECORD_DYNAMIC_SHAPE)
-          continue  # cannot trace tensors with dynamic shape.
-        rank = len(out_tensor.shape)
-        if rank < 1:
-          self._instrument_records[out_tensor.name] = TensorTracer.reason(
-              op_id, _RECORD_SCALAR)
-          continue  # cannot trace scalar.
-        self._instrument_records[out_tensor.name] = TensorTracer.reason(
-            op_id, _RECORD_GET_TRACED)
+        if self._skip_tensor(op_id, out_tensor, user_included,
+                             user_excluded):
+          continue
         consumers = out_tensor.consumers()
         trace_op = tpu.outside_compilation(
             self._make_tensor_trace_fun(op.name, i), out_tensor)
@@ -546,8 +908,45 @@ class TensorTracer(object):
           # if there is no consumer, we will add the control dependence later
           # when we add the control dependency to the output operations.
           tracing_ops.append(trace_op)
+    self._post_tracing(succeed, sorted_or_cycle)
+    return (result_tensor_copy, tracing_ops)
 
-    self._write_reason_section()
-    self._write_graph_section(succeed, sorted_or_cycle)
+  def trace_cpu(self, graph):
+    """Traces the tensors generated by CPU Ops in a TF graph.
 
-    return (result_tensor_copy, tracing_ops)
+    Args:
+      graph: the graph of Ops executed on the CPU.
+
+    Returns:
+      tracing_calls: a map from keys to trace calls.
+                     A key is constructed from an Op's name.
+                     A trace call consists of a function and a tensor (
+                     the function will be invoked with the tensor).
+    """
+
+    self._device_type = _DEVICE_TYPE_CPU
+    TensorTracer.check_device_type(self._device_type)
+    self._num_replicas = 1
+    self._replica_id = 0
+    (operations, succeed, sorted_or_cycle) = self._pre_tracing(graph)
+    tracing_calls = {}
+    checkpoint_operations = self._get_checkpoints(graph)
+
+    for op_id, op in enumerate(operations):
+      if checkpoint_operations and op.name not in checkpoint_operations:
+        continue
+      user_included = self._is_user_included_op(op)
+      user_excluded = self._is_user_excluded_op(op)
+      if self._skip_op(op_id, op, user_included, user_excluded):
+        continue
+      for i in range(len(op.outputs)):
+        out_tensor = op.outputs[i]
+        if self._skip_tensor(op_id, out_tensor, user_included,
+                             user_excluded):
+          continue
+        trace_fun = self._make_tensor_trace_fun(op.name, i)
+        trace_call = (trace_fun, [out_tensor])
+        trace_call_key = 'tensor_tracing_cpu-%s:%d'%(op.name, i)
+        tracing_calls[trace_call_key] = trace_call
+    self._post_tracing(succeed, sorted_or_cycle)
+    return tracing_calls
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 84816d70d0..fe2ac61bf9 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -336,6 +336,16 @@ class TPUEstimatorSpec(model_fn_lib._TPUEstimatorSpec):  # pylint: disable=prote
     hooks = None
     if self.host_call is not None:
       hooks = [_OutfeedHostCallHook(host_call_ret['host_call'])]
+    if tensor_tracer.TensorTracer.is_enabled():
+      tt = tensor_tracer.TensorTracer()
+      tracing_calls = tt.trace_cpu(ops.get_default_graph())
+      tracing_call_ret = _OutfeedHostCall.create_cpu_hostcall(tracing_calls)
+      tracing_functions = tracing_call_ret.values()
+      if tracing_functions:
+        if hooks:
+          hooks.extend([_OutfeedHostCallHook(tracing_functions)])
+        else:
+          hooks = [_OutfeedHostCallHook(tracing_functions)]
     hooks = tuple(hooks or [])
     scaffold = self.scaffold_fn() if self.scaffold_fn else None
     return model_fn_lib.EstimatorSpec(
-- 
GitLab


From c6245fa0b4efaf5e75b12e8aea4588c0d25c5519 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 20:15:05 -0800
Subject: [PATCH 412/873] RELNOTES: Add an ignore_unknown argument to
 parse_values which suppresses ValueError for unknown hyperparameter types.
 Such hyperparameter are ignored.

parse_values('a=1,b=foo', {a: int}) Raises a ValueError
parse_values('a=1,b=foo', {a: int}, ignore_unknown=True) does not raise a ValueError, and returns {'a': 1}

PiperOrigin-RevId: 225117666
---
 .../training/python/training/hparam.py        |  7 +-
 .../training/python/training/hparam_test.py   | 69 +++++++++++++++++++
 2 files changed, 75 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py
index 3beb7bfe30..bcc177601b 100644
--- a/tensorflow/contrib/training/python/training/hparam.py
+++ b/tensorflow/contrib/training/python/training/hparam.py
@@ -187,7 +187,7 @@ def _cast_to_type_if_compatible(name, param_type, value):
   return param_type(value)
 
 
-def parse_values(values, type_map):
+def parse_values(values, type_map, ignore_unknown=False):
   """Parses hyperparameter values from a string into a python map.
 
   `values` is a string containing comma-separated `name=value` pairs.
@@ -233,6 +233,9 @@ def parse_values(values, type_map):
       type T if either V has type T, or V is a list of elements of type T.
       Hence, for a multidimensional parameter 'x' taking float values,
       'x=[0.1,0.2]' will parse successfully if type_map['x'] = float.
+    ignore_unknown: Bool. Whether values that are missing a type in type_map
+      should be ignored. If set to True, a ValueError will not be raised for
+      unknown hyperparameter type.
 
   Returns:
     A python map mapping each name to either:
@@ -260,6 +263,8 @@ def parse_values(values, type_map):
     m_dict = m.groupdict()
     name = m_dict['name']
     if name not in type_map:
+      if ignore_unknown:
+        continue
       raise ValueError('Unknown hyperparameter type for %s' % name)
     type_ = type_map[name]
 
diff --git a/tensorflow/contrib/training/python/training/hparam_test.py b/tensorflow/contrib/training/python/training/hparam_test.py
index 660c97f25e..a990e04711 100644
--- a/tensorflow/contrib/training/python/training/hparam_test.py
+++ b/tensorflow/contrib/training/python/training/hparam_test.py
@@ -216,6 +216,14 @@ class HParamsTest(test.TestCase):
     self.assertTrue(isinstance(parse_dict['arr'], dict))
     self.assertDictEqual(parse_dict['arr'], {1: 10})
 
+  def testParseValuesWithIndexAssigment1_IgnoreUnknown(self):
+    """Assignment to an index position."""
+    parse_dict = hparam.parse_values(
+        'arr[1]=10,b=5', {'arr': int}, ignore_unknown=True)
+    self.assertEqual(len(parse_dict), 1)
+    self.assertTrue(isinstance(parse_dict['arr'], dict))
+    self.assertDictEqual(parse_dict['arr'], {1: 10})
+
   def testParseValuesWithIndexAssigment2(self):
     """Assignment to multiple index positions."""
     parse_dict = hparam.parse_values('arr[0]=10,arr[5]=20', {'arr': int})
@@ -223,6 +231,14 @@ class HParamsTest(test.TestCase):
     self.assertTrue(isinstance(parse_dict['arr'], dict))
     self.assertDictEqual(parse_dict['arr'], {0: 10, 5: 20})
 
+  def testParseValuesWithIndexAssigment2_IgnoreUnknown(self):
+    """Assignment to multiple index positions."""
+    parse_dict = hparam.parse_values(
+        'arr[0]=10,arr[5]=20,foo=bar', {'arr': int}, ignore_unknown=True)
+    self.assertEqual(len(parse_dict), 1)
+    self.assertTrue(isinstance(parse_dict['arr'], dict))
+    self.assertDictEqual(parse_dict['arr'], {0: 10, 5: 20})
+
   def testParseValuesWithIndexAssigment3(self):
     """Assignment to index positions in multiple names."""
     parse_dict = hparam.parse_values('arr[0]=10,arr[1]=20,L[5]=100,L[10]=200',
@@ -234,6 +250,17 @@ class HParamsTest(test.TestCase):
     self.assertTrue(isinstance(parse_dict['L'], dict))
     self.assertDictEqual(parse_dict['L'], {5: 100, 10: 200})
 
+  def testParseValuesWithIndexAssigment3_IgnoreUnknown(self):
+    """Assignment to index positions in multiple names."""
+    parse_dict = hparam.parse_values(
+        'arr[0]=10,C=5,arr[1]=20,B[0]=kkk,L[5]=100,L[10]=200',
+        {'arr': int, 'L': int}, ignore_unknown=True)
+    self.assertEqual(len(parse_dict), 2)
+    self.assertTrue(isinstance(parse_dict['arr'], dict))
+    self.assertDictEqual(parse_dict['arr'], {0: 10, 1: 20})
+    self.assertTrue(isinstance(parse_dict['L'], dict))
+    self.assertDictEqual(parse_dict['L'], {5: 100, 10: 200})
+
   def testParseValuesWithIndexAssigment4(self):
     """Assignment of index positions and scalars."""
     parse_dict = hparam.parse_values('x=10,arr[1]=20,y=30',
@@ -246,6 +273,17 @@ class HParamsTest(test.TestCase):
     self.assertEqual(parse_dict['x'], 10)
     self.assertEqual(parse_dict['y'], 30)
 
+  def testParseValuesWithIndexAssigment4_IgnoreUnknown(self):
+    """Assignment of index positions and scalars."""
+    parse_dict = hparam.parse_values(
+        'x=10,foo[0]=bar,arr[1]=20,zzz=78,y=30',
+        {'x': int, 'y': int, 'arr': int}, ignore_unknown=True)
+    self.assertEqual(len(parse_dict), 3)
+    self.assertTrue(isinstance(parse_dict['arr'], dict))
+    self.assertDictEqual(parse_dict['arr'], {1: 20})
+    self.assertEqual(parse_dict['x'], 10)
+    self.assertEqual(parse_dict['y'], 30)
+
   def testParseValuesWithIndexAssigment5(self):
     """Different variable types."""
     parse_dict = hparam.parse_values('a[0]=5,b[1]=true,c[2]=abc,d[3]=3.14', {
@@ -264,24 +302,55 @@ class HParamsTest(test.TestCase):
     self.assertTrue(isinstance(parse_dict['d'], dict))
     self.assertDictEqual(parse_dict['d'], {3: 3.14})
 
+  def testParseValuesWithIndexAssigment5_IgnoreUnknown(self):
+    """Different variable types."""
+    parse_dict = hparam.parse_values(
+        'a[0]=5,cc=4,b[1]=true,c[2]=abc,mm=2,d[3]=3.14',
+        {'a': int, 'b': bool, 'c': str, 'd': float},
+        ignore_unknown=True)
+    self.assertEqual(set(parse_dict.keys()), {'a', 'b', 'c', 'd'})
+    self.assertTrue(isinstance(parse_dict['a'], dict))
+    self.assertDictEqual(parse_dict['a'], {0: 5})
+    self.assertTrue(isinstance(parse_dict['b'], dict))
+    self.assertDictEqual(parse_dict['b'], {1: True})
+    self.assertTrue(isinstance(parse_dict['c'], dict))
+    self.assertDictEqual(parse_dict['c'], {2: 'abc'})
+    self.assertTrue(isinstance(parse_dict['d'], dict))
+    self.assertDictEqual(parse_dict['d'], {3: 3.14})
+
   def testParseValuesWithBadIndexAssigment1(self):
     """Reject assignment of list to variable type."""
     with self.assertRaisesRegexp(ValueError,
                                  r'Assignment of a list to a list index.'):
       hparam.parse_values('arr[1]=[1,2,3]', {'arr': int})
 
+  def testParseValuesWithBadIndexAssigment1_IgnoreUnknown(self):
+    """Reject assignment of list to variable type."""
+    with self.assertRaisesRegexp(ValueError,
+                                 r'Assignment of a list to a list index.'):
+      hparam.parse_values(
+          'arr[1]=[1,2,3],c=8', {'arr': int}, ignore_unknown=True)
+
   def testParseValuesWithBadIndexAssigment2(self):
     """Reject if type missing."""
     with self.assertRaisesRegexp(ValueError,
                                  r'Unknown hyperparameter type for arr'):
       hparam.parse_values('arr[1]=5', {})
 
+  def testParseValuesWithBadIndexAssigment2_IgnoreUnknown(self):
+    """Ignore missing type."""
+    hparam.parse_values('arr[1]=5', {}, ignore_unknown=True)
+
   def testParseValuesWithBadIndexAssigment3(self):
     """Reject type of the form name[index]."""
     with self.assertRaisesRegexp(ValueError,
                                  'Unknown hyperparameter type for arr'):
       hparam.parse_values('arr[1]=1', {'arr[1]': int})
 
+  def testParseValuesWithBadIndexAssigment3_IgnoreUnknown(self):
+    """Ignore type of the form name[index]."""
+    hparam.parse_values('arr[1]=1', {'arr[1]': int}, ignore_unknown=True)
+
   def testWithReusedVariables(self):
     with self.assertRaisesRegexp(ValueError,
                                  'Multiple assignments to variable \'x\''):
-- 
GitLab


From 23810678737d40c00227252e5efffaaaa8fc94d6 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Tue, 11 Dec 2018 20:56:59 -0800
Subject: [PATCH 413/873] Replace Layout and Tile protos with C++ classes in
 XLA. No functional change. Rename the proto message Layout to LayoutProto,
 and Tile to TileProto. Define in-place replacement C++ classes named Layout
 and Tile with an interface which mirrors the protobuf generated code
 interface. Having these data structures as C++ classes enables greater
 flexibility in the interface, enables enforcement of invariants, and
 potential performance improvements.

PiperOrigin-RevId: 225121052
---
 tensorflow/compiler/xla/BUILD                 |  18 ++
 tensorflow/compiler/xla/client/client.cc      |   2 +-
 tensorflow/compiler/xla/layout.cc             |  96 +++++++++
 tensorflow/compiler/xla/layout.h              | 187 ++++++++++++++++++
 tensorflow/compiler/xla/layout_test.cc        | 104 ++++++++++
 tensorflow/compiler/xla/layout_util.cc        |  34 +---
 tensorflow/compiler/xla/layout_util.h         |   3 +-
 tensorflow/compiler/xla/layout_util_test.cc   |  11 --
 .../compiler/xla/packed_literal_reader.cc     |   3 +-
 .../xla/service/gpu/stream_executor_util.h    |   1 +
 tensorflow/compiler/xla/service/service.cc    |   8 +-
 tensorflow/compiler/xla/shape.cc              |   4 +-
 tensorflow/compiler/xla/shape.h               |  26 +--
 tensorflow/compiler/xla/shape_util.cc         |   8 +-
 tensorflow/compiler/xla/tests/copy_test.cc    |   4 +-
 tensorflow/compiler/xla/xla.proto             |   2 +-
 tensorflow/compiler/xla/xla_data.proto        |  13 +-
 17 files changed, 450 insertions(+), 74 deletions(-)
 create mode 100644 tensorflow/compiler/xla/layout.cc
 create mode 100644 tensorflow/compiler/xla/layout.h
 create mode 100644 tensorflow/compiler/xla/layout_test.cc

diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 4360e08579..19f12569ff 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -224,6 +224,7 @@ cc_library(
     name = "shape_util",
     srcs = [
         "index_util.cc",
+        "layout.cc",
         "layout_util.cc",
         "primitive_util.cc",
         "shape.cc",
@@ -231,6 +232,7 @@ cc_library(
     ],
     hdrs = [
         "index_util.h",
+        "layout.h",
         "layout_util.h",
         "primitive_util.h",
         "shape.h",
@@ -301,6 +303,22 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "layout_test",
+    srcs = ["layout_test.cc"],
+    deps = [
+        ":shape_util",
+        ":status_macros",
+        ":test",
+        ":test_helpers",
+        ":types",
+        ":util",
+        ":xla_data_proto",
+        "//tensorflow/core:test_main",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
 tf_cc_test(
     name = "index_util_test",
     srcs = ["index_util_test.cc"],
diff --git a/tensorflow/compiler/xla/client/client.cc b/tensorflow/compiler/xla/client/client.cc
index 74b76f9299..43127cae1e 100644
--- a/tensorflow/compiler/xla/client/client.cc
+++ b/tensorflow/compiler/xla/client/client.cc
@@ -186,7 +186,7 @@ StatusOr<Literal> Client::ComputeConstant(const XlaComputation& computation,
   ComputeConstantGraphRequest request;
   *request.mutable_computation() = computation.proto();
   if (output_layout != nullptr) {
-    *request.mutable_output_layout() = *output_layout;
+    *request.mutable_output_layout() = output_layout->ToProto();
   }
 
   ComputeConstantResponse response;
diff --git a/tensorflow/compiler/xla/layout.cc b/tensorflow/compiler/xla/layout.cc
new file mode 100644
index 0000000000..e3b5fcd527
--- /dev/null
+++ b/tensorflow/compiler/xla/layout.cc
@@ -0,0 +1,96 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/layout.h"
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "tensorflow/compiler/xla/layout_util.h"
+
+namespace xla {
+
+TileProto Tile::ToProto() const {
+  TileProto tile_proto;
+  for (int64 i : dimensions()) {
+    tile_proto.add_dimensions(i);
+  }
+  return tile_proto;
+}
+
+string Tile::ToString() const {
+  return absl::StrCat("(", absl::StrJoin(dimensions(), ","), ")");
+}
+
+/* static */ Layout Layout::CreateFromProto(const LayoutProto& proto) {
+  Layout layout;
+  layout.set_format(proto.format());
+  layout.minor_to_major_.reserve(proto.minor_to_major_size());
+  for (const int64 dimension : proto.minor_to_major()) {
+    layout.add_minor_to_major(dimension);
+  }
+  layout.set_max_sparse_elements(proto.max_sparse_elements());
+  for (const TileProto& tile_proto : proto.tiles()) {
+    *layout.add_tiles() = Tile::CreateFromProto(tile_proto);
+  }
+  layout.set_element_size_in_bits(proto.element_size_in_bits());
+  return layout;
+}
+
+LayoutProto Layout::ToProto() const {
+  LayoutProto proto;
+  proto.set_format(format_);
+  proto.mutable_minor_to_major()->Reserve(minor_to_major_size());
+  for (const int64 dimension : minor_to_major()) {
+    proto.add_minor_to_major(dimension);
+  }
+  proto.set_max_sparse_elements(max_sparse_elements_);
+  for (const Tile& tile : tiles()) {
+    *proto.add_tiles() = tile.ToProto();
+  }
+  proto.set_element_size_in_bits(element_size_in_bits());
+  return proto;
+}
+
+string Layout::ToString() const {
+  // TODO(b/119839262): Emit tiles in string.
+  if (format() == SPARSE) {
+    return absl::StrCat("sparse{", max_sparse_elements(), "}");
+  } else if (format() == DENSE) {
+    return absl::StrCat("{", absl::StrJoin(minor_to_major(), ","), "}");
+  } else {
+    CHECK_EQ(format(), INVALID_FORMAT);
+    return "invalid{}";
+  }
+}
+
+bool Layout::operator==(const Layout& other) const {
+  return (other.format() == format() &&
+          other.minor_to_major() == minor_to_major() &&
+          other.element_size_in_bits() == element_size_in_bits() &&
+          other.max_sparse_elements() == max_sparse_elements() &&
+          other.tiles() == tiles());
+}
+
+std::ostream& operator<<(std::ostream& out, const Tile& tile) {
+  out << tile.ToString();
+  return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const Layout& layout) {
+  out << layout.ToString();
+  return out;
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/layout.h b/tensorflow/compiler/xla/layout.h
new file mode 100644
index 0000000000..313368c39e
--- /dev/null
+++ b/tensorflow/compiler/xla/layout.h
@@ -0,0 +1,187 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_LAYOUT_H_
+#define TENSORFLOW_COMPILER_XLA_LAYOUT_H_
+
+#include <vector>
+
+#include "absl/types/span.h"
+
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+
+// Describes a tile used in tiling-based layout. Refer to
+// g3doc/third_party/tensorflow/compiler/xla/g3doc/layout_with_tiling.md for
+// details.
+class Tile {
+ public:
+  Tile() = default;
+  explicit Tile(absl::Span<const int64> dimensions)
+      : dimensions_(dimensions.begin(), dimensions.end()) {}
+
+  // De/Serialize a Tile to and from a TileProto.
+  static Tile CreateFromProto(const TileProto& tile_proto) {
+    return Tile(AsInt64Slice(tile_proto.dimensions()));
+  }
+  TileProto ToProto() const;
+
+  bool operator==(const Tile& other) const {
+    return dimensions() == other.dimensions();
+  }
+  bool operator!=(const Tile& other) const { return !(*this == other); }
+
+  string ToString() const;
+
+  // Returns the bound of the tile in the given dimension index.
+  int64 dimension(int i) const { return dimensions_.at(i); }
+
+  // Returns the dimensions of the tile.
+  const std::vector<int64>& dimensions() const { return dimensions_; }
+
+ private:
+  // The bounds of the tile.
+  std::vector<int64> dimensions_;
+};
+
+class Layout {
+ public:
+  Layout() = default;
+
+  // Constructs a dense layout with the given minor-to-major order.
+  explicit Layout(absl::Span<const int64> minor_to_major)
+      : format_(DENSE),
+        minor_to_major_(minor_to_major.begin(), minor_to_major.end()) {}
+
+  // Constructs a dense tiled layout with the given minor-to-major order and
+  // tiles.
+  Layout(absl::Span<const int64> minor_to_major, absl::Span<const Tile> tiles)
+      : format_(DENSE),
+        minor_to_major_(minor_to_major.begin(), minor_to_major.end()),
+        tiles_(tiles.begin(), tiles.end()) {}
+
+  // Construct a shape from a LayoutProto.
+  static Layout CreateFromProto(const LayoutProto& proto);
+
+  // Returns a LayoutProto representation of the Layout.
+  LayoutProto ToProto() const;
+
+  // Returns a human-readable string that represents this layout.
+  string ToString() const;
+
+  bool operator==(const Layout& other) const;
+  bool operator!=(const Layout& other) const { return !(*this == other); }
+
+  // The following methods mirror the protobuf generated code interface for the
+  // message LayoutProto. This enabled easy migration of this data structure
+  // from a proto to a proper C++ class.
+  //
+  // TODO(b/29771030): Replace or augment these methods with a more ergonomic
+  // interface.
+
+  // Methods for accessing the format.
+  Format format() const { return format_; }
+  Layout& set_format(Format value) {
+    format_ = value;
+    return *this;
+  }
+
+  // Methods for accessing the minor-to-major array.
+  int minor_to_major_size() const { return minor_to_major_.size(); }
+  int64 minor_to_major(int index) const { return minor_to_major_.at(index); }
+  Layout& set_minor_to_major(int index, int64 value) {
+    minor_to_major_.at(index) = value;
+    return *this;
+  }
+  Layout& add_minor_to_major(int64 value) {
+    minor_to_major_.push_back(value);
+    return *this;
+  }
+  Layout& clear_minor_to_major() {
+    minor_to_major_.clear();
+    return *this;
+  }
+  const std::vector<int64>& minor_to_major() const { return minor_to_major_; }
+  std::vector<int64>* mutable_minor_to_major() { return &minor_to_major_; }
+
+  // Methods for accessing the tile field.
+  int tiles_size() const { return tiles_.size(); }
+  const Tile& tiles(int index) const { return tiles_.at(index); }
+  Tile* mutable_tiles(int index) { return &tiles_.at(index); }
+  Tile* add_tiles() {
+    tiles_.push_back(Tile());
+    return &tiles_.back();
+  }
+  Layout& clear_tiles() {
+    tiles_.clear();
+    return *this;
+  }
+  const std::vector<Tile>& tiles() const { return tiles_; }
+  std::vector<Tile>* mutable_tiles() { return &tiles_; }
+
+  // Methods for accessing the int64 fields.
+  int64 max_sparse_elements() const { return max_sparse_elements_; }
+  Layout& set_max_sparse_elements(int64 value) {
+    max_sparse_elements_ = value;
+    return *this;
+  }
+  int64 element_size_in_bits() const { return element_size_in_bits_; }
+  Layout& set_element_size_in_bits(int64 value) {
+    element_size_in_bits_ = value;
+    return *this;
+  }
+
+  void Swap(Layout* other) {
+    using std::swap;
+    swap(*this, *other);
+  }
+
+  void Clear() {
+    format_ = INVALID_FORMAT;
+    minor_to_major_.clear();
+    max_sparse_elements_ = 0;
+    element_size_in_bits_ = 0;
+  }
+
+ public:
+  // The format of this layout.
+  Format format_ = INVALID_FORMAT;
+
+  // Sequence of dimension numbers, from minor (fastest varying index) to major
+  // (slowest varying index).
+  std::vector<int64> minor_to_major_;
+
+  // The maximum number of elements that can be stored for SPARSE formats.  This
+  // can be used to determine the maximum size in bytes of arrays stored in
+  // memory.  This field must be zero unless the format is SPARSE.
+  int64 max_sparse_elements_ = 0;
+
+  // The number of bits used to store an individual array element.
+  int64 element_size_in_bits_ = 0;
+
+  // The tiles used in tiling-based layout.
+  std::vector<Tile> tiles_;
+};
+
+std::ostream& operator<<(std::ostream& out, const Tile& Tile);
+std::ostream& operator<<(std::ostream& out, const Layout& layout);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_LAYOUT_H_
diff --git a/tensorflow/compiler/xla/layout_test.cc b/tensorflow/compiler/xla/layout_test.cc
new file mode 100644
index 0000000000..fb6abd3f65
--- /dev/null
+++ b/tensorflow/compiler/xla/layout_test.cc
@@ -0,0 +1,104 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/layout.h"
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/test_helpers.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+
+namespace xla {
+namespace {
+
+class LayoutTest : public ::testing::Test {};
+
+TEST_F(LayoutTest, ToString) {
+  EXPECT_EQ(Layout().ToString(), "invalid{}");
+  EXPECT_EQ(Layout({4, 5, 6}).ToString(), "{4,5,6}");
+  EXPECT_EQ(Layout().set_format(SPARSE).set_max_sparse_elements(123).ToString(),
+            "sparse{123}");
+  EXPECT_EQ(Layout({4, 5, 6}).ToString(), "{4,5,6}");
+  EXPECT_EQ(Layout({3, 2, 1, 0}, {Tile({42, 123}), Tile({4, 5})}).ToString(),
+            "{3,2,1,0}");
+  EXPECT_EQ(
+      Layout({1, 0}, {Tile({2, 55})}).set_element_size_in_bits(42).ToString(),
+      "{1,0}");
+}
+
+TEST_F(LayoutTest, StreamOut) {
+  {
+    std::ostringstream oss;
+    oss << Tile({7, 8});
+    EXPECT_EQ(oss.str(), "(7,8)");
+  }
+
+  {
+    std::ostringstream oss;
+    oss << Layout({0, 1, 2});
+    EXPECT_EQ(oss.str(), "{0,1,2}");
+  }
+}
+
+TEST_F(LayoutTest, SparseLayoutMaxElements) {
+  EXPECT_EQ(LayoutUtil::MaxSparseElements(LayoutUtil::MakeSparseLayout(101)),
+            101);
+}
+
+TEST_F(LayoutTest, Equality) {
+  EXPECT_EQ(Layout(), Layout());
+  const std::vector<int64> empty_dims;
+  EXPECT_EQ(Layout(empty_dims), Layout(empty_dims));
+  EXPECT_NE(Layout(), Layout(empty_dims));
+  EXPECT_EQ(Layout({0, 1, 2, 3}), Layout({0, 1, 2, 3}));
+  EXPECT_NE(Layout({0, 1, 2, 3}), Layout({0, 1, 2}));
+  EXPECT_EQ(Layout({0, 1, 2}, {Tile({42, 44})}),
+            Layout({0, 1, 2}, {Tile({42, 44})}));
+  EXPECT_NE(Layout({0, 1, 2}, {Tile({42, 44})}),
+            Layout({0, 1, 2}, {Tile({42, 45})}));
+  EXPECT_NE(Layout({0, 1, 2}, {Tile({42, 44})}), Layout({0, 1, 2, 3}));
+  EXPECT_EQ(Layout({0, 1, 2}).set_element_size_in_bits(33),
+            Layout({0, 1, 2}).set_element_size_in_bits(33));
+  EXPECT_NE(Layout({0, 1, 2}).set_element_size_in_bits(33),
+            Layout({0, 1, 2}).set_element_size_in_bits(7));
+  EXPECT_EQ(Layout().set_format(SPARSE), Layout().set_format(SPARSE));
+  EXPECT_EQ(Layout().set_format(SPARSE).set_max_sparse_elements(42),
+            Layout().set_format(SPARSE).set_max_sparse_elements(42));
+  EXPECT_NE(Layout().set_format(SPARSE).set_max_sparse_elements(42),
+            Layout().set_format(SPARSE).set_max_sparse_elements(24));
+}
+
+TEST_F(LayoutTest, LayoutToFromProto) {
+  // Round-trips a Layout through proto de/serialization.
+  auto expect_unchanged = [](const Layout& layout) {
+    EXPECT_EQ(layout, Layout::CreateFromProto(layout.ToProto()));
+  };
+
+  expect_unchanged(Layout());
+  expect_unchanged(Layout({1, 3, 2, 0}));
+  expect_unchanged(Layout().set_format(SPARSE));
+  expect_unchanged(Layout().set_format(SPARSE).set_max_sparse_elements(123));
+  expect_unchanged(Layout({0, 1}).set_element_size_in_bits(42));
+  expect_unchanged(Layout({3, 2, 1, 0}, {Tile({42, 123}), Tile({4, 5})}));
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc
index dbb81381ac..ddccd8c798 100644
--- a/tensorflow/compiler/xla/layout_util.cc
+++ b/tensorflow/compiler/xla/layout_util.cc
@@ -41,15 +41,13 @@ namespace {
 
 // Internal helper for GetDefaultLayoutForShape and SetToDefaultLayout. Sets
 // minor_to_major to the value that represents the default layout.
-void SetDefaultLayoutToContainer(
-    tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>*
-        minor_to_major) {
+void SetDefaultLayoutToContainer(std::vector<int64>* minor_to_major) {
   // The default XLA layout is major-to-minor (dim 0 is major).
   // For more information on XLA layouts, see:
   // https://www.tensorflow.org/performance/xla/shapes
   const int64 size = minor_to_major->size();
   for (int64 i = 0; i < size; ++i) {
-    minor_to_major->Set(i, size - 1 - i);
+    (*minor_to_major)[i] = size - 1 - i;
   }
 }
 
@@ -94,9 +92,8 @@ namespace {
 Layout CreateDefaultLayoutForRank(int64 rank) {
   Layout layout;
   layout.set_format(DENSE);
-  tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>*
-      minor_to_major = layout.mutable_minor_to_major();
-  minor_to_major->Resize(rank, 0);
+  std::vector<int64>* minor_to_major = layout.mutable_minor_to_major();
+  minor_to_major->resize(rank, 0);
   SetDefaultLayoutToContainer(minor_to_major);
   return layout;
 }
@@ -139,9 +136,8 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
     shape->clear_layout();
   } else if (ShapeUtil::IsArray(*shape)) {
     shape->mutable_layout()->set_format(DENSE);
-    tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>*
-        minor_to_major = shape->mutable_layout()->mutable_minor_to_major();
-    minor_to_major->Resize(shape->dimensions_size(), 0);
+    auto* minor_to_major = shape->mutable_layout()->mutable_minor_to_major();
+    minor_to_major->resize(shape->dimensions_size(), 0);
     SetDefaultLayoutToContainer(minor_to_major);
   } else {
     // Opaque, token types etc. have no layout.
@@ -210,9 +206,8 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
   }
 
   if (layout.format() == INVALID_FORMAT || !Format_IsValid(layout.format())) {
-    return InvalidArgument(
-        "Layout has an invalid format (%d) in layout {%s}, shape {%s}",
-        layout.format(), layout.ShortDebugString(), shape.ShortDebugString());
+    return InvalidArgument("Layout has an invalid format (%d)",
+                           layout.format());
   }
 
   if (layout.format() == DENSE) {
@@ -316,7 +311,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
 }
 
 /* static */ bool LayoutUtil::Equal(const Layout& lhs, const Layout& rhs) {
-  return protobuf_util::ProtobufEquals(lhs, rhs);
+  return lhs == rhs;
 }
 
 /* static */ absl::Span<const int64> LayoutUtil::MinorToMajor(
@@ -358,11 +353,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
 }
 
 /* static */ string LayoutUtil::HumanString(const Layout& layout) {
-  if (IsSparse(layout)) {
-    return absl::StrCat("sparse{", layout.max_sparse_elements(), "}");
-  }
-  CHECK(IsDense(layout));
-  return absl::StrCat("{", absl::StrJoin(layout.minor_to_major(), ","), "}");
+  return layout.ToString();
 }
 
 namespace {
@@ -444,11 +435,6 @@ Status LayoutUtil::CopyLayoutBetweenShapes(const Shape& src, Shape* dst) {
   return true;
 }
 
-std::ostream& operator<<(std::ostream& out, const Layout& layout) {
-  out << LayoutUtil::HumanString(layout);
-  return out;
-}
-
 /*static*/ size_t LayoutUtil::Hash(const Layout& layout) {
   using tensorflow::hash;
   using tensorflow::Hash64Combine;
diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h
index 6c298e5725..609dba67bc 100644
--- a/tensorflow/compiler/xla/layout_util.h
+++ b/tensorflow/compiler/xla/layout_util.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include <string>
 
 #include "absl/types/span.h"
+#include "tensorflow/compiler/xla/layout.h"
 #include "tensorflow/compiler/xla/shape.h"
 #include "tensorflow/compiler/xla/status.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -195,8 +196,6 @@ class LayoutUtil {
   TF_DISALLOW_COPY_AND_ASSIGN(LayoutUtil);
 };
 
-std::ostream& operator<<(std::ostream& out, const Layout& layout);
-
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_LAYOUT_UTIL_H_
diff --git a/tensorflow/compiler/xla/layout_util_test.cc b/tensorflow/compiler/xla/layout_util_test.cc
index 12ce2d2d7c..4cc94c270c 100644
--- a/tensorflow/compiler/xla/layout_util_test.cc
+++ b/tensorflow/compiler/xla/layout_util_test.cc
@@ -317,17 +317,6 @@ TEST_F(LayoutUtilTest, DefaultLayoutGettersMajorToMinor) {
                             ShapeUtil::MakeShape(F32, {10, 20, 30, 15, 25}))));
 }
 
-TEST_F(LayoutUtilTest, SparseLayoutMaxElements) {
-  EXPECT_EQ(LayoutUtil::MaxSparseElements(LayoutUtil::MakeSparseLayout(101)),
-            101);
-}
-
-TEST_F(LayoutUtilTest, StreamOut) {
-  std::ostringstream oss;
-  oss << LayoutUtil::MakeLayout({0, 1, 2});
-  EXPECT_EQ(oss.str(), "{0,1,2}");
-}
-
 TEST_F(LayoutUtilTest, ValidateLayout_ValidArrayLayout) {
   Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {0, 1});
   auto status =
diff --git a/tensorflow/compiler/xla/packed_literal_reader.cc b/tensorflow/compiler/xla/packed_literal_reader.cc
index 0f86f9f35e..339660cf44 100644
--- a/tensorflow/compiler/xla/packed_literal_reader.cc
+++ b/tensorflow/compiler/xla/packed_literal_reader.cc
@@ -42,8 +42,7 @@ PackedLiteralReader::~PackedLiteralReader() { delete file_; }
 StatusOr<Literal> PackedLiteralReader::Read(const Shape& shape,
                                             const Layout* layout) {
   VLOG(3) << "reading shape from file: " << ShapeUtil::HumanString(shape)
-          << " layout: "
-          << (layout == nullptr ? "<none>" : layout->ShortDebugString());
+          << " layout: " << (layout == nullptr ? "<none>" : layout->ToString());
   Shape literal_shape = shape;
   if (layout != nullptr) {
     TF_RETURN_IF_ERROR(
diff --git a/tensorflow/compiler/xla/service/gpu/stream_executor_util.h b/tensorflow/compiler/xla/service/gpu/stream_executor_util.h
index 1fc46bafa1..92e4d6dbbc 100644
--- a/tensorflow/compiler/xla/service/gpu/stream_executor_util.h
+++ b/tensorflow/compiler/xla/service/gpu/stream_executor_util.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_EXECUTOR_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_STREAM_EXECUTOR_UTIL_H_
 
+#include "tensorflow/compiler/xla/layout.h"
 #include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 5ec7fe2ade..ae5bd93e7c 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -1078,9 +1078,11 @@ Status Service::ComputeConstantGraph(const ComputeConstantGraphRequest* arg,
 
   ProgramShape program_shape(arg->computation().host_program_shape());
   TF_DCHECK_OK(ShapeUtil::ValidateShape(program_shape.result()));
+  absl::optional<Layout> output_layout;
   if (arg->has_output_layout()) {
+    output_layout = Layout::CreateFromProto(arg->output_layout());
     TF_RETURN_IF_ERROR(LayoutUtil::ValidateLayoutForShape(
-        arg->output_layout(), program_shape.result()));
+        *output_layout, program_shape.result()));
   }
 
   HloModuleConfig config(program_shape);
@@ -1096,8 +1098,8 @@ Status Service::ComputeConstantGraph(const ComputeConstantGraphRequest* arg,
   // relayout here.
   //
   // TODO(b/77824332): Make HloEvaluator take care of the re-layout.
-  if (arg->has_output_layout()) {
-    result_literal = result_literal.Relayout(arg->output_layout());
+  if (output_layout.has_value()) {
+    result_literal = result_literal.Relayout(*output_layout);
   }
   *result->mutable_literal() = result_literal.ToProto();
 
diff --git a/tensorflow/compiler/xla/shape.cc b/tensorflow/compiler/xla/shape.cc
index 746ab9e997..b206345db2 100644
--- a/tensorflow/compiler/xla/shape.cc
+++ b/tensorflow/compiler/xla/shape.cc
@@ -32,7 +32,7 @@ Shape::Shape(const ShapeProto& shape_proto) {
     *add_tuple_shapes() = Shape(element_shape);
   }
   if (shape_proto.has_layout()) {
-    *mutable_layout() = shape_proto.layout();
+    *mutable_layout() = Layout::CreateFromProto(shape_proto.layout());
   }
 }
 
@@ -48,7 +48,7 @@ ShapeProto Shape::ToProto() const {
     *proto.add_tuple_shapes() = shape.ToProto();
   }
   if (has_layout()) {
-    *proto.mutable_layout() = layout();
+    *proto.mutable_layout() = layout().ToProto();
   }
   return proto;
 }
diff --git a/tensorflow/compiler/xla/shape.h b/tensorflow/compiler/xla/shape.h
index 7f6b14ab42..7643f64d8a 100644
--- a/tensorflow/compiler/xla/shape.h
+++ b/tensorflow/compiler/xla/shape.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/types/optional.h"
+#include "tensorflow/compiler/xla/layout.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/platform/types.h"
@@ -76,21 +77,10 @@ class Shape {
   std::vector<Shape>* mutable_tuple_shapes() { return &tuple_shapes_; }
 
   // Methods for accessing the layout field.
-  bool has_layout() const { return layout_.has_value(); }
-  const Layout& layout() const {
-    if (layout_.has_value()) {
-      return *layout_;
-    } else {
-      return Layout::default_instance();
-    }
-  }
-  Layout* mutable_layout() {
-    if (!layout_.has_value()) {
-      layout_ = Layout();
-    }
-    return &layout_.value();
-  }
-  void clear_layout() { layout_.reset(); }
+  bool has_layout() const { return layout_.format() != INVALID_FORMAT; }
+  const Layout& layout() const { return layout_; }
+  Layout* mutable_layout() { return &layout_; }
+  void clear_layout() { layout_.Clear(); }
 
   void Swap(Shape* other) {
     using std::swap;
@@ -101,7 +91,7 @@ class Shape {
     element_type_ = PRIMITIVE_TYPE_INVALID;
     dimensions_.clear();
     tuple_shapes_.clear();
-    layout_.reset();
+    clear_layout();
   }
 
   string SerializeAsString() const { return ToProto().SerializeAsString(); }
@@ -118,8 +108,8 @@ class Shape {
   // The tuple element subshapes. This is nonempty only for tuple shapes.
   std::vector<Shape> tuple_shapes_;
 
-  // The array layout of the shape. This is present only for array shapes.
-  absl::optional<Layout> layout_;
+  // The layout of the shape. Only relevant for arrays.
+  Layout layout_;
 };
 
 // Shape of the parameters and output of an XLA computation. This is analogous
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index a4d4e1e53e..eef2dc913d 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -164,9 +164,9 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
   TF_ASSIGN_OR_RETURN(Shape shape,
                       ShapeUtil::MakeValidatedShape(element_type, dimensions));
   auto min2maj = shape.mutable_layout()->mutable_minor_to_major();
-  min2maj->Clear();
+  min2maj->clear();
   for (int64 value : minor_to_major) {
-    min2maj->Add(value);
+    min2maj->push_back(value);
   }
   if (!shape.has_layout()) {
     return InvalidArgument("Shape has no layout.");
@@ -1618,10 +1618,10 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape,
   if (LayoutUtil::HasLayout(shape)) {
     Layout* layout = shape.mutable_layout();
     layout->set_format(DENSE);
-    for (size_t i = 0; i < layout->minor_to_major().size();) {
+    for (int64 i = 0; i < layout->minor_to_major().size();) {
       if (layout->minor_to_major(i) == dim_to_delete) {
         layout->mutable_minor_to_major()->erase(
-            layout->minor_to_major().begin() + i);
+            layout->mutable_minor_to_major()->begin() + i);
         continue;
       }
       if (layout->minor_to_major(i) > dim_to_delete) {
diff --git a/tensorflow/compiler/xla/tests/copy_test.cc b/tensorflow/compiler/xla/tests/copy_test.cc
index 3622f2c1e8..df005a6709 100644
--- a/tensorflow/compiler/xla/tests/copy_test.cc
+++ b/tensorflow/compiler/xla/tests/copy_test.cc
@@ -133,7 +133,9 @@ XLA_TEST_F(CopyOpTest, CopyConstantR2DifferentLayouts) {
   // Reverse the minor-to-major order of the literal.
   Layout* literal_layout = literal.mutable_shape_do_not_use()->mutable_layout();
   ASSERT_EQ(2, literal_layout->minor_to_major_size());
-  literal_layout->mutable_minor_to_major()->SwapElements(0, 1);
+  // Swap the first and second elements.
+  *literal_layout->mutable_minor_to_major() = {
+      literal_layout->minor_to_major(1), literal_layout->minor_to_major(0)};
 
   HloInstruction* constant = builder.AddInstruction(
       HloInstruction::CreateConstant(std::move(literal)));
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index 32b51c104c..238312e36b 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -399,7 +399,7 @@ message WaitForExecutionResponse {
 
 message ComputeConstantGraphRequest {
   HloModuleProto computation = 1;
-  Layout output_layout = 2;
+  LayoutProto output_layout = 2;
 }
 
 message ComputeConstantResponse {
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index 85ec83437a..e9c86abe50 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -100,6 +100,8 @@ message PaddingConfig {
 
 // A format specifies the method used by a layout to store an array in memory.
 enum Format {
+  // TODO(b/120869032): Rename this to FORMAT_NONE or something else which
+  // better corresponds to its meaning.
   INVALID_FORMAT = 0;
   // The default layout, with exactly one storage location per element.
   DENSE = 1;
@@ -109,8 +111,9 @@ enum Format {
 }
 
 // Describes a tile used in tiling-based layout. Refer to
-// g3doc/layout_with_tiling.md for details about tiling-based layout.
-message Tile {
+// g3doc/third_party/tensorflow/compiler/xla/g3doc/layout_with_tiling.md for
+// details about tiling-based layout.
+message TileProto {
   // Number of elements in each dimension of the tile. It's ordered from the
   // most major dimension of the tile to the most minor dimension of the tile.
   // The dimensions correspond to a suffix of the dimensions of the shape being
@@ -128,7 +131,7 @@ message Tile {
 // See the XLA documentation for more information on shapes and layouts.
 //
 // LINT.IfChange
-message Layout {
+message LayoutProto {
   // The method used to store the data in memory. The format determines which of
   // the other fields are used by the layout.
   Format format = 4;
@@ -153,7 +156,7 @@ message Layout {
   //
   // TODO(b/119839262): implement tiling in each backend or add Unimplemented
   // error.
-  repeated Tile tiles = 6;
+  repeated TileProto tiles = 6;
 
   // Bit size of each element. If the size is bigger than what the element
   // type requires, the value is stored in the least significant
@@ -196,7 +199,7 @@ message ShapeProto {
   repeated ShapeProto tuple_shapes = 4;
 
   // The layout used to back this shape.
-  Layout layout = 5;
+  LayoutProto layout = 5;
 
   // Important: if any field is added, be sure to modify ShapeUtil::Equal(),
   // ShapeUtil::Compatible() and ShapeUtil::Hash() appropriately to account for
-- 
GitLab


From 92f67536b78895c47065fb9b35f775ee4326f9e1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 11 Dec 2018 22:00:58 -0800
Subject: [PATCH 414/873] Pack supports input dimensions >= 4.

PiperOrigin-RevId: 225125955
---
 tensorflow/lite/kernels/pack.cc      |  1 -
 tensorflow/lite/kernels/pack_test.cc | 13 +++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/kernels/pack.cc b/tensorflow/lite/kernels/pack.cc
index 479495c875..d15a5a08af 100644
--- a/tensorflow/lite/kernels/pack.cc
+++ b/tensorflow/lite/kernels/pack.cc
@@ -35,7 +35,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
   const TfLiteTensor* input0 = GetInput(context, node, 0);
-  TF_LITE_ENSURE(context, NumDimensions(input0) < 4);
   TF_LITE_ENSURE(context, NumDimensions(input0) >= data->axis);
   // TODO(renjieliu): Support negative axis.
   TF_LITE_ENSURE(context, data->axis >= 0);
diff --git a/tensorflow/lite/kernels/pack_test.cc b/tensorflow/lite/kernels/pack_test.cc
index 4f58debc5c..530cc2e50f 100644
--- a/tensorflow/lite/kernels/pack_test.cc
+++ b/tensorflow/lite/kernels/pack_test.cc
@@ -82,6 +82,19 @@ TEST(PackOpTest, FloatMultilDimensions) {
               ElementsAreArray({1, 2, 3, 7, 8, 9, 4, 5, 6, 10, 11, 12}));
 }
 
+TEST(PackOpTest, FloatFiveDimensions) {
+  PackOpModel<float> model({TensorType_FLOAT32, {2, 2, 2, 2}}, 1, 2);
+  model.SetInput(0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
+  model.SetInput(
+      1, {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32});
+  model.Invoke();
+  EXPECT_THAT(model.GetOutputShape(), ElementsAre(2, 2, 2, 2, 2));
+  EXPECT_THAT(model.GetOutput(),
+              ElementsAreArray({1,  2,  3,  4,  5,  6,  7,  8,  17, 18, 19,
+                                20, 21, 22, 23, 24, 9,  10, 11, 12, 13, 14,
+                                15, 16, 25, 26, 27, 28, 29, 30, 31, 32}));
+}
+
 // int32 tests.
 TEST(PackOpTest, Int32ThreeInputs) {
   PackOpModel<int32_t> model({TensorType_INT32, {2}}, 0, 3);
-- 
GitLab


From 413551b9537565b5c918085951c8c0bde315f46c Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 11 Dec 2018 22:22:07 -0800
Subject: [PATCH 415/873] [XLA:CPU] Make instruction order compulsory in
 IrEmitter::EmitComputation; NFC

PiperOrigin-RevId: 225127595
---
 .../compiler/xla/service/cpu/cpu_compiler.cc    | 17 ++++++++---------
 .../compiler/xla/service/cpu/ir_emitter.cc      | 11 +++--------
 .../compiler/xla/service/cpu/ir_emitter.h       |  2 +-
 .../compiler/xla/service/hlo_computation.cc     |  6 +++---
 .../compiler/xla/service/hlo_computation.h      |  2 +-
 5 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index 6374822c81..f3dfa4d642 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -635,18 +635,17 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
             .EmitComputation(
                 embedded_computation, embedded_computation->name(),
                 /*is_top_level_computation=*/false,
-                &schedule.sequence(embedded_computation).instructions())
+                schedule.sequence(embedded_computation).instructions())
             .status());
   }
   string function_name_prefix = entry_computation->name().empty()
                                     ? "__compute"
                                     : entry_computation->name();
-  TF_ASSIGN_OR_RETURN(
-      llvm::Function * entry_function,
-      ir_emitter.EmitComputation(
-          entry_computation, function_name_prefix,
-          /*is_top_level_computation=*/true,
-          &schedule.sequence(entry_computation).instructions()));
+  TF_ASSIGN_OR_RETURN(llvm::Function * entry_function,
+                      ir_emitter.EmitComputation(
+                          entry_computation, function_name_prefix,
+                          /*is_top_level_computation=*/true,
+                          schedule.sequence(entry_computation).instructions()));
 
   string function_name = [&]() {
     llvm::SmallVector<char, 40> function_name_vector;
@@ -835,7 +834,7 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
               .EmitComputation(
                   embedded_computation, embedded_computation->name(),
                   /*is_top_level_computation=*/false,
-                  &schedule.sequence(embedded_computation).instructions())
+                  schedule.sequence(embedded_computation).instructions())
               .status());
     }
     const string& entry_point_name = options.entry_point_name();
@@ -843,7 +842,7 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
                         ir_emitter.EmitComputation(
                             computation, entry_point_name,
                             /*is_top_level_computation=*/true,
-                            &schedule.sequence(computation).instructions()));
+                            schedule.sequence(computation).instructions()));
 
     CHECK(entry_function->getName() == llvm_ir::AsStringRef(entry_point_name));
 
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 38ab5b78d2..62a4e8d350 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -111,10 +111,9 @@ IrEmitter::IrEmitter(
 StatusOr<llvm::Function*> IrEmitter::EmitComputation(
     HloComputation* computation, const string& function_name_prefix,
     bool is_top_level_computation,
-    const std::vector<HloInstruction*>* instruction_order) {
+    absl::Span<HloInstruction* const> instruction_order) {
   string function_name = name_uniquer_.GetUniqueName(function_name_prefix);
-  VLOG(2) << "Emitting IR for CPU function [" << function_name_prefix
-          << "]; ordered? " << (instruction_order != nullptr);
+  VLOG(2) << "Emitting IR for CPU function [" << function_name_prefix << "]";
   is_top_level_computation_ = is_top_level_computation;
   num_dynamic_loop_bounds_ = 0;
   if (!computation->root_instruction()->outer_dimension_partitions().empty()) {
@@ -141,11 +140,7 @@ StatusOr<llvm::Function*> IrEmitter::EmitComputation(
   bool use_rdtscp = arch_type_ == llvm::Triple::ArchType::x86 ||
                     arch_type_ == llvm::Triple::ArchType::x86_64;
   profiling_state_ = ProfilingState(use_rdtscp);
-  if (instruction_order == nullptr) {
-    TF_RETURN_IF_ERROR(computation->Accept(this));
-  } else {
-    TF_RETURN_IF_ERROR(computation->AcceptOrdered(this, *instruction_order));
-  }
+  TF_RETURN_IF_ERROR(computation->AcceptOrdered(this, instruction_order));
   llvm::Function* ir_function = compute_function_->function();
   InsertOrDie(&emitted_functions_, computation, ir_function);
   // Delete 'compute_function', finalizing 'ir_function' and restoring caller
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 559a8162a2..1db75cc8be 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -101,7 +101,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
   StatusOr<llvm::Function*> EmitComputation(
       HloComputation* computation, const string& function_name_prefix,
       bool is_top_level_computation,
-      const std::vector<HloInstruction*>* instruction_order);
+      absl::Span<HloInstruction* const> instruction_order);
 
   llvm::IRBuilder<>* b() { return &b_; }
 
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index ff122b529b..80f7247048 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -797,7 +797,7 @@ Status HloComputation::AcceptWithOperandOrder(
 template <typename HloInstructionPtr>
 Status HloComputation::AcceptOrdered(
     DfsHloVisitorBase<HloInstructionPtr>* visitor,
-    const std::vector<HloInstruction*>& order) const {
+    absl::Span<HloInstruction* const> order) const {
   VLOG(3) << "Accepting visitor with order.";
   for (HloInstruction* root : CollectUnreachableRoots()) {
     TF_RET_CHECK(std::find(order.begin(), order.end(), root) != order.end())
@@ -827,9 +827,9 @@ Status HloComputation::AcceptOrdered(
 
 // Explicit instantiations.
 template Status HloComputation::AcceptOrdered(
-    DfsHloVisitor*, const std::vector<HloInstruction*>&) const;
+    DfsHloVisitor*, absl::Span<HloInstruction* const>) const;
 template Status HloComputation::AcceptOrdered(
-    ConstDfsHloVisitor*, const std::vector<HloInstruction*>&) const;
+    ConstDfsHloVisitor*, absl::Span<HloInstruction* const>) const;
 
 Status HloComputation::Accept(
     const std::function<Status(HloInstruction*)>& visitor_func) {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index c584e4c7ca..da8a5320bb 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -307,7 +307,7 @@ class HloComputation {
   // be a topological sort of all instructions in the computation.
   template <typename HloInstructionPtr>
   Status AcceptOrdered(DfsHloVisitorBase<HloInstructionPtr>* visitor,
-                       const std::vector<HloInstruction*>& order) const;
+                       absl::Span<HloInstruction* const> order) const;
 
   // Same as Accept() above, but the visitor is given as a function.
   Status Accept(const std::function<Status(HloInstruction*)>& visitor_func);
-- 
GitLab


From b4c28561416e97f8029ad0c009cbe15e4fb75563 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Tue, 11 Dec 2018 23:14:27 -0800
Subject: [PATCH 416/873] Make AddWhileInputHack handle control inputs
 correctly.

PiperOrigin-RevId: 225131361
---
 tensorflow/core/graph/graph.cc          | 8 +++++++-
 tensorflow/python/framework/ops_test.py | 3 +++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 550e3ef915..223fc85f9f 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -555,7 +555,13 @@ Status Graph::AddWhileInputHack(Node* new_src, int new_src_index, Node* dst) {
         dst->DebugString());
   }
   TF_RETURN_IF_ERROR(IsValidOutputTensor(new_src, new_src_index));
-  int dst_index = dst->in_edges().size();
+  // Find the current number of data inputs. We'll add the new edge to the next
+  // missing data input.
+  int dst_index = 0;
+  for (const Edge* edge : dst->in_edges()) {
+    if (edge->IsControlEdge()) continue;
+    ++dst_index;
+  }
   TF_RETURN_IF_ERROR(IsValidInputTensor(dst, dst_index));
   AddEdge(new_src, new_src_index, dst, dst_index);
   dst->MaybeCopyOnWrite();
diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py
index 0fcbcd6ee4..2d7ee1a99e 100644
--- a/tensorflow/python/framework/ops_test.py
+++ b/tensorflow/python/framework/ops_test.py
@@ -615,6 +615,9 @@ class OperationTest(test_util.TensorFlowTestCase):
       self.assertEqual(while_op.type, "While")
       orig_num_inputs = len(while_op.inputs)
 
+      # Make sure we can handle the while op having a control input.
+      while_op._add_control_input(constant_op.constant(0).op)
+
       new_input1 = constant_op.constant(1.0)
       new_input2 = constant_op.constant(True)
 
-- 
GitLab


From 9585202ed095ec63c1a6f947a0197fce852e9036 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 00:01:07 -0800
Subject: [PATCH 417/873] Remove
 :android_tensorflow_lib_selective_registration* aliases, targets using
 selective registration can now use the :android_tensorflow_lib_lite* targets.
 PiperOrigin-RevId: 225134497

---
 tensorflow/core/BUILD | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index d92f0ba655..276005038c 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -49,7 +49,7 @@
 # filegroup ":android_proto_srcs" - Protos
 # filegroup ":android_srcs" - Core sources
 # cc_library ":android_tensorflow_lib" - Native library
-# cc_library ":android_tensorflow_lib_selective_registration" - Native library
+# cc_library ":android_tensorflow_lib_lite" - Native library, without ops,
 #   supporting SELECTIVE_REGISTRATION feature.
 # portable_proto_library ":android_proto_lib" (Google-internal)
 #
@@ -1832,27 +1832,6 @@ cc_library(
     alwayslink = 1,
 )
 
-# Android library for use with the SELECTIVE_REGISTRATION feature.
-# Does not contain operators. In contrast to android_tensorflow_lib_lite,
-# this links in framework support for all types, relying on selective
-# registration of ops to prune code size.
-#
-# TODO(gonnet): Move all users of these aliases to the corresponding
-#     :android_tensorflow_lib_lite* targets and remove.
-alias(
-    name = "android_tensorflow_lib_selective_registration",
-    actual = ":android_tensorflow_lib_lite",
-    visibility = ["//visibility:public"],
-)
-
-# Android library for use with the SELECTIVE_REGISTRATION feature with
-# no proto_rtti.
-alias(
-    name = "android_tensorflow_lib_selective_registration_nortti",
-    actual = ":android_tensorflow_lib_lite_nortti",
-    visibility = ["//visibility:public"],
-)
-
 filegroup(
     name = "android_op_registrations_and_gradients",
     srcs = glob(
-- 
GitLab


From d856a3ca443599feb7d577ba17943a76ffa65c1e Mon Sep 17 00:00:00 2001
From: avijit-nervana <avijit.chakraborty@intel.com>
Date: Wed, 12 Dec 2018 00:33:46 -0800
Subject: [PATCH 418/873] Upgraded to v0.9.0 and fixed the broken MacOS build

---
 tensorflow/tensorflow.bzl          |  3 +--
 tensorflow/workspace.bzl           | 32 +++++++++++++++---------------
 third_party/ngraph/ngraph.BUILD    | 18 +++++++++++++----
 third_party/ngraph/ngraph_tf.BUILD |  2 ++
 4 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index ed1de5a31c..d93e0df5e4 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1666,8 +1666,7 @@ def tf_py_wrap_cc(
     )
     extra_linkopts = select({
         "@local_config_cuda//cuda:darwin": [
-            "-Wl,-exported_symbols_list",
-            "$(location %s.lds)" % vscriptname,
+            "-Wl,-exported_symbols_list,$(location %s.lds)" % vscriptname,
         ],
         clean_dep("//tensorflow:windows"): [],
         "//conditions:default": [
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index f8b6bd1a3f..9ed668e1c5 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -828,44 +828,44 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "tbb",
         build_file = clean_dep("//third_party/ngraph:tbb.BUILD"),
-        sha256 = "724686f90bcda78f13b76f297d964008737ccd6399328143c1c0093e73ae6a13",
-        strip_prefix = "tbb-tbb_2018",
+        sha256 = "c3245012296f09f1418b78a8c2f17df5188b3bd0db620f7fd5fabe363320805a",
+        strip_prefix = "tbb-2019_U1",
         urls = [
-            "https://mirror.bazel.build/github.com/01org/tbb/archive/tbb_2018.zip",
-            "https://github.com/01org/tbb/archive/tbb_2018.zip",
+            "https://mirror.bazel.build/github.com/01org/tbb/archive/2019_U1.zip",
+            "https://github.com/01org/tbb/archive/2019_U1.zip",
         ],
     )
 
     tf_http_archive(
         name = "ngraph",
         build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"),
-        sha256 = "2b28f9c9f063b96825a96d56d7f7978c9a1c55c9b25175c20dd49a8a77cb0305",
-        strip_prefix = "ngraph-0.9.1",
+        sha256 = "a1780f24a1381fc25e323b4b2d08b6ef5129f42e011305b2a34dcf43a48030d5",
+        strip_prefix = "ngraph-0.11.0",
         urls = [
-            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.9.1.tar.gz",
-            "https://github.com/NervanaSystems/ngraph/archive/v0.9.1.tar.gz",
+            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.11.0.tar.gz",
+            "https://github.com/NervanaSystems/ngraph/archive/v0.11.0.tar.gz"
         ],
     )
 
     tf_http_archive(
         name = "nlohmann_json_lib",
         build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"),
-        sha256 = "9f3549824af3ca7e9707a2503959886362801fb4926b869789d6929098a79e47",
-        strip_prefix = "json-3.1.1",
+        sha256 = "c377963a95989270c943d522bfefe7b889ef5ed0e1e15d535fd6f6f16ed70732",
+        strip_prefix = "json-3.4.0",
         urls = [
-            "https://mirror.bazel.build/github.com/nlohmann/json/archive/v3.1.1.tar.gz",
-            "https://github.com/nlohmann/json/archive/v3.1.1.tar.gz",
+            "https://mirror.bazel.build/github.com/nlohmann/json/archive/v3.4.0.tar.gz",
+            "https://github.com/nlohmann/json/archive/v3.4.0.tar.gz",
         ],
     )
 
     tf_http_archive(
         name = "ngraph_tf",
         build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"),
-        sha256 = "89accbc702e68a09775f1011a99dd16561038fd1ce59d566d64450176abaae5c",
-        strip_prefix = "ngraph-tf-0.7.0",
+        sha256 = "742a642d2c6622277df4c902b6830d616d0539cc8cd843d6cdb899bb99e66e36",
+        strip_prefix = "ngraph-tf-0.9.0",
         urls = [
-            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.7.0.tar.gz",
-            "https://github.com/NervanaSystems/ngraph-tf/archive/v0.7.0.tar.gz",
+            "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.9.0.zip",
+            "https://github.com/NervanaSystems/ngraph-tf/archive/v0.9.0.zip"
         ],
     )
 
diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD
index 63e9548c53..bb1f65d347 100644
--- a/third_party/ngraph/ngraph.BUILD
+++ b/third_party/ngraph/ngraph.BUILD
@@ -56,14 +56,16 @@ cc_library(
         "src/ngraph/runtime/cpu/cpu_backend.cpp",
         "src/ngraph/runtime/cpu/cpu_builder.cpp",
         "src/ngraph/runtime/cpu/cpu_call_frame.cpp",
+        "src/ngraph/runtime/cpu/cpu_cse.cpp",
+        "src/ngraph/runtime/cpu/cpu_executor.cpp",
         "src/ngraph/runtime/cpu/cpu_external_function.cpp",
         "src/ngraph/runtime/cpu/cpu_kernels.cpp",
         "src/ngraph/runtime/cpu/cpu_layout_descriptor.cpp",
+        "src/ngraph/runtime/cpu/cpu_op_annotations.cpp",
         "src/ngraph/runtime/cpu/cpu_tensor_view.cpp",
         "src/ngraph/runtime/cpu/cpu_tensor_view_wrapper.cpp",
         "src/ngraph/runtime/cpu/cpu_tracing.cpp",
         "src/ngraph/runtime/cpu/cpu_visualize_tree.cpp",
-        "src/ngraph/runtime/cpu/kernel/eigen_thread_pool.cpp",
         "src/ngraph/runtime/cpu/kernel/pad.cpp",
         "src/ngraph/runtime/cpu/kernel/reduce_max.cpp",
         "src/ngraph/runtime/cpu/kernel/reduce_sum.cpp",
@@ -79,21 +81,27 @@ cc_library(
         "src/ngraph/runtime/cpu/op/conv_relu.cpp",
         "src/ngraph/runtime/cpu/op/convert_layout.cpp",
         "src/ngraph/runtime/cpu/op/group_conv.cpp",
+        "src/ngraph/runtime/cpu/op/group_conv_bias.cpp",
+        "src/ngraph/runtime/cpu/op/halide_op.cpp",
+        "src/ngraph/runtime/cpu/op/leaky_relu.cpp",
         "src/ngraph/runtime/cpu/op/loop_kernel.cpp",
         "src/ngraph/runtime/cpu/op/lstm.cpp",
         "src/ngraph/runtime/cpu/op/matmul_bias.cpp",
         "src/ngraph/runtime/cpu/op/max_pool_with_indices.cpp",
         "src/ngraph/runtime/cpu/op/rnn.cpp",
         "src/ngraph/runtime/cpu/op/sigmoid_mul.cpp",
+        "src/ngraph/runtime/cpu/op/update_slice.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_assignment.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_collapse_dims.cpp",
-        "src/ngraph/runtime/cpu/pass/cpu_concat_inputs.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_horizontal_fusion.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_layout.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_loop_kernel_fusion.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_mat_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_memory_optimization.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_post_layout_optimizations.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_rnn_fusion.cpp",
+        "src/ngraph/runtime/cpu/pass/cpu_reshape_sinking.cpp",
         "src/ngraph/runtime/cpu/pass/cpu_workspace_insertion.cpp",
     ],
     hdrs = glob(["src/ngraph/runtime/cpu/**/*.hpp"]) + glob([]),
@@ -101,7 +109,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.9.1\\"',
+        '-D NGRAPH_VERSION=\\"0.11.0\\"',
         "-D NGRAPH_DEX_ONLY",
         '-D PROJECT_ROOT_DIR=\\"\\"',
     ],
@@ -124,11 +132,13 @@ cc_library(
         "src/ngraph/builder/*.cpp",
         "src/ngraph/descriptor/*.cpp",
         "src/ngraph/descriptor/layout/*.cpp",
+        "src/ngraph/op/experimental/generate_mask.cpp",
         "src/ngraph/op/experimental/quantized_avg_pool.cpp",
         "src/ngraph/op/experimental/quantized_conv_bias.cpp",
         "src/ngraph/op/experimental/quantized_conv_relu.cpp",
         "src/ngraph/op/experimental/quantized_conv.cpp",
         "src/ngraph/op/experimental/quantized_max_pool.cpp",
+        "src/ngraph/op/experimental/shape_of.cpp",
         "src/ngraph/op/*.cpp",
         "src/ngraph/op/util/*.cpp",
         "src/ngraph/pattern/*.cpp",
@@ -142,7 +152,7 @@ cc_library(
         "-I external/ngraph/src",
         "-I external/nlohmann_json_lib/include/",
         '-D SHARED_LIB_EXT=\\".so\\"',
-        '-D NGRAPH_VERSION=\\"0.9.1\\"',
+        '-D NGRAPH_VERSION=\\"0.11.0\\"',
         '-D PROJECT_ROOT_DIR=\\"\\"',
     ],
     visibility = ["//visibility:public"],
diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index db9a66f9b5..d4619395f8 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -18,6 +18,8 @@ cc_library(
         "src/ngraph_api.h",
         "src/ngraph_assign_clusters.cc",
         "src/ngraph_assign_clusters.h",
+        "src/ngraph_backend_manager.h",
+        "src/ngraph_backend_manager.cc",
         "src/ngraph_builder.cc",
         "src/ngraph_builder.h",
         "src/ngraph_capture_variables.cc",
-- 
GitLab


From a0d9780dfbc3ab7a18e58affc5aaafc5c19f419e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 00:34:52 -0800
Subject: [PATCH 419/873] Use proper iso8601 time format in log.

Format in the previous state didn't give the timezone.

PiperOrigin-RevId: 225138116
---
 tensorflow/python/training/evaluation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/training/evaluation.py b/tensorflow/python/training/evaluation.py
index a10178f8cf..37d46795b1 100644
--- a/tensorflow/python/training/evaluation.py
+++ b/tensorflow/python/training/evaluation.py
@@ -253,7 +253,7 @@ def _evaluate_once(checkpoint_path,
       if isinstance(h, (_StopAfterNEvalsHook, _MultiStepStopAfterNEvalsHook)):
         h._set_evals_completed_tensor(eval_step_value)  # pylint: disable=protected-access
 
-  logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
+  logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%dT%H:%M:%SZ',
                                                          time.gmtime()))
 
   # Prepare the session creator.
-- 
GitLab


From 25337d2065bd3ef79b9018714c0cb5af46ca06dc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 01:04:38 -0800
Subject: [PATCH 420/873] compat: Update forward compatibility horizon to
 2018-12-12

PiperOrigin-RevId: 225140840
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 679dcf9696..57a4c8be7d 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 11)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 12)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 1068d773964b06b0a086714aad9bc2760d649c24 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 07:05:09 -0800
Subject: [PATCH 421/873] Docstring fixes

PiperOrigin-RevId: 225178266
---
 tensorflow/python/ops/ragged/ragged_factory_ops.py    | 6 +++---
 tensorflow/python/ops/ragged/ragged_functional_ops.py | 6 +++---
 tensorflow/python/ops/ragged/ragged_getitem.py        | 2 +-
 tensorflow/python/ops/ragged/ragged_tensor.py         | 2 +-
 tensorflow/python/ops/ragged/ragged_tensor_value.py   | 5 ++++-
 tensorflow/python/ops/ragged/segment_id_ops.py        | 4 ++--
 6 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/ops/ragged/ragged_factory_ops.py b/tensorflow/python/ops/ragged/ragged_factory_ops.py
index 695accc652..8cda98765b 100644
--- a/tensorflow/python/ops/ragged/ragged_factory_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_factory_ops.py
@@ -80,9 +80,9 @@ def constant(pylist, dtype=None, ragged_rank=None, inner_shape=None, name=None):
 def constant_value(pylist, dtype=None, ragged_rank=None, inner_shape=None):
   """Constructs a RaggedTensorValue from a nested Python list.
 
-  > Warning: This function returns a `RaggedTensorValue`, not a `RaggedTensor`.
-  > If you wish to construct a constant `RaggedTensor`, use
-  > [`ragged.constant(...)`](constant.md) instead.
+  Warning: This function returns a `RaggedTensorValue`, not a `RaggedTensor`.
+  If you wish to construct a constant `RaggedTensor`, use
+  [`ragged.constant(...)`](constant.md) instead.
 
   Example:
 
diff --git a/tensorflow/python/ops/ragged/ragged_functional_ops.py b/tensorflow/python/ops/ragged/ragged_functional_ops.py
index 7344c96465..b6937a1c37 100644
--- a/tensorflow/python/ops/ragged/ragged_functional_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_functional_ops.py
@@ -26,15 +26,15 @@ from tensorflow.python.util.tf_export import tf_export
 
 @tf_export("ragged.map_flat_values")
 def map_flat_values(op, *args, **kwargs):
-  """Applies `op` to the inner values of one or more RaggedTensors.
+  """Applies `op` to the values of one or more RaggedTensors.
 
   Replaces any `RaggedTensor` in `args` or `kwargs` with its `flat_values`
   tensor, and then calls `op`.  Returns a `RaggedTensor` that is constructed
-  from the input `RaggedTensor`s' `splits` and the value returned by
+  from the input `RaggedTensor`s' `nested_row_splits` and the value returned by
   the `op`.
 
   If the input arguments contain multiple `RaggedTensor`s, then they must have
-  identical `splits`.
+  identical `nested_row_splits`.
 
   Examples:
 
diff --git a/tensorflow/python/ops/ragged/ragged_getitem.py b/tensorflow/python/ops/ragged/ragged_getitem.py
index 0fa72a3658..001a400596 100644
--- a/tensorflow/python/ops/ragged/ragged_getitem.py
+++ b/tensorflow/python/ops/ragged/ragged_getitem.py
@@ -38,7 +38,7 @@ def ragged_tensor_getitem(self, key):
   IndexError; (2) use a default value; or (3) skip that value and return a
   tensor with fewer rows than we started with.  Following the guiding
   principles of Python ("In the face of ambiguity, refuse the temptation to
-  guess" <go/pep20>), we simply disallow this operation.
+  guess"), we simply disallow this operation.
 
   Any dimensions added by `array_ops.newaxis` will be ragged if the following
   dimension is ragged.
diff --git a/tensorflow/python/ops/ragged/ragged_tensor.py b/tensorflow/python/ops/ragged/ragged_tensor.py
index acf3a3841d..fd334e6cc7 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor.py
@@ -46,7 +46,7 @@ _eval_using_default_session = ops._eval_using_default_session
 
 @tf_export("RaggedTensor")
 class RaggedTensor(object):
-  """Represents a ragged tensor (go/ragged).
+  """Represents a ragged tensor.
 
   A `RaggedTensor` is a tensor with one or more *ragged dimensions*, which are
   dimensions whose slices may have different lengths.  For example, the inner
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_value.py b/tensorflow/python/ops/ragged/ragged_tensor_value.py
index 1162487f0f..c5e498e95f 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_value.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_value.py
@@ -27,7 +27,10 @@ from tensorflow.python.util.tf_export import tf_export
 class RaggedTensorValue(object):
   """Represents the value of a `RaggedTensor`.
 
-  See `RaggedTensor` for a description of ragged tensors.
+  Warning: `RaggedTensorValue` should only be used in graph mode; in
+  eager mode, the `tf.RaggedTensor` class contains its value directly.
+
+  See `tf.RaggedTensor` for a description of ragged tensors.
   """
 
   def __init__(self, values, row_splits):
diff --git a/tensorflow/python/ops/ragged/segment_id_ops.py b/tensorflow/python/ops/ragged/segment_id_ops.py
index ee17e4d636..42dc13223b 100644
--- a/tensorflow/python/ops/ragged/segment_id_ops.py
+++ b/tensorflow/python/ops/ragged/segment_id_ops.py
@@ -32,7 +32,7 @@ from tensorflow.python.util.tf_export import tf_export
 # https://www.tensorflow.org/api_guides/python/math_ops#Segmentation
 @tf_export("ragged.row_splits_to_segment_ids")
 def row_splits_to_segment_ids(splits, name=None):
-  """Generates the segmentation corresponding to a RaggedTensor `splits` vector.
+  """Generates the segmentation corresponding to a RaggedTensor `row_splits`.
 
   Returns an integer vector `segment_ids`, where `segment_ids[i] == j` if
   `splits[j] <= i < splits[j+1]`.  Example:
@@ -67,7 +67,7 @@ def row_splits_to_segment_ids(splits, name=None):
 # https://www.tensorflow.org/api_guides/python/math_ops#Segmentation
 @tf_export("ragged.segment_ids_to_row_splits")
 def segment_ids_to_row_splits(segment_ids, num_segments=None, name=None):
-  """Generates the RaggedTensor `splits` vector corresponding to a segmentation.
+  """Generates the RaggedTensor `row_splits` corresponding to a segmentation.
 
   Returns an integer vector `splits`, where `splits[0] = 0` and
   `splits[i] = splits[i-1] + count(segment_ids==i)`.  Example:
-- 
GitLab


From bf16a7511a2d29c460d4e1a771d53ef692a2d32b Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Wed, 12 Dec 2018 07:21:17 -0800
Subject: [PATCH 422/873] Wrap global_variables_initializer with
 self.evaluate()

In addition, fix a few eval() calls as well as remove some
@test_util.run_v1_only annotations.

PiperOrigin-RevId: 225180248
---
 .../distribute/distribute_coordinator_test.py |   4 +-
 tensorflow/python/eager/function_test.py      |   2 +-
 .../framework/auto_control_deps_test.py       |  16 +-
 .../kernel_tests/checkpoint_ops_test.py       |   7 +-
 .../conditional_accumulator_test.py           |   2 +-
 .../kernel_tests/control_flow_ops_py_test.py  | 139 ++++++------
 .../dense_update_ops_no_tsan_test.py          |  13 +-
 .../kernel_tests/functional_ops_test.py       |   2 +-
 .../partitioned_variables_test.py             |  52 ++---
 .../resource_variable_ops_test.py             |   6 +-
 .../kernel_tests/tensor_array_ops_test.py     |   8 +-
 .../python/kernel_tests/variables_test.py     |  81 ++++---
 .../python/ops/control_flow_ops_test.py       |   2 +-
 tensorflow/python/ops/gradients_test.py       |   2 +-
 .../python/saved_model/saved_model_test.py    |   2 +-
 tensorflow/python/training/adagrad_test.py    |  12 +-
 .../python/training/checkpoint_ops_test.py    |  14 +-
 tensorflow/python/training/input_test.py      |  54 ++---
 .../python/training/moving_averages_test.py   |  12 +-
 .../python/training/queue_runner_test.py      |  14 +-
 tensorflow/python/training/saver_test.py      | 198 +++++++++---------
 .../python/training/slot_creator_test.py      |  12 +-
 .../python/training/training_ops_test.py      |  12 +-
 23 files changed, 322 insertions(+), 344 deletions(-)

diff --git a/tensorflow/python/distribute/distribute_coordinator_test.py b/tensorflow/python/distribute/distribute_coordinator_test.py
index 7598c105c2..dbed3e7f59 100644
--- a/tensorflow/python/distribute/distribute_coordinator_test.py
+++ b/tensorflow/python/distribute/distribute_coordinator_test.py
@@ -230,7 +230,7 @@ class DistributeCoordinatorTestBase(test.TestCase):
       with ops.device("/job:worker/task:0"):
         result = math_ops.add_n(xs)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       result_value = sess.run(result)
     self.assertEqual(result_value, expected)
     if result_value == expected:
@@ -278,7 +278,7 @@ class DistributeCoordinatorTestBase(test.TestCase):
       train_op = control_flow_ops.group([x_add, y_sub])
 
       if context.is_chief:
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
 
       # Synchronize workers after initializaton.
       if context.has_barrier:
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 8d1f8c21d9..2697ab5b17 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -579,7 +579,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
           return self.v * 2
 
       o = HasAVar()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       call = def_function.function(o.call)
       op = call()
       self.assertAllEqual(self.evaluate(op), 2.0)
diff --git a/tensorflow/python/framework/auto_control_deps_test.py b/tensorflow/python/framework/auto_control_deps_test.py
index 5f5de45b9e..d81adef26a 100644
--- a/tensorflow/python/framework/auto_control_deps_test.py
+++ b/tensorflow/python/framework/auto_control_deps_test.py
@@ -39,7 +39,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
   def testBasic(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       with acd.AutomaticControlDependencies() as c:
         v.assign(v + 1)
         v.assign(2 * v)
@@ -51,7 +51,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
   def testCondMustRun(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       p = array_ops.placeholder(dtype=dtypes.bool)
       with acd.AutomaticControlDependencies() as c:
 
@@ -73,7 +73,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
   def testCondMustRunSeparateRead(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       p = array_ops.placeholder(dtype=dtypes.bool)
       with acd.AutomaticControlDependencies() as c:
 
@@ -97,7 +97,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
   def testCondNested(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       p = array_ops.placeholder(dtype=dtypes.bool)
       q = array_ops.placeholder(dtype=dtypes.bool)
       with acd.AutomaticControlDependencies() as c:
@@ -132,7 +132,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
   def testCondOneBranch(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       p = array_ops.placeholder(dtype=dtypes.bool)
       with acd.AutomaticControlDependencies() as c:
 
@@ -153,7 +153,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
   def testCondOneBranchUpdateBefore(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       p = array_ops.placeholder(dtype=dtypes.bool)
       with acd.AutomaticControlDependencies() as c:
         v.assign(v * 2)
@@ -175,7 +175,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
   def testCondOneBranchUpdateAfter(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       p = array_ops.placeholder(dtype=dtypes.bool)
       with acd.AutomaticControlDependencies() as c:
 
@@ -211,7 +211,7 @@ class AutomaticControlDependenciesTest(test.TestCase):
   def testDecorator(self):
     with context.graph_mode(), self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       @acd.automatic_control_dependencies
       def f():
diff --git a/tensorflow/python/kernel_tests/checkpoint_ops_test.py b/tensorflow/python/kernel_tests/checkpoint_ops_test.py
index 6e289bf9b7..dd5ac1f763 100644
--- a/tensorflow/python/kernel_tests/checkpoint_ops_test.py
+++ b/tensorflow/python/kernel_tests/checkpoint_ops_test.py
@@ -105,7 +105,6 @@ class GenerateVocabRemappingTest(test.TestCase):
       self.assertAllEqual(expected_num_present, self.evaluate(num_present))
 
 
-@test_util.run_v1_only('b/120545219')
 class LoadAndRemapMatrixTest(test.TestCase):
   """Tests for the load_and_remap_matrix() op."""
 
@@ -126,7 +125,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
 
     save = saver.Saver([matrix])
     with self.cached_session() as sess:
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.bundle_file = os.path.join(test.get_temp_dir(), 'bundle_checkpoint')
       save.save(sess, self.bundle_file)
 
@@ -231,6 +230,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
           np.reshape(initializing_values, (num_rows, num_cols)),
           self.evaluate(remapped_matrix))
 
+  @test_util.run_v1_only('b/120545219')
   def test_load_and_remap_invalid_remapping(self):
     """Tests that errors are raised when an ID maps to multiple new IDs.
 
@@ -262,6 +262,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
     with self.cached_session(), self.assertRaises(errors.UnimplementedError):
       self.evaluate(remapped_matrix)
 
+  @test_util.run_v1_only('b/120545219')
   def test_load_and_remap_incorrect_initializing_values(self):
     """Tests that errors are raised with incorrect number of init values."""
     remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix(
@@ -313,7 +314,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase):
     with self.cached_session() as sess:
       ckpt_path = os.path.join(test.get_temp_dir(), 'temp_ckpt')
       save = saver.Saver([matrix])
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       save.save(sess, ckpt_path)
       num_rows, num_cols = np_value.shape
 
diff --git a/tensorflow/python/kernel_tests/conditional_accumulator_test.py b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
index ce34201706..32a2058750 100644
--- a/tensorflow/python/kernel_tests/conditional_accumulator_test.py
+++ b/tensorflow/python/kernel_tests/conditional_accumulator_test.py
@@ -408,7 +408,7 @@ class ConditionalAccumulatorTest(test.TestCase):
 
       set_global_step_op = q.set_global_step(new_global_step)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       for _ in range(3):
         set_global_step_op.run()
         self.evaluate(inc_global_step)
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index f4a7d5bec9..42cfe9e237 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -140,7 +140,7 @@ class ControlFlowTest(test.TestCase):
       v2 = control_flow_ops.with_dependencies([op], v)
 
       self.assertTrue(isinstance(v2, ops.Tensor))
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(9, self.evaluate(v2))
 
   @test_util.run_v1_only("b/120545219")
@@ -154,7 +154,7 @@ class ControlFlowTest(test.TestCase):
       op = state_ops.assign(enter_v, enter_nine)
       v2 = control_flow_ops.with_dependencies([op], enter_v)
       v3 = control_flow_ops.exit(v2)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(9, self.evaluate(v3))
 
   @test_util.run_v1_only("b/120545219")
@@ -165,7 +165,7 @@ class ControlFlowTest(test.TestCase):
       p = constant_op.constant(True)
       v1 = control_flow_ops._SwitchRefOrTensor(v._ref(), p)  # pylint: disable=protected-access
       v2 = state_ops.assign(v1[1], 9)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(9, self.evaluate(v2))
 
   def testEnterMulExit(self):
@@ -205,8 +205,8 @@ class ControlFlowTest(test.TestCase):
       switch_op = control_flow_ops.switch(data, pred)
       merge_op = control_flow_ops.merge(switch_op)[0]
 
-      val = merge_op.values.eval()
-      ind = merge_op.indices.eval()
+      val = merge_op.values
+      ind = merge_op.indices
     self.assertAllEqual(np.arange(1, 7), val)
     self.assertAllEqual(np.arange(0, 12, 2), ind)
 
@@ -418,8 +418,8 @@ class ControlFlowTest(test.TestCase):
       fn2 = lambda: ops.IndexedSlices(math_ops.subtract(x.values, 1), indices)
       r = control_flow_ops.cond(pred, fn1, fn2)
 
-      val = r.values.eval()
-      ind = r.indices.eval()
+      val = r.values
+      ind = r.indices
     self.assertAllEqual(11, val)
     self.assertAllEqual(0, ind)
 
@@ -437,8 +437,8 @@ class ControlFlowTest(test.TestCase):
       fn2 = lambda: sparse_tensor.SparseTensor(
           indices, x.values - 1, dense_shape=shape)
       r = control_flow_ops.cond(pred, fn1, fn2)
-      self.assertAllEqual([3.0, 5.0], r.values.eval())
-      self.assertAllEqual([[1], [4]], r.indices.eval())
+      self.assertAllEqual([3.0, 5.0], r.values)
+      self.assertAllEqual([[1], [4]], r.indices)
       self.assertAllEqual(r.values.get_shape(), (2,))
 
   @test_util.run_v1_only("b/120545219")
@@ -446,7 +446,7 @@ class ControlFlowTest(test.TestCase):
 
     with self.cached_session():
       rv = resource_variable_ops.ResourceVariable(True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       t = ops.convert_to_tensor(1.0)
 
       def case():
@@ -454,7 +454,8 @@ class ControlFlowTest(test.TestCase):
         with ops.control_dependencies([assign]):
           return array_ops.identity(t)
 
-      self.assertEqual(1.0, control_flow_ops.cond(rv, case, lambda: t).eval())
+      self.assertEqual(
+          1.0, self.evaluate(control_flow_ops.cond(rv, case, lambda: t)))
 
   @test_util.run_v1_only("b/120545219")
   def testCondWithTensorArrayGrad(self):
@@ -483,8 +484,8 @@ class ControlFlowTest(test.TestCase):
       fn2 = lambda: ops.IndexedSlices(math_ops.subtract(x.values, 1), i_64)
       r = control_flow_ops.cond(pred, fn1, fn2)
 
-      val = r.values.eval()
-      ind = r.indices.eval()
+      val = r.values
+      ind = r.indices
     self.assertAllEqual(11, val)
     self.assertAllEqual(0, ind)
     self.assertTrue(ind.dtype == np.int64)
@@ -565,8 +566,8 @@ class ControlFlowTest(test.TestCase):
 
     if not context.executing_eagerly():
       with self.cached_session():
-        variables.global_variables_initializer().run()
-        result = f().eval()
+        self.evaluate(variables.global_variables_initializer())
+        result = self.evaluate(f())
         self.assertEqual(True, result)
         # Only second cond result was fetched, so v1 assign shouldn't run.
         self.assertEqual(7, self.evaluate(v1))
@@ -605,7 +606,7 @@ class ControlFlowTest(test.TestCase):
       fn2 = lambda: v1
       r = control_flow_ops.cond(pred, fn1, fn2)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       result = self.evaluate(r)
       self.assertAllEqual(np.array([7]), result)
 
@@ -895,7 +896,7 @@ class ControlFlowTest(test.TestCase):
       fn2 = lambda: array_ops.gather(v1, [1, 1])
       r = control_flow_ops.cond(pred, fn1, fn2)
       grad = gradients_impl.gradients(r, [v1])[0]
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       # Should just be [1, 1], but possibly a sparse representation
       gv, gi = sess.run([grad.values, grad.indices], feed_dict={c: 1})
       dense_gv = [
@@ -942,11 +943,11 @@ class ControlFlowTest(test.TestCase):
     if not context.executing_eagerly():
       with self.cached_session():
         with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(build_cond().eval(), 10)
+          self.assertEqual(build_cond(), 10)
         self.assertEqual(printed.contents(), "C\n")
 
         with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(build_nested_cond().eval(), 10)
+          self.assertEqual(build_nested_cond(), 10)
         self.assertEqual(printed.contents(), "C\n")
 
     # In defuns, all prints should execute in program order.
@@ -996,11 +997,11 @@ class ControlFlowTest(test.TestCase):
     if not context.executing_eagerly():
       with self.cached_session():
         with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(build_while()[0].eval(), 2)
+          self.assertEqual(build_while()[0], 2)
         self.assertEqual(printed.contents(), "D\nD\n")
 
         with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(build_nested_while()[0].eval(), 2)
+          self.assertEqual(build_nested_while()[0], 2)
         self.assertEqual(printed.contents(), "D\nD\n")
 
     # In defuns, all prints should execute in program order.
@@ -1049,8 +1050,8 @@ class ControlFlowTest(test.TestCase):
 
       result = control_flow_ops.while_loop(cond=lambda i: i < 2,
                                            body=body_fn, loop_vars=[1])
-      self.assertAllEqual(result.eval(), 2)
-      self.assertAllEqual(v.eval(), 1.0)
+      self.assertAllEqual(result, 2)
+      self.assertAllEqual(v.read_value(), 1.0)
 
   @test_util.disable_control_flow_v2("b/79881896 (control deps)")
   @test_util.run_v1_only("b/120545219")
@@ -1067,7 +1068,7 @@ class ControlFlowTest(test.TestCase):
       result = control_flow_ops.while_loop(cond=lambda i: i < 5,
                                            body=body_fn, loop_vars=[0])
       self.evaluate(result)
-      self.assertAllEqual(v.eval(), 1.0)
+      self.assertAllEqual(self.evaluate(v), 1.0)
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   @test_util.run_v1_only("b/120545219")
@@ -1085,7 +1086,7 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(c, b, [i, x], parallel_iterations=5)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual(r[0].dtype, dtypes.int32)
       self.assertEqual(r[1].dtype, dtypes.int32_ref)
@@ -1333,7 +1334,7 @@ class ControlFlowTest(test.TestCase):
       d = ops.convert_to_tensor(100)
       r = control_flow_ops.while_loop(lambda i, m, c, o: math_ops.less(i, d),
                                       compute, [i, m, c, o])
-      result = r[3].eval()
+      result = r[3]
     self.assertAllEqual(10100, result)
 
   @test_util.run_deprecated_v1
@@ -1355,7 +1356,7 @@ class ControlFlowTest(test.TestCase):
       s = array_ops.size(x)
       r = control_flow_ops.while_loop(lambda i, m, c, o: math_ops.less(i, s),
                                       compute, [i, m, c, o])
-      result = r[3].eval()
+      result = r[3]
     self.assertAllEqual(42, result)
 
   @test_util.run_v1_only("b/120545219")
@@ -1380,7 +1381,7 @@ class ControlFlowTest(test.TestCase):
                                           tensor_shape.unknown_shape(),
                                           tensor_shape.unknown_shape()
                                       ])
-      result = r[2].eval()
+      result = r[2]
     self.assertAllEqual(np.array([0, 1, 2, 3, 4, 5, 6]), result)
 
   @test_util.disable_control_flow_v2("b/116338794 (buffer_reuse)")
@@ -1634,7 +1635,7 @@ class ControlFlowTest(test.TestCase):
 
       res = control_flow_ops.while_loop(
           condition, body, [n, r], parallel_iterations=1)
-      self.assertAllEqual(12, res[1].eval())
+      self.assertAllEqual(12, res[1])
 
   @test_util.run_deprecated_v1
   def testWhileWithControl_2(self):
@@ -1721,7 +1722,7 @@ class ControlFlowTest(test.TestCase):
             return i + 1
 
       r = control_flow_ops.while_loop(loop_condition, loop_body, (i0,))
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(4, self.evaluate(r))
       self.assertAllClose(65536.0, self.evaluate(v))
 
@@ -1747,7 +1748,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.cond(
           constant_op.constant(False), lambda: constant_op.constant(1.0),
           false_branch)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(6.0, self.evaluate(r))
       self.assertEqual(99, self.evaluate(v))
 
@@ -1890,7 +1891,7 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(
           loop_iterator, loop_body, [n], parallel_iterations=1)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(3, self.evaluate(r))
       result = self.evaluate(select)
       self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
@@ -1916,7 +1917,7 @@ class ControlFlowTest(test.TestCase):
 
       r = control_flow_ops.while_loop(
           loop_iterator, loop_body, [n], parallel_iterations=1)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(3, self.evaluate(r))
       result1 = self.evaluate(select1)
       self.assertAllClose(np.array([10.0, 10.0, 10.0]), result1)
@@ -1942,8 +1943,8 @@ class ControlFlowTest(test.TestCase):
           loop_iterator,
           loop_body, [n, array_ops.identity(select)],
           parallel_iterations=1)
-      variables.global_variables_initializer().run()
-      result = r[1].eval()
+      self.evaluate(variables.global_variables_initializer())
+      result = r[1]
     self.assertAllClose(np.array([10.0, 10.0, 10.0]), result)
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
@@ -1952,7 +1953,7 @@ class ControlFlowTest(test.TestCase):
     with self.cached_session():
       var_a = variables.Variable(0, name="a")
       var_b = variables.Variable(0, name="b")
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       c = constant_op.constant(0, name="c")
       asn1 = state_ops.assign_add(var_a, 1, name="a_add")
@@ -1982,7 +1983,7 @@ class ControlFlowTest(test.TestCase):
       # Create some variables.
       var_a = variables.Variable(0, name="a")
       var_b = variables.Variable(0, name="b")
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       # Change condition to check var_b
       def pred(_):
@@ -2014,7 +2015,7 @@ class ControlFlowTest(test.TestCase):
       var_a = variables.Variable(0, name="a")
       var_b = variables.Variable(0, name="b")
       c = constant_op.constant(0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       # Loop condition
       def pred(i):
@@ -2054,7 +2055,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [i], parallel_iterations=1)
       self.assertEqual([10], self.evaluate(r))
       for i in xrange(10):
-        self.assertEqual([i], q.dequeue().eval())
+        self.assertEqual([i], self.evaluate(q.dequeue()))
 
   @test_util.run_v1_only("b/120545219")
   def testWhileTimeOut(self):
@@ -2272,8 +2273,8 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [v], parallel_iterations=1)
 
       r = gradients_impl.gradients(r, a)
-      variables.global_variables_initializer().run()
-      self.assertAllClose(216.0, r[0].eval())
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllClose(216.0, r[0])
 
   @test_util.run_deprecated_v1
   def testWhileGrad_ResourceVariable(self):
@@ -2285,8 +2286,8 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [v], parallel_iterations=1)
 
       g = gradients_impl.gradients(r, a)
-      variables.global_variables_initializer().run()
-      self.assertAllClose(216.0, g[0].eval())
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllClose(216.0, g[0])
 
   @test_util.run_v1_only("b/120545219")
   def testWhileGradInCond(self):
@@ -2463,13 +2464,13 @@ class ControlFlowTest(test.TestCase):
       rx, ry = control_flow_ops.while_loop(c, b, [x, y], parallel_iterations=1)
 
       r = gradients_impl.gradients([rx, ry], x)
-      self.assertAllClose(304.0, r[0].eval())
+      self.assertAllClose(304.0, r[0])
       r = gradients_impl.gradients([rx, ry], y)
-      self.assertAllClose(124.0, r[0].eval())
+      self.assertAllClose(124.0, r[0])
       r = gradients_impl.gradients([rx], x)
-      self.assertAllClose(295.0, r[0].eval())
+      self.assertAllClose(295.0, r[0])
       r = gradients_impl.gradients([rx], y)
-      self.assertAllClose(120.0, r[0].eval())
+      self.assertAllClose(120.0, r[0])
 
   @test_util.run_deprecated_v1
   def testWhileGrad_Dependency(self):
@@ -2487,9 +2488,9 @@ class ControlFlowTest(test.TestCase):
       ri, rx = control_flow_ops.while_loop(c, b, [i, x], parallel_iterations=1)
 
       r = gradients_impl.gradients([ri, rx], x)
-      self.assertAllClose(1024.0, r[0].eval())
+      self.assertAllClose(1024.0, r[0])
       r = gradients_impl.gradients([rx], x)
-      self.assertAllClose(1024.0, r[0].eval())
+      self.assertAllClose(1024.0, r[0])
 
   @test_util.disable_control_flow_v2("b/116355153 (back_prop flag)")
   @test_util.run_v1_only("b/120545219")
@@ -2501,7 +2502,7 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [v], back_prop=False)
       r = math_ops.add(r, v)
       r = gradients_impl.gradients(r, v)
-      self.assertAllClose(1.0, r[0].eval())
+      self.assertAllClose(1.0, r[0])
 
   @test_util.disable_control_flow_v2("b/113324949 (RefVariable)")
   @test_util.run_v1_only("b/120545219")
@@ -2522,7 +2523,7 @@ class ControlFlowTest(test.TestCase):
           cond=cond, body=body, loop_vars=loop_vars)
       cost = math_ops.reduce_sum(tensors[2])
       grad = gradients_impl.gradients(cost, [variable])
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(np.ones([2, 3]), sess.run(grad[0]))
 
   @test_util.run_deprecated_v1
@@ -2562,7 +2563,7 @@ class ControlFlowTest(test.TestCase):
       _, rx = control_flow_ops.while_loop(c, b, [i, rx], parallel_iterations=1)
 
       r = gradients_impl.gradients([rx], x)
-      self.assertAllClose(1024.0, r[0].eval())
+      self.assertAllClose(1024.0, r[0])
 
   @test_util.run_v1_only("b/120545219")
   def testWhileGrad_ParallelTwoLoops(self):
@@ -2582,7 +2583,7 @@ class ControlFlowTest(test.TestCase):
       rx = math_ops.add(r1, r2)
 
       r = gradients_impl.gradients([rx], x)
-      self.assertAllClose(64.0, r[0].eval())
+      self.assertAllClose(64.0, r[0])
 
   @test_util.run_v1_only("b/120545219")
   def testWhileGrad_OneOutputWithControlDependencyOnSecond(self):
@@ -2697,7 +2698,7 @@ class ControlFlowTest(test.TestCase):
       train_op = optimizer.minimize(math_ops.reduce_mean(math_ops.square(res)))
       self.evaluate(variables.global_variables_initializer())
       self.evaluate(train_op)
-      self.assertAllClose(2.999, self.evaluate(var))
+      self.assertAllClose(2.999, var.read_value())
 
   def _testWhileCondGrad_Simple(self, use_gpu):
     with self.cached_session(use_gpu=use_gpu):
@@ -2784,7 +2785,7 @@ class ControlFlowTest(test.TestCase):
       grad_ys = [variables.VariableV1(73)._ref()]  # pylint: disable=protected-access
       grad = gradients_impl.gradients([r[1]], [x], grad_ys=grad_ys)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual(r[0].dtype, dtypes.int32)
       self.assertEqual(r[1].dtype, dtypes.float32_ref)
@@ -3093,7 +3094,7 @@ class ControlFlowTest(test.TestCase):
       grads = linalg_ops.norm(gradients_impl.gradients(r, vars_)[0])
       z = math_ops.add(r, array_ops.stop_gradient(math_ops.reduce_sum(grads)))
       result = gradients_impl.gradients(z, vars_)[0]
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(5.0, self.evaluate(result))
 
   @test_util.run_v1_only("b/120545219")
@@ -3145,14 +3146,14 @@ class ControlFlowTest(test.TestCase):
               x < y: f1,
               x > z: f2
           }, default=f3, exclusive=True)
-      self.assertAllEqual(r1.eval(), 17)
+      self.assertAllEqual(r1, 17)
 
       r2 = control_flow_ops.case([(y > z, f1), (y > x, f2)], default=f3)
-      self.assertAllEqual(r2.eval(), 23)
+      self.assertAllEqual(r2, 23)
 
       # Duplicate events can happen, first one is selected
       r3 = control_flow_ops.case([(x < y, f1), (x < y, f2)], default=f3)
-      self.assertAllEqual(r3.eval(), 17)
+      self.assertAllEqual(r3, 17)
 
       # Duplicate events cause an error if exclusive = True
       r4 = control_flow_ops.case(
@@ -3162,7 +3163,7 @@ class ControlFlowTest(test.TestCase):
 
       # Check that the default is called if none of the others are
       r5 = control_flow_ops.case({x > y: f1}, default=f3)
-      self.assertAllEqual(r5.eval(), -1)
+      self.assertAllEqual(r5, -1)
 
       ran_once = [False, False, False]
 
@@ -3181,7 +3182,7 @@ class ControlFlowTest(test.TestCase):
           [(x < y, break_run_twice(0)), (x > y, break_run_twice(1))],
           default=lambda: constant_op.constant(2))
 
-      self.assertAllEqual(r6.eval(), 0)
+      self.assertAllEqual(r6, 0)
 
   @test_util.run_v1_only("b/120545219")
   def testCaseSideEffects(self):
@@ -3204,17 +3205,17 @@ class ControlFlowTest(test.TestCase):
       r2 = control_flow_ops.case(
           ((x > y, a), (x > y, b)), default=c, exclusive=True)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1] * 3)
       self.assertEqual(2, self.evaluate(r2))
       self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1, -1, 2])
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1] * 3)
       self.assertEqual(1, self.evaluate(r1))
       self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1, 1, -1])
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllEqual(self.evaluate([v0, v1, v2]), [-1] * 3)
       self.assertEqual(0, self.evaluate(r0))
       self.assertAllEqual(self.evaluate([v0, v1, v2]), [0, -1, -1])
@@ -3237,7 +3238,7 @@ class ControlFlowTest(test.TestCase):
 
       i = control_flow_ops.cond(p, a, b)
       self.assertTrue(isinstance(i, ops.Tensor))
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual(0, self.evaluate(v))
 
@@ -3495,7 +3496,7 @@ class ControlFlowTest(test.TestCase):
           lambda i, v: [i + 1, script_ops.py_func(func, [v], [dtypes.float32])[0]],
           [constant_op.constant(0), constant_op.constant(2.0, dtypes.float32)],
           [tensor_shape.unknown_shape(), tensor_shape.unknown_shape()])
-      self.assertEqual(r[1].eval(), 65536.0)
+      self.assertEqual(self.evaluate(r[1]), 65536.0)
 
   @test_util.run_v1_only("b/120545219")
   def testWhileFuncBasic(self):
@@ -3512,8 +3513,8 @@ class ControlFlowTest(test.TestCase):
           [tensor_shape.unknown_shape(),
            tensor_shape.unknown_shape()])
       grad = gradients_impl.gradients(r, x)[0]
-      self.assertEqual(r[1].eval(), 65536.0)
-      self.assertEqual(grad.eval(), 524288.0)
+      self.assertEqual(self.evaluate(r[1]), 65536.0)
+      self.assertEqual(self.evaluate(grad), 524288.0)
       # while_v2 does not have stacks.
       if not control_flow_util.ENABLE_CONTROL_FLOW_V2:
         self.assertEqual(
@@ -3877,7 +3878,7 @@ class WhileOpBenchmark(test.Benchmark):
     with session.Session() as sess, ops.device(default_device):
       # Get the initial id i, input x, and kernel.
       i, x, kernel = self._getInitVariables()
-      self.evaluate(variables.global_variables_initializer())
+      variables.global_variables_initializer().run()
 
       if static_unroll:
         for _ in xrange(steps):
diff --git a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
index 4e3da068b8..a778bf231b 100644
--- a/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
+++ b/tensorflow/python/kernel_tests/dense_update_ops_no_tsan_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
@@ -33,7 +32,6 @@ class AssignOpTest(test.TestCase):
   # NOTE(mrry): We exclude thess tests from the TSAN TAP target, because they
   #   contain benign and deliberate data races when multiple threads update
   #   the same parameters without a lock.
-  @test_util.run_v1_only("b/120545219")
   def testParallelUpdateWithoutLocking(self):
     with self.cached_session() as sess:
       ones_t = array_ops.fill([1024, 1024], 1.0)
@@ -42,7 +40,7 @@ class AssignOpTest(test.TestCase):
           state_ops.assign_add(
               p, ones_t, use_locking=False) for _ in range(20)
       ]
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       def run_add(add_op):
         self.evaluate(add_op)
@@ -61,7 +59,6 @@ class AssignOpTest(test.TestCase):
       self.assertTrue((vals >= ones).all())
       self.assertTrue((vals <= ones * 20).all())
 
-  @test_util.run_v1_only("b/120545219")
   def testParallelAssignWithoutLocking(self):
     with self.cached_session() as sess:
       ones_t = array_ops.fill([1024, 1024], float(1))
@@ -70,7 +67,7 @@ class AssignOpTest(test.TestCase):
           state_ops.assign(p, math_ops.multiply(ones_t, float(i)), False)
           for i in range(1, 21)
       ]
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       def run_assign(assign_op):
         self.evaluate(assign_op)
@@ -94,7 +91,6 @@ class AssignOpTest(test.TestCase):
   # contain non-benign but known data races between the variable assignment and
   # returning the output tensors. This issue will be resolved with the new
   # resource variables.
-  @test_util.run_v1_only("b/120545219")
   def testParallelUpdateWithLocking(self):
     with self.cached_session() as sess:
       zeros_t = array_ops.fill([1024, 1024], 0.0)
@@ -104,7 +100,7 @@ class AssignOpTest(test.TestCase):
           state_ops.assign_add(
               p, ones_t, use_locking=True) for _ in range(20)
       ]
-      p.initializer.run()
+      self.evaluate(p.initializer)
 
       def run_add(add_op):
         self.evaluate(add_op)
@@ -122,7 +118,6 @@ class AssignOpTest(test.TestCase):
       ones = np.ones((1024, 1024)).astype(np.float32)
       self.assertAllEqual(vals, ones * 20)
 
-  @test_util.run_v1_only("b/120545219")
   def testParallelAssignWithLocking(self):
     with self.cached_session() as sess:
       zeros_t = array_ops.fill([1024, 1024], 0.0)
@@ -133,7 +128,7 @@ class AssignOpTest(test.TestCase):
               p, math_ops.multiply(ones_t, float(i)), use_locking=True)
           for i in range(1, 21)
       ]
-      p.initializer.run()
+      self.evaluate(p.initializer)
 
       def run_assign(assign_op):
         self.evaluate(assign_op)
diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py
index 95ee454614..0d6a3cbd35 100644
--- a/tensorflow/python/kernel_tests/functional_ops_test.py
+++ b/tensorflow/python/kernel_tests/functional_ops_test.py
@@ -466,7 +466,7 @@ class FunctionalOpsTest(test.TestCase):
     loss = l0 + array_ops.stop_gradient(l1)
     grad = gradients_impl.gradients(ys=[loss], xs=[a, b])
     with self.test_session(use_gpu=True) as sess:
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.evaluate(grad)
 
   @test_util.run_in_graph_and_eager_modes
diff --git a/tensorflow/python/kernel_tests/partitioned_variables_test.py b/tensorflow/python/kernel_tests/partitioned_variables_test.py
index da79b4ecfc..edcbc2967e 100644
--- a/tensorflow/python/kernel_tests/partitioned_variables_test.py
+++ b/tensorflow/python/kernel_tests/partitioned_variables_test.py
@@ -323,26 +323,24 @@ class PartitionedVariablesTestCase(test.TestCase):
     for i in xrange(len(expected_specs)):
       self.assertEquals(expected_specs[i], slices[i]._save_slice_info.spec)
 
-  @test_util.run_deprecated_v1
   def testVecConstantInit(self):
     with self.cached_session():
       rnd_par = constant_op.constant([1, 2, 3, 4])
       vs = partitioned_variables.create_partitioned_variables([4], [4], rnd_par)
-      variables.global_variables_initializer().run()
-      val = array_ops.concat(vs, 0).eval()
+      self.evaluate(variables.global_variables_initializer())
+      val = array_ops.concat(vs, 0)
       rnd = self.evaluate(rnd_par)
       self.assertAllClose(rnd, val)
       self.assertEqual([dtypes.int32] * 4, [v.dtype.base_dtype for v in vs])
       self._TestSaveSpec(vs, ["4 0,1", "4 1,1", "4 2,1", "4 3,1"])
 
-  @test_util.run_deprecated_v1
   def testConstantInit(self):
     with self.cached_session():
       rnd_par = constant_op.constant([[1, 2, 3, 4], [5, 6, 7, 8]])
       vs = partitioned_variables.create_partitioned_variables([2, 4], [1, 2],
                                                               rnd_par)
-      variables.global_variables_initializer().run()
-      val = array_ops.concat(vs, 1).eval()
+      self.evaluate(variables.global_variables_initializer())
+      val = array_ops.concat(vs, 1)
       rnd = self.evaluate(rnd_par)
       self.assertAllClose(rnd, val)
       self.assertEqual([dtypes.int32] * 2, [v.dtype.base_dtype for v in vs])
@@ -356,7 +354,7 @@ class PartitionedVariablesTestCase(test.TestCase):
                                                                  rnd_par)
         vs2 = partitioned_variables.create_partitioned_variables([2, 4], [1, 2],
                                                                  rnd_par)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       var1_name = vs1[0]._save_slice_info.full_name
       var2_name = vs2[0]._save_slice_info.full_name
       self.assertEqual("hi/PartitionedVariable", var1_name)
@@ -376,7 +374,7 @@ class PartitionedVariablesTestCase(test.TestCase):
           vs, reuse=True, use_resource=use_resource):
         vs2 = partitioned_variables.create_partitioned_variables(
             [2, 4], [1, 2], rnd_par, dtype=dtypes.int32)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       var1_name = vs1[0]._save_slice_info.full_name
       var2_name = vs2[0]._save_slice_info.full_name
       self.assertEqual("hola/PartitionedVariable", var1_name)
@@ -393,7 +391,7 @@ class PartitionedVariablesTestCase(test.TestCase):
                                                                  rnd_par)
         vs2 = partitioned_variables.create_partitioned_variables([2, 4], [1, 2],
                                                                  rnd_par)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       var1_name = vs1[0]._save_slice_info.full_name
       var2_name = vs2[0]._save_slice_info.full_name
       # Currently, the name scope 'ola' has no effect.
@@ -408,18 +406,16 @@ class PartitionedVariablesTestCase(test.TestCase):
   def testName(self):
     self._testNameHelper(use_resource=False)
 
-  @test_util.run_deprecated_v1
   def testResourceName(self):
     self._testNameHelper(use_resource=True)
 
-  @test_util.run_v1_only("b/120545219")
   def testRandomInitValue(self):
     with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([200, 40]))
       vs = partitioned_variables.create_partitioned_variables(
           rnd.get_shape(), [1, 10], rnd.initialized_value())
-      variables.global_variables_initializer().run()
-      val = array_ops.concat(vs, 1).eval()
+      self.evaluate(variables.global_variables_initializer())
+      val = array_ops.concat(vs, 1)
       rnd = self.evaluate(rnd)
       self.assertAllClose(rnd, val)
       self.assertEqual([dtypes.float32] * 10, [v.dtype.base_dtype for v in vs])
@@ -430,7 +426,6 @@ class PartitionedVariablesTestCase(test.TestCase):
           "200 40 0,200:36,4"
       ])
 
-  @test_util.run_v1_only("b/120545219")
   def testRandomInitUnevenPartitions(self):
     with self.cached_session():
       rnd = variables.Variable(
@@ -440,7 +435,7 @@ class PartitionedVariablesTestCase(test.TestCase):
               rnd.get_shape(), [1, i], rnd.initialized_value())
           for i in xrange(1, 10)
       ]
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       rnd_val = self.evaluate(rnd)
       # Only check the slice save specs for the first 5 tf.
       save_specs = [
@@ -462,33 +457,31 @@ class PartitionedVariablesTestCase(test.TestCase):
           ]
       ]
       for i, vs in enumerate(var_lists):
-        var_val = array_ops.concat(vs, 1).eval()
+        var_val = array_ops.concat(vs, 1)
         self.assertAllClose(rnd_val, var_val)
         self.assertEqual([dtypes.float64] * len(vs),
                          [v.dtype.base_dtype for v in vs])
         if i < len(save_specs):
           self._TestSaveSpec(vs, save_specs[i])
 
-  @test_util.run_v1_only("b/120545219")
   def testDegenerate(self):
     with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([10, 43]))
       vs = partitioned_variables.create_partitioned_variables(
           rnd.get_shape(), [1, 1], rnd.initialized_value())
-      variables.global_variables_initializer().run()
-      val = array_ops.concat(vs, 0).eval()
+      self.evaluate(variables.global_variables_initializer())
+      val = array_ops.concat(vs, 0)
       rnd = self.evaluate(rnd)
       self.assertAllClose(rnd, val)
       self._TestSaveSpec(vs, ["10 43 0,10:0,43"])
 
-  @test_util.run_v1_only("b/120545219")
   def testSliceSizeOne(self):
     with self.cached_session():
       rnd = variables.Variable(random_ops.random_uniform([10, 43]))
       vs = partitioned_variables.create_partitioned_variables(
           rnd.get_shape(), [10, 1], rnd.initialized_value())
-      variables.global_variables_initializer().run()
-      val = array_ops.concat(vs, 0).eval()
+      self.evaluate(variables.global_variables_initializer())
+      val = array_ops.concat(vs, 0)
       rnd = self.evaluate(rnd)
       self.assertAllClose(rnd, val)
       self._TestSaveSpec(vs, [
@@ -497,7 +490,6 @@ class PartitionedVariablesTestCase(test.TestCase):
           "10 43 6,1:0,43", "10 43 7,1:0,43", "10 43 8,1:0,43", "10 43 9,1:0,43"
       ])
 
-  @test_util.run_deprecated_v1
   def testIotaInitializer(self):
     self.assertAllClose([0., 1., 2., 3.], _IotaInitializer([4]))
     self.assertAllClose([[0., 1.], [0., 10.], [0., 100.], [0., 1000.]],
@@ -505,11 +497,11 @@ class PartitionedVariablesTestCase(test.TestCase):
     with self.cached_session():
       vs = partitioned_variables.create_partitioned_variables([13, 5], [3, 1],
                                                               _IotaInitializer)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       slice0 = _IotaInitializer([5, 5])
       slice1 = _IotaInitializer([4, 5])
       slice2 = _IotaInitializer([4, 5])
-      val = array_ops.concat(vs, 0).eval()
+      val = array_ops.concat(vs, 0)
       self.assertAllClose(slice0 + slice1 + slice2, val)
       self._TestSaveSpec(vs, ["13 5 0,5:0,5", "13 5 5,4:0,5", "13 5 9,4:0,5"])
 
@@ -520,7 +512,7 @@ class PartitionedVariablesTestCase(test.TestCase):
     with self.cached_session():
       var0, var1 = partitioned_variables.create_partitioned_variables(
           [20, 12], [1, 2], init_ops.random_uniform_initializer())
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       val0, val1 = self.evaluate(var0).flatten(), self.evaluate(var1).flatten()
       self.assertTrue(np.linalg.norm(val0 - val1) > 1e-6)
     # Negative test that proves that slices have the same values if
@@ -528,7 +520,7 @@ class PartitionedVariablesTestCase(test.TestCase):
     with self.cached_session():
       var0, var1 = partitioned_variables.create_partitioned_variables(
           [20, 12], [1, 2], init_ops.random_uniform_initializer(seed=201))
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       val0, val1 = self.evaluate(var0).flatten(), self.evaluate(var1).flatten()
       self.assertAllClose(val0, val1)
 
@@ -607,8 +599,8 @@ class PartitionedVariablesTestCase(test.TestCase):
       self.assertTrue(
           c.op in concat_control_inputs,
           "var_x._concat() should get control dependencies from its scope.")
-      variables.global_variables_initializer().run()
-      self.assertAllClose(value.eval(), var_x.as_tensor().eval())
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllClose(value, var_x.as_tensor())
 
   def testMetaGraphSaveLoad(self):
     save_prefix = os.path.join(self.get_temp_dir(), "ckpt")
@@ -623,7 +615,7 @@ class PartitionedVariablesTestCase(test.TestCase):
         v0_part = v0._get_partitions()
         self.assertEqual(len(v0_list), 5)
         self.assertAllEqual(v0_part, (5, 1))
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
 
         save_graph.get_collection_ref("partvar").append(v0)
         saver = saver_lib.Saver()
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 1dabcbb5c3..df7b686165 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -689,7 +689,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
   def testToFromProto(self):
     with self.cached_session():
       v = resource_variable_ops.ResourceVariable(1.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       w = resource_variable_ops.ResourceVariable.from_proto(v.to_proto())
       self.assertEquals(2, math_ops.add(w, 1).eval())
@@ -793,11 +793,11 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       with self.assertRaises(ValueError):
         _ = w.value().op.get_attr("_class")
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testSharedName(self):
     with self.cached_session():
       v = resource_variable_ops.ResourceVariable(300.0, name="var4")
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       w = resource_variable_ops.var_handle_op(
           dtype=v.dtype.base_dtype, shape=v.get_shape(), shared_name="var4",
diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
index 6d8e3e8356..147e7fde57 100644
--- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py
+++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py
@@ -425,7 +425,6 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual(t_g_ta_0, t_g_ta_1)
       self.assertAllEqual([[4.0, 5.0]], d_r1_0)
 
-  @test_util.run_v1_only("b/120545219")
   def testTensorArrayWriteWrongIndexOrDataTypeFails(self):
     with self.session(use_gpu=True):
       ta = _make_ta(3, "foo", dtype=dtypes.float32)
@@ -459,7 +458,6 @@ class TensorArrayTest(test.TestCase):
       with self.assertRaisesOpError(error_msg):
         self.evaluate(ta.write(3, 3.0).flow)
 
-  @test_util.run_v1_only("b/120545219")
   def testTensorArrayReadWrongIndexOrDataTypeFails(self):
     with self.session(use_gpu=True):
       ta = _make_ta(3, "foo", dtype=dtypes.float32)
@@ -505,7 +503,6 @@ class TensorArrayTest(test.TestCase):
           "it has already been written to."):
         self.evaluate(ta.write(2, 3.0).write(2, 3.0).flow)
 
-  @test_util.run_v1_only("b/120545219")
   def testTensorArrayConcatIncompatibleShapesFails(self):
     with self.session(use_gpu=True):
       ta = tensor_array_ops.TensorArray(
@@ -537,7 +534,6 @@ class TensorArrayTest(test.TestCase):
       with self.assertRaisesOpError("shape"):
         self.evaluate(w3.concat())
 
-  @test_util.run_v1_only("b/120545219")
   def testTensorArraySplitIncompatibleShapesFails(self):
     with self.session(use_gpu=True):
       in_eager_mode = context.executing_eagerly()
@@ -959,7 +955,7 @@ class TensorArrayTest(test.TestCase):
         v0_grad = gradients_impl.gradients([vout], [v0], [grad_val])[0]
         state0_grad = gradients_impl.gradients([vout], [state0], [grad_val])[0]
         var_grad = gradients_impl.gradients([vout], [var], [grad_val])[0]
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
 
       state0_t, var_t, v0_t, vout_t, v0_grad_t, var_grad_t, state0_grad_t = (
           self.evaluate(
@@ -1578,7 +1574,7 @@ class TensorArrayTest(test.TestCase):
       self.assertEqual(tensor_shape.scalar(), read1.get_shape())
 
       if not context.executing_eagerly():
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
 
       read0_v, read1_v, size0_v, size1_v = self.evaluate((read0, read1, size0,
                                                           size1))
diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 336e9b0bca..07807e89d0 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -66,7 +66,7 @@ class VariablesTestCase(test.TestCase):
       with self.assertRaisesOpError("Attempting to use uninitialized value"):
         self.evaluate(var1)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertAllClose(0.0, self.evaluate(var0))
       self.assertAllClose(1.1, self.evaluate(var1))
@@ -96,11 +96,11 @@ class VariablesTestCase(test.TestCase):
       self.assertEqual([3, 6], depdep.get_shape())
       self.assertEqual([3, 6], depdep.shape)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
-      self.assertAllClose(rnd.eval(), self.evaluate(dep))
-      self.assertAllClose(rnd.eval() + self.evaluate(dep) + 2.0,
-                          self.evaluate(depdep))
+      self.assertAllClose(self.evaluate(rnd), self.evaluate(dep))
+      self.assertAllClose(
+          self.evaluate(rnd) + self.evaluate(dep) + 2.0, self.evaluate(depdep))
 
   def testIterable(self):
     with self.assertRaisesRegexp(TypeError, "not iterable"):
@@ -117,7 +117,7 @@ class VariablesTestCase(test.TestCase):
       plus_one = var.assign_add(1.0)
       minus_one = var.assign_sub(2.0)
       four = var.assign(4.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(0.0, self.evaluate(var))
 
       self.assertAllClose(1.0, self.evaluate(plus_one))
@@ -136,7 +136,7 @@ class VariablesTestCase(test.TestCase):
       plus_one = var.assign_add(1.0)
       minus_one = var.assign_sub(2.0)
       four = var.assign(4.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(0.0, self.evaluate(var))
 
       self.evaluate(plus_one)
@@ -166,7 +166,7 @@ class VariablesTestCase(test.TestCase):
       var = variables.Variable(zero)
       count_up_to = var.count_up_to(3)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(0, self.evaluate(var))
 
       self.assertEqual(0, self.evaluate(count_up_to))
@@ -264,10 +264,10 @@ class VariablesTestCase(test.TestCase):
     with self.cached_session():
       var_x = variables.Variable(2.0)
       var_y = variables.Variable(3.0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(2.0, self.evaluate(var_x))
       self.assertAllClose(3.0, self.evaluate(var_y))
-      self.assertAllClose(5.0, math_ops.add(var_x, var_y).eval())
+      self.assertAllClose(5.0, self.evaluate(math_ops.add(var_x, var_y)))
 
   @test_util.run_deprecated_v1
   def testZeroSizeVarSameAsConst(self):
@@ -277,9 +277,9 @@ class VariablesTestCase(test.TestCase):
       variable_mul = math_ops.matmul(zero_size_const, zero_size_var)
       const_mul = math_ops.matmul(
           zero_size_const, zero_size_const, transpose_b=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variable_output = self.evaluate(variable_mul)
-      self.assertAllClose(const_mul.eval(), variable_output)
+      self.assertAllClose(self.evaluate(const_mul), variable_output)
       self.assertAllClose([[0., 0.], [0., 0.]], variable_output)
 
   @test_util.run_deprecated_v1
@@ -372,7 +372,7 @@ class VariablesTestCase(test.TestCase):
       matmul = var_m.__matmul__([[10.0], [20.0]])
       rmatmul = var_m.__rmatmul__([[10.0], [20.0]])
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose([2.0], self.evaluate(add))
       self.assertAllClose([3.0], self.evaluate(radd))
       self.assertAllClose([1.0], self.evaluate(sub))
@@ -409,7 +409,7 @@ class VariablesTestCase(test.TestCase):
   def testSession(self):
     with self.cached_session() as sess:
       var = variables.Variable([1, 12])
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose([1, 12], self.evaluate(var))
 
   @test_util.run_v1_only("b/120545219")
@@ -431,7 +431,7 @@ class VariablesTestCase(test.TestCase):
       v1 = variables.Variable(initializer, dtype=dtypes.float32)
       self.assertEqual(shape, v1.get_shape())
       self.assertEqual(shape, v1.shape)
-      self.assertAllClose(value, v1.initial_value.eval())
+      self.assertAllClose(value, self.evaluate(v1.initial_value))
       with self.assertRaises(errors_impl.FailedPreconditionError):
         self.evaluate(v1)
 
@@ -439,11 +439,11 @@ class VariablesTestCase(test.TestCase):
           math_ops.negative(v1.initialized_value()), dtype=dtypes.float32)
       self.assertEqual(v1.get_shape(), v2.get_shape())
       self.assertEqual(v1.shape, v2.shape)
-      self.assertAllClose(np.negative(value), v2.initial_value.eval())
+      self.assertAllClose(np.negative(value), self.evaluate(v2.initial_value))
 
       with self.assertRaises(errors_impl.FailedPreconditionError):
         self.evaluate(v2)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(np.negative(value), self.evaluate(v2))
 
   def testConstraintArg(self):
@@ -465,10 +465,10 @@ class VariablesTestCase(test.TestCase):
       a = variables.Variable([1, 2, 3], dtype=dtypes.float32)
       b = variables.Variable(a.initialized_value() + 2)
       c = variables.Variable(b.initialized_value() + 2)
-      variables.global_variables_initializer().run()
-      self.assertAllEqual(a.eval(), [1, 2, 3])
-      self.assertAllEqual(b.eval(), [3, 4, 5])
-      self.assertAllEqual(c.eval(), [5, 6, 7])
+      self.evaluate(variables.global_variables_initializer())
+      self.assertAllEqual(self.evaluate(a), [1, 2, 3])
+      self.assertAllEqual(self.evaluate(b), [3, 4, 5])
+      self.assertAllEqual(self.evaluate(c), [5, 6, 7])
 
   @test_util.run_deprecated_v1
   def testInitializerFunctionDevicePlacement(self):
@@ -503,7 +503,7 @@ class VariablesTestCase(test.TestCase):
       # initialized_value should not rerun the initializer_op if the variable
       # has already been initialized elsewhere.
       self.evaluate(v.assign(1.0))
-      self.assertEqual(1.0, v.initialized_value().eval())
+      self.assertEqual(1.0, self.evaluate(v.initialized_value()))
 
     v_def.ClearField("initial_value_name")
     with ops.Graph().as_default(), self.cached_session() as sess:
@@ -537,7 +537,7 @@ class VariablesTestCase(test.TestCase):
   def testLoad(self):
     with self.cached_session():
       var = variables.Variable(np.zeros((5, 5), np.float32))
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       var.load(np.ones((5, 5), np.float32))
 
       self.assertAllClose(np.ones((5, 5), np.float32), self.evaluate(var))
@@ -573,7 +573,7 @@ class IsInitializedTest(test.TestCase):
       _ = v, w
       uninited = variables.report_uninitialized_variables()
       self.assertAllEqual(np.array([b"v", b"w"]), self.evaluate(uninited))
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual(0, self.evaluate(uninited).size)
 
   @test_util.run_v1_only("b/120545219")
@@ -601,20 +601,20 @@ class IsInitializedTest(test.TestCase):
       b = variables.Variable(array_ops.ones([2, 2]))
       objective = math_ops.reduce_sum(b + math_ops.matmul(
           a, a, transpose_a=True))
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       do_opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(
           objective)
       self.evaluate([do_opt])
       self.assertAllClose([[0.9, 0.9], [0.9, 0.9]], self.evaluate(b))
 
 
+@test_util.run_v1_only("b/120545219")
 class ObsoleteIsInitializedTest(test.TestCase):
 
   def testNoVars(self):
     with ops.Graph().as_default():
       self.assertEqual(None, variables.assert_variables_initialized())
 
-  @test_util.run_v1_only("b/120545219")
   def testVariables(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.VariableV1([1, 2])
@@ -623,10 +623,9 @@ class ObsoleteIsInitializedTest(test.TestCase):
       inited = variables.assert_variables_initialized()
       with self.assertRaisesOpError("Attempting to use uninitialized value"):
         self.evaluate(inited)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.evaluate(inited)
 
-  @test_util.run_v1_only("b/120545219")
   def testVariableList(self):
     with ops.Graph().as_default(), self.cached_session() as sess:
       v = variables.VariableV1([1, 2])
@@ -766,36 +765,36 @@ class PartitionedVariableTest(test.TestCase):
       assign_list = pv_1.assign([c_0, c_1])
       assign_part_value = pv_1.assign_add(assign_ones)
       assign_part_var = pv_1.assign_sub(pv_0)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
-      self.assertEqual([1.0], plus_delta[0].eval())
+      self.assertEqual([1.0], self.evaluate(plus_delta[0]))
       self.assertEqual([1.0], self.evaluate(v0))
-      self.assertEqual([3.0], plus_delta[1].eval())
+      self.assertEqual([3.0], self.evaluate(plus_delta[1]))
       self.assertEqual([3.0], self.evaluate(v1))
 
-      self.assertEqual([-2.0], minus_delta[0].eval())
+      self.assertEqual([-2.0], self.evaluate(minus_delta[0]))
       self.assertEqual([-2.0], self.evaluate(v0))
-      self.assertEqual([-1.0], minus_delta[1].eval())
+      self.assertEqual([-1.0], self.evaluate(minus_delta[1]))
       self.assertEqual([-1.0], self.evaluate(v1))
 
-      self.assertEqual([1.0], assign_ones[0].eval())
+      self.assertEqual([1.0], self.evaluate(assign_ones[0]))
       self.assertEqual([1.0], self.evaluate(v0))
-      self.assertEqual([1.0], assign_ones[1].eval())
+      self.assertEqual([1.0], self.evaluate(assign_ones[1]))
       self.assertEqual([1.0], self.evaluate(v1))
 
-      self.assertEqual([2.0], assign_list[0].eval())
+      self.assertEqual([2.0], self.evaluate(assign_list[0]))
       self.assertEqual([2.0], self.evaluate(v2))
-      self.assertEqual([3.0], assign_list[1].eval())
+      self.assertEqual([3.0], self.evaluate(assign_list[1]))
       self.assertEqual([3.0], self.evaluate(v3))
 
-      self.assertEqual([3.0], assign_part_value[0].eval())
+      self.assertEqual([3.0], self.evaluate(assign_part_value[0]))
       self.assertEqual([3.0], self.evaluate(v2))
-      self.assertEqual([4.0], assign_part_value[1].eval())
+      self.assertEqual([4.0], self.evaluate(assign_part_value[1]))
       self.assertEqual([4.0], self.evaluate(v3))
 
-      self.assertEqual([2.0], assign_part_var[0].eval())
+      self.assertEqual([2.0], self.evaluate(assign_part_var[0]))
       self.assertEqual([2.0], self.evaluate(v2))
-      self.assertEqual([3.0], assign_part_var[1].eval())
+      self.assertEqual([3.0], self.evaluate(assign_part_var[1]))
       self.assertEqual([3.0], self.evaluate(v3))
 
 
diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py
index 0c18b7208f..f1dd4f529f 100644
--- a/tensorflow/python/ops/control_flow_ops_test.py
+++ b/tensorflow/python/ops/control_flow_ops_test.py
@@ -565,7 +565,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
                                         strict=strict)
 
     with self.cached_session() as sess:
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       true_feed_dict = {condition: True}
       true_feed_dict.update(feed_dict)
       result_cond, result_case = sess.run([output_cond, output_case],
diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py
index abdcbc7a3a..c53afef63b 100644
--- a/tensorflow/python/ops/gradients_test.py
+++ b/tensorflow/python/ops/gradients_test.py
@@ -1027,7 +1027,7 @@ class CustomGradientTest(test_util.TensorFlowTestCase):
           conditional, lambda: alpha * 2, lambda: alpha * 3)
 
       g, = gradients_impl.gradients(output, alpha)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllEqual(g.eval(), [2.0])
       self.assertAllEqual(g.eval(feed_dict={conditional: False}), [3.0])
 
diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py
index 8d94c7c989..e36b8b30bf 100644
--- a/tensorflow/python/saved_model/saved_model_test.py
+++ b/tensorflow/python/saved_model/saved_model_test.py
@@ -1084,7 +1084,7 @@ class SavedModelTest(SavedModelTestBase):
       # CheckpointedOp is a key-value table that can be saved across sessions.
       # The table register itself in SAVEABLE_OBJECTS collection.
       v1 = saver_test_utils.CheckpointedOp(name="v1")
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       v1.insert("k1", 3.0).run()
       # Once the table is restored, we can access it through this reference.
       ops.add_to_collection("table_ref", v1.table_ref)
diff --git a/tensorflow/python/training/adagrad_test.py b/tensorflow/python/training/adagrad_test.py
index 1e2d29b337..3528fdaa8b 100644
--- a/tensorflow/python/training/adagrad_test.py
+++ b/tensorflow/python/training/adagrad_test.py
@@ -106,7 +106,7 @@ class AdagradOptimizerTest(test.TestCase):
         pred = math_ops.matmul(embedding_ops.embedding_lookup([var0], [0]), x)
         loss = pred * pred
         sgd_op = adagrad.AdagradOptimizer(1.0).minimize(loss)
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         # Fetch params to validate initial values
         self.assertAllCloseAccordingToType([[1.0, 2.0], [3.0, 4.0]],
                                            self.evaluate(var0))
@@ -129,7 +129,7 @@ class AdagradOptimizerTest(test.TestCase):
             constant_op.constant(3.0), initial_accumulator_value=0.1)
         ada_update = ada_opt.apply_gradients(
             zip([grads0, grads1], [var0, var1]))
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         # Fetch params to validate initial values
         self.assertAllClose([1.0, 2.0], self.evaluate(var0))
         self.assertAllClose([3.0, 4.0], self.evaluate(var1))
@@ -163,7 +163,7 @@ class AdagradOptimizerTest(test.TestCase):
         ada_opt = adagrad.AdagradOptimizer(3.0, initial_accumulator_value=0.1)
         ada_update = ada_opt.apply_gradients(
             zip([grads0, grads1], [var0, var1]))
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         # Fetch params to validate initial values
         self.assertAllClose([[1.0], [2.0]], self.evaluate(var0))
         self.assertAllClose([[3.0], [4.0]], self.evaluate(var1))
@@ -198,7 +198,7 @@ class AdagradOptimizerTest(test.TestCase):
             [(grad_repeated_index, repeated_index_update_var)])
         aggregated_update = adagrad.AdagradOptimizer(3.0).apply_gradients(
             [(grad_aggregated, aggregated_update_var)])
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllClose(aggregated_update_var.eval(),
                             self.evaluate(repeated_index_update_var))
         for _ in range(3):
@@ -223,7 +223,7 @@ class AdagradOptimizerTest(test.TestCase):
             2.0).minimize(loss_repeated)
         update_op_aggregated = adagrad.AdagradOptimizer(
             2.0).minimize(loss_aggregated)
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         self.assertAllCloseAccordingToType(
             self.evaluate(var_repeated), self.evaluate(var_aggregated))
         for _ in range(3):
@@ -289,7 +289,7 @@ class AdagradOptimizerTest(test.TestCase):
         self.assertEquals(slot0.get_shape(), var0.get_shape())
         slot1 = ada_opt.get_slot(var1, "accumulator")
         self.assertEquals(slot1.get_shape(), var1.get_shape())
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
 
         # Fetch params to validate initial values.
         self.assertAllClose([1.0, 2.0], self.evaluate(var0))
diff --git a/tensorflow/python/training/checkpoint_ops_test.py b/tensorflow/python/training/checkpoint_ops_test.py
index c481547139..a0fd2dc6ba 100644
--- a/tensorflow/python/training/checkpoint_ops_test.py
+++ b/tensorflow/python/training/checkpoint_ops_test.py
@@ -154,7 +154,7 @@ class LoadAndRemapWrappersTest(test.TestCase):
         partitioner=partitioned_variables.fixed_size_partitioner(2))
 
     with self.cached_session():
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(expected_remapped_matrix,
                           remapped_matrix.as_tensor().eval())
 
@@ -188,7 +188,7 @@ class LoadAndRemapWrappersTest(test.TestCase):
         partitioner=partitioned_variables.fixed_size_partitioner(2))
 
     with self.cached_session():
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(expected_remapped_matrix,
                           remapped_matrix.as_tensor().eval())
 
@@ -226,7 +226,7 @@ class LoadAndRemapWrappersTest(test.TestCase):
         partitioner=partitioned_variables.fixed_size_partitioner(2))
 
     with self.cached_session():
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(expected_remapped_matrix,
                           remapped_matrix.as_tensor().eval())
 
@@ -262,7 +262,7 @@ class LoadAndRemapWrappersTest(test.TestCase):
         partitioner=partitioned_variables.fixed_size_partitioner(2))
 
     with self.cached_session():
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(expected_remapped_matrix,
                           remapped_matrix.as_tensor().eval())
 
@@ -296,7 +296,7 @@ class LoadAndRemapWrappersTest(test.TestCase):
         partitioner=partitioned_variables.fixed_size_partitioner(2))
 
     with self.cached_session():
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(expected_remapped_embeddings,
                           remapped_embeddings.as_tensor().eval())
 
@@ -342,7 +342,7 @@ class LoadAndRemapWrappersTest(test.TestCase):
         partitioner=partitioned_variables.fixed_size_partitioner(2))
 
     with self.cached_session():
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(expected_remapped_embeddings,
                           remapped_embeddings.as_tensor().eval())
 
@@ -380,7 +380,7 @@ class LoadAndRemapWrappersTest(test.TestCase):
         partitioner=partitioned_variables.fixed_size_partitioner(2))
 
     with self.cached_session():
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(expected_remapped_embeddings,
                           remapped_embeddings.as_tensor().eval())
 
diff --git a/tensorflow/python/training/input_test.py b/tensorflow/python/training/input_test.py
index d89f5f3bbd..5efc15d56f 100644
--- a/tensorflow/python/training/input_test.py
+++ b/tensorflow/python/training/input_test.py
@@ -58,7 +58,7 @@ class MatchFilenamesOnceTest(test_lib.TestCase):
       question = inp.match_filenames_once(
           os.path.join(self.get_temp_dir(), "match_filenames.?"))
       one = inp.match_filenames_once(additional[1])
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       self.assertItemsEqual(
           map(compat.as_bytes, filenames), self.evaluate(star))
@@ -84,7 +84,7 @@ class LimitEpochsTest(test_lib.TestCase):
     with self.cached_session():
       love_me = constant_op.constant("Love Me")
       love_me_two_times = inp.limit_epochs(love_me, num_epochs=2)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       self.assertEqual(b"Love Me", self.evaluate(love_me_two_times))
       self.assertEqual(b"Love Me", self.evaluate(love_me_two_times))
@@ -105,7 +105,7 @@ class InputProducerTest(test_lib.TestCase):
           input_tensor, num_epochs=num_epochs, shuffle=False)
       dequeue_many = queue.dequeue_many(len(input_tensor) * num_epochs)
       dequeue = queue.dequeue()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -132,7 +132,7 @@ class InputProducerTest(test_lib.TestCase):
           input_tensor, element_shape=[4], num_epochs=num_epochs, shuffle=False)
       dequeue_many = queue.dequeue_many(len(input_value) * num_epochs)
       dequeue = queue.dequeue()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -163,7 +163,7 @@ class StringInputProducerTest(test_lib.TestCase):
           strings, num_epochs=num_epochs, shuffle=False)
       dequeue_many = queue.dequeue_many(len(strings) * num_epochs)
       dequeue = queue.dequeue()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -186,7 +186,7 @@ class StringInputProducerTest(test_lib.TestCase):
           strings, num_epochs=num_epochs, shuffle=True, seed=271828)
       dequeue_many = queue.dequeue_many(len(strings))
       dequeue = queue.dequeue()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -234,7 +234,7 @@ class StringInputProducerTest(test_lib.TestCase):
           constant_op.constant(
               [], dtype=dtypes.string))
       dequeue = queue.dequeue()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners(coord=coord)
       with self.assertRaises(errors_impl.OutOfRangeError):
@@ -284,7 +284,7 @@ class RangeInputProducerTest(test_lib.TestCase):
           range_size, num_epochs=num_epochs, shuffle=False)
       dequeue_many = queue.dequeue_many(range_size * num_epochs)
       dequeue = queue.dequeue()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -307,7 +307,7 @@ class RangeInputProducerTest(test_lib.TestCase):
           range_size, num_epochs=num_epochs, shuffle=True, seed=314159)
       dequeue_many = queue.dequeue_many(range_size)
       dequeue = queue.dequeue()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -358,7 +358,7 @@ class SliceInputProducerTest(test_lib.TestCase):
       source_ints = [2, 3, 5, 7]
       slices = inp.slice_input_producer(
           [source_strings, source_ints], num_epochs=num_epochs, shuffle=False)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -386,7 +386,7 @@ class SliceInputProducerTest(test_lib.TestCase):
           num_epochs=num_epochs,
           shuffle=True,
           seed=161803)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -487,7 +487,7 @@ class BatchTest(test_lib.TestCase):
         batched = inp.batch(
             [counter, sparse_counter, "string"], batch_size=batch_size)
         batched_fetch = batched
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -555,7 +555,7 @@ class BatchTest(test_lib.TestCase):
       counter = examples.count_up_to(num_batches * batch_size)
       string = array_ops.tile(["string"],
                               math_ops.to_int32(array_ops.stack([counter])))
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       batched = inp.batch(
           [counter, string], batch_size=batch_size, dynamic_pad=True)
@@ -590,7 +590,7 @@ class BatchTest(test_lib.TestCase):
           dense_shape=[1])
       pre_batched = inp.batch([counter, sparse_counter, "string"], batch_size=2)
       batched = inp.batch(pre_batched, enqueue_many=True, batch_size=batch_size)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -629,7 +629,7 @@ class BatchTest(test_lib.TestCase):
           [counter, sparse_counter, "string"],
           batch_size=batch_size,
           num_threads=4)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -672,7 +672,7 @@ class BatchTest(test_lib.TestCase):
           [counter, sparse_counter, "string"],
           batch_size=batch_size,
           allow_smaller_final_batch=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -730,7 +730,7 @@ class BatchTest(test_lib.TestCase):
           batch_size=batch_size,
           num_threads=4,
           allow_smaller_final_batch=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -1058,7 +1058,7 @@ class BatchJoinTest(test_lib.TestCase):
                           batched_fetch[1].dense_shape.get_shape().as_list())
       self.assertAllEqual((batch_size,), batched_fetch[2].get_shape().as_list())
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -1157,7 +1157,7 @@ class BatchJoinTest(test_lib.TestCase):
       self.assertAllEqual((batch_size,), batched[0].get_shape().as_list())
       self.assertAllEqual((batch_size, None), batched[1].get_shape().as_list())
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -1244,7 +1244,7 @@ class BatchJoinTest(test_lib.TestCase):
       self.assertAllEqual((2,), batched[1].dense_shape.get_shape().as_list())
       self.assertAllEqual((None,), batched[2].get_shape().as_list())
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -1339,7 +1339,7 @@ class BatchJoinTest(test_lib.TestCase):
       self.assertAllEqual((None,), batched[0].get_shape().as_list())
       self.assertAllEqual((None, None), batched[1].get_shape().as_list())
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -1644,7 +1644,7 @@ class ShuffleBatchTest(test_lib.TestCase):
             min_after_dequeue=16,
             seed=141421)
         batched_fetch = batched
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -1702,7 +1702,7 @@ class ShuffleBatchTest(test_lib.TestCase):
           seed=141421,
           allow_smaller_final_batch=True)
       batched_fetch = batched
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -1756,7 +1756,7 @@ class ShuffleBatchTest(test_lib.TestCase):
           min_after_dequeue=16,
           seed=173205,
           num_threads=4)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -1807,7 +1807,7 @@ class ShuffleBatchTest(test_lib.TestCase):
           seed=173205,
           num_threads=4,
           allow_smaller_final_batch=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -2070,7 +2070,7 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
                           batched_fetch[1].dense_shape.get_shape().as_list())
       self.assertAllEqual((batch_size,), batched_fetch[2].get_shape().as_list())
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
@@ -2165,7 +2165,7 @@ class ShuffleBatchJoinTest(test_lib.TestCase):
       self.assertAllEqual((2,), batched[1].dense_shape.get_shape().as_list())
       self.assertAllEqual((None,), batched[2].get_shape().as_list())
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       variables.local_variables_initializer().run()
       threads = queue_runner_impl.start_queue_runners()
 
diff --git a/tensorflow/python/training/moving_averages_test.py b/tensorflow/python/training/moving_averages_test.py
index 03bcde9c84..0a7cff4f56 100644
--- a/tensorflow/python/training/moving_averages_test.py
+++ b/tensorflow/python/training/moving_averages_test.py
@@ -43,7 +43,7 @@ class MovingAveragesTest(test.TestCase):
       decay = 0.25
       assign = moving_averages.assign_moving_average(
           var, val, decay, zero_debias=False)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose([10.0, 11.0], self.evaluate(var))
       assign.op.run()
       self.assertAllClose(
@@ -57,7 +57,7 @@ class MovingAveragesTest(test.TestCase):
       val = constant_op.constant([1.0, 2.0], dtypes.float32)
       decay = 0.25
       assign = moving_averages.assign_moving_average(var, val, decay)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllClose([0.0, 0.0], self.evaluate(var))
       assign.op.run()
       self.assertAllClose(
@@ -98,7 +98,7 @@ class MovingAveragesTest(test.TestCase):
       val = array_ops.placeholder(dtypes.float32, [])
 
       wma = moving_averages.weighted_moving_average(val, decay, weight)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       # Get the first weighted moving average.
       val_1 = 3.0
@@ -125,7 +125,7 @@ class MovingAveragesTest(test.TestCase):
       val = array_ops.placeholder(dtypes.bfloat16, [])
 
       wma = moving_averages.weighted_moving_average(val, decay, weight)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       # Get the first weighted moving average.
       val_1 = 3.0
@@ -164,7 +164,7 @@ class ExponentialMovingAverageTest(test.TestCase):
     thirties = _Repeat(30.0, dim)
     var0 = variables.Variable(tens, name="v0")
     var1 = variables.Variable(thirties, name="v1")
-    variables.global_variables_initializer().run()
+    self.evaluate(variables.global_variables_initializer())
     # Note that tensor2 is not a Variable but just a plain Tensor resulting
     # from the sum operation.
     tensor2 = var0 + var1
@@ -178,7 +178,7 @@ class ExponentialMovingAverageTest(test.TestCase):
     self.assertFalse(avg0 in variables.trainable_variables())
     self.assertFalse(avg1 in variables.trainable_variables())
     self.assertFalse(avg2 in variables.trainable_variables())
-    variables.global_variables_initializer().run()
+    self.evaluate(variables.global_variables_initializer())
 
     self.assertEqual("v0/ExponentialMovingAverage:0", avg0.name)
     self.assertEqual("v1/ExponentialMovingAverage:0", avg1.name)
diff --git a/tensorflow/python/training/queue_runner_test.py b/tensorflow/python/training/queue_runner_test.py
index 2f6e924f98..c5085079b7 100644
--- a/tensorflow/python/training/queue_runner_test.py
+++ b/tensorflow/python/training/queue_runner_test.py
@@ -49,7 +49,7 @@ class QueueRunnerTest(test.TestCase):
       var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       qr = queue_runner_impl.QueueRunner(queue, [count_up_to])
       threads = qr.create_threads(sess)
       self.assertEqual(sorted(t.name for t in threads),
@@ -77,7 +77,7 @@ class QueueRunnerTest(test.TestCase):
       self.assertEqual(sorted(t.name for t in threads),
                        ["QueueRunnerThread-fifo_queue-CountUpTo:0",
                         "QueueRunnerThread-fifo_queue-CountUpTo_1:0"])
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       for t in threads:
         t.start()
       for t in threads:
@@ -93,7 +93,7 @@ class QueueRunnerTest(test.TestCase):
       qr = queue_runner_impl.QueueRunner(queue, [_MockOp("i fail"),
                                                  _MockOp("so fail")])
       threads = qr.create_threads(sess)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       for t in threads:
         t.start()
       for t in threads:
@@ -140,7 +140,7 @@ class QueueRunnerTest(test.TestCase):
       var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       qr = queue_runner_impl.QueueRunner(queue, [count_up_to])
       # As the coordinator to stop.  The queue runner should
       # finish immediately.
@@ -196,7 +196,7 @@ class QueueRunnerTest(test.TestCase):
         var = variables.VariableV1(zero64)
         count_up_to = var.count_up_to(3)
         queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         coord = coordinator.Coordinator()
         qr = queue_runner_impl.QueueRunner(queue, [count_up_to])
         # NOTE that this test does not actually start the threads.
@@ -212,7 +212,7 @@ class QueueRunnerTest(test.TestCase):
       var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       coord = coordinator.Coordinator()
       qr = queue_runner_impl.QueueRunner(queue, [count_up_to])
       threads = []
@@ -229,7 +229,7 @@ class QueueRunnerTest(test.TestCase):
       var = variables.VariableV1(zero64)
       count_up_to = var.count_up_to(3)
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       qr = queue_runner_impl.QueueRunner(queue, [count_up_to,
                                                  _MockOp("bad_op")])
       threads = qr.create_threads(sess, start=True)
diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py
index 95c21cb815..d1b51adaa4 100644
--- a/tensorflow/python/training/saver_test.py
+++ b/tensorflow/python/training/saver_test.py
@@ -124,8 +124,8 @@ class SaverTest(test.TestCase):
       if not context.executing_eagerly():
         self.assertEqual(
             len(variables.report_uninitialized_variables().eval()), 2)
-        self.assertEqual(0, len(v2.keys().eval()))
-        self.assertEqual(0, len(v2.values().eval()))
+        self.assertEqual(0, len(self.evaluate(v2.keys())))
+        self.assertEqual(0, len(self.evaluate(v2.values())))
       # Restore the saved values in the parameter nodes.
       save = saver_module.Saver({"v0": v0, "v1": v1, "v2": v2.saveable})
       save.restore(sess, save_path)
@@ -331,10 +331,10 @@ class SaverTest(test.TestCase):
       self.evaluate(init_all_op)
 
       # Check that the parameter nodes have been initialized.
-      self.assertEqual(10.0, v0.eval())
-      self.assertEqual(20.0, v1.eval())
-      self.assertEqual(b"k1", v2.keys().eval())
-      self.assertEqual(30.0, v2.values().eval())
+      self.assertEqual(10.0, self.evaluate(v0))
+      self.assertEqual(20.0, self.evaluate(v1))
+      self.assertEqual(b"k1", self.evaluate(v2.keys()))
+      self.assertEqual(30.0, self.evaluate(v2.values()))
 
       # Save the initialized values in the file at "save_path"
       val = save.save(sess, save_path1)
@@ -360,16 +360,16 @@ class SaverTest(test.TestCase):
       # Assert that the variables are not initialized.
       self.assertEqual(
           len(variables.report_uninitialized_variables().eval()), 2)
-      self.assertEqual(0, len(v2.keys().eval()))
-      self.assertEqual(0, len(v2.values().eval()))
+      self.assertEqual(0, len(self.evaluate(v2.keys())))
+      self.assertEqual(0, len(self.evaluate(v2.values())))
 
       # Restore the saved values in the parameter nodes.
       save.restore(sess, save_path2)
       # Check that the parameter nodes have been restored.
-      self.assertEqual(10.0, v0.eval())
-      self.assertEqual(20.0, v1.eval())
-      self.assertEqual(b"k1", v2.keys().eval())
-      self.assertEqual(30.0, v2.values().eval())
+      self.assertEqual(10.0, self.evaluate(v0))
+      self.assertEqual(20.0, self.evaluate(v1))
+      self.assertEqual(b"k1", self.evaluate(v2.keys()))
+      self.assertEqual(30.0, self.evaluate(v2.values()))
 
   @test_util.run_deprecated_v1
   def testFilenameTensor(self):
@@ -398,7 +398,7 @@ class SaverTest(test.TestCase):
       # Build a graph with 1 node, and save and restore for them.
       v = variables.VariableV1(np.int64(15), name="v")
       save = saver_module.Saver({"v": v}, restore_sequentially=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       # Save the initialized values in the file at "save_path"
       val = save.save(sess, save_path)
@@ -416,7 +416,7 @@ class SaverTest(test.TestCase):
       # Restore the saved values in the parameter nodes.
       save.restore(sess, save_path)
       # Check that the parameter nodes have been restored.
-      self.assertEqual(np.int64(15), v.eval())
+      self.assertEqual(np.int64(15), self.evaluate(v))
 
   def testSomeErrors(self):
     with ops_lib.Graph().as_default():
@@ -478,14 +478,14 @@ class SaverTest(test.TestCase):
       v2 = saver_test_utils.CheckpointedOp(name="v2")
       v2_init = v2.insert("k1", 30.0)
       save = saver_module.Saver([v0, v1, v2.saveable])
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       v2_init.run()
 
       # Check that the parameter nodes have been initialized.
-      self.assertEqual(10.0, v0.eval())
-      self.assertEqual(20.0, v1.eval())
-      self.assertEqual(b"k1", v2.keys().eval())
-      self.assertEqual(30.0, v2.values().eval())
+      self.assertEqual(10.0, self.evaluate(v0))
+      self.assertEqual(20.0, self.evaluate(v1))
+      self.assertEqual(b"k1", self.evaluate(v2.keys()))
+      self.assertEqual(30.0, self.evaluate(v2.values()))
 
       # Save the initialized values in the file at "save_path"
       val = save.save(sess, save_path)
@@ -506,16 +506,16 @@ class SaverTest(test.TestCase):
       with self.assertRaisesWithPredicateMatch(
           errors_impl.OpError, lambda e: "uninitialized value v1" in e.message):
         self.evaluate(v1)
-      self.assertEqual(0, len(v2.keys().eval()))
-      self.assertEqual(0, len(v2.values().eval()))
+      self.assertEqual(0, len(self.evaluate(v2.keys())))
+      self.assertEqual(0, len(self.evaluate(v2.values())))
 
       # Restore the saved values in the parameter nodes.
       save.restore(sess, save_path)
       # Check that the parameter nodes have been restored.
-      self.assertEqual(10.0, v0.eval())
-      self.assertEqual(20.0, v1.eval())
-      self.assertEqual(b"k1", v2.keys().eval())
-      self.assertEqual(30.0, v2.values().eval())
+      self.assertEqual(10.0, self.evaluate(v0))
+      self.assertEqual(20.0, self.evaluate(v1))
+      self.assertEqual(b"k1", self.evaluate(v2.keys()))
+      self.assertEqual(30.0, self.evaluate(v2.values()))
 
     # Build another graph with 2 nodes, initialized
     # differently, and a Restore node for them.
@@ -525,20 +525,20 @@ class SaverTest(test.TestCase):
       v2_2 = saver_test_utils.CheckpointedOp(name="v2")
       save2 = saver_module.Saver([v0_2, v1_2, v2_2.saveable])
       v2_2.insert("k1000", 3000.0).run()
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       # Check that the parameter nodes have been initialized.
-      self.assertEqual(1000.0, v0_2.eval())
-      self.assertEqual(2000.0, v1_2.eval())
-      self.assertEqual(b"k1000", v2_2.keys().eval())
-      self.assertEqual(3000.0, v2_2.values().eval())
+      self.assertEqual(1000.0, self.evaluate(v0_2))
+      self.assertEqual(2000.0, self.evaluate(v1_2))
+      self.assertEqual(b"k1000", self.evaluate(v2_2.keys()))
+      self.assertEqual(3000.0, self.evaluate(v2_2.values()))
       # Restore the values saved earlier in the parameter nodes.
       save2.restore(sess, save_path)
       # Check that the parameter nodes have been restored.
-      self.assertEqual(10.0, v0_2.eval())
-      self.assertEqual(20.0, v1_2.eval())
-      self.assertEqual(b"k1", v2_2.keys().eval())
-      self.assertEqual(30.0, v2_2.values().eval())
+      self.assertEqual(10.0, self.evaluate(v0_2))
+      self.assertEqual(20.0, self.evaluate(v1_2))
+      self.assertEqual(b"k1", self.evaluate(v2_2.keys()))
+      self.assertEqual(30.0, self.evaluate(v2_2.values()))
 
   def _SaveAndLoad(self, var_name, var_value, other_value, save_path):
     with self.session(graph=ops_lib.Graph()) as sess:
@@ -582,14 +582,14 @@ class SaverTest(test.TestCase):
       with sess.graph.device(test.gpu_device_name()):
         v0_1 = variables.VariableV1(123.45)
       save = saver_module.Saver({"v0": v0_1})
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       save.save(sess, save_path)
 
     with session.Session("", graph=ops_lib.Graph()) as sess:
       with sess.graph.device(test.gpu_device_name()):
         v0_2 = variables.VariableV1(543.21)
       save = saver_module.Saver({"v0": v0_2})
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
   def testSharedServerOnGPU(self):
     if not test.is_gpu_available():
@@ -599,14 +599,14 @@ class SaverTest(test.TestCase):
       with sess.graph.device(test.gpu_device_name()):
         v0_1 = variables.VariableV1(123.45)
       save = saver_module.Saver({"v0": v0_1}, sharded=True, allow_empty=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       save.save(sess, save_path)
 
     with session.Session("", graph=ops_lib.Graph()) as sess:
       with sess.graph.device(test.gpu_device_name()):
         v0_2 = variables.VariableV1(543.21)
       save = saver_module.Saver({"v0": v0_2}, sharded=True, allow_empty=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
   def testVariables(self):
     save_path = os.path.join(self.get_temp_dir(), "variables")
@@ -627,10 +627,10 @@ class SaverTest(test.TestCase):
       # Saver with no arg, defaults to 'all variables'.
       save = saver_module.Saver()
       save.restore(sess, save_path)
-      self.assertAllClose(1.0, one.eval())
-      self.assertAllClose([2.0, 2.0, 2.0], twos.eval())
-      self.assertEqual(b"k1", v2.keys().eval())
-      self.assertEqual(3.0, v2.values().eval())
+      self.assertAllClose(1.0, self.evaluate(one))
+      self.assertAllClose([2.0, 2.0, 2.0], self.evaluate(twos))
+      self.assertEqual(b"k1", self.evaluate(v2.keys()))
+      self.assertEqual(3.0, self.evaluate(v2.values()))
 
   def testVarListShouldBeEmptyInDeferredBuild(self):
     with ops_lib.Graph().as_default():
@@ -664,8 +664,8 @@ class SaverTest(test.TestCase):
       # Saver with no arg, defaults to 'all variables'.
       save = saver_module.Saver()
       save.restore(sess, save_path)
-      self.assertAllClose(1.0, one.eval())
-      self.assertAllClose([2.0, 2.0, 2.0], twos.eval())
+      self.assertAllClose(1.0, self.evaluate(one))
+      self.assertAllClose([2.0, 2.0, 2.0], self.evaluate(twos))
 
   @test_util.run_v1_only("b/120545219")
   def testReshape(self):
@@ -691,7 +691,8 @@ class SaverTest(test.TestCase):
       var = variables.VariableV1([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])
       save = saver_module.Saver(reshape=True)
       save.restore(sess, save_path)
-      self.assertAllClose([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], var.eval())
+      self.assertAllClose([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
+                          self.evaluate(var))
 
   @test_util.run_in_graph_and_eager_modes
   def testSaveWithGlobalStep(self, pad_step_number=False):
@@ -726,7 +727,6 @@ class SaverTest(test.TestCase):
   def testSaveWithGlobalStepWithPadding(self):
     self.testSaveWithGlobalStep(pad_step_number=True)
 
-  @test_util.run_v1_only("b/120545219")
   def testSaveToNonexistingPath(self):
     file_io.write_string_to_file(
         os.path.join(self.get_temp_dir(), "actually_a_file"), "")
@@ -753,8 +753,8 @@ class SaverTest(test.TestCase):
           self.evaluate(init_all_op)
 
           # Check that the parameter nodes have been initialized.
-          self.assertEqual(10.0, v0.eval())
-          self.assertEqual(20.0, v1.eval())
+          self.assertEqual(10.0, self.evaluate(v0))
+          self.assertEqual(20.0, self.evaluate(v1))
 
           # Save the graph.
           save.save(sess, save_path)
@@ -763,13 +763,12 @@ class SaverTest(test.TestCase):
           # Restore the saved values in the parameter nodes.
           save.restore(sess, save_path)
           # Check that the parameter nodes have been restored.
-          self.assertEqual(10.0, v0.eval())
-          self.assertEqual(20.0, v1.eval())
+          self.assertEqual(10.0, self.evaluate(v0))
+          self.assertEqual(20.0, self.evaluate(v1))
       except ValueError as exc:
         error_msg_template = "Parent directory of {} doesn't exist, can't save."
         self.assertEqual(error_msg_template.format(save_path), str(exc))
 
-  @test_util.run_deprecated_v1
   def testSaveToURI(self):
     # ParseURI functions don't work on Windows yet.
     # TODO(jhseu): Remove this check when it works.
@@ -789,8 +788,8 @@ class SaverTest(test.TestCase):
       self.evaluate(init_all_op)
 
       # Check that the parameter nodes have been initialized.
-      self.assertEqual(10.0, v0.eval())
-      self.assertEqual(20.0, v1.eval())
+      self.assertEqual(10.0, self.evaluate(v0))
+      self.assertEqual(20.0, self.evaluate(v1))
       save.save(sess, save_path)
 
   def testSaveRestoreAndValidateVariableDtype(self):
@@ -835,7 +834,7 @@ class SaverTest(test.TestCase):
       orig_vars = _model()
       self.evaluate(variables.global_variables_initializer())
       save = saver_module.Saver(max_to_keep=1)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       save.save(sess, save_dir)
       orig_vals = self.evaluate(orig_vars)
 
@@ -882,7 +881,7 @@ class SaveRestoreShardedTest(test.TestCase):
           },
           write_version=self._WRITE_VERSION,
           sharded=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       t0.insert("k1", 30.0).run()
       t1.insert("k2", 40.0).run()
       val = save.save(sess, save_path)
@@ -908,15 +907,15 @@ class SaveRestoreShardedTest(test.TestCase):
             },
             write_version=self._WRITE_VERSION,
             sharded=True)
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         t0.insert("k11", 33.0).run()
-        self.assertEqual(111, v0.eval())
-        self.assertEqual(b"k11", t0.keys().eval())
-        self.assertEqual(33.0, t0.values().eval())
+        self.assertEqual(111, self.evaluate(v0))
+        self.assertEqual(b"k11", self.evaluate(t0.keys()))
+        self.assertEqual(33.0, self.evaluate(t0.values()))
         save.restore(sess, save_path + "-00000-of-00002")
-        self.assertEqual(10, v0.eval())
-        self.assertEqual(b"k1", t0.keys().eval())
-        self.assertEqual(30.0, t0.values().eval())
+        self.assertEqual(10, self.evaluate(v0))
+        self.assertEqual(b"k1", self.evaluate(t0.keys()))
+        self.assertEqual(30.0, self.evaluate(t0.values()))
 
       # Restore different ops from shard 1 of the saved files.
       with session.Session(
@@ -932,15 +931,15 @@ class SaveRestoreShardedTest(test.TestCase):
             },
             write_version=self._WRITE_VERSION,
             sharded=True)
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         t1.insert("k22", 44.0).run()
-        self.assertEqual(222, v1.eval())
-        self.assertEqual(b"k22", t1.keys().eval())
-        self.assertEqual(44.0, t1.values().eval())
+        self.assertEqual(222, self.evaluate(v1))
+        self.assertEqual(b"k22", self.evaluate(t1.keys()))
+        self.assertEqual(44.0, self.evaluate(t1.values()))
         save.restore(sess, save_path + "-00001-of-00002")
-        self.assertEqual(20, v1.eval())
-        self.assertEqual(b"k2", t1.keys().eval())
-        self.assertEqual(40.0, t1.values().eval())
+        self.assertEqual(20, self.evaluate(v1))
+        self.assertEqual(b"k2", self.evaluate(t1.keys()))
+        self.assertEqual(40.0, self.evaluate(t1.values()))
 
     # Now try a restore with the sharded filename.
     with session.Session(
@@ -961,26 +960,26 @@ class SaveRestoreShardedTest(test.TestCase):
           },
           write_version=self._WRITE_VERSION,
           sharded=True)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       t0.insert("k11", 33.0).run()
       t1.insert("k22", 44.0).run()
-      self.assertEqual(111, v0.eval())
-      self.assertEqual(222, v1.eval())
-      self.assertEqual(b"k11", t0.keys().eval())
-      self.assertEqual(33.0, t0.values().eval())
-      self.assertEqual(b"k22", t1.keys().eval())
-      self.assertEqual(44.0, t1.values().eval())
+      self.assertEqual(111, self.evaluate(v0))
+      self.assertEqual(222, self.evaluate(v1))
+      self.assertEqual(b"k11", self.evaluate(t0.keys()))
+      self.assertEqual(33.0, self.evaluate(t0.values()))
+      self.assertEqual(b"k22", self.evaluate(t1.keys()))
+      self.assertEqual(44.0, self.evaluate(t1.values()))
       save_path = os.path.join(self.get_temp_dir(), "sharded_basics")
       if save._write_version is saver_pb2.SaverDef.V1:
         save.restore(sess, save_path + "-?????-of-?????")
       else:
         save.restore(sess, save_path)
-      self.assertEqual(10, v0.eval())
-      self.assertEqual(20, v1.eval())
-      self.assertEqual(b"k1", t0.keys().eval())
-      self.assertEqual(30.0, t0.values().eval())
-      self.assertEqual(b"k2", t1.keys().eval())
-      self.assertEqual(40.0, t1.values().eval())
+      self.assertEqual(10, self.evaluate(v0))
+      self.assertEqual(20, self.evaluate(v1))
+      self.assertEqual(b"k1", self.evaluate(t0.keys()))
+      self.assertEqual(30.0, self.evaluate(t0.values()))
+      self.assertEqual(b"k2", self.evaluate(t1.keys()))
+      self.assertEqual(40.0, self.evaluate(t1.values()))
 
     if save._write_version is saver_pb2.SaverDef.V1:
       self.assertEqual(
@@ -1028,7 +1027,7 @@ class SaveRestoreShardedTest(test.TestCase):
           else:
             vs = [variables.VariableV1(rnd, name=var_name)]
 
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         if call_saver_with_dict:
           saver = saver_module.Saver({var_name: vs[0]})
         else:
@@ -1056,7 +1055,7 @@ class SaveRestoreShardedTest(test.TestCase):
                   name=var_name)
           ]
 
-        variables.global_variables_initializer().run()
+        self.evaluate(variables.global_variables_initializer())
         if call_saver_with_dict:
           saver = saver_module.Saver({
               var_name: new_vs[0]
@@ -1203,7 +1202,7 @@ class MaxToKeepTest(test.TestCase):
     with self.cached_session() as sess:
       v = variables.VariableV1(10.0, name="v")
       save = saver_module.Saver({"v": v}, max_to_keep=2)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual([], save.last_checkpoints)
 
       s1 = save.save(sess, os.path.join(save_dir, "s1"))
@@ -1388,7 +1387,7 @@ class MaxToKeepTest(test.TestCase):
               "v0": v0,
               "v1": v1
           }, sharded=True, max_to_keep=2)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertEqual([], save.last_checkpoints)
 
       s1 = save.save(sess, os.path.join(save_dir, "s1"))
@@ -1434,14 +1433,13 @@ class MaxToKeepTest(test.TestCase):
       self.assertTrue(
           gfile.Exists(checkpoint_management.meta_graph_filename(s3)))
 
-  @test_util.run_deprecated_v1
   def testNoMaxToKeep(self):
     save_dir = self._get_test_dir("no_max_to_keep")
     save_dir2 = self._get_test_dir("max_to_keep_0")
 
     with self.cached_session() as sess:
       v = variables.VariableV1(10.0, name="v")
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       # Test max_to_keep being None.
       save = saver_module.Saver({"v": v}, max_to_keep=None)
@@ -1463,14 +1461,13 @@ class MaxToKeepTest(test.TestCase):
       self.assertEqual([], save2.last_checkpoints)
       self.assertTrue(checkpoint_management.checkpoint_exists(s2))
 
-  @test_util.run_deprecated_v1
   def testNoMetaGraph(self):
     save_dir = self._get_test_dir("no_meta_graph")
 
     with self.cached_session() as sess:
       v = variables.VariableV1(10.0, name="v")
       save = saver_module.Saver({"v": v})
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       s1 = save.save(sess, os.path.join(save_dir, "s1"), write_meta_graph=False)
       self.assertTrue(checkpoint_management.checkpoint_exists(s1))
@@ -1487,7 +1484,6 @@ class KeepCheckpointEveryNHoursTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   @test.mock.patch.object(saver_module, "time")
-  @test_util.run_deprecated_v1
   def testNonSharded(self, mock_time):
     save_dir = self._get_test_dir("keep_checkpoint_every_n_hours")
 
@@ -1607,7 +1603,6 @@ class SaveRestoreWithVariableNameMap(test.TestCase):
       self.assertEqual(20.0, self.evaluate(v1))
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_v1_only("b/120545219")
   def testNonReshapeResourceVariable(self):
     self._testNonReshape(resource_variable_ops.ResourceVariable)
 
@@ -1714,7 +1709,7 @@ class MetaGraphTest(test.TestCase):
       saver1 = saver_module.Saver({"v1": v1}, name="saver1")
       ops_lib.add_to_collection("savers", saver0)
       ops_lib.add_to_collection("savers", saver1)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       # Saves to different checkpoints.
       saver0.save(sess, saver0_ckpt)
       saver1.save(sess, saver1_ckpt)
@@ -1760,7 +1755,8 @@ class MetaGraphTest(test.TestCase):
       new_saver0.restore(sess, saver0_ckpt)
       v0 = sess.graph.get_tensor_by_name("v0:0")
       v1 = sess.graph.get_tensor_by_name("v1:0")
-      self.assertAllEqual([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], v0.eval())
+      self.assertAllEqual([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
+                          self.evaluate(v0))
       self.assertEqual([3, 2], v0.get_shape())
       self.assertEqual([], v1.get_shape())
       with self.assertRaisesWithPredicateMatch(
@@ -1770,7 +1766,7 @@ class MetaGraphTest(test.TestCase):
       new_saver1 = savers[1]
       new_saver1.restore(sess, saver1_ckpt)
       v1 = sess.graph.get_tensor_by_name("v1:0")
-      self.assertEqual(11.0, v1.eval())
+      self.assertEqual(11.0, self.evaluate(v1))
 
   @test_util.run_v1_only("b/120545219")
   def testMultiSaverCollection(self):
@@ -1794,7 +1790,7 @@ class MetaGraphTest(test.TestCase):
       saver1 = saver_module.Saver({"v1": v1}, name="saver1")
       ops_lib.add_to_collection("savers", saver0)
       ops_lib.add_to_collection("savers", saver1)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       # Saves to different checkpoints.
       saver0.save(sess, saver0_ckpt)
@@ -1878,7 +1874,7 @@ class MetaGraphTest(test.TestCase):
 
       # The names are different and will work.
       slice_saver = saver_module.Saver({"first": v1, "second": v2})
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       # Exports to meta_graph
       meta_graph_def = slice_saver.export_meta_graph(filename)
 
@@ -2093,7 +2089,6 @@ class MetaGraphTest(test.TestCase):
       return i + 1, x + r
     self._testWhileLoopAndGradientSerDes(body)
 
-  @test_util.run_deprecated_v1
   def testNestedControlFlowSerDes(self):
     # Test while loop in a cond in a while loop.
     # pylint: disable=g-long-lambda
@@ -2745,7 +2740,7 @@ class ScopedGraphTest(test.TestCase):
       graph.add_to_collection(ops_lib.GraphKeys.SAVERS, saver2)
 
     with self.session(graph=graph) as sess:
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       saver1.save(sess, saver1_ckpt, write_state=False)
       saver2.save(sess, saver2_ckpt, write_state=False)
 
@@ -2762,7 +2757,7 @@ class ScopedGraphTest(test.TestCase):
 
     with self.session(graph=graph1) as sess:
       saver_list1[0].restore(sess, saver1_ckpt)
-      self.assertEqual(1.0, var_dict1["variable1:0"].eval())
+      self.assertEqual(1.0, self.evaluate(var_dict1["variable1:0"]))
 
     graph2 = ops_lib.Graph()
     var_dict2 = meta_graph.copy_scoped_meta_graph(
@@ -2777,7 +2772,7 @@ class ScopedGraphTest(test.TestCase):
 
     with self.session(graph=graph2) as sess:
       saver_list2[0].restore(sess, saver2_ckpt)
-      self.assertEqual(2.0, var_dict2["variable2:0"].eval())
+      self.assertEqual(2.0, self.evaluate(var_dict2["variable2:0"]))
 
 
 class _OwnsAVariableSimple(checkpointable_base.CheckpointableBase):
@@ -3010,7 +3005,6 @@ class CheckpointableCompatibilityTests(test.TestCase):
             "a mismatch between the current graph and the graph"):
           a_saver.restore(sess=sess, save_path=save_path)
 
-  @test_util.run_v1_only("b/120545219")
   def testLoadFromObjectBasedGraph(self):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
diff --git a/tensorflow/python/training/slot_creator_test.py b/tensorflow/python/training/slot_creator_test.py
index f1f0d58a69..ec2eec3932 100644
--- a/tensorflow/python/training/slot_creator_test.py
+++ b/tensorflow/python/training/slot_creator_test.py
@@ -38,7 +38,7 @@ class SlotCreatorTest(test.TestCase):
       v = variables.Variable([1.0, 2.5], name="var")
       slot = slot_creator.create_slot(v, v.initialized_value(), name="slot")
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual("var/slot", slot.op.name)
       self.assertEqual([2], slot.get_shape().as_list())
@@ -51,7 +51,7 @@ class SlotCreatorTest(test.TestCase):
       v = constant_op.constant([1.0, 2.5], name="const")
       slot = slot_creator.create_slot(v, v * 2, name="slot")
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual("const/slot", slot.op.name)
       self.assertEqual([2], slot.get_shape().as_list())
@@ -66,7 +66,7 @@ class SlotCreatorTest(test.TestCase):
         slot = slot_creator.create_zeros_slot(
             v, name="slot", dtype=dtypes.float64)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual("var/slot", slot.op.name)
       self.assertEqual([2], slot.get_shape().as_list())
@@ -88,7 +88,7 @@ class SlotCreatorTest(test.TestCase):
         slot = slot_creator.create_zeros_slot(
             v, name="slot", dtype=dtypes.float64)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual("var/slot", slot.op.name)
       self.assertEqual([2], array_ops.shape(slot).eval())
@@ -102,7 +102,7 @@ class SlotCreatorTest(test.TestCase):
       with ops.control_dependencies(None):
         slot = slot_creator.create_zeros_slot(v, name="slot")
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual("const/slot", slot.op.name)
       self.assertEqual([2], slot.get_shape().as_list())
@@ -118,7 +118,7 @@ class SlotCreatorTest(test.TestCase):
         slot = slot_creator.create_zeros_slot(
             v, name="slot", dtype=dtypes.float64)
 
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertEqual("const/slot", slot.op.name)
       self.assertEqual([2], array_ops.shape(slot).eval())
diff --git a/tensorflow/python/training/training_ops_test.py b/tensorflow/python/training/training_ops_test.py
index ba0f40999b..8ba6abdcf9 100644
--- a/tensorflow/python/training/training_ops_test.py
+++ b/tensorflow/python/training/training_ops_test.py
@@ -53,7 +53,7 @@ class TrainingOpsTest(TensorFlowTestCase):
     self.setUp()
     with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
       self.assertAllCloseAccordingToType(x, self.evaluate(var))
       apply_sgd = training_ops.apply_gradient_descent(var, alpha, delta)
       out = self.evaluate(apply_sgd)
@@ -74,7 +74,7 @@ class TrainingOpsTest(TensorFlowTestCase):
     with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertAllCloseAccordingToType(x, self.evaluate(var))
       apply_adagrad = training_ops.apply_adagrad(var, accum, lr, grad)
@@ -99,7 +99,7 @@ class TrainingOpsTest(TensorFlowTestCase):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       linear = variables.VariableV1(z)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertAllCloseAccordingToType(x, self.evaluate(var))
       apply_ftrl = training_ops.apply_ftrl(var, accum, linear, grad, lr, l1, l2,
@@ -156,7 +156,7 @@ class TrainingOpsTest(TensorFlowTestCase):
     with self.session(use_gpu=False):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertAllCloseAccordingToType(x, self.evaluate(var))
       sparse_apply_adagrad = training_ops.sparse_apply_adagrad(
@@ -187,7 +187,7 @@ class TrainingOpsTest(TensorFlowTestCase):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       linear = variables.VariableV1(z)
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertAllCloseAccordingToType(x, self.evaluate(var))
       sparse_apply_ftrl = training_ops.sparse_apply_ftrl(
@@ -285,7 +285,7 @@ class TrainingOpsTest(TensorFlowTestCase):
       beta2_power_t = variables.VariableV1(beta2_power)
       lr_t = constant_op.constant(lr, self._toType(var.dtype), [])
       epsilon_t = constant_op.constant(epsilon, self._toType(var.dtype), [])
-      variables.global_variables_initializer().run()
+      self.evaluate(variables.global_variables_initializer())
 
       self.assertAllCloseAccordingToType(var, self.evaluate(var_t))
       new_var, _, _ = self._adamUpdateNumpy(var, grad, t, m, v, lr, beta1,
-- 
GitLab


From cd0180d075cdffa442361802704f29fb8085cd3b Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 12 Dec 2018 08:12:47 -0800
Subject: [PATCH 423/873] Don't constant fold LoopCond nodes.

Removing the LoopCond of a while_loop can cause the partitioner to fail with:
  A cross-device loop must have a pivot predicate

For some reason this only triggers with while_v2 (the lowered while
loop is slightly different than what would be produced by the original
while_loop).

PiperOrigin-RevId: 225188075
---
 tensorflow/core/grappler/optimizers/constant_folding.cc | 6 ++++++
 tensorflow/python/eager/function_test.py                | 1 +
 2 files changed, 7 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc
index 5e3e5d6af9..3882e3b3a9 100644
--- a/tensorflow/core/grappler/optimizers/constant_folding.cc
+++ b/tensorflow/core/grappler/optimizers/constant_folding.cc
@@ -751,6 +751,12 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const {
   if (ModifiesFrameInfo(node)) {
     return false;
   }
+
+  // Removing LoopCond nodes can screw up the partitioner.
+  if (node.op() == "LoopCond") {
+    return false;
+  }
+
   // Skip constants, they're already folded
   if (IsConstant(node)) {
     return false;
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 2697ab5b17..95777a3a65 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -462,6 +462,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     var_t = resource_variable_ops.read_variable_op(var_handle, dtype=v.dtype)
     self.assertEqual(var_t.shape, tensor_shape.TensorShape([2, 2]))
 
+  @test_util.enable_control_flow_v2
   def testVariableInLoopInFunction(self):
 
     @function.defun
-- 
GitLab


From 587cda883091868c1b7ac08dfdceb8e4e57a5593 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Wed, 12 Dec 2018 08:21:11 -0800
Subject: [PATCH 424/873] Add fuzzer for CheckNumerics.

PiperOrigin-RevId: 225189182
---
 tensorflow/core/kernels/fuzzing/BUILD         |  2 +
 .../kernels/fuzzing/check_numerics_fuzz.cc    | 50 +++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc

diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD
index 2d8b734535..fcaf1a8966 100644
--- a/tensorflow/core/kernels/fuzzing/BUILD
+++ b/tensorflow/core/kernels/fuzzing/BUILD
@@ -68,3 +68,5 @@ tf_ops_fuzz_target_lib("decode_json_example")
 tf_oss_fuzz_corpus("decode_json_example")
 
 tf_oss_fuzz_dict("decode_json_example")
+
+tf_ops_fuzz_target_lib("check_numerics")
diff --git a/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc b/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
new file mode 100644
index 0000000000..bcd299e308
--- /dev/null
+++ b/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
@@ -0,0 +1,50 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/kernels/fuzzing/fuzz_session.h"
+
+namespace tensorflow {
+namespace fuzzing {
+
+class FuzzCheckNumerics : public FuzzSession {
+  void BuildGraph(const Scope& scope) override {
+    auto input =
+        tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_FLOAT);
+    auto prefix = "Error: ";
+    (void)tensorflow::ops::CheckNumerics(scope.WithOpName("output"), input,
+                                         prefix);
+  }
+
+  void FuzzImpl(const uint8_t* data, size_t size) override {
+    size_t ratio = sizeof(float) / sizeof(uint8_t);
+    size_t num_floats = size / ratio;
+    const float* float_data = reinterpret_cast<const float*>(data);
+
+    Tensor input_tensor(tensorflow::DT_FLOAT,
+                        TensorShape({static_cast<int64>(size)}));
+    auto flat_tensor = input_tensor.flat<float>();
+    for (size_t i = 0; i < num_floats; i++) {
+      flat_tensor(i) = float_data[i];
+    }
+    RunOneInput(input_tensor).IgnoreError();
+  }
+};
+
+STANDARD_TF_FUZZ_FUNCTION(FuzzCheckNumerics);
+
+}  // end namespace fuzzing
+}  // end namespace tensorflow
-- 
GitLab


From 51900856e93d9708a602c01e877c1eb6488aa6f1 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Wed, 12 Dec 2018 08:53:57 -0800
Subject: [PATCH 425/873] Remove the bias regularizer has the constrain for
 using cudnn backend.

This contrain was originally added due to the different weights format issue
between canonical and cudnn (extra input bias). Now since the input bias is
feeded as zeros in cudnn mode, and weights are unified into one format. Having
bias regularizer should not be a issue.

PiperOrigin-RevId: 225193782
---
 tensorflow/python/keras/layers/recurrent.py     |  3 +--
 .../python/keras/layers/unified_lstm_test.py    | 17 +++++++----------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 93cb805d08..568e879c9c 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -2659,8 +2659,7 @@ class UnifiedLSTM(LSTM):
     self._dropout_mask = None
     self.could_use_cudnn = (
         activation == 'tanh' and recurrent_activation == 'sigmoid' and
-        recurrent_dropout == 0 and not unroll and use_bias and
-        bias_regularizer is None)
+        recurrent_dropout == 0 and not unroll and use_bias)
 
   def call(self, inputs, mask=None, training=None, initial_state=None):
     # LSTM does not support constants. Ignore it during process.
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 6662bb8c04..55ccebb43b 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -159,25 +159,22 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
         existing_loss = loss_value
 
   @parameterized.named_parameters(
-      ('non_tan_activation', 'relu', 'sigmoid', 0, False, True, None),
-      ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True, None),
-      ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True, None),
-      ('unroll', 'tanh', 'sigmoid', 0, True, True, None),
-      ('not_use_bias', 'tanh', 'sigmoid', 0, False, False, None),
-      ('use_bias_regularizer', 'tanh', 'sigmoid', 0, False, True, 'l2')
+      ('non_tan_activation', 'relu', 'sigmoid', 0, False, True),
+      ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True),
+      ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True),
+      ('unroll', 'tanh', 'sigmoid', 0, True, True),
+      ('not_use_bias', 'tanh', 'sigmoid', 0, False, False),
   )
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_could_use_defun_backend(self, activation, recurrent_activation,
-                                   recurrent_dropout, unroll, use_bias,
-                                   bias_regularizer):
+                                   recurrent_dropout, unroll, use_bias):
     layer = keras.layers.UnifiedLSTM(
         1,
         activation=activation,
         recurrent_activation=recurrent_activation,
         recurrent_dropout=recurrent_dropout,
         unroll=unroll,
-        use_bias=use_bias,
-        bias_regularizer=bias_regularizer)
+        use_bias=use_bias)
     self.assertFalse(layer.could_use_cudnn)
 
   def test_unified_lstm_feature_parity_with_canonical_lstm(self):
-- 
GitLab


From da29d1c8e796e4e5849d371cd613727769257056 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 12 Dec 2018 09:48:44 -0800
Subject: [PATCH 426/873] Control flow v2 should only add control edges if
 outer graph does too.

PiperOrigin-RevId: 225202451
---
 .../python/framework/auto_control_deps.py     |  9 +++
 tensorflow/python/framework/ops.py            |  3 +
 .../kernel_tests/control_flow_ops_py_test.py  | 81 +++++++++++++------
 tensorflow/python/ops/cond_v2.py              |  2 +-
 tensorflow/python/ops/while_v2.py             |  2 +-
 5 files changed, 72 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/framework/auto_control_deps.py b/tensorflow/python/framework/auto_control_deps.py
index 30dc959e9a..a72ded1131 100644
--- a/tensorflow/python/framework/auto_control_deps.py
+++ b/tensorflow/python/framework/auto_control_deps.py
@@ -100,6 +100,7 @@ class AutomaticControlDependencies(object):
     # graph (but that would mess up devices and collections at least,
     # probably other things as well).
     self._graph = ops.get_default_graph()
+    self._graph._add_control_dependencies = True  # pylint: disable=protected-access
     self._n_operations = len(self._graph.get_operations())
     return self
 
@@ -170,6 +171,14 @@ class AutomaticControlDependencies(object):
       raise RuntimeError(
           "Graph changed while trying to add control dependencies.")
 
+    # pylint: disable=protected-access
+    if hasattr(self._graph, "outer_graph"):
+      outer_val = self._graph.outer_graph._add_control_dependencies
+      self._graph._add_control_dependencies = outer_val
+    else:
+      self._graph._add_control_dependencies = False
+    # pylint: enable=protected-access
+
     # map from resource tensor to the last op which used it
     last_op_using_resource_tensor = {}
     # set of conditional and loop exits
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index fa306936d6..27c56ef990 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -2896,6 +2896,9 @@ class Graph(object):
     self._last_loss_reduction = None
     self._container = ""
     self._registered_ops = op_def_registry.get_registered_ops()
+    # Set to True if this graph is being built in an
+    # AutomaticControlDependencies context.
+    self._add_control_dependencies = False
 
     # TODO(skyewm): fold as much of the above as possible into the C
     # implementation
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 42cfe9e237..39ceb0d749 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.client import device_lib
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function as eager_function
+from tensorflow.python.eager import wrap_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -921,9 +922,8 @@ class ControlFlowTest(test.TestCase):
     r = control_flow_ops.cond(foo()[1], lambda: 1.0, lambda: 2.0)
     self.assertEqual(self.evaluate(r), 1.0)
 
-  # TODO(b/117945658): reenable
   @test_util.run_in_graph_and_eager_modes
-  def DISABLED_testCondAutoControlDeps(self):
+  def testCondAutoControlDeps(self):
 
     def branch_fn():
       logging_ops.print_v2("A")
@@ -943,11 +943,11 @@ class ControlFlowTest(test.TestCase):
     if not context.executing_eagerly():
       with self.cached_session():
         with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(build_cond(), 10)
+          self.assertEqual(self.evaluate(build_cond()), 10)
         self.assertEqual(printed.contents(), "C\n")
 
         with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(build_nested_cond(), 10)
+          self.assertEqual(self.evaluate(build_nested_cond()), 10)
         self.assertEqual(printed.contents(), "C\n")
 
     # In defuns, all prints should execute in program order.
@@ -970,9 +970,28 @@ class ControlFlowTest(test.TestCase):
         self.assertEqual(self.evaluate(nested_cond()), 10)
       self.assertEqual(printed.contents(), "A\nB\nC\n")
 
-  # TODO(b/117945658): reenable
+    # wrap_function should prune.
+    def pruned_cond():
+      return build_cond()
+    pruned_cond = wrap_function.wrap_function(pruned_cond, [])
+
+    with self.captureWritesToStream(sys.stderr) as printed:
+      self.assertEqual(self.evaluate(pruned_cond()), 10)
+    self.assertEqual(printed.contents(), "C\n")
+
+    def pruned_nested_cond():
+      return build_nested_cond()
+    pruned_nested_cond = wrap_function.wrap_function(pruned_nested_cond, [])
+
+    with self.captureWritesToStream(sys.stderr) as printed:
+      self.assertEqual(self.evaluate(pruned_nested_cond()), 10)
+    self.assertEqual(printed.contents(), "C\n")
+
   @test_util.run_in_graph_and_eager_modes
-  def DISABLED_testWhileAutoControlDeps(self):
+  def testWhileAutoControlDeps(self):
+    # Legacy while_loop fails this test because it produces deprecation notices
+    # in stderr.
+    if not control_flow_util.ENABLE_CONTROL_FLOW_V2: return
 
     def cond(i, unused_x):
       logging_ops.print_v2("A")
@@ -991,40 +1010,56 @@ class ControlFlowTest(test.TestCase):
 
     def build_nested_while():
       return control_flow_ops.cond(
-          constant_op.constant(True), build_while, lambda: (0, 0))
+          constant_op.constant(True), build_while, lambda: [0, 0])
 
     # In v1 graph mode, pruning should make only "D" print.
     if not context.executing_eagerly():
       with self.cached_session():
         with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(build_while()[0], 2)
+          self.assertEqual(self.evaluate(build_while()[0]), 2)
         self.assertEqual(printed.contents(), "D\nD\n")
 
         with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(build_nested_while()[0], 2)
+          self.assertEqual(self.evaluate(build_nested_while()[0]), 2)
         self.assertEqual(printed.contents(), "D\nD\n")
 
     # In defuns, all prints should execute in program order.
-    # This doesn't work with legacy control flow.
-    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
+    @eager_function.defun
+    def while_loop():
+      return build_while()[0]
 
-      @eager_function.defun
-      def while_loop():
-        return build_while()[0]
+    with self.captureWritesToStream(sys.stderr) as printed:
+      self.assertEqual(self.evaluate(while_loop()), 2)
+    self.assertEqual(printed.contents(), "A\nB\nC\nD\nA\nB\nC\nD\nA\n")
 
+    @eager_function.defun
+    def nested_while_loop():
+      return build_nested_while()[0]
+
+    # TODO(b/117840611): calling nested_while_loop fails in eager
+    if not context.executing_eagerly():
       with self.captureWritesToStream(sys.stderr) as printed:
-        self.assertEqual(self.evaluate(while_loop()), 2)
+        self.assertEqual(self.evaluate(nested_while_loop()), 2)
       self.assertEqual(printed.contents(), "A\nB\nC\nD\nA\nB\nC\nD\nA\n")
 
-      @eager_function.defun
-      def nested_while_loop():
-        return build_nested_while()[0]
+    # wrap_function should prune.
+    def pruned_while():
+      return build_while()[0]
+    pruned_while = wrap_function.wrap_function(pruned_while, [])
 
-      # TODO(b/117840611): calling nested_while_loop fails in eager
-      if not context.executing_eagerly():
-        with self.captureWritesToStream(sys.stderr) as printed:
-          self.assertEqual(self.evaluate(nested_while_loop()), 2)
-        self.assertEqual(printed.contents(), "A\nB\nC\nD\nA\nB\nC\nD\nA\n")
+    with self.captureWritesToStream(sys.stderr) as printed:
+      self.assertEqual(self.evaluate(pruned_while()), 2)
+    self.assertEqual(printed.contents(), "D\nD\n")
+
+    def pruned_nested_while():
+      return build_nested_while()[0]
+    pruned_nested_while = wrap_function.wrap_function(pruned_nested_while, [])
+
+    # TODO(b/117840611): calling nested_while_loop fails in eager
+    if not context.executing_eagerly():
+      with self.captureWritesToStream(sys.stderr) as printed:
+        self.assertEqual(self.evaluate(pruned_nested_while()), 2)
+      self.assertEqual(printed.contents(), "D\nD\n")
 
   # Microbenchmark: 256,000 iterations/s.
   @test_util.disable_control_flow_v2("b/116630618 (Times out)")
diff --git a/tensorflow/python/ops/cond_v2.py b/tensorflow/python/ops/cond_v2.py
index abc99c1205..7d09e32e24 100644
--- a/tensorflow/python/ops/cond_v2.py
+++ b/tensorflow/python/ops/cond_v2.py
@@ -61,7 +61,7 @@ def cond_v2(pred, true_fn, false_fn, name="cond"):
 
     # Automatic control dependencies are added in defuns, but not in v1
     # graphs. Propagate that behavior here.
-    add_control_dependencies = util.in_defun()
+    add_control_dependencies = ops.get_default_graph()._add_control_dependencies
     pred = ops.convert_to_tensor(pred)
 
     true_graph = func_graph_module.func_graph_from_py_func(
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index f7566bac9b..3e5a8fcdfa 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -99,7 +99,7 @@ def while_loop(cond,
 
     # Automatic control dependencies are added in defuns, but not in v1
     # graphs. Propagate that behavior here.
-    add_control_dependencies = util.in_defun()
+    add_control_dependencies = ops.get_default_graph()._add_control_dependencies
 
     # Build a `cond` wrapper that can handle the extra counter loop_var.
     def wrapped_cond(loop_counter, *args):
-- 
GitLab


From 46afcd061ca74564329b418a96a0cfb453dca57e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 09:57:48 -0800
Subject: [PATCH 427/873] Implement Dequantize Op in XLA.

1. Only MIN_COMBINED mode is supported;
2. Reshape the output to [d0,..., dn * unpack_size] if input shape is [d0, ..., dn].
3. Only uint32 is supported for the input;
4. Output data type is bfloat16;
5. Only uint8 or uint16 is supported for the original unpacked input.

PiperOrigin-RevId: 225203930
---
 tensorflow/compiler/xla/client/lib/BUILD      |  28 ++
 tensorflow/compiler/xla/client/lib/quantize.h | 162 +++++++++++
 .../compiler/xla/client/lib/quantize_test.cc  | 254 ++++++++++++++++++
 3 files changed, 444 insertions(+)
 create mode 100644 tensorflow/compiler/xla/client/lib/quantize.h
 create mode 100644 tensorflow/compiler/xla/client/lib/quantize_test.cc

diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD
index 8fc221ee2b..970f00759f 100644
--- a/tensorflow/compiler/xla/client/lib/BUILD
+++ b/tensorflow/compiler/xla/client/lib/BUILD
@@ -336,6 +336,34 @@ xla_test(
     ],
 )
 
+cc_library(
+    name = "quantize",
+    hdrs = ["quantize.h"],
+    deps = [
+        ":constants",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/core:lib",
+    ],
+)
+
+xla_test(
+    name = "quantize_test",
+    srcs = ["quantize_test.cc"],
+    tags = ["enable_for_xla_interpreter"],
+    deps = [
+        ":quantize",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla:types",
+        "//tensorflow/compiler/xla:util",
+        "//tensorflow/compiler/xla/client:xla_builder",
+        "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+    ],
+)
+
 cc_library(
     name = "testing",
     srcs = ["testing.cc"],
diff --git a/tensorflow/compiler/xla/client/lib/quantize.h b/tensorflow/compiler/xla/client/lib/quantize.h
new file mode 100644
index 0000000000..e002e5e19c
--- /dev/null
+++ b/tensorflow/compiler/xla/client/lib/quantize.h
@@ -0,0 +1,162 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_QUANTIZE_H_
+#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_QUANTIZE_H_
+
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "tensorflow/compiler/xla/client/lib/constants.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/bfloat16/bfloat16.h"
+
+namespace xla {
+
+constexpr int64 kBitsOfByte = 8;
+
+// Represents the range used for quantization
+struct QuantizedRange {
+  QuantizedRange() = default;
+  QuantizedRange(float min_in, float max_in) : min(min_in), max(max_in) {}
+
+  bool operator==(const QuantizedRange& rhs) const {
+    return this->min == rhs.min && this->max == rhs.max;
+  }
+
+  bool operator!=(const QuantizedRange& rhs) const { return !(*this == rhs); }
+
+  tensorflow::bfloat16 min = tensorflow::bfloat16(0.0f);
+  tensorflow::bfloat16 max = tensorflow::bfloat16(0.0f);
+};
+
+template <typename T>
+inline std::vector<uint32> PackToUint32(absl::Span<const T> input) {
+  const int64 kElementsPerPack = sizeof(uint32) / sizeof(T);
+  const int64 input_size = input.size();
+  const int64 output_size = CeilOfRatio(input_size, kElementsPerPack);
+
+  std::vector<uint32> output_vec;
+  constexpr int64 kShiftBits = sizeof(T) / sizeof(uint8) * kBitsOfByte;
+
+  for (int64 i = 0; i < output_size; i++) {
+    uint32 result = 0;
+    for (int64 p = 0; p < kElementsPerPack; p++) {
+      int64 index = i * kElementsPerPack + p;
+      if (index < input_size) {
+        int64 total_shift_bits = kShiftBits * (kElementsPerPack - p - 1);
+        result |= (input[index] << total_shift_bits);
+      }
+    }
+    output_vec.push_back(result);
+  }
+
+  return output_vec;
+}
+
+// Dequantize the quantized input of packed uint32 to bfloat16.
+// Only uint8 or uint16 is supported for the original unpacked input.
+// Returns a tensor of shape [d0,..., dn * unpack_size] if
+// input shape is [d0, ..., dn], where unpack_size = sizeof(unit32) / sizeof(T).
+template <typename T>
+inline XlaOp Dequantize(XlaOp input, const QuantizedRange& range,
+                        absl::string_view mode_string = "MIN_COMBINED") {
+  XlaBuilder* const builder = input.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    float half_range =
+        !std::is_signed<T>::value
+            ? 0.0f
+            : (static_cast<float>(std::numeric_limits<T>::max()) -
+               std::numeric_limits<T>::min() + 1) /
+                  2.0f;
+    const int64 unpack_size = sizeof(uint32) / sizeof(T);
+    TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(input));
+
+    auto element_type = shape.element_type();
+    if (element_type != U32) {
+      return InvalidArgument(
+          "Only U32 is supported for input type of xla::Dequantize Op.");
+    }
+
+    auto broadcast_size = shape.dimensions();
+    broadcast_size.push_back(unpack_size);
+    std::vector<int64> broadcast_dimensions(shape.dimensions_size());
+    std::iota(broadcast_dimensions.begin(), broadcast_dimensions.end(), 0);
+    // Broadcast the input to [d0, ..., dn, unpack_size] if input size is
+    // [d0, ..., dn].
+    auto broadcast_input =
+        BroadcastInDim(input, broadcast_size, broadcast_dimensions);
+
+    XlaOp iota_r1 = Iota(builder, U32, unpack_size);
+    // Highest significant bytes needs to shift more bytes than lower
+    // significant bytes.
+    XlaOp shift_bytes =
+        xla::ConstantR0<uint32>(builder, unpack_size - 1) - iota_r1;
+
+    const int bytes_of_type = sizeof(T) / sizeof(uint8);
+    XlaOp shift_bits = shift_bytes * xla::ConstantR0<uint32>(
+                                         builder, kBitsOfByte * bytes_of_type);
+
+    // Make bit_mask for different data type T.
+    uint32 bit_mask = 0x00000000;
+    for (int i = 0; i < bytes_of_type; i++) {
+      bit_mask <<= kBitsOfByte;
+      bit_mask |= 0x000000ff;
+    }
+
+    // Shift the input by sizeof(T) bytes and apply bit_mask to unpack.
+    XlaOp shifted_input = ShiftRightLogical(
+        broadcast_input, Broadcast(shift_bits, shape.dimensions()));
+    XlaOp unpack_input =
+        And(shifted_input, xla::ConstantR0<uint32>(builder, bit_mask));
+
+    XlaOp result;
+
+    if (mode_string == "MIN_COMBINED") {
+      const tensorflow::bfloat16 scale_factor =
+          (range.max - range.min) /
+          (static_cast<tensorflow::bfloat16>(std::numeric_limits<T>::max() -
+                                             std::numeric_limits<T>::min()));
+      // result = bfloat16(input + half_range) * scale_factor + range.min
+      XlaOp unpack_input_bf16 = ConvertElementType(unpack_input, BF16);
+      XlaOp half_range_bf16 = xla::ConstantR0<tensorflow::bfloat16>(
+          builder, static_cast<bfloat16>(half_range));
+      XlaOp sum = unpack_input_bf16 + half_range_bf16;
+
+      result =
+          sum * xla::ConstantR0<tensorflow::bfloat16>(builder, scale_factor) +
+          xla::ConstantR0<tensorflow::bfloat16>(builder, range.min);
+    } else {
+      // TODO(wangtao): support other modes.
+      return InvalidArgument(
+          "Only MIN_COMBINED mode is supported in xla::Dequantize Op.");
+    }
+
+    // Reshape the result to [d0,..., dn * unpack_size] if
+    // input shape is [d0, ..., dn].
+    std::vector<int64> result_shape(shape.dimensions());
+    result_shape[shape.dimensions_size() - 1] =
+        shape.dimensions(shape.dimensions_size() - 1) * unpack_size;
+    return Reshape(result, result_shape);
+  });
+}
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_QUANTIZE_H_
diff --git a/tensorflow/compiler/xla/client/lib/quantize_test.cc b/tensorflow/compiler/xla/client/lib/quantize_test.cc
new file mode 100644
index 0000000000..f7ff3502d1
--- /dev/null
+++ b/tensorflow/compiler/xla/client/lib/quantize_test.cc
@@ -0,0 +1,254 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/client/lib/quantize.h"
+
+#include <limits>
+
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+
+namespace xla {
+namespace {
+
+using bfloat16 = tensorflow::bfloat16;
+
+template <typename NativeT>
+std::vector<NativeT> GenerateInput() {
+  std::vector<NativeT> input;
+
+  for (int64 i = std::numeric_limits<NativeT>::min();
+       i < std::numeric_limits<NativeT>::max(); ++i) {
+    input.push_back(static_cast<NativeT>(i));
+  }
+
+  return input;
+}
+
+template <typename NativeT>
+Array2D<NativeT> GenerateLargeSizeInput(int num_columns, int num_rows) {
+  Array2D<NativeT> input(num_columns, num_rows);
+
+  input.FillRandom(6, 128);
+
+  return input;
+}
+
+template <typename NativeT>
+Array2D<uint32> PackLargeInput(Array2D<NativeT> &input) {
+  const int64 size_per_pack = sizeof(uint32) / sizeof(NativeT);
+  int64 width = input.width();
+
+  int64 padded_output_width = CeilOfRatio(width, size_per_pack);
+
+  Array2D<uint32> pack_input(input.height(), padded_output_width);
+
+  for (int h = 0; h < input.height(); h++) {
+    std::vector<NativeT> input_row;
+    for (int w = 0; w < width; w++) {
+      input_row.push_back(input({h, w}));
+    }
+
+    auto pack_input_vec = PackToUint32<uint8>(input_row);
+
+    for (int w = 0; w < padded_output_width; w++) {
+      pack_input(h, w) = pack_input_vec[w];
+    }
+  }
+
+  return pack_input;
+}
+
+template <typename NativeT>
+Array2D<bfloat16> GenerateLargeSizeMinCombinedOutput(
+    Array2D<NativeT> &input, const QuantizedRange &range) {
+  const int64 size_per_pack = sizeof(uint32) / sizeof(NativeT);
+  int64 width = input.width();
+
+  int64 padded_output_width = CeilOfRatio(width, size_per_pack) * size_per_pack;
+
+  Array2D<bfloat16> output(input.height(), padded_output_width, bfloat16(0.0));
+
+  float half_range =
+      !std::is_signed<NativeT>::value
+          ? 0.0f
+          : (static_cast<float>(std::numeric_limits<NativeT>::max() -
+                                std::numeric_limits<NativeT>::min() + 1)) /
+                2.0f;
+  const bfloat16 scale_factor =
+      (range.max - range.min) /
+      (static_cast<bfloat16>(std::numeric_limits<NativeT>::max() -
+                             std::numeric_limits<NativeT>::min()));
+
+  for (int h = 0; h < input.height(); h++) {
+    std::vector<NativeT> input_row;
+    for (int w = 0; w < width; w++) {
+      bfloat16 result =
+          static_cast<bfloat16>(input(h, w) + half_range) * scale_factor +
+          range.min;
+      output(h, w) = result;
+    }
+  }
+
+  return output;
+}
+
+template <typename NativeT>
+std::vector<bfloat16> GenerateMinCombinedOutput(const QuantizedRange &range) {
+  float half_range =
+      !std::is_signed<NativeT>::value
+          ? 0.0f
+          : (static_cast<float>(std::numeric_limits<NativeT>::max() -
+                                std::numeric_limits<NativeT>::min() + 1)) /
+                2.0f;
+  const bfloat16 scale_factor =
+      (range.max - range.min) /
+      (static_cast<bfloat16>(std::numeric_limits<NativeT>::max() -
+                             std::numeric_limits<NativeT>::min()));
+  std::vector<bfloat16> output;
+  for (int64 i = std::numeric_limits<NativeT>::min();
+       i < std::numeric_limits<NativeT>::max(); ++i) {
+    bfloat16 result =
+        static_cast<bfloat16>(i + half_range) * scale_factor + range.min;
+    output.push_back(result);
+  }
+
+  const int64 pack_size = sizeof(uint32) / sizeof(NativeT);
+  const int64 output_size = output.size();
+
+  int64 num_tailing_zeros =
+      CeilOfRatio(output_size, pack_size) * pack_size - output_size;
+
+  output.insert(output.end(), num_tailing_zeros, bfloat16(0.0));
+  return output;
+}
+
+// TODO(wangtao): add a test to make sure this op is the inverse of the existing
+// TF quantize op defined in: third_party/tensorflow/core/kernels/quantize_op.cc
+
+using DequantizeTest = ClientLibraryTestBase;
+
+TEST(PackTest, PackUint8ToUint32) {
+  std::vector<uint8> input = {0xAB, 0x0B, 0x00, 0xF0, 0x01};
+  auto output = PackToUint32<uint8>(input);
+  EXPECT_THAT(output, ::testing::ElementsAre(0xAB0B00F0, 0x01000000));
+}
+
+TEST(PackTest, PackInt8ToUint32) {
+  std::vector<int8> input = {static_cast<signed char>(0x81), 0x0B, 0x00, 0x20,
+                             0x01};
+  auto output = PackToUint32<int8>(input);
+  EXPECT_THAT(output, ::testing::ElementsAre(0x810B0020, 0x01000000));
+}
+
+TEST(PackTest, PackUint8ToUint32PerfectSize) {
+  std::vector<uint8> input = {3, 2, 1, 0};
+  auto output = PackToUint32<uint8>(input);
+  EXPECT_THAT(output, ::testing::ElementsAre(0x03020100));
+}
+
+XLA_TEST_F(DequantizeTest, MinCombinedUint16R1) {
+  XlaBuilder builder(TestName());
+  auto input = GenerateInput<uint16>();
+  auto x = ConstantR1<uint32>(&builder, PackToUint32<uint16>(input));
+  QuantizedRange range(0, 255.0f);
+  xla::Dequantize<uint16>(x, range, "MIN_COMBINED");
+  auto expected = GenerateMinCombinedOutput<uint16>(range);
+  ComputeAndCompareR1<bfloat16>(&builder, expected, {});
+}
+
+XLA_TEST_F(DequantizeTest, MinCombinedUint8R1) {
+  XlaBuilder builder(TestName());
+  auto input = GenerateInput<uint8>();
+  auto x = ConstantR1<uint32>(&builder, PackToUint32<uint8>(input));
+  QuantizedRange range(0, 127.0f);
+  xla::Dequantize<uint8>(x, range, "MIN_COMBINED");
+  auto expected = GenerateMinCombinedOutput<uint8>(range);
+  ComputeAndCompareR1<bfloat16>(&builder, expected, {});
+}
+
+XLA_TEST_F(DequantizeTest, MinCombinedUint8R2) {
+  XlaBuilder builder(TestName());
+  std::vector<std::vector<uint8>> input = {
+      {0, 1, 2, 3},
+      {4, 5, 6, 7},
+      {8, 9, 10, 11},
+      {12, 13, 16, 15},
+  };
+  auto x = ConstantR2<uint32>(&builder, {{PackToUint32<uint8>(input[0])[0]},
+                                         {PackToUint32<uint8>(input[1])[0]},
+                                         {PackToUint32<uint8>(input[2])[0]},
+                                         {PackToUint32<uint8>(input[3])[0]}});
+  QuantizedRange range(0, 255.0f);
+  xla::Dequantize<uint8>(x, range, "MIN_COMBINED");
+  const Array2D<bfloat16> expected = {
+      {bfloat16(0.0), bfloat16(1.0), bfloat16(2.0), bfloat16(3.0)},
+      {bfloat16(4.0), bfloat16(5.0), bfloat16(6.0), bfloat16(7.0)},
+      {bfloat16(8.0), bfloat16(9.0), bfloat16(10.0), bfloat16(11.0)},
+      {bfloat16(12.0), bfloat16(13.0), bfloat16(16.0), bfloat16(15.0)},
+  };
+  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
+}
+
+XLA_TEST_F(DequantizeTest, MinCombinedUint8R2TailingZero) {
+  XlaBuilder builder(TestName());
+  std::vector<std::vector<uint8>> input = {
+      {0, 1, 2, 3, 16},
+      {4, 5, 6, 7, 17},
+      {8, 9, 10, 11, 18},
+      {12, 13, 16, 15, 19},
+  };
+  auto x = ConstantR2<uint32>(
+      &builder,
+      {{PackToUint32<uint8>(input[0])[0], PackToUint32<uint8>(input[0])[1]},
+       {PackToUint32<uint8>(input[1])[0], PackToUint32<uint8>(input[1])[1]},
+       {PackToUint32<uint8>(input[2])[0], PackToUint32<uint8>(input[2])[1]},
+       {PackToUint32<uint8>(input[3])[0], PackToUint32<uint8>(input[3])[1]}});
+  QuantizedRange range(0, 255.0f);
+  xla::Dequantize<uint8>(x, range, "MIN_COMBINED");
+
+  const Array2D<bfloat16> expected = {
+      {bfloat16(0.0), bfloat16(1.0), bfloat16(2.0), bfloat16(3.0),
+       bfloat16(16.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
+      {bfloat16(4.0), bfloat16(5.0), bfloat16(6.0), bfloat16(7.0),
+       bfloat16(17.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
+      {bfloat16(8.0), bfloat16(9.0), bfloat16(10.0), bfloat16(11.0),
+       bfloat16(18.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
+      {bfloat16(12.0), bfloat16(13.0), bfloat16(16.0), bfloat16(15.0),
+       bfloat16(19.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
+  };
+  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
+}
+
+XLA_TEST_F(DequantizeTest, MinCombinedUint8LargeSizeTest) {
+  XlaBuilder builder(TestName());
+  Array2D<uint8> input = GenerateLargeSizeInput<uint8>(500, 3547);
+  Array2D<uint32> input_packed = PackLargeInput<uint8>(input);
+
+  auto x = ConstantR2FromArray2D<uint32>(&builder, input_packed);
+  QuantizedRange range(0, 255.0f);
+  xla::Dequantize<uint8>(x, range, "MIN_COMBINED");
+
+  const Array2D<bfloat16> expected =
+      GenerateLargeSizeMinCombinedOutput<uint8>(input, range);
+  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
+}
+
+}  // namespace
+}  // namespace xla
-- 
GitLab


From 4a2abacb2ab8c53245ff293f23b58468f9f52db5 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 12 Dec 2018 10:06:37 -0800
Subject: [PATCH 428/873] [XLA:Python] Add CustomCall support to Python
 LocalComputationBuilder.

PiperOrigin-RevId: 225205868
---
 .../xla/python/local_computation_builder.cc   | 15 +++++++++++
 .../xla/python/local_computation_builder.h    |  6 +++++
 .../xla/python/local_computation_builder.i    |  1 +
 tensorflow/compiler/xla/python/xla_client.py  | 25 +++++++++++++++++++
 .../compiler/xla/service/hlo_verifier.cc      |  4 ++-
 5 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index d4d31fb8c0..8e3ac381ce 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -783,6 +783,21 @@ LocalOp LocalComputationBuilder::Call(const LocalComputation& local_computation,
   return xla::Call(&builder_, local_computation.computation(), xla_ops);
 }
 
+LocalOp LocalComputationBuilder::CustomCall(
+    const string& call_target_name, absl::Span<const LocalOp> operands,
+    const Shape& shape_with_layout,
+    const std::vector<Shape>& operand_shapes_with_layout,
+    const string& opaque) {
+  std::vector<XlaOp> xla_ops;
+  xla_ops.reserve(operands.size());
+  for (const auto& op : operands) {
+    xla_ops.push_back(op.op());
+  }
+  return xla::CustomCallWithLayout(&builder_, call_target_name, xla_ops,
+                                   shape_with_layout,
+                                   operand_shapes_with_layout, opaque);
+}
+
 LocalOp LocalComputationBuilder::Transpose(
     const LocalOp& operand, absl::Span<const int64> permutation) {
   return xla::Transpose(operand.op(), permutation);
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
index 7647ef44ad..eebbe674e5 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.h
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -352,6 +352,12 @@ class LocalComputationBuilder {
   LocalOp Call(const LocalComputation& local_computation,
                absl::Span<const LocalOp> operands);
 
+  LocalOp CustomCall(const string& call_target_name,
+                     absl::Span<const LocalOp> operands,
+                     const Shape& shape_with_layout,
+                     const std::vector<Shape>& operand_shapes_with_layout,
+                     const string& opaque);
+
   LocalOp Transpose(const LocalOp& operand,
                     absl::Span<const int64> permutation);
 
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
index 82d25304f0..db7e0458f4 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.i
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -1147,6 +1147,7 @@ tensorflow::ImportNumpy();
 %unignore xla::swig::LocalComputationBuilder::Cholesky;
 %unignore xla::swig::LocalComputationBuilder::QR;
 %unignore xla::swig::LocalComputationBuilder::TriangularSolve;
+%unignore xla::swig::LocalComputationBuilder::CustomCall;
 %unignore xla::swig::DeleteLocalComputation;
 %unignore xla::swig::DestructureLocalShapedBufferTuple;
 %unignore xla::swig::DestructureXrtAllocationTuple;
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
index 3366a83543..cd85713d72 100644
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -1102,6 +1102,31 @@ class ComputationBuilder(object):
     """
     return self._client.Call(computation_to_apply.computation, operands)
 
+  def CustomCall(self,
+                 call_target_name,
+                 operands,
+                 shape_with_layout,
+                 operand_shapes_with_layout,
+                 opaque=None):
+    """Enqueues a custom call operation onto the computation.
+
+    Args:
+      call_target_name: the name of the function to call.
+      operands: an iterable of LocalOp. The number and types of operands must
+        match the arity of `operand_shapes_with_layout`.
+      shape_with_layout: the shape of the operator's output, with layout.
+      operand_shapes_with_layout: the shapes of `operands`, including the
+        expected layouts.
+      opaque: an opaque string passed to the backend.
+
+    Returns:
+      A LocalOp representing the added custom call op.
+    """
+    opaque = opaque or ''
+    return self._client.CustomCall(call_target_name, operands,
+                                   shape_with_layout,
+                                   operand_shapes_with_layout, opaque)
+
   def Map(self, operands, computation_to_apply, dimensions):
     """Enqueues a map operation onto the computation.
 
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 77db7b098a..ace854ed6a 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -481,7 +481,9 @@ Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) {
       const Shape& operand_shape_with_layout =
           custom_call->operand_shapes_with_layout()[i];
       TF_RET_CHECK(ShapeUtil::Compatible(custom_call->operand(i)->shape(),
-                                         operand_shape_with_layout));
+                                         operand_shape_with_layout))
+          << custom_call->operand(i)->shape().ToString() << " operand "
+          << operand_shape_with_layout.ToString();
       TF_RET_CHECK(LayoutUtil::HasLayout(operand_shape_with_layout));
     }
   }
-- 
GitLab


From c5b7754ce1fa9577be9613b417a85b48669b78ba Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Wed, 12 Dec 2018 10:19:48 -0800
Subject: [PATCH 429/873] Render HLO graph as HTML.

PiperOrigin-RevId: 225208397
---
 .../compiler/xla/debug_options_flags.cc       |   6 +
 tensorflow/compiler/xla/service/BUILD         |   1 +
 .../compiler/xla/service/hlo_graph_dumper.cc  | 143 +++++++++++++++++-
 .../compiler/xla/service/hlo_graph_dumper.h   |   6 +
 .../xla/service/hlo_graph_html_renderer.cc    |  43 ++++++
 tensorflow/compiler/xla/xla.proto             |   5 +-
 6 files changed, 201 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_graph_html_renderer.cc

diff --git a/tensorflow/compiler/xla/debug_options_flags.cc b/tensorflow/compiler/xla/debug_options_flags.cc
index c55ebcd066..a9a91648ac 100644
--- a/tensorflow/compiler/xla/debug_options_flags.cc
+++ b/tensorflow/compiler/xla/debug_options_flags.cc
@@ -33,6 +33,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
   opts.set_xla_cpu_multi_thread_eigen(true);
   opts.set_xla_gpu_cuda_data_dir("./cuda_sdk_lib");
   opts.set_xla_eliminate_hlo_implicit_broadcast(true);
+  opts.set_xla_hlo_dump_as_html(false);
 #ifdef INTEL_MKL
   opts.set_xla_cpu_use_mkl_dnn(true);
 #endif  // INTEL_MKL
@@ -132,6 +133,11 @@ static void AllocateFlags() {
           bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_graphdef),
           flag_values->xla_hlo_dump_as_graphdef(),
           "Dump HLO graphs as TensorFlow GraphDefs."),
+      tensorflow::Flag("xla_hlo_dump_as_html",
+                       bool_setter_for(&DebugOptions::set_xla_hlo_dump_as_html),
+                       flag_values->xla_hlo_dump_as_html(),
+                       "Dump HLO graphs as an HTML (DOT rendered into SVG "
+                       "inlined in HTML)."),
       tensorflow::Flag(
           "xla_hlo_graph_sharding_color",
           bool_setter_for(&DebugOptions::set_xla_hlo_graph_sharding_color),
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 4c21ae2a42..8ed9a7bea2 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -3163,6 +3163,7 @@ cc_library(
     name = "hlo_graph_dumper",
     srcs = [
         "hlo_graph_dumper.cc",
+        "hlo_graph_html_renderer.cc",
     ],
     hdrs = ["hlo_graph_dumper.h"],
     deps = [
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 302eca656b..5db21e47ca 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1474,14 +1474,15 @@ string ExportGraph(const string& graph,
                    GraphRendererInterface::GraphKind graph_kind,
                    const DebugOptions& debug_options) {
   string path = debug_options.xla_hlo_graph_path();
-  if (!path.empty()) {
+  if (!path.empty() && !debug_options.xla_hlo_dump_as_html()) {
     return SaveGraph(graph, graph_kind, path);
   } else {
     auto graph_renderer =
         GraphRendererRegistry::Default()->GetDefaultRenderer();
     CHECK(graph_renderer != nullptr)
         << "No registered renderer for the HLO graph. "
-           "Use --xla_hlo_graph_path=PATH to export to local file system";
+           "Use --xla_hlo_graph_path=PATH --xla_hlo_dump_as_html=false to "
+           "export to local file system";
     return graph_renderer->RenderGraph(graph, graph_kind, debug_options);
   }
 }
@@ -1589,5 +1590,143 @@ string MaybeDumpHloModule(const HloModule& module, const string& label,
   return graph_url;
 }
 
+string WrapDotInHTML(const string& dot) {
+  static const char html_prefix[] = R"html(
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <style type="text/css">
+    html, body { height: 100%; }
+    body { margin: 0; }
+  </style>
+</head>
+<body>
+  <!-- Integrity hash is generated by https://www.srihash.org/ -->
+  <script src="https://cdn.jsdelivr.net/npm/viz.js@2.1.1/viz.js"
+     integrity="sha384-aD1MJYb0WKIUT+CtwJp5LTuV3U4pLAS6B/nUxL7ECimC2pN9N8vjlMr/yQCAkzxE"
+     crossorigin="anonymous"></script>
+  <script src="https://cdn.jsdelivr.net/npm/viz.js@2.1.1/full.render.js"
+     integrity="sha384-bAixY275aIpCj6Te19y0MILZ4V+VEC8CVFujFEH+Lf7W+4XYYeYLwW5IBI6yQmMT"
+     crossorigin="anonymous"></script>
+  <script src="https://cdn.jsdelivr.net/npm/svg-pan-zoom@3.6.0/dist/svg-pan-zoom.min.js"
+     integrity="sha384-3008WpYB2pOBvE7lwkrKf+qTmbTPGGPYxA9C1YVhvbPukns4ZFj7E98QPLkNW9dS"
+     crossorigin="anonymous"></script>
+  <div id="container" style="height:95%; border:1px solid black; "></div>
+  <script>
+    var data = `
+)html";
+
+  static const char html_suffix[] = R"html(
+`;
+    var cssregex = new RegExp('stylesheet=<([^]*)\n>\n', 'gm');
+    var results = cssregex.exec(data)
+    // graphviz has problem dealing with large stylesheets.
+    // https://github.com/tensorflow/tensorflow/issues/17220#issuecomment-369228492
+    // In order to avoid the problem, remove the stylesheet from the dot and
+    // insert it directly info the rendered SVG.
+    var dot_data = data;
+    var css_data = ''
+    if (results !== null) {
+        css_data = results[1].replace(/\s*data:.*\s*,/,''); // Strip content-type field.
+        dot_data = data.replace(cssregex, ''); // Remove the stylesheet
+    }
+
+    var render_start = performance.now()
+    function add_controls(svg) {
+        var htmlblob = new Blob([document.documentElement.innerHTML],
+                                {type: 'text/html'});
+        var savehtml = document.createElement('a');
+        savehtml.setAttribute('href', URL.createObjectURL(htmlblob));
+        savehtml.setAttribute('download', 'graph.html');
+        savehtml.innerHTML = " [Save HTML+SVG] ";
+        document.body.append(savehtml);
+        var svgblob = new Blob([svg.outerHTML], {type: 'image/svg'});
+        var savesvg = document.createElement('a');
+        savesvg.setAttribute('href', URL.createObjectURL(svgblob));
+        savesvg.setAttribute('download', 'graph.svg');
+        savesvg.innerHTML = " [Save SVG] ";
+        document.body.append(savesvg);
+        var dotblob =  new Blob([data], {type: 'text/dot'});
+        var savedot = document.createElement('a');
+        savedot.setAttribute('href', URL.createObjectURL(dotblob));
+        savedot.setAttribute('download', 'graph.dot');
+        savedot.innerHTML = " [Save DOT] ";
+        document.body.append(savedot);
+        // Will get called after embed element was loaded
+        var panzoom = svgPanZoom(svg, {
+            zoomEnabled: true,
+            controlIconsEnabled: true,
+        });
+        document.getElementsByTagName("BODY")[0].onresize = function() {
+            panzoom.resize();
+            panzoom.fit();
+            panzoom.center();
+        };
+        var render_end = performance.now();
+        var render_note = document.createElement('div')
+        render_note.innerHTML = 'Rendering took '
+                                + (render_end - render_start).toFixed(2) + "ms."
+        document.body.append(render_note);
+    }
+    var svg = document.getElementById('graph')
+    if (svg == null) {
+        // Need to render SVG first.
+        var viz = new Viz();
+        viz.renderSVGElement(dot_data)
+            .then(function(svg){
+                var container = document.getElementById('container')
+                var style = document.createElementNS('http://www.w3.org/2000/svg', 'style');
+                var node = document.createTextNode(css_data);
+                style.appendChild(node);
+                svg.setAttribute('width', '100%');
+                svg.setAttribute('height', 'auto');
+                svg.setAttribute('id', 'graph');
+                svg.appendChild(style);
+                container.appendChild(svg);
+                add_controls(svg);
+            })
+    } else {
+        // HTML already has rendered SVG embedded, so we just need to add
+        // controls.
+        add_controls(svg);
+    }
+  </script>
+</body>
+</html>
+)html";
+
+  return html_prefix + dot + html_suffix;
+}
+
+string RenderDotAsHTMLFile(const string& dot,
+                           const DebugOptions& debug_options) {
+  string html = WrapDotInHTML(dot);
+
+  auto env = tensorflow::Env::Default();
+  std::vector<string> dirs;
+  string output_dir = debug_options.xla_hlo_graph_path();
+  if (output_dir.empty()) {
+    env->GetLocalTempDirectories(&dirs);
+  } else {
+    dirs.push_back(output_dir);
+  }
+  // Try each directory, as they might be full, have inappropriate
+  // permissions or have different problems at times.
+  string output;
+  for (const string& dir : dirs) {
+    string filename = tensorflow::io::JoinPath(dir, "graph-");
+    if (env->CreateUniqueFileName(&filename, ".html")) {
+      output = filename;
+      break;
+    }
+  }
+  if (output.empty()) {
+    LOG(FATAL) << "Failed to create unique output file name.";
+  }
+  TF_CHECK_OK(tensorflow::WriteStringToFile(env, output, html));
+  return "file://" + output;
+}
+
 }  // namespace hlo_graph_dumper
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.h b/tensorflow/compiler/xla/service/hlo_graph_dumper.h
index de1eefab77..8e51454ef1 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.h
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.h
@@ -81,6 +81,12 @@ string DumpAllPathsFromTo(const HloInstruction& from, const HloInstruction& to,
 void DumpText(const HloModule& module, const string& label,
               const string& directory_path, bool do_prefix = true);
 
+// Renders DOT graph as inline SVG and saves it in an HTML file in a temprary
+// directory or directory specified via --xla_hlo_graph_path. Returns the file
+// URI pointing to the file.
+string RenderDotAsHTMLFile(const string& dot,
+                           const DebugOptions& debug_options);
+
 // Graph renderers may be added using a registration mechanism, e.g.:
 // XLA_REGISTER_GRAPH_RENDERER(AGraphRendererClass, 100)
 // The renderer with the highest numeric priority value is used.
diff --git a/tensorflow/compiler/xla/service/hlo_graph_html_renderer.cc b/tensorflow/compiler/xla/service/hlo_graph_html_renderer.cc
new file mode 100644
index 0000000000..84c4cf18df
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_graph_html_renderer.cc
@@ -0,0 +1,43 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Implementation of an DOT graph renderer that uses Javascript to render DOT to
+// SVG in a browser.
+
+#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+
+namespace xla {
+namespace hlo_graph_dumper {
+namespace {
+
+class GraphHtmlRenderer : public GraphRendererInterface {
+ public:
+  string RenderGraph(const string& graph, GraphKind graph_kind,
+                     const DebugOptions& debug_options) override {
+    switch (graph_kind) {
+      case DOT_GRAPH:
+        return RenderDotAsHTMLFile(graph, debug_options);
+      default:
+        LOG(FATAL) << "Only DOT graphs can be rendered";
+    }
+  }
+};
+
+XLA_REGISTER_GRAPH_RENDERER(GraphHtmlRenderer);
+
+}  // namespace
+}  // namespace hlo_graph_dumper
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index 238312e36b..8b894cc769 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -224,7 +224,10 @@ message DebugOptions {
   // If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3).
   bool xla_gpu_disable_ptxas_optimizations = 103;
 
-  // Next id: 105
+  // Dump HLO graphs as an HTML (DOT -> SVG inlined in HTML)
+  bool xla_hlo_dump_as_html = 105;
+
+  // Next id: 106
 
   // Extra options to pass to the compilation backend (e.g. LLVM); specific
   // interpretation of these values is left to the backend.
-- 
GitLab


From 373a764c3812b1f8a3b655b63256f14e541be185 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Wed, 12 Dec 2018 10:32:08 -0800
Subject: [PATCH 430/873] Fix null context construction

PiperOrigin-RevId: 225210711
---
 tensorflow/python/keras/layers/normalization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py
index 75b10222ed..ee37e8a242 100644
--- a/tensorflow/python/keras/layers/normalization.py
+++ b/tensorflow/python/keras/layers/normalization.py
@@ -418,7 +418,7 @@ class BatchNormalizationV2(Layer):
       # because of a bug which leads cond_v2 to skip rewriting them creating
       # conflicts.
       if tf2.enabled():
-        cm = contextlib.contextmanager(lambda: (yield))
+        cm = contextlib.contextmanager(lambda: (yield))()
       else:
         cm = ops.colocate_with(variable)
       with cm:
-- 
GitLab


From 57eb92b7781d46f22f57f89f75010b898e236c42 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 10:38:08 -0800
Subject: [PATCH 431/873] Internal Change

PiperOrigin-RevId: 225212001
---
 tensorflow/python/BUILD                         |  1 +
 tensorflow/python/ops/ragged/__init__.py        | 15 ++++++++++++++-
 tensorflow/python/ops/ragged/ragged_dispatch.py | 11 +++++++++--
 tensorflow/python/ops/standard_ops.py           |  6 ++++++
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 8a7c001321..c11df5534d 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -3033,6 +3033,7 @@ py_library(
         "//tensorflow/python/eager:wrap_function",
         "//tensorflow/python/ops/distributions",
         "//tensorflow/python/ops/linalg",
+        "//tensorflow/python/ops/ragged",
     ],
 )
 
diff --git a/tensorflow/python/ops/ragged/__init__.py b/tensorflow/python/ops/ragged/__init__.py
index 3d915ee269..f23f506e06 100644
--- a/tensorflow/python/ops/ragged/__init__.py
+++ b/tensorflow/python/ops/ragged/__init__.py
@@ -66,6 +66,15 @@ class documentation.
 @@RaggedTensorDynamicShape
 @@broadcast_to
 @@broadcast_dynamic_shape
+
+<!-- Modules -->
+@@ragged_dispatch
+@@ragged_factory_ops
+@@ragged_operators
+@@ragged_string_ops
+@@ragged_tensor
+@@ragged_tensor_value
+@@ragged_util
 """
 
 from __future__ import absolute_import
@@ -73,8 +82,12 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.ops.ragged import ragged_dispatch
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_operators
 from tensorflow.python.ops.ragged import ragged_string_ops
+from tensorflow.python.ops.ragged import ragged_tensor
+from tensorflow.python.ops.ragged import ragged_tensor_value
+from tensorflow.python.ops.ragged import ragged_util
 
 from tensorflow.python.ops.ragged.ragged_array_ops import batch_gather
 from tensorflow.python.ops.ragged.ragged_array_ops import boolean_mask
@@ -133,7 +146,7 @@ from tensorflow.python.util import all_util as _all_util
 
 
 # Register OpDispatchers that override standard TF ops to work w/ RaggedTensors.
-__doc__ += ragged_dispatch.register_dispatchers()  # pylint: disable=redefined-builtin
+__doc__ += ragged_dispatch.ragged_op_list()  # pylint: disable=redefined-builtin
 
 # Any symbol that is not referenced (with "@@name") in the module docstring
 # above will be removed.
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch.py b/tensorflow/python/ops/ragged/ragged_dispatch.py
index f334f1fc8e..77990a8b18 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch.py
@@ -447,10 +447,17 @@ def register_dispatchers():
   for (original_op, ragged_op, args) in _RAGGED_DISPATCH_OPS:
     RaggedDispatcher(original_op, ragged_op, args).register(original_op)
 
-  docstring = (
+
+def ragged_op_list():
+  """Returns a string listing operators that have dispathers registered."""
+  op_list = (
+      _UNARY_ELEMENTWISE_OPS + _UNARY_LIST_ELEMENTWISE_OPS +
+      _BINARY_ELEMENTWISE_OPS + [x[0] for x in _RAGGED_DISPATCH_OPS])
+  return (
       '\n\n### Additional ops that support `RaggedTensor`\n\n' + '\n'.join([
           '* `tf.%s`' % tf_export.get_canonical_name_for_symbol(op)
           for op in op_list
       ]))
 
-  return docstring
+
+register_dispatchers()
diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py
index 8ef0fe8070..ba3bd09492 100644
--- a/tensorflow/python/ops/standard_ops.py
+++ b/tensorflow/python/ops/standard_ops.py
@@ -71,6 +71,8 @@ from tensorflow.python.ops.math_ops import *
 from tensorflow.python.ops.numerics import *
 from tensorflow.python.ops.parsing_ops import *
 from tensorflow.python.ops.partitioned_variables import *
+from tensorflow.python.ops.ragged import ragged_dispatch as _ragged_dispatch
+from tensorflow.python.ops.ragged import ragged_operators as _ragged_operators
 from tensorflow.python.ops.random_ops import *
 from tensorflow.python.ops.script_ops import py_func
 from tensorflow.python.ops.session_ops import *
@@ -102,3 +104,7 @@ from tensorflow.python.ops.variable_scope import *
 from tensorflow.python.ops.variables import *
 # pylint: enable=wildcard-import
 # pylint: enable=g-bad-import-order
+
+
+# These modules were imported to set up RaggedTensor operators and dispatchers:
+del _ragged_dispatch, _ragged_operators
-- 
GitLab


From 6939c38130f6cdaea01a4db7cd2db1d2297eb9e8 Mon Sep 17 00:00:00 2001
From: Zhenyu Tan <tanzheny@google.com>
Date: Wed, 12 Dec 2018 11:06:59 -0800
Subject: [PATCH 432/873] Internal Cleanup.

PiperOrigin-RevId: 225217785
---
 tensorflow/core/kernels/training_ops_test.cc | 44 --------------------
 1 file changed, 44 deletions(-)

diff --git a/tensorflow/core/kernels/training_ops_test.cc b/tensorflow/core/kernels/training_ops_test.cc
index 09804f95dc..2dcc4a500e 100644
--- a/tensorflow/core/kernels/training_ops_test.cc
+++ b/tensorflow/core/kernels/training_ops_test.cc
@@ -194,50 +194,6 @@ static void BM_Adam(int iters, int params) {
 }
 BENCHMARK(BM_Adam)->Arg(128 << 10)->Arg(256 << 10);
 
-static void AdamWithAmsgrad(int32 n, Graph** init_g, Graph** train_g) {
-  TensorShape shape({n});
-  {
-    Graph* g = new Graph(OpRegistry::Global());
-    auto var = Var(g, n);
-    auto m = Var(g, n);
-    auto v = Var(g, n);
-    auto zero = Zeros(g, n);
-    test::graph::Assign(g, var, zero);
-    test::graph::Assign(g, m, zero);
-    test::graph::Assign(g, v, zero);
-    *init_g = g;
-  }
-  {
-    Graph* g = new Graph(OpRegistry::Global());
-    auto var = Var(g, n);
-    auto m = Var(g, n);
-    auto v = Var(g, n);
-    auto vhat = Var(g, n);
-    auto beta1_power = Scalar(g, 0.9);
-    auto beta2_power = Scalar(g, 0.99);
-    auto lr = Scalar(g, 0.01);
-    auto beta1 = Scalar(g, 0.9);
-    auto beta2 = Scalar(g, 0.99);
-    auto epsilon = Scalar(g, 1e-8);
-    auto grad = Random(g, n);
-    test::graph::Multi(g, "ApplyAdamWithAmsgrad",
-                       {var, m, v, vhat, beta1_power, beta2_power, lr, beta1,
-                        beta2, epsilon, grad});
-    *train_g = g;
-  }
-}
-
-static void BM_AdamWithAmsgrad(int iters, int params) {
-  const int64 tot = static_cast<int64>(iters) * params;
-  testing::ItemsProcessed(tot);
-  testing::BytesProcessed(tot * sizeof(float));
-  Graph* init;
-  Graph* train;
-  AdamWithAmsgrad(params, &init, &train);
-  test::Benchmark("cpu", train, GetOptions(), init).Run(iters);
-}
-BENCHMARK(BM_AdamWithAmsgrad)->Arg(128 << 10)->Arg(256 << 10);
-
 static void RMSProp(int32 n, Graph** init_g, Graph** train_g) {
   TensorShape shape({n});
   {
-- 
GitLab


From 3ae0654d41b74538920d1d1cf812f83e35895fc6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 11:08:26 -0800
Subject: [PATCH 433/873] Fix and re-enable three tests under LossWeightingTest
 in training_test.py.

These tests share the same assertion: that weighting a particular class's loss over other classes (by passing in `sample_weight` into `model.fit`) leads to a lower evaluation loss when evaluating test data limited to that class compared to evaluating all test data. My theory is that the models in these tests are not trained enough for that assumption to always hold true, which is why they are flaky. Increased the weight from 2 to 10 and the training epochs from 5 to 10.

PiperOrigin-RevId: 225218063
---
 .../python/keras/engine/training_test.py      | 25 +++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 91a0c7cc2f..a61e2edcd3 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -793,12 +793,12 @@ class TestExceptionsAndWarnings(keras_parameterized.TestCase):
 class LossWeightingTest(keras_parameterized.TestCase):
 
   @keras_parameterized.run_all_keras_modes
-  # TODO(b/120562577): Test failing with assertion error.
-  def DISABLED_test_class_weights(self):
+  def test_class_weights(self):
     num_classes = 5
     batch_size = 5
-    epochs = 5
+    epochs = 10
     weighted_class = 3
+    weight = 10.
     train_samples = 1000
     test_samples = 1000
     input_dim = 5
@@ -827,7 +827,7 @@ class LossWeightingTest(keras_parameterized.TestCase):
     test_ids = np.where(int_y_test == np.array(weighted_class))[0]
 
     class_weight = dict([(i, 1.) for i in range(num_classes)])
-    class_weight[weighted_class] = 2.
+    class_weight[weighted_class] = weight
 
     sample_weight = np.ones((y_train.shape[0]))
     sample_weight[int_y_train == weighted_class] = 2.
@@ -864,12 +864,12 @@ class LossWeightingTest(keras_parameterized.TestCase):
     self.assertLess(score[0], ref_score[0])
 
   @keras_parameterized.run_all_keras_modes
-  @tf_test_util.run_v1_only('b/120545219')
   def test_sample_weights(self):
     num_classes = 5
     batch_size = 5
-    epochs = 5
+    epochs = 10
     weighted_class = 3
+    weight = 10.
     train_samples = 1000
     test_samples = 1000
     input_dim = 5
@@ -898,7 +898,7 @@ class LossWeightingTest(keras_parameterized.TestCase):
     test_ids = np.where(int_y_test == np.array(weighted_class))[0]
 
     sample_weight = np.ones((y_train.shape[0]))
-    sample_weight[int_y_train == weighted_class] = 2.
+    sample_weight[int_y_train == weighted_class] = weight
 
     model.fit(
         x_train,
@@ -962,13 +962,12 @@ class LossWeightingTest(keras_parameterized.TestCase):
       self.assertTrue(msg_found)
 
   @keras_parameterized.run_all_keras_modes
-  @tf_test_util.run_v1_only('b/120545219')
-  # TODO(b/120562577): Test failing with assertion error.
-  def DISABLED_test_temporal_sample_weights(self):
+  def test_temporal_sample_weights(self):
     num_classes = 5
     batch_size = 5
-    epochs = 5
+    epochs = 10
     weighted_class = 3
+    weight = 10.
     train_samples = 1000
     test_samples = 1000
     input_dim = 5
@@ -997,7 +996,7 @@ class LossWeightingTest(keras_parameterized.TestCase):
       test_ids = np.where(int_y_test == np.array(weighted_class))[0]
 
       sample_weight = np.ones((y_train.shape[0]))
-      sample_weight[int_y_train == weighted_class] = 2.
+      sample_weight[int_y_train == weighted_class] = weight
 
       temporal_x_train = np.reshape(x_train, (len(x_train), 1,
                                               x_train.shape[1]))
@@ -1018,7 +1017,7 @@ class LossWeightingTest(keras_parameterized.TestCase):
 
       model.compile(
           RMSPropOptimizer(learning_rate=learning_rate),
-          loss='binary_crossentropy',
+          loss='categorical_crossentropy',
           metrics=['acc', metrics_module.CategoricalAccuracy()],
           weighted_metrics=['mae', metrics_module.CategoricalAccuracy()],
           sample_weight_mode='temporal',
-- 
GitLab


From 350791003de42dbb17c53474a677b108f473b0ba Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 12 Dec 2018 11:52:20 -0800
Subject: [PATCH 434/873] Reduce the cost of serializing ConversionOptions to
 code, by using a more efficient inspect.util.getqualifiedname, reducing its
 max_depth and falling back to caching the value in the namespace. The latter
 step makes it more difficult to run the generated code afterwards, but it
 should in turn speed up the conversion process. This also adds an extra check
 to tf_decorator to improve robustness.

PiperOrigin-RevId: 225226256
---
 .../python/autograph/converters/call_trees.py |  2 +-
 tensorflow/python/autograph/core/converter.py | 18 ++++--
 tensorflow/python/autograph/impl/api.py       |  3 +
 .../python/autograph/pyct/inspect_utils.py    | 25 +++++---
 .../autograph/pyct/inspect_utils_test.py      | 57 +++++++++++++++++++
 tensorflow/python/util/tf_decorator.py        |  5 ++
 6 files changed, 94 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index 3e0b40290f..b1bfe04347 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -261,7 +261,7 @@ class CallTreeTransformer(converter.Base):
         func=func,
         owner=owner,
         options=self.ctx.program.options.to_ast(
-            self.ctx.info.namespace,
+            self.ctx,
             internal_convert_user_code=self.ctx.program.options.recursive),
         args=node.args)
     # TODO(mdan): Improve the template mechanism to better support this.
diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index eea2621056..b9c2449566 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -179,15 +179,14 @@ class ConversionOptions(object):
     return (Feature.ALL in self.optional_features or
             feature in self.optional_features)
 
-  def to_ast(self, namespace, internal_convert_user_code=None):
+  def to_ast(self, ctx, internal_convert_user_code=None):
     """Returns a representation of this object as an AST node.
 
     The AST node encodes a constructor that would create an object with the
     same contents.
 
     Args:
-      namespace: Dict[str, Any], the namespace to use when serializing values to
-        names.
+      ctx: EntityContext, the entity with which this AST needs to be consistent.
       internal_convert_user_code: Optional[bool], allows ovrriding the
         corresponding value.
 
@@ -205,10 +204,11 @@ class ConversionOptions(object):
     """
 
     def as_qualified_name(o):
-      name = inspect_utils.getqualifiedname(namespace, o)
+      name = inspect_utils.getqualifiedname(ctx.info.namespace, o, max_depth=1)
       if not name:
-        raise ValueError('Could not locate entity {} in {}'.format(
-            o, namespace))
+        # TODO(mdan): This needs to account for the symbols defined locally.
+        name = ctx.namer.new_symbol(o.__name__, ())
+        ctx.program.add_symbol(name, o)
       return name
 
     def list_of_names(values):
@@ -279,6 +279,7 @@ class ProgramContext(object):
     self.dependency_cache = {}
     self.additional_imports = set()
     self.name_map = {}
+    self.additional_symbols = {}
 
   @property
   def required_imports(self):
@@ -321,6 +322,11 @@ class ProgramContext(object):
       else:
         self.name_map[o] = name
 
+  def add_symbol(self, name, value):
+    if name in self.additional_symbols:
+      assert self.additional_symbols[name] is value
+    self.additional_symbols[name] = value
+
   def add_to_cache(self, original_entity, converted_ast):
     self.conversion_order.append(original_entity)
     self.dependency_cache[original_entity] = converted_ast
diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index 54b46b1efd..a20ad71c97 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -424,6 +424,9 @@ def to_graph(entity,
     # Avoid overwriting entities that have been transformed.
     if key not in compiled_module.__dict__:
       compiled_module.__dict__[key] = val
+  for key, val in program_ctx.additional_symbols.items():
+    if key not in compiled_module.__dict__:
+      compiled_module.__dict__[key] = val
   compiled = getattr(compiled_module, name)
 
   if tf_inspect.isfunction(entity):
diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 7c819f364f..56945b464b 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -101,7 +101,7 @@ def getnamespace(f):
   return namespace
 
 
-def getqualifiedname(namespace, object_, max_depth=2):
+def getqualifiedname(namespace, object_, max_depth=7, visited=None):
   """Returns the name by which a value can be referred to in a given namespace.
 
   If the object defines a parent module, the function attempts to use it to
@@ -115,16 +115,20 @@ def getqualifiedname(namespace, object_, max_depth=2):
     object_: Any, the value to search.
     max_depth: Optional[int], a limit to the recursion depth when searching
         inside modules.
+    visited: Optional[Set[int]], ID of modules to avoid visiting.
   Returns: Union[str, None], the fully-qualified name that resolves to the value
       o, or None if it couldn't be found.
   """
-  for name, value in namespace.items():
+  if visited is None:
+    visited = set()
+
+  for name in namespace:
     # The value may be referenced by more than one symbol, case in which
     # any symbol will be fine. If the program contains symbol aliases that
     # change over time, this may capture a symbol that will later point to
     # something else.
     # TODO(mdan): Prefer the symbol that matches the value type name.
-    if object_ is value:
+    if object_ is namespace[name]:
       return name
 
   # If an object is not found, try to search its parent modules.
@@ -132,22 +136,25 @@ def getqualifiedname(namespace, object_, max_depth=2):
   if (parent is not None and parent is not object_ and
       parent is not namespace):
     # No limit to recursion depth because of the guard above.
-    parent_name = getqualifiedname(namespace, parent, max_depth=0)
+    parent_name = getqualifiedname(
+        namespace, parent, max_depth=0, visited=visited)
     if parent_name is not None:
-      name_in_parent = getqualifiedname(parent.__dict__, object_, max_depth=0)
+      name_in_parent = getqualifiedname(
+          parent.__dict__, object_, max_depth=0, visited=visited)
       assert name_in_parent is not None, (
           'An object should always be found in its owner module')
       return '{}.{}'.format(parent_name, name_in_parent)
 
-  # TODO(mdan): Use breadth-first search and avoid visiting modules twice.
   if max_depth:
     # Iterating over a copy prevents "changed size due to iteration" errors.
     # It's unclear why those occur - suspecting new modules may load during
     # iteration.
-    for name, value in namespace.copy().items():
-      if tf_inspect.ismodule(value):
+    for name in tuple(namespace.keys()):
+      value = namespace[name]
+      if tf_inspect.ismodule(value) and id(value) not in visited:
+        visited.add(id(value))
         name_in_module = getqualifiedname(value.__dict__, object_,
-                                          max_depth - 1)
+                                          max_depth - 1, visited)
         if name_in_module is not None:
           return '{}.{}'.format(name, name_in_module)
   return None
diff --git a/tensorflow/python/autograph/pyct/inspect_utils_test.py b/tensorflow/python/autograph/pyct/inspect_utils_test.py
index a2c39056d1..420a20c22f 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils_test.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils_test.py
@@ -183,6 +183,63 @@ class InspectUtilsTest(test.TestCase):
     self.assertEqual(inspect_utils.getqualifiedname(ns, bar), 'bar')
     self.assertEqual(inspect_utils.getqualifiedname(ns, baz), 'bar.baz')
 
+  def test_getqualifiedname_efficiency(self):
+    foo = object()
+
+    # We create a densely connected graph consisting of a relatively small
+    # number of modules and hide our symbol in one of them. The path to the
+    # symbol is at least 10, and each node has about 10 neighbors. However,
+    # by skipping visited modules, the search should take much less.
+    ns = {}
+    prev_level = []
+    for i in range(10):
+      current_level = []
+      for j in range(10):
+        mod_name = 'mod_{}_{}'.format(i, j)
+        mod = imp.new_module(mod_name)
+        current_level.append(mod)
+        if i == 9 and j == 9:
+          mod.foo = foo
+      if prev_level:
+        # All modules at level i refer to all modules at level i+1
+        for prev in prev_level:
+          for mod in current_level:
+            prev.__dict__[mod.__name__] = mod
+      else:
+        for mod in current_level:
+          ns[mod.__name__] = mod
+      prev_level = current_level
+
+    self.assertIsNone(inspect_utils.getqualifiedname(ns, inspect_utils))
+    self.assertIsNotNone(
+        inspect_utils.getqualifiedname(ns, foo, max_depth=10000000000))
+
+  def test_getqualifiedname_cycles(self):
+    foo = object()
+
+    # We create a graph of modules that contains circular references. The
+    # search process should avoid them. The searched object is hidden at the
+    # bottom of a path of length roughly 10.
+    ns = {}
+    mods = []
+    for i in range(10):
+      mod = imp.new_module('mod_{}'.format(i))
+      if i == 9:
+        mod.foo = foo
+      # Module i refers to module i+1
+      if mods:
+        mods[-1].__dict__[mod.__name__] = mod
+      else:
+        ns[mod.__name__] = mod
+      # Module i refers to all modules j < i.
+      for prev in mods:
+        mod.__dict__[prev.__name__] = prev
+      mods.append(mod)
+
+    self.assertIsNone(inspect_utils.getqualifiedname(ns, inspect_utils))
+    self.assertIsNotNone(
+        inspect_utils.getqualifiedname(ns, foo, max_depth=10000000000))
+
   def test_getqualifiedname_finds_via_parent_module(self):
     # TODO(mdan): This test is vulnerable to change in the lib module.
     # A better way to forge modules should be found.
diff --git a/tensorflow/python/util/tf_decorator.py b/tensorflow/python/util/tf_decorator.py
index 0cfc836246..f018e1a1bd 100644
--- a/tensorflow/python/util/tf_decorator.py
+++ b/tensorflow/python/util/tf_decorator.py
@@ -98,6 +98,9 @@ def make_decorator(target,
   if hasattr(target, '__doc__'):
     decorator_func.__doc__ = decorator.__doc__
   decorator_func.__wrapped__ = target
+  # Keeping a second handle to `target` allows callers to detect whether the
+  # decorator was modified using `rewrap`.
+  decorator_func.__original_wrapped__ = target
   return decorator_func
 
 
@@ -173,6 +176,8 @@ def unwrap(maybe_tf_decorator):
       decorators.append(getattr(cur, '_tf_decorator'))
     else:
       break
+    if not hasattr(decorators[-1], 'decorated_target'):
+      break
     cur = decorators[-1].decorated_target
   return decorators, cur
 
-- 
GitLab


From 16069bf8745b029a82ee8eae194909f578b3dea0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 12:05:32 -0800
Subject: [PATCH 435/873] Allow RandomUniform to be quantized. As the
 RandomUniform operator is a custom op, it is up to the tf-lite user to
 provide the implementation. Best to assume it exists so the user can
 implement.

PiperOrigin-RevId: 225228337
---
 tensorflow/lite/toco/graph_transformations/quantize.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/lite/toco/graph_transformations/quantize.cc b/tensorflow/lite/toco/graph_transformations/quantize.cc
index 1146078c30..2fa80f2eda 100644
--- a/tensorflow/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/lite/toco/graph_transformations/quantize.cc
@@ -64,6 +64,7 @@ bool SupportsQuantization(const Operator& op) {
          type == OperatorType::kRelu1 || type == OperatorType::kRelu6 ||
          type == OperatorType::kShape || type == OperatorType::kExpandDims ||
          type == OperatorType::kPack || type == OperatorType::kTopK_V2 ||
+         type == OperatorType::kRandomUniform ||
          type == OperatorType::kResizeNearestNeighbor ||
          type == OperatorType::kPRelu;
 }
-- 
GitLab


From 250ab666f6b6fe78818bd040da0a57e6fddd9f89 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Wed, 12 Dec 2018 12:23:40 -0800
Subject: [PATCH 436/873] Make core layers tests run in graph and eager mode.

PiperOrigin-RevId: 225231668
---
 tensorflow/python/keras/BUILD               |   2 +-
 tensorflow/python/keras/layers/core_test.py | 323 ++++++++++----------
 2 files changed, 155 insertions(+), 170 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 36fea36389..3c390cb2b0 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -401,7 +401,7 @@ py_test(
     name = "core_test",
     size = "medium",
     srcs = ["layers/core_test.py"],
-    shard_count = 2,
+    shard_count = 3,
     srcs_version = "PY2AND3",
     deps = [
         ":keras",
diff --git a/tensorflow/python/keras/layers/core_test.py b/tensorflow/python/keras/layers/core_test.py
index f138adf760..9df40f806f 100644
--- a/tensorflow/python/keras/layers/core_test.py
+++ b/tensorflow/python/keras/layers/core_test.py
@@ -22,43 +22,36 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.eager import context
-from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-class CoreLayersTest(test.TestCase):
-
-  def test_masking(self):
-    with self.cached_session():
-      testing_utils.layer_test(
-          keras.layers.Masking, kwargs={}, input_shape=(3, 2, 3))
+@keras_parameterized.run_all_keras_modes
+class DropoutLayersTest(keras_parameterized.TestCase):
 
   def test_dropout(self):
-    with self.cached_session():
-      testing_utils.layer_test(
-          keras.layers.Dropout, kwargs={'rate': 0.5}, input_shape=(3, 2))
+    testing_utils.layer_test(
+        keras.layers.Dropout, kwargs={'rate': 0.5}, input_shape=(3, 2))
 
-    with self.cached_session():
-      testing_utils.layer_test(
-          keras.layers.Dropout,
-          kwargs={'rate': 0.5,
-                  'noise_shape': [3, 1]},
-          input_shape=(3, 2))
-
-    # https://github.com/tensorflow/tensorflow/issues/14819
-    with self.cached_session():
-      dropout = keras.layers.Dropout(0.5)
-      self.assertEqual(True, dropout.supports_masking)
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_spatial_dropout(self):
+    testing_utils.layer_test(
+        keras.layers.Dropout,
+        kwargs={'rate': 0.5,
+                'noise_shape': [3, 1]},
+        input_shape=(3, 2))
+
+  def test_dropout_supports_masking(self):
+    dropout = keras.layers.Dropout(0.5)
+    self.assertEqual(True, dropout.supports_masking)
+
+  def test_spatial_dropout_1d(self):
     testing_utils.layer_test(
         keras.layers.SpatialDropout1D,
         kwargs={'rate': 0.5},
         input_shape=(2, 3, 4))
 
+  def test_spatial_dropout_2d(self):
     testing_utils.layer_test(
         keras.layers.SpatialDropout2D,
         kwargs={'rate': 0.5},
@@ -69,6 +62,7 @@ class CoreLayersTest(test.TestCase):
         kwargs={'rate': 0.5, 'data_format': 'channels_first'},
         input_shape=(2, 3, 4, 5))
 
+  def test_spatial_dropout_3d(self):
     testing_utils.layer_test(
         keras.layers.SpatialDropout3D,
         kwargs={'rate': 0.5},
@@ -79,7 +73,122 @@ class CoreLayersTest(test.TestCase):
         kwargs={'rate': 0.5, 'data_format': 'channels_first'},
         input_shape=(2, 3, 4, 4, 5))
 
-  @tf_test_util.run_in_graph_and_eager_modes
+
+@keras_parameterized.run_all_keras_modes
+class LambdaLayerTest(keras_parameterized.TestCase):
+
+  def test_lambda(self):
+    testing_utils.layer_test(
+        keras.layers.Lambda,
+        kwargs={'function': lambda x: x + 1},
+        input_shape=(3, 2))
+
+    testing_utils.layer_test(
+        keras.layers.Lambda,
+        kwargs={
+            'function': lambda x, a, b: x * a + b,
+            'arguments': {
+                'a': 0.6,
+                'b': 0.4
+            }
+        },
+        input_shape=(3, 2))
+
+    # test serialization with function
+    def f(x):
+      return x + 1
+
+    ld = keras.layers.Lambda(f)
+    config = ld.get_config()
+    ld = keras.layers.deserialize({
+        'class_name': 'Lambda',
+        'config': config
+    })
+
+    # test with lambda
+    ld = keras.layers.Lambda(
+        lambda x: keras.backend.concatenate([math_ops.square(x), x]))
+    config = ld.get_config()
+    ld = keras.layers.Lambda.from_config(config)
+
+  def test_lambda_multiple_inputs(self):
+    ld = keras.layers.Lambda(lambda x: x[0], output_shape=lambda x: x[0])
+    x1 = np.ones([3, 2], np.float32)
+    x2 = np.ones([3, 5], np.float32)
+    out = ld([x1, x2])
+    self.assertAllEqual(out.shape, [3, 2])
+
+  def test_lambda_output_shape(self):
+    l = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1))
+    l(keras.backend.variable(np.ones((1, 1))))
+    self.assertEqual((1, 1), l.get_config()['output_shape'])
+
+  def test_lambda_output_shape_function(self):
+    def get_output_shape(input_shape):
+      return 1 * input_shape
+
+    l = keras.layers.Lambda(lambda x: x + 1, output_shape=get_output_shape)
+    l(keras.backend.variable(np.ones((1, 1))))
+    self.assertEqual('lambda', l.get_config()['output_shape_type'])
+
+  def test_lambda_output_shape_autocalculate_multiple_inputs(self):
+
+    def lambda_fn(x):
+      return math_ops.matmul(x[0], x[1])
+
+    l = keras.layers.Lambda(lambda_fn)
+    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
+    self.assertAllEqual((10, 20), output_shape)
+
+  def test_lambda_output_shape_list_multiple_outputs(self):
+
+    def lambda_fn(x):
+      return x
+
+    l = keras.layers.Lambda(lambda_fn, output_shape=[(10,), (20,)])
+    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
+    self.assertAllEqual([(10, 10), (10, 20)], output_shape)
+
+  def test_lambda_output_shape_tuple_with_none(self):
+
+    def lambda_fn(x):
+      return x
+
+    l = keras.layers.Lambda(lambda_fn, output_shape=(None, 10))
+    output_shape = l.compute_output_shape((5, 10, 20))
+    self.assertAllEqual([5, None, 10], output_shape.as_list())
+
+  def test_lambda_output_shape_function_multiple_outputs(self):
+
+    def lambda_fn(x):
+      return x
+
+    def output_shape_fn(input_shape):
+      return input_shape
+
+    l = keras.layers.Lambda(lambda_fn, output_shape=output_shape_fn)
+    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
+    self.assertAllEqual([(10, 10), (10, 20)], output_shape)
+
+  def test_lambda_config_serialization(self):
+    # Test serialization with output_shape and output_shape_type
+    layer = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1))
+    layer(keras.backend.variable(np.ones((1, 1))))
+    config = layer.get_config()
+    layer = keras.layers.deserialize({
+        'class_name': 'Lambda',
+        'config': config
+    })
+    layer = keras.layers.Lambda.from_config(config)
+
+
+@keras_parameterized.run_all_keras_modes
+class CoreLayersTest(keras_parameterized.TestCase):
+
+  def test_masking(self):
+    testing_utils.layer_test(
+        keras.layers.Masking, kwargs={}, input_shape=(3, 2, 3))
+
   def test_activation(self):
     # with string argument
     testing_utils.layer_test(
@@ -93,7 +202,6 @@ class CoreLayersTest(test.TestCase):
         kwargs={'activation': keras.backend.relu},
         input_shape=(3, 2))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_reshape(self):
     testing_utils.layer_test(
         keras.layers.Reshape,
@@ -115,26 +223,22 @@ class CoreLayersTest(test.TestCase):
         kwargs={'target_shape': (-1, 1)},
         input_shape=(None, None, 2))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_permute(self):
     testing_utils.layer_test(
         keras.layers.Permute, kwargs={'dims': (2, 1)}, input_shape=(3, 2, 4))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_permute_errors_on_invalid_starting_dims_index(self):
     with self.assertRaisesRegexp(ValueError, r'Invalid permutation .*dims.*'):
       testing_utils.layer_test(
           keras.layers.Permute,
           kwargs={'dims': (0, 1, 2)}, input_shape=(3, 2, 4))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_permute_errors_on_invalid_set_of_dims_indices(self):
     with self.assertRaisesRegexp(ValueError, r'Invalid permutation .*dims.*'):
       testing_utils.layer_test(
           keras.layers.Permute,
           kwargs={'dims': (1, 4, 2)}, input_shape=(3, 2, 4))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_flatten(self):
     testing_utils.layer_test(
         keras.layers.Flatten, kwargs={}, input_shape=(3, 2, 4))
@@ -149,7 +253,6 @@ class CoreLayersTest(test.TestCase):
         np.transpose(inputs, (0, 2, 3, 1)), (-1, 5 * 5 * 3))
     self.assertAllClose(outputs, target_outputs)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_flatten_scalar_channels(self):
     testing_utils.layer_test(
         keras.layers.Flatten, kwargs={}, input_shape=(3,))
@@ -163,54 +266,10 @@ class CoreLayersTest(test.TestCase):
     target_outputs = np.expand_dims(inputs, -1)
     self.assertAllClose(outputs, target_outputs)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_repeat_vector(self):
     testing_utils.layer_test(
         keras.layers.RepeatVector, kwargs={'n': 3}, input_shape=(3, 2))
 
-  def test_lambda(self):
-    testing_utils.layer_test(
-        keras.layers.Lambda,
-        kwargs={'function': lambda x: x + 1},
-        input_shape=(3, 2))
-
-    testing_utils.layer_test(
-        keras.layers.Lambda,
-        kwargs={
-            'function': lambda x, a, b: x * a + b,
-            'arguments': {
-                'a': 0.6,
-                'b': 0.4
-            }
-        },
-        input_shape=(3, 2))
-
-    # test serialization with function
-    def f(x):
-      return x + 1
-
-    ld = keras.layers.Lambda(f)
-    config = ld.get_config()
-    ld = keras.layers.deserialize({
-        'class_name': 'Lambda',
-        'config': config
-    })
-
-    # test with lambda
-    ld = keras.layers.Lambda(
-        lambda x: keras.backend.concatenate([math_ops.square(x), x]))
-    config = ld.get_config()
-    ld = keras.layers.Lambda.from_config(config)
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_lambda_multiple_inputs(self):
-    ld = keras.layers.Lambda(lambda x: x[0], output_shape=lambda x: x[0])
-    x1 = np.ones([3, 2], np.float32)
-    x2 = np.ones([3, 5], np.float32)
-    out = ld([x1, x2])
-    self.assertAllEqual(out.shape, [3, 2])
-
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_dense(self):
     testing_utils.layer_test(
         keras.layers.Dense, kwargs={'units': 3}, input_shape=(3, 2))
@@ -225,105 +284,31 @@ class CoreLayersTest(test.TestCase):
         keras.layers.Dense, kwargs={'units': 3}, input_shape=(3, 4, 5, 2))
 
   def test_dense_regularization(self):
-    with self.cached_session():
-      layer = keras.layers.Dense(
-          3,
-          kernel_regularizer=keras.regularizers.l1(0.01),
-          bias_regularizer='l1',
-          activity_regularizer='l2',
-          name='dense_reg')
-      layer(keras.backend.variable(np.ones((2, 4))))
-      self.assertEqual(3, len(layer.losses))
+    layer = keras.layers.Dense(
+        3,
+        kernel_regularizer=keras.regularizers.l1(0.01),
+        bias_regularizer='l1',
+        activity_regularizer='l2',
+        name='dense_reg')
+    layer(keras.backend.variable(np.ones((2, 4))))
+    self.assertEqual(3, len(layer.losses))
 
   def test_dense_constraints(self):
-    with self.cached_session():
-      k_constraint = keras.constraints.max_norm(0.01)
-      b_constraint = keras.constraints.max_norm(0.01)
-      layer = keras.layers.Dense(
-          3, kernel_constraint=k_constraint, bias_constraint=b_constraint)
-      layer(keras.backend.variable(np.ones((2, 4))))
-      self.assertEqual(layer.kernel.constraint, k_constraint)
-      self.assertEqual(layer.bias.constraint, b_constraint)
+    k_constraint = keras.constraints.max_norm(0.01)
+    b_constraint = keras.constraints.max_norm(0.01)
+    layer = keras.layers.Dense(
+        3, kernel_constraint=k_constraint, bias_constraint=b_constraint)
+    layer(keras.backend.variable(np.ones((2, 4))))
+    self.assertEqual(layer.kernel.constraint, k_constraint)
+    self.assertEqual(layer.bias.constraint, b_constraint)
 
   def test_activity_regularization(self):
-    with self.cached_session():
-      layer = keras.layers.ActivityRegularization(l1=0.1)
-      layer(keras.backend.variable(np.ones((2, 4))))
-      self.assertEqual(1, len(layer.losses))
-      _ = layer.get_config()
-
-  def test_lambda_output_shape(self):
-    with self.cached_session():
-      l = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1))
-      l(keras.backend.variable(np.ones((1, 1))))
-      self.assertEqual((1, 1), l.get_config()['output_shape'])
+    layer = keras.layers.ActivityRegularization(l1=0.1)
+    layer(keras.backend.variable(np.ones((2, 4))))
+    self.assertEqual(1, len(layer.losses))
+    config = layer.get_config()
+    self.assertEqual(config.pop('l1'), 0.1)
 
-  def test_lambda_output_shape_function(self):
-    def get_output_shape(input_shape):
-      return 1 * input_shape
-
-    with self.cached_session():
-      l = keras.layers.Lambda(lambda x: x + 1, output_shape=get_output_shape)
-      l(keras.backend.variable(np.ones((1, 1))))
-      self.assertEqual('lambda', l.get_config()['output_shape_type'])
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_lambda_output_shape_autocalculate_multiple_inputs(self):
-
-    def lambda_fn(x):
-      return math_ops.matmul(x[0], x[1])
-
-    l = keras.layers.Lambda(lambda_fn)
-    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
-    self.assertAllEqual((10, 20), output_shape)
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_lambda_output_shape_list_multiple_outputs(self):
-
-    def lambda_fn(x):
-      return x
-
-    l = keras.layers.Lambda(lambda_fn, output_shape=[(10,), (20,)])
-    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
-    self.assertAllEqual([(10, 10), (10, 20)], output_shape)
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_lambda_output_shape_tuple_with_none(self):
-
-    def lambda_fn(x):
-      return x
-
-    l = keras.layers.Lambda(lambda_fn, output_shape=(None, 10))
-    output_shape = l.compute_output_shape((5, 10, 20))
-    self.assertAllEqual([5, None, 10], output_shape.as_list())
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_lambda_output_shape_function_multiple_outputs(self):
-
-    def lambda_fn(x):
-      return x
-
-    def output_shape_fn(input_shape):
-      return input_shape
-
-    l = keras.layers.Lambda(lambda_fn, output_shape=output_shape_fn)
-    output_shape = l.compute_output_shape([(10, 10), (10, 20)])
-    self.assertAllEqual([(10, 10), (10, 20)], output_shape)
-
-  def test_lambda_config_serialization(self):
-    with self.cached_session():
-      # test serialization with output_shape and output_shape_type
-      layer = keras.layers.Lambda(lambda x: x + 1, output_shape=(1, 1))
-      layer(keras.backend.variable(np.ones((1, 1))))
-      config = layer.get_config()
-      layer = keras.layers.deserialize({
-          'class_name': 'Lambda',
-          'config': config
-      })
-
-      layer = keras.layers.Lambda.from_config(config)
-
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_numpy_inputs(self):
     if context.executing_eagerly():
       layer = keras.layers.RepeatVector(2)
-- 
GitLab


From 27d89c6b8e2fcb56b3d560196e4dc5c11121fafc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 12:54:39 -0800
Subject: [PATCH 437/873] Automated rollback of commit
 d6a46850353acfe26625c5ab1ffe7bd5c5a4aaf0

PiperOrigin-RevId: 225236744
---
 third_party/nccl/archive.BUILD      | 154 +++++----
 third_party/nccl/build_defs.bzl.tpl | 467 ++++++++++------------------
 2 files changed, 270 insertions(+), 351 deletions(-)

diff --git a/third_party/nccl/archive.BUILD b/third_party/nccl/archive.BUILD
index 22b9728017..7a08f97ef3 100644
--- a/third_party/nccl/archive.BUILD
+++ b/third_party/nccl/archive.BUILD
@@ -1,110 +1,157 @@
 # NVIDIA NCCL 2
 # A package of optimized primitives for collective multi-GPU communication.
 
-licenses(["notice"])
+licenses(["restricted"])
 
 exports_files(["LICENSE.txt"])
 
 load(
     "@local_config_nccl//:build_defs.bzl",
-    "cuda_rdc_library",
-    "gen_device_srcs",
-    "process_srcs",
+    "gen_nccl_h",
+    "nccl_library",
+    "rdc_copts",
+    "rdc_library",
+)
+load(
+    "@local_config_cuda//cuda:build_defs.bzl",
+    "cuda_default_copts",
 )
-load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cuda_library")
 
-process_srcs(
-    name = "process_srcs",
-    srcs = glob([
-        "**/*.cc",
-        "**/*.h",
-    ]),
+# Generate the nccl.h header file.
+gen_nccl_h(
+    name = "nccl_h",
+    output = "src/nccl.h",
+    template = "src/nccl.h.in",
 )
 
-cc_library(
+nccl_library(
     name = "src_hdrs",
     hdrs = [
-        "src/collectives/collectives.h",
         "src/nccl.h",
+        # src/include/common_coll.h #includes "collectives/collectives.h".
+        # All other #includes of collectives.h are patched in process_srcs.
+        "src/collectives/collectives.h",
     ],
-    data = [":process_srcs"],
     strip_include_prefix = "src",
 )
 
-cc_library(
+nccl_library(
     name = "include_hdrs",
     hdrs = glob(["src/include/*.h"]),
-    data = [":process_srcs"],
     strip_include_prefix = "src/include",
 )
 
-cc_library(
+filegroup(
     name = "device_hdrs",
-    hdrs = glob(["src/collectives/device/*.h"]),
-    strip_include_prefix = "src/collectives/device",
+    srcs = glob(["src/collectives/device/*.h"]),
 )
 
 filegroup(
     name = "device_srcs",
     srcs = [
-        "src/collectives/device/all_gather.cu.cc",
-        "src/collectives/device/all_reduce.cu.cc",
-        "src/collectives/device/broadcast.cu.cc",
-        "src/collectives/device/reduce.cu.cc",
-        "src/collectives/device/reduce_scatter.cu.cc",
+        "src/collectives/device/all_gather.cu",
+        "src/collectives/device/all_reduce.cu",
+        "src/collectives/device/broadcast.cu",
+        "src/collectives/device/reduce.cu",
+        "src/collectives/device/reduce_scatter.cu",
     ],
 )
 
-# NCCL compiles the same source files with different NCCL_OP defines. RDC
-# compilation requires that each compiled module has a unique ID. Clang derives
-# the module ID from the path only so we need to rename the files to get
-# different IDs for different parts of compilation. NVCC does not have that
-# problem because it generates IDs based on preprocessed content.
-gen_device_srcs(
+nccl_library(
     name = "sum",
-    srcs = [":device_srcs"],
-    NCCL_OP = 0,
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=0"] + rdc_copts(),
+    linkstatic = True,
+    prefix = "sum_",
+    deps = [
+        ":include_hdrs",
+        ":src_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
 )
 
-gen_device_srcs(
+nccl_library(
     name = "prod",
-    srcs = [":device_srcs"],
-    NCCL_OP = 1,
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=1"] + rdc_copts(),
+    linkstatic = True,
+    prefix = "_prod",
+    deps = [
+        ":include_hdrs",
+        ":src_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
 )
 
-gen_device_srcs(
+nccl_library(
     name = "min",
-    srcs = [":device_srcs"],
-    NCCL_OP = 2,
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=2"] + rdc_copts(),
+    linkstatic = True,
+    prefix = "min_",
+    deps = [
+        ":include_hdrs",
+        ":src_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
 )
 
-gen_device_srcs(
+nccl_library(
     name = "max",
-    srcs = [":device_srcs"],
-    NCCL_OP = 3,
+    srcs = [
+        ":device_hdrs",
+        ":device_srcs",
+    ],
+    copts = ["-DNCCL_OP=3"] + rdc_copts(),
+    linkstatic = True,
+    prefix = "max_",
+    deps = [
+        ":include_hdrs",
+        ":src_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
 )
 
-cuda_rdc_library(
-    name = "device",
+nccl_library(
+    name = "functions",
     srcs = [
-        "src/collectives/device/functions.cu.cc",
-        ":max",
-        ":min",
-        ":prod",
-        ":sum",
+        "src/collectives/device/functions.cu",
+        ":device_hdrs",
     ],
+    copts = rdc_copts(),
+    linkstatic = True,
     deps = [
-        ":device_hdrs",
         ":include_hdrs",
         ":src_hdrs",
+        "@local_config_cuda//cuda:cuda_headers",
+    ],
+)
+
+rdc_library(
+    name = "device_code",
+    deps = [
+        ":functions",
+        ":max",
+        ":min",
+        ":prod",
+        ":sum",
     ],
 )
 
 # Primary NCCL target.
-tf_cuda_library(
+nccl_library(
     name = "nccl",
     srcs = glob(
-        include = ["src/**/*.cu.cc"],
+        include = ["src/**/*.cu"],
         # Exclude device-library code.
         exclude = ["src/collectives/device/**"],
     ) + [
@@ -115,14 +162,13 @@ tf_cuda_library(
         "src/nccl.h",
     ],
     hdrs = ["src/nccl.h"],
-    copts = ["-Wno-vla"],
+    copts = cuda_default_copts(),
     include_prefix = "third_party/nccl",
     strip_include_prefix = "src",
     visibility = ["//visibility:public"],
     deps = [
-        ":device",
+        ":device_code",
         ":include_hdrs",
         ":src_hdrs",
-        "@local_config_cuda//cuda:cudart_static",
     ],
 )
diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl
index fe16f10432..42de79c411 100644
--- a/third_party/nccl/build_defs.bzl.tpl
+++ b/third_party/nccl/build_defs.bzl.tpl
@@ -1,86 +1,87 @@
 """Repository rule for NCCL."""
 
-load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_cuda_library")
-load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain")
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts")
 
-def _process_srcs_impl(ctx):
-    """Appends .cc to .cu files, patches include directives."""
-    files = []
-    for src in ctx.files.srcs:
+def _gen_nccl_h_impl(ctx):
+    """Creates nccl.h from a template."""
+    ctx.actions.expand_template(
+        output = ctx.outputs.output,
+        template = ctx.file.template,
         substitutions = {
-            "\"collectives.h": "\"collectives/collectives.h",
-            "\"../collectives.h": "\"collectives/collectives.h",
-            # Clang does not define __CUDACC_VER_*__, use CUDA_VERSION instead.
-            # TODO(csigg): Apply substitutions upstream and remove here.
-            "#if __CUDACC_VER_MAJOR__ >= 10 || (__CUDACC_VER_MAJOR__ >= 9 && __CUDACC_VER_MINOR__ >= 2)": "#if CUDA_VERSION >= 9200",
-            "#if __CUDACC_VER_MAJOR__ >= 10": "#if CUDA_VERSION >= 10000",
-            "#if __CUDACC_VER_MAJOR__ >= 9": "#if CUDA_VERSION >= 9000",
-            "#if __CUDACC_VER_MAJOR__ < 9": "#if CUDA_VERSION < 9000",
-            "nullptr_t": "std::nullptr_t",
-        }
-        name = src.basename
-        if name == "nccl.in.h":
-            name = "nccl.h"
-            substitutions.update({
-                "${nccl:Major}": "2",
-                "${nccl:Minor}": "3",
-                "${nccl:Patch}": "5",
-                "${nccl:Suffix}": "",
-                "${nccl:Version}": "2305",
-            })
-        if name == "functions.cu":
-            # Don't try to initialize the host shadow copy of this device-side
-            # global variable. There is no host pointer to a device-side
-            # function, which confuses clang.
-            # TODO(csigg): remove when fixed in clang.
-            substitutions.update({
-                "NCCL_FUNCS2B(ncclBroadcast),": "#if __CUDA_ARCH__\nNCCL_FUNCS2B(ncclBroadcast),",
-                "NCCL_FUNCS2A(ncclAllReduce)": "NCCL_FUNCS2A(ncclAllReduce)\n#endif",
-            })
-        if src.extension == "cu":
-            name += ".cc"
-        file = ctx.actions.declare_file(name, sibling = src)
-        ctx.actions.expand_template(
-            output = file,
-            template = src,
-            substitutions = substitutions,
-        )
-        files.append(file)
-    return [DefaultInfo(files = depset(files))]
+            "${nccl:Major}": "2",
+            "${nccl:Minor}": "3",
+            "${nccl:Patch}": "5",
+            "${nccl:Suffix}": "",
+            "${nccl:Version}": "2305",
+        },
+    )
 
-process_srcs = rule(
-    implementation = _process_srcs_impl,
+gen_nccl_h = rule(
+    implementation = _gen_nccl_h_impl,
     attrs = {
-        "srcs": attr.label_list(allow_files = True),
+        "template": attr.label(allow_single_file = True),
+        "output": attr.output(),
     },
 )
-"""Processes the NCCL srcs so they can be compiled with bazel and clang."""
+"""Creates the NCCL header file."""
 
-def _gen_device_srcs_impl(ctx):
+def _process_srcs_impl(ctx):
+    """Appends .cc to .cu files, patches include directives."""
     files = []
     for src in ctx.files.srcs:
-        name = "%s_%s" % (ctx.attr.name, src.basename)
+        if not src.is_source:
+            # Process only once, specifically "src/nccl.h".
+            files.append(src)
+            continue
+        name = src.basename
+        if src.extension == "cu":
+            name = ctx.attr.prefix + name + ".cc"
         file = ctx.actions.declare_file(name, sibling = src)
         ctx.actions.expand_template(
             output = file,
             template = src,
             substitutions = {
-                "#define UNROLL 4": "#define UNROLL 4\n#define NCCL_OP %d" % ctx.attr.NCCL_OP,
+                "\"collectives.h": "\"collectives/collectives.h",
+                "\"../collectives.h": "\"collectives/collectives.h",
+                "#if __CUDACC_VER_MAJOR__": "#if defined __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__",
+                # Substitutions are applied in order.
+                "std::nullptr_t": "nullptr_t",
+                "nullptr_t": "std::nullptr_t",
             },
         )
         files.append(file)
     return [DefaultInfo(files = depset(files))]
 
-gen_device_srcs = rule(
-    implementation = _gen_device_srcs_impl,
+_process_srcs = rule(
+    implementation = _process_srcs_impl,
     attrs = {
         "srcs": attr.label_list(allow_files = True),
-        "NCCL_OP": attr.int(),
+        "prefix": attr.string(default = ""),
     },
 )
-"""Adds prefix to each file name in srcs and adds #define NCCL_OP."""
+"""Processes the NCCL srcs so they can be compiled with bazel and clang."""
+
+def nccl_library(name, srcs = None, hdrs = None, prefix = None, **kwargs):
+    """Processes the srcs and hdrs and creates a cc_library."""
+
+    _process_srcs(
+        name = name + "_srcs",
+        srcs = srcs,
+        prefix = prefix,
+    )
+    _process_srcs(
+        name = name + "_hdrs",
+        srcs = hdrs,
+    )
+
+    native.cc_library(
+        name = name,
+        srcs = [name + "_srcs"] if srcs else [],
+        hdrs = [name + "_hdrs"] if hdrs else [],
+        **kwargs
+    )
 
-def _rdc_copts():
+def rdc_copts():
     """Returns copts for compiling relocatable device code."""
 
     # The global functions can not have a lower register count than the
@@ -88,7 +89,7 @@ def _rdc_copts():
     # https://github.com/NVIDIA/nccl/blob/f93fe9bfd94884cec2ba711897222e0df5569a53/makefiles/common.mk#L48
     maxrregcount = "-maxrregcount=96"
 
-    return select({
+    return cuda_default_copts() + select({
         "@local_config_cuda//cuda:using_nvcc": [
             "-nvcc_options",
             "relocatable-device-code=true",
@@ -99,255 +100,118 @@ def _rdc_copts():
             "-fcuda-rdc",
             "-Xcuda-ptxas",
             maxrregcount,
-            # Work around for clang bug (fixed in r348662), declaring
-            # '__device__ operator delete(void*, std::size_t)' non-inline.
-            # TODO(csigg): Only add this option for older clang versions.
-            "-std=gnu++11",
         ],
         "//conditions:default": [],
-    })
-
-def _lookup_file(filegroup, path):
-    """Extracts file at (relative) path in filegroup."""
-    for file in filegroup.files:
-        if file.path.endswith(path):
-            return file
-    return None
-
-def _pic_only(files):
-    """Returns the PIC files if there are any in 'files', otherwise 'files'."""
-    pic_only = [f for f in files if f.basename.find(".pic.") >= 0]
-    return pic_only if pic_only else files
+    }) + ["-fvisibility=hidden"]
 
-def _device_link_impl(ctx):
-    if not ctx.attr.gpu_archs:
-        fail("No GPU architecture specified. NCCL requires --config=cuda or similar.")
-
-    inputs = []
-    for dep in ctx.attr.deps:
-        inputs += dep.files.to_list()
-    inputs = _pic_only(inputs)
-
-    # Device-link to cubins for each architecture.
-    name = ctx.attr.name
-    register_h = None
-    cubins = []
-    images = []
-    for arch in ctx.attr.gpu_archs:
-        cubin = ctx.actions.declare_file("%s_%s.cubin" % (name, arch))
-        register_h = ctx.actions.declare_file("%s_register_%s.h" % (name, arch))
-        ctx.actions.run(
-            outputs = [register_h, cubin],
-            inputs = inputs,
-            executable = ctx.file._nvlink,
-            arguments = ctx.attr.nvlink_args + [
-                "--arch=%s" % arch,
-                "--register-link-binaries=%s" % register_h.path,
-                "--output-file=%s" % cubin.path,
-            ] + [file.path for file in inputs],
-            mnemonic = "nvlink",
-        )
-        cubins.append(cubin)
-        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
+def _filter_impl(ctx):
+    suffix = ctx.attr.suffix
+    files = [src for src in ctx.files.srcs if src.path.endswith(suffix)]
+    return [DefaultInfo(files = depset(files))]
 
-    # Generate fatbin header from all cubins.
-    tmp_fatbin = ctx.actions.declare_file("%s.fatbin" % name)
-    fatbin_h = ctx.actions.declare_file("%s_fatbin.h" % name)
-    bin2c = ctx.file._bin2c
-    ctx.actions.run(
-        outputs = [tmp_fatbin, fatbin_h],
-        inputs = cubins,
-        executable = ctx.file._fatbinary,
-        arguments = [
-            "-64",
-            "--cmdline=--compile-only",
-            "--link",
-            "--compress-all",
-            "--bin2c-path=%s" % bin2c.dirname,
-            "--create=%s" % tmp_fatbin.path,
-            "--embedded-fatbin=%s" % fatbin_h.path,
-        ] + images,
-        tools = [bin2c],
-        mnemonic = "fatbinary",
-    )
+_filter = rule(
+    implementation = _filter_impl,
+    attrs = {
+        "srcs": attr.label_list(allow_files = True),
+        "suffix": attr.string(),
+    },
+)
+"""Filters the srcs to the ones ending with suffix."""
 
-    # Generate the source file #including the headers generated above.
+def _gen_link_src_impl(ctx):
     ctx.actions.expand_template(
-        output = ctx.outputs.out,
-        template = ctx.file._link_stub,
+        output = ctx.outputs.output,
+        template = ctx.file.template,
         substitutions = {
-            "REGISTERLINKBINARYFILE": '"%s"' % register_h.short_path,
-            "FATBINFILE": '"%s"' % fatbin_h.short_path,
+            "REGISTERLINKBINARYFILE": '"%s"' % ctx.file.register_hdr.short_path,
+            "FATBINFILE": '"%s"' % ctx.file.fatbin_hdr.short_path,
         },
     )
 
-    return [DefaultInfo(files = depset([register_h, fatbin_h]))]
-
-_device_link = rule(
-    implementation = _device_link_impl,
+_gen_link_src = rule(
+    implementation = _gen_link_src_impl,
     attrs = {
-        "deps": attr.label_list(),
-        "out": attr.output(mandatory = True),
-        "gpu_archs": attr.string_list(),
-        "nvlink_args": attr.string_list(),
-        "_nvlink": attr.label(
-            default = Label("@local_config_nccl//:nvlink"),
-            allow_single_file = True,
-            executable = True,
-            cfg = "host",
-        ),
-        "_fatbinary": attr.label(
-            default = Label("@local_config_nccl//:cuda/bin/fatbinary"),
-            allow_single_file = True,
-            executable = True,
-            cfg = "host",
-        ),
-        "_bin2c": attr.label(
-            default = Label("@local_config_nccl//:cuda/bin/bin2c"),
-            allow_single_file = True,
-            executable = True,
-            cfg = "host",
-        ),
-        "_link_stub": attr.label(
-            default = Label("@local_config_nccl//:cuda/bin/crt/link.stub"),
-            allow_single_file = True,
-        ),
+        "register_hdr": attr.label(allow_single_file = True),
+        "fatbin_hdr": attr.label(allow_single_file = True),
+        "template": attr.label(allow_single_file = True),
+        "output": attr.output(),
     },
 )
-"""Links device code and generates source code for kernel registration."""
-
-def _merge_archive_impl(ctx):
-    # Generate an mri script to the merge archives in srcs and pass it to 'ar'.
-    # See https://stackoverflow.com/a/23621751.
-    files = _pic_only(ctx.files.srcs)
-    mri_script = "create " + ctx.outputs.out.path
-    for f in files:
-        mri_script += "\\naddlib " + f.path
-    mri_script += "\\nsave\\nend"
-
-    cc_toolchain = find_cpp_toolchain(ctx)
-    ctx.actions.run_shell(
-        inputs = ctx.files.srcs,  # + ctx.files._crosstool,
-        outputs = [ctx.outputs.out],
-        command = ("printf \"%s\" " % mri_script +
-                   "| %s -M" % cc_toolchain.ar_executable),
+"""Patches the include directives for the link.stub file."""
+
+def rdc_library(name, deps):
+    """Produces a cc_library from deps containing relocatable device code."""
+
+    # From .a and .pic.a archives, just use the latter. Otherwise we get
+    # multiply defined symbols.
+    # TODO(csigg): C++ Sandwich once available should allow passing this target
+    # to a cc_library dependency, which would avoid the linking order issue.
+    _filter(
+        name = name + "_deps_a",
+        srcs = deps,
+        suffix = ".pic.a",
     )
 
-_merge_archive = rule(
-    implementation = _merge_archive_impl,
-    attrs = {
-        "srcs": attr.label_list(mandatory = True, allow_files = True),
-        "_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"),
-        # "_crosstool": attr.label_list(cfg = "host", default = ["@bazel_tools//tools/cpp:crosstool"]),
-    },
-    outputs = {"out": "lib%{name}.a"},
-)
-"""Merges srcs into a single archive."""
-
-def cuda_rdc_library(name, hdrs = None, copts = None, linkstatic = True, **kwargs):
-    """Produces a cuda_library using separate compilation and linking.
-
-    CUDA separate compilation and linking allows device function calls across
-    translation units. This is different from the normal whole program
-    compilation where each translation unit contains all device code. For more
-    background, see
-    https://devblogs.nvidia.com/separate-compilation-linking-cuda-device-code/,
-    https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-options-for-separate-compilation
-
-    During separate compilation, the different CUDA source files are compiled
-    to 'relocatable device code' (RDC) and embedded in the host object files.
-    When using nvcc, linking the device code for each supported GPU
-    architecture and generating kernel registration code for the CUDA runtime
-    is handled automatically. Clang supports generating relocatable device
-    code, but it can't link it. We therefore rely on tools provided by the CUDA
-    SDK to link the device code and generate the host code to register the
-    kernels.
-
-    The nvlink tool extracts the RDC code from the object files and links it
-    into cubin files, one per GPU architecture. It also produces a header file
-    with a list of kernel names to register. The cubins are merged into a
-    binary blob using the fatbinary tool, and converted to a C header file with
-    the help of the bin2c tool. The registration header file, the fatbinary
-    header file, and the link.stub file (shipped with the CUDA SDK) are
-    compiled as ordinary host code.
-
-    Here is a diagram of the CUDA separate compilation trajectory:
-
-     x.cu.cc    y.cu.cc
-           \    /            cc_library (compile RDC and archive)
-            xy.a
-           /    \            * nvlink
-    register.h  xy.cubin
-          :      |           * fatbinary and bin2c
-          :     xy.fatbin.h
-          :      :           * #include
-          dlink.cc           * Expanded from crt/dlink.stub template
-             |               cc_library (host compile and archive)
-          dlink.a
-
-    The steps marked with '*' are implemented in the _device_link rule.
-
-    The object files in both xy.a and dlink.a reference symbols defined in the
-    other archive. The separate archives are a side effect of using two
-    cc_library targets to implement a single compilation trajectory. We could
-    fix this once bazel supports C++ sandwich. For now, we just merge the two
-    archives to avoid unresolved symbols:
-
-    xy.a      dlink.a
-        \    /           merge archive
-      xy_dlink.a
-           |             cc_library (or alternatively, cc_import)
-     final target
-
-    Another complication is that cc_library produces (depending on the
-    configuration) both PIC and non-PIC archives, but the distinction
-    is hidden from Starlark until C++ sandwich becomes available. We work
-    around this by dropping the non-PIC files if PIC files are available.
-
-    Args:
-      name: Target name.
-      hdrs: Header files.
-      copts: Compiler options.
-      linkstatic: Must be true.
-      **kwargs: Any other arguments.
-    """
-
-    if not hdrs:
-        hdrs = []
-    if not copts:
-        copts = []
+    # Device-link to cubins for each architecture.
+    images = []
+    cubins = []
+    for arch in %{gpu_architectures}:
+        cubin = "%s_%s.cubin" % (name, arch)
+        register_hdr = "%s_%s.h" % (name, arch)
+        nvlink = "@local_config_nccl//:nvlink"
+        cmd = ("$(location %s) " % nvlink +
+               select({
+                   # NCCL is only supported on Linux.
+                   "@org_tensorflow//tensorflow:linux_x86_64": "--cpu-arch=X86_64 ",
+                   "@org_tensorflow//tensorflow:linux_ppc64le": "--cpu-arch=PPC64LE ",
+                   "//conditions:default": "",
+               }) +
+               "--arch=%s $(SRCS) " % arch +
+               "--register-link-binaries=$(location %s) " % register_hdr +
+               "--output-file=$(location %s)" % cubin)
+        native.genrule(
+            name = "%s_%s" % (name, arch),
+            outs = [register_hdr, cubin],
+            srcs = [name + "_deps_a"],
+            cmd = cmd,
+            tools = [nvlink],
+        )
+        images.append("--image=profile=%s,file=$(location %s)" % (arch, cubin))
+        cubins.append(cubin)
 
-    # Compile host and device code into library.
-    lib = name + "_lib"
-    tf_cuda_library(
-        name = lib,
-        hdrs = hdrs,
-        copts = _rdc_copts() + copts,
-        linkstatic = linkstatic,
-        **kwargs
+    # Generate fatbin header from all cubins.
+    fatbin_hdr = name + ".fatbin.h"
+    fatbinary = "@local_config_nccl//:cuda/bin/fatbinary"
+    bin2c = "@local_config_nccl//:cuda/bin/bin2c"
+    cmd = ("$(location %s) -64 --cmdline=--compile-only " % fatbinary +
+           "--link --bin2c-path $$(dirname $(location %s)) " % bin2c +
+           "--compress-all %s --create=%%{name}.fatbin " % " ".join(images) +
+           "--embedded-fatbin=$@")
+    native.genrule(
+        name = name + "_fatbin_h",
+        outs = [fatbin_hdr],
+        srcs = cubins,
+        cmd = cmd,
+        tools = [fatbinary, bin2c],
     )
 
-    # Generate source file containing linked device code.
-    dlink_hdrs = name + "_dlink_hdrs"
-    dlink_cc = name + "_dlink.cc"
-    _device_link(
-        name = dlink_hdrs,
-        deps = [lib],
-        out = dlink_cc,
-        gpu_archs = %{gpu_architectures},
-        nvlink_args = select({
-            "@org_tensorflow//tensorflow:linux_x86_64": ["--cpu-arch=X86_64"],
-            "@org_tensorflow//tensorflow:linux_ppc64le": ["--cpu-arch=PPC64LE"],
-            "//conditions:default": [],
-        }),
+    # Generate the source file #including the headers generated above.
+    _gen_link_src(
+        name = name + "_dlink_src",
+        # Include just the last one, they are equivalent.
+        register_hdr = register_hdr,
+        fatbin_hdr = fatbin_hdr,
+        template = "@local_config_nccl//:cuda/bin/crt/link.stub",
+        output = name + ".cc",
     )
 
-    # Compile the source file into a library.
-    dlink = name + "_dlink"
+    # Compile the source file into the cc_library.
     native.cc_library(
-        name = dlink,
-        srcs = [dlink_cc],
-        textual_hdrs = [dlink_hdrs],
+        name = name + "_dlink_a",
+        srcs = [
+            name + "_dlink_src",
+        ],
+        textual_hdrs = [register_hdr, fatbin_hdr],
         deps = [
             "@local_config_cuda//cuda:cuda_headers",
         ],
@@ -358,22 +222,31 @@ def cuda_rdc_library(name, hdrs = None, copts = None, linkstatic = True, **kwarg
             "__NV_EXTRA_INITIALIZATION=",
             "__NV_EXTRA_FINALIZATION=",
         ],
-        linkstatic = linkstatic,
+        linkstatic = True,
     )
 
-    # Repackage the two libs into a single archive. This is required because
-    # both libs reference symbols defined in the other one. For details, see
+    # Repackage deps into a single archive. This avoid unresolved symbols when
+    # the archives happen to be linked in the wrong order. For more details, see
     # https://eli.thegreenplace.net/2013/07/09/library-order-in-static-linking
-    archive = name + "_a"
-    _merge_archive(
-        name = archive,
-        srcs = [lib, dlink],
+    native.genrule(
+        name = name + "_a",
+        srcs = [
+            name + "_deps_a",
+            name + "_dlink_a",
+        ],
+        outs = [name + ".a"],
+        # See https://stackoverflow.com/a/23621751
+        cmd = """
+addlibs=$$(echo $(SRCS) | sed "s/[^ ]* */\\naddlib &/g")
+printf "create $@$${addlibs}\\nsave\\nend" | $(AR) -M
+""",
     )
 
-    # Create cc target from archive.
     native.cc_library(
         name = name,
-        srcs = [archive],
-        hdrs = hdrs,
-        linkstatic = linkstatic,
+        srcs = [name + "_a"],
+        deps = [
+            "@local_config_cuda//cuda:cudart_static",
+        ],
+        linkstatic = True,
     )
-- 
GitLab


From 00151d81f4fc5928f4ce75250d20567558fe1634 Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Wed, 12 Dec 2018 13:40:20 -0800
Subject: [PATCH 438/873] Run all tests in both graph and eager mode.

PiperOrigin-RevId: 225245412
---
 tensorflow/python/keras/activations_test.py | 121 +++++++++-----------
 tensorflow/python/keras/constraints_test.py |  79 ++++++-------
 2 files changed, 91 insertions(+), 109 deletions(-)

diff --git a/tensorflow/python/keras/activations_test.py b/tensorflow/python/keras/activations_test.py
index 6b7bfb698b..33001f419e 100644
--- a/tensorflow/python/keras/activations_test.py
+++ b/tensorflow/python/keras/activations_test.py
@@ -31,6 +31,7 @@ def _ref_softmax(values):
   return e / np.sum(e)
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class KerasActivationsTest(test.TestCase):
 
   def test_serialization(self):
@@ -46,12 +47,11 @@ class KerasActivationsTest(test.TestCase):
       assert fn == ref_fn
 
   def test_softmax(self):
-    with self.cached_session():
-      x = keras.backend.placeholder(ndim=2)
-      f = keras.backend.function([x], [keras.activations.softmax(x)])
-      test_values = np.random.random((2, 5))
+    x = keras.backend.placeholder(ndim=2)
+    f = keras.backend.function([x], [keras.activations.softmax(x)])
+    test_values = np.random.random((2, 5))
 
-      result = f([test_values])[0]
+    result = f([test_values])[0]
     expected = _ref_softmax(test_values[0])
     self.assertAllClose(result[0], expected, rtol=1e-05)
 
@@ -60,40 +60,36 @@ class KerasActivationsTest(test.TestCase):
       keras.activations.softmax(x)
 
   def test_temporal_softmax(self):
-    with self.cached_session():
-      x = keras.backend.placeholder(shape=(2, 2, 3))
-      f = keras.backend.function([x], [keras.activations.softmax(x)])
-      test_values = np.random.random((2, 2, 3)) * 10
-      result = f([test_values])[0]
+    x = keras.backend.placeholder(shape=(2, 2, 3))
+    f = keras.backend.function([x], [keras.activations.softmax(x)])
+    test_values = np.random.random((2, 2, 3)) * 10
+    result = f([test_values])[0]
     expected = _ref_softmax(test_values[0, 0])
     self.assertAllClose(result[0, 0], expected, rtol=1e-05)
 
-  @test_util.run_deprecated_v1
   def test_selu(self):
     x = keras.backend.placeholder(ndim=2)
     f = keras.backend.function([x], [keras.activations.selu(x)])
     alpha = 1.6732632423543772848170429916717
     scale = 1.0507009873554804934193349852946
 
-    with self.cached_session():
-      positive_values = np.array([[1, 2]], dtype=keras.backend.floatx())
-      result = f([positive_values])[0]
-      self.assertAllClose(result, positive_values * scale, rtol=1e-05)
+    positive_values = np.array([[1, 2]], dtype=keras.backend.floatx())
+    result = f([positive_values])[0]
+    self.assertAllClose(result, positive_values * scale, rtol=1e-05)
 
-      negative_values = np.array([[-1, -2]], dtype=keras.backend.floatx())
-      result = f([negative_values])[0]
-      true_result = (np.exp(negative_values) - 1) * scale * alpha
-      self.assertAllClose(result, true_result)
+    negative_values = np.array([[-1, -2]], dtype=keras.backend.floatx())
+    result = f([negative_values])[0]
+    true_result = (np.exp(negative_values) - 1) * scale * alpha
+    self.assertAllClose(result, true_result)
 
   def test_softplus(self):
     def softplus(x):
       return np.log(np.ones_like(x) + np.exp(x))
 
-    with self.cached_session():
-      x = keras.backend.placeholder(ndim=2)
-      f = keras.backend.function([x], [keras.activations.softplus(x)])
-      test_values = np.random.random((2, 5))
-      result = f([test_values])[0]
+    x = keras.backend.placeholder(ndim=2)
+    f = keras.backend.function([x], [keras.activations.softplus(x)])
+    test_values = np.random.random((2, 5))
+    result = f([test_values])[0]
     expected = softplus(test_values)
     self.assertAllClose(result, expected, rtol=1e-05)
 
@@ -101,11 +97,10 @@ class KerasActivationsTest(test.TestCase):
     def softsign(x):
       return np.divide(x, np.ones_like(x) + np.absolute(x))
 
-    with self.cached_session():
-      x = keras.backend.placeholder(ndim=2)
-      f = keras.backend.function([x], [keras.activations.softsign(x)])
-      test_values = np.random.random((2, 5))
-      result = f([test_values])[0]
+    x = keras.backend.placeholder(ndim=2)
+    f = keras.backend.function([x], [keras.activations.softsign(x)])
+    test_values = np.random.random((2, 5))
+    result = f([test_values])[0]
     expected = softsign(test_values)
     self.assertAllClose(result, expected, rtol=1e-05)
 
@@ -118,68 +113,60 @@ class KerasActivationsTest(test.TestCase):
         return z / (1 + z)
     sigmoid = np.vectorize(ref_sigmoid)
 
-    with self.cached_session():
-      x = keras.backend.placeholder(ndim=2)
-      f = keras.backend.function([x], [keras.activations.sigmoid(x)])
-      test_values = np.random.random((2, 5))
-      result = f([test_values])[0]
+    x = keras.backend.placeholder(ndim=2)
+    f = keras.backend.function([x], [keras.activations.sigmoid(x)])
+    test_values = np.random.random((2, 5))
+    result = f([test_values])[0]
     expected = sigmoid(test_values)
     self.assertAllClose(result, expected, rtol=1e-05)
 
-  @test_util.run_deprecated_v1
   def test_hard_sigmoid(self):
     def ref_hard_sigmoid(x):
       x = (x * 0.2) + 0.5
       z = 0.0 if x <= 0 else (1.0 if x >= 1 else x)
       return z
     hard_sigmoid = np.vectorize(ref_hard_sigmoid)
-    with self.cached_session():
-      x = keras.backend.placeholder(ndim=2)
-      f = keras.backend.function([x], [keras.activations.hard_sigmoid(x)])
-      test_values = np.random.random((2, 5))
-      result = f([test_values])[0]
+    x = keras.backend.placeholder(ndim=2)
+    f = keras.backend.function([x], [keras.activations.hard_sigmoid(x)])
+    test_values = np.random.random((2, 5))
+    result = f([test_values])[0]
     expected = hard_sigmoid(test_values)
     self.assertAllClose(result, expected, rtol=1e-05)
 
   def test_relu(self):
-    with self.cached_session():
-      x = keras.backend.placeholder(ndim=2)
-      f = keras.backend.function([x], [keras.activations.relu(x)])
-      test_values = np.random.random((2, 5))
-      result = f([test_values])[0]
+    x = keras.backend.placeholder(ndim=2)
+    f = keras.backend.function([x], [keras.activations.relu(x)])
+    test_values = np.random.random((2, 5))
+    result = f([test_values])[0]
     # No negative values in test values...
     self.assertAllClose(result, test_values, rtol=1e-05)
 
-  @test_util.run_deprecated_v1
   def test_elu(self):
-    with self.cached_session():
-      x = keras.backend.placeholder(ndim=2)
-      f = keras.backend.function([x], [keras.activations.elu(x, 0.5)])
-      test_values = np.random.random((2, 5))
-      result = f([test_values])[0]
-      self.assertAllClose(result, test_values, rtol=1e-05)
-      negative_values = np.array([[-1, -2]], dtype=keras.backend.floatx())
-      result = f([negative_values])[0]
-      true_result = (np.exp(negative_values) - 1) / 2
+    x = keras.backend.placeholder(ndim=2)
+    f = keras.backend.function([x], [keras.activations.elu(x, 0.5)])
+    test_values = np.random.random((2, 5))
+    result = f([test_values])[0]
+    self.assertAllClose(result, test_values, rtol=1e-05)
+    negative_values = np.array([[-1, -2]], dtype=keras.backend.floatx())
+    result = f([negative_values])[0]
+    true_result = (np.exp(negative_values) - 1) / 2
     self.assertAllClose(result, true_result)
 
   def test_tanh(self):
-    with self.cached_session():
-      test_values = np.random.random((2, 5))
-      x = keras.backend.placeholder(ndim=2)
-      exp = keras.activations.tanh(x)
-      f = keras.backend.function([x], [exp])
-      result = f([test_values])[0]
+    test_values = np.random.random((2, 5))
+    x = keras.backend.placeholder(ndim=2)
+    exp = keras.activations.tanh(x)
+    f = keras.backend.function([x], [exp])
+    result = f([test_values])[0]
     expected = np.tanh(test_values)
     self.assertAllClose(result, expected, rtol=1e-05)
 
   def test_exponential(self):
-    with self.cached_session():
-      test_values = np.random.random((2, 5))
-      x = keras.backend.placeholder(ndim=2)
-      exp = keras.activations.exponential(x)
-      f = keras.backend.function([x], [exp])
-      result = f([test_values])[0]
+    test_values = np.random.random((2, 5))
+    x = keras.backend.placeholder(ndim=2)
+    exp = keras.activations.exponential(x)
+    f = keras.backend.function([x], [exp])
+    result = f([test_values])[0]
     expected = np.exp(test_values)
     self.assertAllClose(result, expected, rtol=1e-05)
 
diff --git a/tensorflow/python/keras/constraints_test.py b/tensorflow/python/keras/constraints_test.py
index 4f674ea7c5..92bc4852cf 100644
--- a/tensorflow/python/keras/constraints_test.py
+++ b/tensorflow/python/keras/constraints_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python import keras
+from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
@@ -35,6 +36,7 @@ def get_example_array():
   return example_array
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class KerasConstraintsTest(test.TestCase):
 
   def test_serialization(self):
@@ -49,54 +51,47 @@ class KerasConstraintsTest(test.TestCase):
       assert fn.__class__ == ref_fn.__class__
 
   def test_max_norm(self):
-    with self.cached_session():
-      array = get_example_array()
-      for m in get_test_values():
-        norm_instance = keras.constraints.max_norm(m)
-        normed = norm_instance(keras.backend.variable(array))
-        assert np.all(keras.backend.eval(normed) < m)
-
-      # a more explicit example
-      norm_instance = keras.constraints.max_norm(2.0)
-      x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T
-      x_normed_target = np.array([[0, 0, 0], [1.0, 0, 0],
-                                  [2.0, 0, 0],
-                                  [2. / np.sqrt(3),
-                                   2. / np.sqrt(3),
-                                   2. / np.sqrt(3)]]).T
-      x_normed_actual = keras.backend.eval(
-          norm_instance(keras.backend.variable(x)))
-      self.assertAllClose(x_normed_actual, x_normed_target, rtol=1e-05)
+    array = get_example_array()
+    for m in get_test_values():
+      norm_instance = keras.constraints.max_norm(m)
+      normed = norm_instance(keras.backend.variable(array))
+      assert np.all(keras.backend.eval(normed) < m)
+
+    # a more explicit example
+    norm_instance = keras.constraints.max_norm(2.0)
+    x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T
+    x_normed_target = np.array(
+        [[0, 0, 0], [1.0, 0, 0], [2.0, 0, 0],
+         [2. / np.sqrt(3), 2. / np.sqrt(3), 2. / np.sqrt(3)]]).T
+    x_normed_actual = keras.backend.eval(
+        norm_instance(keras.backend.variable(x)))
+    self.assertAllClose(x_normed_actual, x_normed_target, rtol=1e-05)
 
   def test_non_neg(self):
-    with self.cached_session():
-      non_neg_instance = keras.constraints.non_neg()
-      normed = non_neg_instance(keras.backend.variable(get_example_array()))
-      assert np.all(np.min(keras.backend.eval(normed), axis=1) == 0.)
+    non_neg_instance = keras.constraints.non_neg()
+    normed = non_neg_instance(keras.backend.variable(get_example_array()))
+    assert np.all(np.min(keras.backend.eval(normed), axis=1) == 0.)
 
   def test_unit_norm(self):
-    with self.cached_session():
-      unit_norm_instance = keras.constraints.unit_norm()
-      normalized = unit_norm_instance(
-          keras.backend.variable(get_example_array()))
-      norm_of_normalized = np.sqrt(
-          np.sum(keras.backend.eval(normalized) ** 2, axis=0))
-      # In the unit norm constraint, it should be equal to 1.
-      difference = norm_of_normalized - 1.
-      largest_difference = np.max(np.abs(difference))
-      assert np.abs(largest_difference) < 10e-5
+    unit_norm_instance = keras.constraints.unit_norm()
+    normalized = unit_norm_instance(keras.backend.variable(get_example_array()))
+    norm_of_normalized = np.sqrt(
+        np.sum(keras.backend.eval(normalized)**2, axis=0))
+    # In the unit norm constraint, it should be equal to 1.
+    difference = norm_of_normalized - 1.
+    largest_difference = np.max(np.abs(difference))
+    assert np.abs(largest_difference) < 10e-5
 
   def test_min_max_norm(self):
-    with self.cached_session():
-      array = get_example_array()
-      for m in get_test_values():
-        norm_instance = keras.constraints.min_max_norm(min_value=m,
-                                                       max_value=m * 2)
-        normed = norm_instance(keras.backend.variable(array))
-        value = keras.backend.eval(normed)
-        l2 = np.sqrt(np.sum(np.square(value), axis=0))
-        assert not l2[l2 < m]
-        assert not l2[l2 > m * 2 + 1e-5]
+    array = get_example_array()
+    for m in get_test_values():
+      norm_instance = keras.constraints.min_max_norm(
+          min_value=m, max_value=m * 2)
+      normed = norm_instance(keras.backend.variable(array))
+      value = keras.backend.eval(normed)
+      l2 = np.sqrt(np.sum(np.square(value), axis=0))
+      assert not l2[l2 < m]
+      assert not l2[l2 > m * 2 + 1e-5]
 
 
 if __name__ == '__main__':
-- 
GitLab


From b3b8dff01c238366f456ac7dff7c130d50db7693 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 12 Dec 2018 13:58:46 -0800
Subject: [PATCH 439/873] Checkpointing for OptimizerV2

Copies and pastes the existing Optimizer checkpointing code, and stops adding unconditional dependencies on slot variables (which were based on ops.uid() and so not reproducible across program runs).

PiperOrigin-RevId: 225248820
---
 .../python/keras/optimizer_v2/optimizer_v2.py | 118 ++-
 .../python/training/checkpointable/BUILD      |  37 +
 .../python/training/checkpointable/util.py    |   8 +-
 .../training/checkpointable/util_test.py      | 501 +++++-----
 .../util_with_v1_optimizers_test.py           | 873 ++++++++++++++++++
 ...ensorflow.keras.optimizers.-adadelta.pbtxt |   4 +
 ...tensorflow.keras.optimizers.-adagrad.pbtxt |   4 +
 .../tensorflow.keras.optimizers.-adam.pbtxt   |   4 +
 .../tensorflow.keras.optimizers.-adamax.pbtxt |   4 +
 ...nsorflow.keras.optimizers.-optimizer.pbtxt |   4 +
 ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt |   4 +
 .../tensorflow.keras.optimizers.-s-g-d.pbtxt  |   4 +
 ...ensorflow.keras.optimizers.-adadelta.pbtxt |   4 +
 ...tensorflow.keras.optimizers.-adagrad.pbtxt |   4 +
 .../tensorflow.keras.optimizers.-adam.pbtxt   |   4 +
 .../tensorflow.keras.optimizers.-adamax.pbtxt |   4 +
 ...nsorflow.keras.optimizers.-optimizer.pbtxt |   4 +
 ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt |   4 +
 .../tensorflow.keras.optimizers.-s-g-d.pbtxt  |   4 +
 19 files changed, 1308 insertions(+), 285 deletions(-)
 create mode 100644 tensorflow/python/training/checkpointable/util_with_v1_optimizers_test.py

diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index a130e1d0c3..d3153141ec 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -21,6 +21,7 @@ from __future__ import division
 from __future__ import print_function
 
 import abc
+import functools
 
 import six
 
@@ -28,6 +29,7 @@ from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import distribution_strategy_context as distribute_ctx
 from tensorflow.python.distribute import reduce_util as ds_reduce_util
 from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.keras import backend
@@ -165,8 +167,17 @@ class OptimizerV2(checkpointable.CheckpointableBase):
     self._hyper = {}
     # dict: {variable name : {slot name : variable}}
     self._slots = {}
+    self._slot_names = []
     self._weights = []
 
+    # For implementing Checkpointable. Stores information about how to restore
+    # slot variables which have not yet been created
+    # (checkpointable._CheckpointPosition objects).
+    #  {slot_name :
+    #      {_var_key(variable_to_train): [checkpoint_position, ... ], ... },
+    #   ... }
+    self._deferred_slot_restorations = {}
+
     decay = kwargs.pop("decay", 0.0)
     if decay < 0.:
       raise ValueError("decay cannot be less than 0: {}".format(decay))
@@ -413,18 +424,36 @@ class OptimizerV2(checkpointable.CheckpointableBase):
     else:
       super(OptimizerV2, self).__setattr__(name, value)
 
+  def get_slot_names(self):
+    """A list of names for this optimizer's slots."""
+    return self._slot_names
+
   def add_slot(self, var, slot_name, initializer="zeros"):
+    """Add a new slot variable for `var`."""
+    if slot_name not in self._slot_names:
+      self._slot_names.append(slot_name)
     var_key = _var_key(var)
     slot_dict = self._slots.setdefault(var_key, {})
-    if slot_name not in slot_dict:
-      slot_key = _get_slot_key_from_var(var, slot_name)
-      weight = self.add_weight(
-          name=slot_key,
-          shape=var.shape,
+    weight = slot_dict.get(slot_name, None)
+    if weight is None:
+      if isinstance(initializer, six.string_types) or callable(initializer):
+        initializer = initializers.get(initializer)
+        initial_value = functools.partial(
+            initializer, shape=var.shape, dtype=var.dtype)
+      else:
+        initial_value = initializer
+      weight = tf_variables.Variable(
+          name="%s/%s" % (var._shared_name, slot_name),  # pylint: disable=protected-access
           dtype=var.dtype,
-          initializer=initializer)
+          trainable=False,
+          initial_value=initial_value)
+      backend.track_variable(weight)
       slot_dict[slot_name] = weight
+      self._restore_slot_variable(
+          slot_name=slot_name, variable=var,
+          slot_variable=weight)
       self._weights.append(weight)
+    return weight
 
   def get_slot(self, var, slot_name):
     var_key = _var_key(var)
@@ -678,6 +707,83 @@ class OptimizerV2(checkpointable.CheckpointableBase):
     """
     raise NotImplementedError()
 
+  # ---------------
+  # For implementing the checkpointable interface
+  # ---------------
+
+  def _restore_slot_variable(self, slot_name, variable, slot_variable):
+    """Restore a newly created slot variable's value."""
+    variable_key = _var_key(variable)
+    deferred_restorations = self._deferred_slot_restorations.get(
+        slot_name, {}).pop(variable_key, [])
+    # Iterate over restores, highest restore UID first to minimize the number
+    # of assignments.
+    deferred_restorations.sort(key=lambda position: position.restore_uid,
+                               reverse=True)
+    for checkpoint_position in deferred_restorations:
+      checkpoint_position.restore(slot_variable)
+
+  def _create_or_restore_slot_variable(
+      self, slot_variable_position, slot_name, variable):
+    """Restore a slot variable's value, possibly creating it.
+
+    Called when a variable which has an associated slot variable is created or
+    restored. When executing eagerly, we create the slot variable with a
+    restoring initializer.
+
+    No new variables are created when graph building. Instead,
+    _restore_slot_variable catches these after normal creation and adds restore
+    ops to the graph. This method is nonetheless important when graph building
+    for the case when a slot variable has already been created but `variable`
+    has just been added to a dependency graph (causing us to realize that the
+    slot variable needs to be restored).
+
+    Args:
+      slot_variable_position: A `checkpointable._CheckpointPosition` object
+        indicating the slot variable `Checkpointable` object to be restored.
+      slot_name: The name of this `Optimizer`'s slot to restore into.
+      variable: The variable object this slot is being created for.
+    """
+    variable_key = _var_key(variable)
+    slot_dict = self._slots.get(variable_key, {})
+    slot_variable = slot_dict.get(slot_name, None)
+    if (slot_variable is None and context.executing_eagerly() and
+        slot_variable_position.is_simple_variable()
+        # Defer slot variable creation if there is an active variable creator
+        # scope. Generally we'd like to eagerly create/restore slot variables
+        # when possible, but this may mean that scopes intended to catch
+        # `variable` also catch its eagerly created slot variable
+        # unintentionally (specifically make_template would add a dependency on
+        # a slot variable if not for this case). Deferring is mostly harmless
+        # (aside from double initialization), and makes variable creator scopes
+        # behave the same way they do when graph building.
+        and not ops.get_default_graph()._variable_creator_stack):  # pylint: disable=protected-access
+      initializer = checkpointable.CheckpointInitialValue(
+          checkpoint_position=slot_variable_position)
+      slot_variable = self.add_slot(
+          var=variable,
+          initializer=initializer,
+          slot_name=slot_name)
+      # Slot variables are not owned by any one object (because we don't want to
+      # save the slot variable if the optimizer is saved without the non-slot
+      # variable, or if the non-slot variable is saved without the optimizer;
+      # it's a dependency hypergraph with edges of the form (optimizer, non-slot
+      # variable, variable)). So we don't _track_ slot variables anywhere, and
+      # instead special-case this dependency and otherwise pretend it's a normal
+      # graph.
+    if slot_variable is not None:
+      # If we've either made this slot variable, or if we've pulled out an
+      # existing slot variable, we should restore it.
+      slot_variable_position.restore(slot_variable)
+    else:
+      # We didn't make the slot variable. Defer restoring until it gets created
+      # normally. We keep a list rather than the one with the highest restore
+      # UID in case slot variables have their own dependencies, in which case
+      # those could differ between restores.
+      self._deferred_slot_restorations.setdefault(
+          slot_name, {}).setdefault(variable_key, []).append(
+              slot_variable_position)
+
 
 def _filter_grads(grads_and_vars):
   """Filter out iterable with grad equal to None."""
diff --git a/tensorflow/python/training/checkpointable/BUILD b/tensorflow/python/training/checkpointable/BUILD
index 26a0ac35b7..3201c755af 100644
--- a/tensorflow/python/training/checkpointable/BUILD
+++ b/tensorflow/python/training/checkpointable/BUILD
@@ -159,6 +159,43 @@ py_test(
         "//tensorflow/python/eager:test",
         "//tensorflow/python/keras:engine",
         "//tensorflow/python/keras:layers",
+        "@absl_py//absl/testing:parameterized",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "util_with_v1_optimizers_test",
+    srcs = ["util_with_v1_optimizers_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],  # b/74395663
+    deps = [
+        ":base",
+        ":tracking",
+        ":util",
+        "//tensorflow/python:checkpoint_management",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:pywrap_tensorflow",
+        "//tensorflow/python:resource_variable_ops",
+        "//tensorflow/python:saver",
+        "//tensorflow/python:session",
+        "//tensorflow/python:state_ops",
+        "//tensorflow/python:template",
+        "//tensorflow/python:training",
+        "//tensorflow/python:training_util",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/eager:backprop",
+        "//tensorflow/python/eager:context",
+        "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/keras:engine",
+        "//tensorflow/python/keras:layers",
+        "@absl_py//absl/testing:parameterized",
         "@six_archive//:six",
     ],
 )
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
index ce1b9c6fc5..fde91948e5 100644
--- a/tensorflow/python/training/checkpointable/util.py
+++ b/tensorflow/python/training/checkpointable/util.py
@@ -39,7 +39,7 @@ from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import checkpoint_management
-from tensorflow.python.training import optimizer as optimizer_lib
+from tensorflow.python.training import optimizer as optimizer_v1
 from tensorflow.python.training import saver as v1_saver_lib
 from tensorflow.python.training.checkpointable import base
 from tensorflow.python.training.checkpointable import data_structures
@@ -560,7 +560,9 @@ def _serialize_slot_variables(checkpointable_objects, node_ids, object_names):
   non_slot_objects = list(checkpointable_objects)
   slot_variables = _ObjectIdentityDictionary()
   for checkpointable in non_slot_objects:
-    if isinstance(checkpointable, optimizer_lib.Optimizer):
+    if (isinstance(checkpointable, optimizer_v1.Optimizer)
+        # TODO(b/110718070): Fix Keras imports.
+        or hasattr(checkpointable, "_create_or_restore_slot_variable")):
       naming_scheme = _slot_variable_naming_for_optimizer(
           optimizer_path=object_names[checkpointable])
       slot_names = checkpointable.get_slot_names()
@@ -570,7 +572,7 @@ def _serialize_slot_variables(checkpointable_objects, node_ids, object_names):
           try:
             slot_variable = checkpointable.get_slot(
                 original_variable, slot_name)
-          except AttributeError:
+          except (AttributeError, KeyError):
             slot_variable = None
           if slot_variable is None:
             continue
diff --git a/tensorflow/python/training/checkpointable/util_test.py b/tensorflow/python/training/checkpointable/util_test.py
index 3bdab4cb0b..61de46898a 100644
--- a/tensorflow/python/training/checkpointable/util_test.py
+++ b/tensorflow/python/training/checkpointable/util_test.py
@@ -20,10 +20,10 @@ import functools
 import json
 import os
 
+from absl.testing import parameterized
 import six
 
 from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.client import session as session_lib
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import def_function
@@ -35,14 +35,14 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.keras.engine import sequential
 from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.layers import core
+from tensorflow.python.keras.optimizer_v2 import adam
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import template
 from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.training import adam
+from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.training import training_util
@@ -243,7 +243,7 @@ class _OwnsMirroredVariables(base.CheckpointableBase):
     return self.non_dep_variable.name
 
 
-class CheckpointingTests(test.TestCase):
+class CheckpointingTests(parameterized.TestCase, test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
   def testNamingWithOptimizer(self):
@@ -252,41 +252,32 @@ class CheckpointingTests(test.TestCase):
     # A nuisance Model using the same optimizer. Its slot variables should not
     # go in the checkpoint, since it is never depended on.
     other_model = MyModel()
-    optimizer = adam.AdamOptimizer(0.001)
-    optimizer_step = training_util.get_or_create_global_step()
+    optimizer = adam.Adam(0.001)
+    step = training_util.get_or_create_global_step()
     root_checkpointable = checkpointable_utils.Checkpoint(
-        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
-    if context.executing_eagerly():
-      optimizer.minimize(
-          lambda: model(input_value),
-          global_step=optimizer_step)
-      optimizer.minimize(
-          lambda: other_model(input_value),
-          global_step=optimizer_step)
-    else:
-      train_op = optimizer.minimize(
-          model(input_value), global_step=optimizer_step)
-      optimizer.minimize(
-          other_model(input_value),
-          global_step=optimizer_step)
-      self.evaluate(checkpointable_utils.gather_initializers(
-          root_checkpointable))
-      self.evaluate(train_op)
+        optimizer=optimizer, model=model, step=step)
+
+    with backprop.GradientTape() as tape:
+      loss = model(input_value)
+    variables = model.trainable_variables
+    gradients = tape.gradient(loss, variables)
+    train_op = control_flow_ops.group(
+        optimizer.apply_gradients(zip(gradients, variables)),
+        step.assign_add(1))
+
+    with backprop.GradientTape() as tape:
+      loss = other_model(input_value)
+    variables = other_model.trainable_variables
+    gradients = tape.gradient(loss, variables)
+    optimizer.apply_gradients(zip(gradients, variables))
+
+    self.evaluate(checkpointable_utils.gather_initializers(
+        root_checkpointable))
+    self.evaluate(train_op)
     named_variables, serialized_graph, _ = (
         checkpointable_utils._serialize_object_graph(
             root_checkpointable, saveables_cache=None))
-    expected_checkpoint_names = (
-        # Created in the root node, so no prefix.
-        "optimizer_step",
-        "model/_second/kernel",
-        "model/_named_dense/kernel",
-        "model/_named_dense/bias",
-        # non-Layer dependency of the model
-        "model/_non_layer/a_variable",
-        # The optimizer creates two non-slot variables
-        "optimizer/beta1_power",
-        "optimizer/beta2_power",
-        # Slot variables
+    expected_slot_keys = (
         "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
         "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
         "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
@@ -294,9 +285,26 @@ class CheckpointingTests(test.TestCase):
         "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
         "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
     )
+    expected_checkpoint_names = (
+        # Created in the root node, so no prefix.
+        "step",
+        "model/_second/kernel",
+        "model/_named_dense/kernel",
+        "model/_named_dense/bias",
+        # non-Layer dependency of the model
+        "model/_non_layer/a_variable",
+        "optimizer/learning_rate",
+        "optimizer/beta_1",
+        "optimizer/beta_2",
+        "optimizer/epsilon",
+        "optimizer/iter",
+        "optimizer/decay",
+    ) + expected_slot_keys
     suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
     expected_checkpoint_names = [
         name + suffix for name in expected_checkpoint_names]
+    expected_checkpoint_names.append(
+        "optimizer/.ATTRIBUTES/OBJECT_CONFIG_JSON")
     # The Dense layers also save get_config() JSON
     expected_checkpoint_names.extend(
         ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
@@ -307,7 +315,7 @@ class CheckpointingTests(test.TestCase):
     # Check that we've mapped to the right variable objects (not exhaustive)
     self.assertEqual(
         "global_step",
-        named_variables["optimizer_step" + suffix].full_name)
+        named_variables["step" + suffix].full_name)
     self.assertEqual(
         "my_model/dense_1/kernel",
         named_variables["model/_second/kernel" + suffix].full_name)
@@ -315,48 +323,31 @@ class CheckpointingTests(test.TestCase):
         "my_model/dense/kernel",
         named_variables["model/_named_dense/kernel" + suffix].full_name)
     self.assertEqual(
-        "beta1_power",
-        named_variables["optimizer/beta1_power" + suffix].full_name)
+        "beta_1",
+        named_variables["optimizer/beta_1" + suffix].full_name)
     self.assertEqual(
-        "beta2_power",
-        named_variables["optimizer/beta2_power" + suffix].full_name)
+        "beta_2",
+        named_variables["optimizer/beta_2" + suffix].full_name)
     # Spot check the generated protocol buffers.
     self.assertEqual("optimizer",
                      serialized_graph.nodes[0].children[1].local_name)
     optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
         1].node_id]
-    self.assertEqual("beta1_power",
-                     optimizer_node.children[0].local_name)
-    self.assertEqual("beta1_power",
-                     serialized_graph.nodes[optimizer_node.children[0].node_id]
-                     .attributes[0].full_name)
-    self.assertEqual(
-        "my_model/dense/kernel",
-        serialized_graph.nodes[optimizer_node.slot_variables[0]
-                               .original_variable_node_id]
-        .attributes[0].full_name)
-    # We strip off the :0 suffix, as variable.name-based saving does.
-    self.assertEqual(
-        "my_model/dense/kernel/Adam",
-        serialized_graph.nodes[optimizer_node.slot_variables[0]
-                               .slot_variable_node_id]
-        .attributes[0].full_name)
-    self.assertEqual(
-        "my_model/dense/kernel/Adam:0",
-        optimizer.get_slot(
-            var=model._named_dense.kernel,
-            name="m").name)
-    self.assertEqual(
-        "model/_named_dense/kernel" + suffix,
-        serialized_graph.nodes[
-            optimizer_node.slot_variables[0]
-            .original_variable_node_id].attributes[0].checkpoint_key)
-    self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
-    self.assertEqual(
-        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
-        serialized_graph.nodes[
-            optimizer_node.slot_variables[0]
-            .slot_variable_node_id].attributes[0].checkpoint_key)
+    children = [node.local_name for node in optimizer_node.children]
+    six.assertCountEqual(
+        self,
+        # Non-slot dependencies
+        ["beta_1", "beta_2", "iter", "decay", "epsilon", "learning_rate"],
+        children)
+    serialized_slot_keys = []
+    for slot in optimizer_node.slot_variables:
+      for attribute in (
+          serialized_graph.nodes[slot.slot_variable_node_id].attributes):
+        serialized_slot_keys.append(attribute.checkpoint_key)
+    six.assertCountEqual(
+        self,
+        [key + suffix for key in expected_slot_keys],
+        serialized_slot_keys)
 
   @test_util.run_in_graph_and_eager_modes
   def testMoreComplexSaveableReturned(self):
@@ -397,20 +388,19 @@ class CheckpointingTests(test.TestCase):
   @test_util.run_in_graph_and_eager_modes
   def testSaveRestore(self):
     model = MyModel()
-    optimizer = adam.AdamOptimizer(0.001)
+    optimizer = adam.Adam(0.001)
     root_checkpointable = checkpointable_utils.Checkpoint(
         optimizer=optimizer, model=model)
     input_value = constant_op.constant([[3.]])
-    if context.executing_eagerly():
-      optimizer.minimize(
-          lambda: model(input_value))
-    else:
-      train_op = optimizer.minimize(model(input_value))
-      # TODO(allenl): Make initialization more pleasant when graph building.
-      root_checkpointable.save_counter  # pylint: disable=pointless-statement
-      self.evaluate(checkpointable_utils.gather_initializers(
-          root_checkpointable))
-      self.evaluate(train_op)
+    with backprop.GradientTape() as tape:
+      loss = model(input_value)
+    variables = model.trainable_variables
+    gradients = tape.gradient(loss, variables)
+    train_op = optimizer.apply_gradients(zip(gradients, variables))
+    root_checkpointable.save_counter  # pylint: disable=pointless-statement
+    self.evaluate(checkpointable_utils.gather_initializers(
+        root_checkpointable))
+    self.evaluate(train_op)
     prefix = os.path.join(self.get_temp_dir(), "ckpt")
     self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
     m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
@@ -418,7 +408,8 @@ class CheckpointingTests(test.TestCase):
     save_path = root_checkpointable.save(file_prefix=prefix)
     self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
     self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
-    optimizer_variables = self.evaluate(optimizer.variables())
+    optimizer_variables = self.evaluate(
+        sorted(optimizer.variables(), key=lambda v: v.name))
     self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
     # Immediate restoration
     status = root_checkpointable.restore(save_path=save_path).assert_consumed()
@@ -429,11 +420,7 @@ class CheckpointingTests(test.TestCase):
     if not context.executing_eagerly():
       return  # Restore-on-create is only supported when executing eagerly
     on_create_model = MyModel()
-    on_create_optimizer = adam.AdamOptimizer(
-        0.001,
-        # Preserve beta1_power and beta2_power when appying gradients so we can
-        # test that they've been restored correctly.
-        beta1=1.0, beta2=1.0)
+    on_create_optimizer = adam.Adam(0.001)
     on_create_root = checkpointable_utils.Checkpoint(
         optimizer=on_create_optimizer, model=on_create_model)
     # Deferred restoration
@@ -455,15 +442,15 @@ class CheckpointingTests(test.TestCase):
     # Optimizer slot variables are created when the original variable is
     # restored.
     self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
-    self.assertAllEqual(optimizer_variables[2:],
-                        self.evaluate(on_create_optimizer.variables()))
     dummy_var = resource_variable_ops.ResourceVariable([1.])
-    on_create_optimizer.minimize(loss=dummy_var.read_value)
+    on_create_optimizer.minimize(loss=dummy_var.read_value,
+                                 var_list=[dummy_var])
     status.assert_existing_objects_matched()
     status.assert_consumed()
-    beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators()
-    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power))
-    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power))
+    self.assertAllEqual(
+        optimizer_variables,
+        # Creation order is different, so .variables() needs to be re-sorted.
+        self.evaluate(sorted(optimizer.variables(), key=lambda v: v.name)))
 
   # TODO(allenl): Debug garbage created by this test in python3.
   def testDeferredRestorationUsageEager(self):
@@ -473,21 +460,22 @@ class CheckpointingTests(test.TestCase):
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     for training_continuation in range(3):
       model = MyModel()
-      optimizer = adam.AdamOptimizer(0.001)
+      optimizer = adam.Adam(0.001)
       root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model,
-          optimizer_step=training_util.get_or_create_global_step())
+          optimizer=optimizer, model=model)
       root.restore(checkpoint_management.latest_checkpoint(
           checkpoint_directory))
       for _ in range(num_training_steps):
         # TODO(allenl): Use a Dataset and serialize/checkpoint it.
         input_value = constant_op.constant([[3.]])
-        optimizer.minimize(
-            lambda: model(input_value),  # pylint: disable=cell-var-from-loop
-            global_step=root.optimizer_step)
+        with backprop.GradientTape() as tape:
+          loss = model(input_value)
+        variables = model.trainable_variables
+        gradients = tape.gradient(loss, variables)
+        optimizer.apply_gradients(zip(gradients, variables))
       root.save(file_prefix=checkpoint_prefix)
       self.assertEqual((training_continuation + 1) * num_training_steps,
-                       root.optimizer_step.numpy())
+                       root.optimizer.iterations.numpy())
 
   def testUsageGraph(self):
     """Expected usage when graph building."""
@@ -498,14 +486,16 @@ class CheckpointingTests(test.TestCase):
       for training_continuation in range(3):
         with ops.Graph().as_default():
           model = MyModel()
-          optimizer = adam.AdamOptimizer(0.001)
+          optimizer = adam.Adam(0.001)
           root = checkpointable_utils.Checkpoint(
-              optimizer=optimizer, model=model,
-              global_step=training_util.get_or_create_global_step())
+              optimizer=optimizer, model=model)
           input_value = constant_op.constant([[3.]])
-          train_op = optimizer.minimize(
-              model(input_value),
-              global_step=root.global_step)
+          with backprop.GradientTape() as tape:
+            loss = model(input_value)
+          variables = model.trainable_variables
+          gradients = tape.gradient(loss, variables)
+          train_op = optimizer.apply_gradients(zip(gradients, variables))
+
           checkpoint_path = checkpoint_management.latest_checkpoint(
               checkpoint_directory)
           with self.session(graph=ops.get_default_graph()) as session:
@@ -524,7 +514,7 @@ class CheckpointingTests(test.TestCase):
               session.run(train_op)
             root.save(file_prefix=checkpoint_prefix, session=session)
             self.assertEqual((training_continuation + 1) * num_training_steps,
-                             session.run(root.global_step))
+                             session.run(root.optimizer.iterations))
             self.assertEqual(training_continuation + 1,
                              session.run(root.save_counter))
 
@@ -534,21 +524,23 @@ class CheckpointingTests(test.TestCase):
     # Does create garbage when executing eagerly due to ops.Graph() creation.
     num_training_steps = 10
     checkpoint_directory = self.get_temp_dir()
+    def _train_fn(model, input_value):
+      with backprop.GradientTape() as tape:
+        loss = model(input_value)
+      variables = model.trainable_variables
+      gradients = tape.gradient(loss, variables)
+      return optimizer.apply_gradients(zip(gradients, variables))
     for training_continuation in range(3):
       with test_util.device(use_gpu=True):
         model = MyModel()
-        optimizer = adam.AdamOptimizer(0.001)
+        optimizer = adam.Adam(0.001)
         root = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, model=model,
-            global_step=training_util.get_or_create_global_step())
+            optimizer=optimizer, model=model)
         manager = checkpoint_management.CheckpointManager(
             root, checkpoint_directory, max_to_keep=1)
         status = root.restore(save_path=manager.latest_checkpoint)
         input_value = constant_op.constant([[3.]])
-        train_fn = functools.partial(
-            optimizer.minimize,
-            functools.partial(model, input_value),
-            global_step=root.global_step)
+        train_fn = functools.partial(_train_fn, model, input_value)
         if not context.executing_eagerly():
           train_fn = functools.partial(self.evaluate, train_fn())
         status.initialize_or_restore()
@@ -556,7 +548,7 @@ class CheckpointingTests(test.TestCase):
           train_fn()
         manager.save()
         self.assertEqual((training_continuation + 1) * num_training_steps,
-                         self.evaluate(root.global_step))
+                         self.evaluate(root.optimizer.iterations))
         self.assertEqual(training_continuation + 1,
                          self.evaluate(root.save_counter))
 
@@ -625,10 +617,9 @@ class CheckpointingTests(test.TestCase):
       with test_util.device(use_gpu=True):
         model = MyModel()
         # Don't actually train so we can test variable values
-        optimizer = adam.AdamOptimizer(0.)
+        optimizer = adam.Adam(0.)
         root = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, model=model,
-            global_step=training_util.get_or_create_global_step())
+            optimizer=optimizer, model=model)
         checkpoint_path = checkpoint_management.latest_checkpoint(
             checkpoint_directory)
         status = root.restore(save_path=checkpoint_path)
@@ -639,8 +630,7 @@ class CheckpointingTests(test.TestCase):
           with backprop.GradientTape() as tape:
             loss = _call_model(constant_op.constant([[3.]]))
           gradients = tape.gradient(loss, model.variables)
-          return optimizer.apply_gradients(zip(gradients, model.variables),
-                                           global_step=root.global_step)
+          return optimizer.apply_gradients(zip(gradients, model.variables))
         if not context.executing_eagerly():
           train_fn = functools.partial(
               self.evaluate, train_fn())
@@ -654,7 +644,7 @@ class CheckpointingTests(test.TestCase):
           self.evaluate(model.variables[0].assign([[42.]]))
         root.save(file_prefix=checkpoint_prefix)
         self.assertEqual((training_continuation + 1) * num_training_steps,
-                         self.evaluate(root.global_step))
+                         self.evaluate(optimizer.iterations))
         self.assertEqual(training_continuation + 1,
                          self.evaluate(root.save_counter))
   # pylint: enable=cell-var-from-loop
@@ -716,7 +706,7 @@ class CheckpointingTests(test.TestCase):
 
     with context.eager_mode():
       model = Model()
-      optimizer = adam.AdamOptimizer(learning_rate=0.05)
+      optimizer = adam.Adam(learning_rate=0.05)
       checkpoint_directory = self.get_temp_dir()
       checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
       checkpoint = checkpointable_utils.Checkpoint(
@@ -802,24 +792,24 @@ class CheckpointingTests(test.TestCase):
     root = tracking.Checkpointable()
     root.var = checkpointable_utils.add_variable(
         root, name="var", initializer=0.)
-    optimizer = adam.AdamOptimizer(0.1)
-    if context.executing_eagerly():
-      optimizer.minimize(root.var.read_value)
-    else:
-      train_op = optimizer.minimize(root.var)
-      # Note that `optimizer` has not been added as a dependency of
-      # `root`. Create a one-off grouping so that slot variables for `root.var`
-      # get initialized too.
-      self.evaluate(checkpointable_utils.gather_initializers(
-          checkpointable_utils.Checkpoint(root=root, optimizer=optimizer)))
-      self.evaluate(train_op)
+    optimizer = adam.Adam(0.1)
+    variables = [root.var]
+    gradients = [1.]
+    train_op = optimizer.apply_gradients(zip(gradients, variables))
+    # Note that `optimizer` has not been added as a dependency of
+    # `root`. Create a one-off grouping so that slot variables for `root.var`
+    # get initialized too.
+    self.evaluate(checkpointable_utils.gather_initializers(
+        checkpointable_utils.Checkpoint(root=root, optimizer=optimizer)))
+    self.evaluate(train_op)
     self.evaluate(state_ops.assign(root.var, 12.))
     no_slots_path = checkpointable_utils.CheckpointableSaver(root).save(
         os.path.join(checkpoint_directory, "no_slots"))
     root.optimizer = optimizer
     self.evaluate(state_ops.assign(root.var, 13.))
-    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
-                                   14.))
+    self.evaluate(state_ops.assign(
+        optimizer.get_slot(slot_name="m", var=root.var),
+        14.))
     slots_path = checkpointable_utils.CheckpointableSaver(root).save(
         os.path.join(checkpoint_directory, "with_slots"))
     new_root = tracking.Checkpointable()
@@ -836,29 +826,32 @@ class CheckpointingTests(test.TestCase):
     no_slot_status.assert_consumed()
     no_slot_status.run_restore_ops()
     self.assertEqual(12., self.evaluate(new_root.var))
-    new_root.optimizer = adam.AdamOptimizer(0.1)
+    new_root.optimizer = adam.Adam(0.1)
     slot_status.assert_existing_objects_matched()
-    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
+    with self.assertRaisesRegexp(AssertionError, "Unresolved object"):
       slot_status.assert_consumed()
     self.assertEqual(12., self.evaluate(new_root.var))
     if context.executing_eagerly():
       # Slot variables are only created with restoring initializers when
       # executing eagerly.
       self.assertEqual(14., self.evaluate(
-          new_root.optimizer.get_slot(name="m", var=new_root.var)))
-    else:
-      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
-                    None)
-    if context.executing_eagerly():
-      new_root.optimizer.minimize(new_root.var.read_value)
+          new_root.optimizer.get_slot(slot_name="m", var=new_root.var)))
     else:
-      train_op = new_root.optimizer.minimize(new_root.var)
-      # The slot variable now exists; restore() didn't create it, but we should
-      # now have a restore op for it.
-      slot_status.run_restore_ops()
+      # Slot variables are not created eagerly when graph building.
+      with self.assertRaises(KeyError):
+        new_root.optimizer.get_slot(slot_name="m", var=new_root.var)
+    variables = [new_root.var]
+    gradients = [1.]
+    train_op = new_root.optimizer.apply_gradients(zip(gradients, variables))
+    # The slot variable now exists; restore() didn't create it, but we should
+    # now have a restore op for it.
+    slot_status.run_restore_ops()
+    if not context.executing_eagerly():
+      # The train op hasn't run when graph building, so the slot variable has
+      # its restored value. It has run in eager, so the value will be different.
       self.assertEqual(14., self.evaluate(
-          new_root.optimizer.get_slot(name="m", var=new_root.var)))
-      self.evaluate(train_op)
+          new_root.optimizer.get_slot(slot_name="m", var=new_root.var)))
+    self.evaluate(train_op)
     slot_status.assert_consumed()
 
   @test_util.run_in_graph_and_eager_modes
@@ -1018,18 +1011,18 @@ class CheckpointingTests(test.TestCase):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
     first = tracking.Checkpointable()
-    first.var1 = variables.Variable(0., name="outside_var")
-    first.var2 = variables.Variable(0., name="blah")
+    first.var1 = variables_lib.Variable(0., name="outside_var")
+    first.var2 = variables_lib.Variable(0., name="blah")
     self.evaluate(first.var1.assign(4.))
     self.evaluate(first.var2.assign(8.))
     save_path = checkpointable_utils.CheckpointableSaver(first).save(
         checkpoint_prefix)
 
     second = tracking.Checkpointable()
-    second.var2 = variables.Variable(0., name="blah")
+    second.var2 = variables_lib.Variable(0., name="blah")
     status = checkpointable_utils.CheckpointableSaver(
         second).restore(save_path)
-    recreated_var1 = variables.Variable(0., name="outside_var")
+    recreated_var1 = variables_lib.Variable(0., name="outside_var")
     status.run_restore_ops()
     self.assertEqual(8., self.evaluate(second.var2))
     self.evaluate(recreated_var1.assign(-2.))
@@ -1046,15 +1039,16 @@ class CheckpointingTests(test.TestCase):
         checkpoint_directory = self.get_temp_dir()
         checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
         obj = tracking.Checkpointable()
-        obj.var = variable_scope.get_variable(name="v", initializer=0.)
-        obj.opt = adam.AdamOptimizer(0.1)
-        obj.opt.minimize(obj.var.read_value())
+        obj.var = variables_lib.Variable(0., name="v")
+        obj.opt = adam.Adam(0.1)
+        variables = [obj.var]
+        gradients = [1.]
+        obj.opt.apply_gradients(zip(gradients, variables))
         self.evaluate(checkpointable_utils.gather_initializers(obj))
         saver = checkpointable_utils.CheckpointableSaver(obj)
         saver.save(checkpoint_prefix)
-        before_ops = graph.get_operations()
+        graph.finalize()
         saver.save(checkpoint_prefix)
-        self.assertEqual(before_ops, graph.get_operations())
 
   @test_util.run_in_graph_and_eager_modes
   def testCheckpointState(self):
@@ -1135,74 +1129,17 @@ class CheckpointingTests(test.TestCase):
         checkpoint_directory = self.get_temp_dir()
         checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
         obj = tracking.Checkpointable()
-        obj.var = variable_scope.get_variable(name="v", initializer=0.)
-        obj.opt = adam.AdamOptimizer(0.1)
-        obj.opt.minimize(obj.var.read_value())
+        obj.var = variables_lib.Variable(0., name="v")
+        obj.opt = adam.Adam(0.1)
+        variables = [obj.var]
+        gradients = [1.]
+        obj.opt.apply_gradients(zip(gradients, variables))
         self.evaluate(checkpointable_utils.gather_initializers(obj))
         saver = checkpointable_utils.CheckpointableSaver(obj)
         save_path = saver.save(checkpoint_prefix)
         saver.restore(save_path)
-        before_ops = graph.get_operations()
+        graph.finalize()
         saver.restore(save_path)
-        self.assertEqual(before_ops, graph.get_operations())
-
-  def testMultipleGraphsNonSlotVariables(self):
-    with context.graph_mode():
-      checkpoint_directory = self.get_temp_dir()
-      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
-      optimizer = adam.AdamOptimizer(0.001)
-      # Construct a model in one graph
-      first_graph = ops.Graph()
-      first_session = session_lib.Session(graph=first_graph)
-      with first_graph.as_default(), first_session.as_default():
-        first_variable = resource_variable_ops.ResourceVariable([1.])
-        first_root_checkpointable = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, variable=first_variable)
-        train_op = optimizer.minimize(first_variable.read_value)
-        self.evaluate(checkpointable_utils.gather_initializers(
-            first_root_checkpointable))
-        self.evaluate(train_op)
-        self.evaluate(first_variable.assign([1.]))
-        self.evaluate(optimizer.get_slot(
-            var=first_variable, name="m").assign([2.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.evaluate(beta1_power.assign(3.))
-
-      # Save and load in a second graph
-      second_graph = ops.Graph()
-      with second_graph.as_default(), session_lib.Session(graph=second_graph):
-        second_variable = resource_variable_ops.ResourceVariable([1.])
-        second_root_checkpointable = checkpointable_utils.Checkpoint(
-            optimizer=optimizer, variable=second_variable)
-        train_op = optimizer.minimize(second_variable.read_value)
-        second_root_checkpointable.restore(None).initialize_or_restore()
-        self.evaluate(train_op)
-        self.evaluate(second_variable.assign([4.]))
-        self.evaluate(optimizer.get_slot(
-            var=second_variable, name="m").assign([5.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.evaluate(beta1_power.assign(6.))
-        save_path = second_root_checkpointable.save(checkpoint_prefix)
-        self.evaluate(second_variable.assign([7.]))
-        self.evaluate(optimizer.get_slot(
-            var=second_variable, name="m").assign([8.]))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(6., self.evaluate(beta1_power))
-        status = second_root_checkpointable.restore(save_path)
-        status.assert_consumed().run_restore_ops()
-        self.assertAllEqual([4.], self.evaluate(second_variable))
-        self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
-            var=second_variable, name="m")))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(6., self.evaluate(beta1_power))
-
-      # Check that the first graph is unmolested
-      with first_graph.as_default(), first_session.as_default():
-        self.assertAllEqual([1.], self.evaluate(first_variable))
-        self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
-            var=first_variable, name="m")))
-        beta1_power, _ = optimizer._get_beta_accumulators()
-        self.assertAllEqual(3., self.evaluate(beta1_power))
 
   @test_util.run_in_graph_and_eager_modes
   def test_sequential(self):
@@ -1243,10 +1180,9 @@ class CheckpointingTests(test.TestCase):
     optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
     with test_util.device(use_gpu=True):
       model = MyModel()
-      optimizer = adam.AdamOptimizer(0.001)
+      optimizer = adam.Adam(0.001)
       root = checkpointable_utils.Checkpoint(
-          model=model,  # Do not save the optimizer with the checkpoint.
-          global_step=training_util.get_or_create_global_step())
+          model=model)  # Do not save the optimizer with the checkpoint.
       optimizer_checkpoint = checkpointable_utils.Checkpoint(
           optimizer=optimizer)
 
@@ -1254,65 +1190,78 @@ class CheckpointingTests(test.TestCase):
           checkpoint_directory)
       status = root.restore(save_path=checkpoint_path)
       input_value = constant_op.constant([[3.]])
-      train_fn = functools.partial(
-          optimizer.minimize,
-          functools.partial(model, input_value),
-          global_step=root.global_step)
+      def train_fn():
+        with backprop.GradientTape() as tape:
+          loss = model(input_value)
+        variables = model.trainable_variables
+        gradients = tape.gradient(loss, variables)
+        return optimizer.apply_gradients(zip(gradients, variables))
       if not context.executing_eagerly():
         train_fn = functools.partial(self.evaluate, train_fn())
       status.initialize_or_restore()
-      self.evaluate([v.initializer for v in optimizer.variables()])
+      # TODO(tanzheny): Add hyper variables to .variables(), and set them with
+      # set_weights etc.
+      variables_not_in_the_variables_property = [
+          obj for obj in optimizer._hyper.values()
+          if isinstance(obj, variables_lib.Variable)]
+      self.evaluate([v.initializer for v
+                     in optimizer.variables()
+                     + variables_not_in_the_variables_property])
       train_fn()
       model_save_path = root.save(file_prefix=checkpoint_prefix)
-      self.evaluate(optimizer.variables()[0].assign(42.))
+      self.evaluate(optimizer.beta_1.assign(42.))
       optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)
+    del train_fn
 
     # Restore into a graph with the optimizer
     with test_util.device(use_gpu=True):
       model = MyModel()
-      optimizer = adam.AdamOptimizer(0.001)
+      optimizer = adam.Adam(0.001)
       root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model,
-          global_step=training_util.get_or_create_global_step())
+          optimizer=optimizer, model=model)
       status = root.restore(save_path=model_save_path)
       input_value = constant_op.constant([[3.]])
-      train_fn = functools.partial(
-          optimizer.minimize,
-          functools.partial(model, input_value),
-          global_step=root.global_step)
+      def train_fn1():
+        with backprop.GradientTape() as tape:
+          loss = model(input_value)
+        variables = model.trainable_variables
+        gradients = tape.gradient(loss, variables)
+        return optimizer.apply_gradients(zip(gradients, variables))
       if not context.executing_eagerly():
-        train_fn = functools.partial(self.evaluate, train_fn())
+        train_fn1 = functools.partial(self.evaluate, train_fn1())
       status.initialize_or_restore()
-      train_fn()
+      train_fn1()
       with self.assertRaises(AssertionError):
         status.assert_existing_objects_matched()
       with self.assertRaises(AssertionError):
         status.assert_consumed()
+    del train_fn1
 
     # Make sure initialization doesn't clobber later restores
     with test_util.device(use_gpu=True):
       model = MyModel()
-      optimizer = adam.AdamOptimizer(0.001, beta1=1.0)
+      optimizer = adam.Adam(0.001, beta1=1.0)
       root = checkpointable_utils.Checkpoint(
-          optimizer=optimizer, model=model,
-          global_step=training_util.get_or_create_global_step())
+          optimizer=optimizer, model=model)
       opt_root = checkpointable_utils.Checkpoint(
           optimizer=optimizer)
       status = root.restore(save_path=model_save_path)
       init_only_optimizer_status = opt_root.restore(save_path=None)
       optimizer_status = opt_root.restore(save_path=optimizer_save_path)
       input_value = constant_op.constant([[3.]])
-      train_fn = functools.partial(
-          optimizer.minimize,
-          functools.partial(model, input_value),
-          global_step=root.global_step)
+      def train_fn2():
+        with backprop.GradientTape() as tape:
+          loss = model(input_value)
+        variables = model.trainable_variables
+        gradients = tape.gradient(loss, variables)
+        return optimizer.apply_gradients(zip(gradients, variables))
       if not context.executing_eagerly():
-        train_fn = functools.partial(self.evaluate, train_fn())
+        train_fn2 = functools.partial(self.evaluate, train_fn2())
       optimizer_status.run_restore_ops()
       status.initialize_or_restore()
       init_only_optimizer_status.initialize_or_restore()
-      train_fn()
-      self.assertEqual(42., self.evaluate(optimizer.variables()[0]))
+      train_fn2()
+      self.assertEqual(42., self.evaluate(optimizer.beta_1))
 
   @test_util.run_in_graph_and_eager_modes
   def test_restore_after_adding_empty_checkpointable_data_structure(self):
@@ -1345,7 +1294,7 @@ class _ManualScope(tracking.Checkpointable):
     return variable_scope.get_variable(name="in_manual_scope", shape=[])
 
 
-class TemplateTests(test.TestCase):
+class TemplateTests(parameterized.TestCase, test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def test_checkpointable_save_restore(self):
@@ -1369,10 +1318,11 @@ class TemplateTests(test.TestCase):
     manual_dep, = manual_scope._checkpoint_dependencies
     self.assertEqual("in_manual_scope", manual_dep.name)
     self.assertIs(manual_scope_v, manual_dep.ref)
-    optimizer = adam.AdamOptimizer(0.0)
+    optimizer = adam.Adam(0.0)
     save_root = checkpointable_utils.Checkpoint(
         my_template=save_template, optimizer=optimizer)
-    optimizer.minimize(v1_save.read_value)
+    optimizer.minimize(v1_save.read_value,
+                       var_list=[v1_save])
     self.evaluate([v.initializer for v in save_template.variables])
     self.evaluate([v.initializer for v in optimizer.variables()])
     self.evaluate(v1_save.assign([12.]))
@@ -1382,13 +1332,13 @@ class TemplateTests(test.TestCase):
     save_path = save_root.save(checkpoint_prefix)
 
     load_template = template.make_template("s2", _templated)
-    load_optimizer = adam.AdamOptimizer(0.0)
+    load_optimizer = adam.Adam(0.0)
     load_root = checkpointable_utils.Checkpoint(
         my_template=load_template, optimizer=load_optimizer)
     status = load_root.restore(save_path)
     var, var_plus_one, var2, _, _ = load_template()
-    load_optimizer.minimize(var.read_value)
-    self.assertEqual(3, len(load_template._checkpoint_dependencies))
+    load_optimizer.minimize(var.read_value, var_list=[var])
+    self.assertLen(load_template._checkpoint_dependencies, 3)
     self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
     self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
     self.assertEqual("ManualScope",
@@ -1429,14 +1379,14 @@ class TemplateTests(test.TestCase):
     status = load_root.restore(save_path)
     (inner_template_one, inner_template_two), (v1, v2, v3) = load_template()
     outer_template_dependencies = load_root.my_template._checkpoint_dependencies
-    self.assertEqual(2, len(outer_template_dependencies))
+    self.assertLen(outer_template_dependencies, 2)
     self.assertEqual("i1", outer_template_dependencies[0].name)
     self.assertIs(inner_template_one, outer_template_dependencies[0].ref)
     self.assertEqual("i2", outer_template_dependencies[1].name)
     self.assertIs(inner_template_two, outer_template_dependencies[1].ref)
-    self.assertEqual(1, len(inner_template_one._checkpoint_dependencies))
+    self.assertLen(inner_template_one._checkpoint_dependencies, 1)
     self.assertEqual("v", inner_template_one._checkpoint_dependencies[0].name)
-    self.assertEqual(1, len(inner_template_two._checkpoint_dependencies))
+    self.assertLen(inner_template_two._checkpoint_dependencies, 1)
     self.assertEqual("v", inner_template_two._checkpoint_dependencies[0].name)
     status.assert_consumed().run_restore_ops()
     self.assertAllEqual([20.], self.evaluate(v1))
@@ -1449,13 +1399,14 @@ class CheckpointCompatibilityTests(test.TestCase):
   def _initialized_model(self):
     input_value = constant_op.constant([[3.]])
     model = MyModel()
-    optimizer = adam.AdamOptimizer(0.001)
-    optimizer_step = training_util.get_or_create_global_step()
+    optimizer = adam.Adam(0.001)
     root_checkpointable = checkpointable_utils.Checkpoint(
-        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
-    train_op = optimizer.minimize(
-        functools.partial(model, input_value),
-        global_step=optimizer_step)
+        optimizer=optimizer, model=model)
+    with backprop.GradientTape() as tape:
+      loss = model(input_value)
+    variables = model.trainable_variables
+    gradients = tape.gradient(loss, variables)
+    train_op = optimizer.apply_gradients(zip(gradients, variables))
     self.evaluate(checkpointable_utils.gather_initializers(
         root_checkpointable))
     self.evaluate(train_op)
@@ -1463,28 +1414,26 @@ class CheckpointCompatibilityTests(test.TestCase):
     # with known values to check when loading.
     self.evaluate(model._named_dense.bias.assign([1.]))
     self.evaluate(optimizer.get_slot(
-        var=model._named_dense.bias, name="m").assign([2.]))
-    beta1_power, _ = optimizer._get_beta_accumulators()
-    self.evaluate(beta1_power.assign(3.))
+        var=model._named_dense.bias, slot_name="m").assign([2.]))
+    self.evaluate(optimizer.beta_1.assign(3.))
     return root_checkpointable
 
   def _set_sentinels(self, root_checkpointable):
     self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.]))
     self.evaluate(
         root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, name="m")
+            var=root_checkpointable.model._named_dense.bias, slot_name="m")
         .assign([102.]))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
-    self.evaluate(beta1_power.assign(103.))
+    self.evaluate(root_checkpointable.optimizer.beta_1.assign(103.))
 
   def _check_sentinels(self, root_checkpointable):
     self.assertAllEqual(
         [1.], self.evaluate(root_checkpointable.model._named_dense.bias))
     self.assertAllEqual([2.], self.evaluate(
         root_checkpointable.optimizer.get_slot(
-            var=root_checkpointable.model._named_dense.bias, name="m")))
-    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
-    self.assertAllEqual(3., self.evaluate(beta1_power))
+            var=root_checkpointable.model._named_dense.bias, slot_name="m")))
+    self.assertAllEqual(3.,
+                        self.evaluate(root_checkpointable.optimizer.beta_1))
 
   def _write_name_based_checkpoint(self):
     checkpoint_directory = self.get_temp_dir()
@@ -1497,7 +1446,7 @@ class CheckpointCompatibilityTests(test.TestCase):
         name_saver = saver_lib.Saver()
         return name_saver.save(
             sess=session, save_path=checkpoint_prefix,
-            global_step=root.optimizer_step)
+            global_step=root.optimizer.iterations)
 
   @test_util.run_in_graph_and_eager_modes
   def testLoadFromNameBasedSaver(self):
diff --git a/tensorflow/python/training/checkpointable/util_with_v1_optimizers_test.py b/tensorflow/python/training/checkpointable/util_with_v1_optimizers_test.py
new file mode 100644
index 0000000000..00d5747f78
--- /dev/null
+++ b/tensorflow/python/training/checkpointable/util_with_v1_optimizers_test.py
@@ -0,0 +1,873 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for object-based saving which use tf.train.* optimizers."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import functools
+import os
+
+import six
+
+from tensorflow.python.client import session as session_lib
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
+from tensorflow.python.eager import test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.layers import core
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import template
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.training import adam
+from tensorflow.python.training import checkpoint_management
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training import training_util
+from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.checkpointable import util as checkpointable_utils
+
+
+class NonLayerCheckpointable(tracking.Checkpointable):
+
+  def __init__(self):
+    super(NonLayerCheckpointable, self).__init__()
+    self.a_variable = checkpointable_utils.add_variable(
+        self, name="a_variable", shape=[])
+
+
+# pylint: disable=not-callable
+class MyModel(training.Model):
+  """A concrete Model for testing."""
+
+  def __init__(self):
+    super(MyModel, self).__init__()
+    self._named_dense = core.Dense(1, use_bias=True)
+    self._second = core.Dense(1, use_bias=False)
+    # We can still track Checkpointables which aren't Layers.
+    self._non_layer = NonLayerCheckpointable()
+
+  def call(self, values):
+    ret = self._second(self._named_dense(values))
+    return ret
+
+
+class CheckpointingTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testNamingWithOptimizer(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    # A nuisance Model using the same optimizer. Its slot variables should not
+    # go in the checkpoint, since it is never depended on.
+    other_model = MyModel()
+    optimizer = adam.AdamOptimizer(0.001)
+    optimizer_step = training_util.get_or_create_global_step()
+    root_checkpointable = checkpointable_utils.Checkpoint(
+        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
+    if context.executing_eagerly():
+      optimizer.minimize(
+          lambda: model(input_value),
+          global_step=optimizer_step)
+      optimizer.minimize(
+          lambda: other_model(input_value),
+          global_step=optimizer_step)
+    else:
+      train_op = optimizer.minimize(
+          model(input_value), global_step=optimizer_step)
+      optimizer.minimize(
+          other_model(input_value),
+          global_step=optimizer_step)
+      self.evaluate(checkpointable_utils.gather_initializers(
+          root_checkpointable))
+      self.evaluate(train_op)
+    named_variables, serialized_graph, _ = (
+        checkpointable_utils._serialize_object_graph(
+            root_checkpointable, saveables_cache=None))
+    expected_checkpoint_names = (
+        # Created in the root node, so no prefix.
+        "optimizer_step",
+        "model/_second/kernel",
+        "model/_named_dense/kernel",
+        "model/_named_dense/bias",
+        # non-Layer dependency of the model
+        "model/_non_layer/a_variable",
+        # The optimizer creates two non-slot variables
+        "optimizer/beta1_power",
+        "optimizer/beta2_power",
+        # Slot variables
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m",
+        "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v",
+    )
+    suffix = "/.ATTRIBUTES/VARIABLE_VALUE"
+    expected_checkpoint_names = [
+        name + suffix for name in expected_checkpoint_names]
+    # The Dense layers also save get_config() JSON
+    expected_checkpoint_names.extend(
+        ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON",
+         "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"])
+    named_variables = {v.name: v for v in named_variables}
+    six.assertCountEqual(self, expected_checkpoint_names,
+                         named_variables.keys())
+    # Check that we've mapped to the right variable objects (not exhaustive)
+    self.assertEqual(
+        "global_step",
+        named_variables["optimizer_step" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense_1/kernel",
+        named_variables["model/_second/kernel" + suffix].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        named_variables["model/_named_dense/kernel" + suffix].full_name)
+    self.assertEqual(
+        "beta1_power",
+        named_variables["optimizer/beta1_power" + suffix].full_name)
+    self.assertEqual(
+        "beta2_power",
+        named_variables["optimizer/beta2_power" + suffix].full_name)
+    # Spot check the generated protocol buffers.
+    self.assertEqual("optimizer",
+                     serialized_graph.nodes[0].children[1].local_name)
+    optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[
+        1].node_id]
+    self.assertEqual("beta1_power",
+                     optimizer_node.children[0].local_name)
+    self.assertEqual("beta1_power",
+                     serialized_graph.nodes[optimizer_node.children[0].node_id]
+                     .attributes[0].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel",
+        serialized_graph.nodes[optimizer_node.slot_variables[0]
+                               .original_variable_node_id]
+        .attributes[0].full_name)
+    # We strip off the :0 suffix, as variable.name-based saving does.
+    self.assertEqual(
+        "my_model/dense/kernel/Adam",
+        serialized_graph.nodes[optimizer_node.slot_variables[0]
+                               .slot_variable_node_id]
+        .attributes[0].full_name)
+    self.assertEqual(
+        "my_model/dense/kernel/Adam:0",
+        optimizer.get_slot(
+            var=model._named_dense.kernel,
+            name="m").name)
+    self.assertEqual(
+        "model/_named_dense/kernel" + suffix,
+        serialized_graph.nodes[
+            optimizer_node.slot_variables[0]
+            .original_variable_node_id].attributes[0].checkpoint_key)
+    self.assertEqual("m", optimizer_node.slot_variables[0].slot_name)
+    self.assertEqual(
+        "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix,
+        serialized_graph.nodes[
+            optimizer_node.slot_variables[0]
+            .slot_variable_node_id].attributes[0].checkpoint_key)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testSaveRestore(self):
+    model = MyModel()
+    optimizer = adam.AdamOptimizer(0.001)
+    root_checkpointable = checkpointable_utils.Checkpoint(
+        optimizer=optimizer, model=model)
+    input_value = constant_op.constant([[3.]])
+    if context.executing_eagerly():
+      optimizer.minimize(
+          lambda: model(input_value))
+    else:
+      train_op = optimizer.minimize(model(input_value))
+      # TODO(allenl): Make initialization more pleasant when graph building.
+      root_checkpointable.save_counter  # pylint: disable=pointless-statement
+      self.evaluate(checkpointable_utils.gather_initializers(
+          root_checkpointable))
+      self.evaluate(train_op)
+    prefix = os.path.join(self.get_temp_dir(), "ckpt")
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.]))
+    m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m")
+    self.evaluate(state_ops.assign(m_bias_slot, [1.5]))
+    save_path = root_checkpointable.save(file_prefix=prefix)
+    self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.]))
+    self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3))
+    optimizer_variables = self.evaluate(optimizer.variables())
+    self.evaluate(state_ops.assign(m_bias_slot, [-2.]))
+    # Immediate restoration
+    status = root_checkpointable.restore(save_path=save_path).assert_consumed()
+    status.run_restore_ops()
+    self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1]))
+    self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter))
+    self.assertAllEqual([1.5], self.evaluate(m_bias_slot))
+    if not context.executing_eagerly():
+      return  # Restore-on-create is only supported when executing eagerly
+    on_create_model = MyModel()
+    on_create_optimizer = adam.AdamOptimizer(
+        0.001,
+        # Preserve beta1_power and beta2_power when appying gradients so we can
+        # test that they've been restored correctly.
+        beta1=1.0, beta2=1.0)
+    on_create_root = checkpointable_utils.Checkpoint(
+        optimizer=on_create_optimizer, model=on_create_model)
+    # Deferred restoration
+    status = on_create_root.restore(save_path=save_path)
+    status.assert_nontrivial_match()
+    status.assert_existing_objects_matched()
+    with self.assertRaises(AssertionError):
+      status.assert_consumed()
+    on_create_model(constant_op.constant([[3.]]))  # create variables
+    self.assertAllEqual(1, self.evaluate(on_create_root.save_counter))
+    self.assertAllEqual([42.],
+                        self.evaluate(
+                            on_create_model._named_dense.variables[1]))
+    on_create_m_bias_slot = on_create_optimizer.get_slot(
+        on_create_model._named_dense.variables[1], "m")
+    status.assert_existing_objects_matched()
+    with self.assertRaises(AssertionError):
+      status.assert_consumed()
+    # Optimizer slot variables are created when the original variable is
+    # restored.
+    self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
+    self.assertAllEqual(optimizer_variables[2:],
+                        self.evaluate(on_create_optimizer.variables()))
+    dummy_var = resource_variable_ops.ResourceVariable([1.])
+    on_create_optimizer.minimize(loss=dummy_var.read_value)
+    status.assert_existing_objects_matched()
+    status.assert_consumed()
+    beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators()
+    self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power))
+    self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power))
+
+  # TODO(allenl): Debug garbage created by this test in python3.
+  def testDeferredRestorationUsageEager(self):
+    """An idiomatic eager execution example."""
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      model = MyModel()
+      optimizer = adam.AdamOptimizer(0.001)
+      root = checkpointable_utils.Checkpoint(
+          optimizer=optimizer, model=model,
+          optimizer_step=training_util.get_or_create_global_step())
+      root.restore(checkpoint_management.latest_checkpoint(
+          checkpoint_directory))
+      for _ in range(num_training_steps):
+        # TODO(allenl): Use a Dataset and serialize/checkpoint it.
+        input_value = constant_op.constant([[3.]])
+        optimizer.minimize(
+            lambda: model(input_value),  # pylint: disable=cell-var-from-loop
+            global_step=root.optimizer_step)
+      root.save(file_prefix=checkpoint_prefix)
+      self.assertEqual((training_continuation + 1) * num_training_steps,
+                       root.optimizer_step.numpy())
+
+  def testUsageGraph(self):
+    """Expected usage when graph building."""
+    with context.graph_mode():
+      num_training_steps = 10
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      for training_continuation in range(3):
+        with ops.Graph().as_default():
+          model = MyModel()
+          optimizer = adam.AdamOptimizer(0.001)
+          root = checkpointable_utils.Checkpoint(
+              optimizer=optimizer, model=model,
+              global_step=training_util.get_or_create_global_step())
+          input_value = constant_op.constant([[3.]])
+          train_op = optimizer.minimize(
+              model(input_value),
+              global_step=root.global_step)
+          checkpoint_path = checkpoint_management.latest_checkpoint(
+              checkpoint_directory)
+          with self.session(graph=ops.get_default_graph()) as session:
+            status = root.restore(save_path=checkpoint_path)
+            status.initialize_or_restore(session=session)
+            if checkpoint_path is None:
+              self.assertEqual(0, training_continuation)
+              with self.assertRaises(AssertionError):
+                status.assert_consumed()
+              with self.assertRaises(AssertionError):
+                status.assert_existing_objects_matched()
+            else:
+              status.assert_consumed()
+              status.assert_existing_objects_matched()
+            for _ in range(num_training_steps):
+              session.run(train_op)
+            root.save(file_prefix=checkpoint_prefix, session=session)
+            self.assertEqual((training_continuation + 1) * num_training_steps,
+                             session.run(root.global_step))
+            self.assertEqual(training_continuation + 1,
+                             session.run(root.save_counter))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testAgnosticUsage(self):
+    """Graph/eager agnostic usage."""
+    # Does create garbage when executing eagerly due to ops.Graph() creation.
+    num_training_steps = 10
+    checkpoint_directory = self.get_temp_dir()
+    for training_continuation in range(3):
+      with test_util.device(use_gpu=True):
+        model = MyModel()
+        optimizer = adam.AdamOptimizer(0.001)
+        root = checkpointable_utils.Checkpoint(
+            optimizer=optimizer, model=model,
+            global_step=training_util.get_or_create_global_step())
+        manager = checkpoint_management.CheckpointManager(
+            root, checkpoint_directory, max_to_keep=1)
+        status = root.restore(save_path=manager.latest_checkpoint)
+        input_value = constant_op.constant([[3.]])
+        train_fn = functools.partial(
+            optimizer.minimize,
+            functools.partial(model, input_value),
+            global_step=root.global_step)
+        if not context.executing_eagerly():
+          train_fn = functools.partial(self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        manager.save()
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.global_step))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+
+  # pylint: disable=cell-var-from-loop
+  @test_util.run_in_graph_and_eager_modes
+  def testWithDefun(self):
+    num_training_steps = 2
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    for training_continuation in range(3):
+      with test_util.device(use_gpu=True):
+        model = MyModel()
+        # Don't actually train so we can test variable values
+        optimizer = adam.AdamOptimizer(0.)
+        root = checkpointable_utils.Checkpoint(
+            optimizer=optimizer, model=model,
+            global_step=training_util.get_or_create_global_step())
+        checkpoint_path = checkpoint_management.latest_checkpoint(
+            checkpoint_directory)
+        status = root.restore(save_path=checkpoint_path)
+        def train_fn():
+          @def_function.function
+          def _call_model(x):
+            return model(x)
+          with backprop.GradientTape() as tape:
+            loss = _call_model(constant_op.constant([[3.]]))
+          gradients = tape.gradient(loss, model.variables)
+          return optimizer.apply_gradients(zip(gradients, model.variables),
+                                           global_step=root.global_step)
+        if not context.executing_eagerly():
+          train_fn = functools.partial(
+              self.evaluate, train_fn())
+        status.initialize_or_restore()
+        for _ in range(num_training_steps):
+          train_fn()
+        if training_continuation > 0:
+          status.assert_consumed()
+          self.assertAllClose([[42.]], self.evaluate(model.variables[0]))
+        else:
+          self.evaluate(model.variables[0].assign([[42.]]))
+        root.save(file_prefix=checkpoint_prefix)
+        self.assertEqual((training_continuation + 1) * num_training_steps,
+                         self.evaluate(root.global_step))
+        self.assertEqual(training_continuation + 1,
+                         self.evaluate(root.save_counter))
+  # pylint: enable=cell-var-from-loop
+
+  def _get_checkpoint_name(self, name):
+    root = tracking.Checkpointable()
+    checkpointable_utils.add_variable(
+        root, name=name, shape=[1, 2], dtype=dtypes.float64)
+    (named_variable,), _, _ = checkpointable_utils._serialize_object_graph(
+        root, saveables_cache=None)
+    with ops.name_scope("root/" + named_variable.name):
+      pass  # Make sure we can use this as an op name if we prefix it.
+    return named_variable.name
+
+  def testAnonymousVarsInInit(self):
+
+    class Model(training.Model):
+
+      def __init__(self):
+        super(Model, self).__init__()
+        self.w = resource_variable_ops.ResourceVariable(0.0)
+        self.b = resource_variable_ops.ResourceVariable(0.0)
+        self.vars = [self.w, self.b]
+
+      def call(self, x):
+        return x * self.w + self.b
+
+    with context.eager_mode():
+      model = Model()
+      optimizer = adam.AdamOptimizer(learning_rate=0.05)
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      checkpoint = checkpointable_utils.Checkpoint(
+          model=model, optimizer=optimizer)
+      for _ in range(2):
+        checkpoint.save(checkpoint_prefix)
+        with backprop.GradientTape() as tape:
+          loss = (constant_op.constant(1.)
+                  - model(constant_op.constant(1.))) ** 2
+        grad = tape.gradient(loss, model.vars)
+        optimizer.apply_gradients(
+            [(g, v) for g, v in zip(grad, model.vars)])
+
+  @test_util.run_in_graph_and_eager_modes
+  def testDeferredSlotRestoration(self):
+    checkpoint_directory = self.get_temp_dir()
+
+    root = tracking.Checkpointable()
+    root.var = checkpointable_utils.add_variable(
+        root, name="var", initializer=0.)
+    optimizer = adam.AdamOptimizer(0.1)
+    if context.executing_eagerly():
+      optimizer.minimize(root.var.read_value)
+    else:
+      train_op = optimizer.minimize(root.var)
+      # Note that `optimizer` has not been added as a dependency of
+      # `root`. Create a one-off grouping so that slot variables for `root.var`
+      # get initialized too.
+      self.evaluate(checkpointable_utils.gather_initializers(
+          checkpointable_utils.Checkpoint(root=root, optimizer=optimizer)))
+      self.evaluate(train_op)
+    self.evaluate(state_ops.assign(root.var, 12.))
+    no_slots_path = checkpointable_utils.CheckpointableSaver(root).save(
+        os.path.join(checkpoint_directory, "no_slots"))
+    root.optimizer = optimizer
+    self.evaluate(state_ops.assign(root.var, 13.))
+    self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var),
+                                   14.))
+    slots_path = checkpointable_utils.CheckpointableSaver(root).save(
+        os.path.join(checkpoint_directory, "with_slots"))
+    new_root = tracking.Checkpointable()
+    # Load the slot-containing checkpoint (deferred), then immediately overwrite
+    # the non-slot variable (also deferred).
+    slot_status = checkpointable_utils.CheckpointableSaver(
+        new_root).restore(slots_path)
+    no_slot_status = checkpointable_utils.CheckpointableSaver(
+        new_root).restore(no_slots_path)
+    with self.assertRaises(AssertionError):
+      no_slot_status.assert_consumed()
+    new_root.var = checkpointable_utils.add_variable(
+        new_root, name="var", shape=[])
+    no_slot_status.assert_consumed()
+    no_slot_status.run_restore_ops()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    new_root.optimizer = adam.AdamOptimizer(0.1)
+    slot_status.assert_existing_objects_matched()
+    with self.assertRaisesRegexp(AssertionError, "beta1_power"):
+      slot_status.assert_consumed()
+    self.assertEqual(12., self.evaluate(new_root.var))
+    if context.executing_eagerly():
+      # Slot variables are only created with restoring initializers when
+      # executing eagerly.
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(name="m", var=new_root.var)))
+    else:
+      self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var),
+                    None)
+    if context.executing_eagerly():
+      new_root.optimizer.minimize(new_root.var.read_value)
+    else:
+      train_op = new_root.optimizer.minimize(new_root.var)
+      # The slot variable now exists; restore() didn't create it, but we should
+      # now have a restore op for it.
+      slot_status.run_restore_ops()
+      self.assertEqual(14., self.evaluate(
+          new_root.optimizer.get_slot(name="m", var=new_root.var)))
+      self.evaluate(train_op)
+    slot_status.assert_consumed()
+
+  def testManySavesGraph(self):
+    """Saves after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = tracking.Checkpointable()
+        obj.var = variable_scope.get_variable(name="v", initializer=0.)
+        obj.opt = adam.AdamOptimizer(0.1)
+        obj.opt.minimize(obj.var.read_value())
+        self.evaluate(checkpointable_utils.gather_initializers(obj))
+        saver = checkpointable_utils.CheckpointableSaver(obj)
+        saver.save(checkpoint_prefix)
+        before_ops = graph.get_operations()
+        saver.save(checkpoint_prefix)
+        self.assertEqual(before_ops, graph.get_operations())
+
+  def testManyRestoresGraph(self):
+    """Restores after the first should not modify the graph."""
+    with context.graph_mode():
+      graph = ops.Graph()
+      with graph.as_default(), self.session(graph):
+        checkpoint_directory = self.get_temp_dir()
+        checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+        obj = tracking.Checkpointable()
+        obj.var = variable_scope.get_variable(name="v", initializer=0.)
+        obj.opt = adam.AdamOptimizer(0.1)
+        obj.opt.minimize(obj.var.read_value())
+        self.evaluate(checkpointable_utils.gather_initializers(obj))
+        saver = checkpointable_utils.CheckpointableSaver(obj)
+        save_path = saver.save(checkpoint_prefix)
+        saver.restore(save_path)
+        before_ops = graph.get_operations()
+        saver.restore(save_path)
+        self.assertEqual(before_ops, graph.get_operations())
+
+  def testMultipleGraphsNonSlotVariables(self):
+    with context.graph_mode():
+      checkpoint_directory = self.get_temp_dir()
+      checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+      optimizer = adam.AdamOptimizer(0.001)
+      # Construct a model in one graph
+      first_graph = ops.Graph()
+      first_session = session_lib.Session(graph=first_graph)
+      with first_graph.as_default(), first_session.as_default():
+        first_variable = resource_variable_ops.ResourceVariable([1.])
+        first_root_checkpointable = checkpointable_utils.Checkpoint(
+            optimizer=optimizer, variable=first_variable)
+        train_op = optimizer.minimize(first_variable.read_value)
+        self.evaluate(checkpointable_utils.gather_initializers(
+            first_root_checkpointable))
+        self.evaluate(train_op)
+        self.evaluate(first_variable.assign([1.]))
+        self.evaluate(optimizer.get_slot(
+            var=first_variable, name="m").assign([2.]))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta1_power.assign(3.))
+
+      # Save and load in a second graph
+      second_graph = ops.Graph()
+      with second_graph.as_default(), session_lib.Session(graph=second_graph):
+        second_variable = resource_variable_ops.ResourceVariable([1.])
+        second_root_checkpointable = checkpointable_utils.Checkpoint(
+            optimizer=optimizer, variable=second_variable)
+        train_op = optimizer.minimize(second_variable.read_value)
+        second_root_checkpointable.restore(None).initialize_or_restore()
+        self.evaluate(train_op)
+        self.evaluate(second_variable.assign([4.]))
+        self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m").assign([5.]))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.evaluate(beta1_power.assign(6.))
+        save_path = second_root_checkpointable.save(checkpoint_prefix)
+        self.evaluate(second_variable.assign([7.]))
+        self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m").assign([8.]))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta1_power))
+        status = second_root_checkpointable.restore(save_path)
+        status.assert_consumed().run_restore_ops()
+        self.assertAllEqual([4.], self.evaluate(second_variable))
+        self.assertAllEqual([5.], self.evaluate(optimizer.get_slot(
+            var=second_variable, name="m")))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(6., self.evaluate(beta1_power))
+
+      # Check that the first graph is unmolested
+      with first_graph.as_default(), first_session.as_default():
+        self.assertAllEqual([1.], self.evaluate(first_variable))
+        self.assertAllEqual([2.], self.evaluate(optimizer.get_slot(
+            var=first_variable, name="m")))
+        beta1_power, _ = optimizer._get_beta_accumulators()
+        self.assertAllEqual(3., self.evaluate(beta1_power))
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_initialize_if_not_restoring(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    optimizer_only_prefix = os.path.join(checkpoint_directory, "opt")
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.AdamOptimizer(0.001)
+      root = checkpointable_utils.Checkpoint(
+          model=model,  # Do not save the optimizer with the checkpoint.
+          global_step=training_util.get_or_create_global_step())
+      optimizer_checkpoint = checkpointable_utils.Checkpoint(
+          optimizer=optimizer)
+
+      checkpoint_path = checkpoint_management.latest_checkpoint(
+          checkpoint_directory)
+      status = root.restore(save_path=checkpoint_path)
+      input_value = constant_op.constant([[3.]])
+      train_fn = functools.partial(
+          optimizer.minimize,
+          functools.partial(model, input_value),
+          global_step=root.global_step)
+      if not context.executing_eagerly():
+        train_fn = functools.partial(self.evaluate, train_fn())
+      status.initialize_or_restore()
+      self.evaluate([v.initializer for v in optimizer.variables()])
+      train_fn()
+      model_save_path = root.save(file_prefix=checkpoint_prefix)
+      self.evaluate(optimizer.variables()[0].assign(42.))
+      optimizer_save_path = optimizer_checkpoint.save(optimizer_only_prefix)
+
+    # Restore into a graph with the optimizer
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.AdamOptimizer(0.001)
+      root = checkpointable_utils.Checkpoint(
+          optimizer=optimizer, model=model,
+          global_step=training_util.get_or_create_global_step())
+      status = root.restore(save_path=model_save_path)
+      input_value = constant_op.constant([[3.]])
+      train_fn = functools.partial(
+          optimizer.minimize,
+          functools.partial(model, input_value),
+          global_step=root.global_step)
+      if not context.executing_eagerly():
+        train_fn = functools.partial(self.evaluate, train_fn())
+      status.initialize_or_restore()
+      train_fn()
+      with self.assertRaises(AssertionError):
+        status.assert_existing_objects_matched()
+      with self.assertRaises(AssertionError):
+        status.assert_consumed()
+
+    # Make sure initialization doesn't clobber later restores
+    with test_util.device(use_gpu=True):
+      model = MyModel()
+      optimizer = adam.AdamOptimizer(0.001, beta1=1.0)
+      root = checkpointable_utils.Checkpoint(
+          optimizer=optimizer, model=model,
+          global_step=training_util.get_or_create_global_step())
+      opt_root = checkpointable_utils.Checkpoint(
+          optimizer=optimizer)
+      status = root.restore(save_path=model_save_path)
+      init_only_optimizer_status = opt_root.restore(save_path=None)
+      optimizer_status = opt_root.restore(save_path=optimizer_save_path)
+      input_value = constant_op.constant([[3.]])
+      train_fn = functools.partial(
+          optimizer.minimize,
+          functools.partial(model, input_value),
+          global_step=root.global_step)
+      if not context.executing_eagerly():
+        train_fn = functools.partial(self.evaluate, train_fn())
+      optimizer_status.run_restore_ops()
+      status.initialize_or_restore()
+      init_only_optimizer_status.initialize_or_restore()
+      train_fn()
+      self.assertEqual(42., self.evaluate(optimizer.variables()[0]))
+
+
+class _ManualScope(tracking.Checkpointable):
+
+  def __call__(self):
+    with variable_scope.variable_scope("ManualScope") as vs:
+      self.variable_scope = vs
+      with checkpointable_utils.capture_dependencies(template=self):
+        return self._build()
+
+  def _build(self):
+    return variable_scope.get_variable(name="in_manual_scope", shape=[])
+
+
+class TemplateTests(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_checkpointable_save_restore(self):
+
+    def _templated():
+      v = variable_scope.get_variable(
+          "v", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      v2 = variable_scope.get_variable(
+          "v2", shape=[1], initializer=init_ops.zeros_initializer(),
+          use_resource=True)
+      manual = _ManualScope()
+      return v, v + 1., v2, manual, manual()
+
+    save_template = template.make_template("s1", _templated)
+    v1_save, _, v2_save, manual_scope, manual_scope_v = save_template()
+    six.assertCountEqual(
+        self,
+        [v1_save, v2_save, manual_scope, manual_scope_v, save_template],
+        checkpointable_utils.list_objects(save_template))
+    manual_dep, = manual_scope._checkpoint_dependencies
+    self.assertEqual("in_manual_scope", manual_dep.name)
+    self.assertIs(manual_scope_v, manual_dep.ref)
+    optimizer = adam.AdamOptimizer(0.0)
+    save_root = checkpointable_utils.Checkpoint(
+        my_template=save_template, optimizer=optimizer)
+    optimizer.minimize(v1_save.read_value)
+    self.evaluate([v.initializer for v in save_template.variables])
+    self.evaluate([v.initializer for v in optimizer.variables()])
+    self.evaluate(v1_save.assign([12.]))
+    self.evaluate(v2_save.assign([14.]))
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    save_path = save_root.save(checkpoint_prefix)
+
+    load_template = template.make_template("s2", _templated)
+    load_optimizer = adam.AdamOptimizer(0.0)
+    load_root = checkpointable_utils.Checkpoint(
+        my_template=load_template, optimizer=load_optimizer)
+    status = load_root.restore(save_path)
+    var, var_plus_one, var2, _, _ = load_template()
+    load_optimizer.minimize(var.read_value)
+    self.assertEqual(3, len(load_template._checkpoint_dependencies))
+    self.assertEqual("v", load_template._checkpoint_dependencies[0].name)
+    self.assertEqual("v2", load_template._checkpoint_dependencies[1].name)
+    self.assertEqual("ManualScope",
+                     load_template._checkpoint_dependencies[2].name)
+    status.assert_consumed().run_restore_ops()
+    self.assertAllEqual([12.], self.evaluate(var))
+    self.assertAllEqual([13.], self.evaluate(var_plus_one))
+    self.assertAllEqual([14.], self.evaluate(var2))
+
+
+class CheckpointCompatibilityTests(test.TestCase):
+
+  def _initialized_model(self):
+    input_value = constant_op.constant([[3.]])
+    model = MyModel()
+    optimizer = adam.AdamOptimizer(0.001)
+    optimizer_step = training_util.get_or_create_global_step()
+    root_checkpointable = checkpointable_utils.Checkpoint(
+        optimizer=optimizer, model=model, optimizer_step=optimizer_step)
+    train_op = optimizer.minimize(
+        functools.partial(model, input_value),
+        global_step=optimizer_step)
+    self.evaluate(checkpointable_utils.gather_initializers(
+        root_checkpointable))
+    self.evaluate(train_op)
+    # A regular variable, a slot variable, and a non-slot Optimizer variable
+    # with known values to check when loading.
+    self.evaluate(model._named_dense.bias.assign([1.]))
+    self.evaluate(optimizer.get_slot(
+        var=model._named_dense.bias, name="m").assign([2.]))
+    beta1_power, _ = optimizer._get_beta_accumulators()
+    self.evaluate(beta1_power.assign(3.))
+    return root_checkpointable
+
+  def _set_sentinels(self, root_checkpointable):
+    self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.]))
+    self.evaluate(
+        root_checkpointable.optimizer.get_slot(
+            var=root_checkpointable.model._named_dense.bias, name="m")
+        .assign([102.]))
+    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.evaluate(beta1_power.assign(103.))
+
+  def _check_sentinels(self, root_checkpointable):
+    self.assertAllEqual(
+        [1.], self.evaluate(root_checkpointable.model._named_dense.bias))
+    self.assertAllEqual([2.], self.evaluate(
+        root_checkpointable.optimizer.get_slot(
+            var=root_checkpointable.model._named_dense.bias, name="m")))
+    beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators()
+    self.assertAllEqual(3., self.evaluate(beta1_power))
+
+  def _write_name_based_checkpoint(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        name_saver = saver_lib.Saver()
+        return name_saver.save(
+            sess=session, save_path=checkpoint_prefix,
+            global_step=root.optimizer_step)
+
+  @test_util.run_in_graph_and_eager_modes
+  def testLoadFromNameBasedSaver(self):
+    """Save a name-based checkpoint, load it using the object-based API."""
+    with test_util.device(use_gpu=True):
+      save_path = self._write_name_based_checkpoint()
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      with self.assertRaises(AssertionError):
+        self._check_sentinels(root)
+      object_saver = checkpointable_utils.CheckpointableSaver(root)
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      if context.executing_eagerly():
+        self._check_sentinels(root)
+      if context.executing_eagerly():
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_consumed()
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "OBJECT_CONFIG_JSON"):
+          status.assert_nontrivial_match()
+      else:
+        # When graph building, we haven't read any keys, so we don't know
+        # whether the restore will be complete.
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_consumed()
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_existing_objects_matched()
+        with self.assertRaisesRegexp(AssertionError, "not restored"):
+          status.assert_nontrivial_match()
+      status.run_restore_ops()
+      self._check_sentinels(root)
+      self._set_sentinels(root)
+      status = object_saver.restore(save_path)
+      status.initialize_or_restore()
+      self._check_sentinels(root)
+      # Check that there is no error when keys are missing from the name-based
+      # checkpoint.
+      root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable([1.])
+      status = object_saver.restore(save_path)
+      with self.assertRaises(AssertionError):
+        status.assert_existing_objects_matched()
+
+  def testSaveGraphLoadEager(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph) as session:
+        root = self._initialized_model()
+        save_path = root.save(session=session, file_prefix=checkpoint_prefix)
+    with context.eager_mode():
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      root.restore(save_path).assert_consumed()
+      self._check_sentinels(root)
+
+  def testSaveEagerLoadGraph(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt")
+    with context.eager_mode():
+      root = self._initialized_model()
+      save_path = root.save(file_prefix=checkpoint_prefix)
+    with context.graph_mode():
+      save_graph = ops.Graph()
+      with save_graph.as_default(), self.session(
+          graph=save_graph):
+        root = self._initialized_model()
+        self._set_sentinels(root)
+        root.restore(save_path).assert_consumed().run_restore_ops()
+        self._check_sentinels(root)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
index 00cd5aca4c..5426269793 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
index 6d47fe310d..c39fe6ba4f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
index 417362d211..05d46d380b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
index 7b43abee23..78829def67 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
@@ -45,6 +45,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
index a996746dac..58b7f27491 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
@@ -43,6 +43,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
index bfc9d67a47..8de796edde 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
index 3f3d57962b..393eeb3d6c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
index 00cd5aca4c..5426269793 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adadelta.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
index 6d47fe310d..c39fe6ba4f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adagrad.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
index 417362d211..05d46d380b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adam.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
index 7b43abee23..78829def67 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-adamax.pbtxt
@@ -45,6 +45,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
index a996746dac..58b7f27491 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-optimizer.pbtxt
@@ -43,6 +43,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
index bfc9d67a47..8de796edde 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
index 3f3d57962b..393eeb3d6c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.optimizers.-s-g-d.pbtxt
@@ -44,6 +44,10 @@ tf_class {
     name: "get_slot"
     argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
   }
+  member_method {
+    name: "get_slot_names"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
-- 
GitLab


From 686ba58692349a8f414d9a48ec1ee6ea296a9a6e Mon Sep 17 00:00:00 2001
From: Jian Li <jianlijianli@google.com>
Date: Wed, 12 Dec 2018 14:01:31 -0800
Subject: [PATCH 440/873] Add int8 support in AddTensor.

PiperOrigin-RevId: 225249344
---
 tensorflow/lite/kernels/test_util.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h
index dadabb86ab..f5c67c3e9c 100644
--- a/tensorflow/lite/kernels/test_util.h
+++ b/tensorflow/lite/kernels/test_util.h
@@ -307,10 +307,12 @@ class SingleOpModel {
 
     if (is_quantized) {
       if (t.min != 0 || t.max != 0) {
-        // TODO(b/119422369): Handle signed int8 here.
         if (t.type == TensorType_UINT8) {
           std::tie(t.scale, t.zero_point) =
               QuantizationParams<uint8_t>(t.min, t.max);
+        } else if (t.type == TensorType_INT8) {
+          std::tie(t.scale, t.zero_point) =
+              QuantizationParams<int8_t>(t.min, t.max);
         } else if (t.type == TensorType_INT32) {
           std::tie(t.scale, t.zero_point) =
               QuantizationParams<int32_t>(t.min, t.max);
-- 
GitLab


From 6603c69fa71d6ebdee717863079ca34308c9ddb1 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 12 Dec 2018 14:28:32 -0800
Subject: [PATCH 441/873] Switch tf.saved_model.save back to experimental in
 1.x since it doesn't work when graph building

Adds some explanation of this in the docstring and some better exceptions. Having it non-experimental would be pretty confusing, since most users would try it without enable_eager_execution() and run into strange errors which we don't plan to fix.

PiperOrigin-RevId: 225254705
---
 tensorflow/python/saved_model/save.py         | 21 +++++++++++++++++--
 tensorflow/python/saved_model/save_test.py    | 15 +++++++++++++
 .../golden/v1/tensorflow.saved_model.pbtxt    |  4 ----
 .../tools/compatibility/tf_upgrade_v2_test.py |  9 +++++++-
 4 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index b065a5a265..84d7b614d0 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -533,8 +533,7 @@ def _write_object_proto(obj, proto, asset_file_def_index):
     proto.user_object.SetInParent()
 
 
-@tf_export("saved_model.save",
-           v1=["saved_model.save", "saved_model.experimental.save"])
+@tf_export("saved_model.save", v1=["saved_model.experimental.save"])
 def save(obj, export_dir, signatures=None):
   # pylint: disable=line-too-long
   """Exports the Checkpointable object `obj` to [SavedModel format](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md).
@@ -681,7 +680,25 @@ def save(obj, export_dir, signatures=None):
 
   Raises:
     ValueError: If `obj` is not checkpointable.
+
+  @compatibility(eager)
+  Not supported when graph building. From TensorFlow 1.x,
+  `tf.enable_eager_execution()` must run first. May not be called from within a
+  function body.
+  @end_compatibility
   """
+  if not context.executing_eagerly():
+    with ops.init_scope():
+      if context.executing_eagerly():
+        raise AssertionError(
+            "tf.saved_model.save is not supported inside a traced "
+            "@tf.function. Move the call to the outer eagerly-executed "
+            "context.")
+      else:
+        raise AssertionError(
+            "tf.saved_model.save is not supported when graph building. "
+            "tf.enable_eager_execution() must run first when calling it from "
+            "TensorFlow 1.x.")
   # pylint: enable=line-too-long
   if not isinstance(obj, base.CheckpointableBase):
     raise ValueError(
diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py
index 1c6eb1b538..5381c2f031 100644
--- a/tensorflow/python/saved_model/save_test.py
+++ b/tensorflow/python/saved_model/save_test.py
@@ -334,6 +334,21 @@ class AssetTests(test.TestCase):
         {"output_0": [0.2]},
         _import_and_infer(export_dir, {"x": [0.1]}))
 
+  def test_sensible_graph_building_exception(self):
+    root = util.Checkpoint(v=variables.Variable(2.))
+    root.f = def_function.function(
+        lambda x: 2. * root.v,
+        input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
+    export_dir = os.path.join(self.get_temp_dir(), "save_dir")
+    @def_function.function
+    def _calls_save():
+      save.save(root, export_dir)
+    with self.assertRaisesRegexp(AssertionError, "tf.function"):
+      _calls_save()
+    with ops.Graph().as_default():
+      with self.assertRaisesRegexp(AssertionError, "enable_eager_execution"):
+        save.save(root, export_dir)
+
 
 class MemoryTests(test.TestCase):
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
index 2a7c789105..3929003fa1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.saved_model.pbtxt
@@ -180,10 +180,6 @@ tf_module {
     name: "regression_signature_def"
     argspec: "args=[\'examples\', \'predictions\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "save"
-    argspec: "args=[\'obj\', \'export_dir\', \'signatures\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
   member_method {
     name: "simple_save"
     argspec: "args=[\'session\', \'export_dir\', \'inputs\', \'outputs\', \'legacy_init_op\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index 484900d000..d5428e7536 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -159,6 +159,11 @@ class TestUpgrade(test_util.TensorFlowTestCase):
     collect = True
     v1_symbols = set([])
 
+    # Symbols which may be generated by the conversion script which do not exist
+    # in TF 1.x. This should be a very short list of symbols which are
+    # experimental in 1.x but stable for 2.x.
+    whitelisted_v2_only_symbols = set(["tf.saved_model.save"])
+
     # Converts all symbols in the v1 namespace to the v2 namespace, raising
     # an error if the target of the conversion is not in the v1 namespace.
     def conversion_visitor(unused_path, unused_parent, children):
@@ -173,7 +178,8 @@ class TestUpgrade(test_util.TensorFlowTestCase):
             if (text and
                 not text.startswith("tf.compat.v1") and
                 not text.startswith("tf.estimator") and
-                text not in v1_symbols):
+                text not in v1_symbols and
+                text not in whitelisted_v2_only_symbols):
               self.assertFalse(
                   True, "Symbol %s generated from %s not in v1 API" % (
                       text, name))
@@ -737,3 +743,4 @@ class TestUpgradeFiles(test_util.TensorFlowTestCase):
 
 if __name__ == "__main__":
   test_lib.main()
+
-- 
GitLab


From 31c0bed4cb194153151e6164938eb83252e6ef72 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 14:33:34 -0800
Subject: [PATCH 442/873] Re-enable training_test

PiperOrigin-RevId: 225255718
---
 tensorflow/python/keras/BUILD                   | 7 +------
 tensorflow/python/keras/engine/training_test.py | 6 ++++--
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 3c390cb2b0..361d88fe83 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -764,12 +764,7 @@ py_test(
     srcs = ["engine/training_test.py"],
     shard_count = 16,
     srcs_version = "PY2AND3",
-    tags = [
-        "manual",  # TODO(b/120560388)
-        "no_oss",  # TODO(b/120560388)
-        "notap",  # TODO(b/120560388)
-        "notsan",
-    ],
+    tags = ["notsan"],
     deps = [
         ":keras",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index a61e2edcd3..9d56eb261d 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -250,8 +250,10 @@ class TrainingTest(keras_parameterized.TestCase):
                   run_eagerly=testing_utils.should_run_eagerly())
     # This will work
     model.fit([input_a_np], output_d_np, epochs=1)
-    with self.assertRaises(ValueError):
-      model.fit([input_a_np, input_a_np], output_d_np, epochs=1)
+    # TODO(gsundeep) Test only works in eager, file ticket
+    if testing_utils.should_run_eagerly() and context.executing_eagerly():
+      with self.assertRaises(ValueError):
+        model.fit([input_a_np, input_a_np], output_d_np, epochs=1)
 
     # Test model on a list of floats
     input_a_np = np.random.random((10, 3))
-- 
GitLab


From 1b7e1c7c39e677dbd8a7d326666ba3e273faf46d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 14:35:48 -0800
Subject: [PATCH 443/873] Get rid of to_int64 deprecation warning in the logs.

PiperOrigin-RevId: 225256193
---
 tensorflow/python/ops/math_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index e2b634ee8f..e656998b70 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -1465,7 +1465,7 @@ def count_nonzero_v2(input,  # pylint: disable=redefined-builtin
     return cast(
         reduce_sum(
             # int64 reduction happens on GPU
-            to_int64(gen_math_ops.not_equal(input, zero)),
+            cast(gen_math_ops.not_equal(input, zero), dtypes.int64),
             axis=axis,
             keepdims=keepdims),
         dtype=dtype)
-- 
GitLab


From 22af085fee9bc9fca2efd695a9440200cc66e623 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 14:36:59 -0800
Subject: [PATCH 444/873] [XLA] add Iota and BroadcastedIota to local Python
 client

PiperOrigin-RevId: 225256432
---
 .../xla/python/local_computation_builder.cc   |  9 +++++++
 .../xla/python/local_computation_builder.h    |  4 +++
 .../xla/python/local_computation_builder.i    |  2 ++
 tensorflow/compiler/xla/python/xla_client.py  | 27 +++++++++++++++++++
 .../compiler/xla/python/xla_client_test.py    | 11 ++++++++
 5 files changed, 53 insertions(+)

diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index 8e3ac381ce..5d191f5a18 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -647,6 +647,15 @@ LocalOp LocalComputationBuilder::ConstantLiteral(const Literal& literal) {
   return xla::ConstantLiteral(&builder_, literal);
 }
 
+LocalOp LocalComputationBuilder::Iota(PrimitiveType element_type, int64 size) {
+  return xla::Iota(&builder_, element_type, size);
+}
+
+LocalOp LocalComputationBuilder::BroadcastedIota(const Shape& shape,
+                                                 int64 dimension) {
+  return xla::Iota(&builder_, shape, dimension);
+}
+
 LocalOp LocalComputationBuilder::Broadcast(
     const LocalOp& operand, absl::Span<const int64> broadcast_sizes) {
   return xla::Broadcast(operand.op(), broadcast_sizes);
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
index eebbe674e5..c6e58ac971 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.h
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -286,6 +286,10 @@ class LocalComputationBuilder {
 
   LocalOp ConstantLiteral(const Literal& literal);
 
+  LocalOp Iota(PrimitiveType element_type, int64 size);
+
+  LocalOp BroadcastedIota(const Shape& shape, int64 dimension);
+
   LocalOp Broadcast(const LocalOp& operand,
                     absl::Span<const int64> broadcast_sizes);
 
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
index db7e0458f4..11fb00e616 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.i
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -1051,6 +1051,8 @@ tensorflow::ImportNumpy();
 %unignore xla::swig::LocalComputationBuilder::Outfeed;
 %unignore xla::swig::LocalComputationBuilder::ConstantLiteral;
 %unignore xla::swig::LocalComputationBuilder::ConstantR0;
+%unignore xla::swig::LocalComputationBuilder::Iota;
+%unignore xla::swig::LocalComputationBuilder::BroadcastedIota;
 %unignore xla::swig::LocalComputationBuilder::Broadcast;
 %unignore xla::swig::LocalComputationBuilder::BroadcastInDim;
 %unignore xla::swig::LocalComputationBuilder::Pad;
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
index cd85713d72..4166fa0327 100644
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -831,6 +831,33 @@ class ComputationBuilder(object):
     return self.ParameterWithShape(
         Shape.from_pyval(value), name=name, parameter_num=parameter_num)
 
+  def Iota(self, dtype, size):
+    """Enqueues an iota constant onto the computation.
+
+    Args:
+      dtype: expected numpy dtype of the output.
+      size: integer, the number of elements in the array.
+
+    Returns:
+      A LocalOp representing the added iota constant.
+    """
+    element_type = DTYPE_TO_XLA_ELEMENT_TYPE[str(np.dtype(dtype))]
+    return self._client.Iota(element_type, size)
+
+  def BroadcastedIota(self, dtype, shape, dimension):
+    """Enqueues a broadcasted iota constant onto the computation.
+
+    Args:
+      dtype: expected numpy dtype of the output.
+      shape: tuple of integers, the expected output shape (dimensions).
+      dimension: positive integer, dimension along which to increment values.
+
+    Returns:
+      A LocalOp representing the added broadcasted iota constant.
+    """
+    xla_shape = Shape.array_shape(dtype, shape)
+    return self._client.BroadcastedIota(xla_shape, dimension)
+
   def Broadcast(self, operand, sizes):
     """Enqueues a broadcast operation onto the computation.
 
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
index a4c615846e..95c6dc8c45 100644
--- a/tensorflow/compiler/xla/python/xla_client_test.py
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -146,6 +146,17 @@ class ComputationsWithConstantsTest(LocalComputationTest):
     c.Pow(c.Constant(NumpyArrayF64([1.5, 2.5, 3.0])), c.ConstantF64Scalar(2.))
     self._ExecuteAndCompareClose(c, expected=[2.25, 6.25, 9.])
 
+  def testIota(self):
+    c = self._NewComputation()
+    c.Iota(np.float32, 10)
+    self._ExecuteAndCompareExact(c, expected=np.arange(10, dtype=np.float32))
+
+  def testBroadcastedIota(self):
+    c = self._NewComputation()
+    c.BroadcastedIota(np.int64, (2, 3), 1)
+    expected = np.array([[0, 1, 2], [0, 1, 2]], dtype=np.int64)
+    self._ExecuteAndCompareExact(c, expected=expected)
+
   def testBooleanAnd(self):
     c = self._NewComputation()
     c.And(
-- 
GitLab


From e0f979b8dce918d0c31ea106b0a39dce4bb6e8c6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 14:40:52 -0800
Subject: [PATCH 445/873] Fix `predict` with `run_eagerly=True`

PiperOrigin-RevId: 225257343
---
 .../python/keras/engine/training_eager_test.py    | 15 +++++++++++++++
 .../python/keras/engine/training_generator.py     |  8 +++++---
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/engine/training_eager_test.py b/tensorflow/python/keras/engine/training_eager_test.py
index 3fabbb17ed..f95a502cbc 100644
--- a/tensorflow/python/keras/engine/training_eager_test.py
+++ b/tensorflow/python/keras/engine/training_eager_test.py
@@ -246,6 +246,21 @@ class CorrectnessTest(test.TestCase):
     layer(1.)  # Plain-value inputs are only valid in eager mode.
     self.assertEqual(1, len(layer.losses))
 
+  def test_predict_correctness(self):
+    i1 = keras.layers.Input(shape=(4, 5))
+    i2 = keras.layers.Input(shape=(4, 5))
+    i3 = keras.layers.Input(shape=(4, 5))
+    o = keras.layers.add([i1, i2, i3])
+    model = keras.models.Model([i1, i2, i3], o)
+    model.run_eagerly = True
+
+    x1 = np.random.random((2, 4, 5))
+    x2 = np.random.random((2, 4, 5))
+    x3 = np.random.random((2, 4, 5))
+    out = model.predict([x1, x2, x3])
+
+    self.assertAllClose(out, x1 + x2 + x3)
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py
index 0abf0b8270..88f21b3707 100644
--- a/tensorflow/python/keras/engine/training_generator.py
+++ b/tensorflow/python/keras/engine/training_generator.py
@@ -49,7 +49,7 @@ def model_iteration(model,
                     max_queue_size=10,
                     workers=1,
                     use_multiprocessing=False,
-                    shuffle=True,
+                    shuffle=False,
                     initial_epoch=0,
                     mode='train',
                     batch_size=None,
@@ -246,8 +246,10 @@ def model_iteration(model,
 
 # Maintain compatibility with the existing names.
 fit_generator = functools.partial(model_iteration, mode='train')
-evaluate_generator = functools.partial(model_iteration, mode='test')
-predict_generator = functools.partial(model_iteration, mode='predict')
+evaluate_generator = functools.partial(
+    model_iteration, mode='test', shuffle=False)
+predict_generator = functools.partial(
+    model_iteration, mode='predict', shuffle=False)
 
 
 def _get_next_batch(output_generator, mode):
-- 
GitLab


From 9ed22473db5e3b5d555e951c2dfc92a75ab235ca Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 14:41:49 -0800
Subject: [PATCH 446/873] Capture the distribute.Strategy scope from the outer
 graph when entering the FuncGraph.as_default scope instead of __init__. Fixes
 issues with the grobal Keras FuncGraph keeping state between tests.

PiperOrigin-RevId: 225257506
---
 .../python/distribute/mirrored_strategy.py    | 11 ++-
 tensorflow/python/framework/func_graph.py     | 79 +++++++++++++------
 2 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index 9692c88dfc..605e2cc8e7 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -50,12 +50,17 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 @contextlib.contextmanager
-def _enter_graph(g, eager):
+def _enter_graph(g, eager, creator_stack=None):
+  """Context manager for selecting a graph and maybe eager mode."""
   if eager:
     with g.as_default(), context.eager_mode():
+      if creator_stack is not None:
+        g._variable_creator_stack = creator_stack  # pylint: disable=protected-access
       yield
   else:
     with g.as_default():
+      if creator_stack is not None:
+        g._variable_creator_stack = creator_stack  # pylint: disable=protected-access
       yield
 
 
@@ -865,7 +870,6 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
 
     def run(self):
       # pylint: disable=protected-access
-      self.graph._variable_creator_stack = self._variable_creator_stack
       self.should_run.wait()
       self.should_run.clear()
       try:
@@ -873,7 +877,8 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
           return
         with self.coord.stop_on_exception(), \
             _enter_graph(self._init_graph, self._init_in_eager), \
-            _enter_graph(self.graph, self.in_eager), \
+            _enter_graph(self.graph, self.in_eager,
+                         self._variable_creator_stack), \
             context.context().device_policy(self.context_device_policy), \
             MirroredReplicaContext(self.distribution, constant_op.constant(
                 self.replica_id, dtypes.int32)), \
diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py
index bd4ed5553e..75a420e91a 100644
--- a/tensorflow/python/framework/func_graph.py
+++ b/tensorflow/python/framework/func_graph.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import compat
 from tensorflow.python.util import nest
+from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util.lazy_loader import LazyLoader
 
@@ -108,38 +109,20 @@ class FuncGraph(ops.Graph):
 
     graph = self.outer_graph
 
-    # pylint: disable=protected-access
-    # TODO(b/112906995, nareshmodi): distribution strategy depends on inheriting
-    # this stack from the default graph even in eager mode. Maybe it should be
-    # part of the eager context? This would also allow us to remove a
-    # get_default_graph() call from the function cache lookup.
-    self._distribution_strategy_stack = list(graph._distribution_strategy_stack)
-    # We ignore device placements from any outer scopes while tracing the
-    # function when possible, to avoid hard-coding them in the function
-    # graph. "Default" placements come from the PartitionedCallOp's placement,
-    # so that the same trace of the Python function may be placed on several
-    # different devices and saved functions may be placed on new devices when
-    # restored.
     if context.executing_eagerly():
       self.seed = context.global_seed()
       device_type = context.context().device_spec.device_type
       self._xla_compile = (device_type == "TPU" or device_type == "XLA_GPU"
                            or device_type == "XLA_CPU")
-      if self._distribution_strategy_stack or self._xla_compile:
-        self._add_device_to_stack(context.context().device_name)
     else:
       self.seed = graph.seed
       self._xla_compile = getattr(graph, "_xla_compile", False)
       # TODO(allenl): Figure out if we can remove colocation stack
       # specialization (currently used in cond_v2), here and in the cache key.
-      self._colocation_stack = graph._colocation_stack.copy()
-      if (self._distribution_strategy_stack
-          or self._xla_compile
-          or device_stack_has_callable(graph._device_function_stack)):
-        # Hard-code devices from device functions in the function body
-        self._device_function_stack = graph._device_function_stack.copy()
+      self._colocation_stack = graph._colocation_stack.copy()  # pylint: disable=protected-access
+
     if not self._read_only_collections:
-      self._collections = graph._collections
+      self._collections = graph._collections  # pylint: disable=protected-access
     else:
       for collection_name in graph.get_all_collection_keys():
         if collection_name not in WHITELIST_COLLECTIONS:
@@ -149,11 +132,55 @@ class FuncGraph(ops.Graph):
         self._collections[collection_name] = graph.get_collection_ref(
             collection_name)
 
-    self._variable_creator_stack = graph._variable_creator_stack
-    # Inherit the graph key, since this is used for matching variables in
-    # optimizers.
-    self._graph_key = graph._graph_key
-    # pylint: enable=protected-access
+  def as_default(self):
+    outer_cm = super(FuncGraph, self).as_default()
+
+    @tf_contextlib.contextmanager
+    def inner_cm():
+      """Context manager for copying distribute.Strategy scope information."""
+      graph = ops.get_default_graph()
+      # pylint: disable=protected-access
+      # TODO(b/112906995, nareshmodi): distribution strategy depends on
+      # inheriting this stack from the default graph even in eager mode. Maybe
+      # it should be part of the eager context? This would also allow us to
+      # remove a get_default_graph() call from the function cache lookup.
+      old_strategy_stack = self._distribution_strategy_stack
+      self._distribution_strategy_stack = list(
+          graph._distribution_strategy_stack)
+      # We ignore device placements from any outer scopes while tracing the
+      # function when possible, to avoid hard-coding them in the function
+      # graph. "Default" placements come from the PartitionedCallOp's placement,
+      # so that the same trace of the Python function may be placed on several
+      # different devices and saved functions may be placed on new devices when
+      # restored.
+      old_device_stack = self._device_function_stack
+      if context.executing_eagerly():
+        if self._distribution_strategy_stack or self._xla_compile:
+          self._add_device_to_stack(context.context().device_name)
+      else:
+        if (self._distribution_strategy_stack
+            or self._xla_compile
+            or device_stack_has_callable(graph._device_function_stack)):
+          # Hard-code devices from device functions in the function body
+          self._device_function_stack = graph._device_function_stack.copy()
+
+      old_creator_stack = self._variable_creator_stack
+      self._variable_creator_stack = graph._variable_creator_stack
+      # Inherit the graph key, since this is used for matching variables in
+      # optimizers.
+      old_graph_key = self._graph_key
+      self._graph_key = graph._graph_key
+      # pylint: enable=protected-access
+
+      with outer_cm as g:
+        try:
+          yield g
+        finally:
+          self._distribution_strategy_stack = old_strategy_stack
+          self._device_function_stack = old_device_stack
+          self._variable_creator_stack = old_creator_stack
+          self._graph_key = old_graph_key
+    return inner_cm()
 
   @property
   def output_types(self):
-- 
GitLab


From 8644b6d4c77646407758a2ef93eb3567f9f03577 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Wed, 12 Dec 2018 14:50:13 -0800
Subject: [PATCH 447/873] Move reduce non distributed values and share the code
 with TPU Strategy and also improve print output of TPUMirroredVariable.

PiperOrigin-RevId: 225259008
---
 .../distribute/python/mirrored_strategy.py    |  1 -
 .../python/parameter_server_strategy.py       |  2 +-
 .../contrib/distribute/python/tpu_strategy.py |  8 ++++
 .../python/distribute/cross_device_ops.py     | 37 ++++++++++++++
 .../python/distribute/mirrored_strategy.py    | 48 ++-----------------
 tensorflow/python/distribute/values.py        |  6 +++
 6 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 20f1a08d42..24399db652 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -28,7 +28,6 @@ from tensorflow.python.distribute import values
 
 # pylint: disable=protected-access,invalid-name
 _call_for_each_replica = mirrored_strategy._call_for_each_replica
-_reduce_non_distributed_value = mirrored_strategy._reduce_non_distributed_value
 _create_mirrored_variable = mirrored_strategy._create_mirrored_variable
 all_local_devices = mirrored_strategy.all_local_devices
 CoreMirroredStrategy = mirrored_strategy.MirroredStrategy
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy.py b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
index 2c7766f95f..ca51b07be6 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
@@ -356,7 +356,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
     self._verify_destinations_not_different_worker(destinations)
     if not isinstance(value, values.DistributedValues):
       # pylint: disable=protected-access
-      return mirrored_strategy._reduce_non_distributed_value(
+      return cross_device_ops_lib.reduce_non_distributed_value(
           self, reduce_op, value, destinations)
     return self._cross_device_ops.reduce(
         reduce_op, value, destinations=destinations)
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index b6f5b49201..7ea245eb6e 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -465,6 +465,14 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
             "Currently only support sum & mean in TPUStrategy.")
       return tpu_ops.cross_replica_sum(value)
 
+    if not isinstance(value, values.DistributedValues):
+      # This function handles reducing values that are not PerReplica or
+      # Mirrored values. For example, the same value could be present on all
+      # replicas in which case `value` would be a single value or value could
+      # be 0.
+      return cross_device_ops_lib.reduce_non_distributed_value(
+          self, reduce_op, value, destinations)
+
     # Validate that the destination is same as the host device
     # Note we don't do this when in replicate context as the reduction is
     # performed on the TPU device itself.
diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py
index 57c552ca8f..6bb3639bf0 100644
--- a/tensorflow/python/distribute/cross_device_ops.py
+++ b/tensorflow/python/distribute/cross_device_ops.py
@@ -62,6 +62,43 @@ def validate_destinations(destinations):
     raise ValueError("destinations can not be empty")
 
 
+def reduce_non_distributed_value(extended, reduce_op, value, destinations):
+  """Reduce a non-DistributedValue `value` to `destinations`."""
+  if isinstance(value, value_lib.DistributedValues):
+    raise ValueError("You are passing a `DistributedValue` to "
+                     "`reduce_non_distributed_value`, which is not allowed.")
+
+  # If the same value is present on all replicas then the PerReplica value will
+  # be a single value. We also handle the case when `value` is a single value
+  # and equal to 0.
+  if value == 0:
+    return 0
+  # If there is only a single value and the reduce op is MEAN,
+  # that value should be on all destinations.
+  if reduce_op == reduce_util.ReduceOp.MEAN:
+    return value
+
+  validate_destinations(destinations)
+  # We do not support a reduce op of SUM if the value is the same across
+  # all replicas. We call this as part of assign functions for MirroredVariables
+  # and summing up identical values across replicas is not clearly defined.
+  if (len(extended.worker_devices) != 1 or
+      not check_destinations(destinations)):
+    raise ValueError("A non-DistributedValues value %s cannot be reduced with "
+                     "the given reduce op %s." % (value, reduce_op))
+  # TODO(anjalisridhar): Moves these methods to a device utility file?
+  devices = get_devices_from(destinations)
+  if len(devices) == 1:
+    with ops.device(devices[0]):
+      return array_ops.identity(value)
+  else:
+    value_updates = {}
+    for d in devices:
+      with ops.device(d):
+        value_updates[d] = array_ops.identity(value)
+    return value_lib.Mirrored(value_updates)
+
+
 def _make_tensor_into_per_replica(input_tensor):
   """Converts a single tensor into a PerReplica object."""
   if isinstance(input_tensor, (tuple, list)):
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index 605e2cc8e7..fb3cf84449 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -74,10 +74,9 @@ class _RequestedStop(Exception):  # pylint: disable=g-bad-exception-name
   pass
 
 
-# _call_for_each_replica and _reduce_non_distributed_value are not members of
-# MirroredStrategy so that they are generally not allowed to use anything
-# specific to MirroredStrategy and thus can be shared with other distribution
-# strategies.
+# _call_for_each_replica is not a member of MirroredStrategy so that it is
+# not allowed to use anything specific to MirroredStrategy and thus
+# can be shared with other distribution strategies.
 
 
 # TODO(yuefengz): maybe create a common class for those who need to call this
@@ -192,43 +191,6 @@ def _call_for_each_replica(distribution, fn, args, kwargs):
   return values.regroup({t.device: t.main_result for t in threads})
 
 
-def _reduce_non_distributed_value(extended, reduce_op, value, destinations):
-  """Reduce a non-DistributedValue `value` to `destinations`."""
-  if isinstance(value, values.DistributedValues):
-    raise ValueError("You are passing a `DistributedValue` to "
-                     "`_reduce_non_distributed_value`, which is not allowed.")
-
-  # If the same value is present on all replicas then the PerReplica value will
-  # be a single value. We also handle the case when `value` is a single value
-  # and equal to 0.
-  if value == 0:
-    return 0
-  # If there is only a single value and the reduce op is MEAN,
-  # that value should be on all destinations.
-  if reduce_op == reduce_util.ReduceOp.MEAN:
-    return value
-
-  cross_device_ops_lib.validate_destinations(destinations)
-  # We do not support a reduce op of SUM if the value is the same across
-  # all replicas. We call this as part of assign functions for MirroredVariables
-  # and summing up identical values across replicas is not clearly defined.
-  if (len(extended.worker_devices) != 1 or
-      not cross_device_ops_lib.check_destinations(destinations)):
-    raise ValueError("A non-DistributedValues value %s cannot be reduced with "
-                     "the given reduce op %s." % (value, reduce_op))
-  # TODO(anjalisridhar): Moves these methods to a device utility file?
-  devices = cross_device_ops_lib.get_devices_from(destinations)
-  if len(devices) == 1:
-    with ops.device(devices[0]):
-      return array_ops.identity(value)
-  else:
-    value_updates = {}
-    for d in devices:
-      with ops.device(d):
-        value_updates[d] = array_ops.identity(value)
-    return values.Mirrored(value_updates)
-
-
 def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs):  # pylint: disable=g-missing-docstring
   # Figure out what collections this variable should be added to.
   # We'll add the MirroredVariable to those collections instead.
@@ -714,8 +676,8 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
       # Mirrored values. For example, the same value could be present on all
       # replicas in which case `value` would be a single value or value could
       # be 0.
-      return _reduce_non_distributed_value(self, reduce_op, value,
-                                           destinations)
+      return cross_device_ops_lib.reduce_non_distributed_value(
+          self, reduce_op, value, destinations)
     return self._get_cross_device_ops().reduce(
         reduce_op, value, destinations=destinations)
 
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index 01a1680a24..a5918b7b73 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -570,6 +570,12 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
       # See https://docs.python.org/3/library/constants.html#NotImplemented
       return NotImplemented
 
+  def __str__(self):
+    return "%s:%s" % (self.__class__.__name__, self._index)
+
+  def __repr__(self):
+    return "%s(%r)" % (self.__class__.__name__, self._index)
+
   @property
   def handle(self):
     # If we're in a tpu.rewrite(), return the replicated handle.
-- 
GitLab


From 515f9575629a2521fe4d0e7fef3e1f252d8b8f6e Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Wed, 12 Dec 2018 15:07:45 -0800
Subject: [PATCH 448/873] Eager function: Do not create a set of input ops each
 time. This can take a very long time for big models. For e.g. when building a
 function for ResNet50, this increased the time to create the eager function
 by 72 times.

PiperOrigin-RevId: 225262498
---
 tensorflow/python/eager/function.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f3480ebb56..3aa7b7e27f 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -149,10 +149,9 @@ class _EagerDefinedFunction(object):
       outputs: the tensors in the graph which will be outputs to the function
       attrs: dict mapping names of attributes to their AttrValue values
     """
-    operations = [
-        op for op in graph.get_operations()
-        if op not in set(arg.op for arg in inputs)
-    ]
+    input_ops = set(arg.op for arg in inputs)
+    operations = [op for op in graph.get_operations() if op not in input_ops]
+
     fn = pywrap_tensorflow.TF_GraphToFunction_wrapper(
         graph._c_graph,  # pylint: disable=protected-access
         compat.as_str(name),
-- 
GitLab


From face5a8f9c7821b7c415089a3988d0badaf29783 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Wed, 12 Dec 2018 15:20:58 -0800
Subject: [PATCH 449/873] Run noise layer tests in all execution modes.

PiperOrigin-RevId: 225264988
---
 tensorflow/python/keras/layers/noise_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/layers/noise_test.py b/tensorflow/python/keras/layers/noise_test.py
index 325dd933b2..f1537a6919 100644
--- a/tensorflow/python/keras/layers/noise_test.py
+++ b/tensorflow/python/keras/layers/noise_test.py
@@ -19,13 +19,13 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python import keras
-from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class NoiseLayersTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class NoiseLayersTest(keras_parameterized.TestCase):
 
   def test_GaussianNoise(self):
     testing_utils.layer_test(
-- 
GitLab


From 3dfb4df6e54968237014cfa6c8904ea35e4518c5 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Wed, 12 Dec 2018 15:22:10 -0800
Subject: [PATCH 450/873] Use split_compile_and_replicate with TPUEstimator.

PiperOrigin-RevId: 225265200
---
 tensorflow/contrib/tpu/python/tpu/tpu.py      | 106 +++++++++++++++---
 .../contrib/tpu/python/tpu/tpu_estimator.py   |  38 +++++--
 2 files changed, 119 insertions(+), 25 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index def57da20d..59722bc246 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -777,15 +777,15 @@ def split_compile_and_replicate(computation,
       ]
 
 
-def shard(computation,
-          inputs=None,
-          num_shards=1,
-          input_shard_axes=None,
-          outputs_from_all_shards=True,
-          output_shard_axes=None,
-          infeed_queue=None,
-          device_assignment=None,
-          name=None):
+def split_compile_and_shard(computation,
+                            inputs=None,
+                            num_shards=1,
+                            input_shard_axes=None,
+                            outputs_from_all_shards=True,
+                            output_shard_axes=None,
+                            infeed_queue=None,
+                            device_assignment=None,
+                            name=None):
   """Shards `computation` for parallel execution.
 
   `inputs` must be a list of Tensors or None (equivalent to an empty list), each
@@ -839,7 +839,7 @@ def shard(computation,
       is equal to the number of cores in the TPU system.
     name: (Deprecated) Does nothing.
   Returns:
-    A list of output tensors.
+    A tuple of (compile op, [output tensors]).
   Raises:
     ValueError: If num_shards <= 0
     ValueError: If len(input_shard_axes) != len(inputs)
@@ -874,7 +874,7 @@ def shard(computation,
   else:
     transposed_inputs = [[]] * num_shards
 
-  outputs = replicate(
+  compile_op, outputs = split_compile_and_replicate(
       computation,
       transposed_inputs,
       infeed_queue=infeed_queue,
@@ -891,7 +891,7 @@ def shard(computation,
     # one so it can be used as a control dependency or fetch node.
     # TODO(b/36647078) remove disable when pylint bug is fixed.
     # pylint: disable=indexing-exception
-    return [outputs[0]]
+    return compile_op, [outputs[0]]
     # pylint: enable=indexing-exception
 
   # TODO(b/36647078) remove disable when pylint bug is fixed.
@@ -925,7 +925,87 @@ def shard(computation,
       # TODO(phawkins): use a smarter policy, e.g., round-robin across shards.
       results.append(x[0])
 
-  return results
+  return compile_op, results
+
+
+def shard(computation,
+          inputs=None,
+          num_shards=1,
+          input_shard_axes=None,
+          outputs_from_all_shards=True,
+          output_shard_axes=None,
+          infeed_queue=None,
+          device_assignment=None,
+          name=None):
+  """Shards `computation` for parallel execution.
+
+  `inputs` must be a list of Tensors or None (equivalent to an empty list), each
+  of which has a corresponding split axis (from `input_shard_axes`). Each input
+  is split into `num_shards` pieces along the corresponding axis, and
+  computation is applied to each shard in parallel.
+
+  Tensors are broadcast to all shards if they are lexically captured by
+  `computation`. e.g.,
+
+  x = tf.constant(7)
+  def computation():
+    return x + 3
+  ... = shard(computation, ...)
+
+  TODO(phawkins): consider adding support for broadcasting Tensors passed
+  as inputs.
+
+  If `outputs_from_all_shards` is true, the outputs from all shards of
+  `computation` are concatenated back together along their `output_shards_axes`.
+  Otherwise, each output is taken from an arbitrary shard.
+
+  Inputs and outputs of the computation must be at least rank-1 Tensors.
+
+  Args:
+    computation: A Python function that builds a computation to apply to each
+      shard of the input.
+    inputs: A list of input tensors or None (equivalent to an empty list). Each
+      input tensor has a corresponding shard axes, given by `input_shard_axes`,
+      which must have size divisible by `num_shards`.
+    num_shards: The number of shards.
+    input_shard_axes: A list of dimensions along which to shard `inputs`, or
+      `None`. `None` means "shard all inputs along dimension 0". If not `None`,
+      there must be one dimension per input.
+    outputs_from_all_shards: Boolean or list of boolean. For each output, if
+      `True`, outputs from all shards are concatenated along the corresponding
+      `output_shard_axes` entry. Otherwise, each output is taken
+      from an arbitrary shard. If the argument is a boolean, the argument's
+      value is used for each output.
+    output_shard_axes: A list of dimensions along which to concatenate the
+      outputs of `computation`, or `None`. `None` means "concatenate all outputs
+      along dimension 0". If not `None`, there must be one dimension per output.
+      Ignored if `outputs_from_all_shards` is False.
+    infeed_queue: If not `None`, the `InfeedQueue` to use to augment the inputs
+      of `computation`.
+    device_assignment: If not `None`, a `DeviceAssignment` describing the
+      mapping between logical cores in the computation with physical cores in
+      the TPU topology. Uses a default device assignment if `None`. The
+      `DeviceAssignment` may be omitted if each shard of the computation uses
+      only one core, and there is either only one shard, or the number of shards
+      is equal to the number of cores in the TPU system.
+    name: (Deprecated) Does nothing.
+  Returns:
+    A list of output tensors.
+  Raises:
+    ValueError: If num_shards <= 0
+    ValueError: If len(input_shard_axes) != len(inputs)
+    ValueError: If len(output_shard_axes) != len(outputs from `computation`)
+  """
+  return split_compile_and_shard(
+      computation,
+      inputs=inputs,
+      num_shards=num_shards,
+      input_shard_axes=input_shard_axes,
+      outputs_from_all_shards=outputs_from_all_shards,
+      output_shard_axes=output_shard_axes,
+      infeed_queue=infeed_queue,
+      device_assignment=device_assignment,
+      name=name)[1]
 
 
 def batch_parallel(computation,
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index fe2ac61bf9..f179289584 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -422,6 +422,7 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
                ctx,
                enqueue_ops,
                dequeue_ops,
+               tpu_compile_op,
                run_infeed_loop_on_coordinator=True,
                rendezvous=None,
                master=None,
@@ -439,6 +440,7 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
     self._feed_error = None
     self._finished = False
     self._should_initialize_tpu = True
+    self._tpu_compile_op = tpu_compile_op
 
   def begin(self):
     logging.info('TPU job name %s', self._master_job)
@@ -500,6 +502,10 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
     session.run(self._init_ops,
                 options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000))
 
+    if os.environ.get('TPU_SPLIT_COMPILE_AND_EXECUTE', '') == '1':
+      logging.info('Compiling user program: this may take a while...')
+      logging.info('Compile finished: %s', session.run(self._tpu_compile_op))
+
     self._infeed_controller = self._create_infeed_controller(
         name='InfeedController', target=self._run_infeed, args=(session,))
 
@@ -540,12 +546,13 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
 
 class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook):
 
-  def __init__(self, ctx, enqueue_ops, dequeue_ops, rendezvous=None,
-               master=None, session_config=None):
+  def __init__(self, ctx, enqueue_ops, dequeue_ops, tpu_compile_op,
+               rendezvous=None, master=None, session_config=None):
     super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__(
         ctx,
         enqueue_ops,
         dequeue_ops,
+        tpu_compile_op=tpu_compile_op,
         run_infeed_loop_on_coordinator=False,
         rendezvous=rendezvous,
         master=master,
@@ -2545,7 +2552,7 @@ class TPUEstimator(estimator_lib.Estimator):
             graph.add_to_collection(_TPU_ENQUEUE_OPS, enqueue_op)
 
         if mode == model_fn_lib.ModeKeys.TRAIN:
-          loss, host_call, scaffold, training_hooks = (
+          compile_op, loss, host_call, scaffold, training_hooks = (
               _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
           host_ops = host_call.create_tpu_hostcall()
           if host_ops is None:
@@ -2580,6 +2587,7 @@ class TPUEstimator(estimator_lib.Estimator):
                   ctx,
                   enqueue_ops,
                   host_ops,
+                  tpu_compile_op=compile_op,
                   run_infeed_loop_on_coordinator=(
                       run_infeed_loop_on_coordinator),
                   rendezvous=self._rendezvous[mode],
@@ -2637,8 +2645,8 @@ class TPUEstimator(estimator_lib.Estimator):
               scaffold=scaffold)
 
         if mode == model_fn_lib.ModeKeys.EVAL:
-          total_loss, host_calls, scaffold, eval_hooks = _eval_on_tpu_system(
-              ctx, model_fn_wrapper, dequeue_fn)
+          compile_op, total_loss, host_calls, scaffold, eval_hooks = (
+              _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
           iterations_per_loop_var = _create_or_get_iterations_per_loop()
           mean_loss = math_ops.div(
               total_loss,
@@ -2685,6 +2693,7 @@ class TPUEstimator(estimator_lib.Estimator):
                   ctx,
                   enqueue_ops,
                   eval_update_ops + host_ops,
+                  tpu_compile_op=compile_op,
                   run_infeed_loop_on_coordinator=(
                       run_infeed_loop_on_coordinator),
                   rendezvous=self._rendezvous[mode],
@@ -2705,7 +2714,7 @@ class TPUEstimator(estimator_lib.Estimator):
         # Predict
         assert mode == model_fn_lib.ModeKeys.PREDICT
 
-        (dummy_predict_op, host_calls,
+        (compile_op, dummy_predict_op, host_calls,
          scaffold, prediction_hooks) = _predict_on_tpu_system(
              ctx, model_fn_wrapper, dequeue_fn)
         with ops.control_dependencies([dummy_predict_op]):
@@ -2762,6 +2771,7 @@ class TPUEstimator(estimator_lib.Estimator):
             _StoppingPredictHook(scalar_stopping_signal),
             TPUInfeedOutfeedSessionHookForPrediction(
                 ctx, enqueue_ops, host_ops, rendezvous=self._rendezvous[mode],
+                tpu_compile_op=compile_op,
                 master=self._config.master,
                 session_config=self._session_config),
         ] + input_hooks
@@ -2860,15 +2870,16 @@ def _eval_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
     return training_loop.repeat(iterations_per_loop_var, single_tpu_eval_step,
                                 [_ZERO_LOSS])
 
-  (loss,) = tpu.shard(
+  (compile_op, loss,) = tpu.split_compile_and_shard(
       multi_tpu_eval_steps_on_single_shard,
       inputs=[],
       num_shards=ctx.num_replicas,
       outputs_from_all_shards=False,
       device_assignment=ctx.device_assignment)
 
+  loss = loss[0]
   scaffold = _get_scaffold(captured_scaffold_fn)
-  return loss, host_calls, scaffold, captured_eval_hooks.get()
+  return compile_op, loss, host_calls, scaffold, captured_eval_hooks.get()
 
 
 def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
@@ -2883,15 +2894,16 @@ def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
     return training_loop.repeat(iterations_per_loop_var, single_tpu_train_step,
                                 [_INITIAL_LOSS])
 
-  (loss,) = tpu.shard(
+  (compile_op, loss,) = tpu.split_compile_and_shard(
       multi_tpu_train_steps_on_single_shard,
       inputs=[],
       num_shards=ctx.num_replicas,
       outputs_from_all_shards=False,
       device_assignment=ctx.device_assignment)
 
+  loss = loss[0]
   scaffold = _get_scaffold(captured_scaffold_fn)
-  return loss, host_call, scaffold, captured_training_hooks.get()
+  return compile_op, loss, host_call, scaffold, captured_training_hooks.get()
 
 
 def _predict_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
@@ -2911,15 +2923,17 @@ def _predict_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn):
         cond, single_tpu_predict_step, inputs=inputs, name=b'loop')
     return outputs
 
-  (dummy_predict_op,) = tpu.shard(
+  (compile_op, dummy_predict_op,) = tpu.split_compile_and_shard(
       multi_tpu_predict_steps_on_single_shard,
       inputs=[],
       num_shards=ctx.num_replicas,
       outputs_from_all_shards=False,
       device_assignment=ctx.device_assignment)
 
+  dummy_predict_op = dummy_predict_op[0]
   scaffold = _get_scaffold(captured_scaffold_fn)
-  return dummy_predict_op, host_calls, scaffold, captured_predict_hooks.get()
+  return (compile_op, dummy_predict_op, host_calls, scaffold,
+          captured_predict_hooks.get())
 
 
 def _wrap_computation_in_while_loop(device, op_fn):
-- 
GitLab


From de6406575b2c1ad29a8cbc0173702dc76a961403 Mon Sep 17 00:00:00 2001
From: Lukasz Kaiser <lukaszkaiser@google.com>
Date: Wed, 12 Dec 2018 15:41:44 -0800
Subject: [PATCH 451/873] Similar to cl/198786266 specify the
 `maximum_iterations` to tf.while_loop in tf.foldl and tf.foldr to be
 compatible with XLA.

PiperOrigin-RevId: 225268779
---
 tensorflow/python/ops/functional_ops.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py
index 57542e3c7b..df4be1d65a 100644
--- a/tensorflow/python/ops/functional_ops.py
+++ b/tensorflow/python/ops/functional_ops.py
@@ -143,7 +143,8 @@ def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
         lambda i, a: i < n, compute, [i, a],
         parallel_iterations=parallel_iterations,
         back_prop=back_prop,
-        swap_memory=swap_memory)
+        swap_memory=swap_memory,
+        maximum_iterations=n)
 
     # TODO(akshayka): Remove the in_graph_mode check once caching devices are
     # supported in Eager
@@ -253,7 +254,8 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
         compute, [i, a],
         parallel_iterations=parallel_iterations,
         back_prop=back_prop,
-        swap_memory=swap_memory)
+        swap_memory=swap_memory,
+        maximum_iterations=n)
 
     # TODO(akshayka): Remove the in_graph_mode check once caching devices are
     # supported in Eager
-- 
GitLab


From 6563253207e6b57cd6762f2d9b1dae7ebff1d927 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 12 Dec 2018 15:44:45 -0800
Subject: [PATCH 452/873] [TF:XLA] Bump open source abseil revision to
 8fbcdb90952c57828c4a9c2f6d79fcd7cae9088f

PiperOrigin-RevId: 225269293
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index f8b6bd1a3f..a84c51813e 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -123,11 +123,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "be91500afe4d2768a7aeeeae616d9f7fc4fe237a1493b630883dbf8f20d4682d",
-        strip_prefix = "abseil-cpp-455dc17ba1af9635f0b60155bc565bc572a1e722",
+        sha256 = "c2f8a1a399994df49db348a4725933b12fc807909cee21d48e46f53a28e79d4b",
+        strip_prefix = "abseil-cpp-8fbcdb90952c57828c4a9c2f6d79fcd7cae9088f",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/455dc17ba1af9635f0b60155bc565bc572a1e722.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/455dc17ba1af9635f0b60155bc565bc572a1e722.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/8fbcdb90952c57828c4a9c2f6d79fcd7cae9088f.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/8fbcdb90952c57828c4a9c2f6d79fcd7cae9088f.tar.gz",
         ],
     )
 
-- 
GitLab


From e9f8aff858b729b8aee33536888fa4c41645aa7a Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Wed, 12 Dec 2018 15:47:20 -0800
Subject: [PATCH 453/873] Test case cleanup.

PiperOrigin-RevId: 225269741
---
 .../python/keras/layers/unified_lstm_test.py  | 925 +++++++++---------
 1 file changed, 462 insertions(+), 463 deletions(-)

diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index 55ccebb43b..c51304666d 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -54,9 +55,252 @@ _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites)
 _config = config_pb2.ConfigProto(graph_options=_graph_options)
 
 
-@test_util.run_v1_only('b/120545219')
-class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
+@keras_parameterized.run_all_keras_modes(config=_config)
+class UnifiedLSTMTest(keras_parameterized.TestCase):
 
+  @parameterized.named_parameters(
+      ('non_tan_activation', 'relu', 'sigmoid', 0, False, True),
+      ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True),
+      ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True),
+      ('unroll', 'tanh', 'sigmoid', 0, True, True),
+      ('not_use_bias', 'tanh', 'sigmoid', 0, False, False),
+  )
+  def test_could_use_defun_backend(self, activation, recurrent_activation,
+                                   recurrent_dropout, unroll, use_bias):
+    layer = keras.layers.UnifiedLSTM(
+        1,
+        activation=activation,
+        recurrent_activation=recurrent_activation,
+        recurrent_dropout=recurrent_dropout,
+        unroll=unroll,
+        use_bias=use_bias)
+    self.assertFalse(layer.could_use_cudnn)
+
+  def test_static_shape_inference_LSTM(self):
+    # Github issue: 15165
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+
+    model = keras.models.Sequential()
+    inputs = keras.layers.Dense(
+        embedding_dim, input_shape=(timesteps, embedding_dim))
+    model.add(inputs)
+    layer = keras.layers.UnifiedLSTM(units, return_sequences=True)
+    model.add(layer)
+    outputs = model.layers[-1].output
+    self.assertEqual(outputs.get_shape().as_list(), [None, timesteps, units])
+
+  def test_dynamic_behavior_LSTM(self):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+    layer = keras.layers.UnifiedLSTM(units, input_shape=(None, embedding_dim))
+    model = keras.models.Sequential()
+    model.add(layer)
+    model.compile(gradient_descent.GradientDescentOptimizer(0.001), 'mse')
+    x = np.random.random((num_samples, timesteps, embedding_dim))
+    y = np.random.random((num_samples, units))
+    model.train_on_batch(x, y)
+
+  def test_stacking_LSTM(self):
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.UnifiedLSTM(10, return_sequences=True, unroll=False))
+    model.add(keras.layers.UnifiedLSTM(5, return_sequences=True, unroll=False))
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
+
+  def test_from_config_LSTM(self):
+    layer_class = keras.layers.UnifiedLSTM
+    for stateful in (False, True):
+      l1 = layer_class(units=1, stateful=stateful)
+      l2 = layer_class.from_config(l1.get_config())
+      assert l1.get_config() == l2.get_config()
+
+  def test_specify_initial_state_keras_tensor(self):
+    num_states = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 3
+    num_samples = 2
+
+    # Test with Keras tensor
+    inputs = keras.Input((timesteps, embedding_dim))
+    initial_state = [keras.Input((units,)) for _ in range(num_states)]
+    layer = keras.layers.UnifiedLSTM(units)
+    if len(initial_state) == 1:
+      output = layer(inputs, initial_state=initial_state[0])
+    else:
+      output = layer(inputs, initial_state=initial_state)
+    assert initial_state[0] in layer._inbound_nodes[0].input_tensors
+
+    model = keras.models.Model([inputs] + initial_state, output)
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    initial_state = [
+        np.random.random((num_samples, units)) for _ in range(num_states)
+    ]
+    targets = np.random.random((num_samples, units))
+    model.train_on_batch([inputs] + initial_state, targets)
+
+  def DISABLED_test_specify_initial_state_non_keras_tensor(self):
+    num_states = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 3
+    num_samples = 2
+
+    # Test with non-Keras tensor
+    inputs = keras.Input((timesteps, embedding_dim))
+    initial_state = [
+        keras.backend.random_normal_variable((num_samples, units), 0, 1)
+        for _ in range(num_states)
+    ]
+    layer = keras.layers.UnifiedLSTM(units)
+    output = layer(inputs, initial_state=initial_state)
+
+    model = keras.models.Model(inputs, output)
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    targets = np.random.random((num_samples, units))
+    model.train_on_batch(inputs, targets)
+
+  def test_reset_states_with_values(self):
+    num_states = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 3
+    num_samples = 2
+
+    layer = keras.layers.UnifiedLSTM(units, stateful=True)
+    layer.build((num_samples, timesteps, embedding_dim))
+    layer.reset_states()
+    assert len(layer.states) == num_states
+    assert layer.states[0] is not None
+    self.assertAllClose(
+        keras.backend.eval(layer.states[0]),
+        np.zeros(keras.backend.int_shape(layer.states[0])),
+        atol=1e-4)
+    state_shapes = [keras.backend.int_shape(state) for state in layer.states]
+    values = [np.ones(shape) for shape in state_shapes]
+    if len(values) == 1:
+      values = values[0]
+    layer.reset_states(values)
+    self.assertAllClose(
+        keras.backend.eval(layer.states[0]),
+        np.ones(keras.backend.int_shape(layer.states[0])),
+        atol=1e-4)
+
+    # Test with invalid data
+    with self.assertRaises(ValueError):
+      layer.reset_states([1] * (len(layer.states) + 1))
+
+  def test_specify_state_with_masking(self):
+    num_states = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 3
+    num_samples = 2
+
+    inputs = keras.Input((timesteps, embedding_dim))
+    _ = keras.layers.Masking()(inputs)
+    initial_state = [keras.Input((units,)) for _ in range(num_states)]
+    output = keras.layers.UnifiedLSTM(units)(
+        inputs, initial_state=initial_state)
+
+    model = keras.models.Model([inputs] + initial_state, output)
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    initial_state = [
+        np.random.random((num_samples, units)) for _ in range(num_states)
+    ]
+    targets = np.random.random((num_samples, units))
+    model.train_on_batch([inputs] + initial_state, targets)
+
+  def test_return_state(self):
+    num_states = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 3
+    num_samples = 2
+
+    inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
+    layer = keras.layers.UnifiedLSTM(units, return_state=True, stateful=True)
+    outputs = layer(inputs)
+    state = outputs[1:]
+    assert len(state) == num_states
+    model = keras.models.Model(inputs, state[0])
+
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    state = model.predict(inputs)
+    self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4)
+
+  def test_state_reuse(self):
+    timesteps = 3
+    embedding_dim = 4
+    units = 3
+    num_samples = 2
+
+    inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
+    layer = keras.layers.UnifiedLSTM(
+        units, return_state=True, return_sequences=True)
+    outputs = layer(inputs)
+    output, state = outputs[0], outputs[1:]
+    output = keras.layers.UnifiedLSTM(units)(output, initial_state=state)
+    model = keras.models.Model(inputs, output)
+
+    inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    model.predict(inputs)
+
+  def test_initial_states_as_other_inputs(self):
+    timesteps = 3
+    embedding_dim = 4
+    units = 3
+    num_samples = 2
+    num_states = 2
+    layer_class = keras.layers.UnifiedLSTM
+
+    # Test with Keras tensor
+    main_inputs = keras.Input((timesteps, embedding_dim))
+    initial_state = [keras.Input((units,)) for _ in range(num_states)]
+    inputs = [main_inputs] + initial_state
+
+    layer = layer_class(units)
+    output = layer(inputs)
+    assert initial_state[0] in layer._inbound_nodes[0].input_tensors
+
+    model = keras.models.Model(inputs, output)
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+
+    main_inputs = np.random.random((num_samples, timesteps, embedding_dim))
+    initial_state = [
+        np.random.random((num_samples, units)) for _ in range(num_states)
+    ]
+    targets = np.random.random((num_samples, units))
+    model.train_on_batch([main_inputs] + initial_state, targets)
+
+
+class LSTMLayerGraphOnlyTest(test.TestCase):
+
+  # Need session for test
+  @test_util.run_deprecated_v1
   def test_unifiedLSTM(self):
     input_shape = 10
     rnn_state_size = 8
@@ -101,6 +345,8 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
         self.assertNotEqual(existing_loss, loss_value)
         existing_loss = loss_value
 
+  # Need session for test
+  @test_util.run_deprecated_v1
   def test_unifiedLSTM_with_cond(self):
     # This test is to demonstrate the graph rewrite of grappler plugin under
     # the condition that the function returns different number of internal
@@ -158,25 +404,48 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
         self.assertNotEqual(existing_loss, loss_value)
         existing_loss = loss_value
 
-  @parameterized.named_parameters(
-      ('non_tan_activation', 'relu', 'sigmoid', 0, False, True),
-      ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True),
-      ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True),
-      ('unroll', 'tanh', 'sigmoid', 0, True, True),
-      ('not_use_bias', 'tanh', 'sigmoid', 0, False, False),
-  )
+  # b/120919032
+  @test_util.run_deprecated_v1
+  def test_regularizers_LSTM(self):
+    embedding_dim = 4
+    layer_class = keras.layers.UnifiedLSTM
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_regularizer=keras.regularizers.l1(0.01),
+        recurrent_regularizer=keras.regularizers.l1(0.01),
+        bias_regularizer='l2',
+        activity_regularizer='l1')
+    layer.build((None, None, 2))
+    self.assertEqual(len(layer.losses), 3)
+    x = keras.backend.variable(np.ones((2, 3, 2)))
+    layer(x)
+    self.assertEqual(len(layer.get_losses_for(x)), 1)
+
+
+# TODO(scottzhu): Re-enable those tests in v2 mode once bugs attached are fixed.
+@test_util.run_v1_only
+class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
+
+  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_could_use_defun_backend(self, activation, recurrent_activation,
-                                   recurrent_dropout, unroll, use_bias):
-    layer = keras.layers.UnifiedLSTM(
-        1,
-        activation=activation,
-        recurrent_activation=recurrent_activation,
-        recurrent_dropout=recurrent_dropout,
-        unroll=unroll,
-        use_bias=use_bias)
-    self.assertFalse(layer.could_use_cudnn)
+  def test_dropout_LSTM(self):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+    testing_utils.layer_test(
+        keras.layers.UnifiedLSTM,
+        kwargs={
+            'units': units,
+            'dropout': 0.1,
+            'recurrent_dropout': 0.1
+        },
+        input_shape=(num_samples, timesteps, embedding_dim))
 
+  # b/120911602
   def test_unified_lstm_feature_parity_with_canonical_lstm(self):
     with context.eager_mode():
       # Run this test under eager only due to b/120160788 for model.set_weights.
@@ -216,85 +485,67 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
       self.assertAllClose(y_1, y_3)
       self.assertAllClose(y_2, y_4)
 
-  @parameterized.named_parameters(
-      # test_name, use_bias, bias_initializer, activation
-      ('normal', True, 'zeros'),
-      ('no_bias', False, 'zeros'),
-      ('random_bias', True, 'random_uniform'),
-  )
+  # b/120911602
+  @parameterized.named_parameters(('v0', 0), ('v1', 1), ('v2', 2))
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_unified_lstm_model_save_load(self, use_bias, bias_initializer):
-    temp_dir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, temp_dir)
-    h5_path = os.path.join(temp_dir, 'test.h5')
-
-    batch = 10
-    timestep = 3
-    input_dim = 5
+  def test_implementation_mode_LSTM(self, implementation_mode):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
     units = 2
+    testing_utils.layer_test(
+        keras.layers.UnifiedLSTM,
+        kwargs={
+            'units': units,
+            'implementation': implementation_mode
+        },
+        input_shape=(num_samples, timesteps, embedding_dim))
 
-    x = np.random.random((batch, timestep, input_dim))
-
-    def build_model():
-      inputs = keras.layers.Input(
-          shape=[timestep, input_dim], dtype=dtypes.float32)
-      layer = keras.layers.UnifiedLSTM(
-          units,
-          use_bias=use_bias,
-          bias_initializer=bias_initializer)
-      output = layer(inputs)
-      return keras.models.Model(inputs, output), layer
-
-    model, layer = build_model()
-    y_ref = model.predict(x)
-    model.save_weights(h5_path)
-
-    cloned_model, new_layer = build_model()
-    cloned_model.load_weights(h5_path)
-    y = cloned_model.predict(x)
+    layer_class = keras.layers.UnifiedLSTM
+    k_constraint = keras.constraints.max_norm(0.01)
+    r_constraint = keras.constraints.max_norm(0.01)
+    b_constraint = keras.constraints.max_norm(0.01)
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_constraint=k_constraint,
+        recurrent_constraint=r_constraint,
+        bias_constraint=b_constraint)
+    layer.build((None, None, embedding_dim))
+    self.assertEqual(layer.cell.kernel.constraint, k_constraint)
+    self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
+    self.assertEqual(layer.cell.bias.constraint, b_constraint)
 
-    self.assertAllClose(y, y_ref)
-    self.assertAllClose(layer.get_weights(), new_layer.get_weights())
+    layer_class = keras.layers.UnifiedLSTM
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.Masking(input_shape=(3, 4)))
+    model.add(layer_class(units=5, return_sequences=True, unroll=False))
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
+  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_unified_lstm_output_on_multiple_kernel(self):
-    input_shape = 10
-    rnn_state_size = 8
-    timestep = 4
-    batch = 100
-
-    x_train = np.random.random((batch, timestep, input_shape))
-
-    inputs = keras.layers.Input(
-        shape=[timestep, input_shape], dtype=dtypes.float32)
-    with test_util.device(use_gpu=False):
-      layer = keras.layers.UnifiedLSTM(rnn_state_size)
-      output = layer(inputs)
-      cpu_model = keras.models.Model(inputs, output)
-      weights = cpu_model.get_weights()
-      y_1 = cpu_model.predict(x_train)
-
-    with test_util.device(use_gpu=True):
-      layer = keras.layers.UnifiedLSTM(rnn_state_size)
-      output = layer(inputs)
-      gpu_model = keras.models.Model(inputs, output)
-      gpu_model.set_weights(weights)
-      y_2 = gpu_model.predict(x_train)
-
-    # Note that CuDNN uses 'sigmoid' as activation, so the unified LSTM uses
-    # 'sigmoid' as default. Construct the canonical LSTM with sigmoid to achieve
-    # the same output.
-    with test_util.device(use_gpu=True):
-      layer = keras.layers.LSTM(rnn_state_size, recurrent_activation='sigmoid')
-      output = layer(inputs)
-      canonical_model = keras.models.Model(inputs, output)
-      # Remove the extra cudnn bias since canonical lstm will not use it.
-      canonical_model.set_weights(weights[:3])
-      y_3 = canonical_model.predict(x_train)
-
-    self.assertAllClose(y_1, y_2)
-    self.assertAllClose(y_2, y_3)
+  def test_masking_with_stacking_LSTM(self):
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.Masking(input_shape=(3, 4)))
+    model.add(keras.layers.UnifiedLSTM(10, return_sequences=True, unroll=False))
+    model.add(keras.layers.UnifiedLSTM(5, return_sequences=True, unroll=False))
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
+  # b/120911602
   @parameterized.named_parameters(
       # test_name, time_major, go_backwards
       ('normal', False, False),
@@ -339,8 +590,6 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
 
     self.assertAllClose(y, y_ref)
 
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_keras_model_with_lstm(self):
     input_shape = 10
     rnn_state_size = 8
     output_shape = 8
@@ -367,52 +616,89 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
     model.evaluate(x_train, y_train)
     model.predict(x_train)
 
+  # b/120911602
+  @parameterized.named_parameters(
+      # test_name, use_bias, bias_initializer, activation
+      ('normal', True, 'zeros'),
+      ('no_bias', False, 'zeros'),
+      ('random_bias', True, 'random_uniform'),
+  )
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_return_sequences_LSTM(self):
-    num_samples = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 2
-    testing_utils.layer_test(
-        keras.layers.UnifiedLSTM,
-        kwargs={
-            'units': units,
-            'return_sequences': True
-        },
-        input_shape=(num_samples, timesteps, embedding_dim))
+  def test_unified_lstm_model_save_load(self, use_bias, bias_initializer):
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir)
+    h5_path = os.path.join(temp_dir, 'test.h5')
 
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_static_shape_inference_LSTM(self):
-    # Github issue: 15165
-    timesteps = 3
-    embedding_dim = 4
+    batch = 10
+    timestep = 3
+    input_dim = 5
     units = 2
 
-    model = keras.models.Sequential()
-    inputs = keras.layers.Dense(
-        embedding_dim, input_shape=(timesteps, embedding_dim))
-    model.add(inputs)
-    layer = keras.layers.UnifiedLSTM(units, return_sequences=True)
-    model.add(layer)
-    outputs = model.layers[-1].output
-    self.assertEqual(outputs.get_shape().as_list(), [None, timesteps, units])
+    x = np.random.random((batch, timestep, input_dim))
+
+    def build_model():
+      inputs = keras.layers.Input(
+          shape=[timestep, input_dim], dtype=dtypes.float32)
+      layer = keras.layers.UnifiedLSTM(
+          units,
+          use_bias=use_bias,
+          bias_initializer=bias_initializer)
+      output = layer(inputs)
+      return keras.models.Model(inputs, output), layer
 
+    model, layer = build_model()
+    y_ref = model.predict(x)
+    model.save_weights(h5_path)
+
+    cloned_model, new_layer = build_model()
+    cloned_model.load_weights(h5_path)
+    y = cloned_model.predict(x)
+
+    self.assertAllClose(y, y_ref)
+    self.assertAllClose(layer.get_weights(), new_layer.get_weights())
+
+  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_dynamic_behavior_LSTM(self):
-    num_samples = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 2
-    layer = keras.layers.UnifiedLSTM(units, input_shape=(None, embedding_dim))
-    model = keras.models.Sequential()
-    model.add(layer)
-    model.compile(gradient_descent.GradientDescentOptimizer(0.001), 'mse')
-    x = np.random.random((num_samples, timesteps, embedding_dim))
-    y = np.random.random((num_samples, units))
-    model.train_on_batch(x, y)
+  def test_unified_lstm_output_on_multiple_kernel(self):
+    input_shape = 10
+    rnn_state_size = 8
+    timestep = 4
+    batch = 100
+
+    x_train = np.random.random((batch, timestep, input_shape))
+
+    inputs = keras.layers.Input(
+        shape=[timestep, input_shape], dtype=dtypes.float32)
+    with test_util.device(use_gpu=False):
+      layer = keras.layers.UnifiedLSTM(rnn_state_size)
+      output = layer(inputs)
+      cpu_model = keras.models.Model(inputs, output)
+      weights = cpu_model.get_weights()
+    y_1 = cpu_model.predict(x_train)
+
+    with test_util.device(use_gpu=True):
+      layer = keras.layers.UnifiedLSTM(rnn_state_size)
+      output = layer(inputs)
+      gpu_model = keras.models.Model(inputs, output)
+      gpu_model.set_weights(weights)
+    y_2 = gpu_model.predict(x_train)
+
+    # Note that CuDNN uses 'sigmoid' as activation, so the unified LSTM uses
+    # 'sigmoid' as default. Construct the canonical LSTM with sigmoid to achieve
+    # the same output.
+    with test_util.device(use_gpu=True):
+      layer = keras.layers.LSTM(rnn_state_size, recurrent_activation='sigmoid')
+      output = layer(inputs)
+      canonical_model = keras.models.Model(inputs, output)
+      # Remove the extra cudnn bias since canonical lstm will not use it.
+      canonical_model.set_weights(weights[:3])
+    y_3 = canonical_model.predict(x_train)
+
+    self.assertAllClose(y_1, y_2)
+    self.assertAllClose(y_2, y_3)
 
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_dropout_LSTM(self):
+  def test_return_sequences_LSTM(self):
     num_samples = 2
     timesteps = 3
     embedding_dim = 4
@@ -421,360 +707,73 @@ class UnifiedLSTMTest(test.TestCase, parameterized.TestCase):
         keras.layers.UnifiedLSTM,
         kwargs={
             'units': units,
-            'dropout': 0.1,
-            'recurrent_dropout': 0.1
+            'return_sequences': True
         },
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  @parameterized.parameters([0, 1, 2])
+  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_implementation_mode_LSTM(self, implementation_mode):
+  def test_statefulness_LSTM(self):
     num_samples = 2
     timesteps = 3
     embedding_dim = 4
     units = 2
-    testing_utils.layer_test(
-        keras.layers.UnifiedLSTM,
-        kwargs={
-            'units': units,
-            'implementation': implementation_mode
-        },
-        input_shape=(num_samples, timesteps, embedding_dim))
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_constraints_LSTM(self):
-    embedding_dim = 4
     layer_class = keras.layers.UnifiedLSTM
-    k_constraint = keras.constraints.max_norm(0.01)
-    r_constraint = keras.constraints.max_norm(0.01)
-    b_constraint = keras.constraints.max_norm(0.01)
-    layer = layer_class(
-        5,
-        return_sequences=False,
-        weights=None,
-        input_shape=(None, embedding_dim),
-        kernel_constraint=k_constraint,
-        recurrent_constraint=r_constraint,
-        bias_constraint=b_constraint)
-    layer.build((None, None, embedding_dim))
-    self.assertEqual(layer.cell.kernel.constraint, k_constraint)
-    self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
-    self.assertEqual(layer.cell.bias.constraint, b_constraint)
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_with_masking_layer_LSTM(self):
-    layer_class = keras.layers.UnifiedLSTM
-    inputs = np.random.random((2, 3, 4))
-    targets = np.abs(np.random.random((2, 3, 5)))
-    targets /= targets.sum(axis=-1, keepdims=True)
     model = keras.models.Sequential()
-    model.add(keras.layers.Masking(input_shape=(3, 4)))
-    model.add(layer_class(units=5, return_sequences=True, unroll=False))
-    model.compile(
-        loss='categorical_crossentropy',
-        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
-    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_stacking_LSTM(self):
-    inputs = np.random.random((2, 3, 4))
-    targets = np.abs(np.random.random((2, 3, 5)))
-    targets /= targets.sum(axis=-1, keepdims=True)
-    model = keras.models.Sequential()
-    model.add(keras.layers.UnifiedLSTM(10, return_sequences=True, unroll=False))
-    model.add(keras.layers.UnifiedLSTM(5, return_sequences=True, unroll=False))
-    model.compile(
-        loss='categorical_crossentropy',
-        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
-    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_masking_with_stacking_LSTM(self):
-    inputs = np.random.random((2, 3, 4))
-    targets = np.abs(np.random.random((2, 3, 5)))
-    targets /= targets.sum(axis=-1, keepdims=True)
-    model = keras.models.Sequential()
-    model.add(keras.layers.Masking(input_shape=(3, 4)))
-    model.add(keras.layers.UnifiedLSTM(10, return_sequences=True, unroll=False))
-    model.add(keras.layers.UnifiedLSTM(5, return_sequences=True, unroll=False))
-    model.compile(
-        loss='categorical_crossentropy',
-        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
-    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_from_config_LSTM(self):
-    layer_class = keras.layers.UnifiedLSTM
-    for stateful in (False, True):
-      l1 = layer_class(units=1, stateful=stateful)
-      l2 = layer_class.from_config(l1.get_config())
-      assert l1.get_config() == l2.get_config()
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_specify_initial_state_keras_tensor(self):
-    num_states = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 3
-    num_samples = 2
-
-    # Test with Keras tensor
-    inputs = keras.Input((timesteps, embedding_dim))
-    initial_state = [keras.Input((units,)) for _ in range(num_states)]
-    layer = keras.layers.UnifiedLSTM(units)
-    if len(initial_state) == 1:
-      output = layer(inputs, initial_state=initial_state[0])
-    else:
-      output = layer(inputs, initial_state=initial_state)
-    assert initial_state[0] in layer._inbound_nodes[0].input_tensors
-
-    model = keras.models.Model([inputs] + initial_state, output)
-    model.compile(
-        loss='categorical_crossentropy',
-        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
-
-    inputs = np.random.random((num_samples, timesteps, embedding_dim))
-    initial_state = [
-        np.random.random((num_samples, units)) for _ in range(num_states)
-    ]
-    targets = np.random.random((num_samples, units))
-    model.train_on_batch([inputs] + initial_state, targets)
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def DISABLED_test_specify_initial_state_non_keras_tensor(self):
-    num_states = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 3
-    num_samples = 2
-
-    # Test with non-Keras tensor
-    inputs = keras.Input((timesteps, embedding_dim))
-    initial_state = [
-        keras.backend.random_normal_variable((num_samples, units), 0, 1)
-        for _ in range(num_states)
-    ]
-    layer = keras.layers.UnifiedLSTM(units)
-    output = layer(inputs, initial_state=initial_state)
-
-    model = keras.models.Model(inputs, output)
+    model.add(
+        keras.layers.Embedding(
+            4,
+            embedding_dim,
+            mask_zero=True,
+            input_length=timesteps,
+            batch_input_shape=(num_samples, timesteps)))
+    layer = layer_class(
+        units, return_sequences=False, stateful=True, weights=None)
+    model.add(layer)
     model.compile(
-        loss='categorical_crossentropy',
-        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01), loss='mse')
+    out1 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertEqual(out1.shape, (num_samples, units))
 
-    inputs = np.random.random((num_samples, timesteps, embedding_dim))
-    targets = np.random.random((num_samples, units))
-    model.train_on_batch(inputs, targets)
+    # train once so that the states change
+    model.train_on_batch(
+        np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
+    out2 = model.predict(np.ones((num_samples, timesteps)))
 
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_reset_states_with_values(self):
-    num_states = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 3
-    num_samples = 2
+    # if the state is not reset, output should be different
+    self.assertNotEqual(out1.max(), out2.max())
 
-    layer = keras.layers.UnifiedLSTM(units, stateful=True)
-    layer.build((num_samples, timesteps, embedding_dim))
+    # check that output changes after states are reset
+    # (even though the model itself didn't change)
     layer.reset_states()
-    assert len(layer.states) == num_states
-    assert layer.states[0] is not None
-    self.assertAllClose(
-        keras.backend.eval(layer.states[0]),
-        np.zeros(keras.backend.int_shape(layer.states[0])),
-        atol=1e-4)
-    state_shapes = [keras.backend.int_shape(state) for state in layer.states]
-    values = [np.ones(shape) for shape in state_shapes]
-    if len(values) == 1:
-      values = values[0]
-    layer.reset_states(values)
-    self.assertAllClose(
-        keras.backend.eval(layer.states[0]),
-        np.ones(keras.backend.int_shape(layer.states[0])),
-        atol=1e-4)
+    out3 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertNotEqual(out2.max(), out3.max())
 
-    # Test with invalid data
-    with self.assertRaises(ValueError):
-      layer.reset_states([1] * (len(layer.states) + 1))
+    # check that container-level reset_states() works
+    model.reset_states()
+    out4 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertAllClose(out3, out4, atol=1e-5)
 
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_specify_state_with_masking(self):
-    num_states = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 3
-    num_samples = 2
+    # check that the call to `predict` updated the states
+    out5 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertNotEqual(out4.max(), out5.max())
 
-    inputs = keras.Input((timesteps, embedding_dim))
-    _ = keras.layers.Masking()(inputs)
-    initial_state = [keras.Input((units,)) for _ in range(num_states)]
-    output = keras.layers.UnifiedLSTM(units)(
-        inputs, initial_state=initial_state)
-
-    model = keras.models.Model([inputs] + initial_state, output)
-    model.compile(
-        loss='categorical_crossentropy',
-        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
-
-    inputs = np.random.random((num_samples, timesteps, embedding_dim))
-    initial_state = [
-        np.random.random((num_samples, units)) for _ in range(num_states)
-    ]
-    targets = np.random.random((num_samples, units))
-    model.train_on_batch([inputs] + initial_state, targets)
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_return_state(self):
-    num_states = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 3
-    num_samples = 2
-
-    inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
-    layer = keras.layers.UnifiedLSTM(units, return_state=True, stateful=True)
-    outputs = layer(inputs)
-    state = outputs[1:]
-    assert len(state) == num_states
-    model = keras.models.Model(inputs, state[0])
-
-    inputs = np.random.random((num_samples, timesteps, embedding_dim))
-    state = model.predict(inputs)
-    self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4)
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_state_reuse(self):
-    timesteps = 3
-    embedding_dim = 4
-    units = 3
-    num_samples = 2
-
-    inputs = keras.Input(batch_shape=(num_samples, timesteps, embedding_dim))
-    layer = keras.layers.UnifiedLSTM(
-        units, return_state=True, return_sequences=True)
-    outputs = layer(inputs)
-    output, state = outputs[0], outputs[1:]
-    output = keras.layers.UnifiedLSTM(units)(output, initial_state=state)
-    model = keras.models.Model(inputs, output)
-
-    inputs = np.random.random((num_samples, timesteps, embedding_dim))
-    model.predict(inputs)
-
-  @test_util.run_in_graph_and_eager_modes(config=_config)
-  def test_initial_states_as_other_inputs(self):
-    timesteps = 3
-    embedding_dim = 4
-    units = 3
-    num_samples = 2
-    num_states = 2
-    layer_class = keras.layers.UnifiedLSTM
-
-    # Test with Keras tensor
-    main_inputs = keras.Input((timesteps, embedding_dim))
-    initial_state = [keras.Input((units,)) for _ in range(num_states)]
-    inputs = [main_inputs] + initial_state
-
-    layer = layer_class(units)
-    output = layer(inputs)
-    assert initial_state[0] in layer._inbound_nodes[0].input_tensors
-
-    model = keras.models.Model(inputs, output)
-    model.compile(
-        loss='categorical_crossentropy',
-        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
-
-    main_inputs = np.random.random((num_samples, timesteps, embedding_dim))
-    initial_state = [
-        np.random.random((num_samples, units)) for _ in range(num_states)
-    ]
-    targets = np.random.random((num_samples, units))
-    model.train_on_batch([main_inputs] + initial_state, targets)
+    # Check masking
+    layer.reset_states()
 
+    left_padded_input = np.ones((num_samples, timesteps))
+    left_padded_input[0, :1] = 0
+    left_padded_input[1, :2] = 0
+    out6 = model.predict(left_padded_input)
 
-@test_util.run_v1_only('b/120545219')
-class LSTMLayerGraphOnlyTest(test.TestCase):
+    layer.reset_states()
 
-  def test_statefulness_LSTM(self):
-    num_samples = 2
-    timesteps = 3
-    embedding_dim = 4
-    units = 2
-    layer_class = keras.layers.UnifiedLSTM
-    with self.cached_session(config=_config):
-      model = keras.models.Sequential()
-      model.add(
-          keras.layers.Embedding(
-              4,
-              embedding_dim,
-              mask_zero=True,
-              input_length=timesteps,
-              batch_input_shape=(num_samples, timesteps)))
-      layer = layer_class(
-          units, return_sequences=False, stateful=True, weights=None)
-      model.add(layer)
-      model.compile(
-          optimizer=gradient_descent.GradientDescentOptimizer(0.01), loss='mse')
-      out1 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertEqual(out1.shape, (num_samples, units))
-
-      # train once so that the states change
-      model.train_on_batch(
-          np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
-      out2 = model.predict(np.ones((num_samples, timesteps)))
-
-      # if the state is not reset, output should be different
-      self.assertNotEqual(out1.max(), out2.max())
-
-      # check that output changes after states are reset
-      # (even though the model itself didn't change)
-      layer.reset_states()
-      out3 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertNotEqual(out2.max(), out3.max())
-
-      # check that container-level reset_states() works
-      model.reset_states()
-      out4 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertAllClose(out3, out4, atol=1e-5)
-
-      # check that the call to `predict` updated the states
-      out5 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertNotEqual(out4.max(), out5.max())
-
-      # Check masking
-      layer.reset_states()
-
-      left_padded_input = np.ones((num_samples, timesteps))
-      left_padded_input[0, :1] = 0
-      left_padded_input[1, :2] = 0
-      out6 = model.predict(left_padded_input)
-
-      layer.reset_states()
-
-      right_padded_input = np.ones((num_samples, timesteps))
-      right_padded_input[0, -1:] = 0
-      right_padded_input[1, -2:] = 0
-      out7 = model.predict(right_padded_input)
-
-      self.assertAllClose(out7, out6, atol=1e-5)
+    right_padded_input = np.ones((num_samples, timesteps))
+    right_padded_input[0, -1:] = 0
+    right_padded_input[1, -2:] = 0
+    out7 = model.predict(right_padded_input)
 
-  def test_regularizers_LSTM(self):
-    embedding_dim = 4
-    layer_class = keras.layers.UnifiedLSTM
-    with self.cached_session(config=_config):
-      layer = layer_class(
-          5,
-          return_sequences=False,
-          weights=None,
-          input_shape=(None, embedding_dim),
-          kernel_regularizer=keras.regularizers.l1(0.01),
-          recurrent_regularizer=keras.regularizers.l1(0.01),
-          bias_regularizer='l2',
-          activity_regularizer='l1')
-      layer.build((None, None, 2))
-      self.assertEqual(len(layer.losses), 3)
-      x = keras.backend.variable(np.ones((2, 3, 2)))
-      layer(x)
-      self.assertEqual(len(layer.get_losses_for(x)), 1)
+    self.assertAllClose(out7, out6, atol=1e-5)
 
 
 class UnifiedLSTMPerformanceTest(test.Benchmark):
-- 
GitLab


From 8b8adf8598b53503675447f391fdca7d0ed4f30a Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Wed, 12 Dec 2018 15:58:03 -0800
Subject: [PATCH 454/873] Update simplernn_test to use v2 mode.

Test case that can only run in v1 has bug attached.

PiperOrigin-RevId: 225271476
---
 .../python/keras/layers/simplernn_test.py     | 155 +++++++++---------
 1 file changed, 77 insertions(+), 78 deletions(-)

diff --git a/tensorflow/python/keras/layers/simplernn_test.py b/tensorflow/python/keras/layers/simplernn_test.py
index bb3fea2692..58f2f9a913 100644
--- a/tensorflow/python/keras/layers/simplernn_test.py
+++ b/tensorflow/python/keras/layers/simplernn_test.py
@@ -22,14 +22,15 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 from tensorflow.python.training import gradient_descent
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class SimpleRNNLayerTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class SimpleRNNLayerTest(keras_parameterized.TestCase):
 
   def test_return_sequences_SimpleRNN(self):
     num_samples = 2
@@ -118,93 +119,91 @@ class SimpleRNNLayerTest(test.TestCase):
       l2 = layer_class.from_config(l1.get_config())
       assert l1.get_config() == l2.get_config()
 
-
-class SimpleRNNLayerGraphOnlyTest(test.TestCase):
-
-  @tf_test_util.run_v1_only('b/120545219')
   def test_statefulness_SimpleRNN(self):
     num_samples = 2
     timesteps = 3
     embedding_dim = 4
     units = 2
     layer_class = keras.layers.SimpleRNN
-    with self.cached_session():
-      model = keras.models.Sequential()
-      model.add(
-          keras.layers.Embedding(
-              4,
-              embedding_dim,
-              mask_zero=True,
-              input_length=timesteps,
-              batch_input_shape=(num_samples, timesteps)))
-      layer = layer_class(
-          units, return_sequences=False, stateful=True, weights=None)
-      model.add(layer)
-      model.compile(optimizer=gradient_descent.GradientDescentOptimizer(0.01),
-                    loss='mse')
-      out1 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertEqual(out1.shape, (num_samples, units))
-
-      # train once so that the states change
-      model.train_on_batch(
-          np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
-      out2 = model.predict(np.ones((num_samples, timesteps)))
-
-      # if the state is not reset, output should be different
-      self.assertNotEqual(out1.max(), out2.max())
-
-      # check that output changes after states are reset
-      # (even though the model itself didn't change)
-      layer.reset_states()
-      out3 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertNotEqual(out2.max(), out3.max())
-
-      # check that container-level reset_states() works
-      model.reset_states()
-      out4 = model.predict(np.ones((num_samples, timesteps)))
-      np.testing.assert_allclose(out3, out4, atol=1e-5)
-
-      # check that the call to `predict` updated the states
-      out5 = model.predict(np.ones((num_samples, timesteps)))
-      self.assertNotEqual(out4.max(), out5.max())
-
-      # Check masking
-      layer.reset_states()
-
-      left_padded_input = np.ones((num_samples, timesteps))
-      left_padded_input[0, :1] = 0
-      left_padded_input[1, :2] = 0
-      out6 = model.predict(left_padded_input)
-
-      layer.reset_states()
-
-      right_padded_input = np.ones((num_samples, timesteps))
-      right_padded_input[0, -1:] = 0
-      right_padded_input[1, -2:] = 0
-      out7 = model.predict(right_padded_input)
-
-      np.testing.assert_allclose(out7, out6, atol=1e-5)
+    model = keras.models.Sequential()
+    model.add(
+        keras.layers.Embedding(
+            4,
+            embedding_dim,
+            mask_zero=True,
+            input_length=timesteps,
+            batch_input_shape=(num_samples, timesteps)))
+    layer = layer_class(
+        units, return_sequences=False, stateful=True, weights=None)
+    model.add(layer)
+    model.compile(optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                  loss='mse')
+    out1 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertEqual(out1.shape, (num_samples, units))
+
+    # train once so that the states change
+    model.train_on_batch(
+        np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
+    out2 = model.predict(np.ones((num_samples, timesteps)))
+
+    # if the state is not reset, output should be different
+    self.assertNotEqual(out1.max(), out2.max())
+
+    # check that output changes after states are reset
+    # (even though the model itself didn't change)
+    layer.reset_states()
+    out3 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertNotEqual(out2.max(), out3.max())
+
+    # check that container-level reset_states() works
+    model.reset_states()
+    out4 = model.predict(np.ones((num_samples, timesteps)))
+    np.testing.assert_allclose(out3, out4, atol=1e-5)
+
+    # check that the call to `predict` updated the states
+    out5 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertNotEqual(out4.max(), out5.max())
+
+    # Check masking
+    layer.reset_states()
 
+    left_padded_input = np.ones((num_samples, timesteps))
+    left_padded_input[0, :1] = 0
+    left_padded_input[1, :2] = 0
+    out6 = model.predict(left_padded_input)
+
+    layer.reset_states()
+
+    right_padded_input = np.ones((num_samples, timesteps))
+    right_padded_input[0, -1:] = 0
+    right_padded_input[1, -2:] = 0
+    out7 = model.predict(right_padded_input)
+
+    np.testing.assert_allclose(out7, out6, atol=1e-5)
+
+
+class SimpleRNNLayerGraphOnlyTest(test.TestCase):
+
+  # b/120919032
   @tf_test_util.run_deprecated_v1
   def test_regularizers_SimpleRNN(self):
     embedding_dim = 4
     layer_class = keras.layers.SimpleRNN
-    with self.cached_session():
-      layer = layer_class(
-          5,
-          return_sequences=False,
-          weights=None,
-          input_shape=(None, embedding_dim),
-          kernel_regularizer=keras.regularizers.l1(0.01),
-          recurrent_regularizer=keras.regularizers.l1(0.01),
-          bias_regularizer='l2',
-          activity_regularizer='l1')
-      layer.build((None, None, 2))
-      self.assertEqual(len(layer.losses), 3)
-
-      x = keras.backend.variable(np.ones((2, 3, 2)))
-      layer(x)
-      self.assertEqual(len(layer.get_losses_for(x)), 1)
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_regularizer=keras.regularizers.l1(0.01),
+        recurrent_regularizer=keras.regularizers.l1(0.01),
+        bias_regularizer='l2',
+        activity_regularizer='l1')
+    layer.build((None, None, 2))
+    self.assertEqual(len(layer.losses), 3)
+
+    x = keras.backend.variable(np.ones((2, 3, 2)))
+    layer(x)
+    self.assertEqual(len(layer.get_losses_for(x)), 1)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 758fcb5909dc31ac6c2e0c424b1e412379d96e7b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 16:26:06 -0800
Subject: [PATCH 455/873] Allows Keras optimizer_v2's to be specified via
 string names in tf 1.x (And moves optimizer checks in eager to after the
 optimizer is deserialized)

PiperOrigin-RevId: 225276345
---
 tensorflow/python/keras/engine/saving_test.py | 15 +++++----
 tensorflow/python/keras/engine/training.py    |  6 ++--
 tensorflow/python/keras/models_test.py        |  3 +-
 .../python/keras/optimizer_v2/optimizer_v2.py |  9 +++--
 tensorflow/python/keras/optimizers.py         | 33 +++++++------------
 tensorflow/python/keras/optimizers_test.py    | 18 +++++-----
 6 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index bc33a3ea7f..f6ed3f45c4 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -30,8 +30,10 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import optimizers
 from tensorflow.python.keras.engine import saving
 from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
@@ -332,7 +334,6 @@ class TestWeightSavingAndLoading(test.TestCase, parameterized.TestCase):
 
 class TestWholeModelSaving(test.TestCase):
 
-  @test_util.run_v1_only('b/120545219')
   def test_sequential_model_saving(self):
     if h5py is None:
       self.skipTest('h5py required to run this test')
@@ -344,7 +345,7 @@ class TestWholeModelSaving(test.TestCase):
       model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
       model.compile(
           loss=keras.losses.MSE,
-          optimizer=keras.optimizers.RMSprop(lr=0.0001),
+          optimizer=rmsprop.RMSprop(lr=0.0001),
           metrics=[
               keras.metrics.categorical_accuracy,
               keras.metrics.CategoricalAccuracy()
@@ -383,7 +384,10 @@ class TestWholeModelSaving(test.TestCase):
 
       out = model.predict(x)
       out2 = new_model.predict(x)
-      self.assertAllClose(out, out2, atol=1e-05)
+
+      # TODO(b/120930751) This tolerance should be 1e-05,
+      # very concerning that its not.
+      self.assertAllClose(out, out2, atol=1e-03)
 
   @test_util.run_deprecated_v1
   def test_sequential_model_saving_without_input_shape(self):
@@ -635,8 +639,8 @@ class TestWholeModelSaving(test.TestCase):
       os.close(fd)
       os.remove(fname)
 
-  @test_util.run_v1_only('b/120545219')
   def test_saving_model_with_long_weights_names(self):
+    self.skipTest('b/120921503')
     if h5py is None:
       self.skipTest('h5py required to run this test')
 
@@ -756,14 +760,13 @@ class SubclassedModel(training.Model):
 
 class TestWeightSavingAndLoadingTFFormat(test.TestCase):
 
-  @test_util.run_v1_only('b/120545219')
   def test_keras_optimizer_warning(self):
     graph = ops.Graph()
     with graph.as_default(), self.session(graph):
       model = keras.models.Sequential()
       model.add(keras.layers.Dense(2, input_shape=(3,)))
       model.add(keras.layers.Dense(3))
-      model.compile(loss='mse', optimizer='adam', metrics=['acc'])
+      model.compile(loss='mse', optimizer=optimizers.Adam(), metrics=['acc'])
       model._make_train_function()
       temp_dir = self.get_temp_dir()
       prefix = os.path.join(temp_dir, 'ckpt')
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 65a5d00d74..38c8819c36 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -191,6 +191,7 @@ class Model(Network):
     """
     run_eagerly = kwargs.pop('run_eagerly', None)
     self._run_eagerly = run_eagerly
+    optimizer = optimizers.get(optimizer)
 
     # Validate that arguments passed by the user to `compile` are supported by
     # DistributionStrategy.
@@ -213,13 +214,14 @@ class Model(Network):
 
     loss = loss or {}
     if self.run_eagerly and not isinstance(
-        optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)):
+        optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer,
+                    optimizer_v2.OptimizerV2)):
       raise ValueError(
           'When running a model in eager execution, the optimizer must be an '
           'instance of tf.train.Optimizer. Received: '
           '%s' % optimizer)
 
-    self.optimizer = optimizers.get(optimizer)
+    self.optimizer = optimizer
     # We've disabled automatic dependency tracking for this method, but do want
     # to add a checkpoint dependency on the optimizer if it's checkpointable.
     if isinstance(self.optimizer, checkpointable.CheckpointableBase):
diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py
index c466d94fed..fe7d8a5f59 100644
--- a/tensorflow/python/keras/models_test.py
+++ b/tensorflow/python/keras/models_test.py
@@ -31,6 +31,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import metrics
 from tensorflow.python.keras import models
+from tensorflow.python.keras import optimizers
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -481,7 +482,7 @@ class TestCloneAndBuildModel(test.TestCase):
     self.assert_optimizer_iterations_increases(adam.AdamOptimizer(0.01))
 
   def test_replace_keras_optimizer_iterations_variable(self):
-    self.assert_optimizer_iterations_increases('adam')
+    self.assert_optimizer_iterations_increases(optimizers.Adam())
 
   def test_clone_and_build_sequential_model_without_inputs_defined(self):
     with self.cached_session():
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index d3153141ec..874d0f7fe6 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -510,7 +510,12 @@ class OptimizerV2(checkpointable.CheckpointableBase):
     Returns:
         Python dictionary.
     """
-    return {"name": self._name}
+    config = {"name": self._name}
+    if hasattr(self, "clipnorm"):
+      config["clipnorm"] = self.clipnorm
+    if hasattr(self, "clipvalue"):
+      config["clipvalue"] = self.clipvalue
+    return config
 
   @classmethod
   def from_config(cls, config, custom_objects=None):
@@ -789,7 +794,7 @@ def _filter_grads(grads_and_vars):
   """Filter out iterable with grad equal to None."""
   grads_and_vars = tuple(grads_and_vars)
   if not grads_and_vars:
-    raise ValueError("No variables provided.")
+    return grads_and_vars
   filtered = []
   vars_with_empty_grads = []
   for grad, var in grads_and_vars:
diff --git a/tensorflow/python/keras/optimizers.py b/tensorflow/python/keras/optimizers.py
index a558c2532b..decfcf993c 100644
--- a/tensorflow/python/keras/optimizers.py
+++ b/tensorflow/python/keras/optimizers.py
@@ -799,27 +799,18 @@ def deserialize(config, custom_objects=None):
   Returns:
       A Keras Optimizer instance.
   """
-  if tf2.enabled():
-    all_classes = {
-        'adadelta': adadelta_v2.Adadelta,
-        'adagrad': adagrad_v2.Adagrad,
-        'adam': adam_v2.Adam,
-        'adamax': adamax_v2.Adamax,
-        'nadam': nadam_v2.Nadam,
-        'rmsprop': rmsprop_v2.RMSprop,
-        'sgd': gradient_descent_v2.SGD
-    }
-  else:
-    all_classes = {
-        'adadelta': Adadelta,
-        'adagrad': Adagrad,
-        'adam': Adam,
-        'adamax': Adamax,
-        'nadam': Nadam,
-        'rmsprop': RMSprop,
-        'sgd': SGD,
-        'tfoptimizer': TFOptimizer
-    }
+  all_classes = {
+      'adadelta': adadelta_v2.Adadelta,
+      'adagrad': adagrad_v2.Adagrad,
+      'adam': adam_v2.Adam,
+      'adamax': adamax_v2.Adamax,
+      'nadam': nadam_v2.Nadam,
+      'rmsprop': rmsprop_v2.RMSprop,
+      'sgd': gradient_descent_v2.SGD
+  }
+  if not tf2.enabled():
+    all_classes['nadam'] = Nadam
+
   # Make deserialization case-insensitive for built-in optimizers.
   if config['class_name'].lower() in all_classes:
     config['class_name'] = config['class_name'].lower()
diff --git a/tensorflow/python/keras/optimizers_test.py b/tensorflow/python/keras/optimizers_test.py
index 77104a5d4d..33d65e690a 100644
--- a/tensorflow/python/keras/optimizers_test.py
+++ b/tensorflow/python/keras/optimizers_test.py
@@ -65,6 +65,15 @@ def _test_optimizer(optimizer, target=0.75):
   optim = keras.optimizers.deserialize(config)
   new_config = keras.optimizers.serialize(optim)
   new_config['class_name'] = new_config['class_name'].lower()
+  new_config['config'].pop('name', None)
+  if 'amsgrad' not in config['config']:
+    new_config['config'].pop('amsgrad', None)
+  if 'decay' in new_config['config'] and 'schedule_decay' in config['config']:
+    new_config['config']['schedule_decay'] = new_config['config'].pop('decay')
+  if 'momentum' not in config['config']:
+    new_config['config'].pop('momentum', None)
+  if 'centered' not in config['config']:
+    new_config['config'].pop('centered', None)
   assert config == new_config
 
   # Test constraints.
@@ -91,26 +100,22 @@ def _test_optimizer(optimizer, target=0.75):
 
 class KerasOptimizersTest(test.TestCase):
 
-  @test_util.run_v1_only('b/120545219')
   def test_sgd(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.SGD(lr=0.01,
                                            momentum=0.9,
                                            nesterov=True))
 
-  @test_util.run_v1_only('b/120545219')
   def test_rmsprop(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.RMSprop())
       _test_optimizer(keras.optimizers.RMSprop(decay=1e-3))
 
-  @test_util.run_v1_only('b/120545219')
   def test_adagrad(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adagrad())
       _test_optimizer(keras.optimizers.Adagrad(decay=1e-3))
 
-  @test_util.run_v1_only('b/120545219')
   def test_adadelta(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adadelta(), target=0.6)
@@ -119,32 +124,27 @@ class KerasOptimizersTest(test.TestCase):
       # the accuracy.
       _test_optimizer(keras.optimizers.Adadelta(decay=1e-3), target=0.4)
 
-  @test_util.run_v1_only('b/120545219')
   def test_adam(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adam())
       _test_optimizer(keras.optimizers.Adam(decay=1e-3))
       _test_optimizer(keras.optimizers.Adam(amsgrad=True))
 
-  @test_util.run_v1_only('b/120545219')
   def test_adamax(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adamax())
       _test_optimizer(keras.optimizers.Adamax(decay=1e-3))
 
-  @test_util.run_v1_only('b/120545219')
   def test_nadam(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Nadam())
 
-  @test_util.run_v1_only('b/120545219')
   def test_clipnorm(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.SGD(lr=0.01,
                                            momentum=0.9,
                                            clipnorm=0.5))
 
-  @test_util.run_v1_only('b/120545219')
   def test_clipvalue(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.SGD(lr=0.01,
-- 
GitLab


From 090c5ed0730ead8e52a7347ebe53150d4f691610 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Wed, 12 Dec 2018 16:26:53 -0800
Subject: [PATCH 456/873] Exclude contrib from the TF 2.0 pip packages.

PiperOrigin-RevId: 225276483
---
 tensorflow/BUILD                   |  6 ++-
 tensorflow/tools/pip_package/BUILD | 76 ++++++++++++++++++------------
 2 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index fd4b94202a..823ad8f506 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -606,9 +606,11 @@ py_library(
     name = "tensorflow_py",
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
-    deps = [
+    deps = select({
+        "api_version_2": [],
+        "//conditions:default": ["//tensorflow/contrib:contrib_py"],
+    }) + [
         ":tensorflow_py_no_contrib",
-        "//tensorflow/contrib:contrib_py",
         "//tensorflow/python/estimator:estimator_py",
     ],
 )
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index baacb87239..4ed2f6ce34 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -18,6 +18,13 @@ load(
     "if_ngraph",
 )
 
+# This flag specifies whether TensorFlow 2.0 API should be built instead
+# of 1.* API. Note that TensorFlow 2.0 API is currently under development.
+config_setting(
+    name = "api_version_2",
+    define_values = {"tf_api_version": "2"},
+)
+
 # This returns a list of headers of all public header libraries (e.g.,
 # framework, lib), and all of the transitive dependencies of those
 # public headers.  Not all of the headers returned by the filegroup
@@ -59,34 +66,6 @@ COMMON_PIP_DEPS = [
     "setup.py",
     ":included_headers",
     "//tensorflow:tensorflow_py",
-    "//tensorflow/contrib/autograph:autograph",
-    "//tensorflow/contrib/boosted_trees:boosted_trees_pip",
-    "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
-    "//tensorflow/contrib/compiler:xla",
-    "//tensorflow/contrib/constrained_optimization:constrained_optimization_pip",
-    "//tensorflow/contrib/eager/python/examples:examples_pip",
-    "//tensorflow/contrib/eager/python:evaluator",
-    "//tensorflow/contrib/gan:gan",
-    "//tensorflow/contrib/graph_editor:graph_editor_pip",
-    "//tensorflow/contrib/keras:keras",
-    "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip",
-    "//tensorflow/contrib/nn:nn_py",
-    "//tensorflow/contrib/predictor:predictor_pip",
-    "//tensorflow/contrib/proto:proto",
-    "//tensorflow/contrib/receptive_field:receptive_field_pip",
-    "//tensorflow/contrib/rate:rate",
-    "//tensorflow/contrib/rpc:rpc_pip",
-    "//tensorflow/contrib/session_bundle:session_bundle_pip",
-    "//tensorflow/contrib/signal:signal_py",
-    "//tensorflow/contrib/slim:slim",
-    "//tensorflow/contrib/slim/python/slim/data:data_pip",
-    "//tensorflow/contrib/slim/python/slim/nets:nets_pip",
-    "//tensorflow/contrib/specs:specs",
-    "//tensorflow/contrib/summary:summary_test_util",
-    "//tensorflow/contrib/tensor_forest:init_py",
-    "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip",
-    "//tensorflow/contrib/timeseries:timeseries_pip",
-    "//tensorflow/contrib/tpu",
     "//tensorflow/examples/tutorials/mnist:package",
     "//tensorflow/lite/python:interpreter_test_data",
     "//tensorflow/lite/python:tflite_convert",
@@ -122,13 +101,47 @@ COMMON_PIP_DEPS = [
     "//tensorflow/tools/dist_test/server:grpc_tensorflow_server",
 ]
 
+COMMON_PIP_DEPS_V1 = COMMON_PIP_DEPS + [
+    "//tensorflow/contrib/autograph:autograph",
+    "//tensorflow/contrib/boosted_trees:boosted_trees_pip",
+    "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
+    "//tensorflow/contrib/compiler:xla",
+    "//tensorflow/contrib/constrained_optimization:constrained_optimization_pip",
+    "//tensorflow/contrib/eager/python/examples:examples_pip",
+    "//tensorflow/contrib/eager/python:evaluator",
+    "//tensorflow/contrib/gan:gan",
+    "//tensorflow/contrib/graph_editor:graph_editor_pip",
+    "//tensorflow/contrib/keras:keras",
+    "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip",
+    "//tensorflow/contrib/nn:nn_py",
+    "//tensorflow/contrib/predictor:predictor_pip",
+    "//tensorflow/contrib/proto:proto",
+    "//tensorflow/contrib/receptive_field:receptive_field_pip",
+    "//tensorflow/contrib/rate:rate",
+    "//tensorflow/contrib/rpc:rpc_pip",
+    "//tensorflow/contrib/session_bundle:session_bundle_pip",
+    "//tensorflow/contrib/signal:signal_py",
+    "//tensorflow/contrib/slim:slim",
+    "//tensorflow/contrib/slim/python/slim/data:data_pip",
+    "//tensorflow/contrib/slim/python/slim/nets:nets_pip",
+    "//tensorflow/contrib/specs:specs",
+    "//tensorflow/contrib/summary:summary_test_util",
+    "//tensorflow/contrib/tensor_forest:init_py",
+    "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip",
+    "//tensorflow/contrib/timeseries:timeseries_pip",
+    "//tensorflow/contrib/tpu",
+]
+
 # On Windows, python binary is a zip file of runfiles tree.
 # Add everything to its data dependency for generating a runfiles tree
 # for building the pip package on Windows.
 py_binary(
     name = "simple_console_for_windows",
     srcs = ["simple_console_for_windows.py"],
-    data = COMMON_PIP_DEPS + ["//tensorflow/python:pywrap_tensorflow_import_lib_file"],
+    data = select({
+        "api_version_2": COMMON_PIP_DEPS,
+        "//conditions:default": COMMON_PIP_DEPS_V1,
+    }) + ["//tensorflow/python:pywrap_tensorflow_import_lib_file"],
     srcs_version = "PY2AND3",
     deps = ["//tensorflow:tensorflow_py"],
 )
@@ -232,7 +245,10 @@ sh_binary(
         "//tensorflow:windows": [
             ":simple_console_for_windows",
         ],
-        "//conditions:default": COMMON_PIP_DEPS + [
+        "api_version_2": COMMON_PIP_DEPS + [
+            ":simple_console",
+        ],
+        "//conditions:default": COMMON_PIP_DEPS_V1 + [
             ":simple_console",
         ],
     }) + if_mkl_ml(["//third_party/mkl:intel_binary_blob"]),
-- 
GitLab


From 2885c4bb67696f47baad8b921cb39bcb33b1f6c2 Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Wed, 12 Dec 2018 16:29:27 -0800
Subject: [PATCH 457/873] Export tf.train.* session_run_hook.py classes to
 tf.estimator.* (exporting to both v1 and v2). Keep the existing only in v1.

PiperOrigin-RevId: 225276892
---
 .../python/training/session_run_hook.py       |  2 +-
 ...nsorflow.estimator.-session-run-args.pbtxt | 27 ++++++++++++++++++
 ...rflow.estimator.-session-run-context.pbtxt | 25 +++++++++++++++++
 ...sorflow.estimator.-session-run-hook.pbtxt} |  2 +-
 ...orflow.estimator.-session-run-values.pbtxt | 27 ++++++++++++++++++
 .../api/golden/v1/tensorflow.estimator.pbtxt  | 16 +++++++++++
 ...nsorflow.estimator.-session-run-args.pbtxt | 27 ++++++++++++++++++
 ...rflow.estimator.-session-run-context.pbtxt | 25 +++++++++++++++++
 ...nsorflow.estimator.-session-run-hook.pbtxt | 28 +++++++++++++++++++
 ...orflow.estimator.-session-run-values.pbtxt | 27 ++++++++++++++++++
 .../api/golden/v2/tensorflow.estimator.pbtxt  | 16 +++++++++++
 .../api/golden/v2/tensorflow.train.pbtxt      |  4 ---
 tensorflow/tools/compatibility/renames_v2.py  |  7 +++--
 13 files changed, 224 insertions(+), 9 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-args.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-context.pbtxt
 rename tensorflow/tools/api/golden/{v2/tensorflow.train.-session-run-hook.pbtxt => v1/tensorflow.estimator.-session-run-hook.pbtxt} (95%)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-values.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-args.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-context.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-hook.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-values.pbtxt

diff --git a/tensorflow/python/training/session_run_hook.py b/tensorflow/python/training/session_run_hook.py
index e9a61def74..886ca46ed5 100644
--- a/tensorflow/python/training/session_run_hook.py
+++ b/tensorflow/python/training/session_run_hook.py
@@ -94,7 +94,7 @@ import collections
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("train.SessionRunHook")
+@tf_export(v1=["train.SessionRunHook"])
 class SessionRunHook(object):
   """Hook to extend calls to MonitoredSession.run()."""
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-args.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-args.pbtxt
new file mode 100644
index 0000000000..b375c74294
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-args.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.estimator.SessionRunArgs"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunArgs\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunArgs\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "feed_dict"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "fetches"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "options"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-context.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-context.pbtxt
new file mode 100644
index 0000000000..cb4ac9f50e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-context.pbtxt
@@ -0,0 +1,25 @@
+path: "tensorflow.estimator.SessionRunContext"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunContext\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "original_args"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "session"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "stop_requested"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'original_args\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "request_stop"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.-session-run-hook.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-hook.pbtxt
similarity index 95%
rename from tensorflow/tools/api/golden/v2/tensorflow.train.-session-run-hook.pbtxt
rename to tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-hook.pbtxt
index db1aa24acf..54e9ad9ed4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.-session-run-hook.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-hook.pbtxt
@@ -1,4 +1,4 @@
-path: "tensorflow.train.SessionRunHook"
+path: "tensorflow.estimator.SessionRunHook"
 tf_class {
   is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
   is_instance: "<type \'object\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-values.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-values.pbtxt
new file mode 100644
index 0000000000..6788141696
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.-session-run-values.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.estimator.SessionRunValues"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunValues\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunValues\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "options"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "results"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "run_metadata"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.pbtxt
index d3656ae045..6f57505afe 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.pbtxt
@@ -132,6 +132,22 @@ tf_module {
     name: "SecondOrStepTimer"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SessionRunArgs"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SessionRunContext"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SessionRunHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SessionRunValues"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "StepCounterHook"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-args.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-args.pbtxt
new file mode 100644
index 0000000000..b375c74294
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-args.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.estimator.SessionRunArgs"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunArgs\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunArgs\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "feed_dict"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "fetches"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "options"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-context.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-context.pbtxt
new file mode 100644
index 0000000000..cb4ac9f50e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-context.pbtxt
@@ -0,0 +1,25 @@
+path: "tensorflow.estimator.SessionRunContext"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunContext\'>"
+  is_instance: "<type \'object\'>"
+  member {
+    name: "original_args"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "session"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "stop_requested"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+    argspec: "args=[\'self\', \'original_args\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "request_stop"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-hook.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-hook.pbtxt
new file mode 100644
index 0000000000..54e9ad9ed4
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-hook.pbtxt
@@ -0,0 +1,28 @@
+path: "tensorflow.estimator.SessionRunHook"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunHook\'>"
+  is_instance: "<type \'object\'>"
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "after_create_session"
+    argspec: "args=[\'self\', \'session\', \'coord\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "after_run"
+    argspec: "args=[\'self\', \'run_context\', \'run_values\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "before_run"
+    argspec: "args=[\'self\', \'run_context\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "begin"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+  }
+  member_method {
+    name: "end"
+    argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=None"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-values.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-values.pbtxt
new file mode 100644
index 0000000000..6788141696
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-session-run-values.pbtxt
@@ -0,0 +1,27 @@
+path: "tensorflow.estimator.SessionRunValues"
+tf_class {
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunValues\'>"
+  is_instance: "<class \'tensorflow.python.training.session_run_hook.SessionRunValues\'>"
+  is_instance: "<type \'tuple\'>"
+  member {
+    name: "options"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "results"
+    mtype: "<type \'property\'>"
+  }
+  member {
+    name: "run_metadata"
+    mtype: "<type \'property\'>"
+  }
+  member_method {
+    name: "__init__"
+  }
+  member_method {
+    name: "count"
+  }
+  member_method {
+    name: "index"
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
index d3656ae045..6f57505afe 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
@@ -132,6 +132,22 @@ tf_module {
     name: "SecondOrStepTimer"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "SessionRunArgs"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SessionRunContext"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SessionRunHook"
+    mtype: "<type \'type\'>"
+  }
+  member {
+    name: "SessionRunValues"
+    mtype: "<type \'type\'>"
+  }
   member {
     name: "StepCounterHook"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
index cc63a7fd82..c72564e598 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt
@@ -72,10 +72,6 @@ tf_module {
     name: "ServerDef"
     mtype: "<class \'google.protobuf.pyext.cpp_message.GeneratedProtocolMessageType\'>"
   }
-  member {
-    name: "SessionRunHook"
-    mtype: "<type \'type\'>"
-  }
   member_method {
     name: "cosine_decay"
     argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], "
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index ad4c3d2750..2763a0ca63 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -625,9 +625,10 @@ renames = {
     'tf.train.Server': 'tf.distribute.Server',
     'tf.train.SessionCreator': 'tf.compat.v1.train.SessionCreator',
     'tf.train.SessionManager': 'tf.compat.v1.train.SessionManager',
-    'tf.train.SessionRunArgs': 'tf.compat.v1.train.SessionRunArgs',
-    'tf.train.SessionRunContext': 'tf.compat.v1.train.SessionRunContext',
-    'tf.train.SessionRunValues': 'tf.compat.v1.train.SessionRunValues',
+    'tf.train.SessionRunArgs': 'tf.estimator.SessionRunArgs',
+    'tf.train.SessionRunContext': 'tf.estimator.SessionRunContext',
+    'tf.train.SessionRunHook': 'tf.estimator.SessionRunHook',
+    'tf.train.SessionRunValues': 'tf.estimator.SessionRunValues',
     'tf.train.SingularMonitoredSession': 'tf.compat.v1.train.SingularMonitoredSession',
     'tf.train.StepCounterHook': 'tf.estimator.StepCounterHook',
     'tf.train.StopAtStepHook': 'tf.estimator.StopAtStepHook',
-- 
GitLab


From 03e7214049ea6ae802e36d3ffbf49b0e57f1a721 Mon Sep 17 00:00:00 2001
From: Kay Zhu <kayzhu@google.com>
Date: Wed, 12 Dec 2018 16:38:40 -0800
Subject: [PATCH 458/873] [TF2XLA] In Resampler correctly handle out of
 boundary samples by returning 0 for the backward pass.

Note out of boundary here means outside of (-1, image_size) index, instead of
(0, image_size -1). As a result the images will be padded with 0 before
gathering / scattering operation is performed, then sliced back to obtain the
actual results.

PiperOrigin-RevId: 225278400
---
 .../compiler/tf2xla/kernels/resampler_ops.cc  | 158 +++++++++++++++---
 .../resampler/xla/resampler_ops_xla_test.py   |  36 ++++
 2 files changed, 168 insertions(+), 26 deletions(-)

diff --git a/tensorflow/compiler/tf2xla/kernels/resampler_ops.cc b/tensorflow/compiler/tf2xla/kernels/resampler_ops.cc
index 54d34a38ab..f9985d5260 100644
--- a/tensorflow/compiler/tf2xla/kernels/resampler_ops.cc
+++ b/tensorflow/compiler/tf2xla/kernels/resampler_ops.cc
@@ -125,7 +125,7 @@ XlaOp ConcatenateIota(xla::XlaBuilder* b, XlaOp indices,
   dimensions.back() = 1;
 
   auto batch_indices =
-      xla::Iota(b, xla::ShapeUtil::MakeShape(xla::U32, dimensions),
+      xla::Iota(b, xla::ShapeUtil::MakeShape(xla::S32, dimensions),
                 /*iota_dimension=*/0);
 
   return xla::ConcatInDim(b, {batch_indices, indices}, dimensions.size() - 1);
@@ -189,11 +189,53 @@ XlaOp ScatterToGradData(XlaOpKernelContext* ctx, XlaOp grad_data, XlaOp indices,
                       scatter_dim_numbers);
 }
 
+// Bounds samples to 0 if the warp image indices are out of the (-1, image_size)
+// bound.
+// The resulting dimension is given by 'result_dims'.
+XlaOp BoundSamples(XlaOpKernelContext* ctx, XlaOp warp,
+                   xla::PrimitiveType warp_type, TensorShape warp_shape,
+                   std::vector<int64> result_dims,
+                   std::vector<int64> broadcasted_dims, int64 last_warp_dim,
+                   xla::Shape data_shape, XlaOp sample) {
+  auto is_gt_minus_one =
+      xla::Gt(warp,
+              xla::ConvertElementType(
+                  xla::ConstantR1<float>(ctx->builder(), {-1, -1}), warp_type),
+              /*broadcast_dimensions=*/{warp_shape.dims() - 1});
+  auto is_lt_image_size = xla::Lt(
+      warp,
+      xla::ConvertElementType(
+          xla::ConstantR1<float>(
+              ctx->builder(),
+              {/*width=*/static_cast<float>(data_shape.dimensions(2)),
+               /*height=*/static_cast<float>(data_shape.dimensions(1))}),
+          warp_type),
+      /*broadcast_dimensions=*/{warp_shape.dims() - 1});
+
+  auto is_in_bound_padded_x_y = xla::And(is_gt_minus_one, is_lt_image_size);
+  // Reduce along last dimension. The resulting dimension is:
+  // [batch, dim_0, ...dim_n].
+  auto is_in_bound = xla::Reduce(
+      is_in_bound_padded_x_y, xla::ConstantR0<bool>(ctx->builder(), true),
+      xla::CreateScalarAndComputation(xla::PrimitiveType::PRED, ctx->builder()),
+      {last_warp_dim});
+
+  // Broadcast 'is_in_bound' to the same dimension as 'result_dims'.
+  auto broadcasted_is_in_bound =
+      xla::BroadcastInDim(is_in_bound, result_dims, broadcasted_dims);
+
+  // Set out of bound samples to zero.
+  auto zeros =
+      xla::Broadcast(xla::Zero(ctx->builder(), warp_type), result_dims);
+  return xla::Select(broadcasted_is_in_bound, sample, zeros);
+}
+
 // Build computation the backprop into input 'data'.
 // Where input:
 // grad_output is of dimension [batch, dim_0, ...dim_n, channel]
 // ratio is of dimension [batch, dim_0, ...dim_n, 2]
 // gather_indices is of dimension [batch, dim_0, ...dim_n, 3]
+// data_shape is of dimension [batch, x(width), y(height), channel]
 //
 // Output:
 // scatter-add to each 2x2 grad_data neighbor:
@@ -201,10 +243,12 @@ XlaOp ScatterToGradData(XlaOpKernelContext* ctx, XlaOp grad_data, XlaOp indices,
 //  grad_data[cx, fy, chan] += output_grad * (1 - dx) * dy
 //  grad_data[fx, cy, chan] += output_grad * dx * (1 - dy)
 //  grad_data[cx, cy, chan] += output_grad * (1 - dx) * (1 - dy)
-// where (dx, dy) is (1 - ratio).
+// where (dx, dy) is (1 - ratio). If (dx, dy) is out of bound, then the their
+// contribution is 0 to 'grad_data'.
 XlaOp CalculateGradData(XlaOpKernelContext* ctx, XlaOp grad_output, XlaOp ratio,
-                        XlaOp gather_indices, xla::PrimitiveType warp_type,
-                        TensorShape warp_shape, int64 data_channels,
+                        XlaOp gather_indices, XlaOp warp,
+                        xla::PrimitiveType warp_type, TensorShape warp_shape,
+                        int64 last_warp_dim, int64 data_channels,
                         xla::Shape data_shape) {
   // Weights tensor has dimension [batch, dim_0, ... dim_n, 4].
   auto weights = BilinearWeights(ctx, ratio, warp_shape, warp_type);
@@ -229,6 +273,18 @@ XlaOp CalculateGradData(XlaOpKernelContext* ctx, XlaOp grad_output, XlaOp ratio,
   std::iota(reshaped_weights_indices.begin(), reshaped_weights_indices.end(),
             0);
 
+  // Set out of bound weights to 0.
+  // The dimension of the reshaped_weight: [batch, dim_0, ...dim_n, 2, 2].
+  std::vector<int64> reshaped_result_dims(warp_dims.begin(),
+                                          warp_dims.end() - 1);
+  reshaped_result_dims.push_back(2);
+  reshaped_result_dims.push_back(2);
+  std::vector<int64> broadcasted_dims(warp_dims.size() - 1);
+  std::iota(broadcasted_dims.begin(), broadcasted_dims.end(), 0);
+  reshaped_weights = BoundSamples(ctx, warp, warp_type, warp_shape,
+                                  reshaped_result_dims, broadcasted_dims,
+                                  last_warp_dim, data_shape, reshaped_weights);
+
   // The dimension is [batch, dim_0, ..., dim_n, 2, 2, data_channel].
   auto broadcast_reshaped_weights = xla::BroadcastInDim(
       reshaped_weights, weights_with_channels_dims, reshaped_weights_indices);
@@ -245,18 +301,41 @@ XlaOp CalculateGradData(XlaOpKernelContext* ctx, XlaOp grad_output, XlaOp ratio,
   auto grad_data = xla::ConstantLiteral(
       ctx->builder(), xla::Literal::CreateFromShape(data_shape));
 
-  return ScatterToGradData(ctx, grad_data, gather_indices,
-                           grad_output_multiply_weights, warp_shape.dims(),
-                           warp_type);
+  // Pad grad data then slice it back.
+  //
+  // After left and right column 0-padding, the new dimension of padded data
+  // will be [batch, x+2, y+2, channel].
+  auto padded_grad_data =
+      xla::Pad(grad_data, xla::Zero(ctx->builder(), warp_type),
+               xla::MakeEdgePaddingConfig({{0, 0}, {1, 1}, {1, 1}, {0, 0}}));
+
+  auto shifting_value = xla::ConstantR1<int32>(
+      ctx->builder(), {/*batch=*/0, /*x(width)=*/1, /*y(height)=*/1});
+  auto shifted_gather_indices =
+      xla::Add(gather_indices, shifting_value, {last_warp_dim});
+
+  auto updated_grad_data = ScatterToGradData(
+      ctx, padded_grad_data, shifted_gather_indices,
+      grad_output_multiply_weights, warp_shape.dims(), warp_type);
+
+  const int64 batch_size = data_shape.dimensions(0);
+  const int64 width = data_shape.dimensions(1);
+  const int64 height = data_shape.dimensions(2);
+  // Slice out the result accounting for the padding.
+  return xla::Slice(
+      updated_grad_data, /*start_indices=*/{0, 1, 1, 0},
+      /*limit_indices=*/{batch_size, width + 1, height + 1, data_channels},
+      /*strides=*/{1, 1, 1, 1});
 }
 
 // Build computation for the backprop into input 'warp'.
 // Where input:
-// warp is of dimension [batch, dim_0, ...dim_n, 2]
-// grad_output is of dimension [batch, dim_0, ...dim_n, channel]
-// ratio is of dimension [batch, dim_0, ...dim_n, 2]
-// gather_indices is of dimension [batch, dim_0, ...dim_n, 3]
-// data is of dimension [batch, x, y, channel]
+//  warp is of dimension [batch, dim_0, ...dim_n, 2]
+//  grad_output is of dimension [batch, dim_0, ...dim_n, channel]
+//  ratio is of dimension [batch, dim_0, ...dim_n, 2]
+//  gather_indices is of dimension [batch, dim_0, ...dim_n, 3] where the last
+//  dimension of size 3 is for {batch, x(width), y(height)}.
+//  data is of dimension [batch, x, y, channel]
 //
 // Output (simplified by ignoring the batch dimensions):
 // Since the forward path has:
@@ -275,12 +354,12 @@ XlaOp CalculateGradData(XlaOpKernelContext* ctx, XlaOp grad_output, XlaOp ratio,
 //    grad_warp_x = py * (img_cxcy - img_fxcy) + (1-py) * (img_cxfy-img_fxfy)
 //    grad_warp_y = px * (img_cxcy - img_cxfy) + (1-px) * (img_fxcy-img_fxfy)
 //
-// where (px, py) is warp, (fx, fy) is the left top corner and (cx, cy) is the
+// where (px, py) is warp, (fx, fy) is the top left corner and (cx, cy) is the
 // bottom right corner in a 2x2 neighborhood.
 XlaOp CalculateGradWarp(XlaOpKernelContext* ctx, XlaOp grad_output, XlaOp ratio,
                         XlaOp gather_indices, XlaOp data,
                         TensorShape warp_shape, int64 data_channels,
-                        xla::PrimitiveType data_type) {
+                        xla::PrimitiveType data_type, xla::Shape data_shape) {
   auto warp_dims = warp_shape.dim_sizes();
   std::vector<int64> warp_dims_without_last_dims(warp_dims.begin(),
                                                  warp_dims.end() - 1);
@@ -289,12 +368,30 @@ XlaOp CalculateGradWarp(XlaOpKernelContext* ctx, XlaOp grad_output, XlaOp ratio,
   std::vector<int64> neighbor_broadcast_dims = warp_dims_without_last_dims;
   neighbor_broadcast_dims.push_back(4);
 
-  // The dimension is [batch, dim_0, ... dim_n, 4, data_channels]
-  auto neighbors_data = Gather2by2Neighbors(
-      ctx->builder(), data, gather_indices, data_channels, warp_shape.dims());
+  // With dimension [batch, dim_0, ...dim_n, 4]
+  auto neighbor_broadcast_shape =
+      xla::ShapeUtil::MakeShape(data_type, neighbor_broadcast_dims);
 
   const int64 last_warp_dim = warp_shape.dims() - 1;
 
+  // Pad data with 0, before gathering such that 0 will be returned for samples
+  // in the range of (-1, 0) or (image_dimension-1, image_dimension).
+  // After left and right column 0-padding, the new dimension of padded data
+  // will be [batch, x+2, y+2, channel].
+  auto padded_data =
+      xla::Pad(data, xla::Zero(ctx->builder(), data_type),
+               xla::MakeEdgePaddingConfig({{0, 0}, {1, 1}, {1, 1}, {0, 0}}));
+
+  auto shifting_value = xla::ConstantR1<int32>(
+      ctx->builder(), {/*batch=*/0, /*x(width)=*/1, /*y(height)=*/1});
+  auto shifted_gather_indices =
+      xla::Add(gather_indices, shifting_value, {last_warp_dim});
+
+  // The dimension is [batch, dim_0, ... dim_n, 4, data_channels]
+  auto neighbors_data =
+      Gather2by2Neighbors(ctx->builder(), padded_data, shifted_gather_indices,
+                          data_channels, warp_shape.dims());
+
   // Since we will be creating the dot product of:
   //  lhs: [batch, dim_0, ...dim_n, 4]
   // and
@@ -417,7 +514,7 @@ class ResamplerOp : public XlaOpKernel {
     // Find the coordinates of the top left corner for the 2x2 region to be
     // sampled from. The dimensions are [batch, dim_0, ... dim_n, 2] where the
     // last dimension of size 2 in turn is [x, y].
-    XlaOp top_left = xla::ConvertElementType(warp, xla::U32);
+    XlaOp top_left = xla::ConvertElementType(warp, xla::S32);
 
     auto gather_indices = ConcatenateIota(ctx->builder(), top_left, warp_shape);
 
@@ -526,7 +623,8 @@ class ResamplerGradOp : public XlaOpKernel {
                                           size, "]"));
     }
     // Last dimension of warp shape must be of size 2.
-    OP_REQUIRES(ctx, warp_shape.dim_size(warp_shape.dims() - 1) == 2,
+    const int64 last_warp_dim = warp_shape.dims() - 1;
+    OP_REQUIRES(ctx, warp_shape.dim_size(last_warp_dim) == 2,
                 errors::InvalidArgument(
                     "the last dimension of warp must be exactly size 2."));
     xla::PrimitiveType warp_type = ctx->input_xla_type(1);
@@ -549,24 +647,32 @@ class ResamplerGradOp : public XlaOpKernel {
     // Find the top left corner coordinate for the region to be sampled from.
     // The dimensions are [batch, dim_0, ... dim_n, 2] where the last dimension
     // of size 2 in turn is [x, y].
-    XlaOp top_left = xla::ConvertElementType(warp, xla::U32);
+    XlaOp top_left = xla::ConvertElementType(xla::Floor(warp), xla::S32);
 
-    // Dimensions are [batch, dim_0, ... dim_n, 2]
+    // Dimensions are [batch, dim_0, ... dim_n, 2].
     XlaOp ratio = warp - xla::ConvertElementType(top_left, warp_type);
 
     // Indices for gathering neighboring pixels.
     auto gather_indices = ConcatenateIota(ctx->builder(), top_left, warp_shape);
 
-    auto grad_data =
-        CalculateGradData(ctx, grad_output, ratio, gather_indices, warp_type,
-                          warp_shape, data_channels, data_shape);
+    auto grad_data = CalculateGradData(
+        ctx, grad_output, ratio, gather_indices, warp, warp_type, warp_shape,
+        last_warp_dim, data_channels, data_shape);
 
     auto grad_warp =
         CalculateGradWarp(ctx, grad_output, ratio, gather_indices, data,
-                          warp_shape, data_channels, data_type);
+                          warp_shape, data_channels, data_type, data_shape);
+    auto warp_dims = warp_shape.dim_sizes();
+    std::vector<int64> result_dims(warp_dims.begin(), warp_dims.end() - 1);
+    result_dims.push_back(2);
+    std::vector<int64> broadcasted_dims(warp_dims.size() - 1);
+    std::iota(broadcasted_dims.begin(), broadcasted_dims.end(), 0);
+    auto grad_warp_bounded =
+        BoundSamples(ctx, warp, warp_type, warp_shape, result_dims,
+                     broadcasted_dims, last_warp_dim, data_shape, grad_warp);
 
     ctx->SetOutput(0, grad_data);
-    ctx->SetOutput(1, grad_warp);
+    ctx->SetOutput(1, grad_warp_bounded);
   }
 };
 
diff --git a/tensorflow/contrib/resampler/xla/resampler_ops_xla_test.py b/tensorflow/contrib/resampler/xla/resampler_ops_xla_test.py
index d8ca0eab27..cec4c3c233 100644
--- a/tensorflow/contrib/resampler/xla/resampler_ops_xla_test.py
+++ b/tensorflow/contrib/resampler/xla/resampler_ops_xla_test.py
@@ -164,6 +164,15 @@ class ResamplerOpsTest(xla_test.XLATestCase):
       expected = [[[0.0], [27.62]]]
       self._assertForwardOpMatchesExpected(input_np, warp_np, expected)
 
+      expected_grad_data = [[[[0.12], [0.27999997]], [[0.18000001],
+                                                      [0.42000002]]]]
+      expected_grad_warp = [[[0., 0.], [22.60000038, 35.20000076]]]
+
+      grad_output = np.ones([1, 2, 1], dtype=dtype)
+      self._assertBackwardOpMatchesExpected(input_np, warp_np, grad_output,
+                                            expected_grad_data,
+                                            expected_grad_warp)
+
     # One of (x, y) is less than 0.
     for dtype in self.float_types:
       input_shape = [1, 2, 2, 1]
@@ -171,11 +180,21 @@ class ResamplerOpsTest(xla_test.XLATestCase):
       input_np = np.array(input_data, dtype=dtype).reshape(input_shape)
 
       warp_shape = [1, 2, 2]
+      # -1 is out of bound for grad_warp.
       warp_data = [-1, 0.1, 0.7, 0.6]
       warp_np = np.array(warp_data, dtype=dtype).reshape(warp_shape)
       expected = [[[0.0], [27.62]]]
       self._assertForwardOpMatchesExpected(input_np, warp_np, expected)
 
+      expected_grad_data = [[[[0.12], [0.27999997]], [[0.18000001],
+                                                      [0.42000002]]]]
+      expected_grad_warp = [[[0., 0.], [22.60000038, 35.20000076]]]
+
+      grad_output = np.ones([1, 2, 1], dtype=dtype)
+      self._assertBackwardOpMatchesExpected(input_np, warp_np, grad_output,
+                                            expected_grad_data,
+                                            expected_grad_warp)
+
     # Both of (x, y) are greater than image size.
     for dtype in self.float_types:
       input_shape = [1, 2, 2, 1]
@@ -183,11 +202,20 @@ class ResamplerOpsTest(xla_test.XLATestCase):
       input_np = np.array(input_data, dtype=dtype).reshape(input_shape)
 
       warp_shape = [1, 2, 2]
+      # -0.1 is *inbound* for grad_warp and grad_data, 2.1 is out of bound.
       warp_data = [-0.1, 0.1, 1.2, 2.1]
       warp_np = np.array(warp_data, dtype=dtype).reshape(warp_shape)
       expected = [[[0.0], [0.0]]]
       self._assertForwardOpMatchesExpected(input_np, warp_np, expected)
 
+      expected_grad_data = [[[[0.81], [0.0]], [[0.09], [0.0]]]]
+      expected_grad_warp = [[[10.30, 2.7], [0.0, 0.0]]]
+
+      grad_output = np.ones([1, 2, 1], dtype=dtype)
+      self._assertBackwardOpMatchesExpected(input_np, warp_np, grad_output,
+                                            expected_grad_data,
+                                            expected_grad_warp)
+
     # One of (x, y) is greater than image size.
     for dtype in self.float_types:
       input_shape = [1, 2, 2, 1]
@@ -200,6 +228,14 @@ class ResamplerOpsTest(xla_test.XLATestCase):
       expected = [[[0.0], [0.0]]]
       self._assertForwardOpMatchesExpected(input_np, warp_np, expected)
 
+      expected_grad_data = [[[[0.81], [0.81]], [[0.0], [0.08]]]]
+      expected_grad_warp = [[[-4.5, 9.5], [-9.9, 39.20]]]
+
+      grad_output = np.ones([1, 2, 1], dtype=dtype)
+      self._assertBackwardOpMatchesExpected(input_np, warp_np, grad_output,
+                                            expected_grad_data,
+                                            expected_grad_warp)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 1bcae5d84b937ea17b70ff25824ea292b8d95f4f Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Mon, 26 Nov 2018 09:31:56 -0800
Subject: [PATCH 459/873] StridedSlice op + some unit tests

Fix typo

Refactor. Add Ok unit tests

Improve unit tests, comments.
---
 .../contrib/tensorrt/convert/convert_graph.cc |  65 ++--
 .../contrib/tensorrt/convert/convert_nodes.cc | 251 ++++++++++++++-
 .../tensorrt/convert/convert_nodes_test.cc    | 302 +++++++++++++++++-
 3 files changed, 582 insertions(+), 36 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index ae211a93c3..623cd79f32 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -89,51 +89,52 @@ Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) {
   // TODO(laigd): move this set to TrtNodeValidator where it should belong.
   // LINT.IfChange
   static const std::set<string> candidate_ops = {
-      "Identity",
-      "Snapshot",
-      "Const",
-      "Conv2D",
-      "MaxPool",
-      "BiasAdd",
-      "Relu",
-      "Sigmoid",
-      "Tanh",
+      "Abs",
       "Add",
-      "Mul",
-      "Sub",
-      "Rsqrt",
-      "Pad",
-      "Mean",
       "AvgPool",
+      "BatchMatMul",
+      "BiasAdd",
       "ConcatV2",
+      "Const",
+      "Conv2D",
       "DepthwiseConv2dNative",
-      "FusedBatchNorm",
-      "FusedBatchNormV2",
       "Div",
-      "RealDiv",
-      "Rsqrt",
-      "Reciprocal",
       "Exp",
+      "ExpandDims",
+      "FusedBatchNorm",
+      "FusedBatchNormV2",
+      "Identity",
       "Log",
-      "Sqrt",
-      "Abs",
-      "Neg",
-      "Transpose",
-      "Reshape",
       "MatMul",
-      "BatchMatMul",
-      "Softmax",
-      "Minimum",
-      "Maximum",
-      "TopKV2",
-      "Sum",
-      "Prod",
       "Max",
+      "MaxPool",
+      "Maximum",
+      "Mean",
       "Min",
+      "Minimum",
+      "Mul",
+      "Neg",
+      "Pad",
+      "Prod",
+      "RealDiv",
+      "Reciprocal",
+      "Relu",
       "Relu6",
+      "Reshape",
+      "Rsqrt",
+      "Rsqrt",
+      "Sigmoid",
+      "Snapshot",
+      "Softmax",
+      "Sqrt",
       "Square",
-      "ExpandDims",
       "Squeeze",
+      "StridedSlice",
+      "Sub",
+      "Sum",
+      "Tanh",
+      "TopKV2",
+      "Transpose",
   };
   bool is_supported_op_type =
       (candidate_ops.count(node->type_string()) ||
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 561ea37dae..fdecfe5928 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -632,6 +632,11 @@ bool TFAttrs::get<bool>(const string& key) const {
   return this->at(key)->b();
 }
 
+template <>
+int TFAttrs::get<int>(const string& key) const {
+  return this->at(key)->i();
+}
+
 // TODO(jie): reorder4 & reorder2 should be merged?
 // TODO(aaroey): fix the order of parameters.
 template <typename T>
@@ -2028,6 +2033,245 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
   return tensorflow::Status::OK();
 }
 
+tensorflow::Status GetStridedSliceBound(
+    const std::vector<int>& input_dims,
+    const TRT_ShapedWeights& bound_weights,
+    string bound_name,
+    string node_name,
+    std::vector<int>& output_bound) {
+  const int* weights_ptr =
+      static_cast<int*>(const_cast<void*>(bound_weights.GetValues()));
+  output_bound = std::vector<int>(weights_ptr,
+                                  weights_ptr + bound_weights.count());
+  if (output_bound.size() != input_dims.size()) {
+    return tensorflow::errors::InvalidArgument(
+        "StridedSlice \"", bound_name, "\" specified ",
+        std::to_string(output_bound.size()), " dimensions, but input rank is ",
+        std::to_string(input_dims.size()), ", at ", node_name);
+  }
+  for (int i = 0; i < output_bound.size(); i++) {
+    // Make sure bound is valid.
+    if ((output_bound[i] < -input_dims[i]) ||
+        (output_bound[i] > input_dims[i])) {
+      return tensorflow::errors::InvalidArgument(
+          bound_name, " for StridedSlice is invalid, must be in the range "
+          "[-rank(input), rank(input)], at ", node_name);
+    }
+    // Convert negative values to their positive equivalent.
+    if (output_bound[i] < 0) {
+      output_bound[i] += input_dims[i];
+    }
+  }
+  return tensorflow::Status::OK();
+}
+
+tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
+  const auto& inputs = params->inputs;
+  const auto& node_def = params->node_def;
+  if (inputs.size() != 4) {
+    return tensorflow::errors::InvalidArgument(
+        "StridedSlice expects 4 inputs, at ", node_def.name());
+  }
+  if (!inputs.at(1).is_weights() ||
+      !inputs.at(2).is_weights() ||
+      !inputs.at(3).is_weights()) {
+    return tensorflow::errors::InvalidArgument(
+        "StridedSlice expects weights for begin, end, and strides, at ",
+        node_def.name());
+  }
+  if (!inputs.at(0).is_tensor()) {
+    return tensorflow::errors::Unimplemented(
+        "StridedSlice is only implemented for tensors, at ",
+        node_def.name());
+  }
+  // Get input dims.
+  nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
+  std::vector<int> input_dims(dims.d, dims.d + dims.nbDims);
+  if (inputs.at(0).is_tensor()) {
+    // Temporarily add batch dimension so that indexes line up properly.
+    input_dims.insert(input_dims.begin(), inputs.at(0).batch_size());
+  }
+  if (input_dims.size() > 4) {
+    return tensorflow::errors::Unimplemented(
+      "StridedSlice is not implemented for tensors with rank > 4, at ", 
+      node_def.name());
+  }
+  TFAttrs attrs(node_def);
+  // Get begin and end bounds per axis.
+  std::vector<int> begin, end;
+  TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(1).weights(),
+                                          "begin", node_def.name(), begin));
+  TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(2).weights(),
+                                          "end", node_def.name(), end));
+  int begin_mask = attrs.get<int>("begin_mask");
+  for (int i = 0; i < begin.size(); i++) {
+    if ((1 << i) & begin_mask) {
+      begin[i] = 0;
+    }
+  }
+  int end_mask = attrs.get<int>("end_mask");
+  for (int i = 0; i < end.size(); i++) {
+    if ((1 << i) & end_mask) {
+      end[i] = input_dims[i];
+    }
+  }
+  // Get strides per axis (must all be 1).
+  TRT_ShapedWeights stride_weights = inputs.at(3).weights();
+  const int* stride_weights_ptr =
+      static_cast<int*>(const_cast<void*>(stride_weights.GetValues()));
+  std::vector<int> strides(stride_weights_ptr,
+                           stride_weights_ptr + stride_weights.count());
+  for (int x : strides) {
+    if (x != 1) {
+      return tensorflow::errors::Unimplemented(
+        "StridedSlice is only implemented for stride of 1, at ", 
+        node_def.name());
+    }
+  }
+  // Unsupported options.
+  for (string attr : {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) {
+    int ellipsis_mask = attrs.get<int>(attr);
+    if (ellipsis_mask != 0) {
+      return tensorflow::errors::Unimplemented(
+        attr, " is not implemented for StridedSlice, at ", 
+        node_def.name());
+    }
+  }
+
+  nvinfer1::ITensor* tensor = const_cast<nvinfer1::ITensor*>(
+      inputs.at(0).tensor());
+  // Reshape if necessary to 4-D.
+  const bool need_reshape = (input_dims.size() != 4);
+  int reshape_dims_added = 0;
+  nvinfer1::Dims reshape_dims; 
+  if (need_reshape) {
+    // Add new dims after batch dim until tensor is 4D.
+    while (input_dims.size() < 4) {
+      input_dims.insert(input_dims.begin()+1, 1);
+      begin.insert(begin.begin()+1, 0);
+      end.insert(end.begin()+1, 1);
+      reshape_dims_added++;
+    }
+    reshape_dims = VectorToTrtDims(input_dims, /*ignore_first_dim=*/true);
+  }
+  // Find dimensions which need to be sliced.
+  std::vector<int> pad_dims;
+  for (int i = 0; i < input_dims.size(); i++) {
+    if (begin[i] != 0 || (end[i] - input_dims[i]) != 0) {
+      if (i == 0) {
+        return tensorflow::errors::Unimplemented(
+            "StridedSlice can't modify batch dim, at ", node_def.name());
+      }
+      else if ((end[i] - begin[i]) < 0) {
+        LOG(INFO) << begin[i] << ", " << end[i];
+        return tensorflow::errors::InvalidArgument(
+            "New size of sliced dimension is negative, at ", node_def.name());
+      }
+      pad_dims.push_back(i);
+    }
+  }
+  if (pad_dims.size() == 0) {
+    // No dimensions are changed. We could create a padding layer anyway with
+    // values of 0.
+    if (params->validation_only) return Status::OK();
+    params->outputs->push_back(inputs.at(0));
+    return tensorflow::Status::OK();
+  } else if (pad_dims.size() == 1) {
+    // Only one dim is modified but we have to have 2, mark a second dim which
+    // will have padding of 0.
+    if (pad_dims[0] == 1 || pad_dims[0] == 3) {
+      pad_dims.push_back(2);
+    } else if (pad_dims[0] == 2) {
+      pad_dims.push_back(3);
+    }
+  } else if (pad_dims.size() > 2) {
+    return tensorflow::errors::Unimplemented(
+      "StridedSlice can only modify 2 dimensions, at ", 
+      node_def.name());
+  }
+  std::sort(pad_dims.begin(), pad_dims.end());
+  // Convert to pre/post padding values.
+  nvinfer1::DimsHW pre_padding, post_padding;
+  for (int i = 0; i < pad_dims.size(); i++) {
+    const int axis = pad_dims[i];
+    pre_padding.d[i] = -begin[axis];
+    post_padding.d[i] = end[axis] - input_dims[axis];
+  }
+
+  // IPaddingLayer will always apply the padding to dims 2,3 (input format is
+  // NCHW).
+  const bool need_transpose = !(pad_dims[0] == 2 && pad_dims[1] == 3);
+  std::vector<int> transpose_order(input_dims.size());
+  std::vector<int> inv_transpose_order(input_dims.size());
+  if (need_transpose) {
+    if (pad_dims[0] == 1 && pad_dims[1] == 3) {
+      transpose_order = {0, 2, 1, 3};
+      inv_transpose_order = {0, 2, 1, 3};
+    } else if (pad_dims[0] == 1 && pad_dims[1] == 2) {
+      transpose_order = {0, 3, 1, 2};
+      inv_transpose_order = {0, 2, 3, 1};
+    }
+  }
+  if (params->validation_only) return Status::OK();
+
+  // Start conversion.
+  if (need_reshape) {
+    const nvinfer1::ITensor* output_tensor = nullptr;
+    TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
+        inputs.at(0), reshape_dims, &output_tensor));
+    tensor = const_cast<nvinfer1::ITensor*>(output_tensor);
+  }
+  if (need_transpose) {
+    const nvinfer1::ITensor* output_tensor = nullptr;
+    TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
+        tensor, transpose_order, &output_tensor));
+    tensor = const_cast<nvinfer1::ITensor*>(output_tensor);
+  }
+
+  // Add padding layer
+  nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
+      *const_cast<nvinfer1::ITensor*>(tensor), pre_padding, post_padding);
+  TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
+  tensor = layer->getOutput(0);
+
+  // Restore transpose
+  if (need_transpose) {
+    const nvinfer1::ITensor* output_tensor = nullptr;
+    TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
+        tensor, inv_transpose_order, &output_tensor));
+    tensor = const_cast<nvinfer1::ITensor*>(output_tensor);
+  }
+  // Restore reshape
+  if (need_reshape) {
+    // Calculate output dimensions
+    for(int i = 0; i < pad_dims.size(); i++) {
+      const int axis = pad_dims[i];
+      input_dims[axis] = end[axis] - begin[axis];
+    }
+    // Remove added 1 dimensions
+    for (int i = 0; i < reshape_dims_added; i++) {
+      int value = input_dims[1];
+      if (value != 1) {
+        return tensorflow::errors::Internal(
+            "StridedSlice error when reshaping, at ", 
+            node_def.name());
+      }
+      input_dims.erase(input_dims.begin()+1);
+    }
+
+    nvinfer1::Dims new_dims = VectorToTrtDims(input_dims,
+                                              /*ignore_first_dim=*/true);
+    const nvinfer1::ITensor* output_tensor = nullptr;
+    TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
+        TRT_TensorOrWeights(tensor), new_dims, &output_tensor));
+    tensor = const_cast<nvinfer1::ITensor*>(output_tensor);
+  }
+
+  params->outputs->push_back(
+      TRT_TensorOrWeights(const_cast<nvinfer1::ITensor*>(tensor)));
+  return tensorflow::Status::OK();
+}
+
 tensorflow::Status ConvertConv2D(OpConverterParams* params) {
   return ConvertConv2DHelper(params, ConvolutionType::DEFAULT);
 }
@@ -3335,14 +3579,15 @@ static void RegisterValidatableOpConverters(
   (*registration)["Const"] = ConvertConst;
   (*registration)["Conv2D"] = ConvertConv2D;
   (*registration)["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
-  (*registration)["Transpose"] = ConvertTranspose;
-  (*registration)["Reshape"] = ConvertReshape;
+  (*registration)["ExpandDims"] = ConvertExpandDims;
   (*registration)["MatMul"] = ConvertMatMul;
   (*registration)["Pad"] = ConvertPad;
   (*registration)["Relu6"] = ConvertRelu6;
+  (*registration)["Reshape"] = ConvertReshape;
   (*registration)["Square"] = ConvertSquare;
-  (*registration)["ExpandDims"] = ConvertExpandDims;
   (*registration)["Squeeze"] = ConvertSqueeze;
+  (*registration)["StridedSlice"] = ConvertStridedSlice;
+  (*registration)["Transpose"] = ConvertTranspose;
 
   for (auto quantization_op_type :
        {"QuantizeAndDequantizeV2", "QuantizeAndDequantizeV3",
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index c37a43dd5d..07649f04b2 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2129,7 +2129,6 @@ TEST_F(OpConverterTest, ConvertExpandDims) {
   auto expanddims =
       ops::ExpandDims(s.WithOpName("my_expanddims"), input, weights);
   const NodeDef& node_def = expanddims.operation.node()->def();
-
   {
     // Input is weights, should fail.
     Reset();
@@ -2349,6 +2348,307 @@ TEST_F(OpConverterTest, ConvertSqueeze) {
   }
 }
 
+TEST_F(OpConverterTest, ConvertStridedSlice) {
+  {
+    // Input list is empty, should fail.
+    NodeDef node_def = MakeNodeDef("my_strided_slice", "StridedSlice", {});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "StridedSlice expects 4 inputs, at my_strided_slice");
+  }
+
+  // Get nodedef for StridedSlice layer.
+  auto get_strided_slice_nodedef = [](int begin_mask = 0,
+                                      int ellipsis_mask = 0,
+                                      int end_mask = 0,
+                                      int new_axis_mask = 0,
+                                      int shrink_axis_mask = 0) -> NodeDef {
+    Scope s = Scope::NewRootScope();
+    auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
+    auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
+    auto end = ops::Placeholder(s.WithOpName("end"), DT_INT32);
+    auto strides = ops::Placeholder(s.WithOpName("strides"), DT_INT32);
+    ops::StridedSlice::Attrs strided_slice_attrs;
+    strided_slice_attrs.begin_mask_ = begin_mask;
+    strided_slice_attrs.ellipsis_mask_ = ellipsis_mask;
+    strided_slice_attrs.end_mask_ = end_mask;
+    strided_slice_attrs.new_axis_mask_ = new_axis_mask;
+    strided_slice_attrs.shrink_axis_mask_ = shrink_axis_mask;
+    auto strided_slice = ops::StridedSlice(s.WithOpName("my_strided_slice"),
+        input, begin, end, strides, strided_slice_attrs);
+    return strided_slice.operation.node()->def();
+  };
+
+  {
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestWeights<int32>("input", {1, 2, 3}, {1, 2, 3, 4, 5, 6});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "StridedSlice is only implemented for tensors, at my_strided_slice");
+  }
+  {
+    // Begin, end, strides are tensors, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestTensor("begin", {4});
+    AddTestTensor("end", {4});
+    AddTestTensor("strides", {4});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "StridedSlice expects weights for begin, end, and strides, at "
+        "my_strided_slice");
+  }
+  {
+    // Non-zero ellipsis_mask, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
+        /*ellipsis_mask=*/2, /*end_mask=*/0, /*new_axis_mask=*/0,
+        /*shrink_axis_mask=*/0);
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "ellipsis_mask is not implemented for StridedSlice, at "
+        "my_strided_slice");
+  }
+  {
+    // Non-zero ellipsis_mask, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
+        /*ellipsis_mask=*/0, /*end_mask=*/0, /*new_axis_mask=*/2,
+        /*shrink_axis_mask=*/0);
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "new_axis_mask is not implemented for StridedSlice, at "
+        "my_strided_slice");
+  }
+  {
+    // Non-zero shrink_axis_mask, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
+        /*ellipsis_mask=*/0, /*end_mask=*/0, /*new_axis_mask=*/0,
+        /*shrink_axis_mask=*/2);
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "shrink_axis_mask is not implemented for StridedSlice, at "
+        "my_strided_slice");
+  }
+  {
+    // Modify batch dim, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {0, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        "StridedSlice can't modify batch dim, at my_strided_slice");
+  }
+  {
+    // Stride is not 1, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 2, -1, 3});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED, "StridedSlice is only implemented for "
+        "stride of 1, at my_strided_slice");
+  }
+  {
+    // Begin out of bounds, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {1, 2, 3, 4});
+    AddTestWeights<int32>("end", {4}, {0, 1, 2, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "begin for StridedSlice is invalid, must be in the range "
+        "[-rank(input), rank(input)], at my_strided_slice");
+  }
+  {
+    // End out of bounds, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
+    AddTestWeights<int32>("end", {4}, {1, 2, 3, 4});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "end for StridedSlice is invalid, must be in the range "
+        "[-rank(input), rank(input)], at my_strided_slice");
+  }
+  {
+    // Size of sliced dim is negative, should fail.
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef();
+    AddTestTensor("input", {1, 2, 3});
+    AddTestWeights<int32>("begin", {4}, {0, 0, 2, 0});
+    AddTestWeights<int32>("end", {4}, {1, 1, 0, 3});
+    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
+    RunValidationAndConversion(
+        node_def, error::INVALID_ARGUMENT,
+        "New size of sliced dimension is negative, at my_strided_slice");
+  }
+
+  struct TestParams {
+    TestParams(const std::vector<int>& input_dims,
+               const std::vector<int>& expected_output_dims,
+               const std::vector<int>& begin,
+               const std::vector<int>& end,
+               const std::vector<int>& begin_mask,
+               const std::vector<int>& end_mask,
+               const std::vector<int>& expected_output)
+        : input_dims(input_dims),
+          expected_output_dims(expected_output_dims),
+          begin(begin),
+          end(end),
+          expected_output(expected_output) {
+      // Masks are provided in terms of vectors for readability. Convert them to
+      // binary here.
+      this->begin_mask = 0;
+      for (int i = 0; i < begin_mask.size(); i++) {
+        if (begin_mask[i]) this->begin_mask |= (1 << i);
+      }
+      this->end_mask = 0;
+      for (int i = 0; i < end_mask.size(); i++) {
+        if (end_mask[i]) this->end_mask |= (1 << i);
+      }
+  }
+
+    std::vector<int> input_dims;
+    std::vector<int> expected_output_dims;
+    std::vector<int> begin;
+    std::vector<int> end;
+    int begin_mask;
+    int end_mask;
+    std::vector<int> expected_output;
+  };
+
+  // Ok.
+  const int kStridedSliceOKCases = 18;
+  TestParams ok_params[kStridedSliceOKCases] = {
+      // 2D Crop.
+      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
+                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 1, 2},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 0, 0},
+                  /*expected_output=*/{1, 2}},
+      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
+                 /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 0, 0, 0},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 1},
+                  /*expected_output=*/{5, 6}},
+      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
+                 /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 1, 2, 3},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 0, 0},
+                 /*expected_output=*/{5, 6}},
+      // 2D Crop, with transpose.
+      TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 2, 1},
+                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 2, 1},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 0, 0},
+                 /*expected_output=*/{1, 2}},
+      TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 2, 1},
+                 /*begin=*/{0, 1, 1, 0}, /*end=*/{0, 2, 3, 1},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 0, 0},
+                 /*expected_output=*/{5, 6}},
+      TestParams{/*input_dims=*/{2, 1, 3}, /*expected_output_dims=*/{1, 1, 2},
+                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 1, 2},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 0, 0},
+                 /*expected_output=*/{1, 2}},
+      TestParams{/*input_dims=*/{2, 1, 3}, /*expected_output_dims=*/{1, 1, 2},
+                 /*begin=*/{0, 1, 0, 1}, /*end=*/{0, 2, 1, 3},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 0, 0},
+                 /*expected_output=*/{5, 6}},
+      // 2D Crop, with reshape.
+      TestParams{/*input_dims=*/{2, 3}, /*expected_output_dims=*/{1, 2},
+                 /*begin=*/{0, 0, 0}, /*end=*/{0, 1, 2},
+                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 0, 0},
+                 /*expected_output=*/{1, 2}},
+      TestParams{/*input_dims=*/{2, 3}, /*expected_output_dims=*/{1, 2},
+                 /*begin=*/{0, 1, 1}, /*end=*/{0, 0, 0},
+                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 1, 1},
+                 /*expected_output=*/{5, 6}},
+      // 1D Crop.
+      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 2, 2},
+                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 0, 2},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 0},
+                 /*expected_output=*/{1, 2, 4, 5}},
+      TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 3},
+                 /*begin=*/{0, 0, 1, 0}, /*end=*/{0, 0, 0, 0},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 1},
+                 /*expected_output=*/{4, 5, 6}},
+      // 1D Crop, with transpose.
+      TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 3, 1},
+                 /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 0, 0},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 0, 1, 1},
+                 /*expected_output=*/{1, 2, 3}},
+      TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 3, 1},
+                 /*begin=*/{0, 1, 0, 0}, /*end=*/{0, 0, 0, 0},
+                 /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 1},
+                 /*expected_output=*/{4, 5, 6}},
+      // 1D Crop, with reshape.
+      TestParams{/*input_dims=*/{6}, /*expected_output_dims=*/{3},
+                 /*begin=*/{0, 0}, /*end=*/{0, 3},
+                 /*begin_mask=*/{0, 0}, /*end_mask=*/{1, 0},
+                 /*expected_output=*/{1, 2, 3}},
+      TestParams{/*input_dims=*/{1, 6}, /*expected_output_dims=*/{1, 3},
+                 /*begin=*/{0, 0, 2}, /*end=*/{0, 0, 5},
+                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 1, 0},
+                 /*expected_output=*/{3, 4, 5}},
+      TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{3, 1},
+                 /*begin=*/{0, 2, 0}, /*end=*/{0, 5, 0},
+                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 0, 1},
+                 /*expected_output=*/{3, 4, 5}},
+      // Negative axis.
+      TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{3, 1},
+                 /*begin=*/{0, -6, 0}, /*end=*/{0, -3, 0},
+                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 0, 1},
+                 /*expected_output=*/{1, 2, 3}},
+      TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{5, 1},
+                 /*begin=*/{0, 0, 0}, /*end=*/{0, -1, 0},
+                 /*begin_mask=*/{0, 0, 0}, /*end_mask=*/{1, 0, 1},
+                 /*expected_output=*/{1, 2, 3, 4, 5}},
+  };
+
+  for (int i = 0; i < kStridedSliceOKCases; i++) {
+    Reset();
+    NodeDef node_def = get_strided_slice_nodedef(ok_params[i].begin_mask, 0,
+                                                 ok_params[i].end_mask);
+    AddTestTensor("input", ok_params[i].input_dims);
+    AddTestWeights<int32>("begin", {ok_params[i].begin.size()},
+                          ok_params[i].begin);
+    AddTestWeights<int32>("end", {ok_params[i].end.size()}, ok_params[i].end);
+    std::vector<int> strides(ok_params[i].input_dims.size(), 1);
+    AddTestWeights<int32>("strides", {strides.size()}, strides);
+    RunValidationAndConversion(node_def);
+
+    TRT_TensorOrWeights output;
+    TF_EXPECT_OK(GetTensorOrWeights("my_strided_slice", &output));
+    std::vector<float> output_data(ok_params[i].expected_output.size());
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_strided_slice", 
+                       &output_data);
+    EXPECT_THAT(output_data, ElementsAreArray(ok_params[i].expected_output));
+  }
+}
+
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
-- 
GitLab


From 286b04fcf96e7bbd68e992a2801ce7f18338e7c4 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Fri, 30 Nov 2018 11:49:49 -0800
Subject: [PATCH 460/873] VectorToTrtDims -> TensorShapeArrayToTrtDims

---
 tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index fdecfe5928..cdc77ac8b3 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -2152,7 +2152,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
       end.insert(end.begin()+1, 1);
       reshape_dims_added++;
     }
-    reshape_dims = VectorToTrtDims(input_dims, /*ignore_first_dim=*/true);
+    reshape_dims = TensorShapeArrayToTrtDims(input_dims,
+                                             /*ignore_first_dim=*/true);
   }
   // Find dimensions which need to be sliced.
   std::vector<int> pad_dims;
@@ -2259,8 +2260,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
       input_dims.erase(input_dims.begin()+1);
     }
 
-    nvinfer1::Dims new_dims = VectorToTrtDims(input_dims,
-                                              /*ignore_first_dim=*/true);
+    nvinfer1::Dims new_dims = TensorShapeArrayToTrtDims(
+        input_dims, /*ignore_first_dim=*/true);
     const nvinfer1::ITensor* output_tensor = nullptr;
     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
         TRT_TensorOrWeights(tensor), new_dims, &output_tensor));
-- 
GitLab


From edccece99302c8c2f8e787fa59d13d54f7b0b001 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Fri, 30 Nov 2018 12:49:24 -0800
Subject: [PATCH 461/873] Formatting

---
 tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index 07649f04b2..078c36a9a1 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2533,7 +2533,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
       for (int i = 0; i < end_mask.size(); i++) {
         if (end_mask[i]) this->end_mask |= (1 << i);
       }
-  }
+    }
 
     std::vector<int> input_dims;
     std::vector<int> expected_output_dims;
-- 
GitLab


From 7e17c6afcae045e976fe0508c59b1b7e4b19e7b5 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Mon, 3 Dec 2018 13:47:46 -0800
Subject: [PATCH 462/873] Fix clang-format

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 68 +++++++++----------
 .../tensorrt/convert/convert_nodes_test.cc    | 44 ++++++------
 2 files changed, 52 insertions(+), 60 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index cdc77ac8b3..fee4f2341b 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -2033,16 +2033,15 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
   return tensorflow::Status::OK();
 }
 
-tensorflow::Status GetStridedSliceBound(
-    const std::vector<int>& input_dims,
-    const TRT_ShapedWeights& bound_weights,
-    string bound_name,
-    string node_name,
-    std::vector<int>& output_bound) {
+// Gets the bounds (start or end) from the weights of a StridedSlice op.
+tensorflow::Status GetStridedSliceBound(const std::vector<int>& input_dims,
+                                        const TRT_ShapedWeights& bound_weights,
+                                        string bound_name, string node_name,
+                                        std::vector<int>& output_bound) {
   const int* weights_ptr =
       static_cast<int*>(const_cast<void*>(bound_weights.GetValues()));
-  output_bound = std::vector<int>(weights_ptr,
-                                  weights_ptr + bound_weights.count());
+  output_bound = 
+      std::vector<int>(weights_ptr, weights_ptr + bound_weights.count());
   if (output_bound.size() != input_dims.size()) {
     return tensorflow::errors::InvalidArgument(
         "StridedSlice \"", bound_name, "\" specified ",
@@ -2054,8 +2053,10 @@ tensorflow::Status GetStridedSliceBound(
     if ((output_bound[i] < -input_dims[i]) ||
         (output_bound[i] > input_dims[i])) {
       return tensorflow::errors::InvalidArgument(
-          bound_name, " for StridedSlice is invalid, must be in the range "
-          "[-rank(input), rank(input)], at ", node_name);
+          bound_name,
+          " for StridedSlice is invalid, must be in the range "
+          "[-rank(input), rank(input)], at ",
+          node_name);
     }
     // Convert negative values to their positive equivalent.
     if (output_bound[i] < 0) {
@@ -2072,8 +2073,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
     return tensorflow::errors::InvalidArgument(
         "StridedSlice expects 4 inputs, at ", node_def.name());
   }
-  if (!inputs.at(1).is_weights() ||
-      !inputs.at(2).is_weights() ||
+  if (!inputs.at(1).is_weights() || !inputs.at(2).is_weights() ||
       !inputs.at(3).is_weights()) {
     return tensorflow::errors::InvalidArgument(
         "StridedSlice expects weights for begin, end, and strides, at ",
@@ -2081,8 +2081,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   }
   if (!inputs.at(0).is_tensor()) {
     return tensorflow::errors::Unimplemented(
-        "StridedSlice is only implemented for tensors, at ",
-        node_def.name());
+        "StridedSlice is only implemented for tensors, at ", node_def.name());
   }
   // Get input dims.
   nvinfer1::Dims dims = inputs.at(0).GetTrtDims();
@@ -2093,8 +2092,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   }
   if (input_dims.size() > 4) {
     return tensorflow::errors::Unimplemented(
-      "StridedSlice is not implemented for tensors with rank > 4, at ", 
-      node_def.name());
+        "StridedSlice is not implemented for tensors with rank > 4, at ", 
+        node_def.name());
   }
   TFAttrs attrs(node_def);
   // Get begin and end bounds per axis.
@@ -2124,8 +2123,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   for (int x : strides) {
     if (x != 1) {
       return tensorflow::errors::Unimplemented(
-        "StridedSlice is only implemented for stride of 1, at ", 
-        node_def.name());
+          "StridedSlice is only implemented for stride of 1, at ", 
+          node_def.name());
     }
   }
   // Unsupported options.
@@ -2133,23 +2132,22 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
     int ellipsis_mask = attrs.get<int>(attr);
     if (ellipsis_mask != 0) {
       return tensorflow::errors::Unimplemented(
-        attr, " is not implemented for StridedSlice, at ", 
-        node_def.name());
+          attr, " is not implemented for StridedSlice, at ", node_def.name());
     }
   }
 
-  nvinfer1::ITensor* tensor = const_cast<nvinfer1::ITensor*>(
-      inputs.at(0).tensor());
+  nvinfer1::ITensor* tensor = 
+      const_cast<nvinfer1::ITensor*>(inputs.at(0).tensor());
   // Reshape if necessary to 4-D.
   const bool need_reshape = (input_dims.size() != 4);
   int reshape_dims_added = 0;
-  nvinfer1::Dims reshape_dims; 
+  nvinfer1::Dims reshape_dims;
   if (need_reshape) {
     // Add new dims after batch dim until tensor is 4D.
     while (input_dims.size() < 4) {
-      input_dims.insert(input_dims.begin()+1, 1);
-      begin.insert(begin.begin()+1, 0);
-      end.insert(end.begin()+1, 1);
+      input_dims.insert(input_dims.begin() + 1, 1);
+      begin.insert(begin.begin() + 1, 0);
+      end.insert(end.begin() + 1, 1);
       reshape_dims_added++;
     }
     reshape_dims = TensorShapeArrayToTrtDims(input_dims,
@@ -2162,9 +2160,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
       if (i == 0) {
         return tensorflow::errors::Unimplemented(
             "StridedSlice can't modify batch dim, at ", node_def.name());
-      }
-      else if ((end[i] - begin[i]) < 0) {
-        LOG(INFO) << begin[i] << ", " << end[i];
+      } else if ((end[i] - begin[i]) < 0) {
         return tensorflow::errors::InvalidArgument(
             "New size of sliced dimension is negative, at ", node_def.name());
       }
@@ -2187,8 +2183,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
     }
   } else if (pad_dims.size() > 2) {
     return tensorflow::errors::Unimplemented(
-      "StridedSlice can only modify 2 dimensions, at ", 
-      node_def.name());
+        "StridedSlice can only modify 2 dimensions, at ", node_def.name());
   }
   std::sort(pad_dims.begin(), pad_dims.end());
   // Convert to pre/post padding values.
@@ -2245,7 +2240,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   // Restore reshape
   if (need_reshape) {
     // Calculate output dimensions
-    for(int i = 0; i < pad_dims.size(); i++) {
+    for (int i = 0; i < pad_dims.size(); i++) {
       const int axis = pad_dims[i];
       input_dims[axis] = end[axis] - begin[axis];
     }
@@ -2254,14 +2249,13 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
       int value = input_dims[1];
       if (value != 1) {
         return tensorflow::errors::Internal(
-            "StridedSlice error when reshaping, at ", 
-            node_def.name());
+            "StridedSlice error when reshaping, at ", node_def.name());
       }
-      input_dims.erase(input_dims.begin()+1);
+      input_dims.erase(input_dims.begin() + 1);
     }
 
-    nvinfer1::Dims new_dims = TensorShapeArrayToTrtDims(
-        input_dims, /*ignore_first_dim=*/true);
+    nvinfer1::Dims new_dims =
+        TensorShapeArrayToTrtDims(input_dims, /*ignore_first_dim=*/true);
     const nvinfer1::ITensor* output_tensor = nullptr;
     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
         TRT_TensorOrWeights(tensor), new_dims, &output_tensor));
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index 078c36a9a1..c370895899 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2358,10 +2358,8 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
   }
 
   // Get nodedef for StridedSlice layer.
-  auto get_strided_slice_nodedef = [](int begin_mask = 0,
-                                      int ellipsis_mask = 0,
-                                      int end_mask = 0,
-                                      int new_axis_mask = 0,
+  auto get_strided_slice_nodedef = [](int begin_mask = 0, int ellipsis_mask = 0,
+                                      int end_mask = 0,int new_axis_mask = 0,
                                       int shrink_axis_mask = 0) -> NodeDef {
     Scope s = Scope::NewRootScope();
     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
@@ -2374,8 +2372,9 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     strided_slice_attrs.end_mask_ = end_mask;
     strided_slice_attrs.new_axis_mask_ = new_axis_mask;
     strided_slice_attrs.shrink_axis_mask_ = shrink_axis_mask;
-    auto strided_slice = ops::StridedSlice(s.WithOpName("my_strided_slice"),
-        input, begin, end, strides, strided_slice_attrs);
+    auto strided_slice = 
+        ops::StridedSlice(s.WithOpName("my_strided_slice"), input, begin, end,
+                          strides, strided_slice_attrs);
     return strided_slice.operation.node()->def();
   };
 
@@ -2405,9 +2404,9 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
   {
     // Non-zero ellipsis_mask, should fail.
     Reset();
-    NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
-        /*ellipsis_mask=*/2, /*end_mask=*/0, /*new_axis_mask=*/0,
-        /*shrink_axis_mask=*/0);
+    NodeDef node_def = get_strided_slice_nodedef(
+        /*begin_mask=*/0, /*ellipsis_mask=*/2, /*end_mask=*/0,
+        /*new_axis_mask=*/0, /*shrink_axis_mask=*/0);
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
@@ -2420,9 +2419,9 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
   {
     // Non-zero ellipsis_mask, should fail.
     Reset();
-    NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
-        /*ellipsis_mask=*/0, /*end_mask=*/0, /*new_axis_mask=*/2,
-        /*shrink_axis_mask=*/0);
+    NodeDef node_def = get_strided_slice_nodedef(
+        /*begin_mask=*/0, /*ellipsis_mask=*/0, /*end_mask=*/0,
+        /*new_axis_mask=*/2, /*shrink_axis_mask=*/0);
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
@@ -2435,9 +2434,9 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
   {
     // Non-zero shrink_axis_mask, should fail.
     Reset();
-    NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0,
-        /*ellipsis_mask=*/0, /*end_mask=*/0, /*new_axis_mask=*/0,
-        /*shrink_axis_mask=*/2);
+    NodeDef node_def = get_strided_slice_nodedef(
+        /*begin_mask=*/0, /*ellipsis_mask=*/0, /*end_mask=*/0,
+        /*new_axis_mask=*/0, /*shrink_axis_mask=*/2);
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
@@ -2467,9 +2466,9 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
     AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
     AddTestWeights<int32>("strides", {4}, {1, 2, -1, 3});
-    RunValidationAndConversion(
-        node_def, error::UNIMPLEMENTED, "StridedSlice is only implemented for "
-        "stride of 1, at my_strided_slice");
+    RunValidationAndConversion(node_def, error::UNIMPLEMENTED,
+                               "StridedSlice is only implemented for stride of "
+                               "1, at my_strided_slice");
   }
   {
     // Begin out of bounds, should fail.
@@ -2513,8 +2512,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
   struct TestParams {
     TestParams(const std::vector<int>& input_dims,
                const std::vector<int>& expected_output_dims,
-               const std::vector<int>& begin,
-               const std::vector<int>& end,
+               const std::vector<int>& begin, const std::vector<int>& end,
                const std::vector<int>& begin_mask,
                const std::vector<int>& end_mask,
                const std::vector<int>& expected_output)
@@ -2551,11 +2549,11 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
       TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
                  /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 1, 2},
                  /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 0, 0},
-                  /*expected_output=*/{1, 2}},
+                 /*expected_output=*/{1, 2}},
       TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
                  /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 0, 0, 0},
                  /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 1, 1},
-                  /*expected_output=*/{5, 6}},
+                 /*expected_output=*/{5, 6}},
       TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2},
                  /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 1, 2, 3},
                  /*begin_mask=*/{0, 0, 0, 0}, /*end_mask=*/{1, 1, 0, 0},
@@ -2643,7 +2641,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     TRT_TensorOrWeights output;
     TF_EXPECT_OK(GetTensorOrWeights("my_strided_slice", &output));
     std::vector<float> output_data(ok_params[i].expected_output.size());
-    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_strided_slice", 
+    BuildAndRun<float>({{"input", {1, 2, 3, 4, 5, 6}}}, "my_strided_slice",
                        &output_data);
     EXPECT_THAT(output_data, ElementsAreArray(ok_params[i].expected_output));
   }
-- 
GitLab


From d16eafc60081f5481fb1a9a727e247a995d8da5f Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 5 Dec 2018 11:29:33 -0800
Subject: [PATCH 463/873] Make unsupported mask options clearer

---
 tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index fee4f2341b..3961374903 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -2127,10 +2127,10 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
           node_def.name());
     }
   }
-  // Unsupported options.
+  // Unsupported mask options.
   for (string attr : {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) {
-    int ellipsis_mask = attrs.get<int>(attr);
-    if (ellipsis_mask != 0) {
+    int attr_val = attrs.get<int>(attr);
+    if (attr_val != 0) {
       return tensorflow::errors::Unimplemented(
           attr, " is not implemented for StridedSlice, at ", node_def.name());
     }
-- 
GitLab


From 661acd19903cb2fae49b1b4dd12a3170b2950ff3 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Mon, 10 Dec 2018 10:32:56 -0800
Subject: [PATCH 464/873] Fix usage of TensorShapeArrayToTrtDims

---
 tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 3961374903..ae4f99be26 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -2150,8 +2150,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
       end.insert(end.begin() + 1, 1);
       reshape_dims_added++;
     }
-    reshape_dims = TensorShapeArrayToTrtDims(input_dims,
-                                             /*ignore_first_dim=*/true);
+    TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &reshape_dims,
+                                                 /*ignore_first_dim=*/true));
   }
   // Find dimensions which need to be sliced.
   std::vector<int> pad_dims;
@@ -2254,8 +2254,9 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
       input_dims.erase(input_dims.begin() + 1);
     }
 
-    nvinfer1::Dims new_dims =
-        TensorShapeArrayToTrtDims(input_dims, /*ignore_first_dim=*/true);
+    nvinfer1::Dims new_dims;
+    TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims,
+                                                 /*ignore_first_dim=*/true));
     const nvinfer1::ITensor* output_tensor = nullptr;
     TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape(
         TRT_TensorOrWeights(tensor), new_dims, &output_tensor));
-- 
GitLab


From 296b83f13346fb70fc7ee70ae256b96a6366896a Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Tue, 11 Dec 2018 10:50:53 -0800
Subject: [PATCH 465/873] Apply smit-hinsu's suggestions

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 51 ++++++++--------
 .../tensorrt/convert/convert_nodes_test.cc    | 60 +++++--------------
 2 files changed, 40 insertions(+), 71 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index ae4f99be26..303db95921 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -2037,30 +2037,29 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
 tensorflow::Status GetStridedSliceBound(const std::vector<int>& input_dims,
                                         const TRT_ShapedWeights& bound_weights,
                                         string bound_name, string node_name,
-                                        std::vector<int>& output_bound) {
-  const int* weights_ptr =
-      static_cast<int*>(const_cast<void*>(bound_weights.GetValues()));
-  output_bound = 
+                                        std::vector<int>* output_bound) {
+  const int* weights_ptr = static_cast<int*>(bound_weights.GetValues());
+  *output_bound = 
       std::vector<int>(weights_ptr, weights_ptr + bound_weights.count());
-  if (output_bound.size() != input_dims.size()) {
+  if (output_bound->size() != input_dims.size()) {
     return tensorflow::errors::InvalidArgument(
         "StridedSlice \"", bound_name, "\" specified ",
-        std::to_string(output_bound.size()), " dimensions, but input rank is ",
+        std::to_string(output_bound->size()), " dimensions, but input rank is ",
         std::to_string(input_dims.size()), ", at ", node_name);
   }
-  for (int i = 0; i < output_bound.size(); i++) {
+  for (int i = 0; i < output_bound->size(); i++) {
     // Make sure bound is valid.
-    if ((output_bound[i] < -input_dims[i]) ||
-        (output_bound[i] > input_dims[i])) {
+    if (((*output_bound)[i] < -input_dims[i]) ||
+        ((*output_bound)[i] > input_dims[i])) {
       return tensorflow::errors::InvalidArgument(
           bound_name,
-          " for StridedSlice is invalid, must be in the range "
-          "[-rank(input), rank(input)], at ",
+          " value for StridedSlice is invalid, must be in the range "
+          "[-dim_size(i), dim_size(i)], at ",
           node_name);
     }
     // Convert negative values to their positive equivalent.
-    if (output_bound[i] < 0) {
-      output_bound[i] += input_dims[i];
+    if ((*output_bound)[i] < 0) {
+      (*output_bound)[i] += input_dims[i];
     }
   }
   return tensorflow::Status::OK();
@@ -2099,9 +2098,9 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   // Get begin and end bounds per axis.
   std::vector<int> begin, end;
   TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(1).weights(),
-                                          "begin", node_def.name(), begin));
+                                          "begin", node_def.name(), &begin));
   TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(2).weights(),
-                                          "end", node_def.name(), end));
+                                          "end", node_def.name(), &end));
   int begin_mask = attrs.get<int>("begin_mask");
   for (int i = 0; i < begin.size(); i++) {
     if ((1 << i) & begin_mask) {
@@ -2116,8 +2115,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   }
   // Get strides per axis (must all be 1).
   TRT_ShapedWeights stride_weights = inputs.at(3).weights();
-  const int* stride_weights_ptr =
-      static_cast<int*>(const_cast<void*>(stride_weights.GetValues()));
+  const int* stride_weights_ptr = static_cast<int*>(stride_weights.GetValues());
   std::vector<int> strides(stride_weights_ptr,
                            stride_weights_ptr + stride_weights.count());
   for (int x : strides) {
@@ -2128,17 +2126,18 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
     }
   }
   // Unsupported mask options.
-  for (string attr : {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) {
+  for (const string& attr :
+       {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) {
     int attr_val = attrs.get<int>(attr);
     if (attr_val != 0) {
       return tensorflow::errors::Unimplemented(
-          attr, " is not implemented for StridedSlice, at ", node_def.name());
+          attr, " is not supported for StridedSlice, at ", node_def.name());
     }
   }
 
   nvinfer1::ITensor* tensor = 
       const_cast<nvinfer1::ITensor*>(inputs.at(0).tensor());
-  // Reshape if necessary to 4-D.
+  // Reshape if necessary to 4-D, since IPaddingLayer requires a 4-D input.
   const bool need_reshape = (input_dims.size() != 4);
   int reshape_dims_added = 0;
   nvinfer1::Dims reshape_dims;
@@ -2156,7 +2155,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   // Find dimensions which need to be sliced.
   std::vector<int> pad_dims;
   for (int i = 0; i < input_dims.size(); i++) {
-    if (begin[i] != 0 || (end[i] - input_dims[i]) != 0) {
+    if ((begin[i] != 0) || (end[i] != input_dims[i])) {
       if (i == 0) {
         return tensorflow::errors::Unimplemented(
             "StridedSlice can't modify batch dim, at ", node_def.name());
@@ -2175,10 +2174,11 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
     return tensorflow::Status::OK();
   } else if (pad_dims.size() == 1) {
     // Only one dim is modified but we have to have 2, mark a second dim which
-    // will have padding of 0.
-    if (pad_dims[0] == 1 || pad_dims[0] == 3) {
+    // will have padding of 0. The dim we add is chosen to avoid an unecessary
+    // transpose.
+    if (pad_dims[0] != 2) {
       pad_dims.push_back(2);
-    } else if (pad_dims[0] == 2) {
+    } else {
       pad_dims.push_back(3);
     }
   } else if (pad_dims.size() > 2) {
@@ -2186,7 +2186,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
         "StridedSlice can only modify 2 dimensions, at ", node_def.name());
   }
   std::sort(pad_dims.begin(), pad_dims.end());
-  // Convert to pre/post padding values.
+  // Convert to pre/post padding values. Since TRT does not have a StridedSlice
+  // or Slice layer, we instead create an IPaddingLayer with negative padding.
   nvinfer1::DimsHW pre_padding, post_padding;
   for (int i = 0; i < pad_dims.size(); i++) {
     const int axis = pad_dims[i];
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index c370895899..91d9e60010 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2358,23 +2358,21 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
   }
 
   // Get nodedef for StridedSlice layer.
-  auto get_strided_slice_nodedef = [](int begin_mask = 0, int ellipsis_mask = 0,
-                                      int end_mask = 0,int new_axis_mask = 0,
+  auto get_strided_slice_nodedef = [](int begin_mask = 0, int end_mask = 0,
+                                      int ellipsis_mask = 0,
+                                      int new_axis_mask = 0,
                                       int shrink_axis_mask = 0) -> NodeDef {
     Scope s = Scope::NewRootScope();
     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
     auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
     auto end = ops::Placeholder(s.WithOpName("end"), DT_INT32);
     auto strides = ops::Placeholder(s.WithOpName("strides"), DT_INT32);
-    ops::StridedSlice::Attrs strided_slice_attrs;
-    strided_slice_attrs.begin_mask_ = begin_mask;
-    strided_slice_attrs.ellipsis_mask_ = ellipsis_mask;
-    strided_slice_attrs.end_mask_ = end_mask;
-    strided_slice_attrs.new_axis_mask_ = new_axis_mask;
-    strided_slice_attrs.shrink_axis_mask_ = shrink_axis_mask;
+    ops::StridedSlice::Attrs attrs = ops::StridedSlice::Attrs()
+        .BeginMask(begin_mask).EndMask(end_mask).EllipsisMask(ellipsis_mask)
+        .NewAxisMask(new_axis_mask).ShrinkAxisMask(shrink_axis_mask);
     auto strided_slice = 
         ops::StridedSlice(s.WithOpName("my_strided_slice"), input, begin, end,
-                          strides, strided_slice_attrs);
+                          strides, attrs);
     return strided_slice.operation.node()->def();
   };
 
@@ -2405,7 +2403,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     // Non-zero ellipsis_mask, should fail.
     Reset();
     NodeDef node_def = get_strided_slice_nodedef(
-        /*begin_mask=*/0, /*ellipsis_mask=*/2, /*end_mask=*/0,
+        /*begin_mask=*/0, /*end_mask=*/0, /*ellipsis_mask=*/2, 
         /*new_axis_mask=*/0, /*shrink_axis_mask=*/0);
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
@@ -2413,37 +2411,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
     RunValidationAndConversion(
         node_def, error::UNIMPLEMENTED,
-        "ellipsis_mask is not implemented for StridedSlice, at "
-        "my_strided_slice");
-  }
-  {
-    // Non-zero ellipsis_mask, should fail.
-    Reset();
-    NodeDef node_def = get_strided_slice_nodedef(
-        /*begin_mask=*/0, /*ellipsis_mask=*/0, /*end_mask=*/0,
-        /*new_axis_mask=*/2, /*shrink_axis_mask=*/0);
-    AddTestTensor("input", {1, 2, 3});
-    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
-    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
-    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
-    RunValidationAndConversion(
-        node_def, error::UNIMPLEMENTED,
-        "new_axis_mask is not implemented for StridedSlice, at "
-        "my_strided_slice");
-  }
-  {
-    // Non-zero shrink_axis_mask, should fail.
-    Reset();
-    NodeDef node_def = get_strided_slice_nodedef(
-        /*begin_mask=*/0, /*ellipsis_mask=*/0, /*end_mask=*/0,
-        /*new_axis_mask=*/0, /*shrink_axis_mask=*/2);
-    AddTestTensor("input", {1, 2, 3});
-    AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
-    AddTestWeights<int32>("end", {4}, {1, 1, 2, 3});
-    AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
-    RunValidationAndConversion(
-        node_def, error::UNIMPLEMENTED,
-        "shrink_axis_mask is not implemented for StridedSlice, at "
+        "ellipsis_mask is not supported for StridedSlice, at "
         "my_strided_slice");
   }
   {
@@ -2480,8 +2448,8 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
     RunValidationAndConversion(
         node_def, error::INVALID_ARGUMENT,
-        "begin for StridedSlice is invalid, must be in the range "
-        "[-rank(input), rank(input)], at my_strided_slice");
+        "begin value for StridedSlice is invalid, must be in the range "
+        "[-dim_size(i), dim_size(i)], at my_strided_slice");
   }
   {
     // End out of bounds, should fail.
@@ -2493,8 +2461,8 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
     RunValidationAndConversion(
         node_def, error::INVALID_ARGUMENT,
-        "end for StridedSlice is invalid, must be in the range "
-        "[-rank(input), rank(input)], at my_strided_slice");
+        "end value for StridedSlice is invalid, must be in the range "
+        "[-dim_size(i), dim_size(i)], at my_strided_slice");
   }
   {
     // Size of sliced dim is negative, should fail.
@@ -2628,7 +2596,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
 
   for (int i = 0; i < kStridedSliceOKCases; i++) {
     Reset();
-    NodeDef node_def = get_strided_slice_nodedef(ok_params[i].begin_mask, 0,
+    NodeDef node_def = get_strided_slice_nodedef(ok_params[i].begin_mask,
                                                  ok_params[i].end_mask);
     AddTestTensor("input", ok_params[i].input_dims);
     AddTestWeights<int32>("begin", {ok_params[i].begin.size()},
-- 
GitLab


From 1254fdd1e0d57f0efb330313ab4b6b325adb9f04 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Tue, 11 Dec 2018 14:15:46 -0800
Subject: [PATCH 466/873] Fix bug with masking and undefined batch dims.
 Masking needs to take place inside of GetStridedSliceBound

---
 .../contrib/tensorrt/convert/convert_nodes.cc | 45 ++++++++++---------
 .../tensorrt/convert/convert_nodes_test.cc    | 25 ++++++-----
 2 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 303db95921..adf8831b96 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -2036,10 +2036,11 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) {
 // Gets the bounds (start or end) from the weights of a StridedSlice op.
 tensorflow::Status GetStridedSliceBound(const std::vector<int>& input_dims,
                                         const TRT_ShapedWeights& bound_weights,
-                                        string bound_name, string node_name,
+                                        int mask, bool begin, string node_name,
                                         std::vector<int>* output_bound) {
+  const string bound_name = (begin) ? "begin" : "end";
   const int* weights_ptr = static_cast<int*>(bound_weights.GetValues());
-  *output_bound = 
+  *output_bound =
       std::vector<int>(weights_ptr, weights_ptr + bound_weights.count());
   if (output_bound->size() != input_dims.size()) {
     return tensorflow::errors::InvalidArgument(
@@ -2048,12 +2049,22 @@ tensorflow::Status GetStridedSliceBound(const std::vector<int>& input_dims,
         std::to_string(input_dims.size()), ", at ", node_name);
   }
   for (int i = 0; i < output_bound->size(); i++) {
+    if ((1 << i) & mask) {
+      // Apply mask.
+      (*output_bound)[i] = (begin) ? 0 : input_dims[i];
+      // Masked bound will always result in a valid, non-negative bound, so we
+      // don't need the following checks. For the common case of using masks on
+      // a undefined batch dim (-1), we specifically don't want to do the
+      // following checks because they will erroneously detect an out of range
+      // bound or try to correct the negative value.
+      continue;
+    }
     // Make sure bound is valid.
     if (((*output_bound)[i] < -input_dims[i]) ||
         ((*output_bound)[i] > input_dims[i])) {
       return tensorflow::errors::InvalidArgument(
-          bound_name,
-          " value for StridedSlice is invalid, must be in the range "
+          bound_name, " value of ", std::to_string((*output_bound)[i]),
+          " for StridedSlice is invalid, must be in the range "
           "[-dim_size(i), dim_size(i)], at ",
           node_name);
     }
@@ -2091,28 +2102,18 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   }
   if (input_dims.size() > 4) {
     return tensorflow::errors::Unimplemented(
-        "StridedSlice is not implemented for tensors with rank > 4, at ", 
+        "StridedSlice is not implemented for tensors with rank > 4, at ",
         node_def.name());
   }
   TFAttrs attrs(node_def);
   // Get begin and end bounds per axis.
   std::vector<int> begin, end;
   TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(1).weights(),
-                                          "begin", node_def.name(), &begin));
+                                          attrs.get<int>("begin_mask"), true,
+                                          node_def.name(), &begin));
   TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(2).weights(),
-                                          "end", node_def.name(), &end));
-  int begin_mask = attrs.get<int>("begin_mask");
-  for (int i = 0; i < begin.size(); i++) {
-    if ((1 << i) & begin_mask) {
-      begin[i] = 0;
-    }
-  }
-  int end_mask = attrs.get<int>("end_mask");
-  for (int i = 0; i < end.size(); i++) {
-    if ((1 << i) & end_mask) {
-      end[i] = input_dims[i];
-    }
-  }
+                                          attrs.get<int>("end_mask"), false,
+                                          node_def.name(), &end));
   // Get strides per axis (must all be 1).
   TRT_ShapedWeights stride_weights = inputs.at(3).weights();
   const int* stride_weights_ptr = static_cast<int*>(stride_weights.GetValues());
@@ -2121,7 +2122,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   for (int x : strides) {
     if (x != 1) {
       return tensorflow::errors::Unimplemented(
-          "StridedSlice is only implemented for stride of 1, at ", 
+          "StridedSlice is only implemented for stride of 1, at ",
           node_def.name());
     }
   }
@@ -2135,7 +2136,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
     }
   }
 
-  nvinfer1::ITensor* tensor = 
+  nvinfer1::ITensor* tensor =
       const_cast<nvinfer1::ITensor*>(inputs.at(0).tensor());
   // Reshape if necessary to 4-D, since IPaddingLayer requires a 4-D input.
   const bool need_reshape = (input_dims.size() != 4);
@@ -2229,6 +2230,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) {
   nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding(
       *const_cast<nvinfer1::ITensor*>(tensor), pre_padding, post_padding);
   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
+  params->converter->MarkQuantizationRangesAsInferrable(tensor,
+                                                        layer->getOutput(0));
   tensor = layer->getOutput(0);
 
   // Restore transpose
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index 91d9e60010..d71ebb4cae 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2358,21 +2358,22 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
   }
 
   // Get nodedef for StridedSlice layer.
-  auto get_strided_slice_nodedef = [](int begin_mask = 0, int end_mask = 0,
-                                      int ellipsis_mask = 0,
-                                      int new_axis_mask = 0,
-                                      int shrink_axis_mask = 0) -> NodeDef {
+  auto get_strided_slice_nodedef = [](
+      int begin_mask = 0, int end_mask = 0, int ellipsis_mask = 0,
+      int new_axis_mask = 0, int shrink_axis_mask = 0) -> NodeDef {
     Scope s = Scope::NewRootScope();
     auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
     auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32);
     auto end = ops::Placeholder(s.WithOpName("end"), DT_INT32);
     auto strides = ops::Placeholder(s.WithOpName("strides"), DT_INT32);
     ops::StridedSlice::Attrs attrs = ops::StridedSlice::Attrs()
-        .BeginMask(begin_mask).EndMask(end_mask).EllipsisMask(ellipsis_mask)
-        .NewAxisMask(new_axis_mask).ShrinkAxisMask(shrink_axis_mask);
-    auto strided_slice = 
-        ops::StridedSlice(s.WithOpName("my_strided_slice"), input, begin, end,
-                          strides, attrs);
+                                         .BeginMask(begin_mask)
+                                         .EndMask(end_mask)
+                                         .EllipsisMask(ellipsis_mask)
+                                         .NewAxisMask(new_axis_mask)
+                                         .ShrinkAxisMask(shrink_axis_mask);
+    auto strided_slice = ops::StridedSlice(s.WithOpName("my_strided_slice"),
+                                           input, begin, end, strides, attrs);
     return strided_slice.operation.node()->def();
   };
 
@@ -2403,7 +2404,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     // Non-zero ellipsis_mask, should fail.
     Reset();
     NodeDef node_def = get_strided_slice_nodedef(
-        /*begin_mask=*/0, /*end_mask=*/0, /*ellipsis_mask=*/2, 
+        /*begin_mask=*/0, /*end_mask=*/0, /*ellipsis_mask=*/2,
         /*new_axis_mask=*/0, /*shrink_axis_mask=*/0);
     AddTestTensor("input", {1, 2, 3});
     AddTestWeights<int32>("begin", {4}, {0, 0, 0, 0});
@@ -2448,7 +2449,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
     RunValidationAndConversion(
         node_def, error::INVALID_ARGUMENT,
-        "begin value for StridedSlice is invalid, must be in the range "
+        "begin value of 2 for StridedSlice is invalid, must be in the range "
         "[-dim_size(i), dim_size(i)], at my_strided_slice");
   }
   {
@@ -2461,7 +2462,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     AddTestWeights<int32>("strides", {4}, {1, 1, 1, 1});
     RunValidationAndConversion(
         node_def, error::INVALID_ARGUMENT,
-        "end value for StridedSlice is invalid, must be in the range "
+        "end value of 2 for StridedSlice is invalid, must be in the range "
         "[-dim_size(i), dim_size(i)], at my_strided_slice");
   }
   {
-- 
GitLab


From ca10e2d3acd1a93643bcae11aedd25e6ac2f7e66 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Wed, 12 Dec 2018 13:05:56 -0800
Subject: [PATCH 467/873] Fix failed narrowing checks

---
 .../contrib/tensorrt/convert/convert_nodes_test.cc       | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
index d71ebb4cae..87c9bea82c 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes_test.cc
@@ -2600,11 +2600,14 @@ TEST_F(OpConverterTest, ConvertStridedSlice) {
     NodeDef node_def = get_strided_slice_nodedef(ok_params[i].begin_mask,
                                                  ok_params[i].end_mask);
     AddTestTensor("input", ok_params[i].input_dims);
-    AddTestWeights<int32>("begin", {ok_params[i].begin.size()},
+    AddTestWeights<int32>("begin",
+                          {static_cast<int>(ok_params[i].begin.size())},
                           ok_params[i].begin);
-    AddTestWeights<int32>("end", {ok_params[i].end.size()}, ok_params[i].end);
+    AddTestWeights<int32>("end", {static_cast<int>(ok_params[i].end.size())},
+                          ok_params[i].end);
     std::vector<int> strides(ok_params[i].input_dims.size(), 1);
-    AddTestWeights<int32>("strides", {strides.size()}, strides);
+    AddTestWeights<int32>("strides", {static_cast<int>(strides.size())},
+                          strides);
     RunValidationAndConversion(node_def);
 
     TRT_TensorOrWeights output;
-- 
GitLab


From ca0ccc6f9fb66b19e2ad72aff0a4a717c5e4920b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 16:56:48 -0800
Subject: [PATCH 468/873] Fix a bug in lstm_eval.

PiperOrigin-RevId: 225281253
---
 tensorflow/lite/kernels/lstm_eval.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/lite/kernels/lstm_eval.cc b/tensorflow/lite/kernels/lstm_eval.cc
index f179ecb195..0c6a462d29 100644
--- a/tensorflow/lite/kernels/lstm_eval.cc
+++ b/tensorflow/lite/kernels/lstm_eval.cc
@@ -1118,7 +1118,7 @@ TfLiteStatus EvalHybrid(
             cell_to_output_weights_scale, input_gate_bias_ptr,
             forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr,
             projection_weights_ptr, projection_weights_scale,
-            projection_bias_ptr, params, n_batch, n_cell, n_input,
+            projection_bias_ptr, params, /*n_batch=*/1, n_cell, n_input,
             aux_input_size, n_output, output_batch_leading_dim,
             input_gate_scratch, forget_gate_scratch, cell_scratch,
             output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
-- 
GitLab


From c0b2e3eb7c2c02b3725bdda834e7b5d2875e1cf0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 17:00:06 -0800
Subject: [PATCH 469/873] Adds unicode_decode and unicode_decode_with_offsets
 ops, which decode strings into unicode codepoints.

Adds unicode_split and unicode_split_with_offset ops, which split strings into unicode characters.

RELNOTES: Adds unicode_decode, unicode_decode_with_offsets, unicode_split, and unicode_split_with_offset ops.
PiperOrigin-RevId: 225281768
---
 .../base_api/api_def_UnicodeDecode.pbtxt      |  76 ++
 tensorflow/core/kernels/unicode_ops.cc        |  72 +-
 tensorflow/core/ops/string_ops.cc             |  21 +
 tensorflow/python/kernel_tests/BUILD          |   7 +
 .../kernel_tests/unicode_decode_op_test.py    | 790 +++++++++++++++---
 tensorflow/python/ops/ragged/BUILD            |   3 +
 tensorflow/python/ops/ragged/__init__.py      |  11 +
 .../python/ops/ragged/ragged_string_ops.py    | 296 ++++++-
 .../api/golden/v1/tensorflow.strings.pbtxt    |  16 +
 .../api/golden/v2/tensorflow.strings.pbtxt    |  16 +
 10 files changed, 1156 insertions(+), 152 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_UnicodeDecode.pbtxt

diff --git a/tensorflow/core/api_def/base_api/api_def_UnicodeDecode.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnicodeDecode.pbtxt
new file mode 100644
index 0000000000..9b3f69023f
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_UnicodeDecode.pbtxt
@@ -0,0 +1,76 @@
+op {
+  graph_op_name: "UnicodeDecode"
+  in_arg {
+    name: "input"
+    description: <<END
+The text to be decoded. Can have any shape. Note that the output is flattened
+to a vector of char values.
+END
+  }
+  out_arg {
+    name: "row_splits"
+    description: <<END
+A 1D int32 tensor containing the row splits.
+END
+  }
+  out_arg {
+    name: "char_values"
+    description: <<END
+A 1D int32 Tensor containing the decoded codepoints.
+END
+  }
+  attr {
+    name: "input_encoding"
+    description: <<END
+Text encoding of the input strings. This is any of the encodings supported
+by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`.
+END
+  }
+  attr {
+    name: "errors"
+    description: <<END
+Error handling policy when there is invalid formatting found in the input.
+The value of 'strict' will cause the operation to produce a InvalidArgument
+error on any invalid input formatting. A value of 'replace' (the default) will
+cause the operation to replace any invalid formatting in the input with the
+`replacement_char` codepoint. A value of 'ignore' will cause the operation to
+skip any invalid formatting in the input and produce no corresponding output
+character.
+END
+  }
+  attr {
+    name: "replacement_char"
+    description: <<END
+The replacement character codepoint to be used in place of any invalid
+formatting in the input when `errors='replace'`. Any valid unicode codepoint may
+be used. The default value is the default unicode replacement character is
+0xFFFD or U+65533.)
+END
+  }
+  attr {
+    name: "replace_control_characters"
+    description: <<END
+Whether to replace the C0 control characters (00-1F) with the
+`replacement_char`. Default is false.
+END
+  }
+  summary: <<END
+Decodes each string in `input` into a sequence of Unicode code points.
+END
+  description: <<END
+The character codepoints for all strings are returned using a single vector
+`char_values`, with strings expanded to characters in row-major order.
+
+The `row_splits` tensor indicates where the codepoints for
+each input string begin and end within the `char_values` tensor.
+In particular, the values for the `i`th
+string (in row-major order) are stored in the slice
+`[row_splits[i]:row_splits[i+1]]`. Thus:
+
+* `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th
+  character in the `i`th string (in row-major order).
+* `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th
+  string (in row-major order).
+END
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/kernels/unicode_ops.cc b/tensorflow/core/kernels/unicode_ops.cc
index 3ee0edb35a..c9c2ac1e69 100644
--- a/tensorflow/core/kernels/unicode_ops.cc
+++ b/tensorflow/core/kernels/unicode_ops.cc
@@ -350,10 +350,10 @@ class UnicodeTranscodeOp : public OpKernel {
 REGISTER_KERNEL_BUILDER(Name("UnicodeTranscode").Device(DEVICE_CPU),
                         UnicodeTranscodeOp);
 
-class UnicodeDecodeWithOffsetsOp : public OpKernel {
+class UnicodeDecodeBaseOp : public OpKernel {
  public:
-  explicit UnicodeDecodeWithOffsetsOp(OpKernelConstruction* ctx)
-      : OpKernel(ctx) {
+  explicit UnicodeDecodeBaseOp(OpKernelConstruction* ctx, bool generate_offsets)
+      : OpKernel(ctx), generate_offsets_(generate_offsets) {
     OP_REQUIRES_OK(ctx, GetErrorOptions(ctx, &error_options_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr("input_encoding", &input_encoding_));
     // Make a temporary UConverter to ensure it will create without error
@@ -369,7 +369,7 @@ class UnicodeDecodeWithOffsetsOp : public OpKernel {
   }
 
   void Decode(OpKernelContext* ctx, std::vector<UChar32>* char_values,
-              std::vector<int64>* offset_values, int* string_length,
+              std::vector<int64>* offset_values, int* current_offset,
               int64* next_row_split, UChar32 char_value, int char_length,
               bool found_any_format_error) {
     if (error_options_.error_on_malformatting && found_any_format_error) {
@@ -379,7 +379,8 @@ class UnicodeDecodeWithOffsetsOp : public OpKernel {
     UChar32 decoded_value = char_value;
     if (ShouldHandleFormatError(error_options_, char_value,
                                 found_any_format_error)) {
-      if (error_options_.elide_replacement) {
+      if (error_options_.elide_replacement && (offset_values != nullptr)) {
+        *current_offset += char_length;
         return;
       } else {
         decoded_value = error_options_.subst;
@@ -390,8 +391,10 @@ class UnicodeDecodeWithOffsetsOp : public OpKernel {
     char_values->push_back(decoded_value);
 
     // Emit the byte offset
-    offset_values->push_back(*string_length);
-    *string_length += char_length;
+    if (offset_values != nullptr) {
+      offset_values->push_back(*current_offset);
+      *current_offset += char_length;
+    }
     *next_row_split += 1;
   }
 
@@ -428,42 +431,63 @@ class UnicodeDecodeWithOffsetsOp : public OpKernel {
       // the fields needed to construct a RaggedTensor.
       out_row_splits(row_split_index) = next_row_split;
       row_split_index++;
-      int string_length = 0;
+      int current_offset = 0;
       IterateUnicodeString(
           input, input_encoder->converter_,
-          std::bind(&UnicodeDecodeWithOffsetsOp::Decode, this, ctx,
-                    &char_values, &offset_values, &string_length,
-                    &next_row_split, std::placeholders::_1,
-                    std::placeholders::_2, std::placeholders::_3));
+          std::bind(&UnicodeDecodeBaseOp::Decode, this, ctx, &char_values,
+                    &offset_values, &current_offset, &next_row_split,
+                    std::placeholders::_1, std::placeholders::_2,
+                    std::placeholders::_3));
     }
     out_row_splits(row_split_index) = next_row_split;
 
-    DCHECK(offset_values.size() == char_values.size());
     Tensor* output_char_values;
     OP_REQUIRES_OK(
         ctx, ctx->allocate_output("char_values",
                                   {static_cast<int64>(char_values.size())},
                                   &output_char_values));
-    Tensor* output_offset_values;
-    OP_REQUIRES_OK(
-        ctx, ctx->allocate_output("char_to_byte_starts",
-                                  {static_cast<int64>(offset_values.size())},
-                                  &output_offset_values));
     auto out_char_values = output_char_values->vec<int32>();
-    auto out_offset_values = output_offset_values->vec<int64>();
-
-    // Load output tensors from intermediate value arrays.
-    for (int i = 0; i < char_values.size(); ++i) {
-      out_char_values(i) = static_cast<int32>(char_values[i]);
-      out_offset_values(i) = offset_values[i];
+    if (generate_offsets_) {
+      DCHECK(offset_values.size() == char_values.size());
+      Tensor* output_offset_values;
+      OP_REQUIRES_OK(
+          ctx, ctx->allocate_output("char_to_byte_starts",
+                                    {static_cast<int64>(offset_values.size())},
+                                    &output_offset_values));
+      auto out_offset_values = output_offset_values->vec<int64>();
+
+      // Load output tensors from intermediate value arrays.
+      for (int i = 0; i < char_values.size(); ++i) {
+        out_char_values(i) = static_cast<int32>(char_values[i]);
+        out_offset_values(i) = offset_values[i];
+      }
+    } else {
+      for (int i = 0; i < char_values.size(); ++i) {
+        out_char_values(i) = static_cast<int32>(char_values[i]);
+      }
     }
   }
 
  private:
   string input_encoding_;
   ErrorOptions error_options_;
+  bool generate_offsets_ = false;
+};
+
+class UnicodeDecodeOp : public UnicodeDecodeBaseOp {
+ public:
+  explicit UnicodeDecodeOp(OpKernelConstruction* ctx)
+      : UnicodeDecodeBaseOp(ctx, false) {}
+};
+
+class UnicodeDecodeWithOffsetsOp : public UnicodeDecodeBaseOp {
+ public:
+  explicit UnicodeDecodeWithOffsetsOp(OpKernelConstruction* ctx)
+      : UnicodeDecodeBaseOp(ctx, true) {}
 };
 
+REGISTER_KERNEL_BUILDER(Name("UnicodeDecode").Device(DEVICE_CPU),
+                        UnicodeDecodeOp);
 REGISTER_KERNEL_BUILDER(Name("UnicodeDecodeWithOffsets").Device(DEVICE_CPU),
                         UnicodeDecodeWithOffsetsOp);
 
diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc
index 8ea74f1d43..d012ce67fd 100644
--- a/tensorflow/core/ops/string_ops.cc
+++ b/tensorflow/core/ops/string_ops.cc
@@ -296,6 +296,27 @@ REGISTER_OP("UnicodeTranscode")
     .Attr("replace_control_characters: bool = false")
     .SetShapeFn(shape_inference::UnchangedShape);
 
+REGISTER_OP("UnicodeDecode")
+    .Input("input: string")
+    .Output("row_splits: int64")
+    .Output("char_values: int32")
+    .Attr("input_encoding: string")
+    .Attr("errors: {'strict', 'replace', 'ignore'} = 'replace'")
+    .Attr("replacement_char: int = 65533")  // 0xFFFD unicode replacement char
+    .Attr("replace_control_characters: bool = false")
+    .SetShapeFn([](InferenceContext* c) {
+      // row_splits.shape == [input.size() + 1]
+      DimensionHandle num_row_splits;
+      DimensionHandle input_size = c->NumElements(c->input(0));
+      TF_RETURN_IF_ERROR(c->Add(input_size, 1, &num_row_splits));
+      c->set_output(0, c->Vector(num_row_splits));
+
+      // char_values.shape == [num_chars]
+      DimensionHandle num_chars = c->UnknownDim();
+      c->set_output(1, c->Vector(num_chars));
+      return Status::OK();
+    });
+
 REGISTER_OP("UnicodeDecodeWithOffsets")
     .Input("input: string")
     .Output("row_splits: int64")
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index df8c14970a..ddb2ddaf63 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1190,8 +1190,15 @@ tf_py_test(
     srcs = ["unicode_decode_op_test.py"],
     additional_deps = [
         "@absl_py//absl/testing:parameterized",
+        "//tensorflow/python/ops/ragged:ragged_factory_ops",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
+        "//tensorflow/python:errors",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python/ops/ragged:ragged",
+        "//tensorflow/python/ops/ragged:ragged_test_util",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
     ],
 )
diff --git a/tensorflow/python/kernel_tests/unicode_decode_op_test.py b/tensorflow/python/kernel_tests/unicode_decode_op_test.py
index c165021eea..c3b4370499 100644
--- a/tensorflow/python/kernel_tests/unicode_decode_op_test.py
+++ b/tensorflow/python/kernel_tests/unicode_decode_op_test.py
@@ -19,134 +19,680 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
+import numpy as np
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import errors_impl as errors
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_string_ops
+from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import test
 
 
-# Account for python2 and python3 execution of the test.
-def codepoint(s):
-  if isinstance(s, bytes):
-    return ord(s.decode("utf-8"))
-  elif isinstance(s, str):
-    return ord(s)
-
-
-class UnicodeDecodeTest(test.TestCase):
-
-  def testBatchDecode(self):
-    text = constant_op.constant(
-        ["仅今年前", "分享介面終於迎來更新"])
-    row_splits, utf8_text, offsets = gen_string_ops.unicode_decode_with_offsets(
-        text, "utf-8")
-
-    with self.test_session():
-      self.assertAllEqual([
-          codepoint("仅"),
-          codepoint("今"),
-          codepoint("年"),
-          codepoint("前"),
-          codepoint("分"),
-          codepoint("享"),
-          codepoint("介"),
-          codepoint("面"),
-          codepoint("終"),
-          codepoint("於"),
-          codepoint("迎"),
-          codepoint("來"),
-          codepoint("更"),
-          codepoint("新")
-      ],
-                          self.evaluate(utf8_text).tolist())
-      self.assertAllEqual([0, 4, 14], self.evaluate(row_splits).tolist())
-      self.assertAllEqual([0, 3, 6, 9, 0, 3, 6, 9, 12, 15, 18, 21, 24, 27],
-                          self.evaluate(offsets).tolist())
-
-  def testBasicDecodeWithOffset(self):
-    text = constant_op.constant(["仅今年前"])
-    row_splits, utf8_text, starts = gen_string_ops.unicode_decode_with_offsets(
-        text, "utf-8")
-
-    with self.test_session():
-      self.assertAllEqual([
-          codepoint("仅"),
-          codepoint("今"),
-          codepoint("年"),
-          codepoint("前"),
-      ],
-                          self.evaluate(utf8_text).tolist())
-      self.assertAllEqual(self.evaluate(row_splits).tolist(), [0, 4])
-      self.assertAllEqual(self.evaluate(starts).tolist(), [0, 3, 6, 9])
-
-  @test_util.run_deprecated_v1
-  def testStrictError(self):
-    text = constant_op.constant([b"\xFEED"])
-    _, error, _ = gen_string_ops.unicode_decode_with_offsets(
-        text, "utf-8", errors="strict")
-
-    with self.assertRaises(errors.InvalidArgumentError):
-      with self.test_session():
-        self.evaluate(error)
-
-  def testReplaceOnError(self):
-    text = constant_op.constant([b"\xFE"])
-
-    _, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
-        text, "utf-8", errors="replace")
-
-    with self.test_session():
-      self.assertAllEqual(self.evaluate(utf8_text).tolist(), [65533])
-
-  @test_util.run_deprecated_v1
-  def testBadReplacementChar(self):
-    text = constant_op.constant([b"\xFE"])
-    _, error, _ = gen_string_ops.unicode_decode_with_offsets(
-        text, "utf-8", errors="replace", replacement_char=11141111)
-
-    with self.assertRaises(errors.InvalidArgumentError):
-      with self.test_session():
-        self.evaluate(error)
-
-  def testIgnoreOnError(self):
-    text = constant_op.constant([b"\xFEhello"])
-
-    _, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
-        text, "utf-8", errors="ignore")
-
-    with self.test_session():
-      self.assertAllEqual(self.evaluate(utf8_text).tolist(), [
-          codepoint("h"),
-          codepoint("e"),
-          codepoint("l"),
-          codepoint("l"),
-          codepoint("o")
-      ])
-
-  @test_util.run_deprecated_v1
-  def testBadErrorPolicy(self):
-    text = constant_op.constant(["hippopotamus"])
-
-    with self.assertRaises(ValueError):
-      _, _, _ = gen_string_ops.unicode_decode_with_offsets(
-          text, "utf-8", errors="oranguatan")
-
-  def testReplaceControlChars(self):
-    text = constant_op.constant(["\x02仅今年前"])
-    row_splits, utf8_text, _ = gen_string_ops.unicode_decode_with_offsets(
-        text, "utf-8", replace_control_characters=True)
-
-    with self.test_session():
-      self.assertAllEqual([
-          65533,
-          codepoint("仅"),
-          codepoint("今"),
-          codepoint("年"),
-          codepoint("前"),
-      ],
-                          self.evaluate(utf8_text).tolist())
-      self.assertAllEqual([0, 5], self.evaluate(row_splits).tolist())
+def _nested_encode(x, encoding):
+  """Encode each string in a nested list with `encoding`."""
+  if isinstance(x, list):
+    return [_nested_encode(v, encoding) for v in x]
+  else:
+    return x.encode(encoding)
+
+
+def _nested_codepoints(x):
+  """Replace each string in a nested list with a list of its codepoints."""
+  # Works for Python 2 and 3, and for both UCS2 and UCS4 builds
+  if isinstance(x, list):
+    return [_nested_codepoints(v) for v in x]
+  else:
+    b = list(x.encode("utf-32-be"))
+    if any(isinstance(c, str) for c in b):
+      b = [ord(c) for c in b]
+    return [(b0 << 24) + (b1 << 16) + (b2 << 8) + b3
+            for b0, b1, b2, b3 in zip(b[::4], b[1::4], b[2::4], b[3::4])]
+
+
+def _nested_offsets(x, encoding):
+  """Replace each string in a nested list with a list of start offsets."""
+  if isinstance(x, list):
+    return [_nested_offsets(v, encoding) for v in x]
+  else:
+    if not x:
+      return []
+    encoded_x = x.encode("utf-32-be")
+    encoded_chars = [encoded_x[i:i + 4] for i in range(0, len(encoded_x), 4)]
+    char_lens = [
+        len(c.decode("utf-32-be").encode(encoding)) for c in encoded_chars
+    ]
+    return [0] + np.cumsum(char_lens).tolist()[:-1]
+
+
+def _nested_splitchars(x, encoding):
+  """Replace each string in a nested list with a list of char substrings."""
+  if isinstance(x, list):
+    return [_nested_splitchars(v, encoding) for v in x]
+  else:
+    b = x.encode("utf-32-be")
+    chars = zip(b[::4], b[1::4], b[2::4], b[3::4])
+    if str is bytes:
+      return [b"".join(c).decode("utf-32-be").encode(encoding) for c in chars]
+    else:
+      return [bytes(c).decode("utf-32-be").encode(encoding) for c in chars]
+
+
+def _make_sparse_tensor(indices, values, dense_shape, dtype=np.int32):
+  return sparse_tensor.SparseTensorValue(
+      np.array(indices, np.int64), np.array(values, dtype),
+      np.array(dense_shape, np.int64))
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
+                        parameterized.TestCase):
+
+  def testScalarDecode(self):
+    text = constant_op.constant(u"仅今年前".encode("utf-8"))
+    chars = ragged.unicode_decode(text, "utf-8")
+    self.assertAllEqual(chars, [ord(c) for c in u"仅今年前"])
+
+  def testScalarDecodeWithOffset(self):
+    text = constant_op.constant(u"仅今年前".encode("utf-8"))
+    chars, starts = ragged.unicode_decode_with_offsets(text, "utf-8")
+    self.assertAllEqual(chars, [ord(c) for c in u"仅今年前"])
+    self.assertAllEqual(starts, [0, 3, 6, 9])
+
+  def testVectorDecode(self):
+    text = constant_op.constant([u"仅今年前".encode("utf-8"), b"hello"])
+    chars = ragged.unicode_decode(text, "utf-8")
+    expected_chars = [[ord(c) for c in u"仅今年前"],
+                      [ord(c) for c in u"hello"]]
+    self.assertRaggedEqual(chars, expected_chars)
+
+  def testVectorDecodeWithOffset(self):
+    text = constant_op.constant([u"仅今年前".encode("utf-8"), b"hello"])
+    chars, starts = ragged.unicode_decode_with_offsets(text, "utf-8")
+    expected_chars = [[ord(c) for c in u"仅今年前"],
+                      [ord(c) for c in u"hello"]]
+    self.assertRaggedEqual(chars, expected_chars)
+    self.assertRaggedEqual(starts, [[0, 3, 6, 9], [0, 1, 2, 3, 4]])
+
+  @parameterized.parameters([
+      {"texts": u"仅今年前"},
+      {"texts": [u"G\xf6\xf6dnight", u"\U0001f60a"]},
+      {"texts": ["Hello", "world", "", u"👍"]},
+      {"texts": [["Hi", "there"], ["", u"\U0001f60a"]], "ragged_rank": 0},
+      {"texts": [["Hi", "there", ""], [u"😊"]], "ragged_rank": 1},
+      {"texts": [[[u"😊", u"🤠🧐"], []], [[u"🤓👻🤖"]]], "ragged_rank": 2},
+      {"texts": []}
+  ])  # pyformat: disable
+  def testBasicDecode(self, texts, ragged_rank=None):
+    input_tensor = ragged.constant_value(
+        _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
+    result = ragged.unicode_decode(input_tensor, "UTF-8")
+    expected = _nested_codepoints(texts)
+    self.assertRaggedEqual(expected, result)
+
+  @parameterized.parameters([
+      {"texts": u"仅今年前"},
+      {"texts": [u"G\xf6\xf6dnight", u"\U0001f60a"]},
+      {"texts": ["Hello", "world", "", u"👍"]},
+      {"texts": [["Hi", "there"], ["", u"\U0001f60a"]], "ragged_rank": 0},
+      {"texts": [["Hi", "there", ""], [u"😊"]], "ragged_rank": 1},
+      {"texts": [[[u"😊", u"🤠🧐"], []], [[u"🤓👻🤖"]]], "ragged_rank": 2},
+      {"texts": []}
+  ])  # pyformat: disable
+  def testBasicDecodeWithOffsets(self, texts, ragged_rank=None):
+    input_tensor = ragged.constant_value(
+        _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
+    result = ragged.unicode_decode_with_offsets(input_tensor, "UTF-8")
+    expected_codepoints = _nested_codepoints(texts)
+    expected_offsets = _nested_offsets(texts, "UTF-8")
+    self.assertRaggedEqual(expected_codepoints, result[0])
+    self.assertRaggedEqual(expected_offsets, result[1])
+
+  def testDocstringExamples(self):
+    texts = [s.encode("utf8") for s in [u"G\xf6\xf6dnight", u"\U0001f60a"]]
+    codepoints1 = ragged.unicode_decode(texts, "UTF-8")
+    codepoints2, offsets = ragged.unicode_decode_with_offsets(texts, "UTF-8")
+    self.assertRaggedEqual(
+        codepoints1, [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]])
+    self.assertRaggedEqual(
+        codepoints2, [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]])
+    self.assertRaggedEqual(offsets, [[0, 1, 3, 5, 6, 7, 8, 9, 10], [0]])
+
+  @parameterized.parameters([
+      dict(
+          texts=["Hello", "world", "", u"👍"],
+          expected=_make_sparse_tensor(
+              indices=[[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [1, 0], [1, 1],
+                       [1, 2], [1, 3], [1, 4], [3, 0]],
+              values=[72, 101, 108, 108, 111, 119, 111, 114, 108, 100, 128077],
+              dense_shape=[4, 5])),
+      dict(
+          texts=[["Hi", "there"], ["", u"\U0001f60a"]],
+          expected=_make_sparse_tensor(
+              indices=[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [0, 1, 2],
+                       [0, 1, 3], [0, 1, 4], [1, 1, 0]],
+              values=[72, 105, 116, 104, 101, 114, 101, 128522],
+              dense_shape=[2, 2, 5])),
+      dict(
+          texts=[],
+          expected=_make_sparse_tensor(np.zeros([0, 2], np.int64), [], [0, 0])),
+  ])
+  def testDecodeWithSparseOutput(self, texts, expected):
+    input_tensor = np.array(_nested_encode(texts, "UTF-8"), dtype=bytes)
+    result = ragged.unicode_decode(
+        input_tensor, "UTF-8").to_sparse()
+    self.assertIsInstance(result, sparse_tensor.SparseTensor)
+    self.assertAllEqual(expected.indices, result.indices)
+    self.assertAllEqual(expected.values, result.values)
+    self.assertAllEqual(expected.dense_shape, result.dense_shape)
+
+  @parameterized.parameters([
+      dict(
+          texts=["Hello", "world", "", u"👍"],
+          expected=[[72, 101, 108, 108, 111], [119, 111, 114, 108, 100],
+                    [-1, -1, -1, -1, -1], [128077, -1, -1, -1, -1]]),
+      dict(
+          texts=[["Hi", "there"], ["", u"\U0001f60a"]],
+          expected=[[[72, 105, -1, -1, -1], [116, 104, 101, 114, 101]],
+                    [[-1, -1, -1, -1, -1], [128522, -1, -1, -1, -1]]],
+          ragged_rank=0),
+      dict(
+          texts=[["Hi", "there", ""], [u"😊"]],
+          expected=[[[72, 105, -1, -1, -1],
+                     [116, 104, 101, 114, 101],
+                     [-1, -1, -1, -1, -1]],
+                    [[128522, -1, -1, -1, -1],
+                     [-1, -1, -1, -1, -1],
+                     [-1, -1, -1, -1, -1]]]),
+      dict(
+          texts=[[[u"😊", u"🤠🧐"], []], [[u"🤓👻🤖"]]],
+          expected=[
+              [[[128522, -1, -1], [129312, 129488, -1]],
+               [[-1, -1, -1], [-1, -1, -1]]],
+              [[[129299, 128123, 129302], [-1, -1, -1]],
+               [[-1, -1, -1], [-1, -1, -1]]]]),
+      dict(texts=[], expected=np.zeros([0, 0], np.int64)),
+  ])  # pyformat: disable
+  def testDecodeWithPaddedOutput(self, texts, expected, ragged_rank=None):
+    input_tensor = ragged.constant_value(
+        _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
+    result = ragged.unicode_decode(
+        input_tensor, "UTF-8").to_tensor(default_value=-1)
+    self.assertAllEqual(expected, result)
+
+  @parameterized.parameters([
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="replace",
+          expected=[[65533], [104, 101, 108, 108, 111],
+                    [61, 61, 65533, 61, 61], [119, 111, 114, 108, 100]]),
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="replace",
+          replacement_char=0,
+          expected=[[0], [104, 101, 108, 108, 111],
+                    [61, 61, 0, 61, 61], [119, 111, 114, 108, 100]]),
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="ignore",
+          expected=[[], [104, 101, 108, 108, 111],
+                    [61, 61, 61, 61], [119, 111, 114, 108, 100]]),
+      dict(
+          input=[b"\x00", b"hello", b"==\x01==", b"world"],
+          input_encoding="UTF-8",
+          replace_control_characters=True,
+          expected=[[65533], [104, 101, 108, 108, 111],
+                    [61, 61, 65533, 61, 61], [119, 111, 114, 108, 100]]),
+      dict(
+          input=[b"\x00", b"hello", b"==\x01==", b"world"],
+          input_encoding="UTF-8",
+          replace_control_characters=True,
+          replacement_char=0,
+          expected=[[0], [104, 101, 108, 108, 111],
+                    [61, 61, 0, 61, 61], [119, 111, 114, 108, 100]]),
+  ])  # pyformat: disable
+  def testErrorModes(self, expected=None, **args):
+    result = ragged.unicode_decode(**args)
+    self.assertRaggedEqual(expected, result)
+
+  @parameterized.parameters([
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="replace",
+          expected=[[65533], [104, 101, 108, 108, 111],
+                    [61, 61, 65533, 61, 61], [119, 111, 114, 108, 100]],
+          expected_offsets=[[0], [0, 1, 2, 3, 4],
+                            [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]),
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="replace",
+          replacement_char=0,
+          expected=[[0], [104, 101, 108, 108, 111],
+                    [61, 61, 0, 61, 61], [119, 111, 114, 108, 100]],
+          expected_offsets=[[0], [0, 1, 2, 3, 4],
+                            [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]),
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="ignore",
+          expected=[[], [104, 101, 108, 108, 111],
+                    [61, 61, 61, 61], [119, 111, 114, 108, 100]],
+          expected_offsets=[[], [0, 1, 2, 3, 4],
+                            [0, 1, 3, 4], [0, 1, 2, 3, 4]]),
+      dict(
+          input=[b"\x00", b"hello", b"==\x01==", b"world"],
+          input_encoding="UTF-8",
+          replace_control_characters=True,
+          expected=[[65533], [104, 101, 108, 108, 111],
+                    [61, 61, 65533, 61, 61], [119, 111, 114, 108, 100]],
+          expected_offsets=[[0], [0, 1, 2, 3, 4],
+                            [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]),
+      dict(
+          input=[b"\x00", b"hello", b"==\x01==", b"world"],
+          input_encoding="UTF-8",
+          replace_control_characters=True,
+          replacement_char=0,
+          expected=[[0], [104, 101, 108, 108, 111],
+                    [61, 61, 0, 61, 61], [119, 111, 114, 108, 100]],
+          expected_offsets=[[0], [0, 1, 2, 3, 4],
+                            [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]),
+  ])  # pyformat: disable
+  def testErrorModesWithOffsets(self,
+                                expected=None,
+                                expected_offsets=None,
+                                **args):
+    result = ragged.unicode_decode_with_offsets(**args)
+    self.assertRaggedEqual(result[0], expected)
+    self.assertRaggedEqual(result[1], expected_offsets)
+
+  @parameterized.parameters(
+      ("UTF-8", [u"こんにちは", u"你好", u"Hello"]),
+      ("UTF-16-BE", [u"こんにちは", u"你好", u"Hello"]),
+      ("UTF-32-BE", [u"こんにちは", u"你好", u"Hello"]),
+      ("US-ASCII", [u"Hello", "world"]),
+      ("ISO-8859-1", [u"ÀÈÓ", "AEO"]),
+      ("SHIFT-JIS", [u"Hello", u"こんにちは"]),
+  )
+  def testDecodeWithDifferentEncodings(self, encoding, texts):
+    expected = _nested_codepoints(texts)
+    input_tensor = constant_op.constant(_nested_encode(texts, encoding))
+    result = ragged.unicode_decode(input_tensor, encoding)
+    self.assertRaggedEqual(expected, result)
+
+  @parameterized.parameters(
+      ("UTF-8", [u"こんにちは", u"你好", u"Hello"]),
+      ("UTF-16-BE", [u"こんにちは", u"你好", u"Hello"]),
+      ("UTF-32-BE", [u"こんにちは", u"你好", u"Hello"]),
+      ("US-ASCII", [u"Hello", "world"]),
+      ("ISO-8859-1", [u"ÀÈÓ", "AEO"]),
+      ("SHIFT-JIS", [u"Hello", u"こんにちは"]),
+  )
+  def testDecodeWithOffsetsWithDifferentEncodings(self, encoding, texts):
+    expected_codepoints = _nested_codepoints(texts)
+    expected_offsets = _nested_offsets(texts, encoding)
+    input_tensor = constant_op.constant(_nested_encode(texts, encoding))
+    result = ragged.unicode_decode_with_offsets(input_tensor, encoding)
+    self.assertRaggedEqual(expected_codepoints, result[0])
+    self.assertRaggedEqual(expected_offsets, result[1])
+
+  @parameterized.parameters([
+      dict(input=[b"\xFEED"],
+           errors="strict",
+           input_encoding="UTF-8",
+           exception=errors.InvalidArgumentError,
+           message="Invalid formatting on input string"),
+      dict(input="x",
+           input_encoding="UTF-8",
+           replacement_char=11141111,
+           exception=errors.InvalidArgumentError,
+           message="replacement_char out of unicode codepoint range"),
+      dict(input="x",
+           input_encoding="UTF-8",
+           errors="oranguatan",
+           exception=(ValueError, errors.InvalidArgumentError)),
+  ])  # pyformat: disable
+  def testExceptions(self, exception=None, message=None, **args):
+    with self.assertRaisesRegexp(exception, message):
+      self.evaluate(ragged.unicode_decode(**args))
+
+  def testUnknownRankError(self):
+    if context.executing_eagerly(): return
+    s = array_ops.placeholder(dtypes.string)
+    message = "Rank of `input` must be statically known."
+    with self.assertRaisesRegexp(ValueError, message):
+      self.evaluate(ragged.unicode_decode(s, input_encoding="UTF-8"))
+
+  @parameterized.parameters([
+      dict(
+          doc="Single string",
+          input=_nested_encode([u"仅今年前"], "utf-8"),
+          input_encoding="UTF-8",
+          expected_char_values=_nested_codepoints(u"仅今年前"),
+          expected_row_splits=[0, 4],
+          expected_char_to_byte_starts=[0, 3, 6, 9]),
+      dict(
+          doc="Multiple strings",
+          input=_nested_encode([u"仅今年前", u"你好"], "utf-8"),
+          input_encoding="UTF-8",
+          expected_char_values=_nested_codepoints(u"仅今年前你好"),
+          expected_row_splits=[0, 4, 6],
+          expected_char_to_byte_starts=[0, 3, 6, 9, 0, 3]),
+      dict(
+          doc="errors=replace",
+          input=b"=\xFE=",
+          input_encoding="UTF-8",
+          errors="replace",
+          expected_char_values=[61, 65533, 61],
+          expected_row_splits=[0, 3],
+          expected_char_to_byte_starts=[0, 1, 2]),
+      dict(
+          doc="errors=ignore",
+          input=b"=\xFE=",
+          input_encoding="UTF-8",
+          errors="ignore",
+          expected_char_values=[61, 61],
+          expected_row_splits=[0, 2],
+          expected_char_to_byte_starts=[0, 2]),
+  ])
+  def testDecodeGenOp(self,
+                      doc,
+                      expected_row_splits=None,
+                      expected_char_values=None,
+                      expected_char_to_byte_starts=None,
+                      **args):
+    """Test for the c++ interface (gen_string_ops.unicode_decode)."""
+    result = gen_string_ops.unicode_decode_with_offsets(**args)
+    self.assertAllEqual(expected_row_splits, result.row_splits)
+    self.assertAllEqual(expected_char_values, result.char_values)
+    self.assertAllEqual(expected_char_to_byte_starts,
+                        result.char_to_byte_starts)
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
+                       parameterized.TestCase):
+
+  def testScalarSplit(self):
+    text = constant_op.constant(u"仅今年前".encode("UTF-8"))
+    chars = ragged.unicode_split(text, "UTF-8")
+    self.assertAllEqual(chars, [c.encode("UTF-8") for c in u"仅今年前"])
+
+  def testScalarSplitWithOffset(self):
+    text = constant_op.constant(u"仅今年前".encode("UTF-8"))
+    chars, starts = ragged.unicode_split_with_offsets(text, "UTF-8")
+    self.assertAllEqual(chars, [c.encode("UTF-8") for c in u"仅今年前"])
+    self.assertAllEqual(starts, [0, 3, 6, 9])
+
+  def testVectorSplit(self):
+    text = constant_op.constant([u"仅今年前".encode("UTF-8"), b"hello"])
+    chars = ragged.unicode_split(text, "UTF-8")
+    expected_chars = [[c.encode("UTF-8") for c in u"仅今年前"],
+                      [c.encode("UTF-8") for c in u"hello"]]
+    self.assertRaggedEqual(chars, expected_chars)
+
+  def testVectorSplitWithOffset(self):
+    text = constant_op.constant([u"仅今年前".encode("UTF-8"), b"hello"])
+    chars, starts = ragged.unicode_split_with_offsets(text, "UTF-8")
+    expected_chars = [[c.encode("UTF-8") for c in u"仅今年前"],
+                      [c.encode("UTF-8") for c in u"hello"]]
+    self.assertRaggedEqual(chars, expected_chars)
+    self.assertRaggedEqual(starts, [[0, 3, 6, 9], [0, 1, 2, 3, 4]])
+
+  @parameterized.parameters([
+      {"texts": u"仅今年前"},
+      {"texts": [u"G\xf6\xf6dnight", u"\U0001f60a"]},
+      {"texts": ["Hello", "world", "", u"👍"]},
+      {"texts": [["Hi", "there"], ["", u"\U0001f60a"]], "ragged_rank": 0},
+      {"texts": [["Hi", "there", ""], [u"😊"]], "ragged_rank": 1},
+      {"texts": [[[u"😊", u"🤠🧐"], []], [[u"🤓👻🤖"]]], "ragged_rank": 2},
+      {"texts": []}
+  ])  # pyformat: disable
+  def testBasicSplit(self, texts, ragged_rank=None):
+    input_tensor = ragged.constant_value(
+        _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
+    result = ragged.unicode_split(input_tensor, "UTF-8")
+    expected = _nested_splitchars(texts, "UTF-8")
+    self.assertRaggedEqual(expected, result)
+
+  @parameterized.parameters([
+      {"texts": u"仅今年前"},
+      {"texts": [u"G\xf6\xf6dnight", u"\U0001f60a"]},
+      {"texts": ["Hello", "world", "", u"👍"]},
+      {"texts": [["Hi", "there"], ["", u"\U0001f60a"]], "ragged_rank": 0},
+      {"texts": [["Hi", "there", ""], [u"😊"]], "ragged_rank": 1},
+      {"texts": [[[u"😊", u"🤠🧐"], []], [[u"🤓👻🤖"]]], "ragged_rank": 2},
+      {"texts": []}
+  ])  # pyformat: disable
+  def testBasicSplitWithOffsets(self, texts, ragged_rank=None):
+    input_tensor = ragged.constant_value(
+        _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
+    result = ragged.unicode_split_with_offsets(input_tensor, "UTF-8")
+    expected_codepoints = _nested_splitchars(texts, "UTF-8")
+    expected_offsets = _nested_offsets(texts, "UTF-8")
+    self.assertRaggedEqual(expected_codepoints, result[0])
+    self.assertRaggedEqual(expected_offsets, result[1])
+
+  def testDocstringExamples(self):
+    texts = [s.encode("utf8") for s in [u"G\xf6\xf6dnight", u"\U0001f60a"]]
+    codepoints1 = ragged.unicode_split(texts, "UTF-8")
+    codepoints2, offsets = ragged.unicode_split_with_offsets(texts, "UTF-8")
+    self.assertRaggedEqual(
+        codepoints1,
+        [[b"G", b"\xc3\xb6", b"\xc3\xb6", b"d", b"n", b"i", b"g", b"h", b"t"],
+         [b"\xf0\x9f\x98\x8a"]])
+    self.assertRaggedEqual(
+        codepoints2,
+        [[b"G", b"\xc3\xb6", b"\xc3\xb6", b"d", b"n", b"i", b"g", b"h", b"t"],
+         [b"\xf0\x9f\x98\x8a"]])
+    self.assertRaggedEqual(offsets, [[0, 1, 3, 5, 6, 7, 8, 9, 10], [0]])
+
+  @parameterized.parameters([
+      dict(
+          texts=["Hello", "world", "", u"👍"],
+          expected=_make_sparse_tensor(
+              indices=[[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [1, 0], [1, 1],
+                       [1, 2], [1, 3], [1, 4], [3, 0]],
+              values=[b"H", b"e", b"l", b"l", b"o",
+                      b"w", b"o", b"r", b"l", b"d", b"\xf0\x9f\x91\x8d"],
+              dense_shape=[4, 5],
+              dtype=bytes)),
+      dict(
+          texts=[["Hi", "there"], ["", u"\U0001f60a"]],
+          expected=_make_sparse_tensor(
+              indices=[[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [0, 1, 2],
+                       [0, 1, 3], [0, 1, 4], [1, 1, 0]],
+              values=[b"H", b"i", b"t", b"h", b"e", b"r", b"e",
+                      b"\xf0\x9f\x98\x8a"],
+              dense_shape=[2, 2, 5],
+              dtype=bytes)),
+      dict(
+          texts=[],
+          expected=_make_sparse_tensor(
+              np.zeros([0, 2], np.int64), [], [0, 0], dtype=bytes)),
+  ])  # pyformat: disable
+  def testSplitWithSparseOutput(self, texts, expected):
+    input_tensor = np.array(_nested_encode(texts, "UTF-8"), dtype=bytes)
+    result = ragged.unicode_split(
+        input_tensor, "UTF-8").to_sparse()
+    self.assertIsInstance(result, sparse_tensor.SparseTensor)
+    self.assertAllEqual(expected.indices, result.indices)
+    self.assertAllEqual(expected.values, result.values)
+    self.assertAllEqual(expected.dense_shape, result.dense_shape)
+
+  @parameterized.parameters([
+      dict(
+          texts=["Hello", "world", "", u"👍"],
+          expected=[[b"H", b"e", b"l", b"l", b"o"],
+                    [b"w", b"o", b"r", b"l", b"d"],
+                    ["", "", "", "", ""],
+                    [b"\xf0\x9f\x91\x8d", "", "", "", ""]]),
+      dict(
+          texts=[["Hi", "there"], ["", u"\U0001f60a"]],
+          expected=[[[b"H", b"i", "", "", ""],
+                     [b"t", b"h", b"e", b"r", b"e"]],
+                    [["", "", "", "", ""],
+                     [b"\xf0\x9f\x98\x8a", "", "", "", ""]]],
+          ragged_rank=0),
+      dict(
+          texts=[["Hi", "there", ""], [u"😊"]],
+          expected=[[[b"H", b"i", "", "", ""],
+                     [b"t", b"h", b"e", b"r", b"e"],
+                     ["", "", "", "", ""]],
+                    [[b"\xf0\x9f\x98\x8a", "", "", "", ""],
+                     ["", "", "", "", ""],
+                     ["", "", "", "", ""]]]),
+      dict(
+          texts=[[[u"😊", u"🤠🧐"], []], [[u"🤓👻🤖"]]],
+          expected=[[[[b"\xf0\x9f\x98\x8a", "", ""],
+                      [b"\xf0\x9f\xa4\xa0", b"\xf0\x9f\xa7\x90", ""]],
+                     [["", "", ""],
+                      ["", "", ""]]],
+                    [[[b"\xf0\x9f\xa4\x93", b"\xf0\x9f\x91\xbb",
+                       b"\xf0\x9f\xa4\x96"],
+                      ["", "", ""]],
+                     [["", "", ""],
+                      ["", "", ""]]]]),
+      dict(texts=[], expected=np.zeros([0, 0], np.int64)),
+  ])  # pyformat: disable
+  def testSplitWithPaddedOutput(self, texts, expected, ragged_rank=None):
+    input_tensor = ragged.constant_value(
+        _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
+    result = ragged.unicode_split(
+        input_tensor, "UTF-8").to_tensor(default_value="")
+    self.assertAllEqual(np.array(expected, dtype=bytes), result)
+
+  @parameterized.parameters([
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="replace",
+          expected=[[b"\xef\xbf\xbd"],
+                    [b"h", b"e", b"l", b"l", b"o"],
+                    [b"=", b"=", b"\xef\xbf\xbd", b"=", b"="],
+                    [b"w", b"o", b"r", b"l", b"d"]]),
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="replace",
+          replacement_char=0,
+          expected=[[b"\x00"],
+                    [b"h", b"e", b"l", b"l", b"o"],
+                    [b"=", b"=", b"\x00", b"=", b"="],
+                    [b"w", b"o", b"r", b"l", b"d"]]),
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="ignore",
+          expected=[[],
+                    [b"h", b"e", b"l", b"l", b"o"],
+                    [b"=", b"=", b"=", b"="],
+                    [b"w", b"o", b"r", b"l", b"d"]]),
+  ])  # pyformat: disable
+  def testErrorModes(self, expected=None, **args):
+    result = ragged.unicode_split(**args)
+    self.assertRaggedEqual(expected, result)
+
+  @parameterized.parameters([
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="replace",
+          expected=[[b"\xef\xbf\xbd"],
+                    [b"h", b"e", b"l", b"l", b"o"],
+                    [b"=", b"=", b"\xef\xbf\xbd", b"=", b"="],
+                    [b"w", b"o", b"r", b"l", b"d"]],
+          expected_offsets=[[0], [0, 1, 2, 3, 4],
+                            [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]),
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="replace",
+          replacement_char=0,
+          expected=[[b"\x00"],
+                    [b"h", b"e", b"l", b"l", b"o"],
+                    [b"=", b"=", b"\x00", b"=", b"="],
+                    [b"w", b"o", b"r", b"l", b"d"]],
+          expected_offsets=[[0], [0, 1, 2, 3, 4],
+                            [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]),
+      dict(
+          input=[b"\xFE", b"hello", b"==\xFF==", b"world"],
+          input_encoding="UTF-8",
+          errors="ignore",
+          expected=[[],
+                    [b"h", b"e", b"l", b"l", b"o"],
+                    [b"=", b"=", b"=", b"="],
+                    [b"w", b"o", b"r", b"l", b"d"]],
+          expected_offsets=[[], [0, 1, 2, 3, 4],
+                            [0, 1, 3, 4], [0, 1, 2, 3, 4]]),
+  ])  # pyformat: disable
+  def testErrorModesWithOffsets(self,
+                                expected=None,
+                                expected_offsets=None,
+                                **args):
+    result = ragged.unicode_split_with_offsets(**args)
+    self.assertRaggedEqual(expected, result[0])
+    self.assertRaggedEqual(expected_offsets, result[1])
+
+  @parameterized.parameters(
+      ("UTF-8", [u"こんにちは", u"你好", u"Hello"]),
+      ("UTF-16-BE", [u"こんにちは", u"你好", u"Hello"]),
+      ("UTF-32-BE", [u"こんにちは", u"你好", u"Hello"]),
+  )
+  def testSplitWithDifferentEncodings(self, encoding, texts):
+    expected = _nested_splitchars(texts, encoding)
+    input_tensor = constant_op.constant(_nested_encode(texts, encoding))
+    result = ragged.unicode_split(input_tensor, encoding)
+    self.assertRaggedEqual(expected, result)
+
+  @parameterized.parameters(
+      ("UTF-8", [u"こんにちは", u"你好", u"Hello"]),
+      ("UTF-16-BE", [u"こんにちは", u"你好", u"Hello"]),
+      ("UTF-32-BE", [u"こんにちは", u"你好", u"Hello"]),
+  )
+  def testSplitWithOffsetsWithDifferentEncodings(self, encoding, texts):
+    expected_codepoints = _nested_splitchars(texts, encoding)
+    expected_offsets = _nested_offsets(texts, encoding)
+    input_tensor = constant_op.constant(_nested_encode(texts, encoding))
+    result = ragged.unicode_split_with_offsets(input_tensor, encoding)
+    self.assertRaggedEqual(expected_codepoints, result[0])
+    self.assertRaggedEqual(expected_offsets, result[1])
+
+  @parameterized.parameters([
+      dict(input=[b"\xFEED"],
+           errors="strict",
+           input_encoding="UTF-8",
+           exception=errors.InvalidArgumentError,
+           message="Invalid formatting on input string"),
+      dict(input="x",
+           input_encoding="UTF-8",
+           replacement_char=11141111,
+           exception=errors.InvalidArgumentError,
+           message="replacement_char out of unicode codepoint range"),
+      dict(input="x",
+           input_encoding="UTF-8",
+           errors="oranguatan",
+           exception=(ValueError, errors.InvalidArgumentError)),
+  ])  # pyformat: disable
+  def testExceptions(self, exception=None, message=None, **args):
+    with self.assertRaisesRegexp(exception, message):
+      self.evaluate(ragged.unicode_split(**args))
+
+  def testUnknownRankError(self):
+    if context.executing_eagerly(): return
+    s = array_ops.placeholder(dtypes.string)
+    message = "Rank of `input` must be statically known."
+    with self.assertRaisesRegexp(ValueError, message):
+      self.evaluate(ragged.unicode_decode(s, input_encoding="UTF-8"))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/ops/ragged/BUILD b/tensorflow/python/ops/ragged/BUILD
index 440d9db824..1922e4b92a 100644
--- a/tensorflow/python/ops/ragged/BUILD
+++ b/tensorflow/python/ops/ragged/BUILD
@@ -172,12 +172,15 @@ py_library(
     srcs = ["ragged_string_ops.py"],
     srcs_version = "PY2AND3",
     deps = [
+        ":ragged_array_ops",
         ":ragged_conversion_ops",
         ":ragged_factory_ops",
         ":ragged_tensor",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
         "//tensorflow/python:util",
     ],
diff --git a/tensorflow/python/ops/ragged/__init__.py b/tensorflow/python/ops/ragged/__init__.py
index f23f506e06..3e35892925 100644
--- a/tensorflow/python/ops/ragged/__init__.py
+++ b/tensorflow/python/ops/ragged/__init__.py
@@ -67,6 +67,12 @@ class documentation.
 @@broadcast_to
 @@broadcast_dynamic_shape
 
+<!-- String ops -->
+@@unicode_decode
+@@unicode_decode_with_offsets
+@@unicode_split
+@@unicode_split_with_offsets
+
 <!-- Modules -->
 @@ragged_dispatch
 @@ragged_factory_ops
@@ -128,6 +134,11 @@ from tensorflow.python.ops.ragged.ragged_math_ops import segment_prod
 from tensorflow.python.ops.ragged.ragged_math_ops import segment_sqrt_n
 from tensorflow.python.ops.ragged.ragged_math_ops import segment_sum
 
+from tensorflow.python.ops.ragged.ragged_string_ops import unicode_decode
+from tensorflow.python.ops.ragged.ragged_string_ops import unicode_decode_with_offsets
+from tensorflow.python.ops.ragged.ragged_string_ops import unicode_split
+from tensorflow.python.ops.ragged.ragged_string_ops import unicode_split_with_offsets
+
 from tensorflow.python.ops.ragged.ragged_tensor import convert_to_tensor_or_ragged_tensor
 from tensorflow.python.ops.ragged.ragged_tensor import is_ragged
 from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor
diff --git a/tensorflow/python/ops/ragged/ragged_string_ops.py b/tensorflow/python/ops/ragged/ragged_string_ops.py
index 1f9f0abe4f..80216376f3 100644
--- a/tensorflow/python/ops/ragged/ragged_string_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_string_ops.py
@@ -22,6 +22,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_string_ops
+from tensorflow.python.ops.ragged import ragged_array_ops
 from tensorflow.python.ops.ragged import ragged_conversion_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.util.tf_export import tf_export
@@ -29,16 +30,19 @@ from tensorflow.python.util.tf_export import tf_export
 
 # pylint: disable=redefined-builtin
 @tf_export("strings.unicode_encode")
-def unicode_encode(input, output_encoding, errors="replace",
-                   replacement_char=65533, name=None):
+def unicode_encode(input,
+                   output_encoding,
+                   errors="replace",
+                   replacement_char=65533,
+                   name=None):
   r"""Encodes each sequence of Unicode code points in `input` into a string.
 
   `result[i1...iN]` is the string formed by concatenating the Unicode
   codepoints `input[1...iN, :]`, encoded using `output_encoding`.
 
   Args:
-    input: An `N+1` dimensional potentially ragged integer tensor with
-        shape `[D1...DN, num_chars]`.
+    input: An `N+1` dimensional potentially ragged integer tensor with shape
+      `[D1...DN, num_chars]`.
     output_encoding: Unicode encoding that should be used to encode each
       codepoint sequence.  Can be `"UTF-8"`, `"UTF-16-BE"`, or `"UTF-32-BE"`.
     errors: Specifies the response when an invalid codepoint is encountered
@@ -92,8 +96,9 @@ def unicode_encode(input, output_encoding, errors="replace",
     else:
       if input_tensor.shape.ndims == 2:
         # The input tensor is of the correct 2-D shape, it's just not ragged.
-        return unicode_encode(ragged_conversion_ops.from_tensor(input_tensor),
-                              output_encoding, errors, replacement_char)
+        return unicode_encode(
+            ragged_conversion_ops.from_tensor(input_tensor), output_encoding,
+            errors, replacement_char)
       elif input_tensor.shape.ndims > 2:
         # We need to initially flatten the input tensor to 2-D, and then can
         # reshape the output of our processed flattened tensor.
@@ -116,3 +121,282 @@ def unicode_encode(input, output_encoding, errors="replace",
         output_tensor = unicode_encode(ragged_input_tensor, output_encoding,
                                        errors, replacement_char)
         return array_ops.reshape(output_tensor, [])
+
+
+# pylint: disable=redefined-builtin
+@tf_export("strings.unicode_decode")
+def unicode_decode(input,
+                   input_encoding,
+                   errors="replace",
+                   replacement_char=0xFFFD,
+                   replace_control_characters=False,
+                   name=None):
+  r"""Decodes each string in `input` into a sequence of Unicode code points.
+
+  `result[i1...iN, j]` is the Unicode codepoint for the `j`th character in
+  `input[i1...iN]`, when decoded using `input_encoding`.
+
+  Args:
+    input: An `N` dimensional potentially ragged `string` tensor with shape
+      `[D1...DN]`.  `N` must be statically known.
+    input_encoding: String name for the unicode encoding that should be used to
+      decode each string.
+    errors: Specifies the response when an input string can't be converted
+      using the indicated encoding. One of:
+      * `'strict'`: Raise an exception for any illegal substrings.
+      * `'replace'`: Replace illegal substrings with `replacement_char`.
+      * `'ignore'`: Skip illegal substrings.
+    replacement_char: The replacement codepoint to be used in place of invalid
+      substrings in `input` when `errors='replace'`; and in place of C0 control
+      characters in `input` when `replace_control_characters=True`.
+    replace_control_characters: Whether to replace the C0 control characters
+      `(U+0000 - U+001F)` with the `replacement_char`.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `N+1` dimensional `int32` tensor with shape `[D1...DN, (num_chars)]`.
+    The returned tensor is a `tf.Tensor` if `input` is a scalar, or a
+    `tf.RaggedTensor` otherwise.
+
+  #### Example:
+    ```python
+    >>> input = [s.encode('utf8') for s in (u'G\xf6\xf6dnight', u'\U0001f60a')]
+    >>> tf.strings.unicode_decode(input, 'UTF-8').tolist()
+    [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]]
+    ```
+  """
+  with ops.name_scope(name, "UnicodeDecode", [input]):
+    return _unicode_decode(input, input_encoding, errors, replacement_char,
+                           replace_control_characters, with_offsets=False)
+
+
+@tf_export("strings.unicode_decode_with_offsets")
+def unicode_decode_with_offsets(input,
+                                input_encoding,
+                                errors="replace",
+                                replacement_char=0xFFFD,
+                                replace_control_characters=False,
+                                name=None):
+  r"""Decodes each string into a sequence of code points with start offsets.
+
+  This op is similar to `tf.strings.decode(...)`, but it also returns the
+  start offset for each character in its respective string.  This information
+  can be used to align the characters with the original byte sequence.
+
+  Returns a tuple `(codepoints, start_offsets)` where:
+
+  * `codepoints[i1...iN, j]` is the Unicode codepoint for the `j`th character
+    in `input[i1...iN]`, when decoded using `input_encoding`.
+  * `start_offsets[i1...iN, j]` is the start byte offset for the `j`th
+    character in `input[i1...iN]`, when decoded using `input_encoding`.
+
+  Args:
+    input: An `N` dimensional potentially ragged `string` tensor with shape
+      `[D1...DN]`.  `N` must be statically known.
+    input_encoding: String name for the unicode encoding that should be used to
+      decode each string.
+    errors: Specifies the response when an input string can't be converted
+      using the indicated encoding. One of:
+      * `'strict'`: Raise an exception for any illegal substrings.
+      * `'replace'`: Replace illegal substrings with `replacement_char`.
+      * `'ignore'`: Skip illegal substrings.
+    replacement_char: The replacement codepoint to be used in place of invalid
+      substrings in `input` when `errors='replace'`; and in place of C0 control
+      characters in `input` when `replace_control_characters=True`.
+    replace_control_characters: Whether to replace the C0 control characters
+      `(U+0000 - U+001F)` with the `replacement_char`.
+    name: A name for the operation (optional).
+
+  Returns:
+    A tuple of `N+1` dimensional tensors `(codepoints, start_offsets)`.
+
+    * `codepoints` is an `int32` tensor with shape `[D1...DN, (num_chars)]`.
+    * `offsets` is an `int64` tensor with shape `[D1...DN, (num_chars)]`.
+
+    The returned tensors are `tf.Tensor`s if `input` is a scalar, or
+    `tf.RaggedTensor`s otherwise.
+
+  #### Example:
+    ```python
+    >>> input = [s.encode('utf8') for s in (u'G\xf6\xf6dnight', u'\U0001f60a')]
+    >>> result = tf.strings.unicode_decode_with_offsets(input, 'UTF-8')
+    >>> result[0].tolist()  # codepoints
+    [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]]
+    >>> result[1].tolist()  # offsets
+   [[0, 1, 3, 5, 6, 7, 8, 9, 10], [0]]
+    ```
+  """
+  with ops.name_scope(name, "UnicodeDecodeWithOffsets", [input]):
+    return _unicode_decode(input, input_encoding, errors, replacement_char,
+                           replace_control_characters, with_offsets=True)
+
+
+@tf_export("strings.unicode_split")
+def unicode_split(input,
+                  input_encoding,
+                  errors="replace",
+                  replacement_char=0xFFFD,
+                  name=None):
+  r"""Splits each string in `input` into a sequence of Unicode code points.
+
+  `result[i1...iN, j]` is the substring of `input[i1...iN] that encodes its
+  `j`th character, when decoded using `input_encoding`.
+
+  Args:
+    input: An `N` dimensional potentially ragged `string` tensor with shape
+      `[D1...DN]`.  `N` must be statically known.
+    input_encoding: String name for the unicode encoding that should be used to
+      decode each string.
+    errors: Specifies the response when an input string can't be converted
+      using the indicated encoding. One of:
+      * `'strict'`: Raise an exception for any illegal substrings.
+      * `'replace'`: Replace illegal substrings with `replacement_char`.
+      * `'ignore'`: Skip illegal substrings.
+    replacement_char: The replacement codepoint to be used in place of invalid
+      substrings in `input` when `errors='replace'`.
+    name: A name for the operation (optional).
+
+  Returns:
+    A `N+1` dimensional `int32` tensor with shape `[D1...DN, (num_chars)]`.
+    The returned tensor is a `tf.Tensor` if `input` is a scalar, or a
+    `tf.RaggedTensor` otherwise.
+
+  #### Example:
+    ```python
+    >>> input = [s.encode('utf8') for s in (u'G\xf6\xf6dnight', u'\U0001f60a')]
+    >>> tf.strings.unicode_split(input, 'UTF-8').tolist()
+    [['G', '\xc3\xb6', '\xc3\xb6', 'd', 'n', 'i', 'g', 'h', 't'],
+     ['\xf0\x9f\x98\x8a']]
+    ```
+  """
+  with ops.name_scope(name, "UnicodeSplit", [input]):
+    codepoints = _unicode_decode(input, input_encoding, errors,
+                                 replacement_char, False, with_offsets=False)
+    return unicode_encode(
+        ragged_array_ops.expand_dims(codepoints, -1),
+        output_encoding=input_encoding,
+        errors=errors,
+        replacement_char=replacement_char)
+
+
+@tf_export("strings.unicode_split_with_offsets")
+def unicode_split_with_offsets(input,
+                               input_encoding,
+                               errors="replace",
+                               replacement_char=0xFFFD,
+                               name=None):
+  r"""Splits each string into a sequence of code points with start offsets.
+
+  This op is similar to `tf.strings.decode(...)`, but it also returns the
+  start offset for each character in its respective string.  This information
+  can be used to align the characters with the original byte sequence.
+
+  Returns a tuple `(chars, start_offsets)` where:
+
+  * `chars[i1...iN, j]` is the substring of `input[i1...iN] that encodes its
+    `j`th character, when decoded using `input_encoding`.
+  * `start_offsets[i1...iN, j]` is the start byte offset for the `j`th
+    character in `input[i1...iN]`, when decoded using `input_encoding`.
+
+  Args:
+    input: An `N` dimensional potentially ragged `string` tensor with shape
+      `[D1...DN]`.  `N` must be statically known.
+    input_encoding: String name for the unicode encoding that should be used to
+      decode each string.
+    errors: Specifies the response when an input string can't be converted
+      using the indicated encoding. One of:
+      * `'strict'`: Raise an exception for any illegal substrings.
+      * `'replace'`: Replace illegal substrings with `replacement_char`.
+      * `'ignore'`: Skip illegal substrings.
+    replacement_char: The replacement codepoint to be used in place of invalid
+      substrings in `input` when `errors='replace'`.
+    name: A name for the operation (optional).
+
+  Returns:
+    A tuple of `N+1` dimensional tensors `(codepoints, start_offsets)`.
+
+    * `codepoints` is an `int32` tensor with shape `[D1...DN, (num_chars)]`.
+    * `offsets` is an `int64` tensor with shape `[D1...DN, (num_chars)]`.
+
+    The returned tensors are `tf.Tensor`s if `input` is a scalar, or
+    `tf.RaggedTensor`s otherwise.
+
+  #### Example:
+    ```python
+    >>> input = [s.encode('utf8') for s in (u'G\xf6\xf6dnight', u'\U0001f60a')]
+    >>> result = tf.strings.unicode_split_with_offsets(input, 'UTF-8')
+    >>> result[0].tolist()  # character substrings
+    [['G', '\xc3\xb6', '\xc3\xb6', 'd', 'n', 'i', 'g', 'h', 't'],
+     ['\xf0\x9f\x98\x8a']]
+    >>> result[1].tolist()  # offsets
+   [[0, 1, 3, 5, 6, 7, 8, 9, 10], [0]]
+    ```
+  """
+  with ops.name_scope(name, "UnicodeSplitWithOffsets", [input]):
+    codepoints, offsets = _unicode_decode(input, input_encoding, errors,
+                                          replacement_char, False,
+                                          with_offsets=True)
+    chars = unicode_encode(
+        ragged_array_ops.expand_dims(codepoints, -1),
+        output_encoding=input_encoding,
+        errors=errors,
+        replacement_char=replacement_char)
+    return chars, offsets
+
+
+def _unicode_decode(input, input_encoding, errors, replacement_char,
+                    replace_control_characters, with_offsets):
+  """Decodes each string into a sequence of codepoints."""
+  input = ragged_tensor.convert_to_tensor_or_ragged_tensor(input, name="input")
+  input_ndims = input.shape.ndims
+  if input_ndims is None:
+    raise ValueError("Rank of `input` must be statically known.")
+
+  if input_ndims > 1:
+    # Convert to a ragged tensor with ragged_rank = input_ndims - 1.
+    if not ragged_tensor.is_ragged(input):
+      input = ragged_conversion_ops.from_tensor(
+          input, ragged_rank=input_ndims - 1)
+    elif input.ragged_rank < input_ndims - 1:
+      input = input.with_flat_values(
+          ragged_conversion_ops.from_tensor(
+              input.flat_values,
+              ragged_rank=input_ndims - input.ragged_rank + 1))
+
+  # Reshape the input to a flat vector, and apply the gen_string_ops op.
+  if ragged_tensor.is_ragged(input):
+    flat_input = array_ops.reshape(input.flat_values, [-1])
+  else:
+    flat_input = array_ops.reshape(input, [-1])
+
+  if with_offsets:
+    decode_op = gen_string_ops.unicode_decode_with_offsets
+  else:
+    decode_op = gen_string_ops.unicode_decode
+  flat_result = decode_op(
+      input=flat_input,
+      input_encoding=input_encoding,
+      errors=errors,
+      replacement_char=replacement_char,
+      replace_control_characters=replace_control_characters)
+
+  if input_ndims == 0:
+    codepoints = flat_result.char_values
+    if with_offsets:
+      offsets = flat_result.char_to_byte_starts
+  else:
+    codepoints = ragged_tensor.RaggedTensor.from_row_splits(
+        flat_result.char_values, flat_result.row_splits)
+    if input_ndims > 1:
+      codepoints = input.with_flat_values(codepoints)
+    if with_offsets:
+      offsets = ragged_tensor.RaggedTensor.from_row_splits(
+          flat_result.char_to_byte_starts, flat_result.row_splits)
+      if input_ndims > 1:
+        offsets = input.with_flat_values(offsets)
+
+  if with_offsets:
+    return codepoints, offsets
+  else:
+    return codepoints
+
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
index a1cd581a86..ad26ded10b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt
@@ -52,6 +52,14 @@ tf_module {
     name: "to_number"
     argspec: "args=[\'string_tensor\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "unicode_decode"
+    argspec: "args=[\'input\', \'input_encoding\', \'errors\', \'replacement_char\', \'replace_control_characters\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "unicode_decode_with_offsets"
+    argspec: "args=[\'input\', \'input_encoding\', \'errors\', \'replacement_char\', \'replace_control_characters\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'False\', \'None\'], "
+  }
   member_method {
     name: "unicode_encode"
     argspec: "args=[\'input\', \'output_encoding\', \'errors\', \'replacement_char\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'None\'], "
@@ -60,6 +68,14 @@ tf_module {
     name: "unicode_script"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unicode_split"
+    argspec: "args=[\'input\', \'input_encoding\', \'errors\', \'replacement_char\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'None\'], "
+  }
+  member_method {
+    name: "unicode_split_with_offsets"
+    argspec: "args=[\'input\', \'input_encoding\', \'errors\', \'replacement_char\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'None\'], "
+  }
   member_method {
     name: "unicode_transcode"
     argspec: "args=[\'input\', \'input_encoding\', \'output_encoding\', \'errors\', \'replacement_char\', \'replace_control_characters\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'False\', \'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
index f6e32ed08c..962cf9a723 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt
@@ -52,6 +52,14 @@ tf_module {
     name: "to_number"
     argspec: "args=[\'input\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\'], "
   }
+  member_method {
+    name: "unicode_decode"
+    argspec: "args=[\'input\', \'input_encoding\', \'errors\', \'replacement_char\', \'replace_control_characters\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'False\', \'None\'], "
+  }
+  member_method {
+    name: "unicode_decode_with_offsets"
+    argspec: "args=[\'input\', \'input_encoding\', \'errors\', \'replacement_char\', \'replace_control_characters\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'False\', \'None\'], "
+  }
   member_method {
     name: "unicode_encode"
     argspec: "args=[\'input\', \'output_encoding\', \'errors\', \'replacement_char\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'None\'], "
@@ -60,6 +68,14 @@ tf_module {
     name: "unicode_script"
     argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "unicode_split"
+    argspec: "args=[\'input\', \'input_encoding\', \'errors\', \'replacement_char\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'None\'], "
+  }
+  member_method {
+    name: "unicode_split_with_offsets"
+    argspec: "args=[\'input\', \'input_encoding\', \'errors\', \'replacement_char\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'None\'], "
+  }
   member_method {
     name: "unicode_transcode"
     argspec: "args=[\'input\', \'input_encoding\', \'output_encoding\', \'errors\', \'replacement_char\', \'replace_control_characters\', \'name\'], varargs=None, keywords=None, defaults=[\'replace\', \'65533\', \'False\', \'None\'], "
-- 
GitLab


From 00d91e7bc3111b00c2e679627362ec21dab64833 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Wed, 12 Dec 2018 17:09:34 -0800
Subject: [PATCH 470/873] Enable a number of tests for merge layers to run in
 eager mode.

PiperOrigin-RevId: 225283392
---
 tensorflow/python/keras/layers/merge.py      |   6 +-
 tensorflow/python/keras/layers/merge_test.py | 132 ++++++++++---------
 2 files changed, 72 insertions(+), 66 deletions(-)

diff --git a/tensorflow/python/keras/layers/merge.py b/tensorflow/python/keras/layers/merge.py
index 45e705c696..c73b21d965 100644
--- a/tensorflow/python/keras/layers/merge.py
+++ b/tensorflow/python/keras/layers/merge.py
@@ -87,7 +87,7 @@ class _Merge(Layer):
   def build(self, input_shape):
     # Used purely for shape validation.
     if not isinstance(input_shape, list):
-      raise ValueError('A merge layer should be called ' 'on a list of inputs.')
+      raise ValueError('A merge layer should be called on a list of inputs.')
     if len(input_shape) < 2:
       raise ValueError('A merge layer should be called '
                        'on a list of at least 2 inputs. '
@@ -118,7 +118,7 @@ class _Merge(Layer):
 
   def call(self, inputs):
     if not isinstance(inputs, list):
-      raise ValueError('A merge layer should be called ' 'on a list of inputs.')
+      raise ValueError('A merge layer should be called on a list of inputs.')
     if self._reshape_required:
       reshaped_inputs = []
       input_ndims = list(map(K.ndim, inputs))
@@ -504,7 +504,7 @@ class Dot(_Merge):
 
   def _merge_function(self, inputs):
     if len(inputs) != 2:
-      raise ValueError('A `Dot` layer should be called ' 'on exactly 2 inputs')
+      raise ValueError('A `Dot` layer should be called on exactly 2 inputs')
     x1 = inputs[0]
     x2 = inputs[1]
     if isinstance(self.axes, int):
diff --git a/tensorflow/python/keras/layers/merge_test.py b/tensorflow/python/keras/layers/merge_test.py
index fcb161ae20..f962a75b32 100644
--- a/tensorflow/python/keras/layers/merge_test.py
+++ b/tensorflow/python/keras/layers/merge_test.py
@@ -22,12 +22,13 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.framework import test_util as tf_test_util
-from tensorflow.python.ops import array_ops
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class MergeLayersTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class MergeLayersTest(keras_parameterized.TestCase):
 
   def test_merge_add(self):
     i1 = keras.layers.Input(shape=(4, 5))
@@ -35,8 +36,9 @@ class MergeLayersTest(test.TestCase):
     i3 = keras.layers.Input(shape=(4, 5))
 
     o = keras.layers.add([i1, i2, i3])
-    self.assertListEqual(o.get_shape().as_list(), [None, 4, 5])
+    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
     model = keras.models.Model([i1, i2, i3], o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
 
     x1 = np.random.random((2, 4, 5))
     x2 = np.random.random((2, 4, 5))
@@ -45,25 +47,14 @@ class MergeLayersTest(test.TestCase):
     self.assertEqual(out.shape, (2, 4, 5))
     self.assertAllClose(out, x1 + x2 + x3, atol=1e-4)
 
-  def test_merge_elementwise_errors(self):
-    i1 = keras.layers.Input(shape=(4, 5))
-    i2 = keras.layers.Input(shape=(4, 6))
-    with self.assertRaises(ValueError):
-      keras.layers.add([i1, i2])
-    with self.assertRaises(ValueError):
-      keras.layers.add([i1])
-    with self.assertRaises(ValueError):
-      keras.layers.add(i1)
-    with self.assertRaises(ValueError):
-      keras.layers.add([i1])
-
   def test_merge_multiply(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
     i3 = keras.layers.Input(shape=(4, 5))
     o = keras.layers.multiply([i1, i2, i3])
-    self.assertListEqual(o.get_shape().as_list(), [None, 4, 5])
+    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
     model = keras.models.Model([i1, i2, i3], o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
 
     x1 = np.random.random((2, 4, 5))
     x2 = np.random.random((2, 4, 5))
@@ -76,8 +67,9 @@ class MergeLayersTest(test.TestCase):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
     o = keras.layers.average([i1, i2])
-    self.assertListEqual(o.get_shape().as_list(), [None, 4, 5])
+    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
     model = keras.models.Model([i1, i2], o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
 
     x1 = np.random.random((2, 4, 5))
     x2 = np.random.random((2, 4, 5))
@@ -89,8 +81,9 @@ class MergeLayersTest(test.TestCase):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
     o = keras.layers.maximum([i1, i2])
-    self.assertListEqual(o.get_shape().as_list(), [None, 4, 5])
+    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
     model = keras.models.Model([i1, i2], o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
 
     x1 = np.random.random((2, 4, 5))
     x2 = np.random.random((2, 4, 5))
@@ -102,8 +95,9 @@ class MergeLayersTest(test.TestCase):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
     o = keras.layers.minimum([i1, i2])
-    self.assertListEqual(o.get_shape().as_list(), [None, 4, 5])
+    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
     model = keras.models.Model([i1, i2], o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
 
     x1 = np.random.random((2, 4, 5))
     x2 = np.random.random((2, 4, 5))
@@ -115,8 +109,9 @@ class MergeLayersTest(test.TestCase):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
     o = keras.layers.concatenate([i1, i2], axis=1)
-    self.assertListEqual(o.get_shape().as_list(), [None, 8, 5])
+    self.assertListEqual(o.shape.as_list(), [None, 8, 5])
     model = keras.models.Model([i1, i2], o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
 
     x1 = np.random.random((2, 4, 5))
     x2 = np.random.random((2, 4, 5))
@@ -124,22 +119,13 @@ class MergeLayersTest(test.TestCase):
     self.assertEqual(out.shape, (2, 8, 5))
     self.assertAllClose(out, np.concatenate([x1, x2], axis=1), atol=1e-4)
 
-  def test_concatenate_errors(self):
-    i1 = keras.layers.Input(shape=(4, 5))
-    i2 = keras.layers.Input(shape=(3, 5))
-    with self.assertRaisesRegexp(ValueError, 'inputs with matching shapes'):
-      keras.layers.concatenate([i1, i2], axis=-1)
-    with self.assertRaisesRegexp(ValueError, 'called on a list'):
-      keras.layers.concatenate(i1, axis=-1)
-    with self.assertRaisesRegexp(ValueError, 'called on a list'):
-      keras.layers.concatenate([i1], axis=-1)
-
   def test_merge_dot(self):
     i1 = keras.layers.Input(shape=(4,))
     i2 = keras.layers.Input(shape=(4,))
     o = keras.layers.dot([i1, i2], axes=1)
-    self.assertListEqual(o.get_shape().as_list(), [None, 1])
+    self.assertListEqual(o.shape.as_list(), [None, 1])
     model = keras.models.Model([i1, i2], o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
     _ = keras.layers.Dot(axes=1).get_config()
 
     x1 = np.random.random((2, 4))
@@ -153,8 +139,9 @@ class MergeLayersTest(test.TestCase):
 
     # Test with negative tuple of axes.
     o = keras.layers.dot([i1, i2], axes=(-1, -1))
-    self.assertListEqual(o.get_shape().as_list(), [None, 1])
+    self.assertListEqual(o.shape.as_list(), [None, 1])
     model = keras.models.Model([i1, i2], o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
     out = model.predict([x1, x2])
     self.assertEqual(out.shape, (2, 1))
     self.assertAllClose(out, expected, atol=1e-4)
@@ -163,6 +150,32 @@ class MergeLayersTest(test.TestCase):
     layer = keras.layers.Dot(axes=-1)
     self.assertEqual(layer.compute_output_shape([(4, 5), (4, 5)]), (4, 1))
 
+
+@tf_test_util.run_all_in_graph_and_eager_modes
+class MergeLayersTestNoExecution(test.TestCase):
+
+  def test_merge_elementwise_errors(self):
+    i1 = keras.layers.Input(shape=(4, 5))
+    i2 = keras.layers.Input(shape=(4, 6))
+    with self.assertRaises(ValueError):
+      keras.layers.add([i1, i2])
+    with self.assertRaises(ValueError):
+      keras.layers.add([i1])
+    with self.assertRaises(ValueError):
+      keras.layers.add(i1)
+    with self.assertRaises(ValueError):
+      keras.layers.add([i1])
+
+  def test_concatenate_errors(self):
+    i1 = keras.layers.Input(shape=(4, 5))
+    i2 = keras.layers.Input(shape=(3, 5))
+    with self.assertRaisesRegexp(ValueError, 'inputs with matching shapes'):
+      keras.layers.concatenate([i1, i2], axis=-1)
+    with self.assertRaisesRegexp(ValueError, 'called on a list'):
+      keras.layers.concatenate(i1, axis=-1)
+    with self.assertRaisesRegexp(ValueError, 'called on a list'):
+      keras.layers.concatenate([i1], axis=-1)
+
   def test_dot_errors(self):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 6))
@@ -183,7 +196,7 @@ class MergeLayersTest(test.TestCase):
     i1 = keras.layers.Input(shape=(4, 5))
     i2 = keras.layers.Input(shape=(4, 5))
     y = keras.layers.subtract([i1, i2])
-    self.assertEqual(y.get_shape().as_list(), [None, 4, 5])
+    self.assertEqual(y.shape.as_list(), [None, 4, 5])
 
     # Test invalid use cases
     i1 = keras.layers.Input(shape=(4, 5))
@@ -193,39 +206,32 @@ class MergeLayersTest(test.TestCase):
     with self.assertRaises(ValueError):
       keras.layers.subtract([i1, i1, i1])
 
-
-class MergeLayersGraphOnlyTest(test.TestCase):
-
   def test_merge_add_masking(self):
-    with self.cached_session():
-      i1 = keras.layers.Input(shape=(4, 5))
-      i2 = keras.layers.Input(shape=(4, 5))
-      m1 = keras.layers.Masking()(i1)
-      layer = keras.layers.Add()
-      o = layer([m1, i2])
-      self.assertListEqual(o.get_shape().as_list(), [None, 4, 5])
-      mask = layer.output_mask
-      self.assertListEqual(mask.get_shape().as_list(), [None, 4])
-
-  @tf_test_util.run_deprecated_v1
+    i1 = keras.layers.Input(shape=(4, 5))
+    i2 = keras.layers.Input(shape=(4, 5))
+    m1 = keras.layers.Masking()(i1)
+    layer = keras.layers.Add()
+    o = layer([m1, i2])
+    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
+    mask = layer.output_mask
+    self.assertListEqual(mask.shape.as_list(), [None, 4])
+
   def test_merge_add_dynamic_shape(self):
-    with self.cached_session():
-      i1 = array_ops.placeholder(shape=(4, None), dtype='float32')
-      i2 = array_ops.placeholder(shape=(4, 5), dtype='float32')
-      layer = keras.layers.Add()
-      o = layer([i1, i2])
-      self.assertListEqual(o.get_shape().as_list(), [4, 5])
+    i1 = keras.Input(batch_shape=(4, None), dtype='float32')
+    i2 = keras.Input(batch_shape=(4, 5), dtype='float32')
+    layer = keras.layers.Add()
+    o = layer([i1, i2])
+    self.assertListEqual(o.shape.as_list(), [4, 5])
 
   def test_merge_concatenate_masking(self):
-    with self.cached_session():
-      i1 = keras.layers.Input(shape=(4, 5))
-      i2 = keras.layers.Input(shape=(4, 5))
-      m1 = keras.layers.Masking()(i1)
-      layer = keras.layers.Concatenate()
-      o = layer([m1, i2])
-      self.assertListEqual(o.get_shape().as_list(), [None, 4, 10])
-      mask = layer.output_mask
-      self.assertListEqual(mask.get_shape().as_list(), [None, 4])
+    i1 = keras.layers.Input(shape=(4, 5))
+    i2 = keras.layers.Input(shape=(4, 5))
+    m1 = keras.layers.Masking()(i1)
+    layer = keras.layers.Concatenate()
+    o = layer([m1, i2])
+    self.assertListEqual(o.shape.as_list(), [None, 4, 10])
+    mask = layer.output_mask
+    self.assertListEqual(mask.shape.as_list(), [None, 4])
 
 
 if __name__ == '__main__':
-- 
GitLab


From 1ec3b398482347b4611c6fd043c85e597345a39b Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Wed, 12 Dec 2018 17:09:38 -0800
Subject: [PATCH 471/873] Default `to_code` to brief verbosity (since it
 returns the code already).

PiperOrigin-RevId: 225283401
---
 tensorflow/python/autograph/impl/api.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index a20ad71c97..0debf24fb9 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -494,6 +494,7 @@ def to_code(entity,
   program_ctx = converter.ProgramContext(
       options=converter.ConversionOptions(
           recursive=recursive,
+          verbose=converter.Verbosity.BRIEF,
           strip_decorators=(convert, do_not_convert, converted_call),
           optional_features=optional_features),
       partial_types=experimental_partial_types,
-- 
GitLab


From a3d634438e9cc70073faa796018b6173212e2f85 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Wed, 12 Dec 2018 17:40:27 -0800
Subject: [PATCH 472/873] Run BatchNorm layer tests in all relevant execution
 modes.

PiperOrigin-RevId: 225287527
---
 tensorflow/python/keras/BUILD                 |   1 +
 .../python/keras/layers/normalization_test.py | 113 +++++++++++-------
 2 files changed, 71 insertions(+), 43 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 361d88fe83..aef79ff2c8 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -466,6 +466,7 @@ py_test(
     name = "normalization_test",
     size = "medium",
     srcs = ["layers/normalization_test.py"],
+    shard_count = 3,
     srcs_version = "PY2AND3",
     tags = ["notsan"],
     deps = [
diff --git a/tensorflow/python/keras/layers/normalization_test.py b/tensorflow/python/keras/layers/normalization_test.py
index c1acc2eb3a..780e02cf06 100644
--- a/tensorflow/python/keras/layers/normalization_test.py
+++ b/tensorflow/python/keras/layers/normalization_test.py
@@ -22,16 +22,16 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.layers import normalization
 from tensorflow.python.platform import test
 from tensorflow.python.training import gradient_descent
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-@tf_test_util.run_v1_only('b/120545219')
-class NormalizationLayersTest(test.TestCase):
+class BatchNormalizationTest(keras_parameterized.TestCase):
 
+  @keras_parameterized.run_all_keras_modes
   def test_basic_batchnorm(self):
     testing_utils.layer_test(
         keras.layers.BatchNormalization,
@@ -56,15 +56,8 @@ class NormalizationLayersTest(test.TestCase):
         kwargs={'scale': False,
                 'center': False},
         input_shape=(3, 3))
-    testing_utils.layer_test(
-        normalization.BatchNormalizationV2,
-        kwargs={'fused': True},
-        input_shape=(3, 3, 3, 3))
-    testing_utils.layer_test(
-        normalization.BatchNormalizationV2,
-        kwargs={'fused': None},
-        input_shape=(3, 3, 3))
 
+  @tf_test_util.run_in_graph_and_eager_modes
   def test_batchnorm_weights(self):
     layer = keras.layers.BatchNormalization(scale=False, center=False)
     layer.build((None, 3, 4))
@@ -76,6 +69,7 @@ class NormalizationLayersTest(test.TestCase):
     self.assertEqual(len(layer.trainable_weights), 2)
     self.assertEqual(len(layer.weights), 4)
 
+  @tf_test_util.run_in_graph_and_eager_modes
   def test_batchnorm_regularization(self):
     layer = keras.layers.BatchNormalization(
         gamma_regularizer='l1', beta_regularizer='l1')
@@ -88,36 +82,7 @@ class NormalizationLayersTest(test.TestCase):
     self.assertEqual(layer.gamma.constraint, max_norm)
     self.assertEqual(layer.beta.constraint, max_norm)
 
-  def _test_batchnorm_correctness(self, dtype, use_v2=True, fused=False):
-    model = keras.models.Sequential()
-    layer_ctor = (normalization.BatchNormalizationV2 if use_v2
-                  else normalization.BatchNormalizationV1)
-    norm = layer_ctor(input_shape=(2, 2, 2), momentum=0.8, fused=fused)
-    model.add(norm)
-    model.compile(loss='mse',
-                  optimizer=gradient_descent.GradientDescentOptimizer(0.01))
-
-    # centered on 5.0, variance 10.0
-    x = (np.random.normal(loc=5.0, scale=10.0, size=(1000, 2, 2, 2))
-         .astype(dtype))
-    model.fit(x, x, epochs=4, verbose=0)
-    out = model.predict(x)
-    out -= keras.backend.eval(norm.beta)
-    out /= keras.backend.eval(norm.gamma)
-
-    np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
-    np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
-
-  def test_batchnorm_correctness(self):
-    self._test_batchnorm_correctness(np.float32)
-    self._test_batchnorm_correctness(np.float32, fused=True)
-    self._test_batchnorm_correctness(np.float32, use_v2=False)
-
-  def test_batchnorm_mixed_precision(self):
-    self._test_batchnorm_correctness(np.float16)
-    self._test_batchnorm_correctness(np.float16, fused=True)
-    self._test_batchnorm_correctness(np.float16, use_v2=False)
-
+  @keras_parameterized.run_all_keras_modes
   def test_batchnorm_convnet(self):
     if test.is_gpu_available(cuda_only=True):
       with self.session(use_gpu=True):
@@ -126,7 +91,8 @@ class NormalizationLayersTest(test.TestCase):
             axis=1, input_shape=(3, 4, 4), momentum=0.8)
         model.add(norm)
         model.compile(loss='mse',
-                      optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+                      optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                      run_eagerly=testing_utils.should_run_eagerly())
 
         # centered on 5.0, variance 10.0
         x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4))
@@ -138,13 +104,15 @@ class NormalizationLayersTest(test.TestCase):
         np.testing.assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1)
         np.testing.assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1)
 
+  @keras_parameterized.run_all_keras_modes
   def test_batchnorm_convnet_channel_last(self):
     model = keras.models.Sequential()
     norm = keras.layers.BatchNormalization(
         axis=-1, input_shape=(4, 4, 3), momentum=0.8)
     model.add(norm)
     model.compile(loss='mse',
-                  optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+                  optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     # centered on 5.0, variance 10.0
     x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3))
@@ -156,6 +124,28 @@ class NormalizationLayersTest(test.TestCase):
     np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1)
     np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1)
 
+  @keras_parameterized.run_all_keras_modes
+  def test_batchnorm_correctness(self):
+    _run_batchnorm_correctness_test(
+        normalization.BatchNormalization, dtype='float32')
+    _run_batchnorm_correctness_test(
+        normalization.BatchNormalization, dtype='float32', fused=True)
+    _run_batchnorm_correctness_test(
+        normalization.BatchNormalization, dtype='float32', fused=False)
+
+  @keras_parameterized.run_all_keras_modes
+  def test_batchnorm_mixed_precision(self):
+    _run_batchnorm_correctness_test(
+        normalization.BatchNormalization, dtype='float16')
+    _run_batchnorm_correctness_test(
+        normalization.BatchNormalization, dtype='float16', fused=True)
+    _run_batchnorm_correctness_test(
+        normalization.BatchNormalization, dtype='float16', fused=False)
+
+
+class BatchNormalizationV1Test(test.TestCase):
+
+  @tf_test_util.run_in_graph_and_eager_modes
   def test_v1_fused_attribute(self):
     norm = normalization.BatchNormalizationV1()
     inp = keras.layers.Input((4, 4, 4))
@@ -174,6 +164,21 @@ class NormalizationLayersTest(test.TestCase):
     norm(inp)
     self.assertEqual(norm.fused, False)
 
+
+class BatchNormalizationV2Test(keras_parameterized.TestCase):
+
+  @keras_parameterized.run_all_keras_modes
+  def test_basic_batchnorm_v2(self):
+    testing_utils.layer_test(
+        normalization.BatchNormalizationV2,
+        kwargs={'fused': True},
+        input_shape=(3, 3, 3, 3))
+    testing_utils.layer_test(
+        normalization.BatchNormalizationV2,
+        kwargs={'fused': None},
+        input_shape=(3, 3, 3))
+
+  @tf_test_util.run_in_graph_and_eager_modes
   def test_v2_fused_attribute(self):
     norm = normalization.BatchNormalizationV2()
     self.assertEqual(norm.fused, None)
@@ -228,6 +233,26 @@ class NormalizationLayersTest(test.TestCase):
       norm(inp)
 
 
+def _run_batchnorm_correctness_test(layer, dtype='float32', fused=False):
+  model = keras.models.Sequential()
+  norm = layer(input_shape=(2, 2, 2), momentum=0.8, fused=fused)
+  model.add(norm)
+  model.compile(loss='mse',
+                optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                run_eagerly=testing_utils.should_run_eagerly())
+
+  # centered on 5.0, variance 10.0
+  x = (np.random.normal(loc=5.0, scale=10.0, size=(1000, 2, 2, 2))
+       .astype(dtype))
+  model.fit(x, x, epochs=4, verbose=0)
+  out = model.predict(x)
+  out -= keras.backend.eval(norm.beta)
+  out /= keras.backend.eval(norm.gamma)
+
+  np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
+  np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
+
+
 @tf_test_util.run_v1_only('b/120545219')
 class NormalizationLayersGraphModeOnlyTest(test.TestCase):
 
@@ -309,6 +334,8 @@ class NormalizationLayersGraphModeOnlyTest(test.TestCase):
     Computes mean and std for current inputs then
     applies batch normalization using them.
     """
+    # TODO(fchollet): enable in all execution modes when issue with
+    # learning phase setting is resolved.
     with self.cached_session():
       bn_mean = 0.5
       bn_std = 10.
-- 
GitLab


From 3eb7616b5459aec3dabaa4152a00de14a1fa0914 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 17:55:15 -0800
Subject: [PATCH 473/873] Upstream Eigen update to
 9f48e814419e823f4a0a31b0cc365aa850c164f5.

PiperOrigin-RevId: 225289423
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index a84c51813e..4eca0bf3cc 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -134,11 +134,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "eigen_archive",
         build_file = clean_dep("//third_party:eigen.BUILD"),
-        sha256 = "aae7a680d141c978301dfae2c7945c06039f65849fcf64269595a9cdbba82638",
-        strip_prefix = "eigen-eigen-729d33d11c81",
+        sha256 = "753fbb58d0a49b6bcbcfb126ebfa2e21fc97f7471529ba835a096008ce588d8a",
+        strip_prefix = "eigen-eigen-9f48e814419e",
         urls = [
-            "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/729d33d11c81.tar.gz",
-            "https://bitbucket.org/eigen/eigen/get/729d33d11c81.tar.gz",
+            "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz",
+            "https://bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz",
         ],
     )
 
-- 
GitLab


From 4637671e4e6e3f88fc8f5b92538bfb813f3da682 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Wed, 12 Dec 2018 18:13:37 -0800
Subject: [PATCH 474/873] Enable v2 mode for lstm_test.

Test that can only run in v1 has the proper bug attached.

PiperOrigin-RevId: 225291768
---
 tensorflow/python/keras/layers/lstm_test.py | 59 ++++++++++++---------
 1 file changed, 33 insertions(+), 26 deletions(-)

diff --git a/tensorflow/python/keras/layers/lstm_test.py b/tensorflow/python/keras/layers/lstm_test.py
index aea4261502..5c0ad5cf71 100644
--- a/tensorflow/python/keras/layers/lstm_test.py
+++ b/tensorflow/python/keras/layers/lstm_test.py
@@ -23,6 +23,7 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 from tensorflow.python.training import adam
@@ -30,8 +31,8 @@ from tensorflow.python.training import gradient_descent
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class LSTMLayerTest(test.TestCase, parameterized.TestCase):
+@keras_parameterized.run_all_keras_modes
+class LSTMLayerTest(keras_parameterized.TestCase):
 
   def test_return_sequences_LSTM(self):
     num_samples = 2
@@ -67,7 +68,9 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
     layer = keras.layers.LSTM(units, input_shape=(None, embedding_dim))
     model = keras.models.Sequential()
     model.add(layer)
-    model.compile(RMSPropOptimizer(0.001), 'mse')
+    model.compile(RMSPropOptimizer(0.001), 'mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
+
     x = np.random.random((num_samples, timesteps, embedding_dim))
     y = np.random.random((num_samples, units))
     model.train_on_batch(x, y)
@@ -115,7 +118,6 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
     self.assertEqual(layer.cell.bias.constraint, b_constraint)
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_with_masking_layer_LSTM(self):
     layer_class = keras.layers.LSTM
     inputs = np.random.random((2, 3, 4))
@@ -125,10 +127,10 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
     model.add(keras.layers.Masking(input_shape=(3, 4)))
     model.add(layer_class(units=5, return_sequences=True, unroll=False))
     model.compile(loss='categorical_crossentropy',
-                  optimizer=RMSPropOptimizer(0.01))
+                  optimizer=RMSPropOptimizer(0.01),
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_masking_with_stacking_LSTM(self):
     inputs = np.random.random((2, 3, 4))
     targets = np.abs(np.random.random((2, 3, 5)))
@@ -138,7 +140,8 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
     lstm_cells = [keras.layers.LSTMCell(10), keras.layers.LSTMCell(5)]
     model.add(keras.layers.RNN(lstm_cells, return_sequences=True, unroll=False))
     model.compile(loss='categorical_crossentropy',
-                  optimizer=RMSPropOptimizer(0.01))
+                  optimizer=RMSPropOptimizer(0.01),
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
   def test_from_config_LSTM(self):
@@ -167,7 +170,8 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
 
     model = keras.models.Model([inputs] + initial_state, output)
     model.compile(loss='categorical_crossentropy',
-                  optimizer=adam.AdamOptimizer())
+                  optimizer=adam.AdamOptimizer(),
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     inputs = np.random.random((num_samples, timesteps, embedding_dim))
     initial_state = [np.random.random((num_samples, units))
@@ -192,7 +196,8 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
 
     model = keras.models.Model(inputs, output)
     model.compile(loss='categorical_crossentropy',
-                  optimizer=adam.AdamOptimizer())
+                  optimizer=adam.AdamOptimizer(),
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     inputs = np.random.random((num_samples, timesteps, embedding_dim))
     targets = np.random.random((num_samples, units))
@@ -242,7 +247,8 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
 
     model = keras.models.Model([inputs] + initial_state, output)
     model.compile(loss='categorical_crossentropy',
-                  optimizer=RMSPropOptimizer(0.01))
+                  optimizer=RMSPropOptimizer(0.01),
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     inputs = np.random.random((num_samples, timesteps, embedding_dim))
     initial_state = [np.random.random((num_samples, units))
@@ -303,7 +309,8 @@ class LSTMLayerTest(test.TestCase, parameterized.TestCase):
 
     model = keras.models.Model(inputs, output)
     model.compile(loss='categorical_crossentropy',
-                  optimizer=adam.AdamOptimizer())
+                  optimizer=adam.AdamOptimizer(),
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     main_inputs = np.random.random((num_samples, timesteps, embedding_dim))
     initial_state = [np.random.random((num_samples, units))
@@ -378,25 +385,25 @@ class LSTMLayerGraphOnlyTest(test.TestCase):
 
       self.assertAllClose(out7, out6, atol=1e-5)
 
+  # b/120919032
   @tf_test_util.run_deprecated_v1
   def test_regularizers_LSTM(self):
     embedding_dim = 4
     layer_class = keras.layers.LSTM
-    with self.cached_session():
-      layer = layer_class(
-          5,
-          return_sequences=False,
-          weights=None,
-          input_shape=(None, embedding_dim),
-          kernel_regularizer=keras.regularizers.l1(0.01),
-          recurrent_regularizer=keras.regularizers.l1(0.01),
-          bias_regularizer='l2',
-          activity_regularizer='l1')
-      layer.build((None, None, 2))
-      self.assertEqual(len(layer.losses), 3)
-      x = keras.backend.variable(np.ones((2, 3, 2)))
-      layer(x)
-      self.assertEqual(len(layer.get_losses_for(x)), 1)
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_regularizer=keras.regularizers.l1(0.01),
+        recurrent_regularizer=keras.regularizers.l1(0.01),
+        bias_regularizer='l2',
+        activity_regularizer='l1')
+    layer.build((None, None, 2))
+    self.assertEqual(len(layer.losses), 3)
+    x = keras.backend.variable(np.ones((2, 3, 2)))
+    layer(x)
+    self.assertEqual(len(layer.get_losses_for(x)), 1)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 6345287a1836428b74a91ed0ebaa73488461ec69 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 18:19:47 -0800
Subject: [PATCH 475/873] Update ops-related pbtxt files.

PiperOrigin-RevId: 225292379
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 47 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 47 +++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 1492741e8b..602d4a009d 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -77807,6 +77807,53 @@ op {
     type: "type"
   }
 }
+op {
+  name: "UnicodeDecode"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "row_splits"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "char_values"
+    type: DT_INT32
+  }
+  attr {
+    name: "input_encoding"
+    type: "string"
+  }
+  attr {
+    name: "errors"
+    type: "string"
+    default_value {
+      s: "replace"
+    }
+    allowed_values {
+      list {
+        s: "strict"
+        s: "replace"
+        s: "ignore"
+      }
+    }
+  }
+  attr {
+    name: "replacement_char"
+    type: "int"
+    default_value {
+      i: 65533
+    }
+  }
+  attr {
+    name: "replace_control_characters"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "UnicodeDecodeWithOffsets"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 89bdcc571e..779d4297c7 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -37406,6 +37406,53 @@ op {
     type: "type"
   }
 }
+op {
+  name: "UnicodeDecode"
+  input_arg {
+    name: "input"
+    type: DT_STRING
+  }
+  output_arg {
+    name: "row_splits"
+    type: DT_INT64
+  }
+  output_arg {
+    name: "char_values"
+    type: DT_INT32
+  }
+  attr {
+    name: "input_encoding"
+    type: "string"
+  }
+  attr {
+    name: "errors"
+    type: "string"
+    default_value {
+      s: "replace"
+    }
+    allowed_values {
+      list {
+        s: "strict"
+        s: "replace"
+        s: "ignore"
+      }
+    }
+  }
+  attr {
+    name: "replacement_char"
+    type: "int"
+    default_value {
+      i: 65533
+    }
+  }
+  attr {
+    name: "replace_control_characters"
+    type: "bool"
+    default_value {
+      b: false
+    }
+  }
+}
 op {
   name: "UnicodeDecodeWithOffsets"
   input_arg {
-- 
GitLab


From 99afa4b40b3414a1ad690fa08a662adef3f48d9b Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Wed, 12 Dec 2018 18:35:31 -0800
Subject: [PATCH 476/873] Enable recurrent_test to run in v2 mode.

Also increase the shard to speed up the tests.

PiperOrigin-RevId: 225293834
---
 tensorflow/python/keras/BUILD                 |   3 +-
 .../python/keras/layers/recurrent_test.py     | 119 +++++++++++-------
 2 files changed, 76 insertions(+), 46 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index aef79ff2c8..7c3fca2c82 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -525,8 +525,9 @@ py_test(
 
 py_test(
     name = "recurrent_test",
-    size = "large",
+    size = "medium",
     srcs = ["layers/recurrent_test.py"],
+    shard_count = 4,
     srcs_version = "PY2AND3",
     deps = [
         ":keras",
diff --git a/tensorflow/python/keras/layers/recurrent_test.py b/tensorflow/python/keras/layers/recurrent_test.py
index b1449069e3..e9bf788740 100644
--- a/tensorflow/python/keras/layers/recurrent_test.py
+++ b/tensorflow/python/keras/layers/recurrent_test.py
@@ -30,7 +30,8 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
@@ -48,8 +49,8 @@ NestedInput = collections.namedtuple('NestedInput', ['t1', 't2'])
 NestedState = collections.namedtuple('NestedState', ['s1', 's2'])
 
 
-@test_util.run_all_in_graph_and_eager_modes
-class RNNTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class RNNTest(keras_parameterized.TestCase):
 
   def test_minimal_rnn_cell_non_layer(self):
 
@@ -73,7 +74,8 @@ class RNNTest(test.TestCase):
     y = layer(x)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
 
     # Test stacking.
@@ -84,7 +86,8 @@ class RNNTest(test.TestCase):
     y = layer(x)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
 
   def test_minimal_rnn_cell_non_layer_multiple_states(self):
@@ -112,7 +115,8 @@ class RNNTest(test.TestCase):
     y = layer(x)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
 
     # Test stacking.
@@ -125,7 +129,8 @@ class RNNTest(test.TestCase):
     y = layer(x)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
 
   def test_minimal_rnn_cell_layer(self):
@@ -165,7 +170,8 @@ class RNNTest(test.TestCase):
     y = layer(x)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
 
     # Test basic case serialization.
@@ -189,7 +195,8 @@ class RNNTest(test.TestCase):
     y = layer(x)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32)))
 
     # Test stacked RNN serialization.
@@ -228,7 +235,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         np.zeros((batch, time_step, embedding_dim)),
         np.zeros((batch, time_step, units)))
@@ -246,7 +254,8 @@ class RNNTest(test.TestCase):
     y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(y)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         np.zeros((batch, time_step, embedding_dim)),
         np.zeros((batch, time_step, cell_units[-1])))
@@ -261,7 +270,8 @@ class RNNTest(test.TestCase):
     y = keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2]))(rnn)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         np.zeros((batch, time_step, embedding_dim)),
         np.zeros((batch, time_step, units)))
@@ -273,7 +283,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         np.zeros((batch, time_step, embedding_dim)),
         np.zeros((batch, time_step, units)))
@@ -347,7 +358,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model([x, c], y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((6, 5, 5)), np.zeros((6, 3))],
         np.zeros((6, 32))
@@ -385,7 +397,8 @@ class RNNTest(test.TestCase):
     y = layer(x, constants=c)
     model = keras.models.Model([x, c], y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((6, 5, 5)), np.zeros((6, 3))],
         np.zeros((6, 32))
@@ -399,7 +412,8 @@ class RNNTest(test.TestCase):
     y = layer(x, constants=c)
     model = keras.models.Model([x, c], y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((6, 5, 5)), np.zeros((6, 3))],
         np.zeros((6, 32))
@@ -471,7 +485,8 @@ class RNNTest(test.TestCase):
     y = layer(x, initial_state=s, constants=c)
     model = keras.models.Model([x, s, c], y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))],
         np.zeros((6, 32))
@@ -601,7 +616,8 @@ class RNNTest(test.TestCase):
       y = layer(x)
       model = keras.models.Model(x, y)
       model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                    loss='mse')
+                    loss='mse',
+                    run_eagerly=testing_utils.should_run_eagerly())
 
       # Test basic case serialization.
       x_np = np.random.random((6, 5, 5))
@@ -623,7 +639,8 @@ class RNNTest(test.TestCase):
       y = layer(x)
       model = keras.models.Model(x, y)
       model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                    loss='mse')
+                    loss='mse',
+                    run_eagerly=testing_utils.should_run_eagerly())
 
       # Test stacked RNN serialization.
       x_np = np.random.random((6, 5, 5))
@@ -647,7 +664,7 @@ class RNNTest(test.TestCase):
     x = keras.Input((None, 5))
     y = layer(x)
     model = keras.models.Model(x, y)
-    model.compile('sgd', 'mse')
+    model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly())
     x_np = np.random.random((6, 5, 5))
     y_np = np.random.random((6, 3))
     model.train_on_batch(x_np, y_np)
@@ -690,7 +707,8 @@ class RNNTest(test.TestCase):
     model = keras.models.Sequential()
     model.add(rnn(2))
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.fit(x, y, epochs=1, batch_size=1)
 
     # check whether the model variables are present in the
@@ -723,7 +741,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         np.zeros((batch, time_step, input_a, input_b)),
         np.zeros((batch, unit_a, unit_b)))
@@ -739,7 +758,8 @@ class RNNTest(test.TestCase):
     y = layer(x)
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         np.zeros((batch, time_step, input_a, input_b)),
         np.zeros((batch, unit_a * 4, unit_b * 4)))
@@ -762,7 +782,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model([x, s], y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch([
         np.zeros((batch, time_step, input_a, input_b)),
         np.zeros((batch, unit_a, unit_b))
@@ -799,7 +820,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model(x, y)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         np.zeros((batch, time_step, input_size)),
         np.zeros((batch, input_size)))
@@ -854,7 +876,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model((input_1, input_2), outputs)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((batch, t, i1)), np.zeros((batch, t, i2, i3))],
         [np.zeros((batch, o1)), np.zeros((batch, o2, o3))])
@@ -875,7 +898,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model([input_1, input_2], outputs)
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((batch, t, i1)),
          np.zeros((batch, t, i2, i3))],
@@ -903,7 +927,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model([input_1, input_2], [output1, output2])
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((batch, t, i1)),
          np.zeros((batch, t, i2, i3))],
@@ -927,7 +952,8 @@ class RNNTest(test.TestCase):
 
     model = keras.models.Model([input_1, input_2], [output1, output2])
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((batch, t, i1)),
          np.zeros((batch, t, i2, i3))],
@@ -960,7 +986,8 @@ class RNNTest(test.TestCase):
     model = keras.models.Model([input_1, input_2, init_s1, init_s2],
                                [output1, output2])
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((batch, t, i1)),
          np.zeros((batch, t, i2, i3)),
@@ -991,7 +1018,8 @@ class RNNTest(test.TestCase):
     model = keras.models.Model([input_1, input_2, init_s1, init_s2],
                                [output1, output2])
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch(
         [np.zeros((batch, t, i1)),
          np.zeros((batch, t, i2, i3)),
@@ -1004,18 +1032,17 @@ class RNNTest(test.TestCase):
   def test_peephole_lstm_cell(self):
 
     def _run_cell(cell_fn, **kwargs):
-      with self.cached_session() as sess:
-        inputs = array_ops.one_hot([1, 2, 3, 4], 4)
-        cell = cell_fn(5, **kwargs)
-        cell.build(inputs.shape)
-        initial_state = cell.get_initial_state(
-            inputs=inputs, batch_size=4, dtype=dtypes.float32)
-        inputs, _ = cell(inputs, initial_state)
-        output = inputs
-        if not context.executing_eagerly():
-          self.evaluate(variables_lib.global_variables_initializer())
-          output = self.evaluate(output)
-        return output
+      inputs = array_ops.one_hot([1, 2, 3, 4], 4)
+      cell = cell_fn(5, **kwargs)
+      cell.build(inputs.shape)
+      initial_state = cell.get_initial_state(
+          inputs=inputs, batch_size=4, dtype=dtypes.float32)
+      inputs, _ = cell(inputs, initial_state)
+      output = inputs
+      if not context.executing_eagerly():
+        self.evaluate(variables_lib.global_variables_initializer())
+        output = self.evaluate(output)
+      return output
 
     random_seed.set_random_seed(12345)
     # `recurrent_activation` kwarg is set to sigmoid as that is hardcoded into
@@ -1067,7 +1094,8 @@ class RNNTest(test.TestCase):
         Cell(), return_state=True)(x_masked, initial_state=s_0)
     model = keras.models.Model([x, s_0], [y, s])
     model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                  loss='mse')
+                  loss='mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     # last time step masked
     x_np = np.array([[[1.], [2.], [0.]]])
@@ -1091,7 +1119,8 @@ class RNNTest(test.TestCase):
       y = layer(masked_input)
       model = keras.models.Model(x, y)
       model.compile(optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.001),
-                    loss='mse')
+                    loss='mse',
+                    run_eagerly=testing_utils.should_run_eagerly())
 
       np_x = np.ones((6, 5, 5))
       result_1 = model.predict(np_x)
-- 
GitLab


From 86e4902d211c04b5d52df9943c6276db31446bad Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 18:45:42 -0800
Subject: [PATCH 477/873] Go: Update generated wrapper functions for TensorFlow
 ops. PiperOrigin-RevId: 225294738

---
 tensorflow/go/op/wrappers.go | 1552 +++++++++++++++++-----------------
 1 file changed, 776 insertions(+), 776 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 6e49fbb9ea..4624d12061 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -5977,6 +5977,78 @@ func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (s
 	return op.Output(0)
 }
 
+// StagePeekAttr is an optional argument to StagePeek.
+type StagePeekAttr func(optionalAttr)
+
+// StagePeekCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StagePeekCapacity(value int64) StagePeekAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// StagePeekMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StagePeekMemoryLimit(value int64) StagePeekAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// StagePeekContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func StagePeekContainer(value string) StagePeekAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// StagePeekSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func StagePeekSharedName(value string) StagePeekAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op peeks at the values at the specified index.  If the
+//
+// underlying container does not contain sufficient elements
+// this op will block until it does.   This Op is optimized for
+// performance.
+func StagePeek(scope *Scope, index tf.Output, dtypes []tf.DataType, optional ...StagePeekAttr) (values []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StagePeek",
+		Input: []tf.Input{
+			index,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("StagePeek", err)
+		return
+	}
+	return values
+}
+
 // Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
 //
 // The regularized incomplete beta integral is defined as:
@@ -12380,6 +12452,47 @@ func ResourceScatterSub(scope *Scope, resource tf.Output, indices tf.Output, upd
 	return scope.AddOperation(opspec)
 }
 
+// Adds sparse updates to the variable referenced by `resource`.
+//
+// This operation computes
+//
+//     # Scalar indices
+//     ref[indices, ...] += updates[...]
+//
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] += updates[i, ...]
+//
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
+//
+// Duplicate entries are handled correctly: if multiple `indices` reference
+// the same location, their contributions add.
+//
+// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
+// </div>
+//
+// Arguments:
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to add to `ref`.
+//
+// Returns the created operation.
+func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterAdd",
+		Input: []tf.Input{
+			resource, indices, updates,
+		},
+	}
+	return scope.AddOperation(opspec)
+}
+
 // ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2.
 type ResourceSparseApplyFtrlV2Attr func(optionalAttr)
 
@@ -12984,84 +13097,90 @@ func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.O
 	return op.Output(0)
 }
 
-// Encode audio data using the WAV file format.
+// Returns the element-wise sum of a list of tensors.
 //
-// This operation will generate a string suitable to be saved out to create a .wav
-// audio file. It will be encoded in the 16-bit PCM format. It takes in float
-// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
-// that range.
+// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
+// wait for all of its inputs to be ready before beginning to sum. This can
+// save memory if inputs are ready at different times, since minimum temporary
+// storage is proportional to the output size rather than the inputs size.
 //
-// `audio` is a 2-D float Tensor of shape `[length, channels]`.
-// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
+// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
 //
-// Arguments:
-//	audio: 2-D with shape `[length, channels]`.
-//	sample_rate: Scalar containing the sample frequency.
+// Returns a `Tensor` of same shape and type as the elements of `inputs`.
 //
-// Returns 0-D. WAV-encoded file contents.
-func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
+// Arguments:
+//	inputs: A list of `Tensor` objects, each with same shape and type.
+//	shape: Shape of elements of `inputs`.
+func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"shape": shape}
 	opspec := tf.OpSpec{
-		Type: "EncodeWav",
+		Type: "AccumulateNV2",
 		Input: []tf.Input{
-			audio, sample_rate,
+			tf.OutputList(inputs),
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Computes atan of x element-wise.
-func Atan(scope *Scope, x tf.Output) (y tf.Output) {
+// Outputs deterministic pseudorandom random integers from a uniform distribution.
+//
+// The generated values follow a uniform distribution in the range `[minval, maxval)`.
+//
+// The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//	minval: Minimum value (inclusive, scalar).
+//	maxval: Maximum value (exclusive, scalar).
+//
+// Returns Random values with specified shape.
+func StatelessRandomUniformInt(scope *Scope, shape tf.Output, seed tf.Output, minval tf.Output, maxval tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
 	opspec := tf.OpSpec{
-		Type: "Atan",
+		Type: "StatelessRandomUniformInt",
 		Input: []tf.Input{
-			x,
+			shape, seed, minval, maxval,
 		},
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax.
-type ResourceApplyAdaMaxAttr func(optionalAttr)
+// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal.
+type StatelessTruncatedNormalAttr func(optionalAttr)
 
-// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value.
+// StatelessTruncatedNormalDtype sets the optional dtype attribute to value.
 //
-// value: If `True`, updating of the var, m, and v tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr {
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr {
 	return func(m optionalAttr) {
-		m["use_locking"] = value
+		m["dtype"] = value
 	}
 }
 
-// Update '*var' according to the AdaMax algorithm.
+// Outputs deterministic pseudorandom values from a truncated normal distribution.
 //
-// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
-// v_t <- max(beta2 * v_{t-1}, abs(g))
-// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)
+// The generated values follow a normal distribution with mean 0 and standard
+// deviation 1, except that values whose magnitude is more than 2 standard
+// deviations from the mean are dropped and re-picked.
+//
+// The outputs are a deterministic function of `shape` and `seed`.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	m: Should be from a Variable().
-//	v: Should be from a Variable().
-//	beta1_power: Must be a scalar.
-//	lr: Scaling factor. Must be a scalar.
-//	beta1: Momentum factor. Must be a scalar.
-//	beta2: Momentum factor. Must be a scalar.
-//	epsilon: Ridge term. Must be a scalar.
-//	grad: The gradient.
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
 //
-// Returns the created operation.
-func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) {
+// Returns Random values with specified shape.
+func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -13070,149 +13189,134 @@ func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output,
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceApplyAdaMax",
+		Type: "StatelessTruncatedNormal",
 		Input: []tf.Input{
-			var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad,
+			shape, seed,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// AssertAttr is an optional argument to Assert.
-type AssertAttr func(optionalAttr)
+// RestoreSliceAttr is an optional argument to RestoreSlice.
+type RestoreSliceAttr func(optionalAttr)
 
-// AssertSummarize sets the optional summarize attribute to value.
+// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value.
 //
-// value: Print this many entries of each tensor.
-// If not specified, defaults to 3
-func AssertSummarize(value int64) AssertAttr {
+// value: Index of file to open first if multiple files match
+// `file_pattern`. See the documentation for `Restore`.
+// If not specified, defaults to -1
+func RestoreSlicePreferredShard(value int64) RestoreSliceAttr {
 	return func(m optionalAttr) {
-		m["summarize"] = value
+		m["preferred_shard"] = value
 	}
 }
 
-// Asserts that the given condition is true.
+// Restores a tensor from checkpoint files.
 //
-// If `condition` evaluates to false, print the list of tensors in `data`.
-// `summarize` determines how many entries of the tensors to print.
+// This is like `Restore` except that restored tensor can be listed as filling
+// only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
+// larger tensor and the slice that the restored tensor covers.
+//
+// The `shape_and_slice` input has the same format as the
+// elements of the `shapes_and_slices` input of the `SaveSlices` op.
 //
 // Arguments:
-//	condition: The condition to evaluate.
-//	data: The tensors to print out when condition is false.
+//	file_pattern: Must have a single element. The pattern of the files from
+// which we read the tensor.
+//	tensor_name: Must have a single element. The name of the tensor to be
+// restored.
+//	shape_and_slice: Scalar. The shapes and slice specifications to use when
+// restoring a tensors.
+//	dt: The type of the tensor to be restored.
 //
-// Returns the created operation.
-func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
+// Returns The restored tensor.
+func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{}
+	attrs := map[string]interface{}{"dt": dt}
 	for _, a := range optional {
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "Assert",
+		Type: "RestoreSlice",
 		Input: []tf.Input{
-			condition, tf.OutputList(data),
+			file_pattern, tensor_name, shape_and_slice,
 		},
 		Attrs: attrs,
 	}
-	return scope.AddOperation(opspec)
-}
-
-// CudnnRNNBackpropAttr is an optional argument to CudnnRNNBackprop.
-type CudnnRNNBackpropAttr func(optionalAttr)
-
-// CudnnRNNBackpropRnnMode sets the optional rnn_mode attribute to value.
-// If not specified, defaults to "lstm"
-func CudnnRNNBackpropRnnMode(value string) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["rnn_mode"] = value
-	}
-}
-
-// CudnnRNNBackpropInputMode sets the optional input_mode attribute to value.
-// If not specified, defaults to "linear_input"
-func CudnnRNNBackpropInputMode(value string) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["input_mode"] = value
-	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// CudnnRNNBackpropDirection sets the optional direction attribute to value.
-// If not specified, defaults to "unidirectional"
-func CudnnRNNBackpropDirection(value string) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["direction"] = value
+// Divides sparse updates into the variable referenced by `resource`.
+//
+// This operation computes
+//
+//     # Scalar indices
+//     ref[indices, ...] /= updates[...]
+//
+//     # Vector indices (for each i)
+//     ref[indices[i], ...] /= updates[i, ...]
+//
+//     # High rank indices (for each i, ..., j)
+//     ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...]
+//
+// Duplicate entries are handled correctly: if multiple `indices` reference
+// the same location, their contributions multiply.
+//
+// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
+//
+// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
+// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
+// </div>
+//
+// Arguments:
+//	resource: Should be from a `Variable` node.
+//	indices: A tensor of indices into the first dimension of `ref`.
+//	updates: A tensor of updated values to add to `ref`.
+//
+// Returns the created operation.
+func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
 	}
-}
-
-// CudnnRNNBackpropDropout sets the optional dropout attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropDropout(value float32) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["dropout"] = value
+	opspec := tf.OpSpec{
+		Type: "ResourceScatterDiv",
+		Input: []tf.Input{
+			resource, indices, updates,
+		},
 	}
+	return scope.AddOperation(opspec)
 }
 
-// CudnnRNNBackpropSeed sets the optional seed attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropSeed(value int64) CudnnRNNBackpropAttr {
-	return func(m optionalAttr) {
-		m["seed"] = value
-	}
-}
+// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal.
+type StatelessRandomNormalAttr func(optionalAttr)
 
-// CudnnRNNBackpropSeed2 sets the optional seed2 attribute to value.
-// If not specified, defaults to 0
-func CudnnRNNBackpropSeed2(value int64) CudnnRNNBackpropAttr {
+// StatelessRandomNormalDtype sets the optional dtype attribute to value.
+//
+// value: The type of the output.
+// If not specified, defaults to DT_FLOAT
+func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr {
 	return func(m optionalAttr) {
-		m["seed2"] = value
+		m["dtype"] = value
 	}
 }
 
-// Backprop step of CudnnRNN.
+// Outputs deterministic pseudorandom values from a normal distribution.
 //
-// Compute the backprop of both data and weights in a RNN.
+// The generated values will have mean 0 and standard deviation 1.
 //
-// rnn_mode: Indicates the type of the RNN model.
-// input_mode: Indicate whether there is a linear projection between the input and
-//     the actual computation before the first layer. 'skip_input' is only allowed
-//     when input_size == num_units; 'auto_select' implies 'skip_input' when
-//     input_size == num_units; otherwise, it implies 'linear_input'.
-// direction: Indicates whether a bidirectional model will be used. Should be
-//   "unidirectional" or "bidirectional".
-// dropout: Dropout probability. When set to 0., dropout is disabled.
-// seed: The 1st part of a seed to initialize dropout.
-// seed2: The 2nd part of a seed to initialize dropout.
-// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
-// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
-//     num_units].
-// input_c: For LSTM, a 3-D tensor with the shape of
-//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
-// params: A 1-D tensor that contains the weights and biases in an opaque layout.
-//     The size must be created through CudnnRNNParamsSize, and initialized
-//     separately. Note that they might not be compatible across different
-//     generations. So it is a good idea to save and restore
-// output: A 3-D tensor with the shape of [seq_length, batch_size,
-//     dir * num_units].
-// output_h: The same shape has input_h.
-// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
-// output_backprop: A 3-D tensor with the same shape as output in the forward pass.
-// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward
-//     pass.
-// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward
-//     pass.
-// reserve_space: The same reserve_space produced in for forward operation.
-// input_backprop: The backprop to input in the forward pass. Has the same shape
-//     as input.
-// input_h_backprop: The backprop to input_h in the forward pass. Has the same
-//     shape as input_h.
-// input_c_backprop: The backprop to input_c in the forward pass. Has the same
-//     shape as input_c.
-// params_backprop: The backprop to the params buffer in the forward pass. Has the
-//     same shape as params.
-func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, optional ...CudnnRNNBackpropAttr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) {
+// The outputs are a deterministic function of `shape` and `seed`.
+//
+// Arguments:
+//	shape: The shape of the output tensor.
+//	seed: 2 seeds (shape [2]).
+//
+// Returns Random values with specified shape.
+func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
@@ -13221,360 +13325,89 @@ func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "CudnnRNNBackprop",
+		Type: "StatelessRandomNormal",
 		Input: []tf.Input{
-			input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space,
+			shape, seed,
 		},
 		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+	return op.Output(0)
 }
 
-// Split a `SparseTensor` into `num_split` tensors along one dimension.
-//
-// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
-// `[0 : shape[split_dim] % num_split]` gets one extra dimension.
-// For example, if `split_dim = 1` and `num_split = 2` and the input is
-//
-//     input_tensor = shape = [2, 7]
-//     [    a   d e  ]
-//     [b c          ]
-//
-// Graphically the output tensors are:
+// Adds up a SparseTensor and a dense Tensor, using these special rules:
 //
-//     output_tensor[0] = shape = [2, 4]
-//     [    a  ]
-//     [b c    ]
+// (1) Broadcasts the dense side to have the same shape as the sparse side, if
+//     eligible;
+// (2) Then, only the dense values pointed to by the indices of the SparseTensor
+//     participate in the cwise addition.
 //
-//     output_tensor[1] = shape = [2, 3]
-//     [ d e  ]
-//     [      ]
+// By these rules, the result is a logical SparseTensor with exactly the same
+// indices and shape, but possibly with different non-zero values.  The output of
+// this Op is the resultant non-zero values.
 //
 // Arguments:
-//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
-// `[0, rank(shape))`.
-//	indices: 2-D tensor represents the indices of the sparse tensor.
-//	values: 1-D tensor represents the values of the sparse tensor.
-//	shape: 1-D. tensor represents the shape of the sparse tensor.
-// output indices: A list of 1-D tensors represents the indices of the output
-// sparse tensors.
-//	num_split: The number of ways to split.
+//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
+// SparseTensor, possibly not in canonical ordering.
+//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
+//	sp_shape: 1-D.  Shape of the input SparseTensor.
+//	dense: `R`-D.  The dense Tensor operand.
 //
-// Returns A list of 1-D tensors represents the values of the output sparse
-// tensors.A list of 1-D tensors represents the shape of the output sparse
-// tensors.
-func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) {
+// Returns 1-D.  The `N` values that are operated on.
+func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "SparseSplit",
+		Type: "SparseDenseCwiseAdd",
 		Input: []tf.Input{
-			split_dim, indices, values, shape,
+			sp_indices, sp_values, sp_shape, dense,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes the complementary error function of `x` element-wise.
+func Erfc(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	var idx int
-	var err error
-	if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
-	}
-	if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil {
-		scope.UpdateErr("SparseSplit", err)
-		return
+	opspec := tf.OpSpec{
+		Type: "Erfc",
+		Input: []tf.Input{
+			x,
+		},
 	}
-	return output_indices, output_values, output_shape
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
 }
 
-// Returns the element-wise sum of a list of tensors.
-//
-// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not
-// wait for all of its inputs to be ready before beginning to sum. This can
-// save memory if inputs are ready at different times, since minimum temporary
-// storage is proportional to the output size rather than the inputs size.
-//
-// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable.
-//
-// Returns a `Tensor` of same shape and type as the elements of `inputs`.
+// Returns the number of tensors in the input tensor list.
 //
-// Arguments:
-//	inputs: A list of `Tensor` objects, each with same shape and type.
-//	shape: Shape of elements of `inputs`.
-func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) {
+// input_handle: the input list
+// length: the number of tensors in the list
+func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
-	attrs := map[string]interface{}{"shape": shape}
 	opspec := tf.OpSpec{
-		Type: "AccumulateNV2",
+		Type: "TensorListLength",
 		Input: []tf.Input{
-			tf.OutputList(inputs),
+			input_handle,
 		},
-		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
 
-// Outputs deterministic pseudorandom random integers from a uniform distribution.
-//
-// The generated values follow a uniform distribution in the range `[minval, maxval)`.
+// Determine the script codes of a given tensor of Unicode integer code points.
 //
-// The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`.
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
-//	minval: Minimum value (inclusive, scalar).
-//	maxval: Maximum value (exclusive, scalar).
-//
-// Returns Random values with specified shape.
-func StatelessRandomUniformInt(scope *Scope, shape tf.Output, seed tf.Output, minval tf.Output, maxval tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "StatelessRandomUniformInt",
-		Input: []tf.Input{
-			shape, seed, minval, maxval,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal.
-type StatelessTruncatedNormalAttr func(optionalAttr)
-
-// StatelessTruncatedNormalDtype sets the optional dtype attribute to value.
-//
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Outputs deterministic pseudorandom values from a truncated normal distribution.
-//
-// The generated values follow a normal distribution with mean 0 and standard
-// deviation 1, except that values whose magnitude is more than 2 standard
-// deviations from the mean are dropped and re-picked.
-//
-// The outputs are a deterministic function of `shape` and `seed`.
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
-//
-// Returns Random values with specified shape.
-func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StatelessTruncatedNormal",
-		Input: []tf.Input{
-			shape, seed,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// RestoreSliceAttr is an optional argument to RestoreSlice.
-type RestoreSliceAttr func(optionalAttr)
-
-// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value.
-//
-// value: Index of file to open first if multiple files match
-// `file_pattern`. See the documentation for `Restore`.
-// If not specified, defaults to -1
-func RestoreSlicePreferredShard(value int64) RestoreSliceAttr {
-	return func(m optionalAttr) {
-		m["preferred_shard"] = value
-	}
-}
-
-// Restores a tensor from checkpoint files.
-//
-// This is like `Restore` except that restored tensor can be listed as filling
-// only a slice of a larger tensor.  `shape_and_slice` specifies the shape of the
-// larger tensor and the slice that the restored tensor covers.
-//
-// The `shape_and_slice` input has the same format as the
-// elements of the `shapes_and_slices` input of the `SaveSlices` op.
-//
-// Arguments:
-//	file_pattern: Must have a single element. The pattern of the files from
-// which we read the tensor.
-//	tensor_name: Must have a single element. The name of the tensor to be
-// restored.
-//	shape_and_slice: Scalar. The shapes and slice specifications to use when
-// restoring a tensors.
-//	dt: The type of the tensor to be restored.
-//
-// Returns The restored tensor.
-func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dt": dt}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "RestoreSlice",
-		Input: []tf.Input{
-			file_pattern, tensor_name, shape_and_slice,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Divides sparse updates into the variable referenced by `resource`.
-//
-// This operation computes
-//
-//     # Scalar indices
-//     ref[indices, ...] /= updates[...]
-//
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] /= updates[i, ...]
-//
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...]
-//
-// Duplicate entries are handled correctly: if multiple `indices` reference
-// the same location, their contributions multiply.
-//
-// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
-// </div>
-//
-// Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of updated values to add to `ref`.
-//
-// Returns the created operation.
-func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceScatterDiv",
-		Input: []tf.Input{
-			resource, indices, updates,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
-// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal.
-type StatelessRandomNormalAttr func(optionalAttr)
-
-// StatelessRandomNormalDtype sets the optional dtype attribute to value.
-//
-// value: The type of the output.
-// If not specified, defaults to DT_FLOAT
-func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr {
-	return func(m optionalAttr) {
-		m["dtype"] = value
-	}
-}
-
-// Outputs deterministic pseudorandom values from a normal distribution.
-//
-// The generated values will have mean 0 and standard deviation 1.
-//
-// The outputs are a deterministic function of `shape` and `seed`.
-//
-// Arguments:
-//	shape: The shape of the output tensor.
-//	seed: 2 seeds (shape [2]).
-//
-// Returns Random values with specified shape.
-func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StatelessRandomNormal",
-		Input: []tf.Input{
-			shape, seed,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Computes the complementary error function of `x` element-wise.
-func Erfc(scope *Scope, x tf.Output) (y tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "Erfc",
-		Input: []tf.Input{
-			x,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the number of tensors in the input tensor list.
-//
-// input_handle: the input list
-// length: the number of tensors in the list
-func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "TensorListLength",
-		Input: []tf.Input{
-			input_handle,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Determine the script codes of a given tensor of Unicode integer code points.
-//
-// This operation converts Unicode code points to script codes corresponding to
-// each code point. Script codes correspond to International Components for
-// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html.
-// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will
-// match input shape.
+// This operation converts Unicode code points to script codes corresponding to
+// each code point. Script codes correspond to International Components for
+// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html.
+// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. Output shape will
+// match input shape.
 //
 // Arguments:
 //	input: A Tensor of int32 Unicode code points.
@@ -14122,13 +13955,312 @@ func StringSplitSkipEmpty(value bool) StringSplitAttr {
 //   values = ['hello', 'world', 'a', 'b', 'c']
 //
 // Arguments:
-//	input: 1-D. Strings to split.
-//	delimiter: 0-D. Delimiter characters (bytes), or empty string.
+//	input: 1-D. Strings to split.
+//	delimiter: 0-D. Delimiter characters (bytes), or empty string.
+//
+// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse
+// tensor, where the first value is N and the second value is the maximum number
+// of tokens in a single input entry.
+func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "StringSplit",
+		Input: []tf.Input{
+			input, delimiter,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum.
+type ResourceSparseApplyMomentumAttr func(optionalAttr)
+
+// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var and accum tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
+//
+// value: If `True`, the tensor passed to compute grad will be
+// var - lr * momentum * accum, so in the end, the var you get is actually
+// var - lr * momentum * accum.
+// If not specified, defaults to false
+func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr {
+	return func(m optionalAttr) {
+		m["use_nesterov"] = value
+	}
+}
+
+// Update relevant entries in '*var' and '*accum' according to the momentum scheme.
+//
+// Set use_nesterov = True if you want to use Nesterov momentum.
+//
+// That is for rows we have grad for, we update var and accum as follows:
+//
+// accum = accum * momentum + grad
+// var -= lr * accum
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	accum: Should be from a Variable().
+//	lr: Learning rate. Must be a scalar.
+//	grad: The gradient.
+//	indices: A vector of indices into the first dimension of var and accum.
+//	momentum: Momentum. Must be a scalar.
+//
+// Returns the created operation.
+func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "ResourceSparseApplyMomentum",
+		Input: []tf.Input{
+			var_, accum, lr, grad, indices, momentum,
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
+// Returns the complex conjugate of a complex number.
+//
+// Given a tensor `input` of complex numbers, this operation returns a tensor of
+// complex numbers that are the complex conjugate of each element in `input`. The
+// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the
+// real part and *b* is the imaginary part.
+//
+// The complex conjugate returned by this operation is of the form \\(a - bj\\).
+//
+// For example:
+//
+// ```
+// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
+// ```
+func Conj(scope *Scope, input tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Conj",
+		Input: []tf.Input{
+			input,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// CudnnRNNBackpropAttr is an optional argument to CudnnRNNBackprop.
+type CudnnRNNBackpropAttr func(optionalAttr)
+
+// CudnnRNNBackpropRnnMode sets the optional rnn_mode attribute to value.
+// If not specified, defaults to "lstm"
+func CudnnRNNBackpropRnnMode(value string) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["rnn_mode"] = value
+	}
+}
+
+// CudnnRNNBackpropInputMode sets the optional input_mode attribute to value.
+// If not specified, defaults to "linear_input"
+func CudnnRNNBackpropInputMode(value string) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["input_mode"] = value
+	}
+}
+
+// CudnnRNNBackpropDirection sets the optional direction attribute to value.
+// If not specified, defaults to "unidirectional"
+func CudnnRNNBackpropDirection(value string) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["direction"] = value
+	}
+}
+
+// CudnnRNNBackpropDropout sets the optional dropout attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropDropout(value float32) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["dropout"] = value
+	}
+}
+
+// CudnnRNNBackpropSeed sets the optional seed attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropSeed(value int64) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["seed"] = value
+	}
+}
+
+// CudnnRNNBackpropSeed2 sets the optional seed2 attribute to value.
+// If not specified, defaults to 0
+func CudnnRNNBackpropSeed2(value int64) CudnnRNNBackpropAttr {
+	return func(m optionalAttr) {
+		m["seed2"] = value
+	}
+}
+
+// Backprop step of CudnnRNN.
+//
+// Compute the backprop of both data and weights in a RNN.
+//
+// rnn_mode: Indicates the type of the RNN model.
+// input_mode: Indicate whether there is a linear projection between the input and
+//     the actual computation before the first layer. 'skip_input' is only allowed
+//     when input_size == num_units; 'auto_select' implies 'skip_input' when
+//     input_size == num_units; otherwise, it implies 'linear_input'.
+// direction: Indicates whether a bidirectional model will be used. Should be
+//   "unidirectional" or "bidirectional".
+// dropout: Dropout probability. When set to 0., dropout is disabled.
+// seed: The 1st part of a seed to initialize dropout.
+// seed2: The 2nd part of a seed to initialize dropout.
+// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
+// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
+//     num_units].
+// input_c: For LSTM, a 3-D tensor with the shape of
+//     [num_layer * dir, batch, num_units]. For other models, it is ignored.
+// params: A 1-D tensor that contains the weights and biases in an opaque layout.
+//     The size must be created through CudnnRNNParamsSize, and initialized
+//     separately. Note that they might not be compatible across different
+//     generations. So it is a good idea to save and restore
+// output: A 3-D tensor with the shape of [seq_length, batch_size,
+//     dir * num_units].
+// output_h: The same shape has input_h.
+// output_c: The same shape as input_c for LSTM. An empty tensor for other models.
+// output_backprop: A 3-D tensor with the same shape as output in the forward pass.
+// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward
+//     pass.
+// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward
+//     pass.
+// reserve_space: The same reserve_space produced in for forward operation.
+// input_backprop: The backprop to input in the forward pass. Has the same shape
+//     as input.
+// input_h_backprop: The backprop to input_h in the forward pass. Has the same
+//     shape as input_h.
+// input_c_backprop: The backprop to input_c in the forward pass. Has the same
+//     shape as input_c.
+// params_backprop: The backprop to the params buffer in the forward pass. Has the
+//     same shape as params.
+func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, optional ...CudnnRNNBackpropAttr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "CudnnRNNBackprop",
+		Input: []tf.Input{
+			input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2), op.Output(3)
+}
+
+// Encode audio data using the WAV file format.
+//
+// This operation will generate a string suitable to be saved out to create a .wav
+// audio file. It will be encoded in the 16-bit PCM format. It takes in float
+// values in the range -1.0f to 1.0f, and any outside that value will be clamped to
+// that range.
+//
+// `audio` is a 2-D float Tensor of shape `[length, channels]`.
+// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100).
+//
+// Arguments:
+//	audio: 2-D with shape `[length, channels]`.
+//	sample_rate: Scalar containing the sample frequency.
+//
+// Returns 0-D. WAV-encoded file contents.
+func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "EncodeWav",
+		Input: []tf.Input{
+			audio, sample_rate,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// Computes atan of x element-wise.
+func Atan(scope *Scope, x tf.Output) (y tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "Atan",
+		Input: []tf.Input{
+			x,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
+// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax.
+type ResourceApplyAdaMaxAttr func(optionalAttr)
+
+// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value.
+//
+// value: If `True`, updating of the var, m, and v tensors will be protected
+// by a lock; otherwise the behavior is undefined, but may exhibit less
+// contention.
+// If not specified, defaults to false
+func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr {
+	return func(m optionalAttr) {
+		m["use_locking"] = value
+	}
+}
+
+// Update '*var' according to the AdaMax algorithm.
+//
+// m_t <- beta1 * m_{t-1} + (1 - beta1) * g
+// v_t <- max(beta2 * v_{t-1}, abs(g))
+// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon)
+//
+// Arguments:
+//	var_: Should be from a Variable().
+//	m: Should be from a Variable().
+//	v: Should be from a Variable().
+//	beta1_power: Must be a scalar.
+//	lr: Scaling factor. Must be a scalar.
+//	beta1: Momentum factor. Must be a scalar.
+//	beta2: Momentum factor. Must be a scalar.
+//	epsilon: Ridge term. Must be a scalar.
+//	grad: The gradient.
 //
-// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse
-// tensor, where the first value is N and the second value is the maximum number
-// of tokens in a single input entry.
-func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) {
+// Returns the created operation.
+func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14137,62 +14269,39 @@ func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ..
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "StringSplit",
+		Type: "ResourceApplyAdaMax",
 		Input: []tf.Input{
-			input, delimiter,
+			var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad,
 		},
 		Attrs: attrs,
 	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
+	return scope.AddOperation(opspec)
 }
 
-// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum.
-type ResourceSparseApplyMomentumAttr func(optionalAttr)
-
-// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value.
-//
-// value: If `True`, updating of the var and accum tensors will be protected
-// by a lock; otherwise the behavior is undefined, but may exhibit less
-// contention.
-// If not specified, defaults to false
-func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr {
-	return func(m optionalAttr) {
-		m["use_locking"] = value
-	}
-}
+// AssertAttr is an optional argument to Assert.
+type AssertAttr func(optionalAttr)
 
-// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value.
+// AssertSummarize sets the optional summarize attribute to value.
 //
-// value: If `True`, the tensor passed to compute grad will be
-// var - lr * momentum * accum, so in the end, the var you get is actually
-// var - lr * momentum * accum.
-// If not specified, defaults to false
-func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr {
+// value: Print this many entries of each tensor.
+// If not specified, defaults to 3
+func AssertSummarize(value int64) AssertAttr {
 	return func(m optionalAttr) {
-		m["use_nesterov"] = value
+		m["summarize"] = value
 	}
 }
 
-// Update relevant entries in '*var' and '*accum' according to the momentum scheme.
-//
-// Set use_nesterov = True if you want to use Nesterov momentum.
-//
-// That is for rows we have grad for, we update var and accum as follows:
+// Asserts that the given condition is true.
 //
-// accum = accum * momentum + grad
-// var -= lr * accum
+// If `condition` evaluates to false, print the list of tensors in `data`.
+// `summarize` determines how many entries of the tensors to print.
 //
 // Arguments:
-//	var_: Should be from a Variable().
-//	accum: Should be from a Variable().
-//	lr: Learning rate. Must be a scalar.
-//	grad: The gradient.
-//	indices: A vector of indices into the first dimension of var and accum.
-//	momentum: Momentum. Must be a scalar.
+//	condition: The condition to evaluate.
+//	data: The tensors to print out when condition is false.
 //
 // Returns the created operation.
-func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) {
+func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) {
 	if scope.Err() != nil {
 		return
 	}
@@ -14201,42 +14310,79 @@ func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output,
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "ResourceSparseApplyMomentum",
+		Type: "Assert",
 		Input: []tf.Input{
-			var_, accum, lr, grad, indices, momentum,
+			condition, tf.OutputList(data),
 		},
 		Attrs: attrs,
 	}
 	return scope.AddOperation(opspec)
 }
 
-// Returns the complex conjugate of a complex number.
+// Split a `SparseTensor` into `num_split` tensors along one dimension.
 //
-// Given a tensor `input` of complex numbers, this operation returns a tensor of
-// complex numbers that are the complex conjugate of each element in `input`. The
-// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the
-// real part and *b* is the imaginary part.
+// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices
+// `[0 : shape[split_dim] % num_split]` gets one extra dimension.
+// For example, if `split_dim = 1` and `num_split = 2` and the input is
 //
-// The complex conjugate returned by this operation is of the form \\(a - bj\\).
+//     input_tensor = shape = [2, 7]
+//     [    a   d e  ]
+//     [b c          ]
 //
-// For example:
+// Graphically the output tensors are:
 //
-// ```
-// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
-// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
-// ```
-func Conj(scope *Scope, input tf.Output) (output tf.Output) {
+//     output_tensor[0] = shape = [2, 4]
+//     [    a  ]
+//     [b c    ]
+//
+//     output_tensor[1] = shape = [2, 3]
+//     [ d e  ]
+//     [      ]
+//
+// Arguments:
+//	split_dim: 0-D.  The dimension along which to split.  Must be in the range
+// `[0, rank(shape))`.
+//	indices: 2-D tensor represents the indices of the sparse tensor.
+//	values: 1-D tensor represents the values of the sparse tensor.
+//	shape: 1-D. tensor represents the shape of the sparse tensor.
+// output indices: A list of 1-D tensors represents the indices of the output
+// sparse tensors.
+//	num_split: The number of ways to split.
+//
+// Returns A list of 1-D tensors represents the values of the output sparse
+// tensors.A list of 1-D tensors represents the shape of the output sparse
+// tensors.
+func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) {
 	if scope.Err() != nil {
 		return
 	}
+	attrs := map[string]interface{}{"num_split": num_split}
 	opspec := tf.OpSpec{
-		Type: "Conj",
+		Type: "SparseSplit",
 		Input: []tf.Input{
-			input,
+			split_dim, indices, values, shape,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
-	return op.Output(0)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil {
+		scope.UpdateErr("SparseSplit", err)
+		return
+	}
+	return output_indices, output_values, output_shape
 }
 
 // Computes numerical negative value element-wise.
@@ -18132,6 +18278,93 @@ func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, opt
 	return op.Output(0)
 }
 
+// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation.
+type DenseToSparseSetOperationAttr func(optionalAttr)
+
+// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
+// If not specified, defaults to true
+func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr {
+	return func(m optionalAttr) {
+		m["validate_indices"] = value
+	}
+}
+
+// Applies set operation along last dimension of `Tensor` and `SparseTensor`.
+//
+// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
+//
+// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
+// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
+// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
+// ignored.
+//
+// If `validate_indices` is `True`, this op validates the order and range of `set2`
+// indices.
+//
+// Output `result` is a `SparseTensor` represented by `result_indices`,
+// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
+// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
+// dimension contains the result of `set_operation` applied to the corresponding
+// `[0...n-1]` dimension of `set`.
+//
+// Arguments:
+//	set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
+// Dimension `n` contains values in a set, duplicates are allowed but ignored.
+//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
+// order.
+//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
+// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the
+// max set size across `n-1` dimensions.
+//
+//
+// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
+// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
+// is the max result set size across all `0...n-1` dimensions.
+func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"set_operation": set_operation}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "DenseToSparseSetOperation",
+		Input: []tf.Input{
+			set1, set2_indices, set2_values, set2_shape,
+		},
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0), op.Output(1), op.Output(2)
+}
+
+// L2 Loss.
+//
+// Computes half the L2 norm of a tensor without the `sqrt`:
+//
+//     output = sum(t ** 2) / 2
+//
+// Arguments:
+//	t: Typically 2-D, but may have any dimensions.
+//
+// Returns 0-D.
+func L2Loss(scope *Scope, t tf.Output) (output tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "L2Loss",
+		Input: []tf.Input{
+			t,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // Computes cos of x element-wise.
 func Cos(scope *Scope, x tf.Output) (y tf.Output) {
 	if scope.Err() != nil {
@@ -20427,29 +20660,11 @@ func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output,
 		a(attrs)
 	}
 	opspec := tf.OpSpec{
-		Type: "HistogramFixedWidth",
-		Input: []tf.Input{
-			values, value_range, nbins,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// Returns the truth value of (x >= y) element-wise.
-//
-// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting
-// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
-func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "GreaterEqual",
+		Type: "HistogramFixedWidth",
 		Input: []tf.Input{
-			x, y,
+			values, value_range, nbins,
 		},
+		Attrs: attrs,
 	}
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
@@ -20520,39 +20735,6 @@ func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, pa
 	return op.Output(0)
 }
 
-// Adds up a SparseTensor and a dense Tensor, using these special rules:
-//
-// (1) Broadcasts the dense side to have the same shape as the sparse side, if
-//     eligible;
-// (2) Then, only the dense values pointed to by the indices of the SparseTensor
-//     participate in the cwise addition.
-//
-// By these rules, the result is a logical SparseTensor with exactly the same
-// indices and shape, but possibly with different non-zero values.  The output of
-// this Op is the resultant non-zero values.
-//
-// Arguments:
-//	sp_indices: 2-D.  `N x R` matrix with the indices of non-empty values in a
-// SparseTensor, possibly not in canonical ordering.
-//	sp_values: 1-D.  `N` non-empty values corresponding to `sp_indices`.
-//	sp_shape: 1-D.  Shape of the input SparseTensor.
-//	dense: `R`-D.  The dense Tensor operand.
-//
-// Returns 1-D.  The `N` values that are operated on.
-func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "SparseDenseCwiseAdd",
-		Input: []tf.Input{
-			sp_indices, sp_values, sp_shape, dense,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
 // QuantizeV2Attr is an optional argument to QuantizeV2.
 type QuantizeV2Attr func(optionalAttr)
 
@@ -22134,93 +22316,6 @@ func DeleteSessionTensor(scope *Scope, handle tf.Output) (o *tf.Operation) {
 	return scope.AddOperation(opspec)
 }
 
-// L2 Loss.
-//
-// Computes half the L2 norm of a tensor without the `sqrt`:
-//
-//     output = sum(t ** 2) / 2
-//
-// Arguments:
-//	t: Typically 2-D, but may have any dimensions.
-//
-// Returns 0-D.
-func L2Loss(scope *Scope, t tf.Output) (output tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "L2Loss",
-		Input: []tf.Input{
-			t,
-		},
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0)
-}
-
-// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation.
-type DenseToSparseSetOperationAttr func(optionalAttr)
-
-// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value.
-// If not specified, defaults to true
-func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr {
-	return func(m optionalAttr) {
-		m["validate_indices"] = value
-	}
-}
-
-// Applies set operation along last dimension of `Tensor` and `SparseTensor`.
-//
-// See SetOperationOp::SetOperationFromContext for values of `set_operation`.
-//
-// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`,
-// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same
-// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but
-// ignored.
-//
-// If `validate_indices` is `True`, this op validates the order and range of `set2`
-// indices.
-//
-// Output `result` is a `SparseTensor` represented by `result_indices`,
-// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this
-// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth`
-// dimension contains the result of `set_operation` applied to the corresponding
-// `[0...n-1]` dimension of `set`.
-//
-// Arguments:
-//	set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`.
-// Dimension `n` contains values in a set, duplicates are allowed but ignored.
-//	set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major
-// order.
-//	set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must
-// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the
-// max set size across `n-1` dimensions.
-//
-//
-// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is
-// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]`
-// is the max result set size across all `0...n-1` dimensions.
-func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"set_operation": set_operation}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "DenseToSparseSetOperation",
-		Input: []tf.Input{
-			set1, set2_indices, set2_values, set2_shape,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	return op.Output(0), op.Output(1), op.Output(2)
-}
-
 // Subtracts a value from the current value of a variable.
 //
 // Any ReadVariableOp with a control dependency on this op is guaranteed to
@@ -22749,47 +22844,6 @@ func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataTy
 	return output
 }
 
-// Adds sparse updates to the variable referenced by `resource`.
-//
-// This operation computes
-//
-//     # Scalar indices
-//     ref[indices, ...] += updates[...]
-//
-//     # Vector indices (for each i)
-//     ref[indices[i], ...] += updates[i, ...]
-//
-//     # High rank indices (for each i, ..., j)
-//     ref[indices[i, ..., j], ...] += updates[i, ..., j, ...]
-//
-// Duplicate entries are handled correctly: if multiple `indices` reference
-// the same location, their contributions add.
-//
-// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`.
-//
-// <div style="width:70%; margin:auto; margin-bottom:10px; margin-top:20px;">
-// <img style="width:100%" src='https://www.tensorflow.org/images/ScatterAdd.png' alt>
-// </div>
-//
-// Arguments:
-//	resource: Should be from a `Variable` node.
-//	indices: A tensor of indices into the first dimension of `ref`.
-//	updates: A tensor of updated values to add to `ref`.
-//
-// Returns the created operation.
-func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	opspec := tf.OpSpec{
-		Type: "ResourceScatterAdd",
-		Input: []tf.Input{
-			resource, indices, updates,
-		},
-	}
-	return scope.AddOperation(opspec)
-}
-
 // Says whether the targets are in the top `K` predictions.
 //
 // This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the
@@ -22954,6 +23008,24 @@ func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Outp
 	return op.Output(0), op.Output(1), op.Output(2)
 }
 
+// Returns the truth value of (x >= y) element-wise.
+//
+// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting
+// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	opspec := tf.OpSpec{
+		Type: "GreaterEqual",
+		Input: []tf.Input{
+			x, y,
+		},
+	}
+	op := scope.AddOperation(opspec)
+	return op.Output(0)
+}
+
 // ApproximateEqualAttr is an optional argument to ApproximateEqual.
 type ApproximateEqualAttr func(optionalAttr)
 
@@ -33598,75 +33670,3 @@ func Stage(scope *Scope, values []tf.Output, optional ...StageAttr) (o *tf.Opera
 	}
 	return scope.AddOperation(opspec)
 }
-
-// StagePeekAttr is an optional argument to StagePeek.
-type StagePeekAttr func(optionalAttr)
-
-// StagePeekCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func StagePeekCapacity(value int64) StagePeekAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// StagePeekMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func StagePeekMemoryLimit(value int64) StagePeekAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// StagePeekContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func StagePeekContainer(value string) StagePeekAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// StagePeekSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func StagePeekSharedName(value string) StagePeekAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op peeks at the values at the specified index.  If the
-//
-// underlying container does not contain sufficient elements
-// this op will block until it does.   This Op is optimized for
-// performance.
-func StagePeek(scope *Scope, index tf.Output, dtypes []tf.DataType, optional ...StagePeekAttr) (values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "StagePeek",
-		Input: []tf.Input{
-			index,
-		},
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("StagePeek", err)
-		return
-	}
-	return values
-}
-- 
GitLab


From 306cf4b2834cca1f1fc1fc58ba16c3248516dfd9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 19:02:56 -0800
Subject: [PATCH 478/873] Add a config setting to exclude xla from
 tensorflow_opensource target.

PiperOrigin-RevId: 225296419
---
 tensorflow/BUILD | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 823ad8f506..449a1372ed 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -267,6 +267,15 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+# By default, XLA GPU is compiled into tensorflow when building with
+# --config=cuda even when `with_xla_support` is false. The config setting
+# here allows us to override the behavior if needed.
+config_setting(
+    name = "no_xla_deps_in_cuda",
+    define_values = {"no_xla_deps_in_cuda": "true"},
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "with_gdr_support",
     define_values = {"with_gdr_support": "true"},
-- 
GitLab


From 7ac4a1e47554ad375d94f2cd5db57e0a93139457 Mon Sep 17 00:00:00 2001
From: Youlong Cheng <ylc@google.com>
Date: Wed, 12 Dec 2018 19:26:17 -0800
Subject: [PATCH 479/873] Stop TPU computation if compilation fails.

PiperOrigin-RevId: 225298488
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index f179289584..11ca5e1024 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -31,6 +31,7 @@ import six
 from six.moves import queue as Queue  # pylint: disable=redefined-builtin
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.contrib.tpu.proto import compilation_result_pb2 as tpu_compilation_result
 from tensorflow.contrib.tpu.python.tpu import tensor_tracer
 from tensorflow.contrib.tpu.python.ops import tpu_ops
 from tensorflow.contrib.tpu.python.tpu import error_handling
@@ -489,6 +490,15 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
   def _create_infeed_controller(self, name, target, args):
     return _OpQueueContext(name=name, target=target, args=args)
 
+  def _assertCompilationSucceeded(self, result, coord):
+    proto = tpu_compilation_result.CompilationResultProto()
+    proto.ParseFromString(result)
+    if proto.status_error_message:
+      logging.error('Compilation failed: {}'.format(proto.status_error_message))
+      coord.request_stop()
+    else:
+      logging.info('Compilation succeeded')
+
   def after_create_session(self, session, coord):
     if self._should_initialize_tpu:
       logging.info('Init TPU system')
@@ -504,7 +514,7 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook):
 
     if os.environ.get('TPU_SPLIT_COMPILE_AND_EXECUTE', '') == '1':
       logging.info('Compiling user program: this may take a while...')
-      logging.info('Compile finished: %s', session.run(self._tpu_compile_op))
+      self._assertCompilationSucceeded(session.run(self._tpu_compile_op), coord)
 
     self._infeed_controller = self._create_infeed_controller(
         name='InfeedController', target=self._run_infeed, args=(session,))
-- 
GitLab


From 7b9f418c374b3e919fe88e22ddcaf5528222eef7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 19:34:15 -0800
Subject: [PATCH 480/873] use optimized_ops::TanH directly

PiperOrigin-RevId: 225299240
---
 tensorflow/lite/kernels/activations.cc | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc
index a766542560..ab09cf7196 100644
--- a/tensorflow/lite/kernels/activations.cc
+++ b/tensorflow/lite/kernels/activations.cc
@@ -373,11 +373,8 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* output = GetOutput(context, node, 0);
   switch (input->type) {
     case kTfLiteFloat32: {
-      size_t elements = input->bytes / sizeof(float);
-      float* in = input->data.f;
-      float* in_end = in + elements;
-      float* out = output->data.f;
-      for (; in < in_end; in++, out++) *out = std::tanh(*in);
+      optimized_ops::Tanh(GetTensorShape(input), GetTensorData<float>(input),
+                          GetTensorShape(output), GetTensorData<float>(output));
       return kTfLiteOk;
     } break;
     case kTfLiteInt16: {
-- 
GitLab


From 7bae8aadd4519ce9e104fd68127dbb94c11b21ea Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Wed, 12 Dec 2018 19:48:59 -0800
Subject: [PATCH 481/873] - Change std::set<int> to
 absl::optional<std::set<int>> for allowed devices - Addres review comments.

---
 tensorflow/compiler/jit/xla_device.h          | 14 +++++++++----
 .../compiler/xla/client/client_library.cc     | 21 ++++++++++---------
 .../compiler/xla/client/client_library.h      | 13 +++++++-----
 tensorflow/compiler/xla/service/backend.cc    | 13 ++++++------
 tensorflow/compiler/xla/service/backend.h     | 13 ++++--------
 .../compiler/xla/service/local_service.cc     |  2 +-
 .../compiler/xla/service/platform_util.cc     |  9 ++++----
 .../compiler/xla/service/platform_util.h      |  3 ++-
 tensorflow/compiler/xla/service/service.cc    |  9 ++++----
 tensorflow/compiler/xla/service/service.h     |  7 ++++---
 10 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h
index f1bc882f2c..94706a7056 100644
--- a/tensorflow/compiler/jit/xla_device.h
+++ b/tensorflow/compiler/jit/xla_device.h
@@ -26,6 +26,7 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_JIT_XLA_DEVICE_H_
 #include <set>
 
+#include "absl/types/optional.h"
 #include "tensorflow/compiler/jit/xla_device_context.h"
 #include "tensorflow/compiler/jit/xla_tensor.h"
 #include "tensorflow/compiler/tf2xla/xla_compiler.h"
@@ -124,8 +125,11 @@ class XlaDevice : public LocalDevice {
     // If padded_shape_fn is empty, a default implementation that returns
     // the logical on-device shape without padding is used.
     PaddedShapeFn padded_shape_fn;
-    // Set of allowed devices. -1 is all devices
-    std::set<int> allowed_devices = {-1};
+
+    // Set of devices to use. This controls which of the devices given type in
+    // the system will have resources allocated for. For GPUs this will be
+    // filled from visible_gpu_devices list from session configuration.
+    absl::optional<std::set<int>> allowed_devices;
   };
 
   // Creates a new XLA Device.
@@ -260,8 +264,10 @@ class XlaDevice : public LocalDevice {
   int64 outstanding_asynchronous_operations_ GUARDED_BY(mu_) = 0;
   condition_variable outstanding_asynchronous_operations_cv_;
 
-  // Set of allowed gpu devices at the time of construction.
-  std::set<int> allowed_devices_ = {-1};
+  // Set of devices to use. This controls which of the devices of current type
+  // in the system will have resources allocated for. For GPUs this will be
+  // filled from visible_gpu_devices list from session configuration.
+  absl::optional<std::set<int>> allowed_devices_;
 };
 
 // Builds OpKernel registrations on 'device' for the JIT operators
diff --git a/tensorflow/compiler/xla/client/client_library.cc b/tensorflow/compiler/xla/client/client_library.cc
index 33d8fa9841..42aae02622 100644
--- a/tensorflow/compiler/xla/client/client_library.cc
+++ b/tensorflow/compiler/xla/client/client_library.cc
@@ -24,14 +24,14 @@ limitations under the License.
 
 namespace xla {
 
-LocalClientOptions::LocalClientOptions(se::Platform* platform,
-                                       int number_of_replicas,
-                                       int intra_op_parallelism_threads,
-                                       std::set<int> device_set)
+LocalClientOptions::LocalClientOptions(
+    se::Platform* platform, int number_of_replicas,
+    int intra_op_parallelism_threads,
+    const absl::optional<std::set<int>>& allowed_devices)
     : platform_(platform),
       number_of_replicas_(number_of_replicas),
       intra_op_parallelism_threads_(intra_op_parallelism_threads),
-      allowed_devices_(device_set) {}
+      allowed_devices_(allowed_devices) {}
 
 LocalClientOptions& LocalClientOptions::set_platform(se::Platform* platform) {
   platform_ = platform;
@@ -61,12 +61,13 @@ int LocalClientOptions::intra_op_parallelism_threads() const {
 }
 
 LocalClientOptions& LocalClientOptions::set_allowed_devices(
-    std::set<int> device_set) {
-  allowed_devices_ = device_set;
+    const absl::optional<std::set<int>>& allowed_devices) {
+  allowed_devices_ = allowed_devices;
   return *this;
 }
 
-std::set<int> LocalClientOptions::get_allowed_devices() const {
+const absl::optional<std::set<int>>& LocalClientOptions::allowed_devices()
+    const {
   return allowed_devices_;
 }
 
@@ -79,7 +80,7 @@ ClientLibrary::ClientLibrary() = default;
 ClientLibrary::~ClientLibrary() = default;
 
 /* static */ StatusOr<LocalClient*> ClientLibrary::GetOrCreateLocalClient(
-    se::Platform* platform, const std::set<int> device_set) {
+    se::Platform* platform, const absl::optional<std::set<int>>& device_set) {
   LocalClientOptions default_options;
   default_options.set_platform(platform);
   default_options.set_allowed_devices(device_set);
@@ -107,7 +108,7 @@ ClientLibrary::~ClientLibrary() = default;
   service_options.set_number_of_replicas(replica_count);
   service_options.set_intra_op_parallelism_threads(
       options.intra_op_parallelism_threads());
-  service_options.set_allowed_devices(options.get_allowed_devices());
+  service_options.set_allowed_devices(options.allowed_devices());
   auto instance = absl::make_unique<LocalInstance>();
   TF_ASSIGN_OR_RETURN(instance->service,
                       LocalService::NewService(service_options));
diff --git a/tensorflow/compiler/xla/client/client_library.h b/tensorflow/compiler/xla/client/client_library.h
index 1e6e4c6bf3..4d615f03f2 100644
--- a/tensorflow/compiler/xla/client/client_library.h
+++ b/tensorflow/compiler/xla/client/client_library.h
@@ -27,6 +27,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/client/compile_only_client.h"
 #include "tensorflow/compiler/xla/client/local_client.h"
 #include "tensorflow/compiler/xla/service/compile_only_service.h"
@@ -47,7 +48,7 @@ class LocalClientOptions {
   LocalClientOptions(se::Platform* platform = nullptr,
                      int number_of_replicas = 1,
                      int intra_op_parallelism_threads = -1,
-                     std::set<int> device_set = {-1});
+                     const absl::optional<std::set<int>>& allowed_devices = {});
 
   // Set the platform backing the service, or nullptr for the default platform.
   LocalClientOptions& set_platform(se::Platform* platform);
@@ -63,15 +64,16 @@ class LocalClientOptions {
   int intra_op_parallelism_threads() const;
 
   // Sets the allowed_devices set for creation of stream executors.
-  LocalClientOptions& set_allowed_devices(const std::set<int> device_set);
+  LocalClientOptions& set_allowed_devices(
+      const absl::optional<std::set<int>>& allowed_devices);
 
-  std::set<int> get_allowed_devices() const;
+  const absl::optional<std::set<int>>& allowed_devices() const;
 
  private:
   se::Platform* platform_;
   int number_of_replicas_;
   int intra_op_parallelism_threads_;
-  std::set<int> allowed_devices_;
+  absl::optional<std::set<int>> allowed_devices_;
 };
 
 class ClientLibrary {
@@ -84,7 +86,8 @@ class ClientLibrary {
   //   device_set: Set of device IDs for which the stream executor will be
   //   created for, for the given platform.
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
-      se::Platform* platform = nullptr, const std::set<int> device_set = {-1});
+      se::Platform* platform = nullptr,
+      const absl::optional<std::set<int>>& allowed_devices = {});
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
       const LocalClientOptions& options);
 
diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc
index 99e963b929..cf4a24d7b4 100644
--- a/tensorflow/compiler/xla/service/backend.cc
+++ b/tensorflow/compiler/xla/service/backend.cc
@@ -57,12 +57,13 @@ int BackendOptions::intra_op_parallelism_threads() const {
   return intra_op_parallelism_threads_;
 }
 
-BackendOptions& BackendOptions::set_allowed_devices(std::set<int> device_set) {
-  allowed_devices_ = device_set;
+BackendOptions& BackendOptions::set_allowed_devices(
+    const absl::optional<std::set<int>>& allowed_devices) {
+  allowed_devices_ = allowed_devices;
   return *this;
 }
 
-std::set<int> BackendOptions::get_allowed_devices() const {
+const absl::optional<std::set<int>>& BackendOptions::allowed_devices() const {
   return allowed_devices_;
 }
 
@@ -85,9 +86,9 @@ struct Backend::EigenThreadPoolWrapper {
     const BackendOptions& options) {
   se::Platform* platform = options.platform();
   TF_ASSIGN_OR_RETURN(auto compiler, Compiler::GetForPlatform(platform));
-  TF_ASSIGN_OR_RETURN(auto stream_executors,
-                      PlatformUtil::GetStreamExecutors(
-                          platform, options.get_allowed_devices()));
+  TF_ASSIGN_OR_RETURN(
+      auto stream_executors,
+      PlatformUtil::GetStreamExecutors(platform, options.allowed_devices()));
   TF_ASSIGN_OR_RETURN(auto transfer_manager,
                       TransferManager::GetForPlatform(platform));
   TF_ASSIGN_OR_RETURN(auto computation_placer,
diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h
index 3fc0f8de62..3527761503 100644
--- a/tensorflow/compiler/xla/service/backend.h
+++ b/tensorflow/compiler/xla/service/backend.h
@@ -55,14 +55,15 @@ class BackendOptions {
   int intra_op_parallelism_threads() const;
 
   // Sets the allowed_devices set for creation of stream executors.
-  BackendOptions& set_allowed_devices(const std::set<int> device_set);
+  BackendOptions& set_allowed_devices(
+      const absl::optional<std::set<int>>& allowed_devices);
 
-  std::set<int> get_allowed_devices() const;
+  const absl::optional<std::set<int>>& allowed_devices() const;
 
  private:
   se::Platform* platform_ = nullptr;
   int intra_op_parallelism_threads_ = -1;
-  std::set<int> allowed_devices_ = {-1};
+  absl::optional<std::set<int>> allowed_devices_;
 };
 
 // Class which encapsulates an XLA backend. It includes everything necessary
@@ -113,12 +114,6 @@ class Backend {
   // can be > 1).
   se::StreamExecutor* default_stream_executor() const {
     CHECK(!stream_executors_.empty());
-
-    for (se::StreamExecutor* e : stream_executors_) {
-      if (e) {
-        return e;
-      }
-    }
     return stream_executors_[0];
   }
 
diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc
index 4b0b8fc347..600b069ecd 100644
--- a/tensorflow/compiler/xla/service/local_service.cc
+++ b/tensorflow/compiler/xla/service/local_service.cc
@@ -54,7 +54,7 @@ namespace xla {
   BackendOptions backend_options;
   backend_options.set_platform(platform)
       .set_intra_op_parallelism_threads(options.intra_op_parallelism_threads())
-      .set_allowed_devices(options.get_allowed_devices());
+      .set_allowed_devices(options.allowed_devices());
 
   TF_ASSIGN_OR_RETURN(std::unique_ptr<Backend> backend,
                       Backend::CreateBackend(backend_options));
diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc
index b8fb2047e6..f01724126c 100644
--- a/tensorflow/compiler/xla/service/platform_util.cc
+++ b/tensorflow/compiler/xla/service/platform_util.cc
@@ -205,8 +205,9 @@ static bool IsDeviceSupported(se::StreamExecutor* executor) {
 }
 
 /* static */ StatusOr<std::vector<se::StreamExecutor*>>
-PlatformUtil::GetStreamExecutors(se::Platform* platform,
-                                 std::set<int> allowed_devices) {
+PlatformUtil::GetStreamExecutors(
+    se::Platform* platform,
+    const absl::optional<std::set<int>>& allowed_devices) {
   int device_count = platform->VisibleDeviceCount();
   if (device_count <= 0) {
     return NotFound("no %s devices found", platform->Name());
@@ -227,8 +228,8 @@ PlatformUtil::GetStreamExecutors(se::Platform* platform,
     tensorflow::thread::ThreadPool thread_pool(
         tensorflow::Env::Default(), "device_initialization", device_count);
     for (int i = 0; i < device_count; ++i) {
-      if (allowed_devices.count(-1) == 0 && allowed_devices.count(i) == 0) {
-        VLOG(1) << "Skipping stream executor for device " << i
+      if (allowed_devices && (*allowed_devices).count(i) == 0) {
+        VLOG(1) << "Not initializing StreamExecutor for device " << i
                 << " since it is not in the visible device list";
         continue;
       }
diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h
index 89291e8b74..46123eb5d7 100644
--- a/tensorflow/compiler/xla/service/platform_util.h
+++ b/tensorflow/compiler/xla/service/platform_util.h
@@ -64,7 +64,8 @@ class PlatformUtil {
   //
   // If the platform has no visible devices, a not-found error is returned.
   static StatusOr<std::vector<se::StreamExecutor*>> GetStreamExecutors(
-      se::Platform* platform, std::set<int> allowed_devices = {-1});
+      se::Platform* platform,
+      const absl::optional<std::set<int>>& allowed_devices = {});
 
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(PlatformUtil);
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index 06a16e8c79..b8a6c92f06 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -113,12 +113,13 @@ int ServiceOptions::intra_op_parallelism_threads() const {
   return intra_op_parallelism_threads_;
 }
 
-ServiceOptions& ServiceOptions::set_allowed_devices(std::set<int> device_set) {
-  allowed_devices_ = device_set;
+ServiceOptions& ServiceOptions::set_allowed_devices(
+    const absl::optional<std::set<int>>& allowed_devices) {
+  allowed_devices_ = allowed_devices;
   return *this;
 }
 
-std::set<int> ServiceOptions::get_allowed_devices() const {
+const absl::optional<std::set<int>>& ServiceOptions::allowed_devices() const {
   return allowed_devices_;
 }
 
@@ -138,7 +139,7 @@ std::set<int> ServiceOptions::get_allowed_devices() const {
   }
   BackendOptions backend_options;
   backend_options.set_platform(platform);
-  backend_options.set_allowed_devices(options.get_allowed_devices());
+  backend_options.set_allowed_devices(options.allowed_devices());
   TF_ASSIGN_OR_RETURN(execute_backend, Backend::CreateBackend(backend_options));
 
   std::unique_ptr<Service> service(
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index b6c0039ccc..0682a880de 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -63,15 +63,16 @@ class ServiceOptions {
   int intra_op_parallelism_threads() const;
 
   // Sets the allowed_devices set for creation of stream executors.
-  ServiceOptions& set_allowed_devices(const std::set<int> device_set);
+  ServiceOptions& set_allowed_devices(
+      const absl::optional<std::set<int>>& allowed_devices);
 
-  std::set<int> get_allowed_devices() const;
+  const absl::optional<std::set<int>>& allowed_devices() const;
 
  private:
   se::Platform* platform_ = nullptr;
   int number_of_replicas_ = 1;
   int intra_op_parallelism_threads_ = -1;
-  std::set<int> allowed_devices_ = {-1};
+  absl::optional<std::set<int>> allowed_devices_;
 };
 
 // The XLA service object, which is the same across all platforms. It maintains
-- 
GitLab


From c2cb414e546556b887e28c9d0b1e4e6a2f918479 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 21:19:47 -0800
Subject: [PATCH 482/873] Add tpu_ordinal_selector_op.

PiperOrigin-RevId: 225307980
---
 tensorflow/contrib/tpu/BUILD                  |  8 ++++
 .../tpu/ops/tpu_ordinal_selector_op.cc        | 39 +++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 4bf3a0463d..007aeaec15 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -102,6 +102,7 @@ tf_gen_op_libs(
         "replication_ops",
         "tpu_configuration_ops",
         "tpu_embedding_ops",
+        "tpu_ordinal_selector_op",
     ],
     deps = [
         "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
@@ -153,6 +154,13 @@ tf_gen_op_wrapper_py(
     ],
 )
 
+tf_gen_op_wrapper_py(
+    name = "tpu_ordinal_selector_op",
+    deps = [
+        ":tpu_ordinal_selector_op_op_lib",
+    ],
+)
+
 py_library(
     name = "profiler",
     srcs = ["python/profiler/__init__.py"],
diff --git a/tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc b/tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc
new file mode 100644
index 0000000000..54e6b20f7f
--- /dev/null
+++ b/tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc
@@ -0,0 +1,39 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+REGISTER_OP("TPUOrdinalSelector")
+    .Output("device_ordinals: int32")
+    .SetIsStateful()
+    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
+      c->set_output(0,
+                    c->Vector(shape_inference::InferenceContext::kUnknownDim));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+A TPU core selector Op.
+
+This Op produces a set of TPU cores (for warm-up) or a single TPU core
+(for regular inference) to execute the TPU program on. The output is
+consumed by TPUPartitionedCall.
+
+device_ordinals: A vector 1 or more TPU cores.
+)doc");
+
+}  // namespace tensorflow
-- 
GitLab


From ad26fe7015ce03a08862a69b3556215b3d62911b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 12 Dec 2018 21:53:09 -0800
Subject: [PATCH 483/873] Automated rollback of commit
 306cf4b2834cca1f1fc1fc58ba16c3248516dfd9

PiperOrigin-RevId: 225310802
---
 tensorflow/BUILD | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 449a1372ed..823ad8f506 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -267,15 +267,6 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
-# By default, XLA GPU is compiled into tensorflow when building with
-# --config=cuda even when `with_xla_support` is false. The config setting
-# here allows us to override the behavior if needed.
-config_setting(
-    name = "no_xla_deps_in_cuda",
-    define_values = {"no_xla_deps_in_cuda": "true"},
-    visibility = ["//visibility:public"],
-)
-
 config_setting(
     name = "with_gdr_support",
     define_values = {"with_gdr_support": "true"},
-- 
GitLab


From e12f2e421d8fe8ff040f9a31f3992731d3db89e3 Mon Sep 17 00:00:00 2001
From: Siju <siju.samuel@huawei.com>
Date: Thu, 13 Dec 2018 11:51:49 +0530
Subject: [PATCH 484/873] Update README.md

---
 tensorflow/tools/graph_transforms/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/graph_transforms/README.md b/tensorflow/tools/graph_transforms/README.md
index 9f6f553ba1..c5c0f2da89 100644
--- a/tensorflow/tools/graph_transforms/README.md
+++ b/tensorflow/tools/graph_transforms/README.md
@@ -1086,7 +1086,7 @@ in the future.
 
 The Graph Transform Tool associates names of transforms with the code to
 implement them using the `REGISTER_GRAPH_TRANSFORM()` macro. This takes a string
-and a function, and automagically registers the transform with the tool. You
+and a function, and automatically registers the transform with the tool. You
 will need to watch out for a few things though:
 
 *   Because it's using global C++ objects in each file under the hood, the
-- 
GitLab


From 6e65bb640a5430b3043b75759c29b0a66808e45c Mon Sep 17 00:00:00 2001
From: Siju <siju.samuel@huawei.com>
Date: Thu, 13 Dec 2018 12:09:44 +0530
Subject: [PATCH 485/873] Update README.md

chnage to change
---
 tensorflow/lite/java/ovic/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/lite/java/ovic/README.md b/tensorflow/lite/java/ovic/README.md
index 368c486f4f..a50d3130c0 100644
--- a/tensorflow/lite/java/ovic/README.md
+++ b/tensorflow/lite/java/ovic/README.md
@@ -137,7 +137,7 @@ If you are adding a detection model, simply modify `modelPath` and `testImagePat
 
 * Adjust the benchmark parameters when needed:
 
-You can chnage the length of each experiment, and the processor affinity below. `BIG_CORE_MASK` is an integer whose binary encoding represents the set of used cores. This number is phone-specific. For example, Pixel 2 has 8 cores: the 4 little cores are represented by the 4 less significant bits, and the 4 big cores by the 4 more significant bits. Therefore a mask value of 16, or in binary `00010000`, represents using only the first big core. The mask 32, or in binary `00100000` uses the second big core and should deliver identical results as the mask 16 because the big cores are interchangeable.
+You can change the length of each experiment, and the processor affinity below. `BIG_CORE_MASK` is an integer whose binary encoding represents the set of used cores. This number is phone-specific. For example, Pixel 2 has 8 cores: the 4 little cores are represented by the 4 less significant bits, and the 4 big cores by the 4 more significant bits. Therefore a mask value of 16, or in binary `00010000`, represents using only the first big core. The mask 32, or in binary `00100000` uses the second big core and should deliver identical results as the mask 16 because the big cores are interchangeable.
 
 ```
   /** Wall time for each benchmarking experiment. */
-- 
GitLab


From b99d914cfc1122eab717999324032a60cc0204db Mon Sep 17 00:00:00 2001
From: Guangda Lai <laigd@google.com>
Date: Wed, 12 Dec 2018 23:27:38 -0800
Subject: [PATCH 486/873] Add TRT conversion support to saved_model_cli

PiperOrigin-RevId: 225318469
---
 tensorflow/python/tools/saved_model_cli.py | 87 ++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index c4c3756c04..afc4e517cd 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -659,6 +659,28 @@ def scan(args):
       scan_meta_graph_def(meta_graph_def)
 
 
+def convert_with_tensorrt(args):
+  """Function triggered by 'convert tensorrt' command.
+
+  Args:
+    args: A namespace parsed from command line.
+  """
+  # Import here instead of at top, because this will crash if TensorRT is
+  # not installed
+  from tensorflow.contrib import tensorrt  # pylint: disable=g-import-not-at-top
+  tensorrt.create_inference_graph(
+      None,
+      None,
+      max_batch_size=args.max_batch_size,
+      max_workspace_size_bytes=args.max_workspace_size_bytes,
+      precision_mode=args.precision_mode,
+      minimum_segment_size=args.minimum_segment_size,
+      is_dynamic_op=args.is_dynamic_op,
+      input_saved_model_dir=args.dir,
+      input_saved_model_tags=args.tag_set.split(','),
+      output_saved_model_dir=args.output_dir)
+
+
 def create_parser():
   """Creates a parser that parse the command line arguments.
 
@@ -812,6 +834,71 @@ def create_parser():
       help='tag-set of graph in SavedModel to scan, separated by \',\'')
   parser_scan.set_defaults(func=scan)
 
+  # convert command
+  convert_msg = ('Usage example:\n'
+                 'To convert the SavedModel to one that have TensorRT ops:\n'
+                 '$saved_model_cli convert \\\n'
+                 '   --dir /tmp/saved_model \\\n'
+                 '   --tag_set serve \\\n'
+                 '   --output_dir /tmp/saved_model_trt \\\n'
+                 '   tensorrt \n')
+  parser_convert = subparsers.add_parser(
+      'convert',
+      description=convert_msg,
+      formatter_class=argparse.RawTextHelpFormatter)
+  parser_convert.add_argument(
+      '--dir',
+      type=str,
+      required=True,
+      help='directory containing the SavedModel to convert')
+  parser_convert.add_argument(
+      '--output_dir',
+      type=str,
+      required=True,
+      help='output directory for the converted SavedModel')
+  parser_convert.add_argument(
+      '--tag_set',
+      type=str,
+      required=True,
+      help='tag-set of graph in SavedModel to convert, separated by \',\'')
+  convert_subparsers = parser_convert.add_subparsers(
+      title='conversion methods',
+      description='valid conversion methods',
+      help='the conversion to run with the SavedModel')
+  parser_convert_with_tensorrt = convert_subparsers.add_parser(
+      'tensorrt',
+      description='Convert the SavedModel with Tensorflow-TensorRT integration',
+      formatter_class=argparse.RawTextHelpFormatter)
+  parser_convert_with_tensorrt.add_argument(
+      '--max_batch_size',
+      type=int,
+      default=1,
+      help='max size for the input batch')
+  parser_convert_with_tensorrt.add_argument(
+      '--max_workspace_size_bytes',
+      type=int,
+      default=2 << 20,
+      help=('the maximum GPU temporary memory which the TRT engine can use at '
+            'execution time'))
+  parser_convert_with_tensorrt.add_argument(
+      '--precision_mode',
+      type=str,
+      default='FP32',
+      help='one of FP32, FP16 and INT8')
+  parser_convert_with_tensorrt.add_argument(
+      '--minimum_segment_size',
+      type=int,
+      default=3,
+      help=('the minimum number of nodes required for a subgraph to be replaced'
+            'in a TensorRT node'))
+  parser_convert_with_tensorrt.add_argument(
+      '--is_dynamic_op',
+      type=bool,
+      default=False,
+      help=('whether to generate dynamic TRT ops which will build the TRT '
+            'network and engine at run time'))
+  parser_convert_with_tensorrt.set_defaults(func=convert_with_tensorrt)
+
   return parser
 
 
-- 
GitLab


From 6beb8734fbd7b247f96b869ab547882c6c004130 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 13 Dec 2018 00:02:41 -0800
Subject: [PATCH 487/873] Update TF 2.0 package to require Estimator 2.0
 package.

PiperOrigin-RevId: 225321405
---
 tensorflow/tools/pip_package/setup.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 85c913f158..f4b27fed73 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -89,6 +89,8 @@ if 'tf_nightly' in project_name:
       REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.13.0a0, < 1.14.0a0'
     if 'tensorflow_estimator' in pkg:
       REQUIRED_PACKAGES[i] = 'tf-estimator-nightly'
+    if 'tensorflow_estimator' in pkg and '2.0' in project_name:
+      REQUIRED_PACKAGES[i] = 'tensorflow-estimator-2.0-preview'
 
 # weakref.finalize and enum were introduced in Python 3.4
 if sys.version_info < (3, 4):
-- 
GitLab


From 62bc640c2d48c4efa68b63eeddd60df83a0f3be7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 00:11:34 -0800
Subject: [PATCH 488/873] Automated rollback of commit
 221f4d23c6cffa2ad5fb492a300fafda2a640cd8

PiperOrigin-RevId: 225322589
---
 WORKSPACE                                     | 35 +++++++++----------
 tensorflow/opensource_only.files              |  1 +
 tensorflow/version_check.bzl                  |  2 ++
 .../preconfig/generate/archives.bzl           | 27 ++++++++++++++
 .../preconfig/generate/generate.bzl           |  4 +--
 .../toolchains/preconfig/generate/generate.sh |  2 +-
 6 files changed, 48 insertions(+), 23 deletions(-)
 create mode 100644 third_party/toolchains/preconfig/generate/archives.bzl

diff --git a/WORKSPACE b/WORKSPACE
index 7cc08e0164..99d368ff91 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -16,30 +16,27 @@ load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")
 
 closure_repositories()
 
-http_archive(
-    name = "base_images_docker",
-    sha256 = "e2b1b7254270bb7605e814a9dbf6d1e4ae04a11136ff1714fbfdabe3f87f7cf9",
-    strip_prefix = "base-images-docker-12801524f867e657fbb5d1a74f31618aff181ac6",
-    urls = ["https://github.com/GoogleCloudPlatform/base-images-docker/archive/12801524f867e657fbb5d1a74f31618aff181ac6.tar.gz"],
-)
+load("//third_party/toolchains/preconfig/generate:archives.bzl",
+     "bazel_toolchains_archive")
 
-http_archive(
-    name = "bazel_toolchains",
-    sha256 = "15b5858b1b5541ec44df31b94c3b8672815b31d71215a98398761ea9f4c4eedb",
-    strip_prefix = "bazel-toolchains-6200b238c9c2d137c0d9a7262c80cc71d98e692b",
-    urls = [
-        "https://github.com/bazelbuild/bazel-toolchains/archive/6200b238c9c2d137c0d9a7262c80cc71d98e692b.tar.gz",
-    ],
+bazel_toolchains_archive()
+
+load(
+    "@bazel_toolchains//repositories:repositories.bzl",
+    bazel_toolchains_repositories = "repositories",
 )
 
-http_archive(
-    name = "io_bazel_rules_docker",
-    sha256 = "29d109605e0d6f9c892584f07275b8c9260803bf0c6fcb7de2623b2bedc910bd",
-    strip_prefix = "rules_docker-0.5.1",
-    urls = ["https://github.com/bazelbuild/rules_docker/archive/v0.5.1.tar.gz"],
+bazel_toolchains_repositories()
+
+load(
+    "@io_bazel_rules_docker//container:container.bzl",
+    container_repositories = "repositories",
 )
 
-load("//third_party/toolchains/preconfig/generate:workspace.bzl", "remote_config_workspace")
+container_repositories()
+
+load("//third_party/toolchains/preconfig/generate:workspace.bzl",
+     "remote_config_workspace")
 
 remote_config_workspace()
 
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 347dc9fc6b..418ef1a369 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -49,6 +49,7 @@ tensorflow/third_party/toolchains/preconfig/ubuntu14.04/nccl2/BUILD
 tensorflow/third_party/toolchains/preconfig/generate/workspace.bzl
 tensorflow/third_party/toolchains/preconfig/generate/containers.bzl
 tensorflow/third_party/toolchains/preconfig/generate/generate.bzl
+tensorflow/third_party/toolchains/preconfig/generate/archives.bzl
 tensorflow/third_party/toolchains/preconfig/generate/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/dummy_toolchain.bzl
diff --git a/tensorflow/version_check.bzl b/tensorflow/version_check.bzl
index 3b61827139..74feaa19ff 100644
--- a/tensorflow/version_check.bzl
+++ b/tensorflow/version_check.bzl
@@ -48,3 +48,5 @@ def check_bazel_version_at_least(minimum_bazel_version):
             native.bazel_version,
             minimum_bazel_version,
         ))
+
+parse_bazel_version = _parse_bazel_version
diff --git a/third_party/toolchains/preconfig/generate/archives.bzl b/third_party/toolchains/preconfig/generate/archives.bzl
new file mode 100644
index 0000000000..0850893589
--- /dev/null
+++ b/third_party/toolchains/preconfig/generate/archives.bzl
@@ -0,0 +1,27 @@
+load("//tensorflow:version_check.bzl", "parse_bazel_version")
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+def bazel_toolchains_archive():
+    # Not all bazel versions have set native.bazel_version - if it is not set,
+    # fall back to the more compatible version of the toolchains archive.
+    if native.bazel_version and parse_bazel_version(native.bazel_version) >= parse_bazel_version("0.19"):
+        # This version of the toolchains repo is incompatible with older bazel
+        # versions - we can remove this once TensorFlow drops support for bazel
+        # before 0.19.
+        http_archive(
+            name = "bazel_toolchains",
+            sha256 = "41c48a189be489e2d15dec40e0057ea15b95ee5b39cc2a7e6cf663e31432c75e",
+            strip_prefix = "bazel-toolchains-3f8c58fe530fedc446de04673bc1e32985887dea",
+            urls = [
+                "https://github.com/nlopezgi/bazel-toolchains/archive/3f8c58fe530fedc446de04673bc1e32985887dea.tar.gz",
+            ],
+        )
+    else:
+        http_archive(
+            name = "bazel_toolchains",
+            sha256 = "15b5858b1b5541ec44df31b94c3b8672815b31d71215a98398761ea9f4c4eedb",
+            strip_prefix = "bazel-toolchains-6200b238c9c2d137c0d9a7262c80cc71d98e692b",
+            urls = [
+                "https://github.com/bazelbuild/bazel-toolchains/archive/6200b238c9c2d137c0d9a7262c80cc71d98e692b.tar.gz",
+            ],
+        )
diff --git a/third_party/toolchains/preconfig/generate/generate.bzl b/third_party/toolchains/preconfig/generate/generate.bzl
index 2fb3a94cdc..fb2af02a53 100644
--- a/third_party/toolchains/preconfig/generate/generate.bzl
+++ b/third_party/toolchains/preconfig/generate/generate.bzl
@@ -36,9 +36,7 @@ def _tensorflow_rbe_config(name, cuda_version, cudnn_version, python_version, co
             "TF_NCCL_VERSION": "2",
             "CUDNN_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
         },
-        # TODO(klimek): We should use the sources that we currently work on, not
-        # just the latest snapshot of tensorflow that is checked in.
-        git_repo = "https://github.com/tensorflow/tensorflow",
+        mount_project = "$(mount_project)",
         tags = ["manual"],
         incompatible_changes_off = True,
     )
diff --git a/third_party/toolchains/preconfig/generate/generate.sh b/third_party/toolchains/preconfig/generate/generate.sh
index 37c5211278..1f39fcdf6d 100755
--- a/third_party/toolchains/preconfig/generate/generate.sh
+++ b/third_party/toolchains/preconfig/generate/generate.sh
@@ -46,7 +46,7 @@ echo "CUDA: ${CUDA_VERSION}"
 echo "CUDNN: ${CUDNN_VERSION}"
 echo "NCCL: ${NCCL_VERSION}"
 
-bazel build "${PKG}/generate:${TARGET}"
+bazel build --define=mount_project="${PWD}" "${PKG}/generate:${TARGET}"
 cd "${TEMPDIR}"
 tar xvf "${ROOT}/bazel-bin/${PKG}/generate/${TARGET}_outputs.tar"
 
-- 
GitLab


From b5969ad5cffbfef242c94f60c14b8fdbd3bceebc Mon Sep 17 00:00:00 2001
From: Michael Reneer <michaelreneer@google.com>
Date: Thu, 13 Dec 2018 00:17:42 -0800
Subject: [PATCH 489/873] Remove duplicate build flag.

PiperOrigin-RevId: 225323136
---
 tools/bazel.rc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/bazel.rc b/tools/bazel.rc
index 1fdf51f53e..e78071fdde 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -76,7 +76,6 @@ build:nonccl --define=no_nccl_support=true
 
 build --define=use_fast_cpp_protos=true
 build --define=allow_oversize_protos=true
-build --define=grpc_no_ares=true
 
 build --spawn_strategy=standalone
 build --genrule_strategy=standalone
-- 
GitLab


From 5dfb096b53315e3b4768d570e0ff31f5340a587f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 01:02:28 -0800
Subject: [PATCH 490/873] compat: Update forward compatibility horizon to
 2018-12-13

PiperOrigin-RevId: 225327770
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 57a4c8be7d..3111ef6771 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 12)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 13)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From ca096a329c28ad45c03a23339e34669fce0914ff Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Thu, 13 Dec 2018 03:16:24 -0800
Subject: [PATCH 491/873] TF Go Wrapper: Add String() method to Device

This is useful when printing devices.

PiperOrigin-RevId: 225343692
---
 tensorflow/go/session.go      |  9 +++++++++
 tensorflow/go/session_test.go | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/tensorflow/go/session.go b/tensorflow/go/session.go
index db6ae4f26c..bd4fd4f32f 100644
--- a/tensorflow/go/session.go
+++ b/tensorflow/go/session.go
@@ -71,6 +71,15 @@ type Device struct {
 	MemoryLimitBytes int64
 }
 
+// String describes d and implements fmt.Stringer.
+func (d Device) String() string {
+	memStr := "no memory limit"
+	if d.MemoryLimitBytes >= 0 {
+		memStr = fmt.Sprintf("memory limit %d bytes", d.MemoryLimitBytes)
+	}
+	return fmt.Sprintf("(Device: name \"%s\", type %s, %s)", d.Name, d.Type, memStr)
+}
+
 // Return list of devices associated with a Session
 func (s *Session) ListDevices() ([]Device, error) {
 	var devices []Device
diff --git a/tensorflow/go/session_test.go b/tensorflow/go/session_test.go
index 05ace99a23..c9bda00167 100644
--- a/tensorflow/go/session_test.go
+++ b/tensorflow/go/session_test.go
@@ -299,3 +299,21 @@ func TestListDevices(t *testing.T) {
 		t.Fatalf("no devices detected")
 	}
 }
+
+func TestDeviceString(t *testing.T) {
+	d := Device{Name: "foo", Type: "bar", MemoryLimitBytes: 12345}
+	got := d.String()
+	want := "(Device: name \"foo\", type bar, memory limit 12345 bytes)"
+	if got != want {
+		t.Errorf("Got \"%s\", want \"%s\"", got, want)
+	}
+}
+
+func TestDeviceStringNoMemoryLimit(t *testing.T) {
+	d := Device{Name: "foo", Type: "bar", MemoryLimitBytes: -1}
+	got := d.String()
+	want := "(Device: name \"foo\", type bar, no memory limit)"
+	if got != want {
+		t.Errorf("Got \"%s\", want \"%s\"", got, want)
+	}
+}
-- 
GitLab


From 045ac6c3ca349a5aa289fec7e0f0282283112c17 Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Thu, 13 Dec 2018 03:20:58 -0800
Subject: [PATCH 492/873] Refactor ListDevices in TF Go API

I pull the logic to convert a *C.TF_DeviceList to a []Device into its own function `deviceSliceFromDeviceList`. This is so I can reuse it when wrapping the Eager C API.

PiperOrigin-RevId: 225344064
---
 tensorflow/go/session.go | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/tensorflow/go/session.go b/tensorflow/go/session.go
index bd4fd4f32f..48909ffe39 100644
--- a/tensorflow/go/session.go
+++ b/tensorflow/go/session.go
@@ -80,37 +80,30 @@ func (d Device) String() string {
 	return fmt.Sprintf("(Device: name \"%s\", type %s, %s)", d.Name, d.Type, memStr)
 }
 
-// Return list of devices associated with a Session
-func (s *Session) ListDevices() ([]Device, error) {
+func deviceSliceFromDeviceList(list *C.TF_DeviceList) ([]Device, error) {
 	var devices []Device
-
 	status := newStatus()
-	devices_list := C.TF_SessionListDevices(s.c, status.c)
-	if err := status.Err(); err != nil {
-		return nil, fmt.Errorf("SessionListDevices() failed: %v", err)
-	}
-	defer C.TF_DeleteDeviceList(devices_list)
 
-	for i := 0; i < int(C.TF_DeviceListCount(devices_list)); i++ {
-		device_name := C.TF_DeviceListName(devices_list, C.int(i), status.c)
+	for i := 0; i < int(C.TF_DeviceListCount(list)); i++ {
+		name := C.TF_DeviceListName(list, C.int(i), status.c)
 		if err := status.Err(); err != nil {
 			return nil, fmt.Errorf("DeviceListName(index=%d) failed: %v", i, err)
 		}
 
-		device_type := C.TF_DeviceListType(devices_list, C.int(i), status.c)
+		deviceType := C.TF_DeviceListType(list, C.int(i), status.c)
 		if err := status.Err(); err != nil {
 			return nil, fmt.Errorf("DeviceListType(index=%d) failed: %v", i, err)
 		}
 
-		memory_limit_bytes := C.TF_DeviceListMemoryBytes(devices_list, C.int(i), status.c)
+		memoryLimitBytes := C.TF_DeviceListMemoryBytes(list, C.int(i), status.c)
 		if err := status.Err(); err != nil {
 			return nil, fmt.Errorf("DeviceListMemoryBytes(index=%d) failed: %v", i, err)
 		}
 
 		device := Device{
-			Name:             C.GoString(device_name),
-			Type:             C.GoString(device_type),
-			MemoryLimitBytes: int64(memory_limit_bytes),
+			Name:             C.GoString(name),
+			Type:             C.GoString(deviceType),
+			MemoryLimitBytes: int64(memoryLimitBytes),
 		}
 
 		devices = append(devices, device)
@@ -119,6 +112,17 @@ func (s *Session) ListDevices() ([]Device, error) {
 	return devices, nil
 }
 
+// ListDevices returns the list of devices associated with a Session.
+func (s *Session) ListDevices() ([]Device, error) {
+	status := newStatus()
+	devicesList := C.TF_SessionListDevices(s.c, status.c)
+	if err := status.Err(); err != nil {
+		return nil, fmt.Errorf("SessionListDevices() failed: %v", err)
+	}
+	defer C.TF_DeleteDeviceList(devicesList)
+	return deviceSliceFromDeviceList(devicesList)
+}
+
 // Run the graph with the associated session starting with the supplied feeds
 // to compute the value of the requested fetches. Runs, but does not return
 // Tensors for operations specified in targets.
-- 
GitLab


From 67d2144c6b3118730090883fb7b598ba30191ab4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 04:34:49 -0800
Subject: [PATCH 493/873] Remove fused_batch_norm_util from default deps of
 kernels.

It contains the same source files as fused_batch_norm_op, leading to duplicate
symbol errors in some configurations.

PiperOrigin-RevId: 225350457
---
 tensorflow/core/kernels/BUILD | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index d62992233b..e2234c1f9d 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3714,7 +3714,6 @@ NN_DEPS = [
     ":bounds_check",
     ":conv_2d",
     ":eigen_contraction_kernel",
-    ":fused_batch_norm_util_gpu",
     ":ops_util",
     ":pooling_ops",
     "//tensorflow/core:framework",
-- 
GitLab


From 3169f3295a9b1956c069e8708902dcdc4913cdb3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 05:26:14 -0800
Subject: [PATCH 494/873] Directly call python for the nvcc wrapper.

.bat files have an 8k limit on the command line length which we are now hitting.

PiperOrigin-RevId: 225355646
---
 tensorflow/opensource_only.files              |  1 -
 third_party/gpus/crosstool/CROSSTOOL.tpl      | 25 +++++++++++++++++++
 .../windows/msvc_wrapper_for_nvcc.bat.tpl     | 20 ---------------
 third_party/gpus/cuda_configure.bzl           | 10 +-------
 4 files changed, 26 insertions(+), 30 deletions(-)
 delete mode 100644 third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.bat.tpl

diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 418ef1a369..0af84f8f54 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -82,7 +82,6 @@ tensorflow/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
 tensorflow/third_party/gpus/crosstool/LICENSE
 tensorflow/third_party/gpus/crosstool/remote.BUILD.tpl
 tensorflow/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl
-tensorflow/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.bat.tpl
 tensorflow/third_party/gpus/crosstool/BUILD.tpl
 tensorflow/third_party/gpus/crosstool/BUILD
 tensorflow/third_party/gpus/cuda/LICENSE
diff --git a/third_party/gpus/crosstool/CROSSTOOL.tpl b/third_party/gpus/crosstool/CROSSTOOL.tpl
index 921188cbb4..5ca9b2deb4 100644
--- a/third_party/gpus/crosstool/CROSSTOOL.tpl
+++ b/third_party/gpus/crosstool/CROSSTOOL.tpl
@@ -642,6 +642,31 @@ toolchain {
     name: "no_legacy_features"
   }
 
+  # TODO(klimek): Previously we were using a .bat file to start python to run
+  # the python script that can redirect to nvcc - unfortunately .bat files
+  # have a rather short maximum length for command lines (8k). Instead, we
+  # now use the python binary as the compiler and pass the python script to
+  # it at the start of the command line. Investigate different possibilities
+  # to run the nvcc wrapper, either using pyinstaller --onefile, or writing
+  # a small C++ wrapper to redirect.
+  feature {
+    name: "redirector"
+    enabled: true
+    flag_set {
+      action: "c-compile"
+      action: "c++-compile"
+      action: "c++-module-compile"
+      action: "c++-module-codegen"
+      action: "c++-header-parsing"
+      action: "assemble"
+      action: "preprocess-assemble"
+      flag_group {
+        flag: "-B"
+        flag: "external/local_config_cuda/crosstool/windows/msvc_wrapper_for_nvcc.py"
+      }
+    }
+  }
+
   # Suppress startup banner.
   feature {
     name: "nologo"
diff --git a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.bat.tpl b/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.bat.tpl
deleted file mode 100644
index 8f8fb3e423..0000000000
--- a/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.bat.tpl
+++ /dev/null
@@ -1,20 +0,0 @@
-:: Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-::
-:: Licensed under the Apache License, Version 2.0 (the "License");
-:: you may not use this file except in compliance with the License.
-:: You may obtain a copy of the License at
-::
-::     http://www.apache.org/licenses/LICENSE-2.0
-::
-:: Unless required by applicable law or agreed to in writing, software
-:: distributed under the License is distributed on an "AS IS" BASIS,
-:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-:: See the License for the specific language governing permissions and
-:: limitations under the License.
-:: =============================================================================
-
-:: Invoke msvc_wrapper_for_nvcc.py, which is located in the same directory.
-@echo OFF
-set arg0=%~0
-for %%F in ("%arg0%") do set DRIVER_BIN=%%~dpF
-"%{python_binary}" -B "%DRIVER_BIN%\msvc_wrapper_for_nvcc.py" %*
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 03c67bcb3d..8aa5b89cdd 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -190,7 +190,7 @@ def _get_win_cuda_defines(repository_ctx):
       get_env_var(repository_ctx, "TMP", "C:\\Windows\\Temp").replace(
           "\\", "\\\\"),)
 
-  msvc_cl_path = "windows/msvc_wrapper_for_nvcc.bat"
+  msvc_cl_path = _get_python_bin(repository_ctx)
   msvc_ml_path = find_msvc_tool(repository_ctx, vc_path, "ml64.exe").replace(
       "\\", "/")
   msvc_link_path = find_msvc_tool(repository_ctx, vc_path, "link.exe").replace(
@@ -1426,7 +1426,6 @@ def _create_local_cuda_repository(repository_ctx):
     repository_ctx.file(
         "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", "")
     repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.py", "")
-    repository_ctx.file("crosstool/windows/msvc_wrapper_for_nvcc.bat", "")
   else:
     cuda_defines[
         "%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc"
@@ -1486,13 +1485,6 @@ def _create_local_cuda_repository(repository_ctx):
         "crosstool:windows/msvc_wrapper_for_nvcc.py",
         wrapper_defines,
     )
-    _tpl(
-        repository_ctx,
-        "crosstool:windows/msvc_wrapper_for_nvcc.bat",
-        {
-            "%{python_binary}": _get_python_bin(repository_ctx),
-        },
-    )
 
   _tpl(
       repository_ctx,
-- 
GitLab


From c5002272d31f55daa7e24e1b5ebb419ec45c4f20 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 06:53:16 -0800
Subject: [PATCH 495/873] Avoid keyword arguments to core Starlark builtins.

PiperOrigin-RevId: 225364007
---
 tensorflow/compiler/aot/tfcompile.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl
index 2dc3e8c911..4051664c24 100644
--- a/tensorflow/compiler/aot/tfcompile.bzl
+++ b/tensorflow/compiler/aot/tfcompile.bzl
@@ -283,7 +283,7 @@ def tf_library(
     )
 
     # Variables used for gen_test and gen_benchmark.
-    cpp_class_split = cpp_class.rsplit("::", maxsplit = 2)
+    cpp_class_split = cpp_class.rsplit("::", 2)
     if len(cpp_class_split) == 1:
         no_ns_name = cpp_class_split[0]
     else:
-- 
GitLab


From 199fe84d7432ad6371ea6cd5169cb604b7dc5dac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 07:15:40 -0800
Subject: [PATCH 496/873] Update sequence categorical columns to new
 FeatureColumn API.

PiperOrigin-RevId: 225366627
---
 tensorflow/contrib/feature_column/BUILD       |   2 +-
 .../sequence_feature_column_v2.py             |  44 ++++---
 .../sequence_feature_column_v2_test.py        | 122 +++++++++---------
 .../feature_column/feature_column_v2.py       |  16 +--
 4 files changed, 96 insertions(+), 88 deletions(-)

diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD
index 1cd83bdb5d..4c1d1a29f2 100644
--- a/tensorflow/contrib/feature_column/BUILD
+++ b/tensorflow/contrib/feature_column/BUILD
@@ -110,8 +110,8 @@ py_test(
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:training",
-        "//tensorflow/python/feature_column",
         "//tensorflow/python/feature_column:feature_column_py",
+        "//tensorflow/python/feature_column:feature_column_v2_test",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
     ],
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2.py
index 0d34ad1618..83b93ec332 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2.py
@@ -203,7 +203,8 @@ def sequence_categorical_column_with_identity(
   columns = [watches_embedding]
 
   features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  input_layer, sequence_length = sequence_input_layer(features, columns)
+  sequence_feature_layer = SequenceFeatureLayer(columns)
+  input_layer, sequence_length = sequence_feature_layer(features)
 
   rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
   outputs, state = tf.nn.dynamic_rnn(
@@ -219,15 +220,17 @@ def sequence_categorical_column_with_identity(
       `[0, num_buckets)`, and will replace out-of-range inputs.
 
   Returns:
-    A `_SequenceCategoricalColumn`.
+    A `SequenceCategoricalColumn`.
 
   Raises:
     ValueError: if `num_buckets` is less than one.
     ValueError: if `default_value` is not in range `[0, num_buckets)`.
   """
-  return fc_old._SequenceCategoricalColumn(
-      fc_old._categorical_column_with_identity(
-          key=key, num_buckets=num_buckets, default_value=default_value))
+  return fc.SequenceCategoricalColumn(
+      fc.categorical_column_with_identity(
+          key=key,
+          num_buckets=num_buckets,
+          default_value=default_value))
 
 
 def sequence_categorical_column_with_hash_bucket(
@@ -247,7 +250,8 @@ def sequence_categorical_column_with_hash_bucket(
   columns = [tokens_embedding]
 
   features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  input_layer, sequence_length = sequence_input_layer(features, columns)
+  sequence_feature_layer = SequenceFeatureLayer(columns)
+  input_layer, sequence_length = sequence_feature_layer(features)
 
   rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
   outputs, state = tf.nn.dynamic_rnn(
@@ -260,15 +264,17 @@ def sequence_categorical_column_with_hash_bucket(
     dtype: The type of features. Only string and integer types are supported.
 
   Returns:
-    A `_SequenceCategoricalColumn`.
+    A `SequenceCategoricalColumn`.
 
   Raises:
     ValueError: `hash_bucket_size` is not greater than 1.
     ValueError: `dtype` is neither string nor integer.
   """
-  return fc_old._SequenceCategoricalColumn(
-      fc_old._categorical_column_with_hash_bucket(
-          key=key, hash_bucket_size=hash_bucket_size, dtype=dtype))
+  return fc.SequenceCategoricalColumn(
+      fc.categorical_column_with_hash_bucket(
+          key=key,
+          hash_bucket_size=hash_bucket_size,
+          dtype=dtype))
 
 
 def sequence_categorical_column_with_vocabulary_file(
@@ -290,7 +296,8 @@ def sequence_categorical_column_with_vocabulary_file(
   columns = [states_embedding]
 
   features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  input_layer, sequence_length = sequence_input_layer(features, columns)
+  sequence_feature_layer = SequenceFeatureLayer(columns)
+  input_layer, sequence_length = sequence_feature_layer(features)
 
   rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
   outputs, state = tf.nn.dynamic_rnn(
@@ -314,7 +321,7 @@ def sequence_categorical_column_with_vocabulary_file(
     dtype: The type of features. Only string and integer types are supported.
 
   Returns:
-    A `_SequenceCategoricalColumn`.
+    A `SequenceCategoricalColumn`.
 
   Raises:
     ValueError: `vocabulary_file` is missing or cannot be opened.
@@ -323,8 +330,8 @@ def sequence_categorical_column_with_vocabulary_file(
     ValueError: `num_oov_buckets` and `default_value` are both specified.
     ValueError: `dtype` is neither string nor integer.
   """
-  return fc_old._SequenceCategoricalColumn(
-      fc_old._categorical_column_with_vocabulary_file(
+  return fc.SequenceCategoricalColumn(
+      fc.categorical_column_with_vocabulary_file(
           key=key,
           vocabulary_file=vocabulary_file,
           vocabulary_size=vocabulary_size,
@@ -351,7 +358,8 @@ def sequence_categorical_column_with_vocabulary_list(
   columns = [colors_embedding]
 
   features = tf.parse_example(..., features=make_parse_example_spec(columns))
-  input_layer, sequence_length = sequence_input_layer(features, columns)
+  sequence_feature_layer = SequenceFeatureLayer(columns)
+  input_layer, sequence_length = sequence_feature_layer(features)
 
   rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
   outputs, state = tf.nn.dynamic_rnn(
@@ -375,7 +383,7 @@ def sequence_categorical_column_with_vocabulary_list(
       with `default_value`.
 
   Returns:
-    A `_SequenceCategoricalColumn`.
+    A `SequenceCategoricalColumn`.
 
   Raises:
     ValueError: if `vocabulary_list` is empty, or contains duplicate keys.
@@ -383,8 +391,8 @@ def sequence_categorical_column_with_vocabulary_list(
     ValueError: `num_oov_buckets` and `default_value` are both specified.
     ValueError: if `dtype` is not integer or string.
   """
-  return fc_old._SequenceCategoricalColumn(
-      fc_old._categorical_column_with_vocabulary_list(
+  return fc.SequenceCategoricalColumn(
+      fc.categorical_column_with_vocabulary_list(
           key=key,
           vocabulary_list=vocabulary_list,
           dtype=dtype,
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_test.py
index ca4398a142..be012a8769 100644
--- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_test.py
+++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_v2_test.py
@@ -26,7 +26,7 @@ from tensorflow.contrib.feature_column.python.feature_column import sequence_fea
 from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column_v2 as sfc
 from tensorflow.python.feature_column import feature_column as fc_old
 from tensorflow.python.feature_column import feature_column_lib as fc
-from tensorflow.python.feature_column.feature_column import _LazyBuilder
+from tensorflow.python.feature_column.feature_column_v2_test import _TestStateManager
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -131,7 +131,7 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
         feature_columns=[embedding_column_b, embedding_column_a])
 
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(
+    self.assertCountEqual(
         ('sequence_input_layer/aaa_embedding/embedding_weights:0',
          'sequence_input_layer/bbb_embedding/embedding_weights:0'),
         tuple([v.name for v in global_vars]))
@@ -223,7 +223,7 @@ class SequenceInputLayerTest(test.TestCase, parameterized.TestCase):
         feature_columns=shared_embedding_columns)
 
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(
+    self.assertCountEqual(
         ('sequence_input_layer/aaa_bbb_shared_embedding/embedding_weights:0',),
         tuple([v.name for v in global_vars]))
     with monitored_session.MonitoredSession() as sess:
@@ -670,6 +670,23 @@ def _assert_sparse_tensor_indices_shape(test_case, expected, actual):
   test_case.assertAllEqual(expected.dense_shape, actual.dense_shape)
 
 
+def _get_sequence_dense_tensor(column, features):
+  return column.get_sequence_dense_tensor(
+      fc.FeatureTransformationCache(features), None)
+
+
+def _get_sequence_dense_tensor_state(column, features):
+  state_manager = _TestStateManager()
+  column.create_state(state_manager)
+  return column.get_sequence_dense_tensor(
+      fc.FeatureTransformationCache(features), state_manager)
+
+
+def _get_sparse_tensors(column, features):
+  return column.get_sparse_tensors(
+      fc.FeatureTransformationCache(features), None)
+
+
 class SequenceCategoricalColumnWithIdentityTest(
     test.TestCase, parameterized.TestCase):
 
@@ -698,7 +715,7 @@ class SequenceCategoricalColumnWithIdentityTest(
     expected = sparse_tensor.SparseTensorValue(**expected_args)
     column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9)
 
-    id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
@@ -737,7 +754,7 @@ class SequenceCategoricalColumnWithHashBucketTest(
     column = sfc.sequence_categorical_column_with_hash_bucket(
         'aaa', hash_bucket_size=10)
 
-    id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
@@ -790,7 +807,7 @@ class SequenceCategoricalColumnWithVocabularyFileTest(
         vocabulary_file=self._wire_vocabulary_file_name,
         vocabulary_size=self._wire_vocabulary_size)
 
-    id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
@@ -814,8 +831,7 @@ class SequenceCategoricalColumnWithVocabularyFileTest(
     input_placeholder_shape[1] = None
     input_placeholder = array_ops.sparse_placeholder(
         dtypes.string, shape=input_placeholder_shape)
-    id_weight_pair = column._get_sparse_tensors(
-        _LazyBuilder({'aaa': input_placeholder}))
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': input_placeholder})
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
@@ -855,7 +871,7 @@ class SequenceCategoricalColumnWithVocabularyListTest(
         key='aaa',
         vocabulary_list=('omar', 'stringer', 'marlo'))
 
-    id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs}))
+    id_weight_pair = _get_sparse_tensors(column, {'aaa': inputs})
 
     self.assertIsNone(id_weight_pair.weight_tensor)
     with monitored_session.MonitoredSession() as sess:
@@ -922,13 +938,12 @@ class SequenceEmbeddingColumnTest(
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc_old._embedding_column(
-        categorical_column,
-        dimension=embedding_dimension,
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=embedding_dimension,
         initializer=_initializer)
 
-    embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': inputs}))
+    embedding_lookup, _ = _get_sequence_dense_tensor_state(
+        embedding_column, {'aaa': inputs})
 
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
     self.assertItemsEqual(
@@ -961,10 +976,11 @@ class SequenceEmbeddingColumnTest(
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc_old._embedding_column(categorical_column, dimension=2)
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=2)
 
-    _, sequence_length = embedding_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': inputs}))
+    _, sequence_length = _get_sequence_dense_tensor_state(
+        embedding_column, {'aaa': inputs})
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length = sess.run(sequence_length)
@@ -988,10 +1004,11 @@ class SequenceEmbeddingColumnTest(
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
-    embedding_column = fc_old._embedding_column(categorical_column, dimension=2)
+    embedding_column = fc.embedding_column(
+        categorical_column, dimension=2)
 
-    _, sequence_length = embedding_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+    _, sequence_length = _get_sequence_dense_tensor_state(
+        embedding_column, {'aaa': sparse_input})
 
     with monitored_session.MonitoredSession() as sess:
       self.assertAllEqual(
@@ -1058,22 +1075,18 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase):
         key='aaa', num_buckets=vocabulary_size)
     categorical_column_b = sfc.sequence_categorical_column_with_identity(
         key='bbb', num_buckets=vocabulary_size)
-    shared_embedding_columns = fc.shared_embedding_columns(
+    shared_embedding_columns = fc.shared_embedding_columns_v2(
         [categorical_column_a, categorical_column_b],
         dimension=embedding_dimension,
         initializer=_initializer)
 
-    embedding_lookup_a = shared_embedding_columns[0]._get_sequence_dense_tensor(
-        _LazyBuilder({
-            'aaa': sparse_input_a
-        }))[0]
-    embedding_lookup_b = shared_embedding_columns[1]._get_sequence_dense_tensor(
-        _LazyBuilder({
-            'bbb': sparse_input_b
-        }))[0]
+    embedding_lookup_a = _get_sequence_dense_tensor(
+        shared_embedding_columns[0], {'aaa': sparse_input_a})[0]
+    embedding_lookup_b = _get_sequence_dense_tensor(
+        shared_embedding_columns[1], {'bbb': sparse_input_b})[0]
 
     global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
-    self.assertItemsEqual(('embedding_weights:0',),
+    self.assertItemsEqual(('aaa_bbb_shared_embedding:0',),
                           tuple([v.name for v in global_vars]))
     with monitored_session.MonitoredSession() as sess:
       self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
@@ -1104,17 +1117,13 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase):
     expected_sequence_length_b = [2, 1]
     categorical_column_b = sfc.sequence_categorical_column_with_identity(
         key='bbb', num_buckets=vocabulary_size)
-    shared_embedding_columns = fc.shared_embedding_columns(
+    shared_embedding_columns = fc.shared_embedding_columns_v2(
         [categorical_column_a, categorical_column_b], dimension=2)
 
-    sequence_length_a = shared_embedding_columns[0]._get_sequence_dense_tensor(
-        _LazyBuilder({
-            'aaa': sparse_input_a
-        }))[1]
-    sequence_length_b = shared_embedding_columns[1]._get_sequence_dense_tensor(
-        _LazyBuilder({
-            'bbb': sparse_input_b
-        }))[1]
+    sequence_length_a = _get_sequence_dense_tensor(
+        shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
+    sequence_length_b = _get_sequence_dense_tensor(
+        shared_embedding_columns[1], {'bbb': sparse_input_b})[1]
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length_a = sess.run(sequence_length_a)
@@ -1155,17 +1164,13 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase):
     categorical_column_b = sfc.sequence_categorical_column_with_identity(
         key='bbb', num_buckets=vocabulary_size)
 
-    shared_embedding_columns = fc.shared_embedding_columns(
+    shared_embedding_columns = fc.shared_embedding_columns_v2(
         [categorical_column_a, categorical_column_b], dimension=2)
 
-    sequence_length_a = shared_embedding_columns[0]._get_sequence_dense_tensor(
-        _LazyBuilder({
-            'aaa': sparse_input_a
-        }))[1]
-    sequence_length_b = shared_embedding_columns[1]._get_sequence_dense_tensor(
-        _LazyBuilder({
-            'bbb': sparse_input_b
-        }))[1]
+    sequence_length_a = _get_sequence_dense_tensor(
+        shared_embedding_columns[0], {'aaa': sparse_input_a})[1]
+    sequence_length_b = _get_sequence_dense_tensor(
+        shared_embedding_columns[1], {'bbb': sparse_input_b})[1]
 
     with monitored_session.MonitoredSession() as sess:
       self.assertAllEqual(
@@ -1221,10 +1226,10 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
-    indicator_column = fc_old._indicator_column(categorical_column)
+    indicator_column = fc.indicator_column(categorical_column)
 
-    indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': inputs}))
+    indicator_tensor, _ = _get_sequence_dense_tensor(
+        indicator_column, {'aaa': inputs})
 
     with monitored_session.MonitoredSession() as sess:
       self.assertAllEqual(expected, indicator_tensor.eval(session=sess))
@@ -1253,10 +1258,10 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
 
     categorical_column = sfc.sequence_categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
-    indicator_column = fc_old._indicator_column(categorical_column)
+    indicator_column = fc.indicator_column(categorical_column)
 
-    _, sequence_length = indicator_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': inputs}))
+    _, sequence_length = _get_sequence_dense_tensor(
+        indicator_column, {'aaa': inputs})
 
     with monitored_session.MonitoredSession() as sess:
       sequence_length = sess.run(sequence_length)
@@ -1282,19 +1287,14 @@ class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase):
         key='aaa', num_buckets=vocabulary_size)
     indicator_column = fc.indicator_column(categorical_column)
 
-    _, sequence_length = indicator_column._get_sequence_dense_tensor(
-        _LazyBuilder({'aaa': sparse_input}))
+    _, sequence_length = _get_sequence_dense_tensor(
+        indicator_column, {'aaa': sparse_input})
 
     with monitored_session.MonitoredSession() as sess:
       self.assertAllEqual(
           expected_sequence_length, sequence_length.eval(session=sess))
 
 
-def _get_sequence_dense_tensor(column, features):
-  return column.get_sequence_dense_tensor(
-      fc.FeatureTransformationCache(features), None)
-
-
 class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase):
 
   def test_defaults(self):
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 6308926494..4cc8efa925 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -3111,7 +3111,7 @@ class EmbeddingColumn(
           'Suggested fix: Use one of sequence_categorical_column_with_*. '
           'Given (type {}): {}'.format(self.name, type(self.categorical_column),
                                        self.categorical_column))
-    sparse_tensors = self.categorical_column.get_sequence_sparse_tensors(
+    sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
     dense_tensor = self._get_dense_tensor_internal(sparse_tensors,
                                                    state_manager)
@@ -3307,7 +3307,7 @@ class SharedEmbeddingColumn(
           'Suggested fix A: If you wish to use input_layer, use a '
           'non-sequence categorical_column_with_*. '
           'Suggested fix B: If you wish to create sequence input, use '
-          'sequence_input_layer instead of input_layer. '
+          'SequenceFeatureLayer instead of FeatureLayer. '
           'Given (type {}): {}'.format(self.name, type(self.categorical_column),
                                        self.categorical_column))
     return self._get_dense_tensor_internal(transformation_cache, state_manager)
@@ -3321,12 +3321,12 @@ class SharedEmbeddingColumn(
       raise ValueError(
           'In embedding_column: {}. '
           'categorical_column must be of type SequenceCategoricalColumn '
-          'to use sequence_input_layer. '
+          'to use SequenceFeatureLayer. '
           'Suggested fix: Use one of sequence_categorical_column_with_*. '
           'Given (type {}): {}'.format(self.name, type(self.categorical_column),
                                        self.categorical_column))
-    dense_tensor = self.get_dense_tensor_internal(transformation_cache,
-                                                  state_manager)
+    dense_tensor = self._get_dense_tensor_internal(transformation_cache,
+                                                   state_manager)
     sparse_tensors = self.categorical_column.get_sparse_tensors(
         transformation_cache, state_manager)
     sequence_length = fc_old._sequence_length_from_sparse_tensor(  # pylint: disable=protected-access
@@ -4469,8 +4469,8 @@ def _verify_static_batch_size_equality(tensors, columns):
 
 
 class SequenceCategoricalColumn(
-    FeatureColumn,
-    fc_old._CategoricalColumn,  # pylint: disable=protected-access
+    CategoricalColumn,
+    fc_old._SequenceCategoricalColumn,  # pylint: disable=protected-access
     collections.namedtuple('SequenceCategoricalColumn',
                            ('categorical_column'))):
   """Represents sequences of categorical data."""
@@ -4533,7 +4533,7 @@ class SequenceCategoricalColumn(
       weight_tensor = sparse_ops.sparse_reshape(weight_tensor, target_shape)
     return CategoricalColumn.IdWeightPair(id_tensor, weight_tensor)
 
-  def get_sequence_sparse_tensors(self, transformation_cache, state_manager):
+  def get_sparse_tensors(self, transformation_cache, state_manager):
     """Returns an IdWeightPair.
 
     `IdWeightPair` is a pair of `SparseTensor`s which represents ids and
-- 
GitLab


From 3dfbc353c299ef08f21d76fcb1c4f145441b771f Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Thu, 13 Dec 2018 07:39:34 -0800
Subject: [PATCH 497/873] Misc. fixes for optional variant tensors.

These are required for cond_v2 to work properly on GPU. The fix in optional_ops.cc is subtle, but I can't think of a way to more generally fix this lifetime issue.

PiperOrigin-RevId: 225369355
---
 .../core/common_runtime/gpu/gpu_device.cc     |  4 +++-
 tensorflow/core/kernels/data/optional_ops.cc  | 10 +++++++---
 tensorflow/core/kernels/function_ops.cc       |  2 ++
 .../python/data/kernel_tests/optional_test.py | 20 +++++++++++++++++++
 4 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 5152d97fde..14b57cc337 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -601,7 +601,9 @@ Status BaseGPUDevice::MaybeCopyTensorToGPU(
         [to, copy](StatusCallback done_,
                    // Begin unbound arguments.
                    const Status& s) {
-          *to = std::move(*copy);
+          if (s.ok()) {
+            *to = std::move(*copy);
+          }
           delete copy;
           done_(s);
         },
diff --git a/tensorflow/core/kernels/data/optional_ops.cc b/tensorflow/core/kernels/data/optional_ops.cc
index d8a7f21c5f..a406f7467f 100644
--- a/tensorflow/core/kernels/data/optional_ops.cc
+++ b/tensorflow/core/kernels/data/optional_ops.cc
@@ -159,9 +159,13 @@ static Status OptionalDeviceCopy(
     to_values.reserve(from_values.size());
     for (const Tensor& t : from_values) {
       if (DMAHelper::CanUseDMA(&t) || t.dtype() == DT_VARIANT) {
-        Tensor tmp(t.dtype());
-        TF_RETURN_IF_ERROR(copy(t, &tmp));
-        to_values.push_back(std::move(tmp));
+        // NOTE(skyewm): we're careful to make sure the lifetime of the 'to'
+        // Tensor passed to `copy` (i.e. to_values.back()) is the same as the
+        // returned 'to' OptionalVariant. This is because `copy` may spawn async
+        // callbacks that don't run until after this function returns and access
+        // the 'to' Tensor (e.g. BaseGPUDevice::MaybeCopyTensorToGPU).
+        to_values.emplace_back(t.dtype());
+        TF_RETURN_IF_ERROR(copy(t, &to_values.back()));
       } else {
         to_values.push_back(t);
       }
diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index 90f94ee4a0..88a8a523e4 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -130,6 +130,7 @@ REGISTER_KERNEL_BUILDER(
       Name(kRetOp).Device(DEVICE_GPU).TypeConstraint<type>("T"), RetvalOp);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER)
 TF_CALL_QUANTIZED_TYPES(REGISTER)
+REGISTER(Variant)
 TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kRetOp)
                                                    .Device(DEVICE_GPU)
                                                    .HostMemory("input")
@@ -137,6 +138,7 @@ TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kRetOp)
                                                RetvalOp);
 REGISTER_KERNEL_BUILDER(
     Name(kDeviceRetOp).Device(DEVICE_GPU).TypeConstraint<int32>("T"), RetvalOp);
+
 REGISTER_KERNEL_BUILDER(Name(kRetOp)
                             .Device(DEVICE_GPU)
                             .TypeConstraint<ResourceHandle>("T")
diff --git a/tensorflow/python/data/kernel_tests/optional_test.py b/tensorflow/python/data/kernel_tests/optional_test.py
index c2c62e9423..ba5ee9b661 100644
--- a/tensorflow/python/data/kernel_tests/optional_test.py
+++ b/tensorflow/python/data/kernel_tests/optional_test.py
@@ -25,6 +25,7 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import optional_ops
 from tensorflow.python.data.util import structure
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -361,6 +362,25 @@ class OptionalTest(test_base.DatasetTestBase, parameterized.TestCase):
         with self.assertRaises(errors.InvalidArgumentError):
           sess.run(elem_value_t)
 
+  def testFunctionBoundaries(self):
+    @def_function.function
+    def get_optional():
+      x = constant_op.constant(1.0)
+      opt = optional_ops.Optional.from_value(x)
+      # TODO(skyewm): support returning Optionals from functions?
+      return opt._variant_tensor
+
+    # TODO(skyewm): support Optional arguments?
+    @def_function.function
+    def consume_optional(opt_tensor):
+      value_structure = structure.TensorStructure(dtypes.float32, [])
+      opt = optional_ops._OptionalImpl(opt_tensor, value_structure)
+      return opt.get_value()
+
+    opt_tensor = get_optional()
+    val = consume_optional(opt_tensor)
+    self.assertEqual(self.evaluate(val), 1.0)
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 4e7413a2d51c6ecd8655df7b51e0936cb4e9aeff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 08:05:00 -0800
Subject: [PATCH 498/873] [TF:XLA] Use XLA CPU runtime functions to speed up R2
 dot in the HLO evaluator.

This CL adds a fast-path for R2 dot. For now the fast implementation has certain limitations:

1. Only operands with default layout, and
2. float type

It uses the XLA's CPU runtime functions which invoke eigen.

PiperOrigin-RevId: 225372611
---
 tensorflow/compiler/xla/BUILD                 |  1 -
 tensorflow/compiler/xla/reference_util.cc     | 36 +--------
 tensorflow/compiler/xla/service/BUILD         |  2 +
 .../compiler/xla/service/hlo_evaluator.cc     | 43 +++++++++++
 .../compiler/xla/service/hlo_evaluator.h      | 17 +++++
 .../xla/service/hlo_evaluator_typed_visitor.h | 73 +++++++++++++++++++
 .../xla/service/interpreter/compiler.cc       |  7 +-
 .../xla/tests/client_library_test_base.cc     |  6 ++
 .../compiler/xla/tests/hlo_test_base.cc       |  1 +
 tensorflow/compiler/xla/xla.proto             |  5 +-
 10 files changed, 154 insertions(+), 37 deletions(-)

diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 19f12569ff..aa5776a912 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -723,7 +723,6 @@ cc_library(
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_evaluator",
         "//tensorflow/compiler/xla/service:shape_inference",
-        "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_matmul",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/types:span",
diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc
index ceb5e74db7..a27e2005da 100644
--- a/tensorflow/compiler/xla/reference_util.cc
+++ b/tensorflow/compiler/xla/reference_util.cc
@@ -21,7 +21,6 @@ limitations under the License.
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/literal_util.h"
-#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
 #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/shape_inference.h"
@@ -32,48 +31,19 @@ limitations under the License.
 
 namespace xla {
 
-namespace {
-
-template <typename T>
-std::unique_ptr<Array2D<T>> MatmulArray2DImpl(
-    const Array2D<T>& lhs, const Array2D<T>& rhs,
-    const std::function<void(
-        const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, int64 n,
-        int64 k, int32 transpose_lhs, int32 transpose_rhs)>& impl_fn) {
-  CHECK_EQ(lhs.width(), rhs.height());
-  int m = lhs.height();
-  int n = rhs.width();
-  int k = lhs.width();
-  auto result = absl::make_unique<Array2D<T>>(m, n);
-  // Because Eigen is a header-oriented library, make sure that the Eigen code
-  // is the same as the code used by the CPU backend (otherwise the linker will
-  // randomly pick *some* definition).
-  impl_fn(
-      /*run_options_ptr=*/nullptr, result->data(), rhs.data(), lhs.data(), n, m,
-      k,
-      /*transpose_lhs=*/0,
-      /*transpose_rhs=*/0);
-  return result;
-}
-
-}  // namespace
-
 /* static */ std::unique_ptr<Array2D<Eigen::half>> ReferenceUtil::MatmulArray2D(
     const Array2D<Eigen::half>& lhs, const Array2D<Eigen::half>& rhs) {
-  return MatmulArray2DImpl<Eigen::half>(
-      lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF16);
+  return HloEvaluator::MatmulArray2D(lhs, rhs);
 }
 
 /* static */ std::unique_ptr<Array2D<float>> ReferenceUtil::MatmulArray2D(
     const Array2D<float>& lhs, const Array2D<float>& rhs) {
-  return MatmulArray2DImpl<float>(
-      lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF32);
+  return HloEvaluator::MatmulArray2D(lhs, rhs);
 }
 
 /* static */ std::unique_ptr<Array2D<double>> ReferenceUtil::MatmulArray2D(
     const Array2D<double>& lhs, const Array2D<double>& rhs) {
-  return MatmulArray2DImpl<double>(
-      lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF64);
+  return HloEvaluator::MatmulArray2D(lhs, rhs);
 }
 
 /* static */ std::unique_ptr<Array2D<double>> ReferenceUtil::Array2DF32ToF64(
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 8ed9a7bea2..4aaa8a5b65 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -241,6 +241,7 @@ cc_library(
         ":hlo_casting_utils",
         ":hlo_query",
         ":shape_inference",
+        "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_util",
@@ -249,6 +250,7 @@ cc_library(
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:window_util",
         "//tensorflow/compiler/xla:xla_data_proto",
+        "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_matmul",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/base",
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index e98fc0a5de..934c082bb9 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -33,6 +33,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
+#include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
 #include "tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -1452,4 +1453,46 @@ template StatusOr<Literal> HloEvaluator::Evaluate<const Literal*>(
 template StatusOr<Literal> HloEvaluator::Evaluate<const Literal*>(
     HloInstruction* instruction, absl::Span<const Literal* const> arg_literals);
 
+namespace {
+template <typename T>
+std::unique_ptr<Array2D<T>> MatmulArray2DImpl(
+    const Array2D<T>& lhs, const Array2D<T>& rhs,
+    const std::function<void(
+        const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, int64 n,
+        int64 k, int32 transpose_lhs, int32 transpose_rhs)>& impl_fn) {
+  CHECK_EQ(lhs.width(), rhs.height());
+  int m = lhs.height();
+  int n = rhs.width();
+  int k = lhs.width();
+  auto result = absl::make_unique<Array2D<T>>(m, n);
+  // Because Eigen is a header-oriented library, make sure that the Eigen code
+  // is the same as the code used by the CPU backend (otherwise the linker will
+  // randomly pick *some* definition).
+  impl_fn(
+      /*run_options_ptr=*/nullptr, result->data(), rhs.data(), lhs.data(), n, m,
+      k,
+      /*transpose_lhs=*/0,
+      /*transpose_rhs=*/0);
+  return result;
+}
+}  // namespace
+
+std::unique_ptr<Array2D<Eigen::half>> HloEvaluator::MatmulArray2D(
+    const Array2D<Eigen::half>& lhs, const Array2D<Eigen::half>& rhs) {
+  return MatmulArray2DImpl<Eigen::half>(
+      lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF16);
+}
+
+std::unique_ptr<Array2D<float>> HloEvaluator::MatmulArray2D(
+    const Array2D<float>& lhs, const Array2D<float>& rhs) {
+  return MatmulArray2DImpl<float>(
+      lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF32);
+}
+
+std::unique_ptr<Array2D<double>> HloEvaluator::MatmulArray2D(
+    const Array2D<double>& lhs, const Array2D<double>& rhs) {
+  return MatmulArray2DImpl<double>(
+      lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF64);
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h
index 45ed8131dc..d363a51c63 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include "absl/container/node_hash_map.h"
 #include "absl/memory/memory.h"
 #include "absl/types/span.h"
+#include "tensorflow/compiler/xla/array2d.h"
 #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h"
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -119,6 +120,17 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
                                   const PrecisionConfig& precision_config,
                                   const Literal& lhs, const Literal& rhs);
 
+  // Enable the fast path for certain operations like dot or convolution.
+  void set_use_fast_path(bool value) { use_fast_path_ = value; }
+
+  // Returns the result of a matrix multiply `lhs x rhs`.
+  static std::unique_ptr<Array2D<Eigen::half>> MatmulArray2D(
+      const Array2D<Eigen::half>& lhs, const Array2D<Eigen::half>& rhs);
+  static std::unique_ptr<Array2D<float>> MatmulArray2D(
+      const Array2D<float>& lhs, const Array2D<float>& rhs);
+  static std::unique_ptr<Array2D<double>> MatmulArray2D(
+      const Array2D<double>& lhs, const Array2D<double>& rhs);
+
  protected:
   // Make HloEvaluatorTypedVisitor a friend because it is logically part of this
   // class.
@@ -217,6 +229,9 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   // we cannot use flat_hash_map any more.
   absl::node_hash_map<const HloInstruction*, Literal> evaluated_;
 
+  // Use fast path that uses eigen in the evaluator.
+  bool use_fast_path_ = false;
+
  private:
   template <typename ReturnT, typename NativeT>
   static StatusOr<Literal> ElementWiseUnaryOpImpl(
@@ -250,6 +265,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault {
   TF_DISALLOW_COPY_AND_ASSIGN(HloEvaluator);
 };
 
+std::unique_ptr<Array2D<float>> MatmulArray2D(const Array2D<float>& lhs,
+                                              const Array2D<float>& rhs);
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_EVALUATOR_H_
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index cd79117cbe..03d42990ce 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/types/optional.h"
+#include "tensorflow/compiler/xla/array2d.h"
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/service/hlo_casting_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_evaluator.h"
@@ -1154,6 +1155,78 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
   }
 
   Status HandleDot(HloInstruction* dot) override {
+    if (parent_->use_fast_path_) {
+      return HandleDot<ReturnT>(dot);
+    }
+    return HandleDotSlowPath(dot);
+  }
+
+  template <typename NativeT, typename std::enable_if<std::is_same<
+                                  NativeT, float>::value>::type* = nullptr>
+  Status HandleDot(HloInstruction* dot) {
+    const HloInstruction* lhs = dot->operand(0);
+    const HloInstruction* rhs = dot->operand(1);
+    CHECK(ShapeUtil::IsArray(dot->shape()));
+    CHECK(ShapeUtil::IsArray(lhs->shape()));
+    CHECK(ShapeUtil::IsArray(rhs->shape()));
+
+    const auto& dnums = dot->dot_dimension_numbers();
+
+    const int64 lhs_rank = ShapeUtil::Rank(lhs->shape());
+    const int64 rhs_rank = ShapeUtil::Rank(rhs->shape());
+
+    CHECK(ShapeUtil::SameElementType(lhs->shape(), rhs->shape()));
+    CHECK(ShapeUtil::SameElementType(lhs->shape(), dot->shape()));
+
+    // There must be 1 and only 1 Contracting dimension for lhs and rhs.
+    CHECK_EQ(dnums.lhs_contracting_dimensions_size(), 1);
+    CHECK_EQ(dnums.rhs_contracting_dimensions_size(), 1);
+    const int64 lhs_contracting_dimension = dnums.lhs_contracting_dimensions(0);
+    const int64 rhs_contracting_dimension = dnums.rhs_contracting_dimensions(0);
+    // Contracted dimension sizes must be the same.
+    CHECK_EQ(lhs->shape().dimensions(lhs_contracting_dimension),
+             rhs->shape().dimensions(rhs_contracting_dimension))
+        << "lhs contracted dimension: "
+        << lhs->shape().dimensions(lhs_contracting_dimension)
+        << " rhs contracted dimension: "
+        << rhs->shape().dimensions(rhs_contracting_dimension);
+
+    // The fast path is for a simple rank 2 dot with default layout operands.
+    if (lhs_rank == 2 && rhs_rank == 2 && lhs_contracting_dimension == 1 &&
+        rhs_contracting_dimension == 0 &&
+        LayoutUtil::Equal(lhs->shape().layout(),
+                          LayoutUtil::GetDefaultLayoutForR2()) &&
+        LayoutUtil::Equal(rhs->shape().layout(),
+                          LayoutUtil::GetDefaultLayoutForR2()) &&
+        LayoutUtil::Equal(dot->shape().layout(),
+                          LayoutUtil::GetDefaultLayoutForR2())) {
+      const Literal& lhs_literal = parent_->GetEvaluatedLiteralFor(lhs);
+      const Literal& rhs_literal = parent_->GetEvaluatedLiteralFor(rhs);
+      const int64 contracted_dimension_size =
+          lhs->shape().dimensions(lhs_contracting_dimension);
+      Array2D<NativeT> lhs_array(lhs->shape().dimensions(0),
+                                 contracted_dimension_size);
+      lhs_array.SetValues(lhs_literal.data<NativeT>());
+      Array2D<NativeT> rhs_array(contracted_dimension_size,
+                                 rhs->shape().dimensions(1));
+      rhs_array.SetValues(rhs_literal.data<NativeT>());
+      std::unique_ptr<Array2D<NativeT>> result_array =
+          HloEvaluator::MatmulArray2D(lhs_array, rhs_array);
+      Literal result(dot->shape());
+      result.PopulateR2FromArray2D(*result_array);
+      parent_->evaluated_[dot] = std::move(result);
+      return Status::OK();
+    }
+    return HandleDotSlowPath(dot);
+  }
+
+  template <typename NativeT, typename std::enable_if<!std::is_same<
+                                  NativeT, float>::value>::type* = nullptr>
+  Status HandleDot(HloInstruction* dot) {
+    return HandleDotSlowPath(dot);
+  }
+
+  Status HandleDotSlowPath(HloInstruction* dot) {
     auto lhs = dot->operand(0);
     auto rhs = dot->operand(1);
     CHECK(ShapeUtil::IsArray(dot->shape()));
diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc
index 3a5177c418..d37ae94bf6 100644
--- a/tensorflow/compiler/xla/service/interpreter/compiler.cc
+++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc
@@ -76,9 +76,12 @@ StatusOr<std::unique_ptr<Executable>> InterpreterCompiler::RunBackend(
   // need to compile anything
 
   // Create executable from only the Hlo module.
+  auto evaluator = absl::make_unique<HloEvaluator>();
+  evaluator->set_use_fast_path(
+      hlo_module->config().debug_options().xla_hlo_evaluator_use_fast_path());
   std::unique_ptr<Executable> executable =
-      absl::make_unique<InterpreterExecutable>(
-          std::move(hlo_module), absl::make_unique<HloEvaluator>());
+      absl::make_unique<InterpreterExecutable>(std::move(hlo_module),
+                                               std::move(evaluator));
 
   return std::move(executable);
 }
diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc
index 12c0299833..697236dc62 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.cc
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc
@@ -74,6 +74,9 @@ ClientLibraryTestBase::ClientLibraryTestBase(
   // default.
   execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes(
       "constant_folding");
+
+  execution_options_.mutable_debug_options()
+      ->set_xla_hlo_evaluator_use_fast_path(true);
 }
 
 ClientLibraryTestBase::ClientLibraryTestBase(se::Platform* platform)
@@ -88,6 +91,9 @@ ClientLibraryTestBase::ClientLibraryTestBase(se::Platform* platform)
 
   execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes(
       "constant_folding");
+
+  execution_options_.mutable_debug_options()
+      ->set_xla_hlo_evaluator_use_fast_path(true);
 }
 
 string ClientLibraryTestBase::TestName() const {
diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc
index 989a7c705a..d57846e19b 100644
--- a/tensorflow/compiler/xla/tests/hlo_test_base.cc
+++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc
@@ -181,6 +181,7 @@ DebugOptions HloTestBase::GetDebugOptionsForTest() {
   // TODO(b/38354253): Change tests to use Parameters instead of Constants.
   debug_options.add_xla_disable_hlo_passes("constant_folding");
   debug_options.set_xla_gpu_max_kernel_unroll_factor(1);
+  debug_options.set_xla_hlo_evaluator_use_fast_path(true);
   return debug_options;
 }
 
diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto
index 8b894cc769..0e8fa73f81 100644
--- a/tensorflow/compiler/xla/xla.proto
+++ b/tensorflow/compiler/xla/xla.proto
@@ -227,7 +227,10 @@ message DebugOptions {
   // Dump HLO graphs as an HTML (DOT -> SVG inlined in HTML)
   bool xla_hlo_dump_as_html = 105;
 
-  // Next id: 106
+  // Enable fast math with eigen in the HLO evaluator.
+  bool xla_hlo_evaluator_use_fast_path = 106;
+
+  // Next id: 107
 
   // Extra options to pass to the compilation backend (e.g. LLVM); specific
   // interpretation of these values is left to the backend.
-- 
GitLab


From 04f51da57eb77486368c223412df2487c0f8f423 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 08:06:35 -0800
Subject: [PATCH 499/873] Add concrete functions to graph.

PiperOrigin-RevId: 225372823
---
 .../saved_model/function_serialization.py     | 79 ++++++++++---------
 tensorflow/python/saved_model/load_test.py    | 17 ++++
 tensorflow/python/saved_model/save.py         | 12 +++
 3 files changed, 72 insertions(+), 36 deletions(-)

diff --git a/tensorflow/python/saved_model/function_serialization.py b/tensorflow/python/saved_model/function_serialization.py
index 7cf82776bd..69f34f0fd6 100644
--- a/tensorflow/python/saved_model/function_serialization.py
+++ b/tensorflow/python/saved_model/function_serialization.py
@@ -23,49 +23,56 @@ from tensorflow.python.eager import function as defun_lib
 from tensorflow.python.saved_model import saved_object_graph_pb2
 
 
-def _serialize_polymorphic_function(function):
-  """Represents a PolymorphicFunction in a SavedModel.
-
-  Adds `function`'s concrete functions to the current graph.
-
-  Args:
-    function: A `PolymorphicFunction` to serialize.
-
-  Returns:
-    An unserialized `SavedPolymorphicFunction` protocol buffer object.
-  """
+def _serialize_polymorphic_function(polymorphic_function):
   monomorphic_functions = []
-  for signature in function._cached_input_signatures:  # pylint: disable=protected-access
-    if any(isinstance(arg, defun_lib.UnknownArgument) for arg in signature):
-      continue
-    concrete_function = function.get_concrete_function(*signature)
-    concrete_function.add_to_graph()
+  for concrete_function in list_all_concrete_functions(polymorphic_function):
     monomorphic_functions.append(
         saved_object_graph_pb2.SavedMonomorphicFunction(
             concrete_function=concrete_function.name))
-  return saved_object_graph_pb2.SavedPolymorphicFunction(
+  saved_polymorphic_function = saved_object_graph_pb2.SavedPolymorphicFunction(
       monomorphic_function=monomorphic_functions)
+  return saved_polymorphic_function
+
+
+def list_all_concrete_functions(polymorphic_function):
+  """Given a polymorphic function, returns all of its concrete functions."""
+  concrete_functions = []
+  for signature in polymorphic_function._cached_input_signatures:  # pylint: disable=protected-access
+    if any(isinstance(arg, defun_lib.UnknownArgument) for arg in signature):
+      continue
+    concrete_function = polymorphic_function.get_concrete_function(*signature)
+    concrete_functions.append(concrete_function)
+  return concrete_functions
+
+
+def list_all_polymorphic_functions(checkpointable_object):
+  """Given a checkpointable object, returns all of its polymorphic functions."""
+  polymorphic_functions = dict()
+  for attribute_name in dir(checkpointable_object):
+    try:
+      attribute_value = getattr(checkpointable_object, attribute_name, None)
+    except:  # pylint: disable=bare-except
+      # We really don't want to throw an exception just because some object's
+      # attribute accessor is broken.
+      attribute_value = None
+    # TODO(allenl): Consider de-duplicating functions which are referenced
+    # from multiple attributes.
+    if isinstance(attribute_value, def_function.PolymorphicFunction):
+      polymorphic_functions[attribute_name] = attribute_value
+  return polymorphic_functions
 
 
-def add_polymorphic_functions_to_object_graph_proto(
-    checkpointable_objects, saved_object_graph):
+def add_polymorphic_functions_to_object_graph_proto(checkpointable_objects,
+                                                    saved_object_graph):
   """Finds PolymorphicFunctions attached to objects and saves them."""
   existing_objects = list(zip(checkpointable_objects, saved_object_graph.nodes))
   for obj, obj_proto in existing_objects:
-    for attribute_name in dir(obj):
-      try:
-        attribute_value = getattr(obj, attribute_name, None)
-      except:  # pylint: disable=bare-except
-        # We really don't want to throw an exception just because some object's
-        # attribute accessor is broken.
-        attribute_value = None
-      # TODO(allenl): Consider de-duplicating functions which are referenced
-      # from multiple attributes.
-      if isinstance(attribute_value, def_function.PolymorphicFunction):
-        function_node_id = len(saved_object_graph.nodes)
-        function_node = saved_object_graph.nodes.add()
-        function_node.function.CopyFrom(
-            _serialize_polymorphic_function(attribute_value))
-        reference = obj_proto.children.add()
-        reference.node_id = function_node_id
-        reference.local_name = attribute_name
+    for name, polymorphic_function in list_all_polymorphic_functions(
+        obj).items():
+      function_node_id = len(saved_object_graph.nodes)
+      function_node = saved_object_graph.nodes.add()
+      function_node.function.CopyFrom(
+          _serialize_polymorphic_function(polymorphic_function))
+      reference = obj_proto.children.add()
+      reference.node_id = function_node_id
+      reference.local_name = name
diff --git a/tensorflow/python/saved_model/load_test.py b/tensorflow/python/saved_model/load_test.py
index 6a10ac432d..303b8f66ef 100644
--- a/tensorflow/python/saved_model/load_test.py
+++ b/tensorflow/python/saved_model/load_test.py
@@ -98,6 +98,23 @@ class LoadTest(test.TestCase):
     self.assertEqual(imported.asset1.asset_path.numpy(),
                      imported.asset2.asset_path.numpy())
 
+  def test_only_implicit_signatures(self):
+    def func(x):
+      return 2 * x
+
+    root = tracking.Checkpointable()
+    root.f = def_function.function(func)
+
+    # Add two traces.
+    root.f(constant_op.constant(1.))
+    root.f(constant_op.constant(1))
+
+    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
+    save.save(root, save_dir, signatures=dict())
+    imported = load.load(save_dir)
+
+    self.assertEqual(4., imported.f(constant_op.constant(2.)).numpy())
+    self.assertEqual(14, imported.f(constant_op.constant(7)).numpy())
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index 84d7b614d0..57c63f8cda 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -481,8 +481,20 @@ def _fill_meta_graph_def(meta_graph_def, obj, signature_functions,
   # variables, but want any operations associated with the save/restore to be in
   # the exported graph (thus the `to_graph` argument).
   saver = object_saver.freeze(object_map=object_map, to_graph=exported_graph)
+
+  # We must resolve the concrete function to add to MetaGraph while in eager
+  # mode.
+  concrete_functions = []
+  for accessible_object in accessible_objects:
+    for function in function_serialization.list_all_polymorphic_functions(
+        accessible_object).values():
+      concrete_functions.extend(
+          function_serialization.list_all_concrete_functions(function))
+
   with exported_graph.as_default():
     signatures = _generate_signatures(signature_functions, resource_map)
+    for concrete_function in concrete_functions:
+      concrete_function.add_to_graph()
     saver_def = saver.to_proto()
     meta_graph_def.saver_def.CopyFrom(saver_def)
   graph_def = exported_graph.as_graph_def(add_shapes=True)
-- 
GitLab


From de67c554a1496616e007548ca862ea74d7fc6ae3 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 13 Dec 2018 08:32:38 -0800
Subject: [PATCH 500/873] Use weak refs when caching symbols in the namespace,
 to avoid tripping circular reference detectors in tests.

PiperOrigin-RevId: 225376019
---
 .../python/autograph/converters/call_trees.py |  2 +-
 tensorflow/python/autograph/core/converter.py | 14 +++++++-
 .../python/autograph/core/converter_test.py   | 32 +++++++++++++++++++
 .../python/autograph/pyct/inspect_utils.py    |  4 +--
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/autograph/converters/call_trees.py b/tensorflow/python/autograph/converters/call_trees.py
index b1bfe04347..d4eb17e976 100644
--- a/tensorflow/python/autograph/converters/call_trees.py
+++ b/tensorflow/python/autograph/converters/call_trees.py
@@ -183,7 +183,7 @@ class CallTreeTransformer(converter.Base):
       for dec in target_node.decorator_list:
         decorator_fn = self._resolve_decorator_name(dec)
         if (decorator_fn is not None and
-            decorator_fn in self.ctx.program.options.strip_decorators):
+            self.ctx.program.options.should_strip(decorator_fn)):
           return False
 
     return True
diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index b9c2449566..4543b11398 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -63,6 +63,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import weakref
+
 import enum
 
 from tensorflow.python.autograph.core import config
@@ -175,6 +177,16 @@ class ConversionOptions(object):
     # TODO(mdan): Revert if function.defun becomes a public symbol.
     return self._strip_decorators + (function.defun,)
 
+  def should_strip(self, decorator):
+    for blacklisted in self.strip_decorators:
+      if blacklisted is decorator:
+        return True
+      if isinstance(blacklisted, weakref.ref):
+        blacklisted_deref = blacklisted()
+        if (blacklisted_deref is not None and blacklisted_deref is decorator):
+          return True
+    return False
+
   def uses(self, feature):
     return (Feature.ALL in self.optional_features or
             feature in self.optional_features)
@@ -208,7 +220,7 @@ class ConversionOptions(object):
       if not name:
         # TODO(mdan): This needs to account for the symbols defined locally.
         name = ctx.namer.new_symbol(o.__name__, ())
-        ctx.program.add_symbol(name, o)
+        ctx.program.add_symbol(name, weakref.ref(o))
       return name
 
     def list_of_names(values):
diff --git a/tensorflow/python/autograph/core/converter_test.py b/tensorflow/python/autograph/core/converter_test.py
index b73c67e337..864ea6c7d2 100644
--- a/tensorflow/python/autograph/core/converter_test.py
+++ b/tensorflow/python/autograph/core/converter_test.py
@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import weakref
+
 from tensorflow.python.autograph.core import converter
 from tensorflow.python.autograph.core import converter_testing
 from tensorflow.python.autograph.pyct import anno
@@ -29,6 +31,36 @@ class TestConverter(converter.Base):
   pass
 
 
+class ConversionOptionsTest(test.TestCase):
+
+  def test_should_strip_weakrefs(self):
+    def test_fn():
+      pass
+
+    def weak_test_fn_a():
+      pass
+
+    def weak_test_fn_b():
+      pass
+
+    def weak_test_fn_c():
+      pass
+
+    wr_a = weakref.ref(weak_test_fn_a)
+    # Create an extra weakref to check whether the existence of multiple weak
+    # references influences the process.
+    _ = weakref.ref(weak_test_fn_b)
+    wr_b = weakref.ref(weak_test_fn_b)
+    _ = weakref.ref(weak_test_fn_c)
+
+    opts = converter.ConversionOptions(strip_decorators=(test_fn, wr_a, wr_b))
+
+    self.assertTrue(opts.should_strip(test_fn))
+    self.assertTrue(opts.should_strip(weak_test_fn_a))
+    self.assertTrue(opts.should_strip(weak_test_fn_b))
+    self.assertFalse(opts.should_strip(weak_test_fn_c))
+
+
 class ConverterBaseTest(converter_testing.TestCase):
 
   def test_get_definition_directive_basic(self):
diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 56945b464b..360dd83b5e 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -101,7 +101,7 @@ def getnamespace(f):
   return namespace
 
 
-def getqualifiedname(namespace, object_, max_depth=7, visited=None):
+def getqualifiedname(namespace, object_, max_depth=5, visited=None):
   """Returns the name by which a value can be referred to in a given namespace.
 
   If the object defines a parent module, the function attempts to use it to
@@ -149,7 +149,7 @@ def getqualifiedname(namespace, object_, max_depth=7, visited=None):
     # Iterating over a copy prevents "changed size due to iteration" errors.
     # It's unclear why those occur - suspecting new modules may load during
     # iteration.
-    for name in tuple(namespace.keys()):
+    for name in namespace.keys():
       value = namespace[name]
       if tf_inspect.ismodule(value) and id(value) not in visited:
         visited.add(id(value))
-- 
GitLab


From 20fee25c54dfb6fa6378548d410de429d69027a2 Mon Sep 17 00:00:00 2001
From: Shashi Shekhar <shashishekhar@google.com>
Date: Thu, 13 Dec 2018 08:39:23 -0800
Subject: [PATCH 501/873] Fix instructions for building demo.

Add --cxxopt='--std=c++11' to instructions.

PiperOrigin-RevId: 225376880
---
 tensorflow/examples/android/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/android/README.md b/tensorflow/examples/android/README.md
index 82bc3ffda9..4e4e1685f6 100644
--- a/tensorflow/examples/android/README.md
+++ b/tensorflow/examples/android/README.md
@@ -180,7 +180,7 @@ After editing your WORKSPACE file to update the SDK/NDK configuration, you may
 build the APK. Run this from your workspace root:
 
 ```bash
-bazel build -c opt //tensorflow/examples/android:tensorflow_demo
+bazel build --cxxopt='--std=c++11' -c opt //tensorflow/examples/android:tensorflow_demo
 ```
 
 ##### Install
-- 
GitLab


From f260cb043a7877657ec0b25d21b023aa3fdcc7cf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 08:43:01 -0800
Subject: [PATCH 502/873] Internal change.

PiperOrigin-RevId: 225377288
---
 tensorflow/compiler/xla/BUILD        | 2 +-
 tensorflow/compiler/xla/client/BUILD | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index aa5776a912..0a20ddf662 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -109,7 +109,7 @@ cc_library(
     name = "status_macros",
     srcs = ["status_macros.cc"],
     hdrs = ["status_macros.h"],
-    visibility = [":friends"],
+    visibility = ["//visibility:public"],
     deps = [
         ":statusor",
         ":types",
diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD
index fe99564d3c..e61d9d2520 100644
--- a/tensorflow/compiler/xla/client/BUILD
+++ b/tensorflow/compiler/xla/client/BUILD
@@ -3,7 +3,7 @@
 
 licenses(["notice"])  # Apache 2.0
 
-package(default_visibility = [":friends"])
+package(default_visibility = ["//visibility:public"])
 
 package_group(
     name = "friends",
-- 
GitLab


From e3f1e41ac6e5dc529b6d40730a0e0298223b3cc8 Mon Sep 17 00:00:00 2001
From: Karmel Allison <karmel@google.com>
Date: Thu, 13 Dec 2018 09:06:55 -0800
Subject: [PATCH 503/873] Refactoring metrics tests.

PiperOrigin-RevId: 225380695
---
 tensorflow/python/keras/BUILD                 |  18 ++-
 .../python/keras/metrics_functional_test.py   | 122 ++++++++++++++++++
 tensorflow/python/keras/metrics_test.py       | 113 ++--------------
 3 files changed, 146 insertions(+), 107 deletions(-)
 create mode 100644 tensorflow/python/keras/metrics_functional_test.py

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 7c3fca2c82..ca44a7bbe9 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -300,16 +300,24 @@ py_test(
     ],
 )
 
+py_test(
+    name = "metrics_functional_test",
+    size = "medium",
+    srcs = ["metrics_functional_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":keras",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "metrics_test",
     size = "medium",
     srcs = ["metrics_test.py"],
+    shard_count = 4,
     srcs_version = "PY2AND3",
-    tags = [
-        "manual",
-        "no_oss",
-        "notap",
-    ],
     deps = [
         ":keras",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/keras/metrics_functional_test.py b/tensorflow/python/keras/metrics_functional_test.py
new file mode 100644
index 0000000000..513daaf9fc
--- /dev/null
+++ b/tensorflow/python/keras/metrics_functional_test.py
@@ -0,0 +1,122 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for Keras metrics functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.python.eager import context
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras import metrics
+from tensorflow.python.platform import test
+
+
+class KerasFunctionalMetricsTest(test.TestCase):
+
+  def test_metrics(self):
+    with self.cached_session():
+      y_a = K.variable(np.random.random((6, 7)))
+      y_b = K.variable(np.random.random((6, 7)))
+      for metric in [metrics.binary_accuracy, metrics.categorical_accuracy]:
+        output = metric(y_a, y_b)
+        self.assertEqual(K.eval(output).shape, (6,))
+
+  def test_sparse_categorical_accuracy_int(self):
+    with self.cached_session():
+      metric = metrics.sparse_categorical_accuracy
+      y_true = K.variable(np.random.randint(0, 7, (6,)))
+      y_pred = K.variable(np.random.random((6, 7)))
+      self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,))
+
+      # Test correctness if the shape of y_true is (num_samples,)
+      y_true = K.variable([1., 0., 0., 0.])
+      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
+      print(K.eval(metric(y_true, y_pred)))
+      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
+
+      # Test correctness if the shape of y_true is (num_samples, 1)
+      y_true = K.variable([[1.], [0.], [0.], [0.]])
+      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
+      print(K.eval(metric(y_true, y_pred)))
+      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
+
+  def test_sparse_categorical_accuracy_float(self):
+    with self.cached_session():
+      metric = metrics.sparse_categorical_accuracy
+      y_true = K.variable(np.random.random((6,)))
+      y_pred = K.variable(np.random.random((6, 7)))
+      self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,))
+
+  def test_sparse_categorical_accuracy_eager(self):
+    """Tests that ints passed in via Eager return results. See b/113504761."""
+    with context.eager_mode():
+      metric = metrics.sparse_categorical_accuracy
+      y_true = np.arange(6).reshape([6, 1])
+      y_pred = np.arange(36).reshape([6, 6])
+      self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.])
+
+  def test_sparse_categorical_accuracy_float_eager(self):
+    """Tests that floats passed in via Eager return results. See b/113504761."""
+    with context.eager_mode():
+      metric = metrics.sparse_categorical_accuracy
+      y_true = np.arange(6, dtype=np.float32).reshape([6, 1])
+      y_pred = np.arange(36).reshape([6, 6])
+      self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.])
+
+  def test_sparse_top_k_categorical_accuracy(self):
+    with self.cached_session():
+      # Test correctness if the shape of y_true is (num_samples, 1)
+      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
+      y_true = K.variable(np.array([[1], [0]]))
+      result = K.eval(
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
+      self.assertEqual(result, 1)
+      result = K.eval(
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
+      self.assertEqual(result, 0.5)
+      result = K.eval(
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
+      self.assertEqual(result, 0.)
+
+      # Test correctness if the shape of y_true is (num_samples,)
+      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
+      y_true = K.variable(np.array([1, 0]))
+      result = K.eval(
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
+      self.assertEqual(result, 1)
+      result = K.eval(
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
+      self.assertEqual(result, 0.5)
+      result = K.eval(
+          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
+      self.assertEqual(result, 0.)
+
+  def test_top_k_categorical_accuracy(self):
+    with self.cached_session():
+      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
+      y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]]))
+      result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=3))
+      self.assertEqual(result, 1)
+      result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=2))
+      self.assertEqual(result, 0.5)
+      result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=1))
+      self.assertEqual(result, 0.)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 9cad948966..1f13a97d75 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -27,7 +27,6 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
-from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import layers
 from tensorflow.python.keras import metrics
 from tensorflow.python.keras.models import Sequential
@@ -40,98 +39,11 @@ from tensorflow.python.training.checkpointable import util as checkpointable_uti
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
-class KerasMetricsTest(test.TestCase):
-
-  def test_metrics(self):
-    with self.cached_session():
-      y_a = K.variable(np.random.random((6, 7)))
-      y_b = K.variable(np.random.random((6, 7)))
-      for metric in [metrics.binary_accuracy, metrics.categorical_accuracy]:
-        output = metric(y_a, y_b)
-        self.assertEqual(K.eval(output).shape, (6,))
-
-  def test_sparse_categorical_accuracy_int(self):
-    with self.cached_session():
-      metric = metrics.sparse_categorical_accuracy
-      y_true = K.variable(np.random.randint(0, 7, (6,)))
-      y_pred = K.variable(np.random.random((6, 7)))
-      self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,))
-
-      # Test correctness if the shape of y_true is (num_samples,)
-      y_true = K.variable([1., 0., 0., 0.])
-      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
-      print(K.eval(metric(y_true, y_pred)))
-      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
-
-      # Test correctness if the shape of y_true is (num_samples, 1)
-      y_true = K.variable([[1.], [0.], [0.], [0.]])
-      y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]])
-      print(K.eval(metric(y_true, y_pred)))
-      self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.])
-
-  def test_sparse_categorical_accuracy_float(self):
-    with self.cached_session():
-      metric = metrics.sparse_categorical_accuracy
-      y_true = K.variable(np.random.random((6,)))
-      y_pred = K.variable(np.random.random((6, 7)))
-      self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,))
-
-  def test_sparse_categorical_accuracy_eager(self):
-    """Tests that ints passed in via Eager return results. See b/113504761."""
-    with context.eager_mode():
-      metric = metrics.sparse_categorical_accuracy
-      y_true = np.arange(6).reshape([6, 1])
-      y_pred = np.arange(36).reshape([6, 6])
-      self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.])
-
-  def test_sparse_categorical_accuracy_float_eager(self):
-    """Tests that floats passed in via Eager return results. See b/113504761."""
-    with context.eager_mode():
-      metric = metrics.sparse_categorical_accuracy
-      y_true = np.arange(6, dtype=np.float32).reshape([6, 1])
-      y_pred = np.arange(36).reshape([6, 6])
-      self.assertAllEqual(metric(y_true, y_pred), [0., 0., 0., 0., 0., 1.])
-
-  def test_sparse_top_k_categorical_accuracy(self):
-    with self.cached_session():
-      # Test correctness if the shape of y_true is (num_samples, 1)
-      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
-      y_true = K.variable(np.array([[1], [0]]))
-      result = K.eval(
-          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
-      self.assertEqual(result, 1)
-      result = K.eval(
-          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
-      self.assertEqual(result, 0.5)
-      result = K.eval(
-          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
-      self.assertEqual(result, 0.)
-
-      # Test correctness if the shape of y_true is (num_samples,)
-      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
-      y_true = K.variable(np.array([1, 0]))
-      result = K.eval(
-          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3))
-      self.assertEqual(result, 1)
-      result = K.eval(
-          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2))
-      self.assertEqual(result, 0.5)
-      result = K.eval(
-          metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1))
-      self.assertEqual(result, 0.)
-
-  def test_top_k_categorical_accuracy(self):
-    with self.cached_session():
-      y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]))
-      y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]]))
-      result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=3))
-      self.assertEqual(result, 1)
-      result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=2))
-      self.assertEqual(result, 0.5)
-      result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, k=1))
-      self.assertEqual(result, 0.)
-
-  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+@test_util.run_all_in_graph_and_eager_modes
+class KerasMeanTest(test.TestCase):
+
+  # TODO(b/120949004): Re-enable garbage collection check
+  # @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
   def test_mean(self):
     m = metrics.Mean(name='my_mean')
 
@@ -163,7 +75,6 @@ class KerasMetricsTest(test.TestCase):
     self.assertEqual(self.evaluate(m.total), 0)
     self.assertEqual(self.evaluate(m.count), 0)
 
-  @test_util.run_in_graph_and_eager_modes
   def test_mean_with_sample_weight(self):
     m = metrics.Mean(dtype=dtypes.float64)
     self.assertEqual(m.dtype, dtypes.float64)
@@ -227,7 +138,6 @@ class KerasMetricsTest(test.TestCase):
       self.assertAlmostEqual(self.evaluate(m.count), 1.7, 2)  # 0.5 + 1.2
       self.assertAlmostEqual(result, 52 / 1.7, 2)
 
-  @test_util.run_in_graph_and_eager_modes
   def test_save_restore(self):
     checkpoint_directory = self.get_temp_dir()
     checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt')
@@ -258,7 +168,10 @@ class KerasMetricsTest(test.TestCase):
     self.assertEqual(200., self.evaluate(restore_mean.result()))
     self.assertEqual(3, self.evaluate(restore_mean.count))
 
-  @test_util.run_in_graph_and_eager_modes
+
+@test_util.run_all_in_graph_and_eager_modes
+class KerasAccuracyTest(test.TestCase):
+
   def test_accuracy(self):
     acc_obj = metrics.Accuracy(name='my acc')
 
@@ -280,7 +193,6 @@ class KerasMetricsTest(test.TestCase):
     result = self.evaluate(result_t)
     self.assertAlmostEqual(result, 0.96, 2)  # 4.5/4.7
 
-  @test_util.run_in_graph_and_eager_modes
   def test_binary_accuracy(self):
     acc_obj = metrics.BinaryAccuracy(name='my acc')
 
@@ -313,7 +225,6 @@ class KerasMetricsTest(test.TestCase):
     result = self.evaluate(result_t)
     self.assertAlmostEqual(result, 0.67, 2)  # 4.5/6.7
 
-  @test_util.run_in_graph_and_eager_modes
   def test_binary_accuracy_threshold(self):
     acc_obj = metrics.BinaryAccuracy(threshold=0.7)
     self.evaluate(variables.variables_initializer(acc_obj.variables))
@@ -321,7 +232,6 @@ class KerasMetricsTest(test.TestCase):
     result = self.evaluate(result_t)
     self.assertAlmostEqual(result, 0.5, 2)
 
-  @test_util.run_in_graph_and_eager_modes
   def test_categorical_accuracy(self):
     acc_obj = metrics.CategoricalAccuracy(name='my acc')
 
@@ -345,7 +255,6 @@ class KerasMetricsTest(test.TestCase):
     result = self.evaluate(result_t)
     self.assertAlmostEqual(result, 0.93, 2)  # 2.5/2.7
 
-  @test_util.run_in_graph_and_eager_modes
   def test_sparse_categorical_accuracy(self):
     acc_obj = metrics.SparseCategoricalAccuracy(name='my acc')
 
@@ -689,7 +598,7 @@ class PrecisionTest(test.TestCase):
   def test_config(self):
     p_obj = metrics.Precision(name='my_precision', thresholds=[0.4, 0.9])
     self.assertEqual(p_obj.name, 'my_precision')
-    self.assertLen(p_obj.variables, 2)
+    self.assertEqual(len(p_obj.variables), 2)
     self.assertEqual([v.name for v in p_obj.variables],
                      ['true_positives:0', 'false_positives:0'])
     self.assertEqual(p_obj.thresholds, [0.4, 0.9])
@@ -813,7 +722,7 @@ class RecallTest(test.TestCase):
   def test_config(self):
     r_obj = metrics.Recall(name='my_recall', thresholds=[0.4, 0.9])
     self.assertEqual(r_obj.name, 'my_recall')
-    self.assertLen(r_obj.variables, 2)
+    self.assertEqual(len(r_obj.variables), 2)
     self.assertEqual([v.name for v in r_obj.variables],
                      ['true_positives:0', 'false_negatives:0'])
     self.assertEqual(r_obj.thresholds, [0.4, 0.9])
-- 
GitLab


From 5c254e247af7f2c785e266db58e0e99f204f8461 Mon Sep 17 00:00:00 2001
From: Tamara Norman <tamaranorman@google.com>
Date: Thu, 13 Dec 2018 09:32:28 -0800
Subject: [PATCH 504/873] Fix typo in doc strings

PiperOrigin-RevId: 225384221
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 3111ef6771..82ed7da830 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -160,7 +160,7 @@ def enable_v2_behavior():
 
 @tf_export(v1=["disable_v2_behavior"])
 def disable_v2_behavior():
-  """Enables TensorFlow 2.x behaviors.
+  """Disables TensorFlow 2.x behaviors.
 
   This function can be called at the beginning of the program (before `Tensors`,
   `Graphs` or other structures have been created, and before devices have been
-- 
GitLab


From a1ff9d7a2e4170af288e69934fd33980f9e5e40f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 09:53:10 -0800
Subject: [PATCH 505/873] Clarify how to run tf_upgrade_v2.

PiperOrigin-RevId: 225387352
---
 tensorflow/tools/compatibility/README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/compatibility/README.md b/tensorflow/tools/compatibility/README.md
index 6ff42b1fef..5e2de35338 100644
--- a/tensorflow/tools/compatibility/README.md
+++ b/tensorflow/tools/compatibility/README.md
@@ -7,7 +7,7 @@ Specifically: \
 
 ## Running the script from pip package
 
-First, install TensorFlow pip package. See
+First, install TensorFlow pip package*. See
 https://www.tensorflow.org/install/pip.
 
 Upgrade script can be run on a single Python file:
@@ -27,6 +27,8 @@ tf_upgrade_v2 --intree coolcode --outtree coolcode-upgraded
 tf_upgrade_v2 --intree coolcode --outtree coolcode-upgraded --copyotherfiles False
 ```
 
+*Note: `tf_upgrade_v2` is installed automatically as a script by the pip install 
+after TensorFlow 1.12.
 
 ## Report
 
-- 
GitLab


From 07136e8fe3a77f8889c0ee95c78b15f01f05eb49 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 10:00:12 -0800
Subject: [PATCH 506/873] Internal Change

PiperOrigin-RevId: 225388468
---
 .../kernel_tests/unicode_decode_op_test.py    |  92 ++--
 tensorflow/python/ops/ragged/BUILD            | 125 ++++--
 tensorflow/python/ops/ragged/__init__.py      | 171 ++------
 ...vert_to_tensor_or_ragged_tensor_op_test.py |  77 ++--
 .../python/ops/ragged/ragged_array_ops.py     |  39 +-
 .../ops/ragged/ragged_batch_gather_op_test.py | 146 +++---
 .../ops/ragged/ragged_boolean_mask_op_test.py | 149 ++++---
 .../ops/ragged/ragged_concat_op_test.py       |  22 +-
 .../python/ops/ragged/ragged_const_op_test.py |  20 +-
 .../ragged/ragged_constant_value_op_test.py   |  17 +-
 .../python/ops/ragged/ragged_dispatch_test.py | 415 +++++++++++-------
 .../python/ops/ragged/ragged_eager_test.py    |   6 +-
 .../ops/ragged/ragged_expand_dims_op_test.py  |  11 +-
 .../ops/ragged/ragged_from_sparse_op_test.py  |   2 +-
 .../ops/ragged/ragged_from_tensor_op_test.py  |   2 +-
 .../ops/ragged/ragged_gather_nd_op_test.py    | 147 ++++---
 .../ops/ragged/ragged_gather_op_test.py       |  66 +--
 .../ragged/ragged_map_flat_values_op_test.py  |  89 ++--
 .../ops/ragged/ragged_map_fn_op_test.py       | 103 +++--
 .../ops/ragged/ragged_operators_test.py       |  16 +-
 .../python/ops/ragged/ragged_range_op_test.py |  56 +--
 .../ops/ragged/ragged_reduce_op_test.py       | 104 ++---
 .../ops/ragged/ragged_row_lengths_op_test.py  |   9 +-
 ...agged_row_splits_to_segment_ids_op_test.py |  16 +-
 ...agged_segment_ids_to_row_splits_op_test.py |  20 +-
 .../ops/ragged/ragged_segment_op_test.py      | 126 +++---
 .../python/ops/ragged/ragged_stack_op_test.py |  16 +-
 .../ragged_tensor_bounding_shape_op_test.py   |  22 +-
 .../ops/ragged/ragged_tensor_shape_test.py    | 234 +++++-----
 .../python/ops/ragged/ragged_tensor_test.py   |  36 +-
 .../python/ops/ragged/ragged_test_util.py     |  21 +-
 .../python/ops/ragged/ragged_tile_op_test.py  |  11 +-
 .../ops/ragged/ragged_to_sparse_op_test.py    |  56 +--
 .../ops/ragged/ragged_to_tensor_op_test.py    |   8 +-
 .../python/ops/ragged/ragged_where_op_test.py |  92 ++--
 35 files changed, 1353 insertions(+), 1189 deletions(-)

diff --git a/tensorflow/python/kernel_tests/unicode_decode_op_test.py b/tensorflow/python/kernel_tests/unicode_decode_op_test.py
index c3b4370499..9a59f8a7ac 100644
--- a/tensorflow/python/kernel_tests/unicode_decode_op_test.py
+++ b/tensorflow/python/kernel_tests/unicode_decode_op_test.py
@@ -29,7 +29,8 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_string_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_string_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import test
 
@@ -95,25 +96,25 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
 
   def testScalarDecode(self):
     text = constant_op.constant(u"仅今年前".encode("utf-8"))
-    chars = ragged.unicode_decode(text, "utf-8")
+    chars = ragged_string_ops.unicode_decode(text, "utf-8")
     self.assertAllEqual(chars, [ord(c) for c in u"仅今年前"])
 
   def testScalarDecodeWithOffset(self):
     text = constant_op.constant(u"仅今年前".encode("utf-8"))
-    chars, starts = ragged.unicode_decode_with_offsets(text, "utf-8")
+    chars, starts = ragged_string_ops.unicode_decode_with_offsets(text, "utf-8")
     self.assertAllEqual(chars, [ord(c) for c in u"仅今年前"])
     self.assertAllEqual(starts, [0, 3, 6, 9])
 
   def testVectorDecode(self):
     text = constant_op.constant([u"仅今年前".encode("utf-8"), b"hello"])
-    chars = ragged.unicode_decode(text, "utf-8")
+    chars = ragged_string_ops.unicode_decode(text, "utf-8")
     expected_chars = [[ord(c) for c in u"仅今年前"],
                       [ord(c) for c in u"hello"]]
     self.assertRaggedEqual(chars, expected_chars)
 
   def testVectorDecodeWithOffset(self):
     text = constant_op.constant([u"仅今年前".encode("utf-8"), b"hello"])
-    chars, starts = ragged.unicode_decode_with_offsets(text, "utf-8")
+    chars, starts = ragged_string_ops.unicode_decode_with_offsets(text, "utf-8")
     expected_chars = [[ord(c) for c in u"仅今年前"],
                       [ord(c) for c in u"hello"]]
     self.assertRaggedEqual(chars, expected_chars)
@@ -129,9 +130,9 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
       {"texts": []}
   ])  # pyformat: disable
   def testBasicDecode(self, texts, ragged_rank=None):
-    input_tensor = ragged.constant_value(
+    input_tensor = ragged_factory_ops.constant_value(
         _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
-    result = ragged.unicode_decode(input_tensor, "UTF-8")
+    result = ragged_string_ops.unicode_decode(input_tensor, "UTF-8")
     expected = _nested_codepoints(texts)
     self.assertRaggedEqual(expected, result)
 
@@ -145,9 +146,10 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
       {"texts": []}
   ])  # pyformat: disable
   def testBasicDecodeWithOffsets(self, texts, ragged_rank=None):
-    input_tensor = ragged.constant_value(
+    input_tensor = ragged_factory_ops.constant_value(
         _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
-    result = ragged.unicode_decode_with_offsets(input_tensor, "UTF-8")
+    result = ragged_string_ops.unicode_decode_with_offsets(
+        input_tensor, "UTF-8")
     expected_codepoints = _nested_codepoints(texts)
     expected_offsets = _nested_offsets(texts, "UTF-8")
     self.assertRaggedEqual(expected_codepoints, result[0])
@@ -155,8 +157,9 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
 
   def testDocstringExamples(self):
     texts = [s.encode("utf8") for s in [u"G\xf6\xf6dnight", u"\U0001f60a"]]
-    codepoints1 = ragged.unicode_decode(texts, "UTF-8")
-    codepoints2, offsets = ragged.unicode_decode_with_offsets(texts, "UTF-8")
+    codepoints1 = ragged_string_ops.unicode_decode(texts, "UTF-8")
+    codepoints2, offsets = ragged_string_ops.unicode_decode_with_offsets(
+        texts, "UTF-8")
     self.assertRaggedEqual(
         codepoints1, [[71, 246, 246, 100, 110, 105, 103, 104, 116], [128522]])
     self.assertRaggedEqual(
@@ -184,8 +187,7 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
   ])
   def testDecodeWithSparseOutput(self, texts, expected):
     input_tensor = np.array(_nested_encode(texts, "UTF-8"), dtype=bytes)
-    result = ragged.unicode_decode(
-        input_tensor, "UTF-8").to_sparse()
+    result = ragged_string_ops.unicode_decode(input_tensor, "UTF-8").to_sparse()
     self.assertIsInstance(result, sparse_tensor.SparseTensor)
     self.assertAllEqual(expected.indices, result.indices)
     self.assertAllEqual(expected.values, result.values)
@@ -219,9 +221,9 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
       dict(texts=[], expected=np.zeros([0, 0], np.int64)),
   ])  # pyformat: disable
   def testDecodeWithPaddedOutput(self, texts, expected, ragged_rank=None):
-    input_tensor = ragged.constant_value(
+    input_tensor = ragged_factory_ops.constant_value(
         _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
-    result = ragged.unicode_decode(
+    result = ragged_string_ops.unicode_decode(
         input_tensor, "UTF-8").to_tensor(default_value=-1)
     self.assertAllEqual(expected, result)
 
@@ -260,7 +262,7 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
                     [61, 61, 0, 61, 61], [119, 111, 114, 108, 100]]),
   ])  # pyformat: disable
   def testErrorModes(self, expected=None, **args):
-    result = ragged.unicode_decode(**args)
+    result = ragged_string_ops.unicode_decode(**args)
     self.assertRaggedEqual(expected, result)
 
   @parameterized.parameters([
@@ -311,7 +313,7 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
                                 expected=None,
                                 expected_offsets=None,
                                 **args):
-    result = ragged.unicode_decode_with_offsets(**args)
+    result = ragged_string_ops.unicode_decode_with_offsets(**args)
     self.assertRaggedEqual(result[0], expected)
     self.assertRaggedEqual(result[1], expected_offsets)
 
@@ -326,7 +328,7 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
   def testDecodeWithDifferentEncodings(self, encoding, texts):
     expected = _nested_codepoints(texts)
     input_tensor = constant_op.constant(_nested_encode(texts, encoding))
-    result = ragged.unicode_decode(input_tensor, encoding)
+    result = ragged_string_ops.unicode_decode(input_tensor, encoding)
     self.assertRaggedEqual(expected, result)
 
   @parameterized.parameters(
@@ -341,7 +343,8 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
     expected_codepoints = _nested_codepoints(texts)
     expected_offsets = _nested_offsets(texts, encoding)
     input_tensor = constant_op.constant(_nested_encode(texts, encoding))
-    result = ragged.unicode_decode_with_offsets(input_tensor, encoding)
+    result = ragged_string_ops.unicode_decode_with_offsets(
+        input_tensor, encoding)
     self.assertRaggedEqual(expected_codepoints, result[0])
     self.assertRaggedEqual(expected_offsets, result[1])
 
@@ -363,14 +366,15 @@ class UnicodeDecodeTest(ragged_test_util.RaggedTensorTestCase,
   ])  # pyformat: disable
   def testExceptions(self, exception=None, message=None, **args):
     with self.assertRaisesRegexp(exception, message):
-      self.evaluate(ragged.unicode_decode(**args))
+      self.evaluate(ragged_string_ops.unicode_decode(**args))
 
   def testUnknownRankError(self):
-    if context.executing_eagerly(): return
+    if context.executing_eagerly():
+      return
     s = array_ops.placeholder(dtypes.string)
     message = "Rank of `input` must be statically known."
     with self.assertRaisesRegexp(ValueError, message):
-      self.evaluate(ragged.unicode_decode(s, input_encoding="UTF-8"))
+      self.evaluate(ragged_string_ops.unicode_decode(s, input_encoding="UTF-8"))
 
   @parameterized.parameters([
       dict(
@@ -424,25 +428,25 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
 
   def testScalarSplit(self):
     text = constant_op.constant(u"仅今年前".encode("UTF-8"))
-    chars = ragged.unicode_split(text, "UTF-8")
+    chars = ragged_string_ops.unicode_split(text, "UTF-8")
     self.assertAllEqual(chars, [c.encode("UTF-8") for c in u"仅今年前"])
 
   def testScalarSplitWithOffset(self):
     text = constant_op.constant(u"仅今年前".encode("UTF-8"))
-    chars, starts = ragged.unicode_split_with_offsets(text, "UTF-8")
+    chars, starts = ragged_string_ops.unicode_split_with_offsets(text, "UTF-8")
     self.assertAllEqual(chars, [c.encode("UTF-8") for c in u"仅今年前"])
     self.assertAllEqual(starts, [0, 3, 6, 9])
 
   def testVectorSplit(self):
     text = constant_op.constant([u"仅今年前".encode("UTF-8"), b"hello"])
-    chars = ragged.unicode_split(text, "UTF-8")
+    chars = ragged_string_ops.unicode_split(text, "UTF-8")
     expected_chars = [[c.encode("UTF-8") for c in u"仅今年前"],
                       [c.encode("UTF-8") for c in u"hello"]]
     self.assertRaggedEqual(chars, expected_chars)
 
   def testVectorSplitWithOffset(self):
     text = constant_op.constant([u"仅今年前".encode("UTF-8"), b"hello"])
-    chars, starts = ragged.unicode_split_with_offsets(text, "UTF-8")
+    chars, starts = ragged_string_ops.unicode_split_with_offsets(text, "UTF-8")
     expected_chars = [[c.encode("UTF-8") for c in u"仅今年前"],
                       [c.encode("UTF-8") for c in u"hello"]]
     self.assertRaggedEqual(chars, expected_chars)
@@ -458,9 +462,9 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
       {"texts": []}
   ])  # pyformat: disable
   def testBasicSplit(self, texts, ragged_rank=None):
-    input_tensor = ragged.constant_value(
+    input_tensor = ragged_factory_ops.constant_value(
         _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
-    result = ragged.unicode_split(input_tensor, "UTF-8")
+    result = ragged_string_ops.unicode_split(input_tensor, "UTF-8")
     expected = _nested_splitchars(texts, "UTF-8")
     self.assertRaggedEqual(expected, result)
 
@@ -474,9 +478,9 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
       {"texts": []}
   ])  # pyformat: disable
   def testBasicSplitWithOffsets(self, texts, ragged_rank=None):
-    input_tensor = ragged.constant_value(
+    input_tensor = ragged_factory_ops.constant_value(
         _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
-    result = ragged.unicode_split_with_offsets(input_tensor, "UTF-8")
+    result = ragged_string_ops.unicode_split_with_offsets(input_tensor, "UTF-8")
     expected_codepoints = _nested_splitchars(texts, "UTF-8")
     expected_offsets = _nested_offsets(texts, "UTF-8")
     self.assertRaggedEqual(expected_codepoints, result[0])
@@ -484,8 +488,9 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
 
   def testDocstringExamples(self):
     texts = [s.encode("utf8") for s in [u"G\xf6\xf6dnight", u"\U0001f60a"]]
-    codepoints1 = ragged.unicode_split(texts, "UTF-8")
-    codepoints2, offsets = ragged.unicode_split_with_offsets(texts, "UTF-8")
+    codepoints1 = ragged_string_ops.unicode_split(texts, "UTF-8")
+    codepoints2, offsets = ragged_string_ops.unicode_split_with_offsets(
+        texts, "UTF-8")
     self.assertRaggedEqual(
         codepoints1,
         [[b"G", b"\xc3\xb6", b"\xc3\xb6", b"d", b"n", b"i", b"g", b"h", b"t"],
@@ -522,8 +527,7 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
   ])  # pyformat: disable
   def testSplitWithSparseOutput(self, texts, expected):
     input_tensor = np.array(_nested_encode(texts, "UTF-8"), dtype=bytes)
-    result = ragged.unicode_split(
-        input_tensor, "UTF-8").to_sparse()
+    result = ragged_string_ops.unicode_split(input_tensor, "UTF-8").to_sparse()
     self.assertIsInstance(result, sparse_tensor.SparseTensor)
     self.assertAllEqual(expected.indices, result.indices)
     self.assertAllEqual(expected.values, result.values)
@@ -565,9 +569,9 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
       dict(texts=[], expected=np.zeros([0, 0], np.int64)),
   ])  # pyformat: disable
   def testSplitWithPaddedOutput(self, texts, expected, ragged_rank=None):
-    input_tensor = ragged.constant_value(
+    input_tensor = ragged_factory_ops.constant_value(
         _nested_encode(texts, "UTF-8"), ragged_rank=ragged_rank, dtype=bytes)
-    result = ragged.unicode_split(
+    result = ragged_string_ops.unicode_split(
         input_tensor, "UTF-8").to_tensor(default_value="")
     self.assertAllEqual(np.array(expected, dtype=bytes), result)
 
@@ -599,7 +603,7 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
                     [b"w", b"o", b"r", b"l", b"d"]]),
   ])  # pyformat: disable
   def testErrorModes(self, expected=None, **args):
-    result = ragged.unicode_split(**args)
+    result = ragged_string_ops.unicode_split(**args)
     self.assertRaggedEqual(expected, result)
 
   @parameterized.parameters([
@@ -639,7 +643,7 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
                                 expected=None,
                                 expected_offsets=None,
                                 **args):
-    result = ragged.unicode_split_with_offsets(**args)
+    result = ragged_string_ops.unicode_split_with_offsets(**args)
     self.assertRaggedEqual(expected, result[0])
     self.assertRaggedEqual(expected_offsets, result[1])
 
@@ -651,7 +655,7 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
   def testSplitWithDifferentEncodings(self, encoding, texts):
     expected = _nested_splitchars(texts, encoding)
     input_tensor = constant_op.constant(_nested_encode(texts, encoding))
-    result = ragged.unicode_split(input_tensor, encoding)
+    result = ragged_string_ops.unicode_split(input_tensor, encoding)
     self.assertRaggedEqual(expected, result)
 
   @parameterized.parameters(
@@ -663,7 +667,8 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
     expected_codepoints = _nested_splitchars(texts, encoding)
     expected_offsets = _nested_offsets(texts, encoding)
     input_tensor = constant_op.constant(_nested_encode(texts, encoding))
-    result = ragged.unicode_split_with_offsets(input_tensor, encoding)
+    result = ragged_string_ops.unicode_split_with_offsets(
+        input_tensor, encoding)
     self.assertRaggedEqual(expected_codepoints, result[0])
     self.assertRaggedEqual(expected_offsets, result[1])
 
@@ -685,14 +690,15 @@ class UnicodeSplitTest(ragged_test_util.RaggedTensorTestCase,
   ])  # pyformat: disable
   def testExceptions(self, exception=None, message=None, **args):
     with self.assertRaisesRegexp(exception, message):
-      self.evaluate(ragged.unicode_split(**args))
+      self.evaluate(ragged_string_ops.unicode_split(**args))
 
   def testUnknownRankError(self):
-    if context.executing_eagerly(): return
+    if context.executing_eagerly():
+      return
     s = array_ops.placeholder(dtypes.string)
     message = "Rank of `input` must be statically known."
     with self.assertRaisesRegexp(ValueError, message):
-      self.evaluate(ragged.unicode_decode(s, input_encoding="UTF-8"))
+      self.evaluate(ragged_string_ops.unicode_decode(s, input_encoding="UTF-8"))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/ops/ragged/BUILD b/tensorflow/python/ops/ragged/BUILD
index 1922e4b92a..89b8c4a2b3 100644
--- a/tensorflow/python/ops/ragged/BUILD
+++ b/tensorflow/python/ops/ragged/BUILD
@@ -1,3 +1,5 @@
+load("//tensorflow:tensorflow.bzl", "py_test")
+
 package(
     default_visibility = [
         "//intelligence/datum/prensor:__pkg__",
@@ -11,8 +13,6 @@ licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load("//tensorflow:tensorflow.bzl", "py_test")
-
 #-------------------------------------------------------------------------------
 # RaggedTensor
 #-------------------------------------------------------------------------------
@@ -48,7 +48,6 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged_conversion_ops",
-        ":ragged_factory_ops",
         ":ragged_functional_ops",
         ":ragged_math_ops",
         ":ragged_tensor",
@@ -62,7 +61,6 @@ py_library(
         "//tensorflow/python:ragged_array_ops_gen",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:tensor_util",
-        "//tensorflow/python:util",
     ],
 )
 
@@ -195,6 +193,7 @@ py_library(
         ":ragged_util",
         ":segment_id_ops",
         "//tensorflow/python:framework_ops",
+        "//tensorflow/python:ragged_conversion_ops_gen",
         "//tensorflow/python:session",
         "//tensorflow/python:tensor_shape",
         "//tensorflow/python:util",
@@ -289,7 +288,6 @@ py_library(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged_array_ops",
-        ":ragged_factory_ops",
         ":ragged_math_ops",
         ":ragged_tensor",
         ":ragged_tensor_shape",
@@ -302,6 +300,7 @@ py_library(
         "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
         "//tensorflow/python:util",
+        "//tensorflow/python:variables",
         "//third_party/py/numpy",
     ],
 )
@@ -319,7 +318,7 @@ py_library(
         ":ragged_tensor_value",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:platform_test",
+        "//third_party/py/numpy",
     ],
 )
 
@@ -332,14 +331,20 @@ py_test(
         "no_windows",
     ],
     deps = [
-        ":ragged",
+        ":ragged",  # fixdeps: keep
+        ":ragged_factory_ops",
+        ":ragged_math_ops",
+        ":ragged_tensor",
+        ":ragged_tensor_value",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -351,10 +356,9 @@ py_test(
     srcs = ["ragged_eager_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:framework_ops",
-        "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -365,7 +369,7 @@ py_test(
     srcs = ["ragged_range_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_math_ops",
         ":ragged_test_util",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
@@ -378,7 +382,8 @@ py_test(
     srcs = ["ragged_tensor_bounding_shape_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -390,8 +395,10 @@ py_test(
     srcs = ["ragged_row_lengths_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
+        "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
         "@absl_py//absl/testing:parameterized",
@@ -403,7 +410,8 @@ py_test(
     srcs = ["ragged_gather_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
@@ -412,6 +420,7 @@ py_test(
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:context",
     ],
 )
 
@@ -420,13 +429,16 @@ py_test(
     srcs = ["ragged_batch_gather_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:context",
         "@absl_py//absl/testing:parameterized",
     ],
 )
@@ -436,12 +448,15 @@ py_test(
     srcs = ["ragged_gather_nd_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
+        "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -452,8 +467,8 @@ py_test(
     srcs = ["ragged_row_splits_to_segment_ids_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
         ":ragged_test_util",
+        ":segment_id_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -465,8 +480,8 @@ py_test(
     srcs = ["ragged_segment_ids_to_row_splits_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
         ":ragged_test_util",
+        ":segment_id_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -478,7 +493,7 @@ py_test(
     srcs = ["ragged_from_tensor_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
@@ -497,7 +512,10 @@ py_test(
         "no_windows",
     ],
     deps = [
-        ":ragged",
+        ":ragged",  # fixdeps: keep
+        ":ragged_factory_ops",
+        ":ragged_functional_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
@@ -506,6 +524,7 @@ py_test(
         "//tensorflow/python:gradients_impl",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:context",
     ],
 )
 
@@ -514,7 +533,7 @@ py_test(
     srcs = ["ragged_from_sparse_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
@@ -522,6 +541,7 @@ py_test(
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
         "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/eager:context",
     ],
 )
 
@@ -530,7 +550,7 @@ py_test(
     srcs = ["ragged_to_tensor_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:framework_ops",
@@ -545,7 +565,9 @@ py_test(
     srcs = ["ragged_segment_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
+        ":ragged_math_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
@@ -561,13 +583,15 @@ py_test(
     srcs = ["ragged_reduce_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
+        ":ragged_math_ops",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -578,7 +602,9 @@ py_test(
     srcs = ["ragged_map_flat_values_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
+        ":ragged_functional_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
@@ -587,7 +613,6 @@ py_test(
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform_test",
-        "@absl_py//absl/testing:parameterized",
     ],
 )
 
@@ -597,6 +622,8 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":ragged",
+        ":ragged_factory_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_test_lib",
@@ -613,7 +640,9 @@ py_test(
         "no_windows",
     ],
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
+        ":ragged_tensor",
+        ":ragged_tensor_value",
         ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -627,7 +656,8 @@ py_test(
     srcs = ["convert_to_tensor_or_ragged_tensor_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_factory_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
@@ -643,13 +673,15 @@ py_test(
     srcs = ["ragged_boolean_mask_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:context",
         "@absl_py//absl/testing:parameterized",
     ],
 )
@@ -659,7 +691,8 @@ py_test(
     srcs = ["ragged_concat_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
@@ -667,6 +700,7 @@ py_test(
         "//tensorflow/python:errors",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python/eager:context",
         "@absl_py//absl/testing:parameterized",
     ],
 )
@@ -676,7 +710,8 @@ py_test(
     srcs = ["ragged_stack_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:framework_test_lib",
@@ -690,7 +725,8 @@ py_test(
     srcs = ["ragged_tile_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:constant_op",
@@ -722,7 +758,8 @@ py_test(
     srcs = ["ragged_expand_dims_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -735,7 +772,8 @@ py_test(
     srcs = ["ragged_where_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged_array_ops",
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -748,7 +786,9 @@ py_test(
     srcs = ["ragged_dispatch_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged",  # fixdeps: keep
+        ":ragged_factory_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:clip_ops",
@@ -759,7 +799,9 @@ py_test(
         "//tensorflow/python:math_ops",
         "//tensorflow/python:parsing_ops",
         "//tensorflow/python:platform_test",
+        "//tensorflow/python:sparse_tensor",
         "//tensorflow/python:string_ops",
+        "//tensorflow/python/eager:context",
         "//third_party/py/numpy",
         "@absl_py//absl/testing:parameterized",
     ],
@@ -770,7 +812,8 @@ py_test(
     srcs = ["ragged_operators_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged",  # fixdeps: keep
+        ":ragged_factory_ops",
         ":ragged_test_util",
         "//tensorflow/python:framework_test_lib",
         "//tensorflow/python:platform_test",
@@ -783,7 +826,12 @@ py_test(
     srcs = ["ragged_map_fn_op_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged",  # fixdeps: keep
+        ":ragged_factory_ops",
+        ":ragged_functional_ops",
+        ":ragged_map_ops",
+        ":ragged_math_ops",
+        ":ragged_tensor",
         ":ragged_test_util",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:dtypes",
@@ -802,7 +850,10 @@ py_test(
     srcs = ["ragged_tensor_shape_test.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":ragged",
+        ":ragged",  # fixdeps: keep
+        ":ragged_factory_ops",
+        ":ragged_tensor",
+        ":ragged_tensor_shape",
         ":ragged_test_util",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:framework_test_lib",
diff --git a/tensorflow/python/ops/ragged/__init__.py b/tensorflow/python/ops/ragged/__init__.py
index 3e35892925..7806f56978 100644
--- a/tensorflow/python/ops/ragged/__init__.py
+++ b/tensorflow/python/ops/ragged/__init__.py
@@ -1,164 +1,47 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
 """Ragged Tensors.
 
-This package defines the `tf.RaggedTensor` class, which
-represents tensors with non-uniform shapes.  In particular, each `RaggedTensor`
+This package defines ops for manipulating ragged tensors (`tf.RaggedTensor`),
+which are tensors with non-uniform shapes.  In particular, each `RaggedTensor`
 has one or more *ragged dimensions*, which are dimensions whose slices may have
 different lengths.  For example, the inner (column) dimension of
 `rt=[[3, 1, 4, 1], [], [5, 9, 2], [6], []]` is ragged, since the column slices
 (`rt[0, :]`, ..., `rt[4, :]`) have different lengths.  For a more detailed
-description of ragged tensors, see the `tf.RaggedTensor`
-class documentation.
-
-<!-- Ragged Classes & related helper functions -->
-@@RaggedTensor
-@@RaggedTensorType
-@@RaggedTensorValue
-@@is_ragged
-
-<!-- Factory Ops -->
-@@ragged_factory_ops
-@@constant
-@@constant_value
-@@convert_to_tensor_or_ragged_tensor
-
-<!-- Conversion Ops -->
-@@from_tensor
-@@to_tensor
-@@from_sparse
-@@to_sparse
-@@row_splits_to_segment_ids
-@@segment_ids_to_row_splits
-
-<!-- Array Ops -->
-@@gather
-@@batch_gather
-@@gather_nd
-@@boolean_mask
-@@concat
-@@stack
-@@tile
-@@expand_dims
-@@where
-
-<!-- Math Ops -->
-@@range
-
-@@segment_sum
-@@segment_prod
-@@segment_min
-@@segment_max
-@@segment_mean
-@@segment_sqrt_n
-
-@@reduce_sum
-@@reduce_prod
-@@reduce_min
-@@reduce_max
-@@reduce_mean
-@@reduce_all
-@@reduce_any
-
-<!-- Functional Ops -->
-@@map_flat_values
-@@map_fn
-
-<!-- Shape & broadcasting -->
-@@RaggedTensorDynamicShape
-@@broadcast_to
-@@broadcast_dynamic_shape
-
-<!-- String ops -->
-@@unicode_decode
-@@unicode_decode_with_offsets
-@@unicode_split
-@@unicode_split_with_offsets
-
-<!-- Modules -->
-@@ragged_dispatch
-@@ragged_factory_ops
-@@ragged_operators
-@@ragged_string_ops
-@@ragged_tensor
-@@ragged_tensor_value
-@@ragged_util
+description of ragged tensors, see the `tf.RaggedTensor` class documentation
+and the [Ragged Tensor Guide](/guides/ragged_tensor).
 """
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_conversion_ops
 from tensorflow.python.ops.ragged import ragged_dispatch
 from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_functional_ops
+from tensorflow.python.ops.ragged import ragged_getitem
+from tensorflow.python.ops.ragged import ragged_map_ops
+from tensorflow.python.ops.ragged import ragged_math_ops
 from tensorflow.python.ops.ragged import ragged_operators
 from tensorflow.python.ops.ragged import ragged_string_ops
 from tensorflow.python.ops.ragged import ragged_tensor
+from tensorflow.python.ops.ragged import ragged_tensor_shape
 from tensorflow.python.ops.ragged import ragged_tensor_value
-from tensorflow.python.ops.ragged import ragged_util
-
-from tensorflow.python.ops.ragged.ragged_array_ops import batch_gather
-from tensorflow.python.ops.ragged.ragged_array_ops import boolean_mask
-from tensorflow.python.ops.ragged.ragged_array_ops import concat
-from tensorflow.python.ops.ragged.ragged_array_ops import expand_dims
-from tensorflow.python.ops.ragged.ragged_array_ops import gather
-from tensorflow.python.ops.ragged.ragged_array_ops import gather_nd
-from tensorflow.python.ops.ragged.ragged_array_ops import stack
-from tensorflow.python.ops.ragged.ragged_array_ops import tile
-from tensorflow.python.ops.ragged.ragged_array_ops import where
-
-from tensorflow.python.ops.ragged.ragged_conversion_ops import from_sparse
-from tensorflow.python.ops.ragged.ragged_conversion_ops import from_tensor
-from tensorflow.python.ops.ragged.ragged_conversion_ops import to_sparse
-from tensorflow.python.ops.ragged.ragged_conversion_ops import to_tensor
-
-from tensorflow.python.ops.ragged.ragged_factory_ops import constant
-from tensorflow.python.ops.ragged.ragged_factory_ops import constant_value
+from tensorflow.python.ops.ragged import segment_id_ops
 
-from tensorflow.python.ops.ragged.ragged_functional_ops import map_flat_values
-
-from tensorflow.python.ops.ragged.ragged_map_ops import map_fn
-
-from tensorflow.python.ops.ragged.ragged_math_ops import range  # pylint: disable=redefined-builtin
-
-from tensorflow.python.ops.ragged.ragged_math_ops import reduce_all
-from tensorflow.python.ops.ragged.ragged_math_ops import reduce_any
-from tensorflow.python.ops.ragged.ragged_math_ops import reduce_max
-from tensorflow.python.ops.ragged.ragged_math_ops import reduce_mean
-from tensorflow.python.ops.ragged.ragged_math_ops import reduce_min
-from tensorflow.python.ops.ragged.ragged_math_ops import reduce_prod
-from tensorflow.python.ops.ragged.ragged_math_ops import reduce_sum
-
-from tensorflow.python.ops.ragged.ragged_math_ops import segment_max
-from tensorflow.python.ops.ragged.ragged_math_ops import segment_mean
-from tensorflow.python.ops.ragged.ragged_math_ops import segment_min
-from tensorflow.python.ops.ragged.ragged_math_ops import segment_prod
-from tensorflow.python.ops.ragged.ragged_math_ops import segment_sqrt_n
-from tensorflow.python.ops.ragged.ragged_math_ops import segment_sum
-
-from tensorflow.python.ops.ragged.ragged_string_ops import unicode_decode
-from tensorflow.python.ops.ragged.ragged_string_ops import unicode_decode_with_offsets
-from tensorflow.python.ops.ragged.ragged_string_ops import unicode_split
-from tensorflow.python.ops.ragged.ragged_string_ops import unicode_split_with_offsets
-
-from tensorflow.python.ops.ragged.ragged_tensor import convert_to_tensor_or_ragged_tensor
-from tensorflow.python.ops.ragged.ragged_tensor import is_ragged
-from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor
-from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensorType
-
-from tensorflow.python.ops.ragged.ragged_tensor_shape import broadcast_dynamic_shape
-from tensorflow.python.ops.ragged.ragged_tensor_shape import broadcast_to
-from tensorflow.python.ops.ragged.ragged_tensor_shape import RaggedTensorDynamicShape
-
-from tensorflow.python.ops.ragged.ragged_tensor_value import RaggedTensorValue
-
-from tensorflow.python.ops.ragged.segment_id_ops import row_splits_to_segment_ids
-from tensorflow.python.ops.ragged.segment_id_ops import segment_ids_to_row_splits
-
-from tensorflow.python.util import all_util as _all_util
-
-
-# Register OpDispatchers that override standard TF ops to work w/ RaggedTensors.
+# Add a list of the ops that support Ragged Tensors.
 __doc__ += ragged_dispatch.ragged_op_list()  # pylint: disable=redefined-builtin
-
-# Any symbol that is not referenced (with "@@name") in the module docstring
-# above will be removed.
-_all_util.remove_undocumented(__name__)
diff --git a/tensorflow/python/ops/ragged/convert_to_tensor_or_ragged_tensor_op_test.py b/tensorflow/python/ops/ragged/convert_to_tensor_or_ragged_tensor_op_test.py
index b88f18c8b6..be1ccd9c72 100644
--- a/tensorflow/python/ops/ragged/convert_to_tensor_or_ragged_tensor_op_test.py
+++ b/tensorflow/python/ops/ragged/convert_to_tensor_or_ragged_tensor_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.convert_to_tensor_or_ragged."""
+"""Tests for ragged_tensor.convert_to_tensor_or_ragged."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -24,7 +24,8 @@ import numpy as np
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -42,8 +43,8 @@ class RaggedConvertToTensorOrRaggedTensorTest(
       dict(pylist=[[1, 2], [3]], preferred_dtype=dtypes.string),
   ])
   def testConvertRaggedTensor(self, pylist, dtype=None, preferred_dtype=None):
-    rt = ragged.constant(pylist)
-    converted = ragged.convert_to_tensor_or_ragged_tensor(
+    rt = ragged_factory_ops.constant(pylist)
+    converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         rt, dtype, preferred_dtype)
     self.assertIs(converted, rt)
 
@@ -64,34 +65,40 @@ class RaggedConvertToTensorOrRaggedTensorTest(
                                    message,
                                    dtype=None,
                                    preferred_dtype=None):
-    rt = ragged.constant(pylist)
+    rt = ragged_factory_ops.constant(pylist)
 
     with self.assertRaisesRegexp(ValueError, message):
-      ragged.convert_to_tensor_or_ragged_tensor(rt, dtype, preferred_dtype)
+      ragged_tensor.convert_to_tensor_or_ragged_tensor(rt, dtype,
+                                                       preferred_dtype)
 
   #=============================================================================
   # Tests where the 'value' param is a RaggedTensorValue
   #=============================================================================
-  @parameterized.parameters([
-      dict(
-          value=ragged.constant_value([[1, 2], [3]], dtype=np.int32),
-          expected_dtype=dtypes.int32),
-      dict(
-          value=ragged.constant_value([[b'a', b'b'], [b'c']]),
-          expected_dtype=dtypes.string),
-      dict(
-          value=ragged.constant_value([[1, 2], [3]], dtype=np.int32),
-          dtype=dtypes.float32,
-          expected_dtype=dtypes.float32),
-      dict(
-          value=ragged.constant_value([[1, 2], [3]], dtype=np.int32),
-          preferred_dtype=dtypes.float32,
-          expected_dtype=dtypes.float32),
-      dict(
-          value=ragged.constant_value([[1, 2], [3]], dtype=np.int32),
-          preferred_dtype=dtypes.string,
-          expected_dtype=dtypes.int32),
-  ])
+  @parameterized.parameters(
+      [
+          dict(
+              value=ragged_factory_ops.constant_value([[1, 2], [3]],
+                                                      dtype=np.int32),
+              expected_dtype=dtypes.int32),
+          dict(
+              value=ragged_factory_ops.constant_value([[b'a', b'b'], [b'c']]),
+              expected_dtype=dtypes.string),
+          dict(
+              value=ragged_factory_ops.constant_value([[1, 2], [3]],
+                                                      dtype=np.int32),
+              dtype=dtypes.float32,
+              expected_dtype=dtypes.float32),
+          dict(
+              value=ragged_factory_ops.constant_value([[1, 2], [3]],
+                                                      dtype=np.int32),
+              preferred_dtype=dtypes.float32,
+              expected_dtype=dtypes.float32),
+          dict(
+              value=ragged_factory_ops.constant_value([[1, 2], [3]],
+                                                      dtype=np.int32),
+              preferred_dtype=dtypes.string,
+              expected_dtype=dtypes.int32),
+      ])
   def testConvertRaggedTensorValue(self,
                                    value,
                                    dtype=None,
@@ -99,7 +106,7 @@ class RaggedConvertToTensorOrRaggedTensorTest(
                                    expected_dtype=None):
     if expected_dtype is None:
       expected_dtype = value.dtype if dtype is None else dtype
-    converted = ragged.convert_to_tensor_or_ragged_tensor(
+    converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         value, dtype, preferred_dtype)
     self.assertEqual(value.ragged_rank, converted.ragged_rank)
     self.assertEqual(dtypes.as_dtype(expected_dtype), converted.dtype)
@@ -107,7 +114,8 @@ class RaggedConvertToTensorOrRaggedTensorTest(
 
   @parameterized.parameters([
       dict(
-          value=ragged.constant_value([['a', 'b'], ['c']], dtype=str),
+          value=ragged_factory_ops.constant_value([['a', 'b'], ['c']],
+                                                  dtype=str),
           dtype=dtypes.int32,
           message=r"invalid literal for int\(\) with base 10: 'a'"),
   ])
@@ -117,7 +125,8 @@ class RaggedConvertToTensorOrRaggedTensorTest(
                                         dtype=None,
                                         preferred_dtype=None):
     with self.assertRaisesRegexp(ValueError, message):
-      ragged.convert_to_tensor_or_ragged_tensor(value, dtype, preferred_dtype)
+      ragged_tensor.convert_to_tensor_or_ragged_tensor(value, dtype,
+                                                       preferred_dtype)
 
   #=============================================================================
   # Tests where the 'value' param is a Tensor
@@ -129,7 +138,7 @@ class RaggedConvertToTensorOrRaggedTensorTest(
   ])
   def testConvertTensor(self, pylist, dtype=None, preferred_dtype=None):
     tensor = constant_op.constant(pylist)
-    converted = ragged.convert_to_tensor_or_ragged_tensor(
+    converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         tensor, dtype, preferred_dtype)
     self.assertIs(tensor, converted)
 
@@ -152,7 +161,8 @@ class RaggedConvertToTensorOrRaggedTensorTest(
                              preferred_dtype=None):
     tensor = constant_op.constant(pylist)
     with self.assertRaisesRegexp(ValueError, message):
-      ragged.convert_to_tensor_or_ragged_tensor(tensor, dtype, preferred_dtype)
+      ragged_tensor.convert_to_tensor_or_ragged_tensor(tensor, dtype,
+                                                       preferred_dtype)
 
   #=============================================================================
   # Tests where the 'value' param is a np.array
@@ -184,7 +194,7 @@ class RaggedConvertToTensorOrRaggedTensorTest(
                             expected_dtype=None):
     if expected_dtype is None:
       expected_dtype = value.dtype if dtype is None else dtype
-    converted = ragged.convert_to_tensor_or_ragged_tensor(
+    converted = ragged_tensor.convert_to_tensor_or_ragged_tensor(
         value, dtype, preferred_dtype)
     self.assertEqual(dtypes.as_dtype(expected_dtype), converted.dtype)
     self.assertAllEqual(value, converted)
@@ -201,7 +211,8 @@ class RaggedConvertToTensorOrRaggedTensorTest(
                                  dtype=None,
                                  preferred_dtype=None):
     with self.assertRaisesRegexp(ValueError, message):
-      ragged.convert_to_tensor_or_ragged_tensor(value, dtype, preferred_dtype)
+      ragged_tensor.convert_to_tensor_or_ragged_tensor(value, dtype,
+                                                       preferred_dtype)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_array_ops.py b/tensorflow/python/ops/ragged/ragged_array_ops.py
index dfa9790cd8..8ba8c53212 100644
--- a/tensorflow/python/ops/ragged/ragged_array_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_array_ops.py
@@ -58,8 +58,8 @@ def gather(params, indices, validate_indices=None, axis=0, name=None):
   ```python
   >>> params = tf.constant(['a', 'b', 'c', 'd', 'e'])
   >>> indices = tf.constant([3, 1, 2, 1, 0])
-  >>> ragged_params = ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
-  >>> ragged_indices = ragged.constant([[3, 1, 2], [1], [], [0]])
+  >>> ragged_params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
+  >>> ragged_indices = tf.ragged.constant([[3, 1, 2], [1], [], [0]])
 
   >>> print ragged.gather(params, ragged_indices)
   [['d', 'b', 'c'], ['b'], [], ['a']]
@@ -149,8 +149,8 @@ def batch_gather(params, indices, name=None):
 
   #### Example:
     ```python
-    >>> params = ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
-    >>> indices = ragged.constant([[1, 2, 0], [], [], [0, 0]])
+    >>> params = tf.ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
+    >>> indices = tf.ragged.constant([[1, 2, 0], [], [], [0, 0]])
     >>> ragged.batch_gather(params, indices)
     [['b', 'c', 'a'], [], [], ['e', 'e']]
     ```
@@ -573,8 +573,8 @@ def concat(values, axis, name=None):
 
   #### Example:
     ```python
-    >>> t1 = ragged.constant([[1, 2], [3, 4, 5]])
-    >>> t2 = ragged.constant([[6], [7, 8, 9]])
+    >>> t1 = tf.ragged.constant([[1, 2], [3, 4, 5]])
+    >>> t2 = tf.ragged.constant([[6], [7, 8, 9]])
     >>> ragged.concat([t1, t2], axis=0)
     [[1, 2], [3, 4, 5], [6], [7, 8, 9]]
     >>> ragged.concat([t1, t2], axis=1)
@@ -614,8 +614,8 @@ def stack(values, axis=0, name=None):
 
   #### Example:
     ```python
-    >>> t1 = ragged.constant([[1, 2], [3, 4, 5]])
-    >>> t2 = ragged.constant([[6], [7, 8, 9]])
+    >>> t1 = tf.ragged.constant([[1, 2], [3, 4, 5]])
+    >>> t2 = tf.ragged.constant([[6], [7, 8, 9]])
     >>> ragged.stack([t1, t2], axis=0)
     [[[1, 2], [3, 4, 5]], [[6], [7, 9, 0]]]
     >>> ragged.stack([t1, t2], axis=1)
@@ -819,7 +819,7 @@ def tile(input, multiples, name=None):  # pylint: disable=redefined-builtin
 
   #### Example:
     ```python
-    >>> rt = ragged.constant([[1, 2], [3]])
+    >>> rt = tf.ragged.constant([[1, 2], [3]])
     >>> ragged.tile(rt, [3, 2])
     [[1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3], [1, 2, 1, 2], [3, 3]]
     ```
@@ -862,7 +862,7 @@ def _tile_ragged_values(rt_input, multiples, const_multiples=None):
 
   #### Example:
     ```python
-    >>> rt = ragged.constant([[1, 2], [3]])
+    >>> rt = tf.ragged.constant([[1, 2], [3]])
     >>> _tile_ragged_values(rt, [3, 2])
     [1, 2, 1, 2, 3, 3, 1, 2, 1, 2, 3, 3, 1, 2, 1, 2, 3, 3]
     ```
@@ -921,7 +921,7 @@ def _tile_ragged_splits(rt_input, multiples, const_multiples=None):
 
   #### Example:
     ```python
-    >>> rt = ragged.constant([[1, 2], [3]])
+    >>> rt = tf.ragged.constant([[1, 2], [3]])
     >>> _tile_ragged_splits(rt, [3, 2])
     [0, 4, 6, 10, 12, 16, 18]
     ```
@@ -1018,7 +1018,7 @@ def expand_dims(input, axis, name=None):  # pylint: disable=redefined-builtin
 
   #### Examples:
     ```python
-    >>> rt = ragged.constant([[1, 2], [3]])
+    >>> rt = tf.ragged.constant([[1, 2], [3]])
     >>> print rt.shape
     TensorShape([2, None])
 
@@ -1109,21 +1109,23 @@ def where(condition, x=None, y=None, name=None):
   #### Examples:
     ```python
     >>> # Coordinates where condition is true.
-    >>> condition = ragged.constant_value([[True, False, True], [False, True]])
+    >>> condition = tf.ragged.constant_value(
+    ...     [[True, False, True], [False, True]])
     >>> ragged.where(condition)
     [[0, 0], [0, 2], [1, 1]]
 
     >>> # Elementwise selection between x and y, based on condition.
-    >>> condition = ragged.constant_value([[True, False, True], [False, True]])
-    >>> x=ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
-    >>> y=ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
+    >>> condition = tf.ragged.constant_value(
+    ...     [[True, False, True], [False, True]])
+    >>> x = tf.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
+    >>> y = tf.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
     >>> ragged.where(condition, x, y)
     [['A', 'b', 'C'], ['d', 'E']]
 
     >>> # Row selection between x and y, based on condition.
     >>> condition = [True, False]
-    >>> x=ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
-    >>> y=ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
+    >>> x = tf.ragged.constant_value([['A', 'B', 'C'], ['D', 'E']])
+    >>> y = tf.ragged.constant_value([['a', 'b', 'c'], ['d', 'e']])
     >>> ragged.where(condition, x, y)
     [['A', 'B', 'C'], ['d', 'e']]
     ```
@@ -1220,4 +1222,3 @@ def _nrows(rt_input, out_type=dtypes.int64, name=None):
   else:
     with ops.name_scope(name, 'RaggedNRows', [rt_input]):
       return array_ops.shape(rt_input, out_type=out_type)[0]
-
diff --git a/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py b/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
index 79f1ae591f..431d350db8 100644
--- a/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_batch_gather_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tf.ragged.batch_gather."""
+"""Tests for ragged_array_ops.batch_gather."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -25,7 +25,9 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -40,10 +42,12 @@ class RaggedBatchGatherOpTest(ragged_test_util.RaggedTensorTestCase,
       #=========================================================================
       dict(
           descr='Docstring example',
-          params=ragged.constant_value([['a', 'b', 'c'], ['d'], [], ['e']]),
-          indices=ragged.constant_value([[1, 2, 0], [], [], [0, 0]]),
-          expected=ragged.constant_value([[b'b', b'c', b'a'], [], [],
-                                          [b'e', b'e']])),
+          params=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d'], [],
+                                                    ['e']]),
+          indices=ragged_factory_ops.constant_value([[1, 2, 0], [], [], [0,
+                                                                         0]]),
+          expected=ragged_factory_ops.constant_value([[b'b', b'c', b'a'], [],
+                                                      [], [b'e', b'e']])),
       #=========================================================================
       # 0 Batch Dimensions
       #=========================================================================
@@ -54,9 +58,10 @@ class RaggedBatchGatherOpTest(ragged_test_util.RaggedTensorTestCase,
           expected=[b'd', b'c']),
       dict(
           descr='params: [P1, (P2)], indices: [I], result: [I, (P2)]',
-          params=ragged.constant_value([['a', 'b'], [], ['c'], ['d', 'e']]),
+          params=ragged_factory_ops.constant_value([['a', 'b'], [], ['c'],
+                                                    ['d', 'e']]),
           indices=[3, 2],
-          expected=ragged.constant_value([[b'd', b'e'], [b'c']])),
+          expected=ragged_factory_ops.constant_value([[b'd', b'e'], [b'c']])),
       #=========================================================================
       # 1 Batch Dimension
       #=========================================================================
@@ -67,22 +72,24 @@ class RaggedBatchGatherOpTest(ragged_test_util.RaggedTensorTestCase,
           expected=[[b'c', b'a'], [b'd', b'e'], [b'h', b'g']]),
       dict(
           descr='params: [B1, (P1)], indices: [B1, I], result: [B1, I]',
-          params=ragged.constant_value([['a', 'b', 'c'], ['d', 'e'], ['g']]),
+          params=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d', 'e'],
+                                                    ['g']]),
           indices=[[2, 0], [0, 1], [0, 0]],
           expected=[[b'c', b'a'], [b'd', b'e'], [b'g', b'g']]),
       dict(
           descr='params: [B1, P1], indices: [B1, (I)], result: [B1, (I)]',
           params=[['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']],
-          indices=ragged.constant_value([[2, 0, 2], [0], [1]]),
-          expected=ragged.constant_value([[b'c', b'a', b'c'], [b'd'], [b'h']])),
+          indices=ragged_factory_ops.constant_value([[2, 0, 2], [0], [1]]),
+          expected=ragged_factory_ops.constant_value([[b'c', b'a', b'c'],
+                                                      [b'd'], [b'h']])),
       dict(
           descr=('params: [B1, (P1), (P2), P3], indices: [B1, I], '
                  'result: [B1, I, (P2), P3]'),
-          params=ragged.constant_value(
+          params=ragged_factory_ops.constant_value(
               [[[['a']], [['b'], ['c']]], [[['d'], ['e']], [['f']]], [[['g']]]],
               ragged_rank=2),
           indices=[[1, 0], [0, 1], [0, 0]],
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[[b'b'], [b'c']], [[b'a']]], [[[b'd'], [b'e']], [[b'f']]],
                [[[b'g']], [[b'g']]]],
               ragged_rank=2)),
@@ -98,31 +105,31 @@ class RaggedBatchGatherOpTest(ragged_test_util.RaggedTensorTestCase,
       dict(
           descr=('params: [B1, (B2), P1], indices: [B1, (B2), I], '
                  'result: [B1, (B2), I]'),
-          params=ragged.constant_value(
+          params=ragged_factory_ops.constant_value(
               [[['a', 'b', 'c'], ['d', 'e', 'f']], [['g', 'h', 'i']]],
               ragged_rank=1),
-          indices=ragged.constant_value([[[2, 0], [0, 1]], [[1, 0]]],
-                                        ragged_rank=1),
-          expected=ragged.constant_value(
+          indices=ragged_factory_ops.constant_value(
+              [[[2, 0], [0, 1]], [[1, 0]]], ragged_rank=1),
+          expected=ragged_factory_ops.constant_value(
               [[[b'c', b'a'], [b'd', b'e']], [[b'h', b'g']]], ragged_rank=1)),
       dict(
           descr=('params: [B1, (B2), (P1)], indices: [B1, (B2), I], '
                  'result: [B1, (B2), I]'),
-          params=ragged.constant_value([[['a', 'b', 'c'], ['d']], [['e', 'f']]],
-                                       ragged_rank=2),
-          indices=ragged.constant_value([[[2, 0], [0, 0]], [[1, 0]]],
-                                        ragged_rank=1),
-          expected=ragged.constant_value(
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b', 'c'], ['d']], [['e', 'f']]], ragged_rank=2),
+          indices=ragged_factory_ops.constant_value(
+              [[[2, 0], [0, 0]], [[1, 0]]], ragged_rank=1),
+          expected=ragged_factory_ops.constant_value(
               [[[b'c', b'a'], [b'd', b'd']], [[b'f', b'e']]], ragged_rank=1)),
       dict(
           descr=('params: [B1, (B2), P1], indices: [B1, (B2), (I)], '
                  'result: [B1, (B2), (I)]'),
-          params=ragged.constant_value(
+          params=ragged_factory_ops.constant_value(
               [[['a', 'b', 'c'], ['d', 'e', 'f']], [['g', 'h', 'i']]],
               ragged_rank=1),
-          indices=ragged.constant_value([[[2, 1, 0], [0]], [[1, 1]]],
-                                        ragged_rank=2),
-          expected=ragged.constant_value(
+          indices=ragged_factory_ops.constant_value(
+              [[[2, 1, 0], [0]], [[1, 1]]], ragged_rank=2),
+          expected=ragged_factory_ops.constant_value(
               [[[b'c', b'b', b'a'], [b'd']], [[b'h', b'h']]], ragged_rank=2)),
       #=========================================================================
       # 3 Batch Dimensions
@@ -131,15 +138,15 @@ class RaggedBatchGatherOpTest(ragged_test_util.RaggedTensorTestCase,
           descr=(
               'params: [B1, (B2), (B3), (P1)], indices: [B1, (B2), (B3), I], '
               'result: [B1, (B2), (B3), I]'),
-          params=ragged.constant_value(
+          params=ragged_factory_ops.constant_value(
               [[[['a', 'b', 'c'], ['d']], [['e', 'f']]]], ragged_rank=3),
-          indices=ragged.constant_value([[[[2, 0], [0, 0]], [[1, 0]]]],
-                                        ragged_rank=2),
-          expected=ragged.constant_value(
+          indices=ragged_factory_ops.constant_value(
+              [[[[2, 0], [0, 0]], [[1, 0]]]], ragged_rank=2),
+          expected=ragged_factory_ops.constant_value(
               [[[[b'c', b'a'], [b'd', b'd']], [[b'f', b'e']]]], ragged_rank=2)),
   ])
   def testRaggedBatchGather(self, descr, params, indices, expected):
-    result = ragged.batch_gather(params, indices)
+    result = ragged_array_ops.batch_gather(params, indices)
     self.assertRaggedEqual(result, expected)
 
   def testRaggedBatchGatherUnknownRankError(self):
@@ -147,54 +154,61 @@ class RaggedBatchGatherOpTest(ragged_test_util.RaggedTensorTestCase,
       return
     params = [['a', 'b'], ['c', 'd']]
     indices = array_ops.placeholder(dtypes.int32, shape=None)
-    ragged_indices = ragged.RaggedTensor.from_row_splits(indices, [0, 2, 4])
+    ragged_indices = ragged_tensor.RaggedTensor.from_row_splits(
+        indices, [0, 2, 4])
 
     with self.assertRaisesRegexp(
         ValueError, 'batch_gather does not allow indices with unknown shape.'):
-      ragged.batch_gather(params, indices)
+      ragged_array_ops.batch_gather(params, indices)
 
     with self.assertRaisesRegexp(
         ValueError, 'batch_gather does not allow indices with unknown shape.'):
-      ragged.batch_gather(params, ragged_indices)
+      ragged_array_ops.batch_gather(params, ragged_indices)
 
-  @parameterized.parameters([
-      dict(
-          params=ragged.constant_value([['a'], ['b'], ['c']]),
-          indices=ragged.constant_value([[0], [0]]),
-          message='Dimensions 3 and 2 are not compatible'),
-      dict(
-          params=[[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
-          indices=ragged.constant_value([[[0, 0], [0, 0, 0]], [[0]]]),
-          message='batch shape from indices does not match params shape'),
-      dict(  # rank mismatch
-          params=ragged.constant_value([[[0, 0], [0, 0, 0]], [[0]]]),
-          indices=ragged.constant_value([[[0, 0]], [[0, 0, 0]], [[0]]]),
-          error=(ValueError, errors.InvalidArgumentError)),
-      dict(
-          params=ragged.constant_value([[[0, 0], [0, 0, 0]], [[0]], [[0]]]),
-          indices=ragged.constant_value([[[0, 0]], [[0, 0, 0]], [[0]]]),
-          error=errors.InvalidArgumentError,
-          message='.*Condition x == y did not hold.*'),
-      dict(
-          params=ragged.constant_value(['a', 'b', 'c']),
-          indices=ragged.constant_value([[0], [0]]),
-          message='batch shape from indices does not match params shape'),
-      dict(
-          params=ragged.constant_value([['a']]),
-          indices=0,
-          message='indices.rank must be at least 1.'),
-      dict(
-          params=ragged.constant_value([['a']]),
-          indices=[[[0]]],
-          message='batch shape from indices does not match params shape'),
-  ])
+  @parameterized.parameters(
+      [
+          dict(
+              params=ragged_factory_ops.constant_value([['a'], ['b'], ['c']]),
+              indices=ragged_factory_ops.constant_value([[0], [0]]),
+              message='Dimensions 3 and 2 are not compatible'),
+          dict(
+              params=[[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
+              indices=ragged_factory_ops.constant_value([[[0, 0], [0, 0, 0]],
+                                                         [[0]]]),
+              message='batch shape from indices does not match params shape'),
+          dict(  # rank mismatch
+              params=ragged_factory_ops.constant_value([[[0, 0], [0, 0, 0]],
+                                                        [[0]]]),
+              indices=ragged_factory_ops.constant_value([[[0, 0]], [[0, 0, 0]],
+                                                         [[0]]]),
+              error=(ValueError, errors.InvalidArgumentError)),
+          dict(
+              params=ragged_factory_ops.constant_value([[[0, 0], [0, 0, 0]],
+                                                        [[0]], [[0]]]),
+              indices=ragged_factory_ops.constant_value([[[0, 0]], [[0, 0, 0]],
+                                                         [[0]]]),
+              error=errors.InvalidArgumentError,
+              message='.*Condition x == y did not hold.*'),
+          dict(
+              params=ragged_factory_ops.constant_value(['a', 'b', 'c']),
+              indices=ragged_factory_ops.constant_value([[0], [0]]),
+              message='batch shape from indices does not match params shape'),
+          dict(
+              params=ragged_factory_ops.constant_value([['a']]),
+              indices=0,
+              message='indices.rank must be at least 1.'),
+          dict(
+              params=ragged_factory_ops.constant_value([['a']]),
+              indices=[[[0]]],
+              message='batch shape from indices does not match params shape'),
+      ])
   def testRaggedBatchGatherStaticError(self,
                                        params,
                                        indices,
                                        message=None,
                                        error=ValueError):
     with self.assertRaisesRegexp(error, message):
-      ragged.batch_gather(params, indices)
+      ragged_array_ops.batch_gather(params, indices)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py b/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
index b0f7459322..19f7d216d2 100644
--- a/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_boolean_mask_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.boolean_mask."""
+"""Tests for ragged_array_ops.boolean_mask."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -25,7 +25,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -54,25 +55,25 @@ class RaggedBooleanMaskOpTest(ragged_test_util.RaggedTensorTestCase,
           data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
           mask=[[T, F, T], [F, F, F], [T, F, F]],
           keepdims=True,
-          expected=ragged.constant_value([[1, 3], [], [7]])),
+          expected=ragged_factory_ops.constant_value([[1, 3], [], [7]])),
       dict(
           descr='Docstring example 3',
-          data=ragged.constant_value([[1, 2, 3], [4], [5, 6]]),
-          mask=ragged.constant_value([[F, F, T], [F], [T, T]]),
+          data=ragged_factory_ops.constant_value([[1, 2, 3], [4], [5, 6]]),
+          mask=ragged_factory_ops.constant_value([[F, F, T], [F], [T, T]]),
           keepdims=False,
           expected=[3, 5, 6]),
       dict(
           descr='Docstring example 4',
-          data=ragged.constant_value([[1, 2, 3], [4], [5, 6]]),
-          mask=ragged.constant_value([[F, F, T], [F], [T, T]]),
+          data=ragged_factory_ops.constant_value([[1, 2, 3], [4], [5, 6]]),
+          mask=ragged_factory_ops.constant_value([[F, F, T], [F], [T, T]]),
           keepdims=True,
-          expected=ragged.constant_value([[3], [], [5, 6]])),
+          expected=ragged_factory_ops.constant_value([[3], [], [5, 6]])),
       dict(
           descr='Docstring example 5',
-          data=ragged.constant_value([[1, 2, 3], [4], [5, 6]]),
+          data=ragged_factory_ops.constant_value([[1, 2, 3], [4], [5, 6]]),
           mask=[True, False, True],
           keepdims=False,
-          expected=ragged.constant_value([[1, 2, 3], [5, 6]])),
+          expected=ragged_factory_ops.constant_value([[1, 2, 3], [5, 6]])),
       #=========================================================================
       # Uniform data and uniform mask.
       #=========================================================================
@@ -93,7 +94,8 @@ class RaggedBooleanMaskOpTest(ragged_test_util.RaggedTensorTestCase,
           data=[[1, 2, 3], [4, 5, 6], [7, 8, 9], [0, 1, 2], [3, 4, 5]],
           mask=[[F, F, F], [T, F, T], [T, T, T], [F, F, F], [T, T, F]],
           keepdims=True,
-          expected=ragged.constant_value([[], [4, 6], [7, 8, 9], [], [3, 4]])),
+          expected=ragged_factory_ops.constant_value(
+              [[], [4, 6], [7, 8, 9], [], [3, 4]])),
       dict(
           descr='data.shape=[3, 2, 2]; mask.shape=[3]; keepdims=True',
           data=[[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[2, 4], [6, 8]]],
@@ -111,8 +113,9 @@ class RaggedBooleanMaskOpTest(ragged_test_util.RaggedTensorTestCase,
           data=[[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[2, 4], [6, 8]]],
           mask=[[T, F], [T, T], [F, F]],
           keepdims=True,
-          expected=ragged.constant_value([[[1, 2]], [[5, 6], [7, 8]], []],
-                                         ragged_rank=1)),
+          expected=ragged_factory_ops.constant_value(
+              [[[1, 2]], [[5, 6], [7, 8]], []],
+              ragged_rank=1)),
       dict(
           descr='data.shape=[3, 2, 2]; mask.shape=[3, 2]; keepdims=False',
           data=[[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[2, 4], [6, 8]]],
@@ -124,7 +127,7 @@ class RaggedBooleanMaskOpTest(ragged_test_util.RaggedTensorTestCase,
           data=[[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[2, 4], [6, 8]]],
           mask=[[[T, T], [F, T]], [[F, F], [F, F]], [[T, F], [T, T]]],
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[1, 2], [4]], [[], []], [[2], [6, 8]]])),
       dict(
           descr='data.shape=mask.shape=[2, 2, 2, 2]; keepdims=True',
@@ -133,7 +136,7 @@ class RaggedBooleanMaskOpTest(ragged_test_util.RaggedTensorTestCase,
           mask=[[[[T, T], [F, F]], [[T, F], [F, F]]],
                 [[[F, F], [F, F]], [[T, T], [T, F]]]],
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[[1, 2], []], [[5], []]], [[[], []], [[1, 3], [5]]]])),
       dict(
           descr='data.shape=mask.shape=[2, 2, 2, 2]; keepdims=False',
@@ -149,63 +152,64 @@ class RaggedBooleanMaskOpTest(ragged_test_util.RaggedTensorTestCase,
       #=========================================================================
       dict(
           descr='data.shape=[5, (D2)]; mask.shape=[5, (D2)]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[1, 2], [3, 4, 5, 6], [7, 8, 9], [], [1, 2, 3]]),
-          mask=ragged.constant_value(
+          mask=ragged_factory_ops.constant_value(
               [[F, F], [F, T, F, T], [F, F, F], [], [T, F, T]]),
           keepdims=True,
-          expected=ragged.constant_value([[], [4, 6], [], [], [1, 3]])),
+          expected=ragged_factory_ops.constant_value(
+              [[], [4, 6], [], [], [1, 3]])),
       dict(
           descr='data.shape=[3, (D2), (D3)]; mask.shape=[3, (D2)]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[2, 4], [6, 8]]]),
-          mask=ragged.constant_value([[T, F], [T, T], [F, F]]),
+          mask=ragged_factory_ops.constant_value([[T, F], [T, T], [F, F]]),
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[1, 2]], [[5, 6], [7, 8]], []])),
       dict(
           descr='data.shape=[3, (D2), (D3)]; mask.shape=[3, (D2)]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[2, 4], [6, 8]]]),
-          mask=ragged.constant_value([[T, F], [T, T], [F, F]]),
+          mask=ragged_factory_ops.constant_value([[T, F], [T, T], [F, F]]),
           keepdims=False,
-          expected=ragged.constant_value([[1, 2], [5, 6], [7, 8]])),
+          expected=ragged_factory_ops.constant_value([[1, 2], [5, 6], [7, 8]])),
       dict(
           descr='data.shape=[3, (D2), D3]; mask.shape=[3, (D2)]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [[5, 6], [7, 8], [2, 4]], [[6, 8]]],
               ragged_rank=1),
-          mask=ragged.constant_value([[T, F], [T, T, F], [F]]),
+          mask=ragged_factory_ops.constant_value([[T, F], [T, T, F], [F]]),
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[1, 2]], [[5, 6], [7, 8]], []],
               ragged_rank=1)),
       dict(
           descr='data.shape=[3, (D2), D3]; mask.shape=[3, (D2)]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[2, 4], [6, 8]]],
               ragged_rank=1),
-          mask=ragged.constant_value([[T, F], [T, T], [F, F]]),
+          mask=ragged_factory_ops.constant_value([[T, F], [T, T], [F, F]]),
           keepdims=False,
           expected=[[1, 2], [5, 6], [7, 8]]),
       dict(
           descr='data.shape=[3, (D2), (D3)]; mask.shape=[3, (D2), (D3)]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[2, 4]]]),
-          mask=ragged.constant_value(
+          mask=ragged_factory_ops.constant_value(
               [[[T, T], [F, T]], [[F, F], [F, F]], [[T, F]]]),
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[1, 2], [4]], [[], []], [[2]]])),
       dict(
           descr=('data.shape=[3, (D2), (D3), (D4)]; '
                  'mask.shape=[3, (D2), (D3), (D4)]'),
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[[1, 2], [3, 4]], [[5, 6]]], [[[2, 4], [6, 8]]]]),
-          mask=ragged.constant_value(
+          mask=ragged_factory_ops.constant_value(
               [[[[T, T], [F, F]], [[T, F]]], [[[F, F], [T, T]]]]),
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[[1, 2], []], [[5]]], [[[], [6, 8]]]])),
 
       #=========================================================================
@@ -214,125 +218,132 @@ class RaggedBooleanMaskOpTest(ragged_test_util.RaggedTensorTestCase,
       dict(
           descr='data.shape=[2, 3]; mask.shape=[2, (3)]',
           data=[[1, 2, 3], [4, 5, 6]],
-          mask=ragged.constant_value([[T, F, F], [F, T, T]]),
+          mask=ragged_factory_ops.constant_value([[T, F, F], [F, T, T]]),
           keepdims=True,
-          expected=ragged.constant_value([[1], [5, 6]])),
+          expected=ragged_factory_ops.constant_value([[1], [5, 6]])),
       dict(
           descr='data.shape=[2, 3, 2]; mask.shape=[2, (3)]',
           data=[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 0], [2, 4]]],
-          mask=ragged.constant_value([[T, F, F], [F, T, T]]),
+          mask=ragged_factory_ops.constant_value([[T, F, F], [F, T, T]]),
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[1, 2]], [[9, 0], [2, 4]]],
               ragged_rank=1)),
       dict(
           descr='data.shape=[2, 3, 2]; mask.shape=[2, (3), 2]',
           data=[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 0], [2, 4]]],
-          mask=ragged.constant_value(
+          mask=ragged_factory_ops.constant_value(
               [[[T, F], [F, F], [T, T]], [[T, F], [F, T], [F, F]]],
               ragged_rank=1),
           keepdims=True,
-          expected=ragged.constant_value([[[1], [], [5, 6]], [[7], [0], []]])),
+          expected=ragged_factory_ops.constant_value(
+              [[[1], [], [5, 6]], [[7], [0], []]])),
 
       #=========================================================================
       # Ragged data and uniform mask.
       #=========================================================================
       dict(
           descr='data.shape=[4, (D2)]; mask.shape=[4]',
-          data=ragged.constant_value([[1, 2, 3], [4], [], [5, 6]]),
+          data=ragged_factory_ops.constant_value([[1, 2, 3], [4], [], [5, 6]]),
           mask=[T, F, T, F],
           keepdims=False,
-          expected=ragged.constant_value([[1, 2, 3], []])),
+          expected=ragged_factory_ops.constant_value([[1, 2, 3], []])),
       dict(
           descr='data.shape=[4, (D2), (D3)]; mask.shape=[4]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2, 3]], [[4], []], [[5, 6]], []]),
           mask=[T, F, T, T],
           keepdims=False,
-          expected=ragged.constant_value([[[1, 2, 3]], [[5, 6]], []])),
+          expected=ragged_factory_ops.constant_value(
+              [[[1, 2, 3]], [[5, 6]], []])),
       dict(
           descr='data.shape=[4, (D2), 2]; mask.shape=[4]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [], [[5, 6]], [[7, 8], [9, 0], [1, 2]]],
               ragged_rank=1),
           mask=[T, F, F, T],
           keepdims=False,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [[7, 8], [9, 0], [1, 2]]],
               ragged_rank=1)),
       dict(
           descr='data.shape=[4, (D2), 2]; mask.shape=[4]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [], [[5, 6]], [[7, 8], [9, 0], [1, 2]]],
               ragged_rank=1),
           mask=[T, F, F, T],
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[1, 2], [3, 4]], [[7, 8], [9, 0], [1, 2]]],
               ragged_rank=1)),
       dict(
           descr='data.shape=[1, (2)]; mask.shape=[1, 2]',
-          data=ragged.constant_value([[1, 2]]),
+          data=ragged_factory_ops.constant_value([[1, 2]]),
           mask=[[T, F]],
           keepdims=True,
-          expected=ragged.constant_value([[1]])),
+          expected=ragged_factory_ops.constant_value([[1]])),
       dict(
           descr='data.shape=[2, (2), (D3)]; mask.shape=[2, 2]',
-          data=ragged.constant_value([[[1], [2, 3]], [[], [4, 5, 6]]]),
+          data=ragged_factory_ops.constant_value(
+              [[[1], [2, 3]], [[], [4, 5, 6]]]),
           mask=[[T, F], [T, T]],
           keepdims=True,
-          expected=ragged.constant_value([[[1]], [[], [4, 5, 6]]])),
+          expected=ragged_factory_ops.constant_value([[[1]], [[], [4, 5, 6]]])),
       dict(
           descr='data.shape=[2, (2), 3]; mask.shape=[2, 2]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [2, 4, 6]]],
               ragged_rank=1),
           mask=[[T, F], [T, T]],
           keepdims=True,
-          expected=ragged.constant_value(
+          expected=ragged_factory_ops.constant_value(
               [[[1, 2, 3]], [[7, 8, 9], [2, 4, 6]]],
               ragged_rank=1)),
       dict(
           descr='data.shape=[2, (2), 3]; mask.shape=[2, 2, 3]',
-          data=ragged.constant_value(
+          data=ragged_factory_ops.constant_value(
               [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [2, 4, 6]]],
               ragged_rank=1),
           mask=[[[T, F, F], [T, F, T]], [[T, F, T], [F, F, F]]],
           keepdims=True,
-          expected=ragged.constant_value([[[1], [4, 6]], [[7, 9], []]])),
+          expected=ragged_factory_ops.constant_value(
+              [[[1], [4, 6]], [[7, 9], []]])),
   ])  # pyformat: disable
   def testBooleanMask(self, descr, data, mask, keepdims, expected):
-    actual = ragged.boolean_mask(data, mask, keepdims=keepdims)
+    actual = ragged_array_ops.boolean_mask(data, mask, keepdims=keepdims)
     self.assertRaggedEqual(actual, expected)
 
   def testErrors(self):
     if not context.executing_eagerly():
       self.assertRaisesRegexp(ValueError,
                               r'mask\.shape\.ndims must be kown statically',
-                              ragged.boolean_mask, [[1, 2]],
+                              ragged_array_ops.boolean_mask, [[1, 2]],
                               array_ops.placeholder(dtypes.bool))
 
-    self.assertRaises(TypeError, ragged.boolean_mask, [[1, 2]], [[0, 1]])
+    self.assertRaises(TypeError, ragged_array_ops.boolean_mask, [[1, 2]],
+                      [[0, 1]])
     self.assertRaisesRegexp(
         ValueError, 'Tensor conversion requested dtype bool for '
-        'RaggedTensor with dtype int32', ragged.boolean_mask,
-        ragged.constant([[1, 2]]), ragged.constant([[0, 0]]))
+        'RaggedTensor with dtype int32', ragged_array_ops.boolean_mask,
+        ragged_factory_ops.constant([[1, 2]]),
+        ragged_factory_ops.constant([[0, 0]]))
 
     self.assertRaisesRegexp(
         ValueError, r'Shapes \(1, 2\) and \(1, 3\) are incompatible',
-        ragged.boolean_mask, [[1, 2]], [[True, False, True]])
+        ragged_array_ops.boolean_mask, [[1, 2]], [[True, False, True]])
 
     self.assertRaisesRegexp(errors.InvalidArgumentError,
                             r'Inputs must have identical ragged splits',
-                            ragged.boolean_mask, ragged.constant([[1, 2]]),
-                            ragged.constant([[True, False, True]]))
+                            ragged_array_ops.boolean_mask,
+                            ragged_factory_ops.constant([[1, 2]]),
+                            ragged_factory_ops.constant([[True, False, True]]))
 
     self.assertRaisesRegexp(ValueError, 'mask cannot be scalar',
-                            ragged.boolean_mask, [[1, 2]], True)
+                            ragged_array_ops.boolean_mask, [[1, 2]], True)
 
-    self.assertRaisesRegexp(ValueError,
-                            'mask cannot be scalar', ragged.boolean_mask,
-                            ragged.constant([[1, 2]]), True)
+    self.assertRaisesRegexp(ValueError, 'mask cannot be scalar',
+                            ragged_array_ops.boolean_mask,
+                            ragged_factory_ops.constant([[1, 2]]), True)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_concat_op_test.py b/tensorflow/python/ops/ragged/ragged_concat_op_test.py
index e72afb0448..254afdaa21 100644
--- a/tensorflow/python/ops/ragged/ragged_concat_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_concat_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.concat."""
+"""Tests for ragged_array_ops.concat."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -26,7 +26,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -38,8 +39,8 @@ class RaggedConcatOpTest(ragged_test_util.RaggedTensorTestCase,
   def _rt_inputs_to_tensors(self, rt_inputs, ragged_ranks=None):
     if ragged_ranks is None:
       ragged_ranks = [None] * len(rt_inputs)
-    return [
-        ragged.constant(rt_input, ragged_rank=rrank)
+    return [  # pylint: disable=g-long-ternary
+        ragged_factory_ops.constant(rt_input, ragged_rank=rrank)
         if rrank != 0 else constant_op.constant(rt_input)
         for (rt_input, rrank) in zip(rt_inputs, ragged_ranks)
     ]
@@ -234,7 +235,7 @@ class RaggedConcatOpTest(ragged_test_util.RaggedTensorTestCase,
                        expected_ragged_rank=None,
                        expected_shape=None):
     rt_inputs = self._rt_inputs_to_tensors(rt_inputs, ragged_ranks)
-    concatenated = ragged.concat(rt_inputs, axis)
+    concatenated = ragged_array_ops.concat(rt_inputs, axis)
     if expected_ragged_rank is not None:
       self.assertEqual(concatenated.ragged_rank, expected_ragged_rank)
     if expected_shape is not None:
@@ -275,7 +276,8 @@ class RaggedConcatOpTest(ragged_test_util.RaggedTensorTestCase,
                       message=None,
                       ragged_ranks=None):
     rt_inputs = self._rt_inputs_to_tensors(rt_inputs, ragged_ranks)
-    self.assertRaisesRegexp(error, message, ragged.concat, rt_inputs, axis)
+    self.assertRaisesRegexp(error, message, ragged_array_ops.concat, rt_inputs,
+                            axis)
 
   @parameterized.parameters([
       dict(
@@ -292,7 +294,7 @@ class RaggedConcatOpTest(ragged_test_util.RaggedTensorTestCase,
     rt_inputs = [
         array_ops.placeholder_with_default(rt, shape=None) for rt in rt_inputs
     ]
-    concatenated = ragged.concat(rt_inputs, axis)
+    concatenated = ragged_array_ops.concat(rt_inputs, axis)
     with self.assertRaisesRegexp(error, message):
       self.evaluate(concatenated)
 
@@ -305,7 +307,7 @@ class RaggedConcatOpTest(ragged_test_util.RaggedTensorTestCase,
     ]
     self.assertRaisesRegexp(
         ValueError, r'axis may only be negative if ndims is statically known.',
-        ragged.concat, rt_inputs, -1)
+        ragged_array_ops.concat, rt_inputs, -1)
 
   def testSingleTensorInput(self):
     """Tests ragged_concat with a single tensor input.
@@ -314,8 +316,8 @@ class RaggedConcatOpTest(ragged_test_util.RaggedTensorTestCase,
     also pass in a single value (as with tf.concat), in which case it simply
     returns that tensor.  This test exercises that path.
     """
-    rt_inputs = ragged.constant([[1, 2], [3, 4]])
-    concatenated = ragged.concat(rt_inputs, 0)
+    rt_inputs = ragged_factory_ops.constant([[1, 2], [3, 4]])
+    concatenated = ragged_array_ops.concat(rt_inputs, 0)
     self.assertRaggedEqual(concatenated, [[1, 2], [3, 4]])
 
 
diff --git a/tensorflow/python/ops/ragged/ragged_const_op_test.py b/tensorflow/python/ops/ragged/ragged_const_op_test.py
index c014f71030..29a9bdf53d 100644
--- a/tensorflow/python/ops/ragged/ragged_const_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_const_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.constant."""
+"""Tests for ragged_factory_ops.constant."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -23,6 +23,8 @@ from absl.testing import parameterized
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -157,7 +159,7 @@ class RaggedConstOpTest(ragged_test_util.RaggedTensorTestCase,
       expected_dtype: The expected dtype for the resulting ragged tensor (used
         to test default/inferred types when dtype=None).
     """
-    rt = ragged.constant(
+    rt = ragged_factory_ops.constant(
         pylist, dtype=dtype, ragged_rank=ragged_rank, inner_shape=inner_shape)
 
     # If dtype was explicitly specified, check it.
@@ -168,14 +170,14 @@ class RaggedConstOpTest(ragged_test_util.RaggedTensorTestCase,
 
     # If ragged_rank was explicitly specified, check it.
     if ragged_rank is not None:
-      if isinstance(rt, ragged.RaggedTensor):
+      if isinstance(rt, ragged_tensor.RaggedTensor):
         self.assertEqual(rt.ragged_rank, ragged_rank)
       else:
         self.assertEqual(0, ragged_rank)
 
     # If inner_shape was explicitly specified, check it.
     if inner_shape is not None:
-      if isinstance(rt, ragged.RaggedTensor):
+      if isinstance(rt, ragged_tensor.RaggedTensor):
         self.assertEqual(rt.flat_values.shape.as_list()[1:], list(inner_shape))
       else:
         self.assertEqual(rt.shape.as_list(), list(inner_shape))
@@ -257,7 +259,7 @@ class RaggedConstOpTest(ragged_test_util.RaggedTensorTestCase,
     self.assertRaisesRegexp(
         exception,
         message,
-        ragged.constant,
+        ragged_factory_ops.constant,
         pylist,
         dtype=dtype,
         ragged_rank=ragged_rank,
@@ -294,12 +296,12 @@ class RaggedConstOpTest(ragged_test_util.RaggedTensorTestCase,
                                   message=None):
     """Tests for the _find_scalar_and_max_depth helper function."""
     if exception is not None:
-      self.assertRaisesRegexp(
-          exception, message,
-          ragged.ragged_factory_ops._find_scalar_and_max_depth, pylist)
+      self.assertRaisesRegexp(exception, message,
+                              ragged_factory_ops._find_scalar_and_max_depth,
+                              pylist)
     else:
       self.assertEqual(
-          ragged.ragged_factory_ops._find_scalar_and_max_depth(pylist),
+          ragged_factory_ops._find_scalar_and_max_depth(pylist),
           (scalar_depth, max_depth))
 
   @parameterized.parameters([
diff --git a/tensorflow/python/ops/ragged/ragged_constant_value_op_test.py b/tensorflow/python/ops/ragged/ragged_constant_value_op_test.py
index 56768a9a47..7f474594b4 100644
--- a/tensorflow/python/ops/ragged/ragged_constant_value_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_constant_value_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.constant_value."""
+"""Tests for ragged_factory_ops.constant_value."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -22,7 +22,8 @@ from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_tensor_value
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -147,7 +148,7 @@ class RaggedConstantValueOpTest(ragged_test_util.RaggedTensorTestCase,
                        expected_shape=None,
                        expected_dtype=None):
     """Tests that `ragged_value(pylist).to_list() == pylist`."""
-    rt = ragged.constant_value(
+    rt = ragged_factory_ops.constant_value(
         pylist, dtype=dtype, ragged_rank=ragged_rank, inner_shape=inner_shape)
 
     # If dtype was explicitly specified, check it.
@@ -158,14 +159,14 @@ class RaggedConstantValueOpTest(ragged_test_util.RaggedTensorTestCase,
 
     # If ragged_rank was explicitly specified, check it.
     if ragged_rank is not None:
-      if isinstance(rt, ragged.RaggedTensorValue):
+      if isinstance(rt, ragged_tensor_value.RaggedTensorValue):
         self.assertEqual(rt.ragged_rank, ragged_rank)
       else:
         self.assertEqual(0, ragged_rank)
 
     # If inner_shape was explicitly specified, check it.
     if inner_shape is not None:
-      if isinstance(rt, ragged.RaggedTensorValue):
+      if isinstance(rt, ragged_tensor_value.RaggedTensorValue):
         self.assertEqual(rt.flat_values.shape[1:], inner_shape)
       else:
         self.assertEqual(rt.shape, inner_shape)
@@ -174,7 +175,7 @@ class RaggedConstantValueOpTest(ragged_test_util.RaggedTensorTestCase,
       self.assertEqual(tuple(rt.shape), expected_shape)
 
     if rt.shape:
-      if isinstance(rt, ragged.RaggedTensorValue):
+      if isinstance(rt, ragged_tensor_value.RaggedTensorValue):
         self.assertEqual(rt.to_list(), pylist)
       else:
         self.assertEqual(rt.tolist(), pylist)
@@ -257,11 +258,11 @@ class RaggedConstantValueOpTest(ragged_test_util.RaggedTensorTestCase,
                             inner_shape=None,
                             exception=None,
                             message=None):
-    """Tests that `ragged.constant_value()` raises an expected exception."""
+    """Tests that `constant_value()` raises an expected exception."""
     self.assertRaisesRegexp(
         exception,
         message,
-        ragged.constant_value,
+        ragged_factory_ops.constant_value,
         pylist,
         dtype=dtype,
         ragged_rank=ragged_rank,
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
index 9d63dcf7c4..fb3dabc3eb 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
@@ -31,8 +31,9 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import parsing_ops
-from tensorflow.python.ops import ragged
 from tensorflow.python.ops import string_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -134,8 +135,8 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
 
   def assertSameShape(self, x, y):
     """Checks that x and y have the same shape (including ragged shapes)."""
-    if isinstance(x, ragged.RaggedTensor):
-      self.assertIsInstance(y, ragged.RaggedTensor)
+    if isinstance(x, ragged_tensor.RaggedTensor):
+      self.assertIsInstance(y, ragged_tensor.RaggedTensor)
       self.assertEqual(x.ragged_rank, y.ragged_rank)
       for (x_splits, y_splits) in zip(x.nested_row_splits, y.nested_row_splits):
         self.assertAllEqual(x_splits, y_splits)
@@ -156,75 +157,85 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
           {'x': [1, -2, 3]},
           # 2-dimensional input
           {'x': [[-2, 3], [-3, 4]]},
-          {'x': ragged.constant_value([[-2, 3], [-3]], ragged_rank=1)},
+          {'x': ragged_factory_ops.constant_value(
+              [[-2, 3], [-3]], ragged_rank=1)},
           # 3-dimensional inputs
           {'x': [[[-2, 3], [3, 4]], [[7, 6], [5, 4]]]},
-          {'x': ragged.constant_value([[[-2, 3], [3, 4]], [[7, 6]]],
-                                      ragged_rank=1)},
-          {'x': ragged.constant_value([[[-2, 3, 4], []], [[7, 6]], []],
-                                      ragged_rank=2)},
+          {'x': ragged_factory_ops.constant_value(
+              [[[-2, 3], [3, 4]], [[7, 6]]],
+              ragged_rank=1)},
+          {'x': ragged_factory_ops.constant_value(
+              [[[-2, 3, 4], []], [[7, 6]], []],
+              ragged_rank=2)},
           ] +
       #=========================================================================
       # Test each unary op.
       #=========================================================================
-      [{'x': ragged.constant_value([[-2.0, 3.0], [-3.0]]), 'op': op}
+      [{'x': ragged_factory_ops.constant_value([[-2.0, 3.0], [-3.0]]), 'op': op}
        for op in UNARY_FLOAT_OPS] +
-      [{'x': ragged.constant_value([[True, False], [True]]), 'op': op}
+      [{'x': ragged_factory_ops.constant_value([[True, False], [True]]),
+        'op': op}
        for op in UNARY_BOOL_OPS] +
-      [{'x': ragged.constant_value([[18, 512], [12412]], np.int32), 'op': op}
+      [{'x': ragged_factory_ops.constant_value([[18, 512], [12412]], np.int32),
+        'op': op}
        for op in UNARY_INT_OPS] +
-      [{'x': ragged.constant_value([['abcd', 'efgh'], ['aabbccdd']]), 'op': op}
+      [{'x': ragged_factory_ops.constant_value([['abcd', 'efgh'],
+                                                ['aabbccdd']]),
+        'op': op}
        for op in UNARY_STRING_OPS] +
       [
           {'op': clip_ops.clip_by_value,
-           'x': ragged.constant_value([[-2.0, 3.0], [-3.0]]),
+           'x': ragged_factory_ops.constant_value([[-2.0, 3.0], [-3.0]]),
            'clip_value_min': 0.1, 'clip_value_max': 4.0},
           {'op': math_ops.cast,
-           'x': ragged.constant_value([[-2.0, 3.0], [-3.0]]),
+           'x': ragged_factory_ops.constant_value([[-2.0, 3.0], [-3.0]]),
            'dtype': dtypes.int32},
           {'op': math_ops.saturate_cast,
-           'x': ragged.constant_value([[-2.0, 3.0], [-3.0]]),
+           'x': ragged_factory_ops.constant_value([[-2.0, 3.0], [-3.0]]),
            'dtype': dtypes.int32},
           {'op': string_ops.string_to_hash_bucket,
-           'x': ragged.constant_value([['abcd', 'efgh'], ['aabbccdd']]),
+           'x': ragged_factory_ops.constant_value(
+               [['abcd', 'efgh'], ['aabbccdd']]),
            'num_buckets': 1000},
           {'op': string_ops.string_to_hash_bucket_fast,
-           'x': ragged.constant_value([['abcd', 'efgh'], ['aabbccdd']]),
+           'x': ragged_factory_ops.constant_value(
+               [['abcd', 'efgh'], ['aabbccdd']]),
            'num_buckets': 1000},
           {'op': string_ops.string_to_hash_bucket_strong,
-           'x': ragged.constant_value([['abcd', 'efgh'], ['aabbccdd']]),
+           'x': ragged_factory_ops.constant_value(
+               [['abcd', 'efgh'], ['aabbccdd']]),
            'num_buckets': 1000,
            'key': [1231, 12512]},
           {'op': string_ops.string_to_number,
-           'x': ragged.constant_value([['-2.0', '3.0'], ['-3.0']])},
+           'x': ragged_factory_ops.constant_value([['-2.0', '3.0'], ['-3.0']])},
           {'op': string_ops.regex_full_match,
-           'x': ragged.constant_value([['hello', '123'], ['1+1']]),
+           'x': ragged_factory_ops.constant_value([['hello', '123'], ['1+1']]),
            'pattern': r'\w+'},
           {'op': string_ops.regex_replace,
-           'x': ragged.constant_value([['hello', '123'], ['1+1']]),
+           'x': ragged_factory_ops.constant_value([['hello', '123'], ['1+1']]),
            'pattern': r'\d',
            'rewrite': '#'},
           {'op': string_ops.substr,
-           'x': ragged.constant_value([['hello', '123'], ['1+1']]),
+           'x': ragged_factory_ops.constant_value([['hello', '123'], ['1+1']]),
            'pos': 2, 'len': 3},
           {'op': array_ops.check_numerics,
-           'x': ragged.constant_value([[-2.0, 3.0], [-3.0]]),
+           'x': ragged_factory_ops.constant_value([[-2.0, 3.0], [-3.0]]),
            'message': 'check-numerics'},
       ]
       )  # pyformat: disable
   def testUnaryElementwiseOp(self, x, op=math_ops.abs, **extra_args):
-    x = ragged.convert_to_tensor_or_ragged_tensor(x)
+    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x)
     result = op(x, **extra_args)
 
     # Run the wrapped op on the dense values, for comparison.
-    dense_x = x.flat_values if isinstance(x, ragged.RaggedTensor) else x
+    dense_x = x.flat_values if isinstance(x, ragged_tensor.RaggedTensor) else x
     expected_flat_values = array_ops.reshape(op(dense_x, **extra_args), [-1])
 
     # Check that the result has the expected shape.
     self.assertSameShape(x, result)
 
     # Check that the result has the expected (flattened) values.
-    if isinstance(result, ragged.RaggedTensor):
+    if isinstance(result, ragged_tensor.RaggedTensor):
       result_flat_values = array_ops.reshape(result.flat_values, [-1])
     else:
       result_flat_values = array_ops.reshape(result, [-1])
@@ -245,19 +256,23 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
           {'x': [[-2, 3], [-3, -4]],
            'y': [[1, 2], [3, 4]]},
           # Shapes: x:(2, None), y:(2, None)
-          {'x': ragged.constant_value([[-2, 3], [-3]]),
-           'y': ragged.constant_value([[5, 6], [7]])},
+          {'x': ragged_factory_ops.constant_value([[-2, 3], [-3]]),
+           'y': ragged_factory_ops.constant_value([[5, 6], [7]])},
           # Shapes: x:(2, 2, 2), y:(2, 2, 2)
           {'x': [[[1, 2], [3, 4]], [[5, 6], [7, 8]]],
            'y': [[[9, 3], [3, 4]], [[5, 2], [7, 6]]]},
           # Shapes: x:(2, None, None), y: (2, None, None)
-          {'x': ragged.constant_value([[[1, 2], [3], [4]], [[], [5, 7, 8]]]),
-           'y': ragged.constant_value([[[3, 8], [2], [5]], [[], [1, 9, 8]]])},
+          {'x': ragged_factory_ops.constant_value(
+              [[[1, 2], [3], [4]], [[], [5, 7, 8]]]),
+           'y': ragged_factory_ops.constant_value(
+               [[[3, 8], [2], [5]], [[], [1, 9, 8]]])},
           # Shapes: x:(2, None, 2), y: (2, None, 2)
-          {'x': ragged.constant_value([[[1, 2]], [[3, 4], [5, 6], [7, 8]]],
-                                      ragged_rank=1),
-           'y': ragged.constant_value([[[9, 3]], [[5, 2], [3, 4], [7, 6]]],
-                                      ragged_rank=1)},
+          {'x': ragged_factory_ops.constant_value(
+              [[[1, 2]], [[3, 4], [5, 6], [7, 8]]],
+              ragged_rank=1),
+           'y': ragged_factory_ops.constant_value(
+               [[[9, 3]], [[5, 2], [3, 4], [7, 6]]],
+               ragged_rank=1)},
 
           #=====================================================================
           # With broadcasting
@@ -279,47 +294,54 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
            'y': [[1, 2], [3, 4]]},
           # Shapes: x:(), y:(2, None)
           {'x': 10,                                 # Broadcast () -> (2, None)
-           'y': ragged.constant_value([[1, 2], [3]], dtype=np.int32)},
+           'y': ragged_factory_ops.constant_value(
+               [[1, 2], [3]], dtype=np.int32)},
           # TODO(edloper): Add tests for more advanced broadcasting, once we add
           # support for it.
 
           #=====================================================================
           # Keyword Args
           #=====================================================================
-          {'x': ragged.constant_value([[[1, 2], [3], [4]], [[], [5, 7, 8]]]),
-           'y': ragged.constant_value([[[3, 8], [2], [5]], [[], [1, 9, 8]]]),
+          {'x': ragged_factory_ops.constant_value(
+              [[[1, 2], [3], [4]], [[], [5, 7, 8]]]),
+           'y': ragged_factory_ops.constant_value(
+               [[[3, 8], [2], [5]], [[], [1, 9, 8]]]),
            'use_kwargs': ('x', 'y')},
-          {'x': ragged.constant_value([[[1, 2]], [[3, 4], [5, 6], [7, 8]]],
-                                      ragged_rank=1),
-           'y': ragged.constant_value([[[9, 3]], [[5, 2], [3, 4], [7, 6]]],
-                                      ragged_rank=1),
+          {'x': ragged_factory_ops.constant_value(
+              [[[1, 2]], [[3, 4], [5, 6], [7, 8]]],
+              ragged_rank=1),
+           'y': ragged_factory_ops.constant_value(
+               [[[9, 3]], [[5, 2], [3, 4], [7, 6]]],
+               ragged_rank=1),
            'use_kwargs': ('x', 'y')},
-          {'x': ragged.constant_value([[[1, 2]], [[3, 4], [5, 6], [7, 8]]],
-                                      ragged_rank=1),
-           'y': ragged.constant_value([[[9, 3]], [[5, 2], [3, 4], [7, 6]]],
-                                      ragged_rank=1),
+          {'x': ragged_factory_ops.constant_value(
+              [[[1, 2]], [[3, 4], [5, 6], [7, 8]]],
+              ragged_rank=1),
+           'y': ragged_factory_ops.constant_value(
+               [[[9, 3]], [[5, 2], [3, 4], [7, 6]]],
+               ragged_rank=1),
            'use_kwargs': ('x',)},
       ] +
       #=========================================================================
       # Test each unary op.
       #=========================================================================
-      [{'x': ragged.constant_value([[-2.0, 3.0], [-3.0]]),
-        'y': ragged.constant_value([[5.0, 1.0], [12.0]]),
+      [{'x': ragged_factory_ops.constant_value([[-2.0, 3.0], [-3.0]]),
+        'y': ragged_factory_ops.constant_value([[5.0, 1.0], [12.0]]),
         'op': op}
        for op in BINARY_FLOAT_OPS] +
-      [{'x': ragged.constant_value([[-2, 3], [-3]]),
-        'y': ragged.constant_value([[5, 1], [12]]),
+      [{'x': ragged_factory_ops.constant_value([[-2, 3], [-3]]),
+        'y': ragged_factory_ops.constant_value([[5, 1], [12]]),
         'op': op}
        for op in BINARY_INT_OPS] +
-      [{'x': ragged.constant_value([[True, True], [False]]),
-        'y': ragged.constant_value([[False, True], [False]]),
+      [{'x': ragged_factory_ops.constant_value([[True, True], [False]]),
+        'y': ragged_factory_ops.constant_value([[False, True], [False]]),
         'op': op}
        for op in BINARY_BOOL_OPS]
       )  # pyformat: disable
   def testBinaryElementwiseOp(self, x, y, op=math_ops.add, **extra_args):
     use_kwargs = extra_args.pop('use_kwargs', ())
-    x = ragged.convert_to_tensor_or_ragged_tensor(x)
-    y = ragged.convert_to_tensor_or_ragged_tensor(y)
+    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x)
+    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y)
     if 'x' in use_kwargs and 'y' in use_kwargs:
       result = op(x=x, y=y, **extra_args)
     elif 'y' in use_kwargs:
@@ -328,8 +350,8 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
       result = op(x, y, **extra_args)
 
     # Run the wrapped op on the dense values, for comparison.
-    dense_x = x.flat_values if isinstance(x, ragged.RaggedTensor) else x
-    dense_y = y.flat_values if isinstance(y, ragged.RaggedTensor) else y
+    dense_x = x.flat_values if isinstance(x, ragged_tensor.RaggedTensor) else x
+    dense_y = y.flat_values if isinstance(y, ragged_tensor.RaggedTensor) else y
     expected_flat_values = array_ops.reshape(
         op(dense_x, dense_y, **extra_args), [-1])
 
@@ -337,7 +359,7 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
     self.assertSameShape(y, result)
 
     # Check that the result has the expected (flattened) values.
-    if isinstance(result, ragged.RaggedTensor):
+    if isinstance(result, ragged_tensor.RaggedTensor):
       result_flat_values = array_ops.reshape(result.flat_values, [-1])
     else:
       result_flat_values = array_ops.reshape(result, [-1])
@@ -348,36 +370,44 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
           {'inputs': (12, 8, 3)},
           {'inputs': ([1, 2, 3], [7, 8, 9], [3, 6, 9])},
           {'inputs': ([[1, 2]], [[3, 4]], [[5, 6]])},
-          {'inputs': (ragged.constant_value([[1, 3], [-3]]),
-                      ragged.constant_value([[4, 7], [88]]),
-                      ragged.constant_value([[2, 9], [12]]))},
-          {'inputs': (ragged.constant_value([[[1, 3], [-3]], [[1]]]),
-                      ragged.constant_value([[[4, 7], [88]], [[2]]]),
-                      ragged.constant_value([[[2, 9], [12]], [[8]]]))},
-          {'inputs': (ragged.constant_value([[[1, 3], [3, 4]], [[1, 5]]],
-                                            ragged_rank=1),
-                      ragged.constant_value([[[4, 7], [1, 2]], [[2, 2]]],
-                                            ragged_rank=1),
-                      ragged.constant_value([[[2, 9], [5, 2]], [[8, 0]]],
-                                            ragged_rank=1))},
-          {'inputs': (ragged.constant_value([[[1, 3], [-3]], [[1]]]),
-                      ragged.constant_value([[[4, 7], [88]], [[2]]]),
-                      ragged.constant_value([[[2, 9], [12]], [[8]]])),
+          {'inputs': (ragged_factory_ops.constant_value([[1, 3], [-3]]),
+                      ragged_factory_ops.constant_value([[4, 7], [88]]),
+                      ragged_factory_ops.constant_value([[2, 9], [12]]))},
+          {'inputs': (ragged_factory_ops.constant_value(
+              [[[1, 3], [-3]], [[1]]]),
+                      ragged_factory_ops.constant_value(
+                          [[[4, 7], [88]], [[2]]]),
+                      ragged_factory_ops.constant_value(
+                          [[[2, 9], [12]], [[8]]]))},
+          {'inputs': (
+              ragged_factory_ops.constant_value([[[1, 3], [3, 4]], [[1, 5]]],
+                                                ragged_rank=1),
+              ragged_factory_ops.constant_value([[[4, 7], [1, 2]], [[2, 2]]],
+                                                ragged_rank=1),
+              ragged_factory_ops.constant_value([[[2, 9], [5, 2]], [[8, 0]]],
+                                                ragged_rank=1))},
+          {'inputs': (
+              ragged_factory_ops.constant_value([[[1, 3], [-3]], [[1]]]),
+              ragged_factory_ops.constant_value([[[4, 7], [88]], [[2]]]),
+              ragged_factory_ops.constant_value([[[2, 9], [12]], [[8]]])),
            'use_kwargs': True},
       ] + [
           {'op': math_ops.add_n,
-           'inputs': (ragged.constant_value([[1, 3], [-3]]),
-                      ragged.constant_value([[4, 7], [88]]),
-                      ragged.constant_value([[2, 9], [12]]))},
+           'inputs': (ragged_factory_ops.constant_value([[1, 3], [-3]]),
+                      ragged_factory_ops.constant_value([[4, 7], [88]]),
+                      ragged_factory_ops.constant_value([[2, 9], [12]]))},
           {'op': string_ops.string_join,
-           'inputs': (ragged.constant_value([['a', 'b'], ['c']]),
-                      ragged.constant_value([['foo', 'bar'], ['baz']]),
-                      ragged.constant_value([['2', '9'], ['12']]))},
+           'inputs': (
+               ragged_factory_ops.constant_value([['a', 'b'], ['c']]),
+               ragged_factory_ops.constant_value([['foo', 'bar'], ['baz']]),
+               ragged_factory_ops.constant_value([['2', '9'], ['12']]))},
       ])  # pyformat: disable
   def testListValuedElementwiseOp(self, inputs, op=math_ops.add_n,
                                   **extra_args):
     use_kwargs = extra_args.pop('use_kwargs', False)
-    inputs = [ragged.convert_to_tensor_or_ragged_tensor(x) for x in inputs]
+    inputs = [
+        ragged_tensor.convert_to_tensor_or_ragged_tensor(x) for x in inputs
+    ]
     if use_kwargs:
       result = op(inputs=inputs, **extra_args)
     else:
@@ -385,7 +415,7 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
 
     # Run the wrapped op on the dense values, for comparison.
     dense_inputs = [
-        x.flat_values if isinstance(x, ragged.RaggedTensor) else x
+        x.flat_values if isinstance(x, ragged_tensor.RaggedTensor) else x
         for x in inputs
     ]
     expected_flat_values = array_ops.reshape(
@@ -395,7 +425,7 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
     self.assertSameShape(inputs[0], result)
 
     # Check that the result has the expected (flattened) values.
-    if isinstance(result, ragged.RaggedTensor):
+    if isinstance(result, ragged_tensor.RaggedTensor):
       result_flat_values = array_ops.reshape(result.flat_values, [-1])
     else:
       result_flat_values = array_ops.reshape(result, [-1])
@@ -404,8 +434,8 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
   def testElementwiseOpUnknownRankError(self):
     if context.executing_eagerly():
       return
-    x = ragged.constant([[1, 2], [3]])
-    y = ragged.RaggedTensor.from_row_splits(
+    x = ragged_factory_ops.constant([[1, 2], [3]])
+    y = ragged_tensor.RaggedTensor.from_row_splits(
         array_ops.placeholder_with_default([1, 2, 3], shape=None), x.row_splits)
     with self.assertRaisesRegexp(ValueError,
                                  r'Unable to broadcast: unknown rank'):
@@ -413,32 +443,34 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
 
   @parameterized.parameters([
       dict(
-          x=ragged.constant_value([[1, 2], [3]]),
+          x=ragged_factory_ops.constant_value([[1, 2], [3]]),
           y=[[10]],
           expected=[[11, 12], [13]]),
       dict(
-          x=ragged.constant_value([[[1, 2], [3, 4]], [[5]]], ragged_rank=2),
-          y=ragged.constant_value([[[10], [20]], [[30]]], ragged_rank=1),
+          x=ragged_factory_ops.constant_value([[[1, 2], [3, 4]], [[5]]],
+                                              ragged_rank=2),
+          y=ragged_factory_ops.constant_value([[[10], [20]], [[30]]],
+                                              ragged_rank=1),
           expected=[[[11, 12], [23, 24]], [[35]]]),
       dict(
-          x=ragged.constant_value([[[1]]]),
-          y=ragged.constant_value([[1]]),
+          x=ragged_factory_ops.constant_value([[[1]]]),
+          y=ragged_factory_ops.constant_value([[1]]),
           expected=[[[2]]]),
   ])
   def testElementwiseOpBroadcast(self, x, y, expected):
-    x = ragged.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
-    y = ragged.convert_to_tensor_or_ragged_tensor(y, dtype=dtypes.int32)
+    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
+    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y, dtype=dtypes.int32)
     result = x + y
     self.assertRaggedEqual(result, expected)
 
   def testElementwiseOpShapeMismatch(self):
-    x = ragged.constant([[1, 2, 3], [4, 5]])
-    y = ragged.constant([[1, 2, 3], [4, 5, 6]])
+    x = ragged_factory_ops.constant([[1, 2, 3], [4, 5]])
+    y = ragged_factory_ops.constant([[1, 2, 3], [4, 5, 6]])
     with self.assertRaises(errors.InvalidArgumentError):
       self.evaluate(math_ops.add(x, y))
 
   def testBinaryOpSparseAndRagged(self):
-    x = ragged.constant([[1, 2, 3], [4, 5]])
+    x = ragged_factory_ops.constant([[1, 2, 3], [4, 5]])
     y = sparse_tensor.SparseTensor([[0, 0], [0, 1], [2, 0]], [1, 2, 3], [3, 2])
     with self.assertRaises((TypeError, ValueError)):
       self.evaluate(math_ops.add(x, y))
@@ -449,129 +481,196 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
   @parameterized.parameters([
       dict(
           op=array_ops.batch_gather,
-          args=(ragged.constant_value([[5, 6, 7], [8, 9]]),
-                ragged.constant_value([[2, 1, 0], [1]])),
-          expected=ragged.constant_value([[7, 6, 5], [9]])),
+          args=(ragged_factory_ops.constant_value([[5, 6, 7], [8, 9]]),
+                ragged_factory_ops.constant_value([[2, 1, 0], [1]])),
+          expected=ragged_factory_ops.constant_value([[7, 6, 5], [9]])),
       dict(
           op=array_ops.concat,
-          args=([ragged.constant_value([[1, 2, 3], [4]], dtype=np.int32),
-                 np.array([[5, 6]], dtype=np.int32)],),
+          args=([
+              ragged_factory_ops.constant_value([[1, 2, 3], [4]],
+                                                dtype=np.int32),
+              np.array([[5, 6]], dtype=np.int32)
+          ],),
           kwargs={'axis': 0},
-          expected=ragged.constant_value([[1, 2, 3], [4], [5, 6]])),
+          expected=ragged_factory_ops.constant_value([[1, 2, 3], [4], [5, 6]])),
       dict(
           op=array_ops.expand_dims,
-          kwargs={'input': ragged.constant_value([[1, 2], [3]]),
-                  'axis': 0},
-          expected=ragged.constant_value([[[1, 2], [3]]])),
+          kwargs={
+              'input': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'axis': 0
+          },
+          expected=ragged_factory_ops.constant_value([[[1, 2], [3]]])),
       dict(
           op=array_ops.expand_dims_v2,
-          kwargs={'input': ragged.constant_value([[1, 2], [3]]),
-                  'axis': -1},
-          expected=ragged.constant_value([[[1], [2]], [[3]]],
-                                         ragged_rank=1),),
+          kwargs={
+              'input': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'axis': -1
+          },
+          expected=ragged_factory_ops.constant_value([[[1], [2]], [[3]]],
+                                                     ragged_rank=1),
+      ),
       dict(
           op=array_ops.gather,
-          kwargs={'params': ragged.constant_value([[1, 2], [3]]),
-                  'indices': [1, 0, 1]},
-          expected=ragged.constant_value([[3], [1, 2], [3]])),
+          kwargs={
+              'params': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'indices': [1, 0, 1]
+          },
+          expected=ragged_factory_ops.constant_value([[3], [1, 2], [3]])),
       dict(
           op=array_ops.gather_v2,
-          kwargs={'params': ragged.constant_value([[1, 2], [3]]),
-                  'indices': ragged.constant_value([[1, 0], [1]])},
-          expected=ragged.constant_value([[[3], [1, 2]], [[3]]])),
+          kwargs={
+              'params': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'indices': ragged_factory_ops.constant_value([[1, 0], [1]])
+          },
+          expected=ragged_factory_ops.constant_value([[[3], [1, 2]], [[3]]])),
       dict(
           op=array_ops.gather_nd,
-          kwargs={'params': ragged.constant_value([[7, 8], [9]]),
-                  'indices': [[0, 1], [1, 0], [0, 0]]},
-          expected=ragged.constant_value([8, 9, 7])),
+          kwargs={
+              'params': ragged_factory_ops.constant_value([[7, 8], [9]]),
+              'indices': [[0, 1], [1, 0], [0, 0]]
+          },
+          expected=ragged_factory_ops.constant_value([8, 9, 7])),
       dict(
           op=array_ops.stack,
-          args=([ragged.constant_value([[1, 2, 3], [4]], dtype=np.int32),
-                 np.array([[5, 6]], dtype=np.int32)],),
-          expected=ragged.constant_value([[[1, 2, 3], [4]], [[5, 6]]])),
+          args=([
+              ragged_factory_ops.constant_value([[1, 2, 3], [4]],
+                                                dtype=np.int32),
+              np.array([[5, 6]], dtype=np.int32)
+          ],),
+          expected=ragged_factory_ops.constant_value([[[1, 2, 3], [4]],
+                                                      [[5, 6]]])),
       dict(
           op=array_ops.tile,
-          args=([ragged.constant_value([[1, 2], [3]], dtype=np.int32), [2, 3]]),
-          expected=ragged.constant_value([[1, 2, 1, 2, 1, 2], [3, 3, 3],
-                                          [1, 2, 1, 2, 1, 2], [3, 3, 3]])),
+          args=([
+              ragged_factory_ops.constant_value([[1, 2], [3]], dtype=np.int32),
+              [2, 3]
+          ]),
+          expected=ragged_factory_ops.constant_value([[1, 2, 1, 2, 1, 2],
+                                                      [3, 3, 3],
+                                                      [1, 2, 1, 2, 1, 2],
+                                                      [3, 3, 3]])),
       dict(
           op=array_ops.where,
-          args=(ragged.constant_value([[True, False], [True]]),
-                ragged.constant_value([[b'A', b'B'], [b'C']]),
-                ragged.constant_value([[b'a', b'b'], [b'c']])),
-          expected=ragged.constant_value([[b'A', b'b'], [b'C']])),
+          args=(ragged_factory_ops.constant_value([[True, False], [True]]),
+                ragged_factory_ops.constant_value([[b'A', b'B'], [b'C']]),
+                ragged_factory_ops.constant_value([[b'a', b'b'], [b'c']])),
+          expected=ragged_factory_ops.constant_value([[b'A', b'b'], [b'C']])),
       dict(
           op=math_ops.unsorted_segment_sum,
-          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
-                  'segment_ids': ragged.constant_value([[0, 2], [0]]),
-                  'num_segments': 3},
+          kwargs={
+              'data': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'segment_ids': ragged_factory_ops.constant_value([[0, 2], [0]]),
+              'num_segments': 3
+          },
           expected=[4, 0, 2]),
       dict(
           op=math_ops.unsorted_segment_prod,
-          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
-                  'segment_ids': ragged.constant_value([[0, 2], [0]]),
-                  'num_segments': 3},
+          kwargs={
+              'data': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'segment_ids': ragged_factory_ops.constant_value([[0, 2], [0]]),
+              'num_segments': 3
+          },
           expected=[3, 1, 2]),
       dict(
           op=math_ops.unsorted_segment_min,
-          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
-                  'segment_ids': ragged.constant_value([[0, 1], [0]]),
-                  'num_segments': 2},
+          kwargs={
+              'data': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'segment_ids': ragged_factory_ops.constant_value([[0, 1], [0]]),
+              'num_segments': 2
+          },
           expected=[1, 2]),
       dict(
           op=math_ops.unsorted_segment_max,
-          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
-                  'segment_ids': ragged.constant_value([[0, 1], [0]]),
-                  'num_segments': 2},
+          kwargs={
+              'data': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'segment_ids': ragged_factory_ops.constant_value([[0, 1], [0]]),
+              'num_segments': 2
+          },
           expected=[3, 2]),
       dict(
           op=math_ops.unsorted_segment_mean,
-          kwargs={'data': ragged.constant_value([[1, 2], [3]]),
-                  'segment_ids': ragged.constant_value([[0, 1], [0]]),
-                  'num_segments': 2},
+          kwargs={
+              'data': ragged_factory_ops.constant_value([[1, 2], [3]]),
+              'segment_ids': ragged_factory_ops.constant_value([[0, 1], [0]]),
+              'num_segments': 2
+          },
           expected=[2, 2]),
       dict(
           op=math_ops.unsorted_segment_sqrt_n,
-          kwargs={'data': ragged.constant_value([[1.0, 2.0], [3.0, 4.0, 6.0]]),
-                  'segment_ids': ragged.constant_value([[0, 1], [0, 0, 0]]),
-                  'num_segments': 2},
+          kwargs={
+              'data':
+                  ragged_factory_ops.constant_value([[1.0, 2.0],
+                                                     [3.0, 4.0, 6.0]]),
+              'segment_ids':
+                  ragged_factory_ops.constant_value([[0, 1], [0, 0, 0]]),
+              'num_segments':
+                  2
+          },
           expected=[7.0, 2.0]),
       dict(
           op=math_ops.reduce_sum,
-          kwargs={'input_tensor': ragged.constant_value([[1, 2], [3, 4, 5]]),
-                  'axis': 1},
+          kwargs={
+              'input_tensor':
+                  ragged_factory_ops.constant_value([[1, 2], [3, 4, 5]]),
+              'axis':
+                  1
+          },
           expected=[3, 12]),
       dict(
           op=math_ops.reduce_prod,
-          kwargs={'input_tensor': ragged.constant_value([[1, 2], [3, 4, 5]]),
-                  'axis': 1},
+          kwargs={
+              'input_tensor':
+                  ragged_factory_ops.constant_value([[1, 2], [3, 4, 5]]),
+              'axis':
+                  1
+          },
           expected=[2, 60]),
       dict(
           op=math_ops.reduce_min,
-          kwargs={'input_tensor': ragged.constant_value([[1, 2], [3, 4, 5]]),
-                  'axis': 1},
+          kwargs={
+              'input_tensor':
+                  ragged_factory_ops.constant_value([[1, 2], [3, 4, 5]]),
+              'axis':
+                  1
+          },
           expected=[1, 3]),
       dict(
           op=math_ops.reduce_max,
-          kwargs={'input_tensor': ragged.constant_value([[1, 2], [3, 4, 5]]),
-                  'axis': 1},
+          kwargs={
+              'input_tensor':
+                  ragged_factory_ops.constant_value([[1, 2], [3, 4, 5]]),
+              'axis':
+                  1
+          },
           expected=[2, 5]),
       dict(
           op=math_ops.reduce_mean,
-          kwargs={'input_tensor': ragged.constant_value([[1, 3], [3, 4, 5]]),
-                  'axis': 1},
+          kwargs={
+              'input_tensor':
+                  ragged_factory_ops.constant_value([[1, 3], [3, 4, 5]]),
+              'axis':
+                  1
+          },
           expected=[2, 4]),
       dict(
           op=math_ops.reduce_any,
-          kwargs={'input_tensor': ragged.constant_value([[True, False],
-                                                         [True, True, True]]),
-                  'axis': 1},
+          kwargs={
+              'input_tensor':
+                  ragged_factory_ops.constant_value([[True, False],
+                                                     [True, True, True]]),
+              'axis':
+                  1
+          },
           expected=[True, True]),
       dict(
           op=math_ops.reduce_all,
-          kwargs={'input_tensor': ragged.constant_value([[True, False],
-                                                         [True, True, True]]),
-                  'axis': 1},
+          kwargs={
+              'input_tensor':
+                  ragged_factory_ops.constant_value([[True, False],
+                                                     [True, True, True]]),
+              'axis':
+                  1
+          },
           expected=[False, True]),
   ])
   def testRaggedDispatch(self, op, expected, args=(), kwargs=None):
diff --git a/tensorflow/python/ops/ragged/ragged_eager_test.py b/tensorflow/python/ops/ragged/ragged_eager_test.py
index f1befbf961..86f01aace0 100644
--- a/tensorflow/python/ops/ragged/ragged_eager_test.py
+++ b/tensorflow/python/ops/ragged/ragged_eager_test.py
@@ -21,7 +21,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -35,7 +35,7 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
       dict(pylist=[[[1, 2], [3, 4]], [[5, 6], [], [7, 8]]], ragged_rank=1),
   ])
   def testRaggedTensorToList(self, pylist, ragged_rank=None):
-    rt = ragged.constant(pylist, ragged_rank)
+    rt = ragged_factory_ops.constant(pylist, ragged_rank)
     self.assertRaggedEqual(rt, pylist)
 
   @parameterized.parameters([
@@ -43,7 +43,7 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
       dict(pylist=[[[1, 2], [3]], [[4, 5, 6], [], [7]]]),
   ])
   def testRaggedTensorStr(self, pylist):
-    rt = ragged.constant(pylist)
+    rt = ragged_factory_ops.constant(pylist)
     self.assertEqual(str(rt), '<tf.RaggedTensor %s>' % pylist)
 
 
diff --git a/tensorflow/python/ops/ragged/ragged_expand_dims_op_test.py b/tensorflow/python/ops/ragged/ragged_expand_dims_op_test.py
index 072f330e3c..c747bb3049 100644
--- a/tensorflow/python/ops/ragged/ragged_expand_dims_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_expand_dims_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.expand_dims."""
+"""Tests for ragged_array_ops.expand_dims."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -21,7 +21,8 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -32,7 +33,7 @@ class RaggedExpandDimsOpTest(ragged_test_util.RaggedTensorTestCase,
 
   # An example 4-d ragged tensor with shape [3, (D2), (D3), 2], and the
   # expected result calling for expand_dims on each axis.  c.f. the table of
-  # expected result shapes in the ragged.expand_dims docstring.
+  # expected result shapes in the ragged_array_ops.expand_dims docstring.
   EXAMPLE4D = [[[[1, 1], [2, 2]], [[3, 3]]],
                [],
                [[], [[4, 4], [5, 5], [6, 6]]]]  # pyformat: disable
@@ -113,8 +114,8 @@ class RaggedExpandDimsOpTest(ragged_test_util.RaggedTensorTestCase,
                            expected,
                            ragged_rank=None,
                            expected_shape=None):
-    rt = ragged.constant(rt_input, ragged_rank=ragged_rank)
-    expanded = ragged.expand_dims(rt, axis=axis)
+    rt = ragged_factory_ops.constant(rt_input, ragged_rank=ragged_rank)
+    expanded = ragged_array_ops.expand_dims(rt, axis=axis)
     self.assertEqual(expanded.shape.ndims, rt.shape.ndims + 1)
     if expected_shape is not None:
       self.assertEqual(expanded.shape.as_list(), expected_shape)
diff --git a/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py b/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
index 07cf910202..c6998e274b 100644
--- a/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_from_sparse_op_test.py
@@ -25,7 +25,7 @@ from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.ragged import ragged_test_util
-from tensorflow.python.ops.ragged import RaggedTensor
+from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor
 from tensorflow.python.platform import googletest
 
 
diff --git a/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py b/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
index 6a3d639c5e..68d3953f4c 100644
--- a/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_from_tensor_op_test.py
@@ -25,7 +25,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_test_util
-from tensorflow.python.ops.ragged import RaggedTensor
+from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor
 from tensorflow.python.platform import googletest
 
 
diff --git a/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py b/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
index 6673192752..d4bffeb401 100644
--- a/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_gather_nd_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for tf.ragged.gather_nd."""
+"""Tests for ragged_array_ops.gather_nd."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -26,7 +26,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -45,18 +46,19 @@ class RaggedGatherNdOpTest(ragged_test_util.RaggedTensorTestCase,
       #=========================================================================
       dict(
           descr='Docstring example 1',
-          params=ragged.constant_value(DOCSTRING_PARAMS),
+          params=ragged_factory_ops.constant_value(DOCSTRING_PARAMS),
           indices=[[2], [0]],
-          expected=ragged.constant_value([[[], [b'210']],
-                                          [[b'000', b'001'], [b'010']]])),
+          expected=ragged_factory_ops.constant_value(
+              [[[], [b'210']], [[b'000', b'001'], [b'010']]])),
       dict(
           descr='Docstring example 2',
-          params=ragged.constant_value(DOCSTRING_PARAMS),
+          params=ragged_factory_ops.constant_value(DOCSTRING_PARAMS),
           indices=[[2, 1], [0, 0]],
-          expected=ragged.constant_value([[b'210'], [b'000', b'001']])),
+          expected=ragged_factory_ops.constant_value(
+              [[b'210'], [b'000', b'001']])),
       dict(
           descr='Docstring example 3',
-          params=ragged.constant_value(DOCSTRING_PARAMS),
+          params=ragged_factory_ops.constant_value(DOCSTRING_PARAMS),
           indices=[[0, 0, 1], [1, 1, 2]],
           expected=[b'001', b'112']),
       #=========================================================================
@@ -64,146 +66,157 @@ class RaggedGatherNdOpTest(ragged_test_util.RaggedTensorTestCase,
       #=========================================================================
       dict(
           descr='params: [B1, (B2)], indices: [0], result: [B1, (B2)]',
-          params=ragged.constant_value([['a', 'b', 'c'], ['d']]),
+          params=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d']]),
           indices=np.zeros([0], dtype=np.int32),
-          expected=ragged.constant_value([[b'a', b'b', b'c'], [b'd']])),
+          expected=ragged_factory_ops.constant_value(
+              [[b'a', b'b', b'c'], [b'd']])),
       dict(
           descr='params: [B1, (B2)], indices: [A1, 0], result: [A1, B1, (B2)]',
-          params=ragged.constant_value([['a', 'b', 'c'], ['d']]),
+          params=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d']]),
           indices=np.zeros([3, 0], dtype=np.int32),
-          expected=ragged.constant_value([[[b'a', b'b', b'c'], [b'd']],
-                                          [[b'a', b'b', b'c'], [b'd']],
-                                          [[b'a', b'b', b'c'], [b'd']]])),
+          expected=ragged_factory_ops.constant_value(
+              [[[b'a', b'b', b'c'], [b'd']],
+               [[b'a', b'b', b'c'], [b'd']],
+               [[b'a', b'b', b'c'], [b'd']]])),
       dict(
           descr=('params: [B1, (B2)], indices: [A1, A2, 0], '
                  'result: [A1, A2, B1, (B2)]'),
-          params=ragged.constant_value([['a', 'b', 'c'], ['d']]),
+          params=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d']]),
           indices=np.zeros([1, 3, 0], dtype=np.int32),
-          expected=ragged.constant_value([[[[b'a', b'b', b'c'], [b'd']],
-                                           [[b'a', b'b', b'c'], [b'd']],
-                                           [[b'a', b'b', b'c'], [b'd']]]])),
+          expected=ragged_factory_ops.constant_value(
+              [[[[b'a', b'b', b'c'], [b'd']],
+                [[b'a', b'b', b'c'], [b'd']],
+                [[b'a', b'b', b'c'], [b'd']]]])),
       dict(
           descr='params: [B1], indices: [A1, (A2), 0], result: [A1, (A2), B1]',
           params=['a'],
-          indices=ragged.constant_value([[[], []], [[]]],
-                                        ragged_rank=1,
-                                        dtype=np.int32),
-          expected=ragged.constant_value([[[b'a'], [b'a']], [[b'a']]],
-                                         ragged_rank=1)),
+          indices=ragged_factory_ops.constant_value(
+              [[[], []], [[]]],
+              ragged_rank=1,
+              dtype=np.int32),
+          expected=ragged_factory_ops.constant_value(
+              [[[b'a'], [b'a']], [[b'a']]],
+              ragged_rank=1)),
       #=========================================================================
       # Indices with 1 value (selects row from params)
       #=========================================================================
       dict(
           descr='params: [B1, (B2)], indices: [A1, 1], result: [A1, (B2)]',
-          params=ragged.constant_value([['a', 'b', 'c'], ['d']]),
+          params=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d']]),
           indices=[[1], [0]],
-          expected=ragged.constant_value([[b'd'], [b'a', b'b', b'c']])),
+          expected=ragged_factory_ops.constant_value(
+              [[b'd'], [b'a', b'b', b'c']])),
       dict(
           descr=('params: [B1, (B2), (B3)], indices: [A1, 1], '
                  'result: [A1, (B2), (B3)]'),
-          params=ragged.constant_value([[['a', 'b', 'c'], ['d']],
-                                        [['e', 'f']]]),
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b', 'c'], ['d']], [['e', 'f']]]),
           indices=[[1], [1]],
-          expected=ragged.constant_value([[[b'e', b'f']], [[b'e', b'f']]])),
+          expected=ragged_factory_ops.constant_value(
+              [[[b'e', b'f']], [[b'e', b'f']]])),
       dict(
           descr=('params: [B1, B2, B3], indices: [A1, (A2), 1], '
                  'result: [A1, (A2), B2, B3]'),
           params=[[['a']], [['b']]],
-          indices=ragged.constant_value([[[0]]], ragged_rank=1),
-          expected=ragged.constant_value([[[[b'a']]]], ragged_rank=1)),
+          indices=ragged_factory_ops.constant_value([[[0]]], ragged_rank=1),
+          expected=ragged_factory_ops.constant_value(
+              [[[[b'a']]]], ragged_rank=1)),
       #=========================================================================
       # Indices with 2 values (selects row & col from params)
       #=========================================================================
       dict(
           descr='params: [B1, (B2)], indices: [A1, 2], result: [A1]',
-          params=ragged.constant_value([['a', 'b', 'c'], ['d']]),
+          params=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d']]),
           indices=[[1, 0], [0, 0], [0, 2]],
-          expected=ragged.constant_value([b'd', b'a', b'c'])),
+          expected=ragged_factory_ops.constant_value([b'd', b'a', b'c'])),
       dict(
           descr=('params: [B1, (B2), (B3)], indices: [A1, 2], '
                  'result: [A1, (B3)]'),
-          params=ragged.constant_value([[['a', 'b', 'c'], ['d']],
-                                        [['e', 'f']]]),
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b', 'c'], ['d']], [['e', 'f']]]),
           indices=[[1, 0], [0, 1], [0, 0]],
-          expected=ragged.constant_value([[b'e', b'f'], [b'd'],
-                                          [b'a', b'b', b'c']])),
+          expected=ragged_factory_ops.constant_value(
+              [[b'e', b'f'], [b'd'], [b'a', b'b', b'c']])),
       dict(
           descr=('params: [B1, (B2), (B3)], indices: [A1, A2, 2], '
                  'result: [A1, (A2), (B3)]'),
-          params=ragged.constant_value([[['a', 'b', 'c'], ['d']],
-                                        [['e', 'f']]]),
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b', 'c'], ['d']], [['e', 'f']]]),
           indices=[[[1, 0], [0, 1], [0, 0]]],
-          expected=ragged.constant_value([[[b'e', b'f'], [b'd'],
-                                           [b'a', b'b', b'c']]])),
+          expected=ragged_factory_ops.constant_value(
+              [[[b'e', b'f'], [b'd'], [b'a', b'b', b'c']]])),
       dict(
           descr=('params: [B1, (B2), B3], indices: [A1, A2, 2], '
                  'result: [A1, A2, B3]'),
-          params=ragged.constant_value([[['a', 'b'], ['c', 'd']],
-                                        [['e', 'f']]],
-                                       ragged_rank=1),
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b'], ['c', 'd']],
+               [['e', 'f']]],
+              ragged_rank=1),
           indices=[[[1, 0], [0, 1], [0, 0]]],
           expected=[[[b'e', b'f'], [b'c', b'd'], [b'a', b'b']]]),
       dict(
           descr=('params: [B1, (B2), B3], indices: [A1, A2, A3, 2], '
                  'result: [A1, A2, A3, B3]'),
-          params=ragged.constant_value([[['a', 'b'], ['c', 'd']],
-                                        [['e', 'f']]],
-                                       ragged_rank=1),
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b'], ['c', 'd']],
+               [['e', 'f']]],
+              ragged_rank=1),
           indices=[[[[1, 0], [0, 1], [0, 0]]]],
           expected=[[[[b'e', b'f'], [b'c', b'd'], [b'a', b'b']]]]),
       dict(
           descr=('params: [B1, (B2), (B3)], indices: [A1, (A2), 2], '
                  'result: [A1, (A2), (B3)]'),
-          params=ragged.constant_value([[['a', 'b', 'c'], ['d']],
-                                        [['e', 'f']]]),
-          indices=ragged.constant_value([[[1, 0], [0, 1]], [[0, 0]]],
-                                        ragged_rank=1),
-          expected=ragged.constant_value([[[b'e', b'f'], [b'd']],
-                                          [[b'a', b'b', b'c']]])),
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b', 'c'], ['d']], [['e', 'f']]]),
+          indices=ragged_factory_ops.constant_value(
+              [[[1, 0], [0, 1]], [[0, 0]]],
+              ragged_rank=1),
+          expected=ragged_factory_ops.constant_value(
+              [[[b'e', b'f'], [b'd']], [[b'a', b'b', b'c']]])),
       #=========================================================================
       # Indices with 3 values
       #=========================================================================
       dict(
           descr=('params: [B1, (B2), (B3)], indices: [A1, 3], '
                  'result: [A1]'),
-          params=ragged.constant_value([[['a', 'b', 'c'], ['d']],
-                                        [['e', 'f']]]),
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b', 'c'], ['d']], [['e', 'f']]]),
           indices=[[1, 0, 1], [0, 0, 0], [0, 1, 0]],
           expected=[b'f', b'a', b'd']),
       dict(
           descr=('params: [B1, (B2), B3], indices: [A1, 3], '
                  'result: [A1]'),
-          params=ragged.constant_value([[['a', 'b'], ['c', 'd']],
-                                        [['e', 'f']]],
-                                       ragged_rank=1),
+          params=ragged_factory_ops.constant_value(
+              [[['a', 'b'], ['c', 'd']], [['e', 'f']]],
+              ragged_rank=1),
           indices=[[1, 0, 1], [0, 0, 0], [0, 1, 1]],
           expected=[b'f', b'a', b'd']),
       dict(
           descr=('params: [B1, (B2), (B3), B4], indices: [A1, 3], '
                  'result: [A1, B4]'),
-          params=ragged.constant_value([[[['a', 'b'], ['c', 'd']],
-                                         [['e', 'f']]]],
-                                       ragged_rank=2),
+          params=ragged_factory_ops.constant_value(
+              [[[['a', 'b'], ['c', 'd']], [['e', 'f']]]],
+              ragged_rank=2),
           indices=[[0, 0, 1], [0, 0, 0], [0, 1, 0]],
           expected=[[b'c', b'd'], [b'a', b'b'], [b'e', b'f']]),
   ])  # pyformat: disable
   def testRaggedGatherNd(self, descr, params, indices, expected):
-    result = ragged.gather_nd(params, indices)
+    result = ragged_array_ops.gather_nd(params, indices)
     self.assertRaggedEqual(result, expected)
 
   def testRaggedGatherNdUnknownRankError(self):
     if context.executing_eagerly():
       return
-    params = ragged.constant([['a', 'b'], ['c', 'd']])
+    params = ragged_factory_ops.constant([['a', 'b'], ['c', 'd']])
     indices1 = array_ops.placeholder(dtypes.int32, shape=None)
     indices2 = array_ops.placeholder(dtypes.int32, shape=[None])
 
     with self.assertRaisesRegexp(ValueError,
                                  'indices.rank be statically known.'):
-      ragged.gather_nd(params, indices1)
+      ragged_array_ops.gather_nd(params, indices1)
     with self.assertRaisesRegexp(
         ValueError, r'indices.shape\[-1\] must be statically known.'):
-      ragged.gather_nd(params, indices2)
+      ragged_array_ops.gather_nd(params, indices2)
 
   @parameterized.parameters([
       dict(
@@ -211,12 +224,12 @@ class RaggedGatherNdOpTest(ragged_test_util.RaggedTensorTestCase,
           indices=0,
           error=(ValueError, errors.InvalidArgumentError)),
       dict(
-          params=ragged.constant_value([['a']]),
+          params=ragged_factory_ops.constant_value([['a']]),
           indices=0,
           message='indices.rank must be at least 1.'),
       dict(
           params=['a', 'b', 'c'],
-          indices=ragged.constant_value([[0]]),
+          indices=ragged_factory_ops.constant_value([[0]]),
           message='The innermost dimension of indices may not be ragged'),
   ])
   def testRaggedGatherNdStaticError(self,
@@ -225,7 +238,7 @@ class RaggedGatherNdOpTest(ragged_test_util.RaggedTensorTestCase,
                                     message=None,
                                     error=ValueError):
     with self.assertRaisesRegexp(error, message):
-      ragged.gather_nd(params, indices)
+      ragged_array_ops.gather_nd(params, indices)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_gather_op_test.py b/tensorflow/python/ops/ragged/ragged_gather_op_test.py
index 42efdc8a7d..9914b56448 100644
--- a/tensorflow/python/ops/ragged/ragged_gather_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_gather_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.gather."""
+"""Tests for ragged_array_ops.gather."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -25,7 +25,8 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -36,74 +37,79 @@ class RaggedGatherOpTest(ragged_test_util.RaggedTensorTestCase):
   def testDocStringExamples(self):
     params = constant_op.constant(['a', 'b', 'c', 'd', 'e'])
     indices = constant_op.constant([3, 1, 2, 1, 0])
-    ragged_params = ragged.constant([['a', 'b', 'c'], ['d'], [], ['e']])
-    ragged_indices = ragged.constant([[3, 1, 2], [1], [], [0]])
+    ragged_params = ragged_factory_ops.constant([['a', 'b', 'c'], ['d'], [],
+                                                 ['e']])
+    ragged_indices = ragged_factory_ops.constant([[3, 1, 2], [1], [], [0]])
     self.assertRaggedEqual(
-        ragged.gather(params, ragged_indices),
+        ragged_array_ops.gather(params, ragged_indices),
         [[b'd', b'b', b'c'], [b'b'], [], [b'a']])
     self.assertRaggedEqual(
-        ragged.gather(ragged_params, indices),
+        ragged_array_ops.gather(ragged_params, indices),
         [[b'e'], [b'd'], [], [b'd'], [b'a', b'b', b'c']])
     self.assertRaggedEqual(
-        ragged.gather(ragged_params, ragged_indices),
+        ragged_array_ops.gather(ragged_params, ragged_indices),
         [[[b'e'], [b'd'], []], [[b'd']], [], [[b'a', b'b', b'c']]])
 
   def testTensorParamsAndTensorIndices(self):
     params = ['a', 'b', 'c', 'd', 'e']
     indices = [2, 0, 2, 1]
     self.assertRaggedEqual(
-        ragged.gather(params, indices), [b'c', b'a', b'c', b'b'])
-    self.assertIsInstance(ragged.gather(params, indices), ops.Tensor)
+        ragged_array_ops.gather(params, indices), [b'c', b'a', b'c', b'b'])
+    self.assertIsInstance(ragged_array_ops.gather(params, indices), ops.Tensor)
 
   def testRaggedParamsAndTensorIndices(self):
-    params = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
+    params = ragged_factory_ops.constant([['a', 'b'], ['c', 'd', 'e'], ['f'],
+                                          [], ['g']])
     indices = [2, 0, 2, 1]
     self.assertRaggedEqual(
-        ragged.gather(params, indices),
+        ragged_array_ops.gather(params, indices),
         [[b'f'], [b'a', b'b'], [b'f'], [b'c', b'd', b'e']])
 
   def testTensorParamsAndRaggedIndices(self):
     params = ['a', 'b', 'c', 'd', 'e']
-    indices = ragged.constant([[2, 1], [1, 2, 0], [3]])
+    indices = ragged_factory_ops.constant([[2, 1], [1, 2, 0], [3]])
     self.assertRaggedEqual(
-        ragged.gather(params, indices),
+        ragged_array_ops.gather(params, indices),
         [[b'c', b'b'], [b'b', b'c', b'a'], [b'd']])
 
   def testRaggedParamsAndRaggedIndices(self):
-    params = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
-    indices = ragged.constant([[2, 1], [1, 2, 0], [3]])
+    params = ragged_factory_ops.constant([['a', 'b'], ['c', 'd', 'e'], ['f'],
+                                          [], ['g']])
+    indices = ragged_factory_ops.constant([[2, 1], [1, 2, 0], [3]])
     self.assertRaggedEqual(
-        ragged.gather(params, indices),
+        ragged_array_ops.gather(params, indices),
         [[[b'f'], [b'c', b'd', b'e']],                # [[p[2], p[1]      ],
          [[b'c', b'd', b'e'], [b'f'], [b'a', b'b']],  #  [p[1], p[2], p[0]],
          [[]]]                                        #  [p[3]            ]]
     )  # pyformat: disable
 
   def testRaggedParamsAndScalarIndices(self):
-    params = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
+    params = ragged_factory_ops.constant([['a', 'b'], ['c', 'd', 'e'], ['f'],
+                                          [], ['g']])
     indices = 1
-    self.assertRaggedEqual(ragged.gather(params, indices), [b'c', b'd', b'e'])
+    self.assertRaggedEqual(
+        ragged_array_ops.gather(params, indices), [b'c', b'd', b'e'])
 
   def test3DRaggedParamsAnd2DTensorIndices(self):
-    params = ragged.constant([[['a', 'b'], []], [['c', 'd'], ['e'], ['f']],
-                              [['g']]])
+    params = ragged_factory_ops.constant([[['a', 'b'], []],
+                                          [['c', 'd'], ['e'], ['f']], [['g']]])
     indices = [[1, 2], [0, 1], [2, 2]]
     self.assertRaggedEqual(
-        ragged.gather(params, indices),
+        ragged_array_ops.gather(params, indices),
         [[[[b'c', b'd'], [b'e'], [b'f']], [[b'g']]],            # [[p1, p2],
          [[[b'a', b'b'], []], [[b'c', b'd'], [b'e'], [b'f']]],  #  [p0, p1],
          [[[b'g']], [[b'g']]]]                                  #  [p2, p2]]
     )  # pyformat: disable
 
   def testTensorParamsAnd4DRaggedIndices(self):
-    indices = ragged.constant(
+    indices = ragged_factory_ops.constant(
         [[[[3, 4], [0, 6]], []], [[[2, 1], [1, 0]], [[2, 5]], [[2, 3]]],
          [[[1, 0]]]],  # pyformat: disable
         ragged_rank=2,
         inner_shape=(2,))
     params = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
     self.assertRaggedEqual(
-        ragged.gather(params, indices),
+        ragged_array_ops.gather(params, indices),
         [[[[b'd', b'e'], [b'a', b'g']], []],
          [[[b'c', b'b'], [b'b', b'a']], [[b'c', b'f']], [[b'c', b'd']]],
          [[[b'b', b'a']]]])  # pyformat: disable
@@ -111,27 +117,27 @@ class RaggedGatherOpTest(ragged_test_util.RaggedTensorTestCase):
   def testOutOfBoundsError(self):
     tensor_params = ['a', 'b', 'c']
     tensor_indices = [0, 1, 2]
-    ragged_params = ragged.constant([['a', 'b'], ['c']])
-    ragged_indices = ragged.constant([[0, 3]])
+    ragged_params = ragged_factory_ops.constant([['a', 'b'], ['c']])
+    ragged_indices = ragged_factory_ops.constant([[0, 3]])
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                  r'indices\[1\] = 3 is not in \[0, 3\)'):
-      self.evaluate(ragged.gather(tensor_params, ragged_indices))
+      self.evaluate(ragged_array_ops.gather(tensor_params, ragged_indices))
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                  r'indices\[2\] = 2 is not in \[0, 2\)'):
-      self.evaluate(ragged.gather(ragged_params, tensor_indices))
+      self.evaluate(ragged_array_ops.gather(ragged_params, tensor_indices))
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                  r'indices\[1\] = 3 is not in \[0, 2\)'):
-      self.evaluate(ragged.gather(ragged_params, ragged_indices))
+      self.evaluate(ragged_array_ops.gather(ragged_params, ragged_indices))
 
   def testUnknownIndicesRankError(self):
     if context.executing_eagerly():
       return
-    params = ragged.constant([], ragged_rank=1)
+    params = ragged_factory_ops.constant([], ragged_rank=1)
     indices = constant_op.constant([0], dtype=dtypes.int64)
     indices = array_ops.placeholder_with_default(indices, None)
     self.assertRaisesRegexp(ValueError,
                             r'indices\.shape\.ndims must be known statically',
-                            ragged.gather, params, indices)
+                            ragged_array_ops.gather, params, indices)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py b/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
index 45e60ff492..e9a7cdf6c0 100644
--- a/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_map_flat_values_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.map_flat_values."""
+"""Tests for ragged_functional_ops.map_flat_values."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -24,7 +24,9 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_functional_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -38,66 +40,66 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
                                         args=(),
                                         kwargs=None):
     kwargs = kwargs or {}
-    result = ragged.map_flat_values(op, *args, **kwargs)
+    result = ragged_functional_ops.map_flat_values(op, *args, **kwargs)
     self.assertRaggedEqual(result, expected)
 
   def testDocStringExamples(self):
     """Test the examples in apply_op_to_ragged_values.__doc__."""
-    rt = ragged.constant([[1, 2, 3], [], [4, 5], [6]])
-    v1 = ragged.map_flat_values(array_ops.ones_like, rt)
-    v2 = ragged.map_flat_values(math_ops.multiply, rt, rt)
-    v3 = ragged.map_flat_values(math_ops.add, rt, 5)
+    rt = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5], [6]])
+    v1 = ragged_functional_ops.map_flat_values(array_ops.ones_like, rt)
+    v2 = ragged_functional_ops.map_flat_values(math_ops.multiply, rt, rt)
+    v3 = ragged_functional_ops.map_flat_values(math_ops.add, rt, 5)
     self.assertRaggedEqual(v1, [[1, 1, 1], [], [1, 1], [1]])
     self.assertRaggedEqual(v2, [[1, 4, 9], [], [16, 25], [36]])
     self.assertRaggedEqual(v3, [[6, 7, 8], [], [9, 10], [11]])
 
   def testOpWithSingleRaggedTensorArg(self):
-    tensor = ragged.constant([[1, 2, 3], [], [4, 5]])
+    tensor = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5]])
     self.assertRaggedMapInnerValuesReturns(
         op=array_ops.zeros_like,
         args=(tensor,),
         expected=[[0, 0, 0], [], [0, 0]])
 
   def testOpWithTwoRaggedTensorArgs(self):
-    x = ragged.constant([[3, 1, 4], [], [1, 5]])
-    y = ragged.constant([[1, 2, 3], [], [4, 5]])
+    x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
+    y = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply, args=(x, y), expected=[[3, 2, 12], [], [4, 25]])
 
   def testOpWithRaggedTensorAndScalarArgs(self):
-    y = ragged.constant([[1, 2, 3], [], [4, 5]])
+    y = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply, args=(5, y), expected=[[5, 10, 15], [], [20, 25]])
 
   def testOpWithThreeRaggedTensorArgs(self):
-    condition = ragged.constant(
+    condition = ragged_factory_ops.constant(
         [[True, True, False], [], [True, False]])  # pyformat: disable
-    x = ragged.constant([['a', 'b', 'c'], [], ['d', 'e']])
-    y = ragged.constant([['A', 'B', 'C'], [], ['D', 'E']])
+    x = ragged_factory_ops.constant([['a', 'b', 'c'], [], ['d', 'e']])
+    y = ragged_factory_ops.constant([['A', 'B', 'C'], [], ['D', 'E']])
     self.assertRaggedMapInnerValuesReturns(
         op=array_ops.where,
         args=(condition, x, y),
         expected=[[b'a', b'b', b'C'], [], [b'd', b'E']])
 
   def testOpWithRaggedTensorListArg(self):
-    x = ragged.constant([[1, 2, 3], [], [4, 5]])
-    y = ragged.constant([[10, 20, 30], [], [40, 50]])
+    x = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5]])
+    y = ragged_factory_ops.constant([[10, 20, 30], [], [40, 50]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.add_n,
         args=([x, y, x],),
         expected=[[12, 24, 36], [], [48, 60]])
 
   def testOpWithKeywordArgs(self):
-    x = ragged.constant([[3, 1, 4], [], [1, 5]])
-    y = ragged.constant([[1, 2, 3], [], [4, 5]])
+    x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
+    y = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply,
         kwargs=dict(x=x, y=y),
         expected=[[3, 2, 12], [], [4, 25]])
 
   def testOpWithMixedPositionalAndKeywordArgs(self):
-    x = ragged.constant([[3, 1, 4], [], [1, 5]])
-    y = ragged.constant([[1, 2, 3], [], [4, 5]])
+    x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
+    y = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply,
         args=(x,),
@@ -105,7 +107,7 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
         expected=[[3, 2, 12], [], [4, 25]])
 
   def testNonElementWiseOp(self):
-    x = ragged.constant(
+    x = ragged_factory_ops.constant(
         [[[3, 1, 4], [1, 5, 9], [2, 6, 5]], [], [[3, 5, 8], [9, 7, 9]]],
         ragged_rank=1)
     self.assertRaggedMapInnerValuesReturns(
@@ -124,16 +126,18 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
         math_ops.multiply(x0, y0), [3, 2, 12, 4, 25, 54, 14, 48, 45])
 
     # ragged_rank=1
-    x1 = ragged.constant([[3, 1, 4], [], [1, 5], [9, 2], [6, 5]])
-    y1 = ragged.constant([[1, 2, 3], [], [4, 5], [6, 7], [8, 9]])
+    x1 = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5], [9, 2], [6, 5]])
+    y1 = ragged_factory_ops.constant([[1, 2, 3], [], [4, 5], [6, 7], [8, 9]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply,
         args=(x1, y1),
         expected=[[3, 2, 12], [], [4, 25], [54, 14], [48, 45]])
 
     # ragged_rank=2
-    x2 = ragged.constant([[[3, 1, 4]], [], [[], [1, 5]], [[9, 2], [6, 5]]])
-    y2 = ragged.constant([[[1, 2, 3]], [], [[], [4, 5]], [[6, 7], [8, 9]]])
+    x2 = ragged_factory_ops.constant([[[3, 1, 4]], [], [[], [1, 5]],
+                                      [[9, 2], [6, 5]]])
+    y2 = ragged_factory_ops.constant([[[1, 2, 3]], [], [[], [4, 5]],
+                                      [[6, 7], [8, 9]]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply,
         args=(x2, y2),
@@ -144,10 +148,10 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
                  ])  # pyformat: disable
 
     # ragged_rank=3
-    x3 = ragged.constant([[[[3, 1, 4]], []], [], [[[], [1, 5]]],
-                          [[[9, 2], [6, 5]]]])
-    y3 = ragged.constant([[[[1, 2, 3]], []], [], [[[], [4, 5]]],
-                          [[[6, 7], [8, 9]]]])
+    x3 = ragged_factory_ops.constant([[[[3, 1, 4]], []], [], [[[], [1, 5]]],
+                                      [[[9, 2], [6, 5]]]])
+    y3 = ragged_factory_ops.constant([[[[1, 2, 3]], []], [], [[[], [4, 5]]],
+                                      [[[6, 7], [8, 9]]]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply,
         args=(x3, y3),
@@ -159,8 +163,8 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
         ])  # pyformat: disable
 
   def testOpWithRaggedRankThree(self):
-    x = ragged.constant([[[3, 1, 4]], [], [[], [1, 5]]])
-    y = ragged.constant([[[1, 2, 3]], [], [[], [4, 5]]])
+    x = ragged_factory_ops.constant([[[3, 1, 4]], [], [[], [1, 5]]])
+    y = ragged_factory_ops.constant([[[1, 2, 3]], [], [[], [4, 5]]])
     self.assertRaggedMapInnerValuesReturns(
         op=math_ops.multiply,
         args=(x, y),
@@ -173,29 +177,30 @@ class RaggedMapInnerValuesOpTest(ragged_test_util.RaggedTensorTestCase):
         op=math_ops.multiply, args=(x, y), expected=[[2, 4], [6, 8], [10, 12]])
 
   def testRaggedTensorSplitsRaggedRankMismatchError(self):
-    x = ragged.constant([[3, 1, 4], [], [1, 5]])
-    y = ragged.constant([[[3, 1, 4], []], [], [[1, 5]]])
-    self.assertRaisesRegexp(ValueError,
-                            r'Inputs must have identical ragged splits.*',
-                            ragged.map_flat_values, math_ops.add, x, y)
+    x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
+    y = ragged_factory_ops.constant([[[3, 1, 4], []], [], [[1, 5]]])
+    self.assertRaisesRegexp(
+        ValueError, r'Inputs must have identical ragged splits.*',
+        ragged_functional_ops.map_flat_values, math_ops.add, x, y)
 
   def testRaggedTensorSplitsValueMismatchError(self):
-    x = ragged.constant([[3, 1, 4], [], [1, 5]])
-    y = ragged.constant([[1], [2, 3], [4, 5]])
+    x = ragged_factory_ops.constant([[3, 1, 4], [], [1, 5]])
+    y = ragged_factory_ops.constant([[1], [2, 3], [4, 5]])
     self.assertRaisesRegexp(errors.InvalidArgumentError,
                             r'Inputs must have identical ragged splits.*',
-                            ragged.map_flat_values, math_ops.add, x, y)
+                            ragged_functional_ops.map_flat_values, math_ops.add,
+                            x, y)
 
   def testRaggedTensorSplitsMismatchErrorAtRuntime(self):
     splits1 = array_ops.placeholder_with_default(
         constant_op.constant([0, 3, 3, 5], dtypes.int64), None)
     splits2 = array_ops.placeholder_with_default(
         constant_op.constant([0, 1, 3, 5], dtypes.int64), None)
-    x = ragged.RaggedTensor.from_row_splits([3, 1, 4, 1, 5], splits1)
-    y = ragged.RaggedTensor.from_row_splits([1, 2, 3, 4, 5], splits2)
+    x = ragged_tensor.RaggedTensor.from_row_splits([3, 1, 4, 1, 5], splits1)
+    y = ragged_tensor.RaggedTensor.from_row_splits([1, 2, 3, 4, 5], splits2)
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                  r'.*Inputs must have identical ragged splits'):
-      self.evaluate(ragged.map_flat_values(math_ops.add, x, y))
+      self.evaluate(ragged_functional_ops.map_flat_values(math_ops.add, x, y))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
index 171cb347de..15206404b2 100644
--- a/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_map_fn_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.map_fn."""
+"""Tests for ragged_map_ops.map_fn."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -26,8 +26,12 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.keras import backend
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops as mo
-from tensorflow.python.ops import ragged
 from tensorflow.python.ops import string_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_functional_ops
+from tensorflow.python.ops.ragged import ragged_map_ops
+from tensorflow.python.ops.ragged import ragged_math_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -66,7 +70,7 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
           elems=[[1, 2, 3], [4, 5], [6, 7]],
           expected_output=[[2, 3, 4], [5, 6], [7, 8]],
           dtype=dtypes.int64,
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=1),
       ),
       # [d1, (d2), d3] -> [d1, (d2), d3]
@@ -75,45 +79,45 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
           elems=[[[1, 2], [3, 4]], [], [[5, 6], [7, 8], [9, 0]]],
           elems_ragged_rank=1,
           expected_ragged_rank=1,
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=1),
           expected_output=[[[2, 3], [4, 5]], [], [[6, 7], [8, 9], [10, 1]]],
       ),
       # [d1, (d2)] -> [d1, (d2), (d3)]
       dict(
-          fn=lambda x: ragged.RaggedTensor.from_row_starts(x, [0]),
+          fn=lambda x: ragged_tensor.RaggedTensor.from_row_starts(x, [0]),
           elems=[[1, 2, 3], [4, 5], [6, 7]],
           expected_output=[[[1, 2, 3]], [[4, 5]], [[6, 7]]],
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=2),
       ),
       # [d1, (d2), (d3)] -> [d1, (d2), (d3)]
       dict(
-          fn=lambda x: ragged.map_flat_values(mo.add, x, 1),
+          fn=lambda x: ragged_functional_ops.map_flat_values(mo.add, x, 1),
           elems=[[[1, 2, 3]], [[4, 5], [6, 7]]],
           expected_output=[[[2, 3, 4]], [[5, 6], [7, 8]]],
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=2),
       ),
       # [d1, (d2), (d3)] -> [d1, (d2)]
       dict(
-          fn=lambda x: ragged.reduce_sum(x, axis=1),
+          fn=lambda x: ragged_math_ops.reduce_sum(x, axis=1),
           elems=[[[1, 2, 3]], [[4, 5], [6, 7]]],
           expected_output=[[6], [9, 13]],
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=1),
       ),
       # [d1, (d2), (d3)] -> [d1, (d3)]
       dict(
-          fn=lambda x: ragged.reduce_sum(x, axis=0),
+          fn=lambda x: ragged_math_ops.reduce_sum(x, axis=0),
           elems=[[[1, 2, 3]], [[4, 5], [6, 7]]],
           expected_output=[[1, 2, 3], [10, 12]],
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=1),
       ),
       # [d1, (d2), (d3)] -> [d1]
       dict(
-          fn=ragged.reduce_sum,
+          fn=ragged_math_ops.reduce_sum,
           elems=[[[1, 2, 3]], [[4, 5], [6, 7]]],
           expected_output=[6, 22],
           result_dtype=dtypes.int64,
@@ -123,16 +127,16 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
           fn=mo.range,
           elems=[4, 0, 2],
           expected_output=[[0, 1, 2, 3], [], [0, 1]],
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=1),
       ),
       # [d1] -> [d1, (d2), (d3)]
       dict(
-          fn=lambda x: ragged.range(mo.range(x)),
+          fn=lambda x: ragged_math_ops.range(mo.range(x)),
           elems=[5, 0, 3],
           expected_output=[[[], [0], [0, 1], [0, 1, 2], [0, 1, 2, 3]], [],
                            [[], [0], [0, 1]]],
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=2),
       ),
       # [d1, (d2), (d3), (d4a), (d5)] ->  [d1, (d2), (d3), (d4b), (d5)]
@@ -141,7 +145,7 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
           elems=[[[[[1, 2, 3]], [[4], [5]]]], [[[[6, 7]]], [[[8], []]]]],
           expected_output=[[[[[2, 3, 4]], [[5], [6]]]], [[[[7, 8]]], [[[9],
                                                                        []]]]],
-          result_dtype=ragged.RaggedTensorType(
+          result_dtype=ragged_tensor.RaggedTensorType(
               dtype=dtypes.int64, ragged_rank=4),
       ),
   ])
@@ -158,25 +162,25 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
       result_dtype=None,
       infer_shape=False,
   ):
-    elems = ragged.constant(elems, dtype, elems_ragged_rank)
-    output = ragged.map_fn(
+    elems = ragged_factory_ops.constant(elems, dtype, elems_ragged_rank)
+    output = ragged_map_ops.map_fn(
         fn=fn, elems=elems, dtype=result_dtype, infer_shape=infer_shape)
 
-    expected_rt = ragged.constant(
+    expected_rt = ragged_factory_ops.constant(
         expected_output, ragged_rank=expected_ragged_rank)
     self.assertRaggedEqual(expected_rt, output)
 
   def testRaggedMapOnStructure(self):
-    batman = ragged.constant([[1, 2, 3], [4], [5, 6, 7]])
+    batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
     # [[10, 20, 30], [40], [50, 60, 70]]
-    robin = ragged.map_flat_values(mo.multiply, batman, 10)
+    robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)
 
     features = {'batman': batman, 'robin': robin}
 
     def _reduce_sum_from_all(f):
       return mo.reduce_sum(f['batman']) + mo.reduce_sum(f['robin'])
 
-    output = ragged.map_fn(
+    output = ragged_map_ops.map_fn(
         fn=_reduce_sum_from_all,
         elems=features,
         dtype=dtypes.int32,
@@ -186,9 +190,9 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
 
   # Test mapping over a dict of RTs can produce a dict of RTs.
   def testRaggedMapOnStructure_RaggedOutputs(self):
-    batman = ragged.constant([[1, 2, 3], [4], [5, 6, 7]])
+    batman = ragged_factory_ops.constant([[1, 2, 3], [4], [5, 6, 7]])
     # [[10, 20, 30], [40], [50, 60, 70]]
-    robin = ragged.map_flat_values(mo.multiply, batman, 10)
+    robin = ragged_functional_ops.map_flat_values(mo.multiply, batman, 10)
 
     features = {'batman': batman, 'robin': robin}
 
@@ -198,15 +202,17 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
           'robin': f['robin'] + 1,
       }
 
-    output = ragged.map_fn(
+    output = ragged_map_ops.map_fn(
         fn=_increment,
         elems=features,
         infer_shape=False,
         dtype={
             'batman':
-                ragged.RaggedTensorType(dtype=dtypes.int32, ragged_rank=1),
+                ragged_tensor.RaggedTensorType(
+                    dtype=dtypes.int32, ragged_rank=1),
             'robin':
-                ragged.RaggedTensorType(dtype=dtypes.int32, ragged_rank=1)
+                ragged_tensor.RaggedTensorType(
+                    dtype=dtypes.int32, ragged_rank=1)
         },
     )
 
@@ -214,8 +220,8 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
     self.assertRaggedEqual(output['robin'], [[11, 21, 31], [41], [51, 61, 71]])
 
   def testZip(self):
-    x = ragged.constant([[10, 20], [30, 40], [50, 60], [70], [80, 90, 100]],
-                        dtypes.int64)
+    x = ragged_factory_ops.constant(
+        [[10, 20], [30, 40], [50, 60], [70], [80, 90, 100]], dtypes.int64)
     y = array_ops.expand_dims(mo.range(x.nrows(), dtype=dtypes.int64), axis=1)
 
     def _zip(foo):
@@ -223,9 +229,9 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
       bar = backend.tile(y_val, array_ops.shape(x_val))
       return array_ops.stack([bar, x_val], axis=1)
 
-    output = ragged.map_fn(
+    output = ragged_map_ops.map_fn(
         _zip, (y, x),
-        dtype=ragged.RaggedTensorType(dtype=dtypes.int64, ragged_rank=1),
+        dtype=ragged_tensor.RaggedTensorType(dtype=dtypes.int64, ragged_rank=1),
         infer_shape=False)
 
     self.assertRaggedEqual(
@@ -233,43 +239,46 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
                  [[3, 70]], [[4, 80], [4, 90], [4, 100]]])
 
   def testBatchGather(self):
-    tokens = ragged.constant([['hello', '.', 'there'], ['merhaba'],
-                              ['bonjour', '.', 'ca va', '?']])
-    indices = ragged.constant([[0, 2], [0], [0, 2]])
+    tokens = ragged_factory_ops.constant([['hello', '.', 'there'], ['merhaba'],
+                                          ['bonjour', '.', 'ca va', '?']])
+    indices = ragged_factory_ops.constant([[0, 2], [0], [0, 2]])
 
     def gather(x):
       tokens_val, indices_val = x
       return array_ops.gather(tokens_val, indices_val)
 
     data = tokens, indices
-    out = ragged.map_fn(
+    out = ragged_map_ops.map_fn(
         gather,
         data,
-        dtype=ragged.RaggedTensorType(dtype=dtypes.string, ragged_rank=1),
+        dtype=ragged_tensor.RaggedTensorType(
+            dtype=dtypes.string, ragged_rank=1),
         infer_shape=False)
 
     self.assertRaggedEqual(
         out, [[b'hello', b'there'], [b'merhaba'], [b'bonjour', b'ca va']])
 
   def testMismatchRaggedRank(self):
-    elems = ragged.constant([[[1, 2, 3]], [[4, 5], [6, 7]]])
-    fn = lambda x: ragged.reduce_sum(x, axis=0)
+    elems = ragged_factory_ops.constant([[[1, 2, 3]], [[4, 5], [6, 7]]])
+    fn = lambda x: ragged_math_ops.reduce_sum(x, axis=0)
     with self.assertRaisesWithLiteralMatch(
         ValueError, r'The declared ragged rank (23) mismatches the result (1)'):
-      _ = ragged.map_fn(
+      _ = ragged_map_ops.map_fn(
           fn,
           elems,
-          dtype=ragged.RaggedTensorType(dtype=dtypes.int64, ragged_rank=23))
+          dtype=ragged_tensor.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=23))
 
   def testMismatchRaggedRank2(self):
-    elems = ragged.constant([[1, 2, 3], [4, 5], [6, 7]])
-    fn = lambda x: ragged.RaggedTensor.from_row_starts(x, [0])
+    elems = ragged_factory_ops.constant([[1, 2, 3], [4, 5], [6, 7]])
+    fn = lambda x: ragged_tensor.RaggedTensor.from_row_starts(x, [0])
     with self.assertRaisesWithLiteralMatch(
         ValueError, r'The declared ragged rank (10) mismatches the result (1)'):
-      _ = ragged.map_fn(
+      _ = ragged_map_ops.map_fn(
           fn,
           elems,
-          dtype=ragged.RaggedTensorType(dtype=dtypes.int64, ragged_rank=10))
+          dtype=ragged_tensor.RaggedTensorType(
+              dtype=dtypes.int64, ragged_rank=10))
 
   def testMapOnSparseTensor(self):
     s = sparse_tensor.SparseTensor(
@@ -277,8 +286,8 @@ class RaggedMapOpTest(ragged_test_util.RaggedTensorTestCase,
         values=[0, 5, 0, 4],
         dense_shape=[2, 2],
     )
-    t2 = ragged.RaggedTensor.from_sparse(s)
-    id_t2 = ragged.map_fn(
+    t2 = ragged_tensor.RaggedTensor.from_sparse(s)
+    id_t2 = ragged_map_ops.map_fn(
         lambda x: x, t2,
     )
     self.assertRaggedEqual(id_t2, [[0, 5], [0, 4]])
diff --git a/tensorflow/python/ops/ragged/ragged_operators_test.py b/tensorflow/python/ops/ragged/ragged_operators_test.py
index 78bb37c341..d1c6b902f2 100644
--- a/tensorflow/python/ops/ragged/ragged_operators_test.py
+++ b/tensorflow/python/ops/ragged/ragged_operators_test.py
@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -28,16 +28,16 @@ from tensorflow.python.platform import googletest
 class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase):
 
   def testOrderingOperators(self):
-    x = ragged.constant([[1, 5], [3]])
-    y = ragged.constant([[4, 5], [1]])
+    x = ragged_factory_ops.constant([[1, 5], [3]])
+    y = ragged_factory_ops.constant([[4, 5], [1]])
     self.assertRaggedEqual((x > y), [[False, False], [True]])
     self.assertRaggedEqual((x >= y), [[False, True], [True]])
     self.assertRaggedEqual((x < y), [[True, False], [False]])
     self.assertRaggedEqual((x <= y), [[True, True], [False]])
 
   def testArithmeticOperators(self):
-    x = ragged.constant([[1.0, -2.0], [8.0]])
-    y = ragged.constant([[4.0, 4.0], [2.0]])
+    x = ragged_factory_ops.constant([[1.0, -2.0], [8.0]])
+    y = ragged_factory_ops.constant([[4.0, 4.0], [2.0]])
     self.assertRaggedEqual(abs(x), [[1.0, 2.0], [8.0]])
 
     self.assertRaggedEqual((-x), [[-1.0, 2.0], [-8.0]])
@@ -70,8 +70,8 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase):
     self.assertRaggedEqual((x % 2.0), [[1.0, 0.0], [0.0]])
 
   def testLogicalOperators(self):
-    a = ragged.constant([[True, True], [False]])
-    b = ragged.constant([[True, False], [False]])
+    a = ragged_factory_ops.constant([[True, True], [False]])
+    b = ragged_factory_ops.constant([[True, False], [False]])
     self.assertRaggedEqual((~a), [[False, False], [True]])
 
     self.assertRaggedEqual((a & b), [[True, False], [False]])
@@ -87,7 +87,7 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase):
     self.assertRaggedEqual((True ^ b), [[False, True], [True]])
 
   def testDummyOperators(self):
-    a = ragged.constant([[True, True], [False]])
+    a = ragged_factory_ops.constant([[True, True], [False]])
     with self.assertRaisesRegexp(TypeError,
                                  'RaggedTensor may not be used as a boolean.'):
       bool(a)
diff --git a/tensorflow/python/ops/ragged/ragged_range_op_test.py b/tensorflow/python/ops/ragged/ragged_range_op_test.py
index 5ab3d4abc3..afe5866cff 100644
--- a/tensorflow/python/ops/ragged/ragged_range_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_range_op_test.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_math_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -30,32 +30,32 @@ class RaggedRangeOpTest(ragged_test_util.RaggedTensorTestCase):
 
   def testDocStringExamples(self):
     """Examples from ragged_range.__doc__."""
-    rt1 = ragged.range([3, 5, 2])
+    rt1 = ragged_math_ops.range([3, 5, 2])
     self.assertRaggedEqual(rt1, [[0, 1, 2], [0, 1, 2, 3, 4], [0, 1]])
 
-    rt2 = ragged.range([0, 5, 8], [3, 3, 12])
+    rt2 = ragged_math_ops.range([0, 5, 8], [3, 3, 12])
     self.assertRaggedEqual(rt2, [[0, 1, 2], [], [8, 9, 10, 11]])
 
-    rt3 = ragged.range([0, 5, 8], [3, 3, 12], 2)
+    rt3 = ragged_math_ops.range([0, 5, 8], [3, 3, 12], 2)
     self.assertRaggedEqual(rt3, [[0, 2], [], [8, 10]])
 
   def testBasicRanges(self):
     # Specify limits only.
     self.assertRaggedEqual(
-        ragged.range([0, 3, 5]),
+        ragged_math_ops.range([0, 3, 5]),
         [list(range(0)), list(range(3)),
          list(range(5))])
 
     # Specify starts and limits.
     self.assertRaggedEqual(
-        ragged.range([0, 3, 5], [2, 3, 10]),
+        ragged_math_ops.range([0, 3, 5], [2, 3, 10]),
         [list(range(0, 2)),
          list(range(3, 3)),
          list(range(5, 10))])
 
     # Specify starts, limits, and deltas.
     self.assertRaggedEqual(
-        ragged.range([0, 3, 5], [4, 4, 15], [2, 3, 4]),
+        ragged_math_ops.range([0, 3, 5], [4, 4, 15], [2, 3, 4]),
         [list(range(0, 4, 2)),
          list(range(3, 4, 3)),
          list(range(5, 15, 4))])
@@ -63,20 +63,21 @@ class RaggedRangeOpTest(ragged_test_util.RaggedTensorTestCase):
   def testFloatRanges(self):
     expected = [[0.0, 0.4, 0.8, 1.2, 1.6, 2.0, 2.4, 2.8, 3.2, 3.6], [3.0],
                 [5.0, 7.2, 9.4, 11.6, 13.8]]
-    actual = ragged.range([0.0, 3.0, 5.0], [3.9, 4.0, 15.0], [0.4, 1.5, 2.2])
+    actual = ragged_math_ops.range([0.0, 3.0, 5.0], [3.9, 4.0, 15.0],
+                                   [0.4, 1.5, 2.2])
     self.assertEqual(
         expected,
         [[round(v, 5) for v in row] for row in self.eval_to_list(actual)])
 
   def testNegativeDeltas(self):
     self.assertRaggedEqual(
-        ragged.range([0, 3, 5], limits=0, deltas=-1),
+        ragged_math_ops.range([0, 3, 5], limits=0, deltas=-1),
         [list(range(0, 0, -1)),
          list(range(3, 0, -1)),
          list(range(5, 0, -1))])
 
     self.assertRaggedEqual(
-        ragged.range([0, -3, 5], limits=0, deltas=[-1, 1, -2]),
+        ragged_math_ops.range([0, -3, 5], limits=0, deltas=[-1, 1, -2]),
         [list(range(0, 0, -1)),
          list(range(-3, 0, 1)),
          list(range(5, 0, -2))])
@@ -84,40 +85,43 @@ class RaggedRangeOpTest(ragged_test_util.RaggedTensorTestCase):
   def testBroadcast(self):
     # Specify starts and limits, broadcast deltas.
     self.assertRaggedEqual(
-        ragged.range([0, 3, 5], [4, 4, 15], 3),
+        ragged_math_ops.range([0, 3, 5], [4, 4, 15], 3),
         [list(range(0, 4, 3)),
          list(range(3, 4, 3)),
          list(range(5, 15, 3))])
 
     # Broadcast all arguments.
-    self.assertRaggedEqual(ragged.range(0, 5, 1), [list(range(0, 5, 1))])
+    self.assertRaggedEqual(
+        ragged_math_ops.range(0, 5, 1), [list(range(0, 5, 1))])
 
   def testEmptyRanges(self):
-    rt1 = ragged.range([0, 5, 3], [0, 3, 5])
-    rt2 = ragged.range([0, 5, 5], [0, 3, 5], -1)
+    rt1 = ragged_math_ops.range([0, 5, 3], [0, 3, 5])
+    rt2 = ragged_math_ops.range([0, 5, 5], [0, 3, 5], -1)
     self.assertRaggedEqual(rt1, [[], [], [3, 4]])
     self.assertRaggedEqual(rt2, [[], [5, 4], []])
 
   def testShapeFnErrors(self):
-    self.assertRaises((ValueError, errors.InvalidArgumentError), ragged.range,
-                      [[0]], 5)
-    self.assertRaises((ValueError, errors.InvalidArgumentError), ragged.range,
-                      0, [[5]])
-    self.assertRaises((ValueError, errors.InvalidArgumentError), ragged.range,
-                      0, 5, [[0]])
-    self.assertRaises((ValueError, errors.InvalidArgumentError), ragged.range,
-                      [0], [1, 2])
+    self.assertRaises((ValueError, errors.InvalidArgumentError),
+                      ragged_math_ops.range, [[0]], 5)
+    self.assertRaises((ValueError, errors.InvalidArgumentError),
+                      ragged_math_ops.range, 0, [[5]])
+    self.assertRaises((ValueError, errors.InvalidArgumentError),
+                      ragged_math_ops.range, 0, 5, [[0]])
+    self.assertRaises((ValueError, errors.InvalidArgumentError),
+                      ragged_math_ops.range, [0], [1, 2])
 
   def testKernelErrors(self):
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                  r'Requires delta != 0'):
-      self.evaluate(ragged.range(0, 0, 0))
+      self.evaluate(ragged_math_ops.range(0, 0, 0))
 
   def testShape(self):
-    self.assertRaggedEqual(ragged.range(0, 0, 1).shape.as_list(), [1, None])
-    self.assertRaggedEqual(ragged.range([1, 2, 3]).shape.as_list(), [3, None])
     self.assertRaggedEqual(
-        ragged.range([1, 2, 3], [4, 5, 6]).shape.as_list(), [3, None])
+        ragged_math_ops.range(0, 0, 1).shape.as_list(), [1, None])
+    self.assertRaggedEqual(
+        ragged_math_ops.range([1, 2, 3]).shape.as_list(), [3, None])
+    self.assertRaggedEqual(
+        ragged_math_ops.range([1, 2, 3], [4, 5, 6]).shape.as_list(), [3, None])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_reduce_op_test.py b/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
index 890460221b..a9fa378eeb 100644
--- a/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_reduce_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.reduce_<AGGREGATE> ops."""
+"""Tests for ragged_math_ops.reduce_<AGGREGATE> ops."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -26,7 +26,8 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_math_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -52,88 +53,88 @@ class RaggedReduceOpsTest(ragged_test_util.RaggedTensorTestCase,
       #    [2, 6   ]]
       #=========================================================================
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=0,
           expected=[15, 12, 4]  # = [3+1+9+2, 1+5+6, 4]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=-2,
           expected=[15, 12, 4]  # = [3+1+9+2, 1+5+6, 4]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=1,
           expected=[8, 6, 9, 8]  # = [3+1+4, 1+5, 9, 2+6]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=-1,
           expected=[8, 6, 9, 8]  # = [3+1+4, 1+5, 9, 2+6]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_prod,
+          ragged_reduce_op=ragged_math_ops.reduce_prod,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=0,
           expected=[54, 30, 4]  # = [3*1*9*2, 1*5*6, 4]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_prod,
+          ragged_reduce_op=ragged_math_ops.reduce_prod,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=1,
           expected=[12, 5, 9, 12]  # = [3*1*4, 1*5, 9, 2*6]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_min,
+          ragged_reduce_op=ragged_math_ops.reduce_min,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=0,
           expected=[1, 1, 4]  # = [min(3, 1, 9, 2), min(1, 5, 6), 4]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_min,
+          ragged_reduce_op=ragged_math_ops.reduce_min,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=1,
           expected=[1, 1, 9, 2]  # = [min(3, 1, 4), min(1, 5), 9, min(2, 6)]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_max,
+          ragged_reduce_op=ragged_math_ops.reduce_max,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=0,
           expected=[9, 6, 4]  # = [max(3, 1, 9, 2), max(1, 5, 6), 4]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_max,
+          ragged_reduce_op=ragged_math_ops.reduce_max,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=1,
           expected=[4, 5, 9, 6]  # = [max(3, 1, 4), max(1, 5), 9, max(2, 6)]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_mean,
+          ragged_reduce_op=ragged_math_ops.reduce_mean,
           rt_input=[[3, 1, 4], [1, 5], [9], [2, 6]],
           axis=0,
           expected=[3.75, 4, 4]  # = [mean(3, 1, 9, 2), mean(1, 5, 6), 4]
       ),
       dict(
-          ragged_reduce_op=ragged.reduce_any,
+          ragged_reduce_op=ragged_math_ops.reduce_any,
           rt_input=[[True, True], [True, True, False, True], [False, True]],
           axis=0,
           expected=[True, True, False, True]),
       dict(
-          ragged_reduce_op=ragged.reduce_any,
+          ragged_reduce_op=ragged_math_ops.reduce_any,
           rt_input=[[True, True], [True, True, False, True], [False, True]],
           axis=1,
           expected=[True, True, True]),
       dict(
-          ragged_reduce_op=ragged.reduce_all,
+          ragged_reduce_op=ragged_math_ops.reduce_all,
           rt_input=[[True, True], [True, True, False, True], [False, True]],
           axis=0,
           expected=[False, True, False, True]),
       dict(
-          ragged_reduce_op=ragged.reduce_all,
+          ragged_reduce_op=ragged_math_ops.reduce_all,
           rt_input=[[True, True], [True, True, False, True], [False, True]],
           axis=1,
           expected=[True, False, False]),
@@ -150,53 +151,53 @@ class RaggedReduceOpsTest(ragged_test_util.RaggedTensorTestCase,
 
       # axis=None
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=None,
           expected=0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9),
       dict(
-          ragged_reduce_op=ragged.reduce_prod,
+          ragged_reduce_op=ragged_math_ops.reduce_prod,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=None,
           expected=0 * 1 * 2 * 3 * 4 * 5 * 6 * 7 * 8 * 9),
       dict(
-          ragged_reduce_op=ragged.reduce_min,
+          ragged_reduce_op=ragged_math_ops.reduce_min,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=None,
           expected=min(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)),
       dict(
-          ragged_reduce_op=ragged.reduce_max,
+          ragged_reduce_op=ragged_math_ops.reduce_max,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=None,
           expected=max(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)),
       dict(
-          ragged_reduce_op=ragged.reduce_mean,
+          ragged_reduce_op=ragged_math_ops.reduce_mean,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=None,
           expected=mean(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)),
       # axis=0
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=0,
           expected=[0 + 4 + 5 + 7 + 8, 1 + 6 + 9, 2, 3]),
       dict(
-          ragged_reduce_op=ragged.reduce_prod,
+          ragged_reduce_op=ragged_math_ops.reduce_prod,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=0,
           expected=[0 * 4 * 5 * 7 * 8, 1 * 6 * 9, 2, 3]),
       dict(
-          ragged_reduce_op=ragged.reduce_min,
+          ragged_reduce_op=ragged_math_ops.reduce_min,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=0,
           expected=[min(0, 4, 5, 7, 8), min(1, 6, 9), 2, 3]),
       dict(
-          ragged_reduce_op=ragged.reduce_max,
+          ragged_reduce_op=ragged_math_ops.reduce_max,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=0,
           expected=[max(0, 4, 5, 7, 8), max(1, 6, 9), 2, 3]),
       dict(
-          ragged_reduce_op=ragged.reduce_mean,
+          ragged_reduce_op=ragged_math_ops.reduce_mean,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=0,
           expected=[mean(0, 4, 5, 7, 8),
@@ -205,24 +206,24 @@ class RaggedReduceOpsTest(ragged_test_util.RaggedTensorTestCase,
       # Note: we don't test mean here because it gives a NaN, and this will
       # cause assertEqual to fail (since NaN != NaN).  See testMeanNan().
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=1,
           expected=[0 + 1 + 2 + 3, 4, 0, 5 + 6, 7, 8 + 9]),
       dict(
-          ragged_reduce_op=ragged.reduce_prod,
+          ragged_reduce_op=ragged_math_ops.reduce_prod,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=1,
           expected=[0 * 1 * 2 * 3, 4, 1, 5 * 6, 7, 8 * 9]),
       dict(
-          ragged_reduce_op=ragged.reduce_min,
+          ragged_reduce_op=ragged_math_ops.reduce_min,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=1,
           expected=[min(0, 1, 2, 3), 4, _MAX_INT32,
                     min(5, 6), 7,
                     min(8, 9)]),
       dict(
-          ragged_reduce_op=ragged.reduce_max,
+          ragged_reduce_op=ragged_math_ops.reduce_max,
           rt_input=[[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]],
           axis=1,
           expected=[max(0, 1, 2, 3), 4, _MIN_INT32,
@@ -237,47 +238,47 @@ class RaggedReduceOpsTest(ragged_test_util.RaggedTensorTestCase,
       #  [[9   ]                ]]
       #=========================================================================
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=[],
           expected=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]]),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=None,
           expected=sum([1, 2, 3, 4, 5, 6, 7, 8, 9])),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=0,
           expected=[[1 + 6 + 9, 2 + 7], [], [3 + 8, 4, 5]]),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=1,
           expected=[[1 + 3, 2 + 4, 5], [6 + 8, 7], [], [9]]),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=2,
           expected=[[1 + 2, 0, 3 + 4 + 5], [6 + 7, 0, 8], [], [9]]),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=[0, 1],
           expected=[1 + 3 + 6 + 8 + 9, 2 + 4 + 7, 5]),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=[0, 2],
           expected=[1 + 6 + 9 + 2 + 7, 0, 3 + 8 + 4 + 5]),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=[1, 2],
           expected=[1 + 2 + 3 + 4 + 5, 6 + 7 + 8, 0, 9]),
       dict(
-          ragged_reduce_op=ragged.reduce_sum,
+          ragged_reduce_op=ragged_math_ops.reduce_sum,
           rt_input=[[[1, 2], [], [3, 4, 5]], [[6, 7], [], [8]], [], [[9]]],
           axis=[0, 1, 2],
           expected=sum([1, 2, 3, 4, 5, 6, 7, 8, 9])),
@@ -289,23 +290,23 @@ class RaggedReduceOpsTest(ragged_test_util.RaggedTensorTestCase,
       #  [[9   ]          ]]
       #=========================================================================
       dict(
-          ragged_reduce_op=ragged.reduce_mean,
+          ragged_reduce_op=ragged_math_ops.reduce_mean,
           rt_input=[[[1, 2], [3, 4, 5]], [[6, 7], [8]], [[9]]],
           axis=0,
           expected=[[mean(1, 6, 9), mean(2, 7)], [mean(3, 8), 4, 5]]),
       dict(
-          ragged_reduce_op=ragged.reduce_mean,
+          ragged_reduce_op=ragged_math_ops.reduce_mean,
           rt_input=[[[1, 2], [3, 4, 5]], [[6, 7], [8]], [[9]]],
           axis=1,
           expected=[[mean(1, 3), mean(2, 4), 5], [mean(6, 8), 7], [9]]),
       dict(
-          ragged_reduce_op=ragged.reduce_mean,
+          ragged_reduce_op=ragged_math_ops.reduce_mean,
           rt_input=[[[1, 2], [3, 4, 5]], [[6, 7], [8]], [[9]]],
           axis=2,
           expected=[[mean(1, 2), mean(3, 4, 5)], [mean(6, 7), 8], [9]]),
   )
   def testReduce(self, ragged_reduce_op, rt_input, axis, expected):
-    rt_input = ragged.constant(rt_input)
+    rt_input = ragged_factory_ops.constant(rt_input)
     reduced = ragged_reduce_op(rt_input, axis)
     self.assertRaggedEqual(reduced, expected)
 
@@ -319,27 +320,26 @@ class RaggedReduceOpsTest(ragged_test_util.RaggedTensorTestCase,
     expected = (
         np.array([0 + 1 + 2 + 3, 4, 0, 5 + 6, 7, 8 + 9]) / np.array(
             [4, 1, 0, 2, 1, 2]))
-    rt_input = ragged.constant(rt_as_list)
-    reduced = ragged.reduce_mean(rt_input, axis=1)
+    rt_input = ragged_factory_ops.constant(rt_as_list)
+    reduced = ragged_math_ops.reduce_mean(rt_input, axis=1)
     self.assertEqualWithNan(self.evaluate(reduced), expected)
 
   def testMeanWithTensorInputs(self):
     tensor = [[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]
     expected = [2.0, 20.0]
-    reduced = ragged.reduce_mean(tensor, axis=1)
+    reduced = ragged_math_ops.reduce_mean(tensor, axis=1)
     self.assertRaggedEqual(reduced, expected)
 
   def testErrors(self):
-    rt_input = ragged.constant([[1, 2, 3], [4, 5]])
+    rt_input = ragged_factory_ops.constant([[1, 2, 3], [4, 5]])
     axis = array_ops.placeholder_with_default(constant_op.constant([0]), None)
 
     if not context.executing_eagerly():
       self.assertRaisesRegexp(
           ValueError, r'axis must be known at graph construction time.',
-          ragged.reduce_sum, rt_input, axis)
-    self.assertRaisesRegexp(TypeError,
-                            r'axis must be an int; got str.*',
-                            ragged.reduce_sum, rt_input, ['x'])
+          ragged_math_ops.reduce_sum, rt_input, axis)
+    self.assertRaisesRegexp(TypeError, r'axis must be an int; got str.*',
+                            ragged_math_ops.reduce_sum, rt_input, ['x'])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py b/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
index 15112d6c9c..8f8089c9bf 100644
--- a/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_row_lengths_op_test.py
@@ -22,7 +22,8 @@ from absl.testing import parameterized
 
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -117,11 +118,11 @@ class RaggedRowLengthsOp(ragged_test_util.RaggedTensorTestCase,
                      axis=1,
                      ragged_rank=None,
                      expected_ragged_rank=None):
-    rt = ragged.constant(rt_input, ragged_rank=ragged_rank)
+    rt = ragged_factory_ops.constant(rt_input, ragged_rank=ragged_rank)
     lengths = rt.row_lengths(axis)
     self.assertRaggedEqual(lengths, expected)
     if expected_ragged_rank is not None:
-      if isinstance(lengths, ragged.RaggedTensor):
+      if isinstance(lengths, ragged_tensor.RaggedTensor):
         self.assertEqual(lengths.ragged_rank, expected_ragged_rank)
       else:
         self.assertEqual(0, expected_ragged_rank)
@@ -137,7 +138,7 @@ class RaggedRowLengthsOp(ragged_test_util.RaggedTensorTestCase,
           exception=(ValueError, errors.InvalidArgumentError)),
   ])
   def testErrors(self, rt_input, exception, message=None, axis=1):
-    rt = ragged.constant(rt_input)
+    rt = ragged_factory_ops.constant(rt_input)
     with self.assertRaisesRegexp(exception, message):
       rt.row_lengths(axis)
 
diff --git a/tensorflow/python/ops/ragged/ragged_row_splits_to_segment_ids_op_test.py b/tensorflow/python/ops/ragged/ragged_row_splits_to_segment_ids_op_test.py
index 2970540f3e..5384f3ac09 100644
--- a/tensorflow/python/ops/ragged/ragged_row_splits_to_segment_ids_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_row_splits_to_segment_ids_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the ragged.row_splits_to_segment_ids() op."""
+"""Tests for the segment_id_ops.row_splits_to_segment_ids() op."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -20,8 +20,8 @@ from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
 from tensorflow.python.ops.ragged import ragged_test_util
+from tensorflow.python.ops.ragged import segment_id_ops
 from tensorflow.python.platform import googletest
 
 
@@ -31,25 +31,25 @@ class RaggedSplitsToSegmentIdsOpTest(ragged_test_util.RaggedTensorTestCase):
   def testDocStringExample(self):
     splits = [0, 3, 3, 5, 6, 9]
     expected = [0, 0, 0, 2, 2, 3, 4, 4, 4]
-    segment_ids = ragged.row_splits_to_segment_ids(splits)
+    segment_ids = segment_id_ops.row_splits_to_segment_ids(splits)
     self.assertAllEqual(segment_ids, expected)
 
   def testEmptySplits(self):
     # Note: the splits for an empty ragged tensor contains a single zero.
-    segment_ids = ragged.row_splits_to_segment_ids([0])
+    segment_ids = segment_id_ops.row_splits_to_segment_ids([0])
     self.assertAllEqual(segment_ids, [])
 
   def testErrors(self):
     self.assertRaisesRegexp(ValueError, r'Invalid row_splits: \[\]',
-                            ragged.row_splits_to_segment_ids, [])
+                            segment_id_ops.row_splits_to_segment_ids, [])
     self.assertRaisesRegexp(
         ValueError, r'Tensor conversion requested dtype int64 for '
-        'Tensor with dtype float32', ragged.row_splits_to_segment_ids,
+        'Tensor with dtype float32', segment_id_ops.row_splits_to_segment_ids,
         constant_op.constant([0.5]))
     self.assertRaisesRegexp(ValueError, r'Shape \(\) must have rank 1',
-                            ragged.row_splits_to_segment_ids, 0)
+                            segment_id_ops.row_splits_to_segment_ids, 0)
     self.assertRaisesRegexp(ValueError, r'Shape \(1, 1\) must have rank 1',
-                            ragged.row_splits_to_segment_ids, [[0]])
+                            segment_id_ops.row_splits_to_segment_ids, [[0]])
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_segment_ids_to_row_splits_op_test.py b/tensorflow/python/ops/ragged/ragged_segment_ids_to_row_splits_op_test.py
index 4ed9626767..73ee42a19d 100644
--- a/tensorflow/python/ops/ragged/ragged_segment_ids_to_row_splits_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_segment_ids_to_row_splits_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for the ragged.segment_ids_to_row_splits() op."""
+"""Tests for the segment_id_ops.segment_ids_to_row_splits() op."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -20,8 +20,8 @@ from __future__ import print_function
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
 from tensorflow.python.ops.ragged import ragged_test_util
+from tensorflow.python.ops.ragged import segment_id_ops
 from tensorflow.python.platform import googletest
 
 
@@ -31,38 +31,38 @@ class RaggedSplitsToSegmentIdsOpTest(ragged_test_util.RaggedTensorTestCase):
   def testDocStringExample(self):
     segment_ids = [0, 0, 0, 2, 2, 3, 4, 4, 4]
     expected = [0, 3, 3, 5, 6, 9]
-    splits = ragged.segment_ids_to_row_splits(segment_ids)
+    splits = segment_id_ops.segment_ids_to_row_splits(segment_ids)
     self.assertAllEqual(splits, expected)
 
   def testEmptySegmentIds(self):
     # Note: the splits for an empty ragged tensor contains a single zero.
-    segment_ids = ragged.segment_ids_to_row_splits([])
+    segment_ids = segment_id_ops.segment_ids_to_row_splits([])
     self.assertAllEqual(segment_ids, [0])
 
   def testErrors(self):
     self.assertRaisesRegexp(TypeError,
                             r'segment_ids must be an integer tensor.*',
-                            ragged.segment_ids_to_row_splits,
+                            segment_id_ops.segment_ids_to_row_splits,
                             constant_op.constant([0.5]))
     self.assertRaisesRegexp(ValueError, r'Shape \(\) must have rank 1',
-                            ragged.segment_ids_to_row_splits, 0)
+                            segment_id_ops.segment_ids_to_row_splits, 0)
     self.assertRaisesRegexp(ValueError, r'Shape \(1, 1\) must have rank 1',
-                            ragged.segment_ids_to_row_splits, [[0]])
+                            segment_id_ops.segment_ids_to_row_splits, [[0]])
 
   def testNumSegments(self):
     segment_ids = [0, 0, 0, 2, 2, 3, 4, 4, 4]
     num_segments = 7
     expected = [0, 3, 3, 5, 6, 9, 9, 9]
-    splits = ragged.segment_ids_to_row_splits(segment_ids, num_segments)
+    splits = segment_id_ops.segment_ids_to_row_splits(segment_ids, num_segments)
     self.assertAllEqual(splits, expected)
 
   def testUnsortedSegmentIds(self):
     # Segment ids are not required to be sorted.
     segment_ids = [0, 4, 3, 2, 4, 4, 2, 0, 0]
-    splits1 = ragged.segment_ids_to_row_splits(segment_ids)
+    splits1 = segment_id_ops.segment_ids_to_row_splits(segment_ids)
     expected1 = [0, 3, 3, 5, 6, 9]
 
-    splits2 = ragged.segment_ids_to_row_splits(segment_ids, 7)
+    splits2 = segment_id_ops.segment_ids_to_row_splits(segment_ids, 7)
     expected2 = [0, 3, 3, 5, 6, 9, 9, 9]
     self.assertAllEqual(splits1, expected1)
     self.assertAllEqual(splits2, expected2)
diff --git a/tensorflow/python/ops/ragged/ragged_segment_op_test.py b/tensorflow/python/ops/ragged/ragged_segment_op_test.py
index be1f39afef..435ce87e00 100644
--- a/tensorflow/python/ops/ragged/ragged_segment_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_segment_op_test.py
@@ -25,7 +25,9 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_math_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -61,7 +63,7 @@ class RaggedSegmentOpsTest(ragged_test_util.RaggedTensorTestCase,
     Returns:
       The expected value, as a nested Python list.
     """
-    self.assertEqual(len(data), len(segment_ids))
+    self.assertLen(data, len(segment_ids))
 
     # Build an empty (num_segments x ncols) "grouped" matrix
     ncols = max(len(row) for row in data)
@@ -79,30 +81,30 @@ class RaggedSegmentOpsTest(ragged_test_util.RaggedTensorTestCase,
             for grouped_row in grouped]
 
   @parameterized.parameters(
-      (ragged.segment_sum, sum, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_sum, sum, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_sum, sum, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_sum, sum, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_prod, prod, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_prod, prod, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_prod, prod, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_prod, prod, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_min, min, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_min, min, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_min, min, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_min, min, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_max, max, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_max, max, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_max, max, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_max, max, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_mean, mean, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_mean, mean, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_mean, mean, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_mean, mean, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_sum, sum, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_sum, sum, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_sum, sum, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_sum, sum, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_prod, prod, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_prod, prod, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_prod, prod, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_prod, prod, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_min, min, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_min, min, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_min, min, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_min, min, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_max, max, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_max, max, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_max, max, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_max, max, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_mean, mean, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_mean, mean, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_mean, mean, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_mean, mean, [0, 0, 0, 10, 10, 10]),
   )
   def testRaggedSegment_Int(self, segment_op, combiner, segment_ids):
     rt_as_list = [[0, 1, 2, 3], [4], [], [5, 6], [7], [8, 9]]
-    rt = ragged.constant(rt_as_list)
+    rt = ragged_factory_ops.constant(rt_as_list)
     num_segments = max(segment_ids) + 1
     expected = self.expected_value(rt_as_list, segment_ids, num_segments,
                                    combiner)
@@ -111,34 +113,34 @@ class RaggedSegmentOpsTest(ragged_test_util.RaggedTensorTestCase,
     self.assertRaggedEqual(segmented, expected)
 
   @parameterized.parameters(
-      (ragged.segment_sum, sum, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_sum, sum, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_sum, sum, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_sum, sum, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_prod, prod, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_prod, prod, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_prod, prod, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_prod, prod, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_min, min, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_min, min, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_min, min, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_min, min, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_max, max, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_max, max, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_max, max, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_max, max, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_mean, mean, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_mean, mean, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_mean, mean, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_mean, mean, [0, 0, 0, 10, 10, 10]),
-      (ragged.segment_sqrt_n, sqrt_n, [0, 0, 1, 1, 2, 2]),
-      (ragged.segment_sqrt_n, sqrt_n, [0, 0, 0, 1, 1, 1]),
-      (ragged.segment_sqrt_n, sqrt_n, [5, 4, 3, 2, 1, 0]),
-      (ragged.segment_sqrt_n, sqrt_n, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_sum, sum, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_sum, sum, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_sum, sum, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_sum, sum, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_prod, prod, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_prod, prod, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_prod, prod, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_prod, prod, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_min, min, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_min, min, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_min, min, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_min, min, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_max, max, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_max, max, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_max, max, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_max, max, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_mean, mean, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_mean, mean, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_mean, mean, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_mean, mean, [0, 0, 0, 10, 10, 10]),
+      (ragged_math_ops.segment_sqrt_n, sqrt_n, [0, 0, 1, 1, 2, 2]),
+      (ragged_math_ops.segment_sqrt_n, sqrt_n, [0, 0, 0, 1, 1, 1]),
+      (ragged_math_ops.segment_sqrt_n, sqrt_n, [5, 4, 3, 2, 1, 0]),
+      (ragged_math_ops.segment_sqrt_n, sqrt_n, [0, 0, 0, 10, 10, 10]),
   )
   def testRaggedSegment_Float(self, segment_op, combiner, segment_ids):
     rt_as_list = [[0., 1., 2., 3.], [4.], [], [5., 6.], [7.], [8., 9.]]
-    rt = ragged.constant(rt_as_list)
+    rt = ragged_factory_ops.constant(rt_as_list)
     num_segments = max(segment_ids) + 1
     expected = self.expected_value(rt_as_list, segment_ids, num_segments,
                                    combiner)
@@ -147,14 +149,14 @@ class RaggedSegmentOpsTest(ragged_test_util.RaggedTensorTestCase,
     self.assertRaggedAlmostEqual(segmented, expected, places=5)
 
   def testRaggedRankTwo(self):
-    rt = ragged.constant([
+    rt = ragged_factory_ops.constant([
         [[111, 112, 113, 114], [121],],  # row 0
         [],                              # row 1
         [[], [321, 322], [331]],         # row 2
         [[411, 412]]                     # row 3
     ])  # pyformat: disable
     segment_ids1 = [0, 2, 2, 2]
-    segmented1 = ragged.segment_sum(rt, segment_ids1, 3)
+    segmented1 = ragged_math_ops.segment_sum(rt, segment_ids1, 3)
     expected1 = [[[111, 112, 113, 114], [121]],     # row 0
                  [],                                # row 1
                  [[411, 412], [321, 322], [331]]    # row 2
@@ -162,21 +164,21 @@ class RaggedSegmentOpsTest(ragged_test_util.RaggedTensorTestCase,
     self.assertRaggedEqual(segmented1, expected1)
 
     segment_ids2 = [1, 2, 1, 1]
-    segmented2 = ragged.segment_sum(rt, segment_ids2, 3)
+    segmented2 = ragged_math_ops.segment_sum(rt, segment_ids2, 3)
     expected2 = [[],
                  [[111+411, 112+412, 113, 114], [121+321, 322], [331]],
                  []]  # pyformat: disable
     self.assertRaggedEqual(segmented2, expected2)
 
   def testRaggedSegmentIds(self):
-    rt = ragged.constant([
+    rt = ragged_factory_ops.constant([
         [[111, 112, 113, 114], [121],],  # row 0
         [],                              # row 1
         [[], [321, 322], [331]],         # row 2
         [[411, 412]]                     # row 3
     ])  # pyformat: disable
-    segment_ids = ragged.constant([[1, 2], [], [1, 1, 2], [2]])
-    segmented = ragged.segment_sum(rt, segment_ids, 3)
+    segment_ids = ragged_factory_ops.constant([[1, 2], [], [1, 1, 2], [2]])
+    segmented = ragged_math_ops.segment_sum(rt, segment_ids, 3)
     expected = [[],
                 [111+321, 112+322, 113, 114],
                 [121+331+411, 412]]  # pyformat: disable
@@ -184,35 +186,35 @@ class RaggedSegmentOpsTest(ragged_test_util.RaggedTensorTestCase,
 
   def testShapeMismatchError1(self):
     dt = constant_op.constant([1, 2, 3, 4, 5, 6])
-    segment_ids = ragged.constant([[1, 2], []])
+    segment_ids = ragged_factory_ops.constant([[1, 2], []])
     self.assertRaisesRegexp(
         ValueError, 'segment_ids.shape must be a prefix of data.shape, '
-        'but segment_ids is ragged and data is not.', ragged.segment_sum, dt,
-        segment_ids, 3)
+        'but segment_ids is ragged and data is not.',
+        ragged_math_ops.segment_sum, dt, segment_ids, 3)
 
   def testShapeMismatchError2(self):
-    rt = ragged.constant([
+    rt = ragged_factory_ops.constant([
         [[111, 112, 113, 114], [121]],  # row 0
         [],                             # row 1
         [[], [321, 322], [331]],        # row 2
         [[411, 412]]                    # row 3
     ])  # pyformat: disable
-    segment_ids = ragged.constant([[1, 2], [1], [1, 1, 2], [2]])
+    segment_ids = ragged_factory_ops.constant([[1, 2], [1], [1, 1, 2], [2]])
 
     # Error is raised at graph-building time if we can detect it then.
     self.assertRaisesRegexp(
         errors.InvalidArgumentError,
         'segment_ids.shape must be a prefix of data.shape.*',
-        ragged.segment_sum, rt, segment_ids, 3)
+        ragged_math_ops.segment_sum, rt, segment_ids, 3)
 
     # Otherwise, error is raised when we run the graph.
-    segment_ids2 = ragged.RaggedTensor.from_row_splits(
+    segment_ids2 = ragged_tensor.RaggedTensor.from_row_splits(
         array_ops.placeholder_with_default(segment_ids.values, None),
         array_ops.placeholder_with_default(segment_ids.row_splits, None))
     with self.assertRaisesRegexp(
         errors.InvalidArgumentError,
         'segment_ids.shape must be a prefix of data.shape.*'):
-      self.evaluate(ragged.segment_sum(rt, segment_ids2, 3))
+      self.evaluate(ragged_math_ops.segment_sum(rt, segment_ids2, 3))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/ops/ragged/ragged_stack_op_test.py b/tensorflow/python/ops/ragged/ragged_stack_op_test.py
index 17d80b5aad..f9c825168e 100644
--- a/tensorflow/python/ops/ragged/ragged_stack_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_stack_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.stack."""
+"""Tests for ragged_array_ops.stack."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -22,7 +22,8 @@ from absl.testing import parameterized
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -279,11 +280,11 @@ class RaggedStackOpTest(ragged_test_util.RaggedTensorTestCase,
     if ragged_ranks is None:
       ragged_ranks = [None] * len(rt_inputs)
     rt_inputs = [
-        ragged.constant(rt_input, ragged_rank=rrank)
+        ragged_factory_ops.constant(rt_input, ragged_rank=rrank)  # pylint: disable=g-long-ternary
         if rrank != 0 else constant_op.constant(rt_input)
         for (rt_input, rrank) in zip(rt_inputs, ragged_ranks)
     ]
-    stacked = ragged.stack(rt_inputs, axis)
+    stacked = ragged_array_ops.stack(rt_inputs, axis)
     if expected_ragged_rank is not None:
       self.assertEqual(stacked.ragged_rank, expected_ragged_rank)
     if expected_shape is not None:
@@ -313,7 +314,8 @@ class RaggedStackOpTest(ragged_test_util.RaggedTensorTestCase,
           message='axis=3 out of bounds: expected -3<=axis<3'),
   )
   def testError(self, rt_inputs, axis, error, message):
-    self.assertRaisesRegexp(error, message, ragged.stack, rt_inputs, axis)
+    self.assertRaisesRegexp(error, message, ragged_array_ops.stack, rt_inputs,
+                            axis)
 
   def testSingleTensorInput(self):
     """Tests ragged_stack with a single tensor input.
@@ -322,8 +324,8 @@ class RaggedStackOpTest(ragged_test_util.RaggedTensorTestCase,
     also pass in a single value (as with tf.stack), in which case it is
     equivalent to expand_dims(axis=0).  This test exercises that path.
     """
-    rt_inputs = ragged.constant([[1, 2], [3, 4]])
-    stacked = ragged.stack(rt_inputs, 0)
+    rt_inputs = ragged_factory_ops.constant([[1, 2], [3, 4]])
+    stacked = ragged_array_ops.stack(rt_inputs, 0)
     self.assertRaggedEqual(stacked, [[[1, 2], [3, 4]]])
 
 
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py b/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
index 4e6ebdf332..025a221626 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_bounding_shape_op_test.py
@@ -19,7 +19,8 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -29,30 +30,31 @@ class RaggedTensorBoundingShapeOp(ragged_test_util.RaggedTensorTestCase):
 
   def testDocStringExample(self):
     # This is the example from ragged.bounding_shape.__doc__.
-    rt = ragged.constant([[1, 2, 3, 4], [5], [], [6, 7, 8, 9], [10]])
+    rt = ragged_factory_ops.constant([[1, 2, 3, 4], [5], [], [6, 7, 8, 9],
+                                      [10]])
     self.assertRaggedEqual(rt.bounding_shape(), [5, 4])
 
   def test2DRaggedTensorWithOneRaggedDimension(self):
     values = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
-    rt1 = ragged.RaggedTensor.from_row_splits(values, [0, 2, 5, 6, 6, 7])
-    rt2 = ragged.RaggedTensor.from_row_splits(values, [0, 7])
-    rt3 = ragged.RaggedTensor.from_row_splits(values, [0, 0, 7, 7])
+    rt1 = ragged_tensor.RaggedTensor.from_row_splits(values, [0, 2, 5, 6, 6, 7])
+    rt2 = ragged_tensor.RaggedTensor.from_row_splits(values, [0, 7])
+    rt3 = ragged_tensor.RaggedTensor.from_row_splits(values, [0, 0, 7, 7])
     self.assertRaggedEqual(rt1.bounding_shape(), [5, 3])
     self.assertRaggedEqual(rt2.bounding_shape(), [1, 7])
     self.assertRaggedEqual(rt3.bounding_shape(), [3, 7])
 
   def test3DRaggedTensorWithOneRaggedDimension(self):
     values = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13]]
-    rt1 = ragged.RaggedTensor.from_row_splits(values, [0, 2, 5, 6, 6, 7])
-    rt2 = ragged.RaggedTensor.from_row_splits(values, [0, 7])
-    rt3 = ragged.RaggedTensor.from_row_splits(values, [0, 0, 7, 7])
+    rt1 = ragged_tensor.RaggedTensor.from_row_splits(values, [0, 2, 5, 6, 6, 7])
+    rt2 = ragged_tensor.RaggedTensor.from_row_splits(values, [0, 7])
+    rt3 = ragged_tensor.RaggedTensor.from_row_splits(values, [0, 0, 7, 7])
     self.assertRaggedEqual(rt1.bounding_shape(), [5, 3, 2])
     self.assertRaggedEqual(rt2.bounding_shape(), [1, 7, 2])
     self.assertRaggedEqual(rt3.bounding_shape(), [3, 7, 2])
 
   def testExplicitAxisOptimizations(self):
-    rt = ragged.RaggedTensor.from_row_splits(b'a b c d e f g'.split(),
-                                             [0, 2, 5, 6, 6, 7])
+    rt = ragged_tensor.RaggedTensor.from_row_splits(b'a b c d e f g'.split(),
+                                                    [0, 2, 5, 6, 6, 7])
     self.assertRaggedEqual(rt.bounding_shape(0), 5)
     self.assertRaggedEqual(rt.bounding_shape(1), 3)
     self.assertRaggedEqual(rt.bounding_shape([1, 0]), [3, 5])
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py b/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
index ec06aeaea5..bc0139cffd 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_shape_test.py
@@ -23,8 +23,11 @@ import numpy as np
 
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_tensor
+from tensorflow.python.ops.ragged import ragged_tensor_shape
 from tensorflow.python.ops.ragged import ragged_test_util
+from tensorflow.python.ops.ragged.ragged_tensor_shape import RaggedTensorDynamicShape
 from tensorflow.python.platform import googletest
 
 
@@ -33,8 +36,8 @@ class RaggedTensorBoundingShapeOp(ragged_test_util.RaggedTensorTestCase,
                                   parameterized.TestCase):
 
   def assertShapeEq(self, x, y):
-    assert isinstance(x, ragged.RaggedTensorDynamicShape)
-    assert isinstance(y, ragged.RaggedTensorDynamicShape)
+    assert isinstance(x, RaggedTensorDynamicShape)
+    assert isinstance(y, RaggedTensorDynamicShape)
     x_partitioned_dim_sizes = [
         self.eval_to_list(splits)  #
         for splits in x.partitioned_dim_sizes
@@ -54,39 +57,40 @@ class RaggedTensorBoundingShapeOp(ragged_test_util.RaggedTensorTestCase,
           value=[[['a', 'b', 'c'], ['d', 'e', 'f']]],
           expected_dim_sizes=[1, 2, 3]),
       dict(
-          value=ragged.constant_value([['a', 'b', 'c'], ['d', 'e']]),
+          value=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d',
+                                                                     'e']]),
           expected_dim_sizes=[2, [3, 2]]),
       dict(
-          value=ragged.constant_value([[['a', 'b', 'c'], ['d', 'e']]]),
+          value=ragged_factory_ops.constant_value([[['a', 'b', 'c'], ['d',
+                                                                      'e']]]),
           expected_dim_sizes=[1, [2], [3, 2]]),
       dict(
-          value=ragged.constant_value([[['a', 'b', 'c'], ['d', 'e', 'f']]],
-                                      ragged_rank=1),
+          value=ragged_factory_ops.constant_value(
+              [[['a', 'b', 'c'], ['d', 'e', 'f']]], ragged_rank=1),
           expected_dim_sizes=[1, [2], 3]),
       dict(
-          value=ragged.constant_value([[[[1], [2]], [[3], [4]]],
-                                       [[[5], [6]]]], ragged_rank=1),
+          value=ragged_factory_ops.constant_value(
+              [[[[1], [2]], [[3], [4]]], [[[5], [6]]]], ragged_rank=1),
           expected_dim_sizes=[2, [2, 1], 2, 1]),
       dict(
-          value=ragged.constant_value([[10, 20], [30]]),
+          value=ragged_factory_ops.constant_value([[10, 20], [30]]),
           expected_dim_sizes=[2, [2, 1]]),
       # Docstring examples:
       dict(value=[[1, 2, 3], [4, 5, 6]], expected_dim_sizes=[2, 3]),
       dict(
-          value=ragged.constant_value([[1, 2], [], [3, 4, 5]]),
+          value=ragged_factory_ops.constant_value([[1, 2], [], [3, 4, 5]]),
           expected_dim_sizes=[3, [2, 0, 3]]),
       dict(
-          value=ragged.constant_value([[[1, 2], [3, 4]], [[5, 6]]],
-                                      ragged_rank=1),
+          value=ragged_factory_ops.constant_value([[[1, 2], [3, 4]], [[5, 6]]],
+                                                  ragged_rank=1),
           expected_dim_sizes=[2, [2, 1], 2]),
       dict(
-          value=ragged.constant_value([[[1, 2], [3]], [[4, 5]]]),
+          value=ragged_factory_ops.constant_value([[[1, 2], [3]], [[4, 5]]]),
           expected_dim_sizes=[2, [2, 1], [2, 1, 2]]),
   ])
   def testFromTensor(self, value, expected_dim_sizes):
-    shape = ragged.RaggedTensorDynamicShape.from_tensor(value)
-    expected = ragged.RaggedTensorDynamicShape.from_dim_sizes(
-        expected_dim_sizes)
+    shape = RaggedTensorDynamicShape.from_tensor(value)
+    expected = RaggedTensorDynamicShape.from_dim_sizes(expected_dim_sizes)
     self.assertShapeEq(shape, expected)
 
   @parameterized.parameters([
@@ -106,9 +110,8 @@ class RaggedTensorBoundingShapeOp(ragged_test_util.RaggedTensorTestCase,
           expected_dim_sizes=[1, 3, [3, 2, 4], 2, 3]),
   ])
   def testBroadcastToRank(self, dim_sizes, rank, expected_dim_sizes):
-    shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(dim_sizes)
-    expected = ragged.RaggedTensorDynamicShape.from_dim_sizes(
-        expected_dim_sizes)
+    shape = RaggedTensorDynamicShape.from_dim_sizes(dim_sizes)
+    expected = RaggedTensorDynamicShape.from_dim_sizes(expected_dim_sizes)
     broadcasted_shape = shape.broadcast_to_rank(rank)
     self.assertShapeEq(broadcasted_shape, expected)
     self.assertEqual(broadcasted_shape.rank, rank)
@@ -297,21 +300,19 @@ class RaggedTensorBoundingShapeOp(ragged_test_util.RaggedTensorTestCase,
         original_dim_sizes[axis] should be equal to `1` or `row_length`.
       broadcast_dim_sizes: THe dimension sizes after broadcasting.
     """
-    original_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(
-        original_dim_sizes)
-    broadcast_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(
-        broadcast_dim_sizes)
-    self.assertEqual(original_shape.rank, broadcast_shape.rank)
+    original_shape = RaggedTensorDynamicShape.from_dim_sizes(original_dim_sizes)
+    bcast_shape = RaggedTensorDynamicShape.from_dim_sizes(broadcast_dim_sizes)
+    self.assertEqual(original_shape.rank, bcast_shape.rank)
     # shape[axis].value == 1 and row_length > 1:
     bcast1 = original_shape.broadcast_dimension(axis, row_length)
     # shape[axis].value > 1 and row_length == shape[axis].value:
-    bcast2 = broadcast_shape.broadcast_dimension(axis, row_length)
+    bcast2 = bcast_shape.broadcast_dimension(axis, row_length)
     # shape[axis].value > 1 and row_length == 1:
-    bcast3 = broadcast_shape.broadcast_dimension(axis, 1)
+    bcast3 = bcast_shape.broadcast_dimension(axis, 1)
 
-    self.assertShapeEq(bcast1, broadcast_shape)
-    self.assertShapeEq(bcast2, broadcast_shape)
-    self.assertShapeEq(bcast3, broadcast_shape)
+    self.assertShapeEq(bcast1, bcast_shape)
+    self.assertShapeEq(bcast2, bcast_shape)
+    self.assertShapeEq(bcast3, bcast_shape)
 
   @parameterized.parameters(
       [
@@ -369,104 +370,115 @@ class RaggedTensorBoundingShapeOp(ragged_test_util.RaggedTensorTestCase,
               expected_dims=[2, (2, 1), 2, (2, 1, 2, 1, 2, 1)]),
       ])
   def testBroadcastDynamicShape(self, x_dims, y_dims, expected_dims):
-    x_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(x_dims)
-    y_shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(y_dims)
-    expected = ragged.RaggedTensorDynamicShape.from_dim_sizes(expected_dims)
-    result1 = ragged.broadcast_dynamic_shape(x_shape, y_shape)
-    result2 = ragged.broadcast_dynamic_shape(y_shape, x_shape)
+    x_shape = RaggedTensorDynamicShape.from_dim_sizes(x_dims)
+    y_shape = RaggedTensorDynamicShape.from_dim_sizes(y_dims)
+    expected = RaggedTensorDynamicShape.from_dim_sizes(expected_dims)
+    result1 = ragged_tensor_shape.broadcast_dynamic_shape(x_shape, y_shape)
+    result2 = ragged_tensor_shape.broadcast_dynamic_shape(y_shape, x_shape)
     self.assertShapeEq(expected, result1)
     self.assertShapeEq(expected, result2)
 
   def testRepr(self):
-    shape = ragged.RaggedTensorDynamicShape.from_dim_sizes([2, (2, 1), 2, 1])
+    shape = RaggedTensorDynamicShape.from_dim_sizes([2, (2, 1), 2, 1])
     self.assertRegexpMatches(
         repr(shape),
         r'RaggedTensorDynamicShape\('
         r'partitioned_dim_sizes=\(<[^>]+>, <[^>]+>\), '
         r'inner_dim_sizes=<[^>]+>\)')
 
-  @parameterized.parameters([
-      dict(
-          x=[[10], [20], [30]],  # shape=[3, 1]
-          dim_sizes=[3, 2],
-          expected=[[10, 10], [20, 20], [30, 30]]),
-      dict(
-          x=[[10], [20], [30]],  # shape=[3, 1]
-          dim_sizes=[3, [3, 0, 2]],
-          expected=ragged.constant_value([[10, 10, 10], [], [30, 30]],
-                                         dtype=np.int32)),
-      dict(
-          x=[[[1, 2, 3]], [[4, 5, 6]]],  # shape = [2, 1, 3]
-          dim_sizes=[2, [2, 3], 3],
-          expected=ragged.constant_value(
-              [[[1, 2, 3], [1, 2, 3]], [[4, 5, 6], [4, 5, 6], [4, 5, 6]]],
-              dtype=np.int32,
-              ragged_rank=1)),
-      dict(
-          x=[[[1]], [[2]]],  # shape = [2, 1, 1]
-          dim_sizes=[2, [2, 3], [0, 2, 1, 2, 0]],
-          expected=ragged.constant_value([[[], [1, 1]], [[2], [2, 2], []]],
-                                         dtype=np.int32,
-                                         ragged_rank=2)),
-      dict(
-          x=10,
-          dim_sizes=[3, [3, 0, 2]],
-          expected=ragged.constant_value([[10, 10, 10], [], [10, 10]])),
-  ])
+  @parameterized.parameters(
+      [
+          dict(
+              x=[[10], [20], [30]],  # shape=[3, 1]
+              dim_sizes=[3, 2],
+              expected=[[10, 10], [20, 20], [30, 30]]),
+          dict(
+              x=[[10], [20], [30]],  # shape=[3, 1]
+              dim_sizes=[3, [3, 0, 2]],
+              expected=ragged_factory_ops.constant_value(
+                  [[10, 10, 10], [], [30, 30]], dtype=np.int32)),
+          dict(
+              x=[[[1, 2, 3]], [[4, 5, 6]]],  # shape = [2, 1, 3]
+              dim_sizes=[2, [2, 3], 3],
+              expected=ragged_factory_ops.constant_value(
+                  [[[1, 2, 3], [1, 2, 3]], [[4, 5, 6], [4, 5, 6], [4, 5, 6]]],
+                  dtype=np.int32,
+                  ragged_rank=1)),
+          dict(
+              x=[[[1]], [[2]]],  # shape = [2, 1, 1]
+              dim_sizes=[2, [2, 3], [0, 2, 1, 2, 0]],
+              expected=ragged_factory_ops.constant_value(
+                  [[[], [1, 1]], [[2], [2, 2], []]],
+                  dtype=np.int32,
+                  ragged_rank=2)),
+          dict(
+              x=10,
+              dim_sizes=[3, [3, 0, 2]],
+              expected=ragged_factory_ops.constant_value([[10, 10, 10], [],
+                                                          [10, 10]])),
+      ])
   def testRaggedBroadcastTo(self, x, dim_sizes, expected):
-    shape = ragged.RaggedTensorDynamicShape.from_dim_sizes(dim_sizes)
-    result = ragged.broadcast_to(x, shape)
+    shape = RaggedTensorDynamicShape.from_dim_sizes(dim_sizes)
+    result = ragged_tensor_shape.broadcast_to(x, shape)
     self.assertEqual(
         getattr(result, 'ragged_rank', 0), getattr(expected, 'ragged_rank', 0))
     self.assertRaggedEqual(result, expected)
 
-  @parameterized.parameters([
-      dict(
-          doc='x.shape=[3, (D1)]; y.shape=[3, 1]; bcast.shape=[3, (D1)]',
-          x=ragged.constant_value([[1, 2, 3], [], [4, 5]], dtype=np.int32),
-          y=[[10], [20], [30]],
-          expected=ragged.constant_value([[11, 12, 13], [], [34, 35]])),
-      dict(
-          doc='x.shape=[3, (D1)]; y.shape=[]; bcast.shape=[3, (D1)]',
-          x=ragged.constant_value([[1, 2, 3], [], [4, 5]], dtype=np.int32),
-          y=10,
-          expected=ragged.constant_value([[11, 12, 13], [], [14, 15]])),
-      dict(
-          doc='x.shape=[1, (D1)]; y.shape=[3, 1]; bcast.shape=[3, (D1)]',
-          x=ragged.constant_value([[1, 2, 3]], dtype=np.int32),
-          y=[[10], [20], [30]],
-          expected=ragged.constant_value(
-              [[11, 12, 13], [21, 22, 23], [31, 32, 33]], dtype=np.int32)),
-      dict(
-          doc=('x.shape=[2, (D1), 1]; y.shape=[1, (D2)]; '
-               'bcast.shape=[2, (D1), (D2)]'),
-          x=ragged.constant_value([[[1], [2], [3]], [[4]]], ragged_rank=1),
-          y=ragged.constant_value([[10, 20, 30]]),
-          expected=ragged.constant_value([[[11, 21, 31], [12, 22, 32],
-                                           [13, 23, 33]], [[14, 24, 34]]])),
-      dict(
-          doc=('x.shape=[2, (D1), 1]; y.shape=[1, 1, 4]; '
-               'bcast.shape=[2, (D1), 4]'),
-          x=ragged.constant_value([[[10], [20]], [[30]]], ragged_rank=1),
-          y=[[[1, 2, 3, 4]]],
-          expected=ragged.constant_value(
-              [[[11, 12, 13, 14], [21, 22, 23, 24]], [[31, 32, 33, 34]]],
-              ragged_rank=1)),
-      dict(
-          doc=('x.shape=[2, (D1), 2, 1]; y.shape=[2, (D2)]; '
-               'bcast.shape=[2, (D1), (2), (D2)'),
-          x=ragged.constant_value([[[[1], [2]], [[3], [4]]],
-                                   [[[5], [6]]]],
-                                  ragged_rank=1),
-          y=ragged.constant_value([[10, 20], [30]]),
-          expected=ragged.constant_value(
-              [[[[11, 21], [32]], [[13, 23], [34]]],
-               [[[15, 25], [36]]]])),
-  ])
+  @parameterized.parameters(
+      [
+          dict(
+              doc='x.shape=[3, (D1)]; y.shape=[3, 1]; bcast.shape=[3, (D1)]',
+              x=ragged_factory_ops.constant_value([[1, 2, 3], [], [4, 5]],
+                                                  dtype=np.int32),
+              y=[[10], [20], [30]],
+              expected=ragged_factory_ops.constant_value([[11, 12, 13], [],
+                                                          [34, 35]])),
+          dict(
+              doc='x.shape=[3, (D1)]; y.shape=[]; bcast.shape=[3, (D1)]',
+              x=ragged_factory_ops.constant_value([[1, 2, 3], [], [4, 5]],
+                                                  dtype=np.int32),
+              y=10,
+              expected=ragged_factory_ops.constant_value([[11, 12, 13], [],
+                                                          [14, 15]])),
+          dict(
+              doc='x.shape=[1, (D1)]; y.shape=[3, 1]; bcast.shape=[3, (D1)]',
+              x=ragged_factory_ops.constant_value([[1, 2, 3]], dtype=np.int32),
+              y=[[10], [20], [30]],
+              expected=ragged_factory_ops.constant_value(
+                  [[11, 12, 13], [21, 22, 23], [31, 32, 33]], dtype=np.int32)),
+          dict(
+              doc=('x.shape=[2, (D1), 1]; y.shape=[1, (D2)]; '
+                   'bcast.shape=[2, (D1), (D2)]'),
+              x=ragged_factory_ops.constant_value([[[1], [2], [3]], [[4]]],
+                                                  ragged_rank=1),
+              y=ragged_factory_ops.constant_value([[10, 20, 30]]),
+              expected=ragged_factory_ops.constant_value([[[11, 21, 31],
+                                                           [12, 22, 32],
+                                                           [13, 23, 33]],
+                                                          [[14, 24, 34]]])),
+          dict(
+              doc=('x.shape=[2, (D1), 1]; y.shape=[1, 1, 4]; '
+                   'bcast.shape=[2, (D1), 4]'),
+              x=ragged_factory_ops.constant_value([[[10], [20]], [[30]]],
+                                                  ragged_rank=1),
+              y=[[[1, 2, 3, 4]]],
+              expected=ragged_factory_ops.constant_value(
+                  [[[11, 12, 13, 14], [21, 22, 23, 24]], [[31, 32, 33, 34]]],
+                  ragged_rank=1)),
+          dict(
+              doc=('x.shape=[2, (D1), 2, 1]; y.shape=[2, (D2)]; '
+                   'bcast.shape=[2, (D1), (2), (D2)'),
+              x=ragged_factory_ops.constant_value(
+                  [[[[1], [2]], [[3], [4]]], [[[5], [6]]]], ragged_rank=1),
+              y=ragged_factory_ops.constant_value([[10, 20], [30]]),
+              expected=ragged_factory_ops.constant_value([[[[11, 21], [32]],
+                                                           [[13, 23], [34]]],
+                                                          [[[15, 25], [36]]]])),
+      ])
   def testRaggedAddWithBroadcasting(self, x, y, expected, doc):
     expected_rrank = getattr(expected, 'ragged_rank', 0)
-    x = ragged.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
-    y = ragged.convert_to_tensor_or_ragged_tensor(y, dtype=dtypes.int32)
+    x = ragged_tensor.convert_to_tensor_or_ragged_tensor(x, dtype=dtypes.int32)
+    y = ragged_tensor.convert_to_tensor_or_ragged_tensor(y, dtype=dtypes.int32)
     result = x + y
     result_rrank = getattr(result, 'ragged_rank', 0)
     self.assertEqual(expected_rrank, result_rrank)
diff --git a/tensorflow/python/ops/ragged/ragged_tensor_test.py b/tensorflow/python/ops/ragged/ragged_tensor_test.py
index b8f1d97137..89691b015d 100644
--- a/tensorflow/python/ops/ragged/ragged_tensor_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tensor_test.py
@@ -30,9 +30,11 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_math_ops
+from tensorflow.python.ops.ragged import ragged_tensor_value
 from tensorflow.python.ops.ragged import ragged_test_util
-from tensorflow.python.ops.ragged import RaggedTensor
+from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor
 from tensorflow.python.platform import googletest
 
 
@@ -176,7 +178,7 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
     splits2 = np.array([0, 3, 5], dtype=np.int64)
 
     # Test construction of a RaggedTensorValue with ragged_rank=1.
-    rt_value = ragged.RaggedTensorValue(values, splits)
+    rt_value = ragged_tensor_value.RaggedTensorValue(values, splits)
     self.assertEqual(rt_value.row_splits.dtype, np.int64)
     self.assertEqual(rt_value.shape, (5, None))
     self.assertEqual(len(rt_value.nested_row_splits), 1)
@@ -186,8 +188,9 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
     self.assertAllEqual(values, rt_value.flat_values)
 
     # Test construction of a RaggedTensorValue with ragged_rank=2.
-    rt_value = ragged.RaggedTensorValue(
-        values=ragged.RaggedTensorValue(values, splits), row_splits=splits2)
+    rt_value = ragged_tensor_value.RaggedTensorValue(
+        values=ragged_tensor_value.RaggedTensorValue(values, splits),
+        row_splits=splits2)
     self.assertEqual(rt_value.row_splits.dtype, np.int64)
     self.assertEqual(rt_value.shape, (2, None, None))
     self.assertEqual(len(rt_value.nested_row_splits), 2)
@@ -1096,7 +1099,7 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
   def testRaggedTensorValueStr(self):
     values = [b'a', b'b', b'c', b'd', b'e', b'f', b'g']
     row_splits = [0, 2, 5, 6, 6, 7]
-    rt = ragged.RaggedTensorValue(
+    rt = ragged_tensor_value.RaggedTensorValue(
         np.array(values), np.array(row_splits, dtype=np.int64))
     expected_str = '<tf.RaggedTensorValue {}>'.format([[b'a', b'b'],
                                                        [b'c', b'd', b'e'],
@@ -1111,8 +1114,9 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
   #=============================================================================
 
   def testWithValues(self):
-    rt1 = ragged.constant([[1, 2], [3, 4, 5], [6], [], [7]])
-    rt2 = ragged.constant([[[1, 2], [3, 4, 5]], [[6]], [], [[], [7]]])
+    rt1 = ragged_factory_ops.constant([[1, 2], [3, 4, 5], [6], [], [7]])
+    rt2 = ragged_factory_ops.constant([[[1, 2], [3, 4, 5]], [[6]], [], [[],
+                                                                        [7]]])
 
     rt1_plus_10 = rt1.with_values(rt1.values + 10)
     rt2_times_10 = rt2.with_flat_values(rt2.flat_values * 10)
@@ -1135,8 +1139,8 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
     if context.executing_eagerly():
       return
 
-    rt1 = ragged.constant([[1, 2, 3], [4]])
-    rt2 = ragged.constant([[[], [1, 2]], [[3]]])
+    rt1 = ragged_factory_ops.constant([[1, 2, 3], [4]])
+    rt2 = ragged_factory_ops.constant([[[], [1, 2]], [[3]]])
     with self.test_session() as session:
       result = session.run({'rt1': rt1, 'rt2': rt2})
       self.assertCountEqual(sorted(result.keys()), ['rt1', 'rt2'])
@@ -1156,8 +1160,8 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
             array_ops.placeholder(dtypes.int64)
         ])
 
-    rt1_feed_val = ragged.constant_value([[1, 2, 3], [4]])
-    rt2_feed_val = ragged.constant_value([[[], [1, 2]], [[3]]])
+    rt1_feed_val = ragged_factory_ops.constant_value([[1, 2, 3], [4]])
+    rt2_feed_val = ragged_factory_ops.constant_value([[[], [1, 2]], [[3]]])
 
     with self.test_session() as session:
       result = session.run({
@@ -1186,13 +1190,13 @@ class RaggedTensorTest(ragged_test_util.RaggedTensorTestCase,
     c = array_ops.placeholder(dtypes.int32, shape=[], name='c')
 
     # Feed values for placeholder inputs.
-    a_val = ragged.constant_value([[1, 2, 3], [4]])
-    b_val = ragged.constant_value([[5, 4, 3], [2]])
+    a_val = ragged_factory_ops.constant_value([[1, 2, 3], [4]])
+    b_val = ragged_factory_ops.constant_value([[5, 4, 3], [2]])
     c_val = 3
 
     # Compute some values.
-    r1 = ragged.reduce_sum(a * b, axis=1)
-    r2 = ragged.reduce_sum(a + c, axis=1)
+    r1 = ragged_math_ops.reduce_sum(a * b, axis=1)
+    r2 = ragged_math_ops.reduce_sum(a + c, axis=1)
 
     with self.test_session() as session:
       handle = session.partial_run_setup([r1, r2], [a, b, c])
diff --git a/tensorflow/python/ops/ragged/ragged_test_util.py b/tensorflow/python/ops/ragged/ragged_test_util.py
index 027417664d..dcbab3021e 100644
--- a/tensorflow/python/ops/ragged/ragged_test_util.py
+++ b/tensorflow/python/ops/ragged/ragged_test_util.py
@@ -24,7 +24,8 @@ import numpy as np
 
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_tensor
+from tensorflow.python.ops.ragged import ragged_tensor_value
 
 
 class RaggedTensorTestCase(test_util.TensorFlowTestCase):
@@ -32,14 +33,14 @@ class RaggedTensorTestCase(test_util.TensorFlowTestCase):
 
   def _GetPyList(self, a):
     """Converts a to a nested python list."""
-    if isinstance(a, ragged.RaggedTensor):
+    if isinstance(a, ragged_tensor.RaggedTensor):
       return self.evaluate(a).to_list()
     elif isinstance(a, ops.Tensor):
       a = self.evaluate(a)
       return a.tolist() if isinstance(a, np.ndarray) else a
     elif isinstance(a, np.ndarray):
       return a.tolist()
-    elif isinstance(a, ragged.RaggedTensorValue):
+    elif isinstance(a, ragged_tensor_value.RaggedTensorValue):
       return a.to_list()
     else:
       return np.array(a).tolist()
@@ -51,8 +52,8 @@ class RaggedTensorTestCase(test_util.TensorFlowTestCase):
     self.assertEqual(a_list, b_list)
 
     if not (isinstance(a, (list, tuple)) or isinstance(b, (list, tuple))):
-      a_ragged_rank = a.ragged_rank if ragged.is_ragged(a) else 0
-      b_ragged_rank = b.ragged_rank if ragged.is_ragged(b) else 0
+      a_ragged_rank = a.ragged_rank if ragged_tensor.is_ragged(a) else 0
+      b_ragged_rank = b.ragged_rank if ragged_tensor.is_ragged(b) else 0
       self.assertEqual(a_ragged_rank, b_ragged_rank)
 
   def assertRaggedAlmostEqual(self, a, b, places=7):
@@ -61,8 +62,8 @@ class RaggedTensorTestCase(test_util.TensorFlowTestCase):
     self.assertNestedListAlmostEqual(a_list, b_list, places, context='value')
 
     if not (isinstance(a, (list, tuple)) or isinstance(b, (list, tuple))):
-      a_ragged_rank = a.ragged_rank if ragged.is_ragged(a) else 0
-      b_ragged_rank = b.ragged_rank if ragged.is_ragged(b) else 0
+      a_ragged_rank = a.ragged_rank if ragged_tensor.is_ragged(a) else 0
+      b_ragged_rank = b.ragged_rank if ragged_tensor.is_ragged(b) else 0
       self.assertEqual(a_ragged_rank, b_ragged_rank)
 
   def assertNestedListAlmostEqual(self, a, b, places=7, context='value'):
@@ -79,7 +80,7 @@ class RaggedTensorTestCase(test_util.TensorFlowTestCase):
 
   def eval_to_list(self, tensor):
     value = self.evaluate(tensor)
-    if ragged.is_ragged(value):
+    if ragged_tensor.is_ragged(value):
       return value.to_list()
     elif isinstance(value, np.ndarray):
       return value.tolist()
@@ -87,8 +88,8 @@ class RaggedTensorTestCase(test_util.TensorFlowTestCase):
       return value
 
   def _eval_tensor(self, tensor):
-    if ragged.is_ragged(tensor):
-      return ragged.RaggedTensorValue(
+    if ragged_tensor.is_ragged(tensor):
+      return ragged_tensor_value.RaggedTensorValue(
           self._eval_tensor(tensor.values),
           self._eval_tensor(tensor.row_splits))
     else:
diff --git a/tensorflow/python/ops/ragged/ragged_tile_op_test.py b/tensorflow/python/ops/ragged/ragged_tile_op_test.py
index d3445571bf..8c03b16653 100644
--- a/tensorflow/python/ops/ragged/ragged_tile_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_tile_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.tile."""
+"""Tests for ragged_array_ops.tile."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -24,7 +24,8 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -189,7 +190,7 @@ class RaggedTileOpTest(ragged_test_util.RaggedTensorTestCase,
                      multiples,
                      expected,
                      ragged_rank=None):
-    rt = ragged.constant(rt_input, ragged_rank)
+    rt = ragged_factory_ops.constant(rt_input, ragged_rank)
 
     expected_shape = [
         None if dim is None else dim * multiple
@@ -203,7 +204,7 @@ class RaggedTileOpTest(ragged_test_util.RaggedTensorTestCase,
         const_multiples, shape=[len(multiples)])
 
     for multiples_tensor in (const_multiples, non_const_multiples):
-      tiled = ragged.tile(rt, multiples_tensor)
+      tiled = ragged_array_ops.tile(rt, multiples_tensor)
       self.assertEqual(tiled.ragged_rank, rt.ragged_rank)
       self.assertEqual(tiled.shape.ndims, rt.shape.ndims)
       if multiples_tensor is const_multiples:
@@ -213,7 +214,7 @@ class RaggedTileOpTest(ragged_test_util.RaggedTensorTestCase,
   def testRaggedTileWithTensorInput(self):
     # When the input is a `Tensor`, ragged_tile just delegates to tf.tile.
     dt = constant_op.constant([[1, 2], [3, 4]])
-    tiled = ragged.tile(dt, [3, 2])
+    tiled = ragged_array_ops.tile(dt, [3, 2])
     expected = [[1, 2, 1, 2], [3, 4, 3, 4],
                 [1, 2, 1, 2], [3, 4, 3, 4],
                 [1, 2, 1, 2], [3, 4, 3, 4]]  # pyformat: disable
diff --git a/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py b/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
index 46d7a56a7c..92959a98bf 100644
--- a/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_to_sparse_op_test.py
@@ -25,7 +25,9 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
+from tensorflow.python.ops.ragged import ragged_functional_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -34,7 +36,7 @@ from tensorflow.python.platform import googletest
 class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
 
   def testDocStringExample(self):
-    rt = ragged.constant([[1, 2, 3], [4], [], [5, 6]])
+    rt = ragged_factory_ops.constant([[1, 2, 3], [4], [], [5, 6]])
     st = self.evaluate(rt.to_sparse())
     self.assertAllEqual(st.indices,
                         [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [3, 1]])
@@ -42,7 +44,8 @@ class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
     self.assertAllEqual(st.dense_shape, [4, 3])
 
   def test2DRaggedTensorWithOneRaggedDimension(self):
-    rt = ragged.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [], ['g']])
+    rt = ragged_factory_ops.constant([['a', 'b'], ['c', 'd', 'e'], ['f'], [],
+                                      ['g']])
     st = self.evaluate(rt.to_sparse())
     self.assertAllEqual(
         st.indices, [[0, 0], [0, 1], [1, 0], [1, 1], [1, 2], [2, 0], [4, 0]])
@@ -50,9 +53,10 @@ class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
     self.assertAllEqual(st.dense_shape, [5, 3])
 
   def test3DRaggedTensorWithOneRaggedDimension(self):
-    rt = ragged.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8], [9, 10]],
-                          [[11, 12]], [], [[13, 14]]],
-                         ragged_rank=1)
+    rt = ragged_factory_ops.constant(
+        [[[1, 2], [3, 4]], [[5, 6], [7, 8], [9, 10]], [[11, 12]], [], [[13, 14]]
+        ],
+        ragged_rank=1)
     st = self.evaluate(rt.to_sparse())
     self.assertAllEqual(st.indices,
                         [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 0],
@@ -63,7 +67,7 @@ class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
     self.assertAllEqual(st.dense_shape, [5, 3, 2])
 
   def test4DRaggedTensorWithOneRaggedDimension(self):
-    rt = ragged.constant(
+    rt = ragged_factory_ops.constant(
         [[[[1, 2], [3, 4]], [[5, 6], [7, 8]]], [], [[[9, 10], [11, 12]]]],
         ragged_rank=1)
     st = self.evaluate(rt.to_sparse())
@@ -87,9 +91,10 @@ class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
     self.assertAllEqual(st.dense_shape, [3, 2, 2, 2])
 
   def test4DRaggedTensorWithTwoRaggedDimensions(self):
-    rt = ragged.constant([[[[1, 2], [3, 4]], [[5, 6], [7, 8], [9, 10]]],
-                          [[[11, 12]], [], [[13, 14]]], []],
-                         ragged_rank=2)
+    rt = ragged_factory_ops.constant(
+        [[[[1, 2], [3, 4]], [[5, 6], [7, 8], [9, 10]]],
+         [[[11, 12]], [], [[13, 14]]], []],
+        ragged_rank=2)
     st = self.evaluate(rt.to_sparse())
     self.assertAllEqual(
         st.indices,
@@ -114,19 +119,20 @@ class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
     self.assertAllEqual(st.dense_shape, [3, 3, 3, 2])
 
   def testShape(self):
-    rt = ragged.constant([[1, 2], [3, 4, 5], [6], [], [7]])
+    rt = ragged_factory_ops.constant([[1, 2], [3, 4, 5], [6], [], [7]])
     st = rt.to_sparse()
     self.assertEqual(st.indices.shape.as_list(), [7, 2])
     self.assertEqual(st.values.shape.as_list(), [7])
     self.assertEqual(st.dense_shape.shape.as_list(), [2])
 
-    rt = ragged.constant([[[1, 2]], [], [[3, 4]], []], ragged_rank=1)
+    rt = ragged_factory_ops.constant([[[1, 2]], [], [[3, 4]], []],
+                                     ragged_rank=1)
     st = rt.to_sparse()
     self.assertEqual(st.indices.shape.as_list(), [4, 3])
     self.assertEqual(st.values.shape.as_list(), [4])
     self.assertEqual(st.dense_shape.shape.as_list(), [3])
 
-    rt = ragged.constant([[[1], [2, 3, 4, 5, 6, 7]], [[]]])
+    rt = ragged_factory_ops.constant([[[1], [2, 3, 4, 5, 6, 7]], [[]]])
     st = rt.to_sparse()
     self.assertEqual(st.indices.shape.as_list(), [7, 3])
     self.assertEqual(st.values.shape.as_list(), [7])
@@ -138,17 +144,17 @@ class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
     empty_vector = array_ops.placeholder_with_default(
         array_ops.zeros([0], dtypes.int64), shape=None)
 
-    bad_rt1 = ragged.RaggedTensor.from_row_splits(
+    bad_rt1 = ragged_tensor.RaggedTensor.from_row_splits(
         row_splits=[2, 3], values=[1, 2, 3])
     bad_split0 = r'First value of ragged splits must be 0.*'
     with self.assertRaisesRegexp(errors.InvalidArgumentError, bad_split0):
       self.evaluate(bad_rt1.to_sparse())
 
-    bad_rt2 = ragged.RaggedTensor.from_row_splits(
+    bad_rt2 = ragged_tensor.RaggedTensor.from_row_splits(
         row_splits=[0, 5], values=empty_vector)
-    bad_rt3 = ragged.RaggedTensor.from_row_splits(
+    bad_rt3 = ragged_tensor.RaggedTensor.from_row_splits(
         row_splits=[0, 1],
-        values=ragged.RaggedTensor.from_row_splits(
+        values=ragged_tensor.RaggedTensor.from_row_splits(
             row_splits=[0, 5], values=empty_vector))
     split_mismatch1_error = r'Final value of ragged splits must match.*'
     for rt in [bad_rt2, bad_rt3]:
@@ -156,16 +162,16 @@ class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
                                    split_mismatch1_error):
         self.evaluate(rt.to_sparse())
 
-    bad_rt4 = ragged.RaggedTensor.from_row_splits(
+    bad_rt4 = ragged_tensor.RaggedTensor.from_row_splits(
         row_splits=[0, 5],
-        values=ragged.RaggedTensor.from_row_splits(
+        values=ragged_tensor.RaggedTensor.from_row_splits(
             row_splits=[0], values=empty_vector))
     split_mismatch2_error = r'Final value of ragged splits must match.*'
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                  split_mismatch2_error):
       self.evaluate(bad_rt4.to_sparse())
 
-    bad_rt5 = ragged.RaggedTensor.from_row_splits(
+    bad_rt5 = ragged_tensor.RaggedTensor.from_row_splits(
         row_splits=empty_vector, values=[])
     empty_splits_error = (r'ragged splits may not be empty.*')
     with self.assertRaisesRegexp(errors.InvalidArgumentError,
@@ -176,11 +182,11 @@ class RaggedTensorToSparseOpTest(ragged_test_util.RaggedTensorTestCase):
     if context.executing_eagerly():
       return
     # rt1.shape == rt2.shape == [2, (D2), (D3), 2].
-    rt1 = ragged.constant([[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0]]]],
-                          ragged_rank=2)
-    rt2 = ragged.constant([[[[9.0, 8.0], [7.0, 6.0]], [[5.0, 4.0]]]],
-                          ragged_rank=2)
-    rt = ragged.map_flat_values(math_ops.add, rt1, rt2 * 2.0)
+    rt1 = ragged_factory_ops.constant(
+        [[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0]]]], ragged_rank=2)
+    rt2 = ragged_factory_ops.constant(
+        [[[[9.0, 8.0], [7.0, 6.0]], [[5.0, 4.0]]]], ragged_rank=2)
+    rt = ragged_functional_ops.map_flat_values(math_ops.add, rt1, rt2 * 2.0)
     st = rt.to_sparse()
 
     g1, g2 = gradients_impl.gradients(st.values,
diff --git a/tensorflow/python/ops/ragged/ragged_to_tensor_op_test.py b/tensorflow/python/ops/ragged/ragged_to_tensor_op_test.py
index ffcc2be52e..ac75456813 100644
--- a/tensorflow/python/ops/ragged/ragged_to_tensor_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_to_tensor_op_test.py
@@ -23,7 +23,7 @@ from absl.testing import parameterized
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -34,7 +34,7 @@ class RaggedTensorToTensorOpTest(ragged_test_util.RaggedTensorTestCase,
 
   def testDocStringExamples(self):
     """Example from ragged_to_tensor.__doc__."""
-    rt = ragged.constant([[9, 8, 7], [], [6, 5], [4]])
+    rt = ragged_factory_ops.constant([[9, 8, 7], [], [6, 5], [4]])
     dt = rt.to_tensor()
     self.assertAllEqual(dt, [[9, 8, 7], [0, 0, 0], [6, 5, 0], [4, 0, 0]])
 
@@ -100,7 +100,7 @@ class RaggedTensorToTensorOpTest(ragged_test_util.RaggedTensorTestCase,
                                ragged_rank=None,
                                default=None,
                                expected_shape=None):
-    rt = ragged.constant(rt_input, ragged_rank=ragged_rank)
+    rt = ragged_factory_ops.constant(rt_input, ragged_rank=ragged_rank)
     dt = rt.to_tensor(default)
     self.assertIsInstance(dt, ops.Tensor)
     self.assertEqual(rt.dtype, dt.dtype)
@@ -129,7 +129,7 @@ class RaggedTensorToTensorOpTest(ragged_test_util.RaggedTensorTestCase,
       },
   )
   def testError(self, rt_input, default, error, ragged_rank=None):
-    rt = ragged.constant(rt_input, ragged_rank=ragged_rank)
+    rt = ragged_factory_ops.constant(rt_input, ragged_rank=ragged_rank)
     with self.assertRaisesRegexp(error[0], error[1]):
       rt.to_tensor(default)
 
diff --git a/tensorflow/python/ops/ragged/ragged_where_op_test.py b/tensorflow/python/ops/ragged/ragged_where_op_test.py
index b3cd5a2deb..3dd9565826 100644
--- a/tensorflow/python/ops/ragged/ragged_where_op_test.py
+++ b/tensorflow/python/ops/ragged/ragged_where_op_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for ragged.where."""
+"""Tests for ragged_array_ops.where."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -21,7 +21,8 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import ragged
+from tensorflow.python.ops.ragged import ragged_array_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_test_util
 from tensorflow.python.platform import googletest
 
@@ -35,18 +36,24 @@ class RaggedWhereOpTest(ragged_test_util.RaggedTensorTestCase,
       # Docstring Examples
       #=========================================================================
       dict(  # shape=[D1, (D2)]
-          condition=ragged.constant_value([[True, False, True], [False, True]]),
+          condition=ragged_factory_ops.constant_value(
+              [[True, False, True], [False, True]]),
           expected=[[0, 0], [0, 2], [1, 1]]),
       dict(  # shape=[D1, (D2)]
-          condition=ragged.constant_value([[True, False, True], [False, True]]),
-          x=ragged.constant_value([['A', 'B', 'C'], ['D', 'E']]),
-          y=ragged.constant_value([['a', 'b', 'c'], ['d', 'e']]),
-          expected=ragged.constant_value([[b'A', b'b', b'C'], [b'd', b'E']])),
+          condition=ragged_factory_ops.constant_value(
+              [[True, False, True], [False, True]]),
+          x=ragged_factory_ops.constant_value(
+              [['A', 'B', 'C'], ['D', 'E']]),
+          y=ragged_factory_ops.constant_value(
+              [['a', 'b', 'c'], ['d', 'e']]),
+          expected=ragged_factory_ops.constant_value(
+              [[b'A', b'b', b'C'], [b'd', b'E']])),
       dict(  # shape=[D1, (D2)]
-          condition=ragged.constant_value([True, False]),
-          x=ragged.constant_value([['A', 'B', 'C'], ['D', 'E']]),
-          y=ragged.constant_value([['a', 'b', 'c'], ['d', 'e']]),
-          expected=ragged.constant_value([[b'A', b'B', b'C'], [b'd', b'e']])),
+          condition=ragged_factory_ops.constant_value([True, False]),
+          x=ragged_factory_ops.constant_value([['A', 'B', 'C'], ['D', 'E']]),
+          y=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d', 'e']]),
+          expected=ragged_factory_ops.constant_value(
+              [[b'A', b'B', b'C'], [b'd', b'e']])),
       #=========================================================================
       # Coordinate-retrieval mode
       #=========================================================================
@@ -57,24 +64,25 @@ class RaggedWhereOpTest(ragged_test_util.RaggedTensorTestCase,
           condition=[[True, False], [False, True]],
           expected=[[0, 0], [1, 1]]),
       dict(  # shape=[D1, (D2)]
-          condition=ragged.constant_value([[True, False, True], [False, True]]),
+          condition=ragged_factory_ops.constant_value(
+              [[True, False, True], [False, True]]),
           expected=[[0, 0], [0, 2], [1, 1]]),
       dict(  # shape=[D1, (D2), (D3)]
-          condition=ragged.constant_value([
+          condition=ragged_factory_ops.constant_value([
               [[True, False, True], [False, True]],
               [[True], [], [False], [False, True, False]]
           ]),
           expected=[[0, 0, 0], [0, 0, 2], [0, 1, 1],
                     [1, 0, 0], [1, 3, 1]]),
       dict(  # shape=[D1, (D2), D3]
-          condition=ragged.constant_value([
+          condition=ragged_factory_ops.constant_value([
               [[True, False], [False, True]],
               [[True, False], [False, False], [True, False], [False, True]]
           ], ragged_rank=1),
           expected=[[0, 0, 0], [0, 1, 1],
                     [1, 0, 0], [1, 2, 0], [1, 3, 1]]),
       dict(  # shape=[D1, (D2), (D3), (D4)]
-          condition=ragged.constant_value([
+          condition=ragged_factory_ops.constant_value([
               [[[], [True]]],
               [[[True, False, True], [False, True]],
                [[True], [], [False], [False, True, False]]]
@@ -101,44 +109,46 @@ class RaggedWhereOpTest(ragged_test_util.RaggedTensorTestCase,
           y=[['a', 'b'], ['d', 'e']],
           expected=[[b'A', b'b'], [b'd', b'E']]),
       dict(  # shape=[D1, (D2)]
-          condition=ragged.constant_value([[True, False, True], [False, True]]),
-          x=ragged.constant_value([['A', 'B', 'C'], ['D', 'E']]),
-          y=ragged.constant_value([['a', 'b', 'c'], ['d', 'e']]),
-          expected=ragged.constant_value([[b'A', b'b', b'C'], [b'd', b'E']])),
+          condition=ragged_factory_ops.constant_value(
+              [[True, False, True], [False, True]]),
+          x=ragged_factory_ops.constant_value([['A', 'B', 'C'], ['D', 'E']]),
+          y=ragged_factory_ops.constant_value([['a', 'b', 'c'], ['d', 'e']]),
+          expected=ragged_factory_ops.constant_value(
+              [[b'A', b'b', b'C'], [b'd', b'E']])),
       dict(  # shape=[D1, (D2), D3]
-          condition=ragged.constant_value([
+          condition=ragged_factory_ops.constant_value([
               [[True, False], [False, True]],
               [[True, False], [False, False], [True, False], [False, True]]
           ], ragged_rank=1),
-          x=ragged.constant_value([
+          x=ragged_factory_ops.constant_value([
               [['A', 'B'], ['C', 'D']],
               [['E', 'F'], ['G', 'H'], ['I', 'J'], ['K', 'L']]
           ], ragged_rank=1),
-          y=ragged.constant_value([
+          y=ragged_factory_ops.constant_value([
               [['a', 'b'], ['c', 'd']],
               [['e', 'f'], ['g', 'h'], ['i', 'j'], ['k', 'l']]
           ], ragged_rank=1),
-          expected=ragged.constant_value([
+          expected=ragged_factory_ops.constant_value([
               [[b'A', b'b'], [b'c', b'D']],
               [[b'E', b'f'], [b'g', b'h'], [b'I', b'j'], [b'k', b'L']]
           ], ragged_rank=1)),
       dict(  # shape=[D1, (D2), (D3), (D4)]
-          condition=ragged.constant_value([
+          condition=ragged_factory_ops.constant_value([
               [[[], [True]]],
               [[[True, False, True], [False, True]],
                [[True], [], [False], [False, True, False]]]
           ]),
-          x=ragged.constant_value([
+          x=ragged_factory_ops.constant_value([
               [[[], ['A']]],
               [[['B', 'C', 'D'], ['E', 'F']],
                [['G'], [], ['H'], ['I', 'J', 'K']]]
           ]),
-          y=ragged.constant_value([
+          y=ragged_factory_ops.constant_value([
               [[[], ['a']]],
               [[['b', 'c', 'd'], ['e', 'f']],
                [['g'], [], ['h'], ['i', 'j', 'k']]]
           ]),
-          expected=ragged.constant_value([
+          expected=ragged_factory_ops.constant_value([
               [[[], [b'A']]],
               [[[b'B', b'c', b'D'], [b'e', b'F']],
                [[b'G'], [], [b'h'], [b'i', b'J', b'k']]]
@@ -154,22 +164,25 @@ class RaggedWhereOpTest(ragged_test_util.RaggedTensorTestCase,
           expected=[[b'A', b'B'], [b'c', b'd'], [b'E', b'F']]),
       dict(  # shape=[D1, (D2)]
           condition=[True, False, True],
-          x=ragged.constant_value([['A', 'B', 'C'], ['D', 'E'], ['F', 'G']]),
-          y=ragged.constant_value([['a', 'b'], ['c'], ['d', 'e']]),
-          expected=ragged.constant_value([[b'A', b'B', b'C'], [b'c'],
-                                          [b'F', b'G']])),
+          x=ragged_factory_ops.constant_value(
+              [['A', 'B', 'C'], ['D', 'E'], ['F', 'G']]),
+          y=ragged_factory_ops.constant_value(
+              [['a', 'b'], ['c'], ['d', 'e']]),
+          expected=ragged_factory_ops.constant_value(
+              [[b'A', b'B', b'C'], [b'c'], [b'F', b'G']])),
       dict(  # shape=[D1, (D2), (D3), (D4)]
-          condition=ragged.constant_value([True, False]),
-          x=ragged.constant_value([
+          condition=ragged_factory_ops.constant_value([True, False]),
+          x=ragged_factory_ops.constant_value([
               [[[], ['A']]],
               [[['B', 'C', 'D'], ['E', 'F']],
                [['G'], [], ['H'], ['I', 'J', 'K']]]
           ]),
-          y=ragged.constant_value([[[['a']]], [[['b']]]]),
-          expected=ragged.constant_value([[[[], [b'A']]], [[[b'b']]]])),
+          y=ragged_factory_ops.constant_value([[[['a']]], [[['b']]]]),
+          expected=ragged_factory_ops.constant_value(
+              [[[[], [b'A']]], [[[b'b']]]])),
   ])   # pyformat: disable
   def testRaggedWhere(self, condition, expected, x=None, y=None):
-    result = ragged.where(condition, x, y)
+    result = ragged_array_ops.where(condition, x, y)
     self.assertRaggedEqual(result, expected)
 
   @parameterized.parameters([
@@ -179,15 +192,16 @@ class RaggedWhereOpTest(ragged_test_util.RaggedTensorTestCase,
           error=ValueError,
           message='x and y must be either both None or both non-None'),
       dict(
-          condition=ragged.constant_value([[True, False, True], [False, True]]),
-          x=ragged.constant_value([['A', 'B', 'C'], ['D', 'E']]),
+          condition=ragged_factory_ops.constant_value([[True, False, True],
+                                                       [False, True]]),
+          x=ragged_factory_ops.constant_value([['A', 'B', 'C'], ['D', 'E']]),
           y=[['a', 'b'], ['d', 'e']],
           error=ValueError,
           message='Input shapes do not match.'),
   ])
   def testRaggedWhereErrors(self, condition, error, message, x=None, y=None):
     with self.assertRaisesRegexp(error, message):
-      ragged.where(condition, x, y)
+      ragged_array_ops.where(condition, x, y)
 
 
 if __name__ == '__main__':
-- 
GitLab


From abc60d3bb512e352d3d8a3badf3af3cae51443c8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 10:13:28 -0800
Subject: [PATCH 507/873] [TF] Enable quantized_ops_test that wasn't being run.

PiperOrigin-RevId: 225391173
---
 tensorflow/python/BUILD | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index c11df5534d..8c0e95ea4e 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -5321,6 +5321,20 @@ py_test(
     ],
 )
 
+py_test(
+    name = "quantized_ops_test",
+    size = "small",
+    srcs = ["ops/quantized_ops_test.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_windows"],
+    deps = [
+        ":array_ops",
+        ":client_testlib",
+        ":framework_for_generated_wrappers",
+        "//third_party/py/numpy",
+    ],
+)
+
 py_test(
     name = "quantized_conv_ops_test",
     size = "small",
-- 
GitLab


From 2b4428894325586b5eb5bb24022c5bac3fa30dc9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 10:40:18 -0800
Subject: [PATCH 508/873] Add transpose_output option to Dequantize Op in XLA.

PiperOrigin-RevId: 225396120
---
 tensorflow/compiler/xla/client/lib/quantize.h | 58 ++++++++----
 .../compiler/xla/client/lib/quantize_test.cc  | 89 ++++++++++++++++++-
 2 files changed, 127 insertions(+), 20 deletions(-)

diff --git a/tensorflow/compiler/xla/client/lib/quantize.h b/tensorflow/compiler/xla/client/lib/quantize.h
index e002e5e19c..26dbbd5b00 100644
--- a/tensorflow/compiler/xla/client/lib/quantize.h
+++ b/tensorflow/compiler/xla/client/lib/quantize.h
@@ -74,9 +74,14 @@ inline std::vector<uint32> PackToUint32(absl::Span<const T> input) {
 // Only uint8 or uint16 is supported for the original unpacked input.
 // Returns a tensor of shape [d0,..., dn * unpack_size] if
 // input shape is [d0, ..., dn], where unpack_size = sizeof(unit32) / sizeof(T).
+// If transpose_output is true, will return a tensor of shape
+// [dn * unpack_size, dn-1, ..., d1, d0]. transpose_output is faster when
+// input's rank higher than 1. The input needs to be transposed to use
+// transpose_output feature.
 template <typename T>
 inline XlaOp Dequantize(XlaOp input, const QuantizedRange& range,
-                        absl::string_view mode_string = "MIN_COMBINED") {
+                        absl::string_view mode_string = "MIN_COMBINED",
+                        bool transpose_output = false) {
   XlaBuilder* const builder = input.builder();
   return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     float half_range =
@@ -94,14 +99,9 @@ inline XlaOp Dequantize(XlaOp input, const QuantizedRange& range,
           "Only U32 is supported for input type of xla::Dequantize Op.");
     }
 
-    auto broadcast_size = shape.dimensions();
-    broadcast_size.push_back(unpack_size);
-    std::vector<int64> broadcast_dimensions(shape.dimensions_size());
-    std::iota(broadcast_dimensions.begin(), broadcast_dimensions.end(), 0);
-    // Broadcast the input to [d0, ..., dn, unpack_size] if input size is
+    // Broadcast the input to [unpack_size, d0, ..., dn] if input size is
     // [d0, ..., dn].
-    auto broadcast_input =
-        BroadcastInDim(input, broadcast_size, broadcast_dimensions);
+    auto broadcast_input = Broadcast(input, {unpack_size});
 
     XlaOp iota_r1 = Iota(builder, U32, unpack_size);
     // Highest significant bytes needs to shift more bytes than lower
@@ -110,8 +110,9 @@ inline XlaOp Dequantize(XlaOp input, const QuantizedRange& range,
         xla::ConstantR0<uint32>(builder, unpack_size - 1) - iota_r1;
 
     const int bytes_of_type = sizeof(T) / sizeof(uint8);
-    XlaOp shift_bits = shift_bytes * xla::ConstantR0<uint32>(
-                                         builder, kBitsOfByte * bytes_of_type);
+    std::vector<uint32> shift_vec(unpack_size, kBitsOfByte * bytes_of_type);
+    XlaOp shift_bits =
+        shift_bytes * xla::ConstantR1<uint32>(builder, shift_vec);
 
     // Make bit_mask for different data type T.
     uint32 bit_mask = 0x00000000;
@@ -120,9 +121,16 @@ inline XlaOp Dequantize(XlaOp input, const QuantizedRange& range,
       bit_mask |= 0x000000ff;
     }
 
+    std::vector<int64> shift_transpose_dimensions(shape.dimensions_size());
+    std::iota(shift_transpose_dimensions.begin(),
+              shift_transpose_dimensions.end(), 0);
+    shift_transpose_dimensions.insert(shift_transpose_dimensions.begin(), 1,
+                                      shape.dimensions_size());
+
     // Shift the input by sizeof(T) bytes and apply bit_mask to unpack.
     XlaOp shifted_input = ShiftRightLogical(
-        broadcast_input, Broadcast(shift_bits, shape.dimensions()));
+        broadcast_input, Transpose(Broadcast(shift_bits, shape.dimensions()),
+                                   shift_transpose_dimensions));
     XlaOp unpack_input =
         And(shifted_input, xla::ConstantR0<uint32>(builder, bit_mask));
 
@@ -148,12 +156,28 @@ inline XlaOp Dequantize(XlaOp input, const QuantizedRange& range,
           "Only MIN_COMBINED mode is supported in xla::Dequantize Op.");
     }
 
-    // Reshape the result to [d0,..., dn * unpack_size] if
-    // input shape is [d0, ..., dn].
-    std::vector<int64> result_shape(shape.dimensions());
-    result_shape[shape.dimensions_size() - 1] =
-        shape.dimensions(shape.dimensions_size() - 1) * unpack_size;
-    return Reshape(result, result_shape);
+    std::vector<int64> transpose_dimensions(shape.dimensions_size());
+    std::iota(transpose_dimensions.begin(), transpose_dimensions.end(), 1);
+    std::reverse(transpose_dimensions.begin(), transpose_dimensions.end());
+    transpose_dimensions.insert(transpose_dimensions.begin() + 1, 1, 0);
+
+    // Transpose the result to be [dn, unpack_size, dn-1, ..., d1, d0].
+    XlaOp transposed_result = Transpose(result, transpose_dimensions);
+
+    // Reshape to be [dn * unpack_size, dn-1, ..., d1, d0].
+    XlaOp reshaped_result = Collapse(transposed_result, {0, 1});
+
+    // Return the transpose result if transpose_output is true.
+    if (transpose_output) {
+      return reshaped_result;
+    }
+
+    // Transpose the result to be [d0, d1, ..., dn-1, dn * unpack_size].
+    std::vector<int64> result_dimensions(shape.dimensions_size());
+    std::iota(result_dimensions.begin(), result_dimensions.end(), 0);
+    std::reverse(result_dimensions.begin(), result_dimensions.end());
+
+    return Transpose(reshaped_result, result_dimensions);
   });
 }
 
diff --git a/tensorflow/compiler/xla/client/lib/quantize_test.cc b/tensorflow/compiler/xla/client/lib/quantize_test.cc
index f7ff3502d1..be3603d9e1 100644
--- a/tensorflow/compiler/xla/client/lib/quantize_test.cc
+++ b/tensorflow/compiler/xla/client/lib/quantize_test.cc
@@ -77,13 +77,25 @@ Array2D<uint32> PackLargeInput(Array2D<NativeT> &input) {
 
 template <typename NativeT>
 Array2D<bfloat16> GenerateLargeSizeMinCombinedOutput(
-    Array2D<NativeT> &input, const QuantizedRange &range) {
+    Array2D<NativeT> &input, const QuantizedRange &range,
+    bool transpose_output = false) {
   const int64 size_per_pack = sizeof(uint32) / sizeof(NativeT);
   int64 width = input.width();
 
   int64 padded_output_width = CeilOfRatio(width, size_per_pack) * size_per_pack;
 
-  Array2D<bfloat16> output(input.height(), padded_output_width, bfloat16(0.0));
+  int64 output_height;
+  int64 output_width;
+
+  if (transpose_output) {
+    output_height = padded_output_width;
+    output_width = input.height();
+  } else {
+    output_height = input.height();
+    output_width = padded_output_width;
+  }
+
+  Array2D<bfloat16> output(output_height, output_width, bfloat16(0.0));
 
   float half_range =
       !std::is_signed<NativeT>::value
@@ -102,7 +114,11 @@ Array2D<bfloat16> GenerateLargeSizeMinCombinedOutput(
       bfloat16 result =
           static_cast<bfloat16>(input(h, w) + half_range) * scale_factor +
           range.min;
-      output(h, w) = result;
+      if (transpose_output) {
+        output(w, h) = result;
+      } else {
+        output(h, w) = result;
+      }
     }
   }
 
@@ -206,6 +222,29 @@ XLA_TEST_F(DequantizeTest, MinCombinedUint8R2) {
   ComputeAndCompareR2<bfloat16>(&builder, expected, {});
 }
 
+XLA_TEST_F(DequantizeTest, MinCombinedUint8R2TransposeOutput) {
+  XlaBuilder builder(TestName());
+  std::vector<std::vector<uint8>> input = {
+      {0, 1, 2, 3},
+      {4, 5, 6, 7},
+      {8, 9, 10, 11},
+      {12, 13, 16, 15},
+  };
+  auto x = ConstantR2<uint32>(&builder, {{PackToUint32<uint8>(input[0])[0]},
+                                         {PackToUint32<uint8>(input[1])[0]},
+                                         {PackToUint32<uint8>(input[2])[0]},
+                                         {PackToUint32<uint8>(input[3])[0]}});
+  QuantizedRange range(0, 255.0f);
+  xla::Dequantize<uint8>(x, range, "MIN_COMBINED", /*transpose_output=*/true);
+  const Array2D<bfloat16> expected = {
+      {bfloat16(0.0), bfloat16(4.0), bfloat16(8.0), bfloat16(12.0)},
+      {bfloat16(1.0), bfloat16(5.0), bfloat16(9.0), bfloat16(13.0)},
+      {bfloat16(2.0), bfloat16(6.0), bfloat16(10.0), bfloat16(16.0)},
+      {bfloat16(3.0), bfloat16(7.0), bfloat16(11.0), bfloat16(15.0)},
+  };
+  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
+}
+
 XLA_TEST_F(DequantizeTest, MinCombinedUint8R2TailingZero) {
   XlaBuilder builder(TestName());
   std::vector<std::vector<uint8>> input = {
@@ -236,6 +275,36 @@ XLA_TEST_F(DequantizeTest, MinCombinedUint8R2TailingZero) {
   ComputeAndCompareR2<bfloat16>(&builder, expected, {});
 }
 
+XLA_TEST_F(DequantizeTest, MinCombinedUint8R2TailingZeroTransposeOutput) {
+  XlaBuilder builder(TestName());
+  std::vector<std::vector<uint8>> input = {
+      {0, 1, 2, 3, 16},
+      {4, 5, 6, 7, 17},
+      {8, 9, 10, 11, 18},
+      {12, 13, 16, 15, 19},
+  };
+  auto x = ConstantR2<uint32>(
+      &builder,
+      {{PackToUint32<uint8>(input[0])[0], PackToUint32<uint8>(input[0])[1]},
+       {PackToUint32<uint8>(input[1])[0], PackToUint32<uint8>(input[1])[1]},
+       {PackToUint32<uint8>(input[2])[0], PackToUint32<uint8>(input[2])[1]},
+       {PackToUint32<uint8>(input[3])[0], PackToUint32<uint8>(input[3])[1]}});
+  QuantizedRange range(0, 255.0f);
+  xla::Dequantize<uint8>(x, range, "MIN_COMBINED", /*transpose_output=*/true);
+
+  const Array2D<bfloat16> expected = {
+      {bfloat16(0.0), bfloat16(4.0), bfloat16(8.0), bfloat16(12.0)},
+      {bfloat16(1.0), bfloat16(5.0), bfloat16(9.0), bfloat16(13.0)},
+      {bfloat16(2.0), bfloat16(6.0), bfloat16(10.0), bfloat16(16.0)},
+      {bfloat16(3.0), bfloat16(7.0), bfloat16(11.0), bfloat16(15.0)},
+      {bfloat16(16.0), bfloat16(17.0), bfloat16(18.0), bfloat16(19.0)},
+      {bfloat16(0.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
+      {bfloat16(0.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
+      {bfloat16(0.0), bfloat16(0.0), bfloat16(0.0), bfloat16(0.0)},
+  };
+  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
+}
+
 XLA_TEST_F(DequantizeTest, MinCombinedUint8LargeSizeTest) {
   XlaBuilder builder(TestName());
   Array2D<uint8> input = GenerateLargeSizeInput<uint8>(500, 3547);
@@ -250,5 +319,19 @@ XLA_TEST_F(DequantizeTest, MinCombinedUint8LargeSizeTest) {
   ComputeAndCompareR2<bfloat16>(&builder, expected, {});
 }
 
+XLA_TEST_F(DequantizeTest, MinCombinedUint8LargeSizeTestTransposeOutput) {
+  XlaBuilder builder(TestName());
+  Array2D<uint8> input = GenerateLargeSizeInput<uint8>(500, 3547);
+  Array2D<uint32> input_packed = PackLargeInput<uint8>(input);
+
+  auto x = ConstantR2FromArray2D<uint32>(&builder, input_packed);
+  QuantizedRange range(0, 255.0f);
+  xla::Dequantize<uint8>(x, range, "MIN_COMBINED", /*transpose_output=*/true);
+
+  const Array2D<bfloat16> expected = GenerateLargeSizeMinCombinedOutput<uint8>(
+      input, range, /*transpose_output=*/true);
+  ComputeAndCompareR2<bfloat16>(&builder, expected, {});
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 150b4c8e40c3f50ae9c072685d9f413575b56458 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 13 Dec 2018 10:41:10 -0800
Subject: [PATCH 509/873] Use vlog rather then the verbosity level, to allow
 debugging without changing the code.

PiperOrigin-RevId: 225396273
---
 tensorflow/python/autograph/impl/api.py        | 3 +--
 tensorflow/python/autograph/impl/conversion.py | 9 ++++-----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index 0debf24fb9..c113f0e1f5 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -160,8 +160,7 @@ def do_not_convert(run_as=RunMode.GRAPH, return_dtypes=None):
 
 def converted_call(f, owner, options, *args, **kwargs):
   """Compiles a function call inline. For internal use only."""
-  if options.verbose >= converter.Verbosity.VERBOSE:
-    logging.info('Converted call: {}; owner: {}'.format(f, owner))
+  logging.vlog(logging.DEBUG, 'Converted call: %s; owner: %s', f, owner)
 
   if owner is not None:
     if not isinstance(f, str):
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index f8decd24e8..d0a12df6eb 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -129,8 +129,7 @@ def entity_to_graph(o, program_ctx, arg_values, arg_types):
   Raises:
     ValueError: if the entity type is not supported.
   """
-  if program_ctx.options.verbose == converter.Verbosity.VERBOSE:
-    logging.info('Converting {}'.format(o))
+  logging.vlog(logging.DEBUG, 'Converting %s', o)
 
   if tf_inspect.isclass(o):
     node, name, ns = class_to_graph(o, program_ctx)
@@ -164,9 +163,9 @@ def entity_to_graph(o, program_ctx, arg_values, arg_types):
 
   program_ctx.add_to_cache(o, node)
 
-  if program_ctx.options.verbose == converter.Verbosity.VERBOSE:
-    logging.info('Compiled output of {}:\n\n{}\n'.format(
-        o, compiler.ast_to_source(node)))
+  if logging.get_verbosity() <= logging.DEBUG:
+    logging.vlog(logging.DEBUG, 'Compiled output of %s:\n\n%s\n', o,
+                 compiler.ast_to_source(node))
 
   if program_ctx.options.recursive:
     while True:
-- 
GitLab


From 4d8108484737ce8bc0c759d500853392a53f9d71 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Thu, 13 Dec 2018 10:43:58 -0800
Subject: [PATCH 510/873] Handle outside compilation at beginning/end of TPU
 computation.

PiperOrigin-RevId: 225396866
---
 .../jit/encapsulate_subgraphs_pass_test.cc    | 420 +++++++++----
 tensorflow/compiler/jit/encapsulate_util.cc   | 594 +-----------------
 tensorflow/compiler/jit/encapsulate_util.h    |  93 +--
 .../compiler/jit/encapsulate_util_test.cc     | 346 +---------
 .../jit/extract_outside_compilation_pass.cc   |  28 +-
 tensorflow/compiler/jit/shape_inference.cc    |  10 +-
 tensorflow/contrib/tpu/python/tpu/tpu.py      |  10 +-
 .../contrib/tpu/python/tpu/tpu_estimator.py   |  25 +-
 8 files changed, 330 insertions(+), 1196 deletions(-)

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
index 7476d1dc51..8617beec00 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc
@@ -510,11 +510,7 @@ Status Encapsulate(GraphDef* graphdef, FunctionDefLibrary* library,
   s = ConvertGraphDefToGraph(options, *graphdef, graph.get());
   if (!s.ok()) return s;
 
-  s = PerformStaticShapeInferenceBeforeEncapsulation(
-      graph.get(), "_encapsulate", "_outside");
-  if (!s.ok()) return s;
-
-  s = PreprocessForEncapsulation(graph.get(), "_encapsulate", "_outside");
+  s = PerformStaticShapeInferenceBeforeEncapsulation(graph.get());
   if (!s.ok()) return s;
 
   std::unique_ptr<Graph> graph_out;
@@ -550,6 +546,14 @@ Status Encapsulate(GraphDef* graphdef, FunctionDefLibrary* library,
   graphdef->Swap(&graphdef_out);
 
   *library = lib_def->ToProto();
+  // Remove "_xla_inferred_shapes" attr. They are added by
+  // `PerformStaticShapeInferenceBeforeEncapsulation`.
+  for (FunctionDef& fdef : *library->mutable_function()) {
+    for (NodeDef& node_def : *fdef.mutable_node_def()) {
+      node_def.mutable_attr()->erase("_xla_inferred_shapes");
+    }
+  }
+
   return s;
 }
 
@@ -957,7 +961,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) {
     Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1),
                      b2.opts()
                          .WithName("E")
-                         .WithControlInputs({recv, b})
+                         .WithControlInputs({recv})
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
     Node* send = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
@@ -971,9 +975,9 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) {
     NodeBuilder node_builder("F1", "F1", lib_def.get());
     node_builder.Input(a).Input(b);
     Node* call =
-        b2.opts().WithControlInputs({s}).FinalizeBuilder(&node_builder);
+        b2.opts().WithControlInputs({s, b}).FinalizeBuilder(&node_builder);
 
-    Binary(a, call, b2.opts().WithName("G").WithControlInputs({e}));
+    Binary(a, call, b2.opts().WithName("G").WithControlInputs({call}));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
@@ -1055,12 +1059,17 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
     Node* recv2 = RecvAtHost(
         ops::NodeOut(key_constant, 0), "F1", "O2", {DT_FLOAT, DT_FLOAT},
         shape2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* g = Binary(e, ops::NodeOut(recv2, 0),
+                     shape2.opts()
+                         .WithName("G")
+                         .WithAttr("_encapsulate", "F1")
+                         .WithAttr("_outside", "O2"));
     Node* h = Binary(ops::NodeOut(recv2, 1), e,
                      shape2.opts()
                          .WithName("H")
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O2"));
-    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {h},
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g, h},
                  shape2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
     TF_EXPECT_OK(
         AddGraphDefToFunctionLibrary(shape2, "F1_O2", &library_expected));
@@ -1070,13 +1079,14 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
   shape_inference_graph1.set_name("_outside_compilation_shape_inference_F1_O1");
   shape_inference_graph2.set_name("_outside_compilation_shape_inference_F1_O2");
   *library_expected.add_function() = FunctionDefHelper::Create(
-      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"i_0_retval_retval:float"}, {},
+      "F1", {"a_0_arg:float", "b_0_arg:float"},
+      {"g_0_retval_retval:float", "i_0_retval_retval:float"}, {},
       {
           {{"C"}, "UnaryTest", {"a_0_arg"}},
           {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}, {}},
           {{"I"},
            "UnaryTest",
-           {"outside_compilation_O2_host_compute:outputs:0"}},
+           {"outside_compilation_O2_host_compute:outputs:1"}},
           {{"F"},
            "BinaryTest",
            {"C:o:0", "outside_compilation_O1_host_compute:outputs:0"},
@@ -1086,7 +1096,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
            "XlaHostCompute",
            {"F:o:0", "D:o:0"},
            {{"Tinputs", absl::Span<const DataType>({DT_FLOAT, DT_FLOAT})},
-            {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
+            {"Toutputs", absl::Span<const DataType>({DT_FLOAT, DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O2"},
             {"shape_inference_graph", shape_inference_graph2},
@@ -1105,7 +1115,8 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
             {"_outside_compilation_subgraph", "O1"}},
            {"D"}},
       },
-      {{"i_0_retval_retval", "I:o:0"}});
+      {{"g_0_retval_retval", "outside_compilation_O2_host_compute:outputs:0"},
+       {"i_0_retval_retval", "I:o:0"}});
 
   {
     std::unique_ptr<FunctionLibraryDefinition> lib_def(
@@ -1122,7 +1133,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
     Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1),
                      b2.opts()
                          .WithName("E")
-                         .WithControlInputs({recv1, b})
+                         .WithControlInputs({recv1})
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
     Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
@@ -1144,7 +1155,7 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O2"));
     Node* send2 =
-        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {h},
+        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g, h},
                      b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
 
     Node* s = Sequencer(b2.opts()
@@ -1154,12 +1165,13 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) {
 
     NodeBuilder node_builder("F1", "F1", lib_def.get());
     node_builder.Input(a).Input(b);
-    Node* call = b2.opts().WithControlInput(s).FinalizeBuilder(&node_builder);
+    Node* call =
+        b2.opts().WithControlInputs({s, b}).FinalizeBuilder(&node_builder);
 
-    Binary(g, call, b2.opts().WithName("J"));
+    Binary(ops::NodeOut(call, 0), ops::NodeOut(call, 1),
+           b2.opts().WithName("J"));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
-
   TF_EXPECT_GRAPH_EQ(graphdef_expected, graphdef);
   TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
 }
@@ -1211,7 +1223,9 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
 
   *library_expected.add_function() = FunctionDefHelper::Create(
       "F1", {"a_0_arg:float", "b_0_arg:float"},
-      {"f_0_retval_retval:float", "d_0_retval_retval:float"}, {},
+      {"e_0_retval_retval:float", "f_0_retval_retval:float",
+       "d_0_retval_retval:float"},
+      {},
       {
           {{"C"}, "UnaryTest", {"a_0_arg"}},
           {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
@@ -1233,20 +1247,22 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
             {"_outside_compilation_subgraph", "O1"}},
            {"D"}},
       },
-      {{"d_0_retval_retval", "D:o:0"}, {"f_0_retval_retval", "F:o:0"}});
+      {{"e_0_retval_retval", "outside_compilation_O1_host_compute:outputs:0"},
+       {"d_0_retval_retval", "D:o:0"},
+       {"f_0_retval_retval", "F:o:0"}});
 
   *library_expected.add_function() = FunctionDefHelper::Create(
-      "F2", {"f_0_arg:float", "bridge_e_g_0_arg:float"},
-      {"i_0_retval_retval:float", "g_0_retval_retval:float"}, {},
+      "F2", {"e_0_arg:float", "f_0_arg:float", "d_0_arg:float"},
+      {"g_0_retval_retval:float", "i_0_retval_retval:float"}, {},
       {
-          {{"G"}, "BinaryTest", {"bridge_e_g_0_arg", "f_0_arg"}},
+          {{"G"}, "BinaryTest", {"e_0_arg", "f_0_arg"}},
           {{"I"},
            "BinaryTest",
            {"f_0_arg", "outside_compilation_O1_host_compute:outputs:0"}},
           {{"outside_compilation_O1_host_compute"},
            "XlaHostCompute",
-           {"G:o:0"},
-           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
+           {"d_0_arg", "G:o:0"},
+           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT, DT_FLOAT})},
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F2_O1"},
@@ -1255,7 +1271,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
              absl::Span<const TensorShapeProto>({shape_proto_expected})},
             {"_outside_compilation_subgraph", "O1"}}},
       },
-      {{"i_0_retval_retval", "I:o:0"}, {"g_0_retval_retval", "G:o:0"}});
+      {{"g_0_retval_retval", "G:o:0"}, {"i_0_retval_retval", "I:o:0"}});
 
   {
     std::unique_ptr<FunctionLibraryDefinition> lib_def(
@@ -1272,7 +1288,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
     Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1),
                      b2.opts()
                          .WithName("E")
-                         .WithControlInputs({recv1, b})
+                         .WithControlInputs({recv1})
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
     Node* send1 = SendFromHost(ops::NodeOut(key_constant1, 0), "F1", "O1", {e},
@@ -1285,14 +1301,14 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
     NodeBuilder node_builder1("F1", "F1", lib_def.get());
     node_builder1.Input(a).Input(b);
     Node* call1 =
-        b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1);
+        b2.opts().WithControlInputs({s1, b}).FinalizeBuilder(&node_builder1);
 
     Node* key_constant2 =
         KeyPlaceholder("F2", b2.opts().WithName("F2_key_placeholder"));
-    Node* recv2 =
-        RecvAtHost(ops::NodeOut(key_constant2, 0), "F2", "O1", {DT_FLOAT},
-                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
-    Node* h = Binary(ops::NodeOut(call1, 1), recv2,
+    Node* recv2 = RecvAtHost(
+        ops::NodeOut(key_constant2, 0), "F2", "O1", {DT_FLOAT, DT_FLOAT},
+        b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* h = Binary(recv2, ops::NodeOut(recv2, 1),
                      b2.opts()
                          .WithName("H")
                          .WithAttr("_encapsulate", "F2")
@@ -1305,11 +1321,13 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) {
         b2.opts().WithName("F2_sequencer").WithControlInputs({recv2, send2}),
         "F2");
     NodeBuilder node_builder2("F2", "F2", lib_def.get());
-    node_builder2.Input(call1).Input(e);
+    node_builder2.Input(call1)
+        .Input(ops::NodeOut(call1, 1))
+        .Input(ops::NodeOut(call1, 2));
     Node* call2 = b2.opts()
-                      .WithControlInputs({s2, e, call1})
+                      .WithControlInputs({s2, call1})
                       .FinalizeBuilder(&node_builder2);
-    Binary(ops::NodeOut(call2, 1), call2, b2.opts().WithName("J"));
+    Binary(call2, ops::NodeOut(call2, 1), b2.opts().WithName("J"));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
@@ -1345,8 +1363,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) {
     Node* h = Unary(g, b1.opts()
                            .WithName("H")
                            .WithAttr("_encapsulate", "F2")
-                           .WithAttr("_outside", "O1")
-                           .WithControlInput(e));
+                           .WithAttr("_outside", "O1"));
     Node* i = Unary(h, b1.opts().WithName("I").WithAttr("_encapsulate", "F2"));
     Binary(f, i, b1.opts().WithName("J"));
     TF_EXPECT_OK(b1.ToGraphDef(&graphdef));
@@ -1420,7 +1437,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) {
     Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1),
                      b2.opts()
                          .WithName("E")
-                         .WithControlInputs({recv1, b})
+                         .WithControlInputs({recv1})
                          .WithAttr("_encapsulate", "F1")
                          .WithAttr("_outside", "O1"));
     Node* send1 = SendFromHost(ops::NodeOut(key_constant1, 0), "F1", "O1", {e},
@@ -1432,7 +1449,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) {
     NodeBuilder node_builder1("F1", "F1", lib_def.get());
     node_builder1.Input(a).Input(b);
     Node* call1 =
-        b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1);
+        b2.opts().WithControlInputs({s1, b}).FinalizeBuilder(&node_builder1);
 
     Node* key_constant2 =
         KeyPlaceholder("F2", b2.opts().WithName("F2_key_placeholder"));
@@ -1441,8 +1458,7 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutsideDependencyFromOutside) {
     Node* h = Unary(recv2, b2.opts()
                                .WithName("H")
                                .WithAttr("_encapsulate", "F2")
-                               .WithAttr("_outside", "O1")
-                               .WithControlInput(e));
+                               .WithAttr("_outside", "O1"));
     Node* send2 = SendFromHost(ops::NodeOut(key_constant2, 0), "F2", "O1", {h},
                                b2.opts());
 
@@ -1503,8 +1519,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) {
            {"D:o:0", "outside_compilation_O1_host_compute:outputs:0"}},
           {{"outside_compilation_O1_host_compute"},
            "XlaHostCompute",
-           {},
-           {{"Tinputs", absl::Span<const DataType>({})},
+           {"a_0_arg"},
+           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
@@ -1522,16 +1538,19 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) {
     Node* a = InputShaped(b2.opts().WithName("A"));
     Node* b = Input(b2.opts().WithName("B"));
 
-    Node* e = Unary(a, b2.opts()
-                           .WithName("E")
-                           .WithAttr("_encapsulate", "F1")
-                           .WithAttr("_outside", "O1"));
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
+    Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
+                             {DT_FLOAT}, b2.opts());
+    Node* e = Unary(recv1, b2.opts()
+                               .WithName("E")
+                               .WithAttr("_encapsulate", "F1")
+                               .WithAttr("_outside", "O1"));
     Node* send1 =
         SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts());
     Node* s1 = Sequencer(
-        b2.opts().WithName("F1_sequencer").WithControlInput(send1), "F1");
+        b2.opts().WithName("F1_sequencer").WithControlInputs({send1, recv1}),
+        "F1");
     NodeBuilder node_builder1("F1", "F1", lib_def.get());
     node_builder1.Input(a).Input(b);
     Node* call1 =
@@ -1588,8 +1607,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) {
            {"D:o:0", "outside_compilation_O1_host_compute:outputs:0"}},
           {{"outside_compilation_O1_host_compute"},
            "XlaHostCompute",
-           {},
-           {{"Tinputs", absl::Span<const DataType>({})},
+           {"a_0_arg"},
+           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
@@ -1610,13 +1629,13 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) {
 
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv1 =
-        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {}, b2.opts());
-    Node* e = Unary(a, b2.opts()
-                           .WithName("E")
-                           .WithControlInput(recv1)
-                           .WithAttr("_encapsulate", "F1")
-                           .WithAttr("_outside", "O1"));
+    Node* recv1 = RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1",
+                             {DT_FLOAT}, b2.opts());
+    Node* e = Unary(recv1, b2.opts()
+                               .WithName("E")
+                               .WithControlInput(recv1)
+                               .WithAttr("_encapsulate", "F1")
+                               .WithAttr("_outside", "O1"));
     Node* send1 =
         SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts());
     Node* s1 = Sequencer(
@@ -1663,8 +1682,27 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) {
   FunctionDefLibrary library_expected;
   GraphDef graphdef_expected;
 
+  {
+    GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately);
+    Node* key_constant = KeyPlaceholder("F1", shape1.opts());
+    Node* recv1 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* e = Unary(ops::NodeOut(recv1, 0), shape1.opts()
+                                                .WithName("E")
+                                                .WithAttr("_encapsulate", "F1")
+                                                .WithAttr("_outside", "O1"));
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    TF_EXPECT_OK(
+        AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected));
+  }
+
+  NameAttrList shape_inference_graph;
+  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = FunctionDefHelper::Create(
-      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval_retval:float"}, {},
+      "F1", {"a_0_arg:float", "b_0_arg:float"},
+      {"e_0_retval_retval:float", "f_0_retval_retval:float"}, {},
       {
           {{"C"}, "UnaryTest", {"a_0_arg"}},
           {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
@@ -1673,14 +1711,15 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) {
            "XlaHostCompute",
            {"D:o:0"},
            {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
-            {"Toutputs", absl::Span<const DataType>({})},
+            {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph", NameAttrList()},
+            {"shape_inference_graph", shape_inference_graph},
             {"shapes", absl::Span<const TensorShapeProto>({})},
             {"_outside_compilation_subgraph", "O1"}}},
       },
-      {{"f_0_retval_retval", "F:o:0"}});
+      {{"e_0_retval_retval", "outside_compilation_O1_host_compute:outputs:0"},
+       {"f_0_retval_retval", "F:o:0"}});
 
   {
     std::unique_ptr<FunctionLibraryDefinition> lib_def(
@@ -1697,14 +1736,17 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) {
                                .WithName("E")
                                .WithAttr("_encapsulate", "F1")
                                .WithAttr("_outside", "O1"));
+    Node* send1 =
+        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e}, b2.opts());
     Node* s1 = Sequencer(
-        b2.opts().WithName("F1_sequencer").WithControlInput(recv1), "F1");
+        b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}),
+        "F1");
     NodeBuilder node_builder1("F1", "F1", lib_def.get());
     node_builder1.Input(a).Input(b);
     Node* call1 =
         b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1);
 
-    Binary(e, call1, b2.opts().WithName("G"));
+    Binary(call1, ops::NodeOut(call1, 1), b2.opts().WithName("G"));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
@@ -1741,8 +1783,27 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) {
   FunctionDefLibrary library_expected;
   GraphDef graphdef_expected;
 
+  {
+    GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately);
+    Node* key_constant = KeyPlaceholder("F1", shape1.opts());
+    Node* recv1 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* e = Unary(ops::NodeOut(recv1, 0), shape1.opts()
+                                                .WithName("E")
+                                                .WithAttr("_encapsulate", "F1")
+                                                .WithAttr("_outside", "O1"));
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    TF_EXPECT_OK(
+        AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected));
+  }
+
+  NameAttrList shape_inference_graph;
+  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = FunctionDefHelper::Create(
-      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval_retval:float"}, {},
+      "F1", {"a_0_arg:float", "b_0_arg:float"},
+      {"e_0_retval_retval:float", "f_0_retval_retval:float"}, {},
       {
           {{"C"}, "UnaryTest", {"a_0_arg"}},
           {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
@@ -1755,14 +1816,15 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) {
            "XlaHostCompute",
            {"D:o:0"},
            {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
-            {"Toutputs", absl::Span<const DataType>({})},
+            {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
-            {"shape_inference_graph", NameAttrList()},
+            {"shape_inference_graph", shape_inference_graph},
             {"shapes", absl::Span<const TensorShapeProto>({})},
             {"_outside_compilation_subgraph", "O1"}}},
       },
-      {{"f_0_retval_retval", "F:o:0"}});
+      {{"e_0_retval_retval", "outside_compilation_O1_host_compute:outputs:0"},
+       {"f_0_retval_retval", "F:o:0"}});
 
   {
     std::unique_ptr<FunctionLibraryDefinition> lib_def(
@@ -1779,7 +1841,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) {
                                .WithName("E")
                                .WithAttr("_encapsulate", "F1")
                                .WithAttr("_outside", "O1"));
-    Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {},
+    Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
                                b2.opts().WithControlInput(e));
     Node* s1 = Sequencer(
         b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1}),
@@ -1789,7 +1851,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) {
     Node* call1 =
         b2.opts().WithControlInput(s1).FinalizeBuilder(&node_builder1);
 
-    Binary(e, call1, b2.opts().WithName("G"));
+    Binary(call1, ops::NodeOut(call1, 1), b2.opts().WithName("G"));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
@@ -1832,6 +1894,22 @@ TEST(EncapsulateSubgraphsTest,
   FunctionDefLibrary library_expected;
   GraphDef graphdef_expected;
 
+  {
+    GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately);
+    Node* key_constant = KeyPlaceholder("F1", shape1.opts());
+    Node* recv1 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* e = Unary(ops::NodeOut(recv1, 0), shape1.opts()
+                                                .WithName("E")
+                                                .WithAttr("_encapsulate", "F1")
+                                                .WithAttr("_outside", "O1"));
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    TF_EXPECT_OK(
+        AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected));
+  }
+
   {
     GraphDefBuilder shape2(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape2.opts());
@@ -1848,10 +1926,13 @@ TEST(EncapsulateSubgraphsTest,
         AddGraphDefToFunctionLibrary(shape2, "F1_O2", &library_expected));
   }
 
-  NameAttrList shape_inference_graph;
-  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O2");
+  NameAttrList shape_inference_graph1;
+  shape_inference_graph1.set_name("_outside_compilation_shape_inference_F1_O1");
+  NameAttrList shape_inference_graph2;
+  shape_inference_graph2.set_name("_outside_compilation_shape_inference_F1_O2");
   *library_expected.add_function() = FunctionDefHelper::Create(
-      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval_retval:float"}, {},
+      "F1", {"a_0_arg:float", "b_0_arg:float"},
+      {"e_0_retval_retval:float", "h_0_retval_retval:float"}, {},
       {
           {{"C"}, "UnaryTest", {"a_0_arg"}},
           {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
@@ -1859,6 +1940,16 @@ TEST(EncapsulateSubgraphsTest,
           {{"H"},
            "UnaryTest",
            {"outside_compilation_O2_host_compute:outputs:0"}},
+          {{"outside_compilation_O1_host_compute"},
+           "XlaHostCompute",
+           {"a_0_arg"},
+           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
+            {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
+            {"ancestors", absl::Span<const string>({})},
+            {"key", "host_compute_channel_F1_O1"},
+            {"shape_inference_graph", shape_inference_graph1},
+            {"shapes", absl::Span<const TensorShapeProto>({})},
+            {"_outside_compilation_subgraph", "O1"}}},
           {{"outside_compilation_O2_host_compute"},
            "XlaHostCompute",
            {"F:o:0"},
@@ -1866,11 +1957,12 @@ TEST(EncapsulateSubgraphsTest,
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O2"},
-            {"shape_inference_graph", shape_inference_graph},
+            {"shape_inference_graph", shape_inference_graph2},
             {"shapes", absl::Span<const TensorShapeProto>({})},
             {"_outside_compilation_subgraph", "O2"}}},
       },
-      {{"h_0_retval_retval", "H:o:0"}});
+      {{"e_0_retval_retval", "outside_compilation_O1_host_compute:outputs:0"},
+       {"h_0_retval_retval", "H:o:0"}});
 
   {
     std::unique_ptr<FunctionLibraryDefinition> lib_def(
@@ -1878,32 +1970,39 @@ TEST(EncapsulateSubgraphsTest,
     GraphDefBuilder b2(GraphDefBuilder::kFailImmediately, lib_def.get());
     Node* a = Input(b2.opts().WithName("A"));
     Node* b = Input(b2.opts().WithName("B"));
-
-    Node* e = Unary(a, b2.opts()
-                           .WithName("E")
-                           .WithAttr("_encapsulate", "F1")
-                           .WithAttr("_outside", "O1"));
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv =
+    Node* recv1 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+
+    Node* e = Unary(recv1, b2.opts()
+                               .WithName("E")
+                               .WithAttr("_encapsulate", "F1")
+                               .WithAttr("_outside", "O1"));
+    Node* send1 =
+        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                     b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* recv2 =
         RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", {DT_FLOAT},
                    b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
-    Node* g = Unary(recv, b2.opts()
-                              .WithName("G")
-                              .WithAttr("_encapsulate", "F1")
-                              .WithAttr("_outside", "O2")
-                              .WithControlInput(e));
-    Node* send =
+    Node* g = Unary(recv2, b2.opts()
+                               .WithName("G")
+                               .WithAttr("_encapsulate", "F1")
+                               .WithAttr("_outside", "O2")
+                               .WithControlInput(e));
+    Node* send2 =
         SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O2", {g},
                      b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
-    Node* s1 = Sequencer(
-        b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}),
-        "F1");
+    Node* s1 = Sequencer(b2.opts()
+                             .WithName("F1_sequencer")
+                             .WithControlInputs({recv1, send1, recv2, send2}),
+                         "F1");
     NodeBuilder node_builder1("F1", "F1", lib_def.get());
     node_builder1.Input(a).Input(b).ControlInput(s1);
     Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
 
-    Binary(e, call1, b2.opts().WithName("I"));
+    Binary(call1, ops::NodeOut(call1, 1), b2.opts().WithName("I"));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
@@ -1965,7 +2064,8 @@ TEST(EncapsulateSubgraphsTest,
   NameAttrList shape_inference_graph;
   shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = FunctionDefHelper::Create(
-      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval_retval:float"}, {},
+      "F1", {"a_0_arg:float", "b_0_arg:float"},
+      {"e_0_retval_retval:float", "h_0_retval_retval:float"}, {},
       {
           {{"C"}, "UnaryTest", {"a_0_arg"}},
           {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
@@ -1973,6 +2073,16 @@ TEST(EncapsulateSubgraphsTest,
            "UnaryTest",
            {"outside_compilation_O1_host_compute:outputs:0"}},
           {{"H"}, "UnaryTest", {"F:o:0"}},
+          {{"outside_compilation_O2_host_compute"},
+           "XlaHostCompute",
+           {"a_0_arg"},
+           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
+            {"Toutputs", absl::Span<const DataType>({})},
+            {"ancestors", absl::Span<const string>({})},
+            {"key", "host_compute_channel_F1_O2"},
+            {"shape_inference_graph", NameAttrList()},
+            {"shapes", absl::Span<const TensorShapeProto>({})},
+            {"_outside_compilation_subgraph", "O2"}}},
           {{"outside_compilation_O1_host_compute"},
            "XlaHostCompute",
            {"D:o:0"},
@@ -1984,7 +2094,8 @@ TEST(EncapsulateSubgraphsTest,
             {"shapes", absl::Span<const TensorShapeProto>({})},
             {"_outside_compilation_subgraph", "O1"}}},
       },
-      {{"h_0_retval_retval", "H:o:0"}});
+      {{"e_0_retval_retval", "outside_compilation_O1_host_compute:outputs:0"},
+       {"h_0_retval_retval", "H:o:0"}});
 
   {
     std::unique_ptr<FunctionLibraryDefinition> lib_def(
@@ -1995,29 +2106,33 @@ TEST(EncapsulateSubgraphsTest,
 
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv =
+    Node* recv1 =
         RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
                    b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
-    Node* e = Unary(recv, b2.opts()
-                              .WithName("E")
-                              .WithAttr("_encapsulate", "F1")
-                              .WithAttr("_outside", "O1"));
+    Node* e = Unary(recv1, b2.opts()
+                               .WithName("E")
+                               .WithAttr("_encapsulate", "F1")
+                               .WithAttr("_outside", "O1"));
     Node* send =
         SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
                      b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
-    /*Node* g =*/Unary(a, b2.opts()
-                              .WithName("G")
-                              .WithAttr("_encapsulate", "F1")
-                              .WithAttr("_outside", "O2")
-                              .WithControlInput(e));
-    Node* s1 = Sequencer(
-        b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}),
-        "F1");
+    Node* recv2 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O2", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    /*Node* g =*/Unary(recv2, b2.opts()
+                                  .WithName("G")
+                                  .WithAttr("_encapsulate", "F1")
+                                  .WithAttr("_outside", "O2")
+                                  .WithControlInput(e));
+    Node* s1 = Sequencer(b2.opts()
+                             .WithName("F1_sequencer")
+                             .WithControlInputs({recv1, recv2, send}),
+                         "F1");
     NodeBuilder node_builder1("F1", "F1", lib_def.get());
     node_builder1.Input(a).Input(b).ControlInput(s1);
     Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
 
-    Binary(e, call1, b2.opts().WithName("I"));
+    Binary(call1, ops::NodeOut(call1, 1), b2.opts().WithName("I"));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
@@ -2084,7 +2199,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) {
   NameAttrList shape_inference_graph;
   shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = FunctionDefHelper::Create(
-      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"h_0_retval_retval:float"}, {},
+      "F1", {"a_0_arg:float", "b_0_arg:float"},
+      {"e_0_retval_retval:float", "h_0_retval_retval:float"}, {},
       {{{"C"}, "UnaryTest", {"a_0_arg"}},
        {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
        {{"F"}, "UnaryTest", {"outside_compilation_O1_host_compute:outputs:0"}},
@@ -2121,7 +2237,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) {
          {"shapes", absl::Span<const TensorShapeProto>({})},
          {"_outside_compilation_subgraph", "O3"}},
         {}}},
-      {{"h_0_retval_retval", "H:o:0"}});
+      {{"e_0_retval_retval", "outside_compilation_O1_host_compute:outputs:0"},
+       {"h_0_retval_retval", "H:o:0"}});
 
   {
     std::unique_ptr<FunctionLibraryDefinition> lib_def(
@@ -2167,7 +2284,7 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationClusterDependency) {
     node_builder1.Input(a).Input(b).ControlInput(s1);
     Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
 
-    Binary(e, call1, b2.opts().WithName("J"));
+    Binary(call1, ops::NodeOut(call1, 1), b2.opts().WithName("J"));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
@@ -2203,14 +2320,44 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputsOrOutputs) {
   FunctionDefLibrary library_expected;
   GraphDef graphdef_expected;
 
+  {
+    GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately);
+    Node* key_constant = KeyPlaceholder("F1", shape1.opts());
+    Node* recv2 =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* e = Unary(ops::NodeOut(recv2, 0), shape1.opts()
+                                                .WithName("E")
+                                                .WithAttr("_encapsulate", "F1")
+                                                .WithAttr("_outside", "O1"));
+    SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                 shape1.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    TF_EXPECT_OK(
+        AddGraphDefToFunctionLibrary(shape1, "F1_O1", &library_expected));
+  }
+
+  NameAttrList shape_inference_graph;
+  shape_inference_graph.set_name("_outside_compilation_shape_inference_F1_O1");
   *library_expected.add_function() = FunctionDefHelper::Create(
-      "F1", {"a_0_arg:float", "b_0_arg:float"}, {"f_0_retval_retval:float"}, {},
+      "F1", {"a_0_arg:float", "b_0_arg:float"},
+      {"e_0_retval_retval:float", "f_0_retval_retval:float"}, {},
       {
           {{"C"}, "UnaryTest", {"a_0_arg"}},
           {{"D"}, "BinaryTest", {"b_0_arg", "C:o:0"}},
           {{"F"}, "UnaryTest", {"D:o:0"}},
+          {{"outside_compilation_O1_host_compute"},
+           "XlaHostCompute",
+           {"a_0_arg"},
+           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
+            {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
+            {"ancestors", absl::Span<const string>({})},
+            {"key", "host_compute_channel_F1_O1"},
+            {"shape_inference_graph", shape_inference_graph},
+            {"shapes", absl::Span<const TensorShapeProto>({})},
+            {"_outside_compilation_subgraph", "O1"}}},
       },
-      {{"f_0_retval_retval", "F:o:0"}});
+      {{"e_0_retval_retval", "outside_compilation_O1_host_compute:outputs:0"},
+       {"f_0_retval_retval", "F:o:0"}});
 
   {
     std::unique_ptr<FunctionLibraryDefinition> lib_def(
@@ -2219,15 +2366,26 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputsOrOutputs) {
     Node* a = Input(b2.opts().WithName("A"));
     Node* b = Input(b2.opts().WithName("B"));
 
-    Node* e = Unary(a, b2.opts()
-                           .WithName("E")
-                           .WithAttr("_encapsulate", "F1")
-                           .WithAttr("_outside", "O1"));
+    Node* key_constant =
+        KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
+    Node* recv =
+        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
+                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* e = Unary(recv, b2.opts()
+                              .WithName("E")
+                              .WithAttr("_encapsulate", "F1")
+                              .WithAttr("_outside", "O1"));
+    Node* send =
+        SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
+                     b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* s = Sequencer(
+        b2.opts().WithName("F1_sequencer").WithControlInputs({recv, send}),
+        "F1");
     NodeBuilder node_builder1("F1", "F1", lib_def.get());
-    node_builder1.Input(a).Input(b);
+    node_builder1.Input(a).Input(b).ControlInput(s);
     Node* call1 = b2.opts().FinalizeBuilder(&node_builder1);
 
-    Binary(e, call1, b2.opts().WithName("G"));
+    Binary(call1, ops::NodeOut(call1, 1), b2.opts().WithName("G"));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
@@ -2272,12 +2430,10 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
   {
     GraphDefBuilder shape(GraphDefBuilder::kFailImmediately);
     Node* key_constant = KeyPlaceholder("F1", shape.opts());
-    Node* recv =
-        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
-                   shape.opts().WithAttr(kXlaHasHostTransferAttrName, true));
-    Node* a = InputShaped(shape.opts().WithName("A"));
-    Node* c = Unary(a, shape.opts().WithName("C"));
-    Node* e = BinaryUnknownShape(c, recv,
+    Node* recv = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT, DT_FLOAT},
+        shape.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* e = BinaryUnknownShape(recv, ops::NodeOut(recv, 1),
                                  shape.opts()
                                      .WithName("E")
                                      .WithAttr("_encapsulate", "F1")
@@ -2302,8 +2458,8 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
            {"outside_compilation_O1_host_compute"}},
           {{"outside_compilation_O1_host_compute"},
            "XlaHostCompute",
-           {"c:o:0"},
-           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT})},
+           {"c_0_arg", "c:o:0"},
+           {{"Tinputs", absl::Span<const DataType>({DT_FLOAT, DT_FLOAT})},
             {"Toutputs", absl::Span<const DataType>({DT_FLOAT})},
             {"ancestors", absl::Span<const string>({})},
             {"key", "host_compute_channel_F1_O1"},
@@ -2324,13 +2480,13 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
 
     Node* key_constant =
         KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder"));
-    Node* recv =
-        RecvAtHost(ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT},
-                   b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
-    Node* e = BinaryUnknownShape(c, ops::NodeOut(recv, 0),
+    Node* recv = RecvAtHost(
+        ops::NodeOut(key_constant, 0), "F1", "O1", {DT_FLOAT, DT_FLOAT},
+        b2.opts().WithAttr(kXlaHasHostTransferAttrName, true));
+    Node* e = BinaryUnknownShape(recv, ops::NodeOut(recv, 1),
                                  b2.opts()
                                      .WithName("E")
-                                     .WithControlInputs({recv, b})
+                                     .WithControlInputs({recv})
                                      .WithAttr("_encapsulate", "F1")
                                      .WithAttr("_outside", "O1"));
     Node* send = SendFromHost(ops::NodeOut(key_constant, 0), "F1", "O1", {e},
@@ -2344,9 +2500,9 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
     NodeBuilder node_builder("F1", "F1", lib_def.get());
     node_builder.Input(b).Input(c);
     Node* call =
-        b2.opts().WithControlInputs({s, c}).FinalizeBuilder(&node_builder);
+        b2.opts().WithControlInputs({s, b, c}).FinalizeBuilder(&node_builder);
 
-    Binary(a, call, b2.opts().WithName("G").WithControlInputs({e}));
+    Binary(a, call, b2.opts().WithName("G").WithControlInputs({call}));
     TF_EXPECT_OK(b2.ToGraphDef(&graphdef_expected));
   }
 
diff --git a/tensorflow/compiler/jit/encapsulate_util.cc b/tensorflow/compiler/jit/encapsulate_util.cc
index 1f4b9c90a4..2264806d6b 100644
--- a/tensorflow/compiler/jit/encapsulate_util.cc
+++ b/tensorflow/compiler/jit/encapsulate_util.cc
@@ -62,517 +62,6 @@ void ReplaceAttr(Node* n, const string& attr_name, const T& value) {
   n->AddAttr(attr_name, value);
 }
 
-// Step 1a ~ 1d for PreprocessForEncapsulation(). See comments of
-// PreprocessForEncapsulation() for details.
-Status ProcessControlEdges(Graph* g, const string& xla_computation_attr_name,
-                           const string& outside_compilation_attr_name) {
-  // Gather edges to remove. We should not remove the edge while iterating.
-  std::vector<const Edge*> edges_to_remove;
-  for (const Edge* e : g->edges()) {
-    if (!e->IsControlEdge()) {
-      continue;
-    }
-
-    auto src_xla_computation =
-        GetStringAttr(*e->src(), xla_computation_attr_name);
-    auto dst_xla_computation =
-        GetStringAttr(*e->dst(), xla_computation_attr_name);
-    auto src_outside_compilation =
-        GetStringAttr(*e->src(), outside_compilation_attr_name);
-    auto dst_outside_compilation =
-        GetStringAttr(*e->dst(), outside_compilation_attr_name);
-
-    if (!src_xla_computation && !dst_xla_computation) {
-      continue;
-    } else if (src_xla_computation && !dst_xla_computation) {
-      if (src_outside_compilation) {
-        // Case 1c: outside compilation to host computation control edge.
-        edges_to_remove.push_back(e);
-
-        TF_RETURN_IF_ERROR(AppendToListAttr<string>(
-            e->dst(), kXlaControlDependenciesAttrName, e->src()->name()));
-      }
-    } else if (!src_xla_computation && dst_xla_computation) {
-      if (dst_outside_compilation) {
-        // Case 1c: host computation control to outside compilation edge.
-        edges_to_remove.push_back(e);
-
-        TF_RETURN_IF_ERROR(AppendToListAttr<string>(
-            e->dst(), kXlaControlDependenciesAttrName, e->src()->name()));
-      }
-    } else {  // src_xla_computation && dst_xla_computation
-      if (*src_xla_computation != *dst_xla_computation) {
-        if (src_outside_compilation && dst_outside_compilation) {
-          // Case 1b: outside compilation to outside compilation control edge.
-          edges_to_remove.push_back(e);
-
-          TF_RETURN_IF_ERROR(AppendToListAttr<string>(
-              e->dst(), kXlaControlDependenciesAttrName, e->src()->name()));
-        } else if (src_outside_compilation && !dst_outside_compilation) {
-          // Case 1a: outside compilation to another XLA computaition control
-          // edge.
-          TF_RETURN_IF_ERROR(AppendToListAttr<string>(
-              e->src(), kXlaConnectedToOtherXlaComputationAttrName,
-              *dst_xla_computation));
-        } else if (!src_outside_compilation && dst_outside_compilation) {
-          // Case 1a: another XLA computaition to outside compilation control
-          // edge.
-          TF_RETURN_IF_ERROR(AppendToListAttr<string>(
-              e->dst(), kXlaConnectedFromOtherXlaComputationAttrName,
-              *src_xla_computation));
-        }
-      }
-    }
-  }
-
-  for (auto e : edges_to_remove) {
-    g->RemoveEdge(e);
-  }
-  return Status::OK();
-}
-
-// Step 2 for PreprocessForEncapsulation(). See comments of
-// PreprocessForEncapsulation() for details.
-Status ProcessXlaToXlaDataEdges(Graph* g,
-                                const string& xla_computation_attr_name,
-                                const string& outside_compilation_attr_name) {
-  // Gather edges between XLA computations. Notice that we do not store `Edge*`
-  // directly because we remove some nodes while adding Identity nodes, and
-  // those Edge pointers might be invalidated.
-  struct EdgeInfo {
-    int dst_input, dst_node_id;
-  };
-  std::vector<EdgeInfo> edges;
-  for (const Edge* e : g->edges()) {
-    if (e->IsControlEdge()) {
-      continue;
-    }
-
-    auto src_xla_computation =
-        GetStringAttr(*e->src(), xla_computation_attr_name);
-    auto dst_xla_computation =
-        GetStringAttr(*e->dst(), xla_computation_attr_name);
-    auto src_outside_compilation =
-        GetStringAttr(*e->src(), outside_compilation_attr_name);
-    auto dst_outside_compilation =
-        GetStringAttr(*e->dst(), outside_compilation_attr_name);
-    if (!src_xla_computation || !dst_xla_computation) {
-      continue;
-    }
-
-    if (*src_xla_computation != *dst_xla_computation) {
-      if (src_outside_compilation || dst_outside_compilation) {
-        edges.push_back(EdgeInfo{e->dst_input(), e->dst()->id()});
-        VLOG(4) << "XLA -> XLA edge: " << e->DebugString();
-      }
-    }
-  }
-
-  // For each XLA -> XLA edge, add an Identity node between src and dst.
-  for (int i = 0; i < edges.size(); i++) {
-    Node* dst = g->FindNodeId(edges[i].dst_node_id);
-    const Edge* e;
-    TF_RETURN_IF_ERROR(dst->input_edge(edges[i].dst_input, &e));
-    Node* src = e->src();
-    int src_output = e->src_output(), dst_input = e->dst_input();
-    g->RemoveEdge(e);
-
-    // Create Identity node, and connect it between `src` and `dst`.
-    string identity_node_name =
-        absl::StrCat("bridge_", src->name(), "_", dst->name());
-    DataType dtype = src->output_type(src_output);
-    TF_ASSIGN_OR_RETURN(Node * identity_node,
-                        BuildIdentityNode(g, identity_node_name, dtype, src,
-                                          /*requested_device=*/absl::nullopt));
-    identity_node->AddAttr(kBridgeSourceNodeAttrName, src->name());
-    g->AddEdge(src, src_output, identity_node, 0);
-    g->AddEdge(identity_node, 0, dst, dst_input);
-
-    // Replace `e->dst()` because its input node changed.
-    NodeDef new_def = dst->def();
-    *new_def.mutable_input(dst_input) = identity_node->name();
-    TF_ASSIGN_OR_RETURN(Node * dst_replace_node, ReplaceNode(g, dst, new_def));
-
-    // Other edge in `edges` might have `e->dst()` as src or dst
-    // node. Before removing `e->dst()`, replace those edges with corresponding
-    // edges for `dst_replace_node`.
-    for (int j = i + 1; j < edges.size(); j++) {
-      if (edges[j].dst_node_id == edges[i].dst_node_id) {
-        edges[j].dst_node_id = dst_replace_node->id();
-      }
-    }
-  }
-  return Status::OK();
-}
-
-// Step 3 for PreprocessForEncapsulation(). See comments of
-// PreprocessForEncapsulation() for details.
-Status ProcessDataEdgeBetweenOutsideCompilationAndHostComputation(
-    Graph* g, const string& xla_computation_attr_name,
-    const string& outside_compilation_attr_name) {
-  // Gather edges between outside compilation and host computation. Notice that
-  // we do not store `Edge*` directly because we remove some nodes while adding
-  // Identity nodes, and those Edge pointers might be invalidated.
-  struct EdgeInfo {
-    int dst_input, dst_node_id;
-    bool is_host_to_outside_compilation;
-  };
-  std::vector<EdgeInfo> edges;
-  for (const Edge* e : g->edges()) {
-    if (e->IsControlEdge()) {
-      continue;
-    }
-
-    if (e->src()->attrs().Find(xla_computation_attr_name) == nullptr &&
-        e->dst()->attrs().Find(xla_computation_attr_name) != nullptr &&
-        e->dst()->attrs().Find(outside_compilation_attr_name) != nullptr) {
-      edges.push_back(EdgeInfo{e->dst_input(), e->dst()->id(),
-                               /*is_host_to_outside_compilation=*/true});
-      VLOG(4) << "Host -> oc edge: " << e->DebugString();
-    } else if (e->dst()->attrs().Find(xla_computation_attr_name) == nullptr &&
-               e->src()->attrs().Find(xla_computation_attr_name) != nullptr &&
-               e->src()->attrs().Find(outside_compilation_attr_name) !=
-                   nullptr) {
-      edges.push_back(EdgeInfo{e->dst_input(), e->dst()->id(),
-                               /*is_host_to_outside_compilation=*/false});
-      VLOG(4) << "Oc -> host edge: " << e->DebugString();
-    }
-  }
-
-  // Remove the edge from host to outside compilation. Add a placeholder as
-  // outside compilation node input.
-  std::map<std::pair<string, int>, Node*> placeholders;
-  for (int i = 0; i < edges.size(); i++) {
-    Node* dst = g->FindNodeId(edges[i].dst_node_id);
-    const Edge* e;
-    TF_RETURN_IF_ERROR(dst->input_edge(edges[i].dst_input, &e));
-    Node* src = e->src();
-    int src_output = e->src_output(), dst_input = e->dst_input();
-    g->RemoveEdge(e);
-
-    // Find or create placeholder node.
-    string new_name =
-        edges[i].is_host_to_outside_compilation
-            ? absl::StrCat(src->name(), "_host_to_oc_placeholder_", src_output)
-            : absl::StrCat(src->name(), "_oc_to_host_placeholder_", src_output);
-    auto placeholder_index = std::make_pair(src->name(), src_output);
-    auto iter = placeholders.find(placeholder_index);
-    Node* placeholder_node;
-    if (iter == placeholders.end()) {
-      NodeDefBuilder placeholder_builder(new_name, "Placeholder");
-      placeholder_builder.Attr("dtype", src->output_type(src_output));
-      if (edges[i].is_host_to_outside_compilation) {
-        placeholder_builder.Attr(kHostToOutsideCompilationOriginalNodeAttrName,
-                                 src->name());
-        placeholder_builder.Attr(kHostToOutsideCompilationSrcOutputAttrName,
-                                 src_output);
-        // If this placeholder node is in outside compilation, we need to set
-        // `xla_computation_attr_name` and `outside_compilation_attr_name`.
-        string xla_computation_attr, outside_compilation_attr;
-        TF_RETURN_IF_ERROR(GetNodeAttr(dst->attrs(), xla_computation_attr_name,
-                                       &xla_computation_attr));
-        TF_RETURN_IF_ERROR(GetNodeAttr(dst->attrs(),
-                                       outside_compilation_attr_name,
-                                       &outside_compilation_attr));
-        placeholder_builder.Attr(xla_computation_attr_name,
-                                 xla_computation_attr);
-        placeholder_builder.Attr(outside_compilation_attr_name,
-                                 outside_compilation_attr);
-      } else {
-        placeholder_builder.Attr(kOutsideCompilationToHostOriginalNodeAttrName,
-                                 src->name());
-        placeholder_builder.Attr(kOutsideCompilationToHostSrcOutputAttrName,
-                                 src_output);
-      }
-      NodeDef placeholder_def;
-      TF_RETURN_IF_ERROR(placeholder_builder.Finalize(&placeholder_def));
-      Status s;
-      placeholder_node = g->AddNode(placeholder_def, &s);
-      TF_RETURN_IF_ERROR(s);
-      placeholders[placeholder_index] = placeholder_node;
-    } else {
-      placeholder_node = iter->second;
-    }
-    g->AddEdge(placeholder_node, 0, dst, dst_input);
-
-    // Replace `e->dst()` because its input node changed.
-    NodeDef new_def = dst->def();
-    *new_def.mutable_input(dst_input) = placeholder_node->name();
-    TF_ASSIGN_OR_RETURN(Node * dst_replace_node, ReplaceNode(g, dst, new_def));
-
-    // Other edge in `edges` might have `e->dst()` as src or dst
-    // node. Before removing `e->dst()`, replace those edges with corresponding
-    // edges for `dst_replace_node`.
-    for (int j = i + 1; j < edges.size(); j++) {
-      if (edges[j].dst_node_id == edges[i].dst_node_id) {
-        edges[j].dst_node_id = dst_replace_node->id();
-      }
-    }
-  }
-  return Status::OK();
-}
-
-// Step 1 for `PostprocessForEncapsulation`. See comments of
-// `PostprocessForEncapsulation` for details.
-Status RemovePlaceholderBetweenOutsideCompilationAndHostComputation(Graph* g) {
-  // Gather all outside compilation to host computation nodes.
-  struct PlaceHolderNodeInfo {
-    Node* n;
-    bool is_host_to_oc;
-  };
-  std::vector<PlaceHolderNodeInfo> placeholder_nodes;
-  for (Node* n : g->nodes()) {
-    if (n->type_string() == "Placeholder") {
-      if (HasNodeAttr(n->def(),
-                      kOutsideCompilationToHostOriginalNodeAttrName)) {
-        placeholder_nodes.push_back({n, false});
-      } else if (HasNodeAttr(n->def(),
-                             kHostToOutsideCompilationOriginalNodeAttrName)) {
-        placeholder_nodes.push_back({n, true});
-      }
-    }
-  }
-
-  // Remove the placeholder nodes, and reconnect original edge.
-  auto node_name_index = g->BuildNodeNameIndex();
-  for (auto placeholder_iter : placeholder_nodes) {
-    Node* n = placeholder_iter.n;
-
-    string node_name;
-    int node_src_output;
-    if (placeholder_iter.is_host_to_oc) {
-      TF_RETURN_IF_ERROR(
-          GetNodeAttr(n->attrs(), kHostToOutsideCompilationOriginalNodeAttrName,
-                      &node_name));
-      TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(),
-                                     kHostToOutsideCompilationSrcOutputAttrName,
-                                     &node_src_output));
-    } else {
-      TF_RETURN_IF_ERROR(
-          GetNodeAttr(n->attrs(), kOutsideCompilationToHostOriginalNodeAttrName,
-                      &node_name));
-      TF_RETURN_IF_ERROR(GetNodeAttr(n->attrs(),
-                                     kOutsideCompilationToHostSrcOutputAttrName,
-                                     &node_src_output));
-    }
-    auto iter = node_name_index.find(node_name);
-    if (iter == node_name_index.end()) {
-      return errors::Internal(
-          "Cannot find original node for oc -> host placeholder node ",
-          node_name);
-    }
-
-    // Change all usage node to use the original node instead.
-    Node* original_node = iter->second;
-    std::vector<const Edge*> control_edges;
-    std::vector<OutEdgeInfo> data_edges;
-    for (auto e : n->out_edges()) {
-      if (e->IsControlEdge()) {
-        control_edges.push_back(e);
-      } else {
-        data_edges.push_back({e->dst(), e->src_output(), e->dst_input()});
-      }
-    }
-    for (const Edge* e : control_edges) {
-      g->AddControlEdge(original_node, e->dst());
-      g->RemoveEdge(e);
-    }
-    for (int i = 0; i < data_edges.size(); i++) {
-      Node* dst = data_edges[i].dst;
-      NodeDef new_def = dst->def();
-      int dst_input = data_edges[i].dst_input;
-      *new_def.mutable_input(dst_input) =
-          absl::StrCat(original_node->name(), ":", node_src_output);
-      TF_ASSIGN_OR_RETURN(Node * replace_node, ReplaceNode(g, dst, new_def));
-
-      const Edge* edge_to_replace = nullptr;
-      TF_RETURN_IF_ERROR(replace_node->input_edge(dst_input, &edge_to_replace));
-      g->RemoveEdge(edge_to_replace);
-      g->AddEdge(original_node, node_src_output, replace_node, dst_input);
-
-      // Other edges might have `dst` as dst node. Update those edges with
-      // `replace_node`.
-      for (int j = i + 1; j < data_edges.size(); j++) {
-        if (data_edges[j].dst == dst) {
-          data_edges[j].dst = replace_node;
-        }
-      }
-
-      // Other placeholder node might have `dst` as original node. Update
-      // `node_name_index` with `replace_node`.
-      node_name_index[replace_node->name()] = replace_node;
-    }
-
-    // Remove placeholder node.
-    g->RemoveNode(n);
-  }
-  return Status::OK();
-}
-
-// Step 2 for `PostprocessForEncapsulation`. See comments of
-// `PostprocessForEncapsulation` for details.
-Status RemoveIdentityBetweenDifferentXlaComputation(Graph* g) {
-  // Gather Identity nodes to remove.
-  std::vector<Node*> bridge_nodes;
-  for (Node* n : g->nodes()) {
-    if (n->type_string() == "Identity" &&
-        HasNodeAttr(n->def(), kBridgeSourceNodeAttrName)) {
-      bridge_nodes.push_back(n);
-    }
-  }
-
-  // Remove the identity nodes, and reconnect the original edge.
-  for (int i = 0; i < bridge_nodes.size(); i++) {
-    Node* n = bridge_nodes[i];
-    const Edge* src_edge = nullptr;
-    TF_RETURN_IF_ERROR(n->input_edge(0, &src_edge));
-
-    // Change all usage node to use the original node instead.
-    std::vector<const Edge*> control_edges;
-    std::vector<OutEdgeInfo> data_edges;
-    for (auto e : n->out_edges()) {
-      if (e->IsControlEdge()) {
-        control_edges.push_back(e);
-      } else {
-        data_edges.push_back({e->dst(), e->src_output(), e->dst_input()});
-      }
-    }
-    for (const Edge* e : control_edges) {
-      g->AddControlEdge(src_edge->src(), e->dst());
-      g->RemoveEdge(e);
-    }
-    for (int j = 0; j < data_edges.size(); j++) {
-      Node* dst = data_edges[j].dst;
-      NodeDef new_def = dst->def();
-      int dst_input = data_edges[j].dst_input;
-      *new_def.mutable_input(dst_input) =
-          absl::StrCat(src_edge->src()->name(), ":", src_edge->src_output());
-      TF_ASSIGN_OR_RETURN(Node * replace_node, ReplaceNode(g, dst, new_def));
-
-      const Edge* edge_to_replace = nullptr;
-      TF_RETURN_IF_ERROR(replace_node->input_edge(dst_input, &edge_to_replace));
-      g->RemoveEdge(edge_to_replace);
-      g->AddEdge(src_edge->src(), src_edge->src_output(), replace_node,
-                 dst_input);
-
-      // Other edges might have `dst` as dst node. Update those edges with
-      // `replace_node`.
-      for (int k = j + 1; k < data_edges.size(); k++) {
-        if (data_edges[k].dst == dst) {
-          data_edges[k].dst = replace_node;
-        }
-      }
-
-      // The node we replaced might be in `bridge_nodes`. If so, update
-      // `bridge_nodes` to use the replaced node.
-      for (int k = i + 1; k < bridge_nodes.size(); k++) {
-        if (bridge_nodes[k] == dst) {
-          bridge_nodes[k] = replace_node;
-        }
-      }
-    }
-
-    // Remove Identity node.
-    g->RemoveNode(n);
-  }
-  return Status::OK();
-}
-
-// Step 3 for `PostprocessForEncapsulation`. See comments of
-// `PostprocessForEncapsulation` for details.
-// We do not need to worry about removed nodes in step 1 and 2;
-// `PreprocessForEncapsulation` will not record control dependencies for those
-// remvoed nodes in the first place.
-Status AddControlDependencies(
-    Graph* g, const std::unordered_map<string, string>& cluster_node_names) {
-  auto node_name_index = g->BuildNodeNameIndex();
-
-  // Reconnect outside compilation to outside compilation control edge.
-  for (Node* n : g->nodes()) {
-    std::vector<string> control_deps;
-    Status s =
-        GetNodeAttr(n->attrs(), kXlaControlDependenciesAttrName, &control_deps);
-    if (!s.ok()) {
-      if (s.code() != error::NOT_FOUND) {
-        return s;
-      } else {
-        continue;
-      }
-    } else {
-      n->ClearAttr(kXlaControlDependenciesAttrName);
-      for (const string& control_input : control_deps) {
-        auto iter = node_name_index.find(control_input);
-        if (iter == node_name_index.end()) {
-          return errors::Internal("Cannot find original node for ",
-                                  control_input);
-        }
-        g->AddControlEdge(iter->second, n);
-      }
-    }
-  }
-
-  // Reconnect outside compilation to XLA computation control edge.
-  for (Node* n : g->nodes()) {
-    std::vector<string> control_deps;
-    Status s = GetNodeAttr(
-        n->attrs(), kXlaConnectedToOtherXlaComputationAttrName, &control_deps);
-    if (!s.ok()) {
-      if (s.code() != error::NOT_FOUND) {
-        return s;
-      } else {
-        continue;
-      }
-    } else {
-      n->ClearAttr(kXlaConnectedToOtherXlaComputationAttrName);
-      for (const string& control_input : control_deps) {
-        auto iter = cluster_node_names.find(control_input);
-        if (iter == cluster_node_names.end()) {
-          return errors::Internal("Cannot find cluster node for ",
-                                  control_input);
-        }
-        auto iter2 = node_name_index.find(iter->second);
-        if (iter2 == node_name_index.end()) {
-          return errors::Internal("Cannot find cluster node for ",
-                                  iter->second);
-        }
-        g->AddControlEdge(n, iter2->second);
-      }
-    }
-  }
-
-  // Reconnect XLA computation to outside compilation control edge.
-  for (Node* n : g->nodes()) {
-    std::vector<string> control_deps;
-    Status s =
-        GetNodeAttr(n->attrs(), kXlaConnectedFromOtherXlaComputationAttrName,
-                    &control_deps);
-    if (!s.ok()) {
-      if (s.code() != error::NOT_FOUND) {
-        return s;
-      } else {
-        continue;
-      }
-    } else {
-      n->ClearAttr(kXlaConnectedFromOtherXlaComputationAttrName);
-      for (const string& control_input : control_deps) {
-        auto iter = cluster_node_names.find(control_input);
-        if (iter == cluster_node_names.end()) {
-          return errors::Internal("Cannot find cluster node for ",
-                                  control_input);
-        }
-        auto iter2 = node_name_index.find(iter->second);
-        if (iter2 == node_name_index.end()) {
-          return errors::Internal("Cannot find cluster node for ",
-                                  iter->second);
-        }
-        g->AddControlEdge(iter2->second, n);
-      }
-    }
-  }
-
-  return Status::OK();
-}
-
 // Step 1 for `PreprocessEdgesBetweenOutsideCompilations`. See comments of
 // `PreprocessEdgesBetweenOutsideCompilations` for details.
 Status PreprocessControlEdgesBetweenOutsideCompilations(
@@ -811,20 +300,6 @@ Status PostprocessControlEdgesBetweenOutsideCompilations(
 
 const char kXlaInferredShapesAttrName[] = "_xla_inferred_shapes";
 
-const char kXlaConnectedToOtherXlaComputationAttrName[] =
-    "_xla_connected_to_other_xla_computation";
-const char kXlaConnectedFromOtherXlaComputationAttrName[] =
-    "_xla_connected_from_other_xla_computation";
-const char kXlaControlDependenciesAttrName[] = "_xla_control_dependencies";
-const char kBridgeSourceNodeAttrName[] = "_xla_bridge_src";
-const char kOutsideCompilationToHostOriginalNodeAttrName[] =
-    "_xla_oc_to_host_node_name";
-const char kOutsideCompilationToHostSrcOutputAttrName[] =
-    "_xla_oc_to_host_src_output";
-const char kHostToOutsideCompilationOriginalNodeAttrName[] =
-    "_xla_host_to_oc_node_name";
-const char kHostToOutsideCompilationSrcOutputAttrName[] =
-    "_xla_host_to_oc_src_output";
 const char kXlaConnectedToXlaComputationAttrName[] =
     "_xla_connected_to_xla_computation";
 const char kXlaConnectedFromXlaComputationAttrName[] =
@@ -835,32 +310,7 @@ const char kOutsideCompilationSrcOutputAttrName[] = "_xla_oc_to_oc_src_output";
 const char kXlaControlDependenciesWithinXlaClusterAttrName[] =
     "_xla_control_dependencies_within_xla_cluster";
 
-Status PerformStaticShapeInferenceBeforeEncapsulation(
-    Graph* g, const string& xla_computation_attr_name,
-    const string& outside_compilation_attr_name) {
-  // Find all outside compilation to XLA computation data edges.
-  std::unordered_set<Node*> outside_compilation_send_nodes;
-  for (auto e : g->edges()) {
-    if (e->IsControlEdge()) {
-      continue;
-    }
-
-    auto src_computation = GetStringAttr(*e->src(), xla_computation_attr_name);
-    auto dst_computation = GetStringAttr(*e->dst(), xla_computation_attr_name);
-    if (!src_computation || !dst_computation ||
-        *src_computation != *dst_computation) {
-      continue;
-    }
-
-    auto src_outside_compilation =
-        GetStringAttr(*e->src(), outside_compilation_attr_name);
-    auto dst_outside_compilation =
-        GetStringAttr(*e->dst(), outside_compilation_attr_name);
-    if (src_outside_compilation && !dst_outside_compilation) {
-      outside_compilation_send_nodes.insert(e->src());
-    }
-  }
-
+Status PerformStaticShapeInferenceBeforeEncapsulation(Graph* g) {
   // Perform shape inference.
   std::map<int, InferredShape> arg_shapes;
   GraphShapeInfo shape_info;
@@ -868,55 +318,21 @@ Status PerformStaticShapeInferenceBeforeEncapsulation(
       InferShapes(g, arg_shapes, /*fnlib_def=*/nullptr, &shape_info));
 
   // Add attribute for output shapes.
-  for (Node* n : outside_compilation_send_nodes) {
-    auto iter = shape_info.find(n->name());
-    if (iter == shape_info.end()) {
-      continue;
-    }
-
+  auto node_name_index = g->BuildNodeNameIndex();
+  for (auto iter : shape_info) {
     std::vector<PartialTensorShape> output_shapes;
-    std::transform(iter->second.begin(), iter->second.end(),
+    std::transform(iter.second.begin(), iter.second.end(),
                    std::back_inserter(output_shapes),
                    [](const InferredShape& inferred_shape) {
                      return inferred_shape.shape;
                    });
+    Node* n = node_name_index[iter.first];
     n->AddAttr(kXlaInferredShapesAttrName, output_shapes);
   }
 
   return Status::OK();
 }
 
-Status PreprocessForEncapsulation(Graph* g,
-                                  const string& xla_computation_attr_name,
-                                  const string& outside_compilation_attr_name) {
-  TF_RETURN_IF_ERROR(ProcessControlEdges(g, xla_computation_attr_name,
-                                         outside_compilation_attr_name));
-  TF_RETURN_IF_ERROR(ProcessXlaToXlaDataEdges(g, xla_computation_attr_name,
-                                              outside_compilation_attr_name));
-  TF_RETURN_IF_ERROR(ProcessDataEdgeBetweenOutsideCompilationAndHostComputation(
-      g, xla_computation_attr_name, outside_compilation_attr_name));
-  return Status::OK();
-}
-
-Status PostprocessForEncapsulation(
-    Graph* g, const string& xla_computation_attr_name,
-    const string& outside_compilation_attr_name,
-    const std::unordered_map<string, XlaClusterInfo>& clusters) {
-  // The `node` pointer in `XlaClusterInfo` might be invalidated in step 1/2,
-  // but the node name won't change. Record cluster node name for
-  // `AddControlDependencies`.
-  std::unordered_map<string, string> cluster_node_names;
-  for (const auto& iter : clusters) {
-    cluster_node_names[iter.first] = iter.second.node->name();
-  }
-
-  TF_RETURN_IF_ERROR(
-      RemovePlaceholderBetweenOutsideCompilationAndHostComputation(g));
-  TF_RETURN_IF_ERROR(RemoveIdentityBetweenDifferentXlaComputation(g));
-  TF_RETURN_IF_ERROR(AddControlDependencies(g, cluster_node_names));
-  return Status::OK();
-}
-
 Status PreprocessEdgesBetweenOutsideCompilations(
     Graph* g, const string& outside_compilation_attr_name) {
   // Remove edges from source node to outside compilation nodes, and edges
diff --git a/tensorflow/compiler/jit/encapsulate_util.h b/tensorflow/compiler/jit/encapsulate_util.h
index e363bc5754..c9f16d1416 100644
--- a/tensorflow/compiler/jit/encapsulate_util.h
+++ b/tensorflow/compiler/jit/encapsulate_util.h
@@ -27,51 +27,13 @@ namespace tensorflow {
 // a list of PartialTensorShape objects.
 extern const char kXlaInferredShapesAttrName[];
 
-// Infer output shapes for outside compilation nodes which have output data
-// edges to XLA computation nodes. These shapes will be used later by XLA
-// compiler as output shapes of the outside compilation's XlaHostCompute op.
-// XLA computation nodes will be mark by attr `xla_computation_attr_name`;
-// outside compilation nodes will be marked by both attr
-// `xla_computation_attr_name` and `outside_compilation_attr_name`.
-//
-// Those outside compilation nodes will be marked with attribute
-// `kXlaInferredShapesAttrName`.
+// Infers output shapes for all nodes in graph `g`. The output shapes will be
+// stored in node attribute `kXlaInferredShapesAttrName`.
 //
 // We have to perform shape inference before encapsulation because after
 // encapsulation, some nodes will be encapsulated into function call, and shape
 // inference does not handle function call at the moment.
-Status PerformStaticShapeInferenceBeforeEncapsulation(
-    Graph* g, const string& xla_computation_attr_name,
-    const string& outside_compilation_attr_name);
-
-// Attribute indicating that some ops in other XLA computation has control
-// dependency on this node. Attribute value will be a list of string (XLA
-// computation names).
-extern const char kXlaConnectedToOtherXlaComputationAttrName[];
-
-// Attribute indicating that this node has control dependency on some ops in
-// other XLA computation. Attribute value will be a list of string (XLA
-// computation names).
-extern const char kXlaConnectedFromOtherXlaComputationAttrName[];
-
-// Attribute indicating that this node has control dependencies on some other
-// nodes. Attribute value will be a list of string (node names).
-extern const char kXlaControlDependenciesAttrName[];
-
-// Attribute indicating that this is an Identity node added to act as a bridge
-// between different XLA computations. Attribute value will be string (source
-// node name).
-extern const char kBridgeSourceNodeAttrName[];
-
-// Attribute indicating that this is an Placeholder node added to act as a
-// temporary input node for an outside compilation node. Attribute value will be
-// string (original input node name).
-extern const char kOutsideCompilationToHostOriginalNodeAttrName[];
-
-// Attribute indicating that this is an Placeholder node added to act as a
-// temporary input node for an outside compilation node. Attribute value will be
-// int (src_output for original edge).
-extern const char kOutsideCompilationToHostSrcOutputAttrName[];
+Status PerformStaticShapeInferenceBeforeEncapsulation(Graph* g);
 
 // Attribute indicating that some ops in this node's XLA computation has control
 // dependency on this node. Attribute value will always be "true".
@@ -81,16 +43,6 @@ extern const char kXlaConnectedToXlaComputationAttrName[];
 // this node's XLA computation. Attribute value will always be "true".
 extern const char kXlaConnectedFromXlaComputationAttrName[];
 
-// Attribute indicating that this is an Placeholder node added to act as a
-// temporary input node for an host node. Attribute value will be string
-// (original input node name).
-extern const char kHostToOutsideCompilationOriginalNodeAttrName[];
-
-// Attribute indicating that this is an Placeholder node added to act as a
-// temporary input node for a host node. Attribute value will be int (src_output
-// for original edge).
-extern const char kHostToOutsideCompilationSrcOutputAttrName[];
-
 // Attribute indicating that this is an Placeholder node added to act as a
 // temporary input node for an outside compilation node. Attribute value will be
 // string (original input node name).
@@ -106,27 +58,6 @@ extern const char kOutsideCompilationSrcOutputAttrName[];
 // (node names).
 extern const char kXlaControlDependenciesWithinXlaClusterAttrName[];
 
-// Preprocesses edges between different XLA clusters for encapsulation. It will
-// perform the following operations in order:
-//
-// 1a. For control edges between outside compilation and another XLA
-//     computation, add attr "kXlaConnected{From, To}OtherXlaComputationAttrName
-//     = XLA computation node name" to the outside compilation node.
-// 1b. For control edges between different outside compilations (in different
-//     XLA computations), remove the edge and add attr
-//     "kXlaControlDependenciesAttrName = src node name" to dst node.
-// 1c. For control edges between outside compilation and host computation,
-//     remove the edge and add attr "kXlaControlDependenciesAttrName = src node
-//     name" to dst node.
-// 2. For data edges between different XLA computations, if either src or dst
-//    is outside compilation, add an Identity node in between the edge. The
-//    identity node will have attr kBridgeSourceNodeAttrName.
-// 3. For data edges between outside compilation and host computation, remove
-//    the edge and create a Placeholder node as dst node's input.
-Status PreprocessForEncapsulation(Graph* g,
-                                  const string& xla_computation_attr_name,
-                                  const string& outside_compilation_attr_name);
-
 // Information for XLA computation.
 struct XlaClusterInfo {
   // Add an explicitly-defined default constructor for this class.
@@ -158,24 +89,6 @@ struct XlaClusterInfo {
   const std::map<string, int> host_compute_core;
 };
 
-// Postprocesses edges between different XLA clusters for encapsulation. This
-// function reverts what `PreprocessForEncapsulation` did. It will perform the
-// following operations in order:
-//
-// 1. Remove Placeholder nodes between outside compilation and host computation
-//     (created in `PreprocessForEncapsulation` step 3).
-// 2. Remove Identity nodes created in `PreprocessForEncapsulation` step 2.
-// 3a. Reconnect control edges between outside compilation and another XLA
-//     computation (marked by `PreprocessForEncapsulation` step 1a).
-// 3b. Reconnect control edges between different outside compilations (marked by
-//     `PreprocessForEncapsulation` step 1b).
-// 3c. Reconnect control edges between outside compilation and host computation
-//     (marked by `PreprocessForEncapsulation` step 1c).
-Status PostprocessForEncapsulation(
-    Graph* g, const string& xla_computation_attr_name,
-    const string& outside_compilation_attr_name,
-    const std::unordered_map<string, XlaClusterInfo>& clusters);
-
 // Preprocesses edges within the same XLA cluster. It will perform the following
 // operations in order:
 //
diff --git a/tensorflow/compiler/jit/encapsulate_util_test.cc b/tensorflow/compiler/jit/encapsulate_util_test.cc
index 3b8b49cb92..3bb979e069 100644
--- a/tensorflow/compiler/jit/encapsulate_util_test.cc
+++ b/tensorflow/compiler/jit/encapsulate_util_test.cc
@@ -38,24 +38,11 @@ TEST(PerformStaticShapeInferenceBeforeEncapsulationTest, Basic) {
   Graph g(OpRegistry::Global());
   TF_CHECK_OK(s.ToGraph(&g));
 
-  // "add" node is outside compilation node, "identity" node is XLA node.
-  auto node_index = g.BuildNodeNameIndex();
-  Node *add_node = node_index["add"], *identity_node = node_index["identity"];
-  add_node->AddAttr("_xla", "cluster");
-  add_node->AddAttr("_oc", "cluster");
-  identity_node->AddAttr("_xla", "cluster");
-  TF_CHECK_OK(
-      PerformStaticShapeInferenceBeforeEncapsulation(&g, "_xla", "_oc"));
+  TF_CHECK_OK(PerformStaticShapeInferenceBeforeEncapsulation(&g));
 
-  // Check that only "add" node now has _xla_inferred_shapes attr.
-  std::vector<Node *> nodes_with_inferred_shape;
-  for (Node *n : g.nodes()) {
-    if (HasNodeAttr(n->def(), kXlaInferredShapesAttrName)) {
-      nodes_with_inferred_shape.push_back(n);
-    }
-  }
-  EXPECT_EQ(nodes_with_inferred_shape.size(), 1);
-  EXPECT_EQ(nodes_with_inferred_shape[0], add_node);
+  // Check that "add" node now has _xla_inferred_shapes attr.
+  auto node_index = g.BuildNodeNameIndex();
+  Node *add_node = node_index["add"];
   std::vector<PartialTensorShape> output_shapes;
   TF_CHECK_OK(GetNodeAttr(add_node->attrs(), kXlaInferredShapesAttrName,
                           &output_shapes));
@@ -66,329 +53,4 @@ TEST(PerformStaticShapeInferenceBeforeEncapsulationTest, Basic) {
   EXPECT_EQ(shape_proto.dim(0).size(), 2);
 }
 
-TEST(PreprocessForEncapsulationTest, ControlEdges) {
-  // Build the graph:
-  // "const_0" and "const_1" in host computation
-  // "add" = "const_0" + "const_1" in XLA computation 0
-  // "identity0" = "add" in XLA computation 0 & outside compilation 0
-  // "identity1" = "identity0" in XLA computation 0
-  // "identity2" = "identity1" in host computation
-  // "identity3" = "identity2" in XLA computation 1
-  // "identity4" = "identity3" in XLA computation 1 & outside compilation 1
-  // "identity5" = "identity4" in XLA computation 1
-  // "identity6" = "identity5" in host computation
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output const_0 = ops::Const(s.WithOpName("const_0"), 1, {});
-  Output const_1 = ops::Const(s.WithOpName("const_1"), 2, {});
-  Output add = ops::Add(s.WithOpName("add"), const_0, const_1);
-  Output identity0 = ops::Identity(s.WithOpName("identity0"), add);
-  Output identity1 = ops::Identity(s.WithOpName("identity1"), identity0);
-  Output identity2 = ops::Identity(s.WithOpName("identity2"), identity1);
-  Output identity3 = ops::Identity(s.WithOpName("identity3"), identity2);
-  Output identity4 = ops::Identity(s.WithOpName("identity4"), identity3);
-  Output identity5 = ops::Identity(s.WithOpName("identity5"), identity4);
-  Graph g(OpRegistry::Global());
-  TF_CHECK_OK(s.ToGraph(&g));
-  auto node_index = g.BuildNodeNameIndex();
-
-  // Set XLA computation/outside compilation attr, and add control edges.
-  Node *const0_node = node_index["const_0"], *add_node = node_index["add"],
-       *identity0_node = node_index["identity0"],
-       *identity1_node = node_index["identity1"],
-       *identity2_node = node_index["identity2"],
-       *identity3_node = node_index["identity3"],
-       *identity4_node = node_index["identity4"],
-       *identity5_node = node_index["identity5"];
-  add_node->AddAttr("_xla", "0");
-  identity0_node->AddAttr("_xla", "0");
-  identity0_node->AddAttr("_oc", "0");
-  identity1_node->AddAttr("_xla", "0");
-  identity3_node->AddAttr("_xla", "1");
-  identity4_node->AddAttr("_xla", "1");
-  identity4_node->AddAttr("_oc", "0");
-  identity5_node->AddAttr("_xla", "1");
-  // Case 1a: control edges between outside compilation and another XLA
-  // computation.
-  g.AddControlEdge(identity0_node, identity3_node);
-  g.AddControlEdge(identity1_node, identity4_node);
-  // Case 1b: control edges between different outside compilations.
-  g.AddControlEdge(identity0_node, identity4_node);
-  // Case 1c: control edges between outside compilation and host computation.
-  g.AddControlEdge(const0_node, identity0_node);
-  g.AddControlEdge(identity0_node, identity2_node);
-
-  TF_CHECK_OK(PreprocessForEncapsulation(&g, "_xla", "_oc"));
-
-  // Case 1a: add attr "_xla_control_deps_{from/to} = XLA computation node name"
-  // to the outside compilation node.
-  std::vector<string> attr;
-  TF_CHECK_OK(GetNodeAttr(identity0_node->def(),
-                          kXlaConnectedToOtherXlaComputationAttrName, &attr));
-  EXPECT_EQ(attr.size(), 1);
-  EXPECT_EQ(attr[0], "1");
-  attr.clear();
-  TF_CHECK_OK(GetNodeAttr(identity4_node->def(),
-                          kXlaConnectedFromOtherXlaComputationAttrName, &attr));
-  EXPECT_EQ(attr.size(), 1);
-  EXPECT_EQ(attr[0], "0");
-  // Case 1b: add attr "_xla_control_deps = src node name" to dst node.
-  attr.clear();
-  TF_CHECK_OK(GetNodeAttr(identity4_node->def(),
-                          kXlaControlDependenciesAttrName, &attr));
-  EXPECT_EQ(attr.size(), 1);
-  EXPECT_EQ(attr[0], "identity0");
-  // Case 1c: add attr "_xla_control_deps = src node name" to dst node.
-  attr.clear();
-  TF_CHECK_OK(GetNodeAttr(identity0_node->def(),
-                          kXlaControlDependenciesAttrName, &attr));
-  EXPECT_EQ(attr.size(), 1);
-  EXPECT_EQ(attr[0], "const_0");
-  attr.clear();
-  TF_CHECK_OK(GetNodeAttr(identity2_node->def(),
-                          kXlaControlDependenciesAttrName, &attr));
-  EXPECT_EQ(attr.size(), 1);
-  EXPECT_EQ(attr[0], "identity0");
-}
-
-TEST(PreprocessForEncapsulationTest, DataEdges) {
-  // Build the graph:
-  // "const_0" and "const_1" in host computation
-  // "identityn0" = ("const_0", "const_1") in host computation 0
-  // "add0" = "const_0" + "const_1" in XLA computation 0
-  // "add1" = "add0" + "const_0" in XLA computation 0 & outside compilation 0
-  // "identity0" = "add1" in XLA computation 0
-  // "add2" = "add1" + "identity0" in host computation
-  // "add3" = "add1" + "add2" in XLA computation 1
-  // "add4" = "identity0" + "add2" in XLA computation 1 & outside compilation 0
-  // "add5" = "identityn0"[0] + "identityn0"[1] in XLA computation 1 &
-  //                                               outside compilation 0
-  // "identityn1" = ("identityn0"[0], "identityn0"[1]) in XLA computation 1 &
-  //                                                   outside compilation 0
-  // "identity1" = "add4" in XLA computation 1
-  // "identity2" = "identity1" in host computation
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output const_0 = ops::Const(s.WithOpName("const_0"), 1, {});
-  Output const_1 = ops::Const(s.WithOpName("const_1"), 2, {});
-  auto identityn0 =
-      ops::IdentityN(s.WithOpName("identityn_0"), {const_0, const_1});
-  Output add0 = ops::Add(s.WithOpName("add0"), const_0, const_1);
-  Output add1 = ops::Add(s.WithOpName("add1"), add0, const_0);
-  Output identity0 = ops::Identity(s.WithOpName("identity0"), add1);
-  Output add2 = ops::Add(s.WithOpName("add2"), add1, identity0);
-  Output add3 = ops::Add(s.WithOpName("add3"), add1, add2);
-  Output add4 = ops::Add(s.WithOpName("add4"), identity0, add2);
-  Output add5 = ops::Add(s.WithOpName("add5"), identityn0[0], identityn0[1]);
-  auto identityn1 = ops::IdentityN(s.WithOpName("identityn_1"),
-                                   {identityn0[0], identityn0[1]});
-  Output identity1 = ops::Identity(s.WithOpName("identity1"), add4);
-  Output identity2 = ops::Identity(s.WithOpName("identity2"), add4);
-  Graph g(OpRegistry::Global());
-  TF_CHECK_OK(s.ToGraph(&g));
-  auto node_index = g.BuildNodeNameIndex();
-
-  // Set XLA computation/outside compilation attr.
-  Node *add0_node = node_index["add0"], *add1_node = node_index["add1"],
-       *identity0_node = node_index["identity0"],
-       *add3_node = node_index["add3"], *add4_node = node_index["add4"],
-       *add5_node = node_index["add5"],
-       *identityn1_node = node_index["identityn_1"],
-       *identity1_node = node_index["identity1"];
-  add0_node->AddAttr("_xla", "0");
-  add1_node->AddAttr("_xla", "0");
-  add1_node->AddAttr("_oc", "0");
-  identity0_node->AddAttr("_xla", "0");
-  add3_node->AddAttr("_xla", "1");
-  add4_node->AddAttr("_xla", "1");
-  add4_node->AddAttr("_oc", "0");
-  add5_node->AddAttr("_xla", "1");
-  add5_node->AddAttr("_oc", "0");
-  identityn1_node->AddAttr("_xla", "1");
-  identityn1_node->AddAttr("_oc", "0");
-  identity1_node->AddAttr("_xla", "1");
-
-  TF_CHECK_OK(PreprocessForEncapsulation(&g, "_xla", "_oc"));
-
-  // Check input nodes for related data edges.
-  node_index = g.BuildNodeNameIndex();
-  // Step 2: add an Identity node between different XLA computations.
-  Node *bridge_add1_add3 = node_index["bridge_add1_add3"];
-  EXPECT_NE(bridge_add1_add3, nullptr);
-  string str;
-  TF_CHECK_OK(
-      GetNodeAttr(bridge_add1_add3->attrs(), kBridgeSourceNodeAttrName, &str));
-  EXPECT_EQ(str, "add1");
-  Node *bridge_identity0_add4 = node_index["bridge_identity0_add4"];
-  EXPECT_NE(bridge_identity0_add4, nullptr);
-  // Step 3: add placeholder for edges between host computation and outside
-  // compilation.
-  EXPECT_EQ(bridge_add1_add3->def().input(0), "add1_oc_to_host_placeholder_0");
-  Node *add1_oc_to_host_placeholder =
-      node_index["add1_oc_to_host_placeholder_0"];
-  TF_CHECK_OK(GetNodeAttr(add1_oc_to_host_placeholder->attrs(),
-                          kOutsideCompilationToHostOriginalNodeAttrName, &str));
-  EXPECT_EQ(str, "add1");
-  int i;
-  TF_CHECK_OK(GetNodeAttr(add1_oc_to_host_placeholder->attrs(),
-                          kOutsideCompilationToHostSrcOutputAttrName, &i));
-  EXPECT_EQ(i, 0);
-  add4_node = node_index["add4"];
-  ASSERT_NE(add4_node, nullptr);
-  EXPECT_EQ(add4_node->def().input(0),
-            "bridge_identity0_add4_host_to_oc_placeholder_0");
-  Node *identity0_host_to_oc_placeholder =
-      node_index["bridge_identity0_add4_host_to_oc_placeholder_0"];
-  TF_CHECK_OK(GetNodeAttr(identity0_host_to_oc_placeholder->attrs(),
-                          kHostToOutsideCompilationOriginalNodeAttrName, &str));
-  EXPECT_EQ(str, "bridge_identity0_add4");
-  TF_CHECK_OK(GetNodeAttr(identity0_host_to_oc_placeholder->attrs(),
-                          kHostToOutsideCompilationSrcOutputAttrName, &i));
-  EXPECT_EQ(i, 0);
-
-  // Check different placeholder nodes are created for different src_output.
-  Node *placeholder0 = node_index["identityn_0_host_to_oc_placeholder_0"],
-       *placeholder1 = node_index["identityn_0_host_to_oc_placeholder_1"];
-  EXPECT_NE(placeholder0, nullptr);
-  EXPECT_NE(placeholder1, nullptr);
-  // Check we only have 2 placeholder nodes created for "identityn_0".
-  int placeholder_count = 0;
-  for (Node *n : g.nodes()) {
-    if (HasNodeAttr(n->def(), kHostToOutsideCompilationOriginalNodeAttrName)) {
-      string attr;
-      TF_CHECK_OK(GetNodeAttr(
-          n->attrs(), kHostToOutsideCompilationOriginalNodeAttrName, &attr));
-      if (attr == "identityn_0") {
-        ++placeholder_count;
-      }
-    }
-  }
-  EXPECT_EQ(placeholder_count, 2);
-}
-
-TEST(PostprocessForEncapsulationTest, ControlEdges) {
-  // Build the graph:
-  // "const0"
-  // "identity0" = "const0" (XLA computation 0)
-  // "identity1" = "identity0"
-  // "identity2" = "identity1" (XLA computation 1)
-  // "identity3" = "identity2"
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output const0 = ops::Const(s.WithOpName("const0"), 1, {});
-  Output identity0 = ops::Identity(s.WithOpName("identity0"), const0);
-  Output identity1 = ops::Identity(s.WithOpName("identity1"), identity0);
-  Output identity2 = ops::Identity(s.WithOpName("identity2"), identity1);
-  Output identity3 = ops::Identity(s.WithOpName("identity3"), identity2);
-  Graph g(OpRegistry::Global());
-  TF_CHECK_OK(s.ToGraph(&g));
-  auto node_index = g.BuildNodeNameIndex();
-
-  // Set XLA computation/outside compilation attr, and add control edges.
-  Node *const0_node = node_index["const0"],
-       *identity0_node = node_index["identity0"],
-       *identity1_node = node_index["identity1"],
-       *identity2_node = node_index["identity2"],
-       *identity3_node = node_index["identity3"];
-  identity1_node->AddAttr(kXlaConnectedFromOtherXlaComputationAttrName,
-                          std::vector<string>{"0"});
-  identity1_node->AddAttr(kXlaConnectedToOtherXlaComputationAttrName,
-                          std::vector<string>{"1"});
-  identity3_node->AddAttr(kXlaControlDependenciesAttrName,
-                          std::vector<string>{"const0", "identity1"});
-
-  std::unordered_map<string, XlaClusterInfo> clusters;
-  clusters["0"].node = identity0_node;
-  clusters["1"].node = identity2_node;
-  TF_CHECK_OK(PostprocessForEncapsulation(&g, "_xla", "_oc", clusters));
-
-  // Case 3a: we have control edge identity0 -> identity1, and identity1 ->
-  // identity2.
-  bool edge_identity0_identity1 = false, edge_identity1_identity2 = false;
-  for (const Edge *e : g.edges()) {
-    if (!e->IsControlEdge()) {
-      continue;
-    }
-    if (e->src() == identity0_node && e->dst() == identity1_node) {
-      edge_identity0_identity1 = true;
-    } else if (e->src() == identity1_node && e->dst() == identity2_node) {
-      edge_identity1_identity2 = true;
-    }
-  }
-  EXPECT_TRUE(edge_identity0_identity1);
-  EXPECT_TRUE(edge_identity1_identity2);
-  // Case 3b: we have control edge const0 -> identity3, and identity1 ->
-  // identity3.
-  bool edge_const0_identity3 = false, edge_identity1_identity3 = false;
-  for (const Edge *e : g.edges()) {
-    if (!e->IsControlEdge()) {
-      continue;
-    }
-    if (e->src() == const0_node && e->dst() == identity3_node) {
-      edge_const0_identity3 = true;
-    } else if (e->src() == identity1_node && e->dst() == identity3_node) {
-      edge_identity1_identity3 = true;
-    }
-  }
-  EXPECT_TRUE(edge_const0_identity3);
-  EXPECT_TRUE(edge_identity1_identity3);
-}
-
-TEST(PostprocessForEncapsulationTest, DataEdges) {
-  // Build the graph:
-  // "const0" in outside compilation "0"
-  // "placeholder0" (for "const0") in host computation
-  // "add0" = "placeholder0" + "placeholder0" in host computation
-  // "placeholder1" (for "add0") in outside compilation 1
-  // "add1" = "placeholder1" + "placeholder1" in outside compilation 1
-  //
-  // "bridge" = "placeholder0" in host computation
-  // "placeholder2" (for "bridge") in outside compilation 1
-  // "add2" = "placeholder2" + "placeholder2" in outside compilation 1
-  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-  Output const0 = ops::Const(s.WithOpName("const0"), 1, {});
-  Output placeholder0 =
-      ops::Placeholder(s.WithOpName("placeholder0"), DT_INT32);
-  Output add0 = ops::Add(s.WithOpName("add0"), placeholder0, placeholder0);
-  Output placeholder1 =
-      ops::Placeholder(s.WithOpName("placeholder1"), DT_INT32);
-  Output add1 = ops::Add(s.WithOpName("add1"), placeholder1, placeholder1);
-  Output bridge = ops::Identity(s.WithOpName("bridge"), placeholder0);
-  Output placeholder2 =
-      ops::Placeholder(s.WithOpName("placeholder2"), DT_INT32);
-  Output add2 = ops::Add(s.WithOpName("add2"), placeholder2, placeholder2);
-  Graph g(OpRegistry::Global());
-  TF_CHECK_OK(s.ToGraph(&g));
-  auto node_index = g.BuildNodeNameIndex();
-
-  // Set related attributes.
-  Node *placeholder0_node = node_index["placeholder0"];
-  placeholder0_node->AddAttr(kOutsideCompilationToHostOriginalNodeAttrName,
-                             "const0");
-  placeholder0_node->AddAttr(kOutsideCompilationToHostSrcOutputAttrName, 0);
-  Node *placeholder1_node = node_index["placeholder1"];
-  placeholder1_node->AddAttr(kHostToOutsideCompilationOriginalNodeAttrName,
-                             "add0");
-  placeholder1_node->AddAttr(kHostToOutsideCompilationSrcOutputAttrName, 0);
-  Node *bridge_node = node_index["bridge"];
-  bridge_node->AddAttr(kBridgeSourceNodeAttrName, "const0");
-  Node *placeholder2_node = node_index["placeholder2"];
-  placeholder2_node->AddAttr(kHostToOutsideCompilationOriginalNodeAttrName,
-                             "bridge");
-  placeholder2_node->AddAttr(kHostToOutsideCompilationSrcOutputAttrName, 0);
-
-  std::unordered_map<string, XlaClusterInfo> clusters;
-  TF_CHECK_OK(PostprocessForEncapsulation(&g, "_xla", "_oc", clusters));
-
-  // Result graph should be:
-  // "add0" = "const0" + "const0"
-  // "add1" = "add0" + "add0"
-  // "add2" = "const0" + "const0"
-  node_index = g.BuildNodeNameIndex();
-  EXPECT_EQ(node_index.size(), 6);
-  EXPECT_EQ(node_index["add0"]->def().input(0), "const0:0");
-  EXPECT_EQ(node_index["add0"]->def().input(1), "const0:0");
-  EXPECT_EQ(node_index["add1"]->def().input(0), "add0:0");
-  EXPECT_EQ(node_index["add1"]->def().input(1), "add0:0");
-  EXPECT_EQ(node_index["add2"]->def().input(0), "const0:0");
-  EXPECT_EQ(node_index["add2"]->def().input(1), "const0:0");
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
index feac983884..baf8507f4e 100644
--- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
+++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
@@ -634,17 +634,14 @@ Status ExpandHostGraphIntoMainGraph(Graph* main_graph,
   return s;
 }
 
-// Rewrites shape inference graph for outside compilation.
-// 1. If the outside compilation is a "top-level" one (not in a function of any
-//    If/While/etc.), this shape inference graph might have host computation to
-//    outside compilation placeholder nodes, which will cause shape inference to
-//    fail. However, those nodes are not in `host_graph` any more (because we
-//    have executed `PostprocessForEncapsultion`). In this case, we clear the
-//    graph, and copy SendFromHost with all its predecessors from `host_graph`.
-//    This case is detected by whether the SendFromHost node exists in
-//    `host_graph` as well.
-// 2. Remove control edges, and prune nodes that are not useful for shape
-//    inference.
+// Rewrites shape inference graph for outside compilation:
+// 1) If XlaSendFromHost also exists in `host_graph`, copy nodes from
+//    `host_graph`. Because we might still have outside compilation to outside
+//    compilation placeholder nodes in shape inference graph, which will prevent
+//    us from inferring XlaSendFromHost shape. But in `host_graph`, we already
+//    removed those placeholder nodes.
+// 2) Remove control edges.
+// 3) Prune nodes that are not useful for shape inference.
 Status RewriteShapeInferenceGraph(const string& shape_inference_graph_name,
                                   Graph* host_graph,
                                   FunctionLibraryDefinition* fld) {
@@ -744,6 +741,7 @@ Status RewriteShapeInferenceGraph(const string& shape_inference_graph_name,
       g->RemoveEdge(e);
     }
   }
+
   // Nodes that are not reverse reachable from SendFromHost are not useful for
   // shape inference. Prune them.
   PruneForReverseReachability(g,
@@ -1581,14 +1579,6 @@ Status ExtractOutsideCompilation(
     TF_RETURN_IF_ERROR(fld->RemoveFunction(host_graph_func_name));
   }
 
-  if (VLOG_IS_ON(4)) {
-    dump_graph::DumpGraphToFile("extract_outside_compilation_expanded", *g,
-                                fld);
-  }
-
-  TF_RETURN_IF_ERROR(PostprocessForEncapsulation(
-      g, xla_cluster_attr_name, outside_compilation_attr_name, clusters));
-
   for (auto shape_inference_graph_name : shape_inference_graphs) {
     TF_RETURN_IF_ERROR(
         RewriteShapeInferenceGraph(shape_inference_graph_name, g, fld));
diff --git a/tensorflow/compiler/jit/shape_inference.cc b/tensorflow/compiler/jit/shape_inference.cc
index 80c691fe49..a27e0d9f2a 100644
--- a/tensorflow/compiler/jit/shape_inference.cc
+++ b/tensorflow/compiler/jit/shape_inference.cc
@@ -53,7 +53,15 @@ Status PropagateShapes(const Graph& graph,
     // shapes, even if no shape function is registered for a node.
     Status status = shape_refiner->AddNode(n);
     if (!status.ok()) {
-      VLOG(1) << "Shape inference failed for node: " << status;
+      VLOG(1) << "Shape inference failed for node " << n->name() << ": "
+              << status;
+    } else {
+      shape_inference::InferenceContext* context = shape_refiner->GetContext(n);
+      for (int i = 0; i < n->num_outputs(); i++) {
+        shape_inference::ShapeHandle handle = context->output(i);
+        VLOG(4) << "Output " << i << " for node " << n->name() << ": "
+                << context->DebugString(handle);
+      }
     }
 
     if (n->type_string() == "_Arg") {
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py
index 59722bc246..9266d81cf5 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu.py
@@ -646,6 +646,10 @@ def split_compile_and_replicate(computation,
           array_ops.identity(x, name="replicated_input_{}".format(i))
           for i, x in enumerate(computation_inputs)
       ]
+      for i in computation_inputs:
+        # pylint: disable=protected-access
+        i.op._set_attr("_tpu_input_identity", attr_value_pb2.AttrValue(b=True))
+        # pylint: enable=protected-access
 
       # If there is an infeed queue, adds the dequeued values to the
       # computation's inputs.
@@ -726,7 +730,11 @@ def split_compile_and_replicate(computation,
     new_output_tensors = []
     for t in output_tensors:
       with ops.device(t.device if t.device else core(0)):
-        new_output_tensors.append(array_ops.identity(t))
+        o = array_ops.identity(t)
+        # pylint: disable=protected-access
+        o.op._set_attr("_tpu_output_identity", attr_value_pb2.AttrValue(b=True))
+        # pylint: enable=protected-access
+        new_output_tensors.append(o)
     output_tensors = new_output_tensors
     context.ExitResult(output_tensors)
   finally:
diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 11ca5e1024..44a8f7ce0e 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -2280,7 +2280,7 @@ class TPUEstimator(estimator_lib.Estimator):
           (k, _export_output_to_tensors(v))
           for k, v in six.iteritems(estimator_spec.export_outputs))
       tensors = nest.flatten(tensors_dict)
-      tpu_tensors = [t for t in tensors if _is_tpu_tensor(t)]
+      tpu_tensors = [t for t in tensors if t is not None]
 
       # We cannot return anything other than `tpu_tensors` here so we capture
       # the rest for later use.
@@ -2294,18 +2294,10 @@ class TPUEstimator(estimator_lib.Estimator):
     # `tpu_tensors_on_cpu`.
     new_tensors = []
     for t in tensors:
-      if _is_tpu_tensor(t):
-        new_tensors.append(tpu_tensors_on_cpu.pop(0))
-      elif t is None:
+      if t is None:
         new_tensors.append(None)
       else:
-        # Only fetching `tpu_tensors_on_cpu` does not trigger
-        # TPU computation and blocks, so we add the control dependency here.
-        control_inputs = (
-            tpu_tensors_on_cpu if _is_iterable(tpu_tensors_on_cpu) else
-            (tpu_tensors_on_cpu,))
-        with ops.control_dependencies(control_inputs):
-          new_tensors.append(array_ops.identity(t))
+        new_tensors.append(tpu_tensors_on_cpu.pop(0))
 
     # Reconstruct `tensors_dict`.
     new_tensors_dict = nest.pack_sequence_as(tensors_dict, new_tensors)
@@ -2798,17 +2790,6 @@ class TPUEstimator(estimator_lib.Estimator):
     return _model_fn
 
 
-def _is_tpu_tensor(tensor):
-  if not isinstance(tensor, ops.Tensor):
-    return False
-  try:
-    tensor.op.get_attr(tpu._OUTSIDE_COMPILATION_ATTR)  # pylint: disable=protected-access
-  except ValueError:
-    return True
-  else:
-    return False
-
-
 def _export_output_to_tensors(export_output):
   """Get a list of `Tensors` used in `export_output`.
 
-- 
GitLab


From 7dcebc8629b0d94c8462ab14251043c496ca7b54 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 10:50:01 -0800
Subject: [PATCH 511/873] Adds all Keras modes to `convolutional_test`

PiperOrigin-RevId: 225397991
---
 tensorflow/python/keras/BUILD                 |  2 +-
 tensorflow/python/keras/backend.py            |  4 ++
 .../python/keras/layers/convolutional_test.py | 69 ++++++++++---------
 3 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index ca44a7bbe9..c056996f96 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -369,7 +369,7 @@ py_test(
     name = "convolutional_test",
     size = "large",
     srcs = ["layers/convolutional_test.py"],
-    shard_count = 4,
+    shard_count = 11,
     srcs_version = "PY2AND3",
     deps = [
         ":keras",
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 381e0ae3e3..c93a716b3c 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -568,6 +568,10 @@ def _get_available_gpus():
   Returns:
       A list of available GPU devices.
   """
+  if ops.executing_eagerly_outside_functions():
+    # Returns names of devices directly.
+    return [name for name in context.list_devices() if 'GPU' in name]
+
   global _LOCAL_DEVICES
   if _LOCAL_DEVICES is None:
     _LOCAL_DEVICES = get_session().list_devices()
diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py
index d3339a8413..81af06b4ec 100644
--- a/tensorflow/python/keras/layers/convolutional_test.py
+++ b/tensorflow/python/keras/layers/convolutional_test.py
@@ -24,13 +24,13 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.eager import context
-from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class Convolution1DTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class Convolution1DTest(keras_parameterized.TestCase):
 
   def _run_test(self, kwargs, arg, values):
     num_samples = 2
@@ -100,8 +100,8 @@ class Convolution1DTest(test.TestCase):
       self.assertEqual(layer.bias.constraint, b_constraint)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class Conv2DTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class Conv2DTest(keras_parameterized.TestCase):
 
   def _run_test(self, kwargs, arg, values):
     num_samples = 2
@@ -175,8 +175,8 @@ class Conv2DTest(test.TestCase):
       self.assertEqual(layer.bias.constraint, b_constraint)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class Conv2DTransposeTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class Conv2DTransposeTest(keras_parameterized.TestCase):
 
   def _run_test(self, kwargs, arg, values):
     num_samples = 2
@@ -267,8 +267,8 @@ class Conv2DTransposeTest(test.TestCase):
                              expected_output=expected_output)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class Conv3DTransposeTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class Conv3DTransposeTest(keras_parameterized.TestCase):
 
   def _run_test(self, kwargs, arg, values):
     num_samples = 2
@@ -336,8 +336,8 @@ class Conv3DTransposeTest(test.TestCase):
       self.assertEqual(layer.bias.constraint, b_constraint)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class SeparableConv1DTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class SeparableConv1DTest(keras_parameterized.TestCase):
 
   def _run_test(self, kwargs, arg, values):
     num_samples = 2
@@ -411,8 +411,8 @@ class SeparableConv1DTest(test.TestCase):
       self.assertEqual(layer.bias.constraint, b_constraint)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class SeparableConv2DTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class SeparableConv2DTest(keras_parameterized.TestCase):
 
   def _run_test(self, kwargs, arg, values):
     num_samples = 2
@@ -489,8 +489,8 @@ class SeparableConv2DTest(test.TestCase):
       self.assertEqual(layer.bias.constraint, b_constraint)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class Conv3DTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class Conv3DTest(keras_parameterized.TestCase):
 
   def _run_test(self, kwargs, arg, values):
     num_samples = 2
@@ -557,8 +557,8 @@ class Conv3DTest(test.TestCase):
       self.assertEqual(layer.bias.constraint, b_constraint)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class ZeroPaddingTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class ZeroPaddingTest(keras_parameterized.TestCase):
 
   def test_zero_padding_1d(self):
     num_samples = 2
@@ -726,8 +726,8 @@ class ZeroPaddingTest(test.TestCase):
       keras.layers.ZeroPadding3D(padding=None)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class UpSamplingTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class UpSamplingTest(keras_parameterized.TestCase):
 
   def test_upsampling_1d(self):
     with self.session(use_gpu=True):
@@ -875,8 +875,8 @@ class UpSamplingTest(test.TestCase):
               np.testing.assert_allclose(np_output, expected_out)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class CroppingTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class CroppingTest(keras_parameterized.TestCase):
 
   def test_cropping_1d(self):
     num_samples = 2
@@ -1017,8 +1017,8 @@ class CroppingTest(test.TestCase):
       keras.layers.Cropping3D(cropping=None)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class DepthwiseConv2DTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class DepthwiseConv2DTest(keras_parameterized.TestCase):
 
   def _run_test(self, kwargs, arg, values):
     num_samples = 2
@@ -1044,17 +1044,18 @@ class DepthwiseConv2DTest(test.TestCase):
       self._run_test(kwargs, 'data_format', ['channels_first'])
     self._run_test(kwargs, 'depth_multiplier', [1, 2])
 
-    kwargs = {'kernel_size': 3,
-              'padding': 'valid',
-              'data_format': 'channels_first',
-              'activation': None,
-              'depthwise_regularizer': 'l2',
-              'bias_regularizer': 'l2',
-              'activity_regularizer': 'l2',
-              'depthwise_constraint': 'unit_norm',
-              'use_bias': True,
-              'strides': (2, 2),
-             }
+    kwargs = {
+        'kernel_size': 3,
+        'padding': 'valid',
+        'data_format': 'channels_last',
+        'activation': None,
+        'depthwise_regularizer': 'l2',
+        'bias_regularizer': 'l2',
+        'activity_regularizer': 'l2',
+        'depthwise_constraint': 'unit_norm',
+        'use_bias': True,
+        'strides': (2, 2),
+    }
     self._run_test(kwargs, 'depth_multiplier', [1])
 
 if __name__ == '__main__':
-- 
GitLab


From 23c93ebb5ef95b0c0c738a7a781c885459dc6afc Mon Sep 17 00:00:00 2001
From: Karmel Allison <karmel@google.com>
Date: Thu, 13 Dec 2018 10:54:52 -0800
Subject: [PATCH 512/873] Metrics tests: adding v2 decorators.

PiperOrigin-RevId: 225398873
---
 tensorflow/python/keras/metrics_test.py | 236 ++++++++++++------------
 1 file changed, 122 insertions(+), 114 deletions(-)

diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py
index 1f13a97d75..9720d910eb 100644
--- a/tensorflow/python/keras/metrics_test.py
+++ b/tensorflow/python/keras/metrics_test.py
@@ -27,9 +27,10 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import layers
 from tensorflow.python.keras import metrics
-from tensorflow.python.keras.models import Sequential
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
@@ -285,19 +286,6 @@ class KerasAccuracyTest(test.TestCase):
       metrics._assert_thresholds_range([None, 0.5])
 
 
-def _get_simple_sequential_model(compile_metrics):
-  model = Sequential()
-  model.add(
-      layers.Dense(
-          3, activation='relu', input_dim=4, kernel_initializer='ones'))
-  model.add(layers.Dense(1, activation='sigmoid', kernel_initializer='ones'))
-  model.compile(
-      loss='mae',
-      metrics=compile_metrics,
-      optimizer=RMSPropOptimizer(learning_rate=0.001))
-  return model
-
-
 @test_util.run_all_in_graph_and_eager_modes
 class FalsePositivesTest(test.TestCase):
 
@@ -366,16 +354,6 @@ class FalsePositivesTest(test.TestCase):
         r'Threshold values must be in \[0, 1\]. Invalid values: \[-1, 2\]'):
       metrics.FalsePositives(thresholds=[-1, 0.5, 2])
 
-  def test_reset_states(self):
-    fp_obj = metrics.FalsePositives()
-    model = _get_simple_sequential_model([fp_obj])
-    x = np.ones((100, 4))
-    y = np.zeros((100, 1))
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(fp_obj.accumulator), 100.)
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(fp_obj.accumulator), 100.)
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class FalseNegativesTest(test.TestCase):
@@ -438,16 +416,6 @@ class FalseNegativesTest(test.TestCase):
     result = fn_obj(y_true, y_pred, sample_weight=sample_weight)
     self.assertAllClose([4., 16., 23.], self.evaluate(result))
 
-  def test_reset_states(self):
-    fn_obj = metrics.FalseNegatives()
-    model = _get_simple_sequential_model([fn_obj])
-    x = np.zeros((100, 4))
-    y = np.ones((100, 1))
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(fn_obj.accumulator), 100.)
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(fn_obj.accumulator), 100.)
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class TrueNegativesTest(test.TestCase):
@@ -510,16 +478,6 @@ class TrueNegativesTest(test.TestCase):
     result = tn_obj(y_true, y_pred, sample_weight=sample_weight)
     self.assertAllClose([5., 15., 23.], self.evaluate(result))
 
-  def test_reset_states(self):
-    tn_obj = metrics.TrueNegatives()
-    model = _get_simple_sequential_model([tn_obj])
-    x = np.zeros((100, 4))
-    y = np.zeros((100, 1))
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(tn_obj.accumulator), 100.)
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(tn_obj.accumulator), 100.)
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class TruePositivesTest(test.TestCase):
@@ -581,16 +539,6 @@ class TruePositivesTest(test.TestCase):
     result = tp_obj(y_true, y_pred, sample_weight=37.)
     self.assertAllClose([222., 111., 37.], self.evaluate(result))
 
-  def test_reset_states(self):
-    tp_obj = metrics.TruePositives()
-    model = _get_simple_sequential_model([tp_obj])
-    x = np.ones((100, 4))
-    y = np.ones((100, 1))
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(tp_obj.accumulator), 100.)
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(tp_obj.accumulator), 100.)
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class PrecisionTest(test.TestCase):
@@ -703,18 +651,6 @@ class PrecisionTest(test.TestCase):
     self.assertArrayNear([expected_precision, 0], self.evaluate(p_obj.result()),
                          1e-3)
 
-  def test_reset_states(self):
-    p_obj = metrics.Precision()
-    model = _get_simple_sequential_model([p_obj])
-    x = np.concatenate((np.ones((50, 4)), np.ones((50, 4))))
-    y = np.concatenate((np.ones((50, 1)), np.zeros((50, 1))))
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(p_obj.tp), 50.)
-    self.assertEqual(self.evaluate(p_obj.fp), 50.)
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(p_obj.tp), 50.)
-    self.assertEqual(self.evaluate(p_obj.fp), 50.)
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class RecallTest(test.TestCase):
@@ -826,18 +762,6 @@ class RecallTest(test.TestCase):
     self.assertArrayNear([expected_recall, 0], self.evaluate(r_obj.result()),
                          1e-3)
 
-  def test_reset_states(self):
-    r_obj = metrics.Recall()
-    model = _get_simple_sequential_model([r_obj])
-    x = np.concatenate((np.ones((50, 4)), np.zeros((50, 4))))
-    y = np.concatenate((np.ones((50, 1)), np.ones((50, 1))))
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(r_obj.tp), 50.)
-    self.assertEqual(self.evaluate(r_obj.fn), 50.)
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(r_obj.tp), 50.)
-    self.assertEqual(self.evaluate(r_obj.fn), 50.)
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class SensitivityAtSpecificityTest(test.TestCase, parameterized.TestCase):
@@ -927,24 +851,6 @@ class SensitivityAtSpecificityTest(test.TestCase, parameterized.TestCase):
     with self.assertRaisesRegexp(ValueError, '`num_thresholds` must be > 0.'):
       metrics.SensitivityAtSpecificity(0.4, num_thresholds=-1)
 
-  def test_reset_states(self):
-    s_obj = metrics.SensitivityAtSpecificity(0.5, num_thresholds=1)
-    model = _get_simple_sequential_model([s_obj])
-    x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)),
-                        np.ones((25, 4))))
-    y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)),
-                        np.zeros((25, 1))))
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(s_obj.tp), 25.)
-    self.assertEqual(self.evaluate(s_obj.fp), 25.)
-    self.assertEqual(self.evaluate(s_obj.fn), 25.)
-    self.assertEqual(self.evaluate(s_obj.tn), 25.)
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(s_obj.tp), 25.)
-    self.assertEqual(self.evaluate(s_obj.fp), 25.)
-    self.assertEqual(self.evaluate(s_obj.fn), 25.)
-    self.assertEqual(self.evaluate(s_obj.tn), 25.)
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class SpecificityAtSensitivityTest(test.TestCase, parameterized.TestCase):
@@ -1034,24 +940,6 @@ class SpecificityAtSensitivityTest(test.TestCase, parameterized.TestCase):
     with self.assertRaisesRegexp(ValueError, '`num_thresholds` must be > 0.'):
       metrics.SpecificityAtSensitivity(0.4, num_thresholds=-1)
 
-  def test_reset_states(self):
-    s_obj = metrics.SpecificityAtSensitivity(0.5, num_thresholds=1)
-    model = _get_simple_sequential_model([s_obj])
-    x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)),
-                        np.ones((25, 4))))
-    y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)),
-                        np.zeros((25, 1))))
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(s_obj.tp), 25.)
-    self.assertEqual(self.evaluate(s_obj.fp), 25.)
-    self.assertEqual(self.evaluate(s_obj.fn), 25.)
-    self.assertEqual(self.evaluate(s_obj.tn), 25.)
-    model.evaluate(x, y)
-    self.assertEqual(self.evaluate(s_obj.tp), 25.)
-    self.assertEqual(self.evaluate(s_obj.fp), 25.)
-    self.assertEqual(self.evaluate(s_obj.fn), 25.)
-    self.assertEqual(self.evaluate(s_obj.tn), 25.)
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class CosineProximityTest(test.TestCase):
@@ -1086,5 +974,125 @@ class CosineProximityTest(test.TestCase):
     result = cosine_obj(y_true, y_pred, sample_weight=sample_weight)
     self.assertAllClose(-0.59916, self.evaluate(result), atol=1e-5)
 
+
+def _get_model(compile_metrics):
+  model_layers = [
+      layers.Dense(3, activation='relu', kernel_initializer='ones'),
+      layers.Dense(1, activation='sigmoid', kernel_initializer='ones')]
+
+  model = testing_utils.get_model_from_layers(model_layers, input_shape=(4,))
+  model.compile(
+      loss='mae',
+      metrics=compile_metrics,
+      optimizer=RMSPropOptimizer(learning_rate=0.001),
+      run_eagerly=testing_utils.should_run_eagerly())
+  return model
+
+
+@keras_parameterized.run_with_all_model_types
+@keras_parameterized.run_all_keras_modes
+class ResetStatesTest(keras_parameterized.TestCase):
+
+  def test_reset_states_false_positives(self):
+    fp_obj = metrics.FalsePositives()
+    model = _get_model([fp_obj])
+    x = np.ones((100, 4))
+    y = np.zeros((100, 1))
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(fp_obj.accumulator), 100.)
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(fp_obj.accumulator), 100.)
+
+  def test_reset_states_false_negatives(self):
+    fn_obj = metrics.FalseNegatives()
+    model = _get_model([fn_obj])
+    x = np.zeros((100, 4))
+    y = np.ones((100, 1))
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(fn_obj.accumulator), 100.)
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(fn_obj.accumulator), 100.)
+
+  def test_reset_states_true_negatives(self):
+    tn_obj = metrics.TrueNegatives()
+    model = _get_model([tn_obj])
+    x = np.zeros((100, 4))
+    y = np.zeros((100, 1))
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(tn_obj.accumulator), 100.)
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(tn_obj.accumulator), 100.)
+
+  def test_reset_states_true_positives(self):
+    tp_obj = metrics.TruePositives()
+    model = _get_model([tp_obj])
+    x = np.ones((100, 4))
+    y = np.ones((100, 1))
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(tp_obj.accumulator), 100.)
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(tp_obj.accumulator), 100.)
+
+  def test_reset_states_precision(self):
+    p_obj = metrics.Precision()
+    model = _get_model([p_obj])
+    x = np.concatenate((np.ones((50, 4)), np.ones((50, 4))))
+    y = np.concatenate((np.ones((50, 1)), np.zeros((50, 1))))
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(p_obj.tp), 50.)
+    self.assertEqual(self.evaluate(p_obj.fp), 50.)
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(p_obj.tp), 50.)
+    self.assertEqual(self.evaluate(p_obj.fp), 50.)
+
+  def test_reset_states_recall(self):
+    r_obj = metrics.Recall()
+    model = _get_model([r_obj])
+    x = np.concatenate((np.ones((50, 4)), np.zeros((50, 4))))
+    y = np.concatenate((np.ones((50, 1)), np.ones((50, 1))))
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(r_obj.tp), 50.)
+    self.assertEqual(self.evaluate(r_obj.fn), 50.)
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(r_obj.tp), 50.)
+    self.assertEqual(self.evaluate(r_obj.fn), 50.)
+
+  def test_reset_states_sensitivity_at_specificity(self):
+    s_obj = metrics.SensitivityAtSpecificity(0.5, num_thresholds=1)
+    model = _get_model([s_obj])
+    x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)),
+                        np.ones((25, 4))))
+    y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)),
+                        np.zeros((25, 1))))
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(s_obj.tp), 25.)
+    self.assertEqual(self.evaluate(s_obj.fp), 25.)
+    self.assertEqual(self.evaluate(s_obj.fn), 25.)
+    self.assertEqual(self.evaluate(s_obj.tn), 25.)
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(s_obj.tp), 25.)
+    self.assertEqual(self.evaluate(s_obj.fp), 25.)
+    self.assertEqual(self.evaluate(s_obj.fn), 25.)
+    self.assertEqual(self.evaluate(s_obj.tn), 25.)
+
+  def test_reset_states_specificity_at_sensitivity(self):
+    s_obj = metrics.SpecificityAtSensitivity(0.5, num_thresholds=1)
+    model = _get_model([s_obj])
+    x = np.concatenate((np.ones((25, 4)), np.zeros((25, 4)), np.zeros((25, 4)),
+                        np.ones((25, 4))))
+    y = np.concatenate((np.ones((25, 1)), np.zeros((25, 1)), np.ones((25, 1)),
+                        np.zeros((25, 1))))
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(s_obj.tp), 25.)
+    self.assertEqual(self.evaluate(s_obj.fp), 25.)
+    self.assertEqual(self.evaluate(s_obj.fn), 25.)
+    self.assertEqual(self.evaluate(s_obj.tn), 25.)
+    model.evaluate(x, y)
+    self.assertEqual(self.evaluate(s_obj.tp), 25.)
+    self.assertEqual(self.evaluate(s_obj.fp), 25.)
+    self.assertEqual(self.evaluate(s_obj.fn), 25.)
+    self.assertEqual(self.evaluate(s_obj.tn), 25.)
+
+
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 4b83eaab63f3082cbee21a35dac1a2f89821b7f3 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Thu, 13 Dec 2018 11:01:36 -0800
Subject: [PATCH 513/873] Validate buffer in
 tflite::FlatBufferModel::BuildFromBuffer.

PiperOrigin-RevId: 225400229
---
 tensorflow/lite/model.cc | 18 ++++++++++++++++++
 tensorflow/lite/model.h  | 18 ++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/tensorflow/lite/model.cc b/tensorflow/lite/model.cc
index 5ac0532afe..831c81aa00 100644
--- a/tensorflow/lite/model.cc
+++ b/tensorflow/lite/model.cc
@@ -121,6 +121,24 @@ std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromBuffer(
   return model;
 }
 
+std::unique_ptr<FlatBufferModel> FlatBufferModel::VerifyAndBuildFromBuffer(
+    const char* buffer, size_t buffer_size, TfLiteVerifier* verifier,
+    ErrorReporter* error_reporter) {
+  error_reporter = ValidateErrorReporter(error_reporter);
+
+  flatbuffers::Verifier base_verifier(reinterpret_cast<const uint8_t*>(buffer),
+                                      buffer_size);
+  if (!VerifyModelBuffer(base_verifier)) {
+    return nullptr;
+  }
+
+  if (verifier && !verifier->Verify(buffer, buffer_size, error_reporter)) {
+    return nullptr;
+  }
+
+  return BuildFromBuffer(buffer, buffer_size, error_reporter);
+}
+
 std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromModel(
     const tflite::Model* model_spec, ErrorReporter* error_reporter) {
   error_reporter = ValidateErrorReporter(error_reporter);
diff --git a/tensorflow/lite/model.h b/tensorflow/lite/model.h
index 01e7c68205..58c9767849 100644
--- a/tensorflow/lite/model.h
+++ b/tensorflow/lite/model.h
@@ -80,10 +80,28 @@ class FlatBufferModel {
   // is destroyed. Caller retains ownership of `error_reporter` and must ensure
   // its lifetime is longer than the FlatBufferModel instance.
   // Returns a nullptr in case of failure.
+  // NOTE: this does NOT validate the buffer so it should NOT be called on
+  // invalid/untrusted input. Use VerifyAndBuildFromBuffer in that case
   static std::unique_ptr<FlatBufferModel> BuildFromBuffer(
       const char* buffer, size_t buffer_size,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
+  // Verifies whether the content of the buffer is legit, then builds a model
+  // based on the pre-loaded flatbuffer.
+  // The verifier argument is an additional optional verifier for the buffer. By
+  // default, we always check with tflite::VerifyModelBuffer. If verifier is
+  // supplied, the buffer is checked against the verifier after the check
+  // against tflite::VerifyModelBuilder.
+  // The caller retains ownership of the buffer and should keep it alive until
+  // the returned object is destroyed. Caller retains ownership of
+  // `error_reporter` and must ensure its lifetime is longer than the
+  // FlatBufferModel instance.
+  // Returns a nullptr in case of failure.
+  static std::unique_ptr<FlatBufferModel> VerifyAndBuildFromBuffer(
+      const char* buffer, size_t buffer_size,
+      TfLiteVerifier* verifier = nullptr,
+      ErrorReporter* error_reporter = DefaultErrorReporter());
+
   // Builds a model directly from a flatbuffer pointer. The caller retains
   // ownership of the buffer and should keep it alive until the returned object
   // is destroyed. Caller retains ownership of `error_reporter` and must ensure
-- 
GitLab


From 51f685c97544b2789efe4db6808e057a2b4b6090 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 13 Dec 2018 11:05:34 -0800
Subject: [PATCH 514/873] Add a workaround for reference cycles created by
 newer versions of astor.

PiperOrigin-RevId: 225401162
---
 tensorflow/python/autograph/pyct/compiler.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/python/autograph/pyct/compiler.py b/tensorflow/python/autograph/pyct/compiler.py
index 06e66c5b58..aa4fd551ec 100644
--- a/tensorflow/python/autograph/pyct/compiler.py
+++ b/tensorflow/python/autograph/pyct/compiler.py
@@ -67,6 +67,12 @@ def ast_to_source(node, indentation='  '):
       trimmed_code_lines.append(l)
   code = '\n'.join(trimmed_code_lines)
 
+  # Work around the reference cycle generated by astor.
+  # See https://github.com/berkerpeksag/astor/blob/55dd323f7d8d696610c703c0296763c567685c31/astor/code_gen.py#L162  # pylint:disable=line-too-long
+  # Reference cycles are quite disliked by TensorFlow's tests.
+  if hasattr(generator, 'write'):
+    generator.write = None
+
   return code
 
 
-- 
GitLab


From 99313dd8feac0c20dde96982da851ac5235e3aa3 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Thu, 13 Dec 2018 11:07:26 -0800
Subject: [PATCH 515/873] Fix uninitialized memory under fuzz for
 CheckNumerics.

PiperOrigin-RevId: 225401514
---
 tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc b/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
index bcd299e308..62d39895a4 100644
--- a/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
@@ -35,7 +35,7 @@ class FuzzCheckNumerics : public FuzzSession {
     const float* float_data = reinterpret_cast<const float*>(data);
 
     Tensor input_tensor(tensorflow::DT_FLOAT,
-                        TensorShape({static_cast<int64>(size)}));
+                        TensorShape({static_cast<int64>(num_floats)}));
     auto flat_tensor = input_tensor.flat<float>();
     for (size_t i = 0; i < num_floats; i++) {
       flat_tensor(i) = float_data[i];
-- 
GitLab


From 8596df458628dd65399e1a83f1788a6d88572c88 Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Thu, 13 Dec 2018 11:10:28 -0800
Subject: [PATCH 516/873] Add `serving_only` option to save_keras_model,
 allowing subclassed models to be saved.

PiperOrigin-RevId: 225402096
---
 .../python/saved_model/keras_saved_model.py   | 174 +++++++++++-------
 .../saved_model/keras_saved_model_test.py     |  81 +++++++-
 2 files changed, 184 insertions(+), 71 deletions(-)

diff --git a/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model.py b/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model.py
index ffba514bb9..2a4b6eae36 100644
--- a/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model.py
+++ b/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model.py
@@ -22,53 +22,57 @@ import os
 import six
 
 from tensorflow.python.client import session
-from tensorflow.python.estimator import keras as estimator_keras_util
-from tensorflow.python.estimator import model_fn as model_fn_lib
-from tensorflow.python.estimator.export import export as export_helpers
 from tensorflow.python.framework import ops
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import models as models_lib
 from tensorflow.python.keras import optimizers
 from tensorflow.python.keras.engine import sequential
+from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.metrics import Metric
 from tensorflow.python.keras.models import model_from_json
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import variables
-from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.saved_model import builder as saved_model_builder
 from tensorflow.python.saved_model import constants
+from tensorflow.python.saved_model import save as save_lib
 from tensorflow.python.saved_model import utils_impl as saved_model_utils
 from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.training.checkpointable import util as checkpointable_utils
 from tensorflow.python.util import compat
+from tensorflow.python.util import nest
+from tensorflow_estimator.python.estimator import keras as estimator_keras_util
+from tensorflow_estimator.python.estimator import model_fn as model_fn_lib
+from tensorflow_estimator.python.estimator.export import export as export_helpers
 
 
 def save_keras_model(
-    model, saved_model_path, custom_objects=None, as_text=None):
-  """Save a `tf.keras.Model` into Tensorflow SavedModel format.
+    model, saved_model_path, custom_objects=None, as_text=None,
+    input_signature=None, serving_only=False):
+  """Saves a `tf.keras.Model` into Tensorflow SavedModel format.
 
   `save_model` generates new files/folders under the `saved_model_path` folder:
-  1) an asset folder containing the json string of the model's
-     configuration (topology).
-  2) a checkpoint containing the model weights.
-  3) a saved_model.pb file containing the model's MetaGraphs. The prediction
+  1) a checkpoint containing the model weights.
+  2) a saved_model.pb file containing the model's MetaGraphs. The prediction
      graph is always exported. The evaluaton and training graphs are exported
      if the following conditions are met:
      - Evaluation: model loss is defined.
      - Training: model is compiled with an optimizer defined under `tf.train`.
        This is because `tf.keras.optimizers.Optimizer` instances cannot be
        saved to checkpoints.
-
-  Model Requirements:
-  - Model must be a sequential model or functional model. Subclassed models can
-    not be saved via this function, unless you provide an implementation for
-    get_config() and from_config().
-  - All variables must be saveable by the model. In general, this condition is
-    met through the use of layers defined in the keras library. However,
-    there is currently a bug with variables created in Lambda layer functions
-    not being saved correctly (see
-    https://github.com/keras-team/keras/issues/9740).
+  3) Model's json configuration, if model.get_config() has been implemented.
+     This file can be used to reload the model using
+     tf.keras.models.model_from_json(). Note that if any custom objects were
+     used, they should be passed to the `custom_object` argument when loading
+     the model.
+
+  Model limitations:
+  - Sequential and functional models can always be saved.
+  - Subclassed models can only be saved when `serving_only=True`. This is due to
+    the current implementation copying the model in order to export the training
+    and evaluation graphs. Because the topology of subclassed models cannot be
+    determined, the subclassed models cannot be cloned. Subclassed models will
+    be entirely exportable in the future.
 
   Note that each mode is exported in separate graphs, so different modes do not
   share variables. To use the train graph with evaluation or prediction graphs,
@@ -94,38 +98,88 @@ def save_keras_model(
   ```
 
   Args:
-    model: A `tf.keras.Model` to be saved.
+    model: A `tf.keras.Model` to be saved. If the model is subclassed, the flag
+      `serving_only` must be set to True.
     saved_model_path: a string specifying the path to the SavedModel directory.
       The SavedModel will be saved to a timestamped folder created within this
       directory.
     custom_objects: Optional dictionary mapping string names to custom classes
       or functions (e.g. custom loss functions).
-    as_text: whether to write the `SavedModel` proto in text format.
+    as_text: whether to write the `SavedModel` proto in text format. Currently
+      unavailable in serving-only mode.
+    input_signature: A possibly nested sequence of `tf.TensorSpec` objects, used
+      to specify the expected model inputs. `input_signature`'s nested structure
+      should match the expected nested structure of the inputs to the model. If
+      this is not set, this function will attempt to infer the input shapes and
+      dtypes from the model. Note that if the model is subclassed, the tensor
+      inputs to the call function should be nested in the first argument (this
+      is a general requirement for using subclassed models with Keras functions
+      .fit(), .predict(), etc.).
+    serving_only: Export only the outputs produced from calling the model in
+      predict mode. The losses, optimizer, and other training configurations are
+      not saved. If the SavedModel will only be used for serving (rather than
+      retraining), or if the model is subclassed, this can be set to True.
 
   Returns:
     String path to the SavedModel folder, a subdirectory of `saved_model_path`.
 
   Raises:
-    NotImplementedError: If the model is a subclassed model.
-    ValueError: If a Sequential model does not have input shapes defined by the
-      user, and is not built.
+    NotImplementedError: If the model is a subclassed model, and serving_only is
+      False.
+    ValueError: If the input signature cannot be inferred from the model.
   """
+  export_dir = export_helpers.get_timestamped_export_dir(saved_model_path)
+
+  if serving_only:
+    save_lib.save(
+        model, export_dir,
+        signatures=training_utils.trace_model_call(model, input_signature))
+  else:
+    _save_v1_format(model, export_dir, custom_objects, as_text, input_signature)
+
+  try:
+    _export_model_json(model, export_dir)
+  except NotImplementedError:
+    logging.warning('Skipped saving model JSON, subclassed model does not have '
+                    'get_config() defined.')
+
+  return export_dir
+
+
+def _export_model_json(model, saved_model_path):
+  """Saves model configuration as a json string under assets folder."""
+  model_json = model.to_json()
+  model_json_filepath = os.path.join(
+      saved_model_utils.get_or_create_assets_dir(saved_model_path),
+      compat.as_text(constants.SAVED_MODEL_FILENAME_JSON))
+  file_io.write_string_to_file(model_json_filepath, model_json)
+
+
+def _export_model_variables(model, saved_model_path):
+  """Saves model weights in checkpoint format under variables folder."""
+  saved_model_utils.get_or_create_variables_dir(saved_model_path)
+  checkpoint_prefix = saved_model_utils.get_variables_path(saved_model_path)
+  model.save_weights(checkpoint_prefix, save_format='tf', overwrite=True)
+  return checkpoint_prefix
+
+
+def _save_v1_format(model, path, custom_objects, as_text, input_signature):
+  """Exports model to v1 SavedModel format."""
   if not model._is_graph_network:
     if isinstance(model, sequential.Sequential):
       # If input shape is not directly set in the model, the exported model
-      # will assume that the inputs have the same shape as the shape the model
-      # was built model with.
-      if not model.built:
+      # will infer the expected shapes of the input from the model.
+      if not model.built and input_signature is None:
         raise ValueError(
-            'Sequential model must be built before it can be exported.')
+            'Sequential model\'s input shape is unknown. Please build the '
+            'model, or use the input_signature argument to specify the '
+            'model inputs.')
     else:
       raise NotImplementedError(
-          'Exporting subclassed models is not yet supported.')
+          'Subclassed models can only be exported for serving. Please set '
+          'argument serving_only=True.')
 
-  export_dir = export_helpers.get_timestamped_export_dir(saved_model_path)
-  temp_export_dir = export_helpers.get_temp_export_dir(export_dir)
-
-  builder = saved_model_builder._SavedModelBuilder(temp_export_dir)
+  builder = saved_model_builder._SavedModelBuilder(path)
 
   # Manually save variables to export them in an object-based checkpoint. This
   # skips the `builder.add_meta_graph_and_variables()` step, which saves a
@@ -133,7 +187,7 @@ def save_keras_model(
   # TODO(b/113134168): Add fn to Builder to save with object-based saver.
   # TODO(b/113178242): This should only export the model json structure. Only
   # one save is needed once the weights can be copied from the model to clone.
-  checkpoint_path = _export_model_json_and_variables(model, temp_export_dir)
+  checkpoint_path = _export_model_variables(model, path)
 
   # Export each mode. Use ModeKeys enums defined for `Estimator` to ensure that
   # Keras models and `Estimator`s are exported with the same format.
@@ -143,10 +197,12 @@ def save_keras_model(
   export_args = {'builder': builder,
                  'model': model,
                  'custom_objects': custom_objects,
-                 'checkpoint_path': checkpoint_path}
+                 'checkpoint_path': checkpoint_path,
+                 'input_signature': input_signature}
 
   has_saved_vars = False
   if model.optimizer:
+    # TODO(kathywu): Verify this works with v2 optimizer.
     if isinstance(model.optimizer, optimizers.TFOptimizer):
       _export_mode(model_fn_lib.ModeKeys.TRAIN, has_saved_vars, **export_args)
       has_saved_vars = True
@@ -161,34 +217,20 @@ def save_keras_model(
 
   builder.save(as_text)
 
-  gfile.Rename(temp_export_dir, export_dir)
-  return export_dir
-
-
-def _export_model_json_and_variables(model, saved_model_path):
-  """Save model variables and json structure into SavedModel subdirectories."""
-  # Save model configuration as a json string under assets folder.
-  model_json = model.to_json()
-  model_json_filepath = os.path.join(
-      saved_model_utils.get_or_create_assets_dir(saved_model_path),
-      compat.as_text(constants.SAVED_MODEL_FILENAME_JSON))
-  file_io.write_string_to_file(model_json_filepath, model_json)
-
-  # Save model weights in checkpoint format under variables folder.
-  saved_model_utils.get_or_create_variables_dir(saved_model_path)
-  checkpoint_prefix = saved_model_utils.get_variables_path(saved_model_path)
-  model.save_weights(checkpoint_prefix, save_format='tf', overwrite=True)
-  return checkpoint_prefix
-
 
 def _get_var_list(model):
-  """Return list of all checkpointed saveable objects in the model."""
+  """Returns list of all checkpointed saveable objects in the model."""
   return checkpointable_utils.named_saveables(model)
 
 
+def create_placeholder(spec):
+  return K.placeholder(shape=spec.shape, dtype=spec.dtype, name=spec.name)
+
+
 def _export_mode(
-    mode, has_saved_vars, builder, model, custom_objects, checkpoint_path):
-  """Export a model, and optionally save new vars from the clone model.
+    mode, has_saved_vars, builder, model, custom_objects, checkpoint_path,
+    input_signature):
+  """Exports a model, and optionally saves new vars from the clone model.
 
   Args:
     mode: A `tf.estimator.ModeKeys` string.
@@ -199,6 +241,8 @@ def _export_mode(
     custom_objects: A dictionary mapping string names to custom classes
       or functions.
     checkpoint_path: String path to checkpoint.
+    input_signature: Nested TensorSpec containing the expected inputs. Can be
+      `None`, in which case the signature will be inferred from the model.
 
   Raises:
     ValueError: If the train/eval mode is being exported, but the model does
@@ -214,10 +258,16 @@ def _export_mode(
 
     K.set_learning_phase(mode == model_fn_lib.ModeKeys.TRAIN)
 
+    if input_signature is None:
+      input_tensors = None
+    else:
+      input_tensors = nest.map_structure(create_placeholder, input_signature)
+
     # Clone the model into blank graph. This will create placeholders for inputs
     # and targets.
     clone = models_lib.clone_and_build_model(
-        model, custom_objects=custom_objects, compile_clone=compile_clone)
+        model, input_tensors=input_tensors, custom_objects=custom_objects,
+        compile_clone=compile_clone)
 
     # Make sure that iterations variable is added to the global step collection,
     # to ensure that, when the SavedModel graph is loaded, the iterations
@@ -271,7 +321,7 @@ def _export_mode(
 
 
 def _create_signature_def_map(model, mode):
-  """Create a SignatureDef map from a Keras model."""
+  """Creates a SignatureDef map from a Keras model."""
   inputs_dict = {name: x for name, x in zip(model.input_names, model.inputs)}
   if model.optimizer:
     targets_dict = {x.name.split(':')[0]: x
@@ -309,14 +359,14 @@ def _create_signature_def_map(model, mode):
 
 
 def _assert_same_non_optimizer_objects(model, model_graph, clone, clone_graph):  # pylint: disable=unused-argument
-  """Assert model and clone contain the same checkpointable objects."""
+  """Asserts model and clone contain the same checkpointable objects."""
 
   # TODO(fchollet, kathywu): make sure this works in eager mode.
   return True
 
 
 def load_keras_model(saved_model_path):
-  """Load a keras.Model from SavedModel.
+  """Loads a keras.Model from SavedModel.
 
   load_model reinstantiates model state by:
   1) loading model topology from json (this will eventually come
diff --git a/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model_test.py b/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model_test.py
index 93d73e1b48..fbf8138493 100644
--- a/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model_test.py
+++ b/tensorflow/contrib/saved_model/python/saved_model/keras_saved_model_test.py
@@ -29,7 +29,9 @@ from tensorflow.python import keras
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
 from tensorflow.python.estimator import model_fn as model_fn_lib
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.utils import tf_utils
@@ -215,7 +217,7 @@ class LayerWithLearningPhase(keras.engine.base_layer.Layer):
     return input_shape
 
 
-def functional_model(uses_learning_phase):
+def functional_model(uses_learning_phase=True):
   inputs = keras.layers.Input(shape=(3,))
   x = keras.layers.Dense(2)(inputs)
   x = keras.layers.Dense(3)(x)
@@ -224,7 +226,7 @@ def functional_model(uses_learning_phase):
   return keras.models.Model(inputs, x)
 
 
-def sequential_model(uses_learning_phase):
+def sequential_model(uses_learning_phase=True):
   model = keras.models.Sequential()
   model.add(keras.layers.Dense(2, input_shape=(3,)))
   model.add(keras.layers.Dense(3))
@@ -233,7 +235,7 @@ def sequential_model(uses_learning_phase):
   return model
 
 
-def sequential_model_without_input_shape(uses_learning_phase):
+def sequential_model_without_input_shape(uses_learning_phase=True):
   model = keras.models.Sequential()
   model.add(keras.layers.Dense(2))
   model.add(keras.layers.Dense(3))
@@ -242,10 +244,30 @@ def sequential_model_without_input_shape(uses_learning_phase):
   return model
 
 
+class Subclassed(keras.models.Model):
+
+  def __init__(self):
+    super(Subclassed, self).__init__()
+    self.dense1 = keras.layers.Dense(2)
+    self.dense2 = keras.layers.Dense(3)
+
+  def call(self, inputs):
+    x = self.dense1(inputs)
+    x = self.dense2(x)
+    return x
+
+
+def subclassed_model():
+  return Subclassed()
+
+
 def load_model(sess, path, mode):
   tags = model_fn_lib.EXPORT_TAG_MAP[mode]
-  sig_def_key = (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
-                 if mode == model_fn_lib.ModeKeys.PREDICT else mode)
+  if mode == model_fn_lib.ModeKeys.PREDICT:
+    sig_def_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+  else:
+    sig_def_key = mode
+
   meta_graph_def = loader_impl.load(sess, tags, path)
   inputs = {
       k: sess.graph.get_tensor_by_name(v.name)
@@ -463,13 +485,54 @@ class TestModelSavedModelExport(test.TestCase, parameterized.TestCase):
       clone.compile(loss='mse', optimizer=keras.optimizers.RMSprop(lr=0.0001))
       clone.train_on_batch(input_arr, target_arr)
 
-  def testSaveSeqModelWithoutInputShapesRaisesError(self):
-    """A Sequential model that hasn't been built should raise an error."""
+  def testSaveSequentialModelWithoutInputShapes(self):
     model = sequential_model_without_input_shape(True)
-    with self.assertRaisesRegexp(
-        ValueError, 'must be built'):
+    # A Sequential model that hasn't been built should raise an error.
+    with self.assertRaisesRegexp(ValueError, 'Please build the model'):
       keras_saved_model.save_keras_model(model, '')
 
+    saved_model_path = self._save_model_dir()
+    output_path = keras_saved_model.save_keras_model(
+        model, saved_model_path,
+        input_signature=tensor_spec.TensorSpec(shape=(10, 11, 12, 13, 14),
+                                               dtype=dtypes.float32,
+                                               name='spec_input'))
+
+    with session.Session(graph=ops.Graph()) as sess:
+      inputs, outputs, _ = load_model(sess, output_path,
+                                      model_fn_lib.ModeKeys.PREDICT)
+      self.assertEqual(5, inputs[next(iter(inputs.keys()))].shape.ndims)
+      self.assertEqual(5, outputs[next(iter(outputs.keys()))].shape.ndims)
+      self.assertEqual(3, outputs[next(iter(outputs.keys()))].shape[-1])
+
+  @test_util.run_v2_only
+  @parameterized.parameters(
+      {
+          'model_builder': sequential_model_without_input_shape,
+          'input_signature': [tensor_spec.TensorSpec(shape=[None, 3],
+                                                     dtype=dtypes.float32)]},
+      {
+          'model_builder': subclassed_model,
+          'input_signature': [tensor_spec.TensorSpec(shape=[None, 3],
+                                                     dtype=dtypes.float32)]})
+  def testServingOnly(self, model_builder, input_signature):
+    saved_model_path = self._save_model_dir()
+    input_arr = np.random.random((5, 3)).astype(np.float32)
+    model = model_builder()
+    ref_predict = model.predict(input_arr)
+
+    output_path = keras_saved_model.save_keras_model(
+        model, saved_model_path, serving_only=True,
+        input_signature=input_signature)
+
+    # Load predict graph, and test predictions
+    with session.Session(graph=ops.Graph()) as sess:
+      inputs, outputs, _ = load_model(sess, output_path,
+                                      model_fn_lib.ModeKeys.PREDICT)
+      predictions = sess.run(outputs[next(iter(outputs.keys()))],
+                             {inputs[next(iter(inputs.keys()))]: input_arr})
+      self.assertAllClose(ref_predict, predictions, atol=1e-05)
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 976b23c98c32fc958da745a897d0e1cde484cc0a Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Thu, 13 Dec 2018 11:12:15 -0800
Subject: [PATCH 517/873] Internal change.

PiperOrigin-RevId: 225402420
---
 tensorflow/contrib/distribute/python/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 4c9c35da5a..322c02c210 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -611,6 +611,8 @@ cuda_py_test(
         "no_oss",  # TODO(b/117919883): Fix python error.
         "no_pip",
         "no_windows_gpu",
+        # TODO(b/120943676): Re-enable after fixing InvalidArgumentError.
+        "noguitar",
         "notsan",
     ],
 )
-- 
GitLab


From e8f8e219f7d9f0996bbda49be143718843ec5295 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Thu, 13 Dec 2018 11:20:53 -0800
Subject: [PATCH 518/873] [tf.data] Adds coverage for data experimental tests.

PiperOrigin-RevId: 225403946
---
 .../python/data/experimental/benchmarks/BUILD |  30 ++
 .../benchmarks/map_defun_benchmark.py         |  73 ++++
 .../benchmarks/optimize_benchmark.py          |  41 ++
 .../rejection_resample_benchmark.py           |  71 ++++
 .../benchmarks/unbatch_benchmark.py           |   2 +-
 .../data/experimental/kernel_tests/BUILD      |  28 +-
 .../kernel_tests/cardinality_test.py          |   2 +
 .../kernel_tests/copy_to_device_test.py       |   1 +
 .../experimental/kernel_tests/counter_test.py |  36 +-
 .../directed_interleave_dataset_test.py       |  46 +--
 .../kernel_tests/enumerate_dataset_test.py    |  25 +-
 .../kernel_tests/filter_dataset_op_test.py    |  76 ----
 .../kernel_tests/get_single_element_test.py   |  35 +-
 .../kernel_tests/group_by_reducer_test.py     |  63 ++-
 .../kernel_tests/group_by_window_test.py      | 367 ++++++++----------
 .../kernel_tests/ignore_errors_test.py        |  80 ++--
 .../kernel_tests/map_defun_op_test.py         |  43 +-
 .../kernel_tests/matching_files_test.py       | 113 ++----
 .../kernel_tests/override_threadpool_test.py  |   9 +-
 .../kernel_tests/prefetch_to_device_test.py   |   1 +
 .../kernel_tests/rejection_resample_test.py   |  88 +----
 .../kernel_tests/restructured_dataset_test.py |   1 +
 .../experimental/kernel_tests/scan_test.py    | 102 ++---
 .../kernel_tests/shuffle_and_repeat_test.py   |  31 +-
 .../experimental/kernel_tests/sleep_test.py   |  22 +-
 .../kernel_tests/stats_dataset_ops_test.py    | 356 ++++++++---------
 .../kernel_tests/stats_dataset_test_base.py   |  39 +-
 .../kernel_tests/tf_record_writer_test.py     |  47 +--
 .../experimental/kernel_tests/unbatch_test.py | 117 ++----
 .../experimental/kernel_tests/unique_test.py  |  20 +-
 .../kernel_tests/wrap_unwrap_test.py          |  15 +-
 .../python/data/kernel_tests/test_base.py     |   5 +
 32 files changed, 862 insertions(+), 1123 deletions(-)
 create mode 100644 tensorflow/python/data/experimental/benchmarks/map_defun_benchmark.py
 create mode 100644 tensorflow/python/data/experimental/benchmarks/rejection_resample_benchmark.py
 delete mode 100644 tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py

diff --git a/tensorflow/python/data/experimental/benchmarks/BUILD b/tensorflow/python/data/experimental/benchmarks/BUILD
index 8175116c6e..651dfd6857 100644
--- a/tensorflow/python/data/experimental/benchmarks/BUILD
+++ b/tensorflow/python/data/experimental/benchmarks/BUILD
@@ -58,6 +58,22 @@ py_test(
     ],
 )
 
+py_test(
+    name = "map_defun_benchmark",
+    srcs = ["map_defun_benchmark.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:functional_ops",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:tensor_spec",
+        "//tensorflow/python/data/experimental/ops:map_defun",
+        "//tensorflow/python/eager:function",
+    ],
+)
+
 py_test(
     name = "map_vectorization_benchmark",
     srcs = ["map_vectorization_benchmark.py"],
@@ -108,6 +124,20 @@ py_test(
     ],
 )
 
+py_test(
+    name = "rejection_resample_benchmark",
+    srcs = ["rejection_resample_benchmark.py"],
+    srcs_version = "PY2AND3",
+    tags = ["no_pip"],
+    deps = [
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python/data/experimental/ops:resampling",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//third_party/py/numpy",
+        "@six_archive//:six",
+    ],
+)
+
 py_test(
     name = "unbatch_benchmark",
     srcs = ["unbatch_benchmark.py"],
diff --git a/tensorflow/python/data/experimental/benchmarks/map_defun_benchmark.py b/tensorflow/python/data/experimental/benchmarks/map_defun_benchmark.py
new file mode 100644
index 0000000000..21e7ddaf7b
--- /dev/null
+++ b/tensorflow/python/data/experimental/benchmarks/map_defun_benchmark.py
@@ -0,0 +1,73 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Benchmarks for MapDefunOp."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+
+from tensorflow.python.data.experimental.ops import map_defun
+from tensorflow.python.eager import function
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import functional_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+# TODO(b/119837791): Add eager benchmarks too.
+class MapDefunBenchmark(test.Benchmark):
+  """Benchmarks for MapDefunOp."""
+
+  def _run(self, op, name=None, num_iters=3000):
+    for _ in range(5):
+      self.evaluate(op)
+    start = time.time()
+    for _ in range(num_iters):
+      self.evaluate(op)
+    end = time.time()
+    mean_us = (end - start) * 1e6 / num_iters
+    self.report_benchmark(
+        name=name,
+        iters=num_iters,
+        wall_time=mean_us,
+        extras={"examples_per_sec": num_iters / (end - start)})
+
+  def benchmarkDefunVsMapFn(self):
+    """Benchmarks to compare the performance of MapDefun vs tf.map_fn."""
+
+    @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.int32)])
+    def defun(x):
+      return array_ops.identity(x)
+
+    def map_fn(x):
+      return array_ops.identity(x)
+
+    base = math_ops.range(100)
+    for input_size in [10, 100, 1000, 10000]:
+      num_iters = 100000 // input_size
+      map_defun_op = map_defun.map_defun(defun, [base], [dtypes.int32], [()])
+      map_fn_op = functional_ops.map_fn(map_fn, base)
+
+      self._run(
+          map_defun_op, "with_defun_size_%d" % input_size, num_iters=num_iters)
+      self._run(
+          map_fn_op, "without_defun_size_%d" % input_size, num_iters=num_iters)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py b/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
index 2f9b89111f..5df57a370c 100644
--- a/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
@@ -28,6 +28,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
+# TODO(b/119837791): Add eager benchmarks too.
 class OptimizationBenchmark(test.Benchmark):
   """Benchmarks for static optimizations."""
 
@@ -115,6 +116,46 @@ class OptimizationBenchmark(test.Benchmark):
             name="map_and_filter_fusion_{}_chain_length_{}".format(
                 opt_mark, chain_length))
 
+  # This benchmark compares the performance of pipeline with multiple chained
+  # filter with and without filter fusion.
+  def benchmarkFilterFusion(self):
+    chain_lengths = [0, 1, 2, 5, 10, 20, 50]
+    for chain_length in chain_lengths:
+      self._benchmarkFilters(chain_length, False)
+      self._benchmarkFilters(chain_length, True)
+
+  def _benchmarkFilterFusion(self, chain_length, optimize_dataset):
+    with ops.Graph().as_default():
+      dataset = dataset_ops.Dataset.from_tensors(5).repeat(None)
+      for _ in range(chain_length):
+        dataset = dataset.filter(lambda x: math_ops.greater_equal(x - 5, 0))
+      if optimize_dataset:
+        options = dataset_ops.Options()
+        options.experimental_filter_fusion = True
+        dataset = dataset.with_options(options)
+
+      iterator = dataset_ops.make_one_shot_iterator(dataset)
+      next_element = iterator.get_next()
+
+      for _ in range(10):
+        self.evaluate(next_element.op)
+      deltas = []
+      for _ in range(100):
+        start = time.time()
+        for _ in range(100):
+          self.evaluate(next_element.op)
+        end = time.time()
+        deltas.append(end - start)
+
+      median_wall_time = np.median(deltas) / 100
+      opt_mark = "opt" if optimize_dataset else "no-opt"
+      print("Filter dataset {} chain length: {} Median wall time: {}".format(
+          opt_mark, chain_length, median_wall_time))
+      self.report_benchmark(
+          iters=1000,
+          wall_time=median_wall_time,
+          name="chain_length_{}_{}".format(opt_mark, chain_length))
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/benchmarks/rejection_resample_benchmark.py b/tensorflow/python/data/experimental/benchmarks/rejection_resample_benchmark.py
new file mode 100644
index 0000000000..4cd8c4b73a
--- /dev/null
+++ b/tensorflow/python/data/experimental/benchmarks/rejection_resample_benchmark.py
@@ -0,0 +1,71 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Benchmarks for `tf.data.experimental.rejection_resample()`."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+
+import numpy as np
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.python.data.experimental.ops import resampling
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.platform import test
+
+
+def _time_resampling(
+    test_obj, data_np, target_dist, init_dist, num_to_sample):
+  dataset = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()
+
+  # Reshape distribution via rejection sampling.
+  dataset = dataset.apply(
+      resampling.rejection_resample(
+          class_func=lambda x: x,
+          target_dist=target_dist,
+          initial_dist=init_dist,
+          seed=142))
+
+  get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
+
+  with test_obj.test_session() as sess:
+    start_time = time.time()
+    for _ in xrange(num_to_sample):
+      sess.run(get_next)
+    end_time = time.time()
+
+  return end_time - start_time
+
+
+class RejectionResampleBenchmark(test.Benchmark):
+  """Benchmarks for `tf.data.experimental.rejection_resample()`."""
+
+  def benchmarkResamplePerformance(self):
+    init_dist = [0.25, 0.25, 0.25, 0.25]
+    target_dist = [0.0, 0.0, 0.0, 1.0]
+    num_classes = len(init_dist)
+    # We don't need many samples to test a dirac-delta target distribution
+    num_samples = 1000
+    data_np = np.random.choice(num_classes, num_samples, p=init_dist)
+
+    resample_time = _time_resampling(
+        self, data_np, target_dist, init_dist, num_to_sample=1000)
+
+    self.report_benchmark(iters=1000, wall_time=resample_time, name="resample")
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/experimental/benchmarks/unbatch_benchmark.py b/tensorflow/python/data/experimental/benchmarks/unbatch_benchmark.py
index c36a32534d..6f80df50b8 100644
--- a/tensorflow/python/data/experimental/benchmarks/unbatch_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/unbatch_benchmark.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for `tf.data.experimental.unbatch()`."""
+"""Benchmarks for `tf.data.experimental.unbatch()`."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD
index 548eb422ed..9362a3e8eb 100644
--- a/tensorflow/python/data/experimental/kernel_tests/BUILD
+++ b/tensorflow/python/data/experimental/kernel_tests/BUILD
@@ -1,12 +1,12 @@
+load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+
 package(default_visibility = ["//tensorflow:internal"])
 
 licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load("//tensorflow:tensorflow.bzl", "cuda_py_test")
-load("//tensorflow:tensorflow.bzl", "py_test")
-
 py_test(
     name = "bucket_by_sequence_length_test",
     size = "medium",
@@ -129,26 +129,6 @@ py_test(
     ],
 )
 
-py_test(
-    name = "filter_dataset_op_test",
-    size = "medium",
-    srcs = ["filter_dataset_op_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:io_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/experimental/ops:optimization",
-        "//tensorflow/python/data/kernel_tests:test_base",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
 py_test(
     name = "get_single_element_test",
     size = "small",
@@ -622,7 +602,7 @@ py_test(
 
 py_test(
     name = "stats_dataset_ops_test",
-    size = "medium",
+    size = "large",
     srcs = ["stats_dataset_ops_test.py"],
     srcs_version = "PY2AND3",
     tags = [
diff --git a/tensorflow/python/data/experimental/kernel_tests/cardinality_test.py b/tensorflow/python/data/experimental/kernel_tests/cardinality_test.py
index 943f0f1f81..4a8296d084 100644
--- a/tensorflow/python/data/experimental/kernel_tests/cardinality_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/cardinality_test.py
@@ -22,9 +22,11 @@ from absl.testing import parameterized
 from tensorflow.python.data.experimental.ops import cardinality
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class NumElementsTest(test_base.DatasetTestBase, parameterized.TestCase):
   """Tests for `tf.data.experimental.cardinality()`."""
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
index b8166fe833..786eae11a9 100644
--- a/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat as util_compat
 
 
+# TODO(b/119837791): add eager coverage when supported.
 class CopyToDeviceTest(test_base.DatasetTestBase):
 
   @test_util.run_deprecated_v1
diff --git a/tensorflow/python/data/experimental/kernel_tests/counter_test.py b/tensorflow/python/data/experimental/kernel_tests/counter_test.py
index 49e1f2272b..436fa506c4 100644
--- a/tensorflow/python/data/experimental/kernel_tests/counter_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/counter_test.py
@@ -19,35 +19,31 @@ from __future__ import print_function
 
 from tensorflow.python.data.experimental.ops import counter
 from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class CounterTest(test_base.DatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testCounter(self):
     """Test dataset construction using `count`."""
-    iterator = dataset_ops.make_one_shot_iterator(
-        counter.Counter(start=3, step=4))
-    get_next = iterator.get_next()
-    self.assertEqual([], get_next.shape.as_list())
-    self.assertEqual(dtypes.int64, get_next.dtype)
-
-    negative_iterator = dataset_ops.make_one_shot_iterator(
-        counter.Counter(start=0, step=-1))
-    negative_get_next = negative_iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.assertEqual(3, self.evaluate(get_next))
-      self.assertEqual(3 + 4, self.evaluate(get_next))
-      self.assertEqual(3 + 2 * 4, self.evaluate(get_next))
-
-      self.assertEqual(0, self.evaluate(negative_get_next))
-      self.assertEqual(-1, self.evaluate(negative_get_next))
-      self.assertEqual(-2, self.evaluate(negative_get_next))
+    dataset = counter.Counter(start=3, step=4)
+    self.assertEqual([], dataset.output_shapes.as_list())
+    self.assertEqual(dtypes.int64, dataset.output_types)
+    get_next = self.getNext(dataset)
+
+    negative_dataset = counter.Counter(start=0, step=-1)
+    negative_get_next = self.getNext(negative_dataset)
+
+    self.assertEqual(3, self.evaluate(get_next()))
+    self.assertEqual(3 + 4, self.evaluate(get_next()))
+    self.assertEqual(3 + 2 * 4, self.evaluate(get_next()))
+
+    self.assertEqual(0, self.evaluate(negative_get_next()))
+    self.assertEqual(-1, self.evaluate(negative_get_next()))
+    self.assertEqual(-2, self.evaluate(negative_get_next()))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
index 2144342066..df69a9dbb0 100644
--- a/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/directed_interleave_dataset_test.py
@@ -28,9 +28,9 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testBasic(self):
     selector_dataset = dataset_ops.Dataset.range(10).repeat(100)
     input_datasets = [
@@ -38,16 +38,13 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
     ]
     dataset = interleave_ops._DirectedInterleaveDataset(selector_dataset,
                                                         input_datasets)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
+    next_element = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      for _ in range(100):
-        for i in range(10):
-          self.assertEqual(i, self.evaluate(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    for _ in range(100):
+      for i in range(10):
+        self.assertEqual(i, self.evaluate(next_element()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def _normalize(self, vec):
     return vec / vec.sum()
@@ -67,19 +64,16 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
         for i in range(num_datasets)
     ], weights)
     dataset = dataset.take(num_samples)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-    next_element = iterator.get_next()
 
-    with self.cached_session() as sess:
-      freqs = np.zeros([num_datasets])
-      for _ in range(num_samples):
-        freqs[self.evaluate(next_element)] += 1
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    next_element = self.getNext(dataset)
+    freqs = np.zeros([num_datasets])
+    for _ in range(num_samples):
+      freqs[self.evaluate(next_element())] += 1
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
     return freqs
 
-  @test_util.run_deprecated_v1
   def testSampleFromDatasets(self):
     random_seed.set_random_seed(1619)
     num_samples = 5000
@@ -99,21 +93,17 @@ class DirectedInterleaveDatasetTest(test_base.DatasetTestBase):
       freqs = self._testSampleFromDatasetsHelper(probs_ds, classes, num_samples)
       self.assertLess(self._chi2(probs, freqs / num_samples), 1e-2)
 
-  @test_util.run_deprecated_v1
   def testSelectFromDatasets(self):
     words = [b"foo", b"bar", b"baz"]
     datasets = [dataset_ops.Dataset.from_tensors(w).repeat() for w in words]
     choice_array = np.random.randint(3, size=(15,), dtype=np.int64)
     choice_dataset = dataset_ops.Dataset.from_tensor_slices(choice_array)
     dataset = interleave_ops.choose_from_datasets(datasets, choice_dataset)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in choice_array:
-        self.assertEqual(words[i], self.evaluate(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    next_element = self.getNext(dataset)
+    for i in choice_array:
+      self.assertEqual(words[i], self.evaluate(next_element()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testErrors(self):
     with self.assertRaisesRegexp(ValueError,
diff --git a/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
index 25742098f1..cbc048e3ab 100644
--- a/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/enumerate_dataset_test.py
@@ -22,37 +22,28 @@ from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class EnumerateDatasetTest(test_base.DatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testEnumerateDataset(self):
     components = (["a", "b"], [1, 2], [37.0, 38])
     start = constant_op.constant(20, dtype=dtypes.int64)
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            enumerate_ops.enumerate_dataset(start)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
+        enumerate_ops.enumerate_dataset(start))
 
-    self.assertEqual(dtypes.int64, get_next[0].dtype)
-    self.assertEqual((), get_next[0].shape)
+    self.assertEqual(dtypes.int64, dataset.output_types[0])
+    self.assertEqual((), dataset.output_shapes[0])
     self.assertEqual([tensor_shape.TensorShape([])] * 3,
-                     [t.shape for t in get_next[1]])
+                     [shape for shape in dataset.output_shapes[1]])
 
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      self.assertEqual((20, (b"a", 1, 37.0)), self.evaluate(get_next))
-      self.assertEqual((21, (b"b", 2, 38.0)), self.evaluate(get_next))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    self.assertDatasetProduces(dataset, [(20, (b"a", 1, 37.0)),
+                                         (21, (b"b", 2, 38.0))])
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
deleted file mode 100644
index 357b5f1b49..0000000000
--- a/tensorflow/python/data/experimental/kernel_tests/filter_dataset_op_test.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmarks FilterDataset input pipeline op."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import time
-
-import numpy as np
-
-from tensorflow.python.client import session
-from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import test
-
-
-class FilterBenchmark(test.Benchmark):
-
-  # This benchmark compares the performance of pipeline with multiple chained
-  # filter with and without filter fusion.
-  def benchmarkFilters(self):
-    chain_lengths = [0, 1, 2, 5, 10, 20, 50]
-    for chain_length in chain_lengths:
-      self._benchmarkFilters(chain_length, False)
-      self._benchmarkFilters(chain_length, True)
-
-  def _benchmarkFilters(self, chain_length, optimize_dataset):
-    with ops.Graph().as_default():
-      dataset = dataset_ops.Dataset.from_tensors(5).repeat(None)
-      for _ in range(chain_length):
-        dataset = dataset.filter(lambda x: math_ops.greater_equal(x - 5, 0))
-      if optimize_dataset:
-        dataset = dataset.apply(optimization.optimize(["filter_fusion"]))
-
-      iterator = dataset_ops.make_one_shot_iterator(dataset)
-      next_element = iterator.get_next()
-
-      with session.Session() as sess:
-        for _ in range(10):
-          self.evaluate(next_element.op)
-        deltas = []
-        for _ in range(100):
-          start = time.time()
-          for _ in range(100):
-            self.evaluate(next_element.op)
-          end = time.time()
-          deltas.append(end - start)
-
-        median_wall_time = np.median(deltas) / 100
-        opt_mark = "opt" if optimize_dataset else "no-opt"
-        print("Filter dataset {} chain length: {} Median wall time: {}".format(
-            opt_mark, chain_length, median_wall_time))
-        self.report_benchmark(
-            iters=1000,
-            wall_time=median_wall_time,
-            name="benchmark_filter_dataset_chain_latency_{}_{}".format(
-                opt_mark, chain_length))
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py b/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
index ef576563a1..3e2cf779a3 100644
--- a/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/get_single_element_test.py
@@ -22,7 +22,6 @@ from absl.testing import parameterized
 from tensorflow.python.data.experimental.ops import get_single_element
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
@@ -30,6 +29,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class GetSingleElementTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
@@ -40,34 +40,25 @@ class GetSingleElementTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("MoreThanOne", 0, 2, errors.InvalidArgumentError,
        "Dataset had more than one element."),
   )
-  @test_util.run_deprecated_v1
   def testGetSingleElement(self, skip, take, error=None, error_msg=None):
-    skip_t = array_ops.placeholder(dtypes.int64, shape=[])
-    take_t = array_ops.placeholder(dtypes.int64, shape=[])
 
     def make_sparse(x):
       x_1d = array_ops.reshape(x, [1])
       x_2d = array_ops.reshape(x, [1, 1])
       return sparse_tensor.SparseTensor(x_2d, x_1d, x_1d)
 
-    dataset = dataset_ops.Dataset.range(100).skip(skip_t).map(
-        lambda x: (x * x, make_sparse(x))).take(take_t)
-    element = get_single_element.get_single_element(dataset)
-
-    with self.cached_session() as sess:
-      if error is None:
-        dense_val, sparse_val = sess.run(
-            element, feed_dict={
-                skip_t: skip,
-                take_t: take
-            })
-        self.assertEqual(skip * skip, dense_val)
-        self.assertAllEqual([[skip]], sparse_val.indices)
-        self.assertAllEqual([skip], sparse_val.values)
-        self.assertAllEqual([skip], sparse_val.dense_shape)
-      else:
-        with self.assertRaisesRegexp(error, error_msg):
-          sess.run(element, feed_dict={skip_t: skip, take_t: take})
+    dataset = dataset_ops.Dataset.range(100).skip(
+        skip).map(lambda x: (x * x, make_sparse(x))).take(take)
+    if error is None:
+      dense_val, sparse_val = self.evaluate(
+          get_single_element.get_single_element(dataset))
+      self.assertEqual(skip * skip, dense_val)
+      self.assertAllEqual([[skip]], sparse_val.indices)
+      self.assertAllEqual([skip], sparse_val.values)
+      self.assertAllEqual([skip], sparse_val.dense_shape)
+    else:
+      with self.assertRaisesRegexp(error, error_msg):
+        self.evaluate(get_single_element.get_single_element(dataset))
 
   def testWindow(self):
     """Test that `get_single_element()` can consume a nested dataset."""
diff --git a/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py b/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
index 8507df3d3a..4194f06a34 100644
--- a/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/group_by_reducer_test.py
@@ -33,19 +33,9 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class GroupByReducerTest(test_base.DatasetTestBase):
 
-  def checkResults(self, dataset, shapes, values):
-    self.assertEqual(shapes, dataset.output_shapes)
-    get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
-    with self.cached_session() as sess:
-      for expected in values:
-        got = self.evaluate(get_next)
-        self.assertEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
-
-  @test_util.run_deprecated_v1
   def testSum(self):
     reducer = grouping.Reducer(
         init_func=lambda _: np.int64(0),
@@ -54,10 +44,11 @@ class GroupByReducerTest(test_base.DatasetTestBase):
     for i in range(1, 11):
       dataset = dataset_ops.Dataset.range(2 * i).apply(
           grouping.group_by_reducer(lambda x: x % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
+      self.assertDatasetProduces(
+          dataset,
+          expected_shapes=tensor_shape.scalar(),
+          expected_output=[(i - 1) * i, i * i])
 
-  @test_util.run_deprecated_v1
   def testAverage(self):
 
     def reduce_fn(x, y):
@@ -72,10 +63,11 @@ class GroupByReducerTest(test_base.DatasetTestBase):
       dataset = dataset_ops.Dataset.range(2 * i).apply(
           grouping.group_by_reducer(
               lambda x: math_ops.cast(x, dtypes.int64) % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[i - 1, i])
+      self.assertDatasetProduces(
+          dataset,
+          expected_shapes=tensor_shape.scalar(),
+          expected_output=[i - 1, i])
 
-  @test_util.run_deprecated_v1
   def testConcat(self):
     components = np.array(list("abcdefghijklmnopqrst")).view(np.chararray)
     reducer = grouping.Reducer(
@@ -87,12 +79,11 @@ class GroupByReducerTest(test_base.DatasetTestBase):
           (dataset_ops.Dataset.from_tensor_slices(components),
            dataset_ops.Dataset.range(2 * i))).apply(
                grouping.group_by_reducer(lambda x, y: y % 2, reducer))
-      self.checkResults(
+      self.assertDatasetProduces(
           dataset,
-          shapes=tensor_shape.scalar(),
-          values=[b"acegikmoqs" [:i], b"bdfhjlnprt" [:i]])
+          expected_shapes=tensor_shape.scalar(),
+          expected_output=[b"acegikmoqs" [:i], b"bdfhjlnprt" [:i]])
 
-  @test_util.run_deprecated_v1
   def testSparseSum(self):
     def _sparse(i):
       return sparse_tensor.SparseTensorValue(
@@ -107,10 +98,11 @@ class GroupByReducerTest(test_base.DatasetTestBase):
     for i in range(1, 11):
       dataset = dataset_ops.Dataset.range(2 * i).map(_sparse).apply(
           grouping.group_by_reducer(lambda x: x.values[0] % 2, reducer))
-      self.checkResults(
-          dataset, shapes=tensor_shape.scalar(), values=[(i - 1) * i, i * i])
+      self.assertDatasetProduces(
+          dataset,
+          expected_shapes=tensor_shape.scalar(),
+          expected_output=[(i - 1) * i, i * i])
 
-  @test_util.run_deprecated_v1
   def testChangingStateShape(self):
 
     def reduce_fn(x, _):
@@ -130,14 +122,12 @@ class GroupByReducerTest(test_base.DatasetTestBase):
           grouping.group_by_reducer(lambda x: x, reducer))
       self.assertEqual([None], dataset.output_shapes[0].as_list())
       self.assertIs(None, dataset.output_shapes[1].ndims)
-      iterator = dataset_ops.make_one_shot_iterator(dataset)
-      get_next = iterator.get_next()
-      with self.cached_session() as sess:
-        x, y = self.evaluate(get_next)
-        self.assertAllEqual([0] * (2**i), x)
-        self.assertAllEqual(np.array(1, ndmin=i), y)
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(get_next)
+      get_next = self.getNext(dataset)
+      x, y = self.evaluate(get_next())
+      self.assertAllEqual([0] * (2**i), x)
+      self.assertAllEqual(np.array(1, ndmin=i), y)
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
 
   def testTypeMismatch(self):
     reducer = grouping.Reducer(
@@ -194,11 +184,10 @@ class GroupByReducerTest(test_base.DatasetTestBase):
     dataset = dataset_ops.Dataset.zip(
         (dataset_ops.Dataset.range(10), dataset_ops.Dataset.range(10))).apply(
             grouping.group_by_reducer(lambda x, y: np.int64(0), reducer))
-    get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
-    with self.cached_session() as sess:
-      x, y = self.evaluate(get_next)
-      self.assertAllEqual(x, np.asarray([x for x in range(10)]))
-      self.assertEqual(y, 45)
+    get_next = self.getNext(dataset)
+    x, y = self.evaluate(get_next())
+    self.assertAllEqual(x, np.asarray([x for x in range(10)]))
+    self.assertEqual(y, 45)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py b/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
index cbb79e55f5..d1270703c5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/group_by_window_test.py
@@ -37,6 +37,7 @@ from tensorflow.python.platform import test
 # NOTE(mrry): These tests are based on the tests in bucket_ops_test.py.
 # Currently, they use a constant batch size, though should be made to use a
 # different batch size per key.
+@test_util.run_all_in_graph_and_eager_modes
 class GroupByWindowTest(test_base.DatasetTestBase):
 
   def _dynamicPad(self, bucket, window, window_size):
@@ -50,101 +51,87 @@ class GroupByWindowTest(test_base.DatasetTestBase):
              32, (tensor_shape.TensorShape([]), tensor_shape.TensorShape(
                  [None]), tensor_shape.TensorShape([3])))))
 
-  @test_util.run_deprecated_v1
   def testSingleBucket(self):
 
     def _map_fn(v):
       return (v, array_ops.fill([v], v),
               array_ops.fill([3], string_ops.as_string(v)))
 
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(32)).map(_map_fn))
+    input_dataset = dataset_ops.Dataset.from_tensor_slices(
+        math_ops.range(32)).map(_map_fn)
 
     bucketed_dataset = input_dataset.apply(
         grouping.group_by_window(
             lambda x, y, z: 0,
             lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
+    get_next = self.getNext(bucketed_dataset)
 
-    iterator = dataset_ops.make_initializable_iterator(bucketed_dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    which_bucket, bucketed_values = self.evaluate(get_next())
 
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
+    self.assertEqual(0, which_bucket)
 
-      which_bucket, bucketed_values = self.evaluate(get_next)
+    expected_scalar_int = np.arange(32, dtype=np.int64)
+    expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
+    for i in range(32):
+      expected_unk_int64[i, :i] = i
+    expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T
 
-      self.assertEqual(0, which_bucket)
+    self.assertAllEqual(expected_scalar_int, bucketed_values[0])
+    self.assertAllEqual(expected_unk_int64, bucketed_values[1])
+    self.assertAllEqual(expected_vec3_str, bucketed_values[2])
 
-      expected_scalar_int = np.arange(32, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
-      for i in range(32):
-        expected_unk_int64[i, :i] = i
-      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values[2])
-
-  @test_util.run_deprecated_v1
   def testEvenOddBuckets(self):
 
     def _map_fn(v):
       return (v, array_ops.fill([v], v),
               array_ops.fill([3], string_ops.as_string(v)))
 
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(64)).map(_map_fn))
+    input_dataset = dataset_ops.Dataset.from_tensor_slices(
+        math_ops.range(64)).map(_map_fn)
 
     bucketed_dataset = input_dataset.apply(
         grouping.group_by_window(
             lambda x, y, z: math_ops.cast(x % 2, dtypes.int64),
             lambda k, bucket: self._dynamicPad(k, bucket, 32), 32))
 
-    iterator = dataset_ops.make_initializable_iterator(bucketed_dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-
-      # Get two minibatches (one containing even values, one containing odds)
-      which_bucket_even, bucketed_values_even = self.evaluate(get_next)
-      which_bucket_odd, bucketed_values_odd = self.evaluate(get_next)
-
-      # Count number of bucket_tensors.
-      self.assertEqual(3, len(bucketed_values_even))
-      self.assertEqual(3, len(bucketed_values_odd))
-
-      # Ensure bucket 0 was used for all minibatch entries.
-      self.assertAllEqual(0, which_bucket_even)
-      self.assertAllEqual(1, which_bucket_odd)
-
-      # Test the first bucket outputted, the events starting at 0
-      expected_scalar_int = np.arange(0, 32 * 2, 2, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
-      for i in range(0, 32):
-        expected_unk_int64[i, :2 * i] = 2 * i
-        expected_vec3_str = np.vstack(
-            3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values_even[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values_even[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values_even[2])
-
-      # Test the second bucket outputted, the odds starting at 1
-      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2, dtype=np.int64)
-      expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
-      for i in range(0, 32):
-        expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
-        expected_vec3_str = np.vstack(
-            3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T
-
-      self.assertAllEqual(expected_scalar_int, bucketed_values_odd[0])
-      self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1])
-      self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2])
-
-  @test_util.run_deprecated_v1
+    get_next = self.getNext(bucketed_dataset)
+
+    # Get two minibatches (one containing even values, one containing odds)
+    which_bucket_even, bucketed_values_even = self.evaluate(get_next())
+    which_bucket_odd, bucketed_values_odd = self.evaluate(get_next())
+
+    # Count number of bucket_tensors.
+    self.assertEqual(3, len(bucketed_values_even))
+    self.assertEqual(3, len(bucketed_values_odd))
+
+    # Ensure bucket 0 was used for all minibatch entries.
+    self.assertAllEqual(0, which_bucket_even)
+    self.assertAllEqual(1, which_bucket_odd)
+
+    # Test the first bucket outputted, the events starting at 0
+    expected_scalar_int = np.arange(0, 32 * 2, 2, dtype=np.int64)
+    expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
+    for i in range(0, 32):
+      expected_unk_int64[i, :2 * i] = 2 * i
+      expected_vec3_str = np.vstack(
+          3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T
+
+    self.assertAllEqual(expected_scalar_int, bucketed_values_even[0])
+    self.assertAllEqual(expected_unk_int64, bucketed_values_even[1])
+    self.assertAllEqual(expected_vec3_str, bucketed_values_even[2])
+
+    # Test the second bucket outputted, the odds starting at 1
+    expected_scalar_int = np.arange(1, 32 * 2 + 1, 2, dtype=np.int64)
+    expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
+    for i in range(0, 32):
+      expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
+      expected_vec3_str = np.vstack(
+          3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T
+
+    self.assertAllEqual(expected_scalar_int, bucketed_values_odd[0])
+    self.assertAllEqual(expected_unk_int64, bucketed_values_odd[1])
+    self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2])
+
   def testEvenOddBucketsFilterOutAllOdd(self):
 
     def _map_fn(v):
@@ -164,35 +151,28 @@ class GroupByWindowTest(test_base.DatasetTestBase):
                    "z": tensor_shape.TensorShape([3])
                })))
 
-    input_dataset = (
-        dataset_ops.Dataset.from_tensor_slices(math_ops.range(128)).map(_map_fn)
-        .filter(lambda d: math_ops.equal(d["x"] % 2, 0)))
+    input_dataset = dataset_ops.Dataset.from_tensor_slices(math_ops.range(
+        128)).map(_map_fn).filter(lambda d: math_ops.equal(d["x"] % 2, 0))
 
     bucketed_dataset = input_dataset.apply(
         grouping.group_by_window(
             lambda d: math_ops.cast(d["x"] % 2, dtypes.int64),
             lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32))
 
-    iterator = dataset_ops.make_initializable_iterator(bucketed_dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
+    get_next = self.getNext(bucketed_dataset)
 
-      # Get two minibatches ([0, 2, ...] and [64, 66, ...])
-      which_bucket0, bucketed_values_even0 = self.evaluate(get_next)
-      which_bucket1, bucketed_values_even1 = self.evaluate(get_next)
+    # Get two minibatches ([0, 2, ...] and [64, 66, ...])
+    which_bucket0, bucketed_values_even0 = self.evaluate(get_next())
+    which_bucket1, bucketed_values_even1 = self.evaluate(get_next())
 
-      # Ensure that bucket 1 was completely filtered out
-      self.assertAllEqual(0, which_bucket0)
-      self.assertAllEqual(0, which_bucket1)
-      self.assertAllEqual(
-          np.arange(0, 64, 2, dtype=np.int64), bucketed_values_even0["x"])
-      self.assertAllEqual(
-          np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"])
+    # Ensure that bucket 1 was completely filtered out
+    self.assertAllEqual(0, which_bucket0)
+    self.assertAllEqual(0, which_bucket1)
+    self.assertAllEqual(
+        np.arange(0, 64, 2, dtype=np.int64), bucketed_values_even0["x"])
+    self.assertAllEqual(
+        np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"])
 
-  @test_util.run_deprecated_v1
   def testDynamicWindowSize(self):
     components = np.arange(100).astype(np.int64)
 
@@ -207,111 +187,81 @@ class GroupByWindowTest(test_base.DatasetTestBase):
     dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
         grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(20),
                                  None, window_size_func))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      with self.assertRaises(errors.OutOfRangeError):
-        batches = 0
-        while True:
-          result = self.evaluate(get_next)
-          is_even = all(x % 2 == 0 for x in result)
-          is_odd = all(x % 2 == 1 for x in result)
-          self.assertTrue(is_even or is_odd)
-          expected_batch_size = 5 if is_even else 10
-          self.assertEqual(expected_batch_size, result.shape[0])
-          batches += 1
-
-      self.assertEqual(batches, 15)
-
-  @test_util.run_deprecated_v1
+
+    get_next = self.getNext(dataset)
+    with self.assertRaises(errors.OutOfRangeError):
+      batches = 0
+      while True:
+        result = self.evaluate(get_next())
+        is_even = all(x % 2 == 0 for x in result)
+        is_odd = all(x % 2 == 1 for x in result)
+        self.assertTrue(is_even or is_odd)
+        expected_batch_size = 5 if is_even else 10
+        self.assertEqual(expected_batch_size, result.shape[0])
+        batches += 1
+
+    self.assertEqual(batches, 15)
+
   def testSimple(self):
     components = np.random.randint(100, size=(200,)).astype(np.int64)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensor_slices(components).map(lambda x: x * x)
-        .apply(
+    dataset = dataset_ops.Dataset.from_tensor_slices(
+        components).map(lambda x: x * x).apply(
             grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
-                                     4)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      counts = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          result = self.evaluate(get_next)
-          self.assertTrue(
-              all(x % 2 == 0
-                  for x in result) or all(x % 2 == 1)
-              for x in result)
-          counts.append(result.shape[0])
-
-      self.assertEqual(len(components), sum(counts))
-      num_full_batches = len([c for c in counts if c == 4])
-      self.assertGreaterEqual(num_full_batches, 24)
-      self.assertTrue(all(c == 4 for c in counts[:num_full_batches]))
-
-  @test_util.run_deprecated_v1
+                                     4))
+    get_next = self.getNext(dataset)
+    counts = []
+    with self.assertRaises(errors.OutOfRangeError):
+      while True:
+        result = self.evaluate(get_next())
+        self.assertTrue(
+            all(x % 2 == 0 for x in result) or all(x % 2 == 1) for x in result)
+        counts.append(result.shape[0])
+
+    self.assertEqual(len(components), sum(counts))
+    num_full_batches = len([c for c in counts if c == 4])
+    self.assertGreaterEqual(num_full_batches, 24)
+    self.assertTrue(all(c == 4 for c in counts[:num_full_batches]))
+
   def testImmediateOutput(self):
     components = np.array(
         [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply(
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
+        -1).apply(
             grouping.group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4),
-                                     4)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      # The input is infinite, so this test demonstrates that:
-      # 1. We produce output without having to consume the entire input,
-      # 2. Different buckets can produce output at different rates, and
-      # 3. For deterministic input, the output is deterministic.
-      for _ in range(3):
-        self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next))
-        self.assertAllEqual([1, 1, 1, 1], self.evaluate(get_next))
-        self.assertAllEqual([2, 2, 2, 2], self.evaluate(get_next))
-        self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next))
-
-  @test_util.run_deprecated_v1
+                                     4))
+    get_next = self.getNext(dataset)
+    # The input is infinite, so this test demonstrates that:
+    # 1. We produce output without having to consume the entire input,
+    # 2. Different buckets can produce output at different rates, and
+    # 3. For deterministic input, the output is deterministic.
+    for _ in range(3):
+      self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next()))
+      self.assertAllEqual([1, 1, 1, 1], self.evaluate(get_next()))
+      self.assertAllEqual([2, 2, 2, 2], self.evaluate(get_next()))
+      self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next()))
+
   def testSmallGroups(self):
     components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensor_slices(components).apply(
-            grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4),
-                                     4)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next))
-      self.assertAllEqual([1, 1, 1, 1], self.evaluate(get_next))
-      # The small outputs at the end are deterministically produced in key
-      # order.
-      self.assertAllEqual([0, 0, 0], self.evaluate(get_next))
-      self.assertAllEqual([1], self.evaluate(get_next))
-
-  @test_util.run_deprecated_v1
+    dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
+        grouping.group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4), 4))
+    get_next = self.getNext(dataset)
+    self.assertAllEqual([0, 0, 0, 0], self.evaluate(get_next()))
+    self.assertAllEqual([1, 1, 1, 1], self.evaluate(get_next()))
+    # The small outputs at the end are deterministically produced in key
+    # order.
+    self.assertAllEqual([0, 0, 0], self.evaluate(get_next()))
+    self.assertAllEqual([1], self.evaluate(get_next()))
+
   def testEmpty(self):
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.range(4).apply(
-            grouping.group_by_window(lambda _: 0, lambda _, xs: xs, 0)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      with self.assertRaisesRegexp(
-          errors.InvalidArgumentError,
-          "Window size must be greater than zero, but got 0."):
-        print(self.evaluate(get_next))
-
-  @test_util.run_deprecated_v1
+    dataset = dataset_ops.Dataset.range(4).apply(
+        grouping.group_by_window(lambda _: 0, lambda _, xs: xs, 0))
+
+    get_next = self.getNext(dataset)
+    with self.assertRaisesRegexp(
+        errors.InvalidArgumentError,
+        "Window size must be greater than zero, but got 0."):
+      print(self.evaluate(get_next()))
+
   def testReduceFuncError(self):
     components = np.random.randint(100, size=(200,)).astype(np.int64)
 
@@ -323,19 +273,13 @@ class GroupByWindowTest(test_base.DatasetTestBase):
           padded_shapes=(tensor_shape.TensorShape([]),
                          constant_op.constant([5], dtype=dtypes.int64) * -1))
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply(
-            grouping.group_by_window(lambda x, _: x % 2, reduce_func, 32)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
+    dataset = dataset_ops.Dataset.from_tensor_slices(
+        components).map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply(
+            grouping.group_by_window(lambda x, _: x % 2, reduce_func, 32))
+    get_next = self.getNext(dataset)
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(get_next())
 
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      with self.assertRaises(errors.InvalidArgumentError):
-        self.evaluate(get_next)
-
-  @test_util.run_deprecated_v1
   def testConsumeWindowDatasetMoreThanOnce(self):
     components = np.random.randint(50, size=(200,)).astype(np.int64)
 
@@ -349,26 +293,23 @@ class GroupByWindowTest(test_base.DatasetTestBase):
               4, padded_shapes=ops.convert_to_tensor([(key + 1) * 10])),
       ))
 
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x))
-        .apply(grouping.group_by_window(
+    dataset = dataset_ops.Dataset.from_tensor_slices(
+        components
+    ).map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x)).apply(
+        grouping.group_by_window(
             lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64),
-            reduce_func, 4)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      counts = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          tight_result, multiple_of_10_result = self.evaluate(get_next)
-          self.assertEqual(0, multiple_of_10_result.shape[1] % 10)
-          self.assertAllEqual(tight_result,
-                              multiple_of_10_result[:, :tight_result.shape[1]])
-          counts.append(tight_result.shape[0])
-      self.assertEqual(len(components), sum(counts))
+            reduce_func, 4))
+
+    get_next = self.getNext(dataset)
+    counts = []
+    with self.assertRaises(errors.OutOfRangeError):
+      while True:
+        tight_result, multiple_of_10_result = self.evaluate(get_next())
+        self.assertEqual(0, multiple_of_10_result.shape[1] % 10)
+        self.assertAllEqual(tight_result,
+                            multiple_of_10_result[:, :tight_result.shape[1]])
+        counts.append(tight_result.shape[0])
+    self.assertEqual(len(components), sum(counts))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py b/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
index 81f580fccb..1d02f4fb77 100644
--- a/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/ignore_errors_test.py
@@ -34,9 +34,9 @@ from tensorflow.python.util import compat
 _NUMPY_RANDOM_SEED = 42
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class IgnoreErrorsTest(test_base.DatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testMapIgnoreError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
 
@@ -44,18 +44,13 @@ class IgnoreErrorsTest(test_base.DatasetTestBase):
         dataset_ops.Dataset.from_tensor_slices(components)
         .map(lambda x: array_ops.check_numerics(x, "message")).apply(
             error_ops.ignore_errors()))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      for x in [1., 2., 3., 5.]:
-        self.assertEqual(x, self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
-
-  @test_util.run_deprecated_v1
+    get_next = self.getNext(dataset)
+
+    for x in [1., 2., 3., 5.]:
+      self.assertEqual(x, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+
   def testParallelMapIgnoreError(self):
     components = np.array([1., 2., 3., np.nan, 5.]).astype(np.float32)
 
@@ -63,18 +58,13 @@ class IgnoreErrorsTest(test_base.DatasetTestBase):
         dataset_ops.Dataset.from_tensor_slices(components).map(
             lambda x: array_ops.check_numerics(x, "message"),
             num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      for x in [1., 2., 3., 5.]:
-        self.assertEqual(x, self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
-
-  @test_util.run_deprecated_v1
+    get_next = self.getNext(dataset)
+
+    for x in [1., 2., 3., 5.]:
+      self.assertEqual(x, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+
   def testReadFileIgnoreError(self):
 
     def write_string_to_file(value, filename):
@@ -91,28 +81,24 @@ class IgnoreErrorsTest(test_base.DatasetTestBase):
         dataset_ops.Dataset.from_tensor_slices(filenames).map(
             io_ops.read_file,
             num_parallel_calls=2).prefetch(2).apply(error_ops.ignore_errors()))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      # All of the files are present.
-      self.evaluate(init_op)
-      for filename in filenames:
-        self.assertEqual(compat.as_bytes(filename), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
-
-      # Delete one of the files.
-      os.remove(filenames[0])
-
-      # Attempting to read filenames[0] will fail, but ignore_errors()
-      # will catch the error.
-      self.evaluate(init_op)
-      for filename in filenames[1:]:
-        self.assertEqual(compat.as_bytes(filename), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(dataset)
+
+    # All of the files are present.
+    for filename in filenames:
+      self.assertEqual(compat.as_bytes(filename), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+
+    # Delete one of the files.
+    os.remove(filenames[0])
+
+    # Attempting to read filenames[0] will fail, but ignore_errors()
+    # will catch the error.
+    get_next = self.getNext(dataset)
+    for filename in filenames[1:]:
+      self.assertEqual(compat.as_bytes(filename), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index 6042ca1c63..e41030dc04 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -31,11 +31,11 @@ from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import data_flow_ops
-from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
+# TODO(b/119837791): add eager coverage.
 class MapDefunTest(test_base.DatasetTestBase):
 
   def testMapDefunSimple(self):
@@ -254,46 +254,5 @@ class MapDefunTest(test_base.DatasetTestBase):
     self.assertAllEqual(self.evaluate(expected), self.evaluate(map_defun_op))
 
 
-class MapDefunBenchmark(test.Benchmark):
-
-  def _run(self, op, name=None, num_iters=3000):
-    with session.Session() as sess:
-      # Warm up the session
-      for _ in range(5):
-        self.evaluate(op)
-      start = time.time()
-      for _ in range(num_iters):
-        self.evaluate(op)
-      end = time.time()
-      mean_us = (end - start) * 1e6 / num_iters
-      self.report_benchmark(
-          name=name,
-          iters=num_iters,
-          wall_time=mean_us,
-          extras={"examples_per_sec": num_iters / (end - start)})
-
-  def benchmarkDefunVsMapFn(self):
-    """Benchmarks to compare the performance of MapDefun vs tf.map_fn."""
-
-    @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.int32)])
-    def defun(x):
-      return array_ops.identity(x)
-
-    def map_fn(x):
-      return array_ops.identity(x)
-
-    base = math_ops.range(100)
-    for input_size in [10, 100, 1000, 10000]:
-      num_iters = 100000 // input_size
-      map_defun_op = map_defun.map_defun(defun, [base], [dtypes.int32], [()])
-      map_fn_op = functional_ops.map_fn(map_fn, base)
-
-      self._run(
-          map_defun_op,
-          "benchmarkMapDefun_size_%d" % input_size,
-          num_iters=num_iters)
-      self._run(
-          map_fn_op, "benchmarkMapFn_size_%d" % input_size, num_iters=num_iters)
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/matching_files_test.py b/tensorflow/python/data/experimental/kernel_tests/matching_files_test.py
index 0ee7616d35..fe83b4c66e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/matching_files_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/matching_files_test.py
@@ -23,14 +23,14 @@ import tempfile
 
 from tensorflow.python.data.experimental.ops import matching_files
 from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-class MatchingFilesTest(test_base.DatasetTestBase):
+@test_util.run_all_in_graph_and_eager_modes
+class MatchingFilesDatasetTest(test_base.DatasetTestBase):
 
   def setUp(self):
     self.tmp_dir = tempfile.mkdtemp()
@@ -42,30 +42,23 @@ class MatchingFilesTest(test_base.DatasetTestBase):
     for filename in filenames:
       open(os.path.join(self.tmp_dir, filename), 'a').close()
 
-  @test_util.run_deprecated_v1
   def testNonExistingDirectory(self):
     """Test the MatchingFiles dataset with a non-existing directory."""
 
     self.tmp_dir = os.path.join(self.tmp_dir, 'nonexistingdir')
     dataset = matching_files.MatchingFilesDataset(
         os.path.join(self.tmp_dir, '*'))
-    with self.cached_session() as sess:
-      next_element = dataset_ops.make_one_shot_iterator(dataset).get_next()
-      with self.assertRaises(errors.NotFoundError):
-        sess.run(next_element)
+    self.assertDatasetProduces(
+        dataset, expected_error=(errors.NotFoundError, ''))
 
-  @test_util.run_deprecated_v1
   def testEmptyDirectory(self):
     """Test the MatchingFiles dataset with an empty directory."""
 
     dataset = matching_files.MatchingFilesDataset(
         os.path.join(self.tmp_dir, '*'))
-    with self.cached_session() as sess:
-      next_element = dataset_ops.make_one_shot_iterator(dataset).get_next()
-      with self.assertRaises(errors.NotFoundError):
-        sess.run(next_element)
+    self.assertDatasetProduces(
+        dataset, expected_error=(errors.NotFoundError, ''))
 
-  @test_util.run_deprecated_v1
   def testSimpleDirectory(self):
     """Test the MatchingFiles dataset with a simple directory."""
 
@@ -74,21 +67,14 @@ class MatchingFilesTest(test_base.DatasetTestBase):
 
     dataset = matching_files.MatchingFilesDataset(
         os.path.join(self.tmp_dir, '*'))
-    with self.cached_session() as sess:
-      next_element = dataset_ops.make_one_shot_iterator(dataset).get_next()
+    self.assertDatasetProduces(
+        dataset,
+        expected_output=[
+            compat.as_bytes(os.path.join(self.tmp_dir, filename))
+            for filename in filenames
+        ],
+        assert_items_equal=True)
 
-      expected_filenames = []
-      actual_filenames = []
-      for filename in filenames:
-        expected_filenames.append(
-            compat.as_bytes(os.path.join(self.tmp_dir, filename)))
-        actual_filenames.append(compat.as_bytes(sess.run(next_element)))
-
-      self.assertItemsEqual(expected_filenames, actual_filenames)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  @test_util.run_deprecated_v1
   def testFileSuffixes(self):
     """Test the MatchingFiles dataset using the suffixes of filename."""
 
@@ -97,20 +83,14 @@ class MatchingFilesTest(test_base.DatasetTestBase):
 
     dataset = matching_files.MatchingFilesDataset(
         os.path.join(self.tmp_dir, '*.py'))
-    with self.cached_session() as sess:
-      next_element = dataset_ops.make_one_shot_iterator(dataset).get_next()
-      expected_filenames = []
-      actual_filenames = []
-      for filename in filenames[1:-1]:
-        expected_filenames.append(
-            compat.as_bytes(os.path.join(self.tmp_dir, filename)))
-        actual_filenames.append(compat.as_bytes(sess.run(next_element)))
-
-      self.assertItemsEqual(expected_filenames, actual_filenames)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  @test_util.run_deprecated_v1
+    self.assertDatasetProduces(
+        dataset,
+        expected_output=[
+            compat.as_bytes(os.path.join(self.tmp_dir, filename))
+            for filename in filenames[1:-1]
+        ],
+        assert_items_equal=True)
+
   def testFileMiddles(self):
     """Test the MatchingFiles dataset using the middles of filename."""
 
@@ -119,20 +99,14 @@ class MatchingFilesTest(test_base.DatasetTestBase):
 
     dataset = matching_files.MatchingFilesDataset(
         os.path.join(self.tmp_dir, 'b*.py*'))
-    with self.cached_session() as sess:
-      next_element = dataset_ops.make_one_shot_iterator(dataset).get_next()
-      expected_filenames = []
-      actual_filenames = []
-      for filename in filenames[1:3]:
-        expected_filenames.append(
-            compat.as_bytes(os.path.join(self.tmp_dir, filename)))
-        actual_filenames.append(compat.as_bytes(sess.run(next_element)))
-
-      self.assertItemsEqual(expected_filenames, actual_filenames)
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
-
-  @test_util.run_deprecated_v1
+    self.assertDatasetProduces(
+        dataset,
+        expected_output=[
+            compat.as_bytes(os.path.join(self.tmp_dir, filename))
+            for filename in filenames[1:3]
+        ],
+        assert_items_equal=True)
+
   def testNestedDirectories(self):
     """Test the MatchingFiles dataset with nested directories."""
 
@@ -156,21 +130,20 @@ class MatchingFilesTest(test_base.DatasetTestBase):
     ]
 
     dataset = matching_files.MatchingFilesDataset(patterns)
-    with self.cached_session() as sess:
-      next_element = dataset_ops.make_one_shot_iterator(dataset).get_next()
-      expected_filenames = [
-          compat.as_bytes(filename)
-          for filename in filenames
-          if filename.endswith('.txt') or filename.endswith('.log')
-      ]
-      actual_filenames = []
-      while True:
-        try:
-          actual_filenames.append(compat.as_bytes(sess.run(next_element)))
-        except errors.OutOfRangeError:
-          break
-
-      self.assertItemsEqual(expected_filenames, actual_filenames)
+    next_element = self.getNext(dataset)
+    expected_filenames = [
+        compat.as_bytes(filename)
+        for filename in filenames
+        if filename.endswith('.txt') or filename.endswith('.log')
+    ]
+    actual_filenames = []
+    while True:
+      try:
+        actual_filenames.append(compat.as_bytes(self.evaluate(next_element())))
+      except errors.OutOfRangeError:
+        break
+
+    self.assertItemsEqual(expected_filenames, actual_filenames)
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py b/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
index aa81663a18..811a58262e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/override_threadpool_test.py
@@ -35,6 +35,7 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class OverrideThreadpoolTest(test_base.DatasetTestBase,
                              parameterized.TestCase):
 
@@ -53,14 +54,12 @@ class OverrideThreadpoolTest(test_base.DatasetTestBase,
             lambda x: script_ops.py_func(get_thread_id, [x], dtypes.int64),
             num_parallel_calls=32).apply(unique.unique()))
     dataset = override_threadpool_fn(dataset)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
+    next_element = self.getNext(dataset, requires_initialization=True)
 
-    self.evaluate(iterator.initializer)
     thread_ids = []
     try:
       while True:
-        thread_ids.append(self.evaluate(next_element))
+        thread_ids.append(self.evaluate(next_element()))
     except errors.OutOfRangeError:
       pass
     self.assertLen(thread_ids, len(set(thread_ids)))
@@ -82,7 +81,6 @@ class OverrideThreadpoolTest(test_base.DatasetTestBase,
       ("8", 4, 1),
       ("9", 4, 4),
   )
-  @test_util.run_deprecated_v1
   def testNumThreadsDeprecated(self, num_threads, max_intra_op_parallelism):
 
     def override_threadpool_fn(dataset):
@@ -109,7 +107,6 @@ class OverrideThreadpoolTest(test_base.DatasetTestBase,
       ("11", 4, 4),
       ("12", None, None),
   )
-  @test_util.run_deprecated_v1
   def testNumThreads(self, num_threads, max_intra_op_parallelism):
 
     def override_threadpool_fn(dataset):
diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
index 80bd43e9ad..2af31ad3e3 100644
--- a/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
+# TODO(b/119837791): add eager coverage when supported.
 class PrefetchToDeviceTest(test_base.DatasetTestBase):
 
   @test_util.run_deprecated_v1
diff --git a/tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py b/tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
index 76f68f50c8..4d35b160fd 100644
--- a/tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/rejection_resample_test.py
@@ -17,11 +17,9 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import time
 
 from absl.testing import parameterized
 import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.data.experimental.ops import resampling
 from tensorflow.python.data.kernel_tests import test_base
@@ -36,35 +34,12 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-def _time_resampling(
-    test_obj, data_np, target_dist, init_dist, num_to_sample):
-  dataset = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()
-
-  # Reshape distribution via rejection sampling.
-  dataset = dataset.apply(
-      resampling.rejection_resample(
-          class_func=lambda x: x,
-          target_dist=target_dist,
-          initial_dist=init_dist,
-          seed=142))
-
-  get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
-
-  with test_obj.test_session() as sess:
-    start_time = time.time()
-    for _ in xrange(num_to_sample):
-      sess.run(get_next)
-    end_time = time.time()
-
-  return end_time - start_time
-
-
+@test_util.run_all_in_graph_and_eager_modes
 class RejectionResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
       ("InitialDistributionKnown", True),
       ("InitialDistributionUnknown", False))
-  @test_util.run_deprecated_v1
   def testDistribution(self, initial_known):
     classes = np.random.randint(5, size=(20000,))  # Uniformly sampled
     target_dist = [0.9, 0.05, 0.05, 0.0, 0.0]
@@ -73,17 +48,17 @@ class RejectionResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = dataset_ops.Dataset.from_tensor_slices(classes).shuffle(
         200, seed=21).map(lambda c: (c, string_ops.as_string(c))).repeat()
 
-    get_next = dataset_ops.make_one_shot_iterator(dataset.apply(
-        resampling.rejection_resample(
-            target_dist=target_dist,
-            initial_dist=initial_dist,
-            class_func=lambda c, _: c,
-            seed=27))).get_next()
+    get_next = self.getNext(
+        dataset.apply(
+            resampling.rejection_resample(
+                target_dist=target_dist,
+                initial_dist=initial_dist,
+                class_func=lambda c, _: c,
+                seed=27)))
 
-    with self.cached_session() as sess:
-      returned = []
-      while len(returned) < 4000:
-        returned.append(sess.run(get_next))
+    returned = []
+    while len(returned) < 4000:
+      returned.append(self.evaluate(get_next()))
 
     returned_classes, returned_classes_and_data = zip(*returned)
     _, returned_data = zip(*returned_classes_and_data)
@@ -99,7 +74,6 @@ class RejectionResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
   @parameterized.named_parameters(
       ("OnlyInitial", True),
       ("NotInitial", False))
-  @test_util.run_deprecated_v1
   def testEdgeCasesSampleFromInitialDataset(self, only_initial_dist):
     init_dist = [0.5, 0.5]
     target_dist = [0.5, 0.5] if only_initial_dist else [0.0, 1.0]
@@ -117,15 +91,13 @@ class RejectionResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
             target_dist=target_dist,
             initial_dist=init_dist))
 
-    get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      returned = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          returned.append(sess.run(get_next))
+    returned = []
+    with self.assertRaises(errors.OutOfRangeError):
+      while True:
+        returned.append(self.evaluate(get_next()))
 
-  @test_util.run_deprecated_v1
   def testRandomClasses(self):
     init_dist = [0.25, 0.25, 0.25, 0.25]
     target_dist = [0.0, 0.0, 0.0, 1.0]
@@ -149,13 +121,12 @@ class RejectionResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
             target_dist=target_dist,
             initial_dist=init_dist))
 
-    get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      returned = []
-      with self.assertRaises(errors.OutOfRangeError):
-        while True:
-          returned.append(sess.run(get_next))
+    returned = []
+    with self.assertRaises(errors.OutOfRangeError):
+      while True:
+        returned.append(self.evaluate(get_next()))
 
     classes, _ = zip(*returned)
     bincount = np.bincount(
@@ -165,22 +136,5 @@ class RejectionResampleTest(test_base.DatasetTestBase, parameterized.TestCase):
     self.assertAllClose(target_dist, bincount, atol=1e-2)
 
 
-class ResampleDatasetBenchmark(test.Benchmark):
-
-  def benchmarkResamplePerformance(self):
-    init_dist = [0.25, 0.25, 0.25, 0.25]
-    target_dist = [0.0, 0.0, 0.0, 1.0]
-    num_classes = len(init_dist)
-    # We don't need many samples to test a dirac-delta target distribution
-    num_samples = 1000
-    data_np = np.random.choice(num_classes, num_samples, p=init_dist)
-
-    resample_time = _time_resampling(
-        self, data_np, target_dist, init_dist, num_to_sample=1000)
-
-    self.report_benchmark(
-        iters=1000, wall_time=resample_time, name="benchmark_resample")
-
-
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
index 658e6120cf..3b0d23d6e1 100644
--- a/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
+# TODO(b/119837791): Add eager coverage
 class RestructuredDatasetTest(test_base.DatasetTestBase):
 
   @test_util.run_deprecated_v1
diff --git a/tensorflow/python/data/experimental/kernel_tests/scan_test.py b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
index bd974b21e3..89b3824821 100644
--- a/tensorflow/python/data/experimental/kernel_tests/scan_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
@@ -24,7 +24,6 @@ import numpy as np
 from tensorflow.python.data.experimental.ops import scan_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -35,48 +34,34 @@ from tensorflow.python.ops import script_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class ScanTest(test_base.DatasetTestBase):
 
   def _counting_dataset(self, start, scan_fn):
     return dataset_ops.Dataset.from_tensors(0).repeat().apply(
         scan_ops.scan(start, scan_fn))
 
-  @test_util.run_deprecated_v1
   def testCount(self):
     def make_scan_fn(step):
       return lambda state, _: (state + step, state)
 
-    start = array_ops.placeholder(dtypes.int32, shape=[])
-    step = array_ops.placeholder(dtypes.int32, shape=[])
-    take = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = dataset_ops.make_initializable_iterator(self._counting_dataset(
-        start, make_scan_fn(step)).take(take))
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-
-      for start_val, step_val, take_val in [(0, 1, 10), (0, 1, 0), (10, 1, 10),
-                                            (10, 2, 10), (10, -1, 10),
-                                            (10, -2, 10)]:
-        sess.run(iterator.initializer,
-                 feed_dict={start: start_val, step: step_val, take: take_val})
-        for expected, _ in zip(
-            itertools.count(start_val, step_val), range(take_val)):
-          self.assertEqual(expected, self.evaluate(next_element))
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(next_element)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testFibonacci(self):
-    iterator = dataset_ops.make_one_shot_iterator(
-        dataset_ops.Dataset.from_tensors(1).repeat(None).apply(
-            scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1]))))
+    def dataset_fn(start, step, take):
+      return self._counting_dataset(start, make_scan_fn(step)).take(take)
 
-    if context.executing_eagerly():
-      next_element = iterator.get_next
-    else:
-      get_next = iterator.get_next()
-      next_element = lambda: get_next
+    for start_val, step_val, take_val in [(0, 1, 10), (0, 1, 0), (10, 1, 10),
+                                          (10, 2, 10), (10, -1, 10), (10, -2,
+                                                                      10)]:
+      next_element = self.getNext(dataset_fn(start_val, step_val, take_val))
+      for expected, _ in zip(
+          itertools.count(start_val, step_val), range(take_val)):
+        self.assertEqual(expected, self.evaluate(next_element()))
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(next_element())
+
+  def testFibonacci(self):
+    data = dataset_ops.Dataset.from_tensors(1).repeat(None).apply(
+        scan_ops.scan([0, 1], lambda a, _: ([a[1], a[0] + a[1]], a[1])))
+    next_element = self.getNext(data)
 
     self.assertEqual(1, self.evaluate(next_element()))
     self.assertEqual(1, self.evaluate(next_element()))
@@ -85,8 +70,10 @@ class ScanTest(test_base.DatasetTestBase):
     self.assertEqual(5, self.evaluate(next_element()))
     self.assertEqual(8, self.evaluate(next_element()))
 
+  # TODO(b/119837791): Add coverage for eager.
   @test_util.run_deprecated_v1
-  def testSparseCount(self):
+  def testSkipEagerSparseCount(self):
+
     def _sparse(i):
       return sparse_tensor.SparseTensorValue(
           indices=np.array([[0, 0]]),
@@ -96,27 +83,20 @@ class ScanTest(test_base.DatasetTestBase):
     def make_scan_fn(step):
       return lambda state, _: (_sparse(state.values[0] + step), state)
 
-    start = array_ops.placeholder(dtypes.int32, shape=[])
-    step = array_ops.placeholder(dtypes.int32, shape=[])
-    take = array_ops.placeholder(dtypes.int64, shape=[])
-    iterator = dataset_ops.make_initializable_iterator(self._counting_dataset(
-        _sparse(start), make_scan_fn(step)).take(take))
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-
-      for start_val, step_val, take_val in [(0, 1, 10), (0, 1, 0), (10, 1, 10),
-                                            (10, 2, 10), (10, -1, 10),
-                                            (10, -2, 10)]:
-        sess.run(iterator.initializer,
-                 feed_dict={start: start_val, step: step_val, take: take_val})
-        for expected, _ in zip(
-            itertools.count(start_val, step_val), range(take_val)):
-          self.assertEqual(expected, self.evaluate(next_element).values[0])
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(next_element)
+    def dataset_fn(start, step, take):
+      return self._counting_dataset(_sparse(start),
+                                    make_scan_fn(step)).take(take)
+
+    for start_val, step_val, take_val in [(0, 1, 10), (0, 1, 0), (10, 1, 10),
+                                          (10, 2, 10), (10, -1, 10), (10, -2,
+                                                                      10)]:
+      next_element = self.getNext(dataset_fn(start_val, step_val, take_val))
+      for expected, _ in zip(
+          itertools.count(start_val, step_val), range(take_val)):
+        self.assertEqual(expected, self.evaluate(next_element()).values[0])
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(next_element())
 
-  @test_util.run_deprecated_v1
   def testChangingStateShape(self):
     # Test the fixed-point shape invariant calculations: start with
     # initial values with known shapes, and use a scan function that
@@ -134,16 +114,14 @@ class ScanTest(test_base.DatasetTestBase):
     self.assertIs(None, dataset.output_shapes[0][1].ndims)
     self.assertEqual([], dataset.output_shapes[1].as_list())
 
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-    next_element = iterator.get_next()
+    next_element = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      for i in range(5):
-        (longer_vector_val, larger_rank_val), _ = self.evaluate(next_element)
-        self.assertAllEqual([0] * (2**i), longer_vector_val)
-        self.assertAllEqual(np.array(1, ndmin=i), larger_rank_val)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    for i in range(5):
+      (longer_vector_val, larger_rank_val), _ = self.evaluate(next_element())
+      self.assertAllEqual([0] * (2**i), longer_vector_val)
+      self.assertAllEqual(np.array(1, ndmin=i), larger_rank_val)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testIncorrectStateType(self):
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index 9528f83291..110966a5a0 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -23,11 +23,11 @@ from tensorflow.python.data.experimental.ops import shuffle_ops
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class ShuffleAndRepeatTest(test_base.DatasetTestBase):
 
   def _build_ds(self, seed, count=5, num_elements=20):
@@ -35,17 +35,15 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
         shuffle_ops.shuffle_and_repeat(buffer_size=5, count=count, seed=seed))
 
   def _gen_outputs(self, ds_fn, num_outputs, verify_exhausted=True):
-    get_next = dataset_ops.make_one_shot_iterator(ds_fn()).get_next()
+    get_next = self.getNext(ds_fn())
     outputs = []
-    with self.cached_session() as sess:
-      for _ in range(num_outputs):
-        outputs.append(self.evaluate(get_next))
-      if verify_exhausted:
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(get_next)
+    for _ in range(num_outputs):
+      outputs.append(self.evaluate(get_next()))
+    if verify_exhausted:
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
     return outputs
 
-  @test_util.run_deprecated_v1
   def testCorrectOutput(self):
     output = self._gen_outputs(lambda: self._build_ds(10), 100)
     self.assertSequenceEqual(
@@ -54,7 +52,6 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
     for i in range(5):
       self.assertSequenceEqual(sorted(output[i * 20:(i + 1) * 20]), range(20))
 
-  @test_util.run_deprecated_v1
   def testReshuffling(self):
     # Check that the output orders of different epochs are indeed different.
     output = self._gen_outputs(lambda: self._build_ds(10), 100)
@@ -63,20 +60,17 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
       epoch2 = output[(i + 1) * 20:(i + 2) * 20]
       self.assertNotEqual(epoch1, epoch2)
 
-  @test_util.run_deprecated_v1
   def testSameOrderForSameSeeds(self):
     output1 = self._gen_outputs(lambda: self._build_ds(10), 100)
     output2 = self._gen_outputs(lambda: self._build_ds(10), 100)
     self.assertEqual(output1, output2)
 
-  @test_util.run_deprecated_v1
   def testDifferentOrderForDifferentSeeds(self):
     output1 = self._gen_outputs(lambda: self._build_ds(10), 100)
     output2 = self._gen_outputs(lambda: self._build_ds(20), 100)
     self.assertNotEqual(output1, output2)
     self.assertEqual(sorted(output1), sorted(output2))
 
-  @test_util.run_deprecated_v1
   def testCountNone(self):
     output1 = self._gen_outputs(
         lambda: self._build_ds(10, count=None), 100, verify_exhausted=False)
@@ -85,7 +79,6 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
     self.assertNotEqual(output1, output2)
     self.assertEqual(sorted(output1), sorted(output2))
 
-  @test_util.run_deprecated_v1
   def testCountMinusOne(self):
     output1 = self._gen_outputs(
         lambda: self._build_ds(10, count=-1), 100, verify_exhausted=False)
@@ -110,12 +103,10 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
                         100)
 
   def testLargeBufferSize(self):
-    with ops.Graph().as_default() as g:
-      ds = dataset_ops.Dataset.range(20).apply(
-          shuffle_ops.shuffle_and_repeat(buffer_size=21))
-      get_next_op = ds.make_one_shot_iterator().get_next()
-      with self.session(graph=g) as sess:
-        self.evaluate(get_next_op)
+    ds = dataset_ops.Dataset.range(20).apply(
+        shuffle_ops.shuffle_and_repeat(buffer_size=21))
+    get_next = self.getNext(ds)
+    self.evaluate(get_next())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/sleep_test.py b/tensorflow/python/data/experimental/kernel_tests/sleep_test.py
index 46b22f80b6..a4fe847f04 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sleep_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sleep_test.py
@@ -29,25 +29,21 @@ from tensorflow.python.platform import test
 _NUMPY_RANDOM_SEED = 42
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class SleepTest(test_base.DatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testSleep(self):
     sleep_microseconds = 100
     dataset = dataset_ops.Dataset.range(10).apply(
         sleep.sleep(sleep_microseconds))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      start_time = time.time()
-      for i in range(10):
-        self.assertEqual(i, self.evaluate(next_element))
-      end_time = time.time()
-      self.assertGreater(end_time - start_time, (10 * sleep_microseconds) / 1e6)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    next_element = self.getNext(dataset)
+    start_time = time.time()
+    for i in range(10):
+      self.assertEqual(i, self.evaluate(next_element()))
+    end_time = time.time()
+    self.assertGreater(end_time - start_time, (10 * sleep_microseconds) / 1e6)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index f19b08a2dd..97d386157a 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -36,6 +36,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 def function_set_stats_aggregator(dataset,
                                   aggregator,
                                   prefix="",
@@ -60,134 +61,110 @@ def function_apply_options(dataset, aggregator, prefix="", counter_prefix=""):
 )
 class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testBytesProduced(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).map(
         lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
             stats_ops.bytes_produced_stats("bytes_produced"))
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
+    next_element = self.getNext(dataset, requires_initialization=True)
     summary_t = aggregator.get_summary()
 
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      expected_sum = 0.0
-      for i in range(100):
-        self.assertAllEqual(
-            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
-        summary_str = self.evaluate(summary_t)
-        self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1))
-        expected_sum += i * 8.0
-        self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
-      summary_str = self.evaluate(summary_t)
-      self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
+    expected_sum = 0.0
+    for i in range(100):
+      self.assertAllEqual(
+          np.array([i] * i, dtype=np.int64), self.evaluate(next_element()))
+      summary_str = self.evaluate(aggregator.get_summary())
+      self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1))
+      expected_sum += i * 8.0
       self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
+    # TODO(shivaniagrawal): ntentional breaking case
+    summary_str = self.evaluate(summary_t)
+    self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
+    self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
 
-  @test_util.run_deprecated_v1
   def testLatencyStats(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).apply(
         stats_ops.latency_stats("record_latency"))
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
+    next_element = self.getNext(dataset, requires_initialization=True)
 
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      for i in range(100):
-        self.assertEqual(i, self.evaluate(next_element))
-        self._assertSummaryHasCount(
-            self.evaluate(summary_t), "record_latency", float(i + 1))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    for i in range(100):
+      self.assertEqual(i, self.evaluate(next_element()))
       self._assertSummaryHasCount(
-          self.evaluate(summary_t), "record_latency", 100.0)
+          self.evaluate(aggregator.get_summary()), "record_latency",
+          float(i + 1))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "record_latency", 100.0)
 
-  @test_util.run_deprecated_v1
   def testPrefetchBufferUtilization(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).map(
         lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(-1)
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
-
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      for i in range(100):
-        self.assertAllEqual(
-            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
-        summary_str = self.evaluate(summary_t)
-        self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
-                                    float(i + 1))
-        self._assertSummaryContains(summary_str, "Prefetch::buffer_capacity")
-        self._assertSummaryContains(summary_str, "Prefetch::buffer_size")
-        self._assertSummaryHasRange(summary_str, "Prefetch::buffer_utilization",
-                                    0, 1)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
-      summary_str = self.evaluate(summary_t)
+    next_element = self.getNext(dataset, requires_initialization=True)
+    for i in range(100):
+      self.assertAllEqual(
+          np.array([i] * i, dtype=np.int64), self.evaluate(next_element()))
+      summary_str = self.evaluate(aggregator.get_summary())
       self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
-                                  100)
+                                  float(i + 1))
+      self._assertSummaryContains(summary_str, "Prefetch::buffer_capacity")
+      self._assertSummaryContains(summary_str, "Prefetch::buffer_size")
+      self._assertSummaryHasRange(summary_str, "Prefetch::buffer_utilization",
+                                  0, 1)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
+    summary_str = self.evaluate(aggregator.get_summary())
+    self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
+                                100)
 
-  @test_util.run_deprecated_v1
   def testPrefetchBufferScalars(self, dataset_transformation):
-    def map_fn(x):
-      return array_ops.tile([x], ops.convert_to_tensor([x]))
     aggregator = stats_aggregator.StatsAggregator()
-    dataset = dataset_ops.Dataset.range(10).map(map_fn).prefetch(1)
+    dataset = dataset_ops.Dataset.range(10).map(
+        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(1)
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
+    next_element = self.getNext(dataset, requires_initialization=True)
+
+    for i in range(10):
+      self.assertAllEqual(
+          np.array([i] * i, dtype=np.int64), self.evaluate(next_element()))
+      summary_str = self.evaluate(aggregator.get_summary())
+      self._assertSummaryHasScalarValue(summary_str,
+                                        "Prefetch::buffer_capacity", 1)
+      self._assertSummaryHasScalarValue(summary_str, "Prefetch::buffer_size", 1)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      for i in range(10):
-        self.assertAllEqual(
-            np.array([i] * i, dtype=np.int64), self.evaluate(next_element))
-        summary_str = self.evaluate(summary_t)
-        self._assertSummaryHasScalarValue(summary_str,
-                                          "Prefetch::buffer_capacity", 1)
-        self._assertSummaryHasScalarValue(summary_str, "Prefetch::buffer_size",
-                                          1)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
-
-  @test_util.run_deprecated_v1
   def testFilteredElementsStats(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(101).filter(
         lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
+    next_element = self.getNext(dataset, requires_initialization=True)
 
-    with self.test_session() as sess:
-      self.evaluate(iterator.initializer)
-      for i in range(34):
-        self.assertEqual(i * 3, self.evaluate(next_element))
-        if i is not 0:
-          self._assertSummaryHasScalarValue(
-              self.evaluate(summary_t), "Filter::dropped_elements",
-              float(i * 2))
-        self._assertSummaryHasScalarValue(
-            self.evaluate(summary_t), "Filter::filtered_elements", float(i + 1))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
-      self._assertSummaryHasScalarValue(
-          self.evaluate(summary_t), "Filter::dropped_elements", 67.0)
+    for i in range(34):
+      self.assertEqual(i * 3, self.evaluate(next_element()))
+      summary_str = self.evaluate(aggregator.get_summary())
+      if i is not 0:
+        self._assertSummaryHasScalarValue(summary_str,
+                                          "Filter::dropped_elements",
+                                          float(i * 2))
       self._assertSummaryHasScalarValue(
-          self.evaluate(summary_t), "Filter::filtered_elements", 34.0)
+          summary_str, "Filter::filtered_elements", float(i + 1))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
+    summary_str = self.evaluate(aggregator.get_summary())
+    self._assertSummaryHasScalarValue(summary_str, "Filter::dropped_elements",
+                                      67.0)
+    self._assertSummaryHasScalarValue(summary_str, "Filter::filtered_elements",
+                                      34.0)
 
-  @test_util.run_deprecated_v1
   def testMapBufferUtilization(self, dataset_transformation):
 
     def dataset_fn():
@@ -202,7 +179,6 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         dataset_transformation,
         function_processing_time=True)
 
-  @test_util.run_deprecated_v1
   def testMapAutoTuneBufferUtilization(self, dataset_transformation):
 
     def dataset_fn():
@@ -220,7 +196,6 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         dataset_transformation,
         function_processing_time=True)
 
-  @test_util.run_deprecated_v1
   def testInterleaveAutoTuneBufferUtilization(self, dataset_transformation):
 
     def dataset_fn():
@@ -237,7 +212,6 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     self._testParallelCallsStats(dataset_fn, "ParallelInterleaveV2", 10,
                                  dataset_transformation)
 
-  @test_util.run_deprecated_v1
   def testMapAndBatchAutoTuneBufferUtilization(self, dataset_transformation):
 
     def dataset_fn():
@@ -259,114 +233,98 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         check_elements=False,
         function_processing_time=True)
 
-  @test_util.run_deprecated_v1
   def testReinitialize(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).apply(
         stats_ops.latency_stats("record_latency"))
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
 
-    with self.cached_session() as sess:
-      for j in range(5):
-        self.evaluate(iterator.initializer)
-        for i in range(100):
-          self.assertEqual(i, self.evaluate(next_element))
-          self._assertSummaryHasCount(
-              self.evaluate(summary_t), "record_latency",
-              float((j * 100) + i + 1))
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(next_element)
+    for j in range(5):
+      next_element = self.getNext(dataset, requires_initialization=True)
+      for i in range(100):
+        self.assertEqual(i, self.evaluate(next_element()))
         self._assertSummaryHasCount(
-            self.evaluate(summary_t), "record_latency", (j + 1) * 100.0)
+            self.evaluate(aggregator.get_summary()), "record_latency",
+            float((j * 100) + i + 1))
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(next_element())
+      self._assertSummaryHasCount(
+          self.evaluate(aggregator.get_summary()), "record_latency",
+          (j + 1) * 100.0)
 
-  @test_util.run_deprecated_v1
   def testNoAggregatorRegistered(self, dataset_transformation):
     dataset = dataset_ops.Dataset.range(100).apply(
         stats_ops.latency_stats("record_latency"))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
 
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      for i in range(100):
-        self.assertEqual(i, self.evaluate(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    next_element = self.getNext(dataset, requires_initialization=True)
+
+    for i in range(100):
+      self.assertEqual(i, self.evaluate(next_element()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
-  @test_util.run_deprecated_v1
   def testMultipleTags(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).apply(
         stats_ops.latency_stats("record_latency")).apply(
             stats_ops.latency_stats("record_latency_2"))
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
 
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      for i in range(100):
-        self.assertEqual(i, self.evaluate(next_element))
-        self._assertSummaryHasCount(
-            self.evaluate(summary_t), "record_latency", float(i + 1))
-        self._assertSummaryHasCount(
-            self.evaluate(summary_t), "record_latency_2", float(i + 1))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    next_element = self.getNext(dataset, requires_initialization=True)
+
+    for i in range(100):
+      self.assertEqual(i, self.evaluate(next_element()))
       self._assertSummaryHasCount(
-          self.evaluate(summary_t), "record_latency", 100.0)
+          self.evaluate(aggregator.get_summary()), "record_latency",
+          float(i + 1))
       self._assertSummaryHasCount(
-          self.evaluate(summary_t), "record_latency_2", 100.0)
+          self.evaluate(aggregator.get_summary()), "record_latency_2",
+          float(i + 1))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "record_latency", 100.0)
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "record_latency_2", 100.0)
 
-  @test_util.run_deprecated_v1
   def testRepeatedTags(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).apply(
         stats_ops.latency_stats("record_latency")).apply(
             stats_ops.latency_stats("record_latency"))
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
+    next_element = self.getNext(dataset, requires_initialization=True)
 
-    with self.cached_session() as sess:
-      self.evaluate(iterator.initializer)
-      for i in range(100):
-        self.assertEqual(i, self.evaluate(next_element))
-        self._assertSummaryHasCount(
-            self.evaluate(summary_t), "record_latency", float(2 * (i + 1)))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    for i in range(100):
+      self.assertEqual(i, self.evaluate(next_element()))
       self._assertSummaryHasCount(
-          self.evaluate(summary_t), "record_latency", 200.0)
+          self.evaluate(aggregator.get_summary()), "record_latency",
+          float(2 * (i + 1)))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "record_latency", 200.0)
 
-  @test_util.run_deprecated_v1
   def testMultipleIteratorsSameAggregator(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).apply(
         stats_ops.latency_stats("record_latency"))
     dataset = dataset_transformation(dataset, aggregator)
-    iterator_0 = dataset_ops.make_initializable_iterator(dataset)
-    iterator_1 = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator_0.get_next() + iterator_1.get_next()
-    summary_t = aggregator.get_summary()
+    next_element1 = self.getNext(dataset, requires_initialization=True)
+    next_element2 = self.getNext(dataset, requires_initialization=True)
 
-    with self.cached_session() as sess:
-      self.evaluate([iterator_0.initializer, iterator_1.initializer])
-      for i in range(100):
-        self.assertEqual(i * 2, self.evaluate(next_element))
-        self._assertSummaryHasCount(
-            self.evaluate(summary_t), "record_latency", float(2 * (i + 1)))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    for i in range(100):
+      self.assertEqual(i * 2, self.evaluate(next_element1() + next_element2()))
       self._assertSummaryHasCount(
-          self.evaluate(summary_t), "record_latency", 200.0)
+          self.evaluate(aggregator.get_summary()), "record_latency",
+          float(2 * (i + 1)))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element1())
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element2())
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "record_latency", 200.0)
 
-  @test_util.run_deprecated_v1
   def testMultipleDatasetWithPrefixes(self, dataset_transformation):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_ops.Dataset.range(100).apply(
@@ -375,25 +333,27 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
     dataset2 = dataset_ops.Dataset.range(100).apply(
         stats_ops.latency_stats("record_latency"))
     dataset2 = dataset_transformation(dataset2, aggregator, prefix="dataset2")
-    iterator_0 = dataset_ops.make_initializable_iterator(dataset)
-    iterator_1 = dataset_ops.make_initializable_iterator(dataset2)
-    next_element = iterator_0.get_next() + iterator_1.get_next()
-    summary_t = aggregator.get_summary()
+    next_element1 = self.getNext(dataset, requires_initialization=True)
+    next_element2 = self.getNext(dataset2, requires_initialization=True)
 
-    with self.test_session() as sess:
-      self.evaluate([iterator_0.initializer, iterator_1.initializer])
-      for i in range(100):
-        self.assertEqual(i * 2, self.evaluate(next_element))
-        self._assertSummaryHasCount(
-            self.evaluate(summary_t), "dataset1_record_latency", float(i + 1))
-        self._assertSummaryHasCount(
-            self.evaluate(summary_t), "dataset2_record_latency", float(i + 1))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    for i in range(100):
+      self.assertEqual(i * 2, self.evaluate(next_element1() + next_element2()))
       self._assertSummaryHasCount(
-          self.evaluate(summary_t), "dataset1_record_latency", 100.0)
+          self.evaluate(aggregator.get_summary()), "dataset1_record_latency",
+          float(i + 1))
       self._assertSummaryHasCount(
-          self.evaluate(summary_t), "dataset2_record_latency", 100.0)
+          self.evaluate(aggregator.get_summary()), "dataset2_record_latency",
+          float(i + 1))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element1())
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element2())
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "dataset1_record_latency",
+        100.0)
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "dataset2_record_latency",
+        100.0)
 
 
 @parameterized.named_parameters(
@@ -407,7 +367,6 @@ class FeatureStatsDatasetTest(
     stats_dataset_test_base.StatsDatasetTestBase,
     reader_dataset_ops_test_base.MakeBatchedFeaturesDatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testFeaturesStats(self, dataset_transformation):
     num_epochs = 5
     total_records = num_epochs * self._num_records
@@ -436,27 +395,26 @@ class FeatureStatsDatasetTest(
 
     dataset = dataset_transformation(
         dataset_fn(), aggregator, prefix="record_stats")
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
-
-    with self.test_session() as sess:
-      self.evaluate(iterator.initializer)
-      for _ in range(num_output):
-        self.evaluate(next_element)
 
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
-      self._assertSummaryHasCount(
-          self.evaluate(summary_t), "record_stats_features", total_records)
-      self._assertSummaryHasCount(
-          self.evaluate(summary_t), "record_stats_feature-values",
-          total_records)
-      self._assertSummaryHasSum(
-          self.evaluate(summary_t), "record_stats_features", total_records * 4)
-      self._assertSummaryHasSum(
-          self.evaluate(summary_t), "record_stats_feature-values",
-          self._sum_keywords(1) * num_epochs + 3 * total_records)
+    next_element = self.getNext(dataset, requires_initialization=True)
+
+    for _ in range(num_output):
+      self.evaluate(next_element())
+
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "record_stats_features",
+        total_records)
+    self._assertSummaryHasCount(
+        self.evaluate(aggregator.get_summary()), "record_stats_feature-values",
+        total_records)
+    self._assertSummaryHasSum(
+        self.evaluate(aggregator.get_summary()), "record_stats_features",
+        total_records * 4)
+    self._assertSummaryHasSum(
+        self.evaluate(aggregator.get_summary()), "record_stats_feature-values",
+        self._sum_keywords(1) * num_epochs + 3 * total_records)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_test_base.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_test_base.py
index ab1d1c3028..b80aab994e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_test_base.py
@@ -22,7 +22,6 @@ import numpy as np
 from tensorflow.core.framework import summary_pb2
 from tensorflow.python.data.experimental.ops import stats_aggregator
 from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 
 
@@ -94,27 +93,23 @@ class StatsDatasetTestBase(test_base.DatasetTestBase):
     aggregator = stats_aggregator.StatsAggregator()
     dataset = dataset_fn()
     dataset = dataset_transformation(dataset, aggregator)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
-    summary_t = aggregator.get_summary()
+    next_element = self.getNext(dataset, requires_initialization=True)
 
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer)
-      for i in range(num_output):
-        next_ = sess.run(next_element)
-        if check_elements:
-          self.assertAllEqual(np.array([i] * i, dtype=np.int64), next_)
-        summary_str = sess.run(summary_t)
-        if function_processing_time:
-          self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
-              summary_str, "::execution_time", float(i + 1))
-        self._assertSummaryContains(summary_str,
-                                    dataset_name + "::num_parallel_calls")
-        self._assertSummaryContains(summary_str,
-                                    dataset_name + "::active_parallel_calls")
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(next_element)
+    for i in range(num_output):
+      next_ = self.evaluate(next_element())
+      if check_elements:
+        self.assertAllEqual(np.array([i] * i, dtype=np.int64), next_)
+      summary_str = self.evaluate(aggregator.get_summary())
       if function_processing_time:
-        summary_str = sess.run(summary_t)
         self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
-            summary_str, "::execution_time", float(num_output))
+            summary_str, "::execution_time", float(i + 1))
+      self._assertSummaryContains(summary_str,
+                                  dataset_name + "::num_parallel_calls")
+      self._assertSummaryContains(summary_str,
+                                  dataset_name + "::active_parallel_calls")
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
+    if function_processing_time:
+      summary_str = self.evaluate(aggregator.get_summary())
+      self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
+          summary_str, "::execution_time", float(num_output))
diff --git a/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py b/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
index 8fd0ad50c4..14a4241ec2 100644
--- a/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/tf_record_writer_test.py
@@ -23,26 +23,24 @@ from tensorflow.python.data.experimental.ops import writers
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
-from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
 from tensorflow.python.lib.io import python_io
 from tensorflow.python.lib.io import tf_record
-from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class TFRecordWriterTest(test_base.DatasetTestBase):
 
   def setUp(self):
     super(TFRecordWriterTest, self).setUp()
     self._num_records = 7
-    self.filename = array_ops.placeholder(dtypes.string, shape=[])
-    self.compression_type = array_ops.placeholder_with_default("", shape=[])
 
-    input_dataset = readers.TFRecordDataset([self.filename],
-                                            self.compression_type)
-    self.writer = writers.TFRecordWriter(
-        self._outputFilename(), self.compression_type).write(input_dataset)
+  def writer_fn(self, filename, compression_type=""):
+    input_dataset = readers.TFRecordDataset([filename], compression_type)
+    return writers.TFRecordWriter(self._outputFilename(),
+                                  compression_type).write(input_dataset)
 
   def _record(self, i):
     return compat.as_bytes("Record %d" % (i))
@@ -62,56 +60,39 @@ class TFRecordWriterTest(test_base.DatasetTestBase):
     return os.path.join(self.get_temp_dir(), "tf_record.out.txt")
 
   def testWrite(self):
-    with self.cached_session() as sess:
-      sess.run(
-          self.writer, feed_dict={
-              self.filename: self._createFile(),
-          })
+    self.evaluate(self.writer_fn(self._createFile()))
     for i, r in enumerate(tf_record.tf_record_iterator(self._outputFilename())):
       self.assertAllEqual(self._record(i), r)
 
   def testWriteZLIB(self):
     options = tf_record.TFRecordOptions(tf_record.TFRecordCompressionType.ZLIB)
-    with self.cached_session() as sess:
-      sess.run(
-          self.writer,
-          feed_dict={
-              self.filename: self._createFile(options),
-              self.compression_type: "ZLIB",
-          })
+    self.evaluate(
+        self.writer_fn(self._createFile(options), compression_type="ZLIB"))
     for i, r in enumerate(
         tf_record.tf_record_iterator(self._outputFilename(), options=options)):
       self.assertAllEqual(self._record(i), r)
 
   def testWriteGZIP(self):
     options = tf_record.TFRecordOptions(tf_record.TFRecordCompressionType.GZIP)
-    with self.cached_session() as sess:
-      sess.run(
-          self.writer,
-          feed_dict={
-              self.filename: self._createFile(options),
-              self.compression_type: "GZIP",
-          })
+    self.evaluate(
+        self.writer_fn(self._createFile(options), compression_type="GZIP"))
     for i, r in enumerate(
         tf_record.tf_record_iterator(self._outputFilename(), options=options)):
       self.assertAllEqual(self._record(i), r)
 
   def testFailDataset(self):
     with self.assertRaises(TypeError):
-      writers.TFRecordWriter(self._outputFilename(),
-                             self.compression_type).write("whoops")
+      writers.TFRecordWriter(self._outputFilename(), "").write("whoops")
 
   def testFailDType(self):
     input_dataset = dataset_ops.Dataset.from_tensors(10)
     with self.assertRaises(TypeError):
-      writers.TFRecordWriter(self._outputFilename(),
-                             self.compression_type).write(input_dataset)
+      writers.TFRecordWriter(self._outputFilename(), "").write(input_dataset)
 
   def testFailShape(self):
     input_dataset = dataset_ops.Dataset.from_tensors([["hello"], ["world"]])
     with self.assertRaises(TypeError):
-      writers.TFRecordWriter(self._outputFilename(),
-                             self.compression_type).write(input_dataset)
+      writers.TFRecordWriter(self._outputFilename(), "").write(input_dataset)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
index cef5e8d269..7f30cf568e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
@@ -36,24 +36,14 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
-  @test_util.run_deprecated_v1
   def testUnbatchWithUnknownRankInput(self):
-    placeholder = array_ops.placeholder(dtypes.int32)
-    dataset = dataset_ops.Dataset.from_tensors(placeholder).apply(
-        batching.unbatch())
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_elem = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer, feed_dict={placeholder: [0, 1, 2, 3]})
-      for i in range(4):
-        self.assertEqual(i, self.evaluate(next_elem))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_elem)
+    dataset = dataset_ops.Dataset.from_tensors([0, 1, 2,
+                                                3]).apply(batching.unbatch())
+    self.assertDatasetProduces(dataset, range(4))
 
-  @test_util.run_deprecated_v1
   def testUnbatchScalarDataset(self):
     data = tuple([math_ops.range(10) for _ in range(3)])
     data = dataset_ops.Dataset.from_tensor_slices(data)
@@ -63,17 +53,8 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     data = data.apply(batching.unbatch())
     self.assertEqual(expected_types, data.output_types)
 
-    iterator = dataset_ops.make_one_shot_iterator(data)
-    op = iterator.get_next()
+    self.assertDatasetProduces(data, [(i,) * 3 for i in range(10)])
 
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual((i,) * 3, self.evaluate(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(op)
-
-  @test_util.run_deprecated_v1
   def testUnbatchDatasetWithStrings(self):
     data = tuple([math_ops.range(10) for _ in range(3)])
     data = dataset_ops.Dataset.from_tensor_slices(data)
@@ -84,18 +65,12 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     data = data.apply(batching.unbatch())
     self.assertEqual(expected_types, data.output_types)
 
-    iterator = dataset_ops.make_one_shot_iterator(data)
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual((i, compat.as_bytes(str(i)), i), self.evaluate(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(op)
+    self.assertDatasetProduces(
+        data, [(i, compat.as_bytes(str(i)), i) for i in range(10)])
 
+  # TODO(b/119837791): Add eager coverage.
   @test_util.run_deprecated_v1
-  def testUnbatchDatasetWithSparseTensor(self):
+  def testSkipEagerUnbatchDatasetWithSparseTensor(self):
     st = sparse_tensor.SparseTensorValue(
         indices=[[i, i] for i in range(10)],
         values=list(range(10)),
@@ -107,17 +82,17 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     iterator = dataset_ops.make_one_shot_iterator(data)
     next_element = iterator.get_next()
 
-    with self.cached_session() as sess:
-      for i in range(10):
-        st_row = self.evaluate(next_element)
-        self.assertEqual([i], st_row.indices)
-        self.assertEqual([i], st_row.values)
-        self.assertEqual([10], st_row.dense_shape)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    for i in range(10):
+      st_row = self.evaluate(next_element)
+      self.assertEqual([i], st_row.indices)
+      self.assertEqual([i], st_row.values)
+      self.assertEqual([10], st_row.dense_shape)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element)
 
+  # TODO(b/119837791): Add eager coverage.
   @test_util.run_deprecated_v1
-  def testUnbatchDatasetWithDenseAndSparseTensor(self):
+  def testSkipEagerUnbatchDatasetWithDenseAndSparseTensor(self):
     st = sparse_tensor.SparseTensorValue(
         indices=[[i, i] for i in range(10)],
         values=list(range(10)),
@@ -126,20 +101,17 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     data = data.apply(batching.unbatch())
     data = data.batch(5)
     data = data.apply(batching.unbatch())
-    iterator = dataset_ops.make_one_shot_iterator(data)
-    next_element = iterator.get_next()
+    next_element = self.getNext(data)
 
-    with self.cached_session() as sess:
-      for i in range(10):
-        dense_elem, st_row = self.evaluate(next_element)
-        self.assertEqual(i, dense_elem)
-        self.assertEqual([i], st_row.indices)
-        self.assertEqual([i], st_row.values)
-        self.assertEqual([10], st_row.dense_shape)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    for i in range(10):
+      dense_elem, st_row = self.evaluate(next_element())
+      self.assertEqual(i, dense_elem)
+      self.assertEqual([i], st_row.indices)
+      self.assertEqual([i], st_row.values)
+      self.assertEqual([10], st_row.dense_shape)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
-  @test_util.run_deprecated_v1
   def testUnbatchSingleElementTupleDataset(self):
     data = tuple([(math_ops.range(10),) for _ in range(3)])
     data = dataset_ops.Dataset.from_tensor_slices(data)
@@ -149,17 +121,8 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     data = data.apply(batching.unbatch())
     self.assertEqual(expected_types, data.output_types)
 
-    iterator = dataset_ops.make_one_shot_iterator(data)
-    op = iterator.get_next()
-
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(((i,),) * 3, self.evaluate(op))
+    self.assertDatasetProduces(data, [((i,),) * 3 for i in range(10)])
 
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(op)
-
-  @test_util.run_deprecated_v1
   def testUnbatchMultiElementTupleDataset(self):
     data = tuple([(math_ops.range(10 * i, 10 * i + 10),
                    array_ops.fill([10], "hi")) for i in range(3)])
@@ -170,29 +133,16 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     data = data.apply(batching.unbatch())
     self.assertAllEqual(expected_types, data.output_types)
 
-    iterator = dataset_ops.make_one_shot_iterator(data)
-    op = iterator.get_next()
+    self.assertDatasetProduces(
+        data,
+        [((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")) for i in range(10)])
 
-    with self.cached_session() as sess:
-      for i in range(10):
-        self.assertEqual(((i, b"hi"), (10 + i, b"hi"), (20 + i, b"hi")),
-                         self.evaluate(op))
-
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(op)
-
-  @test_util.run_deprecated_v1
   def testUnbatchEmpty(self):
     data = dataset_ops.Dataset.from_tensors(
         (constant_op.constant([]), constant_op.constant([], shape=[0, 4]),
          constant_op.constant([], shape=[0, 4, 0])))
     data = data.apply(batching.unbatch())
-    iterator = dataset_ops.make_one_shot_iterator(data)
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    self.assertDatasetProduces(data, [])
 
   def testUnbatchStaticShapeMismatch(self):
     data = dataset_ops.Dataset.from_tensors((np.arange(7), np.arange(8),
@@ -200,8 +150,9 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     with self.assertRaises(ValueError):
       data.apply(batching.unbatch())
 
+  # TODO(b/119837791): eager mode doesnt capture raised error, debug.
   @test_util.run_deprecated_v1
-  def testUnbatchDynamicShapeMismatch(self):
+  def testSkipEagerUnbatchDynamicShapeMismatch(self):
     ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
     ph2 = array_ops.placeholder(dtypes.int32, shape=None)
     data = dataset_ops.Dataset.from_tensors((ph1, ph2))
diff --git a/tensorflow/python/data/experimental/kernel_tests/unique_test.py b/tensorflow/python/data/experimental/kernel_tests/unique_test.py
index 1d9941d7f4..42d76a2eb3 100644
--- a/tensorflow/python/data/experimental/kernel_tests/unique_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unique_test.py
@@ -21,12 +21,12 @@ from tensorflow.python.data.experimental.ops import unique
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class UniqueTest(test_base.DatasetTestBase):
 
   def _testSimpleHelper(self, dtype, test_cases):
@@ -44,19 +44,13 @@ class UniqueTest(test_base.DatasetTestBase):
     current_test_case = []
     dataset = dataset_ops.Dataset.from_generator(lambda: current_test_case,
                                                  dtype).apply(unique.unique())
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
 
-    with self.cached_session() as sess:
-      for test_case, expected in test_cases:
-        current_test_case = test_case
-        self.evaluate(iterator.initializer)
-        for element in expected:
-          if dtype == dtypes.string:
-            element = compat.as_bytes(element)
-          self.assertAllEqual(element, self.evaluate(next_element))
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(next_element)
+    for test_case, expected in test_cases:
+      current_test_case = test_case
+      self.assertDatasetProduces(dataset, [
+          compat.as_bytes(element) if dtype == dtypes.string else element
+          for element in expected
+      ])
 
   @test_util.run_deprecated_v1
   def testSimpleInt(self):
diff --git a/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py b/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py
index 9c734b65e0..c47595c16c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py
@@ -20,11 +20,13 @@ from __future__ import print_function
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class WrapDatasetVariantTest(test_base.DatasetTestBase):
 
   def testBasic(self):
@@ -36,15 +38,12 @@ class WrapDatasetVariantTest(test_base.DatasetTestBase):
 
     variant_ds = dataset_ops._VariantDataset(unwrapped_variant,
                                              ds._element_structure)
-    iterator = dataset_ops.make_initializable_iterator(variant_ds)
-    get_next = iterator.get_next()
-
-    with self.cached_session():
-      self.evaluate(iterator.initializer)
-      for i in range(100):
-        self.assertEqual(i, self.evaluate(get_next))
+    get_next = self.getNext(variant_ds, requires_initialization=True)
+    for i in range(100):
+      self.assertEqual(i, self.evaluate(get_next()))
 
-  def testGPU(self):
+  # TODO(b/119837791): add eager coverage when supported.
+  def testSkipEagerGPU(self):
     ds = dataset_ops.Dataset.range(100)
     ds_variant = ds._as_variant_tensor()  # pylint: disable=protected-access
     wrapped_variant = gen_dataset_ops.wrap_dataset_variant(ds_variant)
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index 85f6c9de23..8793fd31bd 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -88,6 +88,7 @@ class DatasetTestBase(test.TestCase):
   def assertDatasetProduces(self,
                             dataset,
                             expected_output=None,
+                            expected_shapes=None,
                             expected_error=None,
                             requires_initialization=False,
                             num_test_iterations=1,
@@ -98,6 +99,8 @@ class DatasetTestBase(test.TestCase):
       dataset: A dataset to check for the expected output / error.
       expected_output: A list of elements that the dataset is expected to
         produce.
+      expected_shapes: A list of TensorShapes which is expected to match
+        output_shapes of dataset.
       expected_error: A tuple `(type, predicate)` identifying the expected error
         `dataset` should raise. The `type` should match the expected exception
         type, while `predicate` should either be 1) a unary function that inputs
@@ -126,6 +129,8 @@ class DatasetTestBase(test.TestCase):
             dataset, requires_initialization=requires_initialization)
         self.evaluate(get_next())
       return
+    if expected_shapes:
+      self.assertEqual(expected_shapes, dataset.output_shapes)
     self.assertGreater(num_test_iterations, 0)
     for _ in range(num_test_iterations):
       get_next = self.getNext(
-- 
GitLab


From ba40882c2fa10338f39a04a21e1086d837acf1f0 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 13 Dec 2018 11:25:43 -0800
Subject: [PATCH 519/873] [TF:XLA] Bump open source llvm revision to r348935

PiperOrigin-RevId: 225404938
---
 tensorflow/workspace.bzl                  | 8 ++++----
 third_party/llvm/llvm.autogenerated.BUILD | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 4eca0bf3cc..b9ae329e9f 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -496,11 +496,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "llvm",
         build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
-        sha256 = "34170a4aa07e434dd537d98a705dcf1b3901f73820fe1d6b9370e8c1c94e9157",
-        strip_prefix = "llvm-0487bd8f42c8b38166ff825d56014d0ff49db604",
+        sha256 = "55769c91b9f5b5255d58a1ecd88e690a4e192dc8cbdf8f984596649abe3b5433",
+        strip_prefix = "llvm-2ba3294845dedcbb27dc49287bfbcdb49aa1e6b7",
         urls = [
-            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/0487bd8f42c8b38166ff825d56014d0ff49db604.tar.gz",
-            "https://github.com/llvm-mirror/llvm/archive/0487bd8f42c8b38166ff825d56014d0ff49db604.tar.gz",
+            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/2ba3294845dedcbb27dc49287bfbcdb49aa1e6b7.tar.gz",
+            "https://github.com/llvm-mirror/llvm/archive/2ba3294845dedcbb27dc49287bfbcdb49aa1e6b7.tar.gz",
         ],
     )
 
diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD
index eb468aa65f..6599b9e91b 100644
--- a/third_party/llvm/llvm.autogenerated.BUILD
+++ b/third_party/llvm/llvm.autogenerated.BUILD
@@ -2241,7 +2241,6 @@ cc_library(
     deps = [
         ":code_gen",
         ":config",
-        ":core",
         ":support",
     ],
 )
-- 
GitLab


From 7b4bfd908f65bb6ec77c65360bd920ba4d4505ec Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Thu, 13 Dec 2018 11:25:58 -0800
Subject: [PATCH 520/873] Add keras parameterization to training generator
 tests.

PiperOrigin-RevId: 225404979
---
 .../keras/engine/training_generator_test.py   | 103 ++++++++++--------
 1 file changed, 55 insertions(+), 48 deletions(-)

diff --git a/tensorflow/python/keras/engine/training_generator_test.py b/tensorflow/python/keras/engine/training_generator_test.py
index 8941428e43..956ca2fe7c 100644
--- a/tensorflow/python/keras/engine/training_generator_test.py
+++ b/tensorflow/python/keras/engine/training_generator_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import training_generator
@@ -60,20 +61,17 @@ def custom_generator(mode=2):
       yield x, y, w
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class TestGeneratorMethods(test.TestCase, parameterized.TestCase):
+class TestGeneratorMethods(keras_parameterized.TestCase):
 
   @unittest.skipIf(
       os.name == 'nt',
       'use_multiprocessing=True does not work on windows properly.')
-  @parameterized.parameters('sequential', 'functional')
-  def test_fit_generator_method(self, model_type):
-    if model_type == 'sequential':
-      model = testing_utils.get_small_sequential_mlp(
-          num_hidden=3, num_classes=4, input_dim=2)
-    else:
-      model = testing_utils.get_small_functional_mlp(
-          num_hidden=3, num_classes=4, input_dim=2)
+  # TODO(b/120940700): Bug with subclassed model inputs.
+  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_all_keras_modes
+  def test_fit_generator_method(self):
+    model = testing_utils.get_small_mlp(
+        num_hidden=3, num_classes=4, input_dim=2)
     model.compile(
         loss='mse',
         optimizer='sgd',
@@ -109,19 +107,17 @@ class TestGeneratorMethods(test.TestCase, parameterized.TestCase):
   @unittest.skipIf(
       os.name == 'nt',
       'use_multiprocessing=True does not work on windows properly.')
-  @parameterized.parameters('sequential', 'functional')
-  def test_evaluate_generator_method(self, model_type):
-    if model_type == 'sequential':
-      model = testing_utils.get_small_sequential_mlp(
-          num_hidden=3, num_classes=4, input_dim=2)
-    else:
-      model = testing_utils.get_small_functional_mlp(
-          num_hidden=3, num_classes=4, input_dim=2)
+  # TODO(b/120940700): Bug with subclassed model inputs.
+  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_all_keras_modes
+  def test_evaluate_generator_method(self):
+    model = testing_utils.get_small_mlp(
+        num_hidden=3, num_classes=4, input_dim=2)
     model.compile(
         loss='mse',
         optimizer='sgd',
-        metrics=['mae', metrics_module.CategoricalAccuracy()])
-    model.summary()
+        metrics=['mae', metrics_module.CategoricalAccuracy()],
+        run_eagerly=testing_utils.should_run_eagerly())
 
     model.evaluate_generator(custom_generator(),
                              steps=5,
@@ -142,18 +138,16 @@ class TestGeneratorMethods(test.TestCase, parameterized.TestCase):
   @unittest.skipIf(
       os.name == 'nt',
       'use_multiprocessing=True does not work on windows properly.')
-  @parameterized.parameters('sequential', 'functional')
-  def test_predict_generator_method(self, model_type):
-    if model_type == 'sequential':
-      model = testing_utils.get_small_sequential_mlp(
-          num_hidden=3, num_classes=4, input_dim=2)
-    else:
-      model = testing_utils.get_small_functional_mlp(
-          num_hidden=3, num_classes=4, input_dim=2)
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes
+  def test_predict_generator_method(self):
+    model = testing_utils.get_small_mlp(
+        num_hidden=3, num_classes=4, input_dim=2)
     model.compile(
         loss='mse',
         optimizer='sgd',
-        metrics=['mae', metrics_module.CategoricalAccuracy()])
+        metrics=['mae', metrics_module.CategoricalAccuracy()],
+        run_eagerly=testing_utils.should_run_eagerly())
 
     model.predict_generator(custom_generator(),
                             steps=5,
@@ -183,13 +177,17 @@ class TestGeneratorMethods(test.TestCase, parameterized.TestCase):
                             max_queue_size=10,
                             workers=0)
 
+  # TODO(b/120940700): Bug with subclassed model inputs.
+  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_all_keras_modes
   def test_generator_methods_with_sample_weights(self):
-    model = keras.models.Sequential()
-    model.add(keras.layers.Dense(4, input_shape=(2,)))
+    model = testing_utils.get_small_mlp(
+        num_hidden=3, num_classes=4, input_dim=2)
     model.compile(
         loss='mse',
         optimizer='sgd',
-        metrics=['mae', metrics_module.CategoricalAccuracy()])
+        metrics=['mae', metrics_module.CategoricalAccuracy()],
+        run_eagerly=testing_utils.should_run_eagerly())
 
     model.fit_generator(custom_generator(mode=3),
                         steps_per_epoch=5,
@@ -214,15 +212,19 @@ class TestGeneratorMethods(test.TestCase, parameterized.TestCase):
                              max_queue_size=10,
                              use_multiprocessing=False)
 
+  # TODO(b/120940700): Bug with subclassed model inputs.
+  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_all_keras_modes
   def test_generator_methods_invalid_use_case(self):
 
     def invalid_generator():
       while 1:
         yield 0
 
-    model = keras.models.Sequential()
-    model.add(keras.layers.Dense(4, input_shape=(2,)))
-    model.compile(loss='mse', optimizer='sgd')
+    model = testing_utils.get_small_mlp(
+        num_hidden=3, num_classes=4, input_dim=2)
+    model.compile(loss='mse', optimizer='sgd',
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     with self.assertRaises(ValueError):
       model.fit_generator(invalid_generator(),
@@ -251,6 +253,9 @@ class TestGeneratorMethods(test.TestCase, parameterized.TestCase):
                                max_queue_size=10,
                                use_multiprocessing=False)
 
+  # TODO(b/120940700): Bug with subclassed model inputs.
+  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_all_keras_modes
   def test_generator_input_to_fit_eval_predict(self):
     val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
 
@@ -258,12 +263,11 @@ class TestGeneratorMethods(test.TestCase, parameterized.TestCase):
       while True:
         yield np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
 
-    inputs = keras.layers.Input(shape=(10,))
-    x = keras.layers.Dense(10, activation='relu')(inputs)
-    outputs = keras.layers.Dense(1, activation='sigmoid')(x)
-    model = keras.Model(inputs, outputs)
+    model = testing_utils.get_small_mlp(
+        num_hidden=10, num_classes=1, input_dim=10)
 
-    model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy')
+    model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy',
+                  run_eagerly=testing_utils.should_run_eagerly())
     model.fit(
         ones_generator(),
         steps_per_epoch=2,
@@ -273,9 +277,11 @@ class TestGeneratorMethods(test.TestCase, parameterized.TestCase):
     model.predict(ones_generator(), steps=2)
 
 
-@tf_test_util.run_all_in_graph_and_eager_modes
-class TestGeneratorMethodsWithSequences(test.TestCase):
+class TestGeneratorMethodsWithSequences(keras_parameterized.TestCase):
 
+  # TODO(b/120940700): Bug with subclassed model inputs.
+  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_all_keras_modes
   def test_training_with_sequences(self):
 
     class DummySequence(keras.utils.Sequence):
@@ -286,8 +292,8 @@ class TestGeneratorMethodsWithSequences(test.TestCase):
       def __len__(self):
         return 10
 
-    model = keras.models.Sequential()
-    model.add(keras.layers.Dense(4, input_shape=(2,)))
+    model = testing_utils.get_small_mlp(
+        num_hidden=3, num_classes=4, input_dim=2)
     model.compile(loss='mse', optimizer='sgd')
 
     model.fit_generator(DummySequence(),
@@ -305,6 +311,9 @@ class TestGeneratorMethodsWithSequences(test.TestCase):
                         workers=0,
                         use_multiprocessing=False)
 
+  # TODO(b/120940700): Bug with subclassed model inputs.
+  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
+  @keras_parameterized.run_all_keras_modes
   def test_sequence_input_to_fit_eval_predict(self):
     val_data = np.ones([10, 10], np.float32), np.ones([10, 1], np.float32)
 
@@ -316,10 +325,8 @@ class TestGeneratorMethodsWithSequences(test.TestCase):
       def __len__(self):
         return 2
 
-    inputs = keras.layers.Input(shape=(10,))
-    x = keras.layers.Dense(10, activation='relu')(inputs)
-    outputs = keras.layers.Dense(1, activation='sigmoid')(x)
-    model = keras.Model(inputs, outputs)
+    model = testing_utils.get_small_mlp(
+        num_hidden=10, num_classes=1, input_dim=10)
 
     model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy')
     model.fit(CustomSequence(), validation_data=val_data, epochs=2)
-- 
GitLab


From fb83aa5d974d69608d4cd40bc7c59b2f51b75ba8 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 13 Dec 2018 11:31:21 -0800
Subject: [PATCH 521/873] [tf.data] Reduce the overhead of performance modeling
 when there are no autotunable knobs.

PiperOrigin-RevId: 225405978
---
 tensorflow/core/framework/dataset.h           | 35 ++++++++++---------
 tensorflow/core/framework/model.cc            |  6 ++--
 tensorflow/core/framework/model.h             | 32 +++++++++++++++--
 .../experimental/map_and_batch_dataset_op.cc  | 10 +++---
 .../numa_map_and_batch_dataset_op.cc          | 10 +++---
 .../data/parallel_interleave_dataset_op.cc    | 10 +++---
 .../kernels/data/parallel_map_dataset_op.cc   |  7 ++--
 .../kernels/data/parallel_map_iterator.cc     |  3 +-
 8 files changed, 72 insertions(+), 41 deletions(-)

diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 7d3776a6ec..cca10fa49e 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -50,8 +50,6 @@ class GraphDefBuilder;
 class Node;
 
 namespace data {
-// A constant that can be used to enable auto-tuning.
-constexpr int kAutoTune = -1;
 
 constexpr int kInfiniteCardinality = -1;
 constexpr int kUnknownCardinality = -2;
@@ -723,36 +721,36 @@ class DatasetBaseIterator : public IteratorBase {
     return model::MakeUnknownNode(std::move(args));
   }
 
-  // When performance modeling is enabled, this method records the fact that
-  // this iterator has dequeued a element from an internal buffer.
+  // When modeling is enabled, this method records the fact that this iterator
+  // has dequeued an element from an internal buffer.
   void RecordBufferDequeue(IteratorContext* ctx,
                            const std::vector<Tensor>& element) {
-    if (node_) {
+    if (collect_resource_usage(ctx)) {
       node_->add_buffered_bytes(-GetAllocatedBytes(element));
     }
   }
 
-  // When performance modeling is enabled, this method records the fact that
-  // this iterator has enqueued a element in an internal buffer.
+  // When modeling is enabled, this method records the fact that this iterator
+  // has enqueued an element in an internal buffer.
   void RecordBufferEnqueue(IteratorContext* ctx,
                            const std::vector<Tensor>& element) {
-    if (node_) {
+    if (collect_resource_usage(ctx)) {
       node_->add_buffered_bytes(GetAllocatedBytes(element));
     }
   }
 
-  // When performance modeling is enabled, this method records the fact that
-  // this iterator has produced an element.
+  // When modeling is enabled, this method records the fact that this iterator
+  // has produced an element.
   void RecordElement(IteratorContext* ctx) {
     if (node_) {
       node_->record_element();
     }
   }
 
-  // When performance modeling is enabled, this method records the fact that
-  // a thread of this iterator has started work.
+  // When modeling is enabled, this method records the fact that a thread of
+  // this iterator has started work.
   void RecordStart(IteratorContext* ctx, bool stop_output = false) {
-    if (node_) {
+    if (collect_resource_usage(ctx)) {
       int64 now_nanos = Env::Default()->NowNanos();
       if (stop_output && node_->output()) {
         node_->output()->record_stop(now_nanos);
@@ -761,10 +759,10 @@ class DatasetBaseIterator : public IteratorBase {
     }
   }
 
-  // When performance modeling is enabled, this method records the fact that
-  // a thread of this iterator has stopped work.
+  // When modeling is enabled, this method records the fact that a thread of
+  // this iterator has stopped work.
   void RecordStop(IteratorContext* ctx, bool start_output = false) {
-    if (node_) {
+    if (collect_resource_usage(ctx)) {
       int64 now_nanos = Env::Default()->NowNanos();
       node_->record_stop(now_nanos);
       if (start_output && node_->output()) {
@@ -774,6 +772,11 @@ class DatasetBaseIterator : public IteratorBase {
   }
 
  private:
+  inline bool collect_resource_usage(IteratorContext* ctx) {
+    auto model = ctx->model();
+    return model && model->collect_resource_usage() && node_;
+  }
+
   BaseParams params_;
 };
 
diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index 3bd5b725b8..b7c6d80910 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -356,6 +356,8 @@ std::shared_ptr<Node> Model::AddNode(Node::Factory factory, const string& name,
   if (output) {
     output->add_input(node);
   }
+  collect_resource_usage_ =
+      collect_resource_usage_ || node->has_tunable_parameters();
   lookup_table_.insert(std::make_pair(name, node));
   return node;
 }
@@ -441,7 +443,7 @@ void Model::RecordElement(const string& name) {
 void Model::RecordStart(const string& name, bool stop_output) {
   tf_shared_lock l(mu_);
   auto node = gtl::FindOrNull(lookup_table_, name);
-  if (node) {
+  if (collect_resource_usage_ && node) {
     int64 now_nanos = Env::Default()->NowNanos();
     if (stop_output && (*node)->output()) {
       (*node)->output()->record_stop(now_nanos);
@@ -453,7 +455,7 @@ void Model::RecordStart(const string& name, bool stop_output) {
 void Model::RecordStop(const string& name, bool start_output) {
   tf_shared_lock l(mu_);
   auto node = gtl::FindOrNull(lookup_table_, name);
-  if (node) {
+  if (collect_resource_usage_ && node) {
     int64 now_nanos = Env::Default()->NowNanos();
     (*node)->record_stop(now_nanos);
     if (start_output && (*node)->output()) {
diff --git a/tensorflow/core/framework/model.h b/tensorflow/core/framework/model.h
index 10059bbfd5..c3a694227c 100644
--- a/tensorflow/core/framework/model.h
+++ b/tensorflow/core/framework/model.h
@@ -34,18 +34,24 @@ namespace tensorflow {
 namespace data {
 namespace model {
 
+// A constant that can be used to enable auto-tuning.
+constexpr int kAutoTune = -1;
+
 // Represents thread-safe state that can be shared between an input pipeline and
 // the performance model.
 struct SharedState {
  public:
   SharedState(int64 value, std::shared_ptr<mutex> mu,
               std::shared_ptr<condition_variable> cond_var)
-      : value(value), mu(std::move(mu)), cond_var(std::move(cond_var)) {}
+      : value(value),
+        mu(std::move(mu)),
+        cond_var(std::move(cond_var)),
+        tunable(value == kAutoTune) {}
 
   int64 value;
   std::shared_ptr<mutex> mu;
   std::shared_ptr<condition_variable> cond_var;
-  bool tunable = false;
+  const bool tunable;
 };
 
 // Represents a parameter.
@@ -136,6 +142,15 @@ class Node {
     return buffered_bytes_;
   }
 
+  // Indicates whether the node has tunable parameters.
+  bool has_tunable_parameters() const LOCKS_EXCLUDED(mu_) {
+    tf_shared_lock l(mu_);
+    for (const auto& pair : parameters_) {
+      if (pair.second->state->tunable) return true;
+    }
+    return false;
+  }
+
   // Returns the unique node ID.
   int64 id() const LOCKS_EXCLUDED(mu_) { return id_; }
 
@@ -344,7 +359,10 @@ std::shared_ptr<Node> MakeUnknownNode(Node::Args args);
 // implementation of `DatasetBase` and `DatasetBaseIterator` respectively.
 class Model {
  public:
-  Model() = default;
+  Model() : collect_resource_usage_(false) {}
+
+  // Indicates whether to collect resource usage.
+  bool collect_resource_usage() const { return collect_resource_usage_; }
 
   // Adds a node with the given name and given output.
   std::shared_ptr<Node> AddNode(Node::Factory factory, const string& name,
@@ -388,6 +406,14 @@ class Model {
   int64 id_counter_ GUARDED_BY(mu_) = 1;
   std::shared_ptr<Node> output_ GUARDED_BY(mu_);
   std::map<string, std::shared_ptr<Node>> lookup_table_ GUARDED_BY(mu_);
+
+  // Indicates whether the modeling framework should collect resource usage
+  // (e.g. CPU, memory). The logic for collecting this information assumes that
+  // the collection is not repeatedly disabled and enabled. As a consequence,
+  // the implementation starts collecting resource usage when it encounters a
+  // tunable parameter (because the information is used for for tuning the value
+  // of the parameter) and never stops.
+  std::atomic<bool> collect_resource_usage_;
 };
 
 }  // namespace model
diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
index d86c3a1a63..3ff3135593 100644
--- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
@@ -71,9 +71,10 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     int64 num_parallel_calls;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
                                             &num_parallel_calls));
-    OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
-                errors::InvalidArgument(
-                    "num_parallel_calls must be greater than zero."));
+    OP_REQUIRES(
+        ctx, num_parallel_calls > 0 || num_parallel_calls == model::kAutoTune,
+        errors::InvalidArgument(
+            "num_parallel_calls must be greater than zero."));
 
     bool drop_remainder;
     OP_REQUIRES_OK(ctx,
@@ -268,9 +269,8 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status Initialize(IteratorContext* ctx) override {
         mutex_lock l(*mu_);
-        if (num_parallel_calls_->value == kAutoTune) {
+        if (num_parallel_calls_->value == model::kAutoTune) {
           num_parallel_calls_->value = ctx->runner_threadpool_size();
-          num_parallel_calls_->tunable = true;
         }
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
diff --git a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
index 46233942f0..921f8ad584 100644
--- a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
@@ -76,9 +76,10 @@ class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel {
     int64 num_parallel_calls;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
                                             &num_parallel_calls));
-    OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
-                errors::InvalidArgument(
-                    "num_parallel_calls must be greater than zero."));
+    OP_REQUIRES(
+        ctx, num_parallel_calls > 0 || num_parallel_calls == model::kAutoTune,
+        errors::InvalidArgument(
+            "num_parallel_calls must be greater than zero."));
 
     bool drop_remainder;
     OP_REQUIRES_OK(ctx,
@@ -214,9 +215,8 @@ class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel {
 
       Status Initialize(IteratorContext* ctx) override {
         mutex_lock l(*mu_);
-        if (num_parallel_calls_->value == kAutoTune) {
+        if (num_parallel_calls_->value == model::kAutoTune) {
           num_parallel_calls_->value = ctx->runner_threadpool_size();
-          num_parallel_calls_->tunable = true;
         }
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index 2f6d91e863..f844a00576 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -76,9 +76,10 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
     int64 num_parallel_calls;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
                                             &num_parallel_calls));
-    OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
-                errors::InvalidArgument(
-                    "num_parallel_calls must be greater than zero."));
+    OP_REQUIRES(
+        ctx, num_parallel_calls > 0 || num_parallel_calls == model::kAutoTune,
+        errors::InvalidArgument(
+            "num_parallel_calls must be greater than zero."));
     OP_REQUIRES(
         ctx, num_parallel_calls <= cycle_length,
         errors::InvalidArgument(
@@ -220,9 +221,8 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
 
       Status Initialize(IteratorContext* ctx) override {
         mutex_lock l(*mu_);
-        if (num_parallel_calls_->value == kAutoTune) {
+        if (num_parallel_calls_->value == model::kAutoTune) {
           num_parallel_calls_->value = dataset()->cycle_length_;
-          num_parallel_calls_->tunable = true;
         }
         TF_RETURN_IF_ERROR(
             dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_));
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 5ac81c187c..5c09b2d5dc 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -51,9 +51,10 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
     int32 num_parallel_calls;
     OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "num_parallel_calls",
                                             &num_parallel_calls));
-    OP_REQUIRES(ctx, num_parallel_calls > 0 || num_parallel_calls == kAutoTune,
-                errors::InvalidArgument(
-                    "num_parallel_calls must be greater than zero."));
+    OP_REQUIRES(
+        ctx, num_parallel_calls > 0 || num_parallel_calls == model::kAutoTune,
+        errors::InvalidArgument(
+            "num_parallel_calls must be greater than zero."));
 
     std::unique_ptr<CapturedFunction> captured_func;
     OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments",
diff --git a/tensorflow/core/kernels/data/parallel_map_iterator.cc b/tensorflow/core/kernels/data/parallel_map_iterator.cc
index b97f692500..b62e7059ba 100644
--- a/tensorflow/core/kernels/data/parallel_map_iterator.cc
+++ b/tensorflow/core/kernels/data/parallel_map_iterator.cc
@@ -76,9 +76,8 @@ class ParallelMapIterator : public DatasetBaseIterator {
 
   Status Initialize(IteratorContext* ctx) override {
     mutex_lock l(*mu_);
-    if (num_parallel_calls_->value == kAutoTune) {
+    if (num_parallel_calls_->value == model::kAutoTune) {
       num_parallel_calls_->value = ctx->runner_threadpool_size();
-      num_parallel_calls_->tunable = true;
     }
     TF_RETURN_IF_ERROR(
         input_dataset_->MakeIterator(ctx, prefix(), &input_impl_));
-- 
GitLab


From 27cffd795981f6d86e3b09b6d82c384d8b4e117a Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 13 Dec 2018 11:36:51 -0800
Subject: [PATCH 522/873] [XLA:CPU:Mac] Add __bzero intrinsic on Mac.

Make the log message on missing intrinsic louder (ideally, we wouldn't crash, too.)

PiperOrigin-RevId: 225407043
---
 tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
index bd6868d397..296f39a485 100644
--- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
+++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc
@@ -139,7 +139,7 @@ llvm::JITSymbol SimpleOrcJIT::ResolveRuntimeSymbol(const std::string& name) {
   }
 
   if (func_addr == nullptr) {
-    VLOG(2) << "Unable to resolve runtime symbol: " << name;
+    LOG(ERROR) << "Unable to resolve runtime symbol: " << name;
     return nullptr;
   }
   llvm::JITEvaluatedSymbol symbol_info(reinterpret_cast<uint64_t>(func_addr),
@@ -316,6 +316,7 @@ bool RegisterKnownJITSymbols() {
   registry->Register("memset", reinterpret_cast<void*>(memset));
 
 #ifdef __APPLE__
+  registry->Register("__bzero", reinterpret_cast<void*>(bzero));
   registry->Register("memset_pattern16",
                      reinterpret_cast<void*>(memset_pattern16));
 #endif
-- 
GitLab


From caf5da8cc5a14266c0f42993687b40ac923ba02c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 11:45:07 -0800
Subject: [PATCH 523/873] Enable RaggedTensor dispatch for tf.where with
 x=y=None.

PiperOrigin-RevId: 225408570
---
 tensorflow/python/ops/ragged/ragged_dispatch.py      | 2 ++
 tensorflow/python/ops/ragged/ragged_dispatch_test.py | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/tensorflow/python/ops/ragged/ragged_dispatch.py b/tensorflow/python/ops/ragged/ragged_dispatch.py
index 77990a8b18..ecc7f5d611 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch.py
@@ -76,6 +76,8 @@ def _get_arg_infos(func, arg_names):
 
 def _is_convertible_to_tensor(value):
   """Returns true if `value` is convertible to a `Tensor`."""
+  if value is None:
+    return True
   if isinstance(value,
                 (ops.Tensor, variables.Variable, np.ndarray, int, float, str)):
     return True
diff --git a/tensorflow/python/ops/ragged/ragged_dispatch_test.py b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
index fb3dabc3eb..9d70470f05 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch_test.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch_test.py
@@ -555,6 +555,10 @@ class RaggedElementwiseOpsTest(ragged_test_util.RaggedTensorTestCase,
                 ragged_factory_ops.constant_value([[b'A', b'B'], [b'C']]),
                 ragged_factory_ops.constant_value([[b'a', b'b'], [b'c']])),
           expected=ragged_factory_ops.constant_value([[b'A', b'b'], [b'C']])),
+      dict(
+          op=array_ops.where,
+          args=(ragged_factory_ops.constant_value([[True, False], [True]]),),
+          expected=[[0, 0], [1, 0]]),
       dict(
           op=math_ops.unsorted_segment_sum,
           kwargs={
-- 
GitLab


From c4d9c9b068bbf98c015dab63549343d8273cc4f2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 11:57:48 -0800
Subject: [PATCH 524/873] Add benchmark for Graph -> GraphDef conversion.

Benchmark output:
Run on ** redacted ** (72 X 2993 MHz CPUs); 2018-12-13T11:50:10.199225713-08:00
CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB
Benchmark              Time(ns)        CPU(ns)     Iterations
-------------------------------------------------------------
BM_ToGraphDef/10/2         5880           5888         100000
BM_ToGraphDef/64/2        18670          18699          37373
BM_ToGraphDef/512/2      125906         126110           5606
BM_ToGraphDef/4k/2      1331662        1333420            538
BM_ToGraphDef/32k/2    13059699       13086186             51
BM_ToGraphDef/10/4         9929           9947          71281
BM_ToGraphDef/64/4        27190          27235          25837
BM_ToGraphDef/512/4      164695         164936           4245
BM_ToGraphDef/4k/4      1794584        1797493            395
BM_ToGraphDef/32k/4    16604391       16641067             41
BM_ToGraphDef/10/8        18191          18212          38647
BM_ToGraphDef/64/8        45134          45191          15412
BM_ToGraphDef/512/8      327305         327803           2177
BM_ToGraphDef/4k/8      2868544        2872056            250
BM_ToGraphDef/32k/8    25948189       25999957             27
BM_ToGraphDef/10/16       34698          34763          20267
BM_ToGraphDef/64/16       78154          78332           8873
BM_ToGraphDef/512/16     595667         596683           1000
BM_ToGraphDef/4k/16     4867696        4877087            142
BM_ToGraphDef/32k/16   46445400       46518659             15
PiperOrigin-RevId: 225410602
---
 tensorflow/core/graph/graph_test.cc | 39 +++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index 333c32567f..e7762fd414 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -799,5 +799,44 @@ BENCHMARK(BM_GraphCreation)->ArgPair(1 << 9, 16);
 BENCHMARK(BM_GraphCreation)->ArgPair(1 << 12, 16);
 BENCHMARK(BM_GraphCreation)->ArgPair(1 << 15, 16);
 
+static void BM_ToGraphDef(int iters, int num_nodes, int num_edges_per_node) {
+  testing::StopTiming();
+  const GraphDef graph_def = CreateGraphDef(num_nodes, num_edges_per_node);
+  const auto registry = OpRegistry::Global();
+  GraphConstructorOptions opts;
+  // Warmup step.
+  Graph graph(registry);
+  TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph_def, &graph));
+  int64 sum = 0;
+  testing::StartTiming();
+  for (int i = 0; i < iters; ++i) {
+    GraphDef graph_def;
+    graph.ToGraphDef(&graph_def);
+    sum += graph_def.node_size();
+  }
+  VLOG(1) << sum;
+  testing::StopTiming();
+}
+BENCHMARK(BM_ToGraphDef)->ArgPair(10, 2);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 6, 2);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 9, 2);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 12, 2);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 15, 2);
+BENCHMARK(BM_ToGraphDef)->ArgPair(10, 4);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 6, 4);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 9, 4);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 12, 4);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 15, 4);
+BENCHMARK(BM_ToGraphDef)->ArgPair(10, 8);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 6, 8);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 9, 8);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 12, 8);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 15, 8);
+BENCHMARK(BM_ToGraphDef)->ArgPair(10, 16);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 6, 16);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 9, 16);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 12, 16);
+BENCHMARK(BM_ToGraphDef)->ArgPair(1 << 15, 16);
+
 }  // namespace
 }  // namespace tensorflow
-- 
GitLab


From bd1aaf9f95497671e43fa9436844bab2da5e0d64 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 13 Dec 2018 12:02:33 -0800
Subject: [PATCH 525/873] [tf.data] Adds default values for `tf.data.Options`'s
 `experimental_optimization`, `experimental_stats`, and
 `experimental_threading` property. Changes the default `latency_all_edges`
 option on `StatsOptions`. In order to turn on latency statistics, a user now
 has to explicitly specify `options.experimental_stats.latency_all_edges =
 True`.

PiperOrigin-RevId: 225411510
---
 .../optimization/filter_fusion_test.py        |  2 -
 .../optimization/hoist_random_uniform_test.py |  3 -
 .../optimization/latency_all_edges_test.py    | 25 -----
 .../map_and_filter_fusion_test.py             |  3 -
 .../optimization/map_fusion_test.py           |  2 -
 .../optimization/map_parallelization_test.py  |  2 -
 .../optimization/map_vectorization_test.py    |  9 +-
 .../optimization/optimize_dataset_test.py     | 19 ++--
 .../kernel_tests/stats_dataset_ops_test.py    |  2 -
 .../experimental/ops/optimization_options.py  |  8 +-
 .../data/experimental/ops/prefetching_ops.py  |  5 +-
 .../data/experimental/ops/stats_options.py    | 29 ++----
 .../experimental/ops/threading_options.py     |  8 +-
 tensorflow/python/data/kernel_tests/BUILD     | 13 +++
 .../python/data/kernel_tests/dataset_test.py  | 48 +---------
 .../multi_device_iterator_test.py             |  2 -
 .../python/data/kernel_tests/options_test.py  | 96 +++++++++++++++++++
 tensorflow/python/data/ops/dataset_ops.py     | 14 +--
 .../data/ops/multi_device_iterator_ops.py     |  5 +-
 tensorflow/python/data/util/options.py        | 23 +++--
 tensorflow/python/data/util/options_test.py   |  7 +-
 21 files changed, 164 insertions(+), 161 deletions(-)
 create mode 100644 tensorflow/python/data/kernel_tests/options_test.py

diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/filter_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/filter_fusion_test.py
index 7371cf31df..3ce921b5ef 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/filter_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/filter_fusion_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -72,7 +71,6 @@ class FilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     dataset = dataset.cache()
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
     options.experimental_optimization.filter_fusion = True
     dataset = dataset.with_options(options)
     expected_output = []
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
index 5f3a8683fb..f080891f2e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/hoist_random_uniform_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
@@ -92,7 +91,6 @@ class HoistRandomUniformTest(test_base.DatasetTestBase, parameterized.TestCase):
             ["Zip[0]", "Map"] if will_optimize else ["Map"])).map(function)
 
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
     options.experimental_optimization.hoist_random_uniform = True
     dataset = dataset.with_options(options)
     self._testDataset(dataset)
@@ -109,7 +107,6 @@ class HoistRandomUniformTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = dataset_ops.Dataset.range(5).apply(
         optimization.assert_next(["Zip[0]", "Map"])).map(random_with_capture)
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
     options.experimental_optimization.hoist_random_uniform = True
     dataset = dataset.with_options(options)
     self._testDataset(dataset)
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
index fc65f52704..8af86da852 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/latency_all_edges_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 from tensorflow.python.data.experimental.kernel_tests import stats_dataset_test_base
 from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.experimental.ops import stats_aggregator
-from tensorflow.python.data.experimental.ops import stats_options
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
@@ -36,7 +35,6 @@ class LatencyAllEdgesTest(stats_dataset_test_base.StatsDatasetTestBase):
             ["LatencyStats", "Map", "LatencyStats", "Prefetch",
              "LatencyStats"])).map(lambda x: x * x).prefetch(1)
     options = dataset_ops.Options()
-    options.experimental_stats = stats_options.StatsOptions()
     options.experimental_stats.latency_all_edges = True
     options.experimental_stats.aggregator = aggregator
     dataset = dataset.with_options(options)
@@ -53,29 +51,6 @@ class LatencyAllEdgesTest(stats_dataset_test_base.StatsDatasetTestBase):
     self._assertSummaryHasCount(summary_str,
                                 "record_latency_PrefetchDataset/_6", 1)
 
-  def testLatencyStatsOptimizationV2(self):
-    aggregator = stats_aggregator.StatsAggregator()
-    dataset = dataset_ops.Dataset.from_tensors(1).apply(
-        optimization.assert_next(
-            ["LatencyStats", "Map", "LatencyStats", "Prefetch",
-             "LatencyStats"])).map(lambda x: x * x).prefetch(1)
-    options = dataset_ops.Options()
-    options.experimental_stats = stats_options.StatsOptions()
-    options.experimental_stats.aggregator = aggregator
-    dataset = dataset.with_options(options)
-    self.assertDatasetProduces(
-        dataset,
-        expected_output=[1],
-        requires_initialization=True,
-        num_test_iterations=1)
-    summary_t = aggregator.get_summary()
-    summary_str = self.evaluate(summary_t)
-    self._assertSummaryHasCount(summary_str, "record_latency_TensorDataset/_1",
-                                1)
-    self._assertSummaryHasCount(summary_str, "record_latency_MapDataset/_4", 1)
-    self._assertSummaryHasCount(summary_str,
-                                "record_latency_PrefetchDataset/_6", 1)
-
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
index db8f214fbf..fa1d673065 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_and_filter_fusion_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -84,7 +83,6 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
         optimization.assert_next(
             ["Map", "FilterByLastComponent"])).map(function).filter(predicate)
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
     options.experimental_optimization.map_and_filter_fusion = True
     dataset = dataset.with_options(options)
     self._testMapAndFilter(dataset, function, predicate)
@@ -103,7 +101,6 @@ class MapAndFilterFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
         optimization.assert_next(["Map",
                                   "Filter"])).map(function).filter(predicate)
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
     options.experimental_optimization.map_and_filter_fusion = True
     dataset = dataset.with_options(options)
     self._testMapAndFilter(dataset, function, predicate)
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_fusion_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_fusion_test.py
index d8d6390374..defdaf0440 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_fusion_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_fusion_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import test_util
@@ -75,7 +74,6 @@ class MapFusionTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     dataset = dataset.cache()
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
     options.experimental_optimization.map_fusion = True
     dataset = dataset.with_options(options)
     expected_output = []
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
index 0ff3fff4f8..d8dd31fee8 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_parallelization_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 from absl.testing import parameterized
 
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
@@ -68,7 +67,6 @@ class MapParallelizationTest(test_base.DatasetTestBase, parameterized.TestCase):
     dataset = dataset_ops.Dataset.range(5).apply(
         optimization.assert_next(next_nodes)).map(function)
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
     options.experimental_optimization.map_parallelization = True
     dataset = dataset.with_options(options)
     if should_optimize:
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
index adc411bfb5..65fa2bac17 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/map_vectorization_test.py
@@ -23,7 +23,6 @@ import numpy as np
 from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import constant_op
@@ -350,9 +349,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
       dataset = dataset.map(map_fn, num_parallel_calls)
       dataset = dataset.batch(100)
       options = dataset_ops.Options()
-      opt_options = optimization_options.OptimizationOptions()
-      opt_options.map_and_batch_fusion = False
-      options.experimental_optimization = opt_options
+      options.experimental_optimization.map_and_batch_fusion = False
       dataset = dataset.with_options(options)
       return dataset
 
@@ -360,9 +357,7 @@ class MapVectorizationTest(test_base.DatasetTestBase, parameterized.TestCase):
     optimized = _make_dataset(["Batch", map_node_name]
                               if expect_optimized else [map_node_name, "Batch"])
     options = dataset_ops.Options()
-    opt_options = optimization_options.OptimizationOptions()
-    opt_options.map_vectorization = True
-    options.experimental_optimization = opt_options
+    options.experimental_optimization.map_vectorization = True
     optimized = optimized.with_options(options)
     return unoptimized, optimized
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
index 230b74e9e8..dd432b8c15 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
@@ -25,7 +25,6 @@ import numpy as np
 from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.experimental.ops import scan_ops
 from tensorflow.python.data.experimental.ops import threadpool
 from tensorflow.python.data.kernel_tests import test_base
@@ -168,9 +167,7 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     # here because of a bug with chaining _OptimizeDatasets when there are
     # nested dataset functions
     options = dataset_ops.Options()
-    opt_options = optimization_options.OptimizationOptions()
-    opt_options.map_and_batch_fusion = True
-    options.experimental_optimization = opt_options
+    options.experimental_optimization.map_and_batch_fusion = True
     dataset = dataset.with_options(options)
     self.assertDatasetProduces(dataset, expected_output=[[0]])
 
@@ -217,10 +214,8 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     unoptimized_dataset = dataset_fn(variable)
 
     options = dataset_ops.Options()
-    opt_options = optimization_options.OptimizationOptions()
-    opt_options.noop_elimination = True
-    opt_options.map_and_batch_fusion = True
-    options.experimental_optimization = opt_options
+    options.experimental_optimization.noop_elimination = True
+    options.experimental_optimization.map_and_batch_fusion = True
     optimized_dataset = unoptimized_dataset.with_options(options)
 
     # Check that warning is logged.
@@ -233,7 +228,7 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
                 "tf.Variable. The following optimizations will be disabled: %s."
                 " To enable optimizations, use resource variables instead by "
                 "calling `tf.enable_resource_variables()` at the start of the "
-                "program." % (", ".join(opt_options._static_optimizations())))
+                "program." % (", ".join(options._static_optimizations())))
     self.assertTrue(any([expected in str(warning) for warning in w]))
 
     # Check that outputs are the same in the optimized and unoptimized cases,
@@ -271,10 +266,8 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     only explicitly enabled optimizations will be applied.
     """
     options = dataset_ops.Options()
-    opt_options = optimization_options.OptimizationOptions()
-    opt_options.hoist_random_uniform = True
-    opt_options.apply_default_optimizations = False
-    options.experimental_optimization = opt_options
+    options.experimental_optimization.hoist_random_uniform = True
+    options.experimental_optimization.apply_default_optimizations = False
     expected_optimizations = ["hoist_random_uniform"]
     self.assertEqual(options._static_optimizations(), expected_optimizations)
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index 97d386157a..59d0ebdb37 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -26,7 +26,6 @@ from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.experimental.ops import stats_aggregator
 from tensorflow.python.data.experimental.ops import stats_ops
-from tensorflow.python.data.experimental.ops import stats_options
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -47,7 +46,6 @@ def function_set_stats_aggregator(dataset,
 
 def function_apply_options(dataset, aggregator, prefix="", counter_prefix=""):
   options = dataset_ops.Options()
-  options.experimental_stats = stats_options.StatsOptions()
   options.experimental_stats.aggregator = aggregator
   options.experimental_stats.prefix = prefix
   options.experimental_stats.counter_prefix = counter_prefix
diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py
index 11b8b86f64..3817e6228c 100644
--- a/tensorflow/python/data/experimental/ops/optimization_options.py
+++ b/tensorflow/python/data/experimental/ops/optimization_options.py
@@ -26,12 +26,14 @@ from tensorflow.python.util.tf_export import tf_export
 class OptimizationOptions(options.OptionsBase):
   """Represents options for dataset optimizations.
 
-  You can apply `OptimizationOptions` to a `dataset` object, as follows:
+  You can set the optimization options of a dataset through the
+  `experimental_optimization` property of `tf.data.Options`; the property is
+  an instance of `tf.data.experimental.OptimizationOptions`.
 
   ```python
   options = tf.data.Options()
-  options.optimization = tf.data.experimental.OptimizationOptions()
-  options.optimization.map_and_batch_fusion = True
+  options.experimental_optimization.map_vectorization = True
+  options.apply_default_optimizations = False
   dataset = dataset.with_options(options)
   ```
   """
diff --git a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py
index e46dfb6568..e3a8622393 100644
--- a/tensorflow/python/data/experimental/ops/prefetching_ops.py
+++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import function
@@ -72,9 +71,7 @@ def copy_to_device(target_device, source_device="/cpu:0"):
   def _apply_fn(dataset):
     options = dataset_ops.Options()
     options.experimental_autotune = False
-    opt_options = optimization_options.OptimizationOptions()
-    opt_options.apply_default_optimizations = False
-    options.experimental_optimization = opt_options
+    options.experimental_optimization.apply_default_optimizations = False
     return _CopyToDeviceDataset(
         dataset, target_device=target_device,
         source_device=source_device).with_options(options)
diff --git a/tensorflow/python/data/experimental/ops/stats_options.py b/tensorflow/python/data/experimental/ops/stats_options.py
index 6e884aa08a..94ae67ff1d 100644
--- a/tensorflow/python/data/experimental/ops/stats_options.py
+++ b/tensorflow/python/data/experimental/ops/stats_options.py
@@ -28,27 +28,19 @@ from tensorflow.python.util.tf_export import tf_export
 class StatsOptions(options.OptionsBase):
   """Represents options for collecting dataset stats using `StatsAggregator`.
 
-  To apply `StatsOptions` with a `tf.data.Dataset` object, use the following
-  pattern:
+  You can set the stats options of a dataset through the `experimental_stats`
+  property of `tf.data.Options`; the property is an instance of
+  `tf.data.experimental.StatsOptions`. For example, to collect latency stats
+  on all dataset edges, use the following pattern:
 
   ```python
   aggregator = tf.data.experimental.StatsAggregator()
 
   options = tf.data.Options()
-  options.experimental_stats = tf.data.experimental.StatsOptions()
   options.experimental_stats.aggregator = aggregator
+  options.experimental_stats.latency_all_edges = True
   dataset = dataset.with_options(options)
   ```
-
-  Note: a `StatsAggregator` object can be attached either duing construction or
-  can be provided later like in above example.
-
-  ```python
-  aggretator = tf.data.experimental.StatsAggregator()
-  # attach aggregator during construction
-  options.experimental_stats = tf.data.experimental.StatsOptions(aggregator)
-  .....
-  ```
   """
 
   aggregator = options.create_option(
@@ -62,18 +54,15 @@ class StatsOptions(options.OptionsBase):
       ty=str,
       docstring=
       "Prefix to prepend all statistics recorded for the input `dataset` with.",
-      default="")
+      default_factory=lambda: "")
 
   counter_prefix = options.create_option(
       name="counter_prefix",
       ty=str,
-      docstring=
-      "Prefix for the statistics recorded as counter.",
-      default="")
+      docstring="Prefix for the statistics recorded as counter.",
+      default_factory=lambda: "")
 
   latency_all_edges = options.create_option(
       name="latency_all_edges",
       ty=bool,
-      docstring=
-      "Whether to add latency measurements on all edges.",
-      default=True)
+      docstring="Whether to add latency measurements on all edges.")
diff --git a/tensorflow/python/data/experimental/ops/threading_options.py b/tensorflow/python/data/experimental/ops/threading_options.py
index dbf662186f..d713b9ae07 100644
--- a/tensorflow/python/data/experimental/ops/threading_options.py
+++ b/tensorflow/python/data/experimental/ops/threading_options.py
@@ -26,11 +26,12 @@ from tensorflow.python.util.tf_export import tf_export
 class ThreadingOptions(options.OptionsBase):
   """Represents options for dataset threading.
 
-  To apply `ThreadingOptions` to a `dataset` object, use the following pattern:
+  You can set the threading options of a dataset through the
+  `experimental_threading` property of `tf.data.Options`; the property is
+  an instance of `tf.data.experimental.ThreadingOptions`.
 
   ```python
   options = tf.data.Options()
-  options.experimental_threading = tf.data.experimental.ThreadingOptions()
   options.experimental_threading.private_threadpool_size = 10
   dataset = dataset.with_options(options)
   ```
@@ -46,5 +47,4 @@ class ThreadingOptions(options.OptionsBase):
       name="private_threadpool_size",
       ty=int,
       docstring=
-      "If set, the dataset will use a private threadpool of the given size.",
-      default=None)
+      "If set, the dataset will use a private threadpool of the given size.")
diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD
index 3390100bed..737ba28ceb 100644
--- a/tensorflow/python/data/kernel_tests/BUILD
+++ b/tensorflow/python/data/kernel_tests/BUILD
@@ -444,6 +444,19 @@ cuda_py_test(
     ],
 )
 
+tf_py_test(
+    name = "options_test",
+    size = "small",
+    srcs = ["options_test.py"],
+    additional_deps = [
+        ":test_base",
+        "//tensorflow/python/data/ops:dataset_ops",
+        "//tensorflow/python/data/experimental/ops:optimization_options",
+        "//tensorflow/python/data/experimental/ops:threading_options",
+        "//tensorflow/python:client_testlib",
+    ],
+)
+
 tf_py_test(
     name = "padded_batch_test",
     size = "small",
diff --git a/tensorflow/python/data/kernel_tests/dataset_test.py b/tensorflow/python/data/kernel_tests/dataset_test.py
index 2952c08be0..820bc8e4e2 100644
--- a/tensorflow/python/data/kernel_tests/dataset_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_test.py
@@ -207,53 +207,6 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     self.assertEqual(2, inputs.count(ds2))
     self.assertEqual(1, inputs.count(ds3))
 
-  def testOptionsDefault(self):
-    ds = dataset_ops.Dataset.range(0)
-    self.assertEqual(dataset_ops.Options(), ds.options())
-
-  def testOptionsOnce(self):
-    options = dataset_ops.Options()
-    ds = dataset_ops.Dataset.range(0).with_options(options).cache()
-    self.assertEqual(options, ds.options())
-
-  def testOptionsTwiceSame(self):
-    options = dataset_ops.Options()
-    options.experimental_autotune = True
-    ds = dataset_ops.Dataset.range(0).with_options(options).with_options(
-        options)
-    self.assertEqual(options, ds.options())
-
-  def testOptionsTwiceDifferent(self):
-    options1 = dataset_ops.Options()
-    options1.experimental_autotune = True
-    options2 = dataset_ops.Options()
-    options2.experimental_deterministic = False
-    ds = dataset_ops.Dataset.range(0).with_options(options1).with_options(
-        options2)
-    self.assertTrue(ds.options().experimental_autotune)
-    # Explicitly check that flag is False since assertFalse allows None
-    self.assertIs(ds.options().experimental_deterministic, False)
-
-  def testOptionsTwiceDifferentError(self):
-    options1 = dataset_ops.Options()
-    options1.experimental_autotune = True
-    options2 = dataset_ops.Options()
-    options2.experimental_autotune = False
-    with self.assertRaisesRegexp(ValueError,
-                                 "Cannot merge incompatible values"):
-      dataset_ops.Dataset.range(0).with_options(options1).with_options(options2)
-
-  def testOptionsMergeOptionsFromMultipleInputs(self):
-    options1 = dataset_ops.Options()
-    options1.experimental_autotune = True
-    options2 = dataset_ops.Options()
-    options2.experimental_deterministic = True
-    ds = dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.range(0).with_options(options1),
-         dataset_ops.Dataset.range(0).with_options(options2)))
-    self.assertTrue(ds.options().experimental_autotune)
-    self.assertTrue(ds.options().experimental_deterministic)
-
   # TODO(b/119882922): use-after-free bug in eager mode.
   # pylint: disable=g-long-lambda
   @parameterized.named_parameters(
@@ -313,5 +266,6 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
           round_trip_dataset, [self.evaluate(tf_value_fn())],
           requires_initialization=True)
 
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
index 0322d1f2c6..433ea620e1 100644
--- a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
+++ b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.data.experimental.ops import optimization
-from tensorflow.python.data.experimental.ops.optimization_options import OptimizationOptions
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import multi_device_iterator_ops
@@ -275,7 +274,6 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
     dataset = dataset.cache()
 
     options = dataset_ops.Options()
-    options.experimental_optimization = OptimizationOptions()
     options.experimental_optimization.noop_elimination = True
     dataset = dataset.with_options(options)
 
diff --git a/tensorflow/python/data/kernel_tests/options_test.py b/tensorflow/python/data/kernel_tests/options_test.py
new file mode 100644
index 0000000000..f5bad3e7ae
--- /dev/null
+++ b/tensorflow/python/data/kernel_tests/options_test.py
@@ -0,0 +1,96 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.data.Options`."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.experimental.ops import optimization_options
+from tensorflow.python.data.experimental.ops import stats_options
+from tensorflow.python.data.experimental.ops import threading_options
+from tensorflow.python.data.kernel_tests import test_base
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.platform import test
+
+
+class OptionsTest(test_base.DatasetTestBase):
+
+  def testOptionsDefault(self):
+    ds = dataset_ops.Dataset.range(0)
+    self.assertEqual(dataset_ops.Options(), ds.options())
+
+  def testOptionsOnce(self):
+    options = dataset_ops.Options()
+    ds = dataset_ops.Dataset.range(0).with_options(options).cache()
+    self.assertEqual(options, ds.options())
+
+  def testOptionsTwiceSame(self):
+    options = dataset_ops.Options()
+    options.experimental_autotune = True
+    ds = dataset_ops.Dataset.range(0).with_options(options).with_options(
+        options)
+    self.assertEqual(options, ds.options())
+
+  def testOptionsTwiceDifferent(self):
+    options1 = dataset_ops.Options()
+    options1.experimental_autotune = True
+    options2 = dataset_ops.Options()
+    options2.experimental_deterministic = False
+    ds = dataset_ops.Dataset.range(0).with_options(options1).with_options(
+        options2)
+    self.assertTrue(ds.options().experimental_autotune)
+    # Explicitly check that flag is False since assertFalse allows None
+    self.assertIs(ds.options().experimental_deterministic, False)
+
+  def testOptionsTwiceDifferentError(self):
+    options1 = dataset_ops.Options()
+    options1.experimental_autotune = True
+    options2 = dataset_ops.Options()
+    options2.experimental_autotune = False
+    with self.assertRaisesRegexp(ValueError,
+                                 "Cannot merge incompatible values"):
+      dataset_ops.Dataset.range(0).with_options(options1).with_options(options2)
+
+  def testOptionsMergeOptionsFromMultipleInputs(self):
+    options1 = dataset_ops.Options()
+    options1.experimental_autotune = True
+    options2 = dataset_ops.Options()
+    options2.experimental_deterministic = True
+    ds = dataset_ops.Dataset.zip(
+        (dataset_ops.Dataset.range(0).with_options(options1),
+         dataset_ops.Dataset.range(0).with_options(options2)))
+    self.assertTrue(ds.options().experimental_autotune)
+    self.assertTrue(ds.options().experimental_deterministic)
+
+  def testOptionsHaveDefaults(self):
+    options1 = dataset_ops.Options()
+    options2 = dataset_ops.Options()
+    self.assertIsNot(options1.experimental_optimization,
+                     options2.experimental_optimization)
+    self.assertIsNot(options1.experimental_stats,
+                     options2.experimental_stats)
+    self.assertIsNot(options1.experimental_threading,
+                     options2.experimental_threading)
+    self.assertEquals(options1.experimental_optimization,
+                      optimization_options.OptimizationOptions())
+    self.assertEquals(options1.experimental_stats,
+                      stats_options.StatsOptions())
+    self.assertEquals(options1.experimental_threading,
+                      threading_options.ThreadingOptions())
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index bee04aaef2..6582ac23b4 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1712,26 +1712,26 @@ class Options(options_lib.OptionsBase):
   experimental_optimization = options_lib.create_option(
       name="experimental_optimization",
       ty=optimization_options.OptimizationOptions,
-      docstring="Associates the given optimization options with the dataset.")
+      docstring="Associates the given optimization options with the dataset.",
+      default_factory=optimization_options.OptimizationOptions)
 
   experimental_stats = options_lib.create_option(
       name="experimental_stats",
       ty=stats_options.StatsOptions,
-      docstring="Associates the given statistics options with the dataset.")
+      docstring="Associates the given statistics options with the dataset.",
+      default_factory=stats_options.StatsOptions)
 
   experimental_threading = options_lib.create_option(
       name="experimental_threading",
       ty=threading_options.ThreadingOptions,
-      docstring="Associates the given threading options with the dataset.")
+      docstring="Associates the given threading options with the dataset.",
+      default_factory=threading_options.ThreadingOptions)
 
   def _static_optimizations(self):
     """Produces the list of enabled static optimizations."""
 
     result = []
-    exp_optimization_options = (
-        self.experimental_optimization or
-        optimization_options.OptimizationOptions())  # If not set, use default
-    result.extend(exp_optimization_options._static_optimizations())  # pylint: disable=protected-access
+    result.extend(self.experimental_optimization._static_optimizations())  # pylint: disable=protected-access
 
     if self.experimental_numa_aware:
       result.append("make_numa_aware")
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index 7586012574..45d0156479 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -17,7 +17,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.experimental.ops import optimization_options
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
@@ -197,9 +196,7 @@ class MultiDeviceIterator(object):
       # non-CPU devices.
       options = dataset_ops.Options()
       options.experimental_autotune = False
-      opt_options = optimization_options.OptimizationOptions()
-      opt_options.apply_default_optimizations = False
-      options.experimental_optimization = opt_options
+      options.experimental_optimization.apply_default_optimizations = False
       ds = ds.with_options(options)
       with ops.device(device):
         self._device_iterators.append(ds.make_initializable_iterator())
diff --git a/tensorflow/python/data/util/options.py b/tensorflow/python/data/util/options.py
index 9badba8e56..c578a831db 100644
--- a/tensorflow/python/data/util/options.py
+++ b/tensorflow/python/data/util/options.py
@@ -48,27 +48,32 @@ class OptionsBase(object):
       return NotImplemented
 
 
-def create_option(name, ty, docstring, default=None):
+def create_option(name, ty, docstring, default_factory=lambda: None):
   """Creates a type-checked property.
 
   Args:
-    name: the name to use
-    ty: the type to use
-    docstring: the docstring to use
-    default: the default value to use
+    name: The name to use.
+    ty: The type to use. The type of the property will be validated when it
+      is set.
+    docstring: The docstring to use.
+    default_factory: A callable that takes no arguments and returns a default
+      value to use if not set.
 
   Returns:
     A type-checked property.
   """
 
-  def get_fn(self):
-    return self._options.get(name, default)  # pylint: disable=protected-access
+  def get_fn(option):
+    # pylint: disable=protected-access
+    if name not in option._options:
+      option._options[name] = default_factory()
+    return option._options.get(name)
 
-  def set_fn(self, value):
+  def set_fn(option, value):
     if not isinstance(value, ty):
       raise TypeError("Property \"%s\" must be of type %s, got: %r (type: %r)" %
                       (name, ty, value, type(value)))
-    self._options[name] = value  # pylint: disable=protected-access
+    option._options[name] = value  # pylint: disable=protected-access
 
   return property(get_fn, set_fn, None, docstring)
 
diff --git a/tensorflow/python/data/util/options_test.py b/tensorflow/python/data/util/options_test.py
index c5169835a3..8d41ef223f 100644
--- a/tensorflow/python/data/util/options_test.py
+++ b/tensorflow/python/data/util/options_test.py
@@ -24,9 +24,12 @@ from tensorflow.python.platform import test
 
 class _TestOptions(options.OptionsBase):
   x = options.create_option(
-      name="x", ty=int, docstring="the answer to everything", default=42)
+      name="x",
+      ty=int,
+      docstring="the answer to everything",
+      default_factory=lambda: 42)
   y = options.create_option(
-      name="y", ty=float, docstring="a tasty pie", default=3.14)
+      name="y", ty=float, docstring="a tasty pie", default_factory=lambda: 3.14)
 
 
 class _NestedTestOptions(options.OptionsBase):
-- 
GitLab


From d501a62aae66a2cf160cf01943886fb4d7fc7096 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 13 Dec 2018 12:08:14 -0800
Subject: [PATCH 526/873] Fix bug inside _WeakrefSelf that was effectively
 suppressing autograph for methods.

PiperOrigin-RevId: 225412615
---
 tensorflow/python/eager/function.py | 53 +++++++++++++++++------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 3aa7b7e27f..6770f1d3b3 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1223,19 +1223,18 @@ def validate_signature(signature):
 def defun(func=None, input_signature=None, autograph=True):
   """Compiles a Python function into a callable TensorFlow graph.
 
-  `defun` (short for "define function") trace-compiles a Python function
+  `defun` (short for "define function") compiles a Python function
   composed of TensorFlow operations into a callable that executes a `tf.Graph`
   containing those operations. The callable produced by `defun` contains only
   the subgraph of TensorFlow operations that were executed when the Python
   function was called with a particular input signature, defined as a list
   of the shapes and dtypes of the Python function's Tensor-valued arguments and
-  the values of its non-Tensor Python objects. In particular, `defun` is _not_ a
-  compiler for arbitrary Python code.
+  the values of its non-Tensor Python objects.
 
   When eager execution is enabled, the ability to create graphs from Python
   functions makes it possible to incrementally trade off debugability and
   interactivity for performance.  Functions compiled with `defun` cannot be
-  inspected with `pdb` and `print` statements; however, executing a graph
+  inspected with `pdb`; however, executing a graph
   generated by `defun` sometimes takes less time and memory than eagerly
   executing the corresponding Python function, since specifying computations as
   graphs allows for optimizations like automatic buffer reuse and
@@ -1326,6 +1325,7 @@ def defun(func=None, input_signature=None, autograph=True):
   outer graph otherwise.
 
   _Input Signatures_
+
   By default, `F = tf.contrib.eager.defun(f)` instantiates a separate graph
   for every unique sequence of the shapes and dtypes of Tensor arguments and
   the values of Python objects it is invoked with. For example, calling
@@ -1384,6 +1384,7 @@ def defun(func=None, input_signature=None, autograph=True):
   Tensors as arguments and must not take unnamed keyword arguments (**kwargs).
 
   _Tracing_
+
   Be aware that because `F` only logs TensorFlow operations, all the other
   Python code that `f` executes will only shape the _construction_ of the graphs
   that `F` executes: the Python code won't be executed when the graphs
@@ -1409,6 +1410,7 @@ def defun(func=None, input_signature=None, autograph=True):
   replace the call to `np.random.randn` with `tf.random_normal((5, 5))`.
 
   _Python Side-Effects_
+
   A corollary of the previous discussion on tracing is the following: If a
   Python function `f` has Python side-effects, then executing `f` multiple times
   will not necessarily be semantically equivalent to executing `F =
@@ -1416,7 +1418,8 @@ def defun(func=None, input_signature=None, autograph=True):
   that `defun` only captures the subgraph of TensorFlow operations that is
   constructed when `f` is called in a graph-building context.
 
-  _Python Control Flow_.
+  _Python Control Flow_
+
   The structure of many machine learning computations depend upon whether one is
   training or validating, and it is common to nest specialized logic under `if
   training:` blocks. By mapping each input signature to a unique graph, `defun`
@@ -1445,27 +1448,26 @@ def defun(func=None, input_signature=None, autograph=True):
   exact_outputs = lossy_matmul(W, x, training=False)
   ```
 
-  On the other hand, because `defun` generates graphs by tracing and not by
-  source code analysis, it fully unrolls Python `for` and `while` loops,
-  potentially creating large graphs. If your Python function has native loops
-  that run for many iterations, consider replacing them with `tf.while_loop`
-  operations.
+  _TensorFlow Control Flow_
 
-  When constructing graphs, `tf.Tensor` objects cannot be used as Python
-  `bool` objects. This means, for example, that you should replace code in `f`
-  resembling
+  When `autograph` is `True`, data-dependent control flow is allowed as well.
+  Control flow statements that depend on `Tensor` values are staged into
+  corresponding TensorFlow ops. For example, the following code will work as
+  expected:
 
   ```python
-
-  if tensor < 10:
-    true_fn()
-  else:
-    false_fn()
+  @tf.contrib.eager.defun
+  def dynamic_rnn_loop(cell, seq):
+    state, output = cell.zero_state()
+    for input in seq:
+      state, output = cell(input, state)
+    return output
   ```
 
-  with `tf.cond(tensor < 10, true_fn, false_fn)`.
+  For more information see `tf.autograph`.
 
   _Variables_
+
   TensorFlow operations related to variable creation and initialization are
   automatically lifted out of the graphs generated by `defun`. In practice, this
   implies that variable creation and initialization only happen the first time
@@ -1638,12 +1640,19 @@ def class_method_to_instance_method(original_function, instance):
   assert hasattr(original_function, "python_function")
 
   def bound_method_wrapper(*args, **kwargs):
+    """Wraps either a dummy MethodType or a converted AutoGraph function."""
     # __wrapped__ allows AutoGraph to swap in a converted function.
     wrapped_fn = bound_method_wrapper.__wrapped__
-    # If __wrapped__ was not replaced, then call original_function.
-    # TODO(b/119246461): This needs to be simplified.
-    if tf_inspect.ismethod(wrapped_fn):
+
+    if wrapped_fn is bound_method_wrapper.__original_wrapped__:
+      # If __wrapped__ was not replaced, then call original_function.
       wrapped_fn = original_function.python_function
+      if tf_inspect.ismethod(wrapped_fn):
+        wrapped_fn = six.get_unbound_function(wrapped_fn)
+      return wrapped_fn(weak_instance(), *args, **kwargs)
+
+    # If __wrapped__ was replaced, then it is always an unbound function
+    # that takes self as first argument.
     return wrapped_fn(weak_instance(), *args, **kwargs)
 
   # pylint: disable=protected-access
-- 
GitLab


From ec702337b8a1dd4ae22fead00ee89f15d0b314ee Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 12:09:56 -0800
Subject: [PATCH 527/873] Modify Hash() function of HloComputation and
 HloInstruction to prevent non-termination from infinite recursive calls.

PiperOrigin-RevId: 225412890
---
 tensorflow/compiler/xla/service/hlo_computation.cc |  2 --
 tensorflow/compiler/xla/service/hlo_computation.h  |  6 ------
 tensorflow/compiler/xla/service/hlo_instruction.cc | 14 ++++++++++++--
 tensorflow/compiler/xla/service/hlo_instruction.h  |  8 ++++++++
 .../compiler/xla/service/hlo_instructions.cc       |  8 +++++++-
 tensorflow/compiler/xla/service/hlo_module.h       |  4 +++-
 6 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 80f7247048..ca663b8b4a 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -711,8 +711,6 @@ bool HloComputation::operator==(const HloComputation& other) const {
   return eq(root_instruction(), other.root_instruction());
 }
 
-uint64 HloComputation::Hash() const { return root_instruction()->Hash(); }
-
 Status HloComputation::ReplaceWithNewInstruction(
     HloInstruction* old_instruction,
     std::unique_ptr<HloInstruction> new_instruction) {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index da8a5320bb..5467d0a68b 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -264,12 +264,6 @@ class HloComputation {
   // Return whether `*this` and `other` are functionally equivalent.
   bool operator==(const HloComputation& other) const;
 
-  // Generates a hash value of an HLO computation. Hash considers
-  // information on opcode, shape, operands, and typically a root instruction.
-  // This function returns the same hash value for equivalent HLO computations,
-  // with respect to HloInstruction::Identical() method.
-  uint64 Hash() const;
-
   // Replaces old instruction with newly created instruction. Removes old
   // instruction from computation. Updates uses and root instruction.
   Status ReplaceWithNewInstruction(
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index c57d9c1e86..8b2ace1e82 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -1761,7 +1761,12 @@ bool HloInstruction::IdenticalSlowPath(
   return false;
 }
 
-uint64 HloInstruction::Hash() const {
+static uint64 HashOperand(const HloInstruction* hlo) {
+  return ShapeUtil::Hash(hlo->shape());
+}
+
+uint64 HloInstruction::Hash(
+    const std::function<uint64(const HloInstruction*)>& hash_operand) const {
   using tensorflow::Hash64Combine;
 
   uint64 hash_value = Hash64Combine(0, static_cast<uint64>(opcode()));
@@ -1770,7 +1775,7 @@ uint64 HloInstruction::Hash() const {
   if (!IsCrossModuleAllReduce()) {
     if (!operands().empty()) {
       for (size_t i = 0; i < operands().size(); ++i) {
-        hash_value = Hash64Combine(hash_value, operand(i)->Hash());
+        hash_value = Hash64Combine(hash_value, hash_operand(operand(i)));
       }
     }
   }
@@ -1779,6 +1784,11 @@ uint64 HloInstruction::Hash() const {
   return hash_value;
 }
 
+uint64 HloInstruction::Hash() const {
+  // Use HashOperand as an argument to prevent non-termination.
+  return Hash(HashOperand);
+}
+
 uint64 HloInstruction::InnerHash() const { return 13; }
 
 void HloInstruction::RemoveUser(HloInstruction* user) {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index a312b6bf0d..dd77f101a0 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -909,6 +909,14 @@ class HloInstruction {
   // information on opcode, shape, operands, and typically a root instruction.
   // This function returns the same hash value for equivalent HLO instructions,
   // with respect to HloInstruction::Identical() method.
+  //
+  // Uses hash_operand function to compute hash values of its operands.
+  // At the very top level, hash_operand should be non-recursive to prevent
+  // non-termination.
+  uint64 Hash(
+      const std::function<uint64(const HloInstruction*)>& hash_operand) const;
+
+  // Calls the above method with non-recursive hash_operand function.
   uint64 Hash() const;
 
   // Returns whether the instruction has a constant operand.
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 2fe6395efe..f55de6a1c0 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1372,8 +1372,14 @@ bool HloFusionInstruction::IdenticalSlowPath(
                          other.fused_instructions_computation());
 }
 
+static uint64 HashOperandRecursive(const HloInstruction* hlo) {
+  return hlo->Hash(HashOperandRecursive);
+}
+
 uint64 HloFusionInstruction::InnerHash() const {
-  return fused_instructions_computation()->Hash();
+  // Use HashOperandRecursive to recursively compute hash on inner operands.
+  return fused_instructions_computation()->root_instruction()->Hash(
+      HashOperandRecursive);
 }
 
 std::unique_ptr<HloInstruction> HloFusionInstruction::CloneWithNewOperandsImpl(
diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h
index 7b9cbf9a53..f1310e4b27 100644
--- a/tensorflow/compiler/xla/service/hlo_module.h
+++ b/tensorflow/compiler/xla/service/hlo_module.h
@@ -136,7 +136,9 @@ class HloModule {
   // information on opcode, shape, operands, and typically a root instruction.
   // This function returns the same hash value for equivalent HLO modules,
   // with respect to HloInstruction::Identical() method.
-  uint64 Hash() const { return entry_computation()->Hash(); }
+  uint64 Hash() const {
+    return entry_computation()->root_instruction()->Hash();
+  }
 
   // Gets the computations in this module.
   //
-- 
GitLab


From 92937fb439e102f31c26bd9234e03552499f6ba4 Mon Sep 17 00:00:00 2001
From: Nick Felt <nickfelt@google.com>
Date: Thu, 13 Dec 2018 12:18:34 -0800
Subject: [PATCH 528/873] Add tf.summary.write and tf.summary.summary_scope for
 TF 2.0

The write() op is the TF 2.0 version of tf.contrib.summary.generic() and still backed by the WriteSummary C++ op, but with some changes:
- first parameter is now the full summary tag, instead of deriving tag from "name"
- step parameter is now mandatory since there's no global step
- recording is "on by default" - no longer requires always_record_summaries()
- no more "family" parameter - this should be done via proper scoping

The summary_scope() is a helper for writing TF 2.0 summary ops like scalar(), etc. that provides better semantics for how tags are generated relative to name scopes.

PiperOrigin-RevId: 225414160
---
 tensorflow/python/framework/test_util.py      |   9 +-
 tensorflow/python/kernel_tests/BUILD          |  19 ++
 .../python/kernel_tests/summary_ops_test.py   | 267 ++++++++++++++++++
 tensorflow/python/ops/summary_ops_v2.py       | 117 +++++++-
 .../api/golden/v2/tensorflow.summary.pbtxt    |   8 +
 5 files changed, 412 insertions(+), 8 deletions(-)
 create mode 100644 tensorflow/python/kernel_tests/summary_ops_test.py

diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index d06e1f574b..af1687c8ef 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1023,12 +1023,13 @@ def also_run_as_tf_function(f):
   """
 
   def decorated(*args, **kwds):
+    def bound_f():
+      f(*args, **kwds)
     with context.eager_mode():
       # Running in eager mode
-      f(*args, **kwds)
-
-      defun_f = def_function.function(f)
-      defun_f(*args, **kwds)
+      bound_f()
+      # Running as TF function
+      def_function.function(bound_f)()
 
   return decorated
 
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index ddb2ddaf63..bd5c103b38 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -1068,6 +1068,25 @@ tf_py_test(
     ],
 )
 
+tf_py_test(
+    name = "summary_ops_test",
+    size = "small",
+    srcs = ["summary_ops_test.py"],
+    additional_deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:summary_ops_v2",
+        "//tensorflow/python:tensor_util",
+        "//tensorflow/python/eager:function",
+        "//tensorflow/python/eager:context",
+    ],
+)
+
 tf_py_test(
     name = "summary_v1_ops_test",
     size = "small",
diff --git a/tensorflow/python/kernel_tests/summary_ops_test.py b/tensorflow/python/kernel_tests/summary_ops_test.py
new file mode 100644
index 0000000000..cd446eb40e
--- /dev/null
+++ b/tensorflow/python/kernel_tests/summary_ops_test.py
@@ -0,0 +1,267 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for V2 summary ops from summary_ops_v2."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+from tensorflow.core.framework import summary_pb2
+from tensorflow.core.util import event_pb2
+from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
+from tensorflow.python.framework import test_util
+from tensorflow.python.lib.io import tf_record
+from tensorflow.python.ops import summary_ops_v2 as summary_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import test
+
+
+class SummaryOpsTest(test_util.TensorFlowTestCase):
+
+  def testWrite(self):
+    logdir = self.get_temp_dir()
+    with context.eager_mode():
+      with summary_ops.create_file_writer(logdir).as_default():
+        output = summary_ops.write('tag', 42, step=12)
+        self.assertTrue(output.numpy())
+    events = events_from_logdir(logdir)
+    self.assertEqual(2, len(events))
+    self.assertEqual(12, events[1].step)
+    value = events[1].summary.value[0]
+    self.assertEqual('tag', value.tag)
+    self.assertEqual(42, to_numpy(value))
+
+  def testWrite_fromFunction(self):
+    logdir = self.get_temp_dir()
+    @def_function.function
+    def f():
+      with summary_ops.create_file_writer(logdir).as_default():
+        return summary_ops.write('tag', 42, step=12)
+    with context.eager_mode():
+      output = f()
+      self.assertTrue(output.numpy())
+    events = events_from_logdir(logdir)
+    self.assertEqual(2, len(events))
+    self.assertEqual(12, events[1].step)
+    value = events[1].summary.value[0]
+    self.assertEqual('tag', value.tag)
+    self.assertEqual(42, to_numpy(value))
+
+  def testWrite_metadata(self):
+    logdir = self.get_temp_dir()
+    metadata = summary_pb2.SummaryMetadata()
+    metadata.plugin_data.plugin_name = 'foo'
+    with context.eager_mode():
+      with summary_ops.create_file_writer(logdir).as_default():
+        summary_ops.write('obj', 0, 0, metadata=metadata)
+        summary_ops.write('bytes', 0, 0, metadata=metadata.SerializeToString())
+        m = constant_op.constant(metadata.SerializeToString())
+        summary_ops.write('string_tensor', 0, 0, metadata=m)
+    events = events_from_logdir(logdir)
+    self.assertEqual(4, len(events))
+    self.assertEqual(metadata, events[1].summary.value[0].metadata)
+    self.assertEqual(metadata, events[2].summary.value[0].metadata)
+    self.assertEqual(metadata, events[3].summary.value[0].metadata)
+
+  def testWrite_name(self):
+    @def_function.function
+    def f():
+      output = summary_ops.write('tag', 42, step=12, name='anonymous')
+      self.assertTrue(output.name.startswith('anonymous'))
+    f()
+
+  def testWrite_ndarray(self):
+    logdir = self.get_temp_dir()
+    with context.eager_mode():
+      with summary_ops.create_file_writer(logdir).as_default():
+        summary_ops.write('tag', [[1, 2], [3, 4]], step=12)
+    events = events_from_logdir(logdir)
+    value = events[1].summary.value[0]
+    self.assertAllEqual([[1, 2], [3, 4]], to_numpy(value))
+
+  def testWrite_tensor(self):
+    logdir = self.get_temp_dir()
+    with context.eager_mode():
+      t = constant_op.constant([[1, 2], [3, 4]])
+      with summary_ops.create_file_writer(logdir).as_default():
+        summary_ops.write('tag', t, step=12)
+      expected = t.numpy()
+    events = events_from_logdir(logdir)
+    value = events[1].summary.value[0]
+    self.assertAllEqual(expected, to_numpy(value))
+
+  def testWrite_tensor_fromFunction(self):
+    logdir = self.get_temp_dir()
+    @def_function.function
+    def f(t):
+      with summary_ops.create_file_writer(logdir).as_default():
+        summary_ops.write('tag', t, step=12)
+    with context.eager_mode():
+      t = constant_op.constant([[1, 2], [3, 4]])
+      f(t)
+      expected = t.numpy()
+    events = events_from_logdir(logdir)
+    value = events[1].summary.value[0]
+    self.assertAllEqual(expected, to_numpy(value))
+
+  def testWrite_stringTensor(self):
+    logdir = self.get_temp_dir()
+    with context.eager_mode():
+      with summary_ops.create_file_writer(logdir).as_default():
+        summary_ops.write('tag', [b'foo', b'bar'], step=12)
+    events = events_from_logdir(logdir)
+    value = events[1].summary.value[0]
+    self.assertAllEqual([b'foo', b'bar'], to_numpy(value))
+
+  @test_util.also_run_as_tf_function
+  def testWrite_noDefaultWriter(self):
+    with context.eager_mode():
+      self.assertFalse(summary_ops.write('tag', 42, step=0))
+
+  def testWrite_shouldRecordSummaries(self):
+    logdir = self.get_temp_dir()
+    with context.eager_mode():
+      with summary_ops.create_file_writer(logdir).as_default():
+        self.assertTrue(summary_ops.write('default_on', 1, step=0))
+        with summary_ops.always_record_summaries():
+          self.assertTrue(summary_ops.write('set_on', 1, step=0))
+        with summary_ops.never_record_summaries():
+          self.assertFalse(summary_ops.write('set_off', 1, step=0))
+    events = events_from_logdir(logdir)
+    self.assertEqual(3, len(events))
+    self.assertEqual('default_on', events[1].summary.value[0].tag)
+    self.assertEqual('set_on', events[2].summary.value[0].tag)
+
+  def testWrite_shouldRecordSummaries_fromFunction(self):
+    logdir = self.get_temp_dir()
+    @def_function.function
+    def f(tag_prefix):
+      with summary_ops.create_file_writer(logdir).as_default():
+        default_output = summary_ops.write(tag_prefix + '_default', 1, step=0)
+        with summary_ops.always_record_summaries():
+          on_output = summary_ops.write(tag_prefix + '_on', 1, step=0)
+        with summary_ops.never_record_summaries():
+          off_output = summary_ops.write(tag_prefix + '_off', 1, step=0)
+        return [default_output, on_output, off_output]
+    with context.eager_mode():
+      self.assertAllEqual([True, True, False], f('default'))
+      with summary_ops.always_record_summaries():
+        self.assertAllEqual([True, True, False], f('on'))
+      with summary_ops.never_record_summaries():
+        self.assertAllEqual([False, True, False], f('off'))
+    events = events_from_logdir(logdir)
+    self.assertEqual(6, len(events))
+    self.assertEqual('default_default', events[1].summary.value[0].tag)
+    self.assertEqual('default_on', events[2].summary.value[0].tag)
+    self.assertEqual('on_default', events[3].summary.value[0].tag)
+    self.assertEqual('on_on', events[4].summary.value[0].tag)
+    self.assertEqual('off_on', events[5].summary.value[0].tag)
+
+  @test_util.also_run_as_tf_function
+  def testSummaryScope(self):
+    with summary_ops.summary_scope('foo') as (tag, scope):
+      self.assertEqual('foo', tag)
+      self.assertEqual('foo/', scope)
+      with summary_ops.summary_scope('bar') as (tag, scope):
+        self.assertEqual('foo/bar', tag)
+        self.assertEqual('foo/bar/', scope)
+      with summary_ops.summary_scope('with/slash') as (tag, scope):
+        self.assertEqual('foo/with/slash', tag)
+        self.assertEqual('foo/with/slash/', scope)
+      with ops.name_scope(None):
+        with summary_ops.summary_scope('unnested') as (tag, scope):
+          self.assertEqual('unnested', tag)
+          self.assertEqual('unnested/', scope)
+
+  @test_util.also_run_as_tf_function
+  def testSummaryScope_defaultName(self):
+    with summary_ops.summary_scope(None) as (tag, scope):
+      self.assertEqual('summary', tag)
+      self.assertEqual('summary/', scope)
+    with summary_ops.summary_scope(None, 'backup') as (tag, scope):
+      self.assertEqual('backup', tag)
+      self.assertEqual('backup/', scope)
+
+  @test_util.also_run_as_tf_function
+  def testSummaryScope_handlesCharactersIllegalForScope(self):
+    with summary_ops.summary_scope('f?o?o') as (tag, scope):
+      self.assertEqual('f?o?o', tag)
+      self.assertEqual('foo/', scope)
+    # If all characters aren't legal for a scope name, use default name.
+    with summary_ops.summary_scope('???', 'backup') as (tag, scope):
+      self.assertEqual('???', tag)
+      self.assertEqual('backup/', scope)
+
+  @test_util.also_run_as_tf_function
+  def testSummaryScope_nameNotUniquifiedForTag(self):
+    constant_op.constant(0, name='foo')
+    with summary_ops.summary_scope('foo') as (tag, _):
+      self.assertEqual('foo', tag)
+    with summary_ops.summary_scope('foo') as (tag, _):
+      self.assertEqual('foo', tag)
+    with ops.name_scope('with'):
+      constant_op.constant(0, name='slash')
+    with summary_ops.summary_scope('with/slash') as (tag, _):
+      self.assertEqual('with/slash', tag)
+
+
+def events_from_file(filepath):
+  """Returns all events in a single event file.
+
+  Args:
+    filepath: Path to the event file.
+
+  Returns:
+    A list of all tf.Event protos in the event file.
+  """
+  records = list(tf_record.tf_record_iterator(filepath))
+  result = []
+  for r in records:
+    event = event_pb2.Event()
+    event.ParseFromString(r)
+    result.append(event)
+  return result
+
+
+def events_from_logdir(logdir):
+  """Returns all events in the single eventfile in logdir.
+
+  Args:
+    logdir: The directory in which the single event file is sought.
+
+  Returns:
+    A list of all tf.Event protos from the single event file.
+
+  Raises:
+    AssertionError: If logdir does not contain exactly one file.
+  """
+  assert gfile.Exists(logdir)
+  files = gfile.ListDirectory(logdir)
+  assert len(files) == 1, 'Found not exactly one file in logdir: %s' % files
+  return events_from_file(os.path.join(logdir, files[0]))
+
+
+def to_numpy(summary_value):
+  return tensor_util.MakeNdarray(summary_value.tensor)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py
index 3f99b9f877..168cb97554 100644
--- a/tensorflow/python/ops/summary_ops_v2.py
+++ b/tensorflow/python/ops/summary_ops_v2.py
@@ -58,14 +58,31 @@ _RUN_NAME_PATTERNS = re.compile(r"^[^\x00-\x1F<>]{0,512}$")
 _USER_NAME_PATTERNS = re.compile(r"^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$", re.I)
 
 
-def should_record_summaries():
-  """Returns boolean Tensor which is true if summaries should be recorded."""
+def _should_record_summaries_internal():
+  """Returns boolean Tensor if summaries should/shouldn't be recorded, or None.
+  """
   global _SHOULD_RECORD_SUMMARIES
   key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
-  should = _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
+  should = _SHOULD_RECORD_SUMMARIES.get(key)
   return should() if callable(should) else should
 
 
+def _should_record_summaries_v2():
+  """Returns boolean Tensor which is true if summaries should be recorded.
+
+  If no recording status has been set, this defaults to True, unlike the public
+  should_record_summaries().
+  """
+  result = _should_record_summaries_internal()
+  return True if result is None else result
+
+
+def should_record_summaries():
+  """Returns boolean Tensor which is true if summaries should be recorded."""
+  result = _should_record_summaries_internal()
+  return False if result is None else result
+
+
 @tf_contextlib.contextmanager
 def _record_summaries(boolean=True):
   """Sets summary recording on or off per the provided boolean value.
@@ -86,7 +103,7 @@ def _record_summaries(boolean=True):
   # TODO(nickfelt): make this threadlocal
   global _SHOULD_RECORD_SUMMARIES
   key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
-  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, False)
+  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, None)
   try:
     _SHOULD_RECORD_SUMMARIES[key] = boolean
     yield
@@ -370,6 +387,98 @@ def summary_writer_initializer_op():
   return _SUMMARY_WRITER_INIT_OP.setdefault(key, [])
 
 
+_INVALID_SCOPE_CHARACTERS = re.compile(r"[^-_/.A-Za-z0-9]")
+
+
+@tf_export("summary.summary_scope", v1=[])
+@tf_contextlib.contextmanager
+def summary_scope(name, default_name="summary", values=None):
+  """A context manager for use when defining a custom summary op.
+
+  This behaves similarly to `tf.name_scope`, except that it returns a generated
+  summary tag in addition to the scope name. The tag is structurally similar to
+  the scope name - derived from the user-provided name, prefixed with enclosing
+  name scopes if any - but we relax the constraint that it be uniquified, as
+  well as the character set limitation (so the user-provided name can contain
+  characters not legal for scope names; in the scope name these are removed).
+
+  This makes the summary tag more predictable and consistent for the user.
+
+  For example, to define a new summary op called `my_op`:
+
+  ```python
+  def my_op(name, my_value, step):
+    with tf.summary.summary_scope(name, "MyOp", [my_value]) as (tag, scope):
+      my_value = tf.convert_to_tensor(my_value)
+      return tf.summary.write(tag, my_value, step=step)
+  ```
+
+  Args:
+    name: string name for the summary.
+    default_name: Optional; if provided, used as default name of the summary.
+    values: Optional; passed as `values` parameter to name_scope.
+
+  Yields:
+    A tuple `(tag, scope)` as described above.
+  """
+  name = name or default_name
+  current_scope = ops.get_name_scope()
+  tag = current_scope + "/" + name if current_scope else name
+  # Strip illegal characters from the scope name, and if that leaves nothing,
+  # use None instead so we pick up the default name.
+  name = _INVALID_SCOPE_CHARACTERS.sub("", name) or None
+  with ops.name_scope(name, default_name, values) as scope:
+    yield tag, scope
+
+
+@tf_export("summary.write", v1=[])
+def write(tag, tensor, step, metadata=None, name=None):
+  """Writes a generic summary to the default SummaryWriter if one exists.
+
+  This exists primarily to support the definition of type-specific summary ops
+  like scalar() and image(), and is not intended for direct use unless defining
+  a new type-specific summary op.
+
+  Args:
+    tag: string tag used to identify the summary (e.g. in TensorBoard), usually
+      generated with `tf.summary.summary_scope`
+    tensor: the Tensor holding the summary data to write
+    step: `int64`-castable monotic step value for this summary
+    metadata: Optional SummaryMetadata, as a proto or serialized bytes
+    name: Optional string name for this op.
+
+  Returns:
+    True on success, or false if no summary was written because no default
+    summary writer was available.
+  """
+  with ops.name_scope(name, "write_summary") as scope:
+    if context.context().summary_writer_resource is None:
+      return constant_op.constant(False)
+    if metadata is None:
+      serialized_metadata = constant_op.constant(b"")
+    elif hasattr(metadata, "SerializeToString"):
+      serialized_metadata = constant_op.constant(metadata.SerializeToString())
+    else:
+      serialized_metadata = metadata
+
+    def record():
+      """Record the actual summary and return True."""
+      # Note the identity to move the tensor to the CPU.
+      with ops.device("cpu:0"):
+        write_summary_op = gen_summary_ops.write_summary(
+            context.context().summary_writer_resource,
+            step,
+            array_ops.identity(tensor),
+            tag,
+            serialized_metadata,
+            name=scope)
+        with ops.control_dependencies([write_summary_op]):
+          return constant_op.constant(True)
+
+    return smart_cond.smart_cond(
+        _should_record_summaries_v2(), record, _nothing, name="summary_cond")
+
+
 def summary_writer_function(name, tensor, function, family=None):
   """Helper function to write summaries.
 
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt
index 5cf4d7cfd9..61670bd151 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt
@@ -40,4 +40,12 @@ tf_module {
     name: "import_event"
     argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "summary_scope"
+    argspec: "args=[\'name\', \'default_name\', \'values\'], varargs=None, keywords=None, defaults=[\'summary\', \'None\'], "
+  }
+  member_method {
+    name: "write"
+    argspec: "args=[\'tag\', \'tensor\', \'step\', \'metadata\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
+  }
 }
-- 
GitLab


From 6742cc2abd01c753572939008039f26995555f15 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 12:27:39 -0800
Subject: [PATCH 529/873] Copy of fix for optimizers v2.

PiperOrigin-RevId: 225415724
---
 tensorflow/python/keras/optimizer_v2/optimizer_v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index 874d0f7fe6..adce9fb9db 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -334,8 +334,8 @@ class OptimizerV2(checkpointable.CheckpointableBase):
       reduced_grads = merge_grads(grads_and_vars)
       grads_and_vars = zip(reduced_grads, var_list)
 
+    self._prepare()
     with ops.init_scope():
-      self._prepare()
       self._create_slots(var_list)
     update_ops = []
 
-- 
GitLab


From fe4328b4948727a2de457cbcc4690d7995682564 Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <slebedev@google.com>
Date: Thu, 13 Dec 2018 12:30:27 -0800
Subject: [PATCH 530/873] Inlined `Variable._AsTensor`

Both implementations used `Variable.value`.

PiperOrigin-RevId: 225416178
---
 tensorflow/python/ops/array_ops.py             |  2 +-
 tensorflow/python/ops/resource_variable_ops.py |  3 ---
 tensorflow/python/ops/variables.py             | 12 +-----------
 3 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
index e10d9036cd..d4e35ca77b 100644
--- a/tensorflow/python/ops/array_ops.py
+++ b/tensorflow/python/ops/array_ops.py
@@ -890,7 +890,7 @@ def _SliceHelperVar(var, slice_spec):
 
   """
 
-  return _slice_helper(var._AsTensor(), slice_spec, var)
+  return _slice_helper(var.value(), slice_spec, var)
 
 
 ops.Tensor._override_operator("__getitem__", _slice_helper)
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index dc53fb8e92..6104cfa7ff 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -809,9 +809,6 @@ class ResourceVariable(variables.RefVariable):
     return ResourceVariable(
         variable_def=variable_def, import_scope=import_scope)
 
-  def _AsTensor(self):
-    return self.value()
-
   def _ref(self):
     """Unsupported."""
     raise NotImplementedError("ResourceVariable does not implement _ref()")
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index e231343825..1dc96efa0b 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -932,7 +932,7 @@ class Variable(six.with_metaclass(VariableMetaclass,
 
     def _run_op(a, *args, **kwargs):
       # pylint: disable=protected-access
-      return tensor_oper(a._AsTensor(), *args, **kwargs)
+      return tensor_oper(a.value(), *args, **kwargs)
 
     functools.update_wrapper(_run_op, tensor_oper)
     setattr(cls, operator, _run_op)
@@ -1603,16 +1603,6 @@ class RefVariable(VariableV1):
     """Conversion function for Graph.as_graph_element()."""
     return self._variable
 
-  def _AsTensor(self):  # pylint: disable=invalid-name
-    """Converts this variable to a Tensor.
-
-    See `tf.Variable.value`.
-
-    Returns:
-      A `Tensor` containing the value of the variable.
-    """
-    return self._snapshot
-
   def value(self):
     """Returns the last snapshot of this variable.
 
-- 
GitLab


From e1b51397638e13abec04127c334167f546c9e846 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Thu, 13 Dec 2018 12:33:15 -0800
Subject: [PATCH 531/873] PR #23661: [XLA] Simplify transposes that are really
 reshapes

Please approve this CL. It will be submitted automatically, and its GitHub pull request will be marked as merged.

Imported from GitHub PR #23661

A transpose like
```
f32[1,1,64,1] = transpose(f32[1,64,1,1]), dimensions={3,2,1,0}
```
is really just a reshape (because there's only one non-1 dimension).
Teach algebraic simplifier to make that substitution, to enable applying
reshape-combining optimizations to such instructions.

Copybara import of the project:

  - c51f19ef50f993677d7d58d9dcf3de6785540e0b [XLA] Simplify transposes that are really reshapes by Keno Fischer <keno@juliacomputing.com>
  - 10fe3503be362e28906e6a01d0d272903f693817 [XLA] Canonicalize Transpose by dropping degenerate dims by Keno Fischer <keno@juliacomputing.com>
  - 333cdccc3a045ebdb36ca03e8877706d5659642e Merge 10fe3503be362e28906e6a01d0d272903f693817 into 3dfb4... by Keno Fischer <keno@alumni.harvard.edu>

COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/23661 from JuliaComputing:kf/transposereshape 10fe3503be362e28906e6a01d0d272903f693817
PiperOrigin-RevId: 225416731
---
 .../xla/service/algebraic_simplifier.cc       | 27 +++++++++++++++++++
 .../xla/service/algebraic_simplifier_test.cc  | 21 +++++++++++++++
 tensorflow/compiler/xla/shape_util.cc         |  5 ++++
 tensorflow/compiler/xla/shape_util.h          |  3 +++
 4 files changed, 56 insertions(+)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 985c5af1c4..ee268361b0 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <algorithm>
 #include <cmath>
+#include <functional>
 #include <iterator>
 #include <memory>
 #include <numeric>
@@ -2026,6 +2027,7 @@ Status AlgebraicSimplifierVisitor::HandleReshape(HloInstruction* reshape) {
         reshape, HloInstruction::CreateReshape(reshape->shape(),
                                                operand->mutable_operand(0)));
   }
+
   if (operand->opcode() == HloOpcode::kRng && operand->user_count() == 1) {
     *operand->mutable_shape() = reshape->shape();
     return ReplaceInstruction(reshape, operand);
@@ -2748,6 +2750,22 @@ Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) {
   return Status::OK();
 }
 
+namespace {
+bool OnlyPermutesMoreThanOneDegenerateDim(const Shape& shape,
+                                          absl::Span<const int64> perm) {
+  std::vector<int64> new_permutation;
+  int64 degenerate_count = 0;
+  for (int64 i = 0; i < perm.size(); ++i) {
+    if (shape.dimensions(i) != 1) {
+      new_permutation.push_back(perm[i]);
+    } else {
+      ++degenerate_count;
+    }
+  }
+  return degenerate_count > 1 && absl::c_is_sorted(new_permutation);
+}
+}  // namespace
+
 Status AlgebraicSimplifierVisitor::HandleTranspose(HloInstruction* transpose) {
   auto operand = transpose->mutable_operand(0);
   if (std::is_sorted(transpose->dimensions().begin(),
@@ -2764,6 +2782,15 @@ Status AlgebraicSimplifierVisitor::HandleTranspose(HloInstruction* transpose) {
                                            transpose->dimensions())));
   }
 
+  // Replace transpose with a reshape if more than one degenerate method is
+  // permuted.
+  if (OnlyPermutesMoreThanOneDegenerateDim(transpose->shape(),
+                                           transpose->dimensions())) {
+    return ReplaceWithNewInstruction(
+        transpose, HloInstruction::CreateReshape(
+                       transpose->shape(), transpose->mutable_operand(0)));
+  }
+
   if (operand->opcode() == HloOpcode::kRng && operand->user_count() == 1) {
     *operand->mutable_shape() = transpose->shape();
     return ReplaceInstruction(transpose, operand);
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 14ce519b6a..775e7ef40d 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -2047,6 +2047,27 @@ TEST_F(AlgebraicSimplifierTest, TransposesMerged) {
             computation->root_instruction()->dimensions());
 }
 
+TEST_F(AlgebraicSimplifierTest, TransposeIsReshape) {
+  const char* hlo_string = R"(
+    HloModule module
+
+    ENTRY test {
+      param = f32[10] parameter(0)
+      reshaped = f32[1,1,10] reshape(f32[10] param)
+      transposed = f32[10,1,1] transpose(f32[1,1,10] reshaped), dimensions={2,1,0}
+      ROOT reshaped_again = f32[10] reshape(f32[10,1,1] transposed)
+    }
+  )";
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto module,
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest()));
+
+  HloPassFix<AlgebraicSimplifier> simplifier(default_options_);
+  EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  auto root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, GmockMatch(m::Parameter()));
+}
+
 // Test merging reshape and broadcast.
 TEST_F(AlgebraicSimplifierTest, ReshapeAndBroadcastMerged) {
   auto m = CreateNewVerifiedModule();
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index eef2dc913d..da61873732 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -1067,6 +1067,11 @@ bool ShapeUtil::IsLeafIndex(const Shape& shape, const ShapeIndex& index) {
   return absl::c_linear_search(shape.dimensions(), 1);
 }
 
+/* static */ Shape ShapeUtil::DropDegenerateDimensions(const Shape& shape) {
+  return FilterDimensions(
+      [&](int64 dim) -> bool { return shape.dimensions()[dim] != 1; }, shape);
+}
+
 namespace {
 
 // Helper for ForEachSubshape which visits the subshapes of the given shape in
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index 84a27f662a..e02804dc88 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -551,6 +551,9 @@ class ShapeUtil {
   // (dimensions with bound 1).
   static bool HasDegenerateDimensions(const Shape& shape);
 
+  // Drops any degenerate dimensions (i.e. dimensions of size 1)
+  static Shape DropDegenerateDimensions(const Shape& shape);
+
   // Permutes the dimensions by the given permutation, so
   // return_value.dimensions[permutation[i]] = argument.dimensions[i].
   //
-- 
GitLab


From 9a24e6be709eed3bad45c8a9ef0a00d9a49dd180 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 12:53:40 -0800
Subject: [PATCH 532/873] Improve coverage when running tests in keras_test
 with only 1 GPU available. Re-enable keras_test on Guitar after disabling
 just the functions that were failing.

PiperOrigin-RevId: 225420181
---
 tensorflow/contrib/distribute/python/BUILD    |  2 --
 .../contrib/distribute/python/keras_test.py   | 32 ++++++++++++-------
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 322c02c210..4c9c35da5a 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -611,8 +611,6 @@ cuda_py_test(
         "no_oss",  # TODO(b/117919883): Fix python error.
         "no_pip",
         "no_windows_gpu",
-        # TODO(b/120943676): Re-enable after fixing InvalidArgumentError.
-        "noguitar",
         "notsan",
     ],
 )
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 683cc89bfb..c53e76f922 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -375,7 +375,9 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
 
   @combinations.generate(combinations.combine(
       distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
           combinations.mirrored_strategy_with_two_gpus,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu,
           combinations.core_mirrored_strategy_with_two_gpus],
       mode=['graph']))
   def test_train_functional_with_distribution_strategy(self, distribution):
@@ -403,7 +405,9 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
 
   @combinations.generate(combinations.combine(
       distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
           combinations.mirrored_strategy_with_two_gpus,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu,
           combinations.core_mirrored_strategy_with_two_gpus],
       mode=['graph']))
   def test_train_sequential_with_distribution_strategy(self, distribution):
@@ -430,8 +434,8 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
 
   @combinations.generate(combinations.combine(
       distribution=[
-          combinations.mirrored_strategy_with_two_gpus,
-          combinations.core_mirrored_strategy_with_two_gpus],
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
       mode=['graph']))
   def test_multi_inputs_multi_outputs_with_input_fn_as_dict(self, distribution):
     train_data, test_data = get_multi_inputs_multi_outputs_data()
@@ -482,8 +486,8 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
 
   @combinations.generate(combinations.combine(
       distribution=[
-          combinations.mirrored_strategy_with_two_gpus,
-          combinations.core_mirrored_strategy_with_two_gpus],
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
       mode=['graph']))
   def test_keras_optimizer_with_distribution_strategy(self, distribution):
     keras_model = simple_sequential_model()
@@ -904,10 +908,12 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
 
   @combinations.generate(combinations.combine(
       distribution=[
-          combinations.mirrored_strategy_with_two_gpus,
-          combinations.core_mirrored_strategy_with_two_gpus],
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
       mode=['graph', 'eager']))
-  def test_dataset_wrong_input_shape(self, distribution):
+  # TODO(b/120943676, b/120957836): Re-enable once the validation code is
+  # restored.
+  def DISABLED_test_dataset_wrong_input_shape(self, distribution):
     with self.cached_session():
       model = get_model()
 
@@ -927,9 +933,11 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
         model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)
 
   @combinations.generate(combinations.combine(
-      distribution=[combinations.mirrored_strategy_with_two_gpus],
+      distribution=[combinations.mirrored_strategy_with_gpu_and_cpu],
       mode=['graph', 'eager']))
-  def test_dataset_no_batch_input_validation(self, distribution):
+  # TODO(b/120943676, b/120957836): Re-enable once the validation code is
+  # restored.
+  def DISABLED_test_dataset_no_batch_input_validation(self, distribution):
     with self.cached_session():
       model = get_model()
 
@@ -967,7 +975,9 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
 
   @combinations.generate(combinations.combine(
       distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
           combinations.mirrored_strategy_with_two_gpus,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu,
           combinations.core_mirrored_strategy_with_two_gpus],
       mode=['graph', 'eager']))
   def test_learning_phase_value(self, distribution):
@@ -1170,8 +1180,8 @@ class TestDistributionStrategyWithLossMasking(test.TestCase,
   # work for TPU due to some invalid datatype.
   @combinations.generate(combinations.combine(
       distribution=[
-          combinations.mirrored_strategy_with_two_gpus,
-          combinations.core_mirrored_strategy_with_two_gpus],
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
       mode=['graph', 'eager']))
   def test_masking(self, distribution):
     with self.cached_session():
-- 
GitLab


From 6ad6db4fdc638775abeafa5358ab6ed771f33cab Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 13 Dec 2018 13:14:36 -0800
Subject: [PATCH 533/873] Explicitly delete the generator variable to make it
 clearer that it should not be used past that point.

PiperOrigin-RevId: 225423563
---
 tensorflow/python/autograph/pyct/compiler.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/autograph/pyct/compiler.py b/tensorflow/python/autograph/pyct/compiler.py
index aa4fd551ec..420f3bb223 100644
--- a/tensorflow/python/autograph/pyct/compiler.py
+++ b/tensorflow/python/autograph/pyct/compiler.py
@@ -72,6 +72,7 @@ def ast_to_source(node, indentation='  '):
   # Reference cycles are quite disliked by TensorFlow's tests.
   if hasattr(generator, 'write'):
     generator.write = None
+  del generator
 
   return code
 
-- 
GitLab


From b6f6ee4838d280909b31b3c5e28d14f7fd0c9c42 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Thu, 13 Dec 2018 13:16:47 -0800
Subject: [PATCH 534/873] [tf.data] Make it impossible to set illegitimate
 properties on any `tf.data` Options. Also update docstrings to make defaults
 clearer.

PiperOrigin-RevId: 225423882
---
 .../experimental/ops/optimization_options.py  | 29 +++++++++++++------
 .../data/experimental/ops/stats_options.py    |  3 +-
 tensorflow/python/data/ops/dataset_ops.py     | 21 +++++++++-----
 tensorflow/python/data/util/options.py        | 10 ++++++-
 tensorflow/python/data/util/options_test.py   |  7 +++++
 5 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/data/experimental/ops/optimization_options.py b/tensorflow/python/data/experimental/ops/optimization_options.py
index 3817e6228c..41a819d94b 100644
--- a/tensorflow/python/data/experimental/ops/optimization_options.py
+++ b/tensorflow/python/data/experimental/ops/optimization_options.py
@@ -47,43 +47,54 @@ class OptimizationOptions(options.OptionsBase):
   filter_fusion = options.create_option(
       name="filter_fusion",
       ty=bool,
-      docstring="Whether to fuse filter transformations.")
+      docstring=
+      "Whether to fuse filter transformations. If None, defaults to False.")
 
   hoist_random_uniform = options.create_option(
       name="hoist_random_uniform",
       ty=bool,
       docstring=
-      "Whether to hoist `tf.random_uniform()` ops out of map transformations.")
+      "Whether to hoist `tf.random_uniform()` ops out of map transformations. "
+      "If None, defaults to False.")
 
   map_and_batch_fusion = options.create_option(
       name="map_and_batch_fusion",
       ty=bool,
-      docstring="Whether to fuse map and batch transformations.")
+      docstring=
+      "Whether to fuse map and batch transformations. If None, defaults to "
+      "True.")
 
   map_and_filter_fusion = options.create_option(
       name="map_and_filter_fusion",
       ty=bool,
-      docstring="Whether to fuse map and filter transformations.")
+      docstring=
+      "Whether to fuse map and filter transformations. If None, defaults to "
+      "False.")
 
   map_fusion = options.create_option(
-      name="map_and_filter_fusion",
+      name="map_fusion",
       ty=bool,
-      docstring="Whether to fuse map transformations.")
+      docstring="Whether to fuse map transformations. If None, defaults to "
+      "False.")
 
   map_parallelization = options.create_option(
       name="map_parallelization",
       ty=bool,
-      docstring="Whether to parallelize stateless map transformations.")
+      docstring=
+      "Whether to parallelize stateless map transformations. If None, defaults "
+      "to False.")
 
   map_vectorization = options.create_option(
       name="map_vectorization",
       ty=bool,
-      docstring="Whether to vectorize map transformations.")
+      docstring=
+      "Whether to vectorize map transformations. If None, defaults to False.")
 
   noop_elimination = options.create_option(
       name="noop_elimination",
       ty=bool,
-      docstring="Whether to eliminate no-op transformations.")
+      docstring=
+      "Whether to eliminate no-op transformations. If None, defaults to True.")
 
   shuffle_and_repeat_fusion = options.create_option(
       name="shuffle_and_repeat_fusion",
diff --git a/tensorflow/python/data/experimental/ops/stats_options.py b/tensorflow/python/data/experimental/ops/stats_options.py
index 94ae67ff1d..c4c4b1cea0 100644
--- a/tensorflow/python/data/experimental/ops/stats_options.py
+++ b/tensorflow/python/data/experimental/ops/stats_options.py
@@ -65,4 +65,5 @@ class StatsOptions(options.OptionsBase):
   latency_all_edges = options.create_option(
       name="latency_all_edges",
       ty=bool,
-      docstring="Whether to add latency measurements on all edges.")
+      docstring=
+      "Whether to add latency measurements on all edges. Defaults to False.")
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 6582ac23b4..904c5b4b64 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -1695,36 +1695,43 @@ class Options(options_lib.OptionsBase):
       ty=bool,
       docstring=
       "Whether to dynamically adjust the values of tunable parameters (e.g. "
-      "degrees of parallelism).")
+      "degrees of parallelism). If None, defaults to True.")
 
   experimental_deterministic = options_lib.create_option(
       name="experimental_deterministic",
       ty=bool,
       docstring=
-      "Whether the outputs need to be produced in deterministic order."
-  )
+      "Whether the outputs need to be produced in deterministic order. If None,"
+      " defaults to True.")
 
   experimental_numa_aware = options_lib.create_option(
       name="experimental_numa_aware",
       ty=bool,
-      docstring="Whether to use NUMA-aware operations.")
+      docstring=
+      "Whether to use NUMA-aware operations. If None, defaults to False.")
 
   experimental_optimization = options_lib.create_option(
       name="experimental_optimization",
       ty=optimization_options.OptimizationOptions,
-      docstring="Associates the given optimization options with the dataset.",
+      docstring=
+      "The optimization options associated with the dataset. See "
+      "`tf.data.experimental.OptimizationOptions` for more details.",
       default_factory=optimization_options.OptimizationOptions)
 
   experimental_stats = options_lib.create_option(
       name="experimental_stats",
       ty=stats_options.StatsOptions,
-      docstring="Associates the given statistics options with the dataset.",
+      docstring=
+      "The statistics options associated with the dataset. See "
+      "`tf.data.experimental.StatsOptions` for more details.",
       default_factory=stats_options.StatsOptions)
 
   experimental_threading = options_lib.create_option(
       name="experimental_threading",
       ty=threading_options.ThreadingOptions,
-      docstring="Associates the given threading options with the dataset.",
+      docstring=
+      "The threading options associated with the dataset. See "
+      "`tf.data.experimental.ThreadingOptions` for more details.",
       default_factory=threading_options.ThreadingOptions)
 
   def _static_optimizations(self):
diff --git a/tensorflow/python/data/util/options.py b/tensorflow/python/data/util/options.py
index c578a831db..3c79197fae 100644
--- a/tensorflow/python/data/util/options.py
+++ b/tensorflow/python/data/util/options.py
@@ -31,7 +31,8 @@ class OptionsBase(object):
   """
 
   def __init__(self):
-    self._options = {}
+    # NOTE: Cannot use `self._options` here as we override `__setattr__`
+    object.__setattr__(self, "_options", {})
 
   def __eq__(self, other):
     if not isinstance(other, self.__class__):
@@ -47,6 +48,13 @@ class OptionsBase(object):
     else:
       return NotImplemented
 
+  def __setattr__(self, name, value):
+    if hasattr(self, name):
+      object.__setattr__(self, name, value)
+    else:
+      raise AttributeError(
+          "Cannot set the property %s on %s." % (name, type(self).__name__))
+
 
 def create_option(name, ty, docstring, default_factory=lambda: None):
   """Creates a type-checked property.
diff --git a/tensorflow/python/data/util/options_test.py b/tensorflow/python/data/util/options_test.py
index 8d41ef223f..b21afbd455 100644
--- a/tensorflow/python/data/util/options_test.py
+++ b/tensorflow/python/data/util/options_test.py
@@ -94,6 +94,13 @@ class OptionsTest(test.TestCase):
     with self.assertRaises(TypeError):
       options.merge_options(options1, options2)
 
+  def testNoSpuriousAttrs(self):
+    test_options = _TestOptions()
+    with self.assertRaises(AttributeError):
+      test_options.wrong_attr = True
+    with self.assertRaises(AttributeError):
+      _ = test_options.wrong_attr
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From e63e5db4193097905f8c720f92819c559a9eaa23 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Thu, 13 Dec 2018 13:18:04 -0800
Subject: [PATCH 535/873] Change sequential if's to elif in setup.py.
 Otherwise, the 2.0 branch doesn't get run.

PiperOrigin-RevId: 225424077
---
 tensorflow/tools/pip_package/setup.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index a3da276f89..3927540cc7 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -87,10 +87,10 @@ if 'tf_nightly' in project_name:
   for i, pkg in enumerate(REQUIRED_PACKAGES):
     if 'tensorboard' in pkg:
       REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.13.0a0, < 1.14.0a0'
-    if 'tensorflow_estimator' in pkg:
-      REQUIRED_PACKAGES[i] = 'tf-estimator-nightly'
-    if 'tensorflow_estimator' in pkg and '2.0' in project_name:
+    elif 'tensorflow_estimator' in pkg and '2.0' in project_name:
       REQUIRED_PACKAGES[i] = 'tensorflow-estimator-2.0-preview'
+    elif 'tensorflow_estimator' in pkg:
+      REQUIRED_PACKAGES[i] = 'tf-estimator-nightly'
 
 # weakref.finalize and enum were introduced in Python 3.4
 if sys.version_info < (3, 4):
-- 
GitLab


From 41737aff8c40a4b76ddb3c9e613354603a87f446 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Thu, 13 Dec 2018 13:24:33 -0800
Subject: [PATCH 536/873] Internal changes

PiperOrigin-RevId: 225425054
---
 tensorflow/core/BUILD                         | 21 ++++++++++++++-----
 .../core/platform/default/build_config.bzl    |  3 ---
 .../core/platform/default/build_config/BUILD  |  5 +++++
 3 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 276005038c..f98280c3ec 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -113,7 +113,6 @@ load(
     "tf_additional_device_tracer_test_flags",
     "tf_additional_gdr_lib_defines",
     "tf_additional_human_readable_json_deps",
-    "tf_additional_logger_deps",
     "tf_additional_lib_defines",
     "tf_additional_lib_deps",
     "tf_additional_lib_hdrs",
@@ -450,15 +449,27 @@ cc_library(
     hdrs = ["platform/logger.h"],
     copts = tf_copts(),
     visibility = ["//visibility:public"],
-    deps = [":platform_protobuf"],
+    deps = [
+        ":lib_proto_parsing",
+        "@protobuf_archive//:protobuf",
+    ],
+)
+
+cc_library(
+    name = "default_logger",
+    srcs = ["platform/default/logger.cc"],
+    hdrs = ["platform/logger.h"],
+    deps = [
+        "//tensorflow/core:lib_proto_parsing",
+        "//tensorflow/core:logger_interface",
+    ],
 )
 
 cc_library(
     name = "logger",
-    srcs = tf_platform_srcs(["logger.cc"]),
-    copts = tf_copts(),
+    hdrs = ["platform/logger.h"],
     visibility = ["//visibility:public"],
-    deps = [":logger_interface"] + tf_additional_logger_deps(),
+    deps = ["//tensorflow/core/platform/default/build_config:logger"],
 )
 
 filegroup(
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 0428715130..3a4415f229 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -543,9 +543,6 @@ def tf_additional_proto_srcs():
 def tf_additional_human_readable_json_deps():
     return []
 
-def tf_additional_logger_deps():
-    return []
-
 def tf_additional_all_protos():
     return ["//tensorflow/core:protos_all"]
 
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index da1f66dc67..ee6936b372 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -275,3 +275,8 @@ alias(
     actual = ":mobile_srcs",
     visibility = ["//visibility:public"],
 )
+
+alias(
+    name = "logger",
+    actual = "//tensorflow/core:default_logger",
+)
-- 
GitLab


From e66d0c15baa17101780f114120973e36507fc9b0 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Thu, 13 Dec 2018 13:28:46 -0800
Subject: [PATCH 537/873] Last-minute arg changes:  * move verbosity to
 experimental, since it's unclear whether it will be actually needed.  * move
 strip_decorators and optional_features to experimental as well, until we
 figure out whether we need to use a single Options object.

PiperOrigin-RevId: 225425738
---
 tensorflow/python/autograph/impl/api.py       | 42 +++++++++----------
 .../api/golden/v1/tensorflow.autograph.pbtxt  |  4 +-
 .../api/golden/v2/tensorflow.autograph.pbtxt  |  4 +-
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index c113f0e1f5..c6c137c8fd 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -279,11 +279,11 @@ def converted_call(f, owner, options, *args, **kwargs):
   converted_f = to_graph(
       target_entity,
       recursive=options.recursive,
-      verbose=options.verbose,
       arg_values=arg_values,
       arg_types=arg_types,
-      strip_decorators=options.strip_decorators,
-      optional_features=options.optional_features,
+      experimental_optional_features=options.optional_features,
+      experimental_strip_decorators=options.strip_decorators,
+      experimental_verbose=options.verbose,
       experimental_partial_types=partial_types)
 
   result = converted_f(*effective_args, **kwargs)
@@ -316,11 +316,11 @@ def _is_not_callable(obj):
 @tf_export('autograph.to_graph')
 def to_graph(entity,
              recursive=True,
-             verbose=converter.Verbosity.VERBOSE,
              arg_values=None,
              arg_types=None,
-             strip_decorators=None,
-             optional_features=converter.Feature.ALL,
+             experimental_optional_features=converter.Feature.ALL,
+             experimental_strip_decorators=None,
+             experimental_verbose=converter.Verbosity.BRIEF,
              experimental_partial_types=None):
   """Converts a Python entity into a TensorFlow graph.
 
@@ -368,21 +368,21 @@ def to_graph(entity,
     entity: Python callable or class to convert.
     recursive: Whether to recursively convert any functions that the
       converted function may call.
-    verbose: The level of printing verbosity to use, as a
-      `tf.autograph.experimental.Verbosity` value.
     arg_values: Optional dict of value hints for symbols including
       function arguments mapping string names to actual values. For example,
       `arg_values={'a': 1}` will map the variable `a` to the value `1`.
     arg_types: Optional dict of type hints for symbols including function
       arguments. Type hints allow specifying just the type of a variable, rather
       than a specific value.
-    strip_decorators: A tuple specifying decorators that should be
+    experimental_optional_features: `None`, a tuple of, or a single
+      `tf.autograph.experimental.Feature` value. Controls the use of
+      optional features in the conversion process.
+    experimental_strip_decorators: A tuple specifying decorators that should be
       excluded from the compiled output. By default, when converting a function
       before the decorators are applied, the compiled output will include those
       decorators.
-    optional_features: `None`, a tuple of, or a single
-      `tf.autograph.experimental.Feature` value. Controls the use of
-      optional features in the conversion process.
+    experimental_verbose: The level of printing verbosity to use, as a
+      `tf.autograph.experimental.Verbosity` value.
     experimental_partial_types: A `set` of `type` values, reserved for internal
       use.
 
@@ -392,16 +392,16 @@ def to_graph(entity,
   Raises:
     ValueError: If the entity could not be converted.
   """
-  if strip_decorators is None:
-    strip_decorators = ()
-  strip_decorators += (convert, do_not_convert, converted_call)
+  if experimental_strip_decorators is None:
+    experimental_strip_decorators = ()
+  experimental_strip_decorators += (convert, do_not_convert, converted_call)
 
   program_ctx = converter.ProgramContext(
       options=converter.ConversionOptions(
           recursive=recursive,
-          verbose=verbose,
-          strip_decorators=strip_decorators,
-          optional_features=optional_features),
+          verbose=experimental_verbose,
+          strip_decorators=experimental_strip_decorators,
+          optional_features=experimental_optional_features),
       partial_types=experimental_partial_types,
       autograph_module=tf_inspect.getmodule(to_graph),
       uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
@@ -460,7 +460,7 @@ def to_code(entity,
             arg_values=None,
             arg_types=None,
             indentation='  ',
-            optional_features=converter.Feature.ALL,
+            experimental_optional_features=converter.Feature.ALL,
             experimental_partial_types=None):
   """Similar to `to_graph`, but returns Python source code as a string.
 
@@ -481,7 +481,7 @@ def to_code(entity,
       than a specific value.
     indentation: The string to use for indenting. Typically two or four spaces,
       or just the tab character.
-    optional_features: `None`, a tuple of, or a single
+    experimental_optional_features: `None`, a tuple of, or a single
       `tf.autograph.experimental.Feature` value. Controls the use of
       optional features in the conversion process.
     experimental_partial_types: A `set` of `type` values, reserved for internal
@@ -495,7 +495,7 @@ def to_code(entity,
           recursive=recursive,
           verbose=converter.Verbosity.BRIEF,
           strip_decorators=(convert, do_not_convert, converted_call),
-          optional_features=optional_features),
+          optional_features=experimental_optional_features),
       partial_types=experimental_partial_types,
       autograph_module=tf_inspect.getmodule(to_graph),
       uncompiled_modules=config.DEFAULT_UNCOMPILED_MODULES)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.autograph.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.autograph.pbtxt
index 34bdab95ff..12e23bc0c8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.autograph.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.autograph.pbtxt
@@ -6,10 +6,10 @@ tf_module {
   }
   member_method {
     name: "to_code"
-    argspec: "args=[\'entity\', \'recursive\', \'arg_values\', \'arg_types\', \'indentation\', \'optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'None\', \'None\', \'  \', \'Feature.ALL\', \'None\'], "
+    argspec: "args=[\'entity\', \'recursive\', \'arg_values\', \'arg_types\', \'indentation\', \'experimental_optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'None\', \'None\', \'  \', \'Feature.ALL\', \'None\'], "
   }
   member_method {
     name: "to_graph"
-    argspec: "args=[\'entity\', \'recursive\', \'verbose\', \'arg_values\', \'arg_types\', \'strip_decorators\', \'optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'Verbosity.VERBOSE\', \'None\', \'None\', \'None\', \'Feature.ALL\', \'None\'], "
+    argspec: "args=[\'entity\', \'recursive\', \'arg_values\', \'arg_types\', \'experimental_optional_features\', \'experimental_strip_decorators\', \'experimental_verbose\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'None\', \'None\', \'Feature.ALL\', \'None\', \'Verbosity.BRIEF\', \'None\'], "
   }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.autograph.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.autograph.pbtxt
index 34bdab95ff..12e23bc0c8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.autograph.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.autograph.pbtxt
@@ -6,10 +6,10 @@ tf_module {
   }
   member_method {
     name: "to_code"
-    argspec: "args=[\'entity\', \'recursive\', \'arg_values\', \'arg_types\', \'indentation\', \'optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'None\', \'None\', \'  \', \'Feature.ALL\', \'None\'], "
+    argspec: "args=[\'entity\', \'recursive\', \'arg_values\', \'arg_types\', \'indentation\', \'experimental_optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'None\', \'None\', \'  \', \'Feature.ALL\', \'None\'], "
   }
   member_method {
     name: "to_graph"
-    argspec: "args=[\'entity\', \'recursive\', \'verbose\', \'arg_values\', \'arg_types\', \'strip_decorators\', \'optional_features\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'Verbosity.VERBOSE\', \'None\', \'None\', \'None\', \'Feature.ALL\', \'None\'], "
+    argspec: "args=[\'entity\', \'recursive\', \'arg_values\', \'arg_types\', \'experimental_optional_features\', \'experimental_strip_decorators\', \'experimental_verbose\', \'experimental_partial_types\'], varargs=None, keywords=None, defaults=[\'True\', \'None\', \'None\', \'Feature.ALL\', \'None\', \'Verbosity.BRIEF\', \'None\'], "
   }
 }
-- 
GitLab


From 3a9bd14c67b52f6826eb7d5cfa1a9b7324450b9f Mon Sep 17 00:00:00 2001
From: Gautam Vasudevan <gvasudevan@google.com>
Date: Thu, 13 Dec 2018 13:34:51 -0800
Subject: [PATCH 538/873] Move Half Plus Two example to TF Serving

PiperOrigin-RevId: 225426867
---
 tensorflow/examples/saved_model/BUILD         |  22 --
 .../saved_model/saved_model_half_plus_two.py  | 271 ------------------
 2 files changed, 293 deletions(-)
 delete mode 100644 tensorflow/examples/saved_model/BUILD
 delete mode 100644 tensorflow/examples/saved_model/saved_model_half_plus_two.py

diff --git a/tensorflow/examples/saved_model/BUILD b/tensorflow/examples/saved_model/BUILD
deleted file mode 100644
index ebefc6576d..0000000000
--- a/tensorflow/examples/saved_model/BUILD
+++ /dev/null
@@ -1,22 +0,0 @@
-# Description: SavedModel half plus two example.
-
-package(
-    default_visibility = ["//tensorflow:internal"],
-)
-
-licenses(["notice"])  # Apache 2.0
-
-exports_files(["LICENSE"])
-
-py_binary(
-    name = "saved_model_half_plus_two",
-    srcs = [
-        "saved_model_half_plus_two.py",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        "//tensorflow:tensorflow_py",
-        "//tensorflow/python:lib",
-        "//tensorflow/python/saved_model:main_op",
-    ],
-)
diff --git a/tensorflow/examples/saved_model/saved_model_half_plus_two.py b/tensorflow/examples/saved_model/saved_model_half_plus_two.py
deleted file mode 100644
index dfdde44540..0000000000
--- a/tensorflow/examples/saved_model/saved_model_half_plus_two.py
+++ /dev/null
@@ -1,271 +0,0 @@
-## Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Exports an example linear regression inference graph.
-
-Exports a TensorFlow graph to `/tmp/saved_model/half_plus_two/` based on the
-`SavedModel` format.
-
-This graph calculates,
-
-\\(
-  y = a*x + b
-\\)
-
-and/or, independently,
-
-\\(
-  y2 = a*x2 + c
-\\)
-
-where `a`, `b` and `c` are variables with `a=0.5` and `b=2` and `c=3`.
-
-Output from this program is typically used to exercise SavedModel load and
-execution code.
-
-To create a CPU model:
-  bazel run -c opt saved_half_plus_two -- --device=cpu
-
-To create GPU model:
-  bazel run --config=cuda -c opt saved_half_plus_two -- \
-  --device=gpu
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import os
-import sys
-
-import tensorflow as tf
-
-from tensorflow.python.lib.io import file_io
-
-FLAGS = None
-
-
-def _write_assets(assets_directory, assets_filename):
-  """Writes asset files to be used with SavedModel for half plus two.
-
-  Args:
-    assets_directory: The directory to which the assets should be written.
-    assets_filename: Name of the file to which the asset contents should be
-        written.
-
-  Returns:
-    The path to which the assets file was written.
-  """
-  if not file_io.file_exists(assets_directory):
-    file_io.recursive_create_dir(assets_directory)
-
-  path = os.path.join(
-      tf.compat.as_bytes(assets_directory), tf.compat.as_bytes(assets_filename))
-  file_io.write_string_to_file(path, "asset-file-contents")
-  return path
-
-
-def _build_regression_signature(input_tensor, output_tensor):
-  """Helper function for building a regression SignatureDef."""
-  input_tensor_info = tf.saved_model.utils.build_tensor_info(input_tensor)
-  signature_inputs = {
-      tf.saved_model.signature_constants.REGRESS_INPUTS: input_tensor_info
-  }
-  output_tensor_info = tf.saved_model.utils.build_tensor_info(output_tensor)
-  signature_outputs = {
-      tf.saved_model.signature_constants.REGRESS_OUTPUTS: output_tensor_info
-  }
-  return tf.saved_model.signature_def_utils.build_signature_def(
-      signature_inputs, signature_outputs,
-      tf.saved_model.signature_constants.REGRESS_METHOD_NAME)
-
-
-# Possibly extend this to allow passing in 'classes', but for now this is
-# sufficient for testing purposes.
-def _build_classification_signature(input_tensor, scores_tensor):
-  """Helper function for building a classification SignatureDef."""
-  input_tensor_info = tf.saved_model.utils.build_tensor_info(input_tensor)
-  signature_inputs = {
-      tf.saved_model.signature_constants.CLASSIFY_INPUTS: input_tensor_info
-  }
-  output_tensor_info = tf.saved_model.utils.build_tensor_info(scores_tensor)
-  signature_outputs = {
-      tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
-          output_tensor_info
-  }
-  return tf.saved_model.signature_def_utils.build_signature_def(
-      signature_inputs, signature_outputs,
-      tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME)
-
-
-def _generate_saved_model_for_half_plus_two(export_dir,
-                                            as_text=False,
-                                            use_main_op=False,
-                                            device_type="cpu"):
-  """Generates SavedModel for half plus two.
-
-  Args:
-    export_dir: The directory to which the SavedModel should be written.
-    as_text: Writes the SavedModel protocol buffer in text format to disk.
-    use_main_op: Whether to supply a main op during SavedModel build time.
-    device_name: Device to force ops to run on.
-  """
-  builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
-
-  device_name = "/cpu:0"
-  if device_type == "gpu":
-    device_name = "/gpu:0"
-
-  with tf.Session(
-      graph=tf.Graph(),
-      config=tf.ConfigProto(log_device_placement=True)) as sess:
-    with tf.device(device_name):
-      # Set up the model parameters as variables to exercise variable loading
-      # functionality upon restore.
-      a = tf.Variable(0.5, name="a")
-      b = tf.Variable(2.0, name="b")
-      c = tf.Variable(3.0, name="c")
-
-      # Create a placeholder for serialized tensorflow.Example messages to be
-      # fed.
-      serialized_tf_example = tf.placeholder(tf.string, name="tf_example")
-
-      # Parse the tensorflow.Example looking for a feature named "x" with a
-      # single floating point value.
-      feature_configs = {
-          "x": tf.FixedLenFeature([1], dtype=tf.float32),
-          "x2": tf.FixedLenFeature([1], dtype=tf.float32, default_value=[0.0])
-      }
-      # parse_example only works on CPU
-      with tf.device("/cpu:0"):
-        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
-      # Use tf.identity() to assign name
-      x = tf.identity(tf_example["x"], name="x")
-      y = tf.add(tf.multiply(a, x), b)
-      y = tf.identity(y, name="y")
-      y2 = tf.add(tf.multiply(a, x), c)
-      y2 = tf.identity(y2, name="y2")
-
-      x2 = tf.identity(tf_example["x2"], name="x2")
-      y3 = tf.add(tf.multiply(a, x2), c)
-      y3 = tf.identity(y3, name="y3")
-
-    # Create an assets file that can be saved and restored as part of the
-    # SavedModel.
-    original_assets_directory = "/tmp/original/export/assets"
-    original_assets_filename = "foo.txt"
-    original_assets_filepath = _write_assets(original_assets_directory,
-                                             original_assets_filename)
-
-    # Set up the assets collection.
-    assets_filepath = tf.constant(original_assets_filepath)
-    tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, assets_filepath)
-    filename_tensor = tf.Variable(
-        original_assets_filename,
-        name="filename_tensor",
-        trainable=False,
-        collections=[])
-    assign_filename_op = filename_tensor.assign(original_assets_filename)
-
-    # Set up the signature for Predict with input and output tensor
-    # specification.
-    predict_input_tensor = tf.saved_model.utils.build_tensor_info(x)
-    predict_signature_inputs = {"x": predict_input_tensor}
-
-    predict_output_tensor = tf.saved_model.utils.build_tensor_info(y)
-    predict_signature_outputs = {"y": predict_output_tensor}
-    predict_signature_def = (
-        tf.saved_model.signature_def_utils.build_signature_def(
-            predict_signature_inputs, predict_signature_outputs,
-            tf.saved_model.signature_constants.PREDICT_METHOD_NAME))
-
-    signature_def_map = {
-        "regress_x_to_y":
-            _build_regression_signature(serialized_tf_example, y),
-        "regress_x_to_y2":
-            _build_regression_signature(serialized_tf_example, y2),
-        "regress_x2_to_y3":
-            _build_regression_signature(x2, y3),
-        "classify_x_to_y":
-            _build_classification_signature(serialized_tf_example, y),
-        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
-            predict_signature_def
-    }
-    # Initialize all variables and then save the SavedModel.
-    sess.run(tf.global_variables_initializer())
-
-    if use_main_op:
-      builder.add_meta_graph_and_variables(
-          sess, [tf.saved_model.tag_constants.SERVING],
-          signature_def_map=signature_def_map,
-          assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
-          main_op=tf.group(tf.saved_model.main_op.main_op(),
-                           assign_filename_op))
-    else:
-      builder.add_meta_graph_and_variables(
-          sess, [tf.saved_model.tag_constants.SERVING],
-          signature_def_map=signature_def_map,
-          assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS),
-          main_op=tf.group(assign_filename_op))
-  builder.save(as_text)
-
-
-def main(_):
-  _generate_saved_model_for_half_plus_two(
-      FLAGS.output_dir, device_type=FLAGS.device)
-  print("SavedModel generated for %(device)s at: %(dir)s" % {
-      "device": FLAGS.device,
-      "dir": FLAGS.output_dir
-  })
-
-  _generate_saved_model_for_half_plus_two(
-      FLAGS.output_dir_pbtxt, as_text=True, device_type=FLAGS.device)
-  print("SavedModel generated for %(device)s at: %(dir)s" % {
-      "device": FLAGS.device,
-      "dir": FLAGS.output_dir_pbtxt
-  })
-
-  _generate_saved_model_for_half_plus_two(
-      FLAGS.output_dir_main_op, use_main_op=True, device_type=FLAGS.device)
-  print("SavedModel generated for %(device)s at: %(dir)s " % {
-      "device": FLAGS.device,
-      "dir": FLAGS.output_dir_main_op
-  })
-
-
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument(
-      "--output_dir",
-      type=str,
-      default="/tmp/saved_model_half_plus_two",
-      help="Directory where to output SavedModel.")
-  parser.add_argument(
-      "--output_dir_pbtxt",
-      type=str,
-      default="/tmp/saved_model_half_plus_two_pbtxt",
-      help="Directory where to output the text format of SavedModel.")
-  parser.add_argument(
-      "--output_dir_main_op",
-      type=str,
-      default="/tmp/saved_model_half_plus_two_main_op",
-      help="Directory where to output the SavedModel with a main op.")
-  parser.add_argument(
-      "--device",
-      type=str,
-      default="cpu",
-      help="Force model to run on 'cpu' or 'gpu'")
-  FLAGS, unparsed = parser.parse_known_args()
-  tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
-- 
GitLab


From 4b9240ca4bb646654284c2ba81a1a1d5d19f0294 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Thu, 13 Dec 2018 13:38:34 -0800
Subject: [PATCH 539/873] Add fuzzer for OneHot.

PiperOrigin-RevId: 225427576
---
 tensorflow/core/kernels/fuzzing/BUILD         |  2 +
 .../core/kernels/fuzzing/fuzz_session.h       |  5 ++
 .../core/kernels/fuzzing/one_hot_fuzz.cc      | 79 +++++++++++++++++++
 3 files changed, 86 insertions(+)
 create mode 100644 tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc

diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD
index fcaf1a8966..7300f7a4e2 100644
--- a/tensorflow/core/kernels/fuzzing/BUILD
+++ b/tensorflow/core/kernels/fuzzing/BUILD
@@ -70,3 +70,5 @@ tf_oss_fuzz_corpus("decode_json_example")
 tf_oss_fuzz_dict("decode_json_example")
 
 tf_ops_fuzz_target_lib("check_numerics")
+
+tf_ops_fuzz_target_lib("one_hot")
diff --git a/tensorflow/core/kernels/fuzzing/fuzz_session.h b/tensorflow/core/kernels/fuzzing/fuzz_session.h
index 57d562ddf4..6abce959b9 100644
--- a/tensorflow/core/kernels/fuzzing/fuzz_session.h
+++ b/tensorflow/core/kernels/fuzzing/fuzz_session.h
@@ -118,6 +118,11 @@ class FuzzSession {
                          {"output"}, nullptr);
   }
 
+  Status RunMultipleInputs(
+      const std::vector<std::pair<string, Tensor> >& inputs) {
+    return session_->Run(inputs, {}, {"output"}, nullptr);
+  }
+
   // Dispatches to FuzzImpl;  small amount of sugar to keep the code
   // of the per-op fuzzers tiny.
   int Fuzz(const uint8_t* data, size_t size) {
diff --git a/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc b/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
new file mode 100644
index 0000000000..e685d4eebd
--- /dev/null
+++ b/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
@@ -0,0 +1,79 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/kernels/fuzzing/fuzz_session.h"
+
+namespace tensorflow {
+namespace fuzzing {
+
+class FuzzOneHot : public FuzzSession {
+  void BuildGraph(const Scope& scope) override {
+    auto input =
+        tensorflow::ops::Placeholder(scope.WithOpName("input"), DT_UINT8);
+    auto depth =
+        tensorflow::ops::Placeholder(scope.WithOpName("depth"), DT_INT32);
+    auto on = tensorflow::ops::Placeholder(scope.WithOpName("on"), DT_UINT8);
+    auto off = tensorflow::ops::Placeholder(scope.WithOpName("off"), DT_UINT8);
+    (void)tensorflow::ops::OneHot(scope.WithOpName("output"), input, depth, on,
+                                  off);
+  }
+
+  void FuzzImpl(const uint8_t* data, size_t size) override {
+    int64 input_size;
+    int32 depth;
+    uint8 on, off;
+    const uint8_t* input_data;
+
+    if (size > 3) {
+      depth = static_cast<int32>(data[0]);
+      on = data[1];
+      off = data[2];
+      input_size = static_cast<int64>(size - 3);
+      input_data = data + 3;
+    } else {
+      depth = 1;
+      on = 1;
+      off = 0;
+      input_size = static_cast<int64>(size);
+      input_data = data;
+    }
+
+    Tensor input_tensor(tensorflow::DT_UINT8, TensorShape({input_size}));
+    Tensor depth_tensor(tensorflow::DT_INT32, TensorShape({}));
+    Tensor on_tensor(tensorflow::DT_UINT8, TensorShape({}));
+    Tensor off_tensor(tensorflow::DT_UINT8, TensorShape({}));
+
+    auto flat_tensor = input_tensor.flat<uint8>();
+    for (size_t i = 0; i < input_size; i++) {
+      flat_tensor(i) = input_data[i];
+    }
+    depth_tensor.scalar<int32>()() = depth;
+    on_tensor.scalar<uint8>()() = on;
+    off_tensor.scalar<uint8>()() = off;
+
+    RunMultipleInputs({{"input", input_tensor},
+                       {"depth", depth_tensor},
+                       {"on", on_tensor},
+                       {"off", off_tensor}})
+        .IgnoreError();
+  }
+};
+
+STANDARD_TF_FUZZ_FUNCTION(FuzzOneHot);
+
+}  // end namespace fuzzing
+}  // end namespace tensorflow
-- 
GitLab


From 08096b1b7a66b20dd24817de7977244918673780 Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Thu, 13 Dec 2018 13:38:49 -0800
Subject: [PATCH 540/873] Remove redefined main(). This target links with
 :test_main, so we can't define another main().

PiperOrigin-RevId: 225427623
---
 tensorflow/core/platform/port_test.cc | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc
index 9d144efbfd..be99ff09e0 100644
--- a/tensorflow/core/platform/port_test.cc
+++ b/tensorflow/core/platform/port_test.cc
@@ -84,9 +84,3 @@ TEST(TestCPUFeature, TestFeature) {
 
 }  // namespace port
 }  // namespace tensorflow
-
-int main(int argc, char** argv) {
-  // On Linux, add: FLAGS_logtostderr = true;
-  ::testing::InitGoogleTest(&argc, argv);
-  return RUN_ALL_TESTS();
-}
-- 
GitLab


From 99f853f3fb695b3757287409a3256ce6b9426da0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 13:54:55 -0800
Subject: [PATCH 541/873] Bump minimum bazel version check to 0.19, to avoid
 the following error, which isn't helpful:

ERROR: cc_toolchain_suite '@local_config_cuda//crosstool:toolchain' does not contain a toolchain for CPU 'k8', you may want to add an entry for 'local|compiler' into toolchains and toolchain_identifier 'local_linux' into the corresponding cc_toolchain rule (see --incompatible_disable_cc_toolchain_label_from_crosstool_proto).
PiperOrigin-RevId: 225430355
---
 configure.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.py b/configure.py
index 7f05bc6a97..1e732db264 100644
--- a/configure.py
+++ b/configure.py
@@ -1554,7 +1554,7 @@ def main():
   # environment variables.
   environ_cp = dict(os.environ)
 
-  check_bazel_version('0.18.0', '0.20.0')
+  check_bazel_version('0.19.0', '0.20.0')
 
   reset_tf_configure_bazelrc()
 
-- 
GitLab


From 85bcb3bbdcdb4da139b1139e2d4c8865f402ce81 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Thu, 13 Dec 2018 14:15:37 -0800
Subject: [PATCH 542/873] Roll forward: Log GPU and cuDNN version information.

PiperOrigin-RevId: 225434114
---
 tensorflow/stream_executor/BUILD              | 11 ++++++++
 tensorflow/stream_executor/dnn.h              |  7 +++--
 tensorflow/stream_executor/logging.proto      | 19 +++++++++++++
 .../stream_executor/stream_executor_pimpl.cc  | 28 ++++++++++++++++++-
 4 files changed, 61 insertions(+), 4 deletions(-)
 create mode 100644 tensorflow/stream_executor/logging.proto

diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index 4c764a7b09..c43efc799c 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -23,6 +23,14 @@ tf_proto_library(
     protodeps = tf_additional_all_protos(),
 )
 
+tf_proto_library(
+    name = "logging_proto",
+    srcs = ["logging.proto"],
+    cc_api_version = 2,
+    default_header = True,
+    protodeps = tf_additional_all_protos(),
+)
+
 cc_library(
     name = "stream_executor_impl",
     srcs = glob(
@@ -46,7 +54,9 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc_impl",
+        ":logging_proto_cc_impl",
         "//tensorflow/core:lib",
+        "//tensorflow/core:logger",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
@@ -63,6 +73,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc",
+        ":logging_proto_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/strings",
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index c044a356ef..43738d2d1d 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -906,9 +906,10 @@ class VersionInfo {
  public:
   VersionInfo(int major = 0, int minor = 0, int patch = 0)
       : major_(major), minor_(minor), patch_(patch) {}
-  int major_version() { return major_; }
-  int minor_version() { return minor_; }
-  int patch() { return patch_; }
+  int major_version() const { return major_; }
+  int minor_version() const { return minor_; }
+  int patch() const { return patch_; }
+
  private:
   int major_;
   int minor_;
diff --git a/tensorflow/stream_executor/logging.proto b/tensorflow/stream_executor/logging.proto
new file mode 100644
index 0000000000..2c75500cda
--- /dev/null
+++ b/tensorflow/stream_executor/logging.proto
@@ -0,0 +1,19 @@
+syntax = "proto3";
+
+package stream_executor;
+
+message CudnnVersion {
+  int32 major = 1;
+  int32 minor = 2;
+  int32 patch = 3;
+};
+
+message ComputeCapability {
+  int32 major = 1;
+  int32 minor = 2;
+}
+
+message CudaInfo {
+  CudnnVersion cudnn_version = 1;
+  ComputeCapability compute_capability = 2;
+}
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index d1d0bd9bc2..86bc4ab7d0 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/strings/str_cat.h"
+#include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/util/env_var.h"
 #include "tensorflow/stream_executor/blas.h"
 #include "tensorflow/stream_executor/fft.h"
@@ -33,6 +34,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 #include "tensorflow/stream_executor/lib/threadpool.h"
+#include "tensorflow/stream_executor/logging.pb.h"
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/rng.h"
 #include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -217,7 +219,31 @@ StreamExecutor::~StreamExecutor() {
 port::Status StreamExecutor::Init(int device_ordinal,
                                   DeviceOptions device_options) {
   device_ordinal_ = device_ordinal;
-  return implementation_->Init(device_ordinal, std::move(device_options));
+  TF_RETURN_IF_ERROR(
+      implementation_->Init(device_ordinal, std::move(device_options)));
+
+  if (platform_kind_ == PlatformKind::kCuda) {
+    CudaInfo info;
+
+    int cc_major, cc_minor;
+    GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor);
+    info.mutable_compute_capability()->set_major(cc_major);
+    info.mutable_compute_capability()->set_minor(cc_minor);
+
+    if (auto *dnn = AsDnn()) {
+      port::StatusOr<dnn::VersionInfo> version_or = dnn->GetVersion();
+      if (version_or.ok()) {
+        const auto &version = version_or.ValueOrDie();
+        info.mutable_cudnn_version()->set_major(version.major_version());
+        info.mutable_cudnn_version()->set_minor(version.minor_version());
+        info.mutable_cudnn_version()->set_patch(version.patch());
+      }
+    }
+
+    tensorflow::Logger::Singleton()->LogProto(info);
+  }
+
+  return port::Status::OK();
 }
 
 port::Status StreamExecutor::Init() {
-- 
GitLab


From 1a0df60da9f85bf8541dd6fba8fbafa3b9dea3a9 Mon Sep 17 00:00:00 2001
From: Mahmoud Abuzaina <mahmoud.abuzaina@intel.com>
Date: Thu, 13 Dec 2018 14:21:13 -0800
Subject: [PATCH 543/873] Fixing build issue

---
 tensorflow/core/kernels/mkl_relu_op.cc | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index a12fd5c2db..02471ada8a 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -16,12 +16,12 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc.
 #ifdef INTEL_MKL
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 
 #ifndef INTEL_MKL_ML_ONLY
 #include "mkldnn.hpp"
@@ -866,9 +866,9 @@ class MklReluOpBase : public OpKernel {
       // execute eltwise
       eltwise_fwd->Execute(src_data, dst_data);
     } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) + ", in file " +
-                         string(__FILE__) + ":" + std::to_string(__LINE__);
+      string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
+                         string(e.message) + ", in file " + string(__FILE__) +
+                         ":" + std::to_string(__LINE__);
       OP_REQUIRES_OK(
           context,
           errors::Aborted("Operation received an exception:", error_msg));
@@ -1034,9 +1034,9 @@ class MklReluGradOpBase : public OpKernel {
       // execute eltwise bwd
       eltwise_bwd->Execute(src_data, diff_dst_data, diff_src_data);
     } catch (mkldnn::error& e) {
-      string error_msg = "Status: " + std::to_string(e.status) +
-                         ", message: " + string(e.message) + ", in file " +
-                         string(__FILE__) + ":" + std::to_string(__LINE__);
+      string error_msg = "Status: " + std::to_string(e.status) + ", message: " +
+                         string(e.message) + ", in file " + string(__FILE__) +
+                         ":" + std::to_string(__LINE__);
       OP_REQUIRES_OK(
           context,
           errors::Aborted("Operation received an exception:", error_msg));
@@ -1353,7 +1353,7 @@ class MklLeakyReluOp : public MklReluOpBase<Device, T, eltwise_relu> {
     AllocateOutputSetMklShape(context, dst_index, &dst_tensor,
                               src_tensor.shape(), dnn_shape_dst);
     T* out_o = dst_tensor->flat<T>().data();
-    out_o[0] = user_i[0] >= 0 ? user_g[0] : user_g[0] * this->alpha_;
+    out_o[0] = user_i[0] >= 0 ? user_i[0] : user_i[0] * this->alpha_;
     return;
   }
 };
-- 
GitLab


From a62dd916cdfa32c0c7ef2f22278433057b35e043 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Thu, 13 Dec 2018 14:16:03 -0800
Subject: [PATCH 544/873] [XLA:Python] Fix minor Python 3 compatibility issue
 in xla_shape.py

PiperOrigin-RevId: 225434188
---
 tensorflow/compiler/xla/python_api/xla_shape.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/compiler/xla/python_api/xla_shape.py b/tensorflow/compiler/xla/python_api/xla_shape.py
index 95b2bf300e..bdcd4abd6c 100644
--- a/tensorflow/compiler/xla/python_api/xla_shape.py
+++ b/tensorflow/compiler/xla/python_api/xla_shape.py
@@ -20,6 +20,8 @@ from __future__ import print_function
 
 import numpy as _np  # Avoids becoming a part of public Tensorflow API.
 
+from six.moves import xrange
+
 from tensorflow.compiler.xla import xla_data_pb2
 from tensorflow.compiler.xla.python_api import types
 
-- 
GitLab


From cea1bea432452bfd3b5aa34af689b01e4da236d6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 14:19:31 -0800
Subject: [PATCH 545/873] Migrate appropriate tests in topology_test.py to use
 the run_all_keras_modes and run_in_graph_and_eager_modes decorators.

PiperOrigin-RevId: 225434760
---
 tensorflow/python/keras/BUILD                 |   2 +-
 .../python/keras/engine/topology_test.py      | 202 ++++++++++--------
 2 files changed, 112 insertions(+), 92 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index c056996f96..aa9e1c03b1 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -875,7 +875,7 @@ py_test(
 
 py_test(
     name = "topology_test",
-    size = "small",
+    size = "medium",
     srcs = ["engine/topology_test.py"],
     srcs_version = "PY2AND3",
     tags = [
diff --git a/tensorflow/python/keras/engine/topology_test.py b/tensorflow/python/keras/engine/topology_test.py
index 4071e2c091..b7680dcbc0 100644
--- a/tensorflow/python/keras/engine/topology_test.py
+++ b/tensorflow/python/keras/engine/topology_test.py
@@ -26,6 +26,8 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import input_layer as input_layer_lib
 from tensorflow.python.keras.engine import network as network_lib
 from tensorflow.python.ops import array_ops
@@ -40,7 +42,7 @@ except ImportError:
   yaml = None
 
 
-class TopologyConstructionTest(test.TestCase):
+class TopologyConstructionTest(keras_parameterized.TestCase):
 
   @test_util.run_deprecated_v1
   def test_get_updates(self):
@@ -107,7 +109,7 @@ class TopologyConstructionTest(test.TestCase):
     self.assertEqual(len(network.updates), 5)
     self.assertEqual(len(network.get_updates_for(x4)), 2)
 
-  @test_util.run_v1_only('b/120545219')
+  @test_util.run_in_graph_and_eager_modes()
   def test_get_updates_bn(self):
     x1 = input_layer_lib.Input(shape=(1,))
     layer = keras.layers.BatchNormalization()
@@ -180,6 +182,7 @@ class TopologyConstructionTest(test.TestCase):
     self.assertEqual(len(network.losses), 5)
     self.assertEqual(len(network.get_losses_for(x4)), 2)
 
+  @test_util.run_in_graph_and_eager_modes()
   def testTopologicalAttributes(self):
     # test layer attributes / methods related to cross-layer connectivity.
     a = input_layer_lib.Input(shape=(32,), name='input_a')
@@ -237,6 +240,7 @@ class TopologyConstructionTest(test.TestCase):
       b_2 = dense(b)
       _ = new_dense.output_shape
 
+  @test_util.run_in_graph_and_eager_modes()
   def testTopologicalAttributesMultiOutputLayer(self):
 
     class PowersLayer(keras.layers.Layer):
@@ -253,6 +257,7 @@ class TopologyConstructionTest(test.TestCase):
     self.assertEqual(test_layer.input_shape, (None, 32))
     self.assertEqual(test_layer.output_shape, [(None, 32), (None, 32)])
 
+  @test_util.run_in_graph_and_eager_modes()
   def testTopologicalAttributesMultiInputLayer(self):
 
     class AddLayer(keras.layers.Layer):
@@ -304,6 +309,7 @@ class TopologyConstructionTest(test.TestCase):
     self.assertEqual(network.non_trainable_weights,
                      dense.trainable_weights + dense.non_trainable_weights)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_trainable_weights(self):
     a = keras.layers.Input(shape=(2,))
     b = keras.layers.Dense(1)(a)
@@ -424,6 +430,7 @@ class TopologyConstructionTest(test.TestCase):
     self.assertEqual(dense.get_output_mask_at(0), None)
     self.assertEqual(dense.get_output_mask_at(1), None)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_multi_input_layer(self):
     with self.cached_session():
       # test multi-input layer
@@ -558,6 +565,7 @@ class TopologyConstructionTest(test.TestCase):
       fn_outputs = fn([input_a_np, input_b_np])
       self.assertListEqual([x.shape for x in fn_outputs], [(10, 7), (10, 64)])
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_multi_input_multi_output_recursion(self):
     with self.cached_session():
       # test multi-input multi-output
@@ -631,6 +639,7 @@ class TopologyConstructionTest(test.TestCase):
         yaml_str = model.to_yaml()
         keras.models.model_from_yaml(yaml_str)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_invalid_graphs(self):
     a = keras.layers.Input(shape=(32,), name='input_a')
     b = keras.layers.Input(shape=(32,), name='input_b')
@@ -720,6 +729,7 @@ class TopologyConstructionTest(test.TestCase):
     x = keras.layers.Input(tensor=x)
     keras.layers.Dense(2)(x)
 
+  @test_util.run_in_graph_and_eager_modes()
   def test_basic_masking(self):
     a = keras.layers.Input(shape=(10, 32), name='input_a')
     b = keras.layers.Masking()(a)
@@ -786,121 +796,128 @@ class TopologyConstructionTest(test.TestCase):
     loss = model_b.evaluate(x)
     self.assertEqual(loss, 4.)
 
+  @keras_parameterized.run_all_keras_modes
   def test_layer_sharing_at_heterogenous_depth(self):
-    with self.cached_session():
-      x_val = np.random.random((10, 5))
+    x_val = np.random.random((10, 5))
 
-      x = input_layer_lib.Input(shape=(5,))
-      a = keras.layers.Dense(5, name='A')
-      b = keras.layers.Dense(5, name='B')
-      output = a(b(a(b(x))))
-      m = keras.models.Model(x, output)
+    x = input_layer_lib.Input(shape=(5,))
+    a = keras.layers.Dense(5, name='A')
+    b = keras.layers.Dense(5, name='B')
+    output = a(b(a(b(x))))
+    m = keras.models.Model(x, output)
+    m.run_eagerly = testing_utils.should_run_eagerly()
 
-      output_val = m.predict(x_val)
+    output_val = m.predict(x_val)
 
-      config = m.get_config()
-      weights = m.get_weights()
+    config = m.get_config()
+    weights = m.get_weights()
 
-      m2 = keras.models.Model.from_config(config)
-      m2.set_weights(weights)
+    m2 = keras.models.Model.from_config(config)
+    m2.set_weights(weights)
 
-      output_val_2 = m2.predict(x_val)
-      self.assertAllClose(output_val, output_val_2, atol=1e-6)
+    output_val_2 = m2.predict(x_val)
+    self.assertAllClose(output_val, output_val_2, atol=1e-6)
 
+  @keras_parameterized.run_all_keras_modes
   def test_layer_sharing_at_heterogenous_depth_with_concat(self):
-    with self.cached_session():
-      input_shape = (16, 9, 3)
-      input_layer = input_layer_lib.Input(shape=input_shape)
+    input_shape = (16, 9, 3)
+    input_layer = input_layer_lib.Input(shape=input_shape)
 
-      a = keras.layers.Dense(3, name='dense_A')
-      b = keras.layers.Dense(3, name='dense_B')
-      c = keras.layers.Dense(3, name='dense_C')
+    a = keras.layers.Dense(3, name='dense_A')
+    b = keras.layers.Dense(3, name='dense_B')
+    c = keras.layers.Dense(3, name='dense_C')
 
-      x1 = b(a(input_layer))
-      x2 = a(c(input_layer))
-      output = keras.layers.concatenate([x1, x2])
+    x1 = b(a(input_layer))
+    x2 = a(c(input_layer))
+    output = keras.layers.concatenate([x1, x2])
 
-      m = keras.models.Model(inputs=input_layer, outputs=output)
+    m = keras.models.Model(inputs=input_layer, outputs=output)
+    m.run_eagerly = testing_utils.should_run_eagerly()
 
-      x_val = np.random.random((10, 16, 9, 3))
-      output_val = m.predict(x_val)
+    x_val = np.random.random((10, 16, 9, 3))
+    output_val = m.predict(x_val)
 
-      config = m.get_config()
-      weights = m.get_weights()
+    config = m.get_config()
+    weights = m.get_weights()
 
-      m2 = keras.models.Model.from_config(config)
-      m2.set_weights(weights)
+    m2 = keras.models.Model.from_config(config)
+    m2.set_weights(weights)
 
-      output_val_2 = m2.predict(x_val)
-      self.assertAllClose(output_val, output_val_2, atol=1e-6)
+    output_val_2 = m2.predict(x_val)
+    self.assertAllClose(output_val, output_val_2, atol=1e-6)
 
-  @test_util.run_v1_only('b/120545219')
+  @keras_parameterized.run_all_keras_modes
   def test_explicit_training_argument(self):
-    with self.cached_session():
-      a = keras.layers.Input(shape=(2,))
-      b = keras.layers.Dropout(0.5)(a)
-      base_model = keras.models.Model(a, b)
-
-      a = keras.layers.Input(shape=(2,))
-      b = base_model(a, training=False)
-      model = keras.models.Model(a, b)
-
-      x = np.ones((100, 2))
-      y = np.ones((100, 2))
-      model.compile(optimizer='sgd', loss='mse')
-      loss = model.train_on_batch(x, y)
-      self.assertEqual(loss, 0)  # In inference mode, output is equal to input.
-
-      a = keras.layers.Input(shape=(2,))
-      b = base_model(a, training=True)
-      model = keras.models.Model(a, b)
-      preds = model.predict(x)
-      self.assertEqual(np.min(preds), 0.)  # At least one unit was dropped.
+    a = keras.layers.Input(shape=(2,))
+    b = keras.layers.Dropout(0.5)(a)
+    base_model = keras.models.Model(a, b)
 
-  def test_multi_output_model_with_none_masking(self):
+    a = keras.layers.Input(shape=(2,))
+    b = base_model(a, training=False)
+    model = keras.models.Model(a, b)
 
-    with self.cached_session():
+    x = np.ones((100, 2))
+    y = np.ones((100, 2))
+    model.compile(
+        optimizer='sgd',
+        loss='mse',
+        run_eagerly=testing_utils.should_run_eagerly())
+    loss = model.train_on_batch(x, y)
+    self.assertEqual(loss, 0)  # In inference mode, output is equal to input.
+
+    a = keras.layers.Input(shape=(2,))
+    b = base_model(a, training=True)
+    model = keras.models.Model(a, b)
+    preds = model.predict(x)
+    self.assertEqual(np.min(preds), 0.)  # At least one unit was dropped.
 
-      def func(x):
-        return [x * 0.2, x * 0.3]
+  @keras_parameterized.run_all_keras_modes
+  def test_multi_output_model_with_none_masking(self):
+    def func(x):
+      return [x * 0.2, x * 0.3]
 
-      def output_shape(input_shape):
-        return [input_shape, input_shape]
+    def output_shape(input_shape):
+      return [input_shape, input_shape]
 
-      i = keras.layers.Input(shape=(3, 2, 1))
-      o = keras.layers.Lambda(function=func, output_shape=output_shape)(i)
+    i = keras.layers.Input(shape=(3, 2, 1))
+    o = keras.layers.Lambda(function=func, output_shape=output_shape)(i)
 
-      self.assertEqual(keras.backend.int_shape(o[0]), (None, 3, 2, 1))
-      self.assertEqual(keras.backend.int_shape(o[1]), (None, 3, 2, 1))
+    self.assertEqual(keras.backend.int_shape(o[0]), (None, 3, 2, 1))
+    self.assertEqual(keras.backend.int_shape(o[1]), (None, 3, 2, 1))
 
-      o = keras.layers.add(o)
-      model = keras.Model(i, o)
+    o = keras.layers.add(o)
+    model = keras.Model(i, o)
+    model.run_eagerly = testing_utils.should_run_eagerly()
 
-      i2 = keras.layers.Input(shape=(3, 2, 1))
-      o2 = model(i2)
-      model2 = keras.Model(i2, o2)
+    i2 = keras.layers.Input(shape=(3, 2, 1))
+    o2 = model(i2)
+    model2 = keras.Model(i2, o2)
+    model2.run_eagerly = testing_utils.should_run_eagerly()
 
-      x = np.random.random((4, 3, 2, 1))
-      out = model2.predict(x)
-      assert out.shape == (4, 3, 2, 1)
-      self.assertAllClose(out, x * 0.2 + x * 0.3, atol=1e-4)
+    x = np.random.random((4, 3, 2, 1))
+    out = model2.predict(x)
+    assert out.shape == (4, 3, 2, 1)
+    self.assertAllClose(out, x * 0.2 + x * 0.3, atol=1e-4)
 
+  @keras_parameterized.run_all_keras_modes
   def test_constant_initializer_with_numpy(self):
+    initializer = keras.initializers.Constant(np.ones((3, 2)))
+    model = keras.models.Sequential()
+    model.add(
+        keras.layers.Dense(2, input_shape=(3,), kernel_initializer=initializer))
+    model.add(keras.layers.Dense(3))
+    model.compile(
+        loss='mse',
+        optimizer='sgd',
+        metrics=['acc'],
+        run_eagerly=testing_utils.should_run_eagerly())
 
-    with self.cached_session():
-      initializer = keras.initializers.Constant(np.ones((3, 2)))
-      model = keras.models.Sequential()
-      model.add(keras.layers.Dense(2, input_shape=(3,),
-                                   kernel_initializer=initializer))
-      model.add(keras.layers.Dense(3))
-      model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
-
-      json_str = model.to_json()
-      keras.models.model_from_json(json_str)
+    json_str = model.to_json()
+    keras.models.model_from_json(json_str)
 
-      if yaml is not None:
-        yaml_str = model.to_yaml()
-        keras.models.model_from_yaml(yaml_str)
+    if yaml is not None:
+      yaml_str = model.to_yaml()
+      keras.models.model_from_yaml(yaml_str)
 
 
 class DeferredModeTest(test.TestCase):
@@ -929,7 +946,7 @@ class DeferredModeTest(test.TestCase):
       self.assertEqual(outputs.shape.as_list(), [10, 4])
 
   @test_util.run_in_graph_and_eager_modes()
-  def testMultiIONetworkbuilding(self):
+  def testMultiIONetworkBuilding(self):
     input_a = input_layer_lib.Input(shape=(32,))
     input_b = input_layer_lib.Input(shape=(16,))
     a = keras.layers.Dense(16)(input_a)
@@ -954,7 +971,7 @@ class DeferredModeTest(test.TestCase):
       self.assertEqual(outputs[1].shape.as_list(), [10, 2])
 
 
-class DefaultShapeInferenceBehaviorTest(test.TestCase):
+class DefaultShapeInferenceBehaviorTest(keras_parameterized.TestCase):
 
   def _testShapeInference(self, model, input_shape, expected_output_shape):
     input_value = np.random.random(input_shape)
@@ -1122,7 +1139,7 @@ class DefaultShapeInferenceBehaviorTest(test.TestCase):
     output = model(sample_input)
     self.assertEqual(output.shape, (1, 3))
 
-  @test_util.run_in_graph_and_eager_modes()
+  @keras_parameterized.run_all_keras_modes
   def test_sequential_as_downstream_of_masking_layer(self):
     inputs = keras.layers.Input(shape=(3, 4))
     x = keras.layers.Masking(mask_value=0., input_shape=(3, 4))(inputs)
@@ -1132,7 +1149,10 @@ class DefaultShapeInferenceBehaviorTest(test.TestCase):
 
     x = keras.layers.wrappers.TimeDistributed(s)(x)
     model = keras.Model(inputs=inputs, outputs=x)
-    model.compile(optimizer=rmsprop.RMSPropOptimizer(1e-3), loss='mse')
+    model.compile(
+        optimizer=rmsprop.RMSPropOptimizer(1e-3),
+        loss='mse',
+        run_eagerly=testing_utils.should_run_eagerly())
 
     model_input = np.random.randint(
         low=1, high=5, size=(10, 3, 4)).astype('float32')
-- 
GitLab


From 06a4b66fc4349c83365520a47e41b9b2f58d8f2d Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Thu, 13 Dec 2018 14:36:12 -0800
Subject: [PATCH 546/873] Update embedding test to run in 2.0 mode.

PiperOrigin-RevId: 225437660
---
 .../python/keras/layers/embeddings_test.py    | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/keras/layers/embeddings_test.py b/tensorflow/python/keras/layers/embeddings_test.py
index aaa17b7e96..ac3acad7ac 100644
--- a/tensorflow/python/keras/layers/embeddings_test.py
+++ b/tensorflow/python/keras/layers/embeddings_test.py
@@ -23,15 +23,19 @@ import numpy as np
 from tensorflow.python import keras
 from tensorflow.python.eager import backprop
 from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 from tensorflow.python.training import adagrad
 
 
-class EmbeddingTest(test.TestCase):
+class EmbeddingTest(keras_parameterized.TestCase):
 
-  @tf_test_util.run_in_graph_and_eager_modes(use_gpu=False)
+  @keras_parameterized.run_all_keras_modes
   def test_embedding(self):
+    if tf_test_util.is_gpu_available():
+      self.skipTest('Only test embedding on CPU.')
+
     testing_utils.layer_test(
         keras.layers.Embedding,
         kwargs={'output_dim': 4,
@@ -69,18 +73,17 @@ class EmbeddingTest(test.TestCase):
         input_dtype='int32',
         expected_output_dtype='float32')
 
-  @tf_test_util.run_in_graph_and_eager_modes()
+  @keras_parameterized.run_all_keras_modes
   def test_embedding_correctness(self):
     layer = keras.layers.Embedding(output_dim=2, input_dim=2)
-    layer.build((None, 2))
-    matrix = np.array([[1, 1], [2, 2]])
-    layer.set_weights([matrix])
+    model = keras.models.Sequential([layer])
 
-    inputs = keras.backend.constant([[0, 1, 0]], dtype='int32')
-    outputs = keras.backend.eval(layer(inputs))
+    layer.set_weights([np.array([[1, 1], [2, 2]])])
+    model.run_eagerly = testing_utils.should_run_eagerly()
+    outputs = model.predict(np.array([[0, 1, 0]], dtype='int32'))
     self.assertAllClose(outputs, [[[1, 1], [2, 2], [1, 1]]])
 
-  @tf_test_util.run_in_graph_and_eager_modes()
+  @tf_test_util.run_in_graph_and_eager_modes
   def test_eager_gpu_cpu(self):
     l = keras.layers.Embedding(output_dim=2, input_dim=2)
     l.build((None, 2))
-- 
GitLab


From 5d2c0ee557f03a0ab372978c03525d6407b9bdb1 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 13 Dec 2018 14:48:40 -0800
Subject: [PATCH 547/873] Adding deprecation warnings for I/O modules that will
 be removed in 2.0.

PiperOrigin-RevId: 225439721
---
 tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py   | 5 +++++
 tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py   | 5 +++++
 tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py     | 5 +++++
 tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py | 5 +++++
 4 files changed, 20 insertions(+)

diff --git a/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py b/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
index 5c5599858e..77813519c1 100644
--- a/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
+++ b/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
@@ -23,11 +23,16 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.util import deprecation
 
 
 class SequenceFileDataset(dataset_ops.DatasetSource):
   """A Sequence File Dataset that reads the sequence file."""
 
+  @deprecation.deprecated(
+      None,
+      "tf.contrib.hadoop will be removed in 2.0, the support for Apache Hadoop "
+      "will continue to be provided through the tensorflow/io GitHub project.")
   def __init__(self, filenames):
     """Create a `SequenceFileDataset`.
 
diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
index e4762c91b1..66e654ca63 100644
--- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
+++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py
@@ -31,6 +31,7 @@ from tensorflow.python.data.util import structure
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.util import deprecation
 
 
 @six.add_metaclass(abc.ABCMeta)
@@ -699,6 +700,10 @@ class IgniteDataset(dataset_ops.DatasetSource):
      Ignite Binary Client Protocol.
   """
 
+  @deprecation.deprecated(
+      None,
+      "tf.contrib.ignite will be removed in 2.0, the support for Apache Ignite "
+      "will continue to be provided through the tensorflow/io GitHub project.")
   def __init__(self,
                cache_name,
                host="localhost",
diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
index 2b86331099..b399e1b6c2 100644
--- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
+++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py
@@ -23,12 +23,17 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.util import deprecation
 
 
 class KafkaDataset(dataset_ops.DatasetSource):
   """A Kafka Dataset that consumes the message.
   """
 
+  @deprecation.deprecated(
+      None,
+      "tf.contrib.kafka will be removed in 2.0, the support for Apache Kafka "
+      "will continue to be provided through the tensorflow/io GitHub project.")
   def __init__(self,
                topics,
                servers="localhost",
diff --git a/tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py b/tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py
index 2039539528..2b1d478a9b 100644
--- a/tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py
+++ b/tensorflow/contrib/kinesis/python/ops/kinesis_dataset_ops.py
@@ -23,6 +23,7 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.util import deprecation
 
 
 class KinesisDataset(dataset_ops.DatasetSource):
@@ -50,6 +51,10 @@ class KinesisDataset(dataset_ops.DatasetSource):
   is returned immediately instead.
   """
 
+  @deprecation.deprecated(
+      None,
+      "tf.contrib.kinesis will be removed in 2.0, the support for Kinesis "
+      "will continue to be provided through the tensorflow/io GitHub project.")
   def __init__(self,
                stream,
                shard="",
-- 
GitLab


From 564e8ab6a84fd157f5a80fc13e674afa40fa18d2 Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Thu, 13 Dec 2018 14:52:01 -0800
Subject: [PATCH 548/873] Add Keras/Graph+Eager parameterization to Keras tests

PiperOrigin-RevId: 225440240
---
 .../python/keras/engine/base_layer_test.py    |  71 +++++++----
 .../keras/engine/training_eager_test.py       | 118 +++++++++---------
 2 files changed, 110 insertions(+), 79 deletions(-)

diff --git a/tensorflow/python/keras/engine/base_layer_test.py b/tensorflow/python/keras/engine/base_layer_test.py
index fa4eb48d56..fa0cad70af 100644
--- a/tensorflow/python/keras/engine/base_layer_test.py
+++ b/tensorflow/python/keras/engine/base_layer_test.py
@@ -23,11 +23,14 @@ import numpy as np
 from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import base_layer
+from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
-from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
 class DynamicLayer1(base_layer.Layer):
@@ -63,35 +66,48 @@ class InvalidLayer(base_layer.Layer):
     return input_shape
 
 
-class BaseLayerTest(test.TestCase):
+class BaseLayerTest(keras_parameterized.TestCase):
 
-  def test_dynamic_layer_in_functional_model_in_graph_mode(self):
-    with context.graph_mode():
-      inputs = keras.Input((3,))
+  def _assert_static_graph_unfriendly_model(self, model):
+    self.assertEqual(model._static_graph_friendly, False)
+    if not testing_utils.should_run_eagerly():
       with self.assertRaisesRegexp(
-          TypeError, 'Using a `tf.Tensor` as a Python `bool` is not allowed'):
-        _ = DynamicLayer1()(inputs)
+          ValueError, 'can only be successfully run in eager execution'):
+        model.compile(rmsprop.RMSprop(0.001), loss='mse',
+                      run_eagerly=testing_utils.should_run_eagerly())
+    else:
+      model.compile(rmsprop.RMSprop(0.001), loss='mse',
+                    run_eagerly=testing_utils.should_run_eagerly())
+      model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
 
-      inputs = keras.Input((3,))
+  @test_util.run_v1_only
+  def test_dynamic_layer_fails_in_v1(self):
+    inputs = keras.Input((3,))
+
+    if not context.executing_eagerly():
+      with self.assertRaisesRegexp(
+          TypeError, 'Using a `tf.Tensor` as a Python `bool` is not allowed'):
+        DynamicLayer1()(inputs)
       with self.assertRaisesRegexp(
           TypeError, 'Tensor objects are only iterable when eager'):
-        _ = DynamicLayer2()(inputs)
+        DynamicLayer2()(inputs)
 
-  def test_dynamic_layer_in_functional_model_in_eager_mode(self):
+  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
+  def test_dynamic_layer(self):
     inputs = keras.Input((3,))
     outputs = DynamicLayer1()(inputs)
     model = keras.Model(inputs, outputs)
-    self.assertEqual(model._static_graph_friendly, False)
-    model.compile(RMSPropOptimizer(0.001), loss='mse')
-    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+    self.assertAllClose([[0], [4], [9]], model.predict_on_batch([0, 2, -3]))
+    self._assert_static_graph_unfriendly_model(model)
 
     inputs = keras.Input((3,))
     outputs = DynamicLayer2()(inputs)
     model = keras.Model(inputs, outputs)
-    self.assertEqual(model._static_graph_friendly, False)
-    model.compile(RMSPropOptimizer(0.001), loss='mse')
-    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+    self.assertAllClose([[0], [4], [9]], model.predict_on_batch([0, 2, -3]))
+    self._assert_static_graph_unfriendly_model(model)
 
+  # TODO(b/120985967): Test fails for nested models due to _set_mask_metadata.
+  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
   def nested_dynamic_layers_in_eager_mode(self):
     inputs = keras.Input((3,))
     outputs = DynamicLayer1()(inputs)
@@ -103,8 +119,13 @@ class BaseLayerTest(test.TestCase):
 
     model = keras.Model(inputs, outputs)
     self.assertEqual(model._static_graph_friendly, False)
-    model.compile(RMSPropOptimizer(0.001), loss='mse')
-    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+    if testing_utils.should_run_eagerly():
+      model.compile(rmsprop.RMSprop(0.001), loss='mse', run_eagerly=True)
+      model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+    else:
+      with self.assertRaisesRegexp(
+          ValueError, 'only be successfully run in eager execution'):
+        model.compile(rmsprop.RMSprop(0.001), loss='mse', run_eagerly=False)
 
   def test_invalid_forward_pass_in_graph_mode(self):
     with context.graph_mode():
@@ -112,14 +133,20 @@ class BaseLayerTest(test.TestCase):
       with self.assertRaisesRegexp(ValueError, 'You did something wrong!'):
         _ = InvalidLayer()(inputs)
 
+  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
   def test_invalid_forward_pass_in_eager_mode(self):
     inputs = keras.Input((3,))
     outputs = InvalidLayer()(inputs)
     model = keras.Model(inputs, outputs)
     self.assertEqual(model._static_graph_friendly, False)
-    model.compile(RMSPropOptimizer(0.001), loss='mse')
-    with self.assertRaisesRegexp(ValueError, 'You did something wrong!'):
-      model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+    if testing_utils.should_run_eagerly():
+      model.compile(rmsprop.RMSprop(0.001), loss='mse', run_eagerly=True)
+      with self.assertRaisesRegexp(ValueError, 'You did something wrong!'):
+        model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+    else:
+      with self.assertRaisesRegexp(
+          ValueError, 'only be successfully run in eager execution'):
+        model.compile(rmsprop.RMSprop(0.001), loss='mse', run_eagerly=False)
 
   def test_using_symbolic_tensors_with_tf_ops(self):
     # Single-input.
@@ -164,6 +191,7 @@ class BaseLayerTest(test.TestCase):
                                  'objects other than \'EagerTensor\''):
       math_ops.matmul(x1, x2)
 
+  @test_util.run_in_graph_and_eager_modes
   def test_mixing_keras_symbolic_tensors_and_eager_tensors(self):
     x1 = keras.Input((3,))
     x2 = array_ops.ones((3, 3))
@@ -176,6 +204,7 @@ class BaseLayerTest(test.TestCase):
                         np.matmul(x_val, y_val),
                         atol=1e-5)
 
+  @test_util.run_in_graph_and_eager_modes
   def test_mixing_keras_symbolic_tensors_and_numpy_arrays(self):
     x1 = keras.Input((3,))
     x2 = np.ones((3, 3), dtype='float32')
diff --git a/tensorflow/python/keras/engine/training_eager_test.py b/tensorflow/python/keras/engine/training_eager_test.py
index f95a502cbc..6b98067063 100644
--- a/tensorflow/python/keras/engine/training_eager_test.py
+++ b/tensorflow/python/keras/engine/training_eager_test.py
@@ -24,25 +24,27 @@ from tensorflow.python import keras
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import metrics as metrics_module
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.platform import test
-from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
-class TrainingTest(test.TestCase):
+class TrainingTest(keras_parameterized.TestCase):
 
+  @keras_parameterized.run_with_all_model_types(exclude_models='sequential')
   def test_model_methods_with_eager_tensors_multi_io(self):
-    a = keras.layers.Input(shape=(3,), name='input_a')
-    b = keras.layers.Input(shape=(3,), name='input_b')
+    input_a = keras.layers.Input(shape=(3,), name='input_a')
+    input_b = keras.layers.Input(shape=(3,), name='input_b')
 
     dense = keras.layers.Dense(4, name='dense')
-    c = dense(a)
-    d = dense(b)
-    e = keras.layers.Dropout(0.5, name='dropout')(c)
+    dropout = keras.layers.Dropout(0.5, name='dropout')
 
-    model = keras.models.Model([a, b], [d, e])
+    model = testing_utils.get_multi_io_model(
+        [input_a, dense], [input_b, dense, dropout])
 
-    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    optimizer = rmsprop.RMSprop(learning_rate=0.001)
     loss = 'mse'
     loss_weights = [1., 0.5]
     metrics = ['mae', metrics_module.CategoricalAccuracy()]
@@ -56,60 +58,59 @@ class TrainingTest(test.TestCase):
 
     input_a = keras.backend.zeros(shape=(10, 3))
     input_b = keras.backend.zeros(shape=(10, 3))
-    target_d = keras.backend.zeros(shape=(10, 4))
-    target_e = keras.backend.zeros(shape=(10, 4))
+    target_a = keras.backend.zeros(shape=(10, 4))
+    target_b = keras.backend.zeros(shape=(10, 4))
 
     model.fit(
-        [input_a, input_b], [target_d, target_e],
+        [input_a, input_b], [target_a, target_b],
         epochs=1,
         batch_size=5,
         verbose=0)
     # Test: no shuffle.
     model.fit(
-        [input_a, input_b], [target_d, target_e],
+        [input_a, input_b], [target_a, target_b],
         epochs=1,
         batch_size=5,
         verbose=0,
         shuffle=False)
     # Test: validation data.
-    model.fit([input_a, input_b], [target_d, target_e],
+    model.fit([input_a, input_b], [target_a, target_b],
               epochs=1, batch_size=2, verbose=0,
-              validation_data=([input_a, input_b], [target_d, target_e]))
-    model.train_on_batch([input_a, input_b], [target_d, target_e])
+              validation_data=([input_a, input_b], [target_a, target_b]))
+    model.train_on_batch([input_a, input_b], [target_a, target_b])
     model.predict([input_a, input_b], batch_size=5)
-    model.evaluate([input_a, input_b], [target_d, target_e],
+    model.evaluate([input_a, input_b], [target_a, target_b],
                    batch_size=2, verbose=0)
-    model.test_on_batch([input_a, input_b], [target_d, target_e])
+    model.test_on_batch([input_a, input_b], [target_a, target_b])
 
     # Test: mix np and tensors.
     input_b = np.zeros(shape=(10, 3)).astype('float32')
-    target_e = np.zeros(shape=(10, 4)).astype('float32')
+    target_b = np.zeros(shape=(10, 4)).astype('float32')
     model.fit(
-        [input_a, input_b], [target_d, target_e],
+        [input_a, input_b], [target_a, target_b],
         epochs=1,
         batch_size=5,
         verbose=0)
-    model.fit([input_a, input_b], [target_d, target_e],
+    model.fit([input_a, input_b], [target_a, target_b],
               epochs=1, batch_size=2, verbose=0,
-              validation_data=([input_a, input_b], [target_d, target_e]))
+              validation_data=([input_a, input_b], [target_a, target_b]))
     model.fit(
-        [input_a, input_b], [target_d, target_e],
+        [input_a, input_b], [target_a, target_b],
         epochs=1,
         batch_size=5,
         verbose=0,
         shuffle=False)
-    model.train_on_batch([input_a, input_b], [target_d, target_e])
+    model.train_on_batch([input_a, input_b], [target_a, target_b])
     model.predict([input_a, input_b], batch_size=5)
-    model.evaluate([input_a, input_b], [target_d, target_e],
+    model.evaluate([input_a, input_b], [target_a, target_b],
                    batch_size=2, verbose=0)
-    model.test_on_batch([input_a, input_b], [target_d, target_e])
+    model.test_on_batch([input_a, input_b], [target_a, target_b])
 
+  @keras_parameterized.run_with_all_model_types
   def test_model_methods_with_eager_tensors_single_io(self):
-    x = keras.layers.Input(shape=(3,), name='input')
-    y = keras.layers.Dense(4, name='dense')(x)
-    model = keras.Model(x, y)
+    model = testing_utils.get_small_mlp(10, 4, 3)
 
-    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    optimizer = rmsprop.RMSprop(learning_rate=0.001)
     loss = 'mse'
     metrics = ['mae', metrics_module.CategoricalAccuracy()]
     model.compile(optimizer, loss, metrics=metrics, run_eagerly=True)
@@ -126,11 +127,10 @@ class TrainingTest(test.TestCase):
     model.train_on_batch(inputs, targets)
     model.test_on_batch(inputs, targets)
 
+  @keras_parameterized.run_with_all_model_types
   def test_model_fit_and_validation_with_missing_arg_errors(self):
-    x = keras.layers.Input(shape=(3,), name='input')
-    y = keras.layers.Dense(4, name='dense')(x)
-    model = keras.Model(x, y)
-    model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001),
+    model = testing_utils.get_small_mlp(10, 4, 3)
+    model.compile(optimizer=rmsprop.RMSprop(learning_rate=0.001),
                   loss='mse',
                   run_eagerly=True)
 
@@ -161,10 +161,12 @@ class TrainingTest(test.TestCase):
       model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
                 validation_data=validation_iterator)
 
+  # TODO(b/120931266): Enable test on subclassed models after bug causing an
+  # extra dimension to be added to predict outputs is fixed.
+  @keras_parameterized.run_with_all_model_types(exclude_models='subclass')
   def test_generator_methods(self):
-    model = keras.Sequential()
-    model.add(keras.layers.Dense(4, input_shape=(3,)))
-    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    model = testing_utils.get_small_mlp(10, 4, 3)
+    optimizer = rmsprop.RMSprop(learning_rate=0.001)
     model.compile(
         optimizer,
         loss='mse',
@@ -189,41 +191,41 @@ class TrainingTest(test.TestCase):
     self.assertEqual(out.shape, (30, 4))
 
 
-class CorrectnessTest(test.TestCase):
+class CorrectnessTest(keras_parameterized.TestCase):
 
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes
   def test_loss_correctness(self):
     # Test that training loss is the same in eager and graph
     # (by comparing it to a reference value in a deterministic case)
-    model = keras.Sequential()
-    model.add(keras.layers.Dense(3,
-                                 activation='relu',
-                                 input_dim=4,
-                                 kernel_initializer='ones'))
-    model.add(keras.layers.Dense(2,
-                                 activation='softmax',
-                                 kernel_initializer='ones'))
+    layers = [
+        keras.layers.Dense(3, activation='relu',
+                           kernel_initializer='ones'),
+        keras.layers.Dense(2, activation='softmax', kernel_initializer='ones')]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(4,))
     model.compile(loss='sparse_categorical_crossentropy',
-                  optimizer=RMSPropOptimizer(learning_rate=0.001),
-                  run_eagerly=False)
+                  optimizer=rmsprop.RMSprop(learning_rate=0.001),
+                  run_eagerly=testing_utils.should_run_eagerly())
     x = np.ones((100, 4))
     np.random.seed(123)
     y = np.random.randint(0, 1, size=(100, 1))
     history = model.fit(x, y, epochs=1, batch_size=10)
-    self.assertAlmostEqual(history.history['loss'][-1], 0.6173, 4)
+    self.assertAlmostEqual(history.history['loss'][-1], 0.5836, 4)
 
+  @keras_parameterized.run_with_all_model_types
+  @keras_parameterized.run_all_keras_modes
   def test_loss_correctness_with_iterator(self):
     # Test that training loss is the same in eager and graph
     # (by comparing it to a reference value in a deterministic case)
-    model = keras.Sequential()
-    model.add(
-        keras.layers.Dense(
-            3, activation='relu', input_dim=4, kernel_initializer='ones'))
-    model.add(
-        keras.layers.Dense(2, activation='softmax', kernel_initializer='ones'))
+    layers = [
+        keras.layers.Dense(3, activation='relu',
+                           kernel_initializer='ones'),
+        keras.layers.Dense(2, activation='softmax', kernel_initializer='ones')]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(4,))
     model.compile(
         loss='sparse_categorical_crossentropy',
-        optimizer=RMSPropOptimizer(learning_rate=0.001),
-        run_eagerly=True)
+        optimizer=rmsprop.RMSprop(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     x = np.ones((100, 4), dtype=np.float32)
     np.random.seed(123)
     y = np.random.randint(0, 1, size=(100, 1))
@@ -232,7 +234,7 @@ class CorrectnessTest(test.TestCase):
     dataset = dataset.batch(10)
     iterator = dataset_ops.make_one_shot_iterator(dataset)
     history = model.fit(iterator, epochs=1, steps_per_epoch=10)
-    self.assertAlmostEqual(history.history['loss'][-1], 0.6173, 4)
+    self.assertAlmostEqual(history.history['loss'][-1], 0.5836, 4)
 
   def test_loss_in_call(self):
 
-- 
GitLab


From 1182414979232e16e89f38f3894637c8d1f4709a Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Thu, 13 Dec 2018 14:57:09 -0800
Subject: [PATCH 549/873] Typo correction.

PiperOrigin-RevId: 225440995
---
 .../data/experimental/kernel_tests/copy_to_device_test.py   | 2 +-
 .../data/experimental/kernel_tests/map_defun_op_test.py     | 2 +-
 .../experimental/kernel_tests/prefetch_to_device_test.py    | 2 +-
 .../experimental/kernel_tests/restructured_dataset_test.py  | 2 +-
 .../python/data/experimental/kernel_tests/scan_test.py      | 2 +-
 .../python/data/experimental/kernel_tests/unbatch_test.py   | 6 +++---
 .../data/experimental/kernel_tests/wrap_unwrap_test.py      | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
index 786eae11a9..d9fbe9e0e1 100644
--- a/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/copy_to_device_test.py
@@ -33,7 +33,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat as util_compat
 
 
-# TODO(b/119837791): add eager coverage when supported.
+# TODO(b/117581999): add eager coverage when supported.
 class CopyToDeviceTest(test_base.DatasetTestBase):
 
   @test_util.run_deprecated_v1
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index e41030dc04..85652bf00f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -35,7 +35,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
-# TODO(b/119837791): add eager coverage.
+# TODO(b/117581999): add eager coverage.
 class MapDefunTest(test_base.DatasetTestBase):
 
   def testMapDefunSimple(self):
diff --git a/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
index 2af31ad3e3..238c5cd506 100644
--- a/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/prefetch_to_device_test.py
@@ -29,7 +29,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
-# TODO(b/119837791): add eager coverage when supported.
+# TODO(b/117581999): add eager coverage when supported.
 class PrefetchToDeviceTest(test_base.DatasetTestBase):
 
   @test_util.run_deprecated_v1
diff --git a/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
index 3b0d23d6e1..87a91415b0 100644
--- a/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/restructured_dataset_test.py
@@ -27,7 +27,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-# TODO(b/119837791): Add eager coverage
+# TODO(b/117581999): Add eager coverage
 class RestructuredDatasetTest(test_base.DatasetTestBase):
 
   @test_util.run_deprecated_v1
diff --git a/tensorflow/python/data/experimental/kernel_tests/scan_test.py b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
index 89b3824821..f5ac0f5007 100644
--- a/tensorflow/python/data/experimental/kernel_tests/scan_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/scan_test.py
@@ -70,7 +70,7 @@ class ScanTest(test_base.DatasetTestBase):
     self.assertEqual(5, self.evaluate(next_element()))
     self.assertEqual(8, self.evaluate(next_element()))
 
-  # TODO(b/119837791): Add coverage for eager.
+  # TODO(b/117581999): Add coverage for eager.
   @test_util.run_deprecated_v1
   def testSkipEagerSparseCount(self):
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
index 7f30cf568e..e4034cc43a 100644
--- a/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/unbatch_test.py
@@ -68,7 +68,7 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     self.assertDatasetProduces(
         data, [(i, compat.as_bytes(str(i)), i) for i in range(10)])
 
-  # TODO(b/119837791): Add eager coverage.
+  # TODO(b/117581999): Add eager coverage.
   @test_util.run_deprecated_v1
   def testSkipEagerUnbatchDatasetWithSparseTensor(self):
     st = sparse_tensor.SparseTensorValue(
@@ -90,7 +90,7 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     with self.assertRaises(errors.OutOfRangeError):
       self.evaluate(next_element)
 
-  # TODO(b/119837791): Add eager coverage.
+  # TODO(b/117581999): Add eager coverage.
   @test_util.run_deprecated_v1
   def testSkipEagerUnbatchDatasetWithDenseAndSparseTensor(self):
     st = sparse_tensor.SparseTensorValue(
@@ -150,7 +150,7 @@ class UnbatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     with self.assertRaises(ValueError):
       data.apply(batching.unbatch())
 
-  # TODO(b/119837791): eager mode doesnt capture raised error, debug.
+  # TODO(b/117581999): eager mode doesnt capture raised error, debug.
   @test_util.run_deprecated_v1
   def testSkipEagerUnbatchDynamicShapeMismatch(self):
     ph1 = array_ops.placeholder(dtypes.int32, shape=[None])
diff --git a/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py b/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py
index c47595c16c..a8f5050151 100644
--- a/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/wrap_unwrap_test.py
@@ -42,7 +42,7 @@ class WrapDatasetVariantTest(test_base.DatasetTestBase):
     for i in range(100):
       self.assertEqual(i, self.evaluate(get_next()))
 
-  # TODO(b/119837791): add eager coverage when supported.
+  # TODO(b/117581999): add eager coverage when supported.
   def testSkipEagerGPU(self):
     ds = dataset_ops.Dataset.range(100)
     ds_variant = ds._as_variant_tensor()  # pylint: disable=protected-access
-- 
GitLab


From 3bc90c53545661b99645930582bffbc0a61dd3b4 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Thu, 13 Dec 2018 15:09:02 -0800
Subject: [PATCH 550/873] Update GRU tests to run in 2.0 modes.

PiperOrigin-RevId: 225443196
---
 tensorflow/python/keras/BUILD              |   1 +
 tensorflow/python/keras/layers/gru_test.py | 138 +++++++++++----------
 2 files changed, 73 insertions(+), 66 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index aa9e1c03b1..4259e6d592 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -503,6 +503,7 @@ py_test(
     name = "gru_test",
     size = "large",
     srcs = ["layers/gru_test.py"],
+    shard_count = 2,
     srcs_version = "PY2AND3",
     tags = ["notsan"],  # http://b/62136390
     deps = [
diff --git a/tensorflow/python/keras/layers/gru_test.py b/tensorflow/python/keras/layers/gru_test.py
index 1b2881a26b..61c502c3b6 100644
--- a/tensorflow/python/keras/layers/gru_test.py
+++ b/tensorflow/python/keras/layers/gru_test.py
@@ -22,14 +22,15 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.framework import test_util as tf_test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
-class GRULayerTest(test.TestCase):
+@keras_parameterized.run_all_keras_modes
+class GRULayerTest(keras_parameterized.TestCase):
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_return_sequences_GRU(self):
     num_samples = 2
     timesteps = 3
@@ -41,7 +42,6 @@ class GRULayerTest(test.TestCase):
                 'return_sequences': True},
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_dynamic_behavior_GRU(self):
     num_samples = 2
     timesteps = 3
@@ -50,12 +50,12 @@ class GRULayerTest(test.TestCase):
     layer = keras.layers.GRU(units, input_shape=(None, embedding_dim))
     model = keras.models.Sequential()
     model.add(layer)
-    model.compile(RMSPropOptimizer(0.01), 'mse')
+    model.compile(RMSPropOptimizer(0.01), 'mse',
+                  run_eagerly=testing_utils.should_run_eagerly())
     x = np.random.random((num_samples, timesteps, embedding_dim))
     y = np.random.random((num_samples, units))
     model.train_on_batch(x, y)
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_dropout_GRU(self):
     num_samples = 2
     timesteps = 3
@@ -68,7 +68,6 @@ class GRULayerTest(test.TestCase):
                 'recurrent_dropout': 0.1},
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_implementation_mode_GRU(self):
     num_samples = 2
     timesteps = 3
@@ -81,7 +80,6 @@ class GRULayerTest(test.TestCase):
                   'implementation': mode},
           input_shape=(num_samples, timesteps, embedding_dim))
 
-  @tf_test_util.run_in_graph_and_eager_modes
   def test_reset_after_GRU(self):
     num_samples = 2
     timesteps = 3
@@ -100,16 +98,65 @@ class GRULayerTest(test.TestCase):
                                  reset_after=True)
     output = gru_layer(inputs)
     gru_model = keras.models.Model(inputs, output)
-    gru_model.compile('rmsprop', 'mse')
+    gru_model.compile('rmsprop', 'mse',
+                      run_eagerly=testing_utils.should_run_eagerly())
     gru_model.fit(x_train, y_train)
     gru_model.predict(x_train)
 
+  def test_with_masking_layer_GRU(self):
+    layer_class = keras.layers.GRU
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.Masking(input_shape=(3, 4)))
+    model.add(layer_class(units=5, return_sequences=True, unroll=False))
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=RMSPropOptimizer(0.01),
+                  run_eagerly=testing_utils.should_run_eagerly())
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
+
+
+@tf_test_util.run_all_in_graph_and_eager_modes
+class GRULayerGenericTest(test.TestCase):
+
+  def test_constraints_GRU(self):
+    embedding_dim = 4
+    layer_class = keras.layers.GRU
+    k_constraint = keras.constraints.max_norm(0.01)
+    r_constraint = keras.constraints.max_norm(0.01)
+    b_constraint = keras.constraints.max_norm(0.01)
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_constraint=k_constraint,
+        recurrent_constraint=r_constraint,
+        bias_constraint=b_constraint)
+    layer.build((None, None, embedding_dim))
+    self.assertEqual(layer.cell.kernel.constraint, k_constraint)
+    self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
+    self.assertEqual(layer.cell.bias.constraint, b_constraint)
+
+  def test_from_config_GRU(self):
+    layer_class = keras.layers.GRU
+    for stateful in (False, True):
+      l1 = layer_class(units=1, stateful=stateful)
+      l2 = layer_class.from_config(l1.get_config())
+      assert l1.get_config() == l2.get_config()
+
+
+class GRULayerGraphOnlyTest(test.TestCase):
+
+  @tf_test_util.run_v1_only('b/120545219')
   def test_statefulness_GRU(self):
     num_samples = 2
     timesteps = 3
     embedding_dim = 4
     units = 2
     layer_class = keras.layers.GRU
+
     with self.cached_session():
       model = keras.models.Sequential()
       model.add(
@@ -166,67 +213,26 @@ class GRULayerTest(test.TestCase):
 
       np.testing.assert_allclose(out7, out6, atol=1e-5)
 
+  # b/120919032
+  @tf_test_util.run_deprecated_v1
   def test_regularizers_GRU(self):
     embedding_dim = 4
     layer_class = keras.layers.GRU
-    with self.cached_session():
-      layer = layer_class(
-          5,
-          return_sequences=False,
-          weights=None,
-          input_shape=(None, embedding_dim),
-          kernel_regularizer=keras.regularizers.l1(0.01),
-          recurrent_regularizer=keras.regularizers.l1(0.01),
-          bias_regularizer='l2',
-          activity_regularizer='l1')
-      layer.build((None, None, 2))
-      self.assertEqual(len(layer.losses), 3)
-
-      x = keras.backend.variable(np.ones((2, 3, 2)))
-      layer(x)
-      self.assertEqual(len(layer.get_losses_for(x)), 1)
-
-  def test_constraints_GRU(self):
-    embedding_dim = 4
-    layer_class = keras.layers.GRU
-    with self.cached_session():
-      k_constraint = keras.constraints.max_norm(0.01)
-      r_constraint = keras.constraints.max_norm(0.01)
-      b_constraint = keras.constraints.max_norm(0.01)
-      layer = layer_class(
-          5,
-          return_sequences=False,
-          weights=None,
-          input_shape=(None, embedding_dim),
-          kernel_constraint=k_constraint,
-          recurrent_constraint=r_constraint,
-          bias_constraint=b_constraint)
-      layer.build((None, None, embedding_dim))
-      self.assertEqual(layer.cell.kernel.constraint, k_constraint)
-      self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
-      self.assertEqual(layer.cell.bias.constraint, b_constraint)
-
-  @tf_test_util.run_in_graph_and_eager_modes
-  def test_with_masking_layer_GRU(self):
-    layer_class = keras.layers.GRU
-    with self.cached_session():
-      inputs = np.random.random((2, 3, 4))
-      targets = np.abs(np.random.random((2, 3, 5)))
-      targets /= targets.sum(axis=-1, keepdims=True)
-      model = keras.models.Sequential()
-      model.add(keras.layers.Masking(input_shape=(3, 4)))
-      model.add(layer_class(units=5, return_sequences=True, unroll=False))
-      model.compile(loss='categorical_crossentropy',
-                    optimizer=RMSPropOptimizer(0.01))
-      model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
-
-  def test_from_config_GRU(self):
-    layer_class = keras.layers.GRU
-    for stateful in (False, True):
-      l1 = layer_class(units=1, stateful=stateful)
-      l2 = layer_class.from_config(l1.get_config())
-      assert l1.get_config() == l2.get_config()
-
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_regularizer=keras.regularizers.l1(0.01),
+        recurrent_regularizer=keras.regularizers.l1(0.01),
+        bias_regularizer='l2',
+        activity_regularizer='l1')
+    layer.build((None, None, 2))
+    self.assertEqual(len(layer.losses), 3)
+
+    x = keras.backend.variable(np.ones((2, 3, 2)))
+    layer(x)
+    self.assertEqual(len(layer.get_losses_for(x)), 1)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 23b92345f4f6e049b702399a34398cc690bff7c3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 15:11:07 -0800
Subject: [PATCH 551/873] [TF:XLA] Speed up Literal::Near comparisons.

Avoid eagerly generating an error status string. Instead only generate the error status lazily when a comparison fails.

PiperOrigin-RevId: 225443548
---
 tensorflow/compiler/xla/literal_comparison.cc | 110 ++++++++++++------
 1 file changed, 74 insertions(+), 36 deletions(-)

diff --git a/tensorflow/compiler/xla/literal_comparison.cc b/tensorflow/compiler/xla/literal_comparison.cc
index b044f0ad73..1ac9a48e80 100644
--- a/tensorflow/compiler/xla/literal_comparison.cc
+++ b/tensorflow/compiler/xla/literal_comparison.cc
@@ -46,68 +46,102 @@ uint16 GetRawValue(Eigen::half val) { return val.x; }
 // between the left-hand-side and right-hand-side, by bit-casting to UnsignedT
 // -- on miscompare, a nice error message is given in the AssertionFailure.
 template <typename FloatT, typename UnsignedT>
-Status CompareFloatsBitwiseEqual(FloatT lhs, FloatT rhs,
-                                 absl::Span<const int64> multi_index) {
+bool CompareFloatsBitwiseEqual(FloatT lhs, FloatT rhs,
+                               absl::Span<const int64> multi_index) {
+  auto ulhs = absl::bit_cast<UnsignedT>(GetRawValue(lhs));
+  auto urhs = absl::bit_cast<UnsignedT>(GetRawValue(rhs));
+  return ulhs == urhs;
+}
+
+// Templated comparator that specializes for float equality comparison with the
+// bitwise helper above (this is the un-specialized fallback, to just use the
+// default gunit implementation).
+template <typename NativeT>
+bool CompareEqual(NativeT lhs, NativeT rhs,
+                  absl::Span<const int64> multi_index) {
+  return lhs == rhs;
+}
+
+// Specializations for floating types that do bitwise comparisons when equality
+// comparison is requested.
+template <>
+bool CompareEqual<bfloat16>(bfloat16 lhs, bfloat16 rhs,
+                            absl::Span<const int64> multi_index) {
+  return CompareFloatsBitwiseEqual<bfloat16, uint16>(lhs, rhs, multi_index);
+}
+template <>
+bool CompareEqual<Eigen::half>(Eigen::half lhs, Eigen::half rhs,
+                               absl::Span<const int64> multi_index) {
+  return CompareFloatsBitwiseEqual<Eigen::half, uint16>(lhs, rhs, multi_index);
+}
+template <>
+bool CompareEqual<float>(float lhs, float rhs,
+                         absl::Span<const int64> multi_index) {
+  return CompareFloatsBitwiseEqual<float, uint32>(lhs, rhs, multi_index);
+}
+template <>
+bool CompareEqual<double>(double lhs, double rhs,
+                          absl::Span<const int64> multi_index) {
+  return CompareFloatsBitwiseEqual<double, uint64>(lhs, rhs, multi_index);
+}
+template <>
+bool CompareEqual<complex64>(complex64 lhs, complex64 rhs,
+                             absl::Span<const int64> multi_index) {
+  return CompareEqual<float>(lhs.real(), rhs.real(), multi_index) &&
+         CompareEqual<float>(lhs.imag(), rhs.imag(), multi_index);
+}
+
+template <typename NativeT, typename UnsignedT>
+Status MakeBitwiseErrorStatus(NativeT lhs, NativeT rhs,
+                              absl::Span<const int64> multi_index) {
   auto ulhs = absl::bit_cast<UnsignedT>(GetRawValue(lhs));
   auto urhs = absl::bit_cast<UnsignedT>(GetRawValue(rhs));
   auto lhs_double = static_cast<double>(lhs);
   auto rhs_double = static_cast<double>(rhs);
-  if (ulhs != urhs) {
     return InvalidArgument(
         "floating values are not bitwise-equal; and equality testing "
         "was requested: %s=%g=%a vs %s=%g=%a at array index %s",
         StrCat(absl::Hex(ulhs)), lhs_double, lhs_double,
         StrCat(absl::Hex(urhs)), rhs_double, rhs_double,
         LiteralUtil::MultiIndexAsString(multi_index));
-  }
-  return Status::OK();
 }
 
-// Templated comparator that specializes for float equality comparison with the
-// bitwise helper above (this is the un-specialized fallback, to just use the
-// default gunit implementation).
 template <typename NativeT>
-Status CompareEqual(NativeT lhs, NativeT rhs,
-                    absl::Span<const int64> multi_index) {
-  if (lhs == rhs) {
-    return Status::OK();
-  }
+Status MakeErrorStatus(NativeT lhs, NativeT rhs,
+                       absl::Span<const int64> multi_index) {
   return InvalidArgument(
       "first mismatch at array index %s:\n  expected value: %s\n  actual "
       "value:   %s",
       LiteralUtil::MultiIndexAsString(multi_index), StrCat(lhs), StrCat(rhs));
 }
 
-// Specializations for floating types that do bitwise comparisons when equality
-// comparison is requested.
 template <>
-Status CompareEqual<bfloat16>(bfloat16 lhs, bfloat16 rhs,
-                              absl::Span<const int64> multi_index) {
-  return CompareFloatsBitwiseEqual<bfloat16, uint16>(lhs, rhs, multi_index);
+Status MakeErrorStatus(bfloat16 lhs, bfloat16 rhs,
+                       absl::Span<const int64> multi_index) {
+  return MakeBitwiseErrorStatus<bfloat16, uint16>(lhs, rhs, multi_index);
 }
 template <>
-Status CompareEqual<Eigen::half>(Eigen::half lhs, Eigen::half rhs,
-                                 absl::Span<const int64> multi_index) {
-  return CompareFloatsBitwiseEqual<Eigen::half, uint16>(lhs, rhs, multi_index);
+Status MakeErrorStatus(Eigen::half lhs, Eigen::half rhs,
+                       absl::Span<const int64> multi_index) {
+  return MakeBitwiseErrorStatus<Eigen::half, uint16>(lhs, rhs, multi_index);
 }
 template <>
-Status CompareEqual<float>(float lhs, float rhs,
-                           absl::Span<const int64> multi_index) {
-  return CompareFloatsBitwiseEqual<float, uint32>(lhs, rhs, multi_index);
+Status MakeErrorStatus(float lhs, float rhs,
+                       absl::Span<const int64> multi_index) {
+  return MakeBitwiseErrorStatus<float, uint32>(lhs, rhs, multi_index);
 }
 template <>
-Status CompareEqual<double>(double lhs, double rhs,
-                            absl::Span<const int64> multi_index) {
-  return CompareFloatsBitwiseEqual<double, uint64>(lhs, rhs, multi_index);
+Status MakeErrorStatus(double lhs, double rhs,
+                       absl::Span<const int64> multi_index) {
+  return MakeBitwiseErrorStatus<double, uint64>(lhs, rhs, multi_index);
 }
 template <>
-Status CompareEqual<complex64>(complex64 lhs, complex64 rhs,
-                               absl::Span<const int64> multi_index) {
-  auto res = CompareEqual<float>(lhs.real(), rhs.real(), multi_index);
-  if (!res.ok()) {
-    return res;
+Status MakeErrorStatus(complex64 lhs, complex64 rhs,
+                       absl::Span<const int64> multi_index) {
+  if (!CompareEqual<float>(lhs.real(), rhs.real(), multi_index)) {
+    return MakeErrorStatus(lhs.real(), rhs.real(), multi_index);
   }
-  return CompareEqual<float>(lhs.imag(), rhs.imag(), multi_index);
+  return MakeErrorStatus(lhs.imag(), rhs.imag(), multi_index);
 }
 
 // A recursive function which iterates through every index of expected and
@@ -119,7 +153,11 @@ Status Equal(LiteralSlice expected, LiteralSlice actual,
   if (dimension == expected.shape().dimensions_size()) {
     NativeT expected_value = expected.Get<NativeT>(multi_index);
     NativeT actual_value = actual.Get<NativeT>(multi_index);
-    return CompareEqual<NativeT>(expected_value, actual_value, multi_index);
+    bool result =
+        CompareEqual<NativeT>(expected_value, actual_value, multi_index);
+    return result ? Status::OK()
+                  : MakeErrorStatus<NativeT>(expected_value, actual_value,
+                                             multi_index);
   }
 
   Status result;
@@ -330,7 +368,7 @@ class NearComparator {
         NanMismatch(expected, actual, error_.relaxed_nans);
     float abs_error;
     float rel_error;
-    if (CompareEqual<T>(expected, actual, {linear_index}).ok()) {
+    if (CompareEqual<T>(expected, actual, {linear_index})) {
       abs_error = 0;
       rel_error = 0;
     } else if (is_nan_mismatch) {
@@ -344,7 +382,7 @@ class NearComparator {
     } else if (IsInf(expected) || IsInf(actual)) {
       // If either the expected or actual value is infinity but not both,
       // then both absolute and relative error are regarded as inifity.
-      CHECK(!CompareEqual(expected, actual, {linear_index}).ok());
+      CHECK(!CompareEqual(expected, actual, {linear_index}));
       abs_error = std::numeric_limits<float>::infinity();
       rel_error = std::numeric_limits<float>::infinity();
     } else {
-- 
GitLab


From 1dda9d7b9933d96cae985d484d55971cc943cb26 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Thu, 13 Dec 2018 15:23:28 -0800
Subject: [PATCH 552/873] Make all fuzzers use a fixed RunInputs API.

PiperOrigin-RevId: 225445688
---
 .../kernels/fuzzing/check_numerics_fuzz.cc    |  4 +--
 .../kernels/fuzzing/decode_compressed_fuzz.cc |  2 +-
 .../core/kernels/fuzzing/encode_jpeg_fuzz.cc  |  3 +--
 .../example_proto_fast_parsing_fuzz.cc        |  5 ++--
 .../core/kernels/fuzzing/fuzz_session.h       | 26 +++++++------------
 .../core/kernels/fuzzing/identity_fuzz.cc     |  2 +-
 .../core/kernels/fuzzing/one_hot_fuzz.cc      |  8 +++---
 .../kernels/fuzzing/parse_tensor_op_fuzz.cc   |  5 ++--
 .../core/kernels/fuzzing/string_split_fuzz.cc | 16 ++++++------
 .../kernels/fuzzing/string_split_v2_fuzz.cc   |  7 ++---
 10 files changed, 35 insertions(+), 43 deletions(-)

diff --git a/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc b/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
index 62d39895a4..c62378a264 100644
--- a/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
@@ -23,7 +23,7 @@ namespace fuzzing {
 class FuzzCheckNumerics : public FuzzSession {
   void BuildGraph(const Scope& scope) override {
     auto input =
-        tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_FLOAT);
+        tensorflow::ops::Placeholder(scope.WithOpName("input"), DT_FLOAT);
     auto prefix = "Error: ";
     (void)tensorflow::ops::CheckNumerics(scope.WithOpName("output"), input,
                                          prefix);
@@ -40,7 +40,7 @@ class FuzzCheckNumerics : public FuzzSession {
     for (size_t i = 0; i < num_floats; i++) {
       flat_tensor(i) = float_data[i];
     }
-    RunOneInput(input_tensor).IgnoreError();
+    RunInputs({{"input", input_tensor}}).IgnoreError();
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc b/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc
index 0a56f4b63f..b9fc014b86 100644
--- a/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/decode_compressed_fuzz.cc
@@ -22,7 +22,7 @@ namespace fuzzing {
 class FuzzDecodeCompressed : public FuzzStringInputOp {
   void BuildGraph(const Scope& scope) override {
     auto input =
-        tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_STRING);
+        tensorflow::ops::Placeholder(scope.WithOpName("input"), DT_STRING);
     auto d1 = tensorflow::ops::DecodeCompressed(
         scope.WithOpName("d1"), input,
         tensorflow::ops::DecodeCompressed::CompressionType(""));
diff --git a/tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc b/tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc
index f5dd47a052..b13c37a42c 100644
--- a/tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc
@@ -52,8 +52,7 @@ class FuzzEncodeJpeg : public FuzzSession {
     for (size_t i = 0; i < actual_pixels; i++) {
       flat_tensor(i) = data[i];
     }
-    // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
-    RunOneInput(input_tensor).IgnoreError();
+    RunInputs({{"input", input_tensor}}).IgnoreError();
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc b/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
index 5b029bf5ec..41b2eec62c 100644
--- a/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
@@ -30,7 +30,7 @@ class FuzzExampleProtoFastParsing : public FuzzSession {
   void BuildGraph(const Scope& scope) final {
     using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
     // The serialized proto.
-    auto input = Placeholder(scope.WithOpName("input1"), DT_STRING);
+    auto input = Placeholder(scope.WithOpName("input"), DT_STRING);
 
     auto in_expanded = ExpandDims(scope, input, Const<int>(scope, 0));
 
@@ -53,8 +53,7 @@ class FuzzExampleProtoFastParsing : public FuzzSession {
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
     input_tensor.scalar<string>()() =
         string(reinterpret_cast<const char*>(data), size);
-    // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
-    RunOneInput(input_tensor).IgnoreError();
+    RunInputs({{"input", input_tensor}}).IgnoreError();
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/fuzz_session.h b/tensorflow/core/kernels/fuzzing/fuzz_session.h
index 6abce959b9..f5eca49b96 100644
--- a/tensorflow/core/kernels/fuzzing/fuzz_session.h
+++ b/tensorflow/core/kernels/fuzzing/fuzz_session.h
@@ -35,11 +35,11 @@ limitations under the License.
 #endif
 
 // Standard builder for hooking one placeholder to one op.
-#define SINGLE_INPUT_OP_BUILDER(dtype, opName)                           \
-  void BuildGraph(const Scope& scope) override {                         \
-    auto op_node =                                                       \
-        tensorflow::ops::Placeholder(scope.WithOpName("input1"), dtype); \
-    (void)tensorflow::ops::opName(scope.WithOpName("output"), op_node);  \
+#define SINGLE_INPUT_OP_BUILDER(dtype, opName)                          \
+  void BuildGraph(const Scope& scope) override {                        \
+    auto op_node =                                                      \
+        tensorflow::ops::Placeholder(scope.WithOpName("input"), dtype); \
+    (void)tensorflow::ops::opName(scope.WithOpName("output"), op_node); \
   }
 
 namespace tensorflow {
@@ -61,7 +61,7 @@ namespace fuzzing {
 //   SINGLE_INPUT_OP_BUILDER(DT_INT8, Identity);
 //   void FuzzImpl(const uint8_t* data, size_t size) {
 //      ... convert data and size to a Tensor, pass it to:
-//      RunOneInput(input_tensor);
+//      RunInputs({{"input", input_tensor}}).IgnoreError();
 //
 class FuzzSession {
  public:
@@ -107,15 +107,10 @@ class FuzzSession {
   }
 
   // Runs the TF session by pulling on the "output" node, attaching
-  // the supplied input_tensor to the "input1" node, and discarding
+  // the supplied input_tensor to the input node(s), and discarding
   // any returned output.
-  Status RunOneInput(const Tensor& input_tensor) {
-    return session_->Run({{"input1", input_tensor}}, {}, {"output"}, nullptr);
-  }
-
-  Status RunTwoInputs(const Tensor& input1, const Tensor& input2) {
-    return session_->Run({{"input1", input1}, {"input2", input2}}, {},
-                         {"output"}, nullptr);
+  Status RunInputs(const std::vector<std::pair<string, Tensor> >& inputs) {
+    return session_->Run(inputs, {}, {"output"}, nullptr);
   }
 
   Status RunMultipleInputs(
@@ -149,8 +144,7 @@ class FuzzStringInputOp : public FuzzSession {
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
     input_tensor.scalar<string>()() =
         string(reinterpret_cast<const char*>(data), size);
-    // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
-    RunOneInput(input_tensor).IgnoreError();
+    RunInputs({{"input", input_tensor}}).IgnoreError();
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/identity_fuzz.cc b/tensorflow/core/kernels/fuzzing/identity_fuzz.cc
index 5c3fc4a279..dc056331e5 100644
--- a/tensorflow/core/kernels/fuzzing/identity_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/identity_fuzz.cc
@@ -30,7 +30,7 @@ class FuzzIdentity : public FuzzSession {
       flat_tensor(i) = data[i];
     }
 
-    Status s = RunOneInput(input_tensor);
+    Status s = RunInputs({{"input", input_tensor}});
     // Note:  For many ops, we don't care about this success -- but when
     // testing to make sure the harness actually works, it's useful.
     if (!s.ok()) {
diff --git a/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc b/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
index e685d4eebd..d181f54a11 100644
--- a/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
@@ -65,10 +65,10 @@ class FuzzOneHot : public FuzzSession {
     on_tensor.scalar<uint8>()() = on;
     off_tensor.scalar<uint8>()() = off;
 
-    RunMultipleInputs({{"input", input_tensor},
-                       {"depth", depth_tensor},
-                       {"on", on_tensor},
-                       {"off", off_tensor}})
+    RunInputs({{"input", input_tensor},
+               {"depth", depth_tensor},
+               {"on", on_tensor},
+               {"off", off_tensor}})
         .IgnoreError();
   }
 };
diff --git a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
index ab6812c5f1..bf67d24266 100644
--- a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
@@ -25,7 +25,7 @@ class FuzzParseTensor : public FuzzSession {
   void BuildGraph(const Scope& scope) final {
     using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
     // The serialized proto.
-    auto input = Placeholder(scope.WithOpName("input1"), DT_STRING);
+    auto input = Placeholder(scope.WithOpName("input"), DT_STRING);
 
     (void)ParseTensor(scope.WithOpName("output"), input, DT_FLOAT);
   }
@@ -62,8 +62,7 @@ class FuzzParseTensor : public FuzzSession {
     // Now we can do the actual fuzz implementation
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
     input_tensor.scalar<string>()() = as_string;
-    // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
-    RunOneInput(input_tensor).IgnoreError();
+    RunInputs({{"input", input_tensor}}).IgnoreError();
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc b/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
index 2564f8ed03..201e0c52c8 100644
--- a/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
@@ -22,16 +22,16 @@ namespace fuzzing {
 class FuzzStringSplit : public FuzzSession {
   void BuildGraph(const Scope& scope) override {
     auto input =
-        tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_STRING);
-    auto delimeter =
-        tensorflow::ops::Placeholder(scope.WithOpName("input2"), DT_STRING);
+        tensorflow::ops::Placeholder(scope.WithOpName("input"), DT_STRING);
+    auto delimiter =
+        tensorflow::ops::Placeholder(scope.WithOpName("delimiter"), DT_STRING);
     (void)tensorflow::ops::StringSplit(scope.WithOpName("output"), input,
-                                       delimeter);
+                                       delimiter);
   }
 
   void FuzzImpl(const uint8_t* data, size_t size) final {
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
-    Tensor delimeter_tensor(tensorflow::DT_STRING, TensorShape({}));
+    Tensor delimiter_tensor(tensorflow::DT_STRING, TensorShape({}));
 
     if (size > 0) {
       // The spec for split is that the delimeter should be 0 or 1 characters.
@@ -42,14 +42,14 @@ class FuzzStringSplit : public FuzzSession {
       if (delim_len > size) {
         delim_len = size - 1;
       }
-      delimeter_tensor.scalar<string>()() =
+      delimiter_tensor.scalar<string>()() =
           string(reinterpret_cast<const char*>(data), delim_len);
       input_tensor.scalar<string>()() = string(
           reinterpret_cast<const char*>(data + delim_len), size - delim_len);
     }
 
-    // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object!
-    RunTwoInputs(input_tensor, delimeter_tensor).IgnoreError();
+    RunInputs({{"input", input_tensor}, {"delimiter", delimiter_tensor}})
+        .IgnoreError();
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc b/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc
index 787bccc15b..2eee6a8871 100644
--- a/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc
@@ -22,9 +22,9 @@ namespace fuzzing {
 class FuzzStringSplitV2 : public FuzzSession {
   void BuildGraph(const Scope& scope) override {
     auto input =
-        tensorflow::ops::Placeholder(scope.WithOpName("input1"), DT_STRING);
+        tensorflow::ops::Placeholder(scope.WithOpName("input"), DT_STRING);
     auto separator =
-        tensorflow::ops::Placeholder(scope.WithOpName("input2"), DT_STRING);
+        tensorflow::ops::Placeholder(scope.WithOpName("separator"), DT_STRING);
     (void)tensorflow::ops::StringSplitV2(scope.WithOpName("output"),
                                                input, separator);
   }
@@ -52,7 +52,8 @@ class FuzzStringSplitV2 : public FuzzSession {
           reinterpret_cast<const char*>(data + sep_len), size - sep_len);
     }
 
-    RunTwoInputs(input_tensor, separator_tensor).IgnoreError();
+    RunInputs({{"input", input_tensor}, {"separator", separator_tensor}})
+        .IgnoreError();
   }
 
  private:
-- 
GitLab


From 400eb096aeccd87d35fa6243cbfedbde1aaaceb1 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Thu, 13 Dec 2018 15:24:55 -0800
Subject: [PATCH 553/873] Adjust status reporting for Barrier/RunManyGraphs.

Previously this code would choose the first status reported by an executor.
This frequently resulted in useless status messages being reported to workers
as a cancelled executor would report before an executor with a useful error
message.  Change this to preferentially keep non-cancelled failures over
cancellelation messages.

PiperOrigin-RevId: 225445929
---
 tensorflow/core/common_runtime/executor.h     | 36 +++++++++++++------
 .../distributed_runtime/master_session.cc     | 17 ++++++---
 tensorflow/core/distributed_runtime/worker.cc |  3 +-
 3 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h
index 34bf73972f..3c0f18d50a 100644
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@@ -173,21 +173,38 @@ class ExecutorBarrier {
   int pending_ GUARDED_BY(mu_) = 0;
   Status status_ GUARDED_BY(mu_);
 
+  void MergeStatusLocked(const Status& s) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    if (s.ok()) {
+      return;
+    }
+
+    // Prefer primary failures over cancellations.  A cancellation may finish
+    // _before_ the original status is propagated; we override it in this case.
+    if (status_.ok() ||
+        str_util::StrContains(status_.error_message(), "[CHILD]")) {
+      status_ = s;
+    }
+  }
+
   void WhenDone(const Status& s) {
-    bool error = false;
     Rendezvous* error_rendez = nullptr;
     StatusCallback done = nullptr;
     Status status;
+
     {
       mutex_lock l(mu_);
-      // If we are the first error encountered, mark the status
-      // appropriately and later trigger an abort of the Rendezvous
-      // object by this thread only.
+
+      // If we are the first error encountered, trigger an abort of the
+      // Rendezvous object by this thread only.
       if (status_.ok() && !s.ok()) {
-        error = true;
         error_rendez = rendez_;
         error_rendez->Ref();
-        status_ = s;
+      }
+
+      MergeStatusLocked(s);
+
+      if (!status_.ok()) {
+        status = status_;
       }
 
       // If this is the last call to WhenDone, call the final callback
@@ -196,16 +213,13 @@ class ExecutorBarrier {
         CHECK(done_cb_ != nullptr);
         std::swap(done, done_cb_);
       }
-
-      if (!status_.ok()) {
-        status = status_;
-      }
     }
 
-    if (error) {
+    if (error_rendez != nullptr) {
       error_rendez->StartAbort(status);
       error_rendez->Unref();
     }
+
     if (done != nullptr) {
       delete this;
       done(status);
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 59bb18e7eb..47f567ea8d 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -512,18 +512,18 @@ class RunManyGraphs {
     if (resp->status_code() != error::Code::OK) {
       // resp->status_code will only be non-OK if s.ok().
       mutex_lock l(mu_);
-      UpdateStatusLocked(
+      ReportBadStatus(
           Status(resp->status_code(), resp->status_error_message()));
     } else if (!s.ok()) {
       mutex_lock l(mu_);
-      UpdateStatusLocked(s);
+      ReportBadStatus(s);
     }
     pending_.DecrementCount();
   }
 
   void StartCancel() {
     mutex_lock l(mu_);
-    UpdateStatusLocked(errors::Cancelled("RunManyGraphs"));
+    ReportBadStatus(errors::Cancelled("RunManyGraphs"));
   }
 
   void Wait() { pending_.Wait(); }
@@ -540,13 +540,20 @@ class RunManyGraphs {
   mutable mutex mu_;
   Status status_ GUARDED_BY(mu_);
 
-  void UpdateStatusLocked(const Status& s) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+  void ReportBadStatus(const Status& s) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    // Start cancellation if we aren't already in an error state.
     if (status_.ok()) {
-      status_ = s;
       for (Call& call : calls_) {
         call.opts.StartCancel();
       }
     }
+
+    // Prefer primary failures over cancellations.  A cancellation may finish
+    // _before_ the original status is propagated; we override it in this case.
+    if (status_.ok() ||
+        str_util::StrContains(status_.error_message(), "[CHILD]")) {
+      status_ = s;
+    }
   }
 
   TF_DISALLOW_COPY_AND_ASSIGN(RunManyGraphs);
diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc
index f42143e582..8f41856f32 100644
--- a/tensorflow/core/distributed_runtime/worker.cc
+++ b/tensorflow/core/distributed_runtime/worker.cc
@@ -104,7 +104,8 @@ void Worker::AbortStep(int64 step_id) {
     // Delay a bit before aborting the step. This way, the root
     // cause may return first back to the client instead of this
     // cancellation generated abort error.
-    rendez->StartAbort(errors::Aborted("Step ", step_id));
+    rendez->StartAbort(errors::Aborted("[CHILD] Step ", step_id,
+                                       " cancelled.  Cancelling rendezvous."));
     rendez->Unref();
   });
 }
-- 
GitLab


From e36c15234b3f1f86c987051962d5d126ab67f18e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 15:52:56 -0800
Subject: [PATCH 554/873] Fix graph creation benchmark: Add a single sink node
 to the test graph. Otherwise a lot of time is spent
 FixupSourceAndSinkEdges().

Run on ************** (72 X 2993 MHz CPUs); 2018-12-13T14:28:47.680634494-08:00
CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB
Benchmark                 Time(ns)        CPU(ns)     Iterations
----------------------------------------------------------------
BM_GraphCreation/10/2        66540          86041           8196
BM_GraphCreation/64/2       136484         161598           4272
BM_GraphCreation/512/2      784516         818832            858
BM_GraphCreation/4k/2      7701701        7744640             89
BM_GraphCreation/32k/2    88439608       88672953              8
BM_GraphCreation/10/4        84779         105101           6644
BM_GraphCreation/64/4       177022         204266           3442
BM_GraphCreation/512/4     1089580        1123798            621
BM_GraphCreation/4k/4     10186857       10253981             68
BM_GraphCreation/32k/4   115389099      115635234              6
BM_GraphCreation/10/8       128583         152827           4611
BM_GraphCreation/64/8       287883         318163           2186
BM_GraphCreation/512/8     1957398        2007351            354
BM_GraphCreation/4k/8     17433017       17517949             40
BM_GraphCreation/32k/8   200025621      200436031              3
BM_GraphCreation/10/16      212706         240579           2904
BM_GraphCreation/64/16      477643         509846           1000
BM_GraphCreation/512/16    3553689        3604331            192
BM_GraphCreation/4k/16    32329683       32445653             21
BM_GraphCreation/32k/16  350925702      351627119              2
PiperOrigin-RevId: 225450999
---
 tensorflow/core/graph/graph_test.cc | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/graph/graph_test.cc b/tensorflow/core/graph/graph_test.cc
index e7762fd414..602578a83a 100644
--- a/tensorflow/core/graph/graph_test.cc
+++ b/tensorflow/core/graph/graph_test.cc
@@ -661,6 +661,10 @@ TEST_F(GraphTest, BuildNodeNameIndex) {
 }
 
 REGISTER_OP("Input").Output("y: float");
+REGISTER_OP("Output")
+    .Input("x: N * float")
+    .Attr("N: int >= 1")
+    .Output("y: float");
 REGISTER_OP("In2Out1").Input("a: float").Input("b: float").Output("y: float");
 REGISTER_OP("In4Out1")
     .Input("a: float")
@@ -713,7 +717,14 @@ GraphDef CreateGraphDef(int num_nodes, int num_edges_per_node) {
     }
     s += strings::Printf("'in%04d' ] } ", rnd.Uniform(kNumInNodes));
   }
-
+  // Add a single sink node. Otherwise a lot of time is spent in
+  // FixupSourceAndSinkEdges().
+  s += strings::Printf("node { name: 'out' op: 'Output' input: [ ");
+  for (int op = 0; op < num_nodes - 1; op++) {
+    s += strings::Printf("'op%05d', ", op);
+  }
+  s += strings::Printf("'op%05d' ], attr: { key: 'N' value { i: %d } } } ",
+                       num_nodes - 1, num_nodes);
   GraphDef graph_def;
   CHECK(protobuf::TextFormat::ParseFromString(s, &graph_def));
   return graph_def;
-- 
GitLab


From 092a49a2bf181a3571a5b1994b6b9305313a0403 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 16:00:12 -0800
Subject: [PATCH 555/873] Small edits to receptive_field library comments. Some
 automatic code edits were also included for better polishing.

PiperOrigin-RevId: 225452146
---
 tensorflow/contrib/receptive_field/README.md  |  3 +-
 .../python/util/examples/compute_rf.py        |  5 +--
 .../python/util/examples/rf_benchmark.py      | 29 ++++++-------
 .../python/util/receptive_field.py            | 42 +++++++++----------
 4 files changed, 34 insertions(+), 45 deletions(-)

diff --git a/tensorflow/contrib/receptive_field/README.md b/tensorflow/contrib/receptive_field/README.md
index 79b015a916..d1c41e4c0a 100644
--- a/tensorflow/contrib/receptive_field/README.md
+++ b/tensorflow/contrib/receptive_field/README.md
@@ -185,5 +185,4 @@ Effective padding (vertical) = 1482
 
 ## Authors
 
-Andr&eacute; Araujo (github id: andrefaraujo) and Mark Sandler (github id:
-marksandler)
+Andr&eacute; Araujo (@andrefaraujo) and Mark Sandler (@marksandler)
diff --git a/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py
index d6fdd12bbe..72f98ccc32 100644
--- a/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py
+++ b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py
@@ -12,10 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Computes Receptive Field (RF) information given a graph protobuf.
-
-For an example of usage, see accompanying file compute_rf.sh
-"""
+"""Computes Receptive Field (RF) information given a graph protobuf."""
 
 from __future__ import absolute_import
 from __future__ import division
diff --git a/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py b/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py
index a298b4d490..325929a593 100644
--- a/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py
+++ b/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py
@@ -16,8 +16,6 @@
 
 The receptive field (and related parameters) for the different models are
 printed to stdout, and may also optionally be written to a CSV file.
-
-For an example of usage, see rf_benchmark.sh
 """
 
 from __future__ import absolute_import
@@ -262,11 +260,11 @@ def _model_rf(graphdef,
       information will be computed.
     model_type: Type of model to be used, used only for printing purposes.
     csv_writer: A CSV writer for RF parameters, which is used if it is not None.
-    input_resolution: Input resolution to use when computing RF
-      parameters. This is important for the case where padding can only be
-      defined if the input resolution is known, which may happen if using SAME
-      padding. This is assumed the resolution for both height and width. If
-      None, we consider the resolution is unknown.
+    input_resolution: Input resolution to use when computing RF parameters. This
+      is important for the case where padding can only be defined if the input
+      resolution is known, which may happen if using SAME padding. This is
+      assumed the resolution for both height and width. If None, we consider the
+      resolution is unknown.
   """
   for desired_end_point_key in desired_end_point_keys:
     print('- %s:' % desired_end_point_key)
@@ -283,10 +281,10 @@ def _model_rf(graphdef,
       if (receptive_field_x == receptive_field_y) and (
           effective_stride_x == effective_stride_y) and (
               effective_padding_x == effective_padding_y):
-        print('Receptive field size = %5s, effective stride = %5s, effective '
-              'padding = %5s' % (str(receptive_field_x),
-                                 str(effective_stride_x),
-                                 str(effective_padding_x)))
+        print(
+            'Receptive field size = %5s, effective stride = %5s, effective '
+            'padding = %5s' % (str(receptive_field_x), str(effective_stride_x),
+                               str(effective_padding_x)))
       else:
         print('Receptive field size: horizontal = %5s, vertical = %5s. '
               'Effective stride: horizontal = %5s, vertical = %5s. Effective '
@@ -362,9 +360,8 @@ def _process_model_rf(model_type='resnet_v1_50',
       defined if the input resolution is known, which may happen if using SAME
       padding. The entries in the list are assumed the resolution for both
       height and width. If one of the elements in the list is None, we consider
-      it to mean that the resolution is unknown. If the list itself is None,
-      we use the default list [None, 224, 321].
-
+      it to mean that the resolution is unknown. If the list itself is None, we
+      use the default list [None, 224, 321].
   """
   # Process default value for this list.
   if input_resolutions is None:
@@ -477,8 +474,8 @@ def _mobilenet_v1_rf(csv_writer=None):
     csv_writer: A CSV writer for RF parameters, which is used if it is not None.
   """
   for model_type in _SUPPORTED_MOBILENETV1_VARIANTS:
-    with slim.arg_scope(
-        [slim.batch_norm, slim.dropout], is_training=False) as arg_sc:
+    with slim.arg_scope([slim.batch_norm, slim.dropout],
+                        is_training=False) as arg_sc:
       _process_model_rf(model_type, csv_writer, arg_sc)
 
 
diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field.py b/tensorflow/contrib/receptive_field/python/util/receptive_field.py
index b9bd2f0976..9127c772c7 100644
--- a/tensorflow/contrib/receptive_field/python/util/receptive_field.py
+++ b/tensorflow/contrib/receptive_field/python/util/receptive_field.py
@@ -12,12 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Functions to compute receptive field of a fully-convolutional network.
-
-Please refer to the following g3doc for detailed explanation on how this
-computation is performed, and why it is important:
-g3doc/photos/vision/features/delf/g3doc/rf_computation.md
-"""
+"""Functions to compute receptive field of a fully-convolutional network."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -96,8 +91,8 @@ class ReceptiveField(object):
     Args:
       y: An array of feature coordinates with shape `(..., d)`, where `d` is the
         number of dimensions of the coordinates.
-      axis: The dimensions for which to compute the input center coordinates.
-        If `None` (the default), compute the input center coordinates for all
+      axis: The dimensions for which to compute the input center coordinates. If
+        `None` (the default), compute the input center coordinates for all
         dimensions.
 
     Returns:
@@ -127,8 +122,8 @@ class ReceptiveField(object):
     Args:
       x: An array of input center coordinates with shape `(..., d)`, where `d`
         is the number of dimensions of the coordinates.
-      axis: The dimensions for which to compute the feature coordinates.
-        If `None` (the default), compute the feature coordinates for all
+      axis: The dimensions for which to compute the feature coordinates. If
+        `None` (the default), compute the feature coordinates for all
         dimensions.
 
     Returns:
@@ -274,14 +269,15 @@ def compute_receptive_field_from_graph_def(graph_def,
         continue
 
       # Get params for this layer.
-      (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
-       padding_y, _, _) = parse_layer_parameters.get_layer_params(
+      (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y,
+       _, _) = parse_layer_parameters.get_layer_params(
            node, name_to_node, node_info[node.name].input_size)
-      logging.vlog(3, "kernel_size_x = %s, kernel_size_y = %s, "
-                   "stride_x = %s, stride_y = %s, "
-                   "padding_x = %s, padding_y = %s, input size = %s" %
-                   (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
-                    padding_y, node_info[node.name].input_size))
+      logging.vlog(
+          3, "kernel_size_x = %s, kernel_size_y = %s, "
+          "stride_x = %s, stride_y = %s, "
+          "padding_x = %s, padding_y = %s, input size = %s" %
+          (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
+           padding_y, node_info[node.name].input_size))
       if padding_x is None or padding_y is None:
         undefined_padding = True
 
@@ -352,15 +348,15 @@ def compute_receptive_field_from_graph_def(graph_def,
               raise ValueError(
                   "Graph is not aligned since effective stride from different "
                   "paths is different in vertical direction")
-            if (rf_sizes_x[inp_name] - 1
-               ) / 2 - effective_paddings_x[inp_name] != (
-                   rf_size_input_x - 1) / 2 - effective_padding_input_x:
+            if (rf_sizes_x[inp_name] -
+                1) / 2 - effective_paddings_x[inp_name] != (
+                    rf_size_input_x - 1) / 2 - effective_padding_input_x:
               raise ValueError(
                   "Graph is not aligned since center shift from different "
                   "paths is different in horizontal direction")
-            if (rf_sizes_y[inp_name] - 1
-               ) / 2 - effective_paddings_y[inp_name] != (
-                   rf_size_input_y - 1) / 2 - effective_padding_input_y:
+            if (rf_sizes_y[inp_name] -
+                1) / 2 - effective_paddings_y[inp_name] != (
+                    rf_size_input_y - 1) / 2 - effective_padding_input_y:
               raise ValueError(
                   "Graph is not aligned since center shift from different "
                   "paths is different in vertical direction")
-- 
GitLab


From 47b5a7e3e1b1b8a122536a16a463f0cc5df8d9ea Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Thu, 13 Dec 2018 16:03:58 -0800
Subject: [PATCH 556/873] [TF:XLA] Bump open source abseil revision to
 389ec3f906f018661a5308458d623d01f96d7b23

PiperOrigin-RevId: 225452935
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index b9ae329e9f..646439a244 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -123,11 +123,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "c2f8a1a399994df49db348a4725933b12fc807909cee21d48e46f53a28e79d4b",
-        strip_prefix = "abseil-cpp-8fbcdb90952c57828c4a9c2f6d79fcd7cae9088f",
+        sha256 = "ab499df1dc1ee5f9bf95f327adc22a7bd327ae5e7c023309cddccd0763ba1043",
+        strip_prefix = "abseil-cpp-389ec3f906f018661a5308458d623d01f96d7b23",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/8fbcdb90952c57828c4a9c2f6d79fcd7cae9088f.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/8fbcdb90952c57828c4a9c2f6d79fcd7cae9088f.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/389ec3f906f018661a5308458d623d01f96d7b23.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/389ec3f906f018661a5308458d623d01f96d7b23.tar.gz",
         ],
     )
 
-- 
GitLab


From a941c077df778eb2f9af6a652a42544df209ef52 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Thu, 13 Dec 2018 16:16:51 -0800
Subject: [PATCH 557/873] Allow TPUMirroredVariable to be a valid reduction
 destination

PiperOrigin-RevId: 225454867
---
 tensorflow/python/distribute/cross_device_ops.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py
index 6bb3639bf0..23349a965e 100644
--- a/tensorflow/python/distribute/cross_device_ops.py
+++ b/tensorflow/python/distribute/cross_device_ops.py
@@ -53,7 +53,8 @@ def validate_destinations(destinations):
   if not isinstance(
       destinations,
       (value_lib.DistributedValues, resource_variable_ops.ResourceVariable,
-       value_lib.AggregatingVariable, six.string_types, list, tuple)):
+       value_lib.AggregatingVariable, six.string_types, list, tuple,
+       value_lib.TPUMirroredVariable)):
     raise ValueError("destinations must be one of a `DistributedValues` object,"
                      " a tf.Variable object, a device string, a list or tuple "
                      "of device strings")
-- 
GitLab


From 63037a9519ef45cecfe6519f144cd4e0ce648d31 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Thu, 13 Dec 2018 16:20:11 -0800
Subject: [PATCH 558/873] Partial rollback: Log GPU and cuDNN version
 information.

PiperOrigin-RevId: 225455409
---
 tensorflow/stream_executor/BUILD              |  3 --
 .../stream_executor/stream_executor_pimpl.cc  | 28 +------------------
 2 files changed, 1 insertion(+), 30 deletions(-)

diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index c43efc799c..00c23b8d17 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -54,9 +54,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc_impl",
-        ":logging_proto_cc_impl",
         "//tensorflow/core:lib",
-        "//tensorflow/core:logger",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
@@ -73,7 +71,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc",
-        ":logging_proto_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/strings",
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index 86bc4ab7d0..d1d0bd9bc2 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -23,7 +23,6 @@ limitations under the License.
 #include <utility>
 
 #include "absl/strings/str_cat.h"
-#include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/util/env_var.h"
 #include "tensorflow/stream_executor/blas.h"
 #include "tensorflow/stream_executor/fft.h"
@@ -34,7 +33,6 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 #include "tensorflow/stream_executor/lib/threadpool.h"
-#include "tensorflow/stream_executor/logging.pb.h"
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/rng.h"
 #include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -219,31 +217,7 @@ StreamExecutor::~StreamExecutor() {
 port::Status StreamExecutor::Init(int device_ordinal,
                                   DeviceOptions device_options) {
   device_ordinal_ = device_ordinal;
-  TF_RETURN_IF_ERROR(
-      implementation_->Init(device_ordinal, std::move(device_options)));
-
-  if (platform_kind_ == PlatformKind::kCuda) {
-    CudaInfo info;
-
-    int cc_major, cc_minor;
-    GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor);
-    info.mutable_compute_capability()->set_major(cc_major);
-    info.mutable_compute_capability()->set_minor(cc_minor);
-
-    if (auto *dnn = AsDnn()) {
-      port::StatusOr<dnn::VersionInfo> version_or = dnn->GetVersion();
-      if (version_or.ok()) {
-        const auto &version = version_or.ValueOrDie();
-        info.mutable_cudnn_version()->set_major(version.major_version());
-        info.mutable_cudnn_version()->set_minor(version.minor_version());
-        info.mutable_cudnn_version()->set_patch(version.patch());
-      }
-    }
-
-    tensorflow::Logger::Singleton()->LogProto(info);
-  }
-
-  return port::Status::OK();
+  return implementation_->Init(device_ordinal, std::move(device_options));
 }
 
 port::Status StreamExecutor::Init() {
-- 
GitLab


From f5ef72147c433acda9b3d699d97f02de3431b08d Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Thu, 13 Dec 2018 16:23:09 -0800
Subject: [PATCH 559/873] Downgrade some @run_v1_only annotations

Some annotations were too coarse grained, whereas others were resolved
with recent fixes.

PiperOrigin-RevId: 225455888
---
 tensorflow/python/keras/callbacks_test.py     |  9 ++++-----
 .../python/keras/engine/sequential_test.py    |  1 -
 .../python/keras/engine/training_test.py      |  3 ---
 tensorflow/python/keras/integration_test.py   |  4 ----
 tensorflow/python/keras/layers/local_test.py  |  2 +-
 tensorflow/python/keras/layers/lstm_test.py   |  1 -
 .../python/keras/layers/normalization_test.py |  2 +-
 .../python/keras/layers/simplernn_test.py     |  1 -
 .../python/keras/layers/wrappers_test.py      |  4 ----
 .../python/keras/model_subclassing_test.py    |  2 --
 .../kernel_tests/checkpoint_ops_test.py       |  4 ++--
 .../python/kernel_tests/session_ops_test.py   | 20 +++++++++++++++----
 12 files changed, 24 insertions(+), 29 deletions(-)

diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index 4a65ade33c..5f0567c663 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -403,7 +403,6 @@ class KerasCallbacksTest(test.TestCase):
           float(keras.backend.get_value(
               model.optimizer.lr)) - 0.01 / 4) < keras.backend.epsilon()
 
-  @test_util.run_v1_only('b/120545219')
   def test_ReduceLROnPlateau(self):
     with self.cached_session():
       np.random.seed(1337)
@@ -675,7 +674,7 @@ class KerasCallbacksTest(test.TestCase):
       self.assertEqual(len(loss), 1)
       self.assertEqual(loss[0], np.inf)
 
-  @test_util.run_v1_only('b/120545219')
+  @test_util.run_deprecated_v1
   def test_TensorBoard(self):
     np.random.seed(1337)
 
@@ -779,7 +778,7 @@ class KerasCallbacksTest(test.TestCase):
           data_generator(True), len(x_train), epochs=2, callbacks=cbks)
       assert os.path.exists(temp_dir)
 
-  @test_util.run_v1_only('b/120545219')
+  @test_util.run_deprecated_v1
   def test_TensorBoard_multi_input_output(self):
     np.random.seed(1337)
     tmpdir = self.get_temp_dir()
@@ -851,7 +850,7 @@ class KerasCallbacksTest(test.TestCase):
                           callbacks=callbacks_factory(histogram_freq=1))
       assert os.path.isdir(filepath)
 
-  @test_util.run_v1_only('b/120545219')
+  @test_util.run_deprecated_v1
   def test_Tensorboard_histogram_summaries_in_test_function(self):
 
     class FileWriterStub(object):
@@ -929,7 +928,7 @@ class KerasCallbacksTest(test.TestCase):
 
       self.assertAllEqual(tsb.writer.steps_seen, [0, 1, 2, 3, 4, 5])
 
-  @test_util.run_v1_only('b/120545219')
+  @test_util.run_deprecated_v1
   def test_Tensorboard_histogram_summaries_with_generator(self):
     np.random.seed(1337)
     tmpdir = self.get_temp_dir()
diff --git a/tensorflow/python/keras/engine/sequential_test.py b/tensorflow/python/keras/engine/sequential_test.py
index 10f69da061..30a41e39b7 100644
--- a/tensorflow/python/keras/engine/sequential_test.py
+++ b/tensorflow/python/keras/engine/sequential_test.py
@@ -226,7 +226,6 @@ class TestSequential(keras_parameterized.TestCase):
     inner_model.trainable = True
     self.assertEqual(len(model.trainable_weights), 4)
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_sequential_update_disabling(self):
     val_a = np.random.random((10, 4))
     val_out = np.random.random((10, 4))
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 9d56eb261d..a39d433982 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -473,7 +473,6 @@ class TrainingTest(keras_parameterized.TestCase):
         metrics=['accuracy'],
         run_eagerly=testing_utils.should_run_eagerly())
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_that_trainable_disables_updates(self):
     val_a = np.random.random((10, 4))
     val_out = np.random.random((10, 4))
@@ -1286,7 +1285,6 @@ class LossMaskingTest(keras_parameterized.TestCase):
 
 class TestDynamicTrainability(keras_parameterized.TestCase):
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_trainable_warning(self):
     with self.cached_session():
       x = np.random.random((5, 3))
@@ -1300,7 +1298,6 @@ class TestDynamicTrainability(keras_parameterized.TestCase):
       model.train_on_batch(x, y)
       self.assertRaises(Warning)
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_trainable_argument(self):
     with self.cached_session():
       x = np.random.random((5, 3))
diff --git a/tensorflow/python/keras/integration_test.py b/tensorflow/python/keras/integration_test.py
index 8d65f63aba..fbe3508f07 100644
--- a/tensorflow/python/keras/integration_test.py
+++ b/tensorflow/python/keras/integration_test.py
@@ -35,7 +35,6 @@ class KerasIntegrationTest(test.TestCase):
   def test_version(self):
     self.assertTrue(keras.__version__.endswith('-tf'))
 
-  @test_util.run_v1_only('b/120545219')
   def test_vector_classification_sequential(self):
     with self.cached_session():
       np.random.seed(1337)
@@ -168,7 +167,6 @@ class KerasIntegrationTest(test.TestCase):
                           verbose=2)
       self.assertGreater(history.history['val_acc'][-1], 0.7)
 
-  @test_util.run_v1_only('b/120545219')
   def test_video_classification_functional(self):
     with self.cached_session():
       np.random.seed(1337)
@@ -197,7 +195,6 @@ class KerasIntegrationTest(test.TestCase):
                           verbose=2)
       self.assertGreater(history.history['val_acc'][-1], 0.7)
 
-  @test_util.run_v1_only('b/120545219')
   def test_vector_classification_shared_sequential(self):
     # Test that Sequential models that feature internal updates
     # and internal losses can be shared.
@@ -232,7 +229,6 @@ class KerasIntegrationTest(test.TestCase):
                           verbose=2)
       self.assertGreater(history.history['val_acc'][-1], 0.7)
 
-  @test_util.run_v1_only('b/120545219')
   def test_vector_classification_shared_model(self):
     # Test that functional models that feature internal updates
     # and internal losses can be shared.
diff --git a/tensorflow/python/keras/layers/local_test.py b/tensorflow/python/keras/layers/local_test.py
index 6db5bf385e..e4f4d0a639 100644
--- a/tensorflow/python/keras/layers/local_test.py
+++ b/tensorflow/python/keras/layers/local_test.py
@@ -235,7 +235,7 @@ class LocallyConnected2DLayersTest(test.TestCase):
 
 class LocallyConnectedImplementationModeTest(test.TestCase):
 
-  @tf_test_util.run_v1_only('b/120545219')
+  @tf_test_util.run_deprecated_v1
   def test_locallyconnected_implementation(self):
     with self.cached_session():
       num_samples = 4
diff --git a/tensorflow/python/keras/layers/lstm_test.py b/tensorflow/python/keras/layers/lstm_test.py
index 5c0ad5cf71..b132d2ee8e 100644
--- a/tensorflow/python/keras/layers/lstm_test.py
+++ b/tensorflow/python/keras/layers/lstm_test.py
@@ -321,7 +321,6 @@ class LSTMLayerTest(keras_parameterized.TestCase):
 
 class LSTMLayerGraphOnlyTest(test.TestCase):
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_statefulness_LSTM(self):
     num_samples = 2
     timesteps = 3
diff --git a/tensorflow/python/keras/layers/normalization_test.py b/tensorflow/python/keras/layers/normalization_test.py
index 780e02cf06..f81ddcecb4 100644
--- a/tensorflow/python/keras/layers/normalization_test.py
+++ b/tensorflow/python/keras/layers/normalization_test.py
@@ -253,7 +253,6 @@ def _run_batchnorm_correctness_test(layer, dtype='float32', fused=False):
   np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
 
 
-@tf_test_util.run_v1_only('b/120545219')
 class NormalizationLayersGraphModeOnlyTest(test.TestCase):
 
   def test_shared_batchnorm(self):
@@ -328,6 +327,7 @@ class NormalizationLayersGraphModeOnlyTest(test.TestCase):
       x2 = model.predict(val_a)
       self.assertAllClose(x1, x2, atol=1e-7)
 
+  @tf_test_util.run_deprecated_v1
   def test_batchnorm_trainable(self):
     """Tests that batchnorm layer is trainable when learning phase is enabled.
 
diff --git a/tensorflow/python/keras/layers/simplernn_test.py b/tensorflow/python/keras/layers/simplernn_test.py
index 58f2f9a913..b5063850f0 100644
--- a/tensorflow/python/keras/layers/simplernn_test.py
+++ b/tensorflow/python/keras/layers/simplernn_test.py
@@ -99,7 +99,6 @@ class SimpleRNNLayerTest(keras_parameterized.TestCase):
     self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
     self.assertEqual(layer.cell.bias.constraint, b_constraint)
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_with_masking_layer_SimpleRNN(self):
     layer_class = keras.layers.SimpleRNN
     inputs = np.random.random((2, 3, 4))
diff --git a/tensorflow/python/keras/layers/wrappers_test.py b/tensorflow/python/keras/layers/wrappers_test.py
index 727f33dadc..46d5487b2c 100644
--- a/tensorflow/python/keras/layers/wrappers_test.py
+++ b/tensorflow/python/keras/layers/wrappers_test.py
@@ -165,7 +165,6 @@ class TimeDistributedTest(test.TestCase):
       y = model.predict(np.random.random((10, 3, 2)))
       self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1)
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_TimeDistributed_batchnorm(self):
     with self.cached_session():
       # test that wrapped BN updates still work.
@@ -188,7 +187,6 @@ class TimeDistributedTest(test.TestCase):
       # Verify input_map has one mapping from inputs to reshaped inputs.
       self.assertEqual(len(td._input_map.keys()), 1)
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_TimeDistributed_trainable(self):
     # test layers that need learning_phase to be set
     x = keras.layers.Input(shape=(3, 2))
@@ -203,7 +201,6 @@ class TimeDistributedTest(test.TestCase):
     assert len(layer.updates) == 2
     assert len(layer.trainable_weights) == 2
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(self):
     with self.cached_session():
       # test with unspecified shape and Embeddings with mask_zero
@@ -236,7 +233,6 @@ class TimeDistributedTest(test.TestCase):
         self.assertAllEqual(mask_outputs_val[i], ref_mask_val[i])
       self.assertIs(mask_outputs[-1], None)  # final layer
 
-  @tf_test_util.run_v1_only('b/120545219')
   def test_TimeDistributed_with_masking_layer(self):
     with self.cached_session():
       # test with Masking layer
diff --git a/tensorflow/python/keras/model_subclassing_test.py b/tensorflow/python/keras/model_subclassing_test.py
index 553c7fb009..620275e50f 100644
--- a/tensorflow/python/keras/model_subclassing_test.py
+++ b/tensorflow/python/keras/model_subclassing_test.py
@@ -187,7 +187,6 @@ def get_nested_model_3(input_dim, num_classes):
 
 
 @test_util.run_all_in_graph_and_eager_modes
-@test_util.run_v1_only('b/120545219')
 class ModelSubclassingTest(test.TestCase):
 
   def test_custom_build(self):
@@ -916,7 +915,6 @@ class ModelSubclassingTest(test.TestCase):
       self.assertEqual(1, len(model.get_updates_for(x)))
 
 
-@test_util.run_v1_only('b/120545219')
 class GraphSpecificModelSubclassingTests(test.TestCase):
 
   @test_util.run_deprecated_v1
diff --git a/tensorflow/python/kernel_tests/checkpoint_ops_test.py b/tensorflow/python/kernel_tests/checkpoint_ops_test.py
index dd5ac1f763..a674618568 100644
--- a/tensorflow/python/kernel_tests/checkpoint_ops_test.py
+++ b/tensorflow/python/kernel_tests/checkpoint_ops_test.py
@@ -230,7 +230,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
           np.reshape(initializing_values, (num_rows, num_cols)),
           self.evaluate(remapped_matrix))
 
-  @test_util.run_v1_only('b/120545219')
+  @test_util.run_deprecated_v1
   def test_load_and_remap_invalid_remapping(self):
     """Tests that errors are raised when an ID maps to multiple new IDs.
 
@@ -262,7 +262,7 @@ class LoadAndRemapMatrixTest(test.TestCase):
     with self.cached_session(), self.assertRaises(errors.UnimplementedError):
       self.evaluate(remapped_matrix)
 
-  @test_util.run_v1_only('b/120545219')
+  @test_util.run_deprecated_v1
   def test_load_and_remap_incorrect_initializing_values(self):
     """Tests that errors are raised with incorrect number of init values."""
     remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix(
diff --git a/tensorflow/python/kernel_tests/session_ops_test.py b/tensorflow/python/kernel_tests/session_ops_test.py
index bc5d8e8151..7d42227840 100644
--- a/tensorflow/python/kernel_tests/session_ops_test.py
+++ b/tensorflow/python/kernel_tests/session_ops_test.py
@@ -29,9 +29,9 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
-@test_util.run_v1_only("b/120545219")
 class SessionOpsTest(test.TestCase):
 
+  @test_util.run_deprecated_v1
   def testHandleBasic(self):
     with self.cached_session() as sess:
       # Return a handle.
@@ -46,6 +46,7 @@ class SessionOpsTest(test.TestCase):
       y = math_ops.multiply(x, 10)
       self.assertEqual(500, sess.run(y, feed_dict={f: h.handle}))
 
+  @test_util.run_deprecated_v1
   def testHandleEval(self):
     with self.cached_session() as sess:
       # Return a handle.
@@ -58,6 +59,7 @@ class SessionOpsTest(test.TestCase):
       # Get the tensor from its handle.
       self.assertEqual(50, h.eval())
 
+  @test_util.run_deprecated_v1
   def testHandleAndValue(self):
     with self.cached_session() as sess:
       # Return a handle and a value.
@@ -71,6 +73,7 @@ class SessionOpsTest(test.TestCase):
       self.assertEqual(50, h.eval())
       self.assertEqual(500, v)
 
+  @test_util.run_deprecated_v1
   def testHandleCond(self):
     with self.cached_session() as sess:
       # Return a handle and a value
@@ -91,6 +94,7 @@ class SessionOpsTest(test.TestCase):
 
       self.assertEqual(5000, result)
 
+  @test_util.run_deprecated_v1
   def testHandleForLoop(self):
     with self.cached_session() as sess:
       # Initialize a handle.
@@ -108,6 +112,7 @@ class SessionOpsTest(test.TestCase):
 
       self.assertEqual(100, h.eval())
 
+  @test_util.run_deprecated_v1
   def testHandleWhileLoop(self):
     with self.cached_session() as sess:
       # Initialize a handle.
@@ -128,6 +133,7 @@ class SessionOpsTest(test.TestCase):
 
       self.assertEqual(101, h.eval())
 
+  @test_util.run_deprecated_v1
   def testHandleMover(self):
     with self.cached_session() as sess:
       # Return a handle.
@@ -149,6 +155,7 @@ class SessionOpsTest(test.TestCase):
         h = self.evaluate(h)
         self.assertEqual(100, sess.run(y, feed_dict={f: h.handle}))
 
+  @test_util.run_deprecated_v1
   def testHandleDelete(self):
     with self.cached_session() as sess:
       # Return a handle.
@@ -158,6 +165,7 @@ class SessionOpsTest(test.TestCase):
       h = session_ops.get_session_handle(c)
       self.evaluate(h).delete()
 
+  @test_util.run_deprecated_v1
   def testHandleDeleteRaw(self):
     with self.cached_session() as sess:
       # Return a handle.
@@ -172,6 +180,7 @@ class SessionOpsTest(test.TestCase):
       f, x = session_ops.delete_session_tensor(raw_h)
       sess.run(x, feed_dict={f: raw_h})
 
+  @test_util.run_deprecated_v1
   def testMultiDevices(self):
     with self.cached_session() as sess:
       with ops.device(test.gpu_device_name()):
@@ -190,6 +199,7 @@ class SessionOpsTest(test.TestCase):
                      b_p: b_handle.handle})
       self.assertEqual(3.0, c_handle.eval())
 
+  @test_util.run_deprecated_v1
   def testHandleGC(self):
     with self.cached_session() as sess:
       # initial values live on CPU
@@ -214,6 +224,7 @@ class SessionOpsTest(test.TestCase):
             feed_dict={add_h1: one_handle.handle,
                        add_h2: x_handle.handle})
 
+  @test_util.run_deprecated_v1
   def testHandlePlacement(self):
     with self.cached_session() as sess:
       a = constant_op.constant(1.0)
@@ -234,7 +245,7 @@ class SessionOpsTest(test.TestCase):
                      b_p: b_handle.handle})
       self.assertEqual(3.0, c_handle.eval())
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testFeedOneHandleDirectly(self):
     with self.cached_session() as sess:
       a = constant_op.constant(10.0)
@@ -246,7 +257,7 @@ class SessionOpsTest(test.TestCase):
 
       self.assertAllClose(2500.0, sess.run(d, feed_dict={c: h_c}))
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testDirectHandleFeedOverlappingWithFetches(self):
     with self.cached_session() as sess:
       a = constant_op.constant(10.0)
@@ -273,6 +284,7 @@ class SessionOpsTest(test.TestCase):
       self.assertAllClose(50.0, c_val)
       self.assertAllClose(50.0, d_val)
 
+  @test_util.run_deprecated_v1
   def testFeedTwoHandlesDirectly(self):
     with self.cached_session() as sess:
       a = constant_op.constant(10.0)
@@ -287,7 +299,7 @@ class SessionOpsTest(test.TestCase):
       self.assertAllClose(48.0, sess.run(e, feed_dict={c: h_c, d: h_d}))
       self.assertAllClose(-48.0, sess.run(e, feed_dict={c: h_d, d: h_c}))
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testFeedHandleToVariableDirectly(self):
     with self.cached_session() as sess:
       a = variables.Variable(12.0)
-- 
GitLab


From e8d6281e7ef34af009bd2be60c42ae004fe57411 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 16:32:06 -0800
Subject: [PATCH 560/873] [Error improvement] We now put an attribute for
 keeping track of the original source nodes. We have also changed many
 optimizers to correctly transmit the original node values.

PiperOrigin-RevId: 225457141
---
 .../jit/encapsulate_subgraphs_pass.cc         | 22 ++++--
 .../jit/encapsulate_xla_computations_pass.cc  |  1 +
 .../compiler/jit/partially_decluster_pass.cc  |  1 +
 .../compiler/tf2xla/functionalize_cond.cc     |  3 +-
 tensorflow/compiler/tf2xla/tf2xla_util.cc     |  6 +-
 .../common_runtime/accumulate_n_optimizer.cc  |  3 +-
 tensorflow/core/common_runtime/lower_if_op.cc | 56 +++++++++-------
 .../core/common_runtime/lower_while_op.cc     | 67 ++++++++++---------
 .../parallel_concat_optimizer.cc              |  3 +-
 tensorflow/core/framework/function.cc         |  1 +
 .../core/framework/graph_to_functiondef.cc    |  1 +
 .../framework/graph_to_functiondef_test.cc    | 15 ++++-
 tensorflow/core/framework/node_def.proto      | 14 ++++
 tensorflow/core/framework/node_def_builder.cc | 11 ++-
 tensorflow/core/framework/node_def_builder.h  |  6 +-
 tensorflow/core/framework/node_def_util.cc    | 43 +++++++++++-
 tensorflow/core/framework/node_def_util.h     |  7 ++
 tensorflow/core/graph/graph.cc                | 25 +++++++
 tensorflow/core/graph/graph.h                 | 13 ++++
 tensorflow/core/graph/graph_partition.cc      | 16 +++--
 tensorflow/core/graph/node_builder.cc         |  5 +-
 tensorflow/core/graph/node_builder.h          |  3 +-
 tensorflow/core/graph/optimizer_cse.cc        |  2 +
 tensorflow/core/kernels/constant_op.cc        |  3 +-
 ...w.-node-def.-experimental-debug-info.pbtxt | 12 ++++
 .../api/golden/v1/tensorflow.-node-def.pbtxt  | 16 +++++
 26 files changed, 272 insertions(+), 83 deletions(-)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.-node-def.-experimental-debug-info.pbtxt

diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
index f478832781..03aba97bbe 100644
--- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc
@@ -779,7 +779,8 @@ Status Encapsulator::Subgraph::RecordArg(
   if (inserted) {
     NodeDef arg_def;
     NodeDefBuilder builder(
-        absl::StrCat(src_node->name(), "_", src_slot, "_arg"), kArgOp);
+        absl::StrCat(src_node->name(), "_", src_slot, "_arg"), kArgOp,
+        NodeDebugInfo(src_node->def()));
     DataType dtype = edge->dst()->input_type(edge->dst_input());
     builder.Attr("T", dtype);
     builder.Attr("index", arg_index);
@@ -814,7 +815,8 @@ Status Encapsulator::Subgraph::RecordResult(
   if (inserted) {
     NodeDef ret_def;
     NodeDefBuilder builder(
-        absl::StrCat(src_node->name(), "_", src_slot, "_retval"), kRetValOp);
+        absl::StrCat(src_node->name(), "_", src_slot, "_retval"), kRetValOp,
+        NodeDebugInfo(src_node->def()));
     DataType dtype = src_node->output_type(src_slot);
     builder.Attr("T", dtype);
     builder.Attr("index", ret_index);
@@ -974,6 +976,7 @@ Status Encapsulator::Subgraph::AddHostComputes(
       }
 
       NodeDef host_compute_def;
+      // TODO(shikharagarwal): What source node should we use for errors?
       NodeDefBuilder builder(absl::StrCat("outside_compilation_",
                                           oc_subgraph_name, "_host_compute"),
                              kHostComputeOp);
@@ -1040,6 +1043,7 @@ Status Encapsulator::Subgraph::MakeSequencingNode(const string& subgraph_name,
                                                   Graph* graph_out) {
   if (sequencer_ == nullptr) {
     NodeDef seq_def;
+    // TODO(shikharagarwal): What source node should we use for errors?
     NodeDefBuilder builder(absl::StrCat(subgraph_name, "_sequencer"), "NoOp");
     builder.Attr(kXlaHostTransferSequencerAttr, subgraph_name);
     builder.Device(device_);
@@ -1214,7 +1218,8 @@ Status Encapsulator::Subgraph::AddHostComputeKeyPlaceholder(
   GraphDefBuilder::Options options(graph_out, /*status=*/nullptr);
   NodeDef key_def;
   NodeDefBuilder builder(
-      absl::StrCat(call_node_def_.name(), "_key_placeholder"), "Placeholder");
+      absl::StrCat(call_node_def_.name(), "_key_placeholder"), "Placeholder",
+      NodeDebugInfo(call_node_def_));
   builder.Attr("dtype", DT_STRING);
   builder.Attr("shape", shape_proto);
   builder.Attr("_host_compute_call_node", call_node_def_.name());
@@ -1248,6 +1253,7 @@ Status Encapsulator::Subgraph::AddRecvAtHostNode(
   }
 
   NodeDef recv_def;
+  // TODO(shikharagarwal): What source node should we use for errors?
   NodeDefBuilder builder(absl::StrCat("outside_compilation_", subgraph_name,
                                       "_", oc_subgraph_name, "_recv"),
                          kRecvAtHostOp);
@@ -1303,6 +1309,7 @@ Status Encapsulator::Subgraph::AddSendFromHostNode(
   }
 
   NodeDef send_def;
+  // TODO(shikharagarwal): What source node should we use for errors?
   NodeDefBuilder builder(absl::StrCat("outside_compilation_", subgraph_name,
                                       "_", oc_subgraph_name, "_send"),
                          kSendFromHostOp);
@@ -1833,8 +1840,9 @@ Node* AddDummyShapedNode(const Node* src_node, int src_port,
   // Add any Enter nodes required to bring the constant to the correct control
   // flow frame.
   while (!control_flow_info[src_node->id()].frame_name.empty()) {
+    NodeDebugInfo debug_info(*src_node);
     NodeBuilder enter_builder(options.GetNameForOp("Enter"), "Enter",
-                              options.op_registry());
+                              options.op_registry(), &debug_info);
     enter_builder.Attr("frame_name",
                        control_flow_info[src_node->id()].frame_name);
     enter_builder.Attr("is_constant", true);
@@ -2018,7 +2026,8 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend(
             return errors::InvalidArgument(
                 "Shape inference is not possible for outside_compilation "
                 "SendFromHost node ",
-                send_node->name(), " because shape of node ", n->name(),
+                send_node->name(), " because shape of node ",
+                FormatNodeForError(*n),
                 " will not be known at compilation time.");
           }
         }
@@ -2047,8 +2056,7 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend(
         return errors::Internal(
             "Internal assumption failed while rewriting an outside_compilation "
             "cluster that contains a while loop. Logic assumes back-edge is to "
-            "port 1 of a 2-input "
-            "Merge node.");
+            "port 1 of a 2-input Merge node.");
       }
       // Connect the existing edge to both inputs of the Merge node so that the
       // graph will be well-formed.
diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
index d334100aa4..ec745cdbb7 100644
--- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc
@@ -297,6 +297,7 @@ Status RewriteSubgraph(const std::vector<OutputTensor>& arg_source_tensors,
 
     NodeDef def;
     def.set_name(launch->name());
+    MergeDebugInfo(NodeDebugInfo(launch->def()), &def);
 
     // Target the XLA CPU/GPU backends.
     VLOG(2) << "Replacing with XlaLaunch";
diff --git a/tensorflow/compiler/jit/partially_decluster_pass.cc b/tensorflow/compiler/jit/partially_decluster_pass.cc
index 42ea3926e1..e1fd2aaee2 100644
--- a/tensorflow/compiler/jit/partially_decluster_pass.cc
+++ b/tensorflow/compiler/jit/partially_decluster_pass.cc
@@ -120,6 +120,7 @@ Status PartiallyDeclusterNode(Graph* graph, Node* n) {
 
   NodeDef ndef = n->def();
   ndef.set_name(absl::StrCat(n->name(), "/declustered"));
+  MergeDebugInfo(NodeDebugInfo(n->def()), &ndef);
   RemoveFromXlaCluster(&ndef);
   Status s;
   Node* cloned_node = graph->AddNode(ndef, &s);
diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc
index c693e42d26..7ae96e1d48 100644
--- a/tensorflow/compiler/tf2xla/functionalize_cond.cc
+++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc
@@ -640,7 +640,8 @@ Status Conditional::ExtractBodies(Graph* graph) {
 Status Conditional::BuildIfNode(Graph* graph,
                                 FunctionLibraryDefinition* library) {
   VLOG(2) << "Build cond function for " << name();
-  NodeDefBuilder builder(name(), "If", library);
+  NodeDebugInfo debug_info((*merges_.begin())->def());
+  NodeDefBuilder builder(name(), "If", library, &debug_info);
   const string branch_name[] = {"else_branch", "then_branch"};
   for (auto branch : {BranchType::kElseBranch, BranchType::kThenBranch}) {
     int branch_index = static_cast<int>(branch);
diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc
index 6cc8ae3afd..18d87727c5 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_util.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc
@@ -364,6 +364,7 @@ Status AddPlaceholdersForFeeds(
       GraphDef gd;
       *gd.mutable_versions() = graph_def->versions();
       *gd.add_node() = *existing;
+      MergeDebugInfo(NodeDebugInfo(*existing), gd.mutable_node(0));
       TF_RETURN_IF_ERROR(
           AddDefaultAttrsToGraphDef(&gd, *op_registry, 0 /*node_offset*/));
 
@@ -390,6 +391,7 @@ Status AddPlaceholdersForFeeds(
   // in this code.
   for (auto it = placeholder_info.begin(); it != placeholder_info.end(); ++it) {
     const PlaceholderInfo& info = it->second;
+    // TODO(shikharagarwal): Add original node information.
     NodeDef* d = graph_def->add_node();
     d->set_name(info.placeholder_name);
     d->set_op("PlaceholderV2");
@@ -608,7 +610,9 @@ Status RewriteAssociatedFunction(
   switch (associated_function.type()) {
     case AssociatedFunctionInfo::kFunctionCallNode: {
       // Change this node to call the new function.
-      NodeDefBuilder builder(node->name(), rewritten_function_name, fld);
+      NodeDebugInfo debug_info(*node);
+      NodeDefBuilder builder(node->name(), rewritten_function_name, fld,
+                             &debug_info);
       for (auto attr : node->attrs()) {
         builder.Attr(attr.first, attr.second);
       }
diff --git a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc
index c4bc1a684c..1fc077af92 100644
--- a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc
+++ b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc
@@ -75,7 +75,8 @@ class AccumulateNV2RemovePass : public GraphOptimizationPass {
   Status rewriteNode(Node* n, Graph* g) {
     AttrSlice n_attrs = n->attrs();
     auto base_make_node = [n, &n_attrs](const string& op, const string& name) {
-      NodeBuilder node_builder(name, op);
+      NodeDebugInfo debug_info(*n);
+      NodeBuilder node_builder(name, op, OpRegistry::Global(), &debug_info);
 
       // The pieces of AccumulateNV2 should all be on the same node.
       node_builder.Device(n->requested_device());
diff --git a/tensorflow/core/common_runtime/lower_if_op.cc b/tensorflow/core/common_runtime/lower_if_op.cc
index 9738006f5c..241c403087 100644
--- a/tensorflow/core/common_runtime/lower_if_op.cc
+++ b/tensorflow/core/common_runtime/lower_if_op.cc
@@ -89,6 +89,7 @@ class CondBuilder {
   const FunctionLibraryDefinition& flib_;
   string name_;
 
+  NodeDebugInfo debug_info_;
   NodeBuilder then_call_builder_;
   NodeBuilder else_call_builder_;
 };
@@ -100,8 +101,11 @@ CondBuilder::CondBuilder(Node* if_op, const string& then_fn_name,
       graph_(graph),
       flib_(flib),
       name_(if_op->name()),
-      then_call_builder_(NewName("then"), then_fn_name, graph->op_registry()),
-      else_call_builder_(NewName("else"), else_fn_name, graph->op_registry()) {
+      debug_info_(*if_op_),
+      then_call_builder_(NewName("then"), then_fn_name, graph->op_registry(),
+                         &debug_info_),
+      else_call_builder_(NewName("else"), else_fn_name, graph->op_registry(),
+                         &debug_info_) {
   TF_CHECK_OK(if_op_->input_tensor(0, &pred_));
   then_call_builder_.Device(if_op_->requested_device());
   else_call_builder_.Device(if_op_->requested_device());
@@ -111,23 +115,23 @@ Status CondBuilder::CreatePivotNodes() {
   // Construct the basic cond body (consisting of feeding in the predicate to
   // create pivot nodes).
   Node* switch_pred;
-  TF_RETURN_IF_ERROR(
-      NodeBuilder(NewName("switch_pred"), "Switch", graph_->op_registry())
-          .Input(NodeOut(pred_))
-          .Input(NodeOut(pred_))
-          .Device(if_op_->requested_device())
-          .Finalize(graph_, &switch_pred));
+  TF_RETURN_IF_ERROR(NodeBuilder(NewName("switch_pred"), "Switch",
+                                 graph_->op_registry(), &debug_info_)
+                         .Input(NodeOut(pred_))
+                         .Input(NodeOut(pred_))
+                         .Device(if_op_->requested_device())
+                         .Finalize(graph_, &switch_pred));
   control_predecessor_ = switch_pred;
-  TF_RETURN_IF_ERROR(
-      NodeBuilder(NewName("pivot_f"), "Identity", graph_->op_registry())
-          .Input(switch_pred, kElseBranch)
-          .Device(if_op_->requested_device())
-          .Finalize(graph_, &pivot_f_));
-  TF_RETURN_IF_ERROR(
-      NodeBuilder(NewName("pivot_t"), "Identity", graph_->op_registry())
-          .Input(switch_pred, kThenBranch)
-          .Device(if_op_->requested_device())
-          .Finalize(graph_, &pivot_t_));
+  TF_RETURN_IF_ERROR(NodeBuilder(NewName("pivot_f"), "Identity",
+                                 graph_->op_registry(), &debug_info_)
+                         .Input(switch_pred, kElseBranch)
+                         .Device(if_op_->requested_device())
+                         .Finalize(graph_, &pivot_f_));
+  TF_RETURN_IF_ERROR(NodeBuilder(NewName("pivot_t"), "Identity",
+                                 graph_->op_registry(), &debug_info_)
+                         .Input(switch_pred, kThenBranch)
+                         .Device(if_op_->requested_device())
+                         .Finalize(graph_, &pivot_t_));
   return Status::OK();
 }
 
@@ -137,12 +141,13 @@ string CondBuilder::NewName(const string& infix) {
 
 Status CondBuilder::AddInput(Node* src, int src_output) {
   Node* input;
-  TF_RETURN_IF_ERROR(
-      NodeBuilder(NewName(src->name()), "Switch", graph_->op_registry())
-          .Input(src, src_output)
-          .Input(pred_)
-          .Device(if_op_->requested_device())
-          .Finalize(graph_, &input));
+  NodeDebugInfo debug_info(*src);
+  TF_RETURN_IF_ERROR(NodeBuilder(NewName(src->name()), "Switch",
+                                 graph_->op_registry(), &debug_info)
+                         .Input(src, src_output)
+                         .Input(pred_)
+                         .Device(if_op_->requested_device())
+                         .Finalize(graph_, &input));
   then_call_builder_.Input(input, kThenBranch);
   else_call_builder_.Input(input, kElseBranch);
   return Status::OK();
@@ -178,7 +183,8 @@ Status CondBuilder::AddOutputs() {
   outputs_.resize(merges.size());
   for (int i = 0; i < then_call_node_->num_outputs(); ++i) {
     TF_RETURN_IF_ERROR(
-        NodeBuilder(graph_->NewName("merge"), "Merge", graph_->op_registry())
+        NodeBuilder(graph_->NewName("merge"), "Merge", graph_->op_registry(),
+                    &debug_info_)
             .Input({NodeOut(then_call_node_, i), NodeOut(else_call_node_, i)})
             .Device(if_op_->requested_device())
             .Finalize(graph_, &merges[i]));
diff --git a/tensorflow/core/common_runtime/lower_while_op.cc b/tensorflow/core/common_runtime/lower_while_op.cc
index 6f9921a796..8b68c31a72 100644
--- a/tensorflow/core/common_runtime/lower_while_op.cc
+++ b/tensorflow/core/common_runtime/lower_while_op.cc
@@ -133,6 +133,7 @@ class LowerWhileHelper {
   // Name of the `while_op_`.
   string name_;
 
+  NodeDebugInfo debug_info_;
   NodeBuilder cond_call_builder_;
   NodeBuilder body_call_builder_;
 
@@ -152,8 +153,11 @@ LowerWhileHelper::LowerWhileHelper(Node* while_op, const string& cond_fn_name,
       graph_(graph),
       flib_(flib),
       name_(while_op->name()),
-      cond_call_builder_(NewName("cond"), cond_fn_name, graph->op_registry()),
-      body_call_builder_(NewName("body"), body_fn_name, graph->op_registry()),
+      debug_info_(*while_op_),
+      cond_call_builder_(NewName("cond"), cond_fn_name, graph->op_registry(),
+                         &debug_info_),
+      body_call_builder_(NewName("body"), body_fn_name, graph->op_registry(),
+                         &debug_info_),
       num_loop_inputs_(while_op_->num_inputs()) {
   // We intentionally `resize` instead of `reserve` space in `enter_nodes_`
   // because we need to set it's elements out of order in `CreateEnterNodes`.
@@ -186,11 +190,11 @@ Status LowerWhileHelper::CreateEnterNodes() {
   TF_RETURN_IF_ERROR(while_op_->input_edges(&edges));
   for (const Edge* edge : edges) {
     Node* enter_node;
-    TF_RETURN_IF_ERROR(
-        NodeBuilder(NewName("enter"), "Enter", graph_->op_registry())
-            .Input(NodeOut(edge->src(), edge->src_output()))
-            .Attr("frame_name", name_)
-            .Finalize(graph_, &enter_node));
+    TF_RETURN_IF_ERROR(NodeBuilder(NewName("enter"), "Enter",
+                                   graph_->op_registry(), &debug_info_)
+                           .Input(NodeOut(edge->src(), edge->src_output()))
+                           .Attr("frame_name", name_)
+                           .Finalize(graph_, &enter_node));
     enter_nodes_[edge->dst_input()] = enter_node;
   }
   // Create a NoOp node that takes incoming control inputs of the original While
@@ -203,10 +207,10 @@ Status LowerWhileHelper::CreateEnterNodes() {
   }
   if (!control_inputs.empty()) {
     Node* incoming_control_node;
-    TF_RETURN_IF_ERROR(
-        NodeBuilder(NewName("LoopControlInputs"), "NoOp", graph_->op_registry())
-            .ControlInputs(control_inputs)
-            .Finalize(graph_, &incoming_control_node));
+    TF_RETURN_IF_ERROR(NodeBuilder(NewName("LoopControlInputs"), "NoOp",
+                                   graph_->op_registry(), &debug_info_)
+                           .ControlInputs(control_inputs)
+                           .Finalize(graph_, &incoming_control_node));
     for (Node* n : enter_nodes_) {
       graph_->AddControlEdge(incoming_control_node, n);
     }
@@ -218,7 +222,8 @@ Status LowerWhileHelper::CreateMergeNodes() {
   for (Node* enter_node : enter_nodes_) {
     Node* merge_node;
     TF_RETURN_IF_ERROR(
-        NodeBuilder(NewName("merge"), "Merge", graph_->op_registry())
+        NodeBuilder(NewName("merge"), "Merge", graph_->op_registry(),
+                    &debug_info_)
             .Input({NodeOut(enter_node, 0), NodeOut(enter_node, 0)})
             .Finalize(graph_, &merge_node));
     merge_nodes_.emplace_back(merge_node);
@@ -235,10 +240,10 @@ Status LowerWhileHelper::CreateCondFuncCallNode() {
   // are in the same frame as the rest of the function, otherwise
   // `BuildControlFlowInfo` throws an error.
   graph_->AddControlEdge(merge_nodes_[0], cond_call_node_);
-  TF_RETURN_IF_ERROR(
-      NodeBuilder(NewName("LoopCond"), "LoopCond", graph_->op_registry())
-          .Input(NodeOut(cond_call_node_, 0))
-          .Finalize(graph_, &loop_cond_node_));
+  TF_RETURN_IF_ERROR(NodeBuilder(NewName("LoopCond"), "LoopCond",
+                                 graph_->op_registry(), &debug_info_)
+                         .Input(NodeOut(cond_call_node_, 0))
+                         .Finalize(graph_, &loop_cond_node_));
   return Status::OK();
 }
 
@@ -255,11 +260,11 @@ Status LowerWhileHelper::CreateSwitchNodes() {
     if (IsRefType(merge_nodes_[i]->output_type(0))) {
       op_type = "RefSwitch";
     }
-    TF_RETURN_IF_ERROR(
-        NodeBuilder(NewName(op_name), op_type, graph_->op_registry())
-            .Input(NodeOut(merge_nodes_[i], 0))
-            .Input(NodeOut(loop_cond_node_, 0))
-            .Finalize(graph_, &switch_node));
+    TF_RETURN_IF_ERROR(NodeBuilder(NewName(op_name), op_type,
+                                   graph_->op_registry(), &debug_info_)
+                           .Input(NodeOut(merge_nodes_[i], 0))
+                           .Input(NodeOut(loop_cond_node_, 0))
+                           .Finalize(graph_, &switch_node));
     switch_nodes_.emplace_back(switch_node);
   }
   return Status::OK();
@@ -282,10 +287,10 @@ Status LowerWhileHelper::CreateBodyFuncCallNode() {
   if (IsRefType(switch_nodes_[0]->output_type(1))) {
     op_type = "RefIdentity";
   }
-  TF_RETURN_IF_ERROR(
-      NodeBuilder(NewName("loop_body_control"), op_type, graph_->op_registry())
-          .Input(NodeOut(switch_nodes_[0], 1))
-          .Finalize(graph_, &body_control_node_));
+  TF_RETURN_IF_ERROR(NodeBuilder(NewName("loop_body_control"), op_type,
+                                 graph_->op_registry(), &debug_info_)
+                         .Input(NodeOut(switch_nodes_[0], 1))
+                         .Finalize(graph_, &body_control_node_));
   graph_->AddControlEdge(body_control_node_, body_call_node_);
   return Status::OK();
 }
@@ -295,10 +300,10 @@ Status LowerWhileHelper::CreateExitNodes() {
   outputs.reserve(num_loop_inputs_);
   for (Node* switch_node : switch_nodes_) {
     Node* exit_node;
-    TF_RETURN_IF_ERROR(
-        NodeBuilder(NewName("exit"), "Exit", graph_->op_registry())
-            .Input(NodeOut(switch_node, 0))
-            .Finalize(graph_, &exit_node));
+    TF_RETURN_IF_ERROR(NodeBuilder(NewName("exit"), "Exit",
+                                   graph_->op_registry(), &debug_info_)
+                           .Input(NodeOut(switch_node, 0))
+                           .Finalize(graph_, &exit_node));
     exit_nodes_.emplace_back(exit_node);
     outputs.emplace_back(NodeOut(exit_node, 0));
   }
@@ -307,7 +312,7 @@ Status LowerWhileHelper::CreateExitNodes() {
   // original functional While op. This is used for
   // 1. Rewiring the control edges with the original while op as src.
   // 2. Fetching the output of the While node by name in calls to sess.run.
-  NodeBuilder ib(name_, "IdentityN");
+  NodeBuilder ib(name_, "IdentityN", OpRegistry::Global(), &debug_info_);
   ib.Input(outputs);
   TF_RETURN_IF_ERROR(ib.Finalize(graph_, &lowered_while_output_));
   return Status::OK();
@@ -317,7 +322,7 @@ Status LowerWhileHelper::CreateNextIterationNodes() {
   for (int i = 0; i < num_loop_inputs_; i++) {
     Node* next_iteration;
     TF_RETURN_IF_ERROR(NodeBuilder(NewName("next_iteration"), "NextIteration",
-                                   graph_->op_registry())
+                                   graph_->op_registry(), &debug_info_)
                            .Input(NodeOut(body_call_node_, i))
                            .Finalize(graph_, &next_iteration));
     next_iterations_nodes_.emplace_back(next_iteration);
diff --git a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
index 6af4ca4d96..ecb2670a74 100644
--- a/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
+++ b/tensorflow/core/common_runtime/parallel_concat_optimizer.cc
@@ -52,7 +52,8 @@ class ParallelConcatRemovePass : public GraphOptimizationPass {
       AttrSlice n_attrs = n->attrs();
       auto base_make_node = [n, &n_attrs](const string& op,
                                           const string& name) {
-        NodeBuilder node_builder(name, op);
+        NodeDebugInfo debug_info(*n);
+        NodeBuilder node_builder(name, op, OpRegistry::Global(), &debug_info);
         node_builder.Device(n->requested_device());
         string colo;
         if (GetNodeAttr(n_attrs, "_class", &colo).ok()) {
diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc
index b69a40f312..94af4ee580 100644
--- a/tensorflow/core/framework/function.cc
+++ b/tensorflow/core/framework/function.cc
@@ -673,6 +673,7 @@ Status AddDefaultAttrs(const string& op,
 
 }  // end namespace
 
+// TODO(shikharagarwal): Transmit original node names correctly in file.
 Status InstantiateFunction(const FunctionDef& fdef, AttrSlice attr_values,
                            GetFunctionSignature get_function,
                            InstantiationResult* result) {
diff --git a/tensorflow/core/framework/graph_to_functiondef.cc b/tensorflow/core/framework/graph_to_functiondef.cc
index b2bc414c49..44b22f93c1 100644
--- a/tensorflow/core/framework/graph_to_functiondef.cc
+++ b/tensorflow/core/framework/graph_to_functiondef.cc
@@ -165,6 +165,7 @@ Status GraphToFunctionDef(const Graph& graph, const string& name,
       node_def->set_device(node->assigned_device_name());
     }
     node_def->set_name(node_names.Uniquify(node->name()));
+    MergeDebugInfo(NodeDebugInfo(node->def()), node_def);
 
     // Reset input names based on graph rather than the NodeDef.
     node_def->clear_input();
diff --git a/tensorflow/core/framework/graph_to_functiondef_test.cc b/tensorflow/core/framework/graph_to_functiondef_test.cc
index 587e2c07ac..c3cc1a7433 100644
--- a/tensorflow/core/framework/graph_to_functiondef_test.cc
+++ b/tensorflow/core/framework/graph_to_functiondef_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/ops/function_ops.h"
 #include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/framework/function.pb.h"
 #include "tensorflow/core/framework/function_testlib.h"
 #include "tensorflow/core/graph/graph_constructor.h"
 #include "tensorflow/core/graph/graph_def_builder.h"
@@ -28,6 +29,14 @@ limitations under the License.
 namespace tensorflow {
 namespace {
 
+FunctionDef RemoveDebugInfo(const FunctionDef& def) {
+  FunctionDef copy = def;
+  for (auto& node_def : *copy.mutable_node_def()) {
+    node_def.clear_experimental_debug_info();
+  }
+  return copy;
+}
+
 bool EqualFunctionDef(const FunctionDef& a, const FunctionDef& b,
                       string* diff) {
   // TODO(phawkins) use a more sophisticated equality test.
@@ -78,7 +87,8 @@ TEST(GraphToFunctionDefTest, Basics) {
       {{"h_0", "G:sum:0"}});  // return values
 
   string diff;
-  bool fdefs_equal = EqualFunctionDef(fdef_expected, fdef, &diff);
+  bool fdefs_equal =
+      EqualFunctionDef(fdef_expected, RemoveDebugInfo(fdef), &diff);
   EXPECT_TRUE(fdefs_equal) << diff;
 }
 
@@ -111,7 +121,8 @@ TEST(GraphToFunctionDefTest, ControlDependencies) {
       {{"c", "b:y:0"}});  // return values
 
   string diff;
-  bool fdefs_equal = EqualFunctionDef(fdef_expected, fdef, &diff);
+  bool fdefs_equal =
+      EqualFunctionDef(fdef_expected, RemoveDebugInfo(fdef), &diff);
   EXPECT_TRUE(fdefs_equal) << diff;
 }
 
diff --git a/tensorflow/core/framework/node_def.proto b/tensorflow/core/framework/node_def.proto
index 0a095f903f..73cbc9600c 100644
--- a/tensorflow/core/framework/node_def.proto
+++ b/tensorflow/core/framework/node_def.proto
@@ -60,4 +60,18 @@ message NodeDef {
   // attr's type field.
   // TODO(josh11b): Add some examples here showing best practices.
   map<string, AttrValue> attr = 5;
+
+  message ExperimentalDebugInfo {
+    // Opaque string inserted into error messages created by the runtime.
+    //
+    // This is intended to store the list of names of the nodes from the
+    // original graph that this node was derived. For example if this node, say
+    // C, was result of a fusion of 2 nodes A and B, then 'original_node' would
+    // be {A, B}. This information can be used to map errors originating at the
+    // current node to some top level source code.
+    repeated string original_node_names = 1;
+  };
+
+  // This stores debug information associated with the node.
+  ExperimentalDebugInfo experimental_debug_info = 6;
 };
diff --git a/tensorflow/core/framework/node_def_builder.cc b/tensorflow/core/framework/node_def_builder.cc
index 348a825af9..4808967ca6 100644
--- a/tensorflow/core/framework/node_def_builder.cc
+++ b/tensorflow/core/framework/node_def_builder.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <vector>
 #include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def_util.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -37,7 +38,8 @@ void NodeDefBuilder::NodeOut::Reset(StringPiece n, int i, DataType dt) {
 }
 
 NodeDefBuilder::NodeDefBuilder(StringPiece name, StringPiece op_name,
-                               const OpRegistryInterface* op_registry) {
+                               const OpRegistryInterface* op_registry,
+                               const NodeDebugInfo* debug) {
   node_def_.set_name(string(name));
   const Status status = op_registry->LookUpOpDef(string(op_name), &op_def_);
   if (status.ok()) {
@@ -46,6 +48,13 @@ NodeDefBuilder::NodeDefBuilder(StringPiece name, StringPiece op_name,
     errors_.push_back(status.error_message());
     inputs_specified_ = 0;
   }
+  if (debug != nullptr) MergeDebugInfo(*debug, &node_def_);
+}
+
+NodeDefBuilder::NodeDefBuilder(StringPiece name, StringPiece op_name,
+                               const NodeDebugInfo& debug)
+    : NodeDefBuilder(name, op_name) {
+  MergeDebugInfo(debug, &node_def_);
 }
 
 NodeDefBuilder::NodeDefBuilder(StringPiece name, const OpDef* op_def)
diff --git a/tensorflow/core/framework/node_def_builder.h b/tensorflow/core/framework/node_def_builder.h
index ad07ec5480..63d856d16c 100644
--- a/tensorflow/core/framework/node_def_builder.h
+++ b/tensorflow/core/framework/node_def_builder.h
@@ -24,6 +24,7 @@ limitations under the License.
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def.pb.h"
 #include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/gtl/array_slice.h"
 #include "tensorflow/core/lib/strings/strcat.h"
@@ -63,7 +64,10 @@ class NodeDefBuilder {
   // specified by calling the methods below.
   // REQUIRES: The OpDef must satisfy ValidateOpDef().
   NodeDefBuilder(StringPiece name, StringPiece op_name,
-                 const OpRegistryInterface* op_registry = OpRegistry::Global());
+                 const OpRegistryInterface* op_registry = OpRegistry::Global(),
+                 const NodeDebugInfo* debug = nullptr);
+  NodeDefBuilder(StringPiece name, StringPiece op_name,
+                 const NodeDebugInfo& debug);
   // REQUIRES: in addition, *op_def must outlive *this.
   NodeDefBuilder(StringPiece name, const OpDef* op_def);
 
diff --git a/tensorflow/core/framework/node_def_util.cc b/tensorflow/core/framework/node_def_util.cc
index 95a787b2df..8071da5b6d 100644
--- a/tensorflow/core/framework/node_def_util.cc
+++ b/tensorflow/core/framework/node_def_util.cc
@@ -106,13 +106,50 @@ string SummarizeAttrs(const NodeDef& node_def) {
   return SummarizeAttrsHelper(node_def, node_def.device());
 }
 
+string FormatNodeForError(const NodeDebugInfo& debug_info) {
+  return debug_info.original_node_names.empty()
+             ? errors::FormatNodeNameForError(debug_info.name)
+             : errors::FormatNodeNamesForError(debug_info.original_node_names);
+}
+
 string FormatNodeForError(const Node& node) {
-  return FormatNodeDefForError(node.def());
+  return FormatNodeForError(NodeDebugInfo(node));
 }
 
 string FormatNodeDefForError(const NodeDef& node_def) {
-  VLOG(1) << "Error in the node: " << SummarizeNodeDef(node_def);
-  return errors::FormatNodeNameForError(node_def.name());
+  return FormatNodeForError(NodeDebugInfo(node_def));
+}
+
+void GetMergedOriginalNodeNames(const NodeDebugInfo& from,
+                                const NodeDebugInfo& to,
+                                std::set<string>* names) {
+  if (!from.original_node_names.empty()) {
+    names->insert(from.original_node_names.begin(),
+                  from.original_node_names.end());
+  } else {
+    names->insert(from.name);
+  }
+  names->insert(to.original_node_names.begin(), to.original_node_names.end());
+}
+
+void MergeDebugInfo(const NodeDebugInfo& from, Node* to) {
+  std::set<string> names;
+  GetMergedOriginalNodeNames(from, NodeDebugInfo(*to), &names);
+  to->set_original_node_names({names.begin(), names.end()});
+}
+
+void MergeDebugInfo(const NodeDebugInfo& from, NodeDef* to) {
+  std::set<string> names;
+  GetMergedOriginalNodeNames(from, NodeDebugInfo(*to), &names);
+  to->mutable_experimental_debug_info()->clear_original_node_names();
+  if (!names.empty()) {
+    *to->mutable_experimental_debug_info()->mutable_original_node_names() = {
+        names.begin(), names.end()};
+  }
+}
+
+void MergeDebugInfo(const NodeDef& from, NodeDef* to) {
+  MergeDebugInfo(NodeDebugInfo(from), to);
 }
 
 const AttrValue* AttrSlice::Find(StringPiece attr_name) const {
diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h
index f682bb1535..4e4a5c38d5 100644
--- a/tensorflow/core/framework/node_def_util.h
+++ b/tensorflow/core/framework/node_def_util.h
@@ -29,6 +29,7 @@ limitations under the License.
 namespace tensorflow {
 
 class Node;
+class NodeDebugInfo;
 
 // We forward declare protos so that kernels don't need to depend on them
 class NodeDef;
@@ -56,6 +57,12 @@ string SummarizeAttrs(const NodeDef& node_def);
 string FormatNodeForError(const Node& node);
 string FormatNodeDefForError(const NodeDef& node_def);
 
+// Merges the original node names from the debug information of 'from' to the
+// debug information of 'to'.
+void MergeDebugInfo(const NodeDebugInfo& from, Node* to);
+void MergeDebugInfo(const NodeDebugInfo& from, NodeDef* to);
+void MergeDebugInfo(const NodeDef& from, NodeDef* to);
+
 typedef protobuf::Map<string, AttrValue> AttrValueMap;
 
 // Adds an attr with name <name> and value <value> to *node_def.
diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 223fc85f9f..623dc855c4 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -216,6 +216,16 @@ void Node::set_requested_device(const string& device) {
   props_->node_def.set_device(device);
 }
 
+void Node::set_original_node_names(const std::vector<string>& names) {
+  MaybeCopyOnWrite();
+  props_->node_def.mutable_experimental_debug_info()
+      ->clear_original_node_names();
+  if (!names.empty()) {
+    *props_->node_def.mutable_experimental_debug_info()
+         ->mutable_original_node_names() = {names.begin(), names.end()};
+  }
+}
+
 Status Node::input_edge(int idx, const Edge** e) const {
   if (idx < 0 || idx >= num_inputs()) {
     return errors::InvalidArgument("Invalid input_edge index: ", idx, ", Node ",
@@ -293,6 +303,21 @@ Status Node::input_tensor(int idx, OutputTensor* t) const {
   return Status::OK();
 }
 
+// NodeDebugInfo
+
+NodeDebugInfo::NodeDebugInfo(const Node& n) : NodeDebugInfo(n.def()) {}
+NodeDebugInfo::NodeDebugInfo(const NodeDef& ndef)
+    : name(ndef.name()),
+      original_node_names(
+          ndef.has_experimental_debug_info()
+              ? std::vector<string>({ndef.experimental_debug_info()
+                                         .original_node_names()
+                                         .begin(),
+                                     ndef.experimental_debug_info()
+                                         .original_node_names()
+                                         .end()})
+              : std::vector<string>()) {}
+
 // InputTensor
 
 bool InputTensor::operator==(const InputTensor& other) const {
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 667eaba24c..0b31219d5f 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -120,6 +120,10 @@ class Node {
   int assigned_device_name_index() const { return assigned_device_name_index_; }
   void set_assigned_device_name_index(int index);
 
+  // Sets 'original_node_names' field of this node's DebugInfo proto to
+  // 'names'.
+  void set_original_node_names(const std::vector<string>& names);
+
   // Read only access to attributes
   AttrSlice attrs() const;
 
@@ -290,6 +294,15 @@ class Node {
   TF_DISALLOW_COPY_AND_ASSIGN(Node);
 };
 
+// Stores debug information associated with the Node.
+struct NodeDebugInfo {
+  const string name;
+  const std::vector<string> original_node_names;
+
+  NodeDebugInfo(const Node& n);
+  NodeDebugInfo(const NodeDef& ndef);
+};
+
 // Represents an input of a node, i.e., the `index`-th input to `node`.
 struct InputTensor {
   Node* node;
diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc
index 9c640c42a5..f213eb7c10 100644
--- a/tensorflow/core/graph/graph_partition.cc
+++ b/tensorflow/core/graph/graph_partition.cc
@@ -209,7 +209,8 @@ NodeDef* AddSend(const PartitionOptions& opts, const GraphInfo& g_info,
   // NOTE(yuanbyu): Only cast for cross-device send/recv.
   if (dtype != cast_dtype && !NeedSameDeviceSendRecv(edge, g_info)) {
     const string cast_op = (host_memory) ? "_HostCast" : "Cast";
-    NodeDefBuilder cast_builder(opts.new_name(src->name()), cast_op);
+    NodeDefBuilder cast_builder(opts.new_name(src->name()), cast_op,
+                                NodeDebugInfo(*src));
     cast_builder.Device(src->assigned_device_name()).Input(send_from);
     if (opts.scheduling_for_recvs) {
       cast_builder.Attr("_start_time", start_time);
@@ -233,7 +234,8 @@ NodeDef* AddSend(const PartitionOptions& opts, const GraphInfo& g_info,
 
   // Add the send node.
   const string send_op = (host_memory) ? "_HostSend" : "_Send";
-  NodeDefBuilder send_builder(opts.new_name(src->name()), send_op);
+  NodeDefBuilder send_builder(opts.new_name(src->name()), send_op,
+                              NodeDebugInfo(*src));
   SetSendRecvAttrs(opts, edge, &send_builder);
   send_builder.Device(src->assigned_device_name()).Input(send_from);
   if (opts.scheduling_for_recvs) {
@@ -268,7 +270,8 @@ NodeDef* AddRecv(const PartitionOptions& opts, const GraphInfo& g_info,
 
   // Add the recv node.
   const string recv_op = (host_memory) ? "_HostRecv" : "_Recv";
-  NodeDefBuilder recv_builder(opts.new_name(src->name()), recv_op);
+  NodeDefBuilder recv_builder(opts.new_name(src->name()), recv_op,
+                              NodeDebugInfo(*src));
   SetSendRecvAttrs(opts, edge, &recv_builder);
   recv_builder.Device(dst->assigned_device_name())
       .Attr("tensor_type", cast_dtype);
@@ -280,7 +283,8 @@ NodeDef* AddRecv(const PartitionOptions& opts, const GraphInfo& g_info,
   // Add the cast node (from cast_dtype to dtype) or an Identity node.
   if (dtype != cast_dtype) {
     const string cast_op = (host_memory) ? "_HostCast" : "Cast";
-    NodeDefBuilder cast_builder(opts.new_name(src->name()), cast_op);
+    NodeDefBuilder cast_builder(opts.new_name(src->name()), cast_op,
+                                NodeDebugInfo(*src));
     cast_builder.Attr("DstT", dtype);
     cast_builder.Device(dst->assigned_device_name())
         .Input(recv->name(), 0, cast_dtype);
@@ -290,7 +294,8 @@ NodeDef* AddRecv(const PartitionOptions& opts, const GraphInfo& g_info,
     return cast;
   } else if (edge->IsControlEdge()) {
     // An Identity is only needed for control edges.
-    NodeDefBuilder id_builder(opts.new_name(src->name()), "Identity");
+    NodeDefBuilder id_builder(opts.new_name(src->name()), "Identity",
+                              NodeDebugInfo(*src));
     id_builder.Device(dst->assigned_device_name())
         .Input(recv->name(), 0, cast_dtype);
     NodeDef* id = gdef->add_node();
@@ -982,6 +987,7 @@ Status Partition(const PartitionOptions& opts, Graph* g,
     GraphDef* dst_graph = &(*partitions)[dstp];
     NodeDef* dst_def = dst_graph->add_node();
     *dst_def = dst->def();
+    MergeDebugInfo(NodeDebugInfo(dst->def()), dst_def);
     dst_def->set_device(dst->assigned_device_name());
     dst_def->clear_input();  // Inputs are filled below
     if (opts.need_to_record_start_times) {
diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc
index a91e6dd057..6dc9a50b98 100644
--- a/tensorflow/core/graph/node_builder.cc
+++ b/tensorflow/core/graph/node_builder.cc
@@ -38,8 +38,9 @@ NodeBuilder::NodeOut::NodeOut()
     : node(nullptr), error(true), index(0), dt(DT_FLOAT) {}
 
 NodeBuilder::NodeBuilder(StringPiece name, StringPiece op_name,
-                         const OpRegistryInterface* op_registry)
-    : def_builder_(name, op_name, op_registry) {}
+                         const OpRegistryInterface* op_registry,
+                         const NodeDebugInfo* debug)
+    : def_builder_(name, op_name, op_registry, debug) {}
 
 NodeBuilder::NodeBuilder(StringPiece name, const OpDef* op_def)
     : def_builder_(name, op_def) {}
diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h
index b1dc2ae92f..51e044cd8b 100644
--- a/tensorflow/core/graph/node_builder.h
+++ b/tensorflow/core/graph/node_builder.h
@@ -77,7 +77,8 @@ class NodeBuilder {
   // specified by calling the methods below.
   // REQUIRES: The OpDef must satisfy ValidateOpDef().
   NodeBuilder(StringPiece name, StringPiece op_name,
-              const OpRegistryInterface* op_registry = OpRegistry::Global());
+              const OpRegistryInterface* op_registry = OpRegistry::Global(),
+              const NodeDebugInfo* debug = nullptr);
   NodeBuilder(StringPiece name, const OpDef* op_def);
 
   // Create a NodeBuilder from an existing NodeDefBuilder.
diff --git a/tensorflow/core/graph/optimizer_cse.cc b/tensorflow/core/graph/optimizer_cse.cc
index 4073255db3..19afeb6bad 100644
--- a/tensorflow/core/graph/optimizer_cse.cc
+++ b/tensorflow/core/graph/optimizer_cse.cc
@@ -43,6 +43,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/graph/algorithm.h"
 #include "tensorflow/core/lib/gtl/map_util.h"
 #include "tensorflow/core/lib/hash/hash.h"
@@ -213,6 +214,7 @@ bool OptimizerCSE::Optimize(
         g_->AddEdge(*candidate, e->src_output(), e->dst(), e->dst_input());
       }
 
+      MergeDebugInfo(NodeDebugInfo(*n), *candidate);
       g_->RemoveNode(n);
       changed = true;
     }
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index 426c404f43..33b9243dfe 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -47,7 +47,7 @@ namespace {
 std::unique_ptr<const NodeDef> StripTensorDataFromNodeDef(
     OpKernelConstruction* ctx) {
 #ifndef __ANDROID__
-  DCHECK_EQ(NodeDef::descriptor()->field_count(), 5)
+  DCHECK_EQ(NodeDef::descriptor()->field_count(), 6)
       << "The NodeDef format has changed, and the attr-stripping code may need "
       << "to be updated.";
 #endif
@@ -61,6 +61,7 @@ std::unique_ptr<const NodeDef> StripTensorDataFromNodeDef(
   // attrs that affect the cardinality of list-typed inputs and outputs, so it
   // is safe to drop other attrs from the NodeDef.
   AddNodeAttr("dtype", ctx->output_type(0), ret);
+  MergeDebugInfo(original, ret);
   return std::unique_ptr<const NodeDef>(ret);
 }
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-node-def.-experimental-debug-info.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-node-def.-experimental-debug-info.pbtxt
new file mode 100644
index 0000000000..73483e2b6e
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-node-def.-experimental-debug-info.pbtxt
@@ -0,0 +1,12 @@
+path: "tensorflow.NodeDef.ExperimentalDebugInfo"
+tf_proto {
+  descriptor {
+    name: "ExperimentalDebugInfo"
+    field {
+      name: "original_node_names"
+      number: 1
+      label: LABEL_REPEATED
+      type: TYPE_STRING
+    }
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-node-def.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-node-def.pbtxt
index 646fa8abb9..18548632c9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.-node-def.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.-node-def.pbtxt
@@ -33,6 +33,13 @@ tf_proto {
       type: TYPE_MESSAGE
       type_name: ".tensorflow.NodeDef.AttrEntry"
     }
+    field {
+      name: "experimental_debug_info"
+      number: 6
+      label: LABEL_OPTIONAL
+      type: TYPE_MESSAGE
+      type_name: ".tensorflow.NodeDef.ExperimentalDebugInfo"
+    }
     nested_type {
       name: "AttrEntry"
       field {
@@ -52,5 +59,14 @@ tf_proto {
         map_entry: true
       }
     }
+    nested_type {
+      name: "ExperimentalDebugInfo"
+      field {
+        name: "original_node_names"
+        number: 1
+        label: LABEL_REPEATED
+        type: TYPE_STRING
+      }
+    }
   }
 }
-- 
GitLab


From 3b676d11684c92e4adc0aa8c70c0fef9eb38fed8 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Thu, 13 Dec 2018 16:39:57 -0800
Subject: [PATCH 561/873] Make segmenter deterministic

---
 .../contrib/tensorrt/segment/segment.cc       | 33 +++++++++++++++----
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc
index 6abc5226cc..a32356710d 100644
--- a/tensorflow/contrib/tensorrt/segment/segment.cc
+++ b/tensorflow/contrib/tensorrt/segment/segment.cc
@@ -225,6 +225,24 @@ SimpleGraph::~SimpleGraph() {
   for (auto x : edges_) delete x;
 }
 
+// Define comparison functions for std::set with pointer keys so that behavior
+// is deterministic. When using std::set with pointer key types, the items are
+// sorted by pointer address which is non-deterministic. This can cause issues
+// for INT8 mode because the graph is converted twice and non-determinism may
+// cause a mismatch between the calibration tables of the conversions.
+struct SimpleEdgePtrCompare {
+  bool operator()(const SimpleEdge* lhs, const SimpleEdge* rhs) const {
+    return (lhs->id() < rhs->id());
+  }
+};
+
+struct NodePtrCompare {
+  bool operator()(const tensorflow::Node* lhs,
+                  const tensorflow::Node* rhs) const {
+    return (lhs->name() < rhs->name());
+  }
+};
+
 namespace {
 
 // Copied from TF ReverseDFS, which only works for tensorflow::Graph.
@@ -476,7 +494,7 @@ tensorflow::Status SegmentGraph(
     // nodes. Iterate since combining two nodes may unblock other
     // combining.
     while (true) {
-      std::set<const SimpleEdge*> contract_edges;
+      std::set<const SimpleEdge*, SimpleEdgePtrCompare> contract_edges;
       for (const SimpleEdge* out_edge : node->out_edges()) {
         VLOG(3) << "... out node " << out_edge->dst()->name() << " ( "
                 << out_edge->dst()->id() << " <- " << node->id() << " )";
@@ -530,7 +548,7 @@ tensorflow::Status SegmentGraph(
 
   // A map from the segment identifier (currently the name of the root node of
   // the segment tree) to the segment nodes set.
-  std::map<string, std::set<const tensorflow::Node*>> sg_map;
+  std::map<string, std::set<const tensorflow::Node*, NodePtrCompare>> sg_map;
 
   // A map from the segment identifier (currently the name of the root node of
   // the segment tree) to the device names that the nodes in the segment are
@@ -566,7 +584,8 @@ tensorflow::Status SegmentGraph(
   // --------------------------------- Step 2 ---------------------------------
   // Remove ineligible input/output nodes.
   for (auto& itr : sg_map) {
-    std::set<const tensorflow::Node*>& segment_nodes = itr.second;
+    std::set<const tensorflow::Node*, NodePtrCompare>& segment_nodes =
+        itr.second;
     VLOG(1) << "Segment original size: " << segment_nodes.size();
     while (true) {
       std::deque<const tensorflow::Node*> in_nodes_que, out_nodes_que;
@@ -618,8 +637,9 @@ tensorflow::Status SegmentGraph(
                               bool is_input_nodes,
                               std::deque<const tensorflow::Node*>* que) {
         // Run a BFS on the queue to find all the input/output nodes.
-        std::set<const tensorflow::Node*> visited;
-        std::set<const tensorflow::Node*> logged(que->begin(), que->end());
+        std::set<const tensorflow::Node*, NodePtrCompare> visited;
+        std::set<const tensorflow::Node*, NodePtrCompare> logged(que->begin(),
+                                                                 que->end());
         while (!que->empty()) {
           auto node = que->front();
           que->pop_front();
@@ -653,7 +673,8 @@ tensorflow::Status SegmentGraph(
   // --------------------------------- Step 3 ---------------------------------
   // Convert the segments into the expected return format
   for (const auto& itr : sg_map) {
-    const std::set<const tensorflow::Node*>& segment_nodes = itr.second;
+    const std::set<const tensorflow::Node*, NodePtrCompare>& segment_nodes =
+        itr.second;
     if (VLOG_IS_ON(1)) {
       string s = "parent=" + itr.first + ":";
       for (auto node : segment_nodes) s += " " + node->name();
-- 
GitLab


From 80ecf7a7c2886c0866389f920c5ace8eb1c7a51e Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 13 Dec 2018 16:42:43 -0800
Subject: [PATCH 562/873] For windows builds, add timestamps to each command
 logged when run under msys.

PiperOrigin-RevId: 225458736
---
 tensorflow/tools/ci_build/windows/bazel/common_env.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
index c18f0d6e69..34376f1481 100644
--- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
@@ -30,6 +30,9 @@ export TMPDIR=${TMPDIR:-"C:/tmp"}
 export TMPDIR=$(cygpath -m "$TMPDIR")
 mkdir -p "$TMPDIR"
 
+# Add timestamps before each command.
+export PS4='+ $(date) + '
+
 # Set bash path
 export BAZEL_SH=${BAZEL_SH:-"C:/tools/msys64/usr/bin/bash"}
 
-- 
GitLab


From bc99c3db7f82cd2789a06898f4cc0d7b86da4e5a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 16:43:31 -0800
Subject: [PATCH 563/873] Rolls back exposure of the optimizers_v2 in tf 1.x,
 until the keras model estimator backward incompatibility issues can be sorted
 out. (Namely, the use of global step in estimators & trying to assign to
 iterations not working in optimizers_v2

PiperOrigin-RevId: 225458875
---
 tensorflow/python/keras/engine/saving_test.py |  4 +-
 .../python/keras/engine/topology_test.py      |  5 +-
 tensorflow/python/keras/models_test.py        |  3 +-
 .../python/keras/optimizer_v2/adadelta.py     |  2 +-
 .../python/keras/optimizer_v2/adagrad.py      |  2 +-
 tensorflow/python/keras/optimizer_v2/adam.py  |  2 +-
 .../python/keras/optimizer_v2/adamax.py       |  2 +-
 tensorflow/python/keras/optimizer_v2/ftrl.py  |  2 +-
 .../keras/optimizer_v2/gradient_descent.py    |  2 +-
 .../python/keras/optimizer_v2/optimizer_v2.py |  2 +-
 .../python/keras/optimizer_v2/rmsprop.py      |  2 +-
 tensorflow/python/keras/optimizers.py         | 39 +++++++++++-----
 ...ensorflow.keras.optimizers.-adadelta.pbtxt | 45 ++----------------
 ...tensorflow.keras.optimizers.-adagrad.pbtxt | 45 ++----------------
 .../tensorflow.keras.optimizers.-adam.pbtxt   | 45 ++----------------
 .../tensorflow.keras.optimizers.-adamax.pbtxt | 46 ++-----------------
 ...nsorflow.keras.optimizers.-optimizer.pbtxt | 43 ++---------------
 ...nsorflow.keras.optimizers.-r-m-sprop.pbtxt | 45 ++----------------
 .../tensorflow.keras.optimizers.-s-g-d.pbtxt  | 45 ++----------------
 19 files changed, 69 insertions(+), 312 deletions(-)

diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py
index f6ed3f45c4..92fac6f242 100644
--- a/tensorflow/python/keras/engine/saving_test.py
+++ b/tensorflow/python/keras/engine/saving_test.py
@@ -33,7 +33,6 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.keras import optimizers
 from tensorflow.python.keras.engine import saving
 from tensorflow.python.keras.engine import training
-from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
@@ -334,6 +333,7 @@ class TestWeightSavingAndLoading(test.TestCase, parameterized.TestCase):
 
 class TestWholeModelSaving(test.TestCase):
 
+  @test_util.run_v1_only('b/120994067')
   def test_sequential_model_saving(self):
     if h5py is None:
       self.skipTest('h5py required to run this test')
@@ -345,7 +345,7 @@ class TestWholeModelSaving(test.TestCase):
       model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
       model.compile(
           loss=keras.losses.MSE,
-          optimizer=rmsprop.RMSprop(lr=0.0001),
+          optimizer=keras.optimizers.RMSprop(lr=0.0001),
           metrics=[
               keras.metrics.categorical_accuracy,
               keras.metrics.CategoricalAccuracy()
diff --git a/tensorflow/python/keras/engine/topology_test.py b/tensorflow/python/keras/engine/topology_test.py
index b7680dcbc0..cd1f4d1697 100644
--- a/tensorflow/python/keras/engine/topology_test.py
+++ b/tensorflow/python/keras/engine/topology_test.py
@@ -30,6 +30,7 @@ from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import input_layer as input_layer_lib
 from tensorflow.python.keras.engine import network as network_lib
+from tensorflow.python.keras.optimizer_v2 import gradient_descent
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
@@ -859,7 +860,7 @@ class TopologyConstructionTest(keras_parameterized.TestCase):
     x = np.ones((100, 2))
     y = np.ones((100, 2))
     model.compile(
-        optimizer='sgd',
+        optimizer=gradient_descent.SGD(),
         loss='mse',
         run_eagerly=testing_utils.should_run_eagerly())
     loss = model.train_on_batch(x, y)
@@ -908,7 +909,7 @@ class TopologyConstructionTest(keras_parameterized.TestCase):
     model.add(keras.layers.Dense(3))
     model.compile(
         loss='mse',
-        optimizer='sgd',
+        optimizer=gradient_descent.SGD(),
         metrics=['acc'],
         run_eagerly=testing_utils.should_run_eagerly())
 
diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py
index fe7d8a5f59..c466d94fed 100644
--- a/tensorflow/python/keras/models_test.py
+++ b/tensorflow/python/keras/models_test.py
@@ -31,7 +31,6 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import metrics
 from tensorflow.python.keras import models
-from tensorflow.python.keras import optimizers
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import resource_variable_ops
@@ -482,7 +481,7 @@ class TestCloneAndBuildModel(test.TestCase):
     self.assert_optimizer_iterations_increases(adam.AdamOptimizer(0.01))
 
   def test_replace_keras_optimizer_iterations_variable(self):
-    self.assert_optimizer_iterations_increases(optimizers.Adam())
+    self.assert_optimizer_iterations_increases('adam')
 
   def test_clone_and_build_sequential_model_without_inputs_defined(self):
     with self.cached_session():
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py
index 8985325056..88ddc94324 100644
--- a/tensorflow/python/keras/optimizer_v2/adadelta.py
+++ b/tensorflow/python/keras/optimizer_v2/adadelta.py
@@ -25,7 +25,7 @@ from tensorflow.python.training import training_ops
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('keras.optimizers.Adadelta')
+@tf_export('keras.optimizers.Adadelta', v1=[])
 class Adadelta(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the Adadelta algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py
index 6faf7fc2f4..ac55d2075a 100644
--- a/tensorflow/python/keras/optimizer_v2/adagrad.py
+++ b/tensorflow/python/keras/optimizer_v2/adagrad.py
@@ -30,7 +30,7 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('keras.optimizers.Adagrad')
+@tf_export('keras.optimizers.Adagrad', v1=[])
 class Adagrad(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the Adagrad algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
index c99468f8cf..873dadb31a 100644
--- a/tensorflow/python/keras/optimizer_v2/adam.py
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -27,7 +27,7 @@ from tensorflow.python.training import training_ops
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('keras.optimizers.Adam')
+@tf_export('keras.optimizers.Adam', v1=[])
 class Adam(optimizer_v2.OptimizerV2):
   """Optimizer that implements the Adam algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/adamax.py b/tensorflow/python/keras/optimizer_v2/adamax.py
index 920a6c0fd3..9c826eb42a 100644
--- a/tensorflow/python/keras/optimizer_v2/adamax.py
+++ b/tensorflow/python/keras/optimizer_v2/adamax.py
@@ -28,7 +28,7 @@ from tensorflow.python.training import training_ops
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('keras.optimizers.Adamax')
+@tf_export('keras.optimizers.Adamax', v1=[])
 class Adamax(adam.Adam):
   """Optimizer that implements the Adamax algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/ftrl.py b/tensorflow/python/keras/optimizer_v2/ftrl.py
index 365bd68220..7828b1791e 100644
--- a/tensorflow/python/keras/optimizer_v2/ftrl.py
+++ b/tensorflow/python/keras/optimizer_v2/ftrl.py
@@ -24,7 +24,7 @@ from tensorflow.python.training import training_ops
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('keras.optimizers.Ftrl')
+@tf_export('keras.optimizers.Ftrl', v1=[])
 class Ftrl(optimizer_v2.OptimizerV2):
   """Optimizer that implements the FTRL algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent.py b/tensorflow/python/keras/optimizer_v2/gradient_descent.py
index a77ae30551..06db2f3b4c 100644
--- a/tensorflow/python/keras/optimizer_v2/gradient_descent.py
+++ b/tensorflow/python/keras/optimizer_v2/gradient_descent.py
@@ -24,7 +24,7 @@ from tensorflow.python.training import training_ops
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("keras.optimizers.SGD")
+@tf_export("keras.optimizers.SGD", v1=[])
 class SGD(optimizer_v2.OptimizerV2):
   """Stochastic gradient descent and momentum optimizer.
 
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index adce9fb9db..0e909d0d79 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -67,7 +67,7 @@ def _deduplicate_indexed_slices(values, indices):
 
 
 @six.add_metaclass(abc.ABCMeta)
-@tf_export("keras.optimizers.Optimizer")
+@tf_export("keras.optimizers.Optimizer", v1=[])
 class OptimizerV2(checkpointable.CheckpointableBase):
   """Updated base class for optimizers.
 
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py
index 634111b470..dbb5a37fd8 100644
--- a/tensorflow/python/keras/optimizer_v2/rmsprop.py
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py
@@ -23,7 +23,7 @@ from tensorflow.python.training import training_ops
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("keras.optimizers.RMSprop")
+@tf_export("keras.optimizers.RMSprop", v1=[])
 class RMSprop(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the RMSprop algorithm.
 
diff --git a/tensorflow/python/keras/optimizers.py b/tensorflow/python/keras/optimizers.py
index decfcf993c..dda603fa2e 100644
--- a/tensorflow/python/keras/optimizers.py
+++ b/tensorflow/python/keras/optimizers.py
@@ -45,6 +45,7 @@ from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export(v1=['keras.optimizers.Optimizer'])
 class Optimizer(object):
   """Abstract optimizer base class.
 
@@ -158,6 +159,7 @@ class Optimizer(object):
     return cls(**config)
 
 
+@tf_export(v1=['keras.optimizers.SGD'])
 class SGD(Optimizer):
   """Stochastic gradient descent optimizer.
 
@@ -222,6 +224,7 @@ class SGD(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
+@tf_export(v1=['keras.optimizers.RMSprop'])
 class RMSprop(Optimizer):
   """RMSProp optimizer.
 
@@ -288,6 +291,7 @@ class RMSprop(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
+@tf_export(v1=['keras.optimizers.Adagrad'])
 class Adagrad(Optimizer):
   """Adagrad optimizer.
 
@@ -354,6 +358,7 @@ class Adagrad(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
+@tf_export(v1=['keras.optimizers.Adadelta'])
 class Adadelta(Optimizer):
   """Adadelta optimizer.
 
@@ -437,6 +442,7 @@ class Adadelta(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
+@tf_export(v1=['keras.optimizers.Adam'])
 class Adam(Optimizer):
   """Adam optimizer.
 
@@ -533,6 +539,7 @@ class Adam(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
+@tf_export(v1=['keras.optimizers.Adamax'])
 class Adamax(Optimizer):
   """Adamax optimizer from Adam paper's Section 7.
 
@@ -799,17 +806,27 @@ def deserialize(config, custom_objects=None):
   Returns:
       A Keras Optimizer instance.
   """
-  all_classes = {
-      'adadelta': adadelta_v2.Adadelta,
-      'adagrad': adagrad_v2.Adagrad,
-      'adam': adam_v2.Adam,
-      'adamax': adamax_v2.Adamax,
-      'nadam': nadam_v2.Nadam,
-      'rmsprop': rmsprop_v2.RMSprop,
-      'sgd': gradient_descent_v2.SGD
-  }
-  if not tf2.enabled():
-    all_classes['nadam'] = Nadam
+  if tf2.enabled():
+    all_classes = {
+        'adadelta': adadelta_v2.Adadelta,
+        'adagrad': adagrad_v2.Adagrad,
+        'adam': adam_v2.Adam,
+        'adamax': adamax_v2.Adamax,
+        'nadam': nadam_v2.Nadam,
+        'rmsprop': rmsprop_v2.RMSprop,
+        'sgd': gradient_descent_v2.SGD
+    }
+  else:
+    all_classes = {
+        'adadelta': Adadelta,
+        'adagrad': Adagrad,
+        'adam': Adam,
+        'adamax': Adamax,
+        'nadam': Nadam,
+        'rmsprop': RMSprop,
+        'sgd': SGD,
+        'tfoptimizer': TFOptimizer
+    }
 
   # Make deserialization case-insensitive for built-in optimizers.
   if config['class_name'].lower() in all_classes:
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
index 5426269793..b9ce154bdd 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adadelta.pbtxt
@@ -1,36 +1,15 @@
 path: "tensorflow.keras.optimizers.Adadelta"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adadelta.Adadelta\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Adadelta\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
   is_instance: "<type \'object\'>"
-  member {
-    name: "iterations"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.95\', \'1e-07\', \'Adadelta\'], "
-  }
-  member_method {
-    name: "add_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply_gradients"
-    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'lr\', \'rho\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'1.0\', \'0.95\', \'None\', \'0.0\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "get_config"
@@ -40,14 +19,6 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_slot_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -56,16 +27,8 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "minimize"
-    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
index c39fe6ba4f..d0dc9e37a3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adagrad.pbtxt
@@ -1,36 +1,15 @@
 path: "tensorflow.keras.optimizers.Adagrad"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adagrad.Adagrad\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Adagrad\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
   is_instance: "<type \'object\'>"
-  member {
-    name: "iterations"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'learning_rate\', \'initial_accumulator_value\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.1\', \'1e-07\', \'Adagrad\'], "
-  }
-  member_method {
-    name: "add_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply_gradients"
-    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'lr\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'None\', \'0.0\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "get_config"
@@ -40,14 +19,6 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_slot_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -56,16 +27,8 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "minimize"
-    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
index 05d46d380b..06815fa99a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adam.pbtxt
@@ -1,36 +1,15 @@
 path: "tensorflow.keras.optimizers.Adam"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Adam\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
   is_instance: "<type \'object\'>"
-  member {
-    name: "iterations"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'amsgrad\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'False\', \'Adam\'], "
-  }
-  member_method {
-    name: "add_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply_gradients"
-    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'lr\', \'beta_1\', \'beta_2\', \'epsilon\', \'decay\', \'amsgrad\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'None\', \'0.0\', \'False\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "get_config"
@@ -40,14 +19,6 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_slot_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -56,16 +27,8 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "minimize"
-    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
index 78829def67..47b55fdb44 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-adamax.pbtxt
@@ -1,37 +1,15 @@
 path: "tensorflow.keras.optimizers.Adamax"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adamax.Adamax\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.adam.Adam\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Adamax\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
   is_instance: "<type \'object\'>"
-  member {
-    name: "iterations"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'learning_rate\', \'beta_1\', \'beta_2\', \'epsilon\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.999\', \'1e-07\', \'Adamax\'], "
-  }
-  member_method {
-    name: "add_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply_gradients"
-    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'lr\', \'beta_1\', \'beta_2\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.002\', \'0.9\', \'0.999\', \'None\', \'0.0\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "get_config"
@@ -41,14 +19,6 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_slot_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -57,16 +27,8 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "minimize"
-    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
index 58b7f27491..53d64dae93 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-optimizer.pbtxt
@@ -1,35 +1,14 @@
 path: "tensorflow.keras.optimizers.Optimizer"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
   is_instance: "<type \'object\'>"
-  member {
-    name: "iterations"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'name\'], varargs=None, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply_gradients"
-    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\'], varargs=None, keywords=kwargs, defaults=None"
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "get_config"
@@ -39,14 +18,6 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_slot_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -55,16 +26,8 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "minimize"
-    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
index 8de796edde..a1e9b8cceb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-r-m-sprop.pbtxt
@@ -1,36 +1,15 @@
 path: "tensorflow.keras.optimizers.RMSprop"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.RMSprop\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
   is_instance: "<type \'object\'>"
-  member {
-    name: "iterations"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'learning_rate\', \'rho\', \'momentum\', \'epsilon\', \'centered\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'0.0\', \'1e-07\', \'False\', \'RMSprop\'], "
-  }
-  member_method {
-    name: "add_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply_gradients"
-    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'lr\', \'rho\', \'epsilon\', \'decay\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.9\', \'None\', \'0.0\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "get_config"
@@ -40,14 +19,6 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_slot_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -56,16 +27,8 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "minimize"
-    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
 }
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
index 393eeb3d6c..a67fefb1ba 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.optimizers.-s-g-d.pbtxt
@@ -1,36 +1,15 @@
 path: "tensorflow.keras.optimizers.SGD"
 tf_class {
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.gradient_descent.SGD\'>"
-  is_instance: "<class \'tensorflow.python.keras.optimizer_v2.optimizer_v2.OptimizerV2\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.SGD\'>"
+  is_instance: "<class \'tensorflow.python.keras.optimizers.Optimizer\'>"
   is_instance: "<type \'object\'>"
-  member {
-    name: "iterations"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'learning_rate\', \'momentum\', \'nesterov\', \'name\'], varargs=None, keywords=kwargs, defaults=[\'0.001\', \'0.0\', \'False\', \'SGD\'], "
-  }
-  member_method {
-    name: "add_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\', \'initializer\'], varargs=None, keywords=None, defaults=[\'zeros\'], "
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'trainable\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'zeros\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply_gradients"
-    argspec: "args=[\'self\', \'grads_and_vars\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'lr\', \'momentum\', \'decay\', \'nesterov\'], varargs=None, keywords=kwargs, defaults=[\'0.01\', \'0.0\', \'0.0\', \'False\'], "
   }
   member_method {
     name: "from_config"
-    argspec: "args=[\'cls\', \'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "get_config"
@@ -40,14 +19,6 @@ tf_class {
     name: "get_gradients"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "get_slot"
-    argspec: "args=[\'self\', \'var\', \'slot_name\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_slot_names"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "get_updates"
     argspec: "args=[\'self\', \'loss\', \'params\'], varargs=None, keywords=None, defaults=None"
@@ -56,16 +27,8 @@ tf_class {
     name: "get_weights"
     argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "minimize"
-    argspec: "args=[\'self\', \'loss\', \'var_list\', \'grad_loss\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
   member_method {
     name: "set_weights"
     argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "variables"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
 }
-- 
GitLab


From cd1bae00876050054e21f7683b294b08c359405c Mon Sep 17 00:00:00 2001
From: gehring <clement.gehring@gmail.com>
Date: Thu, 13 Dec 2018 19:59:29 -0500
Subject: [PATCH 564/873] Fixed typo in comment about docker build argument.

---
 .../dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
index a61dfbbe54..0652ac4151 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-cpu.partial.Dockerfile
@@ -23,6 +23,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
  
 ENV CI_BUILD_PYTHON python
 
-# Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
+# Check out TensorFlow source code if --build-arg CHECKOUT_TF_SRC=1
 ARG CHECKOUT_TF_SRC=0
 RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
-- 
GitLab


From 352a08f34fa38b2618b10bd5ae3a6d25d667e73b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 17:05:31 -0800
Subject: [PATCH 565/873] Refactoring, and avoid calling function twice in
 CHECK_NN.

PiperOrigin-RevId: 225461929
---
 .../lite/delegates/nnapi/nnapi_delegate.cc    | 205 +++++-------------
 1 file changed, 54 insertions(+), 151 deletions(-)

diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
index 4fe07004a8..7908bbf164 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -37,11 +37,15 @@ namespace {
 
 // TODO(b/80621585): Consider printing error string, but don't for now to
 // minimize binary size.
-#define CHECK_NN(context, code)                                           \
-  if (code != ANEURALNETWORKS_NO_ERROR) {                                 \
-    context->ReportError(context, "NN API returned error (%d).\n", code); \
-    return kTfLiteError;                                                  \
-  }
+#define CHECK_NN(context, code)                                               \
+  do {                                                                        \
+    const auto _code = (code);                                                \
+    if (_code != ANEURALNETWORKS_NO_ERROR) {                                  \
+      context->ReportError(context, "NN API returned error (%d, line %d).\n", \
+                           _code, __LINE__);                                  \
+      return kTfLiteError;                                                    \
+    }                                                                         \
+  } while (0)
 
 namespace {
 int32_t GetAndroidSdkVersion() {
@@ -349,19 +353,18 @@ class NNAPIOpBuilder {
     return kTfLiteOk;
   }
 
-  // TfLiteContext for error handling. Must be named context for macros to
-  // work.
-  TfLiteContext* context_;
+  // TfLiteContext for error handling.
+  TfLiteContext* const context_;
 
-  // Tracks relationship between indices
+  // Tracks relationship between indices.
   OperandMapping* operand_mapping_;
 
-  // The model
-  ANeuralNetworksModel* nn_model_;
+  // The NNAPI model.
+  ANeuralNetworksModel* const nn_model_;
 
   // Inputs and outputs for the current op. These are augmented in the sense
   // that NN API uses operands for all arguments, not just tensors, unlike
-  // TensorFlow lite.
+  // TensorFlow Lite.
   std::vector<uint32_t> augmented_inputs_;
   std::vector<uint32_t> augmented_outputs_;
 };
@@ -374,6 +377,14 @@ struct NNAPIOpMappingArgs {
   std::vector<int>* model_state_tfl_inputs;
 };
 
+// Mapping function simply returning the operation type without adding any
+// additional parameter.
+template <ANeuralNetworksOperationType OperationType>
+ANeuralNetworksOperationType BasicMappingFn(
+    const NNAPIOpMappingArgs& mapping_args) {
+  return OperationType;
+}
+
 // The kernel that represents the node sub set of TF Lite being run on NN API.
 class NNAPIDelegateKernel {
  public:
@@ -385,8 +396,8 @@ class NNAPIDelegateKernel {
   // Return a function that knows how to translate a node into its operands
   // when called. You can use this function to see if a node is supported
   // (i.e. that MappingFn is not nullptr).
-  MappingFn Map(TfLiteContext* context, int builtin_code, int version,
-                TfLiteNode* node) {
+  static MappingFn Map(TfLiteContext* context, int builtin_code, int version,
+                       TfLiteNode* node) {
     switch (builtin_code) {
       case kTfLiteBuiltinAdd:
         if (version == 1) {
@@ -397,8 +408,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_ADD;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinMul:
@@ -410,8 +419,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_MUL;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinAveragePool2d:
@@ -422,8 +429,6 @@ class NNAPIDelegateKernel {
                 mapping_args.node->builtin_data);
             return ANEURALNETWORKS_AVERAGE_POOL_2D;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinMaxPool2d:
@@ -434,8 +439,6 @@ class NNAPIDelegateKernel {
                 mapping_args.node->builtin_data);
             return ANEURALNETWORKS_MAX_POOL_2D;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinL2Pool2d:
@@ -446,8 +449,6 @@ class NNAPIDelegateKernel {
                 mapping_args.node->builtin_data);
             return ANEURALNETWORKS_L2_POOL_2D;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinConv2d:
@@ -469,8 +470,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_CONV_2D;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinDepthwiseConv2d:
@@ -487,8 +486,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_DEPTHWISE_CONV_2D;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinFullyConnected:
@@ -500,8 +497,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_FULLY_CONNECTED;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinSoftmax:
@@ -513,18 +508,11 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
             return ANEURALNETWORKS_SOFTMAX;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinReshape:
         if (version == 1 && node->inputs->size == 2) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_RESHAPE;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_RESHAPE>;
         }
         break;
       case kTfLiteBuiltinSqueeze:
@@ -540,20 +528,15 @@ class NNAPIDelegateKernel {
                 static_cast<uint32_t>(builtin->num_squeeze_dims));
             return ANEURALNETWORKS_SQUEEZE;
           };
-        } else {
-          return nullptr;
         }
+        break;
       case kTfLiteBuiltinL2Normalization: {
         auto builtin =
             reinterpret_cast<TfLiteL2NormParams*>(node->builtin_data);
-        if (builtin->activation != kTfLiteActNone) {
-          // NNAPI does not support activations
-          return nullptr;
+        if (builtin->activation == kTfLiteActNone) {
+          return BasicMappingFn<ANEURALNETWORKS_L2_NORMALIZATION>;
         }
-        return [](const NNAPIOpMappingArgs& mapping_args)
-                   -> ANeuralNetworksOperationType {
-          return ANEURALNETWORKS_L2_NORMALIZATION;
-        };
+        break;
       }
       case kTfLiteBuiltinLocalResponseNormalization:
         if (version == 1) {
@@ -567,10 +550,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarFloat32Operand(builtin->beta);
             return ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
           };
-        } else {
-          // TODO(miaowang): clean-up code and return early in the unsupported
-          // case.
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinLshProjection:
@@ -587,8 +566,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->type);
             return ANEURALNETWORKS_LSH_PROJECTION;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinConcatenation:
@@ -599,7 +576,7 @@ class NNAPIDelegateKernel {
             // NNAPI only support concatenating quantized tensor of the same
             // scale and offset.
             auto first_param = context->tensors[node->inputs->data[0]].params;
-            for (int i = 0; i < node->inputs->size; i++) {
+            for (int i = 1; i < node->inputs->size; i++) {
               auto curr_param = context->tensors[node->inputs->data[i]].params;
               if (curr_param.scale != first_param.scale ||
                   curr_param.zero_point != first_param.zero_point) {
@@ -614,68 +591,36 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->axis);
             return ANEURALNETWORKS_CONCATENATION;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinDequantize:
         if (version == 1) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_DEQUANTIZE;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_DEQUANTIZE>;
         }
         break;
       case kTfLiteBuiltinFloor:
         if (version == 1) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_FLOOR;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_FLOOR>;
         }
         break;
       case kTfLiteBuiltinRelu:
         if (version == 1) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_RELU;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_RELU>;
         }
         break;
       case kTfLiteBuiltinReluN1To1:
         if (version == 1) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_RELU1;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_RELU1>;
         }
         break;
       case kTfLiteBuiltinRelu6:
         if (version == 1) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_RELU6;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_RELU6>;
         }
         break;
       case kTfLiteBuiltinLogistic:
         if (version == 1) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_LOGISTIC;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_LOGISTIC>;
         }
         break;
       case kTfLiteBuiltinTanh:
@@ -683,12 +628,7 @@ class NNAPIDelegateKernel {
         if (version == 1 &&
             context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
           // NNAPI only support float tanh.
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_TANH;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_TANH>;
         }
         break;
       case kTfLiteBuiltinSub:
@@ -702,8 +642,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_SUB;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinDiv:
@@ -717,8 +655,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_DIV;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinPad:
@@ -728,22 +664,12 @@ class NNAPIDelegateKernel {
           // NNAPI does not support specifying the padding value.
           // NNAPI pads physical zero for quantized tensors, so only delegate
           // float pad to NNAPI.
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_PAD;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_PAD>;
         }
         break;
       case kTfLiteBuiltinSpaceToBatchNd:
         if (version == 1 && kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_SPACE_TO_BATCH_ND;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_SPACE_TO_BATCH_ND>;
         }
         break;
       case kTfLiteBuiltinStridedSlice:
@@ -758,8 +684,6 @@ class NNAPIDelegateKernel {
                 builtin->shrink_axis_mask);
             return ANEURALNETWORKS_STRIDED_SLICE;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinTranspose:
@@ -771,12 +695,7 @@ class NNAPIDelegateKernel {
             (node->inputs->size > 1) &&
             (context->tensors[node->inputs->data[1]].allocation_type ==
              kTfLiteMmapRo)) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_TRANSPOSE;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_TRANSPOSE>;
         }
         break;
       case kTfLiteBuiltinRnn:
@@ -799,8 +718,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_RNN;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinSvdf:
@@ -827,8 +744,6 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(builtin->activation);
             return ANEURALNETWORKS_SVDF;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinLstm:
@@ -870,8 +785,6 @@ class NNAPIDelegateKernel {
 
             return ANEURALNETWORKS_LSTM;
           };
-        } else {
-          return nullptr;
         }
         break;
       case kTfLiteBuiltinMean:
@@ -888,36 +801,27 @@ class NNAPIDelegateKernel {
             mapping_args.builder->AddScalarInt32Operand(keep_dims);
             return ANEURALNETWORKS_MEAN;
           };
-        } else {
-          return nullptr;
         }
+        break;
       case kTfLiteBuiltinEmbeddingLookup:
         // NNAPI only support float32 values.
         if (version == 1 &&
             context->tensors[node->inputs->data[1]].type == kTfLiteFloat32) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_EMBEDDING_LOOKUP;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_EMBEDDING_LOOKUP>;
         }
         break;
       case kTfLiteBuiltinHashtableLookup:
         // NNAPI only support float32 output.
         if (version == 1 &&
             context->tensors[node->outputs->data[0]].type == kTfLiteFloat32) {
-          return [](const NNAPIOpMappingArgs& mapping_args)
-                     -> ANeuralNetworksOperationType {
-            return ANEURALNETWORKS_HASHTABLE_LOOKUP;
-          };
-        } else {
-          return nullptr;
+          return BasicMappingFn<ANEURALNETWORKS_HASHTABLE_LOOKUP>;
         }
         break;
       default:
+        // All other operators are not mapped.
         return nullptr;
     }
+    return nullptr;
   }
 
   // Initialize the kernel (a NN model).
@@ -1090,7 +994,7 @@ class NNAPIDelegateKernel {
     outputs.reserve(output_tensors->size);
 
     size_t total_input_byte_size = 0;
-    // Make the TensorFlow lite inputs and outputs to ann_indices.
+    // Make the TensorFlow Lite inputs and outputs to ann_indices.
     for (int i : TfLiteIntArrayView(input_tensors)) {
       // Constant tensors are not NNAPI inputs.
       if (i != kOptionalTensor &&
@@ -1149,12 +1053,14 @@ TfLiteDelegate* NnApiDelegate() {
           return kTfLiteOk;
         }
 
+        // Allocate one element in vector already since TensorFlow Lite uses
+        // the first value as the number of nodes. The actual value will be set
+        // later, after the vector has been filled.
         std::vector<int> supported_nodes(1);
         // We don't care about all nodes_, we only care about ones in the
         // current plan.
         TfLiteIntArray* plan;
         TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
-        int total_supported_nodes = 0;
 
         // Check for every node if it is supported
         // TODO(b/80625235): Fix this to do more careful checking of versioning.
@@ -1163,14 +1069,12 @@ TfLiteDelegate* NnApiDelegate() {
           TfLiteRegistration* registration;
           TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
               context, node_index, &node, &registration));
-          NNAPIDelegateKernel dummy_kernel;
-          if (dummy_kernel.Map(context, registration->builtin_code,
-                               registration->version, node)) {
+          if (NNAPIDelegateKernel::Map(context, registration->builtin_code,
+                                       registration->version, node)) {
             supported_nodes.push_back(node_index);
           }
-          total_supported_nodes += 1;
         }
-        // Put the size at the beginning of the array.
+        // First element in vector must be the number of actual nodes.
         supported_nodes[0] = supported_nodes.size() - 1;
 
         // NN API Delegate Registration (the pseudo kernel that will invoke NN
@@ -1208,11 +1112,10 @@ TfLiteDelegate* NnApiDelegate() {
 
         // Request TFLite to partition the graph and make kernels
         // for each independent node sub set a new nnapi_delegate_kernel.
-        context->ReplaceNodeSubsetsWithDelegateKernels(
+        return context->ReplaceNodeSubsetsWithDelegateKernels(
             context, nnapi_delegate_kernel,
             reinterpret_cast<TfLiteIntArray*>(supported_nodes.data()),
             delegate);
-        return kTfLiteOk;
       }};
 
   return &delegate;
-- 
GitLab


From 97fe55e68bca159f1e2e7d5d5925f560edeac24e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 17:06:24 -0800
Subject: [PATCH 566/873] Fix docstring of lookup ops.

PiperOrigin-RevId: 225462083
---
 tensorflow/contrib/lookup/lookup_ops.py | 8 ++++----
 tensorflow/python/ops/lookup_ops.py     | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py
index e52fb5ab14..229a72a780 100644
--- a/tensorflow/contrib/lookup/lookup_ops.py
+++ b/tensorflow/contrib/lookup/lookup_ops.py
@@ -91,7 +91,7 @@ def index_table_from_tensor(mapping,
   The bucket ID range is `[mapping size, mapping size + num_oov_buckets - 1]`.
 
   The underlying table must be initialized by calling
-  `tf.tables_initializer.run()` or `table.initializer.run()` once.
+  `session.run(tf.tables_initializer)` or `session.run(table.init)` once.
 
   Elements in `mapping` cannot have duplicates, otherwise when executing the
   table initializer op, it will throw a `FailedPreconditionError`.
@@ -158,7 +158,7 @@ def string_to_index(tensor, mapping, default_value=-1, name=None):
   will throw a FailedPreconditionError.
 
   The underlying table must be initialized by calling
-  `tf.tables_initializer.run()` once.
+  `session.run(tf.tables_initializer)` once.
 
   For example:
 
@@ -202,7 +202,7 @@ def index_to_string_table_from_tensor(mapping, default_value="UNK", name=None):
   (an out-of-vocabulary entry) is assigned the `default_value`
 
   The underlying table must be initialized by calling
-  `tf.tables_initializer.run()` or `table.initializer.run()` once.
+  `session.run(tf.tables_initializer)` or `session.run(table.init)` once.
 
   Elements in `mapping` cannot have duplicates, otherwise when executing the
   table initializer op, it will throw a `FailedPreconditionError`.
@@ -257,7 +257,7 @@ def index_to_string(tensor, mapping, default_value="UNK", name=None):
   (an out-of-vocabulary entry) is assigned the `default_value`
 
   The underlying table must be initialized by calling
-  `tf.tables_initializer.run()` once.
+  `session.run(tf.tables_initializer)` once.
 
   For example:
 
diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py
index 758cb8041d..e96c93c15c 100644
--- a/tensorflow/python/ops/lookup_ops.py
+++ b/tensorflow/python/ops/lookup_ops.py
@@ -948,7 +948,7 @@ def index_table_from_file(vocabulary_file=None,
   `[vocabulary size, vocabulary size + num_oov_buckets - 1]`.
 
   The underlying table must be initialized by calling
-  `tf.tables_initializer.run()` or `table.init.run()` once.
+  `session.run(tf.tables_initializer)` or `session.run(table.init)` once.
 
   To specify multi-column vocabulary files, use key_column_index and
   value_column_index and delimiter.
@@ -1077,7 +1077,7 @@ def index_table_from_tensor(vocabulary_list,
   `[vocabulary list size, vocabulary list size + num_oov_buckets - 1]`.
 
   The underlying table must be initialized by calling
-  `tf.tables_initializer.run()` or `table.init.run()` once.
+  `session.run(tf.tables_initializer)` or `session.run(table.init)` once.
 
   Elements in `vocabulary_list` cannot have duplicates, otherwise when executing
   the table initializer op, it will throw a `FailedPreconditionError`.
@@ -1179,7 +1179,7 @@ def index_to_string_table_from_file(vocabulary_file,
   (an out-of-vocabulary entry) is assigned the `default_value`
 
   The underlying table must be initialized by calling
-  `tf.tables_initializer.run()` or `table.init.run()` once.
+  `session.run(tf.tables_initializer)` or `session.run(table.init)` once.
 
   To specify multi-column vocabulary files, use key_column_index and
   value_column_index and delimiter.
@@ -1276,7 +1276,7 @@ def index_to_string_table_from_tensor(vocabulary_list,
   (an out-of-vocabulary entry) is assigned the `default_value`
 
   The underlying table must be initialized by calling
-  `tf.tables_initializer.run()` or `table.init.run()` once.
+  `session.run(tf.tables_initializer)` or `session.run(table.init)` once.
 
   Elements in `vocabulary_list` cannot have duplicates, otherwise when executing
   the table initializer op, it will throw a `FailedPreconditionError`.
-- 
GitLab


From 2a6751569985494efaef42745e6649053fa288ad Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 13 Dec 2018 17:06:32 -0800
Subject: [PATCH 567/873] Make sure temp directory on windows has the correct
 path separator.

PiperOrigin-RevId: 225462105
---
 tensorflow/python/platform/googletest.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/python/platform/googletest.py b/tensorflow/python/platform/googletest.py
index 5b20e36a69..fe4b0d0d37 100644
--- a/tensorflow/python/platform/googletest.py
+++ b/tensorflow/python/platform/googletest.py
@@ -112,6 +112,9 @@ def GetTempDir():
                               os.path.basename(tf_inspect.getfile(first_frame)))
       temp_dir = tempfile.mkdtemp(prefix=temp_dir.rstrip('.py'))
 
+    # Make sure we have the correct path separators.
+    temp_dir = temp_dir.replace('/', os.sep)
+
     def delete_temp_dir(dirname=temp_dir):
       try:
         file_io.delete_recursively(dirname)
@@ -119,6 +122,7 @@ def GetTempDir():
         logging.error('Error removing %s: %s', dirname, e)
 
     atexit.register(delete_temp_dir)
+
     _googletest_temp_dir = temp_dir
 
   return _googletest_temp_dir
-- 
GitLab


From 09decf56ba4e9f45aabd3d048ef8624176a1c63a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 17:38:47 -0800
Subject: [PATCH 568/873] Microoptimizations of graph construction code.

Before:
Run on *********** (72 X 2993 MHz CPUs); 2018-12-13T16:09:43.471855971-08:00
CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB
Benchmark                 Time(ns)        CPU(ns)     Iterations
----------------------------------------------------------------
BM_GraphCreation/10/2        67142          86268           8252
BM_GraphCreation/64/2       138640         163264           4262
BM_GraphCreation/512/2      801036         837092            838
BM_GraphCreation/4k/2      7670132        7719032             89
BM_GraphCreation/32k/2    87954443       88133128              8
BM_GraphCreation/10/4        85895         106133           6589
BM_GraphCreation/64/4       176924         202943           3445
BM_GraphCreation/512/4     1092235        1124801            620
BM_GraphCreation/4k/4     10167172       10242199             68
BM_GraphCreation/32k/4   116535329      116863022              6
BM_GraphCreation/10/8       128276         152347           4595
BM_GraphCreation/64/8       290808         322147           2167
BM_GraphCreation/512/8     1995712        2040134            349
BM_GraphCreation/4k/8     17648175       17725397             39
BM_GraphCreation/32k/8   201791945      202232200              3
BM_GraphCreation/10/16      212183         240520           2909
BM_GraphCreation/64/16      474982         506036           1000
BM_GraphCreation/512/16    3590180        3641964            195
BM_GraphCreation/4k/16    32178292       32265093             22
BM_GraphCreation/32k/16  359809818      360593206              2

After:
Run on *********** (72 X 2993 MHz CPUs); 2018-12-13T16:48:26.030782518-08:00
CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB
Benchmark                 Time(ns)        CPU(ns)     Iterations
----------------------------------------------------------------
BM_GraphCreation/10/2        65638          84729           8276
BM_GraphCreation/64/2       130192         154173           4579
BM_GraphCreation/512/2      766354         802899            881
BM_GraphCreation/4k/2      6966973        7019842             98
BM_GraphCreation/32k/2    82443771       82643748              8
BM_GraphCreation/10/4        82697         102636           6743
BM_GraphCreation/64/4       171184         197236           3574
BM_GraphCreation/512/4     1000612        1030750            676
BM_GraphCreation/4k/4      9268842        9346867             74
BM_GraphCreation/32k/4   110080002      110330854              7
BM_GraphCreation/10/8       161076         181417           4764
BM_GraphCreation/64/8       300977         331782           2081
BM_GraphCreation/512/8     1781437        1829938            387
BM_GraphCreation/4k/8     16062834       16148914             44
BM_GraphCreation/32k/8   188352170      188727906              4
BM_GraphCreation/10/16      201874         229188           3049
BM_GraphCreation/64/16      445487         479042           1462
BM_GraphCreation/512/16    3173224        3224053            218
BM_GraphCreation/4k/16    29365146       29457557             24
BM_GraphCreation/32k/16  326978055      327510864              2
PiperOrigin-RevId: 225466082
---
 tensorflow/core/graph/edgeset.cc           |  5 ++---
 tensorflow/core/graph/graph_constructor.cc | 18 +++++++++---------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/graph/edgeset.cc b/tensorflow/core/graph/edgeset.cc
index 2e0c671461..e3b88994b5 100644
--- a/tensorflow/core/graph/edgeset.cc
+++ b/tensorflow/core/graph/edgeset.cc
@@ -38,9 +38,8 @@ std::pair<EdgeSet::const_iterator, bool> EdgeSet::insert(value_type value) {
     }
     // array is full. convert to set.
     s = new std::set<const Edge*>;
-    for (int i = 0; i < kInline; i++) {
-      s->insert(static_cast<const Edge*>(ptrs_[i]));
-    }
+    s->insert(reinterpret_cast<const Edge**>(std::begin(ptrs_)),
+              reinterpret_cast<const Edge**>(std::end(ptrs_)));
     ptrs_[0] = this;
     ptrs_[1] = s;
     // fall through.
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc
index f6d83d5f6f..ac1b690df3 100644
--- a/tensorflow/core/graph/graph_constructor.cc
+++ b/tensorflow/core/graph/graph_constructor.cc
@@ -35,6 +35,8 @@ limitations under the License.
 #include "tensorflow/core/graph/graph.h"
 #include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/gtl/inlined_vector.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@@ -268,22 +270,20 @@ class GraphConstructor {
     int gdef_index;
     Node* node;  // nullptr until the NodeDef is converted to a Node.
   };
-  // TODO(vrv): Profile this data structure to see if we should use an
-  // alternative implementation of std::unordered_map.
-  std::unordered_map<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
+  gtl::FlatMap<StringPiece, NodeInfo, StringPieceHasher> gdef_nodes_;
 
   // Prefixes already used in the GraphDef being imported.
-  std::unordered_set<StringPiece, StringPieceHasher> gdef_prefixes_;
+  gtl::FlatSet<StringPiece, StringPieceHasher> gdef_prefixes_;
 
   // Mapping from node name to the existing node in g_.
-  std::unordered_map<StringPiece, Node*, StringPieceHasher> existing_nodes_;
+  gtl::FlatMap<StringPiece, Node*, StringPieceHasher> existing_nodes_;
 
   // Prefixes already used in the graph.
-  std::unordered_set<StringPiece, StringPieceHasher> existing_prefixes_;
+  gtl::FlatSet<StringPiece, StringPieceHasher> existing_prefixes_;
 
   // Imported node names that have been uniquified. The key is the original
   // name, the value is the new unique name.
-  std::unordered_map<string, string> uniquified_names_;
+  gtl::FlatMap<string, string> uniquified_names_;
 
   // Index of NodeDefs in node_defs_ with all inputs already converted. We use a
   // (sorted) set so nodes are created in the order defined in the GraphDef.
@@ -360,7 +360,7 @@ bool NodeNameInValues(const std::vector<string>& control_dependencies,
 // Adds any prefixes of `node_name` (not including the full name itself) to
 // `prefixes`.
 void AddPrefixes(StringPiece node_name,
-                 std::unordered_set<StringPiece, StringPieceHasher>* prefixes) {
+                 gtl::FlatSet<StringPiece, StringPieceHasher>* prefixes) {
   size_t idx = -1;
   while ((idx = node_name.find('/', idx + 1)) != StringPiece::npos) {
     prefixes->insert(node_name.substr(0, idx));
@@ -857,7 +857,7 @@ void GraphConstructor::UpdateUniquifiedColocationNames() {
     for (int i = 0; i < coloc_values.size(); ++i) {
       StringPiece val(coloc_values[i]);
       if (str_util::ConsumePrefix(&val, kColocationGroupPrefix)) {
-        const auto& name_pair = uniquified_names_.find(string(val));
+        auto name_pair = uniquified_names_.find(string(val));
         if (name_pair == uniquified_names_.end()) continue;
         updated = true;
         coloc_values[i] =
-- 
GitLab


From 45a6f48469023f1c7615f80e6d026f85350fa051 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Thu, 13 Dec 2018 18:03:31 -0800
Subject: [PATCH 569/873] Don't do VLOG(0) in mutable_graph_view

PiperOrigin-RevId: 225468686
---
 tensorflow/core/grappler/mutable_graph_view.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/mutable_graph_view.cc b/tensorflow/core/grappler/mutable_graph_view.cc
index 1a4754153b..09268f8e24 100644
--- a/tensorflow/core/grappler/mutable_graph_view.cc
+++ b/tensorflow/core/grappler/mutable_graph_view.cc
@@ -68,7 +68,7 @@ void MutableGraphView::UpdateFanouts(absl::string_view from_node,
 }
 
 void MutableGraphView::UpdateFanouts(NodeDef* from_node, NodeDef* to_node) {
-  VLOG(0) << absl::Substitute("Update fanouts from '$0' to '$1'.",
+  VLOG(2) << absl::Substitute("Update fanouts from '$0' to '$1'.",
                               from_node->name(), to_node->name());
 
   // Update internal state with the new output_port->input_port edge.
-- 
GitLab


From 4b63e8c3662d5890548d38bc3fd2fdd926ff15e0 Mon Sep 17 00:00:00 2001
From: Siju <siju.samuel@huawei.com>
Date: Fri, 14 Dec 2018 08:35:11 +0530
Subject: [PATCH 570/873] Update custom_operators.md

Documentation issue
---
 tensorflow/lite/g3doc/custom_operators.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/lite/g3doc/custom_operators.md b/tensorflow/lite/g3doc/custom_operators.md
index 4a22d6a675..bb7fbd9cfd 100644
--- a/tensorflow/lite/g3doc/custom_operators.md
+++ b/tensorflow/lite/g3doc/custom_operators.md
@@ -137,7 +137,7 @@ operations instead of a single operator.
 
 ## Special TF Graph Attributes
 
-When Toco convertes a TF graph into TFLite format, it makes some assumption
+When Toco converts a TF graph into TFLite format, it makes some assumption
 about custom operations that might be not correct. In this case, the generated
 graph can be not executable.
 
-- 
GitLab


From fbb933775656555495e53fefc46b700bc9e6a47b Mon Sep 17 00:00:00 2001
From: Siju <siju.samuel@huawei.com>
Date: Fri, 14 Dec 2018 08:36:18 +0530
Subject: [PATCH 571/873] Update RELEASE.md

Documentation issue
---
 RELEASE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RELEASE.md b/RELEASE.md
index 32abdcea49..ddd83cd12b 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -289,7 +289,7 @@ Ag Ramesh, Alex Wiltschko, Alexander Pantyukhin, Amogh Mannekote, An Jiaoyang, A
     [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details
 * `tf.data`:
   * `Dataset.from_generator()` now accepts an `args` list, in order to create nested generators.
-  * `Dataset.list_files()` now produces determinstic results when `shuffle=False` or a `seed` is passed.
+  * `Dataset.list_files()` now produces deterministic results when `shuffle=False` or a `seed` is passed.
   * `tf.contrib.data.sample_from_datasets()` and `tf.contrib.data.choose_from_datasets()` make it easier to sample or deterministically choose elements from multiple datasets.
   * `tf.contrib.data.make_csv_dataset()` now supports line breaks in quoted strings, and two infrequently used arguments removed.
   * (C++) `DatasetBase::DebugString()` is now `const`.
-- 
GitLab


From 0b0dea8cf10801e8786008bccf3269a6776ca811 Mon Sep 17 00:00:00 2001
From: Andy Ly <lyandy@google.com>
Date: Thu, 13 Dec 2018 19:03:48 -0800
Subject: [PATCH 572/873] [Grappler] Add node fanin mutations in
 MutableGraphView.

PiperOrigin-RevId: 225474536
---
 tensorflow/core/grappler/BUILD                |   6 +-
 .../core/grappler/mutable_graph_view.cc       | 206 +++++++++-
 tensorflow/core/grappler/mutable_graph_view.h |  61 ++-
 .../core/grappler/mutable_graph_view_test.cc  | 352 ++++++++++++++++++
 4 files changed, 619 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index f353d789d4..6e3012000f 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -176,12 +176,14 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":graph_view",
-        ":grappler_item",
+        ":op_types",
         ":utils",
         "//tensorflow/core:graph",
         "//tensorflow/core:lib",
         "//tensorflow/core:protos_all_cc",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
+        "@com_google_absl//absl/types:span",
     ],
 )
 
@@ -191,7 +193,9 @@ tf_cc_test(
     deps = [
         ":grappler_item",
         ":mutable_graph_view",
+        ":utils",
         "//tensorflow/cc:cc_ops",
+        "//tensorflow/core:graph",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
diff --git a/tensorflow/core/grappler/mutable_graph_view.cc b/tensorflow/core/grappler/mutable_graph_view.cc
index 09268f8e24..224b720328 100644
--- a/tensorflow/core/grappler/mutable_graph_view.cc
+++ b/tensorflow/core/grappler/mutable_graph_view.cc
@@ -14,14 +14,32 @@ limitations under the License.
 ==============================================================================*/
 
 #include "tensorflow/core/grappler/mutable_graph_view.h"
+
+#include <algorithm>
+#include <utility>
+
 #include "absl/strings/str_cat.h"
 #include "absl/strings/substitute.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/tensor_id.h"
+#include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace grappler {
 
+namespace {
+
+bool IsTensorIdPortValid(const TensorId& tensor_id) {
+  return tensor_id.index() >= Graph::kControlSlot;
+}
+
+}  // namespace
+
 const absl::flat_hash_set<MutableGraphView::InputPort>&
 MutableGraphView::GetFanout(const GraphView::OutputPort& port) const {
   return GetFanout(MutableGraphView::OutputPort(const_cast<NodeDef*>(port.node),
@@ -160,17 +178,201 @@ void MutableGraphView::UpdateFanouts(NodeDef* from_node, NodeDef* to_node) {
   }
 }
 
+bool MutableGraphView::AddFanin(NodeDef* node, const TensorId& fanin) {
+  NodeDef* fanin_node = GetNode(fanin.node());
+  if (fanin_node == nullptr) {
+    return false;
+  }
+
+  int num_non_controlling_fanins =
+      NumFanins(*node, /*include_controlling_nodes=*/false);
+  InputPort input;
+  input.node = node;
+  input.port_id = fanin.index() == Graph::kControlSlot
+                      ? Graph::kControlSlot
+                      : num_non_controlling_fanins;
+
+  OutputPort fanin_port(fanin_node, fanin.index());
+
+  if (!gtl::InsertIfNotPresent(&fanouts()[fanin_port], input)) {
+    return false;
+  }
+  node->add_input(TensorIdToString(fanin));
+  if (fanin.index() > Graph::kControlSlot) {
+    int node_input_size = node->input_size() - 1;
+    // If there are control dependencies in node, move newly inserted fanin to
+    // be before such control dependencies.
+    if (num_non_controlling_fanins < node_input_size) {
+      node->mutable_input()->SwapElements(node_input_size,
+                                          num_non_controlling_fanins);
+    }
+  }
+  return true;
+}
+
+bool MutableGraphView::AddFanin(absl::string_view node_name,
+                                const TensorId& fanin) {
+  if (!IsTensorIdPortValid(fanin)) {
+    return false;
+  }
+  NodeDef* node = GetNode(node_name);
+  if (node == nullptr) {
+    return false;
+  }
+  return AddFanin(node, fanin);
+}
+
+bool MutableGraphView::RemoveFanins(NodeDef* node,
+                                    absl::Span<const TensorId> fanins) {
+  bool modified = false;
+  auto mutable_inputs = node->mutable_input();
+  int curr_pos = 0;
+  int num_inputs = node->input_size();
+  for (int i = 0; i < num_inputs; ++i) {
+    TensorId tensor_id = ParseTensorName(node->input(i));
+    bool remove_fanin =
+        std::find(fanins.begin(), fanins.end(), tensor_id) != fanins.end();
+    bool update_fanin = !remove_fanin && modified;
+    if (remove_fanin || update_fanin) {
+      OutputPort fanin(nodes()[tensor_id.node()], tensor_id.index());
+
+      InputPort input;
+      input.node = node;
+      input.port_id =
+          tensor_id.index() == Graph::kControlSlot ? Graph::kControlSlot : i;
+
+      if (remove_fanin) {
+        fanouts()[fanin].erase(input);
+      } else {
+        // Shift inputs to be retained.
+        if (tensor_id.index() > Graph::kControlSlot) {
+          fanouts()[fanin].erase(input);
+          fanouts()[fanin].insert(InputPort(node, i));
+        }
+        mutable_inputs->SwapElements(i, curr_pos++);
+      }
+
+      modified = true;
+    } else {
+      // Skip inputs to be retained until first modification.
+      curr_pos++;
+    }
+  }
+  if (modified) {
+    mutable_inputs->DeleteSubrange(curr_pos, num_inputs - curr_pos);
+  }
+  return modified;
+}
+
+bool MutableGraphView::RemoveFanin(absl::string_view node_name,
+                                   const TensorId& fanin) {
+  if (!IsTensorIdPortValid(fanin)) {
+    return false;
+  }
+  NodeDef* node = GetNode(node_name);
+  if (node == nullptr) {
+    return false;
+  }
+  return RemoveFanins(node, {fanin});
+}
+
+bool MutableGraphView::RemoveAllFanins(absl::string_view node_name,
+                                       bool keep_controlling_fanins) {
+  NodeDef* node = GetNode(node_name);
+  if (node == nullptr || node->input().empty()) {
+    return false;
+  }
+  RemoveFaninsInternal(node, keep_controlling_fanins);
+  if (keep_controlling_fanins) {
+    int num_non_controlling_fanins =
+        NumFanins(*node, /*include_controlling_nodes=*/false);
+    if (num_non_controlling_fanins == 0) {
+      return false;
+    } else if (num_non_controlling_fanins < node->input_size()) {
+      node->mutable_input()->DeleteSubrange(0, num_non_controlling_fanins);
+    } else {
+      node->clear_input();
+    }
+  } else {
+    node->clear_input();
+  }
+  return true;
+}
+
+bool MutableGraphView::UpdateFanin(absl::string_view node_name,
+                                   const TensorId& from_fanin,
+                                   const TensorId& to_fanin) {
+  if (from_fanin == to_fanin || !IsTensorIdPortValid(from_fanin) ||
+      !IsTensorIdPortValid(to_fanin)) {
+    return false;
+  }
+  NodeDef* node = GetNode(node_name);
+  if (node == nullptr) {
+    return false;
+  }
+
+  bool is_from_fanin_control = from_fanin.index() == Graph::kControlSlot;
+  bool is_to_fanin_control = to_fanin.index() == Graph::kControlSlot;
+  // When replacing a non control dependency fanin with a control dependency, or
+  // vice versa, remove and add, so ports can be updated properly in fanout(s).
+  if (is_from_fanin_control || is_to_fanin_control) {
+    bool modified = RemoveFanins(node, {from_fanin});
+    if (!HasFanin(*node, to_fanin)) {
+      modified |= AddFanin(node, to_fanin);
+    }
+    return modified;
+  }
+
+  // In place mutation, requires no shifting of ports.
+  NodeDef* from_fanin_node = GetNode(from_fanin.node());
+  NodeDef* to_fanin_node = GetNode(to_fanin.node());
+  if (from_fanin_node == nullptr || to_fanin_node == nullptr) {
+    return false;
+  }
+
+  string to_fanin_string = TensorIdToString(to_fanin);
+  int num_inputs = node->input_size();
+  bool modified = false;
+  for (int i = 0; i < num_inputs; ++i) {
+    if (ParseTensorName(node->input(i)) == from_fanin) {
+      OutputPort from_fanin_port(from_fanin_node, from_fanin.index());
+      InputPort old_input;
+      old_input.node = node;
+      old_input.port_id =
+          from_fanin.index() == Graph::kControlSlot ? Graph::kControlSlot : i;
+      fanouts()[from_fanin_port].erase(old_input);
+
+      OutputPort to_fanin_port(to_fanin_node, to_fanin.index());
+      InputPort new_input;
+      new_input.node = node;
+      new_input.port_id =
+          to_fanin.index() == Graph::kControlSlot ? Graph::kControlSlot : i;
+      fanouts()[to_fanin_port].insert(new_input);
+
+      node->set_input(i, to_fanin_string);
+      modified = true;
+    }
+  }
+
+  return modified;
+}
+
 void MutableGraphView::DeleteNodes(const std::set<string>& nodes_to_delete) {
   for (const string& node_name_to_delete : nodes_to_delete)
-    RemoveFanouts(nodes().at(node_name_to_delete));
+    RemoveFaninsInternal(nodes().at(node_name_to_delete),
+                         /*keep_controlling_fanins=*/false);
   for (const string& node_name_to_delete : nodes_to_delete)
     nodes().erase(node_name_to_delete);
   EraseNodesFromGraph(nodes_to_delete, graph());
 }
 
-void MutableGraphView::RemoveFanouts(NodeDef* deleted_node) {
+void MutableGraphView::RemoveFaninsInternal(NodeDef* deleted_node,
+                                            bool keep_controlling_fanins) {
   for (int i = 0; i < deleted_node->input_size(); ++i) {
     TensorId tensor_id = ParseTensorName(deleted_node->input(i));
+    if (keep_controlling_fanins && tensor_id.index() < 0) {
+      break;
+    }
     OutputPort fanin(nodes()[tensor_id.node()], tensor_id.index());
 
     InputPort input;
diff --git a/tensorflow/core/grappler/mutable_graph_view.h b/tensorflow/core/grappler/mutable_graph_view.h
index 355dd6c491..8025b8ca77 100644
--- a/tensorflow/core/grappler/mutable_graph_view.h
+++ b/tensorflow/core/grappler/mutable_graph_view.h
@@ -16,7 +16,17 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_GRAPPLER_MUTABLE_GRAPH_VIEW_H_
 #define TENSORFLOW_CORE_GRAPPLER_MUTABLE_GRAPH_VIEW_H_
 
+#include <set>
+#include <string>
+
+#include "absl/container/flat_hash_set.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/grappler/graph_view.h"
+#include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
 namespace grappler {
@@ -60,6 +70,38 @@ class MutableGraphView : public internal::GraphViewInternal<GraphDef, NodeDef> {
   //   2. foo2(new_bar:1, other:1)
   void UpdateFanouts(absl::string_view from_node, absl::string_view to_node);
 
+  // Add fanin to node `node_name`. If the node or fanin do not exist in the
+  // graph, nothing will be modified in the graph. If fanin is a control
+  // dependency, existing control dependencies will be checked first before
+  // adding. Otherwise fanin will be added after existing non control dependency
+  // inputs.
+  //
+  // This will return true iff the node is modified. If a control dependency
+  // already exists, the node will not be modified.
+  bool AddFanin(absl::string_view node_name, const TensorId& fanin);
+
+  // Remove fanin from node `node_name`. If the node or fanin do not exist in
+  // the graph, nothing will be modified in the graph. If there are multiple
+  // inputs that match the fanin, all of them will be removed.
+  //
+  // This will return true iff the node is modified. If no inputs match the
+  // fanin, the node will not be modified.
+  bool RemoveFanin(absl::string_view node_name, const TensorId& fanin);
+
+  // Remove all fanins from node `node_name`. Control dependencies will be
+  // retained if keep_controlling_fanins is true.
+  //
+  // This will return true iff the node is modified.
+  bool RemoveAllFanins(absl::string_view node_name,
+                       bool keep_controlling_fanins);
+
+  // Replace all fanins `from_fanin` with `to_fanin` in node `node_name`. If
+  // the fanins or node do not exist, nothing will be modified in the graph.
+  //
+  // This will return true iff the node is modified.
+  bool UpdateFanin(absl::string_view node_name, const TensorId& from_fanin,
+                   const TensorId& to_fanin);
+
   // Deletes nodes from the graph.
   void DeleteNodes(const std::set<string>& nodes_to_delete);
 
@@ -79,9 +121,22 @@ class MutableGraphView : public internal::GraphViewInternal<GraphDef, NodeDef> {
   // behavior is undefined.
   void UpdateFanouts(NodeDef* from_node, NodeDef* to_node);
 
-  // Remove fanouts of the deleted node from internal state (including control
-  // dependencies).
-  void RemoveFanouts(NodeDef* deleted_node);
+  // Remove fanins of the deleted node from internal state. Control dependencies
+  // are retained iff keep_controlling_fanins is true.
+  void RemoveFaninsInternal(NodeDef* deleted_node,
+                            bool keep_controlling_fanins);
+
+  // Add fanin to node. If the node or fanin do not exist in the graph, nothing
+  // will be modified in the graph. If fanin is a control dependency, existing
+  // control dependencies will be checked first before adding. Otherwise fanin
+  // will be added after existing non control dependency inputs.
+  //
+  // This will return true iff the node is modified. If a control dependency
+  // already exists, the node will not be modified.
+  bool AddFanin(NodeDef* node, const TensorId& fanin);
+
+  // Remove any fanin in node that matches to a fanin in fanins.
+  bool RemoveFanins(NodeDef* node, absl::Span<const TensorId> fanins);
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/mutable_graph_view_test.cc b/tensorflow/core/grappler/mutable_graph_view_test.cc
index c1b3f8c01c..cd7e638595 100644
--- a/tensorflow/core/grappler/mutable_graph_view_test.cc
+++ b/tensorflow/core/grappler/mutable_graph_view_test.cc
@@ -16,8 +16,10 @@ limitations under the License.
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
+#include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/platform/test.h"
 
 namespace tensorflow {
@@ -110,6 +112,356 @@ TEST(MutableGraphViewTest, AddAndUpdateFanoutsWithoutSelfLoops) {
   EXPECT_EQ(new_bar_fanouts.count(MutableGraphView::InputPort(foo, -1)), 1);
 }
 
+GraphDef SimpleMutateFaninGraph() {
+  // Actual node.op() is not important in this test.
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("b", "NotImportant", {}, {}),
+       NDef("c", "NotImportant", {}, {}), NDef("d", "NotImportant", {}, {}),
+       NDef("foo_1", "NotImportant", {"a"}),
+       NDef("foo_2", "NotImportant", {"b", "^a", "^c"}),
+       NDef("foo_3", "NotImportant", {"b", "a:1", "a:1"}),
+       NDef("foo_4", "NotImportant", {"a", "b:2", "b:2", "^c", "^d"}),
+       NDef("foo_5", "NotImportant", {}),
+       NDef("foo_6", "NotImportant", {"^a", "^b"})},
+      /*funcs=*/{});
+  return graph_def;
+}
+
+void CompareNodeInputs(const MutableGraphView& graph, const NodeDef* expected,
+                       NodeDef* actual) {
+  ASSERT_EQ(actual->input_size(), expected->input_size());
+  int port;
+  for (int i = 0; i < actual->input_size(); ++i) {
+    EXPECT_EQ(actual->input(i), expected->input(i));
+    TensorId tensor_id = ParseTensorName(expected->input(i));
+    if (tensor_id.index() == Graph::kControlSlot) {
+      port = Graph::kControlSlot;
+    } else {
+      port = i;
+    }
+    MutableGraphView::InputPort input_port(actual, port);
+    MutableGraphView::OutputPort output_port =
+        graph.GetOutputPort(tensor_id.node(), tensor_id.index());
+    EXPECT_EQ(graph.GetFanin(input_port).contains(output_port), true);
+    EXPECT_EQ(graph.GetFanout(output_port).contains(input_port), true);
+  }
+}
+
+void TestAddFanin(absl::string_view node_name, const TensorId& fanin_to_add,
+                  bool modified, const NodeDef* expected_node) {
+  GraphDef graph_def = SimpleMutateFaninGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  auto node = graph.GetNode(node_name);
+  if (expected_node == nullptr) {
+    EXPECT_EQ(node, nullptr);
+  } else {
+    EXPECT_NE(node, nullptr);
+  }
+
+  EXPECT_EQ(modified, graph.AddFanin(node_name, fanin_to_add));
+  if (expected_node != nullptr) {
+    CompareNodeInputs(graph, expected_node, node);
+  }
+}
+
+TEST(MutableGraphViewTest, AddFanin) {
+  NodeDef expected_node;
+  // Add input to node with 1 input 0 controls.
+  expected_node = NDef("", "", {"a", "b:1"});
+  TestAddFanin("foo_1", {"b", 1}, /*modified=*/true, &expected_node);
+  // Add input to node with multiple inputs and 0 controls.
+  expected_node = NDef("", "", {"b", "a:1", "a:1", "b:2"});
+  TestAddFanin("foo_3", {"b", 2}, /*modified=*/true, &expected_node);
+  // Add input to node with 1 input multiple controls.
+  expected_node = NDef("", "", {"b", "a", "^c", "^a"});
+  TestAddFanin("foo_2", {"a", 0}, /*modified=*/true, &expected_node);
+  // Add input to node with multiple inputs and controls.
+  expected_node = NDef("", "", {"a", "b:2", "b:2", "a:1", "^d", "^c"});
+  TestAddFanin("foo_4", {"a", 1}, /*modified=*/true, &expected_node);
+  // Add input to node with 0 inputs 0 controls.
+  expected_node = NDef("", "", {"a:1"});
+  TestAddFanin("foo_5", {"a", 1}, /*modified=*/true, &expected_node);
+  // Add input to node with 0 inputs multiple controls.
+  expected_node = NDef("", "", {"c:1", "^b", "^a"});
+  TestAddFanin("foo_6", {"c", 1}, /*modified=*/true, &expected_node);
+
+  // Add control to node with 1 input 0 controls.
+  expected_node = NDef("", "", {"a", "^b"});
+  TestAddFanin("foo_1", {"b", Graph::kControlSlot}, /*modified=*/true,
+               &expected_node);
+  // Add control to node with multiple inputs and 0 controls.
+  expected_node = NDef("", "", {"b", "a:1", "a:1", "^c"});
+  TestAddFanin("foo_3", {"c", Graph::kControlSlot}, /*modified=*/true,
+               &expected_node);
+  // Add control to node with 1 input multiple controls.
+  expected_node = NDef("", "", {"b", "^a", "^c", "^d"});
+  TestAddFanin("foo_2", {"d", Graph::kControlSlot}, /*modified=*/true,
+               &expected_node);
+  // Add control to node with multiple input multiple controls.
+  expected_node = NDef("", "", {"a", "b:2", "b:2", "^c", "^d", "^a"});
+  TestAddFanin("foo_4", {"a", Graph::kControlSlot}, /*modified=*/true,
+               &expected_node);
+  // Add control to node with 0 inputs 0 controls.
+  expected_node = NDef("", "", {"^a"});
+  TestAddFanin("foo_5", {"a", Graph::kControlSlot}, /*modified=*/true,
+               &expected_node);
+  // Add control to node with 0 inputs multiple controls.
+  expected_node = NDef("", "", {"^a", "^b", "^c"});
+  TestAddFanin("foo_6", {"c", Graph::kControlSlot}, /*modified=*/true,
+               &expected_node);
+  // Add control to node with control that already exists.
+  expected_node = NDef("", "", {"b", "^a", "^c"});
+  TestAddFanin("foo_2", {"a", Graph::kControlSlot}, /*modified=*/false,
+               &expected_node);
+
+  // Add fanin to node where node is missing.
+  TestAddFanin("foo_missing", {"a", 0}, /*modified=*/false, nullptr);
+  // Add fanin to node where fanin is missing.
+  expected_node = NDef("", "", {"a"});
+  TestAddFanin("foo_1", {"bar_missing", 0}, /*modified=*/false, &expected_node);
+  // Add fanin to node where node and fanin are missing.
+  TestAddFanin("foo_missing", {"bar_missing", 0}, /*modified=*/false,
+               /*expected_node=*/nullptr);
+}
+
+void CheckFanout(const MutableGraphView& graph, const TensorId& fanin,
+                 absl::string_view node_name) {
+  MutableGraphView::OutputPort output_port =
+      graph.GetOutputPort(fanin.node(), fanin.index());
+  auto fanouts = graph.GetFanout(output_port);
+  for (auto fanout : fanouts) {
+    EXPECT_NE(fanout.node->name(), fanin.node());
+  }
+}
+
+void TestRemoveFanin(absl::string_view node_name,
+                     const TensorId& fanin_to_remove, bool modified,
+                     const NodeDef* expected_node) {
+  GraphDef graph_def = SimpleMutateFaninGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  auto node = graph.GetNode(node_name);
+  if (expected_node == nullptr) {
+    EXPECT_EQ(nullptr, node);
+  } else {
+    EXPECT_NE(nullptr, node);
+  }
+
+  EXPECT_EQ(modified, graph.RemoveFanin(node_name, fanin_to_remove));
+  if (expected_node != nullptr) {
+    CompareNodeInputs(graph, expected_node, node);
+    if (modified) {
+      CheckFanout(graph, fanin_to_remove, node_name);
+    }
+  }
+}
+
+TEST(MutableGraphViewTest, RemoveFanin) {
+  NodeDef expected_node;
+  // Remove input from node with 1 input 0 controls.
+  expected_node = NDef("", "", {});
+  TestRemoveFanin("foo_1", {"a", 0}, /*modified=*/true, &expected_node);
+  // Remove input from node with multiple inputs and 0 controls.
+  expected_node = NDef("", "", {"b"});
+  TestRemoveFanin("foo_3", {"a", 1}, /*modified=*/true, &expected_node);
+  // Remove input from node with 1 input multiple controls.
+  expected_node = NDef("", "", {"^a", "^c"});
+  TestRemoveFanin("foo_2", {"b", 0}, /*modified=*/true, &expected_node);
+  // Remove input from node with multiple inputs and controls.
+  expected_node = NDef("", "", {"a", "^c", "^d"});
+  TestRemoveFanin("foo_4", {"b", 2}, /*modified=*/true, &expected_node);
+
+  // Remove control from node with 1 input multiple controls.
+  expected_node = NDef("", "", {"b", "^c"});
+  TestRemoveFanin("foo_2", {"a", Graph::kControlSlot}, /*modified=*/true,
+                  &expected_node);
+  // Remove control from node with multiple input multiple controls.
+  expected_node = NDef("", "", {"a", "b:2", "b:2", "^c"});
+  TestRemoveFanin("foo_4", {"d", Graph::kControlSlot}, /*modified=*/true,
+                  &expected_node);
+  // Remove control from node with 0 inputs multiple controls.
+  expected_node = NDef("", "", {"^b"});
+  TestRemoveFanin("foo_6", {"a", Graph::kControlSlot}, /*modified=*/true,
+                  &expected_node);
+
+  // Remove input from node with 0 inputs 0 controls.
+  expected_node = NDef("", "", {});
+  TestRemoveFanin("foo_5", {"a", 1}, /*modified=*/false, &expected_node);
+  // Remove input from node with 0 inputs multiple controls.
+  expected_node = NDef("", "", {"^a", "^b"});
+  TestRemoveFanin("foo_6", {"a", 1}, /*modified=*/false, &expected_node);
+  // Remove control from node with 1 input 0 controls.
+  expected_node = NDef("", "", {"a"});
+  TestRemoveFanin("foo_1", {"b", Graph::kControlSlot}, /*modified=*/false,
+                  &expected_node);
+  // Remove control from node with multiple inputs and 0 controls.
+  expected_node = NDef("", "", {"b", "a:1", "a:1"});
+  TestRemoveFanin("foo_3", {"c", Graph::kControlSlot}, /*modified=*/false,
+                  &expected_node);
+  // Remove control from node with 0 inputs 0 controls.
+  expected_node = NDef("", "", {});
+  TestRemoveFanin("foo_5", {"a", Graph::kControlSlot}, /*modified=*/false,
+                  &expected_node);
+
+  // Remove fanin from node where node is missing.
+  TestRemoveFanin("foo_missing", {"a", 0}, /*modified=*/false,
+                  /*expected_node=*/nullptr);
+  // Remove fanin from node where fanin is missing.
+  expected_node = NDef("", "", {"a"});
+  TestRemoveFanin("foo_1", {"bar_missing", 0}, /*modified=*/false,
+                  &expected_node);
+  // Remove fanin from node where node and fanin are missing.
+  TestRemoveFanin("foo_missing", {"bar_missing", 0}, /*modified=*/false,
+                  /*expected_node=*/nullptr);
+}
+
+void TestRemoveAllFanins(absl::string_view node_name,
+                         bool keep_controlling_nodes, bool modified,
+                         const NodeDef* expected_node) {
+  GraphDef graph_def = SimpleMutateFaninGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  auto node = graph.GetNode(node_name);
+  absl::flat_hash_set<string> fanin_strings;
+  if (expected_node == nullptr) {
+    EXPECT_EQ(node, nullptr);
+  } else {
+    EXPECT_NE(node, nullptr);
+    fanin_strings.insert(node->input().begin(), node->input().end());
+  }
+
+  EXPECT_EQ(modified, graph.RemoveAllFanins(node_name, keep_controlling_nodes));
+  if (expected_node != nullptr) {
+    CompareNodeInputs(graph, expected_node, node);
+    if (modified) {
+      TensorId tensor_id;
+      auto retained_inputs = absl::flat_hash_set<string>(node->input().begin(),
+                                                         node->input().end());
+      for (const string& fanin : fanin_strings) {
+        if (!retained_inputs.contains(fanin)) {
+          tensor_id = ParseTensorName(fanin);
+          CheckFanout(graph, tensor_id, node_name);
+        }
+      }
+    }
+  }
+}
+
+TEST(MutableGraphViewTest, RemoveAllFanins) {
+  NodeDef expected_node;
+  // Remove all fanins from node with no control dependencies.
+  expected_node = NDef("", "", {});
+  TestRemoveAllFanins("foo_3", /*keep_controlling_nodes=*/false,
+                      /*modified=*/true, &expected_node);
+  // Remove all fanins from node with control dependencies.
+  TestRemoveAllFanins("foo_4", /*keep_controlling_nodes=*/false,
+                      /*modified=*/true, &expected_node);
+
+  // Remove all fanins from node with no control dependencies and preserve
+  // control dependencies.
+  TestRemoveAllFanins("foo_3", /*keep_controlling_nodes=*/true,
+                      /*modified=*/true, &expected_node);
+  // Remove all fanins from node with control dependencies and preserve control
+  // dependencies.
+  expected_node = NDef("", "", {"^c", "^d"});
+  TestRemoveAllFanins("foo_4", /*keep_controlling_nodes=*/true,
+                      /*modified=*/true, &expected_node);
+
+  // Remove all fanins from node with no fanins.
+  expected_node = NDef("", "", {});
+  TestRemoveAllFanins("foo_5", /*keep_controlling_nodes=*/false,
+                      /*modified=*/false, &expected_node);
+  TestRemoveAllFanins("foo_5", /*keep_controlling_nodes=*/true,
+                      /*modified=*/false, &expected_node);
+
+  // Remove all fanins from node with only control dependencies.
+  TestRemoveAllFanins("foo_6", /*keep_controlling_nodes=*/false,
+                      /*modified=*/true, &expected_node);
+  expected_node = NDef("", "", {"^a", "^b"});
+  TestRemoveAllFanins("foo_6", /*keep_controlling_nodes=*/true,
+                      /*modified=*/false, &expected_node);
+
+  // Remove all fanins from node where node is missing.
+  TestRemoveAllFanins("foo_missing", /*keep_controlling_nodes=*/false,
+                      /*modified=*/false, /*expected_node=*/nullptr);
+  TestRemoveAllFanins("foo_missing", /*keep_controlling_nodes=*/true,
+                      /*modified=*/false, /*expected_node=*/nullptr);
+}
+
+void TestUpdateFanin(absl::string_view node_name, const TensorId& from_fanin,
+                     const TensorId& to_fanin, bool modified,
+                     const NodeDef* expected_node) {
+  GraphDef graph_def = SimpleMutateFaninGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  auto node = graph.GetNode(node_name);
+  if (expected_node == nullptr) {
+    EXPECT_EQ(node, nullptr);
+  } else {
+    EXPECT_NE(node, nullptr);
+  }
+
+  EXPECT_EQ(modified, graph.UpdateFanin(node_name, from_fanin, to_fanin));
+  if (expected_node != nullptr) {
+    CompareNodeInputs(graph, expected_node, node);
+    if (modified) {
+      CheckFanout(graph, from_fanin, node_name);
+    }
+  }
+}
+
+TEST(MutableGraphViewTest, UpdateFanin) {
+  NodeDef expected_node;
+  // Update fanin from non control to non control.
+  expected_node = NDef("", "", {"a", "b:3", "b:3", "^c", "^d"});
+  TestUpdateFanin("foo_4", {"b", 2}, {"b", 3}, /*modified=*/true,
+                  &expected_node);
+  // Update fanin from non control to control.
+  expected_node = NDef("", "", {"a", "^c", "^d", "^b"});
+  TestUpdateFanin("foo_4", {"b", 2}, {"b", Graph::kControlSlot},
+                  /*modified=*/true, &expected_node);
+  // Update fanin from control to non control.
+  expected_node = NDef("", "", {"a", "b:2", "b:2", "d:1", "^c"});
+  TestUpdateFanin("foo_4", {"d", Graph::kControlSlot}, {"d", 1},
+                  /*modified=*/true, &expected_node);
+  // Update fanin from control to control.
+  expected_node = NDef("", "", {"a", "b:2", "b:2", "^d", "^b"});
+  TestUpdateFanin("foo_4", {"c", Graph::kControlSlot},
+                  {"b", Graph::kControlSlot}, /*modified=*/true,
+                  &expected_node);
+  // Update fanin from control to existing control.
+  expected_node = NDef("", "", {"a", "b:2", "b:2", "^d"});
+  TestUpdateFanin("foo_4", {"c", Graph::kControlSlot},
+                  {"d", Graph::kControlSlot}, /*modified=*/true,
+                  &expected_node);
+
+  // Update fanin of node where from and to fanins are the same.
+  expected_node = NDef("", "", {"a"});
+  TestUpdateFanin("foo_1", {"a", -1}, {"a", -1}, /*modified=*/false,
+                  &expected_node);
+  TestUpdateFanin("foo_1", {"a", 0}, {"a", 0}, /*modified=*/false,
+                  &expected_node);
+  TestUpdateFanin("foo_1", {"a", 1}, {"a", 1}, /*modified=*/false,
+                  &expected_node);
+  // Update fanin of node where node is missing.
+  TestUpdateFanin("foo_missing", {"a", 0}, {"a", 1}, /*modified=*/false,
+                  /*expected_node=*/nullptr);
+  // Update fanin of node where from fanin is missing.
+  TestUpdateFanin("foo_1", {"from_bar_missing", 0}, {"a", 1},
+                  /*modified=*/false, &expected_node);
+  // Update fanin of node where to fanin is missing.
+  TestUpdateFanin("foo_1", {"a", 0}, {"to_bar_missing", 1}, /*modified=*/false,
+                  &expected_node);
+  // Update fanin of node where from/to fanins and node are missing.
+  TestUpdateFanin("foo_missing", {"from_bar_missing", 0}, {"to_bar_missing", 1},
+                  /*modified=*/false, /*expected_node=*/nullptr);
+}
+
 TEST(MutableGraphViewTest, DeleteNodes) {
   // Actual node.op() is not important in this test.
   GraphDef graph_def = test::function::GDef(
-- 
GitLab


From c406a363690d9cf74e2ee27b2a2bdd91e40f5652 Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Thu, 13 Dec 2018 19:39:42 -0800
Subject: [PATCH 573/873] [tf.data] Create resource manager for
 `ReduceDatasetOp` and `ToSingleElementOp`.

PiperOrigin-RevId: 225477525
---
 tensorflow/core/kernels/data/iterator_ops.cc | 54 +++++++++++---------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc
index d5b4bfa5c5..9f5881563b 100644
--- a/tensorflow/core/kernels/data/iterator_ops.cc
+++ b/tensorflow/core/kernels/data/iterator_ops.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/util/ptr_util.h"
 
 namespace tensorflow {
 namespace data {
@@ -60,8 +61,8 @@ class IteratorResource : public ResourceBase {
                    std::unique_ptr<ProcessFunctionLibraryRuntime> pflr,
                    FunctionLibraryRuntime* lib)
       : device_mgr_(std::move(device_mgr)),
-        iterator_state_(
-            new State(std::move(flib_def), std::move(pflr), lib, nullptr)),
+        iterator_state_(std::make_shared<State>(
+            std::move(flib_def), std::move(pflr), lib, nullptr /* iterator */)),
         output_dtypes_(output_dtypes),
         output_shapes_(output_shapes) {}
 
@@ -136,8 +137,8 @@ class IteratorResource : public ResourceBase {
     std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(nullptr);
     TF_RETURN_IF_ERROR(ctx->function_library()->Clone(&flib_def, &pflr, &lib));
     TF_RETURN_IF_ERROR(flib_def->AddLibrary(graph_def.library()));
-    std::unique_ptr<State> new_state(new State(
-        std::move(flib_def), std::move(pflr), lib, nullptr /* iterator */));
+    std::unique_ptr<State> new_state = MakeUnique<State>(
+        std::move(flib_def), std::move(pflr), lib, nullptr /* iterator */);
 
     TF_RETURN_IF_ERROR(
         graph_runner.Run(&graph, new_state->lib, {}, {output_node}, &outputs));
@@ -181,10 +182,10 @@ class IteratorResource : public ResourceBase {
     std::shared_ptr<State> new_state;
     {
       tf_shared_lock l(mu_);
-      new_state.reset(new State(iterator_state_->flib_def,
-                                iterator_state_->pflr, iterator_state_->lib,
-                                nullptr /* function_handle_cache */,
-                                nullptr /* iterator */));
+      new_state = std::make_shared<State>(
+          iterator_state_->flib_def, iterator_state_->pflr,
+          iterator_state_->lib, nullptr /* function_handle_cache */,
+          nullptr /* iterator */);
     }
 
     // Ensure that the iterator has access to all functions in the current
@@ -209,8 +210,8 @@ class IteratorResource : public ResourceBase {
       new_state->lib = lib;
     }
 
-    new_state->function_handle_cache.reset(
-        new FunctionHandleCache(new_state->lib));
+    new_state->function_handle_cache =
+        MakeUnique<FunctionHandleCache>(new_state->lib);
     // Create new iterator.
     std::unique_ptr<IteratorBase> iterator;
     IteratorContext::Params params(ctx);
@@ -246,7 +247,7 @@ class IteratorResource : public ResourceBase {
         : flib_def(flib_def),
           pflr(pflr),
           lib(lib),
-          function_handle_cache(absl::make_unique<FunctionHandleCache>(lib)),
+          function_handle_cache(MakeUnique<FunctionHandleCache>(lib)),
           iterator(std::move(iterator)) {}
 
     State(std::shared_ptr<FunctionLibraryDefinition> flib_def,
@@ -433,7 +434,7 @@ class IteratorStateVariant {
     SerializationContext::Params params;
     params.flib_def = ctx->function_library()->GetFunctionLibraryDefinition();
     SerializationContext serialization_ctx(params);
-    data_.reset(new VariantTensorData());
+    data_ = MakeUnique<VariantTensorData>();
     data_->set_type_name(TypeName());
     VariantTensorDataWriter writer(data_.get());
     TF_RETURN_IF_ERROR(iterator_resource->Save(&serialization_ctx, &writer));
@@ -446,10 +447,11 @@ class IteratorStateVariant {
     if (data.type_name() != TypeName()) {
       return false;
     }
-    std::unique_ptr<VariantTensorData> tensor_data(new VariantTensorData);
+    std::unique_ptr<VariantTensorData> tensor_data =
+        MakeUnique<VariantTensorData>();
     std::swap(*tensor_data, data);
-    std::unique_ptr<VariantTensorDataReader> reader(
-        new VariantTensorDataReader(tensor_data.get()));
+    std::unique_ptr<VariantTensorDataReader> reader =
+        MakeUnique<VariantTensorDataReader>(tensor_data.get());
     status_ = reader->status();
     if (!status_.ok()) {
       return false;
@@ -583,12 +585,12 @@ FunctionLibraryRuntime* IteratorHandleOp::CreatePrivateFLR(
   *device_mgr = absl::make_unique<DeviceMgr>(RenamedDevice::NewRenamedDevice(
       ctx->device()->name(), down_cast<Device*>(ctx->device()),
       false /* owns_underlying */, false /* isolate_session_state */));
-  flib_def->reset(new FunctionLibraryDefinition(
-      *ctx->function_library()->GetFunctionLibraryDefinition()));
-  pflr->reset(new ProcessFunctionLibraryRuntime(
+  *flib_def = MakeUnique<FunctionLibraryDefinition>(
+      *ctx->function_library()->GetFunctionLibraryDefinition());
+  *pflr = MakeUnique<ProcessFunctionLibraryRuntime>(
       device_mgr->get(), ctx->env(), graph_def_version_, flib_def->get(),
-      {} /* TODO(mrry): OptimizerOptions? */,
-      nullptr /* TODO(mrry): ClusterFLR */));
+      OptimizerOptions{} /* TODO(mrry): OptimizerOptions? */,
+      nullptr /* TODO(mrry): ClusterFLR */);
 
   return (*pflr)->GetFLR(ctx->device()->name());
 }
@@ -676,9 +678,11 @@ class ToSingleElementOp : public AsyncOpKernel {
           ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset), done);
       std::unique_ptr<IteratorBase> iterator;
       IteratorContext::Params params(ctx);
-      std::unique_ptr<FunctionHandleCache> function_handle_cache(
-          new FunctionHandleCache(params.lib));
+      std::unique_ptr<FunctionHandleCache> function_handle_cache =
+          MakeUnique<FunctionHandleCache>(params.lib);
       params.function_handle_cache = function_handle_cache.get();
+      std::unique_ptr<ResourceMgr> resource_mgr = MakeUnique<ResourceMgr>();
+      params.resource_mgr = resource_mgr.get();
       IteratorContext iter_ctx(std::move(params));
 
       OP_REQUIRES_OK_ASYNC(
@@ -764,9 +768,11 @@ class ReduceDatasetOp : public AsyncOpKernel {
           done);
 
       IteratorContext::Params params(ctx);
-      std::unique_ptr<FunctionHandleCache> function_handle_cache(
-          new FunctionHandleCache(params.lib));
+      std::unique_ptr<FunctionHandleCache> function_handle_cache =
+          MakeUnique<FunctionHandleCache>(params.lib);
       params.function_handle_cache = function_handle_cache.get();
+      std::unique_ptr<ResourceMgr> resource_mgr = MakeUnique<ResourceMgr>();
+      params.resource_mgr = resource_mgr.get();
       IteratorContext iter_ctx(std::move(params));
       std::unique_ptr<InstantiatedCapturedFunction> instantiated_captured_func;
       OP_REQUIRES_OK_ASYNC(
-- 
GitLab


From f0c4d5814ad6ecbba06bd813d94a2fefb2caba3d Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Thu, 13 Dec 2018 19:44:28 -0800
Subject: [PATCH 574/873] Make Layer base class track nested sublayers'
 weights, updates, and losses.

This enables users to create nested layers without having to subclass Model instead (commonly requested feature).

Note that automatic tracking of lists of layers via CheckpointableDatastructure is not yet supported.

Due to backwards compatibility concerns, we don't force users to call Layer's super __init__ before any other logic for the tracking to take place (unlike what we did in Model).

PiperOrigin-RevId: 225477916
---
 tensorflow/python/keras/engine/base_layer.py  | 75 +++++++++++++---
 .../python/keras/engine/base_layer_test.py    | 88 +++++++++++++++++++
 tensorflow/python/keras/engine/network.py     | 55 ++----------
 tensorflow/python/keras/layers/recurrent.py   |  2 +
 tensorflow/python/keras/layers/wrappers.py    |  3 +
 tensorflow/python/keras/models.py             | 27 +++---
 6 files changed, 181 insertions(+), 69 deletions(-)

diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index 858fa76472..c4c431d7d4 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import functools
 import inspect  # Necessary supplement to tf_inspect to deal with variadic args.
+import itertools
 
 import numpy as np
 from six.moves import zip  # pylint: disable=redefined-builtin
@@ -45,6 +46,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.checkpointable import layer_utils as checkpointable_layer_utils
 from tensorflow.python.util import function_utils
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
@@ -135,8 +137,10 @@ class Layer(checkpointable.CheckpointableBase):
 
     self._init_set_name(name)
     self._activity_regularizer = kwargs.pop('activity_regularizer', None)
-    self._trainable_weights = []
-    self._non_trainable_weights = []
+    if not hasattr(self, '_trainable_weights'):
+      self._trainable_weights = []
+    if not hasattr(self, '_non_trainable_weights'):
+      self._non_trainable_weights = []
     self._updates = []
     # A list of zero-argument lambdas which return Tensors, used for variable
     # regularizers.
@@ -164,6 +168,8 @@ class Layer(checkpointable.CheckpointableBase):
                                    hasattr(self, 'compute_mask'))
     self._call_convention = (base_layer_utils
                              .CallConvention.EXPLICIT_INPUTS_ARGUMENT)
+    if not hasattr(self, '_layers'):
+      self._layers = []  # Dependencies tracked via attribute assignment.
 
     # These lists will be filled via successive calls
     # to self._add_inbound_node().
@@ -517,8 +523,7 @@ class Layer(checkpointable.CheckpointableBase):
                         self._compute_previous_mask):
       previous_mask = base_layer_utils.collect_previous_mask(inputs)
       if not hasattr(self, '_call_fn_args'):
-        self._call_fn_args = self._no_dependency(
-            function_utils.fn_args(self.call))
+        self._call_fn_args = function_utils.fn_args(self.call)
       if ('mask' in self._call_fn_args and 'mask' not in kwargs and
           not generic_utils.is_all_none(previous_mask)):
         # The previous layer generated a mask, and mask was not explicitly pass
@@ -613,18 +618,24 @@ class Layer(checkpointable.CheckpointableBase):
   @activity_regularizer.setter
   def activity_regularizer(self, regularizer):
     """Optional regularizer function for the output of this layer."""
-    self._activity_regularizer = self._no_dependency(regularizer)
+    self._activity_regularizer = regularizer
 
   @property
   def trainable_weights(self):
-    return self._trainable_weights if self.trainable else []
+    if self.trainable:
+      nested = self._gather_children_attribute('trainable_weights')
+      return self._trainable_weights + nested
+    else:
+      return []
 
   @property
   def non_trainable_weights(self):
     if self.trainable:
-      return self._non_trainable_weights
+      nested = self._gather_children_attribute('non_trainable_weights')
+      return self._non_trainable_weights + nested
     else:
-      return self._trainable_weights + self._non_trainable_weights
+      nested = self._gather_children_attribute('weights')
+      return self._trainable_weights + self._non_trainable_weights + nested
 
   @property
   def weights(self):
@@ -639,7 +650,7 @@ class Layer(checkpointable.CheckpointableBase):
   def updates(self):
     if not self.trainable and not self.stateful:
       return []
-    return self._updates
+    return self._updates + self._gather_children_attribute('updates')
 
   @property
   def losses(self):
@@ -661,7 +672,7 @@ class Layer(checkpointable.CheckpointableBase):
       loss_tensor = regularizer()
       if loss_tensor is not None:
         collected_losses.append(loss_tensor)
-    return collected_losses
+    return collected_losses + self._gather_children_attribute('losses')
 
   @doc_controls.for_subclass_implementers
   def add_loss(self, losses, inputs=None):
@@ -1591,6 +1602,50 @@ class Layer(checkpointable.CheckpointableBase):
     if not hasattr(self.build, '_is_default'):
       self.build(input_shapes)
 
+  def __setattr__(self, name, value):
+    if (not getattr(self, '_setattr_tracking', True) or
+        getattr(self, '_is_graph_network', False)):
+      super(Layer, self).__setattr__(name, value)
+      return
+
+    # Append value to self._layers if relevant
+    if (isinstance(value, Layer) or
+        checkpointable_layer_utils.has_weights(value)):
+      # Initialize `_layers` here in case `__init__` has not yet been called.
+      if not hasattr(self, '_layers'):
+        self._layers = []
+      # We need to check object identity to avoid de-duplicating empty
+      # container types which compare equal.
+      if not any((layer is value for layer in self._layers)):
+        self._layers.append(value)
+        if hasattr(value, '_use_resource_variables'):
+          # Legacy layers (V1 tf.layers) must always use
+          # resource variables.
+          value._use_resource_variables = True
+
+    # Append value to list of trainable / non-trainable weights if relevant
+    if isinstance(value, tf_variables.Variable):
+      # Users may add extra weights/variables
+      # simply by assigning them to attributes (invalid for graph networks)
+      if not hasattr(self, '_trainable_weights'):
+        self._trainable_weights = []
+      if not hasattr(self, '_non_trainable_weights'):
+        self._non_trainable_weights = []
+      if value not in self._trainable_weights + self._non_trainable_weights:
+        if value.trainable:
+          self._trainable_weights.append(value)
+        else:
+          self._non_trainable_weights.append(value)
+    super(Layer, self).__setattr__(name, value)
+
+  def _gather_children_attribute(self, attribute):
+    assert attribute in {'weights', 'trainable_weights',
+                         'non_trainable_weights', 'updates', 'losses'}
+    if hasattr(self, '_layers'):
+      return list(itertools.chain.from_iterable(
+          getattr(layer, attribute) for layer in self._layers))
+    return []
+
 
 class Node(object):
   """A `Node` describes the connectivity between two layers.
diff --git a/tensorflow/python/keras/engine/base_layer_test.py b/tensorflow/python/keras/engine/base_layer_test.py
index fa0cad70af..4431f89693 100644
--- a/tensorflow/python/keras/engine/base_layer_test.py
+++ b/tensorflow/python/keras/engine/base_layer_test.py
@@ -30,6 +30,8 @@ from tensorflow.python.keras.engine import base_layer
 from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -218,6 +220,92 @@ class BaseLayerTest(keras_parameterized.TestCase):
                         atol=1e-5)
 
 
+@test_util.run_all_in_graph_and_eager_modes
+class NestedTrackingTest(test.TestCase):
+
+  def test_nested_layer_variable_tracking(self):
+    # Test that variables from nested sublayers are
+    # being tracked by subclassed layers.
+
+    class MyLayer(keras.layers.Layer):
+
+      def __init__(self):
+        super(MyLayer, self).__init__()
+        self.dense1 = keras.layers.Dense(1)
+        self.dense2 = keras.layers.BatchNormalization()
+
+      def build(self, input_shape):
+        self.v1 = self.add_weight('v1', shape=input_shape[1:].as_list())
+        self.v2 = variables.Variable(
+            name='v2',
+            initial_value=np.zeros(input_shape[1:].as_list(), dtype='float32'),
+            trainable=False)
+
+      def call(self, inputs):
+        x = self.dense1(inputs) + self.dense2(inputs)
+        return x + self.v1 + self.v2
+
+    layer = MyLayer()
+    inputs = keras.Input((1,))
+    _ = layer(inputs)
+
+    self.assertEqual(len(layer.weights), 8)
+    self.assertEqual(len(layer.trainable_weights), 5)
+    self.assertEqual(len(layer.non_trainable_weights), 3)
+
+    layer.dense1.trainable = False
+    self.assertEqual(len(layer.weights), 8)
+    self.assertEqual(len(layer.trainable_weights), 3)
+    self.assertEqual(len(layer.non_trainable_weights), 5)
+
+    layer.trainable = False
+    self.assertEqual(len(layer.weights), 8)
+    self.assertEqual(len(layer.trainable_weights), 0)
+    self.assertEqual(len(layer.non_trainable_weights), 8)
+
+  def test_nested_layer_updates_losses_tracking(self):
+    # Test that updates and losses from nested sublayers are
+    # being tracked by subclassed layers.
+
+    class UpdateAndLossLayer(keras.layers.Layer):
+
+      def build(self, _):
+        self.v1 = self.add_weight('v1', shape=())
+
+      def call(self, inputs):
+        self.add_loss(math_ops.reduce_sum(inputs))
+        self.add_update(state_ops.assign_add(self.v1, 1))
+        return inputs + 1
+
+    class MyLayer(keras.layers.Layer):
+
+      def build(self, _):
+        self.v1 = self.add_weight('v1', shape=())
+
+      def __init__(self):
+        super(MyLayer, self).__init__()
+        self.ul1 = UpdateAndLossLayer()
+        self.ul2 = UpdateAndLossLayer()
+
+      def call(self, inputs):
+        self.add_loss(math_ops.reduce_sum(inputs))
+        self.add_update(state_ops.assign_add(self.v1, 1))
+        x = self.ul1(inputs)
+        return self.ul2(x)
+
+    layer = MyLayer()
+
+    if context.executing_eagerly():
+      inputs = array_ops.ones((3, 1))
+      _ = layer(inputs)
+      self.assertEqual(len(layer.losses), 3)
+    else:
+      inputs = keras.Input((1,))
+      _ = layer(inputs)
+      self.assertEqual(len(layer.losses), 3)
+      self.assertEqual(len(layer.updates), 3)
+
+
 if __name__ == '__main__':
   ops.enable_eager_execution()
   test.main()
diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 7e6cc7bfee..642ac562cb 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -43,7 +43,6 @@ from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils import layer_utils
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite
-from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import checkpoint_management
 from tensorflow.python.training.checkpointable import base as checkpointable
@@ -327,71 +326,31 @@ class Network(base_layer.Layer):
       self._track_checkpointable(
           layer, name='layer-%d' % layer_index, overwrite=True)
 
-  def _no_dependency(self, value):
-    """Override to allow `Layer` to disable dependency tracking.
-
-    `CheckpointableBase` defines this method, whose semantics are "if a subclass
-    does dependency tracking, this method exempts `value`." Layer uses
-    `_no_dependency` to exempt some of its attribute assignments (conditional on
-    attribute assignment causing tracking in the subclass).
-
-    Args:
-      value: An object which will be assigned to an object attribute, whose
-        value should not be tracked.
-
-    Returns:
-      A wrapped object which, when assigned to an attribute, will not be
-      tracked (`value` will be stored in the attribute).
-    """
-    return data_structures.NoDependency(value)
-
   def __setattr__(self, name, value):
     if not getattr(self, '_setattr_tracking', True):
       super(Network, self).__setattr__(name, value)
       return
-    no_dependency = isinstance(value, data_structures.NoDependency)
-    value = data_structures.sticky_attribute_assignment(
-        checkpointable=self, value=value, name=name)
     if (isinstance(value, (base_layer.Layer,
-                           Network,
                            data_structures.CheckpointableDataStructure))
         or checkpointable_layer_utils.has_weights(value)):
       try:
-        is_graph_network = self._is_graph_network
+        self._is_graph_network
       except AttributeError:
         raise RuntimeError('It looks like you are subclassing `Model` and you '
                            'forgot to call `super(YourClass, self).__init__()`.'
                            ' Always start with this line.')
-      if not is_graph_network:
-        # We need to check object identity to avoid de-duplicating empty
-        # container types which compare equal.
-        if not any((layer is value for layer in self._layers)):
-          self._layers.append(value)
-          if hasattr(value, '_use_resource_variables'):
-            # In subclassed models, legacy layers (tf.layers) must always use
-            # resource variables.
-            value._use_resource_variables = True
-    if (not no_dependency
-        and isinstance(value, checkpointable.CheckpointableBase)):
-      if (  # For subclassed models only, users may add extra weights/variables
-            # simply by assigning them to attributes.
-          not self._is_graph_network
-          and isinstance(value, variables.Variable)):
-        if value.trainable:
-          # Could already be added via `add_weight`.
-          if value not in self._trainable_weights:
-            self._trainable_weights.append(value)
-        else:
-          if value not in self._non_trainable_weights:
-            self._non_trainable_weights.append(value)
+    # Keep track of checkpointable objects,
+    # for the needs of `self.save/save_weights`.
+    value = data_structures.sticky_attribute_assignment(
+        checkpointable=self, value=value, name=name)
+    super(Network, self).__setattr__(name, value)
 
-    # Keeping track of metric instance created in subclassed model/layer.
+    # Keep track of metric instance created in subclassed model/layer.
     # We do this so that we can maintain the correct order of metrics by adding
     # the instance to the `metrics` list as soon as it is created.
     from tensorflow.python.keras import metrics as metrics_module  # pylint: disable=g-import-not-at-top
     if isinstance(value, metrics_module.Metric):
       self._metrics.append(value)
-    super(Network, self).__setattr__(name, value)
 
   @property
   def stateful(self):
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 568e879c9c..604544efbe 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -70,6 +70,7 @@ class StackedRNNCells(Layer):
   ```
   """
 
+  @checkpointable.no_automatic_dependency_tracking
   def __init__(self, cells, **kwargs):
     for cell in cells:
       if not hasattr(cell, 'call'):
@@ -434,6 +435,7 @@ class RNN(Layer):
   ```
   """
 
+  @checkpointable.no_automatic_dependency_tracking
   def __init__(self,
                cell,
                return_sequences=False,
diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py
index 67b154141e..c78807611b 100644
--- a/tensorflow/python/keras/layers/wrappers.py
+++ b/tensorflow/python/keras/layers/wrappers.py
@@ -29,6 +29,7 @@ from tensorflow.python.keras.layers.recurrent import _standardize_args
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
+from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
@@ -45,6 +46,7 @@ class Wrapper(Layer):
       layer: The layer to be wrapped.
   """
 
+  @checkpointable.no_automatic_dependency_tracking
   def __init__(self, layer, **kwargs):
     assert isinstance(layer, Layer)
     self.layer = layer
@@ -380,6 +382,7 @@ class Bidirectional(Wrapper):
   ```
   """
 
+  @checkpointable.no_automatic_dependency_tracking
   def __init__(self, layer, merge_mode='concat', weights=None, **kwargs):
     if not isinstance(layer, Layer):
       raise ValueError(
diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py
index 2637191bb7..68d58bf66b 100644
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@@ -31,8 +31,6 @@ from tensorflow.python.keras.engine.input_layer import InputLayer
 from tensorflow.python.keras.engine.network import Network
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.training.checkpointable import data_structures
 from tensorflow.python.util.tf_export import tf_export
 
 # API entries importable from `keras.models`:
@@ -308,6 +306,10 @@ def _in_place_subclassed_model_reset(model):
     if isinstance(value, Layer):
       attributes_cache[name] = value
       assert value in model._layers
+      if hasattr(value, '_layers') and value._layers:
+        raise ValueError('We do not support the use of nested layers '
+                         'in `model_to_estimator` at this time. Found nested '
+                         'layer: %s' % value)
     elif isinstance(
         value,
         (list, tuple)) and name not in ('layers', '_layers', 'metrics',
@@ -322,7 +324,9 @@ def _in_place_subclassed_model_reset(model):
   # Replace layers on the model with fresh layers
   layers_to_names = {value: key for key, value in attributes_cache.items()}
   original_layers = model._layers[:]
-  model._layers = data_structures.NoDependency([])
+  setattr_tracking = model._setattr_tracking
+  model._setattr_tracking = False
+  model._layers = []
   for layer in original_layers:  # We preserve layer order.
     config = layer.get_config()
     # This will not work for nested subclassed models used as layers.
@@ -335,6 +339,7 @@ def _in_place_subclassed_model_reset(model):
     fresh_layer = layer.__class__.from_config(config)
     name = layers_to_names[layer]
     setattr(model, name, fresh_layer)
+    model._layers.append(fresh_layer)
 
   # Cache original model build attributes (in addition to layers)
   if (not hasattr(model, '_original_attributes_cache') or
@@ -367,12 +372,12 @@ def _in_place_subclassed_model_reset(model):
       ]
       for name in attributes_to_cache:
         attributes_cache[name] = getattr(model, name)
-  model._original_attributes_cache = data_structures.NoDependency(
-      attributes_cache)
+  model._original_attributes_cache = attributes_cache
   # Reset built state
   model.built = False
   model.inputs = None
   model.outputs = None
+  model._setattr_tracking = setattr_tracking
 
 
 def in_place_subclassed_model_state_restoration(model):
@@ -393,15 +398,15 @@ def in_place_subclassed_model_state_restoration(model):
     # back the previous attributes and track Layers by their original names
     # without adding dependencies on "utility" attributes which Models exempt
     # when they're constructed.
-    model._layers = data_structures.NoDependency([])
+    setattr_tracking = model._setattr_tracking
+    model._setattr_tracking = False
+    model._layers = []
     for name, value in model._original_attributes_cache.items():
-      if not isinstance(value, checkpointable.CheckpointableBase):
-        # If this value is not already checkpointable, it's probably that way
-        # for a reason; we don't want to start tracking data structures that the
-        # original Model didn't.
-        value = data_structures.NoDependency(value)
       setattr(model, name, value)
+      if isinstance(value, Layer):
+        model._layers.append(value)
     model._original_attributes_cache = None
+    model._setattr_tracking = setattr_tracking
   else:
     # Restore to the state of a never-called model.
     model.built = False
-- 
GitLab


From e927acde254d3153a58090dba028174462a8e1af Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Thu, 13 Dec 2018 19:54:34 -0800
Subject: [PATCH 575/873] Address review comments

---
 tensorflow/compiler/jit/xla_device.h             |  8 ++++----
 tensorflow/compiler/jit/xla_gpu_device.cc        |  4 ++--
 tensorflow/compiler/xla/client/client_library.h  | 11 +++++------
 tensorflow/compiler/xla/service/backend.h        |  1 -
 tensorflow/compiler/xla/service/platform_util.cc |  8 +++++++-
 tensorflow/compiler/xla/service/platform_util.h  |  4 +++-
 tensorflow/compiler/xla/service/service.h        |  1 -
 7 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h
index 94706a7056..857792d813 100644
--- a/tensorflow/compiler/jit/xla_device.h
+++ b/tensorflow/compiler/jit/xla_device.h
@@ -126,8 +126,8 @@ class XlaDevice : public LocalDevice {
     // the logical on-device shape without padding is used.
     PaddedShapeFn padded_shape_fn;
 
-    // Set of devices to use. This controls which of the devices given type in
-    // the system will have resources allocated for. For GPUs this will be
+    // Set of devices to use. This controls which of the devices on the given
+    // platform resources will have resources allocated. For GPUs this will be
     // filled from visible_gpu_devices list from session configuration.
     absl::optional<std::set<int>> allowed_devices;
   };
@@ -264,8 +264,8 @@ class XlaDevice : public LocalDevice {
   int64 outstanding_asynchronous_operations_ GUARDED_BY(mu_) = 0;
   condition_variable outstanding_asynchronous_operations_cv_;
 
-  // Set of devices to use. This controls which of the devices of current type
-  // in the system will have resources allocated for. For GPUs this will be
+  // Set of devices to use. This controls which of the devices on the given
+  // platform resources will have resources allocated. For GPUs this will be
   // filled from visible_gpu_devices list from session configuration.
   absl::optional<std::set<int>> allowed_devices_;
 };
diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index e84a784607..3841d03606 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -53,8 +53,8 @@ xla::StatusOr<std::set<int>> XlaGpuDeviceFactory::ParseVisibleDeviceList(
     if (!absl::SimpleAtoi(platform_gpu_id_str, &platform_gpu_id)) {
       return errors::InvalidArgument(
           "Could not parse entry in 'visible_device_list': '",
-          platform_gpu_id_str,
-          "'. visible_device_list = ", visible_device_list);
+          platform_gpu_id_str, "'. visible_device_list = ",
+          visible_device_list);
     }
     gpu_ids.insert(platform_gpu_id);
   }
diff --git a/tensorflow/compiler/xla/client/client_library.h b/tensorflow/compiler/xla/client/client_library.h
index 4d615f03f2..0cf548aeae 100644
--- a/tensorflow/compiler/xla/client/client_library.h
+++ b/tensorflow/compiler/xla/client/client_library.h
@@ -45,10 +45,10 @@ namespace xla {
 // Options to configure the local client when it is created.
 class LocalClientOptions {
  public:
-  LocalClientOptions(se::Platform* platform = nullptr,
-                     int number_of_replicas = 1,
-                     int intra_op_parallelism_threads = -1,
-                     const absl::optional<std::set<int>>& allowed_devices = {});
+  LocalClientOptions(
+      se::Platform* platform = nullptr, int number_of_replicas = 1,
+      int intra_op_parallelism_threads = -1,
+      const absl::optional<std::set<int>>& allowed_devices = absl::nullopt);
 
   // Set the platform backing the service, or nullptr for the default platform.
   LocalClientOptions& set_platform(se::Platform* platform);
@@ -66,7 +66,6 @@ class LocalClientOptions {
   // Sets the allowed_devices set for creation of stream executors.
   LocalClientOptions& set_allowed_devices(
       const absl::optional<std::set<int>>& allowed_devices);
-
   const absl::optional<std::set<int>>& allowed_devices() const;
 
  private:
@@ -87,7 +86,7 @@ class ClientLibrary {
   //   created for, for the given platform.
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
       se::Platform* platform = nullptr,
-      const absl::optional<std::set<int>>& allowed_devices = {});
+      const absl::optional<std::set<int>>& allowed_devices = absl::nullopt);
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
       const LocalClientOptions& options);
 
diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h
index 3527761503..97ac02fad3 100644
--- a/tensorflow/compiler/xla/service/backend.h
+++ b/tensorflow/compiler/xla/service/backend.h
@@ -57,7 +57,6 @@ class BackendOptions {
   // Sets the allowed_devices set for creation of stream executors.
   BackendOptions& set_allowed_devices(
       const absl::optional<std::set<int>>& allowed_devices);
-
   const absl::optional<std::set<int>>& allowed_devices() const;
 
  private:
diff --git a/tensorflow/compiler/xla/service/platform_util.cc b/tensorflow/compiler/xla/service/platform_util.cc
index f01724126c..896b73cda4 100644
--- a/tensorflow/compiler/xla/service/platform_util.cc
+++ b/tensorflow/compiler/xla/service/platform_util.cc
@@ -228,7 +228,13 @@ PlatformUtil::GetStreamExecutors(
     tensorflow::thread::ThreadPool thread_pool(
         tensorflow::Env::Default(), "device_initialization", device_count);
     for (int i = 0; i < device_count; ++i) {
-      if (allowed_devices && (*allowed_devices).count(i) == 0) {
+      // Once a stream executor is instantiated it will cause allocations on
+      // the device, for example for GPUs cuda context, cudnn handles etc. will
+      // be constructed. By constructing stream executors only on the
+      // allowed_devices, we don't make any allocations on other devices.
+      // This helps in multi-process executions on the same host like horovod or
+      // shared hosts.
+      if (allowed_devices && allowed_devices->count(i) == 0) {
         VLOG(1) << "Not initializing StreamExecutor for device " << i
                 << " since it is not in the visible device list";
         continue;
diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h
index 46123eb5d7..13b0323959 100644
--- a/tensorflow/compiler/xla/service/platform_util.h
+++ b/tensorflow/compiler/xla/service/platform_util.h
@@ -61,11 +61,13 @@ class PlatformUtil {
   // Returns a vector of StreamExecutors for the given platform. The vector is
   // indexed by device ordinal (device numbering used by StreamExecutor). If an
   // element is nullptr, then the device is present by not supported by XLA.
+  // Optional parameter, allowed_devices controls which of the devices on the
+  // platform will have StreamExecutors constructed for. 
   //
   // If the platform has no visible devices, a not-found error is returned.
   static StatusOr<std::vector<se::StreamExecutor*>> GetStreamExecutors(
       se::Platform* platform,
-      const absl::optional<std::set<int>>& allowed_devices = {});
+      const absl::optional<std::set<int>>& allowed_devices = absl::nullopt);
 
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(PlatformUtil);
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 0682a880de..91edc21d87 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -65,7 +65,6 @@ class ServiceOptions {
   // Sets the allowed_devices set for creation of stream executors.
   ServiceOptions& set_allowed_devices(
       const absl::optional<std::set<int>>& allowed_devices);
-
   const absl::optional<std::set<int>>& allowed_devices() const;
 
  private:
-- 
GitLab


From 3b9f7b91b5c8b9147fec9963ca7e24bed8ac7dda Mon Sep 17 00:00:00 2001
From: Karl Lessard <karl@kubx.ca>
Date: Thu, 13 Dec 2018 23:10:36 -0500
Subject: [PATCH 576/873] Fix conflicting method signatures

---
 .../java/src/main/java/org/tensorflow/Session.java   | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/tensorflow/java/src/main/java/org/tensorflow/Session.java b/tensorflow/java/src/main/java/org/tensorflow/Session.java
index c49e98b20e..8cc23e2991 100644
--- a/tensorflow/java/src/main/java/org/tensorflow/Session.java
+++ b/tensorflow/java/src/main/java/org/tensorflow/Session.java
@@ -147,21 +147,11 @@ public final class Session implements AutoCloseable {
       return this;
     }
 
-    /**
-     * Use {@code t} instead of the Tensor referred to by executing the operation referred to by
-     * {@code output}.
-     */
-    public Runner feed(Output<?> o, Tensor<?> t) {
-      inputs.add(o);
-      inputTensors.add(t);
-      return this;
-    }
-
     /**
      * Use {@code t} instead of the Tensor referred to by executing the operation referred to by
      * {@code operand}.
      */
-    public <T> Runner feed(Operand<T> operand, Tensor<T> t) {
+    public Runner feed(Operand<?> operand, Tensor<?> t) {
       inputs.add(operand.asOutput());
       inputTensors.add(t);
       return this;
-- 
GitLab


From 53805cfcf88a41980afb64310c690538bbd9c480 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Thu, 13 Dec 2018 21:01:14 -0800
Subject: [PATCH 577/873] [XLA] Strength reduce Dots with batch dimensions if
 there is a single contracting and non contracting dimension in the lhs and
 rhs, the batch dimension numbers are equal and one contracting or
 non-contracting dimension is 1. This is a generalization of rank 2 dot
 strength reduction.

PiperOrigin-RevId: 225483537
---
 tensorflow/compiler/xla/service/BUILD         |   1 +
 .../xla/service/algebraic_simplifier.cc       | 203 +++++++++++++++---
 .../xla/service/algebraic_simplifier_test.cc  |  51 +++++
 3 files changed, 221 insertions(+), 34 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 4aaa8a5b65..0c92ea7364 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1578,6 +1578,7 @@ cc_library(
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/container:inlined_vector",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/types:optional",
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index ee268361b0..1287dcf546 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include <vector>
 
 #include "absl/algorithm/container.h"
+#include "absl/container/inlined_vector.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/str_cat.h"
 #include "absl/types/optional.h"
@@ -240,6 +241,13 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault {
   // more fusion than leaving the nodes as Dot operations.
   StatusOr<bool> HandleDotStrengthReduction(HloInstruction* dot);
 
+  // Removes dimension dim from hlo.
+  HloInstruction* StripDim(HloInstruction* hlo, int64 dim) {
+    CHECK_EQ(hlo->shape().dimensions(dim), 1);
+    return computation_->AddInstruction(HloInstruction::CreateReshape(
+        ShapeUtil::DeleteDimension(dim, hlo->shape()), hlo));
+  }
+
   // Reshapes an instruction to rank 1 if it is not already rank 1.
   HloInstruction* Flatten(HloInstruction* hlo) {
     if (ShapeUtil::Rank(hlo->shape()) == 1) {
@@ -909,21 +917,51 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
     HloInstruction* dot) {
   HloInstruction *lhs, *rhs;
   CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs))));
-  int64 lhs_collapsing_dim =
-      dot->dot_dimension_numbers().lhs_contracting_dimensions(0);
+
+  const auto kept_dim = [](int64 rank, int64 contracting_dimension,
+                           absl::Span<const int64> batch_dimensions) -> int64 {
+    for (int64 i = 0; i < rank; ++i) {
+      if (i != contracting_dimension &&
+          !absl::c_linear_search(batch_dimensions, i)) {
+        return i;
+      }
+    }
+    return -1;
+  };
+
+  const int64 dot_rank = ShapeUtil::Rank(dot->shape());
+  const int64 rhs_rank = ShapeUtil::Rank(rhs->shape());
+  const int64 lhs_rank = ShapeUtil::Rank(lhs->shape());
+  const auto& dnums = dot->dot_dimension_numbers();
+  if (dnums.rhs_contracting_dimensions_size() > 1) {
+    return false;
+  }
+  if (dot_rank > 2 && (lhs_rank != rhs_rank || lhs_rank != dot_rank)) {
+    return false;
+  }
+  int64 lhs_collapsing_dim = dnums.lhs_contracting_dimensions(0);
+  int64 lhs_kept_dim = kept_dim(lhs_rank, lhs_collapsing_dim,
+                                AsInt64Slice(dnums.lhs_batch_dimensions()));
+  // If there is no non-contracting dimension in rank 2, do not strength reduce.
+  if (lhs_kept_dim == -1 && lhs_rank > 1) {
+    return false;
+  }
   if (lhs->IsRank2Transpose()) {
     lhs = lhs->mutable_operand(0);
-    lhs_collapsing_dim = 1 - lhs_collapsing_dim;
+    std::swap(lhs_collapsing_dim, lhs_kept_dim);
   }
-  const int64 lhs_kept_dim = 1 - lhs_collapsing_dim;
 
-  int64 rhs_collapsing_dim =
-      dot->dot_dimension_numbers().rhs_contracting_dimensions(0);
+  int64 rhs_collapsing_dim = dnums.rhs_contracting_dimensions(0);
+  int64 rhs_kept_dim = kept_dim(rhs_rank, rhs_collapsing_dim,
+                                AsInt64Slice(dnums.rhs_batch_dimensions()));
+  // If there is no non-contracting dimension in rank 2, do not strength reduce.
+  if (rhs_kept_dim == -1 && rhs_rank > 1) {
+    return false;
+  }
   if (rhs->IsRank2Transpose()) {
     rhs = rhs->mutable_operand(0);
-    rhs_collapsing_dim = 1 - rhs_collapsing_dim;
+    std::swap(rhs_collapsing_dim, rhs_kept_dim);
   }
-  const int64 rhs_kept_dim = 1 - rhs_collapsing_dim;
 
   auto as_type = [&](HloInstruction* hlo, const PrimitiveType element_type) {
     if (hlo->shape().element_type() == element_type) {
@@ -946,10 +984,15 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
     return AddReduce(as_type(hlo, F32), dim);
   };
 
+  auto broadcast = [&](HloInstruction* hlo, const Shape& shape,
+                       absl::Span<const int64> dims) {
+    return computation_->AddInstruction(
+        HloInstruction::CreateBroadcast(shape, hlo, dims));
+  };
+
   auto broadcast_to_dim = [&](HloInstruction* hlo, const Shape& shape,
                               int64 dim) {
-    return computation_->AddInstruction(
-        HloInstruction::CreateBroadcast(shape, hlo, {dim}));
+    return broadcast(hlo, shape, {dim});
   };
 
   auto multiply = [&](HloInstruction* local_lhs, HloInstruction* local_rhs) {
@@ -960,11 +1003,9 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
   // Strength reduce dot(a[K] , b[K]) =
   //  reshape(result.shape,
   //          reduce_sum(multiply(a, b), {0}))
-  if (ShapeUtil::Rank(rhs->shape()) == 1 &&
-      ShapeUtil::Rank(lhs->shape()) == 1) {
-    TF_RETURN_IF_ERROR(
-        ReplaceInstruction(dot, reshape_if_necessary(add_reduce_in_f32(
-                                    multiply(Flatten(lhs), Flatten(rhs)), 0))));
+  if (rhs_rank == 1 && lhs_rank == 1) {
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(add_reduce_in_f32(multiply(lhs, rhs), 0))));
     return true;
   }
 
@@ -978,8 +1019,7 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
   // Simplify outer product into multiply with implicit broadcasting.
   //
   // A dot(a[M, 1], b[1, N]) = multiply(a [M,1], b [1, N])
-  if (ShapeUtil::Rank(rhs->shape()) == 2 &&
-      rhs->shape().dimensions(rhs_collapsing_dim) == 1) {
+  if (rhs_rank == 2 && rhs->shape().dimensions(rhs_collapsing_dim) == 1) {
     TF_RETURN_IF_ERROR(ReplaceInstruction(
         dot, multiply(broadcast_to_dim(Flatten(lhs), dot->shape(), 0),
                       broadcast_to_dim(Flatten(rhs), dot->shape(), 1))));
@@ -993,9 +1033,8 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
   //        {0})
   //      )
   //    )
-  if (ShapeUtil::Rank(lhs->shape()) == 1 ||
-      (ShapeUtil::Rank(lhs->shape()) == 2 &&
-       lhs->shape().dimensions(lhs_kept_dim) == 1)) {
+  if (lhs_rank == 1 ||
+      (lhs_rank == 2 && lhs->shape().dimensions(lhs_kept_dim) == 1)) {
     if (ShapeUtil::Rank(rhs->shape()) == 1) {
       TF_RETURN_IF_ERROR(
           ReplaceInstruction(dot, reshape_if_necessary(add_reduce_in_f32(
@@ -1015,9 +1054,8 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
   //  reshape(result.shape,
   //    reduce_sum(multiply(a, broadcast(reshape([K],b), {1})), {0})
   //  )
-  if (ShapeUtil::Rank(rhs->shape()) == 1 ||
-      (ShapeUtil::Rank(rhs->shape()) == 2 &&
-       rhs->shape().dimensions(rhs_kept_dim) == 1)) {
+  if (rhs_rank == 1 ||
+      (rhs_rank == 2 && rhs->shape().dimensions(rhs_kept_dim) == 1)) {
     TF_RETURN_IF_ERROR(ReplaceInstruction(
         dot, reshape_if_necessary(add_reduce_in_f32(
                  multiply(lhs, broadcast_to_dim(Flatten(rhs), lhs->shape(),
@@ -1025,6 +1063,97 @@ StatusOr<bool> AlgebraicSimplifierVisitor::HandleDotStrengthReduction(
                  lhs_collapsing_dim))));
     return true;
   }
+
+  // Only consider kDot with batch dimension.
+  if (dot_rank <= 2) {
+    return false;
+  }
+
+  CHECK_EQ(rhs_rank, lhs_rank);
+  CHECK_EQ(dot_rank, lhs_rank);
+  // If there is more than one non-contracting dimension or the batch dimensions
+  // are not equal, bail out since transposes may be required to do a strength
+  // reduction.
+  if (dnums.rhs_batch_dimensions_size() + 2 != dot_rank ||
+      !absl::c_equal(dnums.lhs_batch_dimensions(),
+                     dnums.rhs_batch_dimensions())) {
+    return false;
+  }
+
+  auto broadcast_dims = [](int64 rank, int64 non_broadcast_dim) {
+    absl::InlinedVector<int64, 8> dims;
+    for (int64 i = 0; i < rank; ++i) {
+      if (i != non_broadcast_dim) {
+        dims.push_back(i);
+      }
+    }
+    return dims;
+  };
+
+  // If the contracting dimension is 1, remove the degnerate dimnesions from the
+  // lhs and rhs, broadcast each to the result shape and multiply.
+  if (lhs->shape().dimensions(lhs_collapsing_dim) == 1 &&
+      (rhs_kept_dim == rhs_rank - 1 ||
+       (rhs_collapsing_dim == rhs_rank - 1 && rhs_kept_dim == rhs_rank - 2))) {
+    CHECK_EQ(rhs->shape().dimensions(rhs_collapsing_dim), 1);
+    const int64 lhs_kept_dim_in_output =
+        lhs_kept_dim > lhs_collapsing_dim ? (lhs_kept_dim - 1) : lhs_kept_dim;
+    absl::InlinedVector<int64, 8> lhs_broadcast_dims;
+    for (const int64 dim : dnums.lhs_batch_dimensions()) {
+      lhs_broadcast_dims.push_back(dim > lhs_collapsing_dim ? (dim - 1) : dim);
+    }
+    absl::InlinedVector<int64, 8> rhs_broadcast_dims = lhs_broadcast_dims;
+    lhs_broadcast_dims.push_back(lhs_kept_dim_in_output);
+    absl::c_sort(lhs_broadcast_dims);
+    rhs_broadcast_dims.push_back(dot_rank - 1);
+    absl::c_sort(rhs_broadcast_dims);
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(
+                 multiply(broadcast(StripDim(lhs, lhs_collapsing_dim),
+                                    dot->shape(), lhs_broadcast_dims),
+                          broadcast(StripDim(rhs, rhs_collapsing_dim),
+                                    dot->shape(), rhs_broadcast_dims)))));
+    return true;
+  }
+
+  // If the lhs and rhs non-contracting dimensions are both one, strip each one,
+  // multiply and then reduce the collapsing dimension
+  if (lhs->shape().dimensions(lhs_kept_dim) == 1 &&
+      rhs->shape().dimensions(rhs_kept_dim) == 1 &&
+      lhs_kept_dim == rhs_kept_dim) {
+    auto new_lhs = StripDim(lhs, lhs_kept_dim);
+    auto new_rhs = StripDim(rhs, rhs_kept_dim);
+    const int64 reduce_dim = rhs_kept_dim < rhs_collapsing_dim
+                                 ? (rhs_collapsing_dim - 1)
+                                 : rhs_collapsing_dim;
+    TF_RETURN_IF_ERROR(
+        ReplaceInstruction(dot, reshape_if_necessary(add_reduce_in_f32(
+                                    multiply(new_lhs, new_rhs), reduce_dim))));
+    return true;
+  }
+
+  // If the lhs  non-contracting dimensions is one, strip the one, brodcast to
+  // the rhs shape, multiply and then reduce the collapsing dimension
+  if (lhs->shape().dimensions(lhs_kept_dim) == 1) {
+    auto new_lhs = broadcast(StripDim(lhs, lhs_kept_dim), rhs->shape(),
+                             broadcast_dims(rhs_rank, rhs_kept_dim));
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(add_reduce_in_f32(multiply(new_lhs, rhs),
+                                                    rhs_collapsing_dim))));
+    return true;
+  }
+
+  // If the rhs  non-contracting dimensions is one, strip the one, brodcast to
+  // the lhs shape, multiply and then reduce the collapsing dimension
+  if (rhs->shape().dimensions(rhs_kept_dim) == 1) {
+    auto new_rhs = broadcast(StripDim(rhs, rhs_kept_dim), lhs->shape(),
+                             broadcast_dims(lhs_rank, lhs_kept_dim));
+    TF_RETURN_IF_ERROR(ReplaceInstruction(
+        dot, reshape_if_necessary(add_reduce_in_f32(multiply(lhs, new_rhs),
+                                                    lhs_collapsing_dim))));
+    return true;
+  }
+
   return false;
 }
 
@@ -1303,25 +1432,31 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) {
   HloInstruction *lhs, *rhs;
   CHECK(Match(dot, m::Dot(m::Op(&lhs), m::Op(&rhs))));
 
-  // Only optimize F32 or BF16 dot operations where the dot, rhs and lhs are
-  // rank 2 or below.
-  if ((dot->shape().element_type() != F32 &&
-       dot->shape().element_type() != BF16) ||
-      ShapeUtil::Rank(lhs->shape()) > 2 || ShapeUtil::Rank(rhs->shape()) > 2 ||
-      ShapeUtil::Rank(dot->shape()) > 2) {
-    return Status::OK();
-  }
-
   // Replace a zero element dot with a broadcast of the constant 0.
   if (ShapeUtil::IsZeroElementArray(dot->shape()) ||
       ShapeUtil::IsZeroElementArray(lhs->shape()) ||
       ShapeUtil::IsZeroElementArray(rhs->shape())) {
-    auto zero = computation_->AddInstruction(
-        HloInstruction::CreateConstant(LiteralUtil::CreateR0(0.0f)));
+    auto zero = computation_->AddInstruction(HloInstruction::CreateConstant(
+        LiteralUtil::Zero(dot->shape().element_type())));
     return ReplaceWithNewInstruction(
         dot, HloInstruction::CreateBroadcast(dot->shape(), zero, {}));
   }
 
+  // Only optimize F32 or BF16 dot operations where the dot, rhs and lhs are
+  // rank 2 or below.
+  if (dot->shape().element_type() != F32 &&
+      dot->shape().element_type() != BF16) {
+    return Status::OK();
+  }
+  if (ShapeUtil::Rank(lhs->shape()) > 2 || ShapeUtil::Rank(rhs->shape()) > 2 ||
+      ShapeUtil::Rank(dot->shape()) > 2) {
+    if (options_.enable_dot_strength_reduction() &&
+        !options_.is_layout_sensitive()) {
+      TF_RETURN_IF_ERROR(HandleDotStrengthReduction(dot).status());
+    }
+    return Status::OK();
+  }
+
   TF_ASSIGN_OR_RETURN(HloInstruction * dot_of_concat_optimized,
                       OptimizeDotOfConcat(dot));
   if (dot_of_concat_optimized) {
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index 775e7ef40d..cfb4c48277 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -4104,6 +4104,57 @@ INSTANTIATE_TEST_CASE_P(
     PadReduceWindowEffectiveBroadcastTest,
     ::testing::ValuesIn(PadReduceWindowEffectiveBroadcastCases()));
 
+class BatchDotStrengthReductionTest
+    : public AlgebraicSimplifierTest,
+      public ::testing::WithParamInterface<
+          ::testing::tuple<int, int, int, PrimitiveType>> {};
+TEST_P(BatchDotStrengthReductionTest, BatchDotStrengthReduction) {
+  auto module = CreateNewVerifiedModule();
+  int m, k, n;
+  PrimitiveType element_type;
+  std::tie(m, k, n, element_type) = GetParam();
+
+  Shape dot_shape = ShapeUtil::MakeShape(element_type, {1, 3, 5, m, n});
+  Shape lhs_shape = ShapeUtil::MakeShape(element_type, {1, 3, 5, m, k});
+  Shape rhs_shape = ShapeUtil::MakeShape(element_type, {1, 3, 5, k, n});
+  HloComputation::Builder builder(TestName());
+
+  auto lhs = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, lhs_shape, "lhs"));
+  auto rhs = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, rhs_shape, "rhs"));
+  DotDimensionNumbers dot_dnums;
+  dot_dnums.add_lhs_batch_dimensions(0);
+  dot_dnums.add_lhs_batch_dimensions(1);
+  dot_dnums.add_lhs_batch_dimensions(2);
+  dot_dnums.add_rhs_batch_dimensions(0);
+  dot_dnums.add_rhs_batch_dimensions(1);
+  dot_dnums.add_rhs_batch_dimensions(2);
+  dot_dnums.add_lhs_contracting_dimensions(4);
+  dot_dnums.add_rhs_contracting_dimensions(3);
+  builder.AddInstruction(HloInstruction::CreateDot(
+      dot_shape, lhs, rhs, dot_dnums, DefaultPrecisionConfig(2)));
+  auto computation = module->AddEntryComputation(builder.Build());
+  AlgebraicSimplifier simplifier(default_options_);
+  TF_ASSERT_OK_AND_ASSIGN(bool changed, simplifier.Run(module.get()));
+  const bool dot_should_be_transformed = m == 1 || k == 1 || n == 1;
+  const bool computation_should_be_modified = dot_should_be_transformed;
+  EXPECT_EQ(changed, computation_should_be_modified);
+  bool has_no_dot = true;
+  for (const auto& hlo : computation->instructions()) {
+    if (hlo->opcode() == HloOpcode::kDot) {
+      has_no_dot = false;
+      break;
+    }
+  }
+  EXPECT_EQ(has_no_dot, dot_should_be_transformed);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    BatchDotStrengthReductionTestInstantiation, BatchDotStrengthReductionTest,
+    ::testing::Combine(::testing::Values(1, 2), ::testing::Values(1, 2),
+                       ::testing::Values(1, 2), ::testing::Values(F32, BF16)));
+
 class DotStrengthReductionTest
     : public AlgebraicSimplifierTest,
       public ::testing::WithParamInterface<
-- 
GitLab


From 724cb2018c6f587c51c06b5008a00ba210f804ac Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 13 Dec 2018 21:07:55 -0800
Subject: [PATCH 578/873] Optimized scalar-broadcast add.

PiperOrigin-RevId: 225484293
---
 tensorflow/lite/kernels/add.cc                |   5 +-
 tensorflow/lite/kernels/add_test.cc           |  95 +++++++-
 .../internal/optimized/optimized_ops.h        | 217 +++++++++++++++---
 .../internal/reference/reference_ops.h        |  95 +++++++-
 4 files changed, 349 insertions(+), 63 deletions(-)

diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc
index 32a7c100ce..9867cc53b3 100644
--- a/tensorflow/lite/kernels/add.cc
+++ b/tensorflow/lite/kernels/add.cc
@@ -247,7 +247,10 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
         TF_LITE_ADD(reference_ops, Add);
       }
     } else {
-      if (need_broadcast) {
+      if (op_params.broadcast_category ==
+          BroadcastableOpCategory::kGenericBroadcast) {
+        TF_LITE_ADD(optimized_ops, BroadcastAdd4DSlow);
+      } else if (need_broadcast) {
         TF_LITE_ADD(optimized_ops, BroadcastAddFivefold);
       } else {
         TF_LITE_ADD(optimized_ops, Add);
diff --git a/tensorflow/lite/kernels/add_test.cc b/tensorflow/lite/kernels/add_test.cc
index 1d33adf199..16045d4572 100644
--- a/tensorflow/lite/kernels/add_test.cc
+++ b/tensorflow/lite/kernels/add_test.cc
@@ -279,21 +279,92 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) {
   }
 }
 
-TEST(QuantizedAddOpModel, QuantizedWithBroadcast) {
-  float kQuantizedTolerance = GetTolerance(-3.0, 3.0);
+TEST(QuantizedAddOpModel, QuantizedWithScalarBroadcast) {
+  float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
   std::vector<std::vector<int>> test_shapes = {
       {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}};
   for (int i = 0; i < test_shapes.size(); ++i) {
-    QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0},
-                          {TensorType_UINT8, {}, -3.0, 3.0},
-                          {TensorType_UINT8, {}, -3.0, 3.0},
-                          ActivationFunctionType_NONE);
-    m.QuantizeAndPopulate<uint8_t>(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0});
-    m.QuantizeAndPopulate<uint8_t>(m.input2(), {0.1});
-    m.Invoke();
-    EXPECT_THAT(m.GetDequantizedOutput(),
-                ElementsAreArray(ArrayFloatNear({-1.9, 0.3, 0.8, 0.9, 1.2, 2.1},
-                                                kQuantizedTolerance)))
+    QuantizedAddOpModel model_fixture(
+        {TensorType_UINT8, test_shapes[i], -3.f, 3.f},
+        {TensorType_UINT8, {}, -3.f, 3.f}, {TensorType_UINT8, {}, -3.f, 3.f},
+        ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<uint8_t>(
+        model_fixture.input1(), {-2.0f, 0.2f, 0.7f, 0.8f, 1.1f, 2.0f});
+    model_fixture.QuantizeAndPopulate<uint8_t>(model_fixture.input2(), {0.1f});
+    model_fixture.Invoke();
+    EXPECT_THAT(
+        model_fixture.GetDequantizedOutput(),
+        ElementsAreArray(ArrayFloatNear({-1.9f, 0.3f, 0.8f, 0.9f, 1.2f, 2.1f},
+                                        kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs.
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    QuantizedAddOpModel model_fixture(
+        {TensorType_UINT8, {}, -3.f, 3.f},
+        {TensorType_UINT8, test_shapes[i], -3.f, 3.f},
+        {TensorType_UINT8, {}, -3.f, 3.f}, ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<uint8_t>(model_fixture.input1(), {0.1f});
+    model_fixture.QuantizeAndPopulate<uint8_t>(
+        model_fixture.input2(), {-2.0f, 0.2f, 0.7f, 0.8f, 1.1f, 2.0f});
+    model_fixture.Invoke();
+    EXPECT_THAT(
+        model_fixture.GetDequantizedOutput(),
+        ElementsAreArray(ArrayFloatNear({-1.9f, 0.3f, 0.8f, 0.9f, 1.2f, 2.1f},
+                                        kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+}
+
+TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcast) {
+  float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+  const std::vector<int> base_shape = {2, 3, 1, 2};
+  std::vector<std::vector<int>> test_shapes = {
+      {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+  std::vector<std::vector<float>> test_outputs = {
+      {-0.1f, 2.6f,  -0.7f, 2.8f, 0.7f,  3.0f, 1.1f,  0.8f, 0.5f,
+       1.0f,  1.9f,  1.4f,  1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f,
+       1.4f,  3.0f,  0.8f,  3.0f, 2.2f,  3.0f, -1.4f, 0.3f, -2.0f,
+       0.5f,  -0.6f, 0.9f,  0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+      {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.0f, -2.0f, 0.5f, 1.7f,
+       -1.3f},
+      {-0.1f, 2.5f,  0.0f, 2.6f, -0.7f, 1.9f, 1.1f,  0.7f, 1.2f,
+       0.8f,  0.5f,  0.1f, 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f,
+       1.7f,  3.0f,  2.2f, 3.0f, 2.1f,  3.0f, -1.1f, 0.5f, -0.6f,
+       1.0f,  -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+      {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.0f, -0.6f, 1.0f, 1.6f,
+       -1.3f}};
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    QuantizedAddOpModel model_fixture(
+        {TensorType_UINT8, base_shape, -3.f, 3.f},
+        {TensorType_UINT8, test_shapes[i], -3.f, 3.f},
+        {TensorType_UINT8, {}, -3.f, 3.f}, ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<uint8_t>(
+        model_fixture.input1(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
+                                 2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
+    model_fixture.QuantizeAndPopulate<uint8_t>(
+        model_fixture.input2(), {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
+    model_fixture.Invoke();
+    EXPECT_THAT(
+        model_fixture.GetDequantizedOutput(),
+        ElementsAreArray(ArrayFloatNear(test_outputs[i], kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs.
+  for (int i = 0; i < test_shapes.size(); ++i) {
+    QuantizedAddOpModel model_fixture(
+        {TensorType_UINT8, test_shapes[i], -3.f, 3.f},
+        {TensorType_UINT8, base_shape, -3.f, 3.f},
+        {TensorType_UINT8, {}, -3.f, 3.f}, ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<uint8_t>(
+        model_fixture.input1(), {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
+    model_fixture.QuantizeAndPopulate<uint8_t>(
+        model_fixture.input2(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
+                                 2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
+    model_fixture.Invoke();
+    EXPECT_THAT(
+        model_fixture.GetDequantizedOutput(),
+        ElementsAreArray(ArrayFloatNear(test_outputs[i], kQuantizedTolerance)))
         << "With shape number " << i;
   }
 }
diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
index c79b69a22e..bf3902ec31 100644
--- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
@@ -2347,36 +2347,37 @@ inline void Add(const ArithmeticParams& params,
 inline void AddElementwise(int size, const ArithmeticParams& params,
                            const uint8* input1_data, const uint8* input2_data,
                            uint8* output_data) {
+  gemmlowp::ScopedProfilingLabel label("AddElementwise/8bit");
   int i = 0;
   TFLITE_DCHECK_GT(params.input1_offset, -256);
   TFLITE_DCHECK_GT(params.input2_offset, -256);
   TFLITE_DCHECK_LT(params.input1_offset, 256);
   TFLITE_DCHECK_LT(params.input2_offset, 256);
 #ifdef USE_NEON
-  const auto output_activation_min_vector =
+  const uint8x8_t output_activation_min_vector =
       vdup_n_u8(params.quantized_activation_min);
-  const auto output_activation_max_vector =
+  const uint8x8_t output_activation_max_vector =
       vdup_n_u8(params.quantized_activation_max);
   for (; i <= size - 8; i += 8) {
-    const auto input1_val_original = vld1_u8(input1_data + i);
-    const auto input2_val_original = vld1_u8(input2_data + i);
-    const auto input1_val_s16 =
+    const uint8x8_t input1_val_original = vld1_u8(input1_data + i);
+    const uint8x8_t input2_val_original = vld1_u8(input2_data + i);
+    const int16x8_t input1_val_s16 =
         vreinterpretq_s16_u16(vmovl_u8(input1_val_original));
-    const auto input2_val_s16 =
+    const int16x8_t input2_val_s16 =
         vreinterpretq_s16_u16(vmovl_u8(input2_val_original));
-    const auto input1_val =
+    const int16x8_t input1_val =
         vaddq_s16(input1_val_s16, vdupq_n_s16(params.input1_offset));
-    const auto input2_val =
+    const int16x8_t input2_val =
         vaddq_s16(input2_val_s16, vdupq_n_s16(params.input2_offset));
-    const auto input1_val_high = vget_high_s16(input1_val);
-    const auto input1_val_low = vget_low_s16(input1_val);
-    const auto input2_val_high = vget_high_s16(input2_val);
-    const auto input2_val_low = vget_low_s16(input2_val);
-    auto x11 = vmovl_s16(input1_val_low);
-    auto x12 = vmovl_s16(input1_val_high);
-    auto x21 = vmovl_s16(input2_val_low);
-    auto x22 = vmovl_s16(input2_val_high);
-    const auto left_shift_dup = vdupq_n_s32(params.left_shift);
+    const int16x4_t input1_val_high = vget_high_s16(input1_val);
+    const int16x4_t input1_val_low = vget_low_s16(input1_val);
+    const int16x4_t input2_val_high = vget_high_s16(input2_val);
+    const int16x4_t input2_val_low = vget_low_s16(input2_val);
+    int32x4_t x11 = vmovl_s16(input1_val_low);
+    int32x4_t x12 = vmovl_s16(input1_val_high);
+    int32x4_t x21 = vmovl_s16(input2_val_low);
+    int32x4_t x22 = vmovl_s16(input2_val_high);
+    const int32x4_t left_shift_dup = vdupq_n_s32(params.left_shift);
     x11 = vshlq_s32(x11, left_shift_dup);
     x12 = vshlq_s32(x12, left_shift_dup);
     x21 = vshlq_s32(x21, left_shift_dup);
@@ -2385,24 +2386,24 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
     x12 = vqrdmulhq_n_s32(x12, params.input1_multiplier);
     x21 = vqrdmulhq_n_s32(x21, params.input2_multiplier);
     x22 = vqrdmulhq_n_s32(x22, params.input2_multiplier);
-    const auto input1_shift_dup = vdupq_n_s32(params.input1_shift);
-    const auto input2_shift_dup = vdupq_n_s32(params.input2_shift);
+    const int32x4_t input1_shift_dup = vdupq_n_s32(params.input1_shift);
+    const int32x4_t input2_shift_dup = vdupq_n_s32(params.input2_shift);
     x11 = vshlq_s32(x11, input1_shift_dup);
     x12 = vshlq_s32(x12, input1_shift_dup);
     x21 = vshlq_s32(x21, input2_shift_dup);
     x22 = vshlq_s32(x22, input2_shift_dup);
-    auto s1 = vaddq_s32(x11, x21);
-    auto s2 = vaddq_s32(x12, x22);
+    int32x4_t s1 = vaddq_s32(x11, x21);
+    int32x4_t s2 = vaddq_s32(x12, x22);
     s1 = vqrdmulhq_n_s32(s1, params.output_multiplier);
     s2 = vqrdmulhq_n_s32(s2, params.output_multiplier);
     using gemmlowp::RoundingDivideByPOT;
     s1 = RoundingDivideByPOT(s1, -params.output_shift);
     s2 = RoundingDivideByPOT(s2, -params.output_shift);
-    const auto s1_narrowed = vmovn_s32(s1);
-    const auto s2_narrowed = vmovn_s32(s2);
-    const auto s = vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed),
-                             vdupq_n_s16(params.output_offset));
-    const auto clamped =
+    const int16x4_t s1_narrowed = vmovn_s32(s1);
+    const int16x4_t s2_narrowed = vmovn_s32(s2);
+    const int16x8_t s = vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed),
+                                  vdupq_n_s16(params.output_offset));
+    const uint8x8_t clamped =
         vmax_u8(output_activation_min_vector,
                 vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
     vst1_u8(output_data + i, clamped);
@@ -2432,6 +2433,109 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
   }
 }
 
+// Scalar-broadcast add that can be used for inner loop of more general
+// broadcast add, so that, for example, scalar-broadcast with batch will still
+// be fast.
+inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
+                               uint8 input1_data, const uint8* input2_data,
+                               uint8* output_data) {
+  using gemmlowp::RoundingDivideByPOT;
+
+  gemmlowp::ScopedProfilingLabel label("AddScalarBroadcast/8bit");
+  TFLITE_DCHECK_GT(params.input1_offset, -256);
+  TFLITE_DCHECK_GT(params.input2_offset, -256);
+  TFLITE_DCHECK_LT(params.input1_offset, 256);
+  TFLITE_DCHECK_LT(params.input2_offset, 256);
+
+  int i = 0;
+
+#ifdef USE_NEON
+  const int32x4_t left_shift_dup = vdupq_n_s32(params.left_shift);
+  const uint8x8_t output_activation_min_vector =
+      vdup_n_u8(params.quantized_activation_min);
+  const uint8x8_t output_activation_max_vector =
+      vdup_n_u8(params.quantized_activation_max);
+
+  // Process broadcast scalar.
+  const uint8x8_t input1_val_original = vdup_n_u8(input1_data);
+  const int16x8_t input1_val_s16 =
+      vreinterpretq_s16_u16(vmovl_u8(input1_val_original));
+  const int16x8_t input1_val =
+      vaddq_s16(input1_val_s16, vdupq_n_s16(params.input1_offset));
+  const int16x4_t input1_val_high = vget_high_s16(input1_val);
+  const int16x4_t input1_val_low = vget_low_s16(input1_val);
+  int32x4_t x11 = vmovl_s16(input1_val_low);
+  int32x4_t x12 = vmovl_s16(input1_val_high);
+  x11 = vshlq_s32(x11, left_shift_dup);
+  x12 = vshlq_s32(x12, left_shift_dup);
+  x11 = vqrdmulhq_n_s32(x11, params.input1_multiplier);
+  x12 = vqrdmulhq_n_s32(x12, params.input1_multiplier);
+  const int32x4_t input1_shift_dup = vdupq_n_s32(params.input1_shift);
+  x11 = vshlq_s32(x11, input1_shift_dup);
+  x12 = vshlq_s32(x12, input1_shift_dup);
+
+  for (; i <= size - 8; i += 8) {
+    const uint8x8_t input2_val_original = vld1_u8(input2_data + i);
+    const int16x8_t input2_val_s16 =
+        vreinterpretq_s16_u16(vmovl_u8(input2_val_original));
+    const int16x8_t input2_val =
+        vaddq_s16(input2_val_s16, vdupq_n_s16(params.input2_offset));
+    const int16x4_t input2_val_high = vget_high_s16(input2_val);
+    const int16x4_t input2_val_low = vget_low_s16(input2_val);
+    int32x4_t x21 = vmovl_s16(input2_val_low);
+    int32x4_t x22 = vmovl_s16(input2_val_high);
+    x21 = vshlq_s32(x21, left_shift_dup);
+    x22 = vshlq_s32(x22, left_shift_dup);
+    x21 = vqrdmulhq_n_s32(x21, params.input2_multiplier);
+    x22 = vqrdmulhq_n_s32(x22, params.input2_multiplier);
+    const int32x4_t input2_shift_dup = vdupq_n_s32(params.input2_shift);
+    x21 = vshlq_s32(x21, input2_shift_dup);
+    x22 = vshlq_s32(x22, input2_shift_dup);
+    int32x4_t s1 = vaddq_s32(x11, x21);
+    int32x4_t s2 = vaddq_s32(x12, x22);
+    s1 = vqrdmulhq_n_s32(s1, params.output_multiplier);
+    s2 = vqrdmulhq_n_s32(s2, params.output_multiplier);
+    s1 = RoundingDivideByPOT(s1, -params.output_shift);
+    s2 = RoundingDivideByPOT(s2, -params.output_shift);
+    const int16x4_t s1_narrowed = vmovn_s32(s1);
+    const int16x4_t s2_narrowed = vmovn_s32(s2);
+    const int16x8_t s = vaddq_s16(vcombine_s16(s1_narrowed, s2_narrowed),
+                                  vdupq_n_s16(params.output_offset));
+    const uint8x8_t clamped =
+        vmax_u8(output_activation_min_vector,
+                vmin_u8(output_activation_max_vector, vqmovun_s16(s)));
+    vst1_u8(output_data + i, clamped);
+  }
+#endif  // NEON
+
+  if (i < size) {
+    // Process broadcast scalar.
+    const int32 input1_val = params.input1_offset + input1_data;
+    const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
+    const int32 scaled_input1_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input1_val, params.input1_multiplier, params.input1_shift);
+
+    for (; i < size; ++i) {
+      const int32 input2_val = params.input2_offset + input2_data[i];
+      const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
+      const int32 scaled_input2_val =
+          MultiplyByQuantizedMultiplierSmallerThanOneExp(
+              shifted_input2_val, params.input2_multiplier,
+              params.input2_shift);
+      const int32 raw_sum = scaled_input1_val + scaled_input2_val;
+      const int32 raw_output =
+          MultiplyByQuantizedMultiplierSmallerThanOneExp(
+              raw_sum, params.output_multiplier, params.output_shift) +
+          params.output_offset;
+      const int32 clamped_output =
+          std::min(params.quantized_activation_max,
+                   std::max(params.quantized_activation_min, raw_output));
+      output_data[i] = static_cast<uint8>(clamped_output);
+    }
+  }
+}
+
 inline void Add(const ArithmeticParams& params,
                 const RuntimeShape& input1_shape, const uint8* input1_data,
                 const RuntimeShape& input2_shape, const uint8* input2_data,
@@ -2546,26 +2650,63 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
   uint8* output_data_ptr = output_data;
   const uint8* input1_data_ptr = input1_data;
   const uint8* input2_data_reset = input2_data;
+  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
+  // between input shapes. y3 for input 1 is always broadcast, and so the
+  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
+  // Put another way,
+  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
+  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
   int y0 = params.broadcast_shape[0];
   int y1 = params.broadcast_shape[1];
   int y2 = params.broadcast_shape[2];
   int y3 = params.broadcast_shape[3];
   int y4 = params.broadcast_shape[4];
-  for (int i0 = 0; i0 < y0; ++i0) {
-    const uint8* input2_data_ptr;
-    for (int i1 = 0; i1 < y1; ++i1) {
-      input2_data_ptr = input2_data_reset;
-      for (int i2 = 0; i2 < y2; ++i2) {
-        for (int i3 = 0; i3 < y3; ++i3) {
-          AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
-                         output_data_ptr);
-          input2_data_ptr += y4;
-          output_data_ptr += y4;
+  if (y4 > 1) {
+    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
+    // dimension.
+    for (int i0 = 0; i0 < y0; ++i0) {
+      const uint8* input2_data_ptr;
+      for (int i1 = 0; i1 < y1; ++i1) {
+        input2_data_ptr = input2_data_reset;
+        for (int i2 = 0; i2 < y2; ++i2) {
+          for (int i3 = 0; i3 < y3; ++i3) {
+            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
+                           output_data_ptr);
+            input2_data_ptr += y4;
+            output_data_ptr += y4;
+          }
+          // We have broadcast y4 of input1 data y3 times, and now move on.
+          input1_data_ptr += y4;
+        }
+      }
+      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
+      input2_data_reset = input2_data_ptr;
+    }
+  } else {
+    // Special case of y4 == 1, in which the innermost loop is a single element
+    // and can be combined with the next (y3) as an inner broadcast.
+    //
+    // Note that this handles the case of pure scalar broadcast when
+    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
+    // broadcast with batch (as y2 > 1).
+    //
+    // NOTE The process is the same as the above general case except simplified
+    // for y4 == 1 and the loop over y3 is contained within the
+    // AddScalarBroadcast function.
+    for (int i0 = 0; i0 < y0; ++i0) {
+      const uint8* input2_data_ptr;
+      for (int i1 = 0; i1 < y1; ++i1) {
+        input2_data_ptr = input2_data_reset;
+        for (int i2 = 0; i2 < y2; ++i2) {
+          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
+                             output_data_ptr);
+          input2_data_ptr += y3;
+          output_data_ptr += y3;
+          input1_data_ptr += 1;
         }
-        input1_data_ptr += y4;
       }
+      input2_data_reset = input2_data_ptr;
     }
-    input2_data_reset = input2_data_ptr;
   }
 }
 
diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h
index ea3ab06da1..b7b9139428 100644
--- a/tensorflow/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h
@@ -735,6 +735,40 @@ inline void AddElementwise(int size, const ArithmeticParams& params,
   }
 }
 
+// Scalar-broadcast add that can be used for inner loop of more general
+// broadcast add, so that, for example, scalar-broadcast with batch will still
+// be fast.
+inline void AddScalarBroadcast(int size, const ArithmeticParams& params,
+                               uint8 input1_data, const uint8* input2_data,
+                               uint8* output_data) {
+  TFLITE_DCHECK_GT(params.input1_offset, -256);
+  TFLITE_DCHECK_GT(params.input2_offset, -256);
+  TFLITE_DCHECK_LT(params.input1_offset, 256);
+  TFLITE_DCHECK_LT(params.input2_offset, 256);
+
+  const int32 input1_val = params.input1_offset + input1_data;
+  const int32 shifted_input1_val = input1_val * (1 << params.left_shift);
+  const int32 scaled_input1_val =
+      MultiplyByQuantizedMultiplierSmallerThanOneExp(
+          shifted_input1_val, params.input1_multiplier, params.input1_shift);
+  for (int i = 0; i < size; ++i) {
+    const int32 input2_val = params.input2_offset + input2_data[i];
+    const int32 shifted_input2_val = input2_val * (1 << params.left_shift);
+    const int32 scaled_input2_val =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            shifted_input2_val, params.input2_multiplier, params.input2_shift);
+    const int32 raw_sum = scaled_input1_val + scaled_input2_val;
+    const int32 raw_output =
+        MultiplyByQuantizedMultiplierSmallerThanOneExp(
+            raw_sum, params.output_multiplier, params.output_shift) +
+        params.output_offset;
+    const int32 clamped_output =
+        std::min(params.quantized_activation_max,
+                 std::max(params.quantized_activation_min, raw_output));
+    output_data[i] = static_cast<uint8>(clamped_output);
+  }
+}
+
 inline void Add(const ArithmeticParams& params,
                 const RuntimeShape& input1_shape, const uint8* input1_data,
                 const RuntimeShape& input2_shape, const uint8* input2_data,
@@ -975,26 +1009,63 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
   uint8* output_data_ptr = output_data;
   const uint8* input1_data_ptr = input1_data;
   const uint8* input2_data_reset = input2_data;
+  // In the fivefold pattern, y0, y2 and y4 are not broadcast, and so shared
+  // between input shapes. y3 for input 1 is always broadcast, and so the
+  // dimension there is 1, whereas optionally y1 might be broadcast for input 2.
+  // Put another way,
+  // input1.shape.FlatSize = y0 * y1 * y2 * y4,
+  // input2.shape.FlatSize = y0 * y2 * y3 * y4.
   int y0 = params.broadcast_shape[0];
   int y1 = params.broadcast_shape[1];
   int y2 = params.broadcast_shape[2];
   int y3 = params.broadcast_shape[3];
   int y4 = params.broadcast_shape[4];
-  for (int i0 = 0; i0 < y0; ++i0) {
-    const uint8* input2_data_ptr;
-    for (int i1 = 0; i1 < y1; ++i1) {
-      input2_data_ptr = input2_data_reset;
-      for (int i2 = 0; i2 < y2; ++i2) {
-        for (int i3 = 0; i3 < y3; ++i3) {
-          AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
-                         output_data_ptr);
-          input2_data_ptr += y4;
-          output_data_ptr += y4;
+  if (y4 > 1) {
+    // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner
+    // dimension.
+    for (int i0 = 0; i0 < y0; ++i0) {
+      const uint8* input2_data_ptr;
+      for (int i1 = 0; i1 < y1; ++i1) {
+        input2_data_ptr = input2_data_reset;
+        for (int i2 = 0; i2 < y2; ++i2) {
+          for (int i3 = 0; i3 < y3; ++i3) {
+            AddElementwise(y4, params, input1_data_ptr, input2_data_ptr,
+                           output_data_ptr);
+            input2_data_ptr += y4;
+            output_data_ptr += y4;
+          }
+          // We have broadcast y4 of input1 data y3 times, and now move on.
+          input1_data_ptr += y4;
         }
-        input1_data_ptr += y4;
       }
+      // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
+      input2_data_reset = input2_data_ptr;
+    }
+  } else {
+    // Special case of y4 == 1, in which the innermost loop is a single element
+    // and can be combined with the next (y3) as an inner broadcast.
+    //
+    // Note that this handles the case of pure scalar broadcast when
+    // y0 == y1 == y2 == 1. With low overhead it handles cases such as scalar
+    // broadcast with batch (as y2 > 1).
+    //
+    // NOTE The process is the same as the above general case except simplified
+    // for y4 == 1 and the loop over y3 is contained within the
+    // AddScalarBroadcast function.
+    for (int i0 = 0; i0 < y0; ++i0) {
+      const uint8* input2_data_ptr;
+      for (int i1 = 0; i1 < y1; ++i1) {
+        input2_data_ptr = input2_data_reset;
+        for (int i2 = 0; i2 < y2; ++i2) {
+          AddScalarBroadcast(y3, params, *input1_data_ptr, input2_data_ptr,
+                             output_data_ptr);
+          input2_data_ptr += y3;
+          output_data_ptr += y3;
+          input1_data_ptr += 1;
+        }
+      }
+      input2_data_reset = input2_data_ptr;
     }
-    input2_data_reset = input2_data_ptr;
   }
 }
 
-- 
GitLab


From d050d65a63085ebb1ce9980098e5b1b3fde583e7 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Thu, 13 Dec 2018 21:13:56 -0800
Subject: [PATCH 579/873] Fixes for outside compilation in "If".

1. Add a control edge from predicate sending node to "If" node, so in XlaCompiler we visit "If" node after predicate sending node, and when we visit "If" node, token output for predicate sending node is already available.

2. When lowering "If" node in XLA, the extra token output comes after resource updates, so its index should be "ctx->num_outputs() + num_resource_args" (for "If", we treat each resource arg as a resource update).

PiperOrigin-RevId: 225484666
---
 .../compiler/jit/extract_outside_compilation_pass.cc  |  5 +++++
 .../jit/extract_outside_compilation_pass_test.cc      | 11 ++++++++++-
 tensorflow/compiler/tf2xla/kernels/if_op.cc           | 11 +++++++++--
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
index baf8507f4e..1906f1ac85 100644
--- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
+++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
@@ -1255,6 +1255,11 @@ Status ExtractOutsideCompilationForNodesWithAssociatedFunctions(
     n->AddAttr(kXlaTokenInputNodesAttrName,
                std::vector<string>{send_pred_node->name()});
 
+    // Add a control edge from `send_pred_node` to If node, so XlaCompiler will
+    // visit If node after `send_pred_node`, thus the token output for
+    // `send_pred_node` has been generated.
+    g->AddControlEdge(send_pred_node, n);
+
     // Build host side graph for the "If" node.
     string oc_host_graph_name = absl::StrCat("oc_if_host_graph_", n->name());
     TF_RETURN_IF_ERROR(BuildHostGraphForIfNode(
diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc
index 0887fbcde9..e9a89e34e0 100644
--- a/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc
+++ b/tensorflow/compiler/jit/extract_outside_compilation_pass_test.cc
@@ -627,9 +627,18 @@ TEST(ExtractOutsideCompilationForFunctionTest, OutsideCompilationInIf) {
     Graph *xla_graph = xla_fbody->graph;
     auto node_name_index = xla_graph->BuildNodeNameIndex();
 
-    // Check that we have XlaSendToHost to send cond predicate to host.
+    // Check that we have XlaSendToHost to send cond predicate to host, and
+    // there is a control edge to If node.
     Node *send_if_pred_node = node_name_index["send_oc_if_pred_if"];
     EXPECT_NE(send_if_pred_node, nullptr);
+    bool has_control_edge_to_if = false;
+    for (const Edge *e : send_if_pred_node->out_edges()) {
+      if (e->IsControlEdge() && e->dst()->name() == "if") {
+        has_control_edge_to_if = true;
+        break;
+      }
+    }
+    EXPECT_TRUE(has_control_edge_to_if);
 
     // Check that the "If" node now has `send_if_pred_node` as attribute
     // _xla_token_input_nodes.
diff --git a/tensorflow/compiler/tf2xla/kernels/if_op.cc b/tensorflow/compiler/tf2xla/kernels/if_op.cc
index b5e0839125..4f0f0fd9ae 100644
--- a/tensorflow/compiler/tf2xla/kernels/if_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/if_op.cc
@@ -56,6 +56,7 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) {
   VLOG(1) << "Building If: " << input_types_.size() << " inputs";
 
   std::vector<XlaCompiler::Argument> arguments(input_types_.size());
+  int num_resource_args = 0;
   for (int i = 0; i < input_types_.size(); ++i) {
     XlaCompiler::Argument& arg = arguments[i];
     DataType type = ctx->input_type(i + 1);
@@ -81,6 +82,8 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) {
               << " type: " << DataTypeString(arg.type)
               << " shape: " << arg.shape.DebugString()
               << " initialized: " << arg.initialized;
+
+      num_resource_args++;
     } else {
       arg.kind = XlaCompiler::Argument::kParameter;
       arg.type = input_types_[i];
@@ -236,9 +239,13 @@ void XlaIfOp::Compile(XlaOpKernelContext* ctx) {
     ctx->SetOutput(i, output_handle);
   }
   if (has_token_input_output_) {
-    // Set token output for this "if" op.
+    // Set token output for this "If" op. Token output is the last output of
+    // XLA computation, which comes after all "normal" TF outputs and resource
+    // updates. For "If" node, num of resource updates equals to number of
+    // resource args because we set `return_updated_values_for_all_resources`
+    // to true in XlaCompiler option.
     xla::XlaOp token_output =
-        xla::GetTupleElement(outputs, output_types_.size());
+        xla::GetTupleElement(outputs, output_types_.size() + num_resource_args);
     auto shape_or = b->GetShape(token_output);
     OP_REQUIRES_OK(ctx, shape_or.status());
     OP_REQUIRES(ctx, xla::ShapeUtil::IsToken(shape_or.ValueOrDie()),
-- 
GitLab


From 16f68575b53e7528e7d8fd2d6449fc8369259ab7 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Thu, 13 Dec 2018 22:12:48 -0800
Subject: [PATCH 580/873] Fixes in benchmark.

PiperOrigin-RevId: 225489160
---
 .../benchmarks/map_defun_benchmark.py         | 26 +++++++------
 .../benchmarks/optimize_benchmark.py          | 39 ++++++++++---------
 .../rejection_resample_benchmark.py           |  8 ++--
 3 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/tensorflow/python/data/experimental/benchmarks/map_defun_benchmark.py b/tensorflow/python/data/experimental/benchmarks/map_defun_benchmark.py
index 21e7ddaf7b..49297ca7c5 100644
--- a/tensorflow/python/data/experimental/benchmarks/map_defun_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/map_defun_benchmark.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 
 import time
 
+from tensorflow.python.client import session
 from tensorflow.python.data.experimental.ops import map_defun
 from tensorflow.python.eager import function
 from tensorflow.python.framework import dtypes
@@ -34,18 +35,19 @@ class MapDefunBenchmark(test.Benchmark):
   """Benchmarks for MapDefunOp."""
 
   def _run(self, op, name=None, num_iters=3000):
-    for _ in range(5):
-      self.evaluate(op)
-    start = time.time()
-    for _ in range(num_iters):
-      self.evaluate(op)
-    end = time.time()
-    mean_us = (end - start) * 1e6 / num_iters
-    self.report_benchmark(
-        name=name,
-        iters=num_iters,
-        wall_time=mean_us,
-        extras={"examples_per_sec": num_iters / (end - start)})
+    with session.Session() as sess:
+      for _ in range(5):
+        sess.run(op)
+      start = time.time()
+      for _ in range(num_iters):
+        sess.run(op)
+      end = time.time()
+      mean_us = (end - start) * 1e6 / num_iters
+      self.report_benchmark(
+          name=name,
+          iters=num_iters,
+          wall_time=mean_us,
+          extras={"examples_per_sec": num_iters / (end - start)})
 
   def benchmarkDefunVsMapFn(self):
     """Benchmarks to compare the performance of MapDefun vs tf.map_fn."""
diff --git a/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py b/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
index 5df57a370c..73c21d17aa 100644
--- a/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
@@ -121,8 +121,8 @@ class OptimizationBenchmark(test.Benchmark):
   def benchmarkFilterFusion(self):
     chain_lengths = [0, 1, 2, 5, 10, 20, 50]
     for chain_length in chain_lengths:
-      self._benchmarkFilters(chain_length, False)
-      self._benchmarkFilters(chain_length, True)
+      self._benchmarkFilterFusion(chain_length, False)
+      self._benchmarkFilterFusion(chain_length, True)
 
   def _benchmarkFilterFusion(self, chain_length, optimize_dataset):
     with ops.Graph().as_default():
@@ -137,24 +137,25 @@ class OptimizationBenchmark(test.Benchmark):
       iterator = dataset_ops.make_one_shot_iterator(dataset)
       next_element = iterator.get_next()
 
-      for _ in range(10):
-        self.evaluate(next_element.op)
-      deltas = []
-      for _ in range(100):
-        start = time.time()
+      with session.Session() as sess:
+        for _ in range(10):
+          sess.run(next_element.op)
+        deltas = []
         for _ in range(100):
-          self.evaluate(next_element.op)
-        end = time.time()
-        deltas.append(end - start)
-
-      median_wall_time = np.median(deltas) / 100
-      opt_mark = "opt" if optimize_dataset else "no-opt"
-      print("Filter dataset {} chain length: {} Median wall time: {}".format(
-          opt_mark, chain_length, median_wall_time))
-      self.report_benchmark(
-          iters=1000,
-          wall_time=median_wall_time,
-          name="chain_length_{}_{}".format(opt_mark, chain_length))
+          start = time.time()
+          for _ in range(100):
+            sess.run(next_element.op)
+          end = time.time()
+          deltas.append(end - start)
+
+        median_wall_time = np.median(deltas) / 100
+        opt_mark = "opt" if optimize_dataset else "no-opt"
+        print("Filter dataset {} chain length: {} Median wall time: {}".format(
+            opt_mark, chain_length, median_wall_time))
+        self.report_benchmark(
+            iters=1000,
+            wall_time=median_wall_time,
+            name="chain_length_{}_{}".format(opt_mark, chain_length))
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/benchmarks/rejection_resample_benchmark.py b/tensorflow/python/data/experimental/benchmarks/rejection_resample_benchmark.py
index 4cd8c4b73a..a64f7ecb00 100644
--- a/tensorflow/python/data/experimental/benchmarks/rejection_resample_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/rejection_resample_benchmark.py
@@ -22,13 +22,13 @@ import time
 import numpy as np
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.client import session
 from tensorflow.python.data.experimental.ops import resampling
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.platform import test
 
 
-def _time_resampling(
-    test_obj, data_np, target_dist, init_dist, num_to_sample):
+def _time_resampling(data_np, target_dist, init_dist, num_to_sample):  # pylint: disable=missing-docstring
   dataset = dataset_ops.Dataset.from_tensor_slices(data_np).repeat()
 
   # Reshape distribution via rejection sampling.
@@ -41,7 +41,7 @@ def _time_resampling(
 
   get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
 
-  with test_obj.test_session() as sess:
+  with session.Session() as sess:
     start_time = time.time()
     for _ in xrange(num_to_sample):
       sess.run(get_next)
@@ -62,7 +62,7 @@ class RejectionResampleBenchmark(test.Benchmark):
     data_np = np.random.choice(num_classes, num_samples, p=init_dist)
 
     resample_time = _time_resampling(
-        self, data_np, target_dist, init_dist, num_to_sample=1000)
+        data_np, target_dist, init_dist, num_to_sample=1000)
 
     self.report_benchmark(iters=1000, wall_time=resample_time, name="resample")
 
-- 
GitLab


From 040ee45aa83607fd057b21251407d20c7eb8bf18 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Thu, 13 Dec 2018 23:15:59 -0800
Subject: [PATCH 581/873] Add hwloc dependency package to TF build

PiperOrigin-RevId: 225493211
---
 tensorflow/workspace.bzl        |  2 +
 third_party/hwloc/BUILD         |  1 +
 third_party/hwloc/BUILD.bazel   | 87 +++++++++++++++++++++++++++++++++
 third_party/hwloc/workspace.bzl | 15 ++++++
 4 files changed, 105 insertions(+)
 create mode 100644 third_party/hwloc/BUILD
 create mode 100644 third_party/hwloc/BUILD.bazel
 create mode 100644 third_party/hwloc/workspace.bzl

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index a2b96d7491..aefab03b6d 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -23,6 +23,7 @@ load(
 load("//third_party/aws:workspace.bzl", aws = "repo")
 load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo")
 load("//third_party/highwayhash:workspace.bzl", highwayhash = "repo")
+load("//third_party/hwloc:workspace.bzl", hwloc = "repo")
 load("//third_party/icu:workspace.bzl", icu = "repo")
 load("//third_party/jpeg:workspace.bzl", jpeg = "repo")
 load("//third_party/nasm:workspace.bzl", nasm = "repo")
@@ -34,6 +35,7 @@ def initialize_third_party():
     aws()
     flatbuffers()
     highwayhash()
+    hwloc()
     icu()
     keras_applications()
     kissfft()
diff --git a/third_party/hwloc/BUILD b/third_party/hwloc/BUILD
new file mode 100644
index 0000000000..2f5d02becb
--- /dev/null
+++ b/third_party/hwloc/BUILD
@@ -0,0 +1 @@
+# Dummy BUILD file to make this directory a package.
diff --git a/third_party/hwloc/BUILD.bazel b/third_party/hwloc/BUILD.bazel
new file mode 100644
index 0000000000..b73267d668
--- /dev/null
+++ b/third_party/hwloc/BUILD.bazel
@@ -0,0 +1,87 @@
+# hwloc: Portable Hardware Locality Library
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+COMMON_INCLUDE_COPTS = [
+    "-I.",
+    "-Ihwloc",
+    "-Iinclude",
+]
+
+DISABLE_WARNINGS_COPTS = [
+    "-Wno-vla",
+]
+
+VAR_SETTINGS_COPTS = [
+    "-DHWLOC_DUMPED_HWDATA_DIR=",
+    "-DRUNSTATEDIR=",
+]
+
+cc_library(
+    name = "hwloc",
+    srcs = [
+        "hwloc/base64.c",
+        "hwloc/bind.c",
+        "hwloc/bitmap.c",
+        "hwloc/components.c",
+        "hwloc/diff.c",
+        "hwloc/distances.c",
+        "hwloc/misc.c",
+        "hwloc/pci-common.c",
+        "hwloc/shmem.c",
+        "hwloc/static-components.h",
+        "hwloc/topology.c",
+        "hwloc/topology-hardwired.c",
+        "hwloc/topology-linux.c",
+        "hwloc/topology-noos.c",
+        "hwloc/topology-synthetic.c",
+        "hwloc/topology-x86.c",
+        "hwloc/topology-xml.c",
+        "hwloc/topology-xml-nolibxml.c",
+        "hwloc/traversal.c",
+        "include/hwloc/linux.h",
+        "include/hwloc/plugins.h",
+        "include/hwloc/shmem.h",
+        "include/private/autogen/config.h",
+        "include/private/components.h",
+        "include/private/cpuid-x86.h",
+        "include/private/debug.h",
+        "include/private/internal-components.h",
+        "include/private/misc.h",
+        "include/private/private.h",
+        "include/private/xml.h",
+    ],
+    hdrs = [
+        "include/hwloc.h",
+        "include/hwloc/autogen/config.h",
+        "include/hwloc/bitmap.h",
+        "include/hwloc/deprecated.h",
+        "include/hwloc/diff.h",
+        "include/hwloc/distances.h",
+        "include/hwloc/export.h",
+        "include/hwloc/helper.h",
+        "include/hwloc/inlines.h",
+        "include/hwloc/rename.h",
+    ],
+    copts = COMMON_INCLUDE_COPTS + DISABLE_WARNINGS_COPTS + VAR_SETTINGS_COPTS,
+    features = [
+        "-parse_headers",
+        "-layering_check",
+    ],
+    deps = [],
+)
+
+cc_binary(
+    name = "hwloc_print",
+    srcs = ["hwloc_print.cc"],
+    copts = COMMON_INCLUDE_COPTS,
+    deps = [
+        ":hwloc",
+    ],
+)
diff --git a/third_party/hwloc/workspace.bzl b/third_party/hwloc/workspace.bzl
new file mode 100644
index 0000000000..47a143c8a0
--- /dev/null
+++ b/third_party/hwloc/workspace.bzl
@@ -0,0 +1,15 @@
+"""loads the hwloc library, used by TF."""
+
+load("//third_party:repo.bzl", "third_party_http_archive")
+
+def repo():
+    third_party_http_archive(
+        name = "hwloc",
+        urls = [
+            "http://mirror.bazel.build/download.open-mpi.org/release/hwloc/v2.0/hwloc-2.0.3.tar.gz",
+            "https://download.open-mpi.org/release/hwloc/v2.0/hwloc-2.0.3.tar.gz",
+        ],
+        sha256 = "64def246aaa5b3a6e411ce10932a22e2146c3031b735c8f94739534f06ad071c",
+        strip_prefix = "hwloc-2.0.3",
+        build_file = "//third_party/hwloc:BUILD.bazel",
+    )
-- 
GitLab


From c364a5d786335e2993893a6789aeec4ca6879810 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 01:02:52 -0800
Subject: [PATCH 582/873] compat: Update forward compatibility horizon to
 2018-12-14

PiperOrigin-RevId: 225502633
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 82ed7da830..bd0e38c823 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 13)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 14)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 61e9a72b112f93461f83e868db732d30ad6665e8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 02:12:19 -0800
Subject: [PATCH 583/873] Delete unused target.

It has the same sources as the fused_batch_norm_op kernel and thus quickly leads to brittle links when used.

PiperOrigin-RevId: 225510089
---
 tensorflow/core/kernels/BUILD | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index e2234c1f9d..d519b2426e 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3972,19 +3972,6 @@ tf_kernel_library(
     alwayslink = 1,
 )
 
-tf_kernel_library(
-    name = "fused_batch_norm_util",
-    gpu_srcs = [
-        "fused_batch_norm_op.h",
-        "fused_batch_norm_op.cu.cc",
-    ],
-    deps = [
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//third_party/eigen3",
-    ],
-)
-
 cc_library(
     name = "pooling_ops_hdrs",
     hdrs = [
-- 
GitLab


From 57f43d532faf69a1c5a759ba7068adf33be46c6b Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <slebedev@google.com>
Date: Fri, 14 Dec 2018 04:50:13 -0800
Subject: [PATCH 584/873] Pulled `RefVariable.initialized_value` and
 `_shared_name` to `Variable`

They can be implemented in a generic way in terms of other `Variable`
properties/methods.

PiperOrigin-RevId: 225523261
---
 tensorflow/python/ops/variables.py | 53 +++++++++---------------------
 1 file changed, 16 insertions(+), 37 deletions(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 1dc96efa0b..fcc4a5275c 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -506,7 +506,10 @@ class Variable(six.with_metaclass(VariableMetaclass,
       A `Tensor` holding the value of this variable after its initializer
       has run.
     """
-    raise NotImplementedError
+    with ops.init_scope():
+      return control_flow_ops.cond(is_variable_initialized(self),
+                                   self.read_value,
+                                   lambda: self.initial_value)
 
   @property
   def initial_value(self):
@@ -963,6 +966,18 @@ class Variable(six.with_metaclass(VariableMetaclass,
     """The name of this variable."""
     raise NotImplementedError
 
+  @property
+  def _shared_name(self):
+    """The shared name of the variable.
+
+      Unlike name(), shared_name doesn't have ":0" suffix. It is user-specified
+      name with name scope prefix.
+
+    Returns:
+      variable name.
+    """
+    return self.name[:self.name.index(":")]
+
   @property
   def initializer(self):
     """The initializer operation for this variable."""
@@ -1694,30 +1709,6 @@ class RefVariable(VariableV1):
     """
     return self._variable.eval(session=session)
 
-  def initialized_value(self):
-    """Returns the value of the initialized variable.
-
-    You should use this instead of the variable itself to initialize another
-    variable with a value that depends on the value of this variable.
-
-    ```python
-    # Initialize 'v' with a random tensor.
-    v = tf.Variable(tf.truncated_normal([10, 40]))
-    # Use `initialized_value` to guarantee that `v` has been
-    # initialized before its value is used to initialize `w`.
-    # The random values are picked only once.
-    w = tf.Variable(v.initialized_value() * 2.0)
-    ```
-
-    Returns:
-      A `Tensor` holding the value of this variable after its initializer
-      has run.
-    """
-    with ops.init_scope():
-      return control_flow_ops.cond(is_variable_initialized(self),
-                                   self.read_value,
-                                   lambda: self.initial_value)
-
   @property
   def initial_value(self):
     """Returns the Tensor used as the initial value for the variable.
@@ -2330,18 +2321,6 @@ class RefVariable(VariableV1):
     """The name of this variable."""
     return self._variable.name
 
-  @property
-  def _shared_name(self):
-    """The shared name of the variable.
-
-      Unlike name(), shared_name doesn't have ":0" suffix. It is user-specified
-      name with name scope prefix.
-
-    Returns:
-      variable name.
-    """
-    return self.name[:-2]
-
   @property
   def initializer(self):
     """The initializer operation for this variable."""
-- 
GitLab


From 06f311a596b515de8f81a9cf73f0914b5421def1 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Fri, 14 Dec 2018 05:04:43 -0800
Subject: [PATCH 585/873] Update the default cuda version windows GPU builds
 default to.

PiperOrigin-RevId: 225524673
---
 tensorflow/tools/ci_build/windows/bazel/common_env.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/windows/bazel/common_env.sh b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
index 34376f1481..9c6825f271 100644
--- a/tensorflow/tools/ci_build/windows/bazel/common_env.sh
+++ b/tensorflow/tools/ci_build/windows/bazel/common_env.sh
@@ -55,9 +55,9 @@ export PATH="/c/Program Files/Git/cmd:$PATH"
 export PATH="/c/${PYTHON_BASE_PATH}/Scripts:$PATH"
 
 # Setting default values to CUDA related environment variables
-export TF_CUDA_VERSION=${TF_CUDA_VERSION:-9.0}
+export TF_CUDA_VERSION=${TF_CUDA_VERSION:-10.0}
 export TF_CUDNN_VERSION=${TF_CUDNN_VERSION:-7}
-export TF_CUDA_COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES:-3.7}
+export TF_CUDA_COMPUTE_CAPABILITIES=${TF_CUDA_COMPUTE_CAPABILITIES:-6.0}
 export CUDA_TOOLKIT_PATH=${CUDA_TOOLKIT_PATH:-"C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${TF_CUDA_VERSION}"}
 export CUDNN_INSTALL_PATH=${CUDNN_INSTALL_PATH:-"C:/tools/cuda"}
 
-- 
GitLab


From d65c7fea40861351882626a6b42e281e11f35792 Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <slebedev@google.com>
Date: Fri, 14 Dec 2018 05:20:15 -0800
Subject: [PATCH 586/873] Pulled `RefVariable.load` to `Variable`

`load` can be fully-defined in terms of other `Variable` methods.

This change also removes the need to define both `get_shape` and
`shape` in `Variable` implementations.

PiperOrigin-RevId: 225525803
---
 tensorflow/python/ops/variables.py | 61 ++++++------------------------
 1 file changed, 11 insertions(+), 50 deletions(-)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index fcc4a5275c..d7d064aba1 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -896,7 +896,15 @@ class Variable(six.with_metaclass(VariableMetaclass,
     Raises:
         ValueError: Session is not passed and no default session
     """
-    raise NotImplementedError
+    if context.executing_eagerly():
+      self.assign(value)
+    else:
+      session = session or ops.get_default_session()
+      if session is None:
+        raise ValueError(
+            "Either session argument should be provided or default session "
+            "should be established")
+      session.run(self.initializer, {self.initializer.inputs[1]: value})
 
   # Conversion to tensor.
   @staticmethod
@@ -1013,8 +1021,8 @@ class Variable(six.with_metaclass(VariableMetaclass,
     raise NotImplementedError
 
   def get_shape(self):
-    """Alias of Variable.shape."""
-    raise NotImplementedError
+    """Alias of `Variable.shape`."""
+    return self.shape
 
   def to_proto(self, export_scope=None):
     """Converts a `Variable` to a `VariableDef` protocol buffer.
@@ -2122,49 +2130,6 @@ class RefVariable(VariableV1):
     """
     return state_ops.count_up_to(self._variable, limit=limit)
 
-  def load(self, value, session=None):
-    """Load new value into this variable.
-
-    Writes new value to variable's memory. Doesn't add ops to the graph.
-
-    This convenience method requires a session where the graph
-    containing this variable has been launched. If no session is
-    passed, the default session is used.  See `tf.Session` for more
-    information on launching a graph and on sessions.
-
-    ```python
-    v = tf.Variable([1, 2])
-    init = tf.global_variables_initializer()
-
-    with tf.Session() as sess:
-        sess.run(init)
-        # Usage passing the session explicitly.
-        v.load([2, 3], sess)
-        print(v.eval(sess)) # prints [2 3]
-        # Usage with the default session.  The 'with' block
-        # above makes 'sess' the default session.
-        v.load([3, 4], sess)
-        print(v.eval()) # prints [3 4]
-    ```
-
-    Args:
-        value: New variable value
-        session: The session to use to evaluate this variable. If
-          none, the default session is used.
-
-    Raises:
-        ValueError: Session is not passed and no default session
-    """
-    if context.executing_eagerly():
-      self.assign(value)
-    else:
-      session = session or ops.get_default_session()
-      if session is None:
-        raise ValueError(
-            "Either session argument should be provided or default session "
-            "should be established")
-      session.run(self._initializer_op, {self._initializer_op.inputs[1]: value})
-
   # Conversion to tensor.
   @staticmethod
   def _TensorConversionFunction(v, dtype=None, name=None, as_ref=False):  # pylint: disable=invalid-name
@@ -2355,10 +2320,6 @@ class RefVariable(VariableV1):
     """
     return self._variable.get_shape()
 
-  def get_shape(self):
-    """Alias of Variable.shape."""
-    return self.shape
-
   def to_proto(self, export_scope=None):
     """Converts a `Variable` to a `VariableDef` protocol buffer.
 
-- 
GitLab


From ab6229b58371c8f1c384a8a77d2bec5f72b4d990 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 14 Dec 2018 06:11:14 -0800
Subject: [PATCH 587/873] [XLA] Fix crash if a zero-element array was passed to
 TriangularSolve.

In passing, remove redundant xla:: prefixes from the triangular solve test and reformat.

PiperOrigin-RevId: 225530438
---
 .../xla/client/lib/triangular_solve.cc        |   6 +
 .../xla/client/lib/triangular_solve_test.cc   | 132 ++++++++++--------
 2 files changed, 77 insertions(+), 61 deletions(-)

diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve.cc b/tensorflow/compiler/xla/client/lib/triangular_solve.cc
index c5a1d34cc6..ac58090dfe 100644
--- a/tensorflow/compiler/xla/client/lib/triangular_solve.cc
+++ b/tensorflow/compiler/xla/client/lib/triangular_solve.cc
@@ -393,6 +393,12 @@ XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
           block_size);
     }
 
+    if (ShapeUtil::IsZeroElementArray(b_shape)) {
+      // The output has the same shape as 'b', and since the output has zero
+      // elements, any such array will do.
+      return b;
+    }
+
     // We find the diagonal blocks of the coefficient matrix
     auto diag_blocks = DiagonalBlocks(a, block_size);
 
diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc b/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
index f6a70d64a7..d0188e8ea0 100644
--- a/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
+++ b/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
@@ -33,56 +33,68 @@ limitations under the License.
 namespace xla {
 namespace {
 
-using TriangularSolveTest = xla::ClientLibraryTestBase;
-using TriangularSolveLeftLookingTest = xla::ClientLibraryTestBase;
-using complex64 = xla::complex64;
+using TriangularSolveTest = ClientLibraryTestBase;
+using TriangularSolveLeftLookingTest = ClientLibraryTestBase;
 
-xla::Array2D<float> AValsLower() {
+Array2D<float> AValsLower() {
   return {{2, 0, 0, 0}, {3, 6, 0, 0}, {4, 7, 9, 0}, {5, 8, 10, 11}};
 }
 
-xla::Array2D<float> AValsUpper() {
+Array2D<float> AValsUpper() {
   return {{2, 3, 4, 5}, {0, 6, 7, 8}, {0, 0, 9, 10}, {0, 0, 0, 11}};
 }
 
-xla::Array2D<float> BValsRight() {
+Array2D<float> BValsRight() {
   return {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}};
 }
 
-xla::Array2D<float> BValsLeft() {
+Array2D<float> BValsLeft() {
   return {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}};
 }
 
-xla::Array2D<complex64> AValsLowerComplex() {
+Array2D<complex64> AValsLowerComplex() {
   return {{2, 0, 0, 0},
           {complex64(3, 1), 6, 0, 0},
           {4, complex64(7, 2), 9, 0},
           {5, 8, complex64(10, 3), 11}};
 }
 
-xla::Array2D<complex64> AValsUpperComplex() {
+Array2D<complex64> AValsUpperComplex() {
   return {{2, 3, complex64(4, 3), 5},
           {0, 6, complex64(7, 2), 8},
           {0, 0, complex64(9, 1), 10},
           {0, 0, 0, 11}};
 }
 
-xla::Array2D<complex64> BValsRightComplex() {
+Array2D<complex64> BValsRightComplex() {
   return {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}};
 }
 
-xla::Array2D<complex64> BValsLeftComplex() {
+Array2D<complex64> BValsLeftComplex() {
   return {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}};
 }
 
-xla::Array2D<float> AValsFull() {
-  return {{2, 0, 1, 2}, {3, 6, 0, 1}, {4, 7, 9, 0}, {5, 8, 10, 11}};
+XLA_TEST_F(TriangularSolveTest, EmptyArrays) {
+  XlaBuilder builder(TestName());
+
+  XlaOp a, b;
+  auto a_data =
+      CreateR2Parameter<float>(Array2D<float>(0, 0), 0, "a", &builder, &a);
+  auto b_data =
+      CreateR2Parameter<float>(Array2D<float>(0, 10), 1, "b", &builder, &b);
+  TriangularSolve(a, b,
+                  /*left_side=*/true, /*lower=*/true,
+                  /*transpose_a=*/true, /*conjugate_a=*/false,
+                  /*block_size=*/2);
+
+  ComputeAndCompareR2<float>(&builder, Array2D<float>(0, 10),
+                             {a_data.get(), b_data.get()});
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTranspose) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -90,20 +102,20 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTranspose) {
                   /*transpose_a=*/true, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {0.5, 0.08333334, 0.04629629, 0.03367003},
       {2.5, -0.25, -0.1388889, -0.1010101},
       {4.5, -0.58333331, -0.32407406, -0.23569024},
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleRightLowerNotranspose) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -111,20 +123,20 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightLowerNotranspose) {
                   /*transpose_a=*/false, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {-0.16414141, -0.06902357, -0.07070707, 0.36363636},
       {0.64393939, 0.06565657, -0.03030303, 0.72727273},
       {1.4520202, 0.2003367, 0.01010101, 1.09090909},
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleRightUpperTranspose) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -132,20 +144,20 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightUpperTranspose) {
                   /*transpose_a=*/true, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {-0.16414141, -0.06902357, -0.07070707, 0.36363636},
       {0.64393939, 0.06565657, -0.03030303, 0.72727273},
       {1.4520202, 0.2003367, 0.01010101, 1.09090909},
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleRightUpperNotranspose) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsRight(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -153,20 +165,20 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightUpperNotranspose) {
                   /*transpose_a=*/false, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {0.5, 0.08333334, 0.04629629, 0.03367003},
       {2.5, -0.25, -0.1388889, -0.1010101},
       {4.5, -0.58333331, -0.32407406, -0.23569024},
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerTranspose) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -174,7 +186,7 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerTranspose) {
                   /*transpose_a=*/true, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {-0.89646465, -0.69444444, -0.49242424},
       {-0.27441077, -0.24074074, -0.20707071},
       {-0.23232323, -0.22222222, -0.21212121},
@@ -182,13 +194,13 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerTranspose) {
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotranspose) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -196,7 +208,7 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotranspose) {
                   /*transpose_a=*/false, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {0.5, 1.0, 1.5},
       {0.41666667, 0.33333333, 0.25},
       {0.23148148, 0.18518519, 0.13888889},
@@ -204,13 +216,13 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotranspose) {
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotransposeIrregularblock) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsLower(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -218,7 +230,7 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotransposeIrregularblock) {
                   /*transpose_a=*/false, /*conjugate_a=*/false,
                   /*block_size=*/3);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {0.5, 1.0, 1.5},
       {0.41666667, 0.33333333, 0.25},
       {0.23148148, 0.18518519, 0.13888889},
@@ -226,13 +238,13 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftLowerNotransposeIrregularblock) {
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTranspose) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -240,7 +252,7 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTranspose) {
                   /*transpose_a=*/true, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {0.5, 1.0, 1.5},
       {0.41666667, 0.33333333, 0.25},
       {0.23148148, 0.18518519, 0.13888889},
@@ -248,13 +260,13 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTranspose) {
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotranspose) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data = CreateR2Parameter<float>(AValsUpper(), 0, "a", &builder, &a);
   auto b_data = CreateR2Parameter<float>(BValsLeft(), 1, "b", &builder, &b);
   TriangularSolve(a, b,
@@ -262,7 +274,7 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotranspose) {
                   /*transpose_a=*/false, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<float> expected({
+  Array2D<float> expected({
       {-0.89646465, -0.69444444, -0.49242424},
       {-0.27441077, -0.24074074, -0.20707071},
       {-0.23232323, -0.22222222, -0.21212121},
@@ -270,13 +282,13 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperNotranspose) {
   });
 
   ComputeAndCompareR2<float>(&builder, expected, {a_data.get(), b_data.get()},
-                             xla::ErrorSpec(1e-2, 1e-2));
+                             ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTransposeConjugate) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data =
       CreateR2Parameter<complex64>(AValsLowerComplex(), 0, "a", &builder, &a);
   auto b_data =
@@ -286,7 +298,7 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTransposeConjugate) {
                   /*transpose_a=*/true, /*conjugate_a=*/true,
                   /*block_size=*/2);
 
-  xla::Array2D<complex64> expected({
+  Array2D<complex64> expected({
       {0.5, complex64(0.08333333, 0.08333333),
        complex64(0.02777778, -0.0462963), complex64(0.06313131, -0.01094276)},
       {2.5, complex64(-0.25, 0.41666667), complex64(-0.23148148, -0.37962963),
@@ -295,15 +307,14 @@ XLA_TEST_F(TriangularSolveTest, SimpleRightLowerTransposeConjugate) {
        complex64(0.11026936, -0.03114478)},
   });
 
-  ComputeAndCompareR2<complex64>(&builder, expected,
-                                 {a_data.get(), b_data.get()},
-                                 xla::ErrorSpec(1e-2, 1e-2));
+  ComputeAndCompareR2<complex64>(
+      &builder, expected, {a_data.get(), b_data.get()}, ErrorSpec(1e-2, 1e-2));
 }
 
 XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTransposeNoconjugate) {
-  xla::XlaBuilder builder(TestName());
+  XlaBuilder builder(TestName());
 
-  xla::XlaOp a, b;
+  XlaOp a, b;
   auto a_data =
       CreateR2Parameter<complex64>(AValsUpperComplex(), 0, "a", &builder, &a);
   auto b_data =
@@ -313,7 +324,7 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTransposeNoconjugate) {
                   /*transpose_a=*/true, /*conjugate_a=*/false,
                   /*block_size=*/2);
 
-  xla::Array2D<complex64> expected({
+  Array2D<complex64> expected({
       {0.5, 1., 1.5},
       {0.41666667, 0.33333333, 0.25},
       {complex64(0.20020325, -2.81504065e-01),
@@ -324,9 +335,8 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTransposeNoconjugate) {
        complex64(0.15798226, 5.12749446e-01)},
   });
 
-  ComputeAndCompareR2<complex64>(&builder, expected,
-                                 {a_data.get(), b_data.get()},
-                                 xla::ErrorSpec(1e-2, 1e-2));
+  ComputeAndCompareR2<complex64>(
+      &builder, expected, {a_data.get(), b_data.get()}, ErrorSpec(1e-2, 1e-2));
 }
 
 }  // namespace
-- 
GitLab


From 7979eff9bfaf1001e4b916c9ee4c7744587e36a1 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Fri, 14 Dec 2018 06:55:00 -0800
Subject: [PATCH 588/873] Only convert BackpropFilterConv to depthwise
 convolution if format is NHWC.

The logic for the conversion has this assumption. This CL makes sure that this
assumption holds, and adds tests for NCHW format.

PiperOrigin-RevId: 225534508
---
 .../compiler/tests/depthwise_conv_op_test.py  | 43 +++++++++++++++++--
 .../tf2xla/kernels/conv_op_helpers.cc         |  4 +-
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/tensorflow/compiler/tests/depthwise_conv_op_test.py b/tensorflow/compiler/tests/depthwise_conv_op_test.py
index 6183d3ed5b..90146e6b27 100644
--- a/tensorflow/compiler/tests/depthwise_conv_op_test.py
+++ b/tensorflow/compiler/tests/depthwise_conv_op_test.py
@@ -350,8 +350,13 @@ class DepthwiseConv2DTest(xla_test.XLATestCase):
       self._CompareBackpropInput(input_size, filter_size, output_size, stride,
                                  padding)
 
-  def _CompareBackpropFilter(self, input_sizes, filter_sizes, output_sizes,
-                             stride, padding):
+  def _CompareBackpropFilter(self,
+                             input_sizes,
+                             filter_sizes,
+                             output_sizes,
+                             stride,
+                             padding,
+                             data_format="NHWC"):
     x0 = np.random.rand(*input_sizes).astype(np.float32)
     x2 = np.random.rand(*output_sizes).astype(np.float32)
 
@@ -360,13 +365,30 @@ class DepthwiseConv2DTest(xla_test.XLATestCase):
         t0 = array_ops.placeholder(np.float32, shape=input_sizes)
         t1 = constant_op.constant(filter_sizes, shape=[len(filter_sizes)])
         t2 = array_ops.placeholder(np.float32, shape=output_sizes)
+        native_t0 = t0
+        native_t2 = t2
+        strides = [1, stride, stride, 1]
+
         if use_xla:
+          if data_format == "NCHW":
+            # Transpose from NWHC input to NCHW
+            # Ex. [4, 5, 5, 48] to [4, 48, 5, 5]
+            native_t0 = array_ops.transpose(t0, [0, 3, 1, 2])
+            native_t2 = array_ops.transpose(t2, [0, 3, 1, 2])
+            strides = [1, 1, stride, stride]
           with self.test_scope():
             backprop = nn_ops.depthwise_conv2d_native_backprop_filter(
-                t0, t1, t2, strides=[1, stride, stride, 1], padding=padding)
+                native_t0,
+                t1,
+                native_t2,
+                strides=strides,
+                padding=padding,
+                data_format=data_format)
         else:
+          # For CPU, the format NCHW is not supported. Therefore we always use
+          # NHWC here.
           backprop = nn_ops.depthwise_conv2d_native_backprop_filter(
-              t0, t1, t2, strides=[1, stride, stride, 1], padding=padding)
+              native_t0, t1, native_t2, strides=strides, padding=padding)
         ret = backprop.eval({t0: x0, t2: x2})
         self.assertShapeEqual(ret, backprop)
         return ret
@@ -384,6 +406,19 @@ class DepthwiseConv2DTest(xla_test.XLATestCase):
       self._CompareBackpropFilter(input_size, filter_size, output_size,
                                   stride, padding)
 
+  def testDepthwiseConv2DFilterGradFormatNCHWCompare(self):
+    for index, (input_size, filter_size, output_size, stride,
+                padding) in enumerate(ConfigsToTest()):
+      print("Testing DepthwiseConv2DFilterGradFormatNCHWCompare,", index,
+            "th config:", input_size, "*", filter_size, "producing output",
+            output_size, "stride:", stride, "padding:", padding)
+      self._CompareBackpropFilter(
+          input_size,
+          filter_size,
+          output_size,
+          stride,
+          padding,
+          data_format="NCHW")
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
index 399e6e1187..4124b258c7 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
@@ -434,8 +434,10 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
   }
 
   // We use this approach only for depthwise convolutions where feature counts
-  // are large but space dimensions are small.
+  // are large but space dimensions are small. The conversion logic below
+  // assumes that the data format is NHWC, so we also check that here.
   bool should_perform_depthwise_conv =
+      attrs.data_format == FORMAT_NHWC &&
       (total_spatial_size < dims.in_depth) &&
       filter_tensor_shape.dim_size(num_dims - 1) == 1 && attrs.depthwise;
 
-- 
GitLab


From 8b435b7a8d2eb0ef24fba7eec0984c6578be1707 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 07:29:22 -0800
Subject: [PATCH 589/873] Improvement to RaggedTensor documentation.

PiperOrigin-RevId: 225538433
---
 .../python/ops/ragged/ragged_dispatch.py      | 81 ++++++++++++++++---
 1 file changed, 69 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/ops/ragged/ragged_dispatch.py b/tensorflow/python/ops/ragged/ragged_dispatch.py
index ecc7f5d611..bc64f9cc9e 100644
--- a/tensorflow/python/ops/ragged/ragged_dispatch.py
+++ b/tensorflow/python/ops/ragged/ragged_dispatch.py
@@ -377,11 +377,28 @@ _BINARY_ELEMENTWISE_OPS = [
 ]
 
 
+# We don't need to register a separate delegation handler for these v1 ops,
+# since they delegate to the v2 ops (which already have a handler).  But we
+# still want to include them in the ragged_op_list() output.
+_V1_OPS_THAT_DELEGATE_TO_V2_OPS = [
+    math_ops.reduce_sum,
+    math_ops.reduce_prod,
+    math_ops.reduce_min,
+    math_ops.reduce_max,
+    math_ops.reduce_mean,
+    math_ops.reduce_any,
+    math_ops.reduce_all,
+]
+
+
 def _ragged_gather_v1(params, indices, validate_indices=None, name=None,
                       axis=0):
-  return ragged_array_ops.gather(params=params, indices=indices,
-                                 validate_indices=validate_indices,
-                                 axis=axis, name=name)
+  return ragged_array_ops.gather(
+      params=params,
+      indices=indices,
+      validate_indices=validate_indices,
+      axis=axis,
+      name=name)
 
 
 def _ragged_expand_dims_v1(input, axis=None, name=None, dim=None):  # pylint: disable=redefined-builtin
@@ -450,16 +467,56 @@ def register_dispatchers():
     RaggedDispatcher(original_op, ragged_op, args).register(original_op)
 
 
-def ragged_op_list():
+def _ragged_op_signature(op, ragged_args):
+  """Returns a signature for the given op, marking ragged args in bold."""
+  op_name = tf_export.get_canonical_name_for_symbol(op)
+  argspec = tf_inspect.getfullargspec(op)
+  arg_names = argspec.args
+
+  # Mark ragged arguments in bold.
+  for pos in ragged_args:
+    arg_names[pos] = '**' + arg_names[pos] + '**'
+
+  # Add argument defaults.
+  for pos in range(-1, -len(argspec.defaults) - 1, -1):
+    arg_names[pos] += '=`{!r}`'.format(argspec.defaults[pos])
+
+  # Add varargs and keyword args
+  if argspec.varargs:
+    arg_names.append('*' + argspec.varargs)
+  if argspec.varkw:
+    arg_names.append('**' + argspec.varkw)
+
+  return '* `tf.{}`({})'.format(op_name, ', '.join(arg_names))
+
+
+def _op_is_in_tf_version(op, version):
+  if version == 1:
+    return (tf_export.get_v1_names(tf_decorator.unwrap(op)[1]) or
+            op in _V1_OPS_THAT_DELEGATE_TO_V2_OPS)
+  elif version == 2:
+    return tf_export.get_v2_names(tf_decorator.unwrap(op)[1])
+  else:
+    raise ValueError('Expected version 1 or 2.')
+
+
+def ragged_op_list(tf_version=1):
   """Returns a string listing operators that have dispathers registered."""
-  op_list = (
-      _UNARY_ELEMENTWISE_OPS + _UNARY_LIST_ELEMENTWISE_OPS +
-      _BINARY_ELEMENTWISE_OPS + [x[0] for x in _RAGGED_DISPATCH_OPS])
-  return (
-      '\n\n### Additional ops that support `RaggedTensor`\n\n' + '\n'.join([
-          '* `tf.%s`' % tf_export.get_canonical_name_for_symbol(op)
-          for op in op_list
-      ]))
+  lines = []
+  for op in _UNARY_ELEMENTWISE_OPS + _UNARY_LIST_ELEMENTWISE_OPS:
+    if _op_is_in_tf_version(op, tf_version):
+      lines.append(_ragged_op_signature(op, [0]))
+  for op in _BINARY_ELEMENTWISE_OPS:
+    if _op_is_in_tf_version(op, tf_version):
+      lines.append(_ragged_op_signature(op, [0, 1]))
+  for op, _, ragged_args in _RAGGED_DISPATCH_OPS:
+    if _op_is_in_tf_version(op, tf_version):
+      arginfos = _get_arg_infos(op, ragged_args)
+      ragged_args = [arginfo.position for arginfo in arginfos]
+      lines.append(_ragged_op_signature(op, ragged_args))
+  return ('\n\n### Additional ops that support `RaggedTensor`\n\n'
+          'Arguments that accept `RaggedTensor`s are marked in **bold**.\n\n' +
+          '\n'.join(sorted(lines)) + 'n')
 
 
 register_dispatchers()
-- 
GitLab


From 3aeb92527258ef97787664e7b9319b51a952a9b6 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 14 Dec 2018 07:30:28 -0800
Subject: [PATCH 590/873] [XLA:CPU] Add support for MKLDNN contraction kernel
 in runtime_matmul.cc

PiperOrigin-RevId: 225538587
---
 tensorflow/compiler/xla/service/cpu/BUILD             | 1 +
 tensorflow/compiler/xla/service/cpu/runtime_matmul.cc | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index ce4c2a9cc6..4173af5179 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -572,6 +572,7 @@ cc_library(
         ":runtime_matvec",
         "//tensorflow/compiler/xla:executable_run_options",
         "//tensorflow/core:framework_lite",
+        "//tensorflow/core/kernels:eigen_contraction_kernel",
         "//third_party/eigen3",
     ],
 )
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc
index a71a85913c..56f018abdd 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc
@@ -23,6 +23,10 @@ limitations under the License.
 #include "tensorflow/core/platform/dynamic_annotations.h"
 #include "tensorflow/core/platform/types.h"
 
+#if defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)
+#include "tensorflow/core/kernels/eigen_contraction_kernel.h"
+#endif
+
 using tensorflow::int32;
 using tensorflow::int64;
 
-- 
GitLab


From 13187e1566e74a1f9434f5bb16c0cddc076ac497 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 07:40:48 -0800
Subject: [PATCH 591/873] Add basic support for variables in object-based saved
 model.

PiperOrigin-RevId: 225539883
---
 tensorflow/python/saved_model/load.py          | 18 +++++++++++++++++-
 tensorflow/python/saved_model/load_test.py     | 14 ++++++++++++++
 tensorflow/python/saved_model/save.py          |  4 ++++
 .../saved_model/saved_object_graph.proto       | 12 ++++++++++++
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/saved_model/load.py b/tensorflow/python/saved_model/load.py
index 28c0af2b65..9d9f60c69d 100644
--- a/tensorflow/python/saved_model/load.py
+++ b/tensorflow/python/saved_model/load.py
@@ -22,12 +22,15 @@ import os
 
 from tensorflow.python.framework import function as function_lib
 from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.saved_model import constants
 from tensorflow.python.saved_model import function_deserialization
 from tensorflow.python.saved_model import loader_impl
 from tensorflow.python.saved_model import saved_object_graph_pb2
 from tensorflow.python.saved_model import utils_impl as saved_model_utils
 from tensorflow.python.training.checkpointable import tracking
+from tensorflow.python.training.checkpointable import util
 from tensorflow.python.util import compat
 
 
@@ -47,6 +50,7 @@ class _Loader(object):
       defined_function.add_to_graph(None)
       self._defined_functions[defined_function.name] = defined_function
     self._load_all()
+    self._restore_checkpoint()
 
   def _load_all(self):
     self._nodes = [self._recreate(proto) for proto in self._proto.nodes]
@@ -55,14 +59,21 @@ class _Loader(object):
       for reference in object_proto.children:
         setattr(obj, reference.local_name, self._nodes[reference.node_id])
 
+  def _restore_checkpoint(self):
+    variables_path = saved_model_utils.get_variables_path(self._export_dir)
+    saver = util.CheckpointableSaver(self.get(0))
+    saver.restore(variables_path).assert_consumed()
+
   def get(self, node_id):
     return self._nodes[node_id]
 
   def _recreate(self, proto):
+    """Creates a Python object from a SavedObject protocol buffer."""
     factory = {
         "user_object": lambda: self._recreate_user_object(proto.user_object),
         "asset": lambda: self._recreate_asset(proto.asset),
-        "function": lambda: self._recreate_function(proto.function)
+        "function": lambda: self._recreate_function(proto.function),
+        "variable": lambda: self._recreate_variable(proto.variable),
     }
     kind = proto.WhichOneof("kind")
     if kind not in factory:
@@ -83,6 +94,11 @@ class _Loader(object):
     return function_deserialization.recreate_polymorphic_function(
         proto, self._defined_functions)
 
+  def _recreate_variable(self, proto):
+    # TODO(andresp): Can we use the checkpointed value as initializer?
+    dummy_value = init_ops.Zeros(dtype=proto.dtype)(shape=proto.shape)
+    return variables.Variable(dummy_value)
+
 
 def _load_saved_object_graph_proto(filename):
   with file_io.FileIO(filename, "rb") as f:
diff --git a/tensorflow/python/saved_model/load_test.py b/tensorflow/python/saved_model/load_test.py
index 303b8f66ef..ba88668f8c 100644
--- a/tensorflow/python/saved_model/load_test.py
+++ b/tensorflow/python/saved_model/load_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import variables
 from tensorflow.python.saved_model import load
 from tensorflow.python.saved_model import save
 from tensorflow.python.training.checkpointable import tracking
@@ -50,6 +51,19 @@ class LoadTest(test.TestCase):
     self.assertIsNot(imported.dep_one, imported.dep_two)
     self.assertEqual(4., imported.f(constant_op.constant(2.)).numpy())
 
+  def test_variables(self):
+    root = tracking.Checkpointable()
+    root.f = def_function.function(
+        lambda x: 2. * x,
+        input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
+    root.v1 = variables.Variable(1.)
+    root.v2 = variables.Variable(2.)
+    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
+    save.save(root, save_dir)
+    imported = load.load(save_dir)
+    self.assertEquals(imported.v1.numpy(), 1.0)
+    self.assertEquals(imported.v2.numpy(), 2.0)
+
   def _make_asset(self, contents):
     filename = tempfile.mktemp(prefix=self.get_temp_dir())
     with open(filename, "w") as f:
diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index 57c63f8cda..6c2d5e6f2b 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -541,6 +541,10 @@ def _write_object_proto(obj, proto, asset_file_def_index):
   if isinstance(obj, tracking.TrackableAsset):
     proto.asset.SetInParent()
     proto.asset.asset_file_def_index = asset_file_def_index[obj]
+  elif resource_variable_ops.is_resource_variable(obj):
+    proto.variable.SetInParent()
+    proto.variable.dtype = obj.dtype.as_datatype_enum
+    proto.variable.shape.CopyFrom(obj.shape.as_proto())
   else:
     proto.user_object.SetInParent()
 
diff --git a/tensorflow/python/saved_model/saved_object_graph.proto b/tensorflow/python/saved_model/saved_object_graph.proto
index ed5c63935f..b95990ad34 100644
--- a/tensorflow/python/saved_model/saved_object_graph.proto
+++ b/tensorflow/python/saved_model/saved_object_graph.proto
@@ -1,6 +1,8 @@
 syntax = "proto3";
 
 import "tensorflow/core/protobuf/checkpointable_object_graph.proto";
+import "tensorflow/core/framework/tensor_shape.proto";
+import "tensorflow/core/framework/types.proto";
 
 option cc_enable_arenas = true;
 
@@ -49,6 +51,7 @@ message SavedObject {
     SavedUserObject user_object = 4;
     SavedAsset asset = 5;
     SavedPolymorphicFunction function = 6;
+    SavedVariable variable = 7;
   }
 }
 
@@ -82,3 +85,12 @@ message SavedMonomorphicFunction {
   // A reference to a TensorFlow function in the MetaGraph's FunctionDefLibrary
   string concrete_function = 1;
 }
+
+// Represents a Variable that is initialized by loading the contents from the
+// SavedModel checkpoint.
+message SavedVariable {
+  DataType dtype = 1;
+  TensorShapeProto shape = 2;
+
+  // TODO(andresp): Add "trainable" and save_slice_info_def.
+}
-- 
GitLab


From b55e7a9a82dd9fbf0b6ac92b5f621424c23e48f2 Mon Sep 17 00:00:00 2001
From: Mark Heffernan <meheff@google.com>
Date: Fri, 14 Dec 2018 08:33:13 -0800
Subject: [PATCH 592/873] Change Shape parsing from regexp matcher to parser.
 Previously in the HLO parser/lexer shapes were tokens which were identified
 using a complicated regular expression. This made augmenting the textual form
 of shape difficult such as would be necessary for dynamic shapes or tiling.
 To avoid ambiguity and other problems a couple changes were made to HLO
 textual form, as well as some related clean up:

(1) Do not redundantly print the shape inside of the constant HLO instruction's "operand" field. Previously, constant instructions we printed like:

    S32[2,2] constant(S32[2,2] {{1,2},{3,4}})

  Now this is printed as:

    S32[2,2] constant({{1,2},{3,4}})

  This avoids an ambiguity where the values of the literal can be misinterpreted as a layout. Also, the shape was printed inconsistently: only when the rank was greater than one.

(2) Remove ShapeUtil::ParseShapeString, replace with ParseShape function in hlo parser.

(3) Merge hlo_token.h into hlo_lexer.h. It is only used by the lexer and parser which include that file and avoids potential confusion with the token HLO type

(4) Fix b/112302613 by removing the unused Shape field in the sharding attribute of HLO text.

(5) As part of this change primitive element types are now keywords which simplifies parsing. The fallout is that a bunch of values in HLO text named "token" had to be renamed. Also, change the HLO name sanitizer to avoid these primitive type keywords.

PiperOrigin-RevId: 225546437
---
 tensorflow/compiler/tf2xla/tf2xla_test.cc     |   2 +-
 tensorflow/compiler/xla/BUILD                 |  17 ++
 tensorflow/compiler/xla/literal.cc            |  60 ++++--
 tensorflow/compiler/xla/literal.h             |  17 +-
 tensorflow/compiler/xla/literal_test.cc       |  28 +--
 tensorflow/compiler/xla/primitive_util.cc     |  63 +++++++
 tensorflow/compiler/xla/primitive_util.h      |  14 ++
 .../compiler/xla/primitive_util_test.cc       |  46 +++++
 tensorflow/compiler/xla/service/BUILD         |   3 +-
 .../xla/service/ar_crs_combiner_test.cc       |  30 +--
 .../cpu/parallel_task_assignment_test.cc      |   6 +-
 .../cpu/tests/cpu_literal_caching_test.cc     |  26 ++-
 .../xla/service/cpu/tests/cpu_outfeed_test.cc |   5 +-
 .../service/gpu/cudnn_conv_rewriter_test.cc   |   2 +-
 .../service/gpu/gpu_layout_assignment_test.cc |   2 +-
 .../xla/service/hlo_constant_folding_test.cc  |   2 +-
 .../xla/service/hlo_dataflow_analysis_test.cc |   4 +-
 .../compiler/xla/service/hlo_domain_test.cc   |  22 +--
 .../hlo_element_type_converter_test.cc        |  10 +-
 .../compiler/xla/service/hlo_instructions.cc  |   2 +-
 tensorflow/compiler/xla/service/hlo_lexer.cc  |  93 ++++++----
 tensorflow/compiler/xla/service/hlo_lexer.h   |  90 +++++++--
 .../xla/service/hlo_liveness_analysis_test.cc |   8 +-
 .../compiler/xla/service/hlo_matchers.h       |   6 +-
 .../xla/service/hlo_module_dce_test.cc        |   4 +-
 tensorflow/compiler/xla/service/hlo_parser.cc | 143 +++++++++++----
 tensorflow/compiler/xla/service/hlo_parser.h  |   3 +
 .../compiler/xla/service/hlo_parser_test.cc   | 168 ++++++++++++-----
 tensorflow/compiler/xla/service/hlo_token.h   |  78 --------
 .../service/indexed_array_analysis_test.cc    |  64 +++----
 .../xla/service/instruction_fusion_test.cc    |  12 +-
 .../xla/service/layout_assignment_test.cc     |   8 +-
 .../compiler/xla/service/name_uniquer.cc      |   9 +
 .../compiler/xla/service/name_uniquer_test.cc |  16 ++
 .../compiler/xla/service/pattern_matcher.h    |   3 +-
 .../xla/service/pattern_matcher_test.cc       |   4 +-
 .../xla/service/transpose_folding_test.cc     |   4 +-
 .../while_loop_constant_sinking_test.cc       |   8 +-
 .../xla/service/while_loop_simplifier_test.cc |  18 +-
 .../compiler/xla/service/while_util_test.cc   |   4 +-
 tensorflow/compiler/xla/shape_util.cc         | 172 +-----------------
 tensorflow/compiler/xla/shape_util.h          |   4 -
 tensorflow/compiler/xla/shape_util_test.cc    |  96 ----------
 .../xla/tests/literal_test_util_test.cc       |  10 +-
 .../compiler/xla/tests/test_utils_test.cc     |   8 +-
 .../compiler/xla/tests/token_hlo_test.cc      |  18 +-
 tensorflow/compiler/xla/tests/tuple_test.cc   |   4 +-
 .../compiler/xla/text_literal_reader.cc       |   3 +-
 .../compiler/xla/tools/replay_computation.cc  |   3 +-
 49 files changed, 751 insertions(+), 671 deletions(-)
 create mode 100644 tensorflow/compiler/xla/primitive_util_test.cc
 delete mode 100644 tensorflow/compiler/xla/service/hlo_token.h

diff --git a/tensorflow/compiler/tf2xla/tf2xla_test.cc b/tensorflow/compiler/tf2xla/tf2xla_test.cc
index ab26d939cc..24afe595b1 100644
--- a/tensorflow/compiler/tf2xla/tf2xla_test.cc
+++ b/tensorflow/compiler/tf2xla/tf2xla_test.cc
@@ -91,7 +91,7 @@ TEST(ConvertGraphDefToXla, Sum) {
       client->ExecuteAndTransfer(computation, {x_global.get(), y_global.get()});
   TF_EXPECT_OK(result_or.status());
   xla::Literal result = std::move(result_or.ValueOrDie());
-  EXPECT_EQ("(s32[]) (\n42\n)", result.ToString());
+  EXPECT_EQ("(\ns32[] 42\n)", result.ToString());
 
   config.mutable_feed(0)->mutable_id()->set_output_index(
       123); /* invalid output_index */
diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD
index 0a20ddf662..722d137668 100644
--- a/tensorflow/compiler/xla/BUILD
+++ b/tensorflow/compiler/xla/BUILD
@@ -292,6 +292,22 @@ tf_cc_test(
     ],
 )
 
+tf_cc_test(
+    name = "primitive_util_test",
+    srcs = ["primitive_util_test.cc"],
+    deps = [
+        ":shape_util",
+        ":status_macros",
+        ":test",
+        ":test_helpers",
+        ":types",
+        ":util",
+        ":xla_data_proto",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 tf_cc_test(
     name = "layout_util_test",
     srcs = ["layout_util_test.cc"],
@@ -593,6 +609,7 @@ cc_library(
         ":types",
         ":util",
         ":xla_data_proto",
+        "//tensorflow/compiler/xla/service:hlo_parser",
         "//tensorflow/core:lib",
         "//tensorflow/core:lib_internal",
         "@com_google_absl//absl/memory",
diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc
index 8f480c1f10..277c98721e 100644
--- a/tensorflow/compiler/xla/literal.cc
+++ b/tensorflow/compiler/xla/literal.cc
@@ -1028,20 +1028,21 @@ string ShapeToString(bool print_layout, const Shape& shape) {
 }
 
 void ToStringHelper(const LiteralBase& literal, const ShapeIndex& shape_index,
-                    bool print_layout, std::vector<string>* pieces);
+                    bool print_shape, bool print_layout,
+                    std::vector<string>* pieces);
 
 void TupleToStringHelper(const LiteralBase& literal,
-                         const ShapeIndex& shape_index, bool print_layout,
-                         std::vector<string>* pieces) {
+                         const ShapeIndex& shape_index, bool print_shape,
+                         bool print_layout, std::vector<string>* pieces) {
   const Shape& subshape = ShapeUtil::GetSubshape(literal.shape(), shape_index);
-  pieces->push_back(ShapeToString(print_layout, subshape));
-  pieces->push_back(" (\n");
+  pieces->push_back("(\n");
   std::vector<string> tuple_pieces;
   for (int i = 0; i < ShapeUtil::TupleElementCount(subshape); ++i) {
     ShapeIndex element_index = shape_index;
     element_index.push_back(i);
     std::vector<string> element_pieces;
-    ToStringHelper(literal, element_index, print_layout, &element_pieces);
+    ToStringHelper(literal, element_index, print_shape, print_layout,
+                   &element_pieces);
     tuple_pieces.push_back(absl::StrJoin(element_pieces, ""));
   }
   pieces->push_back(absl::StrJoin(tuple_pieces, ",\n"));
@@ -1049,9 +1050,11 @@ void TupleToStringHelper(const LiteralBase& literal,
 }
 
 void SparseArrayToStringHelper(const LiteralBase& literal,
-                               const Shape& subshape, bool print_layout,
-                               std::vector<string>* pieces) {
-  pieces->push_back(ShapeToString(print_layout, subshape));
+                               const Shape& subshape, bool print_shape,
+                               bool print_layout, std::vector<string>* pieces) {
+  if (print_shape) {
+    pieces->push_back(ShapeToString(print_layout, subshape));
+  }
   pieces->push_back("{");
   int64 rank = ShapeUtil::Rank(subshape);
   int64 num_elements = literal.sparse_element_count();
@@ -1073,8 +1076,8 @@ void SparseArrayToStringHelper(const LiteralBase& literal,
 }
 
 void DenseArrayToStringHelper(const LiteralBase& literal,
-                              const ShapeIndex& shape_index, bool print_layout,
-                              std::vector<string>* pieces) {
+                              const ShapeIndex& shape_index, bool print_shape,
+                              bool print_layout, std::vector<string>* pieces) {
   const Shape& subshape = ShapeUtil::GetSubshape(literal.shape(), shape_index);
   int64 rank = ShapeUtil::Rank(subshape);
 
@@ -1135,7 +1138,7 @@ void DenseArrayToStringHelper(const LiteralBase& literal,
         }
       };
 
-  if (rank > 1) {
+  if (print_shape) {
     pieces->push_back(ShapeToString(print_layout, subshape));
     pieces->push_back(" ");
   }
@@ -1146,19 +1149,23 @@ void DenseArrayToStringHelper(const LiteralBase& literal,
 }
 
 void ToStringHelper(const LiteralBase& literal, const ShapeIndex& shape_index,
-                    bool print_layout, std::vector<string>* pieces) {
+                    bool print_shape, bool print_layout,
+                    std::vector<string>* pieces) {
   const Shape& subshape = ShapeUtil::GetSubshape(literal.shape(), shape_index);
   CHECK(LayoutUtil::HasLayout(literal.shape()));
   CHECK(LayoutUtil::HasLayout(subshape));
   if (ShapeUtil::IsTuple(subshape)) {
-    TupleToStringHelper(literal, shape_index, print_layout, pieces);
+    TupleToStringHelper(literal, shape_index, print_shape, print_layout,
+                        pieces);
   } else if (ShapeUtil::IsToken(subshape)) {
     pieces->push_back("token");
   } else if (LayoutUtil::IsSparseArray(subshape)) {
-    SparseArrayToStringHelper(literal, subshape, print_layout, pieces);
+    SparseArrayToStringHelper(literal, subshape, print_shape, print_layout,
+                              pieces);
   } else {
     CHECK(LayoutUtil::IsDenseArray(subshape));
-    DenseArrayToStringHelper(literal, shape_index, print_layout, pieces);
+    DenseArrayToStringHelper(literal, shape_index, print_shape, print_layout,
+                             pieces);
   }
 }
 
@@ -1169,10 +1176,27 @@ int64 LiteralBase::sparse_element_count() const {
   return sparse_indices()->index_count();
 }
 
-string LiteralBase::ToString(bool print_layout) const {
+string LiteralBase::ToString() const {
+  std::vector<string> pieces;
+  CHECK(LayoutUtil::HasLayout(this->shape()));
+  ToStringHelper(*this, {}, /*print_shape=*/true,
+                 /*print_layout=*/false, &pieces);
+  return absl::StrJoin(pieces, "");
+}
+
+string LiteralBase::ToStringWithoutShape() const {
+  std::vector<string> pieces;
+  CHECK(LayoutUtil::HasLayout(this->shape()));
+  ToStringHelper(*this, {}, /*print_shape=*/false,
+                 /*print_layout=*/false, &pieces);
+  return absl::StrJoin(pieces, "");
+}
+
+string LiteralBase::ToStringWithLayout() const {
   std::vector<string> pieces;
   CHECK(LayoutUtil::HasLayout(this->shape()));
-  ToStringHelper(*this, {}, print_layout, &pieces);
+  ToStringHelper(*this, {}, /*print_shape=*/true,
+                 /*print_layout=*/true, &pieces);
   return absl::StrJoin(pieces, "");
 }
 
diff --git a/tensorflow/compiler/xla/literal.h b/tensorflow/compiler/xla/literal.h
index fa9a71af4c..67e908e7ec 100644
--- a/tensorflow/compiler/xla/literal.h
+++ b/tensorflow/compiler/xla/literal.h
@@ -92,9 +92,20 @@ class LiteralBase {
   // array.
   string GetR1U8AsString() const;
 
-  // Returns a string representation of the literal value.
-  // Warning: this function can take minutes for multi-million element Literals.
-  string ToString(bool print_layout = false) const;
+  // Returns a string representation of the literal value. The Shape of the
+  // literal is a prefix of the literal value in the string.
+
+  // Warning: this function can take minutes for multi-million
+  // element Literals.
+  string ToString() const;
+
+  // Returns a string representation of the literal value which does *not*
+  // include the shape string.
+  string ToStringWithoutShape() const;
+
+  // Returns a string representation of the literal value which includes the
+  // shape string with its layout.does *not* include the shape string.
+  string ToStringWithLayout() const;
 
   // Gets an element in the literal at the given index. The multi_index is
   // CHECKed against the dimension sizes.
diff --git a/tensorflow/compiler/xla/literal_test.cc b/tensorflow/compiler/xla/literal_test.cc
index 49363ad802..d8c7141cac 100644
--- a/tensorflow/compiler/xla/literal_test.cc
+++ b/tensorflow/compiler/xla/literal_test.cc
@@ -98,42 +98,42 @@ class LiteralUtilTest : public ::testing::Test {
 
 TEST_F(LiteralUtilTest, LiteralScalarToString) {
   auto true_lit = LiteralUtil::CreateR0<bool>(true);
-  EXPECT_EQ("true", true_lit.ToString());
+  EXPECT_EQ("pred[] true", true_lit.ToString());
 
   auto false_lit = LiteralUtil::CreateR0<bool>(false);
-  EXPECT_EQ("false", false_lit.ToString());
+  EXPECT_EQ("pred[] false", false_lit.ToString());
 
   auto u32_lit = LiteralUtil::CreateR0<uint32>(42);
-  EXPECT_EQ("42", u32_lit.ToString());
+  EXPECT_EQ("u32[] 42", u32_lit.ToString());
 
   auto s32_lit = LiteralUtil::CreateR0<int32>(-999);
-  EXPECT_EQ("-999", s32_lit.ToString());
+  EXPECT_EQ("s32[] -999", s32_lit.ToString());
 
   auto f32_lit = LiteralUtil::CreateR0<float>(3.14f);
-  EXPECT_EQ("3.14", f32_lit.ToString());
+  EXPECT_EQ("f32[] 3.14", f32_lit.ToString());
 
   auto f16_lit = LiteralUtil::CreateR0<half>(static_cast<half>(0.5f));
-  EXPECT_EQ("0.5", f16_lit.ToString());
+  EXPECT_EQ("f16[] 0.5", f16_lit.ToString());
 
   auto c64_lit = LiteralUtil::CreateR0<complex64>({3.14f, 2.78f});
-  EXPECT_EQ("(3.14, 2.78)", c64_lit.ToString());
+  EXPECT_EQ("c64[] (3.14, 2.78)", c64_lit.ToString());
 
   auto bf16_lit = LiteralUtil::CreateR0<bfloat16>(static_cast<bfloat16>(0.5f));
-  EXPECT_EQ("0.5", bf16_lit.ToString());
+  EXPECT_EQ("bf16[] 0.5", bf16_lit.ToString());
 
   // 3.14 will be rounded to 3.14062 in bfloat16 format.
   auto bf16_lit_truncated =
       LiteralUtil::CreateR0<bfloat16>(static_cast<bfloat16>(3.14f));
-  ASSERT_EQ("3.14062", bf16_lit_truncated.ToString());
+  ASSERT_EQ("bf16[] 3.14062", bf16_lit_truncated.ToString());
 
   auto bf16_lit_truncated2 =
       LiteralUtil::CreateR0<bfloat16>(static_cast<bfloat16>(9.001f));
-  EXPECT_EQ("9", bf16_lit_truncated2.ToString());
+  EXPECT_EQ("bf16[] 9", bf16_lit_truncated2.ToString());
 }
 
 TEST_F(LiteralUtilTest, LiteralVectorToString) {
   auto pred_vec = LiteralUtil::CreateR1<bool>({true, false, true});
-  EXPECT_EQ("{1, 0, 1}", pred_vec.ToString());
+  EXPECT_EQ("pred[3] {1, 0, 1}", pred_vec.ToString());
 }
 
 TEST_F(LiteralUtilTest, R2ToString) {
@@ -210,8 +210,8 @@ TEST_F(LiteralUtilTest, TupleToString) {
   auto scalar = LiteralUtil::CreateR0<float>(1.0);
   auto matrix = LiteralUtil::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
   auto tuple = LiteralUtil::MakeTuple({&scalar, &matrix});
-  const string expected = R"((f32[], f32[2,2]) (
-1,
+  const string expected = R"((
+f32[] 1,
 f32[2,2] {
   { 1, 2 },
   { 3, 4 }
@@ -1890,7 +1890,7 @@ TEST_F(LiteralUtilTest, SortSparseElements) {
   literal.AppendSparseElement<float>({3, 4, 5}, 3.0);
   literal.AppendSparseElement<float>({1, 2, 3}, 1.0);
   literal.SortSparseElements();
-  EXPECT_EQ(literal.ToString(false),
+  EXPECT_EQ(literal.ToString(),
             "f32[10,10,10]{[1, 2, 3]: 1, [2, 3, 4]: 2, [3, 4, 5]: 3}");
 }
 
diff --git a/tensorflow/compiler/xla/primitive_util.cc b/tensorflow/compiler/xla/primitive_util.cc
index b16147e3be..00ad01fc40 100644
--- a/tensorflow/compiler/xla/primitive_util.cc
+++ b/tensorflow/compiler/xla/primitive_util.cc
@@ -15,6 +15,9 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/primitive_util.h"
 
+#include "absl/strings/ascii.h"
+#include "absl/strings/numbers.h"
+#include "tensorflow/compiler/xla/util.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace xla {
@@ -90,5 +93,65 @@ bool IsArrayType(PrimitiveType primitive_type) {
          primitive_type != OPAQUE && primitive_type != TOKEN;
 }
 
+// Class to memoize the computation of
+//   absl::AsciiStrToLower(PrimitiveType_Name(p))
+// for all PrimitiveType values "p"
+class PrimitiveTypeNameGenerator {
+ public:
+  PrimitiveTypeNameGenerator() {
+    for (int i = 0; i < PrimitiveType_ARRAYSIZE; i++) {
+      if (PrimitiveType_IsValid(i)) {
+        lowercase_name_[i] = absl::AsciiStrToLower(
+            PrimitiveType_Name(static_cast<PrimitiveType>(i)));
+      }
+    }
+  }
+  const string& LowercaseName(PrimitiveType t) {
+    return lowercase_name_[static_cast<int>(t)];
+  }
+
+ private:
+  string lowercase_name_[PrimitiveType_ARRAYSIZE];
+};
+
+const string& LowercasePrimitiveTypeName(PrimitiveType s) {
+  static auto* gen = new PrimitiveTypeNameGenerator();
+  return gen->LowercaseName(s);
+}
+
+namespace {
+
+// Returns a map from lower-case primitive type name to primitive type.
+const std::unordered_map<string, PrimitiveType>& GetPrimitiveTypeStringMap() {
+  static std::unordered_map<string, PrimitiveType>* name_to_type = [] {
+    static auto* map = new std::unordered_map<string, PrimitiveType>;
+    for (int i = 0; i < PrimitiveType_ARRAYSIZE; i++) {
+      if (PrimitiveType_IsValid(i) && i != PRIMITIVE_TYPE_INVALID) {
+        auto value = static_cast<PrimitiveType>(i);
+        (*map)[LowercasePrimitiveTypeName(value)] = value;
+      }
+    }
+    return map;
+  }();
+  return *name_to_type;
+}
+
+}  // namespace
+
+StatusOr<PrimitiveType> StringToPrimitiveType(absl::string_view name) {
+  const auto& map = GetPrimitiveTypeStringMap();
+  auto found = map.find(string(name));
+  if (found == map.end()) {
+    return InvalidArgument("Invalid element type string: \"%s\".", name);
+  }
+  return found->second;
+}
+
+bool IsPrimitiveTypeName(absl::string_view name) {
+  const auto& map = GetPrimitiveTypeStringMap();
+  auto found = map.find(string(name));
+  return found != map.end();
+}
+
 }  // namespace primitive_util
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/primitive_util.h b/tensorflow/compiler/xla/primitive_util.h
index 889e9a1cec..70603b6fed 100644
--- a/tensorflow/compiler/xla/primitive_util.h
+++ b/tensorflow/compiler/xla/primitive_util.h
@@ -20,6 +20,9 @@ limitations under the License.
 
 #include <type_traits>
 
+#include "absl/strings/string_view.h"
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 
@@ -221,6 +224,17 @@ template <>
 struct PrimitiveTypeToNative<C64> {
   using type = complex64;
 };
+
+// Returns the lower-case name of the given primitive type.
+const string& LowercasePrimitiveTypeName(PrimitiveType s);
+
+// Returns the PrimitiveType matching the given name. The given name is expected
+// to be lower-case.
+StatusOr<PrimitiveType> StringToPrimitiveType(absl::string_view name);
+
+// Returns true if the given name is a primitive type string (lower-case).
+bool IsPrimitiveTypeName(absl::string_view name);
+
 }  // namespace primitive_util
 }  // namespace xla
 
diff --git a/tensorflow/compiler/xla/primitive_util_test.cc b/tensorflow/compiler/xla/primitive_util_test.cc
new file mode 100644
index 0000000000..1f765d6da9
--- /dev/null
+++ b/tensorflow/compiler/xla/primitive_util_test.cc
@@ -0,0 +1,46 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/primitive_util.h"
+
+#include <numeric>
+#include "tensorflow/compiler/xla/status_macros.h"
+#include "tensorflow/compiler/xla/test.h"
+#include "tensorflow/compiler/xla/test_helpers.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+
+namespace xla {
+namespace {
+
+TEST(PrimitiveUtilTest, StringToPrimitiveType) {
+  auto expect_ok_and_equal = [](const string& str, PrimitiveType expected) {
+    TF_ASSERT_OK_AND_ASSIGN(PrimitiveType actual,
+                            primitive_util::StringToPrimitiveType(str));
+    EXPECT_EQ(expected, actual);
+  };
+  expect_ok_and_equal("f32", F32);
+  expect_ok_and_equal("tuple", TUPLE);
+  expect_ok_and_equal("pred", PRED);
+  expect_ok_and_equal("s32", S32);
+
+  EXPECT_IS_NOT_OK(primitive_util::StringToPrimitiveType("F32").status());
+  EXPECT_IS_NOT_OK(primitive_util::StringToPrimitiveType("Pred").status());
+  EXPECT_IS_NOT_OK(primitive_util::StringToPrimitiveType("preD").status());
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 0c92ea7364..f20121e490 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1014,6 +1014,7 @@ cc_library(
     srcs = ["name_uniquer.cc"],
     hdrs = ["name_uniquer.h"],
     deps = [
+        "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:types",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
@@ -1785,6 +1786,7 @@ tf_cc_test(
         ":hlo_cse",
         ":hlo_dce",
         ":hlo_matchers",
+        ":hlo_parser",
         ":hlo_pass",
         ":hlo_pass_pipeline",
         ":tuple_simplifier",
@@ -3628,7 +3630,6 @@ cc_library(
     srcs = ["hlo_lexer.cc"],
     hdrs = [
         "hlo_lexer.h",
-        "hlo_token.h",
     ],
     deps = [
         "//tensorflow/compiler/xla:shape_util",
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
index 2f7a53bfc8..8a4fd0ee1b 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
@@ -32,8 +32,8 @@ HloModule foobar
 
 ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
   %p = f32[2,2] parameter(0)
-  %constant.f32.1 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
-  %constant.f32.2 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
+  %constant.f32.1 = f32[2,2] constant({{1, 2}, {3, 4}})
+  %constant.f32.2 = f32[2,2] constant({{1, 2}, {3, 4}})
   ROOT %tuple = (f32[2,2], f32[2,2]) tuple(%constant.f32.1, %constant.f32.2)
 }
 )";
@@ -91,7 +91,7 @@ HloModule foobar
 
 ENTRY %entrycomp (p: f32[2,2]) -> ((f32[2,2]), (f32[2,2], f32[2,2])) {
   %p = f32[2,2] parameter(0)
-  %constant.f32 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
+  %constant.f32 = f32[2,2] constant({{1, 2}, {3, 4}})
   %tuple1 = (f32[2,2]) tuple(%constant.f32)
   %tuple2 = (f32[2,2], f32[2,2]) tuple(%constant.f32, %constant.f32)
   ROOT %tuple = ((f32[2,2]), (f32[2,2], f32[2,2])) tuple(%tuple1, %tuple2)
@@ -152,7 +152,7 @@ HloModule foobar
 
 ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
   %p = f32[2,2] parameter(0)
-  %constant.f32 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
+  %constant.f32 = f32[2,2] constant({{1, 2}, {3, 4}})
   %tuple.1 = (f32[2,2], f32[2,2]) tuple(%constant.f32, %constant.f32)
   %get-tuple-element.1 = f32[2,2] get-tuple-element(%tuple.1), index=0
   %get-tuple-element.2 = f32[2,2] get-tuple-element(%tuple.1), index=0
@@ -174,7 +174,7 @@ HloModule foobar
 
 ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
   %p = f32[2,2] parameter(0)
-  %constant.f32 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
+  %constant.f32 = f32[2,2] constant({{1, 2}, {3, 4}})
   %tuple.1 = (f32[2,2], f32[2,2]) tuple(%constant.f32, %constant.f32)
   %get-tuple-element.1 = f32[2,2] get-tuple-element(%tuple.1), index=0
   %get-tuple-element.2 = f32[2,2] get-tuple-element(%tuple.1), index=1
@@ -196,8 +196,8 @@ HloModule foobar
 
 ENTRY %entrycomp (p: f32[2,2]) -> (f32[2,2], f32[2,2]) {
   %p = f32[2,2] parameter(0)
-  %constant.f32.1 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
-  %constant.f32.2 = f32[2,2] constant(f32[2,2] {{2, 3}, {4, 5}})
+  %constant.f32.1 = f32[2,2] constant({{1, 2}, {3, 4}})
+  %constant.f32.2 = f32[2,2] constant({{2, 3}, {4, 5}})
   %tuple.1 = (f32[2,2], f32[2,2]) tuple(%constant.f32.1, %constant.f32.2)
   %get-tuple-element.1 = f32[2,2] get-tuple-element(%tuple.1), index=0
   %get-tuple-element.2 = f32[2,2] get-tuple-element(%tuple.1), index=1
@@ -226,7 +226,7 @@ HloModule foobar
 
 %body (x: (f32[2,2], f32[2,2])) -> (f32[2,2], f32[2,2]) {
   %x = (f32[2,2], f32[2,2]) parameter(0)
-  %constant.f32 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
+  %constant.f32 = f32[2,2] constant({{1, 2}, {3, 4}})
   %get-tuple-element.1 = f32[2,2] get-tuple-element(%x), index=0
   %get-tuple-element.2 = f32[2,2] get-tuple-element(%x), index=1
   %add.1 = f32[2,2] add(%get-tuple-element.1, %constant.f32)
@@ -235,7 +235,7 @@ HloModule foobar
 }
 
 ENTRY %WhileLoop () -> (f32[2,2], f32[2,2]) {
-  %constant.f32 = f32[2,2] constant(f32[2,2] {{3, 4}, {5, 6}})
+  %constant.f32 = f32[2,2] constant({{3, 4}, {5, 6}})
   %init.tuple = (f32[2,2], f32[2,2]) tuple(%constant.f32, %constant.f32)
   ROOT %while = (f32[2,2], f32[2,2]) while(%init.tuple), condition=%condition, body=%body
 }
@@ -263,7 +263,7 @@ HloModule foobar
 
 %body (x: (f32[2,2], f32[2,2])) -> (f32[2,2], f32[2,2]) {
   %x = (f32[2,2], f32[2,2]) parameter(0)
-  %constant.f32 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
+  %constant.f32 = f32[2,2] constant({{1, 2}, {3, 4}})
   %get-tuple-element.1 = f32[2,2] get-tuple-element(%x), index=0
   %get-tuple-element.2 = f32[2,2] get-tuple-element(%x), index=1
   %add.1 = f32[2,2] add(%get-tuple-element.1, %constant.f32)
@@ -272,8 +272,8 @@ HloModule foobar
 }
 
 ENTRY %WhileLoop () -> (f32[2,2], f32[2,2]) {
-  %constant.f32.1 = f32[2,2] constant(f32[2,2] {{3, 4}, {5, 6}})
-  %constant.f32.2 = f32[2,2] constant(f32[2,2] {{3, 4}, {7, 8}})
+  %constant.f32.1 = f32[2,2] constant({{3, 4}, {5, 6}})
+  %constant.f32.2 = f32[2,2] constant({{3, 4}, {7, 8}})
   %init.tuple = (f32[2,2], f32[2,2]) tuple(%constant.f32.1, %constant.f32.2)
   ROOT %while = (f32[2,2], f32[2,2]) while(%init.tuple), condition=%condition, body=%body
 }
@@ -301,8 +301,8 @@ HloModule foobar
 
 %body (x: (f32[2,2], f32[2,2])) -> (f32[2,2], f32[2,2]) {
   %x = (f32[2,2], f32[2,2]) parameter(0)
-  %constant.f32.1 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
-  %constant.f32.2 = f32[2,2] constant(f32[2,2] {{3, 4}, {1, 2}})
+  %constant.f32.1 = f32[2,2] constant({{1, 2}, {3, 4}})
+  %constant.f32.2 = f32[2,2] constant({{3, 4}, {1, 2}})
   %get-tuple-element.1 = f32[2,2] get-tuple-element(%x), index=0
   %get-tuple-element.2 = f32[2,2] get-tuple-element(%x), index=1
   %add.1 = f32[2,2] add(%get-tuple-element.1, %constant.f32.1)
@@ -311,7 +311,7 @@ HloModule foobar
 }
 
 ENTRY %WhileLoop () -> (f32[2,2], f32[2,2]) {
-  %constant.f32 = f32[2,2] constant(f32[2,2] {{3, 4}, {5, 6}})
+  %constant.f32 = f32[2,2] constant({{3, 4}, {5, 6}})
   %init.tuple = (f32[2,2], f32[2,2]) tuple(%constant.f32, %constant.f32)
   ROOT %while = (f32[2,2], f32[2,2]) while(%init.tuple), condition=%condition, body=%body
 }
diff --git a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc
index f0b65046c1..35ae62b42d 100644
--- a/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/parallel_task_assignment_test.cc
@@ -112,10 +112,10 @@ TEST_F(ParallelTaskAssignmentTest, InfeedOutfeedOperationNotParallelized) {
   const string hlo_string = R"(
     HloModule TestTaskParallel_infeed_outfeed
     ENTRY InfeedOutfeed {
-      token = token[] after-all()
-      infeed0 = (u32[12345678,2]{1,0}, token[]) infeed(token)
+      token0 = token[] after-all()
+      infeed0 = (u32[12345678,2]{1,0}, token[]) infeed(token0)
       infeed0.data = u32[12345678,2]{1,0} get-tuple-element((u32[12345678,2]{1,0}, token[]) infeed0), index=0
-      ROOT outfeed0 = token[] outfeed(infeed0.data, token)
+      ROOT outfeed0 = token[] outfeed(infeed0.data, token0)
     }
   )";
 
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc
index fa0e09ff6b..0584c0484f 100644
--- a/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_literal_caching_test.cc
@@ -31,29 +31,27 @@ HloModule RepeatedConstants
 while_body {
   arg_body = f32[2,3,2] parameter(0)
   ROOT const = f32[2,3,2] constant(
-  f32[2,3,2]
     {{{1, 2}, {1001, 1002}, {2001, 2002}},
      {{2, 1}, {2001, 3002}, {2001, 2002}}})
 }
 
 while_cond {
   arg_cond = f32[2,3,2] parameter(0)
-  token = token[] after-all()
-  infeed = (pred[], token[]) infeed(token)
+  token0 = token[] after-all()
+  infeed = (pred[], token[]) infeed(token0)
   ROOT unknown = pred[] get-tuple-element((pred[], token[]) infeed), index=0
 }
 
 ENTRY main {
   param = f32[2,3,2] parameter(0)
   const_a = f32[2,3,2] constant(
-  f32[2,3,2]
     {{{1, 2}, {1001, 1002}, {2001, 2002}},
      {{2, 1}, {2001, 3002}, {2001, 2002}}})
   const_b = f32[2,3,2] while(f32[2,3,2] const_a), condition=while_cond, body=while_body
 
-  token = token[] after-all()
-  out0 = token[] outfeed(f32[2,3,2] const_a, token[] token)
-  ROOT out1 = token[] outfeed(f32[2,3,2] const_b, token[] token)
+  token0 = token[] after-all()
+  out0 = token[] outfeed(f32[2,3,2] const_a, token[] token0)
+  ROOT out1 = token[] outfeed(f32[2,3,2] const_b, token[] token0)
 }
 )";
 
@@ -82,24 +80,24 @@ HloModule RepeatedConstants
 
 while_body {
   arg_body = (f32[2,1]{1,0}, f32[1]{0}) parameter(0)
-  ROOT const = (f32[2,1]{1,0}, f32[1]{0}) constant((f32[2,1], f32[1]) ( f32[2,1] { { 1 }, { 2 } }, {2} ))
+  ROOT const = (f32[2,1]{1,0}, f32[1]{0}) constant(({ { 1 }, { 2 } }, {2} ))
 }
 
 while_cond {
   arg_cond = (f32[2,1]{1,0}, f32[1]{0}) parameter(0)
-  token = token[] after-all()
-  infeed = (pred[], token[]) infeed(token)
+  token0 = token[] after-all()
+  infeed = (pred[], token[]) infeed(token0)
   ROOT unknown = pred[] get-tuple-element((pred[], token[]) infeed), index=0
 }
 
 ENTRY main {
   param = f32[2,3,2] parameter(0)
-  const_a = (f32[2,1]{1,0}, f32[1]{0}) constant((f32[2,1], f32[1]) ( f32[2,1] { { 1 }, { 2 } }, {2} ))
+  const_a = (f32[2,1]{1,0}, f32[1]{0}) constant(( { { 1 }, { 2 } }, {2} ))
   const_b = (f32[2,1]{1,0}, f32[1]{0}) while((f32[2,1]{1,0}, f32[1]{0}) const_a), condition=while_cond, body=while_body
 
-  token = token[] after-all()
-  out0 = () outfeed((f32[2,1]{1,0}, f32[1]{0}) const_a, token[] token)
-  ROOT out1 = () outfeed((f32[2,1]{1,0}, f32[1]{0}) const_b, token[] token)
+  token0 = token[] after-all()
+  out0 = () outfeed((f32[2,1]{1,0}, f32[1]{0}) const_a, token[] token0)
+  ROOT out1 = () outfeed((f32[2,1]{1,0}, f32[1]{0}) const_b, token[] token0)
 }
 )";
 
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc
index e2c7af541e..aab7f0b393 100644
--- a/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_outfeed_test.cc
@@ -28,12 +28,11 @@ HloModule Outfeed
 
 ENTRY main {
   const_a = f32[2,3,2] constant(
-  f32[2,3,2]
     {{{1, 2}, {1001, 1002}, {2001, 2002}},
      {{2, 1}, {2001, 3002}, {2001, 2002}}})
 
-  token = token[] after-all()
-  outfeed = token[] outfeed(f32[2,3,2] const_a, token)
+  token0 = token[] after-all()
+  outfeed = token[] outfeed(f32[2,3,2] const_a, token0)
   ROOT root = () tuple()
 }
 )";
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
index 443883a89f..73af18f87a 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
@@ -599,7 +599,7 @@ TEST_F(CudnnConvRewriterTest, BackwardInputConvolveConstantFilter) {
   Array4D<float> constant_arr(4, 4, 2, 2);
   constant_arr.FillIota(0);
   string constant_str =
-      LiteralUtil::CreateR4FromArray4D(constant_arr).ToString();
+      LiteralUtil::CreateR4FromArray4D(constant_arr).ToStringWithoutShape();
 
   const string module_str = absl::StrFormat(R"(
     HloModule test
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
index 2ffc8bfb49..29756d2726 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc
@@ -369,7 +369,7 @@ TEST_F(LayoutAssignmentTest, SortLayout) {
   const char* hlo_text = R"(
   HloModule SortLayout
   ENTRY sort {
-    keys = f32[3,2]{0,1} constant(f32[3,2]{0,1}{{0,1},{0,1},{0,1}})
+    keys = f32[3,2]{0,1} constant({{0,1},{0,1},{0,1}})
     values = f32[2,3]{1,0} parameter(0)
     transpose = f32[3,2]{1,0} transpose(values), dimensions={1,0}
     ROOT sort = (f32[3,2]{1,0}, f32[3,2]{1,0}) sort(keys, transpose),
diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
index 4f81dc94e5..92b748d813 100644
--- a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc
@@ -252,7 +252,7 @@ const char* const kConstantFoldLargePad = R"(
   HloModule ConstantFoldLargePad
 
   ENTRY r {
-    a = f32[1,1,1] constant(f32[1,1,1]{{{7}}})
+    a = f32[1,1,1] constant({{{7}}})
     b = f32[] constant(42)
     ROOT pad = f32[2048,2048,128] pad(a, b), padding=1024_1023x1024_1023x64_63
   })";
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
index f7a1f19a6f..94de7c55dd 100644
--- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc
@@ -1882,8 +1882,8 @@ TEST_P(HloDataflowAnalysisTest, AddDependency) {
 HloModule AddDependency
 ENTRY %AddDependency (p: f32[3]) -> f32[3] {
   %p = f32[3] parameter(0)
-  %token = token[] after-all()
-  ROOT %add_dep = f32[3] add-dependency(f32[3] %p, token[] %token)
+  %token0 = token[] after-all()
+  ROOT %add_dep = f32[3] add-dependency(f32[3] %p, token[] %token0)
 }
 )";
   TF_ASSERT_OK_AND_ASSIGN(
diff --git a/tensorflow/compiler/xla/service/hlo_domain_test.cc b/tensorflow/compiler/xla/service/hlo_domain_test.cc
index acdb42128e..fd4fb0246d 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_test.cc
@@ -195,10 +195,10 @@ HloModule Module
 ENTRY entry {
   p0 = (f32[4]) parameter(0)
   a = f32[4] get-tuple-element(p0), index=0
-  token = token[] after-all()
-  b = (f32[4], u32[], token[]) send(a, token), channel_id=1, sharding={maximal device=0}
+  token0 = token[] after-all()
+  b = (f32[4], u32[], token[]) send(a, token0), channel_id=1, sharding={maximal device=0}
   c = token[] send-done(b), channel_id=1, sharding={maximal device=0}
-  d = (f32[4], u32[], token[]) recv(token), channel_id=2, sharding={maximal device=0}
+  d = (f32[4], u32[], token[]) recv(token0), channel_id=2, sharding={maximal device=0}
   e = (f32[4], token[]) recv-done(d), channel_id=2, sharding={maximal device=0}
   e_element = f32[4] get-tuple-element(e), index=0, sharding={maximal device=0}
   f = f32[4] add(a, e_element)
@@ -235,12 +235,12 @@ TEST_F(HloDomainTest, CheckNoDomainAddedOnPureIOComputation) {
 HloModule Module
 
 ENTRY entry {
-  token = token[] after-all(), sharding={maximal device=-1}
-  a = (f32[4], u32[], token[]) recv(token), channel_id=1, sharding={maximal device=-1}
+  token0 = token[] after-all(), sharding={maximal device=-1}
+  a = (f32[4], u32[], token[]) recv(token0), channel_id=1, sharding={maximal device=-1}
   b = (f32[4], token[]) recv-done(a), channel_id=1, sharding={maximal device=-1}
   b_element = f32[4] get-tuple-element(b), index=0, sharding={maximal device=-1}
   c = f32[4] add(b_element, b_element), sharding={maximal device=-1}
-  d = (f32[4], u32[], token[]) send(c, token), channel_id=2, sharding={maximal device=-1}
+  d = (f32[4], u32[], token[]) send(c, token0), channel_id=2, sharding={maximal device=-1}
   ROOT e = token[] send-done(d), channel_id=2, sharding={maximal device=-1}
 }
 )";
@@ -259,12 +259,12 @@ TEST_F(HloDomainTest, CheckNormalizationOnPureIOComputation) {
 HloModule Module
 
 ENTRY entry {
-  token = token[] after-all(), sharding={maximal device=0}
-  a = (f32[4], u32[], token[]) recv(token), channel_id=1, sharding={maximal device=0}
+  token0 = token[] after-all(), sharding={maximal device=0}
+  a = (f32[4], u32[], token[]) recv(token0), channel_id=1, sharding={maximal device=0}
   b = (f32[4], token[]) recv-done(a), channel_id=1, sharding={maximal device=0}
   b_element = f32[4] get-tuple-element(b), index=0, sharding={maximal device=0}
   c = f32[4] add(b_element, b_element)
-  d = (f32[4], u32[], token[]) send(c, token), channel_id=2, sharding={maximal device=0}
+  d = (f32[4], u32[], token[]) send(c, token0), channel_id=2, sharding={maximal device=0}
   ROOT e = token[] send-done(d), channel_id=2, sharding={maximal device=0}
 }
 )";
@@ -344,8 +344,8 @@ TEST_F(HloDomainTest, CheckNormalizationOnInfeedTuple) {
 HloModule Module
 
 ENTRY entry {
-  token = token[] after-all()
-  infeed = ((f32[4], f32[4]), token[]) infeed(token),
+  token0 = token[] after-all()
+  infeed = ((f32[4], f32[4]), token[]) infeed(token0),
     sharding={{maximal device=1}, {maximal device=0}, {maximal device=0}}
   infeed.data = (f32[4], f32[4]) get-tuple-element(infeed), index=0,
     sharding={{maximal device=1}, {maximal device=0}}
diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter_test.cc b/tensorflow/compiler/xla/service/hlo_element_type_converter_test.cc
index c170e36c73..a3b56a44a0 100644
--- a/tensorflow/compiler/xla/service/hlo_element_type_converter_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_element_type_converter_test.cc
@@ -57,10 +57,10 @@ TEST_F(HloElementTypeConverterTest, InfeedsOutfeedsNotConverted) {
   const string& hlo_string = R"(
     HloModule InfeedOutfeed
     ENTRY RoundTrip16MiBR1.v2 {
-      token = token[] after-all()
-      infeed = (bf16[4]{0}, token[]) infeed(token)
+      token0 = token[] after-all()
+      infeed = (bf16[4]{0}, token[]) infeed(token0)
       ROOT infeed.data = bf16[4]{0} get-tuple-element(infeed), index=0
-      outfeed = token[] outfeed(infeed.data, token)
+      outfeed = token[] outfeed(infeed.data, token0)
     }
   )";
   auto module = CreateModuleFromHloString(hlo_string);
@@ -96,13 +96,13 @@ TEST_F(HloElementTypeConverterTest, BatchNormGradBF16Converted) {
   const string& hlo_string = R"(
     HloModule BatchNormGrad
     ENTRY BatchNormGrad.v6 {
-      constant.4 = bf16[2,2,2,1]{3,2,1,0} constant(bf16[2,2,2,1] { { /*i0=0*/ 
+      constant.4 = bf16[2,2,2,1]{3,2,1,0} constant({ { /*i0=0*/
       { /*i1=0*/ {0}, {0} }, { /*i1=1*/ {0}, {0} } }, { /*i0=1*/ { /*i1=0*/ {0},
       {0} }, { /*i1=1*/ {0}, {0} } } })
       constant.5 = bf16[2]{0} constant({1, 1})
       constant.6 = bf16[2]{0} constant({0, 0})
       constant.7 = bf16[2]{0} constant({1, 1})
-      constant.8 = bf16[2,2,2,1]{3,2,1,0} constant(bf16[2,2,2,1] { { /*i0=0*/
+      constant.8 = bf16[2,2,2,1]{3,2,1,0} constant({ { /*i0=0*/
       { /*i1=0*/ {1}, {2} }, { /*i1=1*/ {3}, {4} } }, { /*i0=1*/ { /*i1=0*/
       {5}, {6} }, { /*i1=1*/ {7}, {8} } } })
       ROOT batch-norm-grad = (bf16[2,2,2,1]{3,2,1,0}, bf16[2]{0}, bf16[2]{0})
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index f55de6a1c0..5521e5bd9a 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -905,7 +905,7 @@ string HloConstantInstruction::OperandsToStringWithCanonicalNameMap(
        options.print_large_constants())) {
     // Literal::ToString emits multidimensional arrays over multiple
     // lines. Compact this into one line by stripping out white space.
-    string tmp = literal().ToString();
+    string tmp = literal().ToStringWithoutShape();
     std::replace(tmp.begin(), tmp.end(), '\n', ' ');
     std::vector<string> v = absl::StrSplit(tmp, ' ');
     bool first = true;
diff --git a/tensorflow/compiler/xla/service/hlo_lexer.cc b/tensorflow/compiler/xla/service/hlo_lexer.cc
index 1390537101..dc712e5e42 100644
--- a/tensorflow/compiler/xla/service/hlo_lexer.cc
+++ b/tensorflow/compiler/xla/service/hlo_lexer.cc
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "absl/strings/escaping.h"
 #include "absl/strings/numbers.h"
+#include "absl/strings/str_split.h"
 #include "absl/types/optional.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -82,9 +83,23 @@ tensorflow::RegexpStringPiece HloLexer::RegexpStringPieceFromPointers(
   return tensorflow::RegexpStringPiece(begin, end - begin);
 }
 
+TokKind HloLexer::LookAhead() {
+  if (GetKind() == TokKind::kEof || GetKind() == TokKind::kError) {
+    return GetKind();
+  }
+
+  const char* old_current_ptr = current_ptr_;
+  TokenState old_token_state = token_state_;
+  Lex();
+  TokKind kind = GetKind();
+  token_state_ = old_token_state;
+  current_ptr_ = old_current_ptr;
+  return kind;
+}
+
 TokKind HloLexer::LexToken() {
   while (true) {
-    token_start_ = current_ptr_;
+    token_state_.token_start = current_ptr_;
 
     int current_char = GetNextChar();
     switch (current_char) {
@@ -206,43 +221,37 @@ TokKind HloLexer::LexToken() {
 // dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
 // identifiers ::= other cases that match [a-zA-Z_][a-zA-Z0-9_.-]*
 TokKind HloLexer::LexIdentifier() {
-  {
-    auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
-    // 'consumable' will be advanced iff its prefix matches the pattern.
-    static LazyRE2 shape_pattern = {
-        R"(^(\w*\d*)\[([\d,\s]*)\](?:(dense|sparse)?{([\d,\s]+)})?)"};
-    if (RE2::Consume(&consumable, *shape_pattern)) {
-      auto status_or_shape = ShapeUtil::ParseShapeString(
-          StringPieceFromPointers(token_start_, consumable.begin()));
-      if (status_or_shape.ok()) {
-        // This is a shape string.
-        shape_val_ = status_or_shape.ValueOrDie();
-        current_ptr_ = consumable.begin();
-        return TokKind::kShape;
-      }
-    }
-  }
-
   while (IsIdentifierChar(PeekCurrentChar())) {
     current_ptr_++;
   }
 
   // If followed by ':', it's a name.
   if (PeekCurrentChar() == ':') {
-    str_val_.assign(token_start_, current_ptr_);
+    token_state_.str_val.assign(token_state_.token_start, current_ptr_);
     current_ptr_++;  // skip ':'
     return TokKind::kName;
   }
 
   // If followed by '=', it's a attribute name.
   if (PeekCurrentChar() == '=') {
-    str_val_.assign(token_start_, current_ptr_);
+    token_state_.str_val.assign(token_state_.token_start, current_ptr_);
     current_ptr_++;  // skip '='
     return TokKind::kAttributeName;
   }
 
   absl::string_view identifier =
-      StringPieceFromPointers(token_start_, current_ptr_);
+      StringPieceFromPointers(token_state_.token_start, current_ptr_);
+
+  // Primitive type strings are reserved words. The exception is 'tuple' whose
+  // type is represented using nested parentheses without the string 'tuple'.
+  if (primitive_util::IsPrimitiveTypeName(identifier)) {
+    PrimitiveType primitive_type =
+        primitive_util::StringToPrimitiveType(identifier).ValueOrDie();
+    if (primitive_type != TUPLE) {
+      token_state_.primitive_type_val = primitive_type;
+      return TokKind::kPrimitiveType;
+    }
+  }
 
   // See if this is a keyword.
 #define KEYWORD(STR)            \
@@ -261,21 +270,23 @@ TokKind HloLexer::LexIdentifier() {
   KEYWORD(ROOT);
   KEYWORD(maximal);
   KEYWORD(replicated);
+  KEYWORD(sparse);
 
 #undef KEYWORD
 
   {
-    auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
+    auto consumable =
+        RegexpStringPieceFromPointers(token_state_.token_start, buf_.end());
     static LazyRE2 dim_labels_pattern = {
         R"([0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,})"};
     if (RE2::Consume(&consumable, *dim_labels_pattern)) {
       current_ptr_ = consumable.begin();
-      str_val_.assign(token_start_, current_ptr_);
+      token_state_.str_val.assign(token_state_.token_start, current_ptr_);
       return TokKind::kDimLabels;
     }
   }
 
-  str_val_ = string(identifier);
+  token_state_.str_val = string(identifier);
   return TokKind::kIdent;
 }
 
@@ -289,7 +300,7 @@ TokKind HloLexer::LexPercent() {
     while (IsIdentifierChar(PeekCurrentChar())) {
       current_ptr_++;
     }
-    str_val_.assign(name_start, current_ptr_);
+    token_state_.str_val.assign(name_start, current_ptr_);
     return TokKind::kName;
   }
   return TokKind::kError;
@@ -307,12 +318,14 @@ TokKind HloLexer::LexPercent() {
 // int ::=  [-]?[0-9]+
 // negative inf ::= '-inf'
 TokKind HloLexer::LexNumberOrPattern() {
-  auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
+  auto consumable =
+      RegexpStringPieceFromPointers(token_state_.token_start, buf_.end());
   static LazyRE2 float_pattern = {
       R"([-]?((\d+|\d+[.]\d*|\d*[.]\d+)([eE][+-]?\d+))|[-]?(\d+[.]\d*|\d*[.]\d+))"};
   if (RE2::Consume(&consumable, *float_pattern)) {
     current_ptr_ = consumable.begin();
-    CHECK(absl::SimpleAtod(string(token_start_, current_ptr_), &decimal_val_));
+    CHECK(absl::SimpleAtod(string(token_state_.token_start, current_ptr_),
+                           &token_state_.decimal_val));
     return TokKind::kDecimal;
   }
 
@@ -324,27 +337,28 @@ TokKind HloLexer::LexNumberOrPattern() {
 
   if (RE2::Consume(&consumable, *dim_labels_pattern)) {
     current_ptr_ = consumable.begin();
-    str_val_.assign(token_start_, current_ptr_);
+    token_state_.str_val.assign(token_state_.token_start, current_ptr_);
     return TokKind::kDimLabels;
   }
 
   if (RE2::Consume(&consumable, *dxd_pattern)) {
     current_ptr_ = consumable.begin();
-    str_val_.assign(token_start_, current_ptr_);
+    token_state_.str_val.assign(token_state_.token_start, current_ptr_);
     return TokKind::kDxD;
   }
 
   if (RE2::Consume(&consumable, *pad_pattern)) {
     current_ptr_ = consumable.begin();
-    str_val_.assign(token_start_, current_ptr_);
+    token_state_.str_val.assign(token_state_.token_start, current_ptr_);
     return TokKind::kPad;
   }
 
   static LazyRE2 int_pattern = {R"([-]?\d+)"};
   if (RE2::Consume(&consumable, *int_pattern)) {
     current_ptr_ = consumable.begin();
-    auto slice = StringPieceFromPointers(token_start_, current_ptr_);
-    if (absl::SimpleAtoi(slice, &int64_val_)) {
+    auto slice =
+        StringPieceFromPointers(token_state_.token_start, current_ptr_);
+    if (absl::SimpleAtoi(slice, &token_state_.int64_val)) {
       return TokKind::kInt;
     }
     LOG(ERROR) << "Failed to parse int literal: " << slice;
@@ -403,16 +417,17 @@ absl::string_view HloLexer::GetLine(LocTy loc) const {
 }
 
 // Lexes quoted string with escaping characters. If matched, the quoted string
-// will be unescaped and stored to str_val_.
+// will be unescaped and stored to token_state_.str_val.
 TokKind HloLexer::LexString() {
-  auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
+  auto consumable =
+      RegexpStringPieceFromPointers(token_state_.token_start, buf_.end());
   static LazyRE2 escaping_pattern = {R"("([^"\\]|\\.)*")"};
   if (RE2::Consume(&consumable, *escaping_pattern)) {
     current_ptr_ = consumable.begin();
     absl::string_view raw =
-        StringPieceFromPointers(token_start_ + 1, current_ptr_ - 1);
+        StringPieceFromPointers(token_state_.token_start + 1, current_ptr_ - 1);
     string error;
-    if (!absl::CUnescape(raw, &str_val_, &error)) {
+    if (!absl::CUnescape(raw, &token_state_.str_val, &error)) {
       LOG(ERROR) << "Failed unescaping string: " << raw << ". error: " << error;
       return TokKind::kError;
     }
@@ -467,6 +482,10 @@ string TokKindToString(TokKind kind) {
       return "kw_inf";
     case TokKind::kNegInf:
       return "kNegInf";
+    case TokKind::kw_sparse:
+      return "kw_sparse";
+    case TokKind::kPrimitiveType:
+      return "kPrimitiveType";
     case TokKind::kName:
       return "kName";
     case TokKind::kAttributeName:
@@ -481,8 +500,6 @@ string TokKindToString(TokKind kind) {
       return "kIdent";
     case TokKind::kString:
       return "kString";
-    case TokKind::kShape:
-      return "kShape";
     case TokKind::kInt:
       return "kInt";
     case TokKind::kDecimal:
diff --git a/tensorflow/compiler/xla/service/hlo_lexer.h b/tensorflow/compiler/xla/service/hlo_lexer.h
index d6a2b292a3..41f5043904 100644
--- a/tensorflow/compiler/xla/service/hlo_lexer.h
+++ b/tensorflow/compiler/xla/service/hlo_lexer.h
@@ -19,7 +19,6 @@ limitations under the License.
 #include <string>
 
 #include "absl/strings/string_view.h"
-#include "tensorflow/compiler/xla/service/hlo_token.h"
 #include "tensorflow/compiler/xla/shape.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
@@ -29,6 +28,57 @@ limitations under the License.
 
 namespace xla {
 
+// Defines different kinds of tokens used by the HLO lexer.
+//
+// You shouldn't need to use this directly unless you're using HloLexer
+// directly, and you probably don't need to do that.  Use hlo_parser instead.
+enum class TokKind {
+  // Markers
+  kEof,
+  kError,
+
+  // Tokens with no info.
+  kEqual,  // =
+  kComma,  // ,
+  kColon,  // :
+  kLsquare,
+  kRsquare,  // [  ]
+  kLbrace,
+  kRbrace,  // {  }
+  kLparen,
+  kRparen,  // (  )
+
+  kArrow,  // ->
+
+  // Keywords
+  kw_HloModule,
+  kw_ENTRY,
+  kw_ROOT,
+  kw_true,
+  kw_false,
+  kw_maximal,
+  kw_replicated,
+  kw_nan,
+  kw_inf,
+  kw_sparse,
+
+  kNegInf,  // -inf
+
+  // Typed tokens.
+  kPrimitiveType,  // F32, PRED, etc.
+  kName,           // %foo
+  kAttributeName,  // dimensions=
+  kDimLabels,      // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
+  kDxD,            // [0-9]+(x[0-9]+)+
+  kPad,            // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
+  kIdent,          // other identifiers
+  kString,         // "abcd\"\n"
+  kInt,            // 42
+  kDecimal,        // 4.2
+};
+
+string TokKindToString(TokKind kind);
+
 // Lexer for the HloModule::ToString() format text.
 //
 // This class is meant to be used by hlo_parser.cc.  You shouldn't need to use
@@ -39,9 +89,9 @@ class HloLexer {
     current_ptr_ = buf_.begin();
   }
 
-  TokKind Lex() { return current_kind_ = LexToken(); }
+  TokKind Lex() { return token_state_.current_kind = LexToken(); }
 
-  TokKind GetKind() const { return current_kind_; }
+  TokKind GetKind() const { return token_state_.current_kind; }
   string GetStrVal() const {
     switch (GetKind()) {
       case TokKind::kName:
@@ -51,28 +101,28 @@ class HloLexer {
       case TokKind::kPad:
       case TokKind::kString:
       case TokKind::kIdent:
-        return str_val_;
+        return token_state_.str_val;
       default:
         LOG(FATAL) << "This token does not have string value";
     }
   }
-  Shape GetShapeVal() const {
-    CHECK(GetKind() == TokKind::kShape);
-    return shape_val_;
-  }
   tensorflow::int64 GetInt64Val() const {
     CHECK(GetKind() == TokKind::kInt);
-    return int64_val_;
+    return token_state_.int64_val;
   }
   double GetDecimalVal() const {
     CHECK(GetKind() == TokKind::kDecimal);
-    return decimal_val_;
+    return token_state_.decimal_val;
+  }
+  PrimitiveType GetPrimitiveTypeVal() const {
+    CHECK(GetKind() == TokKind::kPrimitiveType);
+    return token_state_.primitive_type_val;
   }
 
   typedef const char* LocTy;
 
   // Returns the location of the current token.
-  LocTy GetLoc() const { return token_start_; }
+  LocTy GetLoc() const { return token_state_.token_start; }
 
   // Returns the line and column of a location in the buffer.
   std::pair<unsigned, unsigned> GetLineAndColumn(LocTy location) const;
@@ -80,6 +130,9 @@ class HloLexer {
   // Returns the whole line given the location.
   absl::string_view GetLine(LocTy loc) const;
 
+  // Looks ahead one token and returns it. Lexer state is unchanged.
+  TokKind LookAhead();
+
  private:
   // Returns the current character. If it's neither the end of input buffer nor
   // an invalid character, moves the pointer forward.
@@ -112,12 +165,15 @@ class HloLexer {
   const char* current_ptr_;
 
   // Information about the current token.
-  const char* token_start_ = nullptr;
-  TokKind current_kind_;
-  string str_val_;
-  Shape shape_val_;
-  tensorflow::int64 int64_val_;
-  double decimal_val_;
+  struct TokenState {
+    const char* token_start = nullptr;
+    TokKind current_kind;
+    string str_val;
+    tensorflow::int64 int64_val;
+    double decimal_val;
+    PrimitiveType primitive_type_val;
+  };
+  TokenState token_state_;
 
   struct LineNoCacheTy {
     const char* last_query;
diff --git a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc
index e0ae1173c6..436cccb1fb 100644
--- a/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_liveness_analysis_test.cc
@@ -403,9 +403,9 @@ TEST_F(HloLivenessAnalysisTest, WhileWithOutfeed) {
   HloModule OutfeedLoop
   WhileBody {
     body_param = (s32[]) parameter(0)
-    token = token[] after-all()
+    token0 = token[] after-all()
     constant.2 = s32[] constant(2)
-    outfeed_tuple = (s32[]) outfeed(constant.2, token)
+    outfeed_tuple = (s32[]) outfeed(constant.2, token0)
     get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
     constant.1 = s32[] constant(1)
     add = s32[] add(get-tuple-element.1, constant.1)
@@ -436,9 +436,9 @@ TEST_F(HloLivenessAnalysisTest, NestedWhileWithOutfeed) {
   HloModule OutfeedLoop
   InnerWhileBody {
     body_param = (s32[]) parameter(0)
-    token = token[] after-all()
+    token0 = token[] after-all()
     constant.2 = s32[] constant(2)
-    outfeed_tuple = (s32[]) outfeed(constant.2, token)
+    outfeed_tuple = (s32[]) outfeed(constant.2, token0)
     get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
     constant.1 = s32[] constant(1)
     add = s32[] add(get-tuple-element.1, constant.1)
diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h
index 235efb19ce..1fbcbdf98d 100644
--- a/tensorflow/compiler/xla/service/hlo_matchers.h
+++ b/tensorflow/compiler/xla/service/hlo_matchers.h
@@ -312,8 +312,8 @@ inline ::testing::Matcher<const ::xla::HloInstruction*> Shape(
 }
 inline ::testing::Matcher<const ::xla::HloInstruction*> Shape(
     absl::string_view shape) {
-  return ::testing::MakeMatcher(new ::xla::testing::HloShapeMatcher(
-      ShapeUtil::ParseShapeString(shape).ValueOrDie()));
+  return ::testing::MakeMatcher(
+      new ::xla::testing::HloShapeMatcher(ParseShape(shape).ValueOrDie()));
 }
 inline ::testing::Matcher<const ::xla::HloInstruction*> ShapeWithLayout(
     const class Shape& shape) {
@@ -323,7 +323,7 @@ inline ::testing::Matcher<const ::xla::HloInstruction*> ShapeWithLayout(
 inline ::testing::Matcher<const ::xla::HloInstruction*> ShapeWithLayout(
     absl::string_view shape) {
   return ::testing::MakeMatcher(new ::xla::testing::HloShapeAndLayoutMatcher(
-      ShapeUtil::ParseShapeString(shape).ValueOrDie()));
+      ParseShape(shape).ValueOrDie()));
 }
 
 // Verifies the value of the HloSharing against the provided sharding object.
diff --git a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
index bf66cc6bc3..e535b7d749 100644
--- a/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_module_dce_test.cc
@@ -373,9 +373,9 @@ TEST_F(HloModuleDceTest, WhileWithOutfeed) {
   HloModule OutfeedLoop
   WhileBody {
     body_param = (s32[]) parameter(0)
-    token = token[] after-all()
+    token0 = token[] after-all()
     constant.2 = s32[] constant(2)
-    outfeed_tuple = (s32[]) outfeed(constant.2, token)
+    outfeed_tuple = (s32[]) outfeed(constant.2, token0)
     get-tuple-element.1 = s32[] get-tuple-element(body_param), index=0
     constant.1 = s32[] constant(1)
     add = s32[] add(get-tuple-element.1, constant.1)
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 9b5bb5d0bd..29bb088f6d 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "absl/strings/str_split.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/compiler/xla/service/hlo_domain_metadata.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
@@ -74,6 +75,7 @@ class HloParser {
   string GetError() const { return StrJoin(error_, "\n"); }
 
   // Stand alone parsing utils for various aggregate data types.
+  StatusOr<Shape> ParseShapeOnly();
   StatusOr<HloSharding> ParseShardingOnly();
   StatusOr<Window> ParseWindowOnly();
   StatusOr<ConvolutionDimensionNumbers> ParseConvolutionDimensionNumbersOnly();
@@ -255,7 +257,9 @@ class HloParser {
   bool ParseName(string* result);
   bool ParseAttributeName(string* result);
   bool ParseString(string* result);
+  bool ParseDimensionSizes(std::vector<int64>* dimension_sizes);
   bool ParseShape(Shape* result);
+  bool ParseLayout(Layout* layout);
   bool ParseOpcode(HloOpcode* result);
   bool ParseFftType(FftType* result);
   bool ParseFusionKind(HloInstruction::FusionKind* result);
@@ -279,9 +283,6 @@ class HloParser {
   // If the current token is 'kind', eats it (i.e. lexes the next token) and
   // returns true.
   bool EatIfPresent(TokKind kind);
-  // Parses a shape, and returns true if the result is compatible with the given
-  // shape.
-  bool EatShapeAndCheckCompatible(const Shape& shape);
 
   // Adds the instruction to the pool. Returns false and emits an error if the
   // instruction already exists.
@@ -1697,11 +1698,6 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding,
         }
         break;
       }
-      case TokKind::kShape:
-        // TODO(b/112302613): Left here for backward compatibility to ignore the
-        // removed tile shape data.
-        lexer_.Lex();
-        break;
       case TokKind::kRbrace:
         break;
       default:
@@ -1925,19 +1921,6 @@ bool HloParser::SetValueInLiteralHelper(ParsedElemT value,
   return true;
 }
 
-bool HloParser::EatShapeAndCheckCompatible(const Shape& shape) {
-  Shape new_shape;
-  if (!ParseShape(&new_shape)) {
-    return TokenError(StrCat("expects shape ", ShapeUtil::HumanString(shape)));
-  }
-  if (!ShapeUtil::Compatible(shape, new_shape)) {
-    return TokenError(StrCat(
-        "expects shape ", ShapeUtil::HumanString(shape),
-        ", but sees a different shape: ", ShapeUtil::HumanString(new_shape)));
-  }
-  return true;
-}
-
 // literal
 //  ::= tuple
 //  ::= non_tuple
@@ -1952,10 +1935,6 @@ bool HloParser::ParseLiteral(Literal* literal, const Shape& shape) {
 //  ::= /*empty*/
 //  ::= literal (',' literal)*
 bool HloParser::ParseTupleLiteral(Literal* literal, const Shape& shape) {
-  if (!EatShapeAndCheckCompatible(shape)) {
-    return TokenError(StrCat("expects tuple constant in shape ",
-                             ShapeUtil::HumanString(shape)));
-  }
   if (!ParseToken(TokKind::kLparen, "expects '(' in front of tuple elements")) {
     return false;
   }
@@ -1990,16 +1969,12 @@ bool HloParser::ParseNonTupleLiteral(Literal* literal, const Shape& shape) {
     return ParseSparseLiteral(literal, shape);
   }
 
-  CHECK(LayoutUtil::IsDenseArray(shape));
+  CHECK(LayoutUtil::IsDenseArray(shape)) << shape.ToString(true);
   return ParseDenseLiteral(literal, shape);
 }
 
 bool HloParser::ParseDenseLiteral(Literal* literal, const Shape& shape) {
   const tensorflow::int64 rank = ShapeUtil::Rank(shape);
-  if (rank > 1 && !EatShapeAndCheckCompatible(shape)) {
-    return false;
-  }
-
   // Create a literal with the given shape in default layout.
   *literal = LiteralUtil::CreateFromDimensions(
       shape.element_type(), AsInt64Slice(shape.dimensions()));
@@ -2126,10 +2101,6 @@ bool HloParser::ParseDenseLiteral(Literal* literal, const Shape& shape) {
 }
 
 bool HloParser::ParseSparseLiteral(Literal* literal, const Shape& shape) {
-  if (!EatShapeAndCheckCompatible(shape)) {
-    return false;
-  }
-
   switch (shape.element_type()) {
     case PRED:
       return ParseSparseLiteralHelper<tensorflow::uint8>(literal, shape);
@@ -2994,6 +2965,39 @@ bool HloParser::ParseParamList() {
   return ParseToken(TokKind::kRparen, "expects ')' at the end of param list");
 }
 
+// dimension_sizes ::= '[' int64_list ']'
+bool HloParser::ParseDimensionSizes(std::vector<int64>* dimension_sizes) {
+  auto parse_and_add_item = [&]() {
+    tensorflow::int64 i;
+    if (!ParseInt64(&i)) {
+      return false;
+    }
+    dimension_sizes->push_back(i);
+    return true;
+  };
+  return ParseList(TokKind::kLsquare, TokKind::kRsquare, TokKind::kComma,
+                   parse_and_add_item);
+}
+
+// layout ::= '{' int64_list '}'
+bool HloParser::ParseLayout(Layout* layout) {
+  std::vector<int64> minor_to_major;
+  auto parse_and_add_item = [&]() {
+    tensorflow::int64 i;
+    if (!ParseInt64(&i)) {
+      return false;
+    }
+    minor_to_major.push_back(i);
+    return true;
+  };
+  if (!ParseList(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma,
+                 parse_and_add_item)) {
+    return false;
+  }
+  *layout = LayoutUtil::MakeLayout(minor_to_major);
+  return true;
+}
+
 // shape ::= shape_val_
 // shape ::= '(' tuple_elements ')'
 // tuple_elements
@@ -3017,19 +3021,61 @@ bool HloParser::ParseShape(Shape* result) {
     return ParseToken(TokKind::kRparen, "expects ')' at the end of tuple.");
   }
 
-  if (lexer_.GetKind() != TokKind::kShape) {
-    return TokenError(absl::StrCat("expected shape, saw ",
+  if (lexer_.GetKind() != TokKind::kPrimitiveType) {
+    return TokenError(absl::StrCat("expected primitive type, saw ",
                                    TokKindToString(lexer_.GetKind())));
   }
-  *result = lexer_.GetShapeVal();
+  PrimitiveType primitive_type = lexer_.GetPrimitiveTypeVal();
   lexer_.Lex();
+
+  std::vector<int64> dimension_sizes;
+  if (!ParseDimensionSizes(&dimension_sizes)) {
+    return false;
+  }
+  result->set_element_type(primitive_type);
+  *result->mutable_dimensions() = dimension_sizes;
+  LayoutUtil::SetToDefaultLayout(result);
+
+  if (lexer_.GetKind() == TokKind::kw_sparse) {
+    lexer_.Lex();
+    const string message =
+        "expects a brace-bracketed integer for sparse layout";
+    tensorflow::int64 max_sparse_elements;
+    if (!ParseToken(TokKind::kLbrace, message) ||
+        !ParseInt64(&max_sparse_elements) ||
+        !ParseToken(TokKind::kRbrace, message)) {
+      return false;
+    }
+    *result->mutable_layout() =
+        LayoutUtil::MakeSparseLayout(max_sparse_elements);
+    return true;
+  }
+
+  // We need to lookahead to see if a following open brace is the start of a
+  // layout. The specific problematic case is:
+  //
+  // ENTRY %foo (x: f32[42]) -> f32[123] {
+  //  ...
+  // }
+  //
+  // The open brace could either be the start of a computation or the start of a
+  // layout for the f32[123] shape. We consider it the start of a layout if the
+  // next token after the open brace is a integer
+  if (lexer_.GetKind() == TokKind::kLbrace &&
+      lexer_.LookAhead() == TokKind::kInt) {
+    Layout layout;
+    if (!ParseLayout(&layout)) {
+      return false;
+    }
+    *result->mutable_layout() = layout;
+  }
   return true;
 }
 
 bool HloParser::CanBeShape() {
-  // A non-tuple shape starts with a kShape token; a tuple shape starts with
-  // '('.
-  return lexer_.GetKind() == TokKind::kShape ||
+  // A non-tuple shape starts with a kPrimitiveType token; a tuple shape starts
+  // with '('.
+  return lexer_.GetKind() == TokKind::kPrimitiveType ||
          lexer_.GetKind() == TokKind::kLparen;
 }
 
@@ -3332,6 +3378,18 @@ bool HloParser::AddComputation(const string& name, HloComputation* computation,
   return true;
 }
 
+StatusOr<Shape> HloParser::ParseShapeOnly() {
+  lexer_.Lex();
+  Shape shape;
+  if (!ParseShape(&shape)) {
+    return InvalidArgument("Syntax error:\n%s", GetError());
+  }
+  if (lexer_.GetKind() != TokKind::kEof) {
+    return InvalidArgument("Syntax error:\nExtra content after shape");
+  }
+  return shape;
+}
+
 StatusOr<HloSharding> HloParser::ParseShardingOnly() {
   lexer_.Lex();
   OpSharding op_sharding;
@@ -3475,4 +3533,9 @@ StatusOr<PaddingConfig> ParsePaddingConfig(absl::string_view str) {
   return parser.ParsePaddingConfigOnly();
 }
 
+StatusOr<Shape> ParseShape(absl::string_view str) {
+  HloParser parser(str);
+  return parser.ParseShapeOnly();
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h
index d830fa6143..450a54c54c 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.h
+++ b/tensorflow/compiler/xla/service/hlo_parser.h
@@ -60,6 +60,9 @@ StatusOr<ConvolutionDimensionNumbers> ParseConvolutionDimensionNumbers(
 // Parses the result of PaddingConfigToString(), e.g. "0_0x1_1".
 StatusOr<PaddingConfig> ParsePaddingConfig(absl::string_view str);
 
+// Parses and returns a Shape::ToString-format string.
+StatusOr<Shape> ParseShape(absl::string_view str);
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PARSER_H_
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index ab71f011ac..80882d490d 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -82,7 +82,7 @@ ENTRY %constant_pred () -> pred[] {
 R"(HloModule module
 
 ENTRY %constant_pred_array () -> pred[2,3] {
-  ROOT %constant = pred[2,3]{1,0} constant(pred[2,3] { { 0, 1, 0 }, { 1, 0, 1 } })
+  ROOT %constant = pred[2,3]{1,0} constant({ { 0, 1, 0 }, { 1, 0, 1 } })
 }
 
 )"
@@ -128,7 +128,7 @@ ENTRY %ConstantF32Empty.v4 () -> f32[0] {
 R"(HloModule ConstantF32R4Empty_module
 
 ENTRY %ConstantF32R4Empty.v4 () -> f32[2,0,4,3] {
-  ROOT %constant = f32[2,0,4,3]{3,2,1,0} constant(f32[2,0,4,3] { { /*i0=0*/ }, { /*i0=1*/ } })
+  ROOT %constant = f32[2,0,4,3]{3,2,1,0} constant({ { /*i0=0*/ }, { /*i0=1*/ } })
 }
 
 )"
@@ -139,7 +139,7 @@ ENTRY %ConstantF32R4Empty.v4 () -> f32[2,0,4,3] {
 R"(HloModule Small_3x2x1x1_module
 
 ENTRY %Small_3x2x1x1.v1 () -> f32[3,2,1,1] {
-  ROOT %constant = f32[3,2,1,1]{3,2,1,0} constant(f32[3,2,1,1] { { /*i0=0*/ { /*i1=0*/ {-1} }, { /*i1=1*/ {4.1} } }, { /*i0=1*/ { /*i1=0*/ {2} }, { /*i1=1*/ {4.1} } }, { /*i0=2*/ { /*i1=0*/ {5} }, { /*i1=1*/ {4.4} } } })
+  ROOT %constant = f32[3,2,1,1]{3,2,1,0} constant({ { /*i0=0*/ { /*i1=0*/ {-1} }, { /*i1=1*/ {4.1} } }, { /*i0=1*/ { /*i1=0*/ {2} }, { /*i1=1*/ {4.1} } }, { /*i0=2*/ { /*i1=0*/ {5} }, { /*i1=1*/ {4.4} } } })
 }
 
 )"
@@ -196,7 +196,7 @@ ENTRY %add_constants () -> f32[] {
 R"(HloModule TupleConstant_module
 
 ENTRY %TupleConstant.v1 () -> (f32[2,1], f32[2]) {
-  ROOT %constant = (f32[2,1]{1,0}, f32[2]{0}) constant((f32[2,1], f32[2]) ( f32[2,1] { {1}, {2} }, {2, 42} ))
+  ROOT %constant = (f32[2,1]{1,0}, f32[2]{0}) constant(( { {1}, {2} }, {2, 42} ))
 }
 
 )"
@@ -295,11 +295,11 @@ ENTRY %WhileWithScalarS32Result.v2 () -> s32[] {
 R"(HloModule TwoSendRecvBothWayRecvFist_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> (f32[], token[]) {
-  %token = token[] after-all()
-  %recv = (f32[], u32[], token[]) recv(token[] %token), channel_id=15, sharding={maximal device=1}
+  %token0 = token[] after-all()
+  %recv = (f32[], u32[], token[]) recv(token[] %token0), channel_id=15, sharding={maximal device=1}
   ROOT %recv-done = (f32[], token[]) recv-done((f32[], u32[], token[]) %recv), channel_id=15, sharding={maximal device=1}
   %constant = f32[] constant(2.1), sharding={maximal device=0}
-  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token), channel_id=16, sharding={maximal device=0}, control-predecessors={%recv}
+  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token0), channel_id=16, sharding={maximal device=0}, control-predecessors={%recv}
   %send-done = token[] send-done((f32[], u32[], token[]) %send), channel_id=16, sharding={maximal device=0}
 }
 
@@ -310,11 +310,11 @@ ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> (f32[], token[]) {
 R"(HloModule HostTransferSendRecv_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> (f32[], token[]) {
-  %token = token[] after-all()
-  %recv = (f32[], u32[], token[]) recv(token[] %token), channel_id=15, is_host_transfer=true
+  %token0 = token[] after-all()
+  %recv = (f32[], u32[], token[]) recv(token[] %token0), channel_id=15, is_host_transfer=true
   ROOT %recv-done = (f32[], token[]) recv-done((f32[], u32[], token[]) %recv), channel_id=15, is_host_transfer=true
   %constant = f32[] constant(2.1), sharding={maximal device=0}
-  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token), channel_id=16, is_host_transfer=true
+  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token0), channel_id=16, is_host_transfer=true
   %send-done = token[] send-done((f32[], u32[], token[]) %send), channel_id=16, is_host_transfer=true
 }
 
@@ -327,7 +327,7 @@ R"(HloModule GetTupleElement_module
 
 ENTRY %GetTupleElement.v4 () -> s32[2,3] {
   %constant = f32[3]{0} constant({1, 2, 3})
-  %constant.1 = s32[2,3]{1,0} constant(s32[2,3] { { 1, 2, 3 }, { 4, 5, 6 } })
+  %constant.1 = s32[2,3]{1,0} constant({ { 1, 2, 3 }, { 4, 5, 6 } })
   %tuple = (f32[3]{0}, s32[2,3]{1,0}) tuple(f32[3]{0} %constant, s32[2,3]{1,0} %constant.1)
   ROOT %get-tuple-element = s32[2,3]{1,0} get-tuple-element((f32[3]{0}, s32[2,3]{1,0}) %tuple), index=1, sharding={maximal device=0}
 }
@@ -434,7 +434,7 @@ ENTRY %ConvolveBackward (input: f32[128,7,7,512], filter: f32[3,3,512,512]) -> f
 R"(HloModule Reverse4DFloatArrayOnDim01_module
 
 ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] {
-  %constant = f32[4,3,2,1]{0,1,2,3} constant(f32[4,3,2,1] { { /*i0=0*/ { /*i1=0*/ {1}, {2} }, { /*i1=1*/ {3}, {4} }, { /*i1=2*/ {5}, {6} } }, { /*i0=1*/ { /*i1=0*/ {7}, {8} }, { /*i1=1*/ {9}, {10} }, { /*i1=2*/ {11}, {12} } }, { /*i0=2*/ { /*i1=0*/ {13}, {14} }, { /*i1=1*/ {15}, {16} }, { /*i1=2*/ {17}, {18} } }, { /*i0=3*/ { /*i1=0*/ {19}, {20} }, { /*i1=1*/ {21}, {22} }, { /*i1=2*/ {23}, {24} } } })
+  %constant = f32[4,3,2,1]{0,1,2,3} constant({ { /*i0=0*/ { /*i1=0*/ {1}, {2} }, { /*i1=1*/ {3}, {4} }, { /*i1=2*/ {5}, {6} } }, { /*i0=1*/ { /*i1=0*/ {7}, {8} }, { /*i1=1*/ {9}, {10} }, { /*i1=2*/ {11}, {12} } }, { /*i0=2*/ { /*i1=0*/ {13}, {14} }, { /*i1=1*/ {15}, {16} }, { /*i1=2*/ {17}, {18} } }, { /*i0=3*/ { /*i1=0*/ {19}, {20} }, { /*i1=1*/ {21}, {22} }, { /*i1=2*/ {23}, {24} } } })
   ROOT %reverse = f32[4,3,2,1]{0,1,2,3} reverse(f32[4,3,2,1]{0,1,2,3} %constant), dimensions={0,1}
 }
 
@@ -446,8 +446,8 @@ ENTRY %Reverse4DFloatArrayOnDim01.v2 () -> f32[4,3,2,1] {
 R"(HloModule Concat2x3With2x5_module
 
 ENTRY %Concat2x3With2x5.v3 () -> f32[2,8] {
-  %constant = f32[2,3]{1,0} constant(f32[2,3] { { 0, 1, 2 }, { 1000, 1001, 1002 } })
-  %constant.1 = f32[2,5]{1,0} constant(f32[2,5] { { 64, 65, 66, 67, 68 }, { 1064, 1065, 1066, 1067, 1068 } })
+  %constant = f32[2,3]{1,0} constant({ { 0, 1, 2 }, { 1000, 1001, 1002 } })
+  %constant.1 = f32[2,5]{1,0} constant({ { 64, 65, 66, 67, 68 }, { 1064, 1065, 1066, 1067, 1068 } })
   ROOT %concatenate = f32[2,8]{1,0} concatenate(f32[2,3]{1,0} %constant, f32[2,5]{1,0} %constant.1), dimensions={1}
 }
 
@@ -471,8 +471,8 @@ R"(HloModule R4F32OverlapSmall_module
 }
 
 ENTRY %R4F32OverlapSmall.v4 () -> f32[4,5,1,1] {
-  %constant = f32[4,5,1,1]{3,2,1,0} constant(f32[4,5,1,1] { { /*i0=0*/ { /*i1=0*/ {7} }, { /*i1=1*/ {2} }, { /*i1=2*/ {5} }, { /*i1=3*/ {3} }, { /*i1=4*/ {8} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {8} }, { /*i1=2*/ {9} }, { /*i1=3*/ {3} }, { /*i1=4*/ {4} } }, { /*i0=2*/ { /*i1=0*/ {1} }, { /*i1=1*/ {5} }, { /*i1=2*/ {7} }, { /*i1=3*/ {5} }, { /*i1=4*/ {6} } }, { /*i0=3*/ { /*i1=0*/ {0} }, { /*i1=1*/ {6} }, { /*i1=2*/ {2} }, { /*i1=3*/ {10} }, { /*i1=4*/ {2} } } })
-  %constant.1 = f32[2,2,1,1]{3,2,1,0} constant(f32[2,2,1,1] { { /*i0=0*/ { /*i1=0*/ {2} }, { /*i1=1*/ {6} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {1} } } })
+  %constant = f32[4,5,1,1]{3,2,1,0} constant({ { /*i0=0*/ { /*i1=0*/ {7} }, { /*i1=1*/ {2} }, { /*i1=2*/ {5} }, { /*i1=3*/ {3} }, { /*i1=4*/ {8} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {8} }, { /*i1=2*/ {9} }, { /*i1=3*/ {3} }, { /*i1=4*/ {4} } }, { /*i0=2*/ { /*i1=0*/ {1} }, { /*i1=1*/ {5} }, { /*i1=2*/ {7} }, { /*i1=3*/ {5} }, { /*i1=4*/ {6} } }, { /*i0=3*/ { /*i1=0*/ {0} }, { /*i1=1*/ {6} }, { /*i1=2*/ {2} }, { /*i1=3*/ {10} }, { /*i1=4*/ {2} } } })
+  %constant.1 = f32[2,2,1,1]{3,2,1,0} constant({ { /*i0=0*/ { /*i1=0*/ {2} }, { /*i1=1*/ {6} } }, { /*i0=1*/ { /*i1=0*/ {3} }, { /*i1=1*/ {1} } } })
   %constant.2 = f32[] constant(0)
   ROOT %select-and-scatter = f32[4,5,1,1]{3,2,1,0} select-and-scatter(f32[4,5,1,1]{3,2,1,0} %constant, f32[2,2,1,1]{3,2,1,0} %constant.1, f32[] %constant.2), window={size=2x3x1x1 stride=2x2x1x1}, select=%ge_F32.v3, scatter=%add_F32.v3
 }
@@ -523,7 +523,7 @@ ENTRY %slice.v2 (p0: f32[3,3,4,4]) -> f32[3,3,2,4] {
 R"(HloModule Slice3x3x3_To_1x3x3_F32_module
 
 ENTRY %Slice3x3x3_To_1x3x3_F32.v2 () -> f32[1,3,3] {
-  %constant = f32[3,3,3]{2,1,0} constant(f32[3,3,3] { { { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 } }, { { 9, 10, 11 }, { 12, 13, 14 }, { 15, 16, 17 } }, { { 18, 19, 20 }, { 21, 22, 23 }, { 24, 25, 26 } } })
+  %constant = f32[3,3,3]{2,1,0} constant({ { { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 } }, { { 9, 10, 11 }, { 12, 13, 14 }, { 15, 16, 17 } }, { { 18, 19, 20 }, { 21, 22, 23 }, { 24, 25, 26 } } })
   ROOT %slice = f32[1,3,3]{2,1,0} slice(f32[3,3,3]{2,1,0} %constant), slice={[0:1], [0:3], [0:3]}
 }
 
@@ -547,7 +547,7 @@ ENTRY %SliceR0.v2 () -> s32[] {
 R"(HloModule Transpose_module
 
 ENTRY %Transpose.v2 () -> s32[1,2,3] {
-  %constant = s32[1,2,3]{2,1,0} constant(s32[1,2,3] { { { 1, 2, 3 }, { 4, 5, 6 } } })
+  %constant = s32[1,2,3]{2,1,0} constant({ { { 1, 2, 3 }, { 4, 5, 6 } } })
   ROOT %transpose = s32[1,2,3]{2,1,0} transpose(s32[1,2,3]{2,1,0} %constant), dimensions={0,1,2}
 }
 
@@ -588,7 +588,7 @@ ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_
 R"(HloModule BasicTraining_module
 
 ENTRY %BasicTraining.v4 () -> (f32[2,2,1,2], f32[2], f32[2]) {
-  %constant = f32[2,2,1,2]{3,2,1,0} constant(f32[2,2,1,2] { { /*i0=0*/ { /*i1=0*/ { 1, 2 } }, { /*i1=1*/ { 3, 4 } } }, { /*i0=1*/ { /*i1=0*/ { 5, 6 } }, { /*i1=1*/ { 7, 8 } } } })
+  %constant = f32[2,2,1,2]{3,2,1,0} constant({ { /*i0=0*/ { /*i1=0*/ { 1, 2 } }, { /*i1=1*/ { 3, 4 } } }, { /*i0=1*/ { /*i1=0*/ { 5, 6 } }, { /*i1=1*/ { 7, 8 } } } })
   %constant.1 = f32[2]{0} constant({2, 3})
   %constant.2 = f32[2]{0} constant({1, 2})
   ROOT %batch-norm-training = (f32[2,2,1,2]{3,2,1,0}, f32[2]{0}, f32[2]{0}) batch-norm-training(f32[2,2,1,2]{3,2,1,0} %constant, f32[2]{0} %constant.1, f32[2]{0} %constant.2), epsilon=0.001, feature_index=3
@@ -728,7 +728,7 @@ R"(HloModule fusion_module
 }
 
 ENTRY %fusion.v3 () -> f32[3,2,1,1] {
-  %constant = f32[3,2,1,1]{3,2,1,0} constant(f32[3,2,1,1] { { /*i0=0*/ { /*i1=0*/ {-1} }, { /*i1=1*/ {4.1} } }, { /*i0=1*/ { /*i1=0*/ {2} }, { /*i1=1*/ {4.1} } }, { /*i0=2*/ { /*i1=0*/ {5} }, { /*i1=1*/ {4.4} } } })
+  %constant = f32[3,2,1,1]{3,2,1,0} constant({ { /*i0=0*/ { /*i1=0*/ {-1} }, { /*i1=1*/ {4.1} } }, { /*i0=1*/ { /*i1=0*/ {2} }, { /*i1=1*/ {4.1} } }, { /*i0=2*/ { /*i1=0*/ {5} }, { /*i1=1*/ {4.4} } } })
   %constant.1 = f32[2]{0} constant({3.14, 4.25})
   ROOT %fusion = f32[3,2,1,1]{3,2,1,0} fusion(f32[3,2,1,1]{3,2,1,0} %constant, f32[2]{0} %constant.1), kind=kLoop, calls=%fused_computation
 }
@@ -740,7 +740,7 @@ ENTRY %fusion.v3 () -> f32[3,2,1,1] {
 R"(HloModule sparse_f32
 
 ENTRY %sparse () -> f32[2,3,4] {
-  ROOT %foo = f32[2,3,4]sparse{10} constant(f32[2,3,4]{[0, 1, 2]: 1, [1, 2, 3]: 2, [2, 3, 4]: 3})
+  ROOT %foo = f32[2,3,4]sparse{10} constant({[0, 1, 2]: 1, [1, 2, 3]: 2, [2, 3, 4]: 3})
 }
 
 )"
@@ -750,7 +750,7 @@ ENTRY %sparse () -> f32[2,3,4] {
 R"(HloModule sparse_f32_empty
 
 ENTRY %sparse_f32_empty () -> f32[2,3,4] {
-  ROOT %foo = f32[2,3,4]sparse{10} constant(f32[2,3,4]{})
+  ROOT %foo = f32[2,3,4]sparse{10} constant({})
 }
 
 )"
@@ -760,7 +760,7 @@ ENTRY %sparse_f32_empty () -> f32[2,3,4] {
 R"(HloModule sparse_f32_r1
 
 ENTRY %sparse_f32_r1 () -> f32[9] {
-  ROOT %foo = f32[9]sparse{10} constant(f32[9]{1: 2, 3: 4, 5: 6})
+  ROOT %foo = f32[9]sparse{10} constant({1: 2, 3: 4, 5: 6})
 }
 
 )"
@@ -931,11 +931,11 @@ ENTRY reduce_entry {
 R"(HloModule outfeed_module
 
 ENTRY InfeedToOutfeed {
-  token = token[] after-all()
-  infeed = ((u32[3]{0}, pred[]), token[]) infeed(token)
+  token0 = token[] after-all()
+  infeed = ((u32[3]{0}, pred[]), token[]) infeed(token0)
   infeed.data = (u32[3]{0}, pred[]) get-tuple-element(infeed), index=0
-  outfeed = token[] outfeed(infeed.data, token)
-  ROOT infeed.1 = ((u32[3]{0}, pred[]), token[]) infeed(token)
+  outfeed = token[] outfeed(infeed.data, token0)
+  ROOT infeed.1 = ((u32[3]{0}, pred[]), token[]) infeed(token0)
   infeed.1.data = (u32[3]{0}, pred[]) get-tuple-element(infeed.1), index=0
   infeed.1.token = token[] get-tuple-element(infeed.1), index=1
   outfeed.1 = token[] outfeed(infeed.1.data, infeed.1.token)
@@ -1266,8 +1266,8 @@ R"(HloModule AddDependency
 ENTRY AddDependency {
   p = f32[] parameter(0)
   neg = f32[] negate(p)
-  token = token[] after-all(neg)
-  p_after_token = f32[] add-dependency(p, token)
+  token0 = token[] after-all(neg)
+  p_after_token = f32[] add-dependency(p, token0)
   exp = f32[] exponential(p_after_token)
   ROOT sum = f32[] add(neg, exp)
 }
@@ -1419,7 +1419,7 @@ TEST_F(HloParserTest, MoreConstants) {
 
 ENTRY %SelectScalarS32True.v4 () -> s32[] {
   %constant.2 = pred[] constant(true)
-  %constant.1 = s32[] constant(-42), sharding={s32[5,6] devices=[2,2]1,2,3,4}
+  %constant.1 = s32[] constant(-42), sharding={devices=[2,2]1,2,3,4}
   %constant = s32[] constant(42)
   %select = s32[] select(pred[] %constant.2, s32[] %constant.1, s32[] %constant)
 }
@@ -1462,7 +1462,7 @@ TEST_F(HloParserTest, LiteralDimensionsMismatch_2) {
   const string original = R"(HloModule some_2x3_module
 
 ENTRY %some_2x3 () -> f32[2,3] {
-  ROOT %constant = f32[2,3]{1,0} constant(f32[2,3] {1, 2, 3, 4, 5, 6})
+  ROOT %constant = f32[2,3]{1,0} constant({1, 2, 3, 4, 5, 6})
 }
 
 )";
@@ -1476,7 +1476,7 @@ TEST_F(HloParserTest, LiteralDimensionsMismatch_3) {
   const string original = R"(HloModule some_2x3x2_module
 
 ENTRY %some_2x3x2 () -> f32[2,3,2] {
-  ROOT %constant = f32[2,3,2]{2,1,0} constant(f32[2,3,2] {{{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}, {11, 12}}})
+  ROOT %constant = f32[2,3,2]{2,1,0} constant({{{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}, {11, 12}}})
 }
 
 )";
@@ -1594,11 +1594,11 @@ TEST_F(HloParserTest, UnexpectedAttribute) {
   const string original = R"(HloModule unexpected_attr_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
-  %token = token[] after-all()
-  %recv = (f32[], u32[], token[]) recv(token[] %token), channel_id=15
+  %token0 = token[] after-all()
+  %recv = (f32[], u32[], token[]) recv(token[] %token0), channel_id=15
   %recv-done = (f32[], token[]) recv-done((f32[], u32[], token[]) %recv), channel_id=15
   ROOT %constant = f32[] constant(2.1)
-  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token), channel_id=16, calls=%recv
+  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token0), channel_id=16, calls=%recv
   %send-done = token[] send-done((f32[], u32[], token[]) %send), channel_id=16
 }
 
@@ -1611,11 +1611,11 @@ TEST_F(HloParserTest, MissingAttribute) {
   const string original = R"(HloModule missing_attr_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
-  %token = token[] after-all()
-  %recv = (f32[], u32[], token[]) recv(token[] %token), channel_id=15
+  %token0 = token[] after-all()
+  %recv = (f32[], u32[], token[]) recv(token[] %token0), channel_id=15
   %recv-done = (f32[], token[]) recv-done((f32[], u32[], token[]) %recv), channel_id=15
   ROOT %constant = f32[] constant(-2.1)
-  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token)
+  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token0)
   %send-done = token[] send-done((f32[], u32[], token[]) %send), channel_id=16
 }
 
@@ -1628,11 +1628,11 @@ TEST_F(HloParserTest, PredecessorUndefined) {
   const string original = R"(HloModule pre_not_found_module
 
 ENTRY %TwoSendRecvBothWayRecvFist.v3 () -> f32[] {
-  %token = token[] after-all()
-  %recv = (f32[], u32[], token[]) recv(token[] %token), channel_id=15
+  %token0 = token[] after-all()
+  %recv = (f32[], u32[], token[]) recv(token[] %token0), channel_id=15
   %recv-done = (f32[], token[]) recv-done((f32[], u32[], token[]) %recv), channel_id=15
   ROOT %constant = f32[] constant(2.1)
-  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token), channel_id=16, control-predecessors={%done}
+  %send = (f32[], u32[], token[]) send(f32[] %constant, token[] %token0), channel_id=16, control-predecessors={%done}
   %send-done = token[] send-done((f32[], u32[], token[]) %send), channel_id=16
 }
 
@@ -1940,8 +1940,8 @@ TEST_F(HloParserTest, ParsePaddingConfigInteriorPaddingImplicitZeroDim) {
 TEST_F(HloParserTest, NontupleInfeed) {
   const string original = R"(HloModule nontuple_infeed:
 ENTRY nontuple_infeed {
-  token = token[] after-all()
-  ROOT infeed = pred[] infeed(token)
+  token0 = token[] after-all()
+  ROOT infeed = pred[] infeed(token0)
 })";
   ExpectHasSubstr(ParseHloString(original).status().error_message(),
                   "infeed must have a non-empty tuple shape");
@@ -2239,7 +2239,7 @@ HloModule foobar
 
 ENTRY %entrycomp (p: f32[2,2]) -> f32[2,2] {
   %p = f32[2,2] parameter(0)
-  %constant.1 = f32[2,2] constant(f32[2,2] {{1, 2}, {3, 4}})
+  %constant.1 = f32[2,2] constant({{1, 2}, {3, 4}})
   ROOT %add.1 = f32[2,2] add(f32[2,2] %p, f32[2,5] %constant.1)
 }
 )";
@@ -2249,7 +2249,85 @@ ENTRY %entrycomp (p: f32[2,2]) -> f32[2,2] {
                   " with the shape of the operand instruction f32[2,2]{1,0}.");
 }
 
-// custom call incompatible shape.
+TEST_F(HloParserTest, ParseShapeStringR2F32) {
+  string shape_string = "f32[123,456]";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string));
+  Shape expected = ShapeUtil::MakeShape(F32, {123, 456});
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual:   " << ShapeUtil::HumanString(actual);
+}
+
+TEST_F(HloParserTest, ParseShapeStringTupleOfArrays) {
+  string shape_string = "(f32[1572864],s8[5120,1024])";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string));
+  Shape expected =
+      ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {1572864}),
+                                 ShapeUtil::MakeShape(S8, {5120, 1024})});
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual:   " << ShapeUtil::HumanString(actual);
+}
+
+TEST_F(HloParserTest, ParseShapeStringNestedTuple) {
+  string shape_string = "(f32[1],(f32[2], token[]), opaque[], f32[3])";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string));
+  Shape expected = ShapeUtil::MakeTupleShape({
+      ShapeUtil::MakeShape(F32, {1}),
+      ShapeUtil::MakeTupleShape(
+          {ShapeUtil::MakeShape(F32, {2}), ShapeUtil::MakeTokenShape()}),
+      ShapeUtil::MakeOpaqueShape(),
+      ShapeUtil::MakeShape(F32, {3}),
+  });
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual:   " << ShapeUtil::HumanString(actual);
+}
+
+TEST_F(HloParserTest, ParseShapeStringWithLayout) {
+  string shape_string = "f32[123,456]{0,1}";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string));
+  Shape expected = ShapeUtil::MakeShapeWithLayout(F32, {123, 456}, {0, 1});
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual:   " << ShapeUtil::HumanString(actual);
+}
+
+TEST_F(HloParserTest, ParseShapeStringWithSparseLayout) {
+  string shape_string = "f32[123,456]sparse{10}";
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape(shape_string));
+  Shape expected = ShapeUtil::MakeShapeWithSparseLayout(F32, {123, 456}, 10);
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual: " << ShapeUtil::HumanString(actual);
+}
+
+TEST_F(HloParserTest, ParseOpaqueType) {
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape("opaque[]"));
+  Shape expected = ShapeUtil::MakeOpaqueShape();
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual:   " << ShapeUtil::HumanString(actual);
+}
+
+TEST_F(HloParserTest, ParseTokenType) {
+  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ParseShape("token[]"));
+  Shape expected = ShapeUtil::MakeTokenShape();
+  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
+      << "expected: " << ShapeUtil::HumanString(expected)
+      << "actual:   " << ShapeUtil::HumanString(actual);
+}
+
+TEST_F(HloParserTest, ParseInvalidShapeString) {
+  string shape_strings[] = {
+      "f32[123,456]foobar{0,1}", "f32[123,456]sparse{0,1}", "f32[123,456]{foo}",
+      "f32[123,456]dense{foo}",  "f32[123,456]sparse{foo}",
+  };
+  for (const string& shape_string : shape_strings) {
+    StatusOr<Shape> result = ParseShape(shape_string);
+    ASSERT_FALSE(result.ok()) << "shape: " << shape_string;
+  }
+}
 
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_token.h b/tensorflow/compiler/xla/service/hlo_token.h
deleted file mode 100644
index 4458c251de..0000000000
--- a/tensorflow/compiler/xla/service/hlo_token.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TOKEN_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TOKEN_H_
-
-#include <string>
-
-#include "tensorflow/compiler/xla/types.h"
-#include "tensorflow/core/platform/types.h"
-
-namespace xla {
-
-// Defines different kinds of tokens in a hlo module string.
-//
-// You shouldn't need to use this directly unless you're using HloLexer
-// directly, and you probably don't need to do that.  Use hlo_parser instead.
-enum class TokKind {
-  // Markers
-  kEof,
-  kError,
-
-  // Tokens with no info.
-  kEqual,  // =
-  kComma,  // ,
-  kColon,  // :
-  kLsquare,
-  kRsquare,  // [  ]
-  kLbrace,
-  kRbrace,  // {  }
-  kLparen,
-  kRparen,  // (  )
-
-  kArrow,    // ->
-
-  // Keywords
-  kw_HloModule,
-  kw_ENTRY,
-  kw_ROOT,
-  kw_true,
-  kw_false,
-  kw_maximal,
-  kw_replicated,
-  kw_nan,
-  kw_inf,
-
-  kNegInf,  // -inf
-
-  // Typed tokens.
-  kName,           // %foo
-  kAttributeName,  // dimensions=
-  kDimLabels,      // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
-  kDxD,            // [0-9]+(x[0-9]+)+
-  kPad,            // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
-  kIdent,          // other identifiers
-  kString,         // "abcd\"\n"
-  kShape,          // f32[2,3]{1,0}
-  kInt,            // 42
-  kDecimal,        // 4.2
-};
-
-string TokKindToString(TokKind kind);
-
-}  // namespace xla
-
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_TOKEN_H_
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc
index 98246d5403..295465c848 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis_test.cc
@@ -99,7 +99,7 @@ TEST_F(IndexedArrayAnalysisTest, SimpleOneToOneConstantGather) {
 HloModule SimpleGather
 
 ENTRY main {
-  operand = s32[3,3] constant(s32[3,3]{{1,2,3},{1,2,3},{1,2,3}})
+  operand = s32[3,3] constant({{1,2,3},{1,2,3},{1,2,3}})
   indices = s32[5] parameter(0)
   ROOT gather = s32[5,3] gather(operand, indices),
       offset_dims={1},
@@ -119,7 +119,7 @@ TEST_F(IndexedArrayAnalysisTest, GatherIsNotScalarIndexed0) {
 HloModule SimpleGather
 
 ENTRY main {
-  operand = s32[3,3] constant(s32[3,3]{{1,2,3},{1,2,3},{1,2,3}})
+  operand = s32[3,3] constant({{1,2,3},{1,2,3},{1,2,3}})
   indices = s32[5,2] parameter(0)
   ROOT gather = s32[5] gather(operand, indices),
       offset_dims={},
@@ -195,7 +195,7 @@ TEST_F(IndexedArrayAnalysisTest, GatherOfGather_OneToOne) {
 HloModule SimpleGather
 
 ENTRY main {
-  operand = s32[3,3] constant(s32[3,3]{{1,2,3},{1,2,3},{1,2,3}})
+  operand = s32[3,3] constant({{1,2,3},{1,2,3},{1,2,3}})
   indices_a = s32[5] parameter(0)
   indices_b = s32[2] parameter(1)
   gather_a = s32[5,3] gather(operand, indices_a),
@@ -309,7 +309,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather0) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,2,3,4},{1,2,3,4}})
+  operand = s32[3,4] constant({{1,2,3,4},{1,2,3,4},{1,2,3,4}})
   indices = s32[5] parameter(0)
   gather = s32[5,4] gather(operand, indices),
       offset_dims={1},
@@ -330,7 +330,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather1) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,2,3,4},{1,2,3,4}})
+  operand = s32[3,4] constant({{1,2,3,4},{1,2,3,4},{1,2,3,4}})
   indices = s32[5,7] parameter(0)
   gather = s32[5,4,7] gather(operand, indices),
       offset_dims={1},
@@ -352,7 +352,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather2) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[3,2,6] constant(s32[3,2,6]{
+  operand = s32[3,2,6] constant({
       {{1,2,3,4,5,6},{1,2,3,4,5,6}},
       {{1,2,3,4,5,6},{1,2,3,4,5,6}},
       {{1,2,3,4,5,6},{1,2,3,4,5,6}}})
@@ -377,7 +377,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather3) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[2,6] constant(s32[2,6]{
+  operand = s32[2,6] constant({
       {1,2,3,4,5,6},{1,2,3,4,5,6}})
   indices = s32[1] parameter(0)
   gather = s32[1,6] gather(operand, indices),
@@ -405,7 +405,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather4) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[2,3]{1,0} constant(s32[2,3] { { 1, 2, 3 }, { 1, 2, 3 } })
+  operand = s32[2,3]{1,0} constant({ { 1, 2, 3 }, { 1, 2, 3 } })
 
   i.0 = s64[1,3]{1,0} parameter(0)
   g.0 = s32[1,3,3]{2,1,0} gather(operand, i.0), offset_dims={2},
@@ -438,7 +438,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather5) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[1,6] constant(s32[1,6]{{1,2,3,4,5,6}})
+  operand = s32[1,6] constant({{1,2,3,4,5,6}})
   indices = s32[1] parameter(0)
   gather = s32[1,6] gather(operand, indices),
       offset_dims={1},
@@ -465,7 +465,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather6) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[1,2,6] constant(s32[1,2,6]{{
+  operand = s32[1,2,6] constant({{
       {1,2,3,4,5,6},{1,2,3,4,5,6}}})
   indices = s32[1] parameter(0)
   gather = s32[1,1,6] gather(operand, indices),
@@ -496,7 +496,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGather7) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[2,6] constant(s32[2,6]{
+  operand = s32[2,6] constant({
       {1,2,3,4,5,6},{1,2,3,4,5,6}})
   indices = s32[1,5] parameter(0)
   gather = s32[1,5,6] gather(operand, indices),
@@ -527,7 +527,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGatherNoFold0) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,2,3,4},{1,2,3,4}})
+  operand = s32[3,4] constant({{1,2,3,4},{1,2,3,4},{1,2,3,4}})
   indices = s32[5,6] parameter(0)
   gather = s32[5,4,6] gather(operand, indices),
       offset_dims={1},
@@ -556,7 +556,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGatherNoFold1) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[3,5,2] constant(s32[3,5,2]{
+  operand = s32[3,5,2] constant({
       {{1,2},{3,4},{5,6},{7,8},{9,10}},
       {{1,2},{3,4},{5,6},{7,8},{9,10}},
       {{1,2},{3,4},{5,6},{7,8},{9,10}}})
@@ -588,7 +588,7 @@ TEST_F(IndexedArrayAnalysisTest, ReshapeOfGatherNoFold2) {
 HloModule ReshapeOfGather
 
 ENTRY main {
-  operand = s32[3,4,1] constant(s32[3,4,1]{
+  operand = s32[3,4,1] constant({
     {{1},{2},{3},{4}},
     {{1},{2},{3},{4}},
     {{1},{2},{3},{4}}})
@@ -620,7 +620,7 @@ TEST_F(IndexedArrayAnalysisTest, UnaryOpOfGather) {
 HloModule UnaryOpOfGather
 
 ENTRY main {
-  operand = f32[3,4] constant(f32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}})
+  operand = f32[3,4] constant({{1,2,3,4},{1,3,2,4},{4,3,2,1}})
   indices = s32[5] parameter(0)
   gather = f32[5,4] gather(operand, indices),
       offset_dims={1},
@@ -645,7 +645,7 @@ TEST_F(IndexedArrayAnalysisTest, AddBroadcastedScalarWithGather) {
 HloModule AddBroadcastedScalarWithGather
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{1,3,2,4},{4,3,2,1}})
   constant = s32[] constant(5)
   constant_broadcasted = s32[5,4] broadcast(constant), dimensions={}
   indices = s32[5] parameter(0)
@@ -673,7 +673,7 @@ TEST_F(IndexedArrayAnalysisTest,
 HloModule SubtractBroadcastedScalarWithGather
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{1,3,2,4},{4,3,2,1}})
   constant = s32[] constant(5)
   constant_broadcasted = s32[5,4] broadcast(constant), dimensions={}
   indices = s32[5] parameter(0)
@@ -701,7 +701,7 @@ TEST_F(IndexedArrayAnalysisTest,
 HloModule SubtractBroadcastedScalarWithGather
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{1,3,2,4},{4,3,2,1}})
   constant = s32[] constant(5)
   constant_broadcasted = s32[5,4] broadcast(constant), dimensions={}
   indices = s32[5] parameter(0)
@@ -728,7 +728,7 @@ TEST_F(IndexedArrayAnalysisTest, AddBroadcastedVectorWithGather) {
 HloModule AddBroadcastedVectorWithGather
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{1,3,2,4},{4,3,2,1}})
   constant_vect = s32[4] constant({10,11,12,13})
   constant_broadcasted = s32[5,4] broadcast(constant_vect), dimensions={1}
   indices = s32[5] parameter(0)
@@ -755,7 +755,7 @@ TEST_F(IndexedArrayAnalysisTest, AddBroadcastedVectorWithGather_Negative) {
 HloModule AddBroadcastedVectorWithGather
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{1,3,2,4},{4,3,2,1}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{1,3,2,4},{4,3,2,1}})
   constant_vect = s32[5] constant({10,11,12,13,14})
   constant_broadcasted = s32[5,4] broadcast(constant_vect), dimensions={0}
   indices = s32[5] parameter(0)
@@ -804,8 +804,8 @@ TEST_F(IndexedArrayAnalysisTest, DotOpBasic_0) {
 HloModule DotOp
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{5,6,7,8},{9,10,11,12}})
-  dot_rhs_constant = s32[4,3] constant(s32[4,3]{{1,2,3},{4,5,6},{7,8,9},{10,11,12}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{5,6,7,8},{9,10,11,12}})
+  dot_rhs_constant = s32[4,3] constant({{1,2,3},{4,5,6},{7,8,9},{10,11,12}})
   indices = s32[5] parameter(0)
   dot_lhs = s32[5,4] gather(gather_operand, indices),
       offset_dims={1},
@@ -831,8 +831,8 @@ TEST_F(IndexedArrayAnalysisTest, DotOpBasic_1) {
 HloModule DotOp
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{5,6,7,8},{9,10,11,12}})
-  dot_rhs_constant = s32[3,3] constant(s32[3,3]{{1,2,3},{4,5,6},{7,8,9}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{5,6,7,8},{9,10,11,12}})
+  dot_rhs_constant = s32[3,3] constant({{1,2,3},{4,5,6},{7,8,9}})
   indices = s32[5] parameter(0)
   dot_lhs = s32[3,5] gather(gather_operand, indices),
       offset_dims={0},
@@ -859,8 +859,8 @@ TEST_F(IndexedArrayAnalysisTest, DotOpBasic_2) {
 HloModule DotOp
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{5,6,7,8},{9,10,11,12}})
-  dot_lhs_constant = s32[4,3] constant(s32[4,3]{{1,2,3},{4,5,6},{7,8,9},{10,11,12}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{5,6,7,8},{9,10,11,12}})
+  dot_lhs_constant = s32[4,3] constant({{1,2,3},{4,5,6},{7,8,9},{10,11,12}})
   indices = s32[5] parameter(0)
   dot_rhs = s32[3,5] gather(gather_operand, indices),
       offset_dims={0},
@@ -888,8 +888,8 @@ TEST_F(IndexedArrayAnalysisTest, DotOpBasic_3) {
 HloModule DotOp
 
 ENTRY main {
-  gather_operand = s32[4,3] constant(s32[4,3]{{1,2,3},{4,5,6},{7,8,9},{10,11,12}})
-  dot_lhs_constant = s32[4,3] constant(s32[4,3]{{1,2,3},{4,5,6},{7,8,9},{10,11,12}})
+  gather_operand = s32[4,3] constant({{1,2,3},{4,5,6},{7,8,9},{10,11,12}})
+  dot_lhs_constant = s32[4,3] constant({{1,2,3},{4,5,6},{7,8,9},{10,11,12}})
   indices = s32[5] parameter(0)
   dot_rhs = s32[5,3] gather(gather_operand, indices),
       offset_dims={1},
@@ -917,8 +917,8 @@ TEST_F(IndexedArrayAnalysisTest, DotOpWithBatch) {
 HloModule DotOp
 
 ENTRY main {
-  gather_operand = s32[2,3,2] constant(s32[2,3,2]{{{1,2},{3,4},{5,6}},{{7,8},{9,10},{11,12}}})
-  dot_lhs_constant = s32[2,2,3] constant(s32[2,2,3]{{{1,2,3},{4,5,6}},{{7,8,9},{10,11,12}}})
+  gather_operand = s32[2,3,2] constant({{{1,2},{3,4},{5,6}},{{7,8},{9,10},{11,12}}})
+  dot_lhs_constant = s32[2,2,3] constant({{{1,2,3},{4,5,6}},{{7,8,9},{10,11,12}}})
   indices = s32[4] parameter(0)
   dot_rhs = s32[2,3,4] gather(gather_operand, indices),
       offset_dims={0,1},
@@ -948,8 +948,8 @@ TEST_F(IndexedArrayAnalysisTest, DotOpNegative) {
 HloModule DotOp
 
 ENTRY main {
-  gather_operand = s32[3,4] constant(s32[3,4]{{1,2,3,4},{5,6,7,8},{9,10,11,12}})
-  dot_rhs_constant = s32[2,3] constant(s32[2,3]{{1,2,3},{4,5,6}})
+  gather_operand = s32[3,4] constant({{1,2,3,4},{5,6,7,8},{9,10,11,12}})
+  dot_rhs_constant = s32[2,3] constant({{1,2,3},{4,5,6}})
   indices = s32[2] parameter(0)
   dot_lhs = s32[3,2] gather(gather_operand, indices),
       offset_dims={0},
diff --git a/tensorflow/compiler/xla/service/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/instruction_fusion_test.cc
index 58b7135cea..611cfd404d 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion_test.cc
@@ -259,8 +259,8 @@ TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusibleRecursively) {
     add = f32[4,3]{1,0} add(p0, p0)
     abs1 = f32[4,3]{1,0} abs(add)
     log = f32[4,3]{1,0} log(abs1)
-    token = token[] after-all()
-    send = f32[4,3]{1,0} send(log, token), channel_id=0
+    token0 = token[] after-all()
+    send = f32[4,3]{1,0} send(log, token0), channel_id=0
     abs2 = f32[4,3]{1,0} abs(log)
     ROOT root = f32[4,3]{1,0} subtract(abs2, add)
   })")
@@ -290,8 +290,8 @@ TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusibleRecursively) {
     p0 = f32[4,3]{1,0} parameter(0)
     add1 = f32[4,3]{1,0} add(p0, p0)
     log = f32[4,3]{1,0} log(p0)
-    token = token[] after-all()
-    send = f32[4,3]{1,0} send(log, token), channel_id=0
+    token0 = token[] after-all()
+    send = f32[4,3]{1,0} send(log, token0), channel_id=0
     add2 = f32[4,3]{1,0} add(log, add1)
     ROOT root = f32[4,3]{1,0} subtract(add1, add2)
   })")
@@ -324,8 +324,8 @@ TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusibleRecursively) {
     add1 = f32[4,3]{1,0} add(p0, p0)
     add2 = f32[4,3]{1,0} add(add1, add1)
     log = f32[4,3]{1,0} log(add2)
-    token = token[] after-all()
-    send = f32[4,3]{1,0} send(log, token), channel_id=0
+    token0 = token[] after-all()
+    send = f32[4,3]{1,0} send(log, token0), channel_id=0
     sub1 = f32[4,3]{1,0} subtract(log, add2)
     sub2 = f32[4,3]{1,0} subtract(add2, add1)
     ROOT root = (f32[4,3]{1,0}, f32[4,3]{1,0}) tuple(sub1, sub2)
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 5c661bfacb..9fe8c3accb 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -847,12 +847,12 @@ TEST_F(LayoutAssignmentTest, ChannelLayoutMismatch) {
     ENTRY entry_computation {
       param = (f32[2,2]) parameter(0)
       gte = f32[2,2] get-tuple-element(param), index=0
-      token = token[] after-all()
-      recv = (f32[2,2], u32[], token[]) recv(token), channel_id=1, sharding={maximal device=1}
+      token0 = token[] after-all()
+      recv = (f32[2,2], u32[], token[]) recv(token0), channel_id=1, sharding={maximal device=1}
       recv-done = (f32[2,2], token[]) recv-done(recv), channel_id=1,
         sharding={maximal device=1}
       ROOT root = f32[2,2] get-tuple-element(recv-done), index=0
-      send = (f32[2,2], u32[], token[]) send(gte, token), channel_id=1,
+      send = (f32[2,2], u32[], token[]) send(gte, token0), channel_id=1,
         sharding={maximal device=0}
       send-done = token[] send-done(send), channel_id=1, sharding={maximal device=0}
     }
@@ -897,7 +897,7 @@ TEST_F(LayoutAssignmentTest, AllReduceLayoutMissmatch) {
       ar.0 = f32[2,2] cross-replica-sum(gte),
         all_reduce_id=1, replica_groups={{0}}, to_apply=add,
         sharding={maximal device=0}
-      const = f32[2,2] constant(f32[2,2]{{0,1},{2,3}})
+      const = f32[2,2] constant({{0,1},{2,3}})
       ROOT ar.1 = f32[2,2] cross-replica-sum(const),
         all_reduce_id=1, replica_groups={{0}}, to_apply=add,
         sharding={maximal device=1}
diff --git a/tensorflow/compiler/xla/service/name_uniquer.cc b/tensorflow/compiler/xla/service/name_uniquer.cc
index ac2f79674f..daa718879d 100644
--- a/tensorflow/compiler/xla/service/name_uniquer.cc
+++ b/tensorflow/compiler/xla/service/name_uniquer.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "absl/strings/numbers.h"
 #include "absl/strings/str_cat.h"
+#include "tensorflow/compiler/xla/primitive_util.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/types.h"
@@ -42,6 +43,7 @@ NameUniquer::NameUniquer(const string& separator) {
   if (name.empty()) {
     return "";
   }
+
   string result = name;
   char c = static_cast<unsigned char>(result[0]);
   if (!isalpha(c) && c != '_') {
@@ -52,6 +54,13 @@ NameUniquer::NameUniquer(const string& separator) {
       result[i] = '_';
     }
   }
+
+  // HLO primitive type names (with the exception of 'tuple') are keywords in
+  // the HLO text representation and cannot be names, so append an underscore if
+  // the name is a primitive type.
+  if (primitive_util::IsPrimitiveTypeName(result) && result != "tuple") {
+    result += "_";
+  }
   return result;
 }
 
diff --git a/tensorflow/compiler/xla/service/name_uniquer_test.cc b/tensorflow/compiler/xla/service/name_uniquer_test.cc
index 3e2592c6ac..d0d04147e0 100644
--- a/tensorflow/compiler/xla/service/name_uniquer_test.cc
+++ b/tensorflow/compiler/xla/service/name_uniquer_test.cc
@@ -104,5 +104,21 @@ TEST_F(NameUniquerTest, KeepNamesInRandomOrder) {
   EXPECT_EQ("foo.3", uniquer.GetUniqueName("foo.3"));
 }
 
+TEST_F(NameUniquerTest, AvoidKeywords) {
+  NameUniquer uniquer(".");
+
+  EXPECT_EQ("f32_", uniquer.GetUniqueName("f32"));
+  EXPECT_EQ("s64_", uniquer.GetUniqueName("s64"));
+  EXPECT_EQ("pred_", uniquer.GetUniqueName("pred"));
+
+  // Though a primitive type, "tuple" is not a keyword.
+  EXPECT_EQ("tuple", uniquer.GetUniqueName("tuple"));
+
+  // Keywords are not capitalized.
+  EXPECT_EQ("F32", uniquer.GetUniqueName("F32"));
+  EXPECT_EQ("S32", uniquer.GetUniqueName("S32"));
+  EXPECT_EQ("Pred", uniquer.GetUniqueName("Pred"));
+}
+
 }  // namespace
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index c35f72699b..81db3bb643 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -1737,7 +1737,8 @@ class HloConstantScalarImpl {
               literal_r0_as_val_ty_or.ValueOrDie() == val_literal &&
               literal_r0 == val_as_literal_ty;
     if (!rv) {
-      EXPLAIN << "HloInstruction's constant value " << literal_r0.ToString()
+      EXPLAIN << "HloInstruction's constant value "
+              << literal_r0.ToStringWithoutShape()
               << " did not match expected value " << *val_;
     }
     return rv;
diff --git a/tensorflow/compiler/xla/service/pattern_matcher_test.cc b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
index 186ef0c791..5c3c009a68 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher_test.cc
+++ b/tensorflow/compiler/xla/service/pattern_matcher_test.cc
@@ -242,8 +242,8 @@ TEST(PatternMatcherTest, ConstantScalar) {
     HloModule test_module
     ENTRY test {
       a = s32[] constant(1)
-      b = s32[1,1] constant(s32[1,1]{{2}})
-      c = s32[1,2] constant(s32[1,2]{{2,2}})
+      b = s32[1,1] constant({{2}})
+      c = s32[1,2] constant({{2,2}})
       d = f32[] constant(1)
       e = f32[] constant(1.25)
       ROOT tuple = (s32[], s32[1,1], s32[1,2], f32[], f32[]) tuple(a,b,c,d,e)
diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc
index 17cdaa74fc..3ca53edc81 100644
--- a/tensorflow/compiler/xla/service/transpose_folding_test.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc
@@ -139,9 +139,9 @@ TEST_F(TransposeFoldingTest, FoldDotTransposeConstant) {
 HloModule FoldDotTransposeConstant
 
 ENTRY entry_computation {
-  constant = f32[2,1]{1,0} constant(f32[2,1] { { 1 }, { 2 } })
+  constant = f32[2,1]{1,0} constant({ { 1 }, { 2 } })
   transpose = f32[1,2]{1,0} transpose(constant), dimensions={1,0}
-  constant.1 = f32[3,2]{1,0} constant(f32[3,2] { { 1, 2 }, { 3, 4 }, { 5, 6 } })
+  constant.1 = f32[3,2]{1,0} constant({ { 1, 2 }, { 3, 4 }, { 5, 6 } })
   transpose.1 = f32[2,3]{1,0} transpose(constant.1), dimensions={1,0}
   ROOT dot = f32[1,3]{1,0} dot(transpose, transpose.1), lhs_contracting_dims={1}, rhs_contracting_dims={0}
 }
diff --git a/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc b/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc
index 75d406435b..3bcf5c3830 100644
--- a/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc
+++ b/tensorflow/compiler/xla/service/while_loop_constant_sinking_test.cc
@@ -129,7 +129,7 @@ condition {
 
 ENTRY entry {
   const_0 = f32[2] constant({1, 2})
-  const_1 = (f32[2], f32[2]) constant((f32[2], f32[2]) ({2, 1},{3,1}))
+  const_1 = (f32[2], f32[2]) constant(({2, 1},{3,1}))
   while_init = (f32[2],(f32[2],f32[2])) tuple(const_0, const_1)
   ROOT while = (f32[2],(f32[2],f32[2])) while(while_init), condition=condition, body=body
 }
@@ -206,8 +206,8 @@ body {
   p_body.0 = f32[2] get-tuple-element((f32[2],f32[2]) p_body), index=0
   p_body.1 = f32[2] get-tuple-element((f32[2],f32[2]) p_body), index=1
 
-  token = token[] after-all()
-  outfeed = token[] outfeed(p_body.0, token)
+  token0 = token[] after-all()
+  outfeed = token[] outfeed(p_body.0, token0)
   ROOT root = (f32[2],f32[2],f32[2]) tuple(p_body.0, p_body.1, p_body.1)
 }
 
@@ -305,7 +305,7 @@ condition {
 
 ENTRY entry {
   const_0 = f32[] constant(0)
-  const_1 = (f32[], f32[]) constant((f32[], f32[]) (1, 10))
+  const_1 = (f32[], f32[]) constant((1, 10))
   while_init = (f32[],(f32[],f32[])) tuple(const_0, const_1)
   ROOT while = (f32[],(f32[],f32[])) while(while_init), condition=condition, body=body
 }
diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc
index 4950e8269e..3713989ca2 100644
--- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_dce.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/service/tuple_simplifier.h"
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/compiler/xla/tests/hlo_test_base.h"
@@ -554,8 +555,7 @@ TEST_F(WhileLoopSimplifierTest, FlattenNestedTuple) {
 
   HloInstruction* new_while = FindFirstWhile(m.get());
   Shape flat_tuple =
-      ShapeUtil::ParseShapeString("(s32[1], s32[2], s32[3], s32[4])")
-          .ValueOrDie();
+      ParseShape("(s32[1], s32[2], s32[3], s32[4])").ValueOrDie();
   SCOPED_TRACE(m->ToString());
   EXPECT_TRUE(ShapeUtil::Equal(new_while->shape(), flat_tuple));
   EXPECT_TRUE(ShapeUtil::Equal(
@@ -567,8 +567,7 @@ TEST_F(WhileLoopSimplifierTest, FlattenNestedTuple) {
       flat_tuple));
   EXPECT_TRUE(ShapeUtil::Equal(
       m->entry_computation()->root_instruction()->shape(),
-      ShapeUtil::ParseShapeString("((s32[1]), (s32[2], s32[3], (s32[4])))")
-          .ValueOrDie()));
+      ParseShape("((s32[1]), (s32[2], s32[3], (s32[4])))").ValueOrDie()));
 }
 
 // Edge-case: All elements of the loop carry are constants which can be removed,
@@ -641,8 +640,7 @@ TEST_F(WhileLoopSimplifierTest, RemoveConstantFromLoopCarry) {
   EXPECT_TRUE(TupleSimplifier().Run(m.get()).ok());
 
   HloInstruction* new_while = FindFirstWhile(m.get());
-  Shape new_while_shape =
-      ShapeUtil::ParseShapeString("(s32[1], s32[3])").ValueOrDie();
+  Shape new_while_shape = ParseShape("(s32[1], s32[3])").ValueOrDie();
   EXPECT_TRUE(ShapeUtil::Equal(new_while->shape(), new_while_shape));
   EXPECT_TRUE(ShapeUtil::Equal(
       new_while->while_body()->root_instruction()->shape(), new_while_shape));
@@ -652,9 +650,9 @@ TEST_F(WhileLoopSimplifierTest, RemoveConstantFromLoopCarry) {
   EXPECT_TRUE(ShapeUtil::Equal(
       new_while->while_condition()->parameter_instruction(0)->shape(),
       new_while_shape));
-  EXPECT_TRUE(ShapeUtil::Equal(
-      m->entry_computation()->root_instruction()->shape(),
-      ShapeUtil::ParseShapeString("(s32[1], s32[2], s32[3])").ValueOrDie()));
+  EXPECT_TRUE(
+      ShapeUtil::Equal(m->entry_computation()->root_instruction()->shape(),
+                       ParseShape("(s32[1], s32[2], s32[3])").ValueOrDie()));
   EXPECT_THAT(m->entry_computation()->root_instruction(),
               op::Tuple(_, op::Constant(), _));
 }
@@ -712,7 +710,7 @@ TEST_F(WhileLoopSimplifierTest, MergeInductionVariables_Simple) {
   // We should have added a new loop counter for s32[] to the end of the tuple.
   SCOPED_TRACE(m->ToString());
   Shape new_while_shape =
-      ShapeUtil::ParseShapeString("(s32[], s32[], s32[], s32[])").ValueOrDie();
+      ParseShape("(s32[], s32[], s32[], s32[])").ValueOrDie();
   EXPECT_TRUE(ShapeUtil::Equal(new_while->shape(), new_while_shape));
   EXPECT_TRUE(ShapeUtil::Equal(
       new_while->while_body()->root_instruction()->shape(), new_while_shape));
diff --git a/tensorflow/compiler/xla/service/while_util_test.cc b/tensorflow/compiler/xla/service/while_util_test.cc
index 5e69419333..d92b9870f3 100644
--- a/tensorflow/compiler/xla/service/while_util_test.cc
+++ b/tensorflow/compiler/xla/service/while_util_test.cc
@@ -180,8 +180,8 @@ body {
 
 cond {
   param.c = (s32[], s32[]) parameter(0)
-  token = token[] after-all()
-  infeed = (pred[], token[]) infeed(token)
+  token0 = token[] after-all()
+  infeed = (pred[], token[]) infeed(token0)
   ROOT condition = pred[] get-tuple-element(infeed), index=0
 }
 
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index da61873732..be7d71ada0 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -234,7 +234,7 @@ StatusOr<Shape> MakeShapeWithLayoutInternal(
 
 /* static */ StatusOr<Shape> ShapeUtil::MakeValidatedShape(
     PrimitiveType element_type, absl::Span<const int64> dimensions) {
-  CHECK(IsArrayPrimitiveType(element_type));
+  CHECK(IsArrayPrimitiveType(element_type)) << element_type;
   Shape result;
   TF_RETURN_IF_ERROR(PopulateShape(element_type, dimensions, &result));
   return result;
@@ -480,54 +480,6 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
   return IsScalar(shape) && shape.element_type() == element_type;
 }
 
-namespace {
-
-// Class to memoize the computation of
-//   absl::AsciiStrToLower(PrimitiveType_Name(p))
-// for all PrimitiveType values "p"
-class PrimitiveTypeNameGenerator {
- public:
-  PrimitiveTypeNameGenerator() {
-    for (int i = 0; i < PrimitiveType_ARRAYSIZE; i++) {
-      if (PrimitiveType_IsValid(i)) {
-        lowercase_name_[i] = absl::AsciiStrToLower(
-            PrimitiveType_Name(static_cast<PrimitiveType>(i)));
-      }
-    }
-  }
-  const string& LowercaseName(PrimitiveType t) {
-    return lowercase_name_[static_cast<int>(t)];
-  }
-
- private:
-  string lowercase_name_[PrimitiveType_ARRAYSIZE];
-};
-
-const string& LowercasePrimitiveTypeName(PrimitiveType s) {
-  static PrimitiveTypeNameGenerator* gen = new PrimitiveTypeNameGenerator();
-  return gen->LowercaseName(s);
-}
-
-StatusOr<PrimitiveType> StringToPrimitiveType(const string& name) {
-  static std::unordered_map<string, PrimitiveType>* name_to_type = [] {
-    static auto* map = new std::unordered_map<string, PrimitiveType>;
-    for (int i = 0; i < PrimitiveType_ARRAYSIZE; i++) {
-      if (PrimitiveType_IsValid(i)) {
-        auto value = static_cast<PrimitiveType>(i);
-        (*map)[LowercasePrimitiveTypeName(value)] = value;
-      }
-    }
-    return map;
-  }();
-  auto found = name_to_type->find(name);
-  if (found == name_to_type->end()) {
-    return InvalidArgument("Invalid element type string: \"%s\".", name);
-  }
-  return found->second;
-}
-
-}  // namespace
-
 /* static */ string ShapeUtil::HumanString(const Shape& shape) {
   if (IsTuple(shape)) {
     string text = "(";
@@ -539,8 +491,9 @@ StatusOr<PrimitiveType> StringToPrimitiveType(const string& name) {
     text += ")";
     return text;
   }
-  return StrCat(LowercasePrimitiveTypeName(shape.element_type()), "[",
-                absl::StrJoin(shape.dimensions(), ","), "]");
+  return StrCat(
+      primitive_util::LowercasePrimitiveTypeName(shape.element_type()), "[",
+      absl::StrJoin(shape.dimensions(), ","), "]");
 }
 
 /* static */ string ShapeUtil::HumanStringWithLayout(const Shape& shape) {
@@ -554,7 +507,8 @@ StatusOr<PrimitiveType> StringToPrimitiveType(const string& name) {
     text += ")";
     return text;
   }
-  string result = StrCat(LowercasePrimitiveTypeName(shape.element_type()), "[");
+  string result = StrCat(
+      primitive_util::LowercasePrimitiveTypeName(shape.element_type()), "[");
   for (int i = 0; i < shape.dimensions().size(); i++) {
     StrAppend(&result, (i > 0) ? "," : "", shape.dimensions(i));
   }
@@ -580,116 +534,6 @@ StatusOr<PrimitiveType> StringToPrimitiveType(const string& name) {
                 HumanString(program_shape.result()));
 }
 
-namespace {
-// Parses shapes with simple recursive descent structure -- consumes from the
-// front of s and passes that view recursively as required.
-StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
-  *s = absl::StripLeadingAsciiWhitespace(*s);
-
-  if (absl::ConsumePrefix(s, "(")) {  // Tuple.
-    std::vector<Shape> shapes;
-    bool must_end = false;
-    while (true) {
-      if (absl::ConsumePrefix(s, ")")) {
-        break;
-      } else if (must_end) {
-        return InvalidArgument("Expected end of tuple; got: \"%s\"", *s);
-      }
-      shapes.emplace_back();
-      TF_ASSIGN_OR_RETURN(shapes.back(), ParseShapeStringInternal(s));
-      *s = absl::StripLeadingAsciiWhitespace(*s);
-      must_end = !absl::ConsumePrefix(s, ",");
-    }
-    return ShapeUtil::MakeTupleShape(shapes);
-  }
-
-  string element_type_string;
-  string dimensions_string;
-  string format_string;
-  string layout_string;
-  // absl::string_view is not compatible with internal RE2 StringPiece, so
-  // we convert in to the RE2-consumable type and then consume the corresponding
-  // amount from our string_view type.
-  static LazyRE2 shape_pattern = {
-      "^(\\w*\\d*)\\[([\\d,\\s]*)\\](?:\\s*(dense|sparse)?\\s*{([\\d,\\s]+)})"
-      "?"};
-  tensorflow::RegexpStringPiece s_consumable(s->data(), s->size());
-  if (RE2::Consume(&s_consumable, *shape_pattern, &element_type_string,
-                   &dimensions_string, &format_string, &layout_string)) {
-    size_t consumed = s->size() - s_consumable.size();
-    s->remove_prefix(consumed);
-    auto string_to_int64 = [&s](absl::string_view input) -> StatusOr<int64> {
-      int64 element;
-      if (!absl::SimpleAtoi(input, &element)) {
-        return InvalidArgument(
-            "Invalid s64 value in parsed shape string: \"%s\" in \"%s\"", input,
-            *s);
-      }
-      return element;
-    };
-
-    auto comma_list_to_int64s =
-        [string_to_int64](const string& input) -> StatusOr<std::vector<int64>> {
-      std::vector<int64> results;
-      for (const auto& piece : absl::StrSplit(input, ',', absl::SkipEmpty())) {
-        TF_ASSIGN_OR_RETURN(int64 element, string_to_int64(piece));
-        results.push_back(element);
-      }
-      return results;
-    };
-
-    // Extract the dimensions.
-    TF_ASSIGN_OR_RETURN(std::vector<int64> dimensions,
-                        comma_list_to_int64s(dimensions_string));
-
-    // Extract the primitive element type.
-    TF_ASSIGN_OR_RETURN(const PrimitiveType primitive_type,
-                        StringToPrimitiveType(element_type_string));
-    if (primitive_type == PRIMITIVE_TYPE_INVALID || primitive_type == TUPLE) {
-      return InvalidArgument("Invalid element type string: \"%s\".",
-                             element_type_string);
-    }
-
-    Shape result;
-    if (primitive_type == OPAQUE) {
-      result = ShapeUtil::MakeOpaqueShape();
-    } else if (primitive_type == TOKEN) {
-      result = ShapeUtil::MakeTokenShape();
-    } else if (format_string.empty() && layout_string.empty()) {
-      // Create a shape without a layout set.
-      TF_ASSIGN_OR_RETURN(
-          result, ShapeUtil::MakeValidatedShape(primitive_type, dimensions));
-    } else if (format_string == "sparse") {
-      TF_ASSIGN_OR_RETURN(int64 max_elements, string_to_int64(layout_string));
-      result = ShapeUtil::MakeShapeWithSparseLayout(primitive_type, dimensions,
-                                                    max_elements);
-    } else if (format_string.empty() || format_string == "dense") {
-      // Extract the layout minor-to-major and set it.
-      TF_ASSIGN_OR_RETURN(std::vector<int64> min2maj,
-                          comma_list_to_int64s(layout_string));
-      TF_ASSIGN_OR_RETURN(result, MakeShapeWithLayoutInternal(
-                                      primitive_type, dimensions, min2maj));
-    } else {
-      // This should not be reached.
-      LOG(FATAL) << "Unhandled condition when parsing shape; format: \""
-                 << format_string << "\", layout: \"" << layout_string << "\"";
-    }
-    TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(result));
-    return std::move(result);
-  }
-
-  return InvalidArgument("Invalid shape string to parse: \"%s\"", *s);
-}
-}  // namespace
-
-/* static */ StatusOr<Shape> ShapeUtil::ParseShapeString(absl::string_view s) {
-  TF_ASSIGN_OR_RETURN(Shape shape, ParseShapeStringInternal(&s));
-  if (!s.empty()) {
-    return InvalidArgument("Invalid shape string to parse: \"%s\"", s);
-  }
-  return shape;
-}
-
 /* static */ bool ShapeUtil::SameDimensions(const Shape& lhs,
                                             const Shape& rhs) {
   CHECK(ShapeUtil::IsArray(lhs));
@@ -867,13 +711,13 @@ StatusOr<Shape> ParseShapeStringInternal(absl::string_view* s) {
     if (shape.dimensions_size() != 0) {
       return InvalidArgument(
           "shape has %s element type, but has dimensions field: %s",
-          LowercasePrimitiveTypeName(shape.element_type()),
+          primitive_util::LowercasePrimitiveTypeName(shape.element_type()),
           shape.ShortDebugString());
     }
     if (shape.has_layout()) {
       return InvalidArgument(
           "shape has %s element type, but has layout field: %s",
-          LowercasePrimitiveTypeName(shape.element_type()),
+          primitive_util::LowercasePrimitiveTypeName(shape.element_type()),
           shape.ShortDebugString());
     }
     return Status::OK();
diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h
index e02804dc88..6b7a9cd34f 100644
--- a/tensorflow/compiler/xla/shape_util.h
+++ b/tensorflow/compiler/xla/shape_util.h
@@ -241,10 +241,6 @@ class ShapeUtil {
   // (param_name: f32[42x12], ...) -> f32[24x42]
   static string HumanString(const ProgramShape& program_shape);
 
-  // Parses a ShapeUtil::HumanString-format shape string back into a shape
-  // object.
-  static StatusOr<Shape> ParseShapeString(absl::string_view s);
-
   // Returns whether the LHS and RHS shapes have the same dimensions; note: does
   // not check element type.
   // Precondition: IsArray(lhs) && IsArray(rhs)
diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc
index 60bdbe3020..0a3081f516 100644
--- a/tensorflow/compiler/xla/shape_util_test.cc
+++ b/tensorflow/compiler/xla/shape_util_test.cc
@@ -82,102 +82,6 @@ TEST(ShapeUtilTest, Rank4DimensionIndexing) {
   ASSERT_EQ(3, shape.dimensions(0));
 }
 
-TEST(ShapeUtilTest, ParseShapeStringR2F32) {
-  string shape_string = "f32[123,456]";
-  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
-                          ShapeUtil::ParseShapeString(shape_string));
-  Shape expected = ShapeUtil::MakeShape(F32, {123, 456});
-  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
-      << "expected: " << ShapeUtil::HumanString(expected)
-      << "actual:   " << ShapeUtil::HumanString(actual);
-}
-
-TEST(ShapeUtilTest, ParseShapeStringTupleOfArrays) {
-  string shape_string = "(f32[1572864],s8[5120,1024])";
-  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
-                          ShapeUtil::ParseShapeString(shape_string));
-  Shape expected =
-      ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(F32, {1572864}),
-                                 ShapeUtil::MakeShape(S8, {5120, 1024})});
-  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
-      << "expected: " << ShapeUtil::HumanString(expected)
-      << "actual:   " << ShapeUtil::HumanString(actual);
-}
-
-TEST(ShapeUtilTest, ParseShapeStringNestedTuple) {
-  string shape_string = "(f32[1],(f32[2], token[]), opaque[], f32[3])";
-  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
-                          ShapeUtil::ParseShapeString(shape_string));
-  Shape expected = ShapeUtil::MakeTupleShape({
-      ShapeUtil::MakeShape(F32, {1}),
-      ShapeUtil::MakeTupleShape(
-          {ShapeUtil::MakeShape(F32, {2}), ShapeUtil::MakeTokenShape()}),
-      ShapeUtil::MakeOpaqueShape(),
-      ShapeUtil::MakeShape(F32, {3}),
-  });
-  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
-      << "expected: " << ShapeUtil::HumanString(expected)
-      << "actual:   " << ShapeUtil::HumanString(actual);
-}
-
-TEST(ShapeUtilTest, ParseShapeStringWithLayout) {
-  string shape_string = "f32[123,456]{0,1}";
-  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
-                          ShapeUtil::ParseShapeString(shape_string));
-  Shape expected = ShapeUtil::MakeShapeWithLayout(F32, {123, 456}, {0, 1});
-  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
-      << "expected: " << ShapeUtil::HumanString(expected)
-      << "actual:   " << ShapeUtil::HumanString(actual);
-}
-
-TEST(ShapeUtilTest, ParseShapeStringWithExplicitDenseLayout) {
-  string shape_string = "f32[123,456]dense{0,1}";
-  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
-                          ShapeUtil::ParseShapeString(shape_string));
-  Shape expected = ShapeUtil::MakeShapeWithLayout(F32, {123, 456}, {0, 1});
-  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
-      << "expected: " << ShapeUtil::HumanString(expected)
-      << "actual:   " << ShapeUtil::HumanString(actual);
-}
-
-TEST(ShapeUtilTest, ParseShapeStringWithSparseLayout) {
-  string shape_string = "f32[123,456]sparse{10}";
-  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
-                          ShapeUtil::ParseShapeString(shape_string));
-  Shape expected = ShapeUtil::MakeShapeWithSparseLayout(F32, {123, 456}, 10);
-  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
-      << "expected: " << ShapeUtil::HumanString(expected)
-      << "actual: " << ShapeUtil::HumanString(actual);
-}
-
-TEST(ShapeUtilTest, ParseOpaqueType) {
-  TF_ASSERT_OK_AND_ASSIGN(Shape actual,
-                          ShapeUtil::ParseShapeString("opaque[]"));
-  Shape expected = ShapeUtil::MakeOpaqueShape();
-  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
-      << "expected: " << ShapeUtil::HumanString(expected)
-      << "actual:   " << ShapeUtil::HumanString(actual);
-}
-
-TEST(ShapeUtilTest, ParseTokenType) {
-  TF_ASSERT_OK_AND_ASSIGN(Shape actual, ShapeUtil::ParseShapeString("token[]"));
-  Shape expected = ShapeUtil::MakeTokenShape();
-  ASSERT_TRUE(ShapeUtil::Equal(expected, actual))
-      << "expected: " << ShapeUtil::HumanString(expected)
-      << "actual:   " << ShapeUtil::HumanString(actual);
-}
-
-TEST(ShapeUtilTest, ParseInvalidShapeString) {
-  string shape_strings[] = {
-      "f32[123,456]foobar{0,1}", "f32[123,456]sparse{0,1}", "f32[123,456]{foo}",
-      "f32[123,456]dense{foo}",  "f32[123,456]sparse{foo}",
-  };
-  for (const string& shape_string : shape_strings) {
-    StatusOr<Shape> result = ShapeUtil::ParseShapeString(shape_string);
-    ASSERT_FALSE(result.ok()) << "shape: " << shape_string;
-  }
-}
-
 TEST(ShapeUtilTest, CompatibleIdenticalShapes) {
   Shape shape1 = ShapeUtil::MakeShape(F32, {3, 2});
   Shape shape2 = ShapeUtil::MakeShape(F32, {3, 2});
diff --git a/tensorflow/compiler/xla/tests/literal_test_util_test.cc b/tensorflow/compiler/xla/tests/literal_test_util_test.cc
index b6f9b8156b..ea9b3037cf 100644
--- a/tensorflow/compiler/xla/tests/literal_test_util_test.cc
+++ b/tensorflow/compiler/xla/tests/literal_test_util_test.cc
@@ -89,11 +89,11 @@ TEST(LiteralTestUtilTest, ExpectNearFailurePlacesResultsInTemporaryDirectory) {
     Literal literal =
         Literal::CreateFromProto(literal_proto).ConsumeValueOrDie();
     if (result.find("expected") != string::npos) {
-      EXPECT_EQ("2", literal.ToString());
+      EXPECT_EQ("f32[] 2", literal.ToString());
     } else if (result.find("actual") != string::npos) {
-      EXPECT_EQ("4", literal.ToString());
+      EXPECT_EQ("f32[] 4", literal.ToString());
     } else if (result.find("mismatches") != string::npos) {
-      EXPECT_EQ("true", literal.ToString());
+      EXPECT_EQ("pred[] true", literal.ToString());
     } else {
       FAIL() << "unknown file in temporary directory: " << result;
     }
@@ -105,9 +105,9 @@ TEST(LiteralTestUtilTest, NotEqualHasValuesInMessage) {
   auto actual = LiteralUtil::CreateR1<int32>({4, 5, 6});
   ::testing::AssertionResult result = LiteralTestUtil::Equal(expected, actual);
   EXPECT_THAT(result.message(),
-              ::testing::HasSubstr("Expected literal:\n{1, 2, 3}"));
+              ::testing::HasSubstr("Expected literal:\ns32[3] {1, 2, 3}"));
   EXPECT_THAT(result.message(),
-              ::testing::HasSubstr("Actual literal:\n{4, 5, 6}"));
+              ::testing::HasSubstr("Actual literal:\ns32[3] {4, 5, 6}"));
 }
 
 TEST(LiteralTestUtilTest, NearComparatorR1) {
diff --git a/tensorflow/compiler/xla/tests/test_utils_test.cc b/tensorflow/compiler/xla/tests/test_utils_test.cc
index e8f5d7a9a7..448a66cfdd 100644
--- a/tensorflow/compiler/xla/tests/test_utils_test.cc
+++ b/tensorflow/compiler/xla/tests/test_utils_test.cc
@@ -61,11 +61,11 @@ XLA_TEST_F(TestUtilsTest, Token) {
                     R"(HloModule outfeed_module
 
     ENTRY InfeedToOutfeed {
-      token = token[] parameter(0)
-      infeed = ((u32[3]{0}, pred[]), token[]) infeed(token)
+      token0 = token[] parameter(0)
+      infeed = ((u32[3]{0}, pred[]), token[]) infeed(token0)
       infeed.data = (u32[3]{0}, pred[]) get-tuple-element(infeed), index=0
-      outfeed = token[] outfeed(infeed.data, token)
-      ROOT infeed.1 = ((u32[3]{0}, pred[]), token[]) infeed(token)
+      outfeed = token[] outfeed(infeed.data, token0)
+      ROOT infeed.1 = ((u32[3]{0}, pred[]), token[]) infeed(token0)
       infeed.1.data = (u32[3]{0}, pred[]) get-tuple-element(infeed.1), index=0
       infeed.1.token = token[] get-tuple-element(infeed.1), index=1
       outfeed.1 = token[] outfeed(infeed.1.data, infeed.1.token)
diff --git a/tensorflow/compiler/xla/tests/token_hlo_test.cc b/tensorflow/compiler/xla/tests/token_hlo_test.cc
index 601c6b0693..b77cf38ed8 100644
--- a/tensorflow/compiler/xla/tests/token_hlo_test.cc
+++ b/tensorflow/compiler/xla/tests/token_hlo_test.cc
@@ -214,8 +214,8 @@ ENTRY %AddDependency (p0: f32[], p1: f32[]) -> f32[] {
 
   %forty_two = f32[] constant(42.0)
   %add = f32[] add(f32[] %p0, f32[] %forty_two)
-  %token = token[] after-all(f32[] %add)
-  %p1_after_token = f32[] add-dependency(f32[] %p1, token[] %token)
+  %token0 = token[] after-all(f32[] %add)
+  %p1_after_token = f32[] add-dependency(f32[] %p1, token[] %token0)
   %neg = f32[] negate(f32[] %p1_after_token)
   ROOT %product = f32[] multiply(f32[] %add, f32[] %neg)
 }
@@ -236,8 +236,8 @@ HloModule AddDependencyOfConstant, is_scheduled=true
 ENTRY %AddDependency (p0: f32[]) -> f32[] {
   %p0 = f32[] parameter(0)
   %forty_two = f32[] constant(42.0)
-  %token = token[] after-all(f32[] %p0)
-  %forty_two_after_token = f32[] add-dependency(f32[] %forty_two, token[] %token)
+  %token0 = token[] after-all(f32[] %p0)
+  %forty_two_after_token = f32[] add-dependency(f32[] %forty_two, token[] %token0)
   ROOT %product = f32[] multiply(f32[] %p0, f32[] %forty_two_after_token)
 }
 )";
@@ -255,8 +255,8 @@ HloModule AddDependencyAsRoot, is_scheduled=true
 ENTRY %AddDependency (p: f32[3]) -> f32[3] {
   %p = f32[3] parameter(0)
   %neg = f32[3] negate(f32[3] %p)
-  %token = token[] after-all()
-  ROOT %add_dep = f32[3] add-dependency(f32[3] %neg, token[] %token)
+  %token0 = token[] after-all()
+  ROOT %add_dep = f32[3] add-dependency(f32[3] %neg, token[] %token0)
 }
 )";
   TF_ASSERT_OK_AND_ASSIGN(
@@ -274,9 +274,9 @@ ENTRY %TupleShapedAddDependency (p0: f32[3], p1: f32[3]) -> f32[3] {
   %p0 = f32[3] parameter(0)
   %p1 = f32[3] parameter(1)
   %forty_two = f32[] constant(42.0)
-  %token = token[] after-all()
-  %tuple = (f32[3], token[], f32[3], f32[]) tuple(f32[3] %p0, token[] %token, f32[3] %p1, f32[] %forty_two)
-  %add_dep = (f32[3], token[], f32[3], f32[]) add-dependency((f32[3], token[], f32[3], f32[]) %tuple, token[] %token)
+  %token0 = token[] after-all()
+  %tuple = (f32[3], token[], f32[3], f32[]) tuple(f32[3] %p0, token[] %token0, f32[3] %p1, f32[] %forty_two)
+  %add_dep = (f32[3], token[], f32[3], f32[]) add-dependency((f32[3], token[], f32[3], f32[]) %tuple, token[] %token0)
   %elem0 = f32[3] get-tuple-element((f32[3], token[], f32[3], f32[]) %add_dep), index=0
   %elem2 = f32[3] get-tuple-element((f32[3], token[], f32[3], f32[]) %add_dep), index=2
   ROOT %diff = f32[3] subtract(f32[3] %elem0, f32[3] %elem2)
diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc
index 27ce243e9b..9c586bdeb0 100644
--- a/tensorflow/compiler/xla/tests/tuple_test.cc
+++ b/tensorflow/compiler/xla/tests/tuple_test.cc
@@ -555,8 +555,8 @@ XLA_TEST_F(TupleHloTest,
       s = (f32[2],f32[2]) tuple-select(cond, tup0, tup1)
       gte = f32[2] get-tuple-element(s), index=0
       tuple = (f32[2]) tuple(gte)
-      token = token[] after-all()
-      ROOT outfeed = token[] outfeed(tuple, token)
+      token0 = token[] after-all()
+      ROOT outfeed = token[] outfeed(tuple, token0)
     }
   )";
   auto module =
diff --git a/tensorflow/compiler/xla/text_literal_reader.cc b/tensorflow/compiler/xla/text_literal_reader.cc
index cdde88c135..c78ec522aa 100644
--- a/tensorflow/compiler/xla/text_literal_reader.cc
+++ b/tensorflow/compiler/xla/text_literal_reader.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "absl/strings/string_view.h"
 #include "absl/strings/strip.h"
 #include "tensorflow/compiler/xla/literal.h"
+#include "tensorflow/compiler/xla/service/hlo_parser.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -66,7 +67,7 @@ StatusOr<Literal> TextLiteralReader::ReadAllLines() {
   }
 
   absl::StripAsciiWhitespace(&shape_string);
-  TF_ASSIGN_OR_RETURN(Shape shape, ShapeUtil::ParseShapeString(shape_string));
+  TF_ASSIGN_OR_RETURN(Shape shape, ParseShape(shape_string));
   if (shape.element_type() != F32) {
     return Unimplemented(
         "unsupported element type for text literal reading: %s",
diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc
index 1a51303148..27a8dd1330 100644
--- a/tensorflow/compiler/xla/tools/replay_computation.cc
+++ b/tensorflow/compiler/xla/tools/replay_computation.cc
@@ -145,8 +145,7 @@ StatusOr<Literal> ReplayComputation(const HloSnapshot& module,
   bool provide_infeed = false;
   Shape infeed_shape;
   if (!opts.fake_infeed_shape.empty()) {
-    StatusOr<Shape> shape_status =
-        ShapeUtil::ParseShapeString(opts.fake_infeed_shape);
+    StatusOr<Shape> shape_status = ParseShape(opts.fake_infeed_shape);
     TF_CHECK_OK(shape_status.status());
     infeed_shape = std::move(shape_status).ValueOrDie();
     provide_infeed = true;
-- 
GitLab


From 359e8b7a3e141970b842724f9f43d5a00f3aab5a Mon Sep 17 00:00:00 2001
From: Karmel Allison <karmel@google.com>
Date: Fri, 14 Dec 2018 08:44:37 -0800
Subject: [PATCH 593/873] Enable v2 tests for model_subclassing, and fix a few
 bugs that enabling revealed.

PiperOrigin-RevId: 225547920
---
 tensorflow/python/keras/BUILD                 |   2 +-
 tensorflow/python/keras/backend.py            |   1 +
 tensorflow/python/keras/engine/base_layer.py  |   4 +-
 tensorflow/python/keras/engine/training.py    |  32 +-
 .../python/keras/engine/training_utils.py     |   5 +
 .../python/keras/model_subclassing_test.py    | 403 ++++++++++--------
 6 files changed, 240 insertions(+), 207 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 4259e6d592..87a9dfb605 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -863,7 +863,7 @@ py_test(
     name = "model_subclassing_test",
     size = "medium",
     srcs = ["model_subclassing_test.py"],
-    shard_count = 2,
+    shard_count = 4,
     srcs_version = "PY2AND3",
     tags = ["notsan"],
     deps = [
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index c93a716b3c..095273071f 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -2559,6 +2559,7 @@ def arange(start, stop=None, step=1, dtype='int32'):
     result = cast(result, dtype)
   return result
 
+
 @tf_export('keras.backend.tile')
 def tile(x, n):
   """Creates a tensor by tiling `x` by `n`.
diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index c4c431d7d4..8a56546ac0 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -585,7 +585,9 @@ class Layer(checkpointable.CheckpointableBase):
           if hasattr(self, '_set_inputs') and not self.inputs:
             # Subclassed network: explicitly set metadata normally set by
             # a call to self._set_inputs().
-            # This is not relevant in eager execution.
+            # TODO(b/120997007): This should be done in Eager as well, but
+            # causes garbage collection issues because of the placeholders
+            # created on the default Keras graph.
             self._set_inputs(inputs, outputs)
       else:
         # Eager execution on data tensors.
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 38c8819c36..f8cba47a41 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -1292,6 +1292,11 @@ class Model(Network):
       elif isinstance(inputs, collections.Sequence):
         inputs = [
             ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs]
+
+        # Unwrap lists with only one input, as we do when training on batch
+        if len(inputs) == 1:
+          inputs = inputs[0]
+
       return self(inputs)  # pylint: disable=not-callable
 
     self._make_predict_function()
@@ -2220,12 +2225,9 @@ class Model(Network):
     # If input data is a dataset iterator in graph mode or if it is an eager
     # iterator and only one batch of samples is required, we fetch the data
     # tensors from the iterator and then standardize them.
-    if is_x_iterator or is_x_eager_iterator:
+    if is_x_iterator:
       try:
-        if is_x_iterator:
-          next_element = self._get_iterator_get_next_tensors(x)
-        else:
-          next_element = x.get_next()
+        next_element = self._get_iterator_get_next_tensors(x)
       except errors.OutOfRangeError:
         raise RuntimeError('Your dataset iterator ran out of data; '
                            'Make sure that your dataset can generate '
@@ -2278,15 +2280,14 @@ class Model(Network):
       # Build the model using the retrieved inputs (value or symbolic).
       # If values or generated from a dataset, then in symbolic-mode
       # placeholders will be created to match the value shapes.
-      if not self.inputs:
-        is_build_called = True
-        if is_x_iterator:
-          cast_inputs = nest.map_structure(lambda v: v.shape, x)
-        elif training_utils.has_tensors(x):
-          cast_inputs = training_utils.cast_if_floating_dtype(x)
-        else:
-          cast_inputs = x
-        self._set_inputs(cast_inputs)
+      is_build_called = True
+      if is_x_iterator:
+        cast_inputs = nest.map_structure(lambda v: v.shape, x)
+      elif training_utils.has_tensors(x):
+        cast_inputs = training_utils.cast_if_floating_dtype(x)
+      else:
+        cast_inputs = x
+      self._set_inputs(cast_inputs)
     else:
       dict_inputs = isinstance(self.inputs, dict)
     if dict_inputs and context.executing_eagerly():
@@ -2516,12 +2517,11 @@ class Model(Network):
 
     for k, v in model_inputs.as_dict():
       if K.is_placeholder(v):
-        self._feed_inputs.append(v)
         self._feed_input_names.append(k)
+        self._feed_inputs.append(v)
         self._feed_input_shapes.append(K.int_shape(v))
 
     # TODO(fchollet): consider calling `_maybe_build` before calling the model.
-
     if outputs is None:
       # Obtain symbolic outputs by calling the model.
       with K.get_graph().as_default():
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index ec6b39704a..7c368e804e 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -1109,6 +1109,9 @@ class ModelInputs(object):
 
   def get_symbolic_inputs(self, return_single_as_list=False):
     """Returns inputs to be set as self.inputs for a model."""
+    # TODO(karmel): There is a side-effect here where what you get
+    # with as_list and as_dict depends on whether you have called this
+    # method first, since it modifies in place.
     for i in range(len(self._flattened_inputs)):
       k = self._input_names[i]
       v = self._flattened_inputs[i]
@@ -1116,6 +1119,7 @@ class ModelInputs(object):
         v = np.asarray(v)
         if v.ndim == 1:
           v = np.expand_dims(v, 1)
+
       if isinstance(v, (np.ndarray, ops.EagerTensor)):
         # We fix the placeholder shape except the batch size.
         # This is suboptimal, but it is the best we can do with the info
@@ -1126,6 +1130,7 @@ class ModelInputs(object):
       elif isinstance(v, tensor_shape.TensorShape):
         shape = (None,) + tuple(v.as_list()[1:])
         v = K.placeholder(shape=shape, name=k)
+
       self._flattened_inputs[i] = v
 
     if self._is_dict:
diff --git a/tensorflow/python/keras/model_subclassing_test.py b/tensorflow/python/keras/model_subclassing_test.py
index 620275e50f..cf64e00d20 100644
--- a/tensorflow/python/keras/model_subclassing_test.py
+++ b/tensorflow/python/keras/model_subclassing_test.py
@@ -28,6 +28,8 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import init_ops
@@ -408,6 +410,158 @@ class ModelSubclassingTest(test.TestCase):
     x2 = array_ops.ones((num_samples, input_dim))
     model([x1, x2])
 
+  def test_summary(self):
+
+    class ToString(object):
+
+      def __init__(self):
+        self.contents = ''
+
+      def __call__(self, msg):
+        self.contents += msg + '\n'
+
+    # Single-io
+    model = SimpleTestModel(num_classes=4, use_bn=True, use_dp=True)
+    model._set_inputs(np.ones((3, 4)))  # need to build model first
+    print_fn = ToString()
+    model.summary(print_fn=print_fn)
+    self.assertTrue('Trainable params: 356' in print_fn.contents)
+
+    # Multi-io
+    model = MultiIOTestModel(num_classes=(5, 6), use_bn=True, use_dp=True)
+    model._set_inputs([np.ones((3, 4)),
+                       np.ones((3, 4))])  # need to build model first
+    print_fn = ToString()
+    model.summary(print_fn=print_fn)
+    self.assertTrue('Trainable params: 587' in print_fn.contents)
+
+  def test_no_dependency(self):
+    class Foo(keras.Model):
+
+      def __init__(self):
+        super(Foo, self).__init__()
+        self.isdep = keras.layers.Dense(1)
+        self.notdep = data_structures.NoDependency(keras.layers.Dense(2))
+        self.notdep_var = data_structures.NoDependency(
+            resource_variable_ops.ResourceVariable(1., name='notdep_var'))
+
+    m = Foo()
+    self.assertEqual([m.isdep, m.notdep], m.layers)
+    self.assertEqual(1, len(m._checkpoint_dependencies))
+    self.assertIs(m.isdep, m._checkpoint_dependencies[0].ref)
+    self.assertEqual('notdep_var:0', m.notdep_var.name)
+
+  def test_extra_variable(self):
+
+    class ExtraVar(keras.Model):
+
+      def __init__(self):
+        super(ExtraVar, self).__init__()
+        self.dense = keras.layers.Dense(1)
+        self.var = resource_variable_ops.ResourceVariable(1.)
+        self.not_trainable_var = resource_variable_ops.ResourceVariable(
+            2., trainable=False)
+
+      def call(self, inputs):
+        return self.dense(inputs + self.var)
+
+    m = ExtraVar()
+    self.assertTrue(m.trainable)
+    self.assertEqual([m.dense], m.layers)
+    self.assertEqual([m.var, m.not_trainable_var], m.variables)
+    self.assertEqual([m.var], m.trainable_variables)
+    self.assertEqual([m.not_trainable_var], m.non_trainable_variables)
+    m.trainable = False
+    self.assertEqual([m.var, m.not_trainable_var], m.variables)
+    self.assertEqual([], m.trainable_variables)
+    self.assertEqual([m.var, m.not_trainable_var], m.non_trainable_variables)
+    m.trainable = True
+
+    m(array_ops.ones([1, 1]))
+
+    self.assertEqual([m.dense.kernel, m.dense.bias], m.dense.variables)
+    self.assertEqual([m.dense.kernel, m.dense.bias], m.dense.weights)
+
+    self.assertEqual([m.dense.kernel, m.dense.bias, m.var, m.not_trainable_var],
+                     m.variables)
+    self.assertEqual([m.dense.kernel, m.dense.bias, m.var],
+                     m.trainable_variables)
+    self.assertEqual([m.not_trainable_var], m.non_trainable_variables)
+
+    m.dense.trainable = False
+    self.assertEqual(
+        [m.var, m.dense.kernel, m.dense.bias, m.not_trainable_var],
+        m.variables)
+    self.assertEqual([m.var], m.trainable_variables)
+    self.assertEqual([m.dense.kernel, m.dense.bias, m.not_trainable_var],
+                     m.non_trainable_variables)
+
+  def test_add_weight_in_model(self):
+
+    class MyModel(keras.Model):
+
+      def __init__(self):
+        super(MyModel, self).__init__()
+        self.b = self.add_weight('bias', (10,))
+        self.c = self.add_weight('bias2', (10,), trainable=False)
+
+      def call(self, inputs):
+        return inputs + self.b + self.c
+
+    x = ops.convert_to_tensor(np.ones((10, 10), 'float32'))
+    model = MyModel()
+    model(x)
+    self.assertEqual(1, len(model.trainable_weights))
+    self.assertEqual(1, len(model.non_trainable_weights))
+    self.assertEqual(2, len(model.weights))
+
+    class MyModelCustomBuild(keras.Model):
+
+      def build(self, input_shape):
+        self.b = self.add_weight('bias', (10,))
+        self.c = self.add_weight('bias2', (10,), trainable=False)
+
+      def call(self, inputs):
+        return inputs + self.b + self.c
+
+    x = ops.convert_to_tensor(np.ones((10, 10), 'float32'))
+    model = MyModelCustomBuild()
+    model(x)
+    self.assertEqual(1, len(model.trainable_weights))
+    self.assertEqual(1, len(model.non_trainable_weights))
+    self.assertEqual(2, len(model.weights))
+
+  def test_add_update_in_model(self):
+
+    class MyModel(keras.Model):
+
+      def __init__(self):
+        super(MyModel, self).__init__()
+        self.b = self.add_weight('bias', (10,))
+        self.c = self.add_weight('bias2', (10,))
+
+      def call(self, inputs):
+        # Unconditional
+        self.add_update(self.b.assign(self.b * 2))
+        # Conditional
+        self.add_update(self.c.assign(inputs[1, :]), inputs)
+        return inputs + self.b + self.c
+
+    x = ops.convert_to_tensor(np.ones((10, 10), 'float32'))
+    model = MyModel()
+    model(x)
+
+    if context.executing_eagerly():
+      self.assertEqual(0, len(model.updates))
+    else:
+      self.assertEqual(2, len(model.updates))
+      self.assertEqual(1, len(model.get_updates_for(None)))
+      self.assertEqual(1, len(model.get_updates_for(x)))
+
+
+@keras_parameterized.run_all_keras_modes
+class ModelSubclassCompiledTest(keras_parameterized.TestCase):
+
   def test_single_io_workflow_with_np_arrays(self):
     num_classes = 2
     num_samples = 100
@@ -419,7 +573,8 @@ class ModelSubclassingTest(test.TestCase):
     model.compile(
         loss='mse',
         optimizer=RMSPropOptimizer(learning_rate=0.001),
-        metrics=['acc', keras.metrics.CategoricalAccuracy()])
+        metrics=['acc', keras.metrics.CategoricalAccuracy()],
+        run_eagerly=testing_utils.should_run_eagerly())
 
     x = np.ones((num_samples, input_dim))
     y = np.zeros((num_samples, num_classes))
@@ -437,7 +592,8 @@ class ModelSubclassingTest(test.TestCase):
                              use_bn=True)
     model.compile(loss='mse',
                   optimizer=RMSPropOptimizer(learning_rate=0.001),
-                  metrics=['acc'])
+                  metrics=['acc'],
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     x1 = np.ones((num_samples, input_dim))
     x2 = np.ones((num_samples, input_dim))
@@ -454,7 +610,9 @@ class ModelSubclassingTest(test.TestCase):
 
     with self.cached_session():
       model = SimpleTestModel(num_classes=num_classes, use_dp=True, use_bn=True)
-      model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+      model.compile(
+          loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+          run_eagerly=testing_utils.should_run_eagerly())
 
       x = np.ones((num_samples, input_dim), dtype=np.float32)
       y = np.zeros((num_samples, num_classes), dtype=np.float32)
@@ -484,7 +642,9 @@ class ModelSubclassingTest(test.TestCase):
     self.assertEqual(model.built, False)
     self.assertEqual(len(model.weights), 0)
 
-    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch([x1, x2], [y1, y2])
 
     self.assertEqual(model.built, True)
@@ -514,7 +674,9 @@ class ModelSubclassingTest(test.TestCase):
     y = np.ones((num_samples, input_dim))
 
     model = BNNet()
-    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     y_ref = model.predict(x)
 
     model.train_on_batch(x, y)
@@ -544,7 +706,9 @@ class ModelSubclassingTest(test.TestCase):
     x = np.ones((num_samples, input_dim))
     y = model.predict(x)
     self.assertEqual(np.sum(y), np.sum(x))
-    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     loss = model.train_on_batch(x, y)
     self.assertGreater(loss, 0.1)
 
@@ -562,7 +726,9 @@ class ModelSubclassingTest(test.TestCase):
     y2 = np.zeros((num_samples, num_classes[1]))
 
     model = MultiIOTestModel(num_classes=num_classes, use_bn=True)
-    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0)
     model.fit({'input_1': x1, 'input_2': x2},
               {'output_1': y1, 'output_2': y2},
@@ -571,7 +737,9 @@ class ModelSubclassingTest(test.TestCase):
               validation_data=([x1, x2], [y1, y2]))
 
     model = MultiIOTestModel(num_classes=num_classes, use_bn=True)
-    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     model.train_on_batch([x1, x2], [y1, y2])
     model.train_on_batch({'input_1': x1, 'input_2': x2},
                          {'output_1': y1, 'output_2': y2})
@@ -589,7 +757,9 @@ class ModelSubclassingTest(test.TestCase):
     y2 = np.zeros((num_samples, num_classes[1]))
 
     model = MultiIOTestModel(num_classes=num_classes, use_bn=True)
-    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     model.evaluate([x1, x2], [y1, y2])
     model.test_on_batch([x1, x2], [y1, y2])
 
@@ -611,7 +781,9 @@ class ModelSubclassingTest(test.TestCase):
     y2 = np.zeros((num_samples, num_classes[1]))
 
     model = MultiIOTestModel(num_classes=num_classes, use_bn=True)
-    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0)
     y_ref_1, y_ref_2 = model.predict([x1, x2])
 
@@ -640,31 +812,6 @@ class ModelSubclassingTest(test.TestCase):
       self.assertAllClose(y_ref_1, y1, atol=1e-5)
       self.assertAllClose(y_ref_2, y2, atol=1e-5)
 
-  def test_summary(self):
-
-    class ToString(object):
-
-      def __init__(self):
-        self.contents = ''
-
-      def __call__(self, msg):
-        self.contents += msg + '\n'
-
-    # Single-io
-    model = SimpleTestModel(num_classes=4, use_bn=True, use_dp=True)
-    model._set_inputs(np.ones((3, 4)))  # need to build model first
-    print_fn = ToString()
-    model.summary(print_fn=print_fn)
-    self.assertTrue('Trainable params: 356' in print_fn.contents)
-
-    # Multi-io
-    model = MultiIOTestModel(num_classes=(5, 6), use_bn=True, use_dp=True)
-    model._set_inputs([np.ones((3, 4)),
-                       np.ones((3, 4))])  # need to build model first
-    print_fn = ToString()
-    model.summary(print_fn=print_fn)
-    self.assertTrue('Trainable params: 587' in print_fn.contents)
-
   def test_subclass_nested_in_subclass(self):
     num_classes = 2
     num_samples = 100
@@ -673,7 +820,8 @@ class ModelSubclassingTest(test.TestCase):
     model = NestedTestModel1(num_classes=num_classes)
     model.compile(loss='mse',
                   optimizer=RMSPropOptimizer(learning_rate=0.001),
-                  metrics=['acc'])
+                  metrics=['acc'],
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     x = np.ones((num_samples, input_dim))
     y = np.zeros((num_samples, num_classes))
@@ -695,7 +843,8 @@ class ModelSubclassingTest(test.TestCase):
     model = NestedTestModel2(num_classes=num_classes)
     model.compile(loss='mse',
                   optimizer=RMSPropOptimizer(learning_rate=0.001),
-                  metrics=['acc'])
+                  metrics=['acc'],
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     x = np.ones((num_samples, input_dim))
     y = np.zeros((num_samples, num_classes))
@@ -717,7 +866,8 @@ class ModelSubclassingTest(test.TestCase):
     model = get_nested_model_3(input_dim=input_dim, num_classes=num_classes)
     model.compile(loss='mse',
                   optimizer=RMSPropOptimizer(learning_rate=0.001),
-                  metrics=['acc'])
+                  metrics=['acc'],
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     x = np.ones((num_samples, input_dim))
     y = np.zeros((num_samples, num_classes))
@@ -750,7 +900,8 @@ class ModelSubclassingTest(test.TestCase):
     model = keras.Sequential([Inner()])
     model.compile(loss='mse',
                   optimizer=RMSPropOptimizer(learning_rate=0.001),
-                  metrics=['acc'])
+                  metrics=['acc'],
+                  run_eagerly=testing_utils.should_run_eagerly())
 
     x = np.ones((num_samples, input_dim))
     y = np.zeros((num_samples, num_classes))
@@ -786,134 +937,12 @@ class ModelSubclassingTest(test.TestCase):
     x = np.ones((10, 10))
     y = model.predict(x)
     self.assertEqual(np.sum(y), np.sum(x))
-    model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001),
+        run_eagerly=testing_utils.should_run_eagerly())
     loss = model.train_on_batch(x, y)
     self.assertGreater(loss, 0.1)
 
-  def test_no_dependency(self):
-    class Foo(keras.Model):
-
-      def __init__(self):
-        super(Foo, self).__init__()
-        self.isdep = keras.layers.Dense(1)
-        self.notdep = data_structures.NoDependency(keras.layers.Dense(2))
-        self.notdep_var = data_structures.NoDependency(
-            resource_variable_ops.ResourceVariable(1., name='notdep_var'))
-
-    m = Foo()
-    self.assertEqual([m.isdep, m.notdep], m.layers)
-    self.assertEqual(1, len(m._checkpoint_dependencies))
-    self.assertIs(m.isdep, m._checkpoint_dependencies[0].ref)
-    self.assertEqual('notdep_var:0', m.notdep_var.name)
-
-  def test_extra_variable(self):
-
-    class ExtraVar(keras.Model):
-
-      def __init__(self):
-        super(ExtraVar, self).__init__()
-        self.dense = keras.layers.Dense(1)
-        self.var = resource_variable_ops.ResourceVariable(1.)
-        self.not_trainable_var = resource_variable_ops.ResourceVariable(
-            2., trainable=False)
-
-      def call(self, inputs):
-        return self.dense(inputs + self.var)
-
-    m = ExtraVar()
-    self.assertTrue(m.trainable)
-    self.assertEqual([m.dense], m.layers)
-    self.assertEqual([m.var, m.not_trainable_var], m.variables)
-    self.assertEqual([m.var], m.trainable_variables)
-    self.assertEqual([m.not_trainable_var], m.non_trainable_variables)
-    m.trainable = False
-    self.assertEqual([m.var, m.not_trainable_var], m.variables)
-    self.assertEqual([], m.trainable_variables)
-    self.assertEqual([m.var, m.not_trainable_var], m.non_trainable_variables)
-    m.trainable = True
-
-    m(array_ops.ones([1, 1]))
-
-    self.assertEqual([m.dense.kernel, m.dense.bias], m.dense.variables)
-    self.assertEqual([m.dense.kernel, m.dense.bias], m.dense.weights)
-
-    self.assertEqual([m.dense.kernel, m.dense.bias, m.var, m.not_trainable_var],
-                     m.variables)
-    self.assertEqual([m.dense.kernel, m.dense.bias, m.var],
-                     m.trainable_variables)
-    self.assertEqual([m.not_trainable_var], m.non_trainable_variables)
-
-    m.dense.trainable = False
-    self.assertEqual(
-        [m.var, m.dense.kernel, m.dense.bias, m.not_trainable_var],
-        m.variables)
-    self.assertEqual([m.var], m.trainable_variables)
-    self.assertEqual([m.dense.kernel, m.dense.bias, m.not_trainable_var],
-                     m.non_trainable_variables)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_add_weight_in_model(self):
-
-    class MyModel(keras.Model):
-
-      def __init__(self):
-        super(MyModel, self).__init__()
-        self.b = self.add_weight('bias', (10,))
-        self.c = self.add_weight('bias2', (10,), trainable=False)
-
-      def call(self, inputs):
-        return inputs + self.b + self.c
-
-    x = ops.convert_to_tensor(np.ones((10, 10), 'float32'))
-    model = MyModel()
-    model(x)
-    self.assertEqual(1, len(model.trainable_weights))
-    self.assertEqual(1, len(model.non_trainable_weights))
-    self.assertEqual(2, len(model.weights))
-
-    class MyModelCustomBuild(keras.Model):
-
-      def build(self, input_shape):
-        self.b = self.add_weight('bias', (10,))
-        self.c = self.add_weight('bias2', (10,), trainable=False)
-
-      def call(self, inputs):
-        return inputs + self.b + self.c
-
-    x = ops.convert_to_tensor(np.ones((10, 10), 'float32'))
-    model = MyModelCustomBuild()
-    model(x)
-    self.assertEqual(1, len(model.trainable_weights))
-    self.assertEqual(1, len(model.non_trainable_weights))
-    self.assertEqual(2, len(model.weights))
-
-  def test_add_update_in_model(self):
-
-    class MyModel(keras.Model):
-
-      def __init__(self):
-        super(MyModel, self).__init__()
-        self.b = self.add_weight('bias', (10,))
-        self.c = self.add_weight('bias2', (10,))
-
-      def call(self, inputs):
-        # Unconditional
-        self.add_update(self.b.assign(self.b * 2))
-        # Conditional
-        self.add_update(self.c.assign(inputs[1, :]), inputs)
-        return inputs + self.b + self.c
-
-    x = ops.convert_to_tensor(np.ones((10, 10), 'float32'))
-    model = MyModel()
-    model(x)
-
-    if context.executing_eagerly():
-      self.assertEqual(0, len(model.updates))
-    else:
-      self.assertEqual(2, len(model.updates))
-      self.assertEqual(1, len(model.get_updates_for(None)))
-      self.assertEqual(1, len(model.get_updates_for(x)))
-
 
 class GraphSpecificModelSubclassingTests(test.TestCase):
 
@@ -1083,9 +1112,9 @@ class TrainingMaskingModel(keras.Model):
     return self.dense1(x)
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class CustomCallSignatureTests(test.TestCase):
 
-  @test_util.run_in_graph_and_eager_modes
   def test_no_inputs_in_signature(self):
     model = CustomCallModel()
     first = array_ops.ones([2, 3])
@@ -1099,7 +1128,6 @@ class CustomCallSignatureTests(test.TestCase):
     output = model(first, second=second, training=False)
     self.assertAllClose(expected_output, self.evaluate(output))
 
-  @test_util.run_in_graph_and_eager_modes
   def test_training_args_call_build(self):
     input_dim = 2
 
@@ -1112,7 +1140,6 @@ class CustomCallSignatureTests(test.TestCase):
                                     'has been properly built.'))
     self.assertTrue(model.built, 'Model should be built after calling `build`.')
 
-  @test_util.run_in_graph_and_eager_modes
   def test_training_and_mask_args_call_build(self):
     input_dim = 2
 
@@ -1125,7 +1152,6 @@ class CustomCallSignatureTests(test.TestCase):
                                     'has been properly built.'))
     self.assertTrue(model.built, 'Model should be built after calling `build`.')
 
-  @test_util.run_in_graph_and_eager_modes
   def test_custom_call_kwargs_and_build(self):
     first_input_shape = (2, 3)
     second_input_shape = (2, 5)
@@ -1138,7 +1164,6 @@ class CustomCallSignatureTests(test.TestCase):
         ValueError, 'cannot build your model if it has positional'):
       model.build(input_shape=[first_input_shape, second_input_shape])
 
-  @test_util.run_in_graph_and_eager_modes
   def test_inputs_in_signature(self):
 
     class HasInputsAndOtherPositional(keras.Model):
@@ -1155,7 +1180,6 @@ class CustomCallSignatureTests(test.TestCase):
       x1, x2 = keras.Input((1, 1)), keras.Input((1, 1))
       model(x1, x2)
 
-  @test_util.run_in_graph_and_eager_modes
   def test_kwargs_in_signature(self):
 
     class HasKwargs(keras.Model):
@@ -1164,12 +1188,11 @@ class CustomCallSignatureTests(test.TestCase):
         return x
 
     model = HasKwargs()
-    arg = array_ops.ones([])
+    arg = array_ops.ones([1])
     model(arg, a=3)
     if not context.executing_eagerly():
       self.assertEqual(len(model.inputs), 1)
 
-  @test_util.run_in_graph_and_eager_modes
   def test_args_in_signature(self):
 
     class HasArgs(keras.Model):
@@ -1189,23 +1212,26 @@ class CustomCallSignatureTests(test.TestCase):
 
     class HasArgs(keras.Model):
 
-      def call(self, x, training=True, *args, **kwargs):
+      def call(self, x, training=True, *args, **kwargs):  # pylint:disable=keyword-arg-before-vararg
         return x
 
-    with context.graph_mode():
-      model = HasArgs()
-      x1, x2, x3 = keras.Input((1, 1)), keras.Input((1, 1)), keras.Input((1, 1))
-      with self.assertRaisesRegexp(
-          TypeError, 'may not accept both positional arguments and '):
-        model(x1, x2, x3, a=3)
+    model = HasArgs()
+    x1, x2, x3 = keras.Input((1, 1)), keras.Input((1, 1)), keras.Input((1, 1))
+    with self.assertRaisesRegexp(
+        TypeError, 'may not accept both positional arguments and '):
+      model(x1, x2, x3, a=3)
 
+  @test_util.assert_no_new_tensors
+  @test_util.assert_no_garbage_created
   def test_training_no_default(self):
+    if context.executing_eagerly():
+      self.skipTest('b/120997007')
 
-    with context.graph_mode():
-      model = TrainingNoDefaultModel()
-      arg = array_ops.ones([1, 1])
-      model(arg, True)
-      self.assertEqual(len(model.inputs), 1)
+    model = TrainingNoDefaultModel()
+
+    arg = array_ops.ones([1, 1])
+    model(arg, True)
+    self.assertEqual(len(model.inputs), 1)
 
   def test_training_no_default_with_positional(self):
 
@@ -1214,11 +1240,10 @@ class CustomCallSignatureTests(test.TestCase):
       def call(self, x, training, positional):
         return x
 
-    with context.graph_mode():
-      model = TrainingNoDefaultWithPositional()
-      x1, x2, x3 = keras.Input((1, 1)), keras.Input((1, 1)), keras.Input((1, 1))
-      with self.assertRaisesRegexp(TypeError, 'after a non-input'):
-        model(x1, x2, x3)
+    model = TrainingNoDefaultWithPositional()
+    x1, x2, x3 = keras.Input((1, 1)), keras.Input((1, 1)), keras.Input((1, 1))
+    with self.assertRaisesRegexp(TypeError, 'after a non-input'):
+      model(x1, x2, x3)
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From ef255d358b71bddc6196db7343f3bd7546d40ebe Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Fri, 14 Dec 2018 08:45:32 -0800
Subject: [PATCH 594/873] Fixing tf nightly build.

PiperOrigin-RevId: 225548043
---
 tensorflow/python/keras/layers/gru_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/keras/layers/gru_test.py b/tensorflow/python/keras/layers/gru_test.py
index 61c502c3b6..d05e7eeb63 100644
--- a/tensorflow/python/keras/layers/gru_test.py
+++ b/tensorflow/python/keras/layers/gru_test.py
@@ -98,7 +98,7 @@ class GRULayerTest(keras_parameterized.TestCase):
                                  reset_after=True)
     output = gru_layer(inputs)
     gru_model = keras.models.Model(inputs, output)
-    gru_model.compile('rmsprop', 'mse',
+    gru_model.compile(RMSPropOptimizer(0.01), 'mse',
                       run_eagerly=testing_utils.should_run_eagerly())
     gru_model.fit(x_train, y_train)
     gru_model.predict(x_train)
-- 
GitLab


From d19a8c92a23db3cc5c2ae74a8443b16d6d48eb13 Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Fri, 14 Dec 2018 08:51:16 -0800
Subject: [PATCH 595/873] Add test for xla.estimator_model_fn

PiperOrigin-RevId: 225548792
---
 tensorflow/contrib/compiler/BUILD       |   8 +-
 tensorflow/contrib/compiler/xla_test.py | 340 ++++++++++++++++++++++++
 2 files changed, 346 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD
index e4566437c6..0897728272 100644
--- a/tensorflow/contrib/compiler/BUILD
+++ b/tensorflow/contrib/compiler/BUILD
@@ -75,17 +75,21 @@ tf_py_test(
     srcs = ["xla_test.py"],
     additional_deps = [
         ":xla",
-        "@six_archive//:six",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/contrib/tpu:tpu_estimator",
+        "//tensorflow/contrib/tpu:tpu_lib",
+        "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:control_flow_util",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
-        "//tensorflow/contrib/tpu:tpu_lib",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:summary",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
+        "//tensorflow/python/data/ops:dataset_ops",
     ],
     tags = ["no_pip"],
+    xla_enabled = True,
 )
diff --git a/tensorflow/contrib/compiler/xla_test.py b/tensorflow/contrib/compiler/xla_test.py
index 3b49755afc..a85b2dd155 100644
--- a/tensorflow/contrib/compiler/xla_test.py
+++ b/tensorflow/contrib/compiler/xla_test.py
@@ -18,11 +18,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import re
+from absl.testing import parameterized
+
 from tensorflow.contrib.compiler import xla
+from tensorflow.contrib.tpu.python.tpu import tpu_estimator
 from tensorflow.contrib.tpu.python.tpu import tpu_feed
+from tensorflow.contrib.training.python.training import hparam
 from tensorflow.python import summary
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import logging_ops
@@ -30,6 +38,14 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import test
+from tensorflow.python.training import training
+
+
+_TRAIN = model_fn_lib.ModeKeys.TRAIN
+_EVAL = model_fn_lib.ModeKeys.EVAL
+_EXPECTED_LOSS = 1
+_EXPECTED_FEATURE = 2
+_EXPECTED_LABEL = 3
 
 
 class XLACompileContextTest(test.TestCase):
@@ -252,5 +268,329 @@ class CheckFunctionArgumentCountTest(test.TestCase):
                      xla.check_function_argument_count(func, 0, queue))
 
 
+def _test_train_model_fn(features, labels, mode, params):
+  """A dummy model_fn for testing purpose."""
+  del features, labels, params
+  loss = constant_op.constant(_EXPECTED_LOSS)
+  return model_fn_lib.EstimatorSpec(
+      mode=mode, loss=loss, train_op=array_ops.identity(loss))
+
+
+@xla.estimator_model_fn
+def decorated_model_fn(features, labels, mode, params):
+  return _test_train_model_fn(features, labels, mode, params)
+
+
+def make_dummy_features_labels():
+  # XLA CPU/GPU backend doesn't support guaranteed constant, thus use dataset
+  # container to work around.
+  features_dataset = dataset_ops.Dataset.from_tensors(
+      constant_op.constant(_EXPECTED_FEATURE)).repeat(10)
+  features_op = features_dataset.make_one_shot_iterator().get_next()
+  labels_dataset = dataset_ops.Dataset.from_tensors(
+      constant_op.constant(_EXPECTED_LABEL)).repeat(10)
+  labels_op = labels_dataset.make_one_shot_iterator().get_next()
+  return features_op, labels_op
+
+
+class XlaDecoratorTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ('test_use_as_decorator', decorated_model_fn, None),
+      ('test_use_as_function', xla.estimator_model_fn(_test_train_model_fn),
+       None),
+      ('test_use_tpu_false_hparams', decorated_model_fn,
+       hparam.HParams(use_tpu=False)),
+      ('test_use_tpu_false_dict_params', decorated_model_fn, {
+          'use_tpu': False
+      }),
+  )
+  def test_compile(self, model_fn, params):
+    """Calls model_fn and verifies it is compiled."""
+    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      mock_xla_compile.return_value = [loss]
+
+      features, labels = make_dummy_features_labels()
+      estimator_spec = model_fn(
+          features=features, labels=labels, mode=_TRAIN, params=params or {})
+
+      mock_xla_compile.assert_called_once()
+      self.assertEqual(estimator_spec.mode, _TRAIN)
+
+      with self.test_session() as sess:
+        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
+        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))
+
+  @parameterized.named_parameters(
+      ('test_use_tpu_true_hparams', decorated_model_fn,
+       hparam.HParams(use_tpu=True)),
+      ('test_use_tpu_true_dict_params', decorated_model_fn, {
+          'use_tpu': True
+      }),
+  )
+  def test_not_compile(self, model_fn, params):
+    """Calls model_fn and verifies it is NOT compiled."""
+    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      mock_xla_compile.return_value = [loss]
+
+      features, labels = make_dummy_features_labels()
+      estimator_spec = model_fn(
+          features=features, labels=labels, mode=_TRAIN, params=params or {})
+
+      mock_xla_compile.assert_not_called()
+      self.assertEqual(estimator_spec.mode, _TRAIN)
+
+      with self.test_session() as sess:
+        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
+        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))
+
+  def test_model_with_summary(self):
+    """Tests that summary ops are disabled."""
+
+    @xla.estimator_model_fn
+    def model_fn_with_summary(features, labels, mode, params):
+      del features, labels, params
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      summary.scalar('loss_scalar_summary', loss)
+      summary.histogram('loss_histogram_summary', loss)
+      summary.image('loss_image_summary', loss)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode, loss=loss, train_op=array_ops.identity(loss))
+
+    features, labels = make_dummy_features_labels()
+    estimator_spec = model_fn_with_summary(
+        features=features, labels=labels, mode=_TRAIN, params={})
+
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+
+
+def _test_eval_metric_fn(eval_tensor_1, eval_tensor_2):
+  return {
+      'metric_1': (eval_tensor_1, eval_tensor_1),
+      'metric_2': (eval_tensor_2, eval_tensor_2),
+  }
+
+
+class XlaDecoratorEvaluationTest(test.TestCase):
+
+  def _verify_evaluation_result(self, eval_model_fn):
+    features, labels = make_dummy_features_labels()
+    estimator_spec = eval_model_fn(
+        features=features, labels=labels, mode=_EVAL, params={})
+
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+      self.assertEqual(
+          sess.run(estimator_spec.eval_metric_ops['metric_1'][0]),
+          _EXPECTED_FEATURE + _EXPECTED_LABEL)
+      self.assertEqual(
+          sess.run(estimator_spec.eval_metric_ops['metric_1'][1]),
+          _EXPECTED_FEATURE + _EXPECTED_LABEL)
+      self.assertEqual(
+          sess.run(estimator_spec.eval_metric_ops['metric_2'][0]),
+          _EXPECTED_FEATURE - _EXPECTED_LABEL)
+      self.assertEqual(
+          sess.run(estimator_spec.eval_metric_ops['metric_2'][1]),
+          _EXPECTED_FEATURE - _EXPECTED_LABEL)
+
+  def test_eval_base_estimator_spec_eval_metric_ops_disallowed(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn_return_estimator_spec(features, labels, mode, params):
+      del features, labels, params
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=loss,
+          eval_metric_ops={
+              'metric': (array_ops.identity(loss), control_flow_ops.no_op())
+          })
+
+    with self.assertRaisesRegexp(
+        ValueError, 'EstimatorSpec.eval_metric_ops is not supported with XLA '
+        'compilation. Please use TPUEstimatorSpec.eval_metrics instead.'):
+      self._verify_evaluation_result(eval_model_fn_return_estimator_spec)
+
+  def test_eval_base_estimator_spec_no_eval_metric_ops(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn_no_eval_metric_ops(features, labels, mode, params):
+      del features, labels, params
+      return model_fn_lib.EstimatorSpec(
+          mode=mode, loss=constant_op.constant(_EXPECTED_LOSS))
+
+    features, labels = make_dummy_features_labels()
+    estimator_spec = eval_model_fn_no_eval_metric_ops(
+        features=features, labels=labels, mode=_EVAL, params={})
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+
+  def test_eval_no_eval_metrics(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn_no_eval_metrics(features, labels, mode, params):
+      del features, labels, params
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode, loss=constant_op.constant(_EXPECTED_LOSS))
+
+    features, labels = make_dummy_features_labels()
+    estimator_spec = eval_model_fn_no_eval_metrics(
+        features=features, labels=labels, mode=_EVAL, params={})
+
+    self.assertEqual(estimator_spec.eval_metric_ops, {})
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+
+  def test_eval_fn_missing_input_tensor(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn(features, labels, mode, params):
+      del params
+      dummy_eval_metric_fn_tensors_dict = {
+          'eval_tensor_1': features + labels,
+      }
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          eval_metrics=(_test_eval_metric_fn,
+                        dummy_eval_metric_fn_tensors_dict))
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        re.escape("Arguments ['eval_tensor_2'] are needed by metric_fn (first "
+                  'element of TPUEstimatorSpec.eval_metrics) but they are not '
+                  'provided by evaluation tensors (second element of '
+                  'TPUEstimatorSpec.eval_metrics).')):
+      self._verify_evaluation_result(eval_model_fn)
+
+  def test_eval_fn_extraneous_input_tensor(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn(features, labels, mode, params):
+      del params
+      dummy_eval_metric_fn_tensors_dict = {
+          'eval_tensor_1': features + labels,
+          'eval_tensor_2': features - labels,
+          'extra_tensor': features * 2 - labels,
+      }
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          eval_metrics=(_test_eval_metric_fn,
+                        dummy_eval_metric_fn_tensors_dict))
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        re.escape("Arguments ['extra_tensor'] are provided by evaluation "
+                  'tensors (second element of TPUEstimatorSpec.eval_metrics) '
+                  'but they are not needed by metric_fn (first element of '
+                  'TPUEstimatorSpec.eval_metrics).')):
+      self._verify_evaluation_result(eval_model_fn)
+
+  def test_eval_tensors_as_list(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn(features, labels, mode, params):
+      del params
+      dummy_eval_metric_fn_tensors = [features + labels, features - labels]
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          eval_metrics=(_test_eval_metric_fn, dummy_eval_metric_fn_tensors))
+
+    self._verify_evaluation_result(eval_model_fn)
+
+  def test_eval_tensors_as_dict(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn(features, labels, mode, params):
+      del params
+      dummy_eval_metric_fn_tensors_dict = {
+          'eval_tensor_1': features + labels,
+          'eval_tensor_2': features - labels,
+      }
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          eval_metrics=(_test_eval_metric_fn,
+                        dummy_eval_metric_fn_tensors_dict))
+
+    self._verify_evaluation_result(eval_model_fn)
+
+  def test_model_with_summary(self):
+    """Tests that summary ops are disabled."""
+
+    @xla.estimator_model_fn
+    def model_fn_with_summary(features, labels, mode, params):
+      del features, labels, params
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      summary.scalar('loss_scalar_summary', loss)
+      summary.histogram('loss_histogram_summary', loss)
+      summary.image('loss_image_summary', loss)
+      return tpu_estimator.TPUEstimatorSpec(mode=mode, loss=loss)
+
+    features, labels = make_dummy_features_labels()
+    estimator_spec = model_fn_with_summary(
+        features=features, labels=labels, mode=_EVAL, params={})
+
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+
+
+class XlaDecoratorScaffoldTest(test.TestCase, parameterized.TestCase):
+
+  def _make_scaffold_fn(self, mode):
+
+    def _scaffold_fn_on_cpu():
+      scaffold = training.Scaffold()
+      self.assertNotIn(mode, self.is_scaffold_fn_called)
+      self.is_scaffold_fn_called[mode] = True
+      return scaffold
+
+    return _scaffold_fn_on_cpu
+
+  def test_scaffold_fn_return_none(self):
+
+    @xla.estimator_model_fn
+    def model_fn(features, labels, mode, params):
+      del features, labels, params
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          train_op=control_flow_ops.no_op(),
+          scaffold_fn=lambda: None)
+
+    features, labels = make_dummy_features_labels()
+    with self.assertRaisesRegexp(
+        ValueError,
+        'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed'):
+      model_fn(features=features, labels=labels, mode=_TRAIN, params={})
+
+  @parameterized.named_parameters(
+      ('train_mode', _TRAIN),
+      ('eval_mode', _EVAL),
+      # TODO(ycao): Add predict_mode test after PREDICT mode is implemented.
+  )
+  def test_scaffold_fn_in_mode(self, mode):
+
+    @xla.estimator_model_fn
+    def model_fn(features, labels, mode, params):
+      del features, labels, params
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          train_op=control_flow_ops.no_op(),
+          scaffold_fn=self._make_scaffold_fn(mode))
+
+    features, labels = make_dummy_features_labels()
+
+    self.is_scaffold_fn_called = {}
+    model_fn(features=features, labels=labels, mode=mode, params={})
+    self.assertTrue(self.is_scaffold_fn_called[mode])
+
+
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 5aa0b0de0ca2a931f1af924e3a0ce5b6402b3e9d Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Fri, 14 Dec 2018 08:58:23 -0800
Subject: [PATCH 596/873] Explicitly declare MKLDNN internal headers as source
 files in MKLDNN build file.

Fixes Bazel build error when building TF as a submodule with MKLDNN enabled; by explicitly declaring these files as inputs Bazel can find them in the build sandbox.

PiperOrigin-RevId: 225549870
---
 third_party/mkl_dnn/mkldnn.BUILD | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/third_party/mkl_dnn/mkldnn.BUILD b/third_party/mkl_dnn/mkldnn.BUILD
index d80c7135d6..bd842b87f8 100644
--- a/third_party/mkl_dnn/mkldnn.BUILD
+++ b/third_party/mkl_dnn/mkldnn.BUILD
@@ -17,8 +17,12 @@ cc_library(
     name = "mkl_dnn",
     srcs = glob([
         "src/common/*.cpp",
+        "src/common/*.hpp",
         "src/cpu/*.cpp",
+        "src/cpu/*.hpp",
         "src/cpu/gemm/*.cpp",
+        "src/cpu/gemm/*.hpp",
+        "src/cpu/xbyak/*.h",
     ]),
     hdrs = glob(["include/*"]),
     copts = [
@@ -68,8 +72,12 @@ cc_library(
     name = "mkldnn_single_threaded",
     srcs = glob([
         "src/common/*.cpp",
+        "src/common/*.hpp",
         "src/cpu/*.cpp",
+        "src/cpu/*.hpp",
         "src/cpu/gemm/*.cpp",
+        "src/cpu/gemm/*.hpp",
+        "src/cpu/xbyak/*.h",
     ]),
     hdrs = glob(["include/*"]),
     copts = [
-- 
GitLab


From b02f50c0ee8ca37f05cc21fe1f8e9c2d9b65d6e4 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Fri, 14 Dec 2018 09:01:18 -0800
Subject: [PATCH 597/873] Always validate model with VerifyModelBuilder in
 VerifyAndBuildFromFile.

PiperOrigin-RevId: 225550251
---
 tensorflow/lite/model.cc | 23 +++++++++++++++++------
 tensorflow/lite/model.h  | 23 +++++++++++++----------
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/tensorflow/lite/model.cc b/tensorflow/lite/model.cc
index 831c81aa00..bfadf2d6a0 100644
--- a/tensorflow/lite/model.cc
+++ b/tensorflow/lite/model.cc
@@ -91,16 +91,25 @@ std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromFile(
 }
 
 std::unique_ptr<FlatBufferModel> FlatBufferModel::VerifyAndBuildFromFile(
-    const char* filename, TfLiteVerifier* verifier,
+    const char* filename, TfLiteVerifier* extra_verifier,
     ErrorReporter* error_reporter) {
   error_reporter = ValidateErrorReporter(error_reporter);
 
   std::unique_ptr<FlatBufferModel> model;
   auto allocation = GetAllocationFromFile(filename, /*mmap_file=*/true,
                                           error_reporter, /*use_nnapi=*/true);
-  if (verifier &&
-      !verifier->Verify(static_cast<const char*>(allocation->base()),
-                        allocation->bytes(), error_reporter)) {
+
+  flatbuffers::Verifier base_verifier(
+      reinterpret_cast<const uint8_t*>(allocation->base()),
+      allocation->bytes());
+  if (!VerifyModelBuffer(base_verifier)) {
+    error_reporter->Report("The model is not a valid Flatbuffer file");
+    return nullptr;
+  }
+
+  if (extra_verifier &&
+      !extra_verifier->Verify(static_cast<const char*>(allocation->base()),
+                              allocation->bytes(), error_reporter)) {
     return model;
   }
   model.reset(new FlatBufferModel(allocation.release(), error_reporter));
@@ -122,17 +131,19 @@ std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromBuffer(
 }
 
 std::unique_ptr<FlatBufferModel> FlatBufferModel::VerifyAndBuildFromBuffer(
-    const char* buffer, size_t buffer_size, TfLiteVerifier* verifier,
+    const char* buffer, size_t buffer_size, TfLiteVerifier* extra_verifier,
     ErrorReporter* error_reporter) {
   error_reporter = ValidateErrorReporter(error_reporter);
 
   flatbuffers::Verifier base_verifier(reinterpret_cast<const uint8_t*>(buffer),
                                       buffer_size);
   if (!VerifyModelBuffer(base_verifier)) {
+    error_reporter->Report("The model is not a valid Flatbuffer buffer");
     return nullptr;
   }
 
-  if (verifier && !verifier->Verify(buffer, buffer_size, error_reporter)) {
+  if (extra_verifier &&
+      !extra_verifier->Verify(buffer, buffer_size, error_reporter)) {
     return nullptr;
   }
 
diff --git a/tensorflow/lite/model.h b/tensorflow/lite/model.h
index 58c9767849..bd0f4baef6 100644
--- a/tensorflow/lite/model.h
+++ b/tensorflow/lite/model.h
@@ -68,11 +68,15 @@ class FlatBufferModel {
 
   // Verifies whether the content of the file is legit, then builds a model
   // based on the file.
+  // The extra_verifier argument is an additional optional verifier for the file
+  // contents. By default, we always check with tflite::VerifyModelBuffer. If
+  // extra_verifier is supplied, the file contents is also checked against the
+  // extra_verifier after the check against tflite::VerifyModelBuilder.
   // Caller retains ownership of `error_reporter` and must ensure its lifetime
   // is longer than the FlatBufferModel instance.
   // Returns a nullptr in case of failure.
   static std::unique_ptr<FlatBufferModel> VerifyAndBuildFromFile(
-      const char* filename, TfLiteVerifier* verifier = nullptr,
+      const char* filename, TfLiteVerifier* extra_verifier = nullptr,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
   // Builds a model based on a pre-loaded flatbuffer. The caller retains
@@ -88,18 +92,17 @@ class FlatBufferModel {
 
   // Verifies whether the content of the buffer is legit, then builds a model
   // based on the pre-loaded flatbuffer.
-  // The verifier argument is an additional optional verifier for the buffer. By
-  // default, we always check with tflite::VerifyModelBuffer. If verifier is
-  // supplied, the buffer is checked against the verifier after the check
-  // against tflite::VerifyModelBuilder.
-  // The caller retains ownership of the buffer and should keep it alive until
-  // the returned object is destroyed. Caller retains ownership of
-  // `error_reporter` and must ensure its lifetime is longer than the
-  // FlatBufferModel instance.
+  // The extra_verifier argument is an additional optional verifier for the
+  // buffer. By default, we always check with tflite::VerifyModelBuffer. If
+  // extra_verifier is supplied, the buffer is checked against the
+  // extra_verifier after the check against tflite::VerifyModelBuilder. The
+  // caller retains ownership of the buffer and should keep it alive until the
+  // returned object is destroyed. Caller retains ownership of `error_reporter`
+  // and must ensure its lifetime is longer than the FlatBufferModel instance.
   // Returns a nullptr in case of failure.
   static std::unique_ptr<FlatBufferModel> VerifyAndBuildFromBuffer(
       const char* buffer, size_t buffer_size,
-      TfLiteVerifier* verifier = nullptr,
+      TfLiteVerifier* extra_verifier = nullptr,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
   // Builds a model directly from a flatbuffer pointer. The caller retains
-- 
GitLab


From fbc072b45d4a64e8fcc13783fa6047144a8ace2d Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 14 Dec 2018 09:31:12 -0800
Subject: [PATCH 598/873] Fix flaky test by increasing sleep timer

PiperOrigin-RevId: 225554476
---
 .../python/data/experimental/kernel_tests/map_defun_op_test.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
index 85652bf00f..19830a23bb 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_defun_op_test.py
@@ -237,7 +237,7 @@ class MapDefunTest(test_base.DatasetTestBase):
       thread = self.checkedThread(
           self._assert_op_cancelled, args=(sess, map_defun_op))
       thread.start()
-      time.sleep(0.1)
+      time.sleep(0.2)
       sess.close()
       thread.join()
 
-- 
GitLab


From 9b65091ef0fc64152b1f93a680215e3073fb62b9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 09:44:12 -0800
Subject: [PATCH 599/873] Internal change.

PiperOrigin-RevId: 225556417
---
 .../kernel_tests/attention_wrapper_test.py    | 63 +++++++++++++++++++
 .../seq2seq/python/ops/attention_wrapper.py   | 19 +++++-
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
index 922f21b98b..d815f81f84 100644
--- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
+++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py
@@ -35,6 +35,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops import rnn
 from tensorflow.python.ops import rnn_cell
 from tensorflow.python.ops import variables
 from tensorflow.python.ops import variable_scope as vs
@@ -992,5 +993,67 @@ class AttentionWrapperTest(test.TestCase):
         expected_final_alignment_history=expected_final_alignment_history,
         name='testMultiAttention')
 
+  def testCustomizedAttention(self):
+    batch_size = 2
+    max_time = 3
+    num_units = 2
+    memory = constant_op.constant([[[1., 1.], [2., 2.], [3., 3.]],
+                                   [[4., 4.], [5., 5.], [6., 6.]]])
+    memory_sequence_length = constant_op.constant([3, 2])
+    attention_mechanism = wrapper.BahdanauAttention(num_units, memory,
+                                                    memory_sequence_length)
+
+    # Sets all returned values to be all ones.
+    def _customized_attention(unused_attention_mechanism, unused_cell_output,
+                              unused_attention_state, unused_attention_layer):
+      """Customized attention.
+
+      Returns:
+        attention: `Tensor` of shape [batch_size, num_units], attention output.
+        alignments: `Tensor` of shape [batch_size, max_time], sigma value for
+          each input memory (prob. function of input keys).
+        next_attention_state: A `Tensor` representing the next state for the
+          attention.
+      """
+      attention = array_ops.ones([batch_size, num_units])
+      alignments = array_ops.ones([batch_size, max_time])
+      next_attention_state = alignments
+      return attention, alignments, next_attention_state
+
+    attention_cell = wrapper.AttentionWrapper(
+        rnn_cell.LSTMCell(2),
+        attention_mechanism,
+        attention_layer_size=None,  # don't use attention layer.
+        output_attention=False,
+        alignment_history=(),
+        attention_fn=_customized_attention,
+        name='attention')
+    self.assertEqual(num_units, attention_cell.output_size)
+
+    initial_state = attention_cell.zero_state(
+        batch_size=2, dtype=dtypes.float32)
+    source_input_emb = array_ops.ones([2, 3, 2])
+    source_input_length = constant_op.constant([3, 2])
+
+    # 'state' is a tuple of
+    # (cell_state, h, attention, alignments, alignment_history, attention_state)
+    output, state = rnn.dynamic_rnn(
+        attention_cell,
+        inputs=source_input_emb,
+        sequence_length=source_input_length,
+        initial_state=initial_state,
+        dtype=dtypes.float32)
+
+    with self.session() as sess:
+      sess.run(variables.global_variables_initializer())
+      output_value, state_value = sess.run([output, state], feed_dict={})
+      self.assertAllEqual(np.array([2, 3, 2]), output_value.shape)
+      self.assertAllClose(np.array([[1., 1.], [1., 1.]]), state_value.attention)
+      self.assertAllClose(
+          np.array([[1., 1., 1.], [1., 1., 1.]]), state_value.alignments)
+      self.assertAllClose(
+          np.array([[1., 1., 1.], [1., 1., 1.]]), state_value.attention_state)
+
+
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
index 77e9f848b1..60ec3efffe 100644
--- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
+++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py
@@ -1088,7 +1088,8 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
                output_attention=True,
                initial_cell_state=None,
                name=None,
-               attention_layer=None):
+               attention_layer=None,
+               attention_fn=None):
     """Construct the `AttentionWrapper`.
 
     **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in
@@ -1132,7 +1133,9 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
         feed the context and cell output into the attention layer to generate
         attention at each time step. If attention_mechanism is a list,
         attention_layer_size must be a list of the same length. If
-        attention_layer is set, this must be None.
+        attention_layer is set, this must be None. If attention_fn is set,
+        it must guaranteed that the outputs of attention_fn also meet the
+        above requirements.
       alignment_history: Python boolean, whether to store alignment history
         from all time steps in the final output state (currently stored as a
         time major `TensorArray` on which you must call `stack()`).
@@ -1158,6 +1161,12 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
         the context as attention at each time step. If attention_mechanism is a
         list, attention_layer must be a list of the same length. If
         attention_layers_size is set, this must be None.
+      attention_fn: An optional callable function that allows users to provide
+        their own customized attention function, which takes input
+        (attention_mechanism, cell_output, attention_state, attention_layer) and
+        outputs (attention, alignments, next_attention_state). If provided,
+        the attention_layer_size should be the size of the outputs of
+        attention_fn.
 
     Raises:
       TypeError: `attention_layer_size` is not None and (`attention_mechanism`
@@ -1240,6 +1249,10 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
           tensor_shape.dimension_value(attention_mechanism.values.shape[-1])
           for attention_mechanism in attention_mechanisms)
 
+    if attention_fn is None:
+      attention_fn = _compute_attention
+    self._attention_fn = attention_fn
+
     self._cell = cell
     self._attention_mechanisms = attention_mechanisms
     self._cell_input_fn = cell_input_fn
@@ -1443,7 +1456,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell):
     all_attention_states = []
     maybe_all_histories = []
     for i, attention_mechanism in enumerate(self._attention_mechanisms):
-      attention, alignments, next_attention_state = _compute_attention(
+      attention, alignments, next_attention_state = self._attention_fn(
           attention_mechanism, cell_output, previous_attention_state[i],
           self._attention_layers[i] if self._attention_layers else None)
       alignment_history = previous_alignment_history[i].write(
-- 
GitLab


From d21ea6525475df90a6646a321c616264b214a1b8 Mon Sep 17 00:00:00 2001
From: Andy Ly <lyandy@google.com>
Date: Fri, 14 Dec 2018 10:01:11 -0800
Subject: [PATCH 600/873] [Grappler] Don't rewrite
 reduction(inner_function(foo)) to inner_function(opposite_reduction(foo)) if
 reduction is a fetch node.

PiperOrigin-RevId: 225558983
---
 .../optimizers/arithmetic_optimizer.cc        |  3 ++
 .../optimizers/arithmetic_optimizer_test.cc   | 29 +++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
index d35c00f29e..e28f991e2d 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc
@@ -2722,6 +2722,9 @@ class OptimizeMaxOrMinOfMonotonicStage : public ArithmeticOptimizerStage {
 
   Status TrySimplify(NodeDef* reduction_node,
                      string* simplified_node_name) override {
+    if (IsInPreserveSet(*reduction_node)) {
+      return Status::OK();
+    }
     NodeDef* inner_function;
     TF_RETURN_IF_ERROR(GetInputNode(reduction_node->input(0), &inner_function));
     // Optimize only if:
diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
index 35d22898f6..94c59c68c8 100644
--- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc
@@ -3490,6 +3490,35 @@ TEST_F(ArithmeticOptimizerTest,
   VerifyGraphsMatch(item.graph, output, __LINE__);
 }
 
+TEST_F(ArithmeticOptimizerTest,
+       OptimizeMaxOrMinOfMonotonicElementWiseDoNotChangeFetchNodeReduction) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  auto x = ops::Const(s.WithOpName("x"), {2, 3}, {1, 2});
+  Output reshape = ops::Reshape(s.WithOpName("reshape"), x, {-1});
+  Output y = ops::Neg(s.WithOpName("y"), reshape);
+  Output z = ops::Max(s.WithOpName("z"), y, {0});
+
+  GrapplerItem item;
+  item.fetch = {"z"};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch);
+  ASSERT_EQ(1, tensors_expected.size());
+
+  GraphDef output;
+  ArithmeticOptimizer optimizer;
+  EnableOnlyOptimizeMaxOrMinOfMonotonic(&optimizer);
+  OptimizeTwice(&optimizer, &item, &output);
+
+  // Should be a NoOp since we are not allowed to change the output of fetch
+  // nodes.
+  VerifyGraphsMatch(item.graph, output, __LINE__);
+
+  auto tensors = EvaluateNodes(output, item.fetch);
+  ASSERT_EQ(1, tensors.size());
+  test::ExpectTensorEqual<int>(tensors[0], tensors_expected[0]);
+  test::ExpectTensorEqual<int>(tensors[0], Tensor(-2));
+}
+
 TEST_F(ArithmeticOptimizerTest,
        OptimizeMaxOrMinOfMonotonicElementWiseNonIncreasing) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
-- 
GitLab


From 771552a3dfc3c8f115c6c71730db537193406e62 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Fri, 14 Dec 2018 10:09:34 -0800
Subject: [PATCH 601/873] Sort edges in convert graph as well

---
 .../contrib/tensorrt/convert/convert_graph.cc | 23 +++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 623cd79f32..67d39a6463 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -323,6 +323,13 @@ tensorflow::Status ConvertGraphDefToTensorRT(
   return Status::OK();
 }
 
+struct EdgePtrCompare {
+  bool operator()(const tensorflow::Edge* lhs,
+                  const tensorflow::Edge* rhs) const {
+    return (lhs->id() < rhs->id());
+  }
+};
+
 // Function to get subsegment information structure.
 tensorflow::Status GetEngineInfo(
     const tensorflow::Graph* g,
@@ -361,8 +368,12 @@ tensorflow::Status GetEngineInfo(
     }
     const int node_id = node->id();
     subgraph_node_ids.push_back(node_id);
-    // Create input connections.
-    for (const auto edge : node->in_edges()) {
+    // Create input connections. Sort edges first to make determnistic since
+    // in_edges is a set of pointers.
+    std::vector<const tensorflow::Edge*> in_edges(node->in_edges().begin(),
+                                                  node->in_edges().end());
+    std::sort(in_edges.begin(), in_edges.end(), EdgePtrCompare());
+    for (const auto edge : in_edges) {
       auto input_node = edge->src();
       if (input_node->IsSource() || segment_nodes.count(input_node->name())) {
         continue;
@@ -410,8 +421,12 @@ tensorflow::Status GetEngineInfo(
             node_id, edge->dst_input(), /*input_edge=*/true, port);
       }
     }
-    // Create output connections.
-    for (const auto edge : node->out_edges()) {
+    // Create output connections. Sort edges first to make determnistic since
+    // out_edges is a set of pointers.
+    std::vector<const tensorflow::Edge*> out_edges(node->out_edges().begin(),
+                                                   node->out_edges().end());
+    std::sort(out_edges.begin(), out_edges.end(), EdgePtrCompare());
+    for (const auto edge : out_edges) {
       auto output_node = edge->dst();
       if (output_node->IsSink() || segment_nodes.count(output_node->name())) {
         continue;
-- 
GitLab


From 607c744519a90930fb9984fdc67810b17055af56 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 10:18:20 -0800
Subject: [PATCH 602/873] Internal change.

PiperOrigin-RevId: 225562069
---
 tensorflow/core/BUILD | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index f98280c3ec..8bf1480d33 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -1626,6 +1626,9 @@ filegroup(
             "**/*main.cc",
             "debug/**/*",
             "framework/op_gen_*",
+            "framework/node_def_util.*",
+            "framework/op_kernel.*",
+            "framework/dataset.*",
             "lib/jpeg/**/*",
             "lib/png/**/*",
             "lib/gif/**/*",
@@ -1668,6 +1671,9 @@ filegroup(
             "common_runtime/**/*.cc",
             "graph/**/*.h",
             "graph/**/*.cc",
+            "framework/node_def_util.*",
+            "framework/op_kernel.*",
+            "framework/dataset.*",
         ],
         exclude = [
             "**/*test.*",
-- 
GitLab


From cf01e6479e333216e70cc253ca9263c629dd8d08 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Fri, 14 Dec 2018 10:20:19 -0800
Subject: [PATCH 603/873] Additional checks to handle calls to custom keras
 layer objects. In general, we do not support converting objects altogether.
 However, we do support converting callable ones, that is, we just convert
 their __call__ method. This change verifies whether that method is defined in
 a whitelisted module. It specifically applies to calling custom keras layers.

PiperOrigin-RevId: 225562394
---
 tensorflow/python/autograph/impl/api.py       | 16 ++++----
 .../python/autograph/impl/conversion.py       | 38 +++++++++++++++++++
 .../python/autograph/pyct/inspect_utils.py    | 37 +++++++++++++-----
 .../autograph/pyct/inspect_utils_test.py      | 21 +++++-----
 4 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index c6c137c8fd..a98c1dfe9a 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -187,8 +187,8 @@ def converted_call(f, owner, options, *args, **kwargs):
     # When conversion is skipped, `self` is not necessary, because the
     # original bound method is being executed. This code removes it.
     if tf_inspect.ismethod(f) and args:
-      f_class = inspect_utils.getmethodclass(f)
-      if args[0] is f_class:
+      f_self = inspect_utils.getmethodself(f)
+      if args[0] is f_self:
         args = args[1:]
 
     return f(*args, **kwargs)
@@ -215,10 +215,10 @@ def converted_call(f, owner, options, *args, **kwargs):
     # Regular functions
     target_entity = f
     arg_map_target = f
-    f_class = inspect_utils.getmethodclass(f)
+    f_self = inspect_utils.getmethodself(f)
 
     # TODO(b/119246461): This may be more elegantly handled using __get__?
-    if f_class is not None:
+    if f_self is not None:
       # If this is a method call, it may or may not include self.
       #
       # Example when self is included:
@@ -233,11 +233,11 @@ def converted_call(f, owner, options, *args, **kwargs):
         # When the owner is not specified, use the result of
         # inspect_utils.getmethodclass.
         # TODO(b/119246461): Make sure an owner is always specified.
-        if not args or args[0] is not f_class:
-          effective_args = (f_class,) + args
+        if not args or args[0] is not f_self:
+          effective_args = (f_self,) + args
         else:
-          effective_args = (f_class,) + args[1:]
-      partial_types = (f_class,)
+          effective_args = (f_self,) + args[1:]
+      partial_types = (f_self,)
     else:
       effective_args = args
       partial_types = ()
diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index d0a12df6eb..733d4f1c71 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -80,13 +80,49 @@ def is_whitelisted_for_graph(o):
     m = functools
   else:
     m = tf_inspect.getmodule(o)
+  if not hasattr(m, '__name__'):
+    logging.vlog(1, '%s is NOT whitelisted for graph: unknown module name', o)
+    return False
+
   for prefix, in config.DEFAULT_UNCOMPILED_MODULES:
     if m.__name__.startswith(prefix):
+      logging.vlog(1, '%s is whitelisted: name starts with "%s"', o, prefix)
       return True
 
   if hasattr(o, 'autograph_info__'):
     return True
 
+  if (not inspect_utils.isweakrefself(o) and not tf_inspect.isclass(o) and
+      hasattr(o, '__call__') and hasattr(o, '__class__')):
+    # Callable objects: whitelisted if their __call__ method is.
+    retval = is_whitelisted_for_graph(o.__call__)
+    logging.vlog(1, '%s is whitelisted: object __call__ whitelisted', o)
+    return retval
+
+  if tf_inspect.ismethod(o):
+    # Methods of whitelisted classes are also whitelisted, even if they are
+    # bound via user subclasses.
+    #
+    # For example, suppose `tf.Foo` has a method called `bar`, and `baz` is
+    # defined as below. `tf.Foo` is whitelisted. Then `baz.bar` is also
+    # whitelisted.
+    #
+    #   class Custom(tf.Foo):
+    #     pass
+    #
+    #   baz = Custom()
+    #
+    # For the example above, if `Custom` did overload `bar`, then it would no
+    # longer be whitelisted.
+
+    owner_class = inspect_utils.getmethodclass(o)
+    if owner_class is not None:
+      owner_class = inspect_utils.getdefiningclass(o, owner_class)
+      if is_whitelisted_for_graph(owner_class):
+        logging.vlog(1, '%s is whitelisted: owner is whitelisted %s', o,
+                     owner_class)
+        return True
+
   if inspect_utils.isnamedtuple(o):
     # Due to the way they're constructed, namedtuple types cannot be converted
     # because they don't expose source code. But we assume they are safe for
@@ -96,8 +132,10 @@ def is_whitelisted_for_graph(o):
           logging.level_warning(),
           'Entity {} looks like a namedtuple subclass. If it has any custom'
           ' methods, they will not be converted by AutoGraph.'.format(o), 1)
+    logging.vlog(1, '%s is whitelisted: named tuple', o)
     return True
 
+  logging.vlog(1, '%s is NOT whitelisted for graph', o)
   return False
 
 
diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 360dd83b5e..07453e8d48 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -183,6 +183,27 @@ def getdefiningclass(m, owner_class):
   return owner_class
 
 
+def isweakrefself(m):
+  """Tests whether an object is a "weakref self" wrapper, see getmethodself."""
+  return hasattr(m, '__self__') and hasattr(m.__self__, 'ag_self_weakref__')
+
+
+def getmethodself(m):
+  """An extended version of inspect.getmethodclass."""
+  if not hasattr(m, '__self__'):
+    return None
+  if m.__self__ is None:
+    return None
+
+  # A fallback allowing methods to be actually bound to a type different
+  # than __self__. This is useful when a strong reference from the method
+  # to the object is not desired, for example when caching is involved.
+  if isweakrefself(m):
+    return m.__self__.ag_self_weakref__()
+
+  return m.__self__
+
+
 def getmethodclass(m):
   """Resolves a function's owner, e.g. a method's class.
 
@@ -213,16 +234,12 @@ def getmethodclass(m):
     if isinstance(m.__class__, six.class_types):
       return m.__class__
 
-  # Instance method and class methods: should be bound to a non-null "self".
-  if hasattr(m, '__self__'):
-    if m.__self__ is not None:
-      # A fallback allowing methods to be actually bound to a type different
-      # than __self__. This is useful when a strong reference from the method
-      # to the object is not desired, for example when caching is involved.
-      if hasattr(m.__self__, 'ag_self_weakref__'):
-        return m.__self__.ag_self_weakref__()
-
-      return m.__self__
+  # Instance method and class methods: return the class of "self".
+  m_self = getmethodself(m)
+  if m_self is not None:
+    if tf_inspect.isclass(m_self):
+      return m_self
+    return m_self.__class__
 
   # Class, static and unbound methods: search all defined classes in any
   # namespace. This is inefficient but more robust method.
diff --git a/tensorflow/python/autograph/pyct/inspect_utils_test.py b/tensorflow/python/autograph/pyct/inspect_utils_test.py
index 420a20c22f..fd2cd04a22 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils_test.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils_test.py
@@ -277,16 +277,16 @@ class InspectUtilsTest(test.TestCase):
     test_obj = TestClass()
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.member_function),
-        test_obj)
+        TestClass)
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.decorated_member),
-        test_obj)
+        TestClass)
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.fn_decorated_member),
-        test_obj)
+        TestClass)
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.wrap_decorated_member),
-        test_obj)
+        TestClass)
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.static_method),
         TestClass)
@@ -335,16 +335,16 @@ class InspectUtilsTest(test.TestCase):
     test_obj = LocalClass()
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.member_function),
-        test_obj)
+        LocalClass)
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.decorated_member),
-        test_obj)
+        LocalClass)
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.fn_decorated_member),
-        test_obj)
+        LocalClass)
     self.assertEqual(
         inspect_utils.getmethodclass(test_obj.wrap_decorated_member),
-        test_obj)
+        LocalClass)
 
   def test_getmethodclass_callables(self):
     class TestCallable(object):
@@ -367,12 +367,13 @@ class InspectUtilsTest(test.TestCase):
       return self
 
     bound_method = types.MethodType(test_fn, WeakrefWrapper())
-    self.assertEqual(inspect_utils.getmethodclass(bound_method), test_obj)
+    self.assertEqual(inspect_utils.getmethodclass(bound_method), TestClass)
 
   def test_getmethodclass_no_bool_conversion(self):
 
     tensor = constant_op.constant([1])
-    self.assertEqual(inspect_utils.getmethodclass(tensor.get_shape), tensor)
+    self.assertEqual(
+        inspect_utils.getmethodclass(tensor.get_shape), type(tensor))
 
   def test_getdefiningclass(self):
     class Superclass(object):
-- 
GitLab


From cb716972aef4fc8b4d9067b85f6fee680f924e04 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Fri, 14 Dec 2018 10:41:10 -0800
Subject: [PATCH 604/873] Remove extra parenthesis

---
 tensorflow/contrib/tensorrt/convert/convert_graph.cc | 2 +-
 tensorflow/contrib/tensorrt/segment/segment.cc       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 67d39a6463..746514b930 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -326,7 +326,7 @@ tensorflow::Status ConvertGraphDefToTensorRT(
 struct EdgePtrCompare {
   bool operator()(const tensorflow::Edge* lhs,
                   const tensorflow::Edge* rhs) const {
-    return (lhs->id() < rhs->id());
+    return lhs->id() < rhs->id();
   }
 };
 
diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc
index a32356710d..084a96e0fa 100644
--- a/tensorflow/contrib/tensorrt/segment/segment.cc
+++ b/tensorflow/contrib/tensorrt/segment/segment.cc
@@ -232,14 +232,14 @@ SimpleGraph::~SimpleGraph() {
 // cause a mismatch between the calibration tables of the conversions.
 struct SimpleEdgePtrCompare {
   bool operator()(const SimpleEdge* lhs, const SimpleEdge* rhs) const {
-    return (lhs->id() < rhs->id());
+    return lhs->id() < rhs->id();
   }
 };
 
 struct NodePtrCompare {
   bool operator()(const tensorflow::Node* lhs,
                   const tensorflow::Node* rhs) const {
-    return (lhs->name() < rhs->name());
+    return lhs->name() < rhs->name();
   }
 };
 
-- 
GitLab


From c7eca709a20edf7c03e56ca106f23a3b277b6fdb Mon Sep 17 00:00:00 2001
From: Michael Banfield <micban@google.com>
Date: Fri, 14 Dec 2018 10:44:14 -0800
Subject: [PATCH 605/873] Only create a GCS directory object if the object does
 not already exist.

PiperOrigin-RevId: 225566741
---
 .../core/platform/cloud/gcs_file_system.cc    |  9 ++++++-
 .../platform/cloud/gcs_file_system_test.cc    | 24 ++++++++++---------
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index c61b68aeeb..26eff8f834 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -1433,9 +1433,16 @@ Status GcsFileSystem::CreateDir(const string& dirname) {
                      : errors::NotFound("The specified bucket ", dirname,
                                         " was not found.");
   }
+
+  const string dirname_with_slash = MaybeAppendSlash(dirname);
+
+  if (FileExists(dirname_with_slash).ok()) {
+    return errors::AlreadyExists(dirname);
+  }
+
   // Create a zero-length directory marker object.
   std::unique_ptr<WritableFile> file;
-  TF_RETURN_IF_ERROR(NewWritableFile(MaybeAppendSlash(dirname), &file));
+  TF_RETURN_IF_ERROR(NewWritableFile(dirname_with_slash, &file));
   TF_RETURN_IF_ERROR(file->Close());
   return Status::OK();
 }
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index 702802b185..f0f5f592fa 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/core/platform/cloud/gcs_file_system.h"
 #include <fstream>
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/cloud/http_request_fake.h"
@@ -2789,6 +2790,12 @@ TEST(GcsFileSystemTest, IsDirectory_BucketNotFound) {
 TEST(GcsFileSystemTest, CreateDir_Folder) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
+           "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
+           "subpath%2F?fields=size%2Cgeneration%2Cupdated\n"
+           "Auth Token: fake_token\n"
+           "Timeouts: 5 1 10\n",
+           "{}"),
+       new FakeHttpRequest(
            "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
            "uploadType=resumable&name=subpath%2F\n"
            "Auth Token: fake_token\n"
@@ -2802,18 +2809,12 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
                            "Put body: \n",
                            ""),
        new FakeHttpRequest(
-           "Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?"
-           "uploadType=resumable&name=subpath%2F\n"
+           "Uri: https://www.googleapis.com/storage/v1/b/bucket/o/"
+           "subpath%2F?fields=size%2Cgeneration%2Cupdated\n"
            "Auth Token: fake_token\n"
-           "Header X-Upload-Content-Length: 0\n"
-           "Post: yes\n"
            "Timeouts: 5 1 10\n",
-           "", {{"Location", "https://custom/upload/location"}}),
-       new FakeHttpRequest("Uri: https://custom/upload/location\n"
-                           "Auth Token: fake_token\n"
-                           "Timeouts: 5 1 30\n"
-                           "Put body: \n",
-                           "")});
+           strings::StrCat("{\"size\": \"1010\",\"generation\": \"1\","
+                           "\"updated\": \"2016-04-29T23:15:24.896Z\"}"))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
                        new FakeHttpRequestFactory(&requests)),
@@ -2826,7 +2827,8 @@ TEST(GcsFileSystemTest, CreateDir_Folder) {
                    nullptr /* gcs additional header */);
 
   TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath"));
-  TF_EXPECT_OK(fs.CreateDir("gs://bucket/subpath/"));
+  EXPECT_EQ(errors::AlreadyExists("gs://bucket/subpath/"),
+            fs.CreateDir("gs://bucket/subpath/"));
 }
 
 TEST(GcsFileSystemTest, CreateDir_Bucket) {
-- 
GitLab


From 240b770c2a5fce86110b3030d775ccd4b740178c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 10:48:17 -0800
Subject: [PATCH 606/873] Add `nest.flatten_with_tuple_paths` and
 `nest.map_structure_with_tuple_paths`

PiperOrigin-RevId: 225567465
---
 tensorflow/python/util/nest.py      |  71 +++++++++++++++++-
 tensorflow/python/util/nest_test.py | 111 +++++++++++++++++++---------
 2 files changed, 148 insertions(+), 34 deletions(-)

diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index be8b0f1949..70e5ebb3b6 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -413,6 +413,51 @@ def map_structure_with_paths(func, *structure, **kwargs):
       the type of sequence in any of their substructures.
     ValueError: If no structures are provided.
   """
+  return _map_structure_with_tuple_or_string_paths(
+      use_string_paths=True, func=func, structure=structure, kwargs=kwargs)
+
+
+def map_structure_with_tuple_paths(func, *structure, **kwargs):
+  """Applies `func` to each entry in `structure` and returns a new structure.
+
+  Applies `func(tuple_path, x[0], x[1], ..., **kwargs)` where `x[i]` is an entry
+  in `structure[i]` and `tuple_path` is a tuple of indices and/or dictionary
+  keys (as returned by `nest.yield_flat_paths`), which uniquely specifies the
+  common path to x[i] in the structures. All structures in `structure` must have
+  the same arity, and the return value will contain the results in the same
+  structure. Special kwarg `check_types` determines whether the types of
+  iterables within the structure must be the same-- see **kwargs definition
+  below.
+
+  Args:
+    func: A callable with the signature `func(tuple_path, *values, **kwargs)`
+      that is evaluated on the leaves of the structure.
+    *structure: A variable number of compatible structures to process.
+    **kwargs: Optional kwargs to be passed through to func. Special kwarg
+      `check_types` is not passed to func, but instead determines whether the
+      types of iterables within the structures have to be same (e.g.
+      `map_structure(func, [1], (1,))` raises a `TypeError` exception). To allow
+      this set this argument to `False`.
+
+  Returns:
+    A structure of the same form as the input structures whose leaves are the
+    result of evaluating func on corresponding leaves of the input structures.
+
+  Raises:
+    TypeError: If `func` is not callable or if the structures do not match
+      each other by depth tree.
+    TypeError: If `check_types` is not `False` and the two structures differ in
+      the type of sequence in any of their substructures.
+    ValueError: If no structures are provided.
+  """
+  return _map_structure_with_tuple_or_string_paths(
+      use_string_paths=False, func=func, structure=structure, kwargs=kwargs)
+
+
+def _map_structure_with_tuple_or_string_paths(
+    use_string_paths, func, structure, kwargs):
+  """Implements `map_structure` with either tuple or string paths."""
+
   if not callable(func):
     raise TypeError("func must be callable, got: %s" % func)
   if not structure:
@@ -422,9 +467,14 @@ def map_structure_with_paths(func, *structure, **kwargs):
   for other in structure[1:]:
     assert_same_structure(structure[0], other, check_types=check_types)
 
+  if use_string_paths:
+    flatten_func = flatten_with_joined_string_paths
+  else:
+    flatten_func = flatten_with_tuple_paths
+
   # First set paths_and_values to:
   # [[(p11, v11), ... (p1n, v1n)], ... [(pm1, vm1), ... (pmn, vmn)]]
-  paths_and_values = [flatten_with_joined_string_paths(s) for s in structure]
+  paths_and_values = [flatten_func(s) for s in structure]
 
   # Now zip(*paths_and_values) would be:
   # [((p11, v11), ... (pm1, vm1)), ... ((p1n, v1n), ... (pmn, vmn))]
@@ -820,5 +870,24 @@ def flatten_with_joined_string_paths(structure, separator="/"):
   return list(zip(flat_string_paths, flatten(structure)))
 
 
+def flatten_with_tuple_paths(structure):
+  """Returns a list of `(tuple_path, leaf_element)` tuples.
+
+  The order of pairs produced matches that of `nest.flatten`. This allows you
+  to flatten a nested structure while keeping information about where in the
+  structure each data element was located. See `nest.yield_flat_paths`
+  for more information about tuple paths.
+
+  Args:
+    structure: the nested structure to flatten.
+
+  Returns:
+    A list of `(tuple_path, leaf_element)` tuples. Each `tuple_path` is a tuple
+    of indices and/or dictionary keys that uniquely specify the path to
+    `leaf_element` within `structure`.
+  """
+  return list(zip(yield_flat_paths(structure), flatten(structure)))
+
+
 _pywrap_tensorflow.RegisterType("Mapping", _collections.Mapping)
 _pywrap_tensorflow.RegisterType("Sequence", _collections.Sequence)
diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py
index d0d0c5f793..83fa5dd660 100644
--- a/tensorflow/python/util/nest_test.py
+++ b/tensorflow/python/util/nest_test.py
@@ -209,12 +209,12 @@ class NestTest(parameterized.TestCase, test.TestCase):
   def testFlatten_numpyIsNotFlattened(self):
     structure = np.array([1, 2, 3])
     flattened = nest.flatten(structure)
-    self.assertEqual(len(flattened), 1)
+    self.assertLen(flattened, 1)
 
   def testFlatten_stringIsNotFlattened(self):
     structure = "lots of letters"
     flattened = nest.flatten(structure)
-    self.assertEqual(len(flattened), 1)
+    self.assertLen(flattened, 1)
     unflattened = nest.pack_sequence_as("goodbye", flattened)
     self.assertEqual(structure, unflattened)
 
@@ -791,37 +791,46 @@ class NestTest(parameterized.TestCase, test.TestCase):
       expected = inputs_expected["expected"]
       self.assertEqual(list(nest.yield_flat_paths(inputs)), expected)
 
-  def testFlattenWithStringPaths(self):
-    for inputs_expected in (
-        {"inputs": [], "expected": []},
-        {"inputs": [23, "42"], "expected": [("0", 23), ("1", "42")]},
-        {"inputs": [[[[108]]]], "expected": [("0/0/0/0", 108)]}):
-      inputs = inputs_expected["inputs"]
-      expected = inputs_expected["expected"]
-      self.assertEqual(
-          nest.flatten_with_joined_string_paths(inputs, separator="/"),
-          expected)
-
-  # Need a separate test for namedtuple as we can't declare tuple definitions
-  # in the @parameterized arguments.
-  def testFlattenNamedTuple(self):
-    # pylint: disable=invalid-name
-    Foo = collections.namedtuple("Foo", ["a", "b"])
-    Bar = collections.namedtuple("Bar", ["c", "d"])
-    # pylint: enable=invalid-name
-    test_cases = [
-        (Foo(a=3, b=Bar(c=23, d=42)),
-         [("a", 3), ("b/c", 23), ("b/d", 42)]),
-        (Foo(a=Bar(c=23, d=42), b=Bar(c=0, d="something")),
-         [("a/c", 23), ("a/d", 42), ("b/c", 0), ("b/d", "something")]),
-        (Bar(c=42, d=43),
-         [("c", 42), ("d", 43)]),
-        (Bar(c=[42], d=43),
-         [("c/0", 42), ("d", 43)]),
-    ]
-    for inputs, expected in test_cases:
-      self.assertEqual(
-          list(nest.flatten_with_joined_string_paths(inputs)), expected)
+  # We cannot define namedtuples within @parameterized argument lists.
+  # pylint: disable=invalid-name
+  Foo = collections.namedtuple("Foo", ["a", "b"])
+  Bar = collections.namedtuple("Bar", ["c", "d"])
+  # pylint: enable=invalid-name
+
+  @parameterized.parameters([
+      dict(inputs=[], expected=[]),
+      dict(inputs=[23, "42"], expected=[("0", 23), ("1", "42")]),
+      dict(inputs=[[[[108]]]], expected=[("0/0/0/0", 108)]),
+      dict(inputs=Foo(a=3, b=Bar(c=23, d=42)),
+           expected=[("a", 3), ("b/c", 23), ("b/d", 42)]),
+      dict(inputs=Foo(a=Bar(c=23, d=42), b=Bar(c=0, d="thing")),
+           expected=[("a/c", 23), ("a/d", 42), ("b/c", 0), ("b/d", "thing")]),
+      dict(inputs=Bar(c=42, d=43),
+           expected=[("c", 42), ("d", 43)]),
+      dict(inputs=Bar(c=[42], d=43),
+           expected=[("c/0", 42), ("d", 43)]),
+  ])
+  def testFlattenWithStringPaths(self, inputs, expected):
+    self.assertEqual(
+        nest.flatten_with_joined_string_paths(inputs, separator="/"),
+        expected)
+
+  @parameterized.parameters([
+      dict(inputs=[], expected=[]),
+      dict(inputs=[23, "42"], expected=[((0,), 23), ((1,), "42")]),
+      dict(inputs=[[[[108]]]], expected=[((0, 0, 0, 0), 108)]),
+      dict(inputs=Foo(a=3, b=Bar(c=23, d=42)),
+           expected=[(("a",), 3), (("b", "c"), 23), (("b", "d"), 42)]),
+      dict(inputs=Foo(a=Bar(c=23, d=42), b=Bar(c=0, d="thing")),
+           expected=[(("a", "c"), 23), (("a", "d"), 42), (("b", "c"), 0),
+                     (("b", "d"), "thing")]),
+      dict(inputs=Bar(c=42, d=43),
+           expected=[(("c",), 42), (("d",), 43)]),
+      dict(inputs=Bar(c=[42], d=43),
+           expected=[(("c", 0), 42), (("d",), 43)]),
+  ])
+  def testFlattenWithTuplePaths(self, inputs, expected):
+    self.assertEqual(nest.flatten_with_tuple_paths(inputs), expected)
 
   @parameterized.named_parameters(
       ("tuples", (1, 2), (3, 4), True, (("0", 4), ("1", 6))),
@@ -852,6 +861,42 @@ class NestTest(parameterized.TestCase, test.TestCase):
     with self.assertRaises(error_type):
       nest.map_structure_with_paths(lambda path, *s: 0, s1, s2)
 
+  @parameterized.named_parameters([
+      dict(testcase_name="Tuples", s1=(1, 2), s2=(3, 4),
+           check_types=True, expected=(((0,), 4), ((1,), 6))),
+      dict(testcase_name="Dicts", s1={"a": 1, "b": 2}, s2={"b": 4, "a": 3},
+           check_types=True, expected={"a": (("a",), 4), "b": (("b",), 6)}),
+      dict(testcase_name="Mixed", s1=(1, 2), s2=[3, 4],
+           check_types=False, expected=(((0,), 4), ((1,), 6))),
+      dict(testcase_name="Nested",
+           s1={"a": [2, 3], "b": [1, 2, 3]},
+           s2={"b": [5, 6, 7], "a": [8, 9]},
+           check_types=True,
+           expected={"a": [(("a", 0), 10), (("a", 1), 12)],
+                     "b": [(("b", 0), 6), (("b", 1), 8), (("b", 2), 10)]}),
+  ])
+  def testMapWithTuplePathsCompatibleStructures(
+      self, s1, s2, check_types, expected):
+    def path_and_sum(path, *values):
+      return path, sum(values)
+    result = nest.map_structure_with_tuple_paths(
+        path_and_sum, s1, s2, check_types=check_types)
+    self.assertEqual(expected, result)
+
+  @parameterized.named_parameters([
+      dict(testcase_name="Tuples", s1=(1, 2), s2=(3, 4, 5),
+           error_type=ValueError),
+      dict(testcase_name="Dicts", s1={"a": 1}, s2={"b": 2},
+           error_type=ValueError),
+      dict(testcase_name="Mixed", s1=(1, 2), s2=[3, 4], error_type=TypeError),
+      dict(testcase_name="Nested",
+           s1={"a": [2, 3], "b": [1, 3]}, s2={"b": [5, 6, 7], "a": [8, 9]},
+           error_type=ValueError)
+  ])
+  def testMapWithTuplePathsIncompatibleStructures(self, s1, s2, error_type):
+    with self.assertRaises(error_type):
+      nest.map_structure_with_tuple_paths(lambda path, *s: 0, s1, s2)
+
 
 class NestBenchmark(test.Benchmark):
 
-- 
GitLab


From 0b1086a6f8cc51c5a230a299ee26c5353a7600cd Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 14 Dec 2018 11:09:13 -0800
Subject: [PATCH 607/873] Some renames for tf.contrib.rnn symbols.

PiperOrigin-RevId: 225571582
---
 tensorflow/tools/compatibility/tf_upgrade_v2.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py
index 427e22b721..06a7bb781d 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py
@@ -470,6 +470,10 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             "tf.data.experimental.unbatch",
         "tf.contrib.data.unique":
             "tf.data.experimental.unique",
+        "tf.contrib.rnn.RNNCell":
+            "tf.nn.rnn_cell.RNNCell",
+        "tf.contrib.rnn.LSTMStateTuple":
+            "tf.nn.rnn_cell.LSTMStateTuple",
         "tf.contrib.framework.sort":
             "tf.sort",
         "tf.contrib.framework.argsort":
-- 
GitLab


From f8d64de82e6a9f4c9f374a2d14a5fa206899465f Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Fri, 14 Dec 2018 11:14:17 -0800
Subject: [PATCH 608/873] Temporary disable inlining functions with functional
 control flow.

PiperOrigin-RevId: 225572476
---
 tensorflow/core/grappler/op_types.cc               | 10 ++++++++++
 tensorflow/core/grappler/op_types.h                |  2 ++
 .../core/grappler/optimizers/function_optimizer.cc | 14 ++++++++++++++
 3 files changed, 26 insertions(+)

diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc
index 38fc1fff32..b201c3a717 100644
--- a/tensorflow/core/grappler/op_types.cc
+++ b/tensorflow/core/grappler/op_types.cc
@@ -247,6 +247,11 @@ bool IsIdentityNSingleInput(const NodeDef& node) {
          node.attr().at("T").list().type_size() == 1;
 }
 
+bool IsIf(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "If" || op == "StatelessIf";
+}
+
 bool IsIgamma(const NodeDef& node) { return node.op() == "Igamma"; }
 
 bool IsIgammac(const NodeDef& node) { return node.op() == "Igammac"; }
@@ -524,6 +529,11 @@ bool IsVariable(const NodeDef& node) {
          op == "VarHandleOp" || op == "ReadVariableOp";
 }
 
+bool IsWhile(const NodeDef& node) {
+  const auto& op = node.op();
+  return op == "While" || op == "StatelessWhile";
+}
+
 bool IsZeta(const NodeDef& node) { return node.op() == "Zeta"; }
 
 namespace {
diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h
index 67897e8512..cb7781ec6e 100644
--- a/tensorflow/core/grappler/op_types.h
+++ b/tensorflow/core/grappler/op_types.h
@@ -75,6 +75,7 @@ bool IsHistogramSummary(const NodeDef& node);
 bool IsIdentity(const NodeDef& node);
 bool IsIdentityN(const NodeDef& node);
 bool IsIdentityNSingleInput(const NodeDef& node);
+bool IsIf(const NodeDef& node);
 bool IsIgamma(const NodeDef& node);
 bool IsIgammac(const NodeDef& node);
 bool IsImag(const NodeDef& node);
@@ -167,6 +168,7 @@ bool IsTruncateDiv(const NodeDef& node);
 bool IsTruncateMod(const NodeDef& node);
 bool IsUnpack(const NodeDef& node);
 bool IsVariable(const NodeDef& node);
+bool IsWhile(const NodeDef& node);
 bool IsZeta(const NodeDef& node);
 
 // Return true if the op is an aggregation (e.g. Add, AddN).
diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc
index 7069e5ea20..4ec68c7543 100644
--- a/tensorflow/core/grappler/optimizers/function_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc
@@ -1270,6 +1270,20 @@ Status IsInlinableIndirectFunctionCall(const FunctionOptimizerContext& ctx,
         SummarizeNodeDef(func_node));
   }
 
+  // TODO(b/120991525, b/120986912): We need to lower `If` and `While` nodes to
+  // `Switch` nodes after function inlining (one more PRE_PLACEMENT pass?), but
+  // because of the reason described above we are not sure that it's safe, for
+  // now just disable inlining functions with functional control flow.
+  const auto is_functional_ctrl_flow_op = [](const NodeDef& node) {
+    return IsIf(node) || IsWhile(node);
+  };
+  if (absl::c_any_of(func.node_def(), is_functional_ctrl_flow_op)) {
+    return errors::FailedPrecondition(
+        "Can't inline function with `If` or `While` nodes in the function "
+        "body: ",
+        SummarizeNodeDef(func_node));
+  }
+
   return Status::OK();
 }
 
-- 
GitLab


From 4caa17185fb39420c87373af77b538e9016be46d Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Fri, 14 Dec 2018 11:17:11 -0800
Subject: [PATCH 609/873] Eager coverage for data experimental tests, remaining
 half.

PiperOrigin-RevId: 225572958
---
 .../bucket_by_sequence_length_test.py         |  76 +-
 .../dense_to_sparse_batch_test.py             | 146 ++-
 .../kernel_tests/indexed_dataset_ops_test.py  |  45 +-
 .../make_batched_features_dataset_test.py     | 238 +++--
 .../make_tf_record_dataset_test.py            | 127 ++-
 .../kernel_tests/map_and_batch_test.py        | 367 ++++----
 .../kernel_tests/parallel_interleave_test.py  | 846 ++++++++----------
 .../reader_dataset_ops_test_base.py           |  50 +-
 .../kernel_tests/sql_dataset_test.py          | 726 +++++++--------
 .../kernel_tests/sql_dataset_test_base.py     |  21 +-
 .../python/data/kernel_tests/test_base.py     |   2 +-
 11 files changed, 1172 insertions(+), 1472 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
index 8264dee3c1..3324243c54 100644
--- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py
@@ -22,10 +22,12 @@ import random
 from tensorflow.python.data.experimental.ops import grouping
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
@@ -69,9 +71,11 @@ def _get_record_shape(sparse):
   return tensor_shape.TensorShape([None])
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class BucketBySequenceLengthTest(test_base.DatasetTestBase):
 
-  def testBucket(self):
+  # TODO(b/117581999): add eager coverage.
+  def testSkipEagerBucket(self):
 
     boundaries = [10, 20, 30]
     batch_sizes = [10, 8, 4, 2]
@@ -105,14 +109,14 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
               boundaries,
               batch_sizes,
               no_padding=no_padding))
-      batch, = dataset_ops.make_one_shot_iterator(dataset).get_next()
-
-      with self.cached_session() as sess:
-        batches = []
-        for _ in range(4):
-          batches.append(self.evaluate(batch))
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(batch)
+      get_next = self.getNext(dataset)
+      batches = []
+      for _ in range(4):
+        batch, = self.evaluate(get_next())
+        batches.append(batch)
+      with self.assertRaises(errors.OutOfRangeError):
+        self.evaluate(get_next())
+
       batch_sizes_val = []
       lengths_val = []
       for batch in batches:
@@ -121,8 +125,9 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
         length = shape[1]
         batch_sizes_val.append(batch_size)
         lengths_val.append(length)
-        sum_check = batch.values.sum() if no_padding else batch.sum()
-        self.assertEqual(sum_check, batch_size * length - 1)
+        if not context.executing_eagerly():
+          sum_check = batch.values.sum() if no_padding else batch.sum()
+          self.assertEqual(sum_check, batch_size * length - 1)
       self.assertEqual(sum(batch_sizes_val), sum(batch_sizes))
       self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val))
       self.assertEqual(sorted(lengths), sorted(lengths_val))
@@ -155,14 +160,15 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
             grouping.bucket_by_sequence_length(
                 element_len, boundaries, batch_sizes,
                 pad_to_bucket_boundary=True))
-    batch, = dataset_ops.make_one_shot_iterator(dataset).get_next()
+    get_next = self.getNext(dataset)
+
+    batches = []
+    for _ in range(3):
+      batch, = self.evaluate(get_next())
+      batches.append(batch)
+    with self.assertRaisesOpError("bucket_boundaries"):
+      self.evaluate(get_next())
 
-    with self.cached_session() as sess:
-      batches = []
-      for _ in range(3):
-        batches.append(self.evaluate(batch))
-      with self.assertRaisesOpError("bucket_boundaries"):
-        self.evaluate(batch)
     batch_sizes_val = []
     lengths_val = []
     for batch in batches:
@@ -192,14 +198,14 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
             grouping.bucket_by_sequence_length(
                 element_len, boundaries, batch_sizes,
                 pad_to_bucket_boundary=True))
-    batch, = dataset_ops.make_one_shot_iterator(dataset).get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      batches = []
-      for _ in range(5):
-        batches.append(self.evaluate(batch))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(batch)
+    batches = []
+    for _ in range(5):
+      batch, = self.evaluate(get_next())
+      batches.append(batch)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
     self.assertAllEqual(batches[0], [[1, 0],
                                      [1, 1]])
@@ -243,7 +249,8 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
     for no_padding in (True, False):
       _test_tuple_elements_by_padding(no_padding)
 
-  def testBucketSparse(self):
+  # TODO(b/117581999): add eager coverage
+  def testSkipEagerBucketSparse(self):
     """Tests bucketing of sparse tensors (case where `no_padding` == True).
 
     Test runs on following dataset:
@@ -295,17 +302,16 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase):
 
     def _compute_batches(dataset):
       """Computes actual batch outputs of dataset and stores in a set."""
-      batch = dataset_ops.make_one_shot_iterator(dataset).get_next()
+      batch = self.getNext(dataset)
       all_sparse_tensors = set()
-      with self.cached_session() as sess:
-        with self.assertRaises(errors.OutOfRangeError):
-          while True:
-            output = self.evaluate(batch)
-            sprs_tensor = (tuple([tuple(idx) for idx in output.indices]),
-                           tuple(output.values))
-            all_sparse_tensors.add(sprs_tensor)
-      return all_sparse_tensors
+      with self.assertRaises(errors.OutOfRangeError):
+        while True:
+          output = self.evaluate(batch())
+          sprs_tensor = (tuple([tuple(idx) for idx in output.indices]),
+                         tuple(output.values))
+          all_sparse_tensors.add(sprs_tensor)
 
+      return all_sparse_tensors
     dataset = _build_dataset()
     boundaries = range(min_len + bucket_size + 1, max_len, bucket_size)
     dataset = dataset.apply(grouping.bucket_by_sequence_length(
diff --git a/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
index 22e057a284..cca7ae073e 100644
--- a/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/dense_to_sparse_batch_test.py
@@ -22,105 +22,87 @@ import numpy as np
 from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class DenseToSparseBatchTest(test_base.DatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testDenseToSparseBatchDataset(self):
     components = np.random.randint(12, size=(100,)).astype(np.int32)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([x], x)).apply(
-            batching.dense_to_sparse_batch(4, [12])))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-
-      for start in range(0, len(components), 4):
-        results = self.evaluate(get_next)
-        self.assertAllEqual([[i, j]
-                             for i, c in enumerate(components[start:start + 4])
-                             for j in range(c)], results.indices)
-        self.assertAllEqual(
-            [c for c in components[start:start + 4] for _ in range(c)],
-            results.values)
-        self.assertAllEqual([min(4,
-                                 len(components) - start), 12],
-                            results.dense_shape)
-
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
-
-  @test_util.run_deprecated_v1
+    dataset = dataset_ops.Dataset.from_tensor_slices(
+        components).map(lambda x: array_ops.fill([x], x)).apply(
+            batching.dense_to_sparse_batch(4, [12]))
+    get_next = self.getNext(dataset)
+
+    for start in range(0, len(components), 4):
+      results = self.evaluate(get_next())
+      self.assertAllEqual([[i, j]
+                           for i, c in enumerate(components[start:start + 4])
+                           for j in range(c)], results.indices)
+      self.assertAllEqual(
+          [c for c in components[start:start + 4] for _ in range(c)],
+          results.values)
+      self.assertAllEqual([min(4,
+                               len(components) - start), 12],
+                          results.dense_shape)
+
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+
   def testDenseToSparseBatchDatasetWithUnknownShape(self):
     components = np.random.randint(5, size=(40,)).astype(np.int32)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensor_slices(components)
-        .map(lambda x: array_ops.fill([x, x], x)).apply(
-            batching.dense_to_sparse_batch(4, [5, None])))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-
-      for start in range(0, len(components), 4):
-        results = self.evaluate(get_next)
-        self.assertAllEqual([[i, j, z]
-                             for i, c in enumerate(components[start:start + 4])
-                             for j in range(c)
-                             for z in range(c)], results.indices)
-        self.assertAllEqual([
-            c
-            for c in components[start:start + 4] for _ in range(c)
-            for _ in range(c)
-        ], results.values)
-        self.assertAllEqual([
-            min(4,
-                len(components) - start), 5,
-            np.max(components[start:start + 4])
-        ], results.dense_shape)
-
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
-
-  @test_util.run_deprecated_v1
+    dataset = dataset_ops.Dataset.from_tensor_slices(
+        components).map(lambda x: array_ops.fill([x, x], x)).apply(
+            batching.dense_to_sparse_batch(4, [5, None]))
+
+    get_next = self.getNext(dataset)
+
+    for start in range(0, len(components), 4):
+      results = self.evaluate(get_next())
+      self.assertAllEqual([[i, j, z]
+                           for i, c in enumerate(components[start:start + 4])
+                           for j in range(c)
+                           for z in range(c)], results.indices)
+      self.assertAllEqual([
+          c for c in components[start:start + 4] for _ in range(c)
+          for _ in range(c)
+      ], results.values)
+      self.assertAllEqual([
+          min(4,
+              len(components) - start), 5,
+          np.max(components[start:start + 4])
+      ], results.dense_shape)
+
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+
   def testDenseToSparseBatchDatasetWithInvalidShape(self):
     input_tensor = array_ops.constant([[1]])
     with self.assertRaisesRegexp(ValueError, "Dimension -2 must be >= 0"):
-      dataset_ops.make_initializable_iterator(
-          dataset_ops.Dataset.from_tensors(input_tensor).apply(
-              batching.dense_to_sparse_batch(4, [-2])))
+      dataset_ops.Dataset.from_tensors(input_tensor).apply(
+          batching.dense_to_sparse_batch(4, [-2]))
 
-  @test_util.run_deprecated_v1
   def testDenseToSparseBatchDatasetShapeErrors(self):
-    input_tensor = array_ops.placeholder(dtypes.int32)
-    iterator = dataset_ops.make_initializable_iterator(
-        dataset_ops.Dataset.from_tensors(input_tensor).apply(
-            batching.dense_to_sparse_batch(4, [12])))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      # Initialize with an input tensor of incompatible rank.
-      sess.run(init_op, feed_dict={input_tensor: [[1]]})
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "incompatible with the row shape"):
-        self.evaluate(get_next)
-
-      # Initialize with an input tensor that is larger than `row_shape`.
-      sess.run(init_op, feed_dict={input_tensor: range(13)})
-      with self.assertRaisesRegexp(errors.DataLossError,
-                                   "larger than the row shape"):
-        self.evaluate(get_next)
+
+    def dataset_fn(input_tensor):
+      return dataset_ops.Dataset.from_tensors(input_tensor).apply(
+          batching.dense_to_sparse_batch(4, [12]))
+
+    # Initialize with an input tensor of incompatible rank.
+    get_next = self.getNext(dataset_fn([[1]]))
+    with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                 "incompatible with the row shape"):
+      self.evaluate(get_next())
+
+    # Initialize with an input tensor that is larger than `row_shape`.
+    get_next = self.getNext(dataset_fn(np.int32(range(13))))
+    with self.assertRaisesRegexp(errors.DataLossError,
+                                 "larger than the row shape"):
+      self.evaluate(get_next())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
index c3c4ccd077..79b8c492c1 100644
--- a/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/indexed_dataset_ops_test.py
@@ -25,14 +25,13 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class IndexedDatasetOpsTest(test_base.DatasetTestBase):
 
-  @test_util.run_deprecated_v1
   def testLowLevelIndexedDatasetOps(self):
     identity = ged_ops.experimental_identity_indexed_dataset(
         ops.convert_to_tensor(16, dtype=dtypes.uint64))
@@ -43,40 +42,34 @@ class IndexedDatasetOpsTest(test_base.DatasetTestBase):
         output_shapes=[[]])
     materialize = ged_ops.experimental_indexed_dataset_materialize(
         identity, handle)
-    index = array_ops.placeholder(dtypes.uint64)
     get_op = ged_ops.experimental_indexed_dataset_get(
-        handle, index, output_types=[dtypes.uint64], output_shapes=[[]])
+        handle, 3, output_types=[dtypes.uint64], output_shapes=[[]])
 
-    with self.cached_session() as sess:
-      self.evaluate(materialize)
-      self.assertEqual([3], sess.run(get_op, feed_dict={index: 3}))
+    self.evaluate(materialize)
+    self.assertEqual([3], self.evaluate(get_op))
 
+  # TODO(b/117581999): Eager mode not supported.
   @test_util.run_deprecated_v1
-  def testIdentityIndexedDataset(self):
+  def testSkipEagerIdentityIndexedDataset(self):
     ds = indexed_dataset_ops.IdentityIndexedDataset(16)
     materialized = ds.materialize()
-    with self.cached_session() as sess:
-      self.evaluate(materialized.initializer)
-      placeholder = array_ops.placeholder(dtypes.uint64, shape=[])
-      for i in range(16):
-        output = sess.run(
-            materialized.get(placeholder), feed_dict={placeholder: i})
-        self.assertEqual([i], output)
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(materialized.get(placeholder), feed_dict={placeholder: 16})
+    self.evaluate(materialized.initializer)
+    for i in range(16):
+      output = self.evaluate(materialized.get(i))
+      self.assertEqual([i], output)
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(materialized.get(16))
 
   @unittest.skip("Requisite functionality currently unimplemented.")
   def testIdentityIndexedDatasetIterator(self):
     ds = indexed_dataset_ops.IdentityIndexedDataset(16)
-    itr = ds.make_initializable_iterator()
-    n = itr.get_next()
-    with self.cached_session() as sess:
-      self.evaluate(itr.initializer)
-      for i in range(16):
-        output = self.evaluate(n)
-        self.assertEqual(i, output)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(n)
+    n = self.getNext(ds)
+
+    for i in range(16):
+      output = self.evaluate(n())
+      self.assertEqual(i, output)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(n())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
index 7c78810494..1fb6971ecd 100644
--- a/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/make_batched_features_dataset_test.py
@@ -21,7 +21,6 @@ import numpy as np
 
 from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
 from tensorflow.python.data.experimental.ops import readers
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers as core_readers
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import dtypes
@@ -33,78 +32,58 @@ from tensorflow.python.ops import parsing_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class MakeBatchedFeaturesDatasetTest(
     reader_dataset_ops_test_base.MakeBatchedFeaturesDatasetTestBase):
 
   def testRead(self):
     for batch_size in [1, 2]:
       for num_epochs in [1, 10]:
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from file 0.
-            self.outputs = dataset_ops.make_one_shot_iterator(
-                self.make_batch_feature(
-                    filenames=self.test_filenames[0],
-                    label_key="label",
-                    num_epochs=num_epochs,
-                    batch_size=batch_size)).get_next()
-            self.verify_records(
-                sess,
-                batch_size,
-                0,
+        # Basic test: read from file 0.
+        self.outputs = self.getNext(
+            self.make_batch_feature(
+                filenames=self.test_filenames[0],
+                label_key="label",
                 num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from file 1.
-            self.outputs = dataset_ops.make_one_shot_iterator(
-                self.make_batch_feature(
-                    filenames=self.test_filenames[1],
-                    label_key="label",
-                    num_epochs=num_epochs,
-                    batch_size=batch_size)).get_next()
-            self.verify_records(
-                sess,
-                batch_size,
-                1,
+                batch_size=batch_size))
+        self.verify_records(
+            batch_size, 0, num_epochs=num_epochs, label_key_provided=True)
+        with self.assertRaises(errors.OutOfRangeError):
+          self._next_actual_batch(label_key_provided=True)
+
+          # Basic test: read from file 1.
+        self.outputs = self.getNext(
+            self.make_batch_feature(
+                filenames=self.test_filenames[1],
+                label_key="label",
                 num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from both files.
-            self.outputs = dataset_ops.make_one_shot_iterator(
-                self.make_batch_feature(
-                    filenames=self.test_filenames,
-                    label_key="label",
-                    num_epochs=num_epochs,
-                    batch_size=batch_size)).get_next()
-            self.verify_records(
-                sess,
-                batch_size,
+                batch_size=batch_size))
+        self.verify_records(
+            batch_size, 1, num_epochs=num_epochs, label_key_provided=True)
+        with self.assertRaises(errors.OutOfRangeError):
+          self._next_actual_batch(label_key_provided=True)
+
+        # Basic test: read from both files.
+        self.outputs = self.getNext(
+            self.make_batch_feature(
+                filenames=self.test_filenames,
+                label_key="label",
                 num_epochs=num_epochs,
-                label_key_provided=True)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess, label_key_provided=True)
-
-        with ops.Graph().as_default() as g:
-          with self.session(graph=g) as sess:
-            # Basic test: read from both files.
-            self.outputs = dataset_ops.make_one_shot_iterator(
-                self.make_batch_feature(
-                    filenames=self.test_filenames,
-                    num_epochs=num_epochs,
-                    batch_size=batch_size)).get_next()
-            self.verify_records(sess, batch_size, num_epochs=num_epochs)
-            with self.assertRaises(errors.OutOfRangeError):
-              self._next_actual_batch(sess)
+                batch_size=batch_size))
+        self.verify_records(
+            batch_size, num_epochs=num_epochs, label_key_provided=True)
+        with self.assertRaises(errors.OutOfRangeError):
+          self._next_actual_batch(label_key_provided=True)
+        # Basic test: read from both files.
+        self.outputs = self.getNext(
+            self.make_batch_feature(
+                filenames=self.test_filenames,
+                num_epochs=num_epochs,
+                batch_size=batch_size))
+        self.verify_records(batch_size, num_epochs=num_epochs)
+        with self.assertRaises(errors.OutOfRangeError):
+          self._next_actual_batch()
 
-  @test_util.run_deprecated_v1
   def testReadWithEquivalentDataset(self):
     features = {
         "file": parsing_ops.FixedLenFeature([], dtypes.int64),
@@ -114,120 +93,109 @@ class MakeBatchedFeaturesDatasetTest(
         core_readers.TFRecordDataset(self.test_filenames)
         .map(lambda x: parsing_ops.parse_single_example(x, features))
         .repeat(10).batch(2))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    next_element = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      for file_batch, _, _, _, record_batch, _ in self._next_expected_batch(
-          range(self._num_files), 2, 10):
-        actual_batch = self.evaluate(next_element)
-        self.assertAllEqual(file_batch, actual_batch["file"])
-        self.assertAllEqual(record_batch, actual_batch["record"])
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+    next_element = self.getNext(dataset)
+    for file_batch, _, _, _, record_batch, _ in self._next_expected_batch(
+        range(self._num_files), 2, 10):
+      actual_batch = self.evaluate(next_element())
+      self.assertAllEqual(file_batch, actual_batch["file"])
+      self.assertAllEqual(record_batch, actual_batch["record"])
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testReadWithFusedShuffleRepeatDataset(self):
     num_epochs = 5
     total_records = num_epochs * self._num_records
     for batch_size in [1, 2]:
       # Test that shuffling with same seed produces the same result.
-      with ops.Graph().as_default() as g:
-        with self.session(graph=g) as sess:
-          outputs1 = dataset_ops.make_one_shot_iterator(self.make_batch_feature(
+      outputs1 = self.getNext(
+          self.make_batch_feature(
               filenames=self.test_filenames[0],
               num_epochs=num_epochs,
               batch_size=batch_size,
               shuffle=True,
-              shuffle_seed=5)).get_next()
-          outputs2 = dataset_ops.make_one_shot_iterator(self.make_batch_feature(
+              shuffle_seed=5))
+      outputs2 = self.getNext(
+          self.make_batch_feature(
               filenames=self.test_filenames[0],
               num_epochs=num_epochs,
               batch_size=batch_size,
               shuffle=True,
-              shuffle_seed=5)).get_next()
-          for _ in range(total_records // batch_size):
-            batch1 = self._run_actual_batch(outputs1, sess)
-            batch2 = self._run_actual_batch(outputs2, sess)
-            for i in range(len(batch1)):
-              self.assertAllEqual(batch1[i], batch2[i])
+              shuffle_seed=5))
+      for _ in range(total_records // batch_size):
+        batch1 = self._run_actual_batch(outputs1)
+        batch2 = self._run_actual_batch(outputs2)
+        for i in range(len(batch1)):
+          self.assertAllEqual(batch1[i], batch2[i])
 
       # Test that shuffling with different seeds produces a different order.
-      with ops.Graph().as_default() as g:
-        with self.session(graph=g) as sess:
-          outputs1 = dataset_ops.make_one_shot_iterator(self.make_batch_feature(
+      outputs1 = self.getNext(
+          self.make_batch_feature(
               filenames=self.test_filenames[0],
               num_epochs=num_epochs,
               batch_size=batch_size,
               shuffle=True,
-              shuffle_seed=5)).get_next()
-          outputs2 = dataset_ops.make_one_shot_iterator(self.make_batch_feature(
+              shuffle_seed=5))
+      outputs2 = self.getNext(
+          self.make_batch_feature(
               filenames=self.test_filenames[0],
               num_epochs=num_epochs,
               batch_size=batch_size,
               shuffle=True,
-              shuffle_seed=15)).get_next()
-          all_equal = True
-          for _ in range(total_records // batch_size):
-            batch1 = self._run_actual_batch(outputs1, sess)
-            batch2 = self._run_actual_batch(outputs2, sess)
-            for i in range(len(batch1)):
-              all_equal = all_equal and np.array_equal(batch1[i], batch2[i])
-          self.assertFalse(all_equal)
+              shuffle_seed=15))
+      all_equal = True
+      for _ in range(total_records // batch_size):
+        batch1 = self._run_actual_batch(outputs1)
+        batch2 = self._run_actual_batch(outputs2)
+        for i in range(len(batch1)):
+          all_equal = all_equal and np.array_equal(batch1[i], batch2[i])
+      self.assertFalse(all_equal)
 
   def testParallelReadersAndParsers(self):
     num_epochs = 5
     for batch_size in [1, 2]:
       for reader_num_threads in [2, 4]:
         for parser_num_threads in [2, 4]:
-          with ops.Graph().as_default() as g:
-            with self.session(graph=g) as sess:
-              self.outputs = dataset_ops.make_one_shot_iterator(
-                  self.make_batch_feature(
-                      filenames=self.test_filenames,
-                      label_key="label",
-                      num_epochs=num_epochs,
-                      batch_size=batch_size,
-                      reader_num_threads=reader_num_threads,
-                      parser_num_threads=parser_num_threads)).get_next()
-              self.verify_records(
-                  sess,
-                  batch_size,
+          self.outputs = self.getNext(
+              self.make_batch_feature(
+                  filenames=self.test_filenames,
+                  label_key="label",
                   num_epochs=num_epochs,
-                  label_key_provided=True,
-                  interleave_cycle_length=reader_num_threads)
-              with self.assertRaises(errors.OutOfRangeError):
-                self._next_actual_batch(sess, label_key_provided=True)
-
-          with ops.Graph().as_default() as g:
-            with self.session(graph=g) as sess:
-              self.outputs = dataset_ops.make_one_shot_iterator(
-                  self.make_batch_feature(
-                      filenames=self.test_filenames,
-                      num_epochs=num_epochs,
-                      batch_size=batch_size,
-                      reader_num_threads=reader_num_threads,
-                      parser_num_threads=parser_num_threads)).get_next()
-              self.verify_records(
-                  sess,
-                  batch_size,
+                  batch_size=batch_size,
+                  reader_num_threads=reader_num_threads,
+                  parser_num_threads=parser_num_threads))
+          self.verify_records(
+              batch_size,
+              num_epochs=num_epochs,
+              label_key_provided=True,
+              interleave_cycle_length=reader_num_threads)
+          with self.assertRaises(errors.OutOfRangeError):
+            self._next_actual_batch(label_key_provided=True)
+
+          self.outputs = self.getNext(
+              self.make_batch_feature(
+                  filenames=self.test_filenames,
                   num_epochs=num_epochs,
-                  interleave_cycle_length=reader_num_threads)
-              with self.assertRaises(errors.OutOfRangeError):
-                self._next_actual_batch(sess)
+                  batch_size=batch_size,
+                  reader_num_threads=reader_num_threads,
+                  parser_num_threads=parser_num_threads))
+          self.verify_records(
+              batch_size,
+              num_epochs=num_epochs,
+              interleave_cycle_length=reader_num_threads)
+          with self.assertRaises(errors.OutOfRangeError):
+            self._next_actual_batch()
 
   def testDropFinalBatch(self):
     for batch_size in [1, 2]:
       for num_epochs in [1, 10]:
         with ops.Graph().as_default():
           # Basic test: read from file 0.
-          outputs = dataset_ops.make_one_shot_iterator(self.make_batch_feature(
+          outputs = self.make_batch_feature(
               filenames=self.test_filenames[0],
               label_key="label",
               num_epochs=num_epochs,
               batch_size=batch_size,
-              drop_final_batch=True)).get_next()
+              drop_final_batch=True)
           for tensor in nest.flatten(outputs):
             if isinstance(tensor, ops.Tensor):  # Guard against SparseTensor.
               self.assertEqual(tensor.shape[0], batch_size)
diff --git a/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
index ab2feb6426..9f35aa69a8 100644
--- a/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/make_tf_record_dataset_test.py
@@ -19,14 +19,14 @@ from __future__ import print_function
 
 from tensorflow.python.data.experimental.kernel_tests import reader_dataset_ops_test_base
 from tensorflow.python.data.experimental.ops import readers
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.util import nest
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import string_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class MakeTFRecordDatasetTest(
     reader_dataset_ops_test_base.TFRecordDatasetTestBase):
 
@@ -90,7 +90,6 @@ class MakeTFRecordDatasetTest(
       yield record_batch
 
   def _verify_records(self,
-                      sess,
                       outputs,
                       batch_size,
                       file_index,
@@ -106,7 +105,7 @@ class MakeTFRecordDatasetTest(
     for expected_batch in self._next_expected_batch(
         file_indices, batch_size, num_epochs, interleave_cycle_length,
         drop_final_batch, use_parser_fn):
-      actual_batch = self.evaluate(outputs)
+      actual_batch = self.evaluate(outputs())
       self.assertAllEqual(expected_batch, actual_batch)
 
   def _read_test(self, batch_size, num_epochs, file_index=None,
@@ -121,23 +120,25 @@ class MakeTFRecordDatasetTest(
     else:
       fn = None
 
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        outputs = dataset_ops.make_one_shot_iterator(
-            readers.make_tf_record_dataset(
-                file_pattern=file_pattern,
-                num_epochs=num_epochs,
-                batch_size=batch_size,
-                parser_fn=fn,
-                num_parallel_reads=num_parallel_reads,
-                drop_final_batch=drop_final_batch,
-                shuffle=False)).get_next()
-        self._verify_records(
-            sess, outputs, batch_size, file_index, num_epochs=num_epochs,
-            interleave_cycle_length=num_parallel_reads,
-            drop_final_batch=drop_final_batch, use_parser_fn=parser_fn)
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(outputs)
+    outputs = self.getNext(
+        readers.make_tf_record_dataset(
+            file_pattern=file_pattern,
+            num_epochs=num_epochs,
+            batch_size=batch_size,
+            parser_fn=fn,
+            num_parallel_reads=num_parallel_reads,
+            drop_final_batch=drop_final_batch,
+            shuffle=False))
+    self._verify_records(
+        outputs,
+        batch_size,
+        file_index,
+        num_epochs=num_epochs,
+        interleave_cycle_length=num_parallel_reads,
+        drop_final_batch=drop_final_batch,
+        use_parser_fn=parser_fn)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(outputs())
 
   def testRead(self):
     for batch_size in [1, 2]:
@@ -178,50 +179,46 @@ class MakeTFRecordDatasetTest(
 
   def _shuffle_test(self, batch_size, num_epochs, num_parallel_reads=1,
                     seed=None):
-    with ops.Graph().as_default() as g:
-      with self.session(graph=g) as sess:
-        dataset = readers.make_tf_record_dataset(
-            file_pattern=self.test_filenames,
-            num_epochs=num_epochs,
-            batch_size=batch_size,
-            num_parallel_reads=num_parallel_reads,
-            shuffle=True,
-            shuffle_seed=seed)
-        iterator = dataset_ops.make_initializable_iterator(dataset)
-        next_element = iterator.get_next()
-
-        self.evaluate(iterator.initializer)
-        first_batches = []
-        try:
-          while True:
-            first_batches.append(self.evaluate(next_element))
-        except errors.OutOfRangeError:
-          pass
-
-        self.evaluate(iterator.initializer)
-        second_batches = []
-        try:
-          while True:
-            second_batches.append(self.evaluate(next_element))
-        except errors.OutOfRangeError:
-          pass
-
-        self.assertEqual(len(first_batches), len(second_batches))
-        if seed is not None:
-          # if you set a seed, should get the same results
-          for i in range(len(first_batches)):
-            self.assertAllEqual(first_batches[i], second_batches[i])
-
-        expected = []
-        for f in range(self._num_files):
-          for r in range(self._num_records):
-            expected.extend([self._record(f, r)] * num_epochs)
-
-        for batches in (first_batches, second_batches):
-          actual = []
-          for b in batches:
-            actual.extend(b)
-          self.assertAllEqual(sorted(expected), sorted(actual))
+    dataset = readers.make_tf_record_dataset(
+        file_pattern=self.test_filenames,
+        num_epochs=num_epochs,
+        batch_size=batch_size,
+        num_parallel_reads=num_parallel_reads,
+        shuffle=True,
+        shuffle_seed=seed)
+
+    next_element = self.getNext(dataset)
+    first_batches = []
+    try:
+      while True:
+        first_batches.append(self.evaluate(next_element()))
+    except errors.OutOfRangeError:
+      pass
+
+    next_element = self.getNext(dataset)
+    second_batches = []
+    try:
+      while True:
+        second_batches.append(self.evaluate(next_element()))
+    except errors.OutOfRangeError:
+      pass
+
+    self.assertEqual(len(first_batches), len(second_batches))
+    if seed is not None:
+      # if you set a seed, should get the same results
+      for i in range(len(first_batches)):
+        self.assertAllEqual(first_batches[i], second_batches[i])
+
+    expected = []
+    for f in range(self._num_files):
+      for r in range(self._num_records):
+        expected.extend([self._record(f, r)] * num_epochs)
+
+    for batches in (first_batches, second_batches):
+      actual = []
+      for b in batches:
+        actual.extend(b)
+      self.assertAllEqual(sorted(expected), sorted(actual))
 
   def testShuffle(self):
     for batch_size in [1, 2]:
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index a8a65dde13..ceadebc541 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -25,6 +25,7 @@ import numpy as np
 from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -35,9 +36,11 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.named_parameters(
@@ -50,7 +53,6 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("ParallelCallsNUMA", 2, None, True),
       ("ParallelBatchesNUMA", None, 10, True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatch(self, num_parallel_calls, num_parallel_batches,
                       numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
@@ -60,74 +62,66 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
                   np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                   np.array(37.0) * np.arange(7))
 
-    count = array_ops.placeholder(dtypes.int64, shape=[])
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-
     def _map_fn(x, y, z):
       return math_ops.square(x), math_ops.square(y), math_ops.square(z)
 
-    dataset = (
-        dataset_ops.Dataset.from_tensor_slices(components).repeat(count).apply(
-            batching.map_and_batch(
-                map_func=_map_fn,
-                batch_size=batch_size,
-                num_parallel_calls=num_parallel_calls,
-                num_parallel_batches=num_parallel_batches)))
-
-    if numa_aware:
-      options = dataset_ops.Options()
-      options.experimental_numa_aware = True
-      dataset = dataset.with_options(options)
-
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
+    def dataset_fn(batch_size, count, numa_aware=numa_aware):
+      dataset = dataset_ops.Dataset.from_tensor_slices(components).repeat(
+          count).apply(
+              batching.map_and_batch(
+                  map_func=_map_fn,
+                  batch_size=batch_size,
+                  num_parallel_calls=num_parallel_calls,
+                  num_parallel_batches=num_parallel_batches))
+      if numa_aware:
+        options = dataset_ops.Options()
+        options.experimental_numa_aware = True
+        dataset = dataset.with_options(options)
+      return dataset
+
+    # Batch of a finite input, where the batch_size divides the
+    # total number of elements.
+    dataset = dataset_fn(14, 28)
+    get_next = self.getNext(dataset)
     self.assertEqual([[None] + list(c.shape[1:]) for c in components],
-                     [t.shape.as_list() for t in get_next])
+                     [shape.as_list() for shape in dataset.output_shapes])
+    num_batches = (28 * 7) // 14
+    for i in range(num_batches):
+      result = self.evaluate(get_next())
+      for component, result_component in zip(components, result):
+        for j in range(14):
+          self.assertAllEqual(component[(i * 14 + j) % 7]**2,
+                              result_component[j])
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
-    with self.cached_session() as sess:
-      # Batch of a finite input, where the batch_size divides the
-      # total number of elements.
-      sess.run(init_op, feed_dict={count: 28, batch_size: 14})
-      num_batches = (28 * 7) // 14
-      for i in range(num_batches):
-        result = self.evaluate(get_next)
-        for component, result_component in zip(components, result):
-          for j in range(14):
-            self.assertAllEqual(component[(i * 14 + j) % 7]**2,
-                                result_component[j])
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
-
-      # Batch of a finite input, where the batch_size does not
-      # divide the total number of elements.
-      sess.run(init_op, feed_dict={count: 14, batch_size: 8})
-
-      # We expect (num_batches - 1) full-sized batches.
-      num_batches = int(math.ceil((14 * 7) / 8))
-      for i in range(num_batches - 1):
-        result = self.evaluate(get_next)
-        for component, result_component in zip(components, result):
-          for j in range(8):
-            self.assertAllEqual(component[(i * 8 + j) % 7]**2,
-                                result_component[j])
-      result = self.evaluate(get_next)
+    # Batch of a finite input, where the batch_size does not
+    # divide the total number of elements.
+    get_next = self.getNext(dataset_fn(8, 14))
+
+    # We expect (num_batches - 1) full-sized batches.
+    num_batches = int(math.ceil((14 * 7) / 8))
+    for i in range(num_batches - 1):
+      result = self.evaluate(get_next())
       for component, result_component in zip(components, result):
-        for j in range((14 * 7) % 8):
-          self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
+        for j in range(8):
+          self.assertAllEqual(component[(i * 8 + j) % 7]**2,
                               result_component[j])
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
 
-      # Batch of an empty input should fail straight away.
-      sess.run(init_op, feed_dict={count: 0, batch_size: 8})
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    result = self.evaluate(get_next())
+    for component, result_component in zip(components, result):
+      for j in range((14 * 7) % 8):
+        self.assertAllEqual(component[((num_batches - 1) * 8 + j) % 7]**2,
+                            result_component[j])
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
-      # Empty batch should be an initialization time error.
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(init_op, feed_dict={count: 14, batch_size: 0})
+    # Batch of an empty input should fail straight away.
+    self.assertDatasetProduces(dataset_fn(8, 0), expected_output=[])
+
+    # Empty batch should be an initialization time error.
+    self.assertDatasetProduces(
+        dataset_fn(0, 14), expected_error=(errors.InvalidArgumentError, ""))
 
   @parameterized.named_parameters(
       ("Even", False, False),
@@ -135,7 +129,6 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("EvenNUMA", False, True),
       ("UnevenNUMA", True, True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchPartialBatch(self, drop_remainder, numa_aware):
     dataset = (
         dataset_ops.Dataset.range(10).apply(
@@ -148,26 +141,20 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
 
     if drop_remainder:
-      self.assertEqual([4, 1], iterator.output_shapes.as_list())
+      self.assertEqual([4, 1], dataset.output_shapes.as_list())
     else:
-      self.assertEqual([None, 1], iterator.output_shapes.as_list())
-    next_element = iterator.get_next()
-    with self.cached_session():
-      self.assertAllEqual([[0], [1], [4], [9]], self.evaluate(next_element))
-      self.assertAllEqual([[16], [25], [36], [49]], self.evaluate(next_element))
-      if not drop_remainder:
-        self.assertAllEqual([[64], [81]], self.evaluate(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+      self.assertEqual([None, 1], dataset.output_shapes.as_list())
+    expected_output = [[[0], [1], [4], [9]], [[16], [25], [36], [49]]]
+    if not drop_remainder:
+      expected_output.append([[64], [81]])
+    self.assertDatasetProduces(dataset, expected_output=expected_output)
 
   @parameterized.named_parameters(
       ("Normal", False),
       ("NUMA", True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchYieldsPartialBatch(self, numa_aware):
     dataset = (
         dataset_ops.Dataset.range(10).apply(
@@ -177,16 +164,12 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
+    self.assertEqual([None, 1], dataset.output_shapes.as_list())
+    expected_output = [[[0], [1], [4], [9]], [[16], [25], [36], [49]],
+                       [[64], [81]]]
+    self.assertDatasetProduces(dataset, expected_output=expected_output)
 
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-    self.assertEqual([None, 1], iterator.output_shapes.as_list())
-    next_element = iterator.get_next()
-    with self.cached_session():
-      self.assertAllEqual([[0], [1], [4], [9]], self.evaluate(next_element))
-      self.assertAllEqual([[16], [25], [36], [49]], self.evaluate(next_element))
-      self.assertAllEqual([[64], [81]], self.evaluate(next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(next_element)
+# TODO(b/117581999): eager expected not same as actual, debug.
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -200,27 +183,32 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
+
+    if context.executing_eagerly():
+      iterator = iter(dataset)
+      get_next = iterator._next_internal  # pylint: disable=protected-access
+    else:
+      iterator = dataset_ops.make_one_shot_iterator(dataset)
+      get_next = iterator.get_next
 
     elements = []
     for _ in range(100):
-      elements.append(iterator.get_next())
-    with self.cached_session():
-      for i in range(5):
-        got = self.evaluate(elements)
-        got.sort(key=lambda x: x[0])
-        expected = []
-        for j in range(100):
-          expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
-        self.assertAllEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(elements)
+      elements.append(get_next)
+
+    for i in range(5):
+      got = self.evaluate([element() for element in elements])
+      got.sort(key=lambda x: x[0])
+      expected = []
+      for j in range(100):
+        expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
+      self.assertAllEqual(got, expected)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate([element() for element in elements])
 
   @parameterized.named_parameters(
       ("Normal", False),
       ("NUMA", True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchParallelGetNextDropRemainder(self, numa_aware):
     dataset = dataset_ops.Dataset.range(49999).apply(
         batching.map_and_batch(
@@ -230,27 +218,32 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
+
+    if context.executing_eagerly():
+      iterator = iter(dataset)
+      get_next = iterator._next_internal  # pylint: disable=protected-access
+    else:
+      iterator = dataset_ops.make_one_shot_iterator(dataset)
+      get_next = iterator.get_next
 
     elements = []
     for _ in range(100):
-      elements.append(iterator.get_next())
-    with self.cached_session():
-      for i in range(4):
-        got = self.evaluate(elements)
-        got.sort(key=lambda x: x[0])
-        expected = []
-        for j in range(100):
-          expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
-        self.assertAllEqual(got, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(elements)
+      elements.append(get_next)
+
+    for i in range(4):
+      got = self.evaluate([element() for element in elements])
+      got.sort(key=lambda x: x[0])
+      expected = []
+      for j in range(100):
+        expected.append(range(i * 10000 + j * 100, i * 10000 + (j + 1) * 100))
+      self.assertAllEqual(got, expected)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate([element() for element in elements])
 
   @parameterized.named_parameters(
       ("Normal", False),
       ("NUMA", True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchSparse(self, numa_aware):
 
     def _sparse(i):
@@ -263,52 +256,39 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session():
-      self.evaluate(init_op)
-      for i in range(2):
-        actual = self.evaluate(get_next)
-        expected = sparse_tensor.SparseTensorValue(
-            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
-            values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
-            dense_shape=[5, 1])
-        self.assertTrue(sparse_tensor.is_sparse(actual))
-        self.assertSparseValuesEqual(actual, expected)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+
+    self.assertDatasetProduces(
+        dataset,
+        expected_output=[
+            sparse_tensor.SparseTensorValue(
+                indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
+                values=[i * 5, i * 5 + 1, i * 5 + 2, i * 5 + 3, i * 5 + 4],
+                dense_shape=[5, 1]) for i in range(2)
+        ])
 
   @parameterized.named_parameters(
       ("Normal", False),
       ("NUMA", True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchFails(self, numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
-    dataset = dataset_ops.Dataset.from_tensors(
-        array_ops.check_numerics(
-            constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
-    batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-    dataset = dataset.apply(batching.map_and_batch(lambda x: x, batch_size))
-    if numa_aware:
-      options = dataset_ops.Options()
-      options.experimental_numa_aware = True
-      dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
 
-    init_op = iterator.initializer
-    with self.cached_session() as sess:
-      with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
-        sess.run(init_op, feed_dict={batch_size: 14})
+    with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"):
+      dataset = dataset_ops.Dataset.from_tensors(
+          array_ops.check_numerics(
+              constant_op.constant(1.0) / constant_op.constant(0.0), "oops"))
+      dataset = dataset.apply(batching.map_and_batch(lambda x: x, 14))
+      if numa_aware:
+        options = dataset_ops.Options()
+        options.experimental_numa_aware = True
+        dataset = dataset.with_options(options)
+      get_next = self.getNext(dataset)
+      self.evaluate(get_next())
 
   @parameterized.named_parameters(
       ("Normal", False),
       ("NUMA", True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchShapeMismatch(self, numa_aware):
     """Test a dataset that maps a TF function across its input elements."""
 
@@ -326,15 +306,10 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    with self.cached_session():
-      self.evaluate(init_op)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   "number of elements does not match"):
-        self.evaluate(get_next)
+    self.assertDatasetProduces(
+        dataset,
+        expected_error=(errors.InvalidArgumentError,
+                        "number of elements does not match"))
 
   @parameterized.named_parameters(
       ("Normal", False),
@@ -359,12 +334,9 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-    get_next = iterator.get_next()
-
-    with self.cached_session():
-      for _ in range(3):
-        self.evaluate(get_next)
+    get_next = self.getNext(dataset)
+    for _ in range(3):
+      self.evaluate(get_next())
 
   @parameterized.named_parameters(
       ("1", 0, False),
@@ -380,7 +352,6 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("5NUMA", 95, True),
       ("6NUMA", 99, True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchMapError(self, threshold, numa_aware):
 
     def raising_py_fn(i):
@@ -397,24 +368,22 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-    get_next = iterator.get_next()
 
-    with self.cached_session():
-      for i in range(threshold // 10):
-        self.assertAllEqual([i * 10 + j for j in range(10)],
-                            self.evaluate(get_next))
-      if numa_aware:
-        if threshold % 10 != 0:
-          self.assertAllEqual(
-              [threshold // 10 * 10 + j for j in range(threshold % 10)],
-              self.evaluate(get_next))
-      else:
-        for i in range(threshold // 10, 10):
-          with self.assertRaises(errors.InvalidArgumentError):
-            self.evaluate(get_next)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(dataset)
+    for i in range(threshold // 10):
+      self.assertAllEqual([i * 10 + j for j in range(10)],
+                          self.evaluate(get_next()))
+    if numa_aware:
+      if threshold % 10 != 0:
+        self.assertAllEqual(
+            [threshold // 10 * 10 + j for j in range(threshold % 10)],
+            self.evaluate(get_next()))
+    else:
+      for i in range(threshold // 10, 10):
+        with self.assertRaises(errors.InvalidArgumentError):
+          self.evaluate(get_next())
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   @parameterized.named_parameters(
       ("1", False, dtypes.bool, False),
@@ -453,13 +422,12 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
 
-    get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
-
-    with self.cached_session():
-      for _ in range(10):
-        self.assertAllEqual([element for _ in range(10)],
-                            self.evaluate(get_next))
+    get_next = self.getNext(dataset)
+    for _ in range(10):
+      self.assertAllEqual([element for _ in range(10)],
+                          self.evaluate(get_next()))
 
+  # TODO(b/117581999): add eager coverage.
   @parameterized.named_parameters(
       ("Identity", None, lambda x: x, None),
       ("Replicate", None, lambda x: (x, x), None),
@@ -467,7 +435,7 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("Project", (None, None), lambda x, y: x, None),
   )
   @test_util.run_deprecated_v1
-  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
+  def testSkipEagerShortCircuit(self, structure, map_fn, num_parallel_calls):
     dataset = self.structuredDataset(structure).repeat().apply(
         batching.map_and_batch(map_fn, batch_size=10))
     get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
@@ -481,23 +449,18 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
             sess.run(self.structuredElement(structure, shape=[10])))
       self.assertAllEqual(expected, self.evaluate(get_next))
 
-  @test_util.run_deprecated_v1
   def testShortCircuitCapturedInput(self):
-    captured_t = array_ops.placeholder(dtypes.int64, shape=[])
+    captured_t = variables.Variable(42)
     dataset = self.structuredDataset(None).repeat().apply(
         batching.map_and_batch(lambda x: captured_t, batch_size=10))
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(iterator.initializer, feed_dict={captured_t: 42})
-      self.assertAllEqual([42] * 10, self.evaluate(get_next))
+    self.evaluate(variables.global_variables_initializer())
+    get_next = self.getNext(dataset, requires_initialization=True)
+    self.assertAllEqual([42] * 10, self.evaluate(get_next()))
 
   @parameterized.named_parameters(
       ("Normal", False),
       ("NUMA", True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchControlFlow(self, numa_aware):
 
     def map_fn(x):
@@ -513,19 +476,17 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       options = dataset_ops.Options()
       options.experimental_numa_aware = True
       dataset = dataset.with_options(options)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-    get_next = iterator.get_next()
-    with self.cached_session():
-      for i in range(10):
-        if i < 5:
-          self.assertAllEqual([i * 10 + j + 1 for j in range(10)],
-                              self.evaluate(get_next))
-        else:
-          self.assertAllEqual(
-              [((i * 10) + j) * ((i * 10) + j) for j in range(10)],
-              self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(dataset)
+    for i in range(10):
+      if i < 5:
+        self.assertAllEqual([i * 10 + j + 1 for j in range(10)],
+                            self.evaluate(get_next()))
+      else:
+        self.assertAllEqual(
+            [((i * 10) + j) * ((i * 10) + j) for j in range(10)],
+            self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py b/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
index 113326c028..9d53531661 100644
--- a/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/parallel_interleave_test.py
@@ -22,6 +22,7 @@ import math
 import threading
 import time
 
+import numpy as np
 from six.moves import zip_longest
 
 from tensorflow.python.data.experimental.ops import interleave_ops
@@ -30,24 +31,18 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.ops import array_ops
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class ParallelInterleaveTest(test_base.DatasetTestBase):
 
   def setUp(self):
 
-    self.input_values = array_ops.placeholder(dtypes.int64, shape=[None])
-    self.cycle_length = array_ops.placeholder(dtypes.int64, shape=[])
-    self.block_length = array_ops.placeholder(dtypes.int64, shape=[])
-    self.sloppy = array_ops.placeholder(dtypes.bool, shape=[])
-    self.buffer_output_elements = array_ops.placeholder(dtypes.int64, shape=[])
-    self.prefetch_input_elements = array_ops.placeholder(dtypes.int64, shape=[])
-
     self.error = None
     self.repeat_count = 2
 
@@ -61,6 +56,9 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
       self.read_coordination_events[i] = threading.Semaphore(0)
       self.write_coordination_events[i] = threading.Event()
 
+  def dataset_fn(self, input_values, cycle_length, block_length, sloppy,
+                 buffer_output_elements, prefetch_input_elements):
+
     def map_py_fn(x):
       self.write_coordination_events[x].wait()
       self.write_coordination_events[x].clear()
@@ -79,16 +77,11 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
       dataset = dataset.repeat(x)
       return dataset.map(map_fn)
 
-    self.dataset = (
-        dataset_ops.Dataset.from_tensor_slices(self.input_values)
-        .repeat(self.repeat_count).apply(
-            interleave_ops.parallel_interleave(interleave_fn, self.cycle_length,
-                                               self.block_length, self.sloppy,
-                                               self.buffer_output_elements,
-                                               self.prefetch_input_elements)))
-    self.iterator = dataset_ops.make_initializable_iterator(self.dataset)
-    self.init_op = self.iterator.initializer
-    self.next_element = self.iterator.get_next()
+    return dataset_ops.Dataset.from_tensor_slices(input_values).repeat(
+        self.repeat_count).apply(
+            interleave_ops.parallel_interleave(
+                interleave_fn, cycle_length, block_length, sloppy,
+                buffer_output_elements, prefetch_input_elements))
 
   def _interleave(self, lists, cycle_length, block_length):
     """Python implementation of interleave used for testing."""
@@ -178,26 +171,22 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
   def _testSingleThreaded(self, sloppy=False, prefetch_input_elements=0):
     # cycle_length=1,block_length=1 acts like `Dataset.interleave()` and
     # `Dataset.flat_map()` and is single-threaded. No synchronization required.
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 1,
-              self.block_length: 1,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: prefetch_input_elements,
-          })
-
-      for expected_element in self._interleave(
-          [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 1, 1):
-        self.write_coordination_events[expected_element].set()
-        self.assertEqual(expected_element * expected_element,
-                         self.evaluate(self.next_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=1,
+            block_length=1,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=prefetch_input_elements))
+    for expected_element in self._interleave(
+        [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 1, 1):
+      self.write_coordination_events[expected_element].set()
+      self.assertEqual(expected_element * expected_element,
+                       self.evaluate(next_element()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testSingleThreaded(self):
     self._testSingleThreaded()
@@ -213,64 +202,59 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
 
   def testSingleThreadedRagged(self):
     # Tests a sequence with wildly different elements per iterator.
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [3, 7, 4],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: False,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 1,
-          })
-
-      # Add coordination values for 3 and 7
-      self.read_coordination_events[3] = threading.Semaphore(0)
-      self.write_coordination_events[3] = threading.Event()
-      self.read_coordination_events[7] = threading.Semaphore(0)
-      self.write_coordination_events[7] = threading.Event()
-
-      for expected_element in self._interleave(
-          [[3] * 3, [7] * 7, [4] * 4] * self.repeat_count, 2, 1):
-        self.write_coordination_events[expected_element].set()
-        output = self.evaluate(self.next_element)
-        self.assertEqual(expected_element * expected_element, output)
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([3, 7, 4]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=False,
+            buffer_output_elements=1,
+            prefetch_input_elements=1))
+
+    # Add coordination values for 3 and 7
+    self.read_coordination_events[3] = threading.Semaphore(0)
+    self.write_coordination_events[3] = threading.Event()
+    self.read_coordination_events[7] = threading.Semaphore(0)
+    self.write_coordination_events[7] = threading.Event()
+
+    for expected_element in self._interleave(
+        [[3] * 3, [7] * 7, [4] * 4] * self.repeat_count, 2, 1):
+      self.write_coordination_events[expected_element].set()
+      output = self.evaluate(next_element())
+      self.assertEqual(expected_element * expected_element, output)
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def _testTwoThreadsNoContention(self, sloppy=False):
     # num_threads > 1.
     # Explicit coordination should result in `Dataset.interleave()` behavior
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      done_first_event = False
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 1,
-          })
-      for i, expected_element in enumerate(
-          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
-                           1)):
-        self.write_coordination_events[expected_element].set()
-        if done_first_event:  # First event starts the worker threads.
-          self.read_coordination_events[expected_element].acquire()
-        actual_element = self.evaluate(self.next_element)
-        if not done_first_event:
-          self.read_coordination_events[expected_element].acquire()
-          done_first_event = True
-        self.assertEqual(expected_element * expected_element, actual_element,
-                         "At index %s: %s expected, got: %s" %
-                         (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    done_first_event = False
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=1))
+    for i, expected_element in enumerate(
+        self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                         1)):
+      self.write_coordination_events[expected_element].set()
+      if done_first_event:  # First event starts the worker threads.
+        self.read_coordination_events[expected_element].acquire()
+      actual_element = self.evaluate(next_element())
+      if not done_first_event:
+        self.read_coordination_events[expected_element].acquire()
+        done_first_event = True
+      self.assertEqual(
+          expected_element * expected_element, actual_element,
+          "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                 actual_element))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testTwoThreadsNoContention(self):
     self._testTwoThreadsNoContention()
@@ -287,38 +271,36 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
     Args:
       sloppy: Whether to be sloppy or not.
     """
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      done_first_event = False
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 1,
-          })
-      for i, expected_element in enumerate(
-          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
-                           1)):
-        if done_first_event:  # First event starts the worker threads.
-          self._allow_all_map_threads()
-          self.read_coordination_events[expected_element].acquire()
-        else:
-          self.write_coordination_events[expected_element].set()
-        time.sleep(0.5)  # Sleep to consistently "avoid" the race condition.
-        actual_element = self.evaluate(self.next_element)
-        if not done_first_event:
-          done_first_event = True
-          self.assertTrue(
-              self.read_coordination_events[expected_element].acquire(False))
-        self.assertEqual(expected_element * expected_element, actual_element,
-                         "At index %s: %s expected, got: %s" %
-                         (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    done_first_event = False
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=1))
+    for i, expected_element in enumerate(
+        self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                         1)):
+      if done_first_event:  # First event starts the worker threads.
+        self._allow_all_map_threads()
+        self.read_coordination_events[expected_element].acquire()
+      else:
+        self.write_coordination_events[expected_element].set()
+      time.sleep(0.5)  # Sleep to consistently "avoid" the race condition.
+      actual_element = self.evaluate(next_element())
+      if not done_first_event:
+        done_first_event = True
+        self.assertTrue(
+            self.read_coordination_events[expected_element].acquire(False))
+      self.assertEqual(
+          expected_element * expected_element, actual_element,
+          "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                 actual_element))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testTwoThreadsNoContentionWithRaces(self):
     self._testTwoThreadsNoContentionWithRaces()
@@ -329,34 +311,32 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
   def _testTwoThreadsNoContentionBlockLength(self, sloppy=False):
     # num_threads > 1.
     # Explicit coordination should result in `Dataset.interleave()` behavior
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      done_first_event = False
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 2,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 1,
-          })
-      for i, expected_element in enumerate(
-          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
-                           2)):
-        self.write_coordination_events[expected_element].set()
-        if done_first_event:  # First event starts the worker threads.
-          self.read_coordination_events[expected_element].acquire()
-        actual_element = self.evaluate(self.next_element)
-        if not done_first_event:
-          done_first_event = True
-          self.read_coordination_events[expected_element].acquire()
-        self.assertEqual(expected_element * expected_element, actual_element,
-                         "At index %s: %s expected, got: %s" %
-                         (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    done_first_event = False
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=2,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=1))
+    for i, expected_element in enumerate(
+        self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                         2)):
+      self.write_coordination_events[expected_element].set()
+      if done_first_event:  # First event starts the worker threads.
+        self.read_coordination_events[expected_element].acquire()
+      actual_element = self.evaluate(next_element())
+      if not done_first_event:
+        done_first_event = True
+        self.read_coordination_events[expected_element].acquire()
+      self.assertEqual(
+          expected_element * expected_element, actual_element,
+          "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                 actual_element))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testTwoThreadsNoContentionBlockLength(self):
     self._testTwoThreadsNoContentionBlockLength()
@@ -374,38 +354,36 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
     Args:
       sloppy: Whether to be sloppy or not.
     """
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      done_first_event = False
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 2,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 1,
-          })
-      for i, expected_element in enumerate(
-          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
-                           2)):
-        if done_first_event:  # First event starts the worker threads.
-          self._allow_all_map_threads()
-          self.read_coordination_events[expected_element].acquire()
-        else:
-          self.write_coordination_events[expected_element].set()
-        time.sleep(0.5)  # Sleep to consistently "avoid" the race condition.
-        actual_element = self.evaluate(self.next_element)
-        if not done_first_event:
-          done_first_event = True
-          self.assertTrue(
-              self.read_coordination_events[expected_element].acquire(False))
-        self.assertEqual(expected_element * expected_element, actual_element,
-                         "At index %s: %s expected, got: %s" %
-                         (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    done_first_event = False
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=2,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=1))
+    for i, expected_element in enumerate(
+        self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                         2)):
+      if done_first_event:  # First event starts the worker threads.
+        self._allow_all_map_threads()
+        self.read_coordination_events[expected_element].acquire()
+      else:
+        self.write_coordination_events[expected_element].set()
+      time.sleep(0.5)  # Sleep to consistently "avoid" the race condition.
+      actual_element = self.evaluate(next_element())
+      if not done_first_event:
+        done_first_event = True
+        self.assertTrue(
+            self.read_coordination_events[expected_element].acquire(False))
+      self.assertEqual(
+          expected_element * expected_element, actual_element,
+          "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                 actual_element))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testTwoThreadsNoContentionWithRacesAndBlocking(self):
     self._testTwoThreadsNoContentionWithRacesAndBlocking()
@@ -414,21 +392,18 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
     self._testTwoThreadsNoContentionWithRacesAndBlocking(sloppy=True)
 
   def _testEmptyInput(self, sloppy=False):
-    with self.cached_session() as sess:
-      # Empty input.
-      self._clear_coordination_events()
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [],
-              self.cycle_length: 2,
-              self.block_length: 3,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 0,
-          })
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    # Empty input.
+    self._clear_coordination_events()
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([]),
+            cycle_length=2,
+            block_length=3,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=0))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testEmptyInput(self):
     self._testEmptyInput()
@@ -438,20 +413,17 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
 
   def _testNonEmptyInputIntoEmptyOutputs(self, sloppy=False):
     # Non-empty input leading to empty output.
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [0, 0, 0],
-              self.cycle_length: 2,
-              self.block_length: 3,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 0,
-          })
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([0, 0, 0]),
+            cycle_length=2,
+            block_length=3,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=0))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testNonEmptyInputIntoEmptyOutputs(self):
     self._testNonEmptyInputIntoEmptyOutputs()
@@ -462,35 +434,33 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
   def _testPartiallyEmptyOutputs(self, sloppy=False, prefetch_input_elements=1):
     race_indices = {2, 8, 14}  # Sequence points when sloppy mode has race conds
     # Mixture of non-empty and empty interleaved datasets.
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      done_first_event = False
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 0, 6],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: prefetch_input_elements,
-          })
-      for i, expected_element in enumerate(
-          self._interleave([[4] * 4, [], [6] * 6] * self.repeat_count, 2, 1)):
-        self.write_coordination_events[expected_element].set()
-        # First event starts the worker threads. Additionally, when running the
-        # sloppy case with prefetch_input_elements=0, we get stuck if we wait
-        # for the read coordination event for certain event orderings in the
-        # presence of finishing iterators.
-        if done_first_event and not (sloppy and (i in race_indices)):
-          self.read_coordination_events[expected_element].acquire()
-        actual_element = self.evaluate(self.next_element)
-        if not done_first_event or (sloppy and (i in race_indices)):
-          done_first_event = True
-          self.read_coordination_events[expected_element].acquire()
-        self.assertEqual(expected_element * expected_element, actual_element,
-                         "At index %s: %s expected, got: %s" %
-                         (i, expected_element, actual_element))
+    self._clear_coordination_events()
+    done_first_event = False
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 0, 6]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=prefetch_input_elements))
+    for i, expected_element in enumerate(
+        self._interleave([[4] * 4, [], [6] * 6] * self.repeat_count, 2, 1)):
+      self.write_coordination_events[expected_element].set()
+      # First event starts the worker threads. Additionally, when running the
+      # sloppy case with prefetch_input_elements=0, we get stuck if we wait
+      # for the read coordination event for certain event orderings in the
+      # presence of finishing iterators.
+      if done_first_event and not (sloppy and (i in race_indices)):
+        self.read_coordination_events[expected_element].acquire()
+      actual_element = self.evaluate(next_element())
+      if not done_first_event or (sloppy and (i in race_indices)):
+        done_first_event = True
+        self.read_coordination_events[expected_element].acquire()
+      self.assertEqual(
+          expected_element * expected_element, actual_element,
+          "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                 actual_element))
 
   def testPartiallyEmptyOutputs(self):
     self._testPartiallyEmptyOutputs()
@@ -501,89 +471,81 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
   def testDelayedOutputSloppy(self):
     # Explicitly control the sequence of events to ensure we correctly avoid
     # head-of-line blocking.
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: True,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 0,
-          })
-
-      mis_ordering = [
-          4, 4, 5, 4, 5, 5, 4, 5, 6, 6, 6, 5, 4, 4, 6, 6, 4, 4, 6, 5, 6, 6, 6,
-          6, 5, 5, 5, 5, 6, 6
-      ]
-      for element in mis_ordering:
-        self.write_coordination_events[element].set()
-        self.assertEqual(element * element, self.evaluate(self.next_element))
-        self.assertTrue(self.read_coordination_events[element].acquire(False))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=True,
+            buffer_output_elements=1,
+            prefetch_input_elements=0))
+
+    mis_ordering = [
+        4, 4, 5, 4, 5, 5, 4, 5, 6, 6, 6, 5, 4, 4, 6, 6, 4, 4, 6, 5, 6, 6, 6, 6,
+        5, 5, 5, 5, 6, 6
+    ]
+    for element in mis_ordering:
+      self.write_coordination_events[element].set()
+      self.assertEqual(element * element, self.evaluate(next_element()))
+      self.assertTrue(self.read_coordination_events[element].acquire(False))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testBlockLengthWithContentionSloppy(self):
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      done_first_event = False
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: True,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 1,
-          })
-      # Test against a generating sequence that differs from the uncontended
-      # case, in order to prove sloppy correctness.
-      for i, expected_element in enumerate(
-          self._interleave(
-              [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count,
-              cycle_length=2,
-              block_length=3)):
-        self.write_coordination_events[expected_element].set()
-        if done_first_event:  # First event starts the worker threads.
-          self.read_coordination_events[expected_element].acquire()
-        actual_element = self.evaluate(self.next_element)
-        if not done_first_event:
-          self.read_coordination_events[expected_element].acquire()
-          done_first_event = True
-        self.assertEqual(expected_element * expected_element, actual_element,
-                         "At index %s: %s expected, got: %s" %
-                         (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    done_first_event = False
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=True,
+            buffer_output_elements=1,
+            prefetch_input_elements=1))
+    # Test against a generating sequence that differs from the uncontended
+    # case, in order to prove sloppy correctness.
+    for i, expected_element in enumerate(
+        self._interleave(
+            [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count,
+            cycle_length=2,
+            block_length=3)):
+      self.write_coordination_events[expected_element].set()
+      if done_first_event:  # First event starts the worker threads.
+        self.read_coordination_events[expected_element].acquire()
+      actual_element = self.evaluate(next_element())
+      if not done_first_event:
+        self.read_coordination_events[expected_element].acquire()
+        done_first_event = True
+      self.assertEqual(
+          expected_element * expected_element, actual_element,
+          "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                 actual_element))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def _testEarlyExit(self, sloppy=False):
     # Exiting without consuming all input should not block
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 3,
-              self.block_length: 2,
-              self.sloppy: sloppy,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 0,
-          })
-      for i in range(4, 7):
-        self.write_coordination_events[i].set()
-      elem = self.evaluate(self.next_element)  # Start all workers
-      # Allow the one successful worker to progress beyond the py_func again.
-      elem = int(math.sqrt(elem))
-      self.write_coordination_events[elem].set()
-      self.read_coordination_events[elem].acquire()
-      # Allow the prefetch to succeed
-      for i in range(4, 7):
-        self.read_coordination_events[i].acquire()
-        self.write_coordination_events[i].set()
+    self._clear_coordination_events()
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=3,
+            block_length=2,
+            sloppy=sloppy,
+            buffer_output_elements=1,
+            prefetch_input_elements=0))
+    for i in range(4, 7):
+      self.write_coordination_events[i].set()
+    elem = self.evaluate(next_element())  # Start all workers
+    # Allow the one successful worker to progress beyond the py_func again.
+    elem = int(math.sqrt(elem))
+    self.write_coordination_events[elem].set()
+    self.read_coordination_events[elem].acquire()
+    # Allow the prefetch to succeed
+    for i in range(4, 7):
+      self.read_coordination_events[i].acquire()
+      self.write_coordination_events[i].set()
 
   def testEarlyExit(self):
     self._testEarlyExit()
@@ -603,12 +565,10 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
     dataset = dataset.apply(
         interleave_ops.parallel_interleave(
             interleave_fn, cycle_length=16, block_length=2, sloppy=sloppy))
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
-
-    with self.cached_session() as sess:
-      output_values = []
-      for _ in range(30):
-        output_values.append(self.evaluate(iterator.get_next()))
+    get_next = self.getNext(dataset)
+    output_values = []
+    for _ in range(30):
+      output_values.append(self.evaluate(get_next()))
 
     expected_values = self._interleave(
         [[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 1, 2)
@@ -629,53 +589,47 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
       return dataset_ops.Dataset.from_tensor_slices(
           sparse_ops.sparse_to_dense(x.indices, x.dense_shape, x.values))
 
-    dataset = dataset_ops.Dataset.range(10).map(_map_fn)
-    iterator = dataset_ops.make_initializable_iterator(dataset.apply(
-        interleave_ops.parallel_interleave(_interleave_fn, cycle_length=1)))
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-
-    with self.cached_session() as sess:
-      self.evaluate(init_op)
-      for i in range(10):
-        for j in range(2):
-          expected = [i, 0] if j % 2 == 0 else [0, -i]
-          self.assertAllEqual(expected, self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    dataset = dataset_ops.Dataset.range(10).map(_map_fn).apply(
+        interleave_ops.parallel_interleave(_interleave_fn, cycle_length=1))
+    get_next = self.getNext(dataset)
+
+    for i in range(10):
+      for j in range(2):
+        expected = [i, 0] if j % 2 == 0 else [0, -i]
+        self.assertAllEqual(expected, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   def testErrorsInOutputFn(self):
-    with self.cached_session() as sess:
-      self._clear_coordination_events()
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: False,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 0,
-          })
-
-      except_on_element_indices = set([3])
-
-      for i, expected_element in enumerate(
-          self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
-                           1)):
-        if i in except_on_element_indices:
-          self.error = ValueError()
-          self.write_coordination_events[expected_element].set()
-          with self.assertRaises(errors.InvalidArgumentError):
-            self.evaluate(self.next_element)
-        else:
-          self.write_coordination_events[expected_element].set()
-          actual_element = self.evaluate(self.next_element)
-          self.assertEqual(expected_element * expected_element, actual_element,
-                           "At index %s: %s expected, got: %s" %
-                           (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    self._clear_coordination_events()
+    next_element = self.getNext(
+        self.dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=False,
+            buffer_output_elements=1,
+            prefetch_input_elements=0))
+
+    except_on_element_indices = set([3])
+
+    for i, expected_element in enumerate(
+        self._interleave([[4] * 4, [5] * 5, [6] * 6] * self.repeat_count, 2,
+                         1)):
+      if i in except_on_element_indices:
+        self.error = ValueError()
+        self.write_coordination_events[expected_element].set()
+        with self.assertRaises(errors.InvalidArgumentError):
+          self.evaluate(next_element())
+      else:
+        self.write_coordination_events[expected_element].set()
+        actual_element = self.evaluate(next_element())
+        self.assertEqual(
+            expected_element * expected_element, actual_element,
+            "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                   actual_element))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testErrorsInInputFn(self):
 
@@ -692,41 +646,35 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
       dataset = dataset.repeat(x)
       return dataset
 
-    self.dataset = (
-        dataset_ops.Dataset.from_tensor_slices(self.input_values).map(map_fn)
-        .repeat(self.repeat_count).apply(
-            interleave_ops.parallel_interleave(interleave_fn, self.cycle_length,
-                                               self.block_length, self.sloppy,
-                                               self.buffer_output_elements,
-                                               self.prefetch_input_elements)))
-
-    self.iterator = dataset_ops.make_initializable_iterator(self.dataset)
-    self.init_op = self.iterator.initializer
-    self.next_element = self.iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: False,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 0,
-          })
-      for i, expected_element in enumerate(
-          self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
-        if expected_element == 5:
-          with self.assertRaises(errors.InvalidArgumentError):
-            self.evaluate(self.next_element)
-        else:
-          actual_element = self.evaluate(self.next_element)
-          self.assertEqual(expected_element, actual_element,
-                           "At index %s: %s expected, got: %s" %
-                           (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    def dataset_fn(input_values, cycle_length, block_length, sloppy,
+                   buffer_output_elements, prefetch_input_elements):
+      return dataset_ops.Dataset.from_tensor_slices(input_values).map(
+          map_fn).repeat(self.repeat_count).apply(
+              interleave_ops.parallel_interleave(
+                  interleave_fn, cycle_length, block_length, sloppy,
+                  buffer_output_elements, prefetch_input_elements))
+
+    next_element = self.getNext(
+        dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=False,
+            buffer_output_elements=1,
+            prefetch_input_elements=0))
+    for i, expected_element in enumerate(
+        self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
+      if expected_element == 5:
+        with self.assertRaises(errors.InvalidArgumentError):
+          self.evaluate(next_element())
+      else:
+        actual_element = self.evaluate(next_element())
+        self.assertEqual(
+            expected_element, actual_element,
+            "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                   actual_element))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testErrorsInInterleaveFn(self):
 
@@ -741,41 +689,35 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
       dataset = dataset.repeat(y)
       return dataset
 
-    self.dataset = (
-        dataset_ops.Dataset.from_tensor_slices(self.input_values)
-        .repeat(self.repeat_count).apply(
-            interleave_ops.parallel_interleave(interleave_fn, self.cycle_length,
-                                               self.block_length, self.sloppy,
-                                               self.buffer_output_elements,
-                                               self.prefetch_input_elements)))
-
-    self.iterator = dataset_ops.make_initializable_iterator(self.dataset)
-    self.init_op = self.iterator.initializer
-    self.next_element = self.iterator.get_next()
-
-    with self.cached_session() as sess:
-      sess.run(
-          self.init_op,
-          feed_dict={
-              self.input_values: [4, 5, 6],
-              self.cycle_length: 2,
-              self.block_length: 1,
-              self.sloppy: False,
-              self.buffer_output_elements: 1,
-              self.prefetch_input_elements: 0,
-          })
-      for i, expected_element in enumerate(
-          self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
-        if expected_element == 5:
-          with self.assertRaises(errors.InvalidArgumentError):
-            self.evaluate(self.next_element)
-        else:
-          actual_element = self.evaluate(self.next_element)
-          self.assertEqual(expected_element, actual_element,
-                           "At index %s: %s expected, got: %s" %
-                           (i, expected_element, actual_element))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(self.next_element)
+    def dataset_fn(input_values, cycle_length, block_length, sloppy,
+                   buffer_output_elements, prefetch_input_elements):
+      return dataset_ops.Dataset.from_tensor_slices(input_values).repeat(
+          self.repeat_count).apply(
+              interleave_ops.parallel_interleave(
+                  interleave_fn, cycle_length, block_length, sloppy,
+                  buffer_output_elements, prefetch_input_elements))
+
+    next_element = self.getNext(
+        dataset_fn(
+            input_values=np.int64([4, 5, 6]),
+            cycle_length=2,
+            block_length=1,
+            sloppy=False,
+            buffer_output_elements=1,
+            prefetch_input_elements=0))
+    for i, expected_element in enumerate(
+        self._interleave([[4] * 4, [5], [6] * 6] * self.repeat_count, 2, 1)):
+      if expected_element == 5:
+        with self.assertRaises(errors.InvalidArgumentError):
+          self.evaluate(next_element())
+      else:
+        actual_element = self.evaluate(next_element())
+        self.assertEqual(
+            expected_element, actual_element,
+            "At index %s: %s expected, got: %s" % (i, expected_element,
+                                                   actual_element))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(next_element())
 
   def testShutdownRace(self):
     dataset = dataset_ops.Dataset.range(20)
@@ -788,21 +730,17 @@ class ParallelInterleaveTest(test_base.DatasetTestBase):
             buffer_output_elements=1,
             prefetch_input_elements=0))
     dataset = dataset.batch(32)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    next_element = iterator.get_next()
 
     results = []
-    with self.cached_session() as sess:
-      for _ in range(2):
-        elements = []
-        self.evaluate(iterator.initializer)
-        try:
-          while True:
-            elements.extend(self.evaluate(next_element))
-        except errors.OutOfRangeError:
-          pass
-        results.append(elements)
-
+    for _ in range(2):
+      elements = []
+      next_element = self.getNext(dataset)
+      try:
+        while True:
+          elements.extend(self.evaluate(next_element()))
+      except errors.OutOfRangeError:
+        pass
+      results.append(elements)
     self.assertAllEqual(results[0], results[1])
 
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
index 77df8310d4..f36f94c02f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/reader_dataset_ops_test_base.py
@@ -26,12 +26,9 @@ from tensorflow.core.example import example_pb2
 from tensorflow.core.example import feature_pb2
 from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.data.ops import readers as core_readers
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.lib.io import python_io
-from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import parsing_ops
 from tensorflow.python.util import compat
 
@@ -150,26 +147,25 @@ class MakeBatchedFeaturesDatasetTestBase(test_base.DatasetTestBase):
       writer.close()
     return filenames
 
-  def _run_actual_batch(self, outputs, sess, label_key_provided=False):
+  def _run_actual_batch(self, outputs, label_key_provided=False):
     if label_key_provided:
       # outputs would be a tuple of (feature dict, label)
-      label_op = outputs[1]
-      features_op = outputs[0]
+      features, label = self.evaluate(outputs())
     else:
-      features_op = outputs
-      label_op = features_op["label"]
-    file_op = features_op["file"]
-    keywords_indices_op = features_op["keywords"].indices
-    keywords_values_op = features_op["keywords"].values
-    keywords_dense_shape_op = features_op["keywords"].dense_shape
-    record_op = features_op["record"]
-    return sess.run([
-        file_op, keywords_indices_op, keywords_values_op,
-        keywords_dense_shape_op, record_op, label_op
+      features = self.evaluate(outputs())
+      label = features["label"]
+    file_out = features["file"]
+    keywords_indices = features["keywords"].indices
+    keywords_values = features["keywords"].values
+    keywords_dense_shape = features["keywords"].dense_shape
+    record = features["record"]
+    return ([
+        file_out, keywords_indices, keywords_values, keywords_dense_shape,
+        record, label
     ])
 
-  def _next_actual_batch(self, sess, label_key_provided=False):
-    return self._run_actual_batch(self.outputs, sess, label_key_provided)
+  def _next_actual_batch(self, label_key_provided=False):
+    return self._run_actual_batch(self.outputs, label_key_provided)
 
   def _interleave(self, iterators, cycle_length):
     pending_iterators = iterators
@@ -251,7 +247,6 @@ class MakeBatchedFeaturesDatasetTestBase(test_base.DatasetTestBase):
       ]
 
   def verify_records(self,
-                     sess,
                      batch_size,
                      file_index=None,
                      num_epochs=1,
@@ -268,7 +263,7 @@ class MakeBatchedFeaturesDatasetTestBase(test_base.DatasetTestBase):
         num_epochs,
         cycle_length=interleave_cycle_length):
       actual_batch = self._next_actual_batch(
-          sess, label_key_provided=label_key_provided)
+          label_key_provided=label_key_provided)
       for i in range(len(expected_batch)):
         self.assertAllEqual(expected_batch[i], actual_batch[i])
 
@@ -323,21 +318,6 @@ class TFRecordDatasetTestBase(test_base.DatasetTestBase):
 
     self.test_filenames = self._createFiles()
 
-    self.filenames = array_ops.placeholder(dtypes.string, shape=[None])
-    self.num_epochs = array_ops.placeholder_with_default(
-        constant_op.constant(1, dtypes.int64), shape=[])
-    self.compression_type = array_ops.placeholder_with_default("", shape=[])
-    self.batch_size = array_ops.placeholder(dtypes.int64, shape=[])
-
-    repeat_dataset = core_readers.TFRecordDataset(
-        self.filenames, self.compression_type).repeat(self.num_epochs)
-    batch_dataset = repeat_dataset.batch(self.batch_size)
-
-    iterator = iterator_ops.Iterator.from_structure(batch_dataset.output_types)
-    self.init_op = iterator.make_initializer(repeat_dataset)
-    self.init_batch_op = iterator.make_initializer(batch_dataset)
-    self.get_next = iterator.get_next()
-
   def _record(self, f, r):
     return compat.as_bytes("Record %d of file %d" % (r, f))
 
diff --git a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
index eb66927ee5..fd96c0b521 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
@@ -21,574 +21,454 @@ from __future__ import print_function
 from tensorflow.python.data.experimental.kernel_tests import sql_dataset_test_base
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
 
 
+@test_util.run_all_in_graph_and_eager_modes
 class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
 
   # Test that SqlDataset can read from a database table.
   def testReadResultSet(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string), 2)
-    with self.cached_session() as sess:
-      for _ in range(2):  # Run twice to verify statelessness of db operations.
-        sess.run(
-            init_op,
-            feed_dict={
-                self.query: "SELECT first_name, last_name, motto FROM students "
-                            "ORDER BY first_name DESC"
-            })
-        for _ in range(2):  # Dataset is repeated. See setUp.
-          self.assertEqual((b"John", b"Doe", b"Hi!"), self.evaluate(get_next))
-          self.assertEqual((b"Jane", b"Moe", b"Hi again!"),
-                           self.evaluate(get_next))
-        with self.assertRaises(errors.OutOfRangeError):
-          self.evaluate(get_next)
+    for _ in range(2):  # Run twice to verify statelessness of db operations.
+      dataset = self._createSqlDataset(
+          query="SELECT first_name, last_name, motto FROM students "
+          "ORDER BY first_name DESC",
+          output_types=(dtypes.string, dtypes.string, dtypes.string),
+          num_repeats=2)
+      self.assertDatasetProduces(
+          dataset,
+          expected_output=[(b"John", b"Doe", b"Hi!"),
+                           (b"Jane", b"Moe", b"Hi again!")] * 2,
+          num_test_iterations=2)
 
   # Test that SqlDataset works on a join query.
   def testReadResultSetJoinQuery(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT students.first_name, state, motto FROM students "
-                  "INNER JOIN people "
-                  "ON students.first_name = people.first_name "
-                  "AND students.last_name = people.last_name"
-          })
-      self.assertEqual((b"John", b"California", b"Hi!"),
-                       self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT students.first_name, state, motto FROM students "
+            "INNER JOIN people "
+            "ON students.first_name = people.first_name "
+            "AND students.last_name = people.last_name",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+
+    self.assertEqual((b"John", b"California", b"Hi!"),
+                     self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that SqlDataset can read a database entry with a null-terminator
   # in the middle of the text and place the entry in a `string` tensor.
   def testReadResultSetNullTerminator(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT first_name, last_name, favorite_nonsense_word "
-                  "FROM students ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", b"Doe", b"n\0nsense"), self.evaluate(get_next))
-      self.assertEqual((b"Jane", b"Moe", b"nonsense\0"),
-                       self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name, favorite_nonsense_word "
+            "FROM students ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+
+    self.assertEqual((b"John", b"Doe", b"n\0nsense"), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", b"Moe", b"nonsense\0"),
+                     self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that SqlDataset works when used on two different queries.
   # Because the output types of the dataset must be determined at graph-creation
   # time, the two queries must have the same number and types of columns.
   def testReadResultSetReuseSqlDataset(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, last_name, motto FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", b"Doe", b"Hi!"), self.evaluate(get_next))
-      self.assertEqual((b"Jane", b"Moe", b"Hi again!"), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, last_name, state FROM people "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", b"Doe", b"California"),
-                       self.evaluate(get_next))
-      self.assertEqual((b"Benjamin", b"Franklin", b"Pennsylvania"),
-                       self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name, motto FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+    self.assertEqual((b"John", b"Doe", b"Hi!"), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", b"Moe", b"Hi again!"), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name, state FROM people "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+    self.assertEqual((b"John", b"Doe", b"California"),
+                     self.evaluate(get_next()))
+    self.assertEqual((b"Benjamin", b"Franklin", b"Pennsylvania"),
+                     self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that an `OutOfRangeError` is raised on the first call to
   # `get_next_str_only` if result set is empty.
   def testReadEmptyResultSet(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, last_name, motto FROM students "
-                          "WHERE first_name = 'Nonexistent'"
-          })
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name, motto FROM students "
+            "WHERE first_name = 'Nonexistent'",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that an error is raised when `driver_name` is invalid.
   def testReadResultSetWithInvalidDriverName(self):
-    init_op = self._createSqlDataset((dtypes.string, dtypes.string,
-                                      dtypes.string))[0]
-    with self.cached_session() as sess:
-      with self.assertRaises(errors.InvalidArgumentError):
-        sess.run(
-            init_op,
-            feed_dict={
-                self.driver_name: "sqlfake",
-                self.query: "SELECT first_name, last_name, motto FROM students "
-                            "ORDER BY first_name DESC"
-            })
+    dataset = self._createSqlDataset(
+        driver_name="sqlfake",
+        query="SELECT first_name, last_name, motto FROM students "
+        "ORDER BY first_name DESC",
+        output_types=(dtypes.string, dtypes.string, dtypes.string))
+    self.assertDatasetProduces(
+        dataset, expected_error=(errors.InvalidArgumentError, ""))
 
   # Test that an error is raised when a column name in `query` is nonexistent
   def testReadResultSetWithInvalidColumnName(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT first_name, last_name, fake_column FROM students "
-                  "ORDER BY first_name DESC"
-          })
-      with self.assertRaises(errors.UnknownError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name, fake_column FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+    with self.assertRaises(errors.UnknownError):
+      self.evaluate(get_next())
 
   # Test that an error is raised when there is a syntax error in `query`.
   def testReadResultSetOfQueryWithSyntaxError(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELEmispellECT first_name, last_name, motto FROM students "
-                  "ORDER BY first_name DESC"
-          })
-      with self.assertRaises(errors.UnknownError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELEmispellECT first_name, last_name, motto FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+    with self.assertRaises(errors.UnknownError):
+      self.evaluate(get_next())
 
   # Test that an error is raised when the number of columns in `query`
-  # does not match the length of `output_types`.
+  # does not match the length of `, output_types`.
   def testReadResultSetWithMismatchBetweenColumnsAndOutputTypes(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, last_name FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      with self.assertRaises(errors.InvalidArgumentError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(get_next())
 
   # Test that no results are returned when `query` is an insert query rather
   # than a select query. In particular, the error refers to the number of
   # output types passed to the op not matching the number of columns in the
   # result set of the query (namely, 0 for an insert statement.)
   def testReadResultSetOfInsertQuery(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.string))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "INSERT INTO students (first_name, last_name, motto) "
-                  "VALUES ('Foo', 'Bar', 'Baz'), ('Fizz', 'Buzz', 'Fizzbuzz')"
-          })
-      with self.assertRaises(errors.InvalidArgumentError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="INSERT INTO students (first_name, last_name, motto) "
+            "VALUES ('Foo', 'Bar', 'Baz'), ('Fizz', 'Buzz', 'Fizzbuzz')",
+            output_types=(dtypes.string, dtypes.string, dtypes.string)))
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read an integer from a SQLite database table and
   # place it in an `int8` tensor.
   def testReadResultSetInt8(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int8))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, desk_number FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 9), self.evaluate(get_next))
-      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, desk_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int8)))
+    self.assertEqual((b"John", 9), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", 127), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a negative or 0-valued integer from a
   # SQLite database table and place it in an `int8` tensor.
   def testReadResultSetInt8NegativeAndZero(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int8,
-                                                dtypes.int8))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, income, favorite_negative_number "
-                          "FROM students "
-                          "WHERE first_name = 'John' ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 0, -2), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, income, favorite_negative_number "
+            "FROM students "
+            "WHERE first_name = 'John' ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int8, dtypes.int8)))
+    self.assertEqual((b"John", 0, -2), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a large (positive or negative) integer from
   # a SQLite database table and place it in an `int8` tensor.
   def testReadResultSetInt8MaxValues(self):
-    init_op, get_next = self._createSqlDataset((dtypes.int8, dtypes.int8))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT desk_number, favorite_negative_number FROM students "
-                  "ORDER BY first_name DESC"
-          })
-      self.assertEqual((9, -2), self.evaluate(get_next))
-      # Max and min values of int8
-      self.assertEqual((127, -128), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT desk_number, favorite_negative_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.int8, dtypes.int8)))
+    self.assertEqual((9, -2), self.evaluate(get_next()))
+    # Max and min values of int8
+    self.assertEqual((127, -128), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read an integer from a SQLite database table and
   # place it in an `int16` tensor.
   def testReadResultSetInt16(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int16))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, desk_number FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 9), self.evaluate(get_next))
-      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, desk_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int16)))
+    self.assertEqual((b"John", 9), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", 127), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a negative or 0-valued integer from a
   # SQLite database table and place it in an `int16` tensor.
   def testReadResultSetInt16NegativeAndZero(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int16,
-                                                dtypes.int16))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, income, favorite_negative_number "
-                          "FROM students "
-                          "WHERE first_name = 'John' ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 0, -2), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, income, favorite_negative_number "
+            "FROM students "
+            "WHERE first_name = 'John' ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int16, dtypes.int16)))
+    self.assertEqual((b"John", 0, -2), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a large (positive or negative) integer from
   # a SQLite database table and place it in an `int16` tensor.
   def testReadResultSetInt16MaxValues(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int16))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, favorite_medium_sized_number "
-                          "FROM students ORDER BY first_name DESC"
-          })
-      # Max value of int16
-      self.assertEqual((b"John", 32767), self.evaluate(get_next))
-      # Min value of int16
-      self.assertEqual((b"Jane", -32768), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, favorite_medium_sized_number "
+            "FROM students ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int16)))
+    # Max value of int16
+    self.assertEqual((b"John", 32767), self.evaluate(get_next()))
+    # Min value of int16
+    self.assertEqual((b"Jane", -32768), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read an integer from a SQLite database table and
   # place it in an `int32` tensor.
   def testReadResultSetInt32(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int32))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, desk_number FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 9), self.evaluate(get_next))
-      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, desk_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int32)))
+    self.assertEqual((b"John", 9), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", 127), self.evaluate(get_next()))
 
   # Test that `SqlDataset` can read a negative or 0-valued integer from a
   # SQLite database table and place it in an `int32` tensor.
   def testReadResultSetInt32NegativeAndZero(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int32))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, income FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 0), self.evaluate(get_next))
-      self.assertEqual((b"Jane", -20000), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, income FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int32)))
+    self.assertEqual((b"John", 0), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", -20000), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a large (positive or negative) integer from
   # a SQLite database table and place it in an `int32` tensor.
   def testReadResultSetInt32MaxValues(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int32))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, favorite_number FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      # Max value of int32
-      self.assertEqual((b"John", 2147483647), self.evaluate(get_next))
-      # Min value of int32
-      self.assertEqual((b"Jane", -2147483648), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, favorite_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int32)))
+    # Max value of int32
+    self.assertEqual((b"John", 2147483647), self.evaluate(get_next()))
+    # Min value of int32
+    self.assertEqual((b"Jane", -2147483648), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a numeric `varchar` from a SQLite database
   # table and place it in an `int32` tensor.
   def testReadResultSetInt32VarCharColumnAsInt(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int32))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, school_id FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 123), self.evaluate(get_next))
-      self.assertEqual((b"Jane", 1000), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, school_id FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int32)))
+    self.assertEqual((b"John", 123), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", 1000), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read an integer from a SQLite database table
   # and place it in an `int64` tensor.
   def testReadResultSetInt64(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int64))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, desk_number FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 9), self.evaluate(get_next))
-      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, desk_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int64)))
+    self.assertEqual((b"John", 9), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", 127), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a negative or 0-valued integer from a
   # SQLite database table and place it in an `int64` tensor.
   def testReadResultSetInt64NegativeAndZero(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int64))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, income FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 0), self.evaluate(get_next))
-      self.assertEqual((b"Jane", -20000), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, income FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int64)))
+    self.assertEqual((b"John", 0), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", -20000), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a large (positive or negative) integer from
   # a SQLite database table and place it in an `int64` tensor.
   def testReadResultSetInt64MaxValues(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.int64))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT first_name, favorite_big_number FROM students "
-                  "ORDER BY first_name DESC"
-          })
-      # Max value of int64
-      self.assertEqual((b"John", 9223372036854775807), self.evaluate(get_next))
-      # Min value of int64
-      self.assertEqual((b"Jane", -9223372036854775808), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, favorite_big_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.int64)))
+    # Max value of int64
+    self.assertEqual((b"John", 9223372036854775807), self.evaluate(get_next()))
+    # Min value of int64
+    self.assertEqual((b"Jane", -9223372036854775808), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read an integer from a SQLite database table and
   # place it in a `uint8` tensor.
   def testReadResultSetUInt8(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.uint8))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, desk_number FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 9), self.evaluate(get_next))
-      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, desk_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.uint8)))
+    self.assertEqual((b"John", 9), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", 127), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read the minimum and maximum uint8 values from a
   # SQLite database table and place them in `uint8` tensors.
   def testReadResultSetUInt8MinAndMaxValues(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.uint8))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, brownie_points FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      # Min value of uint8
-      self.assertEqual((b"John", 0), self.evaluate(get_next))
-      # Max value of uint8
-      self.assertEqual((b"Jane", 255), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, brownie_points FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.uint8)))
+    # Min value of uint8
+    self.assertEqual((b"John", 0), self.evaluate(get_next()))
+    # Max value of uint8
+    self.assertEqual((b"Jane", 255), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read an integer from a SQLite database table
   # and place it in a `uint16` tensor.
   def testReadResultSetUInt16(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.uint16))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, desk_number FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", 9), self.evaluate(get_next))
-      self.assertEqual((b"Jane", 127), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, desk_number FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.uint16)))
+    self.assertEqual((b"John", 9), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", 127), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read the minimum and maximum uint16 values from a
   # SQLite database table and place them in `uint16` tensors.
   def testReadResultSetUInt16MinAndMaxValues(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.uint16))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, account_balance FROM students "
-                          "ORDER BY first_name DESC"
-          })
-      # Min value of uint16
-      self.assertEqual((b"John", 0), self.evaluate(get_next))
-      # Max value of uint16
-      self.assertEqual((b"Jane", 65535), self.evaluate(get_next))
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, account_balance FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.uint16)))
+    # Min value of uint16
+    self.assertEqual((b"John", 0), self.evaluate(get_next()))
+    # Max value of uint16
+    self.assertEqual((b"Jane", 65535), self.evaluate(get_next()))
     with self.assertRaises(errors.OutOfRangeError):
-      self.evaluate(get_next)
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a 0-valued and 1-valued integer from a
   # SQLite database table and place them as `True` and `False` respectively
   # in `bool` tensors.
   def testReadResultSetBool(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.bool))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT first_name, registration_complete FROM students "
-                  "ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", True), self.evaluate(get_next))
-      self.assertEqual((b"Jane", False), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, registration_complete FROM students "
+            "ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.bool)))
+    self.assertEqual((b"John", True), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", False), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read an integer that is not 0-valued or 1-valued
   # from a SQLite database table and place it as `True` in a `bool` tensor.
   def testReadResultSetBoolNotZeroOrOne(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.bool))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query: "SELECT first_name, favorite_medium_sized_number "
-                          "FROM students ORDER BY first_name DESC"
-          })
-      self.assertEqual((b"John", True), self.evaluate(get_next))
-      self.assertEqual((b"Jane", True), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, favorite_medium_sized_number "
+            "FROM students ORDER BY first_name DESC",
+            output_types=(dtypes.string, dtypes.bool)))
+    self.assertEqual((b"John", True), self.evaluate(get_next()))
+    self.assertEqual((b"Jane", True), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a float from a SQLite database table
   # and place it in a `float64` tensor.
   def testReadResultSetFloat64(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.float64))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT first_name, last_name, victories FROM townspeople "
-                  "ORDER BY first_name"
-          })
-      self.assertEqual((b"George", b"Washington", 20.0),
-                       self.evaluate(get_next))
-      self.assertEqual((b"John", b"Adams", -19.95), self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name, victories FROM townspeople "
+            "ORDER BY first_name",
+            output_types=(dtypes.string, dtypes.string, dtypes.float64)))
+    self.assertEqual((b"George", b"Washington", 20.0),
+                     self.evaluate(get_next()))
+    self.assertEqual((b"John", b"Adams", -19.95), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a float from a SQLite database table beyond
   # the precision of 64-bit IEEE, without throwing an error. Test that
   # `SqlDataset` identifies such a value as equal to itself.
   def testReadResultSetFloat64OverlyPrecise(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.float64))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT first_name, last_name, accolades FROM townspeople "
-                  "ORDER BY first_name"
-          })
-      self.assertEqual(
-          (b"George", b"Washington",
-           1331241.321342132321324589798264627463827647382647382643874),
-          self.evaluate(get_next))
-      self.assertEqual(
-          (b"John", b"Adams",
-           1331241321342132321324589798264627463827647382647382643874.0),
-          self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name, accolades FROM townspeople "
+            "ORDER BY first_name",
+            output_types=(dtypes.string, dtypes.string, dtypes.float64)))
+    self.assertEqual(
+        (b"George", b"Washington",
+         1331241.321342132321324589798264627463827647382647382643874),
+        self.evaluate(get_next()))
+    self.assertEqual(
+        (b"John", b"Adams",
+         1331241321342132321324589798264627463827647382647382643874.0),
+        self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   # Test that `SqlDataset` can read a float from a SQLite database table,
   # representing the largest integer representable as a 64-bit IEEE float
   # such that the previous integer is also representable as a 64-bit IEEE float.
   # Test that `SqlDataset` can distinguish these two numbers.
   def testReadResultSetFloat64LargestConsecutiveWholeNumbersNotEqual(self):
-    init_op, get_next = self._createSqlDataset((dtypes.string, dtypes.string,
-                                                dtypes.float64))
-    with self.cached_session() as sess:
-      sess.run(
-          init_op,
-          feed_dict={
-              self.query:
-                  "SELECT first_name, last_name, triumphs FROM townspeople "
-                  "ORDER BY first_name"
-          })
-      self.assertNotEqual((b"George", b"Washington", 9007199254740992.0),
-                          self.evaluate(get_next))
-      self.assertNotEqual((b"John", b"Adams", 9007199254740991.0),
-                          self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        self.evaluate(get_next)
+    get_next = self.getNext(
+        self._createSqlDataset(
+            query="SELECT first_name, last_name, triumphs FROM townspeople "
+            "ORDER BY first_name",
+            output_types=(dtypes.string, dtypes.string, dtypes.float64)))
+    self.assertNotEqual((b"George", b"Washington", 9007199254740992.0),
+                        self.evaluate(get_next()))
+    self.assertNotEqual((b"John", b"Adams", 9007199254740991.0),
+                        self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
index 809e09c804..90451b865f 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test_base.py
@@ -24,28 +24,23 @@ import sqlite3
 
 from tensorflow.python.data.experimental.ops import readers
 from tensorflow.python.data.kernel_tests import test_base
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
 class SqlDatasetTestBase(test_base.DatasetTestBase):
   """Base class for setting up and testing SqlDataset."""
 
-  def _createSqlDataset(self, output_types, num_repeats=1):
-    dataset = readers.SqlDataset(self.driver_name, self.data_source_name,
-                                 self.query, output_types).repeat(num_repeats)
-    iterator = dataset_ops.make_initializable_iterator(dataset)
-    init_op = iterator.initializer
-    get_next = iterator.get_next()
-    return init_op, get_next
+  def _createSqlDataset(self,
+                        query,
+                        output_types,
+                        driver_name="sqlite",
+                        num_repeats=1):
+    dataset = readers.SqlDataset(driver_name, self.data_source_name, query,
+                                 output_types).repeat(num_repeats)
+    return dataset
 
   def setUp(self):
     self.data_source_name = os.path.join(test.get_temp_dir(), "tftest.sqlite")
-    self.driver_name = array_ops.placeholder_with_default(
-        array_ops.constant("sqlite", dtypes.string), shape=[])
-    self.query = array_ops.placeholder(dtypes.string, shape=[])
 
     conn = sqlite3.connect(self.data_source_name)
     c = conn.cursor()
diff --git a/tensorflow/python/data/kernel_tests/test_base.py b/tensorflow/python/data/kernel_tests/test_base.py
index 8793fd31bd..7aa7f33003 100644
--- a/tensorflow/python/data/kernel_tests/test_base.py
+++ b/tensorflow/python/data/kernel_tests/test_base.py
@@ -58,7 +58,7 @@ class DatasetTestBase(test.TestCase):
       A callable that returns the next element of `dataset`.
     """
     if context.executing_eagerly():
-      iterator = dataset.__iter__()
+      iterator = iter(dataset)
       return iterator._next_internal  # pylint: disable=protected-access
     else:
       if requires_initialization:
-- 
GitLab


From 595df0920f7809c826b9bc14b21653602c27c34a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 11:20:26 -0800
Subject: [PATCH 610/873] Update docker image with TensorRT 5.0.2.

PiperOrigin-RevId: 225573492
---
 .../ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04    | 7 ++++++-
 .../ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04     | 7 ++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04 b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04
index 03de89b717..4fe86066c9 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04
+++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04
@@ -20,6 +20,7 @@ ENV CUDA_VERSION 10.0.130
 ENV CUDA_PKG_VERSION 10-0=$CUDA_VERSION-1
 ENV CUDNN_VERSION 7.3.1.20
 ENV NCCL_VERSION 2.3.5
+ENV TENSORRT_VERSION 5.0.2
 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
 ENV NVIDIA_REQUIRE_CUDA "cuda>=10.0,driver>=410"
 ENV NVIDIA_VISIBLE_DEVICES all
@@ -48,7 +49,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         libcudnn7=$CUDNN_VERSION-1+cuda10.0 \
         libcudnn7-dev=$CUDNN_VERSION-1+cuda10.0 \
         libnccl2=$NCCL_VERSION-2+cuda10.0 \
-        libnccl-dev=$NCCL_VERSION-2+cuda10.0 && \
+        libnccl-dev=$NCCL_VERSION-2+cuda10.0 \
+        nvinfer-runtime-trt-repo-ubuntu1604-$TENSORRT_VERSION-ga-cuda10.0 && \
+    apt-get update && apt-get install -y --no-install-recommends \
+        libnvinfer5=$TENSORRT_VERSION-1+cuda10.0 \
+        libnvinfer-dev=$TENSORRT_VERSION-1+cuda10.0 && \
     ln -s cuda-10.0 /usr/local/cuda && \
     apt-mark hold libcudnn7 && \
     apt-mark hold libnccl2 && \
diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04 b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04
index eb6ca7c8f0..60a23e1edb 100644
--- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04
+++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda9.0-cudnn7-ubuntu14.04
@@ -26,6 +26,7 @@ ENV NVIDIA_VISIBLE_DEVICES all
 ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
 ENV NVIDIA_REQUIRE_CUDA "cuda>=9.0"
 ENV NCCL_VERSION 2.2.13
+ENV TENSORRT_VERSION 5.0.2
 ENV CUDNN_VERSION 7.1.4.18
 
 # TODO(b/110903506): /usr/loca/cuda/lib64/stubs should not be needed in
@@ -53,7 +54,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         cuda-cublas-dev-9-0=9.0.176.4-1 \
         libnccl-dev=$NCCL_VERSION-1+cuda9.0 \
         libcudnn7-dev=$CUDNN_VERSION-1+cuda9.0 \
-        libcudnn7=$CUDNN_VERSION-1+cuda9.0 && \
+        libcudnn7=$CUDNN_VERSION-1+cuda9.0 \
+        nvinfer-runtime-trt-repo-ubuntu1604-$TENSORRT_VERSION-ga-cuda9.0 && \
+    apt-get update && apt-get install -y --no-install-recommends \
+        libnvinfer5=$TENSORRT_VERSION-1+cuda9.0 \
+        libnvinfer-dev=$TENSORRT_VERSION-1+cuda9.0 && \
     ln -s cuda-9.0 /usr/local/cuda && \
     apt-mark hold libnccl2 && \
     apt-mark hold libcudnn7 libcudnn7-dev && \
-- 
GitLab


From 446a5fb00e6ed8cb559bd2d12344fda2f9e5e0ce Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 11:22:00 -0800
Subject: [PATCH 611/873] Add remote configuration for TensorRT.

PiperOrigin-RevId: 225573787
---
 tensorflow/opensource_only.files              |  3 +
 third_party/tensorrt/remote.BUILD.tpl         |  7 +++
 third_party/tensorrt/tensorrt_configure.bzl   | 10 ++++
 .../toolchains/preconfig/generate/BUILD       | 12 ++--
 .../preconfig/generate/containers.bzl         |  2 +-
 .../preconfig/generate/generate.bzl           | 12 ++--
 .../toolchains/preconfig/generate/generate.sh | 14 +++--
 .../preconfig/ubuntu14.04/tensorrt5/BUILD     | 56 +++++++++++++++++++
 .../preconfig/ubuntu14.04/tensorrt5/WORKSPACE |  2 +
 .../ubuntu14.04/tensorrt5/build_defs.bzl      |  7 +++
 10 files changed, 108 insertions(+), 17 deletions(-)
 create mode 100644 third_party/tensorrt/remote.BUILD.tpl
 create mode 100755 third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
 create mode 100644 third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/WORKSPACE
 create mode 100755 third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/build_defs.bzl

diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 0af84f8f54..88800c2951 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -37,6 +37,8 @@ tensorflow/third_party/toolchains/clang6/README.md
 tensorflow/third_party/toolchains/clang6/repo.bzl
 tensorflow/third_party/toolchains/clang6/CROSSTOOL.tpl
 tensorflow/third_party/toolchains/clang6/clang.BUILD
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/build_defs.bzl
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/py3/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc-cuda9.0/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
@@ -185,6 +187,7 @@ tensorflow/third_party/tensorrt/BUILD
 tensorflow/third_party/tensorrt/build_defs.bzl.tpl
 tensorflow/third_party/tensorrt/BUILD.tpl
 tensorflow/third_party/tensorrt/tensorrt_configure.bzl
+tensorflow/third_party/tensorrt/remote.BUILD.tpl
 tensorflow/third_party/kafka/config.patch
 tensorflow/third_party/kafka/BUILD
 tensorflow/third_party/android/BUILD
diff --git a/third_party/tensorrt/remote.BUILD.tpl b/third_party/tensorrt/remote.BUILD.tpl
new file mode 100644
index 0000000000..7598e7aa4b
--- /dev/null
+++ b/third_party/tensorrt/remote.BUILD.tpl
@@ -0,0 +1,7 @@
+licenses(["notice"])  # Apache 2.0
+
+package(default_visibility = ["//visibility:public"])
+
+alias(name="LICENSE", actual = "%{target}:LICENSE")
+alias(name = "tensorrt_headers", actual = "%{target}:tensorrt_headers")
+alias(name = "nv_infer", actual = "%{target}:nv_infer")
diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl
index 9b946505a6..77ee6622d1 100644
--- a/third_party/tensorrt/tensorrt_configure.bzl
+++ b/third_party/tensorrt/tensorrt_configure.bzl
@@ -17,6 +17,7 @@ load(
 )
 
 _TENSORRT_INSTALL_PATH = "TENSORRT_INSTALL_PATH"
+_TF_TENSORRT_CONFIG_REPO = "TF_TENSORRT_CONFIG_REPO"
 _TF_TENSORRT_VERSION = "TF_TENSORRT_VERSION"
 
 _TF_TENSORRT_LIBS = ["nvinfer"]
@@ -154,6 +155,15 @@ def _create_dummy_repository(repository_ctx):
 
 def _tensorrt_configure_impl(repository_ctx):
   """Implementation of the tensorrt_configure repository rule."""
+  if _TF_TENSORRT_CONFIG_REPO in repository_ctx.os.environ:
+    # Forward to the pre-configured remote repository.
+    repository_ctx.template("BUILD", Label("//third_party/tensorrt:remote.BUILD.tpl"), {
+        "%{target}": repository_ctx.os.environ[_TF_TENSORRT_CONFIG_REPO],
+    })
+    # Set up config file.
+    _tpl(repository_ctx, "build_defs.bzl", {"%{tensorrt_is_configured}": "True"})
+    return
+
   if _TENSORRT_INSTALL_PATH not in repository_ctx.os.environ:
     _create_dummy_repository(repository_ctx)
     return
diff --git a/third_party/toolchains/preconfig/generate/BUILD b/third_party/toolchains/preconfig/generate/BUILD
index 7e3e93d600..b4c98dc94d 100644
--- a/third_party/toolchains/preconfig/generate/BUILD
+++ b/third_party/toolchains/preconfig/generate/BUILD
@@ -3,33 +3,37 @@ licenses(["restricted"])
 load(":generate.bzl", "tensorflow_rbe_config")
 
 tensorflow_rbe_config(
-    name = "ubuntu14.04-py3-gcc-cuda9.0-cudnn7-nccl2",
+    name = "ubuntu14.04-py3-gcc-cuda9.0-cudnn7-tensorrt5",
     compiler = "gcc",
     cuda_version = "9.0",
     cudnn_version = "7",
     python_version = "3",
+    tensorrt_version = "5",
 )
 
 tensorflow_rbe_config(
-    name = "ubuntu14.04-py3-clang-cuda9.0-cudnn7-nccl2",
+    name = "ubuntu14.04-py3-clang-cuda9.0-cudnn7-tensorrt5",
     compiler = "clang",
     cuda_version = "9.0",
     cudnn_version = "7",
     python_version = "3",
+    tensorrt_version = "5",
 )
 
 tensorflow_rbe_config(
-    name = "ubuntu14.04-py3-gcc-cuda10.0-cudnn7-nccl2",
+    name = "ubuntu14.04-py3-gcc-cuda10.0-cudnn7-tensorrt5",
     compiler = "gcc",
     cuda_version = "10.0",
     cudnn_version = "7",
     python_version = "3",
+    tensorrt_version = "5",
 )
 
 tensorflow_rbe_config(
-    name = "ubuntu14.04-py3-clang-cuda10.0-cudnn7-nccl2",
+    name = "ubuntu14.04-py3-clang-cuda10.0-cudnn7-tensorrt5",
     compiler = "clang",
     cuda_version = "10.0",
     cudnn_version = "7",
     python_version = "3",
+    tensorrt_version = "5",
 )
diff --git a/third_party/toolchains/preconfig/generate/containers.bzl b/third_party/toolchains/preconfig/generate/containers.bzl
index 7099b9bf3e..c64f7f3f8f 100644
--- a/third_party/toolchains/preconfig/generate/containers.bzl
+++ b/third_party/toolchains/preconfig/generate/containers.bzl
@@ -1,4 +1,4 @@
 container_digests = {
-    "cuda9.0-cudnn7-ubuntu14.04": "sha256:c26138f4c38c754da2bad44a8a068523abf7fbd71d58a57ce92e5342c5431bf5",
+    "cuda9.0-cudnn7-ubuntu14.04": "sha256:c43ed5341dd765042e0bbd1bf50fadeedd649d1e0c34d81999cb6ce30916cb95",
     "cuda10.0-cudnn7-ubuntu14.04": "sha256:66e7d592c8149291d5562a0f3093655a15b09c22e0eb30a87b3b6469b7a30ffc",
 }
diff --git a/third_party/toolchains/preconfig/generate/generate.bzl b/third_party/toolchains/preconfig/generate/generate.bzl
index fb2af02a53..75deea41b8 100644
--- a/third_party/toolchains/preconfig/generate/generate.bzl
+++ b/third_party/toolchains/preconfig/generate/generate.bzl
@@ -3,15 +3,15 @@ load(
     "docker_toolchain_autoconfig",
 )
 
-def _tensorflow_rbe_config(name, cuda_version, cudnn_version, python_version, compiler):
+def _tensorflow_rbe_config(name, cuda_version, cudnn_version, python_version, compiler, tensorrt_version):
     docker_toolchain_autoconfig(
         name = name,
         base = "@cuda%s-cudnn%s-ubuntu14.04//image" % (cuda_version, cudnn_version),
-        bazel_version = "0.16.1",
+        bazel_version = "0.19.2",
         config_repos = [
             "local_config_cuda",
             "local_config_python",
-            "local_config_nccl",
+            "local_config_tensorrt",
         ],
         env = {
             "ABI_VERSION": "gcc",
@@ -31,10 +31,10 @@ def _tensorflow_rbe_config(name, cuda_version, cudnn_version, python_version, co
             "TF_ENABLE_XLA": "1",
             "TF_CUDNN_VERSION": cudnn_version,
             "TF_CUDA_VERSION": cuda_version,
-            "NCCL_INSTALL_PATH": "/usr/lib",
-            "NCCL_HDR_PATH": "/usr/include",
-            "TF_NCCL_VERSION": "2",
             "CUDNN_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
+            "TF_NEED_TENSORRT" : "1",
+            "TF_TENSORRT_VERSION": tensorrt_version,
+            "TENSORRT_INSTALL_PATH": "/usr/lib/x86_64-linux-gnu",
         },
         mount_project = "$(mount_project)",
         tags = ["manual"],
diff --git a/third_party/toolchains/preconfig/generate/generate.sh b/third_party/toolchains/preconfig/generate/generate.sh
index 1f39fcdf6d..79407d59ac 100755
--- a/third_party/toolchains/preconfig/generate/generate.sh
+++ b/third_party/toolchains/preconfig/generate/generate.sh
@@ -33,7 +33,9 @@ PY_VERSION="${PLATFORM[1]}"
 COMPILER="${PLATFORM[2]}"
 CUDA_VERSION="${PLATFORM[3]}"
 CUDNN_VERSION="${PLATFORM[4]}"
-NCCL_VERSION="${PLATFORM[5]}"
+TENSORRT_VERSION="${PLATFORM[5]}"
+
+# TODO(klimek): Put this into the name.
 
 if [[ "${COMPILER}" == "gcc" ]]; then
   COMPILER="gcc-nvcc-${CUDA_VERSION}"
@@ -44,7 +46,7 @@ echo "Python: ${PY_VERSION}"
 echo "Compiler: ${COMPILER}"
 echo "CUDA: ${CUDA_VERSION}"
 echo "CUDNN: ${CUDNN_VERSION}"
-echo "NCCL: ${NCCL_VERSION}"
+echo "TensorRT: ${TENSORRT_VERSION}"
 
 bazel build --define=mount_project="${PWD}" "${PKG}/generate:${TARGET}"
 cd "${TEMPDIR}"
@@ -58,8 +60,8 @@ find . -empty -delete
 # <OS>/
 #   <CUDA>-<CUDNN>/
 #   <COMPILER>/
-#   <NCCL>/
 #   <PYTHON>/
+#   <TENSORRT>/
 
 # Create our toplevel output directory for the OS.
 mkdir "${OS}"
@@ -67,15 +69,15 @@ mkdir "${OS}"
 # Python:
 mv local_config_python "${OS}/${PY_VERSION}"
 
-# NCCL:
-mv local_config_nccl "${OS}/${NCCL_VERSION}"
-
 # Compiler:
 mv local_config_cuda/crosstool "${OS}/${COMPILER}"
 
 # CUDA:
 mv local_config_cuda "${OS}/${CUDA_VERSION}-${CUDNN_VERSION}"
 
+# TensorRT:
+mv local_config_tensorrt "${OS}/${TENSORRT_VERSION}"
+
 # Cleanup for copybara.
 find "${OS}" -name 'BUILD' -o -name '*.bzl' |xargs buildifier
 find "${OS}" -name 'BUILD' -o -name '*.bzl' |xargs -I {} mv {} {}.oss
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
new file mode 100755
index 0000000000..399d7c1463
--- /dev/null
+++ b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
@@ -0,0 +1,56 @@
+# NVIDIA TensorRT
+# A high-performance deep learning inference optimizer and runtime.
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+load("@local_config_cuda//cuda:build_defs.bzl", "cuda_default_copts")
+
+package(default_visibility = ["//visibility:public"])
+
+cc_library(
+    name = "tensorrt_headers",
+    hdrs = [":tensorrt_include"],
+    includes = [
+        "include",
+    ],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "nv_infer",
+    srcs = ["tensorrt/lib/libnvinfer.so.5"],
+    copts = cuda_default_copts(),
+    data = ["tensorrt/lib/libnvinfer.so.5"],
+    includes = [
+        "include",
+    ],
+    linkstatic = 1,
+    visibility = ["//visibility:public"],
+    deps = [
+        ":tensorrt_headers",
+        "@local_config_cuda//cuda",
+    ],
+)
+
+genrule(
+    name = "tensorrt_lib",
+    outs = [
+        "tensorrt/lib/libnvinfer.so.5",
+    ],
+    cmd = """
+if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp -f "/usr/lib/x86_64-linux-gnu/libnvinfer.so.5.0.2" "$(@D)/libnvinfer.so.5"
+   """,
+)
+
+genrule(
+    name = "tensorrt_include",
+    outs = [
+        "tensorrt/include/NvInfer.h",
+        "tensorrt/include/NvUtils.h",
+    ],
+    cmd = """
+if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp -f "/usr/include/x86_64-linux-gnu/NvInfer.h" "$(@D)/tensorrt/include/NvInfer.h" && cp -f "/usr/include/x86_64-linux-gnu/NvUtils.h" "$(@D)/tensorrt/include/NvUtils.h"
+   """,
+)
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/WORKSPACE b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/WORKSPACE
new file mode 100644
index 0000000000..ce47f14b91
--- /dev/null
+++ b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/WORKSPACE
@@ -0,0 +1,2 @@
+# DO NOT EDIT: automatically generated WORKSPACE file for tensorrt_configure rule
+workspace(name = "local_config_tensorrt")
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/build_defs.bzl b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/build_defs.bzl
new file mode 100755
index 0000000000..5c1c40361d
--- /dev/null
+++ b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/build_defs.bzl
@@ -0,0 +1,7 @@
+# Build configurations for TensorRT.
+
+def if_tensorrt(if_true, if_false = []):
+    """Tests whether TensorRT was enabled during the configure process."""
+    if True:
+        return if_true
+    return if_false
-- 
GitLab


From 16a9eb66fab6cc2c875f3f1dd600a5a460b1601a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 11:38:55 -0800
Subject: [PATCH 612/873] Allow to provide a pre-calculated hessian and
 gradient

PiperOrigin-RevId: 225577047
---
 .../python/training/functions/gbdt_batch.py   | 85 ++++++++++++-------
 1 file changed, 56 insertions(+), 29 deletions(-)

diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
index 9fdc2fc0c2..a5951fb737 100644
--- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
+++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py
@@ -614,13 +614,19 @@ class GradientBoostedDecisionTreeModel(object):
           predictions_dict[NUM_TREES_ATTEMPTED] % self._logits_dimension)
     return constant_op.constant(-1, dtype=dtypes.int32)
 
-  def update_stats(self, loss, predictions_dict):
+  def update_stats(self, loss, predictions_dict, gradients=None, hessians=None):
     """Update the accumulators with stats from this batch.
 
     Args:
       loss: A scalar tensor representing average loss of examples.
       predictions_dict: Dictionary of Rank 2 `Tensor` representing information
           about predictions per example.
+      gradients: A tensor with the gradients with the respect to logits from
+        predictions_dict. If not provided, tensorflow will do
+        autodifferentiation.
+      hessians: A tensor with the hessians with the respect to logits from
+        predictions_dict. If not provided, tensorflow will do
+        autodifferentiation.
 
     Returns:
       Three values:
@@ -642,13 +648,14 @@ class GradientBoostedDecisionTreeModel(object):
     predictions = predictions_dict[PREDICTIONS]
     partition_ids = predictions_dict[PARTITION_IDS]
     ensemble_stamp = predictions_dict[ENSEMBLE_STAMP]
-    gradients = gradients_impl.gradients(
-        loss,
-        predictions,
-        name="Gradients",
-        colocate_gradients_with_ops=False,
-        gate_gradients=0,
-        aggregation_method=None)[0]
+    if gradients is None:
+      gradients = gradients_impl.gradients(
+          loss,
+          predictions,
+          name="Gradients",
+          colocate_gradients_with_ops=False,
+          gate_gradients=0,
+          aggregation_method=None)[0]
     strategy = self._learner_config.multi_class_strategy
 
     class_id = self._get_class_id(predictions_dict)
@@ -657,17 +664,20 @@ class GradientBoostedDecisionTreeModel(object):
       # We build one vs rest trees.
       if self._logits_dimension == 1:
         # We have only 1 score, gradients is of shape [batch, 1].
-        hessians = gradients_impl.gradients(
-            gradients,
-            predictions,
-            name="Hessian",
-            colocate_gradients_with_ops=False,
-            gate_gradients=0,
-            aggregation_method=None)[0]
+        if hessians is None:
+          hessians = gradients_impl.gradients(
+              gradients,
+              predictions,
+              name="Hessian",
+              colocate_gradients_with_ops=False,
+              gate_gradients=0,
+              aggregation_method=None)[0]
 
         squeezed_gradients = array_ops.squeeze(gradients, axis=[1])
         squeezed_hessians = array_ops.squeeze(hessians, axis=[1])
       else:
+        if hessians is not None:
+          raise ValueError("Providing hessians is not yet supported here.")
         hessian_list = self._diagonal_hessian(gradients, predictions)
         # Assemble hessian list into a tensor.
         hessians = array_ops.stack(hessian_list, axis=1)
@@ -678,6 +688,8 @@ class GradientBoostedDecisionTreeModel(object):
         squeezed_hessians = array_ops.squeeze(
             _get_column_by_index(hessians, class_id))
     else:
+      if hessians is not None:
+        raise ValueError("Providing hessians is not yet supported here.")
       # Other multiclass strategies.
       if strategy == learner_pb2.LearnerConfig.FULL_HESSIAN:
         hessian_list = self._full_hessian(gradients, predictions)
@@ -835,9 +847,9 @@ class GradientBoostedDecisionTreeModel(object):
     stats_update_ops.append(
         control_flow_ops.cond(
             continue_centering,
-            self._make_update_bias_stats_fn(
-                ensemble_stamp, predictions, gradients,
-                bias_stats_accumulator), control_flow_ops.no_op))
+            self._make_update_bias_stats_fn(ensemble_stamp, predictions,
+                                            gradients, bias_stats_accumulator,
+                                            hessians), control_flow_ops.no_op))
 
     # Update handler stats.
     handler_reads = collections.OrderedDict()
@@ -1162,7 +1174,8 @@ class GradientBoostedDecisionTreeModel(object):
   def get_max_tree_depth(self):
     return self._max_tree_depth
 
-  def train(self, loss, predictions_dict, labels):
+  def train(self, loss, predictions_dict, labels, gradients=None,
+            hessians=None):
     """Updates the accumalator stats and grows the ensemble.
 
     Args:
@@ -1171,6 +1184,12 @@ class GradientBoostedDecisionTreeModel(object):
           about predictions per example.
       labels: Rank 2 `Tensor` representing labels per example. Has no effect
           on the training and is only kept for backward compatibility.
+      gradients: A tensor with the gradients with the respect to logits from
+        predictions_dict. If not provided, tensorflow will do
+        autodifferentiation.
+      hessians: A tensor with the hessians with the respect to logits from
+        predictions_dict. If not provided, tensorflow will do
+        autodifferentiation.
 
     Returns:
       An op that adds a new tree to the ensemble.
@@ -1179,7 +1198,8 @@ class GradientBoostedDecisionTreeModel(object):
       ValueError: if inputs are not valid.
     """
     del labels  # unused; kept for backward compatibility.
-    update_op, _, training_state = self.update_stats(loss, predictions_dict)
+    update_op, _, training_state = self.update_stats(loss, predictions_dict,
+                                                     gradients, hessians)
     with ops.control_dependencies(update_op):
       return self.increment_step_counter_and_maybe_update_ensemble(
           predictions_dict, training_state)
@@ -1271,21 +1291,28 @@ class GradientBoostedDecisionTreeModel(object):
         ps_ops=ps_ops,
         ps_strategy=ps_strategy)
 
-  def _make_update_bias_stats_fn(self, ensemble_stamp, predictions, gradients,
-                                 bias_stats_accumulator):
+  def _make_update_bias_stats_fn(self,
+                                 ensemble_stamp,
+                                 predictions,
+                                 gradients,
+                                 bias_stats_accumulator,
+                                 hessians=None):
     """A method to create the function which updates the bias stats."""
 
     def _update_bias_stats():
       """A method to update the bias stats."""
       # Get reduced gradients and hessians.
       grads_sum = math_ops.reduce_sum(gradients, 0)
-      hess = gradients_impl.gradients(
-          grads_sum,
-          predictions,
-          name="Hessians",
-          colocate_gradients_with_ops=False,
-          gate_gradients=0,
-          aggregation_method=None)[0]
+      if hessians is not None:
+        hess = hessians
+      else:
+        hess = gradients_impl.gradients(
+            grads_sum,
+            predictions,
+            name="Hessians",
+            colocate_gradients_with_ops=False,
+            gate_gradients=0,
+            aggregation_method=None)[0]
       hess_sum = math_ops.reduce_sum(hess, 0)
 
       # Accumulate gradients and hessians.
-- 
GitLab


From d0373dcae4810422f7575cb56a91d6695d2c4ec6 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Fri, 14 Dec 2018 11:42:12 -0800
Subject: [PATCH 613/873] Introduce `dynamic` constructor argument in Layer and
 Model. When subclassing a layer or model that relies on imperative control
 flow in its `call` method, users should pass `dynamic=True` to the
 constructor. This ensures that the layer will only ever be run eagerly (thus
 ensuring correctness for this type of layer). This requires you to have
 enabled eager execution.

PiperOrigin-RevId: 225577677
---
 tensorflow/python/keras/engine/base_layer.py  |  84 ++++----
 .../python/keras/engine/base_layer_test.py    | 198 ++++++++++++------
 tensorflow/python/keras/engine/network.py     |  15 +-
 tensorflow/python/keras/engine/sequential.py  |  11 +-
 tensorflow/python/keras/engine/training.py    |  44 ++--
 .../golden/v1/tensorflow.keras.-model.pbtxt   |   4 +
 .../v1/tensorflow.keras.-sequential.pbtxt     |   4 +
 ....experimental.-peephole-l-s-t-m-cell.pbtxt |   4 +
 .../tensorflow.keras.layers.-activation.pbtxt |   4 +
 ...eras.layers.-activity-regularization.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-add.pbtxt     |   4 +
 ...nsorflow.keras.layers.-alpha-dropout.pbtxt |   4 +
 ...low.keras.layers.-average-pooling1-d.pbtxt |   4 +
 ...low.keras.layers.-average-pooling2-d.pbtxt |   4 +
 ...low.keras.layers.-average-pooling3-d.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-average.pbtxt |   4 +
 ...tensorflow.keras.layers.-avg-pool1-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-avg-pool2-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-avg-pool3-d.pbtxt |   4 +
 ...ow.keras.layers.-batch-normalization.pbtxt |   4 +
 ...nsorflow.keras.layers.-bidirectional.pbtxt |   4 +
 ...tensorflow.keras.layers.-concatenate.pbtxt |   4 +
 ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-conv1-d.pbtxt |   4 +
 ...flow.keras.layers.-conv2-d-transpose.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-conv2-d.pbtxt |   4 +
 ...flow.keras.layers.-conv3-d-transpose.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-conv3-d.pbtxt |   4 +
 ...sorflow.keras.layers.-convolution1-d.pbtxt |   4 +
 ...ras.layers.-convolution2-d-transpose.pbtxt |   4 +
 ...sorflow.keras.layers.-convolution2-d.pbtxt |   4 +
 ...ras.layers.-convolution3-d-transpose.pbtxt |   4 +
 ...sorflow.keras.layers.-convolution3-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-cropping1-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-cropping2-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-cropping3-d.pbtxt |   4 +
 ...sorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt |   4 +
 ...rflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-dense.pbtxt   |   4 +
 ...flow.keras.layers.-depthwise-conv2-d.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-dot.pbtxt     |   4 +
 .../v1/tensorflow.keras.layers.-dropout.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-e-l-u.pbtxt   |   4 +
 .../tensorflow.keras.layers.-embedding.pbtxt  |   4 +
 .../v1/tensorflow.keras.layers.-flatten.pbtxt |   4 +
 .../tensorflow.keras.layers.-g-r-u-cell.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-g-r-u.pbtxt   |   4 +
 ...rflow.keras.layers.-gaussian-dropout.pbtxt |   4 +
 ...sorflow.keras.layers.-gaussian-noise.pbtxt |   4 +
 ...as.layers.-global-average-pooling1-d.pbtxt |   4 +
 ...as.layers.-global-average-pooling2-d.pbtxt |   4 +
 ...as.layers.-global-average-pooling3-d.pbtxt |   4 +
 ...low.keras.layers.-global-avg-pool1-d.pbtxt |   4 +
 ...low.keras.layers.-global-avg-pool2-d.pbtxt |   4 +
 ...low.keras.layers.-global-avg-pool3-d.pbtxt |   4 +
 ...low.keras.layers.-global-max-pool1-d.pbtxt |   4 +
 ...low.keras.layers.-global-max-pool2-d.pbtxt |   4 +
 ...low.keras.layers.-global-max-pool3-d.pbtxt |   4 +
 ....keras.layers.-global-max-pooling1-d.pbtxt |   4 +
 ....keras.layers.-global-max-pooling2-d.pbtxt |   4 +
 ....keras.layers.-global-max-pooling3-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-input-layer.pbtxt |   4 +
 ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-l-s-t-m.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-lambda.pbtxt  |   4 +
 .../v1/tensorflow.keras.layers.-layer.pbtxt   |   6 +-
 ...ensorflow.keras.layers.-leaky-re-l-u.pbtxt |   4 +
 ...w.keras.layers.-locally-connected1-d.pbtxt |   4 +
 ...w.keras.layers.-locally-connected2-d.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-masking.pbtxt |   4 +
 ...tensorflow.keras.layers.-max-pool1-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-max-pool2-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-max-pool3-d.pbtxt |   4 +
 ...sorflow.keras.layers.-max-pooling1-d.pbtxt |   4 +
 ...sorflow.keras.layers.-max-pooling2-d.pbtxt |   4 +
 ...sorflow.keras.layers.-max-pooling3-d.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-maximum.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-minimum.pbtxt |   4 +
 .../tensorflow.keras.layers.-multiply.pbtxt   |   4 +
 .../tensorflow.keras.layers.-p-re-l-u.pbtxt   |   4 +
 .../v1/tensorflow.keras.layers.-permute.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-r-n-n.pbtxt   |   4 +
 .../v1/tensorflow.keras.layers.-re-l-u.pbtxt  |   4 +
 ...nsorflow.keras.layers.-repeat-vector.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-reshape.pbtxt |   4 +
 ...flow.keras.layers.-separable-conv1-d.pbtxt |   4 +
 ...flow.keras.layers.-separable-conv2-d.pbtxt |   4 +
 ...ras.layers.-separable-convolution1-d.pbtxt |   4 +
 ...ras.layers.-separable-convolution2-d.pbtxt |   4 +
 ...flow.keras.layers.-simple-r-n-n-cell.pbtxt |   4 +
 ...ensorflow.keras.layers.-simple-r-n-n.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-softmax.pbtxt |   4 +
 ...low.keras.layers.-spatial-dropout1-d.pbtxt |   4 +
 ...low.keras.layers.-spatial-dropout2-d.pbtxt |   4 +
 ...low.keras.layers.-spatial-dropout3-d.pbtxt |   4 +
 ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt |   4 +
 .../tensorflow.keras.layers.-subtract.pbtxt   |   4 +
 ...low.keras.layers.-thresholded-re-l-u.pbtxt |   4 +
 ...rflow.keras.layers.-time-distributed.pbtxt |   4 +
 ...sorflow.keras.layers.-up-sampling1-d.pbtxt |   4 +
 ...sorflow.keras.layers.-up-sampling2-d.pbtxt |   4 +
 ...sorflow.keras.layers.-up-sampling3-d.pbtxt |   4 +
 .../v1/tensorflow.keras.layers.-wrapper.pbtxt |   4 +
 ...orflow.keras.layers.-zero-padding1-d.pbtxt |   4 +
 ...orflow.keras.layers.-zero-padding2-d.pbtxt |   4 +
 ...orflow.keras.layers.-zero-padding3-d.pbtxt |   4 +
 .../tensorflow.keras.metrics.-accuracy.pbtxt  |   4 +
 ...rflow.keras.metrics.-binary-accuracy.pbtxt |   4 +
 ....keras.metrics.-categorical-accuracy.pbtxt |   4 +
 ...rflow.keras.metrics.-false-negatives.pbtxt |   4 +
 ...rflow.keras.metrics.-false-positives.pbtxt |   4 +
 .../v1/tensorflow.keras.metrics.-mean.pbtxt   |   4 +
 .../tensorflow.keras.metrics.-precision.pbtxt |   4 +
 .../v1/tensorflow.keras.metrics.-recall.pbtxt |   4 +
 ....metrics.-sensitivity-at-specificity.pbtxt |   4 +
 ...metrics.-sparse-categorical-accuracy.pbtxt |   4 +
 ....metrics.-specificity-at-sensitivity.pbtxt |   4 +
 ...orflow.keras.metrics.-true-negatives.pbtxt |   4 +
 ...orflow.keras.metrics.-true-positives.pbtxt |   4 +
 .../v1/tensorflow.keras.models.-model.pbtxt   |   4 +
 .../tensorflow.keras.models.-sequential.pbtxt |   4 +
 ...ensorflow.layers.-average-pooling1-d.pbtxt |   4 +
 ...ensorflow.layers.-average-pooling2-d.pbtxt |   4 +
 ...ensorflow.layers.-average-pooling3-d.pbtxt |   4 +
 ...nsorflow.layers.-batch-normalization.pbtxt |   4 +
 .../v1/tensorflow.layers.-conv1-d.pbtxt       |   4 +
 ...tensorflow.layers.-conv2-d-transpose.pbtxt |   4 +
 .../v1/tensorflow.layers.-conv2-d.pbtxt       |   4 +
 ...tensorflow.layers.-conv3-d-transpose.pbtxt |   4 +
 .../v1/tensorflow.layers.-conv3-d.pbtxt       |   4 +
 .../golden/v1/tensorflow.layers.-dense.pbtxt  |   4 +
 .../v1/tensorflow.layers.-dropout.pbtxt       |   4 +
 .../v1/tensorflow.layers.-flatten.pbtxt       |   4 +
 .../golden/v1/tensorflow.layers.-layer.pbtxt  |   4 +
 .../tensorflow.layers.-max-pooling1-d.pbtxt   |   4 +
 .../tensorflow.layers.-max-pooling2-d.pbtxt   |   4 +
 .../tensorflow.layers.-max-pooling3-d.pbtxt   |   4 +
 ...tensorflow.layers.-separable-conv1-d.pbtxt |   4 +
 ...tensorflow.layers.-separable-conv2-d.pbtxt |   4 +
 ...flow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt |   4 +
 ...orflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt |   4 +
 ...nsorflow.nn.rnn_cell.-device-wrapper.pbtxt |   4 +
 ...sorflow.nn.rnn_cell.-dropout-wrapper.pbtxt |   4 +
 .../tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt  |   4 +
 ...tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt |   4 +
 ...orflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt |   4 +
 .../tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt  |   4 +
 ...orflow.nn.rnn_cell.-residual-wrapper.pbtxt |   4 +
 .../golden/v2/tensorflow.keras.-model.pbtxt   |   4 +
 .../v2/tensorflow.keras.-sequential.pbtxt     |   4 +
 ....experimental.-peephole-l-s-t-m-cell.pbtxt |   4 +
 .../tensorflow.keras.layers.-activation.pbtxt |   4 +
 ...eras.layers.-activity-regularization.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-add.pbtxt     |   4 +
 ...nsorflow.keras.layers.-alpha-dropout.pbtxt |   4 +
 ...low.keras.layers.-average-pooling1-d.pbtxt |   4 +
 ...low.keras.layers.-average-pooling2-d.pbtxt |   4 +
 ...low.keras.layers.-average-pooling3-d.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-average.pbtxt |   4 +
 ...tensorflow.keras.layers.-avg-pool1-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-avg-pool2-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-avg-pool3-d.pbtxt |   4 +
 ...ow.keras.layers.-batch-normalization.pbtxt |   4 +
 ...nsorflow.keras.layers.-bidirectional.pbtxt |   4 +
 ...tensorflow.keras.layers.-concatenate.pbtxt |   4 +
 ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-conv1-d.pbtxt |   4 +
 ...flow.keras.layers.-conv2-d-transpose.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-conv2-d.pbtxt |   4 +
 ...flow.keras.layers.-conv3-d-transpose.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-conv3-d.pbtxt |   4 +
 ...sorflow.keras.layers.-convolution1-d.pbtxt |   4 +
 ...ras.layers.-convolution2-d-transpose.pbtxt |   4 +
 ...sorflow.keras.layers.-convolution2-d.pbtxt |   4 +
 ...ras.layers.-convolution3-d-transpose.pbtxt |   4 +
 ...sorflow.keras.layers.-convolution3-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-cropping1-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-cropping2-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-cropping3-d.pbtxt |   4 +
 ...sorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt |   4 +
 ...sorflow.keras.layers.-dense-features.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-dense.pbtxt   |   4 +
 ...flow.keras.layers.-depthwise-conv2-d.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-dot.pbtxt     |   4 +
 .../v2/tensorflow.keras.layers.-dropout.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-e-l-u.pbtxt   |   4 +
 .../tensorflow.keras.layers.-embedding.pbtxt  |   4 +
 .../v2/tensorflow.keras.layers.-flatten.pbtxt |   4 +
 .../tensorflow.keras.layers.-g-r-u-cell.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-g-r-u.pbtxt   |   4 +
 ...rflow.keras.layers.-gaussian-dropout.pbtxt |   4 +
 ...sorflow.keras.layers.-gaussian-noise.pbtxt |   4 +
 ...as.layers.-global-average-pooling1-d.pbtxt |   4 +
 ...as.layers.-global-average-pooling2-d.pbtxt |   4 +
 ...as.layers.-global-average-pooling3-d.pbtxt |   4 +
 ...low.keras.layers.-global-avg-pool1-d.pbtxt |   4 +
 ...low.keras.layers.-global-avg-pool2-d.pbtxt |   4 +
 ...low.keras.layers.-global-avg-pool3-d.pbtxt |   4 +
 ...low.keras.layers.-global-max-pool1-d.pbtxt |   4 +
 ...low.keras.layers.-global-max-pool2-d.pbtxt |   4 +
 ...low.keras.layers.-global-max-pool3-d.pbtxt |   4 +
 ....keras.layers.-global-max-pooling1-d.pbtxt |   4 +
 ....keras.layers.-global-max-pooling2-d.pbtxt |   4 +
 ....keras.layers.-global-max-pooling3-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-input-layer.pbtxt |   4 +
 ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-l-s-t-m.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-lambda.pbtxt  |   4 +
 .../v2/tensorflow.keras.layers.-layer.pbtxt   |   6 +-
 ...ensorflow.keras.layers.-leaky-re-l-u.pbtxt |   4 +
 ...ensorflow.keras.layers.-linear-model.pbtxt |   4 +
 ...w.keras.layers.-locally-connected1-d.pbtxt |   4 +
 ...w.keras.layers.-locally-connected2-d.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-masking.pbtxt |   4 +
 ...tensorflow.keras.layers.-max-pool1-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-max-pool2-d.pbtxt |   4 +
 ...tensorflow.keras.layers.-max-pool3-d.pbtxt |   4 +
 ...sorflow.keras.layers.-max-pooling1-d.pbtxt |   4 +
 ...sorflow.keras.layers.-max-pooling2-d.pbtxt |   4 +
 ...sorflow.keras.layers.-max-pooling3-d.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-maximum.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-minimum.pbtxt |   4 +
 .../tensorflow.keras.layers.-multiply.pbtxt   |   4 +
 .../tensorflow.keras.layers.-p-re-l-u.pbtxt   |   4 +
 .../v2/tensorflow.keras.layers.-permute.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-r-n-n.pbtxt   |   4 +
 .../v2/tensorflow.keras.layers.-re-l-u.pbtxt  |   4 +
 ...nsorflow.keras.layers.-repeat-vector.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-reshape.pbtxt |   4 +
 ...flow.keras.layers.-separable-conv1-d.pbtxt |   4 +
 ...flow.keras.layers.-separable-conv2-d.pbtxt |   4 +
 ...ras.layers.-separable-convolution1-d.pbtxt |   4 +
 ...ras.layers.-separable-convolution2-d.pbtxt |   4 +
 ...flow.keras.layers.-simple-r-n-n-cell.pbtxt |   4 +
 ...ensorflow.keras.layers.-simple-r-n-n.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-softmax.pbtxt |   4 +
 ...low.keras.layers.-spatial-dropout1-d.pbtxt |   4 +
 ...low.keras.layers.-spatial-dropout2-d.pbtxt |   4 +
 ...low.keras.layers.-spatial-dropout3-d.pbtxt |   4 +
 ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt |   4 +
 .../tensorflow.keras.layers.-subtract.pbtxt   |   4 +
 ...low.keras.layers.-thresholded-re-l-u.pbtxt |   4 +
 ...rflow.keras.layers.-time-distributed.pbtxt |   4 +
 ...sorflow.keras.layers.-up-sampling1-d.pbtxt |   4 +
 ...sorflow.keras.layers.-up-sampling2-d.pbtxt |   4 +
 ...sorflow.keras.layers.-up-sampling3-d.pbtxt |   4 +
 .../v2/tensorflow.keras.layers.-wrapper.pbtxt |   4 +
 ...orflow.keras.layers.-zero-padding1-d.pbtxt |   4 +
 ...orflow.keras.layers.-zero-padding2-d.pbtxt |   4 +
 ...orflow.keras.layers.-zero-padding3-d.pbtxt |   4 +
 .../tensorflow.keras.metrics.-accuracy.pbtxt  |   4 +
 ...rflow.keras.metrics.-binary-accuracy.pbtxt |   4 +
 ....keras.metrics.-categorical-accuracy.pbtxt |   4 +
 ...rflow.keras.metrics.-false-negatives.pbtxt |   4 +
 ...rflow.keras.metrics.-false-positives.pbtxt |   4 +
 .../v2/tensorflow.keras.metrics.-mean.pbtxt   |   4 +
 .../tensorflow.keras.metrics.-precision.pbtxt |   4 +
 .../v2/tensorflow.keras.metrics.-recall.pbtxt |   4 +
 ....metrics.-sensitivity-at-specificity.pbtxt |   4 +
 ...metrics.-sparse-categorical-accuracy.pbtxt |   4 +
 ....metrics.-specificity-at-sensitivity.pbtxt |   4 +
 ...orflow.keras.metrics.-true-negatives.pbtxt |   4 +
 ...orflow.keras.metrics.-true-positives.pbtxt |   4 +
 .../v2/tensorflow.keras.models.-model.pbtxt   |   4 +
 .../tensorflow.keras.models.-sequential.pbtxt |   4 +
 ...nsorflow.nn.rnn_cell.-device-wrapper.pbtxt |   4 +
 ...sorflow.nn.rnn_cell.-dropout-wrapper.pbtxt |   4 +
 .../tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt  |   4 +
 ...orflow.nn.rnn_cell.-residual-wrapper.pbtxt |   4 +
 269 files changed, 1277 insertions(+), 135 deletions(-)

diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index 8a56546ac0..aeed750652 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -84,6 +84,12 @@ class Layer(checkpointable.CheckpointableBase):
     name: String name of the layer.
     dtype: Default dtype of the layer's weights (default of `None` means use the
       type of the first input).
+    dynamic: Set this to `True` if your layer should only be run eagerly, and
+      should not be used to generate a static computation graph.
+      This would be the case for a Tree-RNN or a recursive network,
+      for example, or generally for any layer that manipulates tensors
+      using Python control flow. If `False`, we assume that the layer can
+      safely be used to generate a static computation graph.
 
   Read-only properties:
     name: The name of the layer (string).
@@ -104,7 +110,8 @@ class Layer(checkpointable.CheckpointableBase):
   """
 
   @checkpointable.no_automatic_dependency_tracking
-  def __init__(self, trainable=True, name=None, dtype=None, **kwargs):
+  def __init__(self, trainable=True, name=None, dtype=None, dynamic=False,
+               **kwargs):
     # These properties should be set by the user via keyword arguments.
     # note that 'dtype', 'input_shape' and 'batch_input_shape'
     # are only applicable to input layers: do not pass these keywords
@@ -183,7 +190,7 @@ class Layer(checkpointable.CheckpointableBase):
       self._expects_training_arg = False
 
     # Whether the `call` method can be used to build a TF graph without issues.
-    self._call_is_graph_friendly = True
+    self._dynamic = dynamic
 
     # Manage input shape information if passed.
     if 'input_shape' in kwargs or 'batch_input_shape' in kwargs:
@@ -515,7 +522,6 @@ class Layer(checkpointable.CheckpointableBase):
     # mode when all inputs can be traced back to `keras.Input()` (when building
     # models using the functional API).
     build_graph = tf_utils.are_all_symbolic_tensors(input_list)
-    executing_eagerly = context.executing_eagerly()
 
     # Handle Keras mask propagation from previous layer to current layer.
     previous_mask = None
@@ -530,8 +536,6 @@ class Layer(checkpointable.CheckpointableBase):
         # to __call__, hence we set previous_mask as the default value.
         kwargs['mask'] = previous_mask
 
-    input_shapes = None
-
     with ops.name_scope(self._name_scope()):
       if not self.built:
         # Build layer if applicable (if the `build` method has been overridden).
@@ -548,30 +552,28 @@ class Layer(checkpointable.CheckpointableBase):
             self.input_spec, inputs, self.name)
         graph = backend.get_graph()
         with graph.as_default():
-          if not executing_eagerly:
-            # In graph mode, failure to build the layer's graph
-            # implies a user-side bug. We don't catch exceptions.
-            outputs = self.call(inputs, *args, **kwargs)
-          else:
+          if not self.dynamic:
             try:
               outputs = self.call(inputs, *args, **kwargs)
-            except Exception:  # pylint: disable=broad-except
-              # Any issue during graph-building means we will later run the
-              # model in eager mode, whether the issue was related to
-              # graph mode or not. This provides a nice debugging experience.
-              self._call_is_graph_friendly = False
-              # We will use static shape inference to return symbolic tensors
-              # matching the specifications of the layer outputs.
-              # Since we have set `self._call_is_graph_friendly = False`,
-              # we will never attempt to run the underlying TF graph (which is
-              # disconnected).
-              # TODO(fchollet): consider py_func as an alternative, which
-              # would enable us to run the underlying graph if needed.
-              input_shapes = nest.map_structure(lambda x: x.shape, inputs)
-              output_shapes = self.compute_output_shape(input_shapes)
-              outputs = nest.map_structure(
-                  lambda shape: backend.placeholder(shape, dtype=self.dtype),
-                  output_shapes)
+            except TypeError as e:
+              messages = ['`tf.Tensor` as a Python `bool` is not allowed',
+                          'Tensor objects are only iterable when eager']
+              for msg in messages:
+                if msg in str(e):
+                  raise TypeError('You are attempting to use Python control '
+                                  'flow in a layer that was not declared to be '
+                                  'dynamic. Pass `dynamic=True` to the class '
+                                  'constructor.\nEncountered error:\n"""\n' +
+                                  str(e) + '\n"""')
+              raise e
+          else:
+            # We will use static shape inference to return symbolic tensors
+            # matching the specifications of the layer outputs.
+            # Since `self.dynamic` is True, we will never attempt to
+            # run the underlying TF graph (which is disconnected).
+            # TODO(fchollet): consider py_func as an alternative, which
+            # would enable us to run the underlying graph if needed.
+            outputs = self._symbolic_call(inputs)
 
           if outputs is None:
             raise ValueError('A layer\'s `call` method should return a '
@@ -612,6 +614,10 @@ class Layer(checkpointable.CheckpointableBase):
   def name(self):
     return self._name
 
+  @property
+  def dynamic(self):
+    return self._dynamic
+
   @property
   def activity_regularizer(self):
     """Optional regularizer function for the output of this layer."""
@@ -1570,23 +1576,6 @@ class Layer(checkpointable.CheckpointableBase):
     else:
       return values
 
-  @property
-  def _static_graph_friendly(self):
-    """Whether the layer can be called to create a static graph.
-
-    Because of nesting, there are two components to being "graph-friendly":
-      1) all inner layers are graph-friendly
-      2) the way they are composed is graph-friendly.
-    We denote the latter as "_call_is_graph_friendly", and define
-    "_static_graph_friendly" as being the combination of
-    "_call_is_graph_friendly" and "all inner layers are _static_graph_friendly".
-    For atomic layers (no inner layers), this is just "_call_is_graph_friendly".
-
-    Returns:
-      Boolean.
-    """
-    return self._call_is_graph_friendly
-
   def _maybe_build(self, inputs):
     # Check input assumptions set before layer building, e.g. input rank.
     input_spec.assert_input_compatibility(
@@ -1604,6 +1593,13 @@ class Layer(checkpointable.CheckpointableBase):
     if not hasattr(self.build, '_is_default'):
       self.build(input_shapes)
 
+  def _symbolic_call(self, inputs):
+    input_shapes = nest.map_structure(lambda x: x.shape, inputs)
+    output_shapes = self.compute_output_shape(input_shapes)
+    return nest.map_structure(
+        lambda shape: backend.placeholder(shape, dtype=self.dtype),
+        output_shapes)
+
   def __setattr__(self, name, value):
     if (not getattr(self, '_setattr_tracking', True) or
         getattr(self, '_is_graph_network', False)):
diff --git a/tensorflow/python/keras/engine/base_layer_test.py b/tensorflow/python/keras/engine/base_layer_test.py
index 4431f89693..ebee4a3043 100644
--- a/tensorflow/python/keras/engine/base_layer_test.py
+++ b/tensorflow/python/keras/engine/base_layer_test.py
@@ -18,14 +18,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import base_layer
 from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.ops import array_ops
@@ -37,6 +37,9 @@ from tensorflow.python.platform import test
 
 class DynamicLayer1(base_layer.Layer):
 
+  def __init__(self, dynamic=False, **kwargs):
+    super(DynamicLayer1, self).__init__(dynamic=dynamic, **kwargs)
+
   def call(self, inputs):
     if math_ops.reduce_sum(inputs) > 0:
       return math_ops.sqrt(inputs)
@@ -49,6 +52,9 @@ class DynamicLayer1(base_layer.Layer):
 
 class DynamicLayer2(base_layer.Layer):
 
+  def __init__(self, dynamic=False, **kwargs):
+    super(DynamicLayer2, self).__init__(dynamic=dynamic, **kwargs)
+
   def call(self, inputs):
     samples = []
     for sample in inputs:
@@ -64,91 +70,145 @@ class InvalidLayer(base_layer.Layer):
   def call(self, inputs):
     raise ValueError('You did something wrong!')
 
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
 
-class BaseLayerTest(keras_parameterized.TestCase):
+class BaseLayerTest(test.TestCase, parameterized.TestCase):
 
-  def _assert_static_graph_unfriendly_model(self, model):
-    self.assertEqual(model._static_graph_friendly, False)
-    if not testing_utils.should_run_eagerly():
+  @parameterized.parameters(DynamicLayer1, DynamicLayer2)
+  def test_dynamic_layer_in_functional_model_in_graph_mode(self, layer_class):
+    with context.graph_mode():
+      inputs = keras.Input((3,))
+      # Works when `dynamic=True` is declared.
+      outputs = layer_class(dynamic=True)(inputs)
+      model = keras.Model(inputs, outputs)
+      self.assertEqual(model.dynamic, True)
+      # But then you cannot run the model since you're in a graph scope.
       with self.assertRaisesRegexp(
-          ValueError, 'can only be successfully run in eager execution'):
-        model.compile(rmsprop.RMSprop(0.001), loss='mse',
-                      run_eagerly=testing_utils.should_run_eagerly())
-    else:
-      model.compile(rmsprop.RMSprop(0.001), loss='mse',
-                    run_eagerly=testing_utils.should_run_eagerly())
-      model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+          ValueError, 'You must enable eager execution'):
+        model.compile(rmsprop.RMSprop(0.001), loss='mse')
 
-  @test_util.run_v1_only
-  def test_dynamic_layer_fails_in_v1(self):
-    inputs = keras.Input((3,))
-
-    if not context.executing_eagerly():
-      with self.assertRaisesRegexp(
-          TypeError, 'Using a `tf.Tensor` as a Python `bool` is not allowed'):
-        DynamicLayer1()(inputs)
+      # Fails when `dynamic=True` not declared.
       with self.assertRaisesRegexp(
-          TypeError, 'Tensor objects are only iterable when eager'):
-        DynamicLayer2()(inputs)
+          TypeError, 'attempting to use Python control flow'):
+        _ = layer_class()(inputs)
 
-  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-  def test_dynamic_layer(self):
+  @parameterized.parameters(DynamicLayer1, DynamicLayer2)
+  def test_dynamic_layer_in_functional_model_in_eager_mode(self, layer_class):
     inputs = keras.Input((3,))
-    outputs = DynamicLayer1()(inputs)
+    # Fails when `dynamic=True` not declared.
+    with self.assertRaisesRegexp(
+        TypeError, 'attempting to use Python control flow'):
+      _ = layer_class()(inputs)
+    # Works when `dynamic=True` is declared.
+    outputs = layer_class(dynamic=True)(inputs)
     model = keras.Model(inputs, outputs)
-    self.assertAllClose([[0], [4], [9]], model.predict_on_batch([0, 2, -3]))
-    self._assert_static_graph_unfriendly_model(model)
+    self.assertEqual(model.dynamic, True)
+    model.compile(rmsprop.RMSprop(0.001), loss='mse')
+    self.assertEqual(model.run_eagerly, True)
+    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
 
+  def test_nested_dynamic_layers_in_eager_mode(self):
     inputs = keras.Input((3,))
-    outputs = DynamicLayer2()(inputs)
-    model = keras.Model(inputs, outputs)
-    self.assertAllClose([[0], [4], [9]], model.predict_on_batch([0, 2, -3]))
-    self._assert_static_graph_unfriendly_model(model)
-
-  # TODO(b/120985967): Test fails for nested models due to _set_mask_metadata.
-  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-  def nested_dynamic_layers_in_eager_mode(self):
-    inputs = keras.Input((3,))
-    outputs = DynamicLayer1()(inputs)
+    outputs = DynamicLayer1(dynamic=True)(inputs)
     inner_model = keras.Model(inputs, outputs)
+    self.assertEqual(inner_model.dynamic, True)
 
     inputs = keras.Input((3,))
-    x = DynamicLayer2()(inputs)
+    x = DynamicLayer2(dynamic=True)(inputs)
     outputs = inner_model(x)
 
     model = keras.Model(inputs, outputs)
-    self.assertEqual(model._static_graph_friendly, False)
-    if testing_utils.should_run_eagerly():
-      model.compile(rmsprop.RMSprop(0.001), loss='mse', run_eagerly=True)
-      model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
-    else:
-      with self.assertRaisesRegexp(
-          ValueError, 'only be successfully run in eager execution'):
-        model.compile(rmsprop.RMSprop(0.001), loss='mse', run_eagerly=False)
+    self.assertEqual(model.dynamic, True)
+    model.compile(rmsprop.RMSprop(0.001), loss='mse')
+    self.assertEqual(model.run_eagerly, True)
+    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+
+  def test_dynamic_layers_in_sequential_model(self):
+    # Without input_shape argument
+    model = keras.Sequential([DynamicLayer1(dynamic=True),
+                              keras.layers.Dense(3),
+                              DynamicLayer2(dynamic=True)])
+    self.assertEqual(model.dynamic, True)
+    model.compile(rmsprop.RMSprop(0.001), loss='mse')
+    self.assertEqual(model.run_eagerly, True)
+    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+
+    # With input_shape argument
+    model = keras.Sequential([DynamicLayer1(dynamic=True, input_shape=(3,)),
+                              DynamicLayer2(dynamic=True)])
+    self.assertEqual(model.dynamic, True)
+    model.compile(rmsprop.RMSprop(0.001), loss='mse')
+    self.assertEqual(model.run_eagerly, True)
+    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+
+  def test_dynamic_layers_in_subclassed_model(self):
+
+    class MyModel(keras.Model):
 
-  def test_invalid_forward_pass_in_graph_mode(self):
-    with context.graph_mode():
-      inputs = keras.Input((3,))
-      with self.assertRaisesRegexp(ValueError, 'You did something wrong!'):
-        _ = InvalidLayer()(inputs)
+      def __init__(self):
+        super(MyModel, self).__init__()
+        self.layer1 = DynamicLayer1(dynamic=True)
+
+      def call(self, inputs):
+        return self.layer1(inputs)
 
-  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-  def test_invalid_forward_pass_in_eager_mode(self):
+    model = MyModel()
+    self.assertEqual(model.dynamic, True)
+    model.compile(rmsprop.RMSprop(0.001), loss='mse')
+    self.assertEqual(model.run_eagerly, True)
+    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+
+  def test_dynamic_subclassed_model_no_shape_inference(self):
+
+    class MyModel(keras.Model):
+
+      def __init__(self):
+        super(MyModel, self).__init__(dynamic=True)
+        self.layer1 = keras.layers.Dense(3)
+        self.layer2 = keras.layers.Dense(3)
+
+      def call(self, inputs):
+        if math_ops.reduce_sum(inputs) > 0:
+          return self.layer1(inputs)
+        else:
+          return self.layer2(inputs)
+
+    model = MyModel()
+    self.assertEqual(model.dynamic, True)
+    model.compile(rmsprop.RMSprop(0.001), loss='mse')
+    self.assertEqual(model.run_eagerly, True)
+    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+    self.assertEqual(model.outputs, [None])
+
+  def test_dynamic_subclassed_model_with_shape_inference(self):
+
+    class MyModel(keras.Model):
+
+      def __init__(self):
+        super(MyModel, self).__init__(dynamic=True)
+        self.layer1 = keras.layers.Dense(3)
+        self.layer2 = keras.layers.Dense(3)
+
+      def call(self, inputs):
+        if math_ops.reduce_sum(inputs) > 0:
+          return self.layer1(inputs)
+        else:
+          return self.layer2(inputs)
+
+      def compute_output_shape(self, input_shape):
+        return tensor_shape.TensorShape(
+            tuple(input_shape[:-1].as_list()) + (3,))
+
+    model = MyModel()
+    self.assertEqual(model.dynamic, True)
+    model.compile(rmsprop.RMSprop(0.001), loss='mse')
+    model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
+    self.assertEqual(model.outputs[0].shape.as_list(), [None, 3])
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_invalid_forward_pass(self):
     inputs = keras.Input((3,))
-    outputs = InvalidLayer()(inputs)
-    model = keras.Model(inputs, outputs)
-    self.assertEqual(model._static_graph_friendly, False)
-    if testing_utils.should_run_eagerly():
-      model.compile(rmsprop.RMSprop(0.001), loss='mse', run_eagerly=True)
-      with self.assertRaisesRegexp(ValueError, 'You did something wrong!'):
-        model.train_on_batch(np.random.random((2, 3)), np.random.random((2, 3)))
-    else:
-      with self.assertRaisesRegexp(
-          ValueError, 'only be successfully run in eager execution'):
-        model.compile(rmsprop.RMSprop(0.001), loss='mse', run_eagerly=False)
+    with self.assertRaisesRegexp(ValueError, 'You did something wrong!'):
+      _ = InvalidLayer()(inputs)
 
   def test_using_symbolic_tensors_with_tf_ops(self):
     # Single-input.
@@ -178,7 +238,7 @@ class BaseLayerTest(keras_parameterized.TestCase):
     with ops.Graph().as_default():
       x1 = array_ops.ones((3, 3))
     x2 = array_ops.ones((3, 3))
-    self.assertTrue(isinstance(x2, ops.EagerTensor))
+    self.assertIsInstance(x2, ops.EagerTensor)
     with self.assertRaisesRegexp(TypeError,
                                  'provided list of inputs contains '
                                  'objects other than \'EagerTensor\''):
diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 642ac562cb..7435da61cc 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -142,7 +142,6 @@ class Network(base_layer.Layer):
     self._metrics_tensors = {}
     self._scope = None  # Never used.
     self._reuse = None  # Never used.
-    self._call_is_graph_friendly = True
     if context.executing_eagerly():
       self._graph = None
     else:
@@ -185,6 +184,7 @@ class Network(base_layer.Layer):
     self.built = True
     self._compute_output_and_mask_jointly = True
     self._is_graph_network = True
+    self._dynamic = False
 
     self._input_layers = []
     self._output_layers = []
@@ -251,9 +251,10 @@ class Network(base_layer.Layer):
       self.output_names.append(layer.name)
 
   @checkpointable.no_automatic_dependency_tracking
-  def _init_subclassed_network(self, name=None):
+  def _init_subclassed_network(self, name=None, dynamic=False):
     self._base_init(name=name)
     self._is_graph_network = False
+    self._dynamic = dynamic
     call_argspec = tf_inspect.getfullargspec(self.call)
     if 'training' in call_argspec.args:
       self._expects_training_arg = True
@@ -265,10 +266,10 @@ class Network(base_layer.Layer):
     self.built = False
 
   @property
-  def _static_graph_friendly(self):
+  def dynamic(self):
     if self._is_graph_network:
-      return all(layer._static_graph_friendly for layer in self.layers)
-    return self._call_is_graph_friendly
+      return any(layer.dynamic for layer in self.layers)
+    return self._dynamic or any(layer.dynamic for layer in self.layers)
 
   def _determine_call_convention(self, call_argspec):
     """Decides how `self.call()` is invoked. See `CallConvention`."""
@@ -998,6 +999,8 @@ class Network(base_layer.Layer):
               else:
                 if context.executing_eagerly():
                   output_tensors = layer(computed_tensor, **kwargs)
+                elif layer.dynamic:
+                  output_tensors = layer._symbolic_call(computed_tensor)  # pylint: disable=protected-call
                 else:
                   output_tensors = layer.call(computed_tensor, **kwargs)
                 if hasattr(layer, 'compute_mask'):
@@ -1022,6 +1025,8 @@ class Network(base_layer.Layer):
               else:
                 if context.executing_eagerly():
                   output_tensors = layer(computed_tensors, **kwargs)
+                elif layer.dynamic:
+                  output_tensors = layer._symbolic_call(computed_tensors)  # pylint: disable=protected-call
                 else:
                   output_tensors = layer.call(computed_tensors, **kwargs)
                 if hasattr(layer, 'compute_mask'):
diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py
index 3255613f6a..5a42afe847 100644
--- a/tensorflow/python/keras/engine/sequential.py
+++ b/tensorflow/python/keras/engine/sequential.py
@@ -121,8 +121,8 @@ class Sequential(Model):
     return layers[:]
 
   @property
-  def _static_graph_friendly(self):
-    return all(layer._static_graph_friendly for layer in self.layers)
+  def dynamic(self):
+    return any(layer.dynamic for layer in self.layers)
 
   @checkpointable.no_automatic_dependency_tracking
   def add(self, layer):
@@ -253,7 +253,12 @@ class Sequential(Model):
           with ops.name_scope(layer._name_scope()):
             layer._maybe_build(x)
           layer.built = True
-        x = layer.call(x, **kwargs)
+        if context.executing_eagerly():
+          x = layer(x, **kwargs)
+        elif layer.dynamic:
+          x = layer._symbolic_call(x)
+        else:
+          x = layer.call(x, **kwargs)
         if layer.supports_masking:
           mask = layer.compute_mask(x, mask)
         else:
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index f8cba47a41..94c3967625 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -554,16 +554,23 @@ class Model(Network):
     if self._run_eagerly is True and not context.executing_eagerly():
       raise ValueError('You can only set `run_eagerly=True` if eager execution '
                        'is enabled.')
-    if self._static_graph_friendly:
+    if not self.dynamic:
       if self._run_eagerly is None:
         return False
       else:
         return self._run_eagerly
     else:
+      if not context.executing_eagerly():
+        raise ValueError('Your model contains layers that can only be '
+                         'successfully run in eager execution (layers '
+                         'constructed with `dynamic=True`). '
+                         'You must enable eager execution with '
+                         '`tf.enable_eager_execution()`.')
       if self._run_eagerly is False:
         # TODO(fchollet): consider using py_func to enable this.
         raise ValueError('Your model contains layers that can only be '
-                         'successfully run in eager execution. '
+                         'successfully run in eager execution (layers '
+                         'constructed with `dynamic=True`). '
                          'You cannot set `run_eagerly=False`.')
       return context.executing_eagerly()
 
@@ -1637,10 +1644,12 @@ class Model(Network):
 
   def _cache_output_metric_attributes(self, metrics, weighted_metrics):
     """Caches metric name and function attributes for every model output."""
-    output_shapes = [
-        None if output is None else output.get_shape().as_list()
-        for output in self.outputs
-    ]
+    output_shapes = []
+    for output in self.outputs:
+      if output is None or output.shape.rank is None:
+        output_shapes.append(None)
+      else:
+        output_shapes.append(output.shape.as_list())
     self._per_output_metrics = training_utils.collect_per_output_metric_info(
         metrics, self.output_names, output_shapes, self.loss_functions)
     self._per_output_weighted_metrics = \
@@ -2523,12 +2532,23 @@ class Model(Network):
 
     # TODO(fchollet): consider calling `_maybe_build` before calling the model.
     if outputs is None:
-      # Obtain symbolic outputs by calling the model.
-      with K.get_graph().as_default():
-        if self._expects_training_arg:
-          outputs = self.call(inputs, training=training)
-        else:
-          outputs = self.call(inputs)
+      if not self._dynamic:
+        # The network may include dynamic layers but its `call`
+        # itself isn't dynamic.
+        # Obtain symbolic outputs by calling the model.
+        with K.get_graph().as_default():
+          if self._expects_training_arg:
+            outputs = self.call(inputs, training=training)
+          else:
+            outputs = self.call(inputs)
+      else:
+        # Case: network's `call` is dynamic.
+        try:
+          outputs = self._symbolic_call(inputs)
+        except NotImplementedError:
+          # Static shape inference was not implemented for this dynamic net.
+          # Do not specify symbolic outputs.
+          outputs = None
 
     outputs = nest.flatten(outputs)
     self.outputs = outputs
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
index a3254cbd94..6b0b4595ff 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
index b70e9ee98d..69cbecb898 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
index 1d814b2c8b..b2ab5006dc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt
index b84629540e..da212382c1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activation.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt
index 5918a13ad8..c910db027e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-activity-regularization.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt
index 599da06427..8b7b33e98c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-add.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt
index f9ff1538c8..5e3e41ba20 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-alpha-dropout.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index 723fc9cdb0..e160b10153 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt
index 957ce2f0ce..b6b71358c8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt
index a52c0af681..5c5ab1580e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average-pooling3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt
index a004db62dd..489de2e4d3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-average.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index 44f83d1387..30fec249b8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt
index 8378faf718..0e983c9234 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt
index 9d5655c964..ec50db7127 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-avg-pool3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt
index b3d3c84f92..cbbb000e25 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-batch-normalization.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt
index d37a6b4710..23153d4284 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-bidirectional.pbtxt
@@ -17,6 +17,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt
index 1ad7a91be0..766c3f267f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-concatenate.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
index cb9abc2539..8980982271 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
@@ -42,6 +42,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "filters"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt
index 47dba1d81f..a74b8d2950 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index fd64941896..b093f8ead9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt
index 1b1425d531..0ce9f6fd59 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index 1741063fe8..c1f5bfae0d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt
index 50feb4f458..4aa872c4f0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-conv3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt
index faaa535df9..6e01f7c70c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 4079329d1e..c002042d77 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt
index 32e56696e1..f5e5446d2b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 381abe7340..d5f36f4bc3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt
index b3e4bf9689..346fec6056 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-convolution3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt
index 7aeff8003c..0f8fe9f05e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping1-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt
index a1728d9d4f..68fb7382a7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping2-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt
index 8d8fd142cc..deda82f9b3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cropping3-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
index 7758209adf..2eba3fb954 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
@@ -18,6 +18,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
index 7c463ff125..6ed13d37f2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-cu-d-n-n-l-s-t-m.pbtxt
@@ -18,6 +18,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt
index 4960d0264e..919aed5723 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dense.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
index 8fad7535f8..f590ce1ef7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt
index 5b425f2d4d..db4261fadc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dot.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt
index f6c4d0a438..7369552b3b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-dropout.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt
index 82b761fc17..f643ef9de2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-e-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt
index c9ff323877..ce053ae8c4 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-embedding.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt
index 9b4165d4cb..db95043077 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-flatten.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt
index f225f7c430..a6edba6b7e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u-cell.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
index 855d001700..f8c0dbb273 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -33,6 +33,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "implementation"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt
index 2c404c99cd..ac4bbe7d19 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-dropout.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt
index 6f109d59d0..947e3170ae 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-gaussian-noise.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index 69f8a9031d..17e202c581 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
index 4299f765e5..9772c5df9b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
index 9153a1a240..cd65075591 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 625e81fd23..0423de7a24 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
index 2fc769742c..4471cba245 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
index e307a65c7c..c0e7fae456 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index 4394ad0364..6975a6e88d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
index 050ed39fe9..56bd70db7e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
index 436191821e..656319920e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 4ba540aa6a..f815e66911 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
index a2e9322cb3..f61f0e521b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
index 5d16a57fc1..c58c8ce63f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt
index 9dd29c1251..0efe9a4297 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-input-layer.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
index 0045d5775e..5caa02e71a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
index 529c750f98..f21c7e5b21 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-l-s-t-m.pbtxt
@@ -33,6 +33,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "implementation"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt
index d4d1bc6b6b..381d5660b9 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-lambda.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt
index e1f5491180..36b0a86628 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-layer.pbtxt
@@ -11,6 +11,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
@@ -81,7 +85,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\', \'dynamic\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\', \'False\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
index 9b69d9a944..b41662e63a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt
index fd52259432..e4abfca913 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected1-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt
index 5fc8af0d03..cfcb92e293 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-locally-connected2-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt
index 7f8932270e..e0721353d1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-masking.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
index 4723b99cb0..0618fbeead 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt
index 173c5d4a8b..4af52ffec8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt
index 14e1899e14..db9311ee58 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pool3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index a708e652bf..bfb15cb447 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt
index e6706b5cf9..1db962dbb8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt
index a73c082d1b..f80d5267e7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-max-pooling3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt
index f3f195554b..cd772d4ac7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-maximum.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt
index f345d1d67b..2bb6b3073a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-minimum.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt
index 31cb8bc177..e1a1f07355 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-multiply.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt
index 44cccc92bd..66c4446572 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-p-re-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt
index b55e191ff1..0839554f43 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-permute.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
index e9575436e5..b10695f6f7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt
index 98223b207f..b96500f710 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-re-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt
index 2df918b16b..a27d93ec62 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-repeat-vector.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt
index ce5f9e2129..6dda24d3d2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-reshape.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt
index a0bb917775..8a4ae8aaa7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv1-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt
index d7942f201b..a083c1da2e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-conv2-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
index f7ac9042d4..5d5b361f82 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
index e5a9268822..392c338d73 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
index 0fe2c974a7..1143604903 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
index 2ee5873f0f..5a15f1a55f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-simple-r-n-n.pbtxt
@@ -33,6 +33,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt
index 5b8f64aa35..c470d9c8e8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-softmax.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
index 240cb6e562..d17d6495c0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
index 6226c469f8..2d538b4734 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
index 34dabce6d8..b70923601a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
index 0ddf628ace..f453ddd50e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt
index 12eb35ad15..5759169e07 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-subtract.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
index c41020c2b4..bfde1c35f6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt
index 479f89cf6a..e7f59a9cc5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-time-distributed.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt
index 233363ce02..0354149d4f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling1-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index cb6228ac44..fff0e26bc1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt
index 03bad3ccb6..c49fa5663d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-up-sampling3-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt
index 158996792a..c961699053 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-wrapper.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt
index 63a56cd3ee..1911e128eb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding1-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt
index 965a4cca04..88be914347 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding2-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt
index 1a62430887..2bbb71ece2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.layers.-zero-padding3-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt
index 2db07df523..bad488f59b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-accuracy.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt
index 904ad3a21a..a1e7601a51 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-binary-accuracy.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
index 17b74924fa..5f2c2f9807 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt
index 49f577e136..c153e9cf4d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-negatives.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt
index e8baf85866..aae2bd9988 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-false-positives.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt
index 40fe64bbd2..904a2fa9ca 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-mean.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt
index ae6a85026d..e81ecfe3f6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-precision.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt
index 31068a51d5..f8470b94d7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-recall.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
index aa77d1972c..b70ef32bca 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
index 0c17452292..2e693269bf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
index 67857aa89f..e62a2df056 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt
index 1b5eb8d0de..1a524d73c0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-negatives.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt
index 5b9c470e32..b9b4f565c5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.metrics.-true-positives.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
index c58c7bef22..bdf695ed4d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
index 473a1c16fb..a64156f731 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling1-d.pbtxt
index 059c91f724..85764cc8dc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling1-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling2-d.pbtxt
index d06c8e81ee..259da2ad3e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling2-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling3-d.pbtxt
index 6be8e7c210..ffda9334cf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-average-pooling3-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-batch-normalization.pbtxt
index 16d9ecce10..56a3fc3de7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-batch-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-batch-normalization.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv1-d.pbtxt
index 21c695935c..d72f24b3d5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv1-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d-transpose.pbtxt
index f24d030720..72a7339368 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d-transpose.pbtxt
@@ -16,6 +16,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d.pbtxt
index 0a510ece35..38a63df42d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv2-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d-transpose.pbtxt
index d0ee44bed3..29620561f7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d-transpose.pbtxt
@@ -16,6 +16,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d.pbtxt
index 546de3cdab..f1a2bcbb72 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-conv3-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-dense.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-dense.pbtxt
index 3ad311581e..d1e2d57570 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-dense.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-dense.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-dropout.pbtxt
index 9b83271350..92e40f6d96 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-dropout.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-flatten.pbtxt
index 87a7fb3d84..087601a3c1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-flatten.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-flatten.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-layer.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-layer.pbtxt
index 32b17e90ad..b052c6bb0a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-layer.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling1-d.pbtxt
index 643c469717..9444a1bc76 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling1-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling2-d.pbtxt
index 434e25adc1..83dcb5e4e7 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling2-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling3-d.pbtxt
index 089fc6f924..eb26e2220b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-max-pooling3-d.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv1-d.pbtxt
index bc3d58b9ca..38d75e8bd5 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv1-d.pbtxt
@@ -16,6 +16,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv2-d.pbtxt
index fe7d71af3a..90fc61cdfa 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.layers.-separable-conv2-d.pbtxt
@@ -16,6 +16,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
index f7f9978c06..adffc55227 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
index f9e898484b..95746cc49c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
index 9e52a42526..3547b66d19 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
index 9836433d08..7582fd52b6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
index 5fd9b329bd..7ec61661fd 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
index 76c8cff22b..9617d07568 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
index f53567af52..b31886f736 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
index d3b68e4f29..c36ecaa4b2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
index 1f7840ab91..42128ebd17 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
index a3254cbd94..6b0b4595ff 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
index b70e9ee98d..69cbecb898 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
index 1d814b2c8b..b2ab5006dc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.-peephole-l-s-t-m-cell.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt
index b84629540e..da212382c1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activation.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt
index 5918a13ad8..c910db027e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-activity-regularization.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt
index 599da06427..8b7b33e98c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-add.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt
index f9ff1538c8..5e3e41ba20 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-alpha-dropout.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
index 723fc9cdb0..e160b10153 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt
index 957ce2f0ce..b6b71358c8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt
index a52c0af681..5c5ab1580e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average-pooling3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt
index a004db62dd..489de2e4d3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-average.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
index 44f83d1387..30fec249b8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt
index 8378faf718..0e983c9234 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt
index 9d5655c964..ec50db7127 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-avg-pool3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt
index 5da7926812..36ea9d5851 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-batch-normalization.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt
index d37a6b4710..23153d4284 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-bidirectional.pbtxt
@@ -17,6 +17,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt
index 1ad7a91be0..766c3f267f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-concatenate.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
index cb9abc2539..8980982271 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt
@@ -42,6 +42,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "filters"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt
index 47dba1d81f..a74b8d2950 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
index fd64941896..b093f8ead9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d-transpose.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt
index 1b1425d531..0ce9f6fd59 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
index 1741063fe8..c1f5bfae0d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d-transpose.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt
index 50feb4f458..4aa872c4f0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-conv3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt
index faaa535df9..6e01f7c70c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
index 4079329d1e..c002042d77 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt
index 32e56696e1..f5e5446d2b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
index 381abe7340..d5f36f4bc3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt
index b3e4bf9689..346fec6056 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-convolution3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt
index 7aeff8003c..0f8fe9f05e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping1-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt
index a1728d9d4f..68fb7382a7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping2-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt
index 8d8fd142cc..deda82f9b3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cropping3-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
index 7758209adf..2eba3fb954 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
@@ -18,6 +18,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt
index 0781a93bd5..ff00ca1bb2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense-features.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt
index 4960d0264e..919aed5723 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dense.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
index 8fad7535f8..f590ce1ef7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-depthwise-conv2-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt
index 5b425f2d4d..db4261fadc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dot.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt
index f6c4d0a438..7369552b3b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-dropout.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt
index 82b761fc17..f643ef9de2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-e-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt
index c9ff323877..ce053ae8c4 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-embedding.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
index 9b4165d4cb..db95043077 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-flatten.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
index f225f7c430..a6edba6b7e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u-cell.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
index 855d001700..f8c0dbb273 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -33,6 +33,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "implementation"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
index 2c404c99cd..ac4bbe7d19 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-dropout.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
index 6f109d59d0..947e3170ae 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-gaussian-noise.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
index 69f8a9031d..17e202c581 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
index 4299f765e5..9772c5df9b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
index 9153a1a240..cd65075591 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
index 625e81fd23..0423de7a24 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
index 2fc769742c..4471cba245 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
index e307a65c7c..c0e7fae456 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
index 4394ad0364..6975a6e88d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
index 050ed39fe9..56bd70db7e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
index 436191821e..656319920e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pool3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
index 4ba540aa6a..f815e66911 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
index a2e9322cb3..f61f0e521b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
index 5d16a57fc1..c58c8ce63f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt
index 9dd29c1251..0efe9a4297 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-input-layer.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
index 0045d5775e..5caa02e71a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
index 9144a5b103..33082a6f06 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-l-s-t-m.pbtxt
@@ -34,6 +34,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "implementation"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt
index d4d1bc6b6b..381d5660b9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-lambda.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt
index e1f5491180..36b0a86628 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-layer.pbtxt
@@ -11,6 +11,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
@@ -81,7 +85,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\'], "
+    argspec: "args=[\'self\', \'trainable\', \'name\', \'dtype\', \'dynamic\'], varargs=None, keywords=kwargs, defaults=[\'True\', \'None\', \'None\', \'False\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
index 9b69d9a944..b41662e63a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-leaky-re-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
index 2b66576c96..d061b9c221 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
@@ -18,6 +18,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt
index fd52259432..e4abfca913 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected1-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt
index 5fc8af0d03..cfcb92e293 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-locally-connected2-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt
index 7f8932270e..e0721353d1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-masking.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
index 4723b99cb0..0618fbeead 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt
index 173c5d4a8b..4af52ffec8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt
index 14e1899e14..db9311ee58 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pool3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
index a708e652bf..bfb15cb447 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt
index e6706b5cf9..1db962dbb8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt
index a73c082d1b..f80d5267e7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-max-pooling3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt
index f3f195554b..cd772d4ac7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-maximum.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt
index f345d1d67b..2bb6b3073a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-minimum.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt
index 31cb8bc177..e1a1f07355 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-multiply.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt
index 44cccc92bd..66c4446572 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-p-re-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt
index b55e191ff1..0839554f43 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-permute.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
index e9575436e5..b10695f6f7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-r-n-n.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt
index 98223b207f..b96500f710 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-re-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt
index 2df918b16b..a27d93ec62 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-repeat-vector.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt
index ce5f9e2129..6dda24d3d2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-reshape.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt
index a0bb917775..8a4ae8aaa7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv1-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt
index d7942f201b..a083c1da2e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-conv2-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
index f7ac9042d4..5d5b361f82 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution1-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
index e5a9268822..392c338d73 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-separable-convolution2-d.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
index 0fe2c974a7..1143604903 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
index 2ee5873f0f..5a15f1a55f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-simple-r-n-n.pbtxt
@@ -33,6 +33,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt
index 5b8f64aa35..c470d9c8e8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-softmax.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
index 240cb6e562..d17d6495c0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
index 6226c469f8..2d538b4734 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
index 34dabce6d8..b70923601a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
index 0ddf628ace..f453ddd50e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt
index 12eb35ad15..5759169e07 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-subtract.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
index c41020c2b4..bfde1c35f6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt
index 479f89cf6a..e7f59a9cc5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-time-distributed.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt
index 233363ce02..0354149d4f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling1-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
index cb6228ac44..fff0e26bc1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling2-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt
index 03bad3ccb6..c49fa5663d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-up-sampling3-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt
index 158996792a..c961699053 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-wrapper.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt
index 63a56cd3ee..1911e128eb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding1-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt
index 965a4cca04..88be914347 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding2-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt
index 1a62430887..2bbb71ece2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-zero-padding3-d.pbtxt
@@ -12,6 +12,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt
index 2db07df523..bad488f59b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-accuracy.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt
index 904ad3a21a..a1e7601a51 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-binary-accuracy.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
index 17b74924fa..5f2c2f9807 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-categorical-accuracy.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt
index 49f577e136..c153e9cf4d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-negatives.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt
index e8baf85866..aae2bd9988 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-false-positives.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt
index 40fe64bbd2..904a2fa9ca 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-mean.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt
index ae6a85026d..e81ecfe3f6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-precision.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt
index 31068a51d5..f8470b94d7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-recall.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
index aa77d1972c..b70ef32bca 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sensitivity-at-specificity.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
index 0c17452292..2e693269bf 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-sparse-categorical-accuracy.pbtxt
@@ -15,6 +15,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
index 67857aa89f..e62a2df056 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-specificity-at-sensitivity.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt
index 1b5eb8d0de..1a524d73c0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-negatives.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt
index 5b9c470e32..b9b4f565c5 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.metrics.-true-positives.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
index c58c7bef22..bdf695ed4d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
index 473a1c16fb..a64156f731 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "inbound_nodes"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
index 9e52a42526..3547b66d19 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
index 9836433d08..7582fd52b6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
index d3b68e4f29..c36ecaa4b2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt
@@ -13,6 +13,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
index 1f7840ab91..42128ebd17 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt
@@ -14,6 +14,10 @@ tf_class {
     name: "dtype"
     mtype: "<type \'property\'>"
   }
+  member {
+    name: "dynamic"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "graph"
     mtype: "<type \'property\'>"
-- 
GitLab


From 89b11a3a87f7020810cd6376db5052bac9a3b26e Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Fri, 14 Dec 2018 11:45:10 -0800
Subject: [PATCH 614/873] Fix Keras tests by using TF optimizers in tests with
 run_eagerly enabled

PiperOrigin-RevId: 225578120
---
 tensorflow/python/keras/BUILD                 |  3 +--
 .../keras/engine/training_generator_test.py   | 22 ++++++++-----------
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 87a9dfb605..907730eca6 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -800,12 +800,11 @@ py_test(
 
 py_test(
     name = "training_generator_test",
-    size = "enormous",
+    size = "large",
     srcs = ["engine/training_generator_test.py"],
     shard_count = 3,
     srcs_version = "PY2AND3",
     tags = [
-        "no_oss",
         "notsan",
     ],
     deps = [
diff --git a/tensorflow/python/keras/engine/training_generator_test.py b/tensorflow/python/keras/engine/training_generator_test.py
index 956ca2fe7c..90c45dfcb7 100644
--- a/tensorflow/python/keras/engine/training_generator_test.py
+++ b/tensorflow/python/keras/engine/training_generator_test.py
@@ -33,8 +33,8 @@ from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import metrics as metrics_module
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import training_generator
+from tensorflow.python.keras.optimizer_v2 import rmsprop
 from tensorflow.python.platform import test
-from tensorflow.python.training.rmsprop import RMSPropOptimizer
 from tensorflow.python.util import nest
 
 
@@ -74,7 +74,7 @@ class TestGeneratorMethods(keras_parameterized.TestCase):
         num_hidden=3, num_classes=4, input_dim=2)
     model.compile(
         loss='mse',
-        optimizer='sgd',
+        optimizer=rmsprop.RMSprop(1e-3),
         metrics=['mae', metrics_module.CategoricalAccuracy()])
 
     model.fit_generator(custom_generator(),
@@ -115,7 +115,7 @@ class TestGeneratorMethods(keras_parameterized.TestCase):
         num_hidden=3, num_classes=4, input_dim=2)
     model.compile(
         loss='mse',
-        optimizer='sgd',
+        optimizer=rmsprop.RMSprop(1e-3),
         metrics=['mae', metrics_module.CategoricalAccuracy()],
         run_eagerly=testing_utils.should_run_eagerly())
 
@@ -143,11 +143,7 @@ class TestGeneratorMethods(keras_parameterized.TestCase):
   def test_predict_generator_method(self):
     model = testing_utils.get_small_mlp(
         num_hidden=3, num_classes=4, input_dim=2)
-    model.compile(
-        loss='mse',
-        optimizer='sgd',
-        metrics=['mae', metrics_module.CategoricalAccuracy()],
-        run_eagerly=testing_utils.should_run_eagerly())
+    model.run_eagerly = testing_utils.should_run_eagerly()
 
     model.predict_generator(custom_generator(),
                             steps=5,
@@ -185,7 +181,7 @@ class TestGeneratorMethods(keras_parameterized.TestCase):
         num_hidden=3, num_classes=4, input_dim=2)
     model.compile(
         loss='mse',
-        optimizer='sgd',
+        optimizer=rmsprop.RMSprop(1e-3),
         metrics=['mae', metrics_module.CategoricalAccuracy()],
         run_eagerly=testing_utils.should_run_eagerly())
 
@@ -223,7 +219,7 @@ class TestGeneratorMethods(keras_parameterized.TestCase):
 
     model = testing_utils.get_small_mlp(
         num_hidden=3, num_classes=4, input_dim=2)
-    model.compile(loss='mse', optimizer='sgd',
+    model.compile(loss='mse', optimizer=rmsprop.RMSprop(1e-3),
                   run_eagerly=testing_utils.should_run_eagerly())
 
     with self.assertRaises(ValueError):
@@ -266,7 +262,7 @@ class TestGeneratorMethods(keras_parameterized.TestCase):
     model = testing_utils.get_small_mlp(
         num_hidden=10, num_classes=1, input_dim=10)
 
-    model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy',
+    model.compile(rmsprop.RMSprop(0.001), 'binary_crossentropy',
                   run_eagerly=testing_utils.should_run_eagerly())
     model.fit(
         ones_generator(),
@@ -294,7 +290,7 @@ class TestGeneratorMethodsWithSequences(keras_parameterized.TestCase):
 
     model = testing_utils.get_small_mlp(
         num_hidden=3, num_classes=4, input_dim=2)
-    model.compile(loss='mse', optimizer='sgd')
+    model.compile(loss='mse', optimizer=rmsprop.RMSprop(1e-3))
 
     model.fit_generator(DummySequence(),
                         steps_per_epoch=10,
@@ -328,7 +324,7 @@ class TestGeneratorMethodsWithSequences(keras_parameterized.TestCase):
     model = testing_utils.get_small_mlp(
         num_hidden=10, num_classes=1, input_dim=10)
 
-    model.compile(RMSPropOptimizer(0.001), 'binary_crossentropy')
+    model.compile(rmsprop.RMSprop(0.001), 'binary_crossentropy')
     model.fit(CustomSequence(), validation_data=val_data, epochs=2)
     model.evaluate(CustomSequence())
     model.predict(CustomSequence())
-- 
GitLab


From 690bd62639abc6638d3de9e5c9300d368fea49c8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 12:03:01 -0800
Subject: [PATCH 615/873] Update cuda 10 image with TensorRT.

PiperOrigin-RevId: 225581317
---
 third_party/toolchains/preconfig/generate/containers.bzl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/toolchains/preconfig/generate/containers.bzl b/third_party/toolchains/preconfig/generate/containers.bzl
index c64f7f3f8f..c56c6f3346 100644
--- a/third_party/toolchains/preconfig/generate/containers.bzl
+++ b/third_party/toolchains/preconfig/generate/containers.bzl
@@ -1,4 +1,4 @@
 container_digests = {
     "cuda9.0-cudnn7-ubuntu14.04": "sha256:c43ed5341dd765042e0bbd1bf50fadeedd649d1e0c34d81999cb6ce30916cb95",
-    "cuda10.0-cudnn7-ubuntu14.04": "sha256:66e7d592c8149291d5562a0f3093655a15b09c22e0eb30a87b3b6469b7a30ffc",
+    "cuda10.0-cudnn7-ubuntu14.04": "sha256:919e75247743ae1244d5d72ee9f18090379d4a9035e5853010f6d59d87cd2e8b",
 }
-- 
GitLab


From e8d4a3d079ec9c49c75e93978c5b9a3709a623fd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 12:24:46 -0800
Subject: [PATCH 616/873] Adds numerical correctness tests for all Keras modes
 and model types

PiperOrigin-RevId: 225584709
---
 tensorflow/python/keras/BUILD                 |  15 ++
 .../python/keras/engine/correctness_test.py   | 147 ++++++++++++++++++
 .../keras/engine/training_eager_test.py       |  15 --
 3 files changed, 162 insertions(+), 15 deletions(-)
 create mode 100644 tensorflow/python/keras/engine/correctness_test.py

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 907730eca6..3f4b42ca03 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -769,6 +769,21 @@ py_test(
     ],
 )
 
+py_test(
+    name = "correctness_test",
+    size = "medium",
+    srcs = ["engine/correctness_test.py"],
+    shard_count = 2,
+    srcs_version = "PY2AND3",
+    tags = ["notsan"],
+    deps = [
+        ":keras",
+        "//tensorflow/python:client_testlib",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
 py_test(
     name = "training_test",
     size = "medium",
diff --git a/tensorflow/python/keras/engine/correctness_test.py b/tensorflow/python/keras/engine/correctness_test.py
new file mode 100644
index 0000000000..c2f3b040de
--- /dev/null
+++ b/tensorflow/python/keras/engine/correctness_test.py
@@ -0,0 +1,147 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for numerical correctness."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python import keras
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.platform import test
+
+
+class Bias(keras.layers.Layer):
+  """Layer that add a bias to its inputs."""
+
+  def build(self, input_shape):
+    self.bias = self.add_variable('bias', (1,), initializer='zeros')
+
+  def call(self, inputs):
+    return inputs + self.bias
+
+
+class MultiInputSubclassed(keras.Model):
+  """Subclassed Model that adds its inputs and then adds a bias."""
+
+  def __init__(self):
+    super(MultiInputSubclassed, self).__init__()
+    self.add = keras.layers.Add()
+    self.bias = Bias()
+
+  def call(self, inputs):
+    added = self.add(inputs)
+    return self.bias(added)
+
+
+def multi_input_functional():
+  """Functional Model that adds its inputs and then adds a bias."""
+  input_1 = keras.Input(shape=(1,))
+  input_2 = keras.Input(shape=(1,))
+  input_3 = keras.Input(shape=(1,))
+  added = keras.layers.Add()([input_1, input_2, input_3])
+  output = Bias()(added)
+  return keras.Model([input_1, input_2, input_3], output)
+
+
+@keras_parameterized.run_with_all_model_types
+@keras_parameterized.run_all_keras_modes
+class SimpleBiasTest(keras_parameterized.TestCase):
+
+  def _get_simple_bias_model(self):
+    model = testing_utils.get_model_from_layers([Bias()], input_shape=(1,))
+    model.compile(keras.optimizer_v2.gradient_descent.SGD(0.1), 'mae')
+    return model
+
+  def test_simple_bias_fit(self):
+    x = np.array([[0.], [1.], [2.]])
+    y = np.array([[0.5], [2.], [3.5]])
+    model = self._get_simple_bias_model()
+
+    history = model.fit(x, y, batch_size=3, epochs=5)
+    self.assertAllClose(history.history['loss'], [1., 0.9, 0.8, 0.7, 0.6])
+
+  def test_simple_bias_evaluate(self):
+    x = np.array([[0.], [1.], [2.]])
+    y = np.array([[1.], [3.], [5.]])
+    model = self._get_simple_bias_model()
+
+    loss = model.evaluate(x, y, batch_size=1)
+    self.assertAlmostEqual(loss, 2.)
+
+  def test_simple_bias_predict(self):
+    x = np.array([[0.], [1.], [2.]])
+    model = self._get_simple_bias_model()
+
+    pred = model.predict(x, batch_size=1)
+    self.assertAllClose(x, pred)
+
+
+@keras_parameterized.run_all_keras_modes
+class MultipleInputTest(keras_parameterized.TestCase):
+
+  def _get_multiple_input_model(self, subclassed=True):
+    if subclassed:
+      model = MultiInputSubclassed()
+    else:
+      model = multi_input_functional()
+    model.compile(keras.optimizer_v2.gradient_descent.SGD(0.1), 'mae')
+    return model
+
+  @parameterized.named_parameters(('subclassed', True), ('functional', False))
+  def test_multiple_input_fit(self, subclassed):
+    x = [
+        np.array([[1.], [2.], [3.]]),
+        np.array([[4.], [5.], [6.]]),
+        np.array([[7.], [8.], [9.]])
+    ]
+    y = np.array([[12.5], [16.], [19.5]])
+
+    model = self._get_multiple_input_model(subclassed)
+    history = model.fit(x, y, batch_size=3, epochs=5)
+    self.assertAllClose(history.history['loss'], [1., 0.9, 0.8, 0.7, 0.6])
+
+  @parameterized.named_parameters(('subclassed', True), ('functional', False))
+  def test_multiple_input_evaluate(self, subclassed):
+    x = [
+        np.array([[1.], [2.], [3.]]),
+        np.array([[4.], [5.], [6.]]),
+        np.array([[7.], [8.], [9.]])
+    ]
+    y = np.array([[13.], [17.], [21.]])
+
+    model = self._get_multiple_input_model(subclassed)
+    loss = model.evaluate(x, y, batch_size=3)
+    self.assertAlmostEqual(loss, 2.)
+
+  @parameterized.named_parameters(('subclassed', True), ('functional', False))
+  def test_multiple_input_predict(self, subclassed):
+    x = [
+        np.array([[1.], [2.], [3.]]),
+        np.array([[4.], [5.], [6.]]),
+        np.array([[7.], [8.], [9.]])
+    ]
+
+    model = self._get_multiple_input_model(subclassed)
+    pred = model.predict(x, batch_size=1)
+    self.assertAllClose(pred, [[12.], [15.], [18.]])
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/keras/engine/training_eager_test.py b/tensorflow/python/keras/engine/training_eager_test.py
index 6b98067063..27eaea23ba 100644
--- a/tensorflow/python/keras/engine/training_eager_test.py
+++ b/tensorflow/python/keras/engine/training_eager_test.py
@@ -248,21 +248,6 @@ class CorrectnessTest(keras_parameterized.TestCase):
     layer(1.)  # Plain-value inputs are only valid in eager mode.
     self.assertEqual(1, len(layer.losses))
 
-  def test_predict_correctness(self):
-    i1 = keras.layers.Input(shape=(4, 5))
-    i2 = keras.layers.Input(shape=(4, 5))
-    i3 = keras.layers.Input(shape=(4, 5))
-    o = keras.layers.add([i1, i2, i3])
-    model = keras.models.Model([i1, i2, i3], o)
-    model.run_eagerly = True
-
-    x1 = np.random.random((2, 4, 5))
-    x2 = np.random.random((2, 4, 5))
-    x3 = np.random.random((2, 4, 5))
-    out = model.predict([x1, x2, x3])
-
-    self.assertAllClose(out, x1 + x2 + x3)
-
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
-- 
GitLab


From 06c3180d80147c39b44157a7fa752a86b5a2f21f Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Fri, 14 Dec 2018 12:40:03 -0800
Subject: [PATCH 617/873] Further cleanup for RunInputs for fuzzers.

PiperOrigin-RevId: 225587007
---
 .../core/kernels/fuzzing/check_numerics_fuzz.cc     |  2 +-
 tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc |  2 +-
 .../fuzzing/example_proto_fast_parsing_fuzz.cc      |  2 +-
 tensorflow/core/kernels/fuzzing/fuzz_session.h      | 13 ++++++++-----
 tensorflow/core/kernels/fuzzing/identity_fuzz.cc    |  2 +-
 tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc     |  3 +--
 .../core/kernels/fuzzing/parse_tensor_op_fuzz.cc    |  2 +-
 .../core/kernels/fuzzing/string_split_fuzz.cc       |  3 +--
 .../core/kernels/fuzzing/string_split_v2_fuzz.cc    |  3 +--
 9 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc b/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
index c62378a264..2258a094d9 100644
--- a/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/check_numerics_fuzz.cc
@@ -40,7 +40,7 @@ class FuzzCheckNumerics : public FuzzSession {
     for (size_t i = 0; i < num_floats; i++) {
       flat_tensor(i) = float_data[i];
     }
-    RunInputs({{"input", input_tensor}}).IgnoreError();
+    RunInputs({{"input", input_tensor}});
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc b/tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc
index b13c37a42c..09d196147c 100644
--- a/tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/encode_jpeg_fuzz.cc
@@ -52,7 +52,7 @@ class FuzzEncodeJpeg : public FuzzSession {
     for (size_t i = 0; i < actual_pixels; i++) {
       flat_tensor(i) = data[i];
     }
-    RunInputs({{"input", input_tensor}}).IgnoreError();
+    RunInputs({{"input", input_tensor}});
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc b/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
index 41b2eec62c..f72dfb39b3 100644
--- a/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/example_proto_fast_parsing_fuzz.cc
@@ -53,7 +53,7 @@ class FuzzExampleProtoFastParsing : public FuzzSession {
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
     input_tensor.scalar<string>()() =
         string(reinterpret_cast<const char*>(data), size);
-    RunInputs({{"input", input_tensor}}).IgnoreError();
+    RunInputs({{"input", input_tensor}});
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/fuzz_session.h b/tensorflow/core/kernels/fuzzing/fuzz_session.h
index f5eca49b96..4b036b181d 100644
--- a/tensorflow/core/kernels/fuzzing/fuzz_session.h
+++ b/tensorflow/core/kernels/fuzzing/fuzz_session.h
@@ -61,7 +61,7 @@ namespace fuzzing {
 //   SINGLE_INPUT_OP_BUILDER(DT_INT8, Identity);
 //   void FuzzImpl(const uint8_t* data, size_t size) {
 //      ... convert data and size to a Tensor, pass it to:
-//      RunInputs({{"input", input_tensor}}).IgnoreError();
+//      RunInputs({{"input", input_tensor}});
 //
 class FuzzSession {
  public:
@@ -109,11 +109,14 @@ class FuzzSession {
   // Runs the TF session by pulling on the "output" node, attaching
   // the supplied input_tensor to the input node(s), and discarding
   // any returned output.
-  Status RunInputs(const std::vector<std::pair<string, Tensor> >& inputs) {
-    return session_->Run(inputs, {}, {"output"}, nullptr);
+  // Note: We are ignoring Status from Run here since fuzzers don't need to
+  // check it (as that will slow them down and printing/logging is useless).
+  void RunInputs(const std::vector<std::pair<string, Tensor> >& inputs) {
+    RunInputsWithStatus(inputs).IgnoreError();
   }
 
-  Status RunMultipleInputs(
+  // Same as RunInputs but don't ignore status
+  Status RunInputsWithStatus(
       const std::vector<std::pair<string, Tensor> >& inputs) {
     return session_->Run(inputs, {}, {"output"}, nullptr);
   }
@@ -144,7 +147,7 @@ class FuzzStringInputOp : public FuzzSession {
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
     input_tensor.scalar<string>()() =
         string(reinterpret_cast<const char*>(data), size);
-    RunInputs({{"input", input_tensor}}).IgnoreError();
+    RunInputs({{"input", input_tensor}});
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/identity_fuzz.cc b/tensorflow/core/kernels/fuzzing/identity_fuzz.cc
index dc056331e5..4c1049d381 100644
--- a/tensorflow/core/kernels/fuzzing/identity_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/identity_fuzz.cc
@@ -30,9 +30,9 @@ class FuzzIdentity : public FuzzSession {
       flat_tensor(i) = data[i];
     }
 
-    Status s = RunInputs({{"input", input_tensor}});
     // Note:  For many ops, we don't care about this success -- but when
     // testing to make sure the harness actually works, it's useful.
+    Status s = RunInputsWithStatus({{"input", input_tensor}});
     if (!s.ok()) {
       LOG(ERROR) << "Execution failed: " << s.error_message();
     }
diff --git a/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc b/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
index d181f54a11..85cbe51ba8 100644
--- a/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/one_hot_fuzz.cc
@@ -68,8 +68,7 @@ class FuzzOneHot : public FuzzSession {
     RunInputs({{"input", input_tensor},
                {"depth", depth_tensor},
                {"on", on_tensor},
-               {"off", off_tensor}})
-        .IgnoreError();
+               {"off", off_tensor}});
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
index bf67d24266..0ce4206fc3 100644
--- a/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/parse_tensor_op_fuzz.cc
@@ -62,7 +62,7 @@ class FuzzParseTensor : public FuzzSession {
     // Now we can do the actual fuzz implementation
     Tensor input_tensor(tensorflow::DT_STRING, TensorShape({}));
     input_tensor.scalar<string>()() = as_string;
-    RunInputs({{"input", input_tensor}}).IgnoreError();
+    RunInputs({{"input", input_tensor}});
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc b/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
index 201e0c52c8..10958602b2 100644
--- a/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/string_split_fuzz.cc
@@ -48,8 +48,7 @@ class FuzzStringSplit : public FuzzSession {
           reinterpret_cast<const char*>(data + delim_len), size - delim_len);
     }
 
-    RunInputs({{"input", input_tensor}, {"delimiter", delimiter_tensor}})
-        .IgnoreError();
+    RunInputs({{"input", input_tensor}, {"delimiter", delimiter_tensor}});
   }
 };
 
diff --git a/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc b/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc
index 2eee6a8871..969821dbba 100644
--- a/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc
+++ b/tensorflow/core/kernels/fuzzing/string_split_v2_fuzz.cc
@@ -52,8 +52,7 @@ class FuzzStringSplitV2 : public FuzzSession {
           reinterpret_cast<const char*>(data + sep_len), size - sep_len);
     }
 
-    RunInputs({{"input", input_tensor}, {"separator", separator_tensor}})
-        .IgnoreError();
+    RunInputs({{"input", input_tensor}, {"separator", separator_tensor}});
   }
 
  private:
-- 
GitLab


From e9e534f3239d7cb7e2a815b49c1c3520d9566d70 Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <slebedev@google.com>
Date: Fri, 14 Dec 2018 12:52:22 -0800
Subject: [PATCH 618/873] Moved `Variable.constraint` to Keras.

* The signatures of `VariableV1` and `variable_scope` were unchanged.
* `backend.variable` and `Layer.add_weight` now manually set the
  `constraint` attribute instead of passing it to `Variable`.
* `ResourceVariable` and `Variable` no longer accepts `constraint`
  in the constructor, nor expose it as a @property. This is a
  non-backward compatible API change but it only affects
  `ResourceVariable` users.

PiperOrigin-RevId: 225589244
---
 .../contrib/eager/python/parameter_server.py  | 12 ----
 .../contrib/optimizer_v2/optimizer_v2.py      | 19 ++---
 .../contrib/optimizer_v2/optimizer_v2_test.py |  8 +--
 .../tpu/python/tpu/keras_tpu_variables.py     | 70 +++++++++----------
 tensorflow/python/eager/def_function.py       | 15 +---
 tensorflow/python/keras/backend.py            | 33 ++++++++-
 tensorflow/python/keras/engine/base_layer.py  |  2 +-
 .../python/keras/engine/base_layer_utils.py   |  6 +-
 .../python/keras/optimizer_v2/optimizer_v2.py |  9 +--
 .../keras/optimizer_v2/optimizer_v2_test.py   |  6 +-
 .../resource_variable_ops_test.py             | 17 +----
 .../kernel_tests/variable_scope_test.py       |  4 +-
 .../python/kernel_tests/variables_test.py     |  5 +-
 .../python/ops/resource_variable_ops.py       | 43 ++----------
 tensorflow/python/ops/variable_scope.py       |  3 +-
 tensorflow/python/ops/variables.py            | 28 +++-----
 tensorflow/python/training/optimizer.py       | 19 ++---
 tensorflow/python/training/optimizer_test.py  |  9 ++-
 .../api/golden/v2/tensorflow.-variable.pbtxt  |  6 +-
 19 files changed, 124 insertions(+), 190 deletions(-)

diff --git a/tensorflow/contrib/eager/python/parameter_server.py b/tensorflow/contrib/eager/python/parameter_server.py
index 7803a6799b..d50ff236bb 100644
--- a/tensorflow/contrib/eager/python/parameter_server.py
+++ b/tensorflow/contrib/eager/python/parameter_server.py
@@ -79,7 +79,6 @@ class SharedVariable(resource_variable_ops.ResourceVariable):
                trainable=True,
                name=None,
                dtype=None,
-               constraint=None,
                initialize=True,
                **unused_kwargs):
     """Creates a variable.
@@ -99,13 +98,6 @@ class SharedVariable(resource_variable_ops.ResourceVariable):
         If None, either the datatype will be kept (if initial_value is
         a Tensor) or float32 will be used (if it is a Python object convertible
         to a Tensor).
-      constraint: An optional projection function to be applied to the variable
-        after being updated by an `Optimizer` (e.g. used to implement norm
-        constraints or value constraints for layer weights). The function must
-        take as input the unprojected Tensor representing the value of the
-        variable and return the Tensor for the projected value
-        (which must have the same shape). Constraints are not safe to
-        use when doing asynchronous distributed training.
       initialize: if True, runs initialization in eager execution; leaves the
         variable uninitialized otherwise.
 
@@ -126,9 +118,6 @@ class SharedVariable(resource_variable_ops.ResourceVariable):
                        "functions. Please file a feature request if this "
                        "restriction inconveniences you.")
 
-    if constraint is not None and not callable(constraint):
-      raise ValueError("The `constraint` argument must be a callable.")
-
     if isinstance(initial_value, checkpointable.CheckpointInitialValue):
       self._maybe_initialize_checkpointable()
       self._update_uid = initial_value.checkpoint_position.restore_uid
@@ -201,7 +190,6 @@ class SharedVariable(resource_variable_ops.ResourceVariable):
         self._initial_value = initial_value if self._in_graph_mode else None
         self._handle_name = handle_name + ":0"
         self._dtype = initial_value.dtype.base_dtype
-        self._constraint = constraint
 
         if self._in_graph_mode:
           with ops.name_scope("IsInitialized"):
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index 7fb23abc38..54c2a749d1 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -69,16 +69,17 @@ class _RefVariableProcessor(_OptimizableVariable):
 
   def update_op(self, optimizer, g, *args):
     if isinstance(g, ops.Tensor):
-      update_op = optimizer._apply_dense(g, self._v, *args)  # pylint: disable=protected-access
-      if self._v.constraint is not None:
+      # pylint: disable=protected-access
+      update_op = optimizer._apply_dense(g, self._v, *args)
+      if getattr(self._v, "_constraint", None) is not None:
         with ops.control_dependencies([update_op]):
-          return self._v.assign(self._v.constraint(self._v))
+          return self._v.assign(self._v._constraint(self._v))
       else:
         return update_op
     else:
       assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a "
                                                 "tensor nor IndexedSlices.")
-      if self._v.constraint is not None:
+      if getattr(self._v, "_constraint", None) is not None:
         raise RuntimeError(
             "Cannot use a constraint function on a sparse variable.")
       # pylint: disable=protected-access
@@ -97,9 +98,9 @@ class _DenseReadResourceVariableProcessor(_OptimizableVariable):
   def update_op(self, optimizer, g, *args):
     # pylint: disable=protected-access
     update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0], *args)
-    if self._v.constraint is not None:
+    if getattr(self._v, "_constraint", None) is not None:
       with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v.constraint(self._v))
+        return self._v.assign(self._v._constraint(self._v))
     else:
       return update_op
 
@@ -116,15 +117,15 @@ class _DenseResourceVariableProcessor(_OptimizableVariable):
   def update_op(self, optimizer, g, *args):
     # pylint: disable=protected-access
     if isinstance(g, ops.IndexedSlices):
-      if self._v.constraint is not None:
+      if getattr(self._v, "_constraint", None) is not None:
         raise RuntimeError(
             "Cannot use a constraint function on a sparse variable.")
       return optimizer._resource_apply_sparse_duplicate_indices(
           g.values, self._v, g.indices, *args)
     update_op = optimizer._resource_apply_dense(g, self._v, *args)
-    if self._v.constraint is not None:
+    if getattr(self._v, "_constraint", None) is not None:
       with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v.constraint(self._v))
+        return self._v.assign(self._v._constraint(self._v))
     else:
       return update_op
 
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py b/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py
index dd7f2f4405..d71172642e 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py
@@ -222,14 +222,14 @@ class OptimizerTest(test.TestCase):
       opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
       self.assertTrue(opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
 
+  @test_util.run_v1_only(
+      '`ResourceVariable` does not support `constraint` argument.')
   def testConstraint(self):
     constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
     constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
     with self.cached_session():
-      var0 = variables.Variable([1.0, 2.0],
-                                constraint=constraint_01)
-      var1 = variables.Variable([3.0, 4.0],
-                                constraint=constraint_0)
+      var0 = variables.VariableV1([1.0, 2.0], constraint=constraint_01)
+      var1 = variables.VariableV1([3.0, 4.0], constraint=constraint_0)
       cost = 5 * var0 + 3 * var1
       global_step = variables.Variable(
           array_ops.zeros([], dtypes.int64), name='global_step')
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
index de425626c8..22a39a17e4 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
@@ -66,11 +66,12 @@ class ReplicatedVariable(object):
   * colocation.
   """
 
-  def __init__(self, name, variables):
+  def __init__(self, name, variables, constraint=None):
     self._name = name
     self._primary_var = variables[0]
     self._common_name = self._primary_var.name.split(":")[0]
     self._vars = variables
+    self._constraint = constraint
     self._cached_value = None
     self._dtype = variables[0].dtype
 
@@ -133,7 +134,7 @@ class ReplicatedVariable(object):
 
   @property
   def constraint(self):
-    return None
+    return self._constraint
 
   @property
   def op(self):
@@ -305,42 +306,37 @@ def replicated_variable_for_optimizer(num_replicas):
     yield
     return
 
-  try:
-    old_v = backend.variable
-
-    def opt_variable(value, dtype=None, name=None, constraint=None):
-      """Instantiates a variable and returns it."""
-      if dtype is None:
-        dtype = backend.floatx()
-
-      variables = []
-      for i in range(num_replicas):
-        # Keras holds the variables in optimizer class instance , so the name
-        # does not matter here. ResourceVariable constructor will find a unique
-        # name (including name=None) for each replica.
-        with ops.device("device:TPU:{}".format(i)):
-          v = resource_variable_ops.ResourceVariable(
-              value,
-              dtype=dtypes_module.as_dtype(dtype),
-              name=name,
-              constraint=constraint)
-          variables.append(v)
-      name = "replicate_{}_{}".format("variable" if name is None else name,
-                                      ops.uid())
-      v = ReplicatedVariable(name, variables)
-
-      # pylint: disable=protected-access
-
-      if isinstance(value, np.ndarray):
-        v._keras_shape = value.shape
-      elif hasattr(value, "shape"):
-        v._keras_shape = backend.int_shape(value)
-      v._uses_learning_phase = False
-      backend.track_variable(v)
-      return v
+  def opt_variable(value, dtype=None, name=None, constraint=None):
+    """Instantiates a variable and returns it."""
+    if dtype is None:
+      dtype = backend.floatx()
+
+    variables = []
+    for i in range(num_replicas):
+      # Keras holds the variables in optimizer class instance , so the name
+      # does not matter here. ResourceVariable constructor will find a unique
+      # name (including name=None) for each replica.
+      with ops.device("device:TPU:{}".format(i)):
+        v = resource_variable_ops.ResourceVariable(
+            value,
+            dtype=dtypes_module.as_dtype(dtype),
+            name=name)
+        variables.append(v)
+    name = "replicate_{}_{}".format("variable" if name is None else name,
+                                    ops.uid())
+    v = ReplicatedVariable(name, variables, constraint)
+    # pylint: disable=protected-access
+    if isinstance(value, np.ndarray):
+      v._keras_shape = value.shape
+    elif hasattr(value, "shape"):
+      v._keras_shape = backend.int_shape(value)
+    v._uses_learning_phase = False
+    backend.track_variable(v)
+    return v
 
+  old_variable = backend.variable
+  try:
     backend.variable = opt_variable
     yield
-
   finally:
-    backend.variable = old_v
+    backend.variable = old_variable
diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index a12f9ed765..9c4710175e 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -51,7 +51,6 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
                caching_device=None,
                name=None,
                dtype=None,
-               constraint=None,
                add_initializers_to=None,
                **unused_kwargs):
     """Creates a variable.
@@ -76,13 +75,6 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
         If None, either the datatype will be kept (if initial_value is
        a Tensor) or float32 will be used (if it is a Python object convertible
        to a Tensor).
-      constraint: An optional projection function to be applied to the variable
-        after being updated by an `Optimizer` (e.g. used to implement norm
-        constraints or value constraints for layer weights). The function must
-        take as input the unprojected Tensor representing the value of the
-        variable and return the Tensor for the projected value
-        (which must have the same shape). Constraints are not safe to
-        use when doing asynchronous distributed training.
       add_initializers_to: if not None and not in legacy graph mode, the
         initializer tensor will be added to this map instead of adding the
         assignment to the function.
@@ -97,8 +89,7 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
       # here; we can't really do the capturing or conditional logic.
       resource_variable_ops.ResourceVariable.__init__(
           self, initial_value=initial_value, trainable=trainable,
-          caching_device=caching_device, name=name, dtype=dtype,
-          constraint=constraint)
+          caching_device=caching_device, name=name, dtype=dtype)
       return
     with ops.init_scope():
       self._in_graph_mode = not context.executing_eagerly()
@@ -106,9 +97,6 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
       raise ValueError("initial_value must be specified.")
     init_from_fn = callable(initial_value)
 
-    if constraint is not None and not callable(constraint):
-      raise ValueError("The `constraint` argument must be a callable.")
-
     if isinstance(initial_value, checkpointable.CheckpointInitialValue):
       self._maybe_initialize_checkpointable()
       self._update_uid = initial_value.checkpoint_position.restore_uid
@@ -147,7 +135,6 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
       self._unique_id = shared_name
       self._handle_name = shared_name + ":0"
       self._dtype = initial_value.dtype.base_dtype
-      self._constraint = constraint
       assert initial_value is not None
       if self._in_graph_mode:
         with ops.init_scope():
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 095273071f..4c87bf3769 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -673,7 +673,11 @@ def variable(value, dtype=None, name=None, constraint=None):
       dtype: Tensor type.
       name: Optional name string for the tensor.
       constraint: Optional projection function to be
-          applied to the variable after an optimizer update.
+          applied to the variable after an optimizer update. The function
+          must take as input the unprojected tensor representing the value
+          of the variable and return the tensor for the projected value
+          (which must have the same shape). Constraints are not safe to
+          use when doing asynchronous distributed training.
 
   Returns:
       A variable instance (with Keras metadata included).
@@ -706,8 +710,8 @@ def variable(value, dtype=None, name=None, constraint=None):
   v = resource_variable_ops.ResourceVariable(
       value,
       dtype=dtypes_module.as_dtype(dtype),
-      name=name,
-      constraint=constraint)
+      name=name)
+  v._constraint = constraint
   if isinstance(value, np.ndarray):
     v._keras_shape = value.shape
   elif hasattr(value, 'shape'):
@@ -765,6 +769,29 @@ def _initialize_variables(session):
       session.run(variables_module.variables_initializer(uninitialized_vars))
 
 
+def _has_constraint(v):
+  """Returns `True` if a variable has a constraint and `False` otherwise."""
+  return getattr(v, '_constraint', None) is not None
+
+
+def _maybe_enforce_constraint(v):
+  """Enforces a constraint for a variable.
+
+  Args:
+    v: A variable.
+
+  Returns:
+    A `Tensor` which corresponds to the value of this variable with
+    the constraint enforced, or the current value of this variable,
+    if no constraint is present.
+  """
+  constraint = getattr(v, '_constraint', None)
+  if constraint is None:
+    return array_ops.identity(v)
+  else:
+    return v.assign(constraint(v))
+
+
 @tf_export('keras.backend.constant')
 def constant(value, dtype=None, shape=None, name=None):
   """Creates a constant tensor.
diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index aeed750652..5ab48e8b3d 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -347,13 +347,13 @@ class Layer(checkpointable.CheckpointableBase):
         overwrite=True,
         initializer=initializer,
         dtype=dtype,
-        constraint=constraint,
         trainable=trainable and self.trainable,
         partitioner=partitioner,
         use_resource=use_resource,
         collections=collections,
         synchronization=synchronization,
         aggregation=aggregation)
+    variable._constraint = constraint
     backend.track_variable(variable)
 
     if regularizer is not None:
diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py
index d2f947f177..d2343969ee 100644
--- a/tensorflow/python/keras/engine/base_layer_utils.py
+++ b/tensorflow/python/keras/engine/base_layer_utils.py
@@ -59,7 +59,6 @@ def make_variable(name,
                   trainable=None,
                   caching_device=None,
                   validate_shape=True,
-                  constraint=None,
                   use_resource=None,
                   collections=None,
                   synchronization=tf_variables.VariableSynchronization.AUTO,
@@ -93,7 +92,6 @@ def make_variable(name,
       `synchronization` is set to `ON_READ`.
     caching_device: Passed to `tf.Variable`.
     validate_shape: Passed to `tf.Variable`.
-    constraint: Constraint instance (callable).
     use_resource: Whether to use a `ResourceVariable`.
     collections: List of graph collections keys. The new variable is added to
       these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
@@ -131,19 +129,17 @@ def make_variable(name,
 
   # TODO(apassos,rohanj) figure out how to remove collections from here so we
   # can remove the V1.
-  v = tf_variables.VariableV1(
+  return tf_variables.VariableV1(
       initial_value=init_val,
       name=name,
       trainable=trainable,
       caching_device=caching_device,
       dtype=variable_dtype,
       validate_shape=validate_shape,
-      constraint=constraint,
       use_resource=use_resource,
       collections=collections,
       synchronization=synchronization,
       aggregation=aggregation)
-  return v
 
 
 def get_default_graph_uid_map():
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index 0e909d0d79..041d36a931 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -344,17 +344,14 @@ class OptimizerV2(checkpointable.CheckpointableBase):
       if isinstance(var, ops.Tensor):
         raise NotImplementedError("Trying to update a Tensor ", var)
       if isinstance(grad, ops.IndexedSlices):
-        if var.constraint is not None:
+        if backend._has_constraint(var):  # pylint: disable=protected-access
           raise RuntimeError(
               "Cannot use a constraint function on a sparse variable.")
         return self._resource_apply_sparse_duplicate_indices(
             grad.values, var, grad.indices)
       update_op = self._resource_apply_dense(grad, var)
-      if var.constraint is not None:
-        with ops.control_dependencies([update_op]):
-          return var.assign(var.constraint(var))
-      else:
-        return update_op
+      with ops.control_dependencies([update_op]):
+        return backend._maybe_enforce_constraint(var)  # pylint: disable=protected-access
 
     with ops.name_scope(name, self._name) as name:
       for grad, var in grads_and_vars:
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
index 8b2865e2aa..f27f3516b5 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
@@ -235,10 +235,8 @@ class OptimizerTest(test.TestCase):
     constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
     constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
     with self.cached_session():
-      var0 = variables.Variable([1.0, 2.0],
-                                constraint=constraint_01)
-      var1 = variables.Variable([3.0, 4.0],
-                                constraint=constraint_0)
+      var0 = backend.variable([1.0, 2.0], constraint=constraint_01)
+      var1 = backend.variable([3.0, 4.0], constraint=constraint_0)
       loss = lambda: 5 * var0 + 3 * var1
       sgd = gradient_descent.SGD(3.0)
 
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index df7b686165..4689e2d814 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -497,18 +497,6 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       state_ops.scatter_update(ref, indices, updates)
       self.assertAllEqual(ref.read_value(), [True, True, True])
 
-  @test_util.run_in_graph_and_eager_modes
-  def testConstraintArg(self):
-    constraint = lambda x: x
-    v = resource_variable_ops.ResourceVariable(
-        initial_value=lambda: 1, constraint=constraint, name="var0")
-    self.assertEqual(v.constraint, constraint)
-
-    constraint = 0
-    with self.assertRaises(ValueError):
-      v = resource_variable_ops.ResourceVariable(
-          initial_value=lambda: 1, constraint=constraint, name="var1")
-
   # TODO(alive): how should this work in Eager mode?
   @test_util.run_deprecated_v1
   def testInitFn(self):
@@ -868,19 +856,16 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
   def testVariableEager(self):
     with context.eager_mode():
       init = array_ops.ones(shape=[10, 20, 35], dtype=dtypes.int32)
-      constraint = lambda x: x
       with ops.name_scope("foo"):
         v = resource_variable_ops.ResourceVariable(
             name="var7",
             initial_value=init,
-            caching_device="cpu:0",
-            constraint=constraint)
+            caching_device="cpu:0")
       # Test properties
       self.assertEqual(dtypes.int32, v.dtype)
       self.assertEqual("foo/var7:0", v.name)
       self.assertAllEqual([10, 20, 35], v.shape.as_list())
       self.assertTrue(isinstance(v.handle, ops.EagerTensor))
-      self.assertEqual(constraint, v.constraint)
       self.assertAllEqual(init.numpy(), v.read_value().numpy())
       self.assertAllEqual(init.numpy(), v.value().numpy())
 
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index 451eb38530..e79d822207 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -136,8 +136,8 @@ class VariableScopeTest(test.TestCase):
         self.evaluate(variables_lib.variables_initializer([w]))
         self.assertAllClose(self.evaluate(w.value()), 0.3)
 
-  @test_util.run_in_graph_and_eager_modes
-  @run_inside_wrap_function_in_eager_mode
+  @test_util.run_v1_only(
+      "`ResourceVariable` does not support `constraint` argument.")
   def testVarScopeConstraint(self):
     constraint = lambda x: 0. * x
     with variable_scope.variable_scope("tower1") as tower:
diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 07807e89d0..467867125e 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -446,16 +446,17 @@ class VariablesTestCase(test.TestCase):
       self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(np.negative(value), self.evaluate(v2))
 
+  @test_util.run_v1_only("`constraint` argument is removed in TF2.0")
   def testConstraintArg(self):
     constraint = lambda x: x
-    v = variables.Variable(
+    v = variables.VariableV1(
         lambda: constant_op.constant(1.),
         constraint=constraint)
     self.assertEqual(v.constraint, constraint)
 
     constraint = 0
     with self.assertRaises(ValueError):
-      v = variables.Variable(
+      v = variables.VariableV1(
           lambda: constant_op.constant(1.),
           constraint=constraint)
 
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 6104cfa7ff..95db6a1463 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -256,13 +256,7 @@ class ResourceVariable(variables.RefVariable):
         arguments (except for import_scope) are mutually exclusive.
       import_scope: Optional `string`. Name scope to add to the
         ResourceVariable. Only used when `variable_def` is provided.
-      constraint: An optional projection function to be applied to the variable
-        after being updated by an `Optimizer` (e.g. used to implement norm
-        constraints or value constraints for layer weights). The function must
-        take as input the unprojected Tensor representing the value of the
-        variable and return the Tensor for the projected value
-        (which must have the same shape). Constraints are not safe to
-        use when doing asynchronous distributed training.
+      constraint: Ignored. Provided for compatibility with tf.Variable.
 
     Raises:
       ValueError: If the initial value is not specified, or does not have a
@@ -274,6 +268,10 @@ class ResourceVariable(variables.RefVariable):
     collections.
     @end_compatibility
     """
+    if constraint is not None:
+      raise RuntimeError(  # pylint: disable=g-doc-exception
+          "`ResourceVariable` does not support `constraint` argument.")
+
     if variable_def:
       if initial_value is not None:
         raise ValueError("variable_def and initial_value are mutually "
@@ -290,8 +288,7 @@ class ResourceVariable(variables.RefVariable):
           validate_shape=validate_shape,
           caching_device=caching_device,
           name=name,
-          dtype=dtype,
-          constraint=constraint)
+          dtype=dtype)
 
   # pylint: disable=unused-argument
   def _init_from_args(self,
@@ -301,8 +298,7 @@ class ResourceVariable(variables.RefVariable):
                       validate_shape=True,
                       caching_device=None,
                       name=None,
-                      dtype=None,
-                      constraint=None):
+                      dtype=None):
     """Creates a variable.
 
     Args:
@@ -329,13 +325,6 @@ class ResourceVariable(variables.RefVariable):
         If None, either the datatype will be kept (if initial_value is
        a Tensor) or float32 will be used (if it is a Python object convertible
        to a Tensor).
-      constraint: An optional projection function to be applied to the variable
-        after being updated by an `Optimizer` (e.g. used to implement norm
-        constraints or value constraints for layer weights). The function must
-        take as input the unprojected Tensor representing the value of the
-        variable and return the Tensor for the projected value
-        (which must have the same shape). Constraints are not safe to
-        use when doing asynchronous distributed training.
 
     Raises:
       ValueError: If the initial value is not specified, or does not have a
@@ -367,8 +356,6 @@ class ResourceVariable(variables.RefVariable):
       raise ValueError(
           "collections argument to Variable constructor must be a list, tuple, "
           "or set. Got %s of type %s" % (collections, type(collections)))
-    if constraint is not None and not callable(constraint):
-      raise ValueError("The `constraint` argument must be a callable.")
 
     if isinstance(initial_value, checkpointable.CheckpointInitialValue):
       self._maybe_initialize_checkpointable()
@@ -425,7 +412,6 @@ class ResourceVariable(variables.RefVariable):
         self._initial_value = initial_value if self._in_graph_mode else None
         self._handle_name = handle_name + ":0"
         self._dtype = initial_value.dtype.base_dtype
-        self._constraint = constraint
 
         if self._in_graph_mode:
           with ops.name_scope("IsInitialized"):
@@ -543,7 +529,6 @@ class ResourceVariable(variables.RefVariable):
       self._save_slice_info = None
     self._caching_device = None
     self._dtype = dtypes.as_dtype(self._handle.op.get_attr("dtype"))
-    self._constraint = None
     self._cached_shape_as_list = None
 
   @contextlib.contextmanager
@@ -577,7 +562,6 @@ class ResourceVariable(variables.RefVariable):
     copied_variable = ResourceVariable(
         initial_value=self.read_value(),
         trainable=self._trainable,
-        constraint=self._constraint,
         dtype=self._dtype,
         name=self._shared_name + "_copy")
     memo[self._unique_id] = copied_variable
@@ -659,16 +643,6 @@ class ResourceVariable(variables.RefVariable):
       raise RuntimeError("initial_value not supported in EAGER mode.")
     return self._initial_value
 
-  @property
-  def constraint(self):
-    """Returns the constraint function associated with this variable.
-
-    Returns:
-      The constraint function that was passed to the variable constructor.
-      Can be `None` if no constraint was passed.
-    """
-    return self._constraint
-
   @property
   def op(self):
     """The op for this variable."""
@@ -1244,7 +1218,6 @@ class _UnreadVariable(ResourceVariable):
       self._handle_name = self._handle.name
     self._unique_id = unique_id
     self._dtype = dtype
-    self._constraint = None
     self._cached_value = None
     self._is_initialized_op = None
     self._initializer_op = None
@@ -1331,7 +1304,6 @@ class _MixedPrecisionVariable(ResourceVariable):
       self._handle_name = self.handle.name
     self._unique_id = var._unique_id  # pylint: disable=protected-access
     self._dtype = var.dtype
-    self._constraint = None
     self._cached_value = None
     self._is_initialized_op = var._is_initialized_op  # pylint: disable=protected-access
     self._initializer_op = var._initializer_op  # pylint: disable=protected-access
@@ -1495,7 +1467,6 @@ def copy_to_graph_uninitialized(var):
           shape=var.shape, dtype=var.dtype,
           name="unused_initial_variable_value"),
       trainable=var.trainable,
-      constraint=var._constraint,
       dtype=var.dtype,
       name=var._shared_name)
   new_variable._maybe_initialize_checkpointable()
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index ccce9e2f93..1b55e9db6a 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -2506,7 +2506,6 @@ def default_variable_creator_v2(next_creator=None, **kwargs):
   variable_def = kwargs.get("variable_def", None)
   dtype = kwargs.get("dtype", None)
   import_scope = kwargs.get("import_scope", None)
-  constraint = kwargs.get("constraint", None)
 
   # Set trainable value based on synchronization value.
   synchronization = kwargs.get("synchronization", VariableSynchronization.AUTO)
@@ -2516,7 +2515,7 @@ def default_variable_creator_v2(next_creator=None, **kwargs):
   return resource_variable_ops.ResourceVariable(
       initial_value=initial_value, trainable=trainable,
       validate_shape=validate_shape, caching_device=caching_device,
-      name=name, dtype=dtype, constraint=constraint, variable_def=variable_def,
+      name=name, dtype=dtype, variable_def=variable_def,
       import_scope=import_scope)
 
 
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index d7d064aba1..4bb4488c9e 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -343,7 +343,6 @@ class Variable(six.with_metaclass(VariableMetaclass,
                variable_def=None,
                dtype=None,
                import_scope=None,
-               constraint=None,
                synchronization=VariableSynchronization.AUTO,
                aggregation=VariableAggregation.NONE):
     """Creates a new variable with value `initial_value`.
@@ -385,13 +384,6 @@ class Variable(six.with_metaclass(VariableMetaclass,
         a Tensor), or `convert_to_tensor` will decide.
       import_scope: Optional `string`. Name scope to add to the
         `Variable.` Only used when initializing from protocol buffer.
-      constraint: An optional projection function to be applied to the variable
-        after being updated by an `Optimizer` (e.g. used to implement norm
-        constraints or value constraints for layer weights). The function must
-        take as input the unprojected Tensor representing the value of the
-        variable and return the Tensor for the projected value
-        (which must have the same shape). Constraints are not safe to
-        use when doing asynchronous distributed training.
       synchronization: Indicates when a distributed a variable will be
         aggregated. Accepted values are constants defined in the class
         `tf.VariableSynchronization`. By default the synchronization is set to
@@ -525,16 +517,6 @@ class Variable(six.with_metaclass(VariableMetaclass,
     """
     raise NotImplementedError
 
-  @property
-  def constraint(self):
-    """Returns the constraint function associated with this variable.
-
-    Returns:
-      The constraint function that was passed to the variable constructor.
-      Can be `None` if no constraint was passed.
-    """
-    raise NotImplementedError
-
   def assign(self, value, use_locking=False, name=None, read_value=True):
     """Assigns a new value to the variable.
 
@@ -1320,6 +1302,16 @@ class VariableV1(Variable):
       RuntimeError: If eager execution is enabled.
     """
 
+  @property
+  def constraint(self):
+    """Returns the constraint function associated with this variable.
+
+    Returns:
+      The constraint function that was passed to the variable constructor.
+      Can be `None` if no constraint was passed.
+    """
+    raise NotImplementedError
+
   SaveSliceInfo = Variable.SaveSliceInfo
 
 
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index eaa563e84a..d27eb15422 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -116,16 +116,17 @@ class _RefVariableProcessor(_OptimizableVariable):
 
   def update_op(self, optimizer, g):
     if isinstance(g, ops.Tensor):
-      update_op = optimizer._apply_dense(g, self._v)  # pylint: disable=protected-access
-      if self._v.constraint is not None:
+      # pylint: disable=protected-access
+      update_op = optimizer._apply_dense(g, self._v)
+      if getattr(self._v, "_constraint", None) is not None:
         with ops.control_dependencies([update_op]):
-          return self._v.assign(self._v.constraint(self._v))
+          return self._v.assign(self._v._constraint(self._v))
       else:
         return update_op
     else:
       assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a "
                                                 "tensor nor IndexedSlices.")
-      if self._v.constraint is not None:
+      if getattr(self._v, "_constraint", None) is not None:
         raise RuntimeError(
             "Cannot use a constraint function on a sparse variable.")
       # pylint: disable=protected-access
@@ -144,9 +145,9 @@ class _DenseReadResourceVariableProcessor(_OptimizableVariable):
   def update_op(self, optimizer, g):
     # pylint: disable=protected-access
     update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0])
-    if self._v.constraint is not None:
+    if getattr(self._v, "_constraint", None) is not None:
       with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v.constraint(self._v))
+        return self._v.assign(self._v._constraint(self._v))
     else:
       return update_op
 
@@ -163,15 +164,15 @@ class _DenseResourceVariableProcessor(_OptimizableVariable):
   def update_op(self, optimizer, g):
     # pylint: disable=protected-access
     if isinstance(g, ops.IndexedSlices):
-      if self._v.constraint is not None:
+      if getattr(self._v, "_constraint", None) is not None:
         raise RuntimeError(
             "Cannot use a constraint function on a sparse variable.")
       return optimizer._resource_apply_sparse_duplicate_indices(
           g.values, self._v, g.indices)
     update_op = optimizer._resource_apply_dense(g, self._v)
-    if self._v.constraint is not None:
+    if getattr(self._v, "_constraint", None) is not None:
       with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v.constraint(self._v))
+        return self._v.assign(self._v._constraint(self._v))
     else:
       return update_op
 
diff --git a/tensorflow/python/training/optimizer_test.py b/tensorflow/python/training/optimizer_test.py
index e175b5a799..ae6aca22aa 100644
--- a/tensorflow/python/training/optimizer_test.py
+++ b/tensorflow/python/training/optimizer_test.py
@@ -244,15 +244,14 @@ class OptimizerTest(test.TestCase):
       opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
       self.assertTrue(opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
 
-  @test_util.run_deprecated_v1
+  @test_util.run_v1_only(
+      '`ResourceVariable` does not support `constraint` argument.')
   def testConstraint(self):
     constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
     constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
     with self.cached_session():
-      var0 = variables.Variable([1.0, 2.0],
-                                constraint=constraint_01)
-      var1 = variables.Variable([3.0, 4.0],
-                                constraint=constraint_0)
+      var0 = variables.VariableV1([1.0, 2.0], constraint=constraint_01)
+      var1 = variables.VariableV1([3.0, 4.0], constraint=constraint_0)
       cost = 5 * var0 + 3 * var1
       global_step = variables.Variable(
           array_ops.zeros([], dtypes.int64), name='global_step')
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
index 6136c8fbe7..12963e0c89 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
@@ -7,10 +7,6 @@ tf_class {
     name: "SaveSliceInfo"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "constraint"
-    mtype: "<type \'property\'>"
-  }
   member {
     name: "device"
     mtype: "<type \'property\'>"
@@ -49,7 +45,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_value\', \'trainable\', \'validate_shape\', \'caching_device\', \'name\', \'variable_def\', \'dtype\', \'import_scope\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+    argspec: "args=[\'self\', \'initial_value\', \'trainable\', \'validate_shape\', \'caching_device\', \'name\', \'variable_def\', \'dtype\', \'import_scope\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
   }
   member_method {
     name: "assign"
-- 
GitLab


From 779e3a9ed34b737cdd5e9006b55d111ebfa1a897 Mon Sep 17 00:00:00 2001
From: Trevor Morris <tmorris@nvidia.com>
Date: Fri, 14 Dec 2018 13:01:55 -0800
Subject: [PATCH 619/873] Fall back to native TF for segments who do not have
 fully defined inputs in static mode.

---
 tensorflow/contrib/tensorrt/convert/convert_graph.cc | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 623cd79f32..9a3cd56222 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -567,6 +567,18 @@ tensorflow::Status CreateTRTNode(const std::vector<EngineInfo>& infos, int pos,
         }
         input_shape_protos.at(conn.port_number) = in_shape;
         input_shapes.at(conn.port_number) = conn.outside_shape;
+        // Shape must be fully defined (excluding batch dimension) for static
+        // mode.
+        if (info.engine_type == EngineInfo::EngineType::TRTStatic) {
+          for (int i = 1; i < conn.outside_shape.dims(); i++) {
+            if (conn.outside_shape.dim_size(i) <= 0) {
+              return tensorflow::errors::Internal(
+                  "Input shapes must be fully defined when in static mode. "
+                  "Please try is_dynamic_op=True (shape was ",
+                  conn.outside_shape.DebugString(), ")");
+            }
+          }
+        }
 
         // Rewrire data input if it's not found in original graph.
         tensorflow::Node* input_node = graph->FindNodeId(conn.outside_id);
-- 
GitLab


From d52e7e1d9a14a1d5d3e57499c56e74f19d2fe98e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 13:59:58 -0800
Subject: [PATCH 620/873] Make the default exponent a float so that when using
 override_from_dict() or set() float values aren't rejected.

PiperOrigin-RevId: 225600026
---
 tensorflow/contrib/model_pruning/python/pruning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py
index f6b4373edd..43ea66ac5a 100644
--- a/tensorflow/contrib/model_pruning/python/pruning.py
+++ b/tensorflow/contrib/model_pruning/python/pruning.py
@@ -214,7 +214,7 @@ def get_pruning_hparams():
       target_sparsity=0.5,
       sparsity_function_begin_step=0,
       sparsity_function_end_step=100,
-      sparsity_function_exponent=3,
+      sparsity_function_exponent=3.0,
       use_tpu=False)
 
 
-- 
GitLab


From 29b8d495c3d3cbd13eed86f542586c1bd1597e46 Mon Sep 17 00:00:00 2001
From: Toby Boyd <tobyboyd@google.com>
Date: Fri, 14 Dec 2018 22:07:29 +0000
Subject: [PATCH 621/873] Upgrade to CUDA 10

---
 tensorflow/tools/dockerfiles/README.md        |  2 +-
 .../dockerfiles/devel-cpu-jupyter.Dockerfile  | 12 ++--
 .../dockerfiles/devel-cpu.Dockerfile          | 12 ++--
 .../dockerfiles/devel-gpu-jupyter.Dockerfile  | 59 ++++++++-----------
 .../dockerfiles/devel-gpu.Dockerfile          | 59 ++++++++-----------
 .../dockerfiles/gpu-jupyter.Dockerfile        | 33 +++++------
 .../dockerfiles/dockerfiles/gpu.Dockerfile    | 33 +++++------
 .../partials/ubuntu/bazel.partial.Dockerfile  | 12 ++--
 .../ubuntu/devel-nvidia.partial.Dockerfile    | 47 ++++++---------
 .../partials/ubuntu/nvidia.partial.Dockerfile | 33 +++++------
 10 files changed, 137 insertions(+), 165 deletions(-)

diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md
index 07bfd5960e..b42dd9fc0c 100644
--- a/tensorflow/tools/dockerfiles/README.md
+++ b/tensorflow/tools/dockerfiles/README.md
@@ -87,7 +87,7 @@ $ alias asm_dockerfiles="docker run --rm -u $(id -u):$(id -g) -v $(pwd):/tf tf-t
 $ asm_dockerfiles --help
 
 # Assemble all of the Dockerfiles
-$ asm_dockerfiles --release ubuntu-dockerfiles --construct_dockerfiles
+$ asm_dockerfiles --release dockerfiles --construct_dockerfiles
 
 # Build all of the "nightly" images on your local machine:
 $ asm_images --release nightly --build_images
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
index 43265676f8..c1f6dafbe0 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu-jupyter.Dockerfile
@@ -73,6 +73,7 @@ RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
+    wget \
     openjdk-8-jdk \
     ${PYTHON}-dev \
     swig
@@ -92,10 +93,13 @@ RUN ${PIP} --no-cache-dir install \
     enum34
 
 # Install bazel
-RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \
-    curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \
-    apt-get update && \
-    apt-get install -y bazel
+ARG BAZEL_VERSION=0.19.2
+RUN mkdir /bazel && \
+    wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
+    wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
+    chmod +x /bazel/installer.sh && \
+    /bazel/installer.sh && \
+    rm -f /bazel/installer.sh
 
 COPY bashrc /etc/bash.bashrc
 RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
index 5c5b2f9163..b4dfc8b099 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-cpu.Dockerfile
@@ -73,6 +73,7 @@ RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
+    wget \
     openjdk-8-jdk \
     ${PYTHON}-dev \
     swig
@@ -92,10 +93,13 @@ RUN ${PIP} --no-cache-dir install \
     enum34
 
 # Install bazel
-RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \
-    curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \
-    apt-get update && \
-    apt-get install -y bazel
+ARG BAZEL_VERSION=0.19.2
+RUN mkdir /bazel && \
+    wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
+    wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
+    chmod +x /bazel/installer.sh && \
+    /bazel/installer.sh && \
+    rm -f /bazel/installer.sh
 
 COPY bashrc /etc/bash.bashrc
 RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
index 8769e4e9cd..6d76c06332 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu-jupyter.Dockerfile
@@ -21,51 +21,41 @@
 
 ARG UBUNTU_VERSION=16.04
 
-FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base
+FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-9-0 \
-        cuda-cublas-dev-9-0 \
-        cuda-cudart-dev-9-0 \
-        cuda-cufft-dev-9-0 \
-        cuda-curand-dev-9-0 \
-        cuda-cusolver-dev-9-0 \
-        cuda-cusparse-dev-9-0 \
-        curl \
-        git \
-        libcudnn7=7.2.1.38-1+cuda9.0 \
-        libcudnn7-dev=7.2.1.38-1+cuda9.0 \
-        libnccl2=2.2.13-1+cuda9.0 \
-        libnccl-dev=2.2.13-1+cuda9.0 \
+        cuda-command-line-tools-10-0 \
+        cuda-cublas-dev-10-0 \
+        cuda-cudart-dev-10-0 \
+        cuda-cufft-dev-10-0 \
+        cuda-curand-dev-10-0 \
+        cuda-cusolver-dev-10-0 \
+        cuda-cusparse-dev-10-0 \
+        libcudnn7=7.4.1.5-1+cuda10.0 \
+        libcudnn7-dev=7.4.1.5-1+cuda10.0 \
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
         libpng12-dev \
         libzmq3-dev \
         pkg-config \
-        python-dev \
         rsync \
         software-properties-common \
         unzip \
         zip \
         zlib1g-dev \
         wget \
+        git \
         && \
-    rm -rf /var/lib/apt/lists/* && \
-    find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
     rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
 
 RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
-        apt-get update && \
-        apt-get install libnvinfer4=4.1.2-1+cuda9.0 && \
-        apt-get install libnvinfer-dev=4.1.2-1+cuda9.0
-
-# Link NCCL libray and header where the build script expects them.
-RUN mkdir /usr/local/cuda-9.0/lib &&  \
-    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
-    ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \
+        && rm -rf /var/lib/apt/lists/*
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
@@ -73,12 +63,9 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_NEED_TENSORRT 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0
-ENV TF_CUDA_VERSION=9.0
+ENV TF_CUDA_VERSION=10.0
 ENV TF_CUDNN_VERSION=7
 
-# NCCL 2.x
-ENV TF_NCCL_VERSION=2
-
 # Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
 ARG CHECKOUT_TF_SRC=0
 RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
@@ -106,6 +93,7 @@ RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
+    wget \
     openjdk-8-jdk \
     ${PYTHON}-dev \
     swig
@@ -125,10 +113,13 @@ RUN ${PIP} --no-cache-dir install \
     enum34
 
 # Install bazel
-RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \
-    curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \
-    apt-get update && \
-    apt-get install -y bazel
+ARG BAZEL_VERSION=0.19.2
+RUN mkdir /bazel && \
+    wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
+    wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
+    chmod +x /bazel/installer.sh && \
+    /bazel/installer.sh && \
+    rm -f /bazel/installer.sh
 
 COPY bashrc /etc/bash.bashrc
 RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
index 809cda679e..160abc8763 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/devel-gpu.Dockerfile
@@ -21,51 +21,41 @@
 
 ARG UBUNTU_VERSION=16.04
 
-FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base
+FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-9-0 \
-        cuda-cublas-dev-9-0 \
-        cuda-cudart-dev-9-0 \
-        cuda-cufft-dev-9-0 \
-        cuda-curand-dev-9-0 \
-        cuda-cusolver-dev-9-0 \
-        cuda-cusparse-dev-9-0 \
-        curl \
-        git \
-        libcudnn7=7.2.1.38-1+cuda9.0 \
-        libcudnn7-dev=7.2.1.38-1+cuda9.0 \
-        libnccl2=2.2.13-1+cuda9.0 \
-        libnccl-dev=2.2.13-1+cuda9.0 \
+        cuda-command-line-tools-10-0 \
+        cuda-cublas-dev-10-0 \
+        cuda-cudart-dev-10-0 \
+        cuda-cufft-dev-10-0 \
+        cuda-curand-dev-10-0 \
+        cuda-cusolver-dev-10-0 \
+        cuda-cusparse-dev-10-0 \
+        libcudnn7=7.4.1.5-1+cuda10.0 \
+        libcudnn7-dev=7.4.1.5-1+cuda10.0 \
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
         libpng12-dev \
         libzmq3-dev \
         pkg-config \
-        python-dev \
         rsync \
         software-properties-common \
         unzip \
         zip \
         zlib1g-dev \
         wget \
+        git \
         && \
-    rm -rf /var/lib/apt/lists/* && \
-    find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
     rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
 
 RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
-        apt-get update && \
-        apt-get install libnvinfer4=4.1.2-1+cuda9.0 && \
-        apt-get install libnvinfer-dev=4.1.2-1+cuda9.0
-
-# Link NCCL libray and header where the build script expects them.
-RUN mkdir /usr/local/cuda-9.0/lib &&  \
-    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
-    ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \
+        && rm -rf /var/lib/apt/lists/*
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
@@ -73,12 +63,9 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_NEED_TENSORRT 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0
-ENV TF_CUDA_VERSION=9.0
+ENV TF_CUDA_VERSION=10.0
 ENV TF_CUDNN_VERSION=7
 
-# NCCL 2.x
-ENV TF_NCCL_VERSION=2
-
 # Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
 ARG CHECKOUT_TF_SRC=0
 RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
@@ -106,6 +93,7 @@ RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
+    wget \
     openjdk-8-jdk \
     ${PYTHON}-dev \
     swig
@@ -125,10 +113,13 @@ RUN ${PIP} --no-cache-dir install \
     enum34
 
 # Install bazel
-RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \
-    curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \
-    apt-get update && \
-    apt-get install -y bazel
+ARG BAZEL_VERSION=0.19.2
+RUN mkdir /bazel && \
+    wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
+    wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
+    chmod +x /bazel/installer.sh && \
+    /bazel/installer.sh && \
+    rm -f /bazel/installer.sh
 
 COPY bashrc /etc/bash.bashrc
 RUN chmod a+rwx /etc/bash.bashrc
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile
index acfe4d8607..46252c5413 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile
@@ -21,35 +21,32 @@
 
 ARG UBUNTU_VERSION=16.04
 
-FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base
+FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
 
+# Pick up some TF dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-9-0 \
-        cuda-cublas-9-0 \
-        cuda-cufft-9-0 \
-        cuda-curand-9-0 \
-        cuda-cusolver-9-0 \
-        cuda-cusparse-9-0 \
-        curl \
-        libcudnn7=7.2.1.38-1+cuda9.0 \
-        libnccl2=2.2.13-1+cuda9.0 \
+        cuda-command-line-tools-10-0 \
+        cuda-cublas-10-0 \
+        cuda-cufft-10-0 \
+        cuda-curand-10-0 \
+        cuda-cusolver-10-0 \
+        cuda-cusparse-10-0 \
+        libcudnn7=7.4.1.5-1+cuda10.0 \
         libfreetype6-dev \
         libhdf5-serial-dev \
         libpng12-dev \
         libzmq3-dev \
         pkg-config \
-        rsync \
         software-properties-common \
-        unzip \
-        && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+        unzip
 
 RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
-        apt-get update && \
-        apt-get install libnvinfer4=4.1.2-1+cuda9.0
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*
 
 # For CUDA profiling, TensorFlow requires CUPTI.
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
diff --git a/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile
index f36a21eaf0..80e427f824 100644
--- a/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile
+++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile
@@ -21,35 +21,32 @@
 
 ARG UBUNTU_VERSION=16.04
 
-FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base
+FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
 
+# Pick up some TF dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-9-0 \
-        cuda-cublas-9-0 \
-        cuda-cufft-9-0 \
-        cuda-curand-9-0 \
-        cuda-cusolver-9-0 \
-        cuda-cusparse-9-0 \
-        curl \
-        libcudnn7=7.2.1.38-1+cuda9.0 \
-        libnccl2=2.2.13-1+cuda9.0 \
+        cuda-command-line-tools-10-0 \
+        cuda-cublas-10-0 \
+        cuda-cufft-10-0 \
+        cuda-curand-10-0 \
+        cuda-cusolver-10-0 \
+        cuda-cusparse-10-0 \
+        libcudnn7=7.4.1.5-1+cuda10.0 \
         libfreetype6-dev \
         libhdf5-serial-dev \
         libpng12-dev \
         libzmq3-dev \
         pkg-config \
-        rsync \
         software-properties-common \
-        unzip \
-        && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+        unzip
 
 RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
-        apt-get update && \
-        apt-get install libnvinfer4=4.1.2-1+cuda9.0
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*
 
 # For CUDA profiling, TensorFlow requires CUPTI.
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile
index 156bb01991..855a01c379 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile
@@ -2,6 +2,7 @@ RUN apt-get update && apt-get install -y \
     build-essential \
     curl \
     git \
+    wget \
     openjdk-8-jdk \
     ${PYTHON}-dev \
     swig
@@ -21,7 +22,10 @@ RUN ${PIP} --no-cache-dir install \
     enum34
 
 # Install bazel
-RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \
-    curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \
-    apt-get update && \
-    apt-get install -y bazel
+ARG BAZEL_VERSION=0.19.2
+RUN mkdir /bazel && \
+    wget -O /bazel/installer.sh "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
+    wget -O /bazel/LICENSE.txt "https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE" && \
+    chmod +x /bazel/installer.sh && \
+    /bazel/installer.sh && \
+    rm -f /bazel/installer.sh
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
index 95f9875012..2b4494ac59 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/devel-nvidia.partial.Dockerfile
@@ -1,48 +1,38 @@
-FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base
+FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-9-0 \
-        cuda-cublas-dev-9-0 \
-        cuda-cudart-dev-9-0 \
-        cuda-cufft-dev-9-0 \
-        cuda-curand-dev-9-0 \
-        cuda-cusolver-dev-9-0 \
-        cuda-cusparse-dev-9-0 \
-        curl \
-        git \
-        libcudnn7=7.2.1.38-1+cuda9.0 \
-        libcudnn7-dev=7.2.1.38-1+cuda9.0 \
-        libnccl2=2.2.13-1+cuda9.0 \
-        libnccl-dev=2.2.13-1+cuda9.0 \
+        cuda-command-line-tools-10-0 \
+        cuda-cublas-dev-10-0 \
+        cuda-cudart-dev-10-0 \
+        cuda-cufft-dev-10-0 \
+        cuda-curand-dev-10-0 \
+        cuda-cusolver-dev-10-0 \
+        cuda-cusparse-dev-10-0 \
+        libcudnn7=7.4.1.5-1+cuda10.0 \
+        libcudnn7-dev=7.4.1.5-1+cuda10.0 \
         libcurl3-dev \
         libfreetype6-dev \
         libhdf5-serial-dev \
         libpng12-dev \
         libzmq3-dev \
         pkg-config \
-        python-dev \
         rsync \
         software-properties-common \
         unzip \
         zip \
         zlib1g-dev \
         wget \
+        git \
         && \
-    rm -rf /var/lib/apt/lists/* && \
-    find /usr/local/cuda-9.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
+    find /usr/local/cuda-10.0/lib64/ -type f -name 'lib*_static.a' -not -name 'libcudart_static.a' -delete && \
     rm /usr/lib/x86_64-linux-gnu/libcudnn_static_v7.a
 
 RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
-        apt-get update && \
-        apt-get install libnvinfer4=4.1.2-1+cuda9.0 && \
-        apt-get install libnvinfer-dev=4.1.2-1+cuda9.0
-
-# Link NCCL libray and header where the build script expects them.
-RUN mkdir /usr/local/cuda-9.0/lib &&  \
-    ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \
-    ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer-dev=5.0.2-1+cuda10.0 \
+        && rm -rf /var/lib/apt/lists/*
 
 # Configure the build for our CUDA configuration.
 ENV CI_BUILD_PYTHON python
@@ -50,12 +40,9 @@ ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
 ENV TF_NEED_CUDA 1
 ENV TF_NEED_TENSORRT 1
 ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0
-ENV TF_CUDA_VERSION=9.0
+ENV TF_CUDA_VERSION=10.0
 ENV TF_CUDNN_VERSION=7
 
-# NCCL 2.x
-ENV TF_NCCL_VERSION=2
-
 # Check out TensorFlow source code if --build_arg CHECKOUT_TENSORFLOW=1
 ARG CHECKOUT_TF_SRC=0
 RUN test "${CHECKOUT_TF_SRC}" -eq 1 && git clone https://github.com/tensorflow/tensorflow.git /tensorflow_src
diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile
index 1dc8e43aad..a6393a3280 100644
--- a/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile
+++ b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile
@@ -1,32 +1,29 @@
-FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base
+FROM nvidia/cuda:10.0-base-ubuntu${UBUNTU_VERSION} as base
 
+# Pick up some TF dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
         build-essential \
-        cuda-command-line-tools-9-0 \
-        cuda-cublas-9-0 \
-        cuda-cufft-9-0 \
-        cuda-curand-9-0 \
-        cuda-cusolver-9-0 \
-        cuda-cusparse-9-0 \
-        curl \
-        libcudnn7=7.2.1.38-1+cuda9.0 \
-        libnccl2=2.2.13-1+cuda9.0 \
+        cuda-command-line-tools-10-0 \
+        cuda-cublas-10-0 \
+        cuda-cufft-10-0 \
+        cuda-curand-10-0 \
+        cuda-cusolver-10-0 \
+        cuda-cusparse-10-0 \
+        libcudnn7=7.4.1.5-1+cuda10.0 \
         libfreetype6-dev \
         libhdf5-serial-dev \
         libpng12-dev \
         libzmq3-dev \
         pkg-config \
-        rsync \
         software-properties-common \
-        unzip \
-        && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
+        unzip
 
 RUN apt-get update && \
-        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \
-        apt-get update && \
-        apt-get install libnvinfer4=4.1.2-1+cuda9.0
+        apt-get install nvinfer-runtime-trt-repo-ubuntu1604-5.0.2-ga-cuda10.0 \
+        && apt-get update \
+        && apt-get install -y --no-install-recommends libnvinfer5=5.0.2-1+cuda10.0 \
+        && apt-get clean \
+        && rm -rf /var/lib/apt/lists/*
 
 # For CUDA profiling, TensorFlow requires CUPTI.
 ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH
-- 
GitLab


From ebf9140175f7f4ed082999f9a631d92938d91029 Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Fri, 14 Dec 2018 14:14:02 -0800
Subject: [PATCH 622/873] Address PR comments

---
 tensorflow/compiler/jit/xla_device.h          |  4 +--
 tensorflow/compiler/jit/xla_gpu_device.cc     | 26 ++++++++++---------
 .../compiler/xla/client/client_library.h      |  2 +-
 .../compiler/xla/service/platform_util.h      |  5 ++--
 tensorflow/compiler/xla/service/service.h     |  3 ++-
 5 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_device.h b/tensorflow/compiler/jit/xla_device.h
index 857792d813..45f18ac9ee 100644
--- a/tensorflow/compiler/jit/xla_device.h
+++ b/tensorflow/compiler/jit/xla_device.h
@@ -127,7 +127,7 @@ class XlaDevice : public LocalDevice {
     PaddedShapeFn padded_shape_fn;
 
     // Set of devices to use. This controls which of the devices on the given
-    // platform resources will have resources allocated. For GPUs this will be
+    // platform will have resources allocated. For GPUs this will be
     // filled from visible_gpu_devices list from session configuration.
     absl::optional<std::set<int>> allowed_devices;
   };
@@ -265,7 +265,7 @@ class XlaDevice : public LocalDevice {
   condition_variable outstanding_asynchronous_operations_cv_;
 
   // Set of devices to use. This controls which of the devices on the given
-  // platform resources will have resources allocated. For GPUs this will be
+  // platform will have resources allocated. For GPUs this will be
   // filled from visible_gpu_devices list from session configuration.
   absl::optional<std::set<int>> allowed_devices_;
 };
diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index 3841d03606..57b1547f16 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -34,17 +34,17 @@ class XlaGpuDeviceFactory : public DeviceFactory {
   Status CreateDevices(const SessionOptions& options, const string& name_prefix,
                        std::vector<std::unique_ptr<Device>>* devices) override;
   // Returns a set containing the device ids contained in visible_device_list or
-  // -1 if the string is empty.
-  static xla::StatusOr<std::set<int>> ParseVisibleDeviceList(
+  // nullopt if it is empty. It returns error in case of malformed configuration
+  // string.
+  static xla::StatusOr<absl::optional<std::set<int>>> ParseVisibleDeviceList(
       const string& visible_device_list);
 };
 
-xla::StatusOr<std::set<int>> XlaGpuDeviceFactory::ParseVisibleDeviceList(
-    const string& visible_device_list) {
+xla::StatusOr<absl::optional<std::set<int>>>
+XlaGpuDeviceFactory::ParseVisibleDeviceList(const string& visible_device_list) {
   std::set<int> gpu_ids;
-  if (visible_device_list.length() == 0) {
-    gpu_ids.insert(-1);
-    return gpu_ids;
+  if (visible_device_list.empty()) {
+    return absl::optional<std::set<int>>(absl::nullopt);
   }
   const std::vector<string> visible_devices =
       absl::StrSplit(visible_device_list, ',');
@@ -53,12 +53,12 @@ xla::StatusOr<std::set<int>> XlaGpuDeviceFactory::ParseVisibleDeviceList(
     if (!absl::SimpleAtoi(platform_gpu_id_str, &platform_gpu_id)) {
       return errors::InvalidArgument(
           "Could not parse entry in 'visible_device_list': '",
-          platform_gpu_id_str, "'. visible_device_list = ",
-          visible_device_list);
+          platform_gpu_id_str,
+          "'. visible_device_list = ", visible_device_list);
     }
     gpu_ids.insert(platform_gpu_id);
   }
-  return gpu_ids;
+  return absl::optional<std::set<int>>(gpu_ids);
 }
 
 Status XlaGpuDeviceFactory::CreateDevices(
@@ -83,14 +83,16 @@ Status XlaGpuDeviceFactory::CreateDevices(
   }
   string allowed_gpus =
       session_options.config.gpu_options().visible_device_list();
+  auto parsed_gpus=ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
+  // We want to fill the gpu_ids set with all devices if config string is empty.
   std::set<int> gpu_ids;
   int num_visible_devices = platform.ValueOrDie()->VisibleDeviceCount();
-  if (allowed_gpus.empty()) {
+  if (!parsed_gpus) {
     for (int i = 0; i < num_visible_devices; ++i) {
       gpu_ids.insert(i);
     }
   } else {
-    gpu_ids = ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
+    gpu_ids = *parsed_gpus;
   }
   for (int i : gpu_ids) {
     XlaDevice::Options options;
diff --git a/tensorflow/compiler/xla/client/client_library.h b/tensorflow/compiler/xla/client/client_library.h
index 0cf548aeae..c02dcf7793 100644
--- a/tensorflow/compiler/xla/client/client_library.h
+++ b/tensorflow/compiler/xla/client/client_library.h
@@ -83,7 +83,7 @@ class ClientLibrary {
   //   platform : The platform the underlying XLA service should target. If
   //     null then default platform is used.
   //   device_set: Set of device IDs for which the stream executor will be
-  //   created for, for the given platform.
+  //   created, for the given platform.
   static StatusOr<LocalClient*> GetOrCreateLocalClient(
       se::Platform* platform = nullptr,
       const absl::optional<std::set<int>>& allowed_devices = absl::nullopt);
diff --git a/tensorflow/compiler/xla/service/platform_util.h b/tensorflow/compiler/xla/service/platform_util.h
index 13b0323959..592b20282f 100644
--- a/tensorflow/compiler/xla/service/platform_util.h
+++ b/tensorflow/compiler/xla/service/platform_util.h
@@ -61,8 +61,9 @@ class PlatformUtil {
   // Returns a vector of StreamExecutors for the given platform. The vector is
   // indexed by device ordinal (device numbering used by StreamExecutor). If an
   // element is nullptr, then the device is present by not supported by XLA.
-  // Optional parameter, allowed_devices controls which of the devices on the
-  // platform will have StreamExecutors constructed for. 
+  // If populated, only the devices in allowed_devices will have
+  // their StreamExecutors initialized, otherwise all StreamExecutors will be
+  // initialized and returned.
   //
   // If the platform has no visible devices, a not-found error is returned.
   static StatusOr<std::vector<se::StreamExecutor*>> GetStreamExecutors(
diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h
index 91edc21d87..abd3ee5a05 100644
--- a/tensorflow/compiler/xla/service/service.h
+++ b/tensorflow/compiler/xla/service/service.h
@@ -62,7 +62,8 @@ class ServiceOptions {
   ServiceOptions& set_intra_op_parallelism_threads(int num_threads);
   int intra_op_parallelism_threads() const;
 
-  // Sets the allowed_devices set for creation of stream executors.
+  // Sets the allowed_devices set for selectively constructing stream executors
+  // on the platform.
   ServiceOptions& set_allowed_devices(
       const absl::optional<std::set<int>>& allowed_devices);
   const absl::optional<std::set<int>>& allowed_devices() const;
-- 
GitLab


From ece6ba3a873c1df6bcd94309cc1161fe7d8c04eb Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Fri, 14 Dec 2018 14:22:38 -0800
Subject: [PATCH 623/873] Fix broken benchmarks

PiperOrigin-RevId: 225604092
---
 .../data/experimental/benchmarks/optimize_benchmark.py      | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py b/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
index 73c21d17aa..1bbee5e7a3 100644
--- a/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/optimize_benchmark.py
@@ -47,7 +47,7 @@ class OptimizationBenchmark(test.Benchmark):
         dataset = dataset.map(lambda x: x)
       if optimize_dataset:
         options = dataset_ops.Options()
-        options.experimental_map_fusion = True
+        options.experimental_optimization.map_fusion = True
         dataset = dataset.with_options(options)
 
       iterator = dataset_ops.make_one_shot_iterator(dataset)
@@ -90,7 +90,7 @@ class OptimizationBenchmark(test.Benchmark):
             lambda x: math_ops.greater_equal(x - 5, 0))
       if optimize_dataset:
         options = dataset_ops.Options()
-        options.experimental_map_and_filter_fusion = True
+        options.experimental_optimization.map_and_filter_fusion = True
         dataset = dataset.with_options(options)
       iterator = dataset_ops.make_one_shot_iterator(dataset)
       next_element = iterator.get_next()
@@ -131,7 +131,7 @@ class OptimizationBenchmark(test.Benchmark):
         dataset = dataset.filter(lambda x: math_ops.greater_equal(x - 5, 0))
       if optimize_dataset:
         options = dataset_ops.Options()
-        options.experimental_filter_fusion = True
+        options.experimental_optimization.filter_fusion = True
         dataset = dataset.with_options(options)
 
       iterator = dataset_ops.make_one_shot_iterator(dataset)
-- 
GitLab


From 5926ac2fbb4d319ef19411c2d836a2d71ff1bd44 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 14 Dec 2018 14:27:02 -0800
Subject: [PATCH 624/873] Automatically clean up tf.function reference cycles

Only works with autograph=False to start because of a function wrapping requirement which I've filed a bug for and will follow up on.

Attaches objects which clean up reference cycles to PolymorphicFunctions and the Functions the PolymorphicFunctions use internally. Right now it leaves reference cycles in concrete functions which are returned outside of a PolymorphicFunction, since we have dataset tests which hold on to the FuncGraph but not the Function (and users might do this too).

PiperOrigin-RevId: 225604754
---
 tensorflow/python/eager/def_function.py      | 23 +++++---
 tensorflow/python/eager/def_function_test.py | 14 +++++
 tensorflow/python/eager/function.py          | 58 +++++++++++++++++++-
 tensorflow/python/eager/function_test.py     | 18 ++++--
 tensorflow/python/framework/func_graph.py    | 20 +++++++
 tensorflow/python/framework/test_util.py     | 40 --------------
 6 files changed, 117 insertions(+), 56 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 9c4710175e..00f02d5011 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -236,10 +236,16 @@ class PolymorphicFunction(object):
   def _defun_with_scope(self, scope):
     """Creates a defun wrapped inside a variable creator scope."""
 
-    def wrapped_fn(*args, **kwds):
-      with variable_scope.variable_creator_scope(scope):
-        # __wrapped__ allows AutoGraph to swap in a converted function.
-        return wrapped_fn.__wrapped__(*args, **kwds)
+    # TODO(b/120990892): Remove this conditional
+    if self._autograph:
+      def wrapped_fn(*args, **kwds):
+        with variable_scope.variable_creator_scope(scope):
+          return wrapped_fn.__wrapped__(*args, **kwds)
+    else:
+      python_function = self._python_function
+      def wrapped_fn(*args, **kwds):
+        with variable_scope.variable_creator_scope(scope):
+          return python_function(*args, **kwds)
 
     # TODO(mdan): Pipe self._experimental_autograph_options through.
     return function_lib.defun(
@@ -250,21 +256,22 @@ class PolymorphicFunction(object):
   def _initialize(self, args, kwds, add_initializers_to=None):
     """Initializes, on the first call."""
 
-    self._created_variables = []
+    created_variables = []
 
     def variable_capturing_scope(unused_next_creator, **kwds):
       """Creates UnliftedInitializerVariables and saves references to them."""
       v = UnliftedInitializerVariable(
           add_initializers_to=add_initializers_to, **kwds)
-      self._created_variables.append(weakref.ref(v))
+      created_variables.append(weakref.ref(v))
       return v
 
+    self._created_variables = created_variables
     self._stateful_fn = self._defun_with_scope(variable_capturing_scope)
     self._stateful_fn._name = self._name  # pylint: disable=protected-access
-
     # Force the definition of the function for these arguments
     self._concrete_stateful_fn = (
-        self._stateful_fn._get_concrete_function_internal(*args, **kwds))  # pylint: disable=protected-access
+        self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
+            *args, **kwds))
 
     def invalid_creator_scope(*unused_args, **unused_kwds):
       """Disables variable creation."""
diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
index 8b4c40791a..723cdde6cf 100644
--- a/tensorflow/python/eager/def_function_test.py
+++ b/tensorflow/python/eager/def_function_test.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 import functools
+import weakref
 
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import def_function
@@ -25,6 +26,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
+from tensorflow.python.framework import test_util
 from tensorflow.python.keras.engine import training
 from tensorflow.python.keras.layers import core
 from tensorflow.python.ops import math_ops
@@ -258,6 +260,18 @@ class DefFunctionTest(test.TestCase):
              (tensor_spec.TensorSpec([1, 3], dtypes.int32),
               tensor_spec.TensorSpec([1], dtypes.int32)))))
 
+  @test_util.assert_no_garbage_created
+  def testReferenceCycles(self):
+    # TODO(b/120990892): Enable autograph in this test
+    fn = def_function.function(lambda x: 2. * x, autograph=False)
+    fn(constant_op.constant(4.0))
+    weak_fn = weakref.ref(fn)
+    del fn
+    # Tests that the weak reference we made to the function is now dead, which
+    # means the object has been deleted. This should be true as long as the
+    # function itself is not involved in a reference cycle.
+    self.assertIs(None, weak_fn())
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 6770f1d3b3..0af0cb608e 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -50,6 +50,7 @@ from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import compat
+from tensorflow.python.util import memory
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
@@ -816,6 +817,8 @@ class PolymorphicFunction(object):
     self._name = name
     self._autograph = autograph
     self._function_cache = collections.OrderedDict()
+    self._garbage_collector = _PolymorphicFunctionGarbageCollector(
+        self._function_cache)
     self._function_attributes = attributes or {}
 
     self._lock = threading.Lock()
@@ -869,13 +872,24 @@ class PolymorphicFunction(object):
     """Returns the wrapped Python function."""
     return self._python_function
 
-  def _get_concrete_function_internal(self, *args, **kwargs):
-    """Bypasses error checking when getting a graph function."""
+  def _get_concrete_function_internal_garbage_collected(self, *args, **kwargs):
+    """Returns a concrete function which cleans up its graph function."""
     if self._input_signature:
       args, kwargs = None, None
     graph_function, _, _ = self._maybe_define_function(args, kwargs)
     return graph_function
 
+  def _get_concrete_function_internal(self, *args, **kwargs):
+    """Bypasses error checking when getting a graph function."""
+    graph_function = self._get_concrete_function_internal_garbage_collected(
+        *args, **kwargs)
+    # We're returning this concrete function to someone, and they may keep a
+    # reference to the FuncGraph without keeping a reference to the Function
+    # object. So we won't clean up the reference cycles manually and instead
+    # will leave them to Python's garbage collector.
+    graph_function._garbage_collector.release()  # pylint: disable=protected-access
+    return graph_function
+
   def get_concrete_function(self, *args, **kwargs):
     """Returns a `Function` object specialized to inputs and execution context.
 
@@ -1180,9 +1194,17 @@ class PolymorphicFunction(object):
         else:
           python_call_signature = tuple(
               _encode_arg_for_serialization(arg) for arg in args)
+        # pylint: disable=protected-access
         # Save information about non-Tensor arguments with the concrete
         # function. Used to serialize PolymorphicFunctions.
-        graph_function._python_call_signature = python_call_signature  # pylint: disable=protected-access
+        graph_function._python_call_signature = python_call_signature
+        # Tell the Function to clean up its graph once it goes out of
+        # scope. Function does not do this in its constructor since it gets used
+        # in some places (like Keras) where the FuncGraph lives longer than the
+        # Function.
+        graph_function._garbage_collector = _FunctionGarbageCollector(
+            graph_function.graph)
+        # pylint: enable=protected-access
         self._function_cache[cache_key] = graph_function
       return graph_function, args, kwargs
 
@@ -1670,3 +1692,33 @@ def class_method_to_instance_method(original_function, instance):
   wrapped_instance_func = tf_decorator.make_decorator(
       original_function.python_function, instance_func)
   return wrapped_instance_func
+
+
+class _PolymorphicFunctionGarbageCollector(object):
+  """Cleans up cycles when a defun goes out of scope."""
+
+  def __init__(self, cache):
+    self._cache = cache
+
+  def __del__(self):
+    if func_graph_module is None or memory is None:
+      return
+    while self._cache:
+      self._cache.popitem()
+    memory.dismantle_ordered_dict(self._cache)
+
+
+class _FunctionGarbageCollector(object):
+  """Cleans up reference cycles when a Function goes out of scope."""
+
+  def __init__(self, func_graph):
+    self._func_graph = func_graph
+
+  def release(self):
+    """Call off the FuncGraph deletion."""
+    self._func_graph = None
+
+  def __del__(self):
+    if func_graph_module is None or memory is None or self._func_graph is None:
+      return
+    func_graph_module.dismantle_func_graph(self._func_graph)
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 95777a3a65..c7959441d8 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -937,9 +937,6 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
     self.assertAllClose([[[[4.0]]]], self.evaluate(y))
 
-    # Remove reference cycles in model
-    test_util.dismantle_polymorphic_function(model)
-
   @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
   def testDefunKerasModelCall(self):
     model = MiniModel()
@@ -953,8 +950,6 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
     self.assertAllEqual([[3.0]], self.evaluate(y))
 
-    # Remove reference cycles in defun.
-    test_util.dismantle_polymorphic_function(model.call)
     # Break the reference cycle between the MiniModel and the defun:
     # MiniModel --(through its `call` method)--> PolymorphicFunction
     # PolymorphicFunction --(instancemethod on MiniModel)--> MiniModel
@@ -2041,6 +2036,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
             5,
             add_five(constant_op.constant(0, dtype=dtypes.int32)).numpy())
 
+  @test_util.assert_no_garbage_created
+  def testReferenceCycles(self):
+
+    fn = function.defun(lambda x: 2. * x)
+
+    fn(constant_op.constant(4.0))
+    weak_fn = weakref.ref(fn)
+    del fn
+    # Tests that the weak reference we made to the function is now dead, which
+    # means the object has been deleted. This should be true as long as the
+    # function itself is not involved in a reference cycle.
+    self.assertIs(None, weak_fn())
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution(
diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py
index 75a420e91a..f8be5e9edf 100644
--- a/tensorflow/python/framework/func_graph.py
+++ b/tensorflow/python/framework/func_graph.py
@@ -35,6 +35,7 @@ from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import compat
+from tensorflow.python.util import memory
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util import tf_decorator
@@ -679,3 +680,22 @@ def _get_defun_inputs_from_kwargs(kwargs):
     names = []
     flat_args = []
   return _get_defun_inputs(flat_args, names, structure=kwargs)
+
+
+def dismantle_func_graph(func_graph):
+  """Removes reference cycles in `func_graph` FuncGraph.
+
+  Helpful for making sure the garbage collector doesn't need to run when
+  the FuncGraph goes out of scope, e.g. in tests using defun with
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True).
+
+  Args:
+    func_graph: A `FuncGraph` object to destroy. `func_graph` is unusable
+      after this function.
+  """
+  # TODO(b/115366440): Delete this method when a custom OrderedDict is added.
+  # Clearing captures using clear() leaves some cycles around.
+  while func_graph.captures:
+    func_graph.captures.popitem()
+  memory.dismantle_ordered_dict(func_graph.captures)
+  ops.dismantle_graph(func_graph)
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index af1687c8ef..ffab93c84e 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -75,7 +75,6 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
 from tensorflow.python.util import deprecation
-from tensorflow.python.util import memory
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
@@ -2590,42 +2589,3 @@ def set_producer_version(graph, producer_version):
   with graph.as_default():
     importer.import_graph_def(graph_def)
   assert graph.graph_def_versions.producer, producer_version
-
-
-def dismantle_func_graph(func_graph):
-  """Removes reference cycles in `func_graph` FuncGraph.
-
-  Helpful for making sure the garbage collector doesn't need to run when
-  the FuncGraph goes out of scope, e.g. in tests using defun with
-  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True).
-
-  Args:
-    func_graph: A `FuncGraph` object to destroy. `func_graph` is unusable
-      after this function.
-  """
-  # TODO(b/115366440): Delete this method when a custom OrderedDict is added.
-  # Clearing captures using clear() leaves some cycles around.
-  while func_graph.captures:
-    func_graph.captures.popitem()
-  memory.dismantle_ordered_dict(func_graph.captures)
-  ops.dismantle_graph(func_graph)
-
-
-def dismantle_polymorphic_function(func):
-  """Removes reference cycles in PolymorphicFunction `func`.
-
-  Helpful for making sure the garbage collector doesn't need to run when
-  PolymorphicFunction goes out of scope, e.g. in tests using defun with
-  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True).
-
-  Args:
-    func: A `PolymorphicFunction` object to destroy. `func` is unusable
-      after this function.
-  """
-  # TODO(b/115366440): Delete this method when a custom OrderedDict is added
-  cache = func._function_cache  # pylint: disable=protected-access
-  for concrete_func in cache.values():
-    dismantle_func_graph(concrete_func.graph)
-  while cache:
-    cache.popitem()
-  memory.dismantle_ordered_dict(cache)
-- 
GitLab


From 11ea11dc510246f75a468f54c924c4787376757f Mon Sep 17 00:00:00 2001
From: Sergei Lebedev <slebedev@google.com>
Date: Fri, 14 Dec 2018 14:27:19 -0800
Subject: [PATCH 625/873] Deprecated Variable.initialized_value and
 Variable.load in eager

PiperOrigin-RevId: 225604795
---
 tensorflow/python/ops/variables.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index 4bb4488c9e..d5ffab74a1 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -479,6 +479,10 @@ class Variable(six.with_metaclass(VariableMetaclass,
     """
     raise NotImplementedError
 
+  @deprecated(
+      None,
+      "Use Variable.read_value. Variables in 2.X are initialized "
+      "automatically both in eager and graph (inside tf.defun) contexts.")
   def initialized_value(self):
     """Returns the value of the initialized variable.
 
@@ -845,6 +849,9 @@ class Variable(six.with_metaclass(VariableMetaclass,
     """
     raise NotImplementedError
 
+  @deprecated(
+      None,
+      "Prefer Variable.assign which has equivalent behavior in 2.X.")
   def load(self, value, session=None):
     """Load new value into this variable.
 
-- 
GitLab


From 6634bc8b562f1f20de30f9ea8a04483b16e21297 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 14:46:43 -0800
Subject: [PATCH 626/873] Fix error message when bucketized_column is missing
 boundaries.

PiperOrigin-RevId: 225607816
---
 tensorflow/python/feature_column/feature_column_v2.py      | 5 +++--
 tensorflow/python/feature_column/feature_column_v2_test.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 4cc8efa925..914044d6d6 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -1354,8 +1354,9 @@ def bucketized_column(source_column, boundaries):
     raise ValueError(
         'source_column must be one-dimensional column. '
         'Given: {}'.format(source_column))
-  if (not boundaries or
-      not (isinstance(boundaries, list) or isinstance(boundaries, tuple))):
+  if not boundaries:
+    raise ValueError('boundaries must not be empty.')
+  if not (isinstance(boundaries, list) or isinstance(boundaries, tuple)):
     raise ValueError('boundaries must be a sorted list.')
   for i in range(len(boundaries) - 1):
     if boundaries[i] >= boundaries[i + 1]:
diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py
index 0755c0b6ac..a247425369 100644
--- a/tensorflow/python/feature_column/feature_column_v2_test.py
+++ b/tensorflow/python/feature_column/feature_column_v2_test.py
@@ -485,7 +485,7 @@ class BucketizedColumnTest(test.TestCase):
   def test_invalid_boundaries(self):
     a = fc.numeric_column('aaa')
     with self.assertRaisesRegexp(ValueError,
-                                 'boundaries must be a sorted list'):
+                                 'boundaries must not be empty'):
       fc.bucketized_column(a, boundaries=None)
     with self.assertRaisesRegexp(ValueError,
                                  'boundaries must be a sorted list'):
-- 
GitLab


From 55bc1b5aa7adc1f4276861b2b0dd57f6a68fca5c Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Fri, 14 Dec 2018 14:50:01 -0800
Subject: [PATCH 627/873] Temporarily disable an EXPECT_GE in port_test. This
 EXPECT_GE currently fails on MacOS.

PiperOrigin-RevId: 225608340
---
 tensorflow/core/platform/port_test.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc
index be99ff09e0..33c66a6f25 100644
--- a/tensorflow/core/platform/port_test.cc
+++ b/tensorflow/core/platform/port_test.cc
@@ -35,7 +35,9 @@ TEST(Port, AlignedMalloc) {
 
 TEST(Port, GetCurrentCPU) {
   const int cpu = GetCurrentCPU();
-  EXPECT_GE(cpu, 0);
+  // TODO(b/120919972): Re-enable this EXPECT_GE after fixing MacOS Kokoro
+  // failures.
+  // EXPECT_GE(cpu, 0);
   EXPECT_LT(cpu, NumTotalCPUs());
 }
 
-- 
GitLab


From 777eaf24729e5d06b352076662a4a90302104b8d Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 14 Dec 2018 14:53:10 -0800
Subject: [PATCH 628/873] Remove one last(-ish) reference cycle from
 tf.function

PiperOrigin-RevId: 225608851
---
 tensorflow/python/eager/def_function.py      | 17 +++++++----------
 tensorflow/python/eager/def_function_test.py |  3 +--
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 00f02d5011..0ca52bd808 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -236,16 +236,13 @@ class PolymorphicFunction(object):
   def _defun_with_scope(self, scope):
     """Creates a defun wrapped inside a variable creator scope."""
 
-    # TODO(b/120990892): Remove this conditional
-    if self._autograph:
-      def wrapped_fn(*args, **kwds):
-        with variable_scope.variable_creator_scope(scope):
-          return wrapped_fn.__wrapped__(*args, **kwds)
-    else:
-      python_function = self._python_function
-      def wrapped_fn(*args, **kwds):
-        with variable_scope.variable_creator_scope(scope):
-          return python_function(*args, **kwds)
+    weak_wrapped_fn = None
+    def wrapped_fn(*args, **kwds):
+      with variable_scope.variable_creator_scope(scope):
+        # __wrapped__ allows AutoGraph to swap in a converted function. We give
+        # the function a weak reference to itself to avoid a reference cycle.
+        return weak_wrapped_fn().__wrapped__(*args, **kwds)
+    weak_wrapped_fn = weakref.ref(wrapped_fn)
 
     # TODO(mdan): Pipe self._experimental_autograph_options through.
     return function_lib.defun(
diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
index 723cdde6cf..e885ad3d79 100644
--- a/tensorflow/python/eager/def_function_test.py
+++ b/tensorflow/python/eager/def_function_test.py
@@ -262,8 +262,7 @@ class DefFunctionTest(test.TestCase):
 
   @test_util.assert_no_garbage_created
   def testReferenceCycles(self):
-    # TODO(b/120990892): Enable autograph in this test
-    fn = def_function.function(lambda x: 2. * x, autograph=False)
+    fn = def_function.function(lambda x: 2. * x)
     fn(constant_op.constant(4.0))
     weak_fn = weakref.ref(fn)
     del fn
-- 
GitLab


From 62e61f434bb50c26b0f4cd755cde5a70ae7112c1 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Fri, 14 Dec 2018 14:59:19 -0800
Subject: [PATCH 629/873] More weakreffing in function

Now we also don't make reference cycles when decorating methods

PiperOrigin-RevId: 225609671
---
 tensorflow/python/eager/def_function_test.py | 20 +++++++++++++++++++-
 tensorflow/python/eager/function.py          |  7 +++++--
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py
index e885ad3d79..77cc8ee981 100644
--- a/tensorflow/python/eager/def_function_test.py
+++ b/tensorflow/python/eager/def_function_test.py
@@ -55,6 +55,13 @@ class _ModelWithOptimizer(training.Model):
     return {'loss': loss}
 
 
+class _HasDecoratedMethod(object):
+
+  @def_function.function
+  def f(self, x):
+    return x * 3.
+
+
 class DefFunctionTest(test.TestCase):
 
   def testNoVariables(self):
@@ -261,7 +268,7 @@ class DefFunctionTest(test.TestCase):
               tensor_spec.TensorSpec([1], dtypes.int32)))))
 
   @test_util.assert_no_garbage_created
-  def testReferenceCycles(self):
+  def testFunctionReferenceCycles(self):
     fn = def_function.function(lambda x: 2. * x)
     fn(constant_op.constant(4.0))
     weak_fn = weakref.ref(fn)
@@ -271,6 +278,17 @@ class DefFunctionTest(test.TestCase):
     # function itself is not involved in a reference cycle.
     self.assertIs(None, weak_fn())
 
+  @test_util.assert_no_garbage_created
+  def testMethodReferenceCycles(self):
+    has_decorated_method = _HasDecoratedMethod()
+    has_decorated_method.f(constant_op.constant(5.))
+    weak_fn = weakref.ref(has_decorated_method.f)
+    del has_decorated_method
+    # Tests that the weak reference we made to the function is now dead, which
+    # means the object has been deleted. This should be true as long as the
+    # function itself is not involved in a reference cycle.
+    self.assertIs(None, weak_fn())
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 0af0cb608e..885403dd10 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1661,12 +1661,14 @@ def class_method_to_instance_method(original_function, instance):
   assert hasattr(original_function, "_input_signature")
   assert hasattr(original_function, "python_function")
 
+  weak_bound_method_wrapper = None
   def bound_method_wrapper(*args, **kwargs):
     """Wraps either a dummy MethodType or a converted AutoGraph function."""
     # __wrapped__ allows AutoGraph to swap in a converted function.
-    wrapped_fn = bound_method_wrapper.__wrapped__
+    strong_bound_method_wrapper = weak_bound_method_wrapper()
+    wrapped_fn = strong_bound_method_wrapper.__wrapped__
 
-    if wrapped_fn is bound_method_wrapper.__original_wrapped__:
+    if wrapped_fn is strong_bound_method_wrapper.__original_wrapped__:
       # If __wrapped__ was not replaced, then call original_function.
       wrapped_fn = original_function.python_function
       if tf_inspect.ismethod(wrapped_fn):
@@ -1676,6 +1678,7 @@ def class_method_to_instance_method(original_function, instance):
     # If __wrapped__ was replaced, then it is always an unbound function
     # that takes self as first argument.
     return wrapped_fn(weak_instance(), *args, **kwargs)
+  weak_bound_method_wrapper = weakref.ref(bound_method_wrapper)
 
   # pylint: disable=protected-access
   # We make a dummy MethodType object to generate the correct bound method
-- 
GitLab


From abe1c5f6c00f7aa1ac1820ad4566a75a466663c5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 15:07:08 -0800
Subject: [PATCH 630/873] Add Callbacks hooks for `evaluate` and `predict`.

Adds Callback methods that can be used during validation, evaluation, and
prediction.

PiperOrigin-RevId: 225611013
---
 tensorflow/python/keras/callbacks.py          | 325 +++++++++++++++---
 tensorflow/python/keras/callbacks_test.py     | 138 ++++++++
 tensorflow/python/keras/engine/training.py    |  38 +-
 .../python/keras/engine/training_arrays.py    |  17 +-
 .../python/keras/engine/training_generator.py |  17 +-
 .../python/keras/engine/training_utils.py     |  12 -
 .../golden/v1/tensorflow.keras.-model.pbtxt   |   8 +-
 .../v1/tensorflow.keras.-sequential.pbtxt     |   8 +-
 ...sorflow.keras.callbacks.-base-logger.pbtxt |  32 ++
 ...orflow.keras.callbacks.-c-s-v-logger.pbtxt |  34 +-
 ...tensorflow.keras.callbacks.-callback.pbtxt |  36 +-
 ...flow.keras.callbacks.-early-stopping.pbtxt |  34 +-
 .../tensorflow.keras.callbacks.-history.pbtxt |  34 +-
 ...low.keras.callbacks.-lambda-callback.pbtxt |  36 +-
 ...s.callbacks.-learning-rate-scheduler.pbtxt |  32 ++
 ...ow.keras.callbacks.-model-checkpoint.pbtxt |  34 +-
 ...flow.keras.callbacks.-progbar-logger.pbtxt |  32 ++
 ...ras.callbacks.-reduce-l-r-on-plateau.pbtxt |  34 +-
 ...flow.keras.callbacks.-remote-monitor.pbtxt |  34 +-
 ...orflow.keras.callbacks.-tensor-board.pbtxt |  32 ++
 ...w.keras.callbacks.-terminate-on-na-n.pbtxt |  36 +-
 .../v1/tensorflow.keras.models.-model.pbtxt   |   8 +-
 .../tensorflow.keras.models.-sequential.pbtxt |   8 +-
 .../golden/v2/tensorflow.keras.-model.pbtxt   |   8 +-
 .../v2/tensorflow.keras.-sequential.pbtxt     |   8 +-
 ...sorflow.keras.callbacks.-base-logger.pbtxt |  32 ++
 ...orflow.keras.callbacks.-c-s-v-logger.pbtxt |  34 +-
 ...tensorflow.keras.callbacks.-callback.pbtxt |  36 +-
 ...flow.keras.callbacks.-early-stopping.pbtxt |  34 +-
 .../tensorflow.keras.callbacks.-history.pbtxt |  34 +-
 ...low.keras.callbacks.-lambda-callback.pbtxt |  36 +-
 ...s.callbacks.-learning-rate-scheduler.pbtxt |  32 ++
 ...ow.keras.callbacks.-model-checkpoint.pbtxt |  34 +-
 ...flow.keras.callbacks.-progbar-logger.pbtxt |  32 ++
 ...ras.callbacks.-reduce-l-r-on-plateau.pbtxt |  34 +-
 ...flow.keras.callbacks.-remote-monitor.pbtxt |  34 +-
 ...orflow.keras.callbacks.-tensor-board.pbtxt |  32 ++
 ...w.keras.callbacks.-terminate-on-na-n.pbtxt |  36 +-
 ...ensorflow.keras.layers.-linear-model.pbtxt |   8 +-
 .../v2/tensorflow.keras.models.-model.pbtxt   |   8 +-
 .../tensorflow.keras.models.-sequential.pbtxt |   8 +-
 41 files changed, 1364 insertions(+), 135 deletions(-)

diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index 2d7d5a415d..1cb3267527 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -45,13 +45,17 @@ from tensorflow.python.summary import summary as tf_summary
 from tensorflow.python.training import saver
 from tensorflow.python.util.tf_export import tf_export
 
-
 try:
   import requests
 except ImportError:
   requests = None
 
 
+_TRAIN = 'train'
+_TEST = 'test'
+_PREDICT = 'predict'
+
+
 # pylint: disable=protected-access
 def configure_callbacks(callbacks,
                         model,
@@ -62,7 +66,7 @@ def configure_callbacks(callbacks,
                         samples=None,
                         verbose=1,
                         count_mode='steps',
-                        mode='train'):
+                        mode=_TRAIN):
   """Configures callbacks for use in various training loops.
 
   Arguments:
@@ -89,7 +93,7 @@ def configure_callbacks(callbacks,
     callbacks = []
 
   # Add additional callbacks during training.
-  if mode == 'train':
+  if mode == _TRAIN:
     model.history = History()
     stateful_metric_names = None
     if hasattr(model, 'metrics_names'):
@@ -109,7 +113,7 @@ def configure_callbacks(callbacks,
   callback_metrics = []
   # When we have deferred build scenario with iterator input, we will compile
   # when we standardize first batch of data.
-  if mode != 'predict' and hasattr(model, 'metrics_names'):
+  if mode != _PREDICT and hasattr(model, 'metrics_names'):
     callback_metrics = copy.copy(model.metrics_names)
     if do_validation:
       callback_metrics += ['val_' + n for n in model.metrics_names]
@@ -142,6 +146,17 @@ def _is_generator_like(data):
       data, (Sequence, iterator_ops.Iterator, iterator_ops.EagerIterator)))
 
 
+def make_logs(model, logs, outputs, mode, prefix=''):
+  """Computes logs for sending to `on_batch_end` methods."""
+  if mode in {_TRAIN, _TEST}:
+    if hasattr(model, 'metrics_names'):
+      for label, output in zip(model.metrics_names, outputs):
+        logs[prefix + label] = output
+  else:
+    logs['outputs'] = outputs
+  return logs
+
+
 class CallbackList(object):
   """Container abstracting a list of callbacks.
 
@@ -179,10 +194,6 @@ class CallbackList(object):
 
   def _call_batch_hook(self, mode, hook, batch, logs=None):
     """Helper function for all batch_{begin | end} methods."""
-    # TODO(omalleyt): add batch hooks for test/predict.
-    if mode != 'train':
-      return
-
     hook_name = 'on_{mode}_batch_{hook}'.format(mode=mode, hook=hook)
     if hook == 'begin':
       self._t_enter_batch = time.time()
@@ -207,87 +218,175 @@ class CallbackList(object):
 
   def _call_begin_hook(self, mode):
     """Helper function for on_{train|test|predict}_begin methods."""
-    # TODO(omalleyt): add test/predict methods.
-    if mode == 'train':
+    if mode == _TRAIN:
       self.on_train_begin()
+    elif mode == _TEST:
+      self.on_test_begin()
+    else:
+      self.on_predict_begin()
 
   def _call_end_hook(self, mode):
     """Helper function for on_{train|test|predict}_end methods."""
-    # TODO(omalleyt): add test/predict methods.
-    if mode == 'train':
+    if mode == _TRAIN:
       self.on_train_end()
+    elif mode == _TEST:
+      self.on_test_end()
+    else:
+      self.on_predict_end()
 
   def on_batch_begin(self, batch, logs=None):
-    self._call_batch_hook('train', 'begin', batch, logs=logs)
+    self._call_batch_hook(_TRAIN, 'begin', batch, logs=logs)
 
   def on_batch_end(self, batch, logs=None):
-    self._call_batch_hook('train', 'end', batch, logs=logs)
+    self._call_batch_hook(_TRAIN, 'end', batch, logs=logs)
 
   def on_epoch_begin(self, epoch, logs=None, mode='train'):
-    """Called at the start of an epoch.
+    """Calls the `on_epoch_begin` methods of its callbacks.
 
     Arguments:
         epoch: integer, index of epoch.
-        logs: dictionary of logs.
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
         mode: One of 'train'/'test'/'predict'
     """
-    if mode == 'train':
+    if mode == _TRAIN:
       logs = logs or {}
       for callback in self.callbacks:
         callback.on_epoch_begin(epoch, logs)
     self._reset_batch_timing()
 
   def on_epoch_end(self, epoch, logs=None, mode='train'):
-    """Called at the end of an epoch.
+    """Calls the `on_epoch_end` methods of its callbacks.
 
     Arguments:
         epoch: integer, index of epoch.
-        logs: dictionary of logs.
+        logs: dict, metric results for this training epoch, and for the
+          validation epoch if validation is performed. Validation result keys
+          are prefixed with `val_`.
         mode: One of 'train'/'test'/'predict'
     """
-    if mode == 'train':
+    if mode == _TRAIN:
       logs = logs or {}
       for callback in self.callbacks:
         callback.on_epoch_end(epoch, logs)
 
   def on_train_batch_begin(self, batch, logs=None):
-    """Called at the beginning of a training batch in `fit` methods.
+    """Calls the `on_train_batch_begin` methods of its callbacks.
 
     Arguments:
         batch: integer, index of batch within the current epoch.
-        logs: dictionary of logs.
+        logs: dict. Has keys `batch` and `size` representing the current batch
+          number and the size of the batch.
     """
-    self._call_batch_hook('train', 'begin', batch, logs=logs)
+    self._call_batch_hook(_TRAIN, 'begin', batch, logs=logs)
 
   def on_train_batch_end(self, batch, logs=None):
-    """Called at the end of a training batch in `fit` methods.
+    """Calls the `on_train_batch_end` methods of its callbacks.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Metric results for this batch.
+    """
+    self._call_batch_hook(_TRAIN, 'end', batch, logs=logs)
+
+  def on_test_batch_begin(self, batch, logs=None):
+    """Calls the `on_test_batch_begin` methods of its callbacks.
 
     Arguments:
         batch: integer, index of batch within the current epoch.
-        logs: dictionary of logs.
+        logs: dict. Has keys `batch` and `size` representing the current batch
+          number and the size of the batch.
     """
-    self._call_batch_hook('train', 'end', batch, logs=logs)
+    self._call_batch_hook(_TEST, 'begin', batch, logs=logs)
+
+  def on_test_batch_end(self, batch, logs=None):
+    """Calls the `on_test_batch_end` methods of its callbacks.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Metric results for this batch.
+    """
+    self._call_batch_hook(_TEST, 'end', batch, logs=logs)
+
+  def on_predict_batch_begin(self, batch, logs=None):
+    """Calls the `on_predict_batch_begin` methods of its callbacks.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Has keys `batch` and `size` representing the current batch
+          number and the size of the batch.
+    """
+    self._call_batch_hook(_PREDICT, 'begin', batch, logs=logs)
+
+  def on_predict_batch_end(self, batch, logs=None):
+    """Calls the `on_predict_batch_end` methods of its callbacks.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Metric results for this batch.
+    """
+    self._call_batch_hook(_PREDICT, 'end', batch, logs=logs)
 
   def on_train_begin(self, logs=None):
-    """Called at the beginning of training.
+    """Calls the `on_train_begin` methods of its callbacks.
 
     Arguments:
-        logs: dictionary of logs.
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
     """
-    logs = logs or {}
     for callback in self.callbacks:
       callback.on_train_begin(logs)
 
   def on_train_end(self, logs=None):
-    """Called at the end of training.
+    """Calls the `on_train_end` methods of its callbacks.
 
     Arguments:
-        logs: dictionary of logs.
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
     """
-    logs = logs or {}
     for callback in self.callbacks:
       callback.on_train_end(logs)
 
+  def on_test_begin(self, logs=None):
+    """Calls the `on_test_begin` methods of its callbacks.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
+    for callback in self.callbacks:
+      callback.on_test_begin(logs)
+
+  def on_test_end(self, logs=None):
+    """Calls the `on_test_end` methods of its callbacks.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
+    for callback in self.callbacks:
+      callback.on_test_end(logs)
+
+  def on_predict_begin(self, logs=None):
+    """Calls the 'on_predict_begin` methods of its callbacks.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
+    for callback in self.callbacks:
+      callback.on_predict_begin(logs)
+
+  def on_predict_end(self, logs=None):
+    """Calls the `on_predict_end` methods of its callbacks.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
+    for callback in self.callbacks:
+      callback.on_predict_end(logs)
+
   def __iter__(self):
     return iter(self.callbacks)
 
@@ -330,31 +429,169 @@ class Callback(object):
   def set_model(self, model):
     self.model = model
 
-  def on_epoch_begin(self, epoch, logs=None):
-    pass
-
-  def on_epoch_end(self, epoch, logs=None):
-    pass
-
   def on_batch_begin(self, batch, logs=None):
-    pass
+    """A backwards compatibility alias for `on_train_batch_begin`."""
 
   def on_batch_end(self, batch, logs=None):
-    pass
+    """A backwards compatibility alias for `on_train_batch_end`."""
+
+  def on_epoch_begin(self, epoch, logs=None, mode='train'):
+    """Called at the start of an epoch.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        epoch: integer, index of epoch.
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+        mode: One of 'train'/'test'/'predict'
+    """
+
+  def on_epoch_end(self, epoch, logs=None, mode='train'):
+    """Called at the end of an epoch.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        epoch: integer, index of epoch.
+        logs: dict, metric results for this training epoch, and for the
+          validation epoch if validation is performed. Validation result keys
+          are prefixed with `val_`.
+        mode: One of 'train'/'test'/'predict'
+    """
 
   def on_train_batch_begin(self, batch, logs=None):
-    # For backwards compatibility
+    """Called at the beginning of a training batch in `fit` methods.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Has keys `batch` and `size` representing the current batch
+          number and the size of the batch.
+    """
+    # For backwards compatibility.
     self.on_batch_begin(batch, logs=logs)
 
   def on_train_batch_end(self, batch, logs=None):
-    # For backwards compatibility
+    """Called at the end of a training batch in `fit` methods.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Metric results for this batch.
+    """
+    # For backwards compatibility.
     self.on_batch_end(batch, logs=logs)
 
+  def on_test_batch_begin(self, batch, logs=None):
+    """Called at the beginning of a batch in `evaluate` methods.
+
+    Also called at the beginning of a validation batch in the `fit`
+    methods, if validation data is provided.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Has keys `batch` and `size` representing the current batch
+          number and the size of the batch.
+    """
+
+  def on_test_batch_end(self, batch, logs=None):
+    """Called at the end of a batch in `evaluate` methods.
+
+    Also called at the end of a validation batch in the `fit`
+    methods, if validation data is provided.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Metric results for this batch.
+    """
+
+  def on_predict_batch_begin(self, batch, logs=None):
+    """Called at the beginning of a batch in `predict` methods.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Has keys `batch` and `size` representing the current batch
+          number and the size of the batch.
+    """
+
+  def on_predict_batch_end(self, batch, logs=None):
+    """Called at the end of a batch in `predict` methods.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        batch: integer, index of batch within the current epoch.
+        logs: dict. Metric results for this batch.
+    """
+
   def on_train_begin(self, logs=None):
-    pass
+    """Called at the beginning of training.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
 
   def on_train_end(self, logs=None):
-    pass
+    """Called at the end of training.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
+
+  def on_test_begin(self, logs=None):
+    """Called at the beginning of evaluation or validation.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
+
+  def on_test_end(self, logs=None):
+    """Called at the end of evaluation or validation.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
+
+  def on_predict_begin(self, logs=None):
+    """Called at the beginning of prediction.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
+
+  def on_predict_end(self, logs=None):
+    """Called at the end of prediction.
+
+    Subclasses should override for any actions to run.
+
+    Arguments:
+        logs: dict. Currently no data is passed to this argument for this method
+          but that may change in the future.
+    """
 
 
 @tf_export('keras.callbacks.BaseLogger')
diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py
index 5f0567c663..ef469c5e4f 100644
--- a/tensorflow/python/keras/callbacks_test.py
+++ b/tensorflow/python/keras/callbacks_test.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import collections
 import csv
 import os
 import re
@@ -33,6 +34,7 @@ from tensorflow.python import keras
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
@@ -57,6 +59,142 @@ NUM_HIDDEN = 5
 BATCH_SIZE = 5
 
 
+class Counter(keras.callbacks.Callback):
+  """Counts the number of times each callback method was run.
+
+  Attributes:
+    method_counts: dict. Contains the counts of time  each callback method was
+      run.
+  """
+
+  def __init__(self):
+    self.method_counts = collections.defaultdict(int)
+    methods_to_count = [
+        'on_batch_begin', 'on_batch_end', 'on_epoch_begin', 'on_epoch_end',
+        'on_predict_batch_begin', 'on_predict_batch_end', 'on_predict_begin',
+        'on_predict_end', 'on_test_batch_begin', 'on_test_batch_end',
+        'on_test_begin', 'on_test_end', 'on_train_batch_begin',
+        'on_train_batch_end', 'on_train_begin', 'on_train_end'
+    ]
+    for method_name in methods_to_count:
+      setattr(self, method_name,
+              self.wrap_with_counts(method_name, getattr(self, method_name)))
+
+  def wrap_with_counts(self, method_name, method):
+
+    def _call_and_count(*args, **kwargs):
+      self.method_counts[method_name] += 1
+      return method(*args, **kwargs)
+
+    return _call_and_count
+
+
+@keras_parameterized.run_with_all_model_types
+@keras_parameterized.run_all_keras_modes
+class CallbackCountsTest(keras_parameterized.TestCase):
+
+  def _check_counts(self, counter, expected_counts):
+    """Checks that the counts registered by `counter` are those expected."""
+    for method_name, expected_count in expected_counts.items():
+      self.assertEqual(
+          counter.method_counts[method_name],
+          expected_count,
+          msg='For method {}: expected {}, got: {}'.format(
+              method_name, expected_count, counter.method_counts[method_name]))
+
+  def _get_model(self):
+    layers = [
+        keras.layers.Dense(10, activation='relu'),
+        keras.layers.Dense(1, activation='sigmoid')
+    ]
+    model = testing_utils.get_model_from_layers(layers, input_shape=(10,))
+    model.compile(
+        adam.AdamOptimizer(0.001),
+        'binary_crossentropy',
+        run_eagerly=testing_utils.should_run_eagerly())
+    return model
+
+  def test_callback_hooks_are_called_in_fit(self):
+    x, y = np.ones((10, 10)), np.ones((10, 1))
+    val_x, val_y = np.ones((4, 10)), np.ones((4, 1))
+
+    model = self._get_model()
+    counter = Counter()
+    model.fit(
+        x,
+        y,
+        validation_data=(val_x, val_y),
+        batch_size=2,
+        epochs=5,
+        callbacks=[counter])
+
+    self._check_counts(
+        counter, {
+            'on_batch_begin': 25,
+            'on_batch_end': 25,
+            'on_epoch_begin': 5,
+            'on_epoch_end': 5,
+            'on_predict_batch_begin': 0,
+            'on_predict_batch_end': 0,
+            'on_predict_begin': 0,
+            'on_predict_end': 0,
+            'on_test_batch_begin': 10,
+            'on_test_batch_end': 10,
+            'on_test_begin': 5,
+            'on_test_end': 5,
+            'on_train_batch_begin': 25,
+            'on_train_batch_end': 25,
+            'on_train_begin': 1,
+            'on_train_end': 1
+        })
+
+  def test_callback_hooks_are_called_in_evaluate(self):
+    x, y = np.ones((10, 10)), np.ones((10, 1))
+
+    model = self._get_model()
+    counter = Counter()
+    model.evaluate(x, y, batch_size=2, callbacks=[counter])
+    self._check_counts(
+        counter, {
+            'on_test_batch_begin': 5,
+            'on_test_batch_end': 5,
+            'on_test_begin': 1,
+            'on_test_end': 1
+        })
+
+  def test_callback_hooks_are_called_in_predict(self):
+    x = np.ones((10, 10))
+
+    model = self._get_model()
+    counter = Counter()
+    model.predict(x, batch_size=2, callbacks=[counter])
+    self._check_counts(
+        counter, {
+            'on_predict_batch_begin': 5,
+            'on_predict_batch_end': 5,
+            'on_predict_begin': 1,
+            'on_predict_end': 1
+        })
+
+  def test_callback_list_methods(self):
+    counter = Counter()
+    callback_list = keras.callbacks.CallbackList([counter])
+
+    batch = 0
+    callback_list.on_test_batch_begin(batch)
+    callback_list.on_test_batch_end(batch)
+    callback_list.on_predict_batch_begin(batch)
+    callback_list.on_predict_batch_end(batch)
+
+    self._check_counts(
+        counter, {
+            'on_test_batch_begin': 1,
+            'on_test_batch_end': 1,
+            'on_predict_batch_begin': 1,
+            'on_predict_batch_end': 1
+        })
+
+
 class KerasCallbacksTest(test.TestCase):
 
   def test_ModelCheckpoint(self):
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 94c3967625..320e76162e 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -893,6 +893,7 @@ class Model(Network):
                verbose=1,
                sample_weight=None,
                steps=None,
+               callbacks=None,
                max_queue_size=10,
                workers=1,
                use_multiprocessing=False):
@@ -943,6 +944,9 @@ class Model(Network):
             Total number of steps (batches of samples)
             before declaring the evaluation round finished.
             Ignored with the default value of `None`.
+        callbacks: List of `keras.callbacks.Callback` instances.
+            List of callbacks to apply during evaluation.
+            See [callbacks](/api_docs/python/tf/keras/callbacks).
         max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
             input only. Maximum size for the generator queue.
             If unspecified, `max_queue_size` will default to 10.
@@ -1002,7 +1006,8 @@ class Model(Network):
           steps=steps,
           batch_size=batch_size,
           verbose=verbose,
-          workers=0)
+          workers=0,
+          callbacks=callbacks)
     elif distributed_training_utils.is_tpu_strategy(
         self._distribution_strategy):
       return training_distributed.experimental_test_loop(
@@ -1015,13 +1020,15 @@ class Model(Network):
           sample_weights=sample_weights,
           batch_size=batch_size,
           verbose=verbose,
-          steps=steps)
+          steps=steps,
+          callbacks=callbacks)
 
   def predict(self,
               x,
               batch_size=None,
               verbose=0,
               steps=None,
+              callbacks=None,
               max_queue_size=10,
               workers=1,
               use_multiprocessing=False):
@@ -1048,6 +1055,9 @@ class Model(Network):
         steps: Total number of steps (batches of samples)
             before declaring the prediction round finished.
             Ignored with the default value of `None`.
+        callbacks: List of `keras.callbacks.Callback` instances.
+            List of callbacks to apply during prediction.
+            See [callbacks](/api_docs/python/tf/keras/callbacks).
         max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
             input only. Maximum size for the generator queue.
             If unspecified, `max_queue_size` will default to 10.
@@ -1110,14 +1120,20 @@ class Model(Network):
           steps=steps,
           batch_size=batch_size,
           verbose=verbose,
-          workers=0)
+          workers=0,
+          callbacks=callbacks)
     elif distributed_training_utils.is_tpu_strategy(
         self._distribution_strategy):
       return training_distributed.experimental_predict_loop(
           self, x, verbose=verbose, steps=steps)
     else:
       return training_arrays.predict_loop(
-          self, x, batch_size=batch_size, verbose=verbose, steps=steps)
+          self,
+          x,
+          batch_size=batch_size,
+          verbose=verbose,
+          steps=steps,
+          callbacks=callbacks)
 
   def reset_metrics(self):
     """Resets the state of metrics."""
@@ -1440,6 +1456,7 @@ class Model(Network):
   def evaluate_generator(self,
                          generator,
                          steps=None,
+                         callbacks=None,
                          max_queue_size=10,
                          workers=1,
                          use_multiprocessing=False,
@@ -1459,6 +1476,9 @@ class Model(Network):
             to yield from `generator` before stopping.
             Optional for `Sequence`: if unspecified, will use
             the `len(generator)` as a number of steps.
+        callbacks: List of `keras.callbacks.Callback` instances.
+            List of callbacks to apply during evaluation.
+            See [callbacks](/api_docs/python/tf/keras/callbacks).
         max_queue_size: maximum size for the generator queue
         workers: Integer. Maximum number of processes to spin up
             when using process-based threading.
@@ -1494,11 +1514,13 @@ class Model(Network):
         max_queue_size=max_queue_size,
         workers=workers,
         use_multiprocessing=use_multiprocessing,
-        verbose=verbose)
+        verbose=verbose,
+        callbacks=callbacks)
 
   def predict_generator(self,
                         generator,
                         steps=None,
+                        callbacks=None,
                         max_queue_size=10,
                         workers=1,
                         use_multiprocessing=False,
@@ -1516,6 +1538,9 @@ class Model(Network):
             to yield from `generator` before stopping.
             Optional for `Sequence`: if unspecified, will use
             the `len(generator)` as a number of steps.
+        callbacks: List of `keras.callbacks.Callback` instances.
+            List of callbacks to apply during prediction.
+            See [callbacks](/api_docs/python/tf/keras/callbacks).
         max_queue_size: Maximum size for the generator queue.
         workers: Integer. Maximum number of processes to spin up
             when using process-based threading.
@@ -1545,7 +1570,8 @@ class Model(Network):
         max_queue_size=max_queue_size,
         workers=workers,
         use_multiprocessing=use_multiprocessing,
-        verbose=verbose)
+        verbose=verbose,
+        callbacks=callbacks)
 
   def _get_callback_model(self):
     """Returns the Callback Model for this Model."""
diff --git a/tensorflow/python/keras/engine/training_arrays.py b/tensorflow/python/keras/engine/training_arrays.py
index 196d48faec..03033c3334 100644
--- a/tensorflow/python/keras/engine/training_arrays.py
+++ b/tensorflow/python/keras/engine/training_arrays.py
@@ -285,7 +285,7 @@ def model_iteration(model,
         aggregator.aggregate(batch_outs)
 
         # Callbacks batch end.
-        batch_logs.update(training_utils.make_logs(model, batch_outs, mode))
+        batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
         callbacks._call_batch_hook(mode, 'end', step, batch_logs)
         progbar.on_batch_end(step, batch_logs)
 
@@ -336,7 +336,7 @@ def model_iteration(model,
         aggregator.aggregate(batch_outs, batch_start, batch_end)
 
         # Callbacks batch end.
-        batch_logs.update(training_utils.make_logs(model, batch_outs, mode))
+        batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
         callbacks._call_batch_hook(mode, 'end', batch_index, batch_logs)
         progbar.on_batch_end(batch_index, batch_logs)
 
@@ -345,7 +345,7 @@ def model_iteration(model,
 
     aggregator.finalize()
     results = aggregator.results
-    epoch_logs.update(training_utils.make_logs(model, results, mode))
+    epoch_logs = cbks.make_logs(model, epoch_logs, results, mode)
     if len(results) == 1:
       results = results[0]
 
@@ -364,11 +364,14 @@ def model_iteration(model,
           validation_in_fit=True)
       if not isinstance(val_results, list):
         val_results = [val_results]
-      epoch_logs.update(
-          training_utils.make_logs(model, val_results, mode, prefix='val_'))
+      epoch_logs = cbks.make_logs(
+          model, epoch_logs, val_results, mode, prefix='val_')
+
+    if mode == 'train':
+      # Epochs only apply to `fit`.
+      callbacks.on_epoch_end(epoch, epoch_logs, mode=mode)
+      progbar.on_epoch_end(epoch, epoch_logs)
 
-    callbacks.on_epoch_end(epoch, epoch_logs, mode=mode)
-    progbar.on_epoch_end(epoch, epoch_logs)
   callbacks._call_end_hook(mode)
 
   if model._distribution_strategy:
diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py
index 88f21b3707..bc6a3e8dd0 100644
--- a/tensorflow/python/keras/engine/training_generator.py
+++ b/tensorflow/python/keras/engine/training_generator.py
@@ -198,7 +198,7 @@ def model_iteration(model,
       aggregator.aggregate(batch_outs)
 
       # Callbacks batch end.
-      batch_logs.update(training_utils.make_logs(model, batch_outs, mode))
+      batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
       callbacks._call_batch_hook(mode, 'end', step, batch_logs)
       progbar.on_batch_end(step, batch_logs)
 
@@ -207,7 +207,7 @@ def model_iteration(model,
 
     aggregator.finalize()
     results = aggregator.results
-    epoch_logs.update(training_utils.make_logs(model, results, mode))
+    epoch_logs = cbks.make_logs(model, epoch_logs, results, mode)
     if len(results) == 1:
       results = results[0]
 
@@ -222,15 +222,20 @@ def model_iteration(model,
           workers=workers,
           use_multiprocessing=use_multiprocessing,
           max_queue_size=max_queue_size,
+          callbacks=callbacks,
+          verbose=0,
           mode='test')
 
       if not isinstance(val_results, list):
         val_results = [val_results]
-      epoch_logs.update(
-          training_utils.make_logs(model, val_results, mode, prefix='val_'))
+      epoch_logs = cbks.make_logs(
+          model, epoch_logs, val_results, mode, prefix='val_')
+
+    if mode == 'train':
+      # Epochs only apply to `fit`.
+      callbacks.on_epoch_end(epoch, epoch_logs, mode=mode)
+      progbar.on_epoch_end(epoch, epoch_logs)
 
-    callbacks.on_epoch_end(epoch, epoch_logs, mode=mode)
-    progbar.on_epoch_end(epoch, epoch_logs)
   callbacks._call_end_hook(mode)
 
   if enqueuer is not None:
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 7c368e804e..64c6f727c9 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -136,18 +136,6 @@ class OutputsAggregator(Aggregator):
       self.results = [np.concatenate(result, axis=0) for result in self.results]
 
 
-def make_logs(model, outputs, mode, prefix=''):
-  """Computes logs for sending to `on_batch_end` methods."""
-  logs = {}
-  # TODO(omalleyt): handle outputs in prediction when Callback
-  # hooks are ready.
-  if mode in ['train', 'test']:
-    if hasattr(model, 'metrics_names'):
-      for label, output in zip(model.metrics_names, outputs):
-        logs[prefix + label] = output
-  return logs
-
-
 def get_progbar(model, count_mode):
   """Get Progbar."""
   stateful_metric_names = None
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
index 6b0b4595ff..eced2e1cb0 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-model.pbtxt
@@ -167,11 +167,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -235,11 +235,11 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
index 69cbecb898..2acb90173f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.-sequential.pbtxt
@@ -172,11 +172,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -244,7 +244,7 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_classes"
@@ -252,7 +252,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-base-logger.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-base-logger.pbtxt
index 7d298e9513..9dbdaf0f5f 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-base-logger.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-base-logger.pbtxt
@@ -23,6 +23,38 @@ tf_class {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
index 133205ab88..a5804d3bbc 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
@@ -17,12 +17,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-callback.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-callback.pbtxt
index d766c09ac5..bbc02c4d71 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-callback.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-callback.pbtxt
@@ -16,11 +16,43 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+  }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_train_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
index 605f74e560..6182baf0a3 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
@@ -21,12 +21,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-history.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-history.pbtxt
index cd893e6726..9b1b068e22 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-history.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-history.pbtxt
@@ -17,12 +17,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-lambda-callback.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-lambda-callback.pbtxt
index 50f2054cab..92440188c8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-lambda-callback.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-lambda-callback.pbtxt
@@ -17,11 +17,43 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+  }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_train_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt
index 9ed9db0a89..a04ffb92eb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt
@@ -23,6 +23,38 @@ tf_class {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-model-checkpoint.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
index 3d8d1363bb..c10c236ad1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
@@ -17,12 +17,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-progbar-logger.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-progbar-logger.pbtxt
index 5012f1517d..624f856d27 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-progbar-logger.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-progbar-logger.pbtxt
@@ -23,6 +23,38 @@ tf_class {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
index 73652c2b61..0db6b8d371 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
@@ -21,12 +21,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-remote-monitor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-remote-monitor.pbtxt
index 24db71de11..dac2049fe1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-remote-monitor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-remote-monitor.pbtxt
@@ -17,12 +17,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
index c5503c69a5..2e0f77eda8 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -23,6 +23,38 @@ tf_class {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
index de6e8ef072..2834b74e8a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
@@ -17,11 +17,43 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+  }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_train_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
index bdf695ed4d..5885cd21c1 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-model.pbtxt
@@ -167,11 +167,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -235,11 +235,11 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
index a64156f731..935fa32f8c 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.models.-sequential.pbtxt
@@ -172,11 +172,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -244,7 +244,7 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_classes"
@@ -252,7 +252,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
index 6b0b4595ff..eced2e1cb0 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-model.pbtxt
@@ -167,11 +167,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -235,11 +235,11 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
index 69cbecb898..2acb90173f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.-sequential.pbtxt
@@ -172,11 +172,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -244,7 +244,7 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_classes"
@@ -252,7 +252,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-base-logger.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-base-logger.pbtxt
index 7d298e9513..9dbdaf0f5f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-base-logger.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-base-logger.pbtxt
@@ -23,6 +23,38 @@ tf_class {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
index 133205ab88..a5804d3bbc 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
@@ -17,12 +17,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-callback.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-callback.pbtxt
index d766c09ac5..bbc02c4d71 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-callback.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-callback.pbtxt
@@ -16,11 +16,43 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+  }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_train_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
index 605f74e560..6182baf0a3 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
@@ -21,12 +21,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-history.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-history.pbtxt
index cd893e6726..9b1b068e22 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-history.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-history.pbtxt
@@ -17,12 +17,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-lambda-callback.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-lambda-callback.pbtxt
index 50f2054cab..92440188c8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-lambda-callback.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-lambda-callback.pbtxt
@@ -17,11 +17,43 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+  }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_train_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt
index 9ed9db0a89..a04ffb92eb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-learning-rate-scheduler.pbtxt
@@ -23,6 +23,38 @@ tf_class {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-model-checkpoint.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
index 3d8d1363bb..c10c236ad1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
@@ -17,12 +17,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-progbar-logger.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-progbar-logger.pbtxt
index 5012f1517d..624f856d27 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-progbar-logger.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-progbar-logger.pbtxt
@@ -23,6 +23,38 @@ tf_class {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
index 73652c2b61..0db6b8d371 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
@@ -21,12 +21,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-remote-monitor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-remote-monitor.pbtxt
index 24db71de11..dac2049fe1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-remote-monitor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-remote-monitor.pbtxt
@@ -17,12 +17,44 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
index c5503c69a5..2e0f77eda8 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-tensor-board.pbtxt
@@ -23,6 +23,38 @@ tf_class {
     name: "on_epoch_end"
     argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
   member_method {
     name: "on_train_batch_begin"
     argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
index de6e8ef072..2834b74e8a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
@@ -17,11 +17,43 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+  }
+  member_method {
+    name: "on_predict_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_predict_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_begin"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_batch_end"
+    argspec: "args=[\'self\', \'batch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_begin"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
+  }
+  member_method {
+    name: "on_test_end"
+    argspec: "args=[\'self\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_train_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
index d061b9c221..5766528b31 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-linear-model.pbtxt
@@ -172,11 +172,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -240,11 +240,11 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
index bdf695ed4d..5885cd21c1 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-model.pbtxt
@@ -167,11 +167,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -235,11 +235,11 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
index a64156f731..935fa32f8c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.models.-sequential.pbtxt
@@ -172,11 +172,11 @@ tf_class {
   }
   member_method {
     name: "evaluate"
-    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'y\', \'batch_size\', \'verbose\', \'sample_weight\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'1\', \'None\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "evaluate_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "fit"
@@ -244,7 +244,7 @@ tf_class {
   }
   member_method {
     name: "predict"
-    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'10\', \'1\', \'False\'], "
+    argspec: "args=[\'self\', \'x\', \'batch_size\', \'verbose\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'None\', \'None\', \'10\', \'1\', \'False\'], "
   }
   member_method {
     name: "predict_classes"
@@ -252,7 +252,7 @@ tf_class {
   }
   member_method {
     name: "predict_generator"
-    argspec: "args=[\'self\', \'generator\', \'steps\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'10\', \'1\', \'False\', \'0\'], "
+    argspec: "args=[\'self\', \'generator\', \'steps\', \'callbacks\', \'max_queue_size\', \'workers\', \'use_multiprocessing\', \'verbose\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'10\', \'1\', \'False\', \'0\'], "
   }
   member_method {
     name: "predict_on_batch"
-- 
GitLab


From e2acc3dfd9e4757a1a4be894428e3f3683e30348 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 15:11:09 -0800
Subject: [PATCH 631/873] Automated rollback of commit
 306cf4b2834cca1f1fc1fc58ba16c3248516dfd9

PiperOrigin-RevId: 225611671
---
 tensorflow/BUILD | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 823ad8f506..449a1372ed 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -267,6 +267,15 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+# By default, XLA GPU is compiled into tensorflow when building with
+# --config=cuda even when `with_xla_support` is false. The config setting
+# here allows us to override the behavior if needed.
+config_setting(
+    name = "no_xla_deps_in_cuda",
+    define_values = {"no_xla_deps_in_cuda": "true"},
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "with_gdr_support",
     define_values = {"with_gdr_support": "true"},
-- 
GitLab


From 8bcc801a7cd748e7d9d47f0f5d7ebd84a2f2eaea Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Fri, 14 Dec 2018 15:26:00 -0800
Subject: [PATCH 632/873] Create v2 ModeKeys class that's shared among
 TensorFlow (Keras + Estimator). It will be exported as tf.estimator.ModeKeys
 in a follow-up change.

PiperOrigin-RevId: 225613853
---
 tensorflow/python/BUILD                      | 25 +++++++++++++++
 tensorflow/python/training/mode_keys.py      | 33 ++++++++++++++++++++
 tensorflow/python/training/mode_keys_test.py | 29 +++++++++++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 tensorflow/python/training/mode_keys.py
 create mode 100644 tensorflow/python/training/mode_keys_test.py

diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 8c0e95ea4e..d851c229ac 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -116,6 +116,7 @@ py_library(
         ":manip_ops",
         ":math_ops",
         ":metrics",
+        ":mode_keys",
         ":nccl_ops",
         ":nn",
         ":ops",
@@ -5916,6 +5917,30 @@ py_binary(
     ],
 )
 
+py_library(
+    name = "mode_keys",
+    srcs = [
+        "training/mode_keys.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":util",
+    ],
+)
+
+py_test(
+    name = "mode_keys_test",
+    size = "small",
+    srcs = [
+        "training/mode_keys_test.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":client_testlib",
+        ":mode_keys",
+    ],
+)
+
 pyx_library(
     name = "framework_fast_tensor_util",
     srcs = ["framework/fast_tensor_util.pyx"],
diff --git a/tensorflow/python/training/mode_keys.py b/tensorflow/python/training/mode_keys.py
new file mode 100644
index 0000000000..ef64554bd5
--- /dev/null
+++ b/tensorflow/python/training/mode_keys.py
@@ -0,0 +1,33 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Model modeKeys for TensorFlow and Estimator."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+class ModeKeys(object):
+  """Standard names for model modes.
+
+  The following standard keys are defined:
+
+  * `TRAIN`: training/fitting mode.
+  * `TEST`: testing/evaluation mode.
+  * `PREDICT`: prediction/inference mode.
+  """
+
+  TRAIN = 'train'
+  TEST = 'test'
+  PREDICT = 'predict'
diff --git a/tensorflow/python/training/mode_keys_test.py b/tensorflow/python/training/mode_keys_test.py
new file mode 100644
index 0000000000..c4435b7d48
--- /dev/null
+++ b/tensorflow/python/training/mode_keys_test.py
@@ -0,0 +1,29 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for `tf.train.ModeKeys."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.platform import test
+from tensorflow.python.training import mode_keys
+
+
+class ModeKeysTest(test.TestCase):
+
+  def testKeyEquality(self):
+    self.assertEqual(mode_keys.ModeKeys.PREDICT, 'predict')
+    self.assertEqual(mode_keys.ModeKeys.TRAIN, 'train')
+    self.assertEqual(mode_keys.ModeKeys.TEST, 'test')
-- 
GitLab


From 720cb665feca8fa115936a15501e3c1f32dc89b8 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Fri, 14 Dec 2018 15:29:32 -0800
Subject: [PATCH 633/873] Automated rollback of commit
 e9e534f3239d7cb7e2a815b49c1c3520d9566d70

PiperOrigin-RevId: 225614329
---
 .../contrib/eager/python/parameter_server.py  | 12 ++++
 .../contrib/optimizer_v2/optimizer_v2.py      | 19 +++--
 .../contrib/optimizer_v2/optimizer_v2_test.py |  8 +--
 .../tpu/python/tpu/keras_tpu_variables.py     | 70 ++++++++++---------
 tensorflow/python/eager/def_function.py       | 15 +++-
 tensorflow/python/keras/backend.py            | 33 +--------
 tensorflow/python/keras/engine/base_layer.py  |  2 +-
 .../python/keras/engine/base_layer_utils.py   |  6 +-
 .../python/keras/optimizer_v2/optimizer_v2.py |  9 ++-
 .../keras/optimizer_v2/optimizer_v2_test.py   |  6 +-
 .../resource_variable_ops_test.py             | 17 ++++-
 .../kernel_tests/variable_scope_test.py       |  4 +-
 .../python/kernel_tests/variables_test.py     |  5 +-
 .../python/ops/resource_variable_ops.py       | 43 ++++++++++--
 tensorflow/python/ops/variable_scope.py       |  3 +-
 tensorflow/python/ops/variables.py            | 28 +++++---
 tensorflow/python/training/optimizer.py       | 19 +++--
 tensorflow/python/training/optimizer_test.py  |  9 +--
 .../api/golden/v2/tensorflow.-variable.pbtxt  |  6 +-
 19 files changed, 190 insertions(+), 124 deletions(-)

diff --git a/tensorflow/contrib/eager/python/parameter_server.py b/tensorflow/contrib/eager/python/parameter_server.py
index d50ff236bb..7803a6799b 100644
--- a/tensorflow/contrib/eager/python/parameter_server.py
+++ b/tensorflow/contrib/eager/python/parameter_server.py
@@ -79,6 +79,7 @@ class SharedVariable(resource_variable_ops.ResourceVariable):
                trainable=True,
                name=None,
                dtype=None,
+               constraint=None,
                initialize=True,
                **unused_kwargs):
     """Creates a variable.
@@ -98,6 +99,13 @@ class SharedVariable(resource_variable_ops.ResourceVariable):
         If None, either the datatype will be kept (if initial_value is
         a Tensor) or float32 will be used (if it is a Python object convertible
         to a Tensor).
+      constraint: An optional projection function to be applied to the variable
+        after being updated by an `Optimizer` (e.g. used to implement norm
+        constraints or value constraints for layer weights). The function must
+        take as input the unprojected Tensor representing the value of the
+        variable and return the Tensor for the projected value
+        (which must have the same shape). Constraints are not safe to
+        use when doing asynchronous distributed training.
       initialize: if True, runs initialization in eager execution; leaves the
         variable uninitialized otherwise.
 
@@ -118,6 +126,9 @@ class SharedVariable(resource_variable_ops.ResourceVariable):
                        "functions. Please file a feature request if this "
                        "restriction inconveniences you.")
 
+    if constraint is not None and not callable(constraint):
+      raise ValueError("The `constraint` argument must be a callable.")
+
     if isinstance(initial_value, checkpointable.CheckpointInitialValue):
       self._maybe_initialize_checkpointable()
       self._update_uid = initial_value.checkpoint_position.restore_uid
@@ -190,6 +201,7 @@ class SharedVariable(resource_variable_ops.ResourceVariable):
         self._initial_value = initial_value if self._in_graph_mode else None
         self._handle_name = handle_name + ":0"
         self._dtype = initial_value.dtype.base_dtype
+        self._constraint = constraint
 
         if self._in_graph_mode:
           with ops.name_scope("IsInitialized"):
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2.py b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
index 54c2a749d1..7fb23abc38 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2.py
@@ -69,17 +69,16 @@ class _RefVariableProcessor(_OptimizableVariable):
 
   def update_op(self, optimizer, g, *args):
     if isinstance(g, ops.Tensor):
-      # pylint: disable=protected-access
-      update_op = optimizer._apply_dense(g, self._v, *args)
-      if getattr(self._v, "_constraint", None) is not None:
+      update_op = optimizer._apply_dense(g, self._v, *args)  # pylint: disable=protected-access
+      if self._v.constraint is not None:
         with ops.control_dependencies([update_op]):
-          return self._v.assign(self._v._constraint(self._v))
+          return self._v.assign(self._v.constraint(self._v))
       else:
         return update_op
     else:
       assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a "
                                                 "tensor nor IndexedSlices.")
-      if getattr(self._v, "_constraint", None) is not None:
+      if self._v.constraint is not None:
         raise RuntimeError(
             "Cannot use a constraint function on a sparse variable.")
       # pylint: disable=protected-access
@@ -98,9 +97,9 @@ class _DenseReadResourceVariableProcessor(_OptimizableVariable):
   def update_op(self, optimizer, g, *args):
     # pylint: disable=protected-access
     update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0], *args)
-    if getattr(self._v, "_constraint", None) is not None:
+    if self._v.constraint is not None:
       with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v._constraint(self._v))
+        return self._v.assign(self._v.constraint(self._v))
     else:
       return update_op
 
@@ -117,15 +116,15 @@ class _DenseResourceVariableProcessor(_OptimizableVariable):
   def update_op(self, optimizer, g, *args):
     # pylint: disable=protected-access
     if isinstance(g, ops.IndexedSlices):
-      if getattr(self._v, "_constraint", None) is not None:
+      if self._v.constraint is not None:
         raise RuntimeError(
             "Cannot use a constraint function on a sparse variable.")
       return optimizer._resource_apply_sparse_duplicate_indices(
           g.values, self._v, g.indices, *args)
     update_op = optimizer._resource_apply_dense(g, self._v, *args)
-    if getattr(self._v, "_constraint", None) is not None:
+    if self._v.constraint is not None:
       with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v._constraint(self._v))
+        return self._v.assign(self._v.constraint(self._v))
     else:
       return update_op
 
diff --git a/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py b/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py
index d71172642e..dd7f2f4405 100644
--- a/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py
+++ b/tensorflow/contrib/optimizer_v2/optimizer_v2_test.py
@@ -222,14 +222,14 @@ class OptimizerTest(test.TestCase):
       opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
       self.assertTrue(opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
 
-  @test_util.run_v1_only(
-      '`ResourceVariable` does not support `constraint` argument.')
   def testConstraint(self):
     constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
     constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
     with self.cached_session():
-      var0 = variables.VariableV1([1.0, 2.0], constraint=constraint_01)
-      var1 = variables.VariableV1([3.0, 4.0], constraint=constraint_0)
+      var0 = variables.Variable([1.0, 2.0],
+                                constraint=constraint_01)
+      var1 = variables.Variable([3.0, 4.0],
+                                constraint=constraint_0)
       cost = 5 * var0 + 3 * var1
       global_step = variables.Variable(
           array_ops.zeros([], dtypes.int64), name='global_step')
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
index 22a39a17e4..de425626c8 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_tpu_variables.py
@@ -66,12 +66,11 @@ class ReplicatedVariable(object):
   * colocation.
   """
 
-  def __init__(self, name, variables, constraint=None):
+  def __init__(self, name, variables):
     self._name = name
     self._primary_var = variables[0]
     self._common_name = self._primary_var.name.split(":")[0]
     self._vars = variables
-    self._constraint = constraint
     self._cached_value = None
     self._dtype = variables[0].dtype
 
@@ -134,7 +133,7 @@ class ReplicatedVariable(object):
 
   @property
   def constraint(self):
-    return self._constraint
+    return None
 
   @property
   def op(self):
@@ -306,37 +305,42 @@ def replicated_variable_for_optimizer(num_replicas):
     yield
     return
 
-  def opt_variable(value, dtype=None, name=None, constraint=None):
-    """Instantiates a variable and returns it."""
-    if dtype is None:
-      dtype = backend.floatx()
-
-    variables = []
-    for i in range(num_replicas):
-      # Keras holds the variables in optimizer class instance , so the name
-      # does not matter here. ResourceVariable constructor will find a unique
-      # name (including name=None) for each replica.
-      with ops.device("device:TPU:{}".format(i)):
-        v = resource_variable_ops.ResourceVariable(
-            value,
-            dtype=dtypes_module.as_dtype(dtype),
-            name=name)
-        variables.append(v)
-    name = "replicate_{}_{}".format("variable" if name is None else name,
-                                    ops.uid())
-    v = ReplicatedVariable(name, variables, constraint)
-    # pylint: disable=protected-access
-    if isinstance(value, np.ndarray):
-      v._keras_shape = value.shape
-    elif hasattr(value, "shape"):
-      v._keras_shape = backend.int_shape(value)
-    v._uses_learning_phase = False
-    backend.track_variable(v)
-    return v
-
-  old_variable = backend.variable
   try:
+    old_v = backend.variable
+
+    def opt_variable(value, dtype=None, name=None, constraint=None):
+      """Instantiates a variable and returns it."""
+      if dtype is None:
+        dtype = backend.floatx()
+
+      variables = []
+      for i in range(num_replicas):
+        # Keras holds the variables in optimizer class instance , so the name
+        # does not matter here. ResourceVariable constructor will find a unique
+        # name (including name=None) for each replica.
+        with ops.device("device:TPU:{}".format(i)):
+          v = resource_variable_ops.ResourceVariable(
+              value,
+              dtype=dtypes_module.as_dtype(dtype),
+              name=name,
+              constraint=constraint)
+          variables.append(v)
+      name = "replicate_{}_{}".format("variable" if name is None else name,
+                                      ops.uid())
+      v = ReplicatedVariable(name, variables)
+
+      # pylint: disable=protected-access
+
+      if isinstance(value, np.ndarray):
+        v._keras_shape = value.shape
+      elif hasattr(value, "shape"):
+        v._keras_shape = backend.int_shape(value)
+      v._uses_learning_phase = False
+      backend.track_variable(v)
+      return v
+
     backend.variable = opt_variable
     yield
+
   finally:
-    backend.variable = old_variable
+    backend.variable = old_v
diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 0ca52bd808..5e7e866fd8 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -51,6 +51,7 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
                caching_device=None,
                name=None,
                dtype=None,
+               constraint=None,
                add_initializers_to=None,
                **unused_kwargs):
     """Creates a variable.
@@ -75,6 +76,13 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
         If None, either the datatype will be kept (if initial_value is
        a Tensor) or float32 will be used (if it is a Python object convertible
        to a Tensor).
+      constraint: An optional projection function to be applied to the variable
+        after being updated by an `Optimizer` (e.g. used to implement norm
+        constraints or value constraints for layer weights). The function must
+        take as input the unprojected Tensor representing the value of the
+        variable and return the Tensor for the projected value
+        (which must have the same shape). Constraints are not safe to
+        use when doing asynchronous distributed training.
       add_initializers_to: if not None and not in legacy graph mode, the
         initializer tensor will be added to this map instead of adding the
         assignment to the function.
@@ -89,7 +97,8 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
       # here; we can't really do the capturing or conditional logic.
       resource_variable_ops.ResourceVariable.__init__(
           self, initial_value=initial_value, trainable=trainable,
-          caching_device=caching_device, name=name, dtype=dtype)
+          caching_device=caching_device, name=name, dtype=dtype,
+          constraint=constraint)
       return
     with ops.init_scope():
       self._in_graph_mode = not context.executing_eagerly()
@@ -97,6 +106,9 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
       raise ValueError("initial_value must be specified.")
     init_from_fn = callable(initial_value)
 
+    if constraint is not None and not callable(constraint):
+      raise ValueError("The `constraint` argument must be a callable.")
+
     if isinstance(initial_value, checkpointable.CheckpointInitialValue):
       self._maybe_initialize_checkpointable()
       self._update_uid = initial_value.checkpoint_position.restore_uid
@@ -135,6 +147,7 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
       self._unique_id = shared_name
       self._handle_name = shared_name + ":0"
       self._dtype = initial_value.dtype.base_dtype
+      self._constraint = constraint
       assert initial_value is not None
       if self._in_graph_mode:
         with ops.init_scope():
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 4c87bf3769..095273071f 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -673,11 +673,7 @@ def variable(value, dtype=None, name=None, constraint=None):
       dtype: Tensor type.
       name: Optional name string for the tensor.
       constraint: Optional projection function to be
-          applied to the variable after an optimizer update. The function
-          must take as input the unprojected tensor representing the value
-          of the variable and return the tensor for the projected value
-          (which must have the same shape). Constraints are not safe to
-          use when doing asynchronous distributed training.
+          applied to the variable after an optimizer update.
 
   Returns:
       A variable instance (with Keras metadata included).
@@ -710,8 +706,8 @@ def variable(value, dtype=None, name=None, constraint=None):
   v = resource_variable_ops.ResourceVariable(
       value,
       dtype=dtypes_module.as_dtype(dtype),
-      name=name)
-  v._constraint = constraint
+      name=name,
+      constraint=constraint)
   if isinstance(value, np.ndarray):
     v._keras_shape = value.shape
   elif hasattr(value, 'shape'):
@@ -769,29 +765,6 @@ def _initialize_variables(session):
       session.run(variables_module.variables_initializer(uninitialized_vars))
 
 
-def _has_constraint(v):
-  """Returns `True` if a variable has a constraint and `False` otherwise."""
-  return getattr(v, '_constraint', None) is not None
-
-
-def _maybe_enforce_constraint(v):
-  """Enforces a constraint for a variable.
-
-  Args:
-    v: A variable.
-
-  Returns:
-    A `Tensor` which corresponds to the value of this variable with
-    the constraint enforced, or the current value of this variable,
-    if no constraint is present.
-  """
-  constraint = getattr(v, '_constraint', None)
-  if constraint is None:
-    return array_ops.identity(v)
-  else:
-    return v.assign(constraint(v))
-
-
 @tf_export('keras.backend.constant')
 def constant(value, dtype=None, shape=None, name=None):
   """Creates a constant tensor.
diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index 5ab48e8b3d..aeed750652 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -347,13 +347,13 @@ class Layer(checkpointable.CheckpointableBase):
         overwrite=True,
         initializer=initializer,
         dtype=dtype,
+        constraint=constraint,
         trainable=trainable and self.trainable,
         partitioner=partitioner,
         use_resource=use_resource,
         collections=collections,
         synchronization=synchronization,
         aggregation=aggregation)
-    variable._constraint = constraint
     backend.track_variable(variable)
 
     if regularizer is not None:
diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py
index d2343969ee..d2f947f177 100644
--- a/tensorflow/python/keras/engine/base_layer_utils.py
+++ b/tensorflow/python/keras/engine/base_layer_utils.py
@@ -59,6 +59,7 @@ def make_variable(name,
                   trainable=None,
                   caching_device=None,
                   validate_shape=True,
+                  constraint=None,
                   use_resource=None,
                   collections=None,
                   synchronization=tf_variables.VariableSynchronization.AUTO,
@@ -92,6 +93,7 @@ def make_variable(name,
       `synchronization` is set to `ON_READ`.
     caching_device: Passed to `tf.Variable`.
     validate_shape: Passed to `tf.Variable`.
+    constraint: Constraint instance (callable).
     use_resource: Whether to use a `ResourceVariable`.
     collections: List of graph collections keys. The new variable is added to
       these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
@@ -129,17 +131,19 @@ def make_variable(name,
 
   # TODO(apassos,rohanj) figure out how to remove collections from here so we
   # can remove the V1.
-  return tf_variables.VariableV1(
+  v = tf_variables.VariableV1(
       initial_value=init_val,
       name=name,
       trainable=trainable,
       caching_device=caching_device,
       dtype=variable_dtype,
       validate_shape=validate_shape,
+      constraint=constraint,
       use_resource=use_resource,
       collections=collections,
       synchronization=synchronization,
       aggregation=aggregation)
+  return v
 
 
 def get_default_graph_uid_map():
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index 041d36a931..0e909d0d79 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -344,14 +344,17 @@ class OptimizerV2(checkpointable.CheckpointableBase):
       if isinstance(var, ops.Tensor):
         raise NotImplementedError("Trying to update a Tensor ", var)
       if isinstance(grad, ops.IndexedSlices):
-        if backend._has_constraint(var):  # pylint: disable=protected-access
+        if var.constraint is not None:
           raise RuntimeError(
               "Cannot use a constraint function on a sparse variable.")
         return self._resource_apply_sparse_duplicate_indices(
             grad.values, var, grad.indices)
       update_op = self._resource_apply_dense(grad, var)
-      with ops.control_dependencies([update_op]):
-        return backend._maybe_enforce_constraint(var)  # pylint: disable=protected-access
+      if var.constraint is not None:
+        with ops.control_dependencies([update_op]):
+          return var.assign(var.constraint(var))
+      else:
+        return update_op
 
     with ops.name_scope(name, self._name) as name:
       for grad, var in grads_and_vars:
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
index f27f3516b5..8b2865e2aa 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
@@ -235,8 +235,10 @@ class OptimizerTest(test.TestCase):
     constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
     constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
     with self.cached_session():
-      var0 = backend.variable([1.0, 2.0], constraint=constraint_01)
-      var1 = backend.variable([3.0, 4.0], constraint=constraint_0)
+      var0 = variables.Variable([1.0, 2.0],
+                                constraint=constraint_01)
+      var1 = variables.Variable([3.0, 4.0],
+                                constraint=constraint_0)
       loss = lambda: 5 * var0 + 3 * var1
       sgd = gradient_descent.SGD(3.0)
 
diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
index 4689e2d814..df7b686165 100644
--- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py
+++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py
@@ -497,6 +497,18 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       state_ops.scatter_update(ref, indices, updates)
       self.assertAllEqual(ref.read_value(), [True, True, True])
 
+  @test_util.run_in_graph_and_eager_modes
+  def testConstraintArg(self):
+    constraint = lambda x: x
+    v = resource_variable_ops.ResourceVariable(
+        initial_value=lambda: 1, constraint=constraint, name="var0")
+    self.assertEqual(v.constraint, constraint)
+
+    constraint = 0
+    with self.assertRaises(ValueError):
+      v = resource_variable_ops.ResourceVariable(
+          initial_value=lambda: 1, constraint=constraint, name="var1")
+
   # TODO(alive): how should this work in Eager mode?
   @test_util.run_deprecated_v1
   def testInitFn(self):
@@ -856,16 +868,19 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
   def testVariableEager(self):
     with context.eager_mode():
       init = array_ops.ones(shape=[10, 20, 35], dtype=dtypes.int32)
+      constraint = lambda x: x
       with ops.name_scope("foo"):
         v = resource_variable_ops.ResourceVariable(
             name="var7",
             initial_value=init,
-            caching_device="cpu:0")
+            caching_device="cpu:0",
+            constraint=constraint)
       # Test properties
       self.assertEqual(dtypes.int32, v.dtype)
       self.assertEqual("foo/var7:0", v.name)
       self.assertAllEqual([10, 20, 35], v.shape.as_list())
       self.assertTrue(isinstance(v.handle, ops.EagerTensor))
+      self.assertEqual(constraint, v.constraint)
       self.assertAllEqual(init.numpy(), v.read_value().numpy())
       self.assertAllEqual(init.numpy(), v.value().numpy())
 
diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py
index e79d822207..451eb38530 100644
--- a/tensorflow/python/kernel_tests/variable_scope_test.py
+++ b/tensorflow/python/kernel_tests/variable_scope_test.py
@@ -136,8 +136,8 @@ class VariableScopeTest(test.TestCase):
         self.evaluate(variables_lib.variables_initializer([w]))
         self.assertAllClose(self.evaluate(w.value()), 0.3)
 
-  @test_util.run_v1_only(
-      "`ResourceVariable` does not support `constraint` argument.")
+  @test_util.run_in_graph_and_eager_modes
+  @run_inside_wrap_function_in_eager_mode
   def testVarScopeConstraint(self):
     constraint = lambda x: 0. * x
     with variable_scope.variable_scope("tower1") as tower:
diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py
index 467867125e..07807e89d0 100644
--- a/tensorflow/python/kernel_tests/variables_test.py
+++ b/tensorflow/python/kernel_tests/variables_test.py
@@ -446,17 +446,16 @@ class VariablesTestCase(test.TestCase):
       self.evaluate(variables.global_variables_initializer())
       self.assertAllClose(np.negative(value), self.evaluate(v2))
 
-  @test_util.run_v1_only("`constraint` argument is removed in TF2.0")
   def testConstraintArg(self):
     constraint = lambda x: x
-    v = variables.VariableV1(
+    v = variables.Variable(
         lambda: constant_op.constant(1.),
         constraint=constraint)
     self.assertEqual(v.constraint, constraint)
 
     constraint = 0
     with self.assertRaises(ValueError):
-      v = variables.VariableV1(
+      v = variables.Variable(
           lambda: constant_op.constant(1.),
           constraint=constraint)
 
diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py
index 95db6a1463..6104cfa7ff 100644
--- a/tensorflow/python/ops/resource_variable_ops.py
+++ b/tensorflow/python/ops/resource_variable_ops.py
@@ -256,7 +256,13 @@ class ResourceVariable(variables.RefVariable):
         arguments (except for import_scope) are mutually exclusive.
       import_scope: Optional `string`. Name scope to add to the
         ResourceVariable. Only used when `variable_def` is provided.
-      constraint: Ignored. Provided for compatibility with tf.Variable.
+      constraint: An optional projection function to be applied to the variable
+        after being updated by an `Optimizer` (e.g. used to implement norm
+        constraints or value constraints for layer weights). The function must
+        take as input the unprojected Tensor representing the value of the
+        variable and return the Tensor for the projected value
+        (which must have the same shape). Constraints are not safe to
+        use when doing asynchronous distributed training.
 
     Raises:
       ValueError: If the initial value is not specified, or does not have a
@@ -268,10 +274,6 @@ class ResourceVariable(variables.RefVariable):
     collections.
     @end_compatibility
     """
-    if constraint is not None:
-      raise RuntimeError(  # pylint: disable=g-doc-exception
-          "`ResourceVariable` does not support `constraint` argument.")
-
     if variable_def:
       if initial_value is not None:
         raise ValueError("variable_def and initial_value are mutually "
@@ -288,7 +290,8 @@ class ResourceVariable(variables.RefVariable):
           validate_shape=validate_shape,
           caching_device=caching_device,
           name=name,
-          dtype=dtype)
+          dtype=dtype,
+          constraint=constraint)
 
   # pylint: disable=unused-argument
   def _init_from_args(self,
@@ -298,7 +301,8 @@ class ResourceVariable(variables.RefVariable):
                       validate_shape=True,
                       caching_device=None,
                       name=None,
-                      dtype=None):
+                      dtype=None,
+                      constraint=None):
     """Creates a variable.
 
     Args:
@@ -325,6 +329,13 @@ class ResourceVariable(variables.RefVariable):
         If None, either the datatype will be kept (if initial_value is
        a Tensor) or float32 will be used (if it is a Python object convertible
        to a Tensor).
+      constraint: An optional projection function to be applied to the variable
+        after being updated by an `Optimizer` (e.g. used to implement norm
+        constraints or value constraints for layer weights). The function must
+        take as input the unprojected Tensor representing the value of the
+        variable and return the Tensor for the projected value
+        (which must have the same shape). Constraints are not safe to
+        use when doing asynchronous distributed training.
 
     Raises:
       ValueError: If the initial value is not specified, or does not have a
@@ -356,6 +367,8 @@ class ResourceVariable(variables.RefVariable):
       raise ValueError(
           "collections argument to Variable constructor must be a list, tuple, "
           "or set. Got %s of type %s" % (collections, type(collections)))
+    if constraint is not None and not callable(constraint):
+      raise ValueError("The `constraint` argument must be a callable.")
 
     if isinstance(initial_value, checkpointable.CheckpointInitialValue):
       self._maybe_initialize_checkpointable()
@@ -412,6 +425,7 @@ class ResourceVariable(variables.RefVariable):
         self._initial_value = initial_value if self._in_graph_mode else None
         self._handle_name = handle_name + ":0"
         self._dtype = initial_value.dtype.base_dtype
+        self._constraint = constraint
 
         if self._in_graph_mode:
           with ops.name_scope("IsInitialized"):
@@ -529,6 +543,7 @@ class ResourceVariable(variables.RefVariable):
       self._save_slice_info = None
     self._caching_device = None
     self._dtype = dtypes.as_dtype(self._handle.op.get_attr("dtype"))
+    self._constraint = None
     self._cached_shape_as_list = None
 
   @contextlib.contextmanager
@@ -562,6 +577,7 @@ class ResourceVariable(variables.RefVariable):
     copied_variable = ResourceVariable(
         initial_value=self.read_value(),
         trainable=self._trainable,
+        constraint=self._constraint,
         dtype=self._dtype,
         name=self._shared_name + "_copy")
     memo[self._unique_id] = copied_variable
@@ -643,6 +659,16 @@ class ResourceVariable(variables.RefVariable):
       raise RuntimeError("initial_value not supported in EAGER mode.")
     return self._initial_value
 
+  @property
+  def constraint(self):
+    """Returns the constraint function associated with this variable.
+
+    Returns:
+      The constraint function that was passed to the variable constructor.
+      Can be `None` if no constraint was passed.
+    """
+    return self._constraint
+
   @property
   def op(self):
     """The op for this variable."""
@@ -1218,6 +1244,7 @@ class _UnreadVariable(ResourceVariable):
       self._handle_name = self._handle.name
     self._unique_id = unique_id
     self._dtype = dtype
+    self._constraint = None
     self._cached_value = None
     self._is_initialized_op = None
     self._initializer_op = None
@@ -1304,6 +1331,7 @@ class _MixedPrecisionVariable(ResourceVariable):
       self._handle_name = self.handle.name
     self._unique_id = var._unique_id  # pylint: disable=protected-access
     self._dtype = var.dtype
+    self._constraint = None
     self._cached_value = None
     self._is_initialized_op = var._is_initialized_op  # pylint: disable=protected-access
     self._initializer_op = var._initializer_op  # pylint: disable=protected-access
@@ -1467,6 +1495,7 @@ def copy_to_graph_uninitialized(var):
           shape=var.shape, dtype=var.dtype,
           name="unused_initial_variable_value"),
       trainable=var.trainable,
+      constraint=var._constraint,
       dtype=var.dtype,
       name=var._shared_name)
   new_variable._maybe_initialize_checkpointable()
diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py
index 1b55e9db6a..ccce9e2f93 100644
--- a/tensorflow/python/ops/variable_scope.py
+++ b/tensorflow/python/ops/variable_scope.py
@@ -2506,6 +2506,7 @@ def default_variable_creator_v2(next_creator=None, **kwargs):
   variable_def = kwargs.get("variable_def", None)
   dtype = kwargs.get("dtype", None)
   import_scope = kwargs.get("import_scope", None)
+  constraint = kwargs.get("constraint", None)
 
   # Set trainable value based on synchronization value.
   synchronization = kwargs.get("synchronization", VariableSynchronization.AUTO)
@@ -2515,7 +2516,7 @@ def default_variable_creator_v2(next_creator=None, **kwargs):
   return resource_variable_ops.ResourceVariable(
       initial_value=initial_value, trainable=trainable,
       validate_shape=validate_shape, caching_device=caching_device,
-      name=name, dtype=dtype, variable_def=variable_def,
+      name=name, dtype=dtype, constraint=constraint, variable_def=variable_def,
       import_scope=import_scope)
 
 
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index d5ffab74a1..d01b95666b 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -343,6 +343,7 @@ class Variable(six.with_metaclass(VariableMetaclass,
                variable_def=None,
                dtype=None,
                import_scope=None,
+               constraint=None,
                synchronization=VariableSynchronization.AUTO,
                aggregation=VariableAggregation.NONE):
     """Creates a new variable with value `initial_value`.
@@ -384,6 +385,13 @@ class Variable(six.with_metaclass(VariableMetaclass,
         a Tensor), or `convert_to_tensor` will decide.
       import_scope: Optional `string`. Name scope to add to the
         `Variable.` Only used when initializing from protocol buffer.
+      constraint: An optional projection function to be applied to the variable
+        after being updated by an `Optimizer` (e.g. used to implement norm
+        constraints or value constraints for layer weights). The function must
+        take as input the unprojected Tensor representing the value of the
+        variable and return the Tensor for the projected value
+        (which must have the same shape). Constraints are not safe to
+        use when doing asynchronous distributed training.
       synchronization: Indicates when a distributed a variable will be
         aggregated. Accepted values are constants defined in the class
         `tf.VariableSynchronization`. By default the synchronization is set to
@@ -521,6 +529,16 @@ class Variable(six.with_metaclass(VariableMetaclass,
     """
     raise NotImplementedError
 
+  @property
+  def constraint(self):
+    """Returns the constraint function associated with this variable.
+
+    Returns:
+      The constraint function that was passed to the variable constructor.
+      Can be `None` if no constraint was passed.
+    """
+    raise NotImplementedError
+
   def assign(self, value, use_locking=False, name=None, read_value=True):
     """Assigns a new value to the variable.
 
@@ -1309,16 +1327,6 @@ class VariableV1(Variable):
       RuntimeError: If eager execution is enabled.
     """
 
-  @property
-  def constraint(self):
-    """Returns the constraint function associated with this variable.
-
-    Returns:
-      The constraint function that was passed to the variable constructor.
-      Can be `None` if no constraint was passed.
-    """
-    raise NotImplementedError
-
   SaveSliceInfo = Variable.SaveSliceInfo
 
 
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index d27eb15422..eaa563e84a 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -116,17 +116,16 @@ class _RefVariableProcessor(_OptimizableVariable):
 
   def update_op(self, optimizer, g):
     if isinstance(g, ops.Tensor):
-      # pylint: disable=protected-access
-      update_op = optimizer._apply_dense(g, self._v)
-      if getattr(self._v, "_constraint", None) is not None:
+      update_op = optimizer._apply_dense(g, self._v)  # pylint: disable=protected-access
+      if self._v.constraint is not None:
         with ops.control_dependencies([update_op]):
-          return self._v.assign(self._v._constraint(self._v))
+          return self._v.assign(self._v.constraint(self._v))
       else:
         return update_op
     else:
       assert isinstance(g, ops.IndexedSlices), ("Gradient ", g, " is neither a "
                                                 "tensor nor IndexedSlices.")
-      if getattr(self._v, "_constraint", None) is not None:
+      if self._v.constraint is not None:
         raise RuntimeError(
             "Cannot use a constraint function on a sparse variable.")
       # pylint: disable=protected-access
@@ -145,9 +144,9 @@ class _DenseReadResourceVariableProcessor(_OptimizableVariable):
   def update_op(self, optimizer, g):
     # pylint: disable=protected-access
     update_op = optimizer._resource_apply_dense(g, self._v.op.inputs[0])
-    if getattr(self._v, "_constraint", None) is not None:
+    if self._v.constraint is not None:
       with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v._constraint(self._v))
+        return self._v.assign(self._v.constraint(self._v))
     else:
       return update_op
 
@@ -164,15 +163,15 @@ class _DenseResourceVariableProcessor(_OptimizableVariable):
   def update_op(self, optimizer, g):
     # pylint: disable=protected-access
     if isinstance(g, ops.IndexedSlices):
-      if getattr(self._v, "_constraint", None) is not None:
+      if self._v.constraint is not None:
         raise RuntimeError(
             "Cannot use a constraint function on a sparse variable.")
       return optimizer._resource_apply_sparse_duplicate_indices(
           g.values, self._v, g.indices)
     update_op = optimizer._resource_apply_dense(g, self._v)
-    if getattr(self._v, "_constraint", None) is not None:
+    if self._v.constraint is not None:
       with ops.control_dependencies([update_op]):
-        return self._v.assign(self._v._constraint(self._v))
+        return self._v.assign(self._v.constraint(self._v))
     else:
       return update_op
 
diff --git a/tensorflow/python/training/optimizer_test.py b/tensorflow/python/training/optimizer_test.py
index ae6aca22aa..e175b5a799 100644
--- a/tensorflow/python/training/optimizer_test.py
+++ b/tensorflow/python/training/optimizer_test.py
@@ -244,14 +244,15 @@ class OptimizerTest(test.TestCase):
       opt_op = sgd_op.minimize(cost, global_step, [var0, var1])
       self.assertTrue(opt_op in ops.get_collection(ops.GraphKeys.TRAIN_OP))
 
-  @test_util.run_v1_only(
-      '`ResourceVariable` does not support `constraint` argument.')
+  @test_util.run_deprecated_v1
   def testConstraint(self):
     constraint_01 = lambda x: clip_ops.clip_by_value(x, -0.1, 0.)
     constraint_0 = lambda x: clip_ops.clip_by_value(x, 0., 1.)
     with self.cached_session():
-      var0 = variables.VariableV1([1.0, 2.0], constraint=constraint_01)
-      var1 = variables.VariableV1([3.0, 4.0], constraint=constraint_0)
+      var0 = variables.Variable([1.0, 2.0],
+                                constraint=constraint_01)
+      var1 = variables.Variable([3.0, 4.0],
+                                constraint=constraint_0)
       cost = 5 * var0 + 3 * var1
       global_step = variables.Variable(
           array_ops.zeros([], dtypes.int64), name='global_step')
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
index 12963e0c89..6136c8fbe7 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt
@@ -7,6 +7,10 @@ tf_class {
     name: "SaveSliceInfo"
     mtype: "<type \'type\'>"
   }
+  member {
+    name: "constraint"
+    mtype: "<type \'property\'>"
+  }
   member {
     name: "device"
     mtype: "<type \'property\'>"
@@ -45,7 +49,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'initial_value\', \'trainable\', \'validate_shape\', \'caching_device\', \'name\', \'variable_def\', \'dtype\', \'import_scope\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
+    argspec: "args=[\'self\', \'initial_value\', \'trainable\', \'validate_shape\', \'caching_device\', \'name\', \'variable_def\', \'dtype\', \'import_scope\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
   }
   member_method {
     name: "assign"
-- 
GitLab


From d5d3416d4f5a2e98a4b66432b025cdfaf2401845 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Fri, 14 Dec 2018 15:51:13 -0800
Subject: [PATCH 634/873] Add zip and enumerate to the builtins list.

PiperOrigin-RevId: 225617494
---
 tensorflow/python/autograph/pyct/inspect_utils.py      | 5 ++++-
 tensorflow/python/autograph/pyct/inspect_utils_test.py | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/autograph/pyct/inspect_utils.py b/tensorflow/python/autograph/pyct/inspect_utils.py
index 07453e8d48..6d9bc43d34 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils.py
@@ -31,15 +31,18 @@ from tensorflow.python.util import tf_inspect
 
 # These functions test negative for isinstance(*, types.BuiltinFunctionType)
 # and inspect.isbuiltin, and are generally not visible in globals().
+# TODO(mdan): Find a more generic way to test this - just enumerate __builtin__?
 SPECIAL_BUILTINS = {
     'dict': dict,
+    'enumerate': enumerate,
     'float': float,
     'int': int,
     'len': len,
     'list': list,
     'print': print,
     'range': range,
-    'tuple': tuple
+    'tuple': tuple,
+    'zip': zip
 }
 
 if six.PY2:
diff --git a/tensorflow/python/autograph/pyct/inspect_utils_test.py b/tensorflow/python/autograph/pyct/inspect_utils_test.py
index fd2cd04a22..4c4c0977b0 100644
--- a/tensorflow/python/autograph/pyct/inspect_utils_test.py
+++ b/tensorflow/python/autograph/pyct/inspect_utils_test.py
@@ -407,10 +407,12 @@ class InspectUtilsTest(test.TestCase):
         Superclass)
 
   def test_isbuiltin(self):
-    self.assertTrue(inspect_utils.isbuiltin(range))
+    self.assertTrue(inspect_utils.isbuiltin(enumerate))
     self.assertTrue(inspect_utils.isbuiltin(float))
     self.assertTrue(inspect_utils.isbuiltin(int))
     self.assertTrue(inspect_utils.isbuiltin(len))
+    self.assertTrue(inspect_utils.isbuiltin(range))
+    self.assertTrue(inspect_utils.isbuiltin(zip))
     self.assertFalse(inspect_utils.isbuiltin(function_decorator))
 
   def test_super_wrapper_for_dynamic_attrs(self):
-- 
GitLab


From 8cafd2962c63387de4c91a9f68610553cf0a9a6e Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Fri, 14 Dec 2018 16:11:50 -0800
Subject: [PATCH 635/873] Clang format fix

---
 tensorflow/compiler/jit/xla_gpu_device.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index 57b1547f16..0350e8b36d 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -53,8 +53,8 @@ XlaGpuDeviceFactory::ParseVisibleDeviceList(const string& visible_device_list) {
     if (!absl::SimpleAtoi(platform_gpu_id_str, &platform_gpu_id)) {
       return errors::InvalidArgument(
           "Could not parse entry in 'visible_device_list': '",
-          platform_gpu_id_str,
-          "'. visible_device_list = ", visible_device_list);
+          platform_gpu_id_str, "'. visible_device_list = ",
+          visible_device_list);
     }
     gpu_ids.insert(platform_gpu_id);
   }
@@ -83,7 +83,7 @@ Status XlaGpuDeviceFactory::CreateDevices(
   }
   string allowed_gpus =
       session_options.config.gpu_options().visible_device_list();
-  auto parsed_gpus=ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
+  auto parsed_gpus = ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
   // We want to fill the gpu_ids set with all devices if config string is empty.
   std::set<int> gpu_ids;
   int num_visible_devices = platform.ValueOrDie()->VisibleDeviceCount();
-- 
GitLab


From a8009f4970d38e107b4e53571fbceb7fa83caf56 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Fri, 14 Dec 2018 16:10:36 -0800
Subject: [PATCH 636/873] Add UnifiedGRU as the new GRU implementation for
 tf2.0.

The unified GRU layer use the same approach as the UnifiedLSTM, which use grappler to swap the backend in graph mode. The major change comparing to existing GRU are:

1. recurrent activation default has been changed from "hard_sigmoid" to "sigmoid", so that it matches the default CuDNN implemetation.
2. 'reset_after' default has been change from False to True, so that the defaults match the CuDNN behavior.

Note that the default parameter value change will cause any existing v1 checkpoint to fail. If user want to keep preserve the existing v1 behavior, please construct the layer with 'recurrent_activation=hard_sigmoid' and 'reset_after=False'.

All the existing test cases for GRU layer has been added to ensure the correctness for UnifiedGRU.

PiperOrigin-RevId: 225620163
---
 tensorflow/python/keras/BUILD                 |  13 +
 tensorflow/python/keras/layers/__init__.py    |   1 +
 .../python/keras/layers/cudnn_recurrent.py    |   2 +-
 tensorflow/python/keras/layers/recurrent.py   | 420 +++++++++++-
 .../python/keras/layers/unified_gru_test.py   | 599 ++++++++++++++++++
 ...sorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt | 201 ------
 .../v2/tensorflow.keras.layers.-g-r-u.pbtxt   |   3 +-
 .../golden/v2/tensorflow.keras.layers.pbtxt   |   4 -
 tensorflow/tools/compatibility/renames_v2.py  |   1 +
 9 files changed, 1024 insertions(+), 220 deletions(-)
 create mode 100644 tensorflow/python/keras/layers/unified_gru_test.py
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 3f4b42ca03..082c9ebfb3 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -559,6 +559,19 @@ cuda_py_test(
     shard_count = 4,
 )
 
+cuda_py_test(
+    name = "unified_gru_test",
+    size = "medium",
+    srcs = ["layers/unified_gru_test.py"],
+    additional_deps = [
+        ":keras",
+        "@absl_py//absl/testing:parameterized",
+        "//third_party/py/numpy",
+        "//tensorflow/python:client_testlib",
+    ],
+    shard_count = 4,
+)
+
 py_test(
     name = "serialization_test",
     size = "small",
diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py
index df7571e5d5..285388f340 100644
--- a/tensorflow/python/keras/layers/__init__.py
+++ b/tensorflow/python/keras/layers/__init__.py
@@ -149,6 +149,7 @@ from tensorflow.python.keras.layers.recurrent import PeepholeLSTMCell
 from tensorflow.python.keras.layers.recurrent import SimpleRNN
 from tensorflow.python.keras.layers.recurrent import GRU
 from tensorflow.python.keras.layers.recurrent import LSTM
+from tensorflow.python.keras.layers.recurrent import UnifiedGRU
 from tensorflow.python.keras.layers.recurrent import UnifiedLSTM
 
 # Convolutional-recurrent layers.
diff --git a/tensorflow/python/keras/layers/cudnn_recurrent.py b/tensorflow/python/keras/layers/cudnn_recurrent.py
index e695a68b60..e9925eeba6 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent.py
@@ -158,7 +158,7 @@ class _CuDNNRNN(RNN):
         RNN, self).get_losses_for(inputs=inputs)
 
 
-@tf_export('keras.layers.CuDNNGRU')
+@tf_export(v1=['keras.layers.CuDNNGRU'])
 class CuDNNGRU(_CuDNNRNN):
   """Fast GRU implementation backed by cuDNN.
 
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 604544efbe..3051416c6e 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -47,6 +47,14 @@ from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
 
+# The following string constants are used by Defun approach for unified backend
+# of LSTM and GRU.
+_DEFUN_API_NAME_ATTRIBUTE = 'experimental_api_implements'
+_DEFUN_DEVICE_ATTRIBUTE = 'experimental_api_preferred_device'
+_CPU_DEVICE_NAME = 'CPU'
+_GPU_DEVICE_NAME = 'GPU'
+
+
 @tf_export('keras.layers.StackedRNNCells')
 class StackedRNNCells(Layer):
   """Wrapper allowing a stack of RNN cells to behave as a single cell.
@@ -1655,7 +1663,7 @@ class GRUCell(Layer):
     return _generate_zero_filled_state_for_cell(self, inputs, batch_size, dtype)
 
 
-@tf_export('keras.layers.GRU')
+@tf_export(v1=['keras.layers.GRU'])
 class GRU(RNN):
   """Gated Recurrent Unit - Cho et al. 2014.
 
@@ -1914,6 +1922,391 @@ class GRU(RNN):
     return cls(**config)
 
 
+@tf_export('keras.layers.GRU', v1=[])
+class UnifiedGRU(GRU):
+  """Gated Recurrent Unit - Cho et al. 2014.
+
+  `UnifiedGRU` unifies the implementations between standard `GRU` layer and
+  `CuDNNGRU` layer. Based on available runtime hardware and constraints,
+  `UnifiedGRU` will choose different implementations to maximize the
+  performance. For instance, if GPU is available and all the parameters meet the
+  requirement of CuDNN kernel, `UnifiedGRU` will use CuDNN kernel for the
+  calculation. The requirements to use CuDNN kernel are:
+
+    1. `activation` == 'tanh'
+    2. `recurrent_activation` == 'sigmoid'
+    3. `recurrent_dropout` == 0
+    4. `unroll` is False
+    5. `use_bias` is True
+    6. `reset_after` is True
+    7. Use masking in previous layers.
+
+  There are two variants. The default one is based on
+  [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to hidden
+  state before matrix multiplication. The other one is based on
+  [original](https://arxiv.org/abs/1406.1078v1) and has the order reversed.
+
+  The second variant is compatible with CuDNNGRU (GPU-only) and allows
+  inference on CPU. Thus it has separate biases for `kernel` and
+  `recurrent_kernel`. Use `'reset_after'=True` and
+  `recurrent_activation='sigmoid'`.
+
+  Arguments:
+      units: Positive integer, dimensionality of the output space.
+      activation: Activation function to use.
+          Default: hyperbolic tangent (`tanh`).
+          If you pass `None`, no activation is applied
+          (ie. "linear" activation: `a(x) = x`).
+      recurrent_activation: Activation function to use
+          for the recurrent step.
+          Default: sigmoid (`sigmoid`).
+          If you pass `None`, no activation is applied
+          (ie. "linear" activation: `a(x) = x`).
+      use_bias: Boolean, whether the layer uses a bias vector.
+      kernel_initializer: Initializer for the `kernel` weights matrix,
+          used for the linear transformation of the inputs.
+      recurrent_initializer: Initializer for the `recurrent_kernel`
+          weights matrix,
+          used for the linear transformation of the recurrent state.
+      bias_initializer: Initializer for the bias vector.
+      kernel_regularizer: Regularizer function applied to
+          the `kernel` weights matrix.
+      recurrent_regularizer: Regularizer function applied to
+          the `recurrent_kernel` weights matrix.
+      bias_regularizer: Regularizer function applied to the bias vector.
+      activity_regularizer: Regularizer function applied to
+          the output of the layer (its "activation")..
+      kernel_constraint: Constraint function applied to
+          the `kernel` weights matrix.
+      recurrent_constraint: Constraint function applied to
+          the `recurrent_kernel` weights matrix.
+      bias_constraint: Constraint function applied to the bias vector.
+      dropout: Float between 0 and 1.
+          Fraction of the units to drop for
+          the linear transformation of the inputs.
+      recurrent_dropout: Float between 0 and 1.
+          Fraction of the units to drop for
+          the linear transformation of the recurrent state.
+      implementation: Implementation mode, either 1 or 2.
+          Mode 1 will structure its operations as a larger number of
+          smaller dot products and additions, whereas mode 2 will
+          batch them into fewer, larger operations. These modes will
+          have different performance profiles on different hardware and
+          for different applications.
+      return_sequences: Boolean. Whether to return the last output
+          in the output sequence, or the full sequence.
+      return_state: Boolean. Whether to return the last state
+          in addition to the output.
+      go_backwards: Boolean (default False).
+          If True, process the input sequence backwards and return the
+          reversed sequence.
+      stateful: Boolean (default False). If True, the last state
+          for each sample at index i in a batch will be used as initial
+          state for the sample of index i in the following batch.
+      unroll: Boolean (default False).
+          If True, the network will be unrolled,
+          else a symbolic loop will be used.
+          Unrolling can speed-up a RNN,
+          although it tends to be more memory-intensive.
+          Unrolling is only suitable for short sequences.
+      reset_after: GRU convention (whether to apply reset gate after or
+          before matrix multiplication). False = "before",
+          True = "after" (default and CuDNN compatible).
+  """
+
+  def __init__(self,
+               units,
+               activation='tanh',
+               recurrent_activation='sigmoid',
+               use_bias=True,
+               kernel_initializer='glorot_uniform',
+               recurrent_initializer='orthogonal',
+               bias_initializer='zeros',
+               kernel_regularizer=None,
+               recurrent_regularizer=None,
+               bias_regularizer=None,
+               activity_regularizer=None,
+               kernel_constraint=None,
+               recurrent_constraint=None,
+               bias_constraint=None,
+               dropout=0.,
+               recurrent_dropout=0.,
+               implementation=1,
+               return_sequences=False,
+               return_state=False,
+               go_backwards=False,
+               stateful=False,
+               unroll=False,
+               time_major=False,
+               reset_after=True,
+               **kwargs):
+    # return_runtime is a flag for testing, which shows the real backend
+    # implementation chosen by grappler in graph mode.
+    self._return_runtime = kwargs.pop('return_runtime', False)
+
+    super(UnifiedGRU, self).__init__(
+        units,
+        activation=activation,
+        recurrent_activation=recurrent_activation,
+        use_bias=use_bias,
+        kernel_initializer=kernel_initializer,
+        recurrent_initializer=recurrent_initializer,
+        bias_initializer=bias_initializer,
+        kernel_regularizer=kernel_regularizer,
+        recurrent_regularizer=recurrent_regularizer,
+        bias_regularizer=bias_regularizer,
+        activity_regularizer=activity_regularizer,
+        kernel_constraint=kernel_constraint,
+        recurrent_constraint=recurrent_constraint,
+        bias_constraint=bias_constraint,
+        dropout=dropout,
+        recurrent_dropout=recurrent_dropout,
+        implementation=implementation,
+        return_sequences=return_sequences,
+        return_state=return_state,
+        go_backwards=go_backwards,
+        stateful=stateful,
+        unroll=unroll,
+        time_major=time_major,
+        reset_after=reset_after,
+        **kwargs)
+    self._dropout_mask = None
+    # CuDNN uses following setting by default and not configurable.
+    self.could_use_cudnn = (
+        activation == 'tanh' and recurrent_activation == 'sigmoid' and
+        recurrent_dropout == 0 and not unroll and use_bias and
+        reset_after is True)
+
+  def call(self, inputs, mask=None, training=None, initial_state=None):
+    # GRU does not support constants. Ignore it during process.
+    inputs, initial_state, _ = self._process_inputs(inputs, initial_state, None)
+
+    if isinstance(mask, list):
+      mask = mask[0]
+
+    input_shape = K.int_shape(inputs)
+    timesteps = input_shape[0] if self.time_major else input_shape[1]
+
+    if mask is not None or not self.could_use_cudnn:
+      # CuDNN does not support masking, fall back to use the normal GRU.
+      kwargs = {'training': training}
+      self.cell._dropout_mask = None
+      self.cell._recurrent_dropout_mask = None
+
+      def step(cell_inputs, cell_states):
+        return self.cell.call(cell_inputs, cell_states, **kwargs)
+
+      last_output, outputs, states = K.rnn(
+          step,
+          inputs,
+          initial_state,
+          constants=None,
+          go_backwards=self.go_backwards,
+          mask=mask,
+          unroll=self.unroll,
+          input_length=timesteps,
+          time_major=self.time_major,
+          zero_output_for_mask=self.zero_output_for_mask)
+      # This is a dummy tensor for testing purpose.
+      runtime = constant_op.constant(
+          'unknown', dtype=dtypes.string, name='runtime')
+    else:
+      last_output, outputs, runtime, states = self._defun_gru_call(
+          inputs, initial_state, training)
+
+    if self.stateful:
+      updates = [state_ops.assign(self.states[0], states[0])]
+      self.add_update(updates, inputs)
+
+    if self.return_sequences:
+      output = outputs
+    else:
+      output = last_output
+
+    if self.return_state:
+      return [output] + states
+    elif self._return_runtime:
+      return output, runtime
+    else:
+      return output
+
+  def _defun_gru_call(self, inputs, initial_state, training):
+    # Use the new defun approach for backend implementation swap.
+    # Note that different implementations need to have same function
+    # signature, eg, the tensor parameters need to have same shape and dtypes.
+    if self.go_backwards:
+      # Reverse time axis.
+      inputs = K.reverse(inputs, 0 if self.time_major else 1)
+    if 0 < self.dropout < 1:
+      if self._dropout_mask is None:
+        self._dropout_mask = _generate_dropout_mask(
+            array_ops.ones_like(inputs),
+            self.dropout,
+            training=training,
+            count=3)
+
+      inputs *= self._dropout_mask[0]
+    experimental_api_name = 'gru_' + str(uuid.uuid4())
+    defun_standard_gru = _generate_defun_backend(
+        experimental_api_name, _CPU_DEVICE_NAME, standard_gru)
+    defun_cudnn_gru = _generate_defun_backend(
+        experimental_api_name, _GPU_DEVICE_NAME, cudnn_gru)
+    if ops.executing_eagerly_outside_functions():
+      # Under eager context, the device placement is already known. Prefer the
+      # GPU implementation when GPU is available.
+      if context.num_gpus() > 0:
+        last_output, outputs, new_h, runtime = defun_cudnn_gru(
+            inputs=inputs,
+            init_h=initial_state[0],
+            kernel=self.cell.kernel,
+            recurrent_kernel=self.cell.recurrent_kernel,
+            bias=self.cell.bias,
+            time_major=self.time_major)
+      else:
+        last_output, outputs, new_h, runtime = defun_standard_gru(
+            inputs=inputs,
+            init_h=initial_state[0],
+            kernel=self.cell.kernel,
+            recurrent_kernel=self.cell.recurrent_kernel,
+            bias=self.cell.bias,
+            activation=self.activation,
+            recurrent_activation=self.recurrent_activation,
+            time_major=self.time_major)
+    else:
+      # Call the normal GRU impl and register the CuDNN impl function. The
+      # grappler will kick in during session execution to optimize the graph.
+      last_output, outputs, new_h, runtime = defun_standard_gru(
+          inputs=inputs,
+          init_h=initial_state[0],
+          kernel=self.cell.kernel,
+          recurrent_kernel=self.cell.recurrent_kernel,
+          bias=self.cell.bias,
+          activation=self.activation,
+          recurrent_activation=self.recurrent_activation,
+          time_major=self.time_major)
+
+      function.register(defun_cudnn_gru, inputs, initial_state[0],
+                        self.cell.kernel, self.cell.recurrent_kernel,
+                        self.cell.bias, self.time_major)
+    states = [new_h]
+    return last_output, outputs, runtime, states
+
+
+def standard_gru(inputs, init_h, kernel, recurrent_kernel, bias, activation,
+                 recurrent_activation, time_major):
+  """GRU with standard kernel implementation.
+
+  This implementation can be run on all types of hardware.
+
+  This implementation lifts out all the layer weights and make them function
+  parameters. It has same number of tensor input params as the CuDNN
+  counterpart. The RNN step logic has been simplified, eg dropout and mask is
+  removed since CuDNN implementation does not support that.
+
+  Args:
+    inputs: input tensor of GRU layer.
+    init_h: initial state tensor for the cell output.
+    kernel: weights for cell kernel.
+    recurrent_kernel: weights for cell recurrent kernel.
+    bias: weights for cell kernel bias and recurrent bias. The bias contains the
+      combined input_bias and recurrent_bias.
+    activation: Activation function to use for output.
+    recurrent_activation: Activation function to use for hidden recurrent state.
+    time_major: boolean, whether the inputs are in the format of
+      [time, batch, feature] or [batch, time, feature].
+
+  Returns:
+    last_output: output tensor for the last timestep, which has shape
+      [batch, units].
+    outputs: output tensor for all timesteps, which has shape
+      [batch, time, units].
+    state_0: the cell output, which has same shape as init_h.
+    runtime: constant string tensor which indicate real runtime hardware. This
+      value is for testing purpose and should be used by user.
+  """
+  input_shape = K.int_shape(inputs)
+  timesteps = input_shape[0] if time_major else input_shape[1]
+
+  input_bias, recurrent_bias = array_ops.unstack(bias)
+
+  def step(cell_inputs, cell_states):
+    """Step function that will be used by Keras RNN backend."""
+    h_tm1 = cell_states[0]
+
+    # inputs projected by all gate matrices at once
+    matrix_x = K.dot(cell_inputs, kernel)
+    matrix_x = K.bias_add(matrix_x, input_bias)
+
+    x_z, x_r, x_h = array_ops.split(matrix_x, 3, axis=1)
+
+    # hidden state projected by all gate matrices at once
+    matrix_inner = K.dot(h_tm1, recurrent_kernel)
+    matrix_inner = K.bias_add(matrix_inner, recurrent_bias)
+
+    recurrent_z, recurrent_r, recurrent_h = array_ops.split(matrix_inner, 3,
+                                                            axis=1)
+    z = recurrent_activation(x_z + recurrent_z)
+    r = recurrent_activation(x_r + recurrent_r)
+    hh = activation(x_h + r * recurrent_h)
+
+    # previous and candidate state mixed by update gate
+    h = z * h_tm1 + (1 - z) * hh
+    return h, [h]
+
+  last_output, outputs, new_states = K.rnn(
+      step,
+      inputs, [init_h],
+      constants=None,
+      unroll=False,
+      time_major=time_major,
+      input_length=timesteps)
+  return last_output, outputs, new_states[0], constant_op.constant(
+      'cpu', dtype=dtypes.string, name='runtime')
+
+
+def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, time_major):
+  """GRU with CuDNN implementation which is only available for GPU."""
+  if not time_major:
+    inputs = array_ops.transpose(inputs, perm=(1, 0, 2))
+  init_h = array_ops.expand_dims(init_h, axis=0)
+
+  weights = array_ops.split(kernel, 3, axis=1)
+  weights += array_ops.split(recurrent_kernel, 3, axis=1)
+  # Note that the bias was initialized as shape (2, 3 * units), flat it into
+  # (6 * units)
+  bias = array_ops.split(K.flatten(bias), 6)
+  # Note that the gate order for CuDNN is different from the canonical format.
+  # canonical format is [z, r, h], whereas CuDNN is [r, z, h]. The swap need to
+  # be done for kernel, recurrent_kernel, input_bias, recurrent_bias.
+  # z is update gate weights.
+  # r is reset gate weights.
+  # h is output gate weights.
+  weights[0], weights[1] = weights[1], weights[0]
+  weights[3], weights[4] = weights[4], weights[3]
+  bias[0], bias[1] = bias[1], bias[0]
+  bias[3], bias[4] = bias[4], bias[3]
+
+  params = _canonical_to_params(
+      weights=weights,
+      biases=bias,
+      shape=constant_op.constant([-1]),
+      transpose_weights=True)
+
+  outputs, h, _, _ = gen_cudnn_rnn_ops.cudnn_rnn(
+      inputs,
+      input_h=init_h,
+      input_c=0,
+      params=params,
+      is_training=True,
+      rnn_mode='gru')
+  last_output = outputs[-1]
+  if not time_major:
+    outputs = array_ops.transpose(outputs, perm=[1, 0, 2])
+  h = h[0]
+  return last_output, outputs, h, constant_op.constant(
+      'cudnn', dtype=dtypes.string, name='runtime')
+
+
 @tf_export('keras.layers.LSTMCell')
 class LSTMCell(Layer):
   """Cell class for the LSTM layer.
@@ -2718,18 +3111,10 @@ class UnifiedLSTM(LSTM):
       # LSTM layer added into same graph, and it will be able to pair up the
       # different implementations across them.
       experimental_api_name = 'lstm_' + str(uuid.uuid4())
-      standard_lstm_attributes = {
-          'experimental_api_implements': experimental_api_name,
-          'experimental_api_preferred_device': 'CPU',
-      }
-      cudnn_lstm_attributes = {
-          'experimental_api_implements': experimental_api_name,
-          'experimental_api_preferred_device': 'GPU',
-      }
-      defun_standard_lstm = function.defun_with_attributes(
-          standard_lstm, attributes=standard_lstm_attributes)
-      defun_cudnn_lstm = function.defun_with_attributes(
-          cudnn_lstm, attributes=cudnn_lstm_attributes)
+      defun_standard_lstm = _generate_defun_backend(
+          experimental_api_name, _CPU_DEVICE_NAME, standard_lstm)
+      defun_cudnn_lstm = _generate_defun_backend(
+          experimental_api_name, _GPU_DEVICE_NAME, cudnn_lstm)
 
       if ops.executing_eagerly_outside_functions():
         # Under eager context, the device placement is already known. Prefer the
@@ -3017,3 +3402,12 @@ def _generate_zero_filled_state(batch_size_tensor, state_size, dtype):
     return nest.map_structure(create_zeros, state_size)
   else:
     return create_zeros(state_size)
+
+
+def _generate_defun_backend(unique_api_name, preferred_device, func):
+  function_attributes = {
+      _DEFUN_API_NAME_ATTRIBUTE: unique_api_name,
+      _DEFUN_DEVICE_ATTRIBUTE: preferred_device,
+  }
+  return function.defun_with_attributes(func=func,
+                                        attributes=function_attributes)
diff --git a/tensorflow/python/keras/layers/unified_gru_test.py b/tensorflow/python/keras/layers/unified_gru_test.py
new file mode 100644
index 0000000000..5d482b866d
--- /dev/null
+++ b/tensorflow/python/keras/layers/unified_gru_test.py
@@ -0,0 +1,599 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for UnifiedGRU layer."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python import keras
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
+
+
+# Global config for grappler setting that is used for graph mode test.
+_rewrites = rewriter_config_pb2.RewriterConfig()
+_rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF
+_customer_optimizer = _rewrites.custom_optimizers.add()
+_customer_optimizer.name = 'ExperimentalImplementationSelector'
+_rewrites.min_graph_nodes = -1
+_graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites)
+_config = config_pb2.ConfigProto(graph_options=_graph_options)
+
+
+@keras_parameterized.run_all_keras_modes(config=_config)
+class UnifiedGRUTest(keras_parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ('non_tan_activation', 'relu', 'sigmoid', 0, False, True, True),
+      ('non_sigmoid_recur_activation', 'tanh', 'relu', 0, False, True, True),
+      ('use_recurrent_dropout', 'tanh', 'sigmoid', 0.1, False, True, True),
+      ('unroll', 'tanh', 'sigmoid', 0, True, True, True),
+      ('not_use_bias', 'tanh', 'sigmoid', 0, False, False, True),
+      ('not_reset_after', 'tanh', 'sigmoid', 0, False, True, False)
+  )
+  def test_could_use_defun_backend(self, activation, recurrent_activation,
+                                   recurrent_dropout, unroll, use_bias,
+                                   reset_after):
+    layer = keras.layers.UnifiedGRU(1,
+                                    activation=activation,
+                                    recurrent_activation=recurrent_activation,
+                                    recurrent_dropout=recurrent_dropout,
+                                    unroll=unroll,
+                                    use_bias=use_bias,
+                                    reset_after=reset_after)
+    self.assertFalse(layer.could_use_cudnn)
+
+  def test_keras_model_with_gru(self):
+    input_shape = 10
+    rnn_state_size = 8
+    output_shape = 8
+    timestep = 4
+    batch = 100
+    epoch = 10
+
+    (x_train, y_train), _ = testing_utils.get_test_data(
+        train_samples=batch,
+        test_samples=0,
+        input_shape=(timestep, input_shape),
+        num_classes=output_shape)
+    y_train = keras.utils.to_categorical(y_train, output_shape)
+
+    layer = keras.layers.UnifiedGRU(rnn_state_size)
+
+    inputs = keras.layers.Input(
+        shape=[timestep, input_shape], dtype=dtypes.float32)
+
+    outputs = layer(inputs)
+    model = keras.models.Model(inputs, outputs)
+    model.compile('rmsprop', loss='mse')
+    model.fit(x_train, y_train, epochs=epoch)
+    model.evaluate(x_train, y_train)
+    model.predict(x_train)
+
+  def test_dynamic_behavior_GRU(self):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+    layer = keras.layers.UnifiedGRU(units, input_shape=(None, embedding_dim))
+    model = keras.models.Sequential()
+    model.add(layer)
+    model.compile(gradient_descent.GradientDescentOptimizer(0.001), 'mse')
+    x = np.random.random((num_samples, timesteps, embedding_dim))
+    y = np.random.random((num_samples, units))
+    model.train_on_batch(x, y)
+
+  def test_stacking_GRU(self):
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.UnifiedGRU(10, return_sequences=True, unroll=False))
+    model.add(keras.layers.UnifiedGRU(5, return_sequences=True, unroll=False))
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
+
+  def test_from_config_GRU(self):
+    layer_class = keras.layers.UnifiedGRU
+    for stateful in (False, True):
+      l1 = layer_class(units=1, stateful=stateful)
+      l2 = layer_class.from_config(l1.get_config())
+      assert l1.get_config() == l2.get_config()
+
+
+# TODO(scottzhu): Re-enable those tests in v2 mode once bugs attached are fixed.
+@test_util.run_v1_only
+class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
+
+  # b/120911602
+  def test_unified_gru_feature_parity_with_canonical_gru(self):
+    with context.eager_mode():
+      # Run this test under eager only due to b/120160788 for model.set_weights.
+      input_shape = 10
+      rnn_state_size = 8
+      timestep = 4
+      batch = 20
+
+      (x_train, y_train), _ = testing_utils.get_test_data(
+          train_samples=batch,
+          test_samples=0,
+          input_shape=(timestep, input_shape),
+          num_classes=rnn_state_size)
+      y_train = keras.utils.to_categorical(y_train, rnn_state_size)
+
+      inputs = keras.layers.Input(
+          shape=[timestep, input_shape], dtype=dtypes.float32)
+      gru_layer = keras.layers.GRU(rnn_state_size,
+                                   recurrent_activation='sigmoid',
+                                   reset_after=True)
+      output = gru_layer(inputs)
+      gru_model = keras.models.Model(inputs, output)
+      weights = gru_model.get_weights()
+      y_1 = gru_model.predict(x_train)
+      gru_model.compile('rmsprop', 'mse')
+      gru_model.fit(x_train, y_train)
+      y_2 = gru_model.predict(x_train)
+
+      with test_util.device(use_gpu=True):
+        cudnn_layer = keras.layers.UnifiedGRU(rnn_state_size,
+                                              recurrent_activation='sigmoid',
+                                              reset_after=True)
+        cudnn_model = keras.models.Model(inputs, cudnn_layer(inputs))
+      cudnn_model.set_weights(weights)
+      y_3 = cudnn_model.predict(x_train)
+      cudnn_model.compile('rmsprop', 'mse')
+      cudnn_model.fit(x_train, y_train)
+      y_4 = cudnn_model.predict(x_train)
+
+      self.assertAllClose(y_1, y_3)
+      self.assertAllClose(y_2, y_4)
+
+  # b/120911602
+  @parameterized.named_parameters(
+      # test_name, use_bias, bias_initializer, activation
+      ('normal', True, 'zeros'),
+      ('no_bias', False, 'zeros'),
+      ('random_bias', True, 'random_uniform'),
+  )
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_unified_gru_model_save_load(self, use_bias, bias_initializer):
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir)
+    h5_path = os.path.join(temp_dir, 'test.h5')
+
+    batch = 10
+    timestep = 3
+    input_dim = 5
+    units = 2
+
+    x = np.random.random((batch, timestep, input_dim))
+
+    def build_model():
+      inputs = keras.layers.Input(
+          shape=[timestep, input_dim], dtype=dtypes.float32)
+      layer = keras.layers.UnifiedGRU(
+          units,
+          use_bias=use_bias,
+          bias_initializer=bias_initializer)
+      output = layer(inputs)
+      return keras.models.Model(inputs, output), layer
+
+    model, layer = build_model()
+    y_ref = model.predict(x)
+    model.save_weights(h5_path)
+
+    cloned_model, new_layer = build_model()
+    cloned_model.load_weights(h5_path)
+    y = cloned_model.predict(x)
+
+    self.assertAllClose(y, y_ref)
+    self.assertAllClose(layer.get_weights(), new_layer.get_weights())
+
+  # b/120911602
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_unified_gru_output_on_multiple_kernel(self):
+    input_shape = 10
+    rnn_state_size = 8
+    timestep = 4
+    batch = 100
+
+    x_train = np.random.random((batch, timestep, input_shape))
+
+    inputs = keras.layers.Input(
+        shape=[timestep, input_shape], dtype=dtypes.float32)
+    with test_util.device(use_gpu=False):
+      layer = keras.layers.UnifiedGRU(rnn_state_size)
+      output = layer(inputs)
+      cpu_model = keras.models.Model(inputs, output)
+      weights = cpu_model.get_weights()
+      y_1 = cpu_model.predict(x_train)
+
+    with test_util.device(use_gpu=True):
+      layer = keras.layers.UnifiedGRU(rnn_state_size)
+      output = layer(inputs)
+      gpu_model = keras.models.Model(inputs, output)
+      gpu_model.set_weights(weights)
+      y_2 = gpu_model.predict(x_train)
+
+    # Note that CuDNN uses 'sigmoid' as activation, so the unified GRU uses
+    # 'sigmoid' as default. Construct the canonical GRU with sigmoid to achieve
+    # the same output.
+    with test_util.device(use_gpu=True):
+      layer = keras.layers.GRU(rnn_state_size,
+                               recurrent_activation='sigmoid',
+                               reset_after=True)
+      output = layer(inputs)
+      canonical_model = keras.models.Model(inputs, output)
+      canonical_model.set_weights(weights)
+      y_3 = canonical_model.predict(x_train)
+
+    self.assertAllClose(y_1, y_2)
+    self.assertAllClose(y_2, y_3)
+
+  # b/120911602
+  @parameterized.named_parameters(
+      # test_name, time_major, go_backwards
+      ('normal', False, False),
+      ('time_major', True, False),
+      ('go_backwards', False, True),
+      ('both', True, True),
+  )
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_time_major_and_go_backward(self, time_major, go_backwards):
+    input_shape = 10
+    rnn_state_size = 8
+    timestep = 4
+    batch = 100
+
+    x_train = np.random.random((batch, timestep, input_shape))
+
+    def build_model(layer_cls):
+      inputs = keras.layers.Input(
+          shape=[timestep, input_shape], dtype=dtypes.float32)
+      layer = layer_cls(rnn_state_size,
+                        recurrent_activation='sigmoid',
+                        time_major=time_major,
+                        return_sequences=True,
+                        go_backwards=go_backwards,
+                        reset_after=True)
+      if time_major:
+        converted_input = keras.layers.Lambda(
+            lambda t: array_ops.transpose(t, [1, 0, 2]))(inputs)
+        outputs = layer(converted_input)
+        outputs = keras.layers.Lambda(
+            lambda t: array_ops.transpose(t, [1, 0, 2]))(outputs)
+      else:
+        outputs = layer(inputs)
+      return keras.models.Model(inputs, outputs)
+
+    gru_model = build_model(keras.layers.GRU)
+    y_ref = gru_model.predict(x_train)
+    weights = gru_model.get_weights()
+
+    unified_gru_model = build_model(keras.layers.UnifiedGRU)
+    unified_gru_model.set_weights(weights)
+    y = unified_gru_model.predict(x_train)
+
+    self.assertAllClose(y, y_ref)
+
+  # b/120911602
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_with_masking_layer_GRU(self):
+    layer_class = keras.layers.UnifiedGRU
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.Masking(input_shape=(3, 4)))
+    model.add(layer_class(units=5, return_sequences=True, unroll=False))
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=gradient_descent.GradientDescentOptimizer(0.001))
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
+
+  # b/120911602
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_masking_with_stacking_GRU(self):
+    inputs = np.random.random((2, 3, 4))
+    targets = np.abs(np.random.random((2, 3, 5)))
+    targets /= targets.sum(axis=-1, keepdims=True)
+    model = keras.models.Sequential()
+    model.add(keras.layers.Masking(input_shape=(3, 4)))
+    model.add(keras.layers.UnifiedGRU(10, return_sequences=True, unroll=False))
+    model.add(keras.layers.UnifiedGRU(5, return_sequences=True, unroll=False))
+    model.compile(
+        loss='categorical_crossentropy',
+        optimizer=gradient_descent.GradientDescentOptimizer(0.01))
+    model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
+
+  # b/120911602
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_return_sequences_GRU(self):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+    testing_utils.layer_test(
+        keras.layers.UnifiedGRU,
+        kwargs={'units': units,
+                'return_sequences': True},
+        input_shape=(num_samples, timesteps, embedding_dim))
+
+  # b/120911602
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_dropout_GRU(self):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+    testing_utils.layer_test(
+        keras.layers.UnifiedGRU,
+        kwargs={'units': units,
+                'dropout': 0.1,
+                'recurrent_dropout': 0.1},
+        input_shape=(num_samples, timesteps, embedding_dim))
+
+  # b/120911602
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_constraints_GRU(self):
+    embedding_dim = 4
+    layer_class = keras.layers.UnifiedGRU
+    k_constraint = keras.constraints.max_norm(0.01)
+    r_constraint = keras.constraints.max_norm(0.01)
+    b_constraint = keras.constraints.max_norm(0.01)
+    layer = layer_class(
+        5,
+        return_sequences=False,
+        weights=None,
+        input_shape=(None, embedding_dim),
+        kernel_constraint=k_constraint,
+        recurrent_constraint=r_constraint,
+        bias_constraint=b_constraint)
+    layer.build((None, None, embedding_dim))
+    self.assertEqual(layer.cell.kernel.constraint, k_constraint)
+    self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
+    self.assertEqual(layer.cell.bias.constraint, b_constraint)
+
+  # b/120911602
+  @parameterized.parameters([0, 1, 2])
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_implementation_mode_GRU(self, implementation_mode):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+    testing_utils.layer_test(
+        keras.layers.UnifiedGRU,
+        kwargs={'units': units,
+                'implementation': implementation_mode},
+        input_shape=(num_samples, timesteps, embedding_dim))
+
+  # b/120911602
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_statefulness_GRU(self):
+    num_samples = 2
+    timesteps = 3
+    embedding_dim = 4
+    units = 2
+    layer_class = keras.layers.UnifiedGRU
+    model = keras.models.Sequential()
+    model.add(
+        keras.layers.Embedding(
+            4,
+            embedding_dim,
+            mask_zero=True,
+            input_length=timesteps,
+            batch_input_shape=(num_samples, timesteps)))
+    layer = layer_class(
+        units, return_sequences=False, stateful=True, weights=None)
+    model.add(layer)
+    model.compile(optimizer='sgd', loss='mse')
+    out1 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertEqual(out1.shape, (num_samples, units))
+
+    # train once so that the states change
+    model.train_on_batch(
+        np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
+    out2 = model.predict(np.ones((num_samples, timesteps)))
+
+    # if the state is not reset, output should be different
+    self.assertNotEqual(out1.max(), out2.max())
+
+    # check that output changes after states are reset
+    # (even though the model itself didn't change)
+    layer.reset_states()
+    out3 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertNotEqual(out2.max(), out3.max())
+
+    # check that container-level reset_states() works
+    model.reset_states()
+    out4 = model.predict(np.ones((num_samples, timesteps)))
+    np.testing.assert_allclose(out3, out4, atol=1e-5)
+
+    # check that the call to `predict` updated the states
+    out5 = model.predict(np.ones((num_samples, timesteps)))
+    self.assertNotEqual(out4.max(), out5.max())
+
+    # Check masking
+    layer.reset_states()
+
+    left_padded_input = np.ones((num_samples, timesteps))
+    left_padded_input[0, :1] = 0
+    left_padded_input[1, :2] = 0
+    out6 = model.predict(left_padded_input)
+
+    layer.reset_states()
+
+    right_padded_input = np.ones((num_samples, timesteps))
+    right_padded_input[0, -1:] = 0
+    right_padded_input[1, -2:] = 0
+    out7 = model.predict(right_padded_input)
+
+    np.testing.assert_allclose(out7, out6, atol=1e-5)
+
+
+class GRULayerGraphOnlyTest(test.TestCase):
+
+  # Need session for test
+  @test_util.run_deprecated_v1
+  def test_unifiedGRU(self):
+    input_shape = 10
+    rnn_state_size = 8
+    output_shape = 8
+    timestep = 4
+    batch = 100
+    epoch = 1
+
+    with self.cached_session(config=_config, use_gpu=True) as sess:
+      (x_train, y_train), _ = testing_utils.get_test_data(
+          train_samples=batch,
+          test_samples=0,
+          input_shape=(timestep, input_shape),
+          num_classes=output_shape)
+      y_train = keras.utils.to_categorical(y_train, output_shape)
+
+      layer = keras.layers.UnifiedGRU(rnn_state_size, return_runtime=True)
+
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
+      predict = array_ops.placeholder(
+          dtypes.float32, shape=(None, output_shape), name='predict')
+
+      outputs, runtime = layer(inputs)
+      loss = losses.softmax_cross_entropy(predict, outputs)
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      train_op = optimizer.minimize(loss)
+
+      sess.run([variables.global_variables_initializer()])
+      existing_loss = 0
+      for _ in range(epoch):
+        loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
+            inputs: x_train,
+            predict: y_train
+        })
+        if test.is_gpu_available():
+          self.assertEqual(runtime_value, b'cudnn')
+        else:
+          self.assertEqual(runtime_value, b'cpu')
+        # Make sure the loss is updated for every epoch
+        # (layer weights properly updated).
+        self.assertNotEqual(existing_loss, loss_value)
+        existing_loss = loss_value
+
+  # Need session for test
+  @test_util.run_deprecated_v1
+  def test_UnifiedGRU_with_cond(self):
+    # This test is to demonstrate the graph rewrite of grappler plugin under
+    # the condition that the function returns different number of internal
+    # states.
+    input_shape = 10
+    rnn_state_size = 8
+    output_shape = 8
+    timestep = 4
+    batch = 100
+    epoch = 1
+
+    with self.cached_session(config=_config, use_gpu=True) as sess:
+      (x_train, y_train), _ = testing_utils.get_test_data(
+          train_samples=batch,
+          test_samples=0,
+          input_shape=(timestep, input_shape),
+          num_classes=output_shape)
+      y_train = keras.utils.to_categorical(y_train, output_shape)
+
+      layer = keras.layers.UnifiedGRU(rnn_state_size, return_runtime=True)
+
+      inputs = array_ops.placeholder(
+          dtypes.float32, shape=(None, timestep, input_shape), name='inputs')
+      predict = array_ops.placeholder(
+          dtypes.float32, shape=(None, output_shape), name='predict')
+
+      zeros = array_ops.zeros([batch, output_shape])
+      dummy_runtime = constant_op.constant(
+          'unknown', dtype=dtypes.string, name='runtime')
+      a = constant_op.constant(0)
+      b = constant_op.constant(1)
+      # Will always run the GRU layer.
+      outputs, runtime = control_flow_ops.cond(
+          gen_math_ops.less(a, b),
+          lambda: layer(inputs),
+          lambda: (zeros, dummy_runtime))
+      loss = losses.softmax_cross_entropy(predict, outputs)
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      train_op = optimizer.minimize(loss)
+
+      sess.run([variables.global_variables_initializer()])
+      existing_loss = 0
+
+      for _ in range(epoch):
+        loss_value, _, runtime_value = sess.run([loss, train_op, runtime], {
+            inputs: x_train,
+            predict: y_train
+        })
+        if test.is_gpu_available():
+          self.assertEqual(runtime_value, b'cudnn')
+        else:
+          self.assertEqual(runtime_value, b'cpu')
+        # Make sure the loss is updated for every epoch
+        # (layer weights properly updated).
+        self.assertNotEqual(existing_loss, loss_value)
+        existing_loss = loss_value
+
+  # b/120919032
+  @test_util.run_deprecated_v1
+  def test_regularizers_GRU(self):
+    embedding_dim = 4
+    layer_class = keras.layers.UnifiedGRU
+    with self.cached_session(config=_config):
+      layer = layer_class(
+          5,
+          return_sequences=False,
+          weights=None,
+          input_shape=(None, embedding_dim),
+          kernel_regularizer=keras.regularizers.l1(0.01),
+          recurrent_regularizer=keras.regularizers.l1(0.01),
+          bias_regularizer='l2',
+          activity_regularizer='l1')
+      layer.build((None, None, 2))
+      self.assertEqual(len(layer.losses), 3)
+
+      x = keras.backend.variable(np.ones((2, 3, 2)))
+      layer(x)
+      self.assertEqual(len(layer.get_losses_for(x)), 1)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
deleted file mode 100644
index 2eba3fb954..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-cu-d-n-n-g-r-u.pbtxt
+++ /dev/null
@@ -1,201 +0,0 @@
-path: "tensorflow.keras.layers.CuDNNGRU"
-tf_class {
-  is_instance: "<class \'tensorflow.python.keras.layers.cudnn_recurrent.CuDNNGRU\'>"
-  is_instance: "<class \'tensorflow.python.keras.layers.cudnn_recurrent._CuDNNRNN\'>"
-  is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
-  is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
-  is_instance: "<class \'tensorflow.python.training.checkpointable.base.CheckpointableBase\'>"
-  is_instance: "<type \'object\'>"
-  member {
-    name: "activity_regularizer"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "cell"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dtype"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "dynamic"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "inbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "input_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "losses"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "name"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "non_trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "outbound_nodes"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_mask"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "output_shape"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "states"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "trainable_weights"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "updates"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "variables"
-    mtype: "<type \'property\'>"
-  }
-  member {
-    name: "weights"
-    mtype: "<type \'property\'>"
-  }
-  member_method {
-    name: "__init__"
-    argspec: "args=[\'self\', \'units\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\'], varargs=None, keywords=kwargs, defaults=[\'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'False\', \'False\', \'False\', \'False\'], "
-  }
-  member_method {
-    name: "add_loss"
-    argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_metric"
-    argspec: "args=[\'self\', \'value\', \'aggregation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], "
-  }
-  member_method {
-    name: "add_update"
-    argspec: "args=[\'self\', \'updates\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "add_variable"
-    argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "add_weight"
-    argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], "
-  }
-  member_method {
-    name: "apply"
-    argspec: "args=[\'self\', \'inputs\'], varargs=args, keywords=kwargs, defaults=None"
-  }
-  member_method {
-    name: "build"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "call"
-    argspec: "args=[\'self\', \'inputs\', \'mask\', \'training\', \'initial_state\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], "
-  }
-  member_method {
-    name: "compute_mask"
-    argspec: "args=[\'self\', \'inputs\', \'mask\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "compute_output_shape"
-    argspec: "args=[\'self\', \'input_shape\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "count_params"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "from_config"
-    argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_config"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_initial_state"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_input_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_losses_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "get_output_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_mask_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_output_shape_at"
-    argspec: "args=[\'self\', \'node_index\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_updates_for"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "get_weights"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
-  }
-  member_method {
-    name: "reset_states"
-    argspec: "args=[\'self\', \'states\'], varargs=None, keywords=None, defaults=[\'None\'], "
-  }
-  member_method {
-    name: "set_weights"
-    argspec: "args=[\'self\', \'weights\'], varargs=None, keywords=None, defaults=None"
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
index f8c0dbb273..df2ea3fbe9 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.-g-r-u.pbtxt
@@ -1,5 +1,6 @@
 path: "tensorflow.keras.layers.GRU"
 tf_class {
+  is_instance: "<class \'tensorflow.python.keras.layers.recurrent.UnifiedGRU\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.GRU\'>"
   is_instance: "<class \'tensorflow.python.keras.layers.recurrent.RNN\'>"
   is_instance: "<class \'tensorflow.python.keras.engine.base_layer.Layer\'>"
@@ -159,7 +160,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'reset_after\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'hard_sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\', \'False\'], "
+    argspec: "args=[\'self\', \'units\', \'activation\', \'recurrent_activation\', \'use_bias\', \'kernel_initializer\', \'recurrent_initializer\', \'bias_initializer\', \'kernel_regularizer\', \'recurrent_regularizer\', \'bias_regularizer\', \'activity_regularizer\', \'kernel_constraint\', \'recurrent_constraint\', \'bias_constraint\', \'dropout\', \'recurrent_dropout\', \'implementation\', \'return_sequences\', \'return_state\', \'go_backwards\', \'stateful\', \'unroll\', \'time_major\', \'reset_after\'], varargs=None, keywords=kwargs, defaults=[\'tanh\', \'sigmoid\', \'True\', \'glorot_uniform\', \'orthogonal\', \'zeros\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'0.0\', \'0.0\', \'1\', \'False\', \'False\', \'False\', \'False\', \'False\', \'False\', \'True\'], "
   }
   member_method {
     name: "add_loss"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt
index 10ac3a7520..e84c9a2a8f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.layers.pbtxt
@@ -112,10 +112,6 @@ tf_module {
     name: "Cropping3D"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "CuDNNGRU"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "Dense"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index 2763a0ca63..ba72d1d202 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -243,6 +243,7 @@ renames = {
     'tf.is_strictly_increasing': 'tf.math.is_strictly_increasing',
     'tf.is_variable_initialized': 'tf.compat.v1.is_variable_initialized',
     'tf.keras.backend.get_session': 'tf.compat.v1.keras.backend.get_session',
+    'tf.keras.layers.CuDNNGRU': 'tf.compat.v1.keras.layers.CuDNNGRU',
     'tf.keras.layers.CuDNNLSTM': 'tf.compat.v1.keras.layers.CuDNNLSTM',
     'tf.layers.AveragePooling1D': 'tf.compat.v1.layers.AveragePooling1D',
     'tf.layers.AveragePooling2D': 'tf.compat.v1.layers.AveragePooling2D',
-- 
GitLab


From c152e8c1a8a27dda8d733493a64c3598d196053d Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Fri, 14 Dec 2018 16:11:19 -0800
Subject: [PATCH 637/873] Automated rollback of commit
 d19a8c92a23db3cc5c2ae74a8443b16d6d48eb13

PiperOrigin-RevId: 225620261
---
 tensorflow/contrib/compiler/BUILD       |   8 +-
 tensorflow/contrib/compiler/xla_test.py | 340 ------------------------
 2 files changed, 2 insertions(+), 346 deletions(-)

diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD
index 0897728272..e4566437c6 100644
--- a/tensorflow/contrib/compiler/BUILD
+++ b/tensorflow/contrib/compiler/BUILD
@@ -75,21 +75,17 @@ tf_py_test(
     srcs = ["xla_test.py"],
     additional_deps = [
         ":xla",
-        "@absl_py//absl/testing:parameterized",
-        "//tensorflow/contrib/tpu:tpu_estimator",
-        "//tensorflow/contrib/tpu:tpu_lib",
-        "//tensorflow/python:client_testlib",
+        "@six_archive//:six",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:control_flow_util",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
+        "//tensorflow/contrib/tpu:tpu_lib",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:summary",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
-        "//tensorflow/python/data/ops:dataset_ops",
     ],
     tags = ["no_pip"],
-    xla_enabled = True,
 )
diff --git a/tensorflow/contrib/compiler/xla_test.py b/tensorflow/contrib/compiler/xla_test.py
index a85b2dd155..3b49755afc 100644
--- a/tensorflow/contrib/compiler/xla_test.py
+++ b/tensorflow/contrib/compiler/xla_test.py
@@ -18,19 +18,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import re
-from absl.testing import parameterized
-
 from tensorflow.contrib.compiler import xla
-from tensorflow.contrib.tpu.python.tpu import tpu_estimator
 from tensorflow.contrib.tpu.python.tpu import tpu_feed
-from tensorflow.contrib.training.python.training import hparam
 from tensorflow.python import summary
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import logging_ops
@@ -38,14 +30,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import test
-from tensorflow.python.training import training
-
-
-_TRAIN = model_fn_lib.ModeKeys.TRAIN
-_EVAL = model_fn_lib.ModeKeys.EVAL
-_EXPECTED_LOSS = 1
-_EXPECTED_FEATURE = 2
-_EXPECTED_LABEL = 3
 
 
 class XLACompileContextTest(test.TestCase):
@@ -268,329 +252,5 @@ class CheckFunctionArgumentCountTest(test.TestCase):
                      xla.check_function_argument_count(func, 0, queue))
 
 
-def _test_train_model_fn(features, labels, mode, params):
-  """A dummy model_fn for testing purpose."""
-  del features, labels, params
-  loss = constant_op.constant(_EXPECTED_LOSS)
-  return model_fn_lib.EstimatorSpec(
-      mode=mode, loss=loss, train_op=array_ops.identity(loss))
-
-
-@xla.estimator_model_fn
-def decorated_model_fn(features, labels, mode, params):
-  return _test_train_model_fn(features, labels, mode, params)
-
-
-def make_dummy_features_labels():
-  # XLA CPU/GPU backend doesn't support guaranteed constant, thus use dataset
-  # container to work around.
-  features_dataset = dataset_ops.Dataset.from_tensors(
-      constant_op.constant(_EXPECTED_FEATURE)).repeat(10)
-  features_op = features_dataset.make_one_shot_iterator().get_next()
-  labels_dataset = dataset_ops.Dataset.from_tensors(
-      constant_op.constant(_EXPECTED_LABEL)).repeat(10)
-  labels_op = labels_dataset.make_one_shot_iterator().get_next()
-  return features_op, labels_op
-
-
-class XlaDecoratorTest(test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      ('test_use_as_decorator', decorated_model_fn, None),
-      ('test_use_as_function', xla.estimator_model_fn(_test_train_model_fn),
-       None),
-      ('test_use_tpu_false_hparams', decorated_model_fn,
-       hparam.HParams(use_tpu=False)),
-      ('test_use_tpu_false_dict_params', decorated_model_fn, {
-          'use_tpu': False
-      }),
-  )
-  def test_compile(self, model_fn, params):
-    """Calls model_fn and verifies it is compiled."""
-    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
-      loss = constant_op.constant(_EXPECTED_LOSS)
-      mock_xla_compile.return_value = [loss]
-
-      features, labels = make_dummy_features_labels()
-      estimator_spec = model_fn(
-          features=features, labels=labels, mode=_TRAIN, params=params or {})
-
-      mock_xla_compile.assert_called_once()
-      self.assertEqual(estimator_spec.mode, _TRAIN)
-
-      with self.test_session() as sess:
-        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
-        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))
-
-  @parameterized.named_parameters(
-      ('test_use_tpu_true_hparams', decorated_model_fn,
-       hparam.HParams(use_tpu=True)),
-      ('test_use_tpu_true_dict_params', decorated_model_fn, {
-          'use_tpu': True
-      }),
-  )
-  def test_not_compile(self, model_fn, params):
-    """Calls model_fn and verifies it is NOT compiled."""
-    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
-      loss = constant_op.constant(_EXPECTED_LOSS)
-      mock_xla_compile.return_value = [loss]
-
-      features, labels = make_dummy_features_labels()
-      estimator_spec = model_fn(
-          features=features, labels=labels, mode=_TRAIN, params=params or {})
-
-      mock_xla_compile.assert_not_called()
-      self.assertEqual(estimator_spec.mode, _TRAIN)
-
-      with self.test_session() as sess:
-        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
-        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))
-
-  def test_model_with_summary(self):
-    """Tests that summary ops are disabled."""
-
-    @xla.estimator_model_fn
-    def model_fn_with_summary(features, labels, mode, params):
-      del features, labels, params
-      loss = constant_op.constant(_EXPECTED_LOSS)
-      summary.scalar('loss_scalar_summary', loss)
-      summary.histogram('loss_histogram_summary', loss)
-      summary.image('loss_image_summary', loss)
-      return model_fn_lib.EstimatorSpec(
-          mode=mode, loss=loss, train_op=array_ops.identity(loss))
-
-    features, labels = make_dummy_features_labels()
-    estimator_spec = model_fn_with_summary(
-        features=features, labels=labels, mode=_TRAIN, params={})
-
-    with self.test_session() as sess:
-      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
-
-
-def _test_eval_metric_fn(eval_tensor_1, eval_tensor_2):
-  return {
-      'metric_1': (eval_tensor_1, eval_tensor_1),
-      'metric_2': (eval_tensor_2, eval_tensor_2),
-  }
-
-
-class XlaDecoratorEvaluationTest(test.TestCase):
-
-  def _verify_evaluation_result(self, eval_model_fn):
-    features, labels = make_dummy_features_labels()
-    estimator_spec = eval_model_fn(
-        features=features, labels=labels, mode=_EVAL, params={})
-
-    with self.test_session() as sess:
-      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
-      self.assertEqual(
-          sess.run(estimator_spec.eval_metric_ops['metric_1'][0]),
-          _EXPECTED_FEATURE + _EXPECTED_LABEL)
-      self.assertEqual(
-          sess.run(estimator_spec.eval_metric_ops['metric_1'][1]),
-          _EXPECTED_FEATURE + _EXPECTED_LABEL)
-      self.assertEqual(
-          sess.run(estimator_spec.eval_metric_ops['metric_2'][0]),
-          _EXPECTED_FEATURE - _EXPECTED_LABEL)
-      self.assertEqual(
-          sess.run(estimator_spec.eval_metric_ops['metric_2'][1]),
-          _EXPECTED_FEATURE - _EXPECTED_LABEL)
-
-  def test_eval_base_estimator_spec_eval_metric_ops_disallowed(self):
-
-    @xla.estimator_model_fn
-    def eval_model_fn_return_estimator_spec(features, labels, mode, params):
-      del features, labels, params
-      loss = constant_op.constant(_EXPECTED_LOSS)
-      return model_fn_lib.EstimatorSpec(
-          mode=mode,
-          loss=loss,
-          eval_metric_ops={
-              'metric': (array_ops.identity(loss), control_flow_ops.no_op())
-          })
-
-    with self.assertRaisesRegexp(
-        ValueError, 'EstimatorSpec.eval_metric_ops is not supported with XLA '
-        'compilation. Please use TPUEstimatorSpec.eval_metrics instead.'):
-      self._verify_evaluation_result(eval_model_fn_return_estimator_spec)
-
-  def test_eval_base_estimator_spec_no_eval_metric_ops(self):
-
-    @xla.estimator_model_fn
-    def eval_model_fn_no_eval_metric_ops(features, labels, mode, params):
-      del features, labels, params
-      return model_fn_lib.EstimatorSpec(
-          mode=mode, loss=constant_op.constant(_EXPECTED_LOSS))
-
-    features, labels = make_dummy_features_labels()
-    estimator_spec = eval_model_fn_no_eval_metric_ops(
-        features=features, labels=labels, mode=_EVAL, params={})
-    with self.test_session() as sess:
-      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
-
-  def test_eval_no_eval_metrics(self):
-
-    @xla.estimator_model_fn
-    def eval_model_fn_no_eval_metrics(features, labels, mode, params):
-      del features, labels, params
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode, loss=constant_op.constant(_EXPECTED_LOSS))
-
-    features, labels = make_dummy_features_labels()
-    estimator_spec = eval_model_fn_no_eval_metrics(
-        features=features, labels=labels, mode=_EVAL, params={})
-
-    self.assertEqual(estimator_spec.eval_metric_ops, {})
-    with self.test_session() as sess:
-      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
-
-  def test_eval_fn_missing_input_tensor(self):
-
-    @xla.estimator_model_fn
-    def eval_model_fn(features, labels, mode, params):
-      del params
-      dummy_eval_metric_fn_tensors_dict = {
-          'eval_tensor_1': features + labels,
-      }
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(_EXPECTED_LOSS),
-          eval_metrics=(_test_eval_metric_fn,
-                        dummy_eval_metric_fn_tensors_dict))
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        re.escape("Arguments ['eval_tensor_2'] are needed by metric_fn (first "
-                  'element of TPUEstimatorSpec.eval_metrics) but they are not '
-                  'provided by evaluation tensors (second element of '
-                  'TPUEstimatorSpec.eval_metrics).')):
-      self._verify_evaluation_result(eval_model_fn)
-
-  def test_eval_fn_extraneous_input_tensor(self):
-
-    @xla.estimator_model_fn
-    def eval_model_fn(features, labels, mode, params):
-      del params
-      dummy_eval_metric_fn_tensors_dict = {
-          'eval_tensor_1': features + labels,
-          'eval_tensor_2': features - labels,
-          'extra_tensor': features * 2 - labels,
-      }
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(_EXPECTED_LOSS),
-          eval_metrics=(_test_eval_metric_fn,
-                        dummy_eval_metric_fn_tensors_dict))
-
-    with self.assertRaisesRegexp(
-        ValueError,
-        re.escape("Arguments ['extra_tensor'] are provided by evaluation "
-                  'tensors (second element of TPUEstimatorSpec.eval_metrics) '
-                  'but they are not needed by metric_fn (first element of '
-                  'TPUEstimatorSpec.eval_metrics).')):
-      self._verify_evaluation_result(eval_model_fn)
-
-  def test_eval_tensors_as_list(self):
-
-    @xla.estimator_model_fn
-    def eval_model_fn(features, labels, mode, params):
-      del params
-      dummy_eval_metric_fn_tensors = [features + labels, features - labels]
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(_EXPECTED_LOSS),
-          eval_metrics=(_test_eval_metric_fn, dummy_eval_metric_fn_tensors))
-
-    self._verify_evaluation_result(eval_model_fn)
-
-  def test_eval_tensors_as_dict(self):
-
-    @xla.estimator_model_fn
-    def eval_model_fn(features, labels, mode, params):
-      del params
-      dummy_eval_metric_fn_tensors_dict = {
-          'eval_tensor_1': features + labels,
-          'eval_tensor_2': features - labels,
-      }
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(_EXPECTED_LOSS),
-          eval_metrics=(_test_eval_metric_fn,
-                        dummy_eval_metric_fn_tensors_dict))
-
-    self._verify_evaluation_result(eval_model_fn)
-
-  def test_model_with_summary(self):
-    """Tests that summary ops are disabled."""
-
-    @xla.estimator_model_fn
-    def model_fn_with_summary(features, labels, mode, params):
-      del features, labels, params
-      loss = constant_op.constant(_EXPECTED_LOSS)
-      summary.scalar('loss_scalar_summary', loss)
-      summary.histogram('loss_histogram_summary', loss)
-      summary.image('loss_image_summary', loss)
-      return tpu_estimator.TPUEstimatorSpec(mode=mode, loss=loss)
-
-    features, labels = make_dummy_features_labels()
-    estimator_spec = model_fn_with_summary(
-        features=features, labels=labels, mode=_EVAL, params={})
-
-    with self.test_session() as sess:
-      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
-
-
-class XlaDecoratorScaffoldTest(test.TestCase, parameterized.TestCase):
-
-  def _make_scaffold_fn(self, mode):
-
-    def _scaffold_fn_on_cpu():
-      scaffold = training.Scaffold()
-      self.assertNotIn(mode, self.is_scaffold_fn_called)
-      self.is_scaffold_fn_called[mode] = True
-      return scaffold
-
-    return _scaffold_fn_on_cpu
-
-  def test_scaffold_fn_return_none(self):
-
-    @xla.estimator_model_fn
-    def model_fn(features, labels, mode, params):
-      del features, labels, params
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(_EXPECTED_LOSS),
-          train_op=control_flow_ops.no_op(),
-          scaffold_fn=lambda: None)
-
-    features, labels = make_dummy_features_labels()
-    with self.assertRaisesRegexp(
-        ValueError,
-        'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed'):
-      model_fn(features=features, labels=labels, mode=_TRAIN, params={})
-
-  @parameterized.named_parameters(
-      ('train_mode', _TRAIN),
-      ('eval_mode', _EVAL),
-      # TODO(ycao): Add predict_mode test after PREDICT mode is implemented.
-  )
-  def test_scaffold_fn_in_mode(self, mode):
-
-    @xla.estimator_model_fn
-    def model_fn(features, labels, mode, params):
-      del features, labels, params
-      return tpu_estimator.TPUEstimatorSpec(
-          mode=mode,
-          loss=constant_op.constant(_EXPECTED_LOSS),
-          train_op=control_flow_ops.no_op(),
-          scaffold_fn=self._make_scaffold_fn(mode))
-
-    features, labels = make_dummy_features_labels()
-
-    self.is_scaffold_fn_called = {}
-    model_fn(features=features, labels=labels, mode=mode, params={})
-    self.assertTrue(self.is_scaffold_fn_called[mode])
-
-
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 98c0deb828c2f98f0d6d77a12d32f3b33ed92887 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Fri, 14 Dec 2018 16:36:42 -0800
Subject: [PATCH 638/873] Disable automatic dependency tracking for the legacy
 tf.layers.Layer class.

PiperOrigin-RevId: 225623461
---
 tensorflow/python/layers/base.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py
index bfe591f875..5354d437b4 100644
--- a/tensorflow/python/layers/base.py
+++ b/tensorflow/python/layers/base.py
@@ -26,6 +26,7 @@ from tensorflow.python.keras.engine import base_layer
 from tensorflow.python.keras.engine import base_layer_utils
 from tensorflow.python.ops import variable_scope as vs
 from tensorflow.python.ops import variables as tf_variables
+from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import function_utils
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_contextlib
@@ -551,6 +552,10 @@ class Layer(base_layer.Layer):
         setattr(result, k, copy.deepcopy(v, memo))
     return result
 
+  def __setattr__(self, value, name):
+    # By-pass the automatic dependency tracking performed by the parent Layer.
+    super(checkpointable.CheckpointableBase, self).__setattr__(value, name)
+
 
 def _add_elements_to_collection(elements, collection_list):
   if context.executing_eagerly():
-- 
GitLab


From 212d8cd51cd2afbd8361270e6cef2bf4cb164f10 Mon Sep 17 00:00:00 2001
From: Katherine Wu <kathywu@google.com>
Date: Fri, 14 Dec 2018 16:44:26 -0800
Subject: [PATCH 639/873] Move Keras to SavedModel code and some SavedModel
 utils into core.

Overall:
- Moved functions and constants from various parts of the Estimator codebase
- Exposed the save/load code as tf.keras.experimental.export and tf.keras.experimental.load_from_saved_model

In upcoming changes:
- Move code from keras/engine/saving.py into the keras/saving folder
- Resolve circular dependency between SavedModels and Keras, so saving can be added into the engine target.
- Remove copied fuctions from tensorflow_estimator

PiperOrigin-RevId: 225624440
---
 tensorflow/python/keras/BUILD                 |   6 +-
 tensorflow/python/keras/__init__.py           |   2 +
 tensorflow/python/keras/saving/BUILD          |  69 +++
 tensorflow/python/keras/saving/__init__.py    |  21 +
 tensorflow/python/keras/saving/saved_model.py | 418 ++++++++++++++
 .../python/keras/saving/saved_model_test.py   | 539 ++++++++++++++++++
 .../python/keras/utils/metrics_utils.py       |  77 +++
 .../python/saved_model/model_utils/BUILD      | 100 ++++
 .../saved_model/model_utils/__init__.py       |  28 +
 .../saved_model/model_utils/export_output.py  | 407 +++++++++++++
 .../model_utils/export_output_test.py         | 405 +++++++++++++
 .../saved_model/model_utils/export_test.py    | 257 +++++++++
 .../saved_model/model_utils/export_utils.py   | 340 +++++++++++
 .../v1/tensorflow.keras.experimental.pbtxt    |   8 +
 .../v2/tensorflow.keras.experimental.pbtxt    |   8 +
 15 files changed, 2684 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/python/keras/saving/BUILD
 create mode 100644 tensorflow/python/keras/saving/__init__.py
 create mode 100644 tensorflow/python/keras/saving/saved_model.py
 create mode 100644 tensorflow/python/keras/saving/saved_model_test.py
 create mode 100644 tensorflow/python/keras/utils/metrics_utils.py
 create mode 100644 tensorflow/python/saved_model/model_utils/BUILD
 create mode 100644 tensorflow/python/saved_model/model_utils/__init__.py
 create mode 100644 tensorflow/python/saved_model/model_utils/export_output.py
 create mode 100644 tensorflow/python/saved_model/model_utils/export_output_test.py
 create mode 100644 tensorflow/python/saved_model/model_utils/export_test.py
 create mode 100644 tensorflow/python/saved_model/model_utils/export_utils.py

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 082c9ebfb3..3b3986dc2f 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -61,10 +61,13 @@ py_library(
         ":engine",
         ":layers",
         ":pil_for_keras",
+        "@keras_applications_archive//:keras_applications",
         "//tensorflow/python:training",
         "//tensorflow/python/keras/optimizer_v2",
+        # TODO(kathywu): move saving into engine after resolving circular
+        # dependencies between Keras and SavedModel
+        "//tensorflow/python/keras/saving",
         "//tensorflow/python/saved_model",
-        "@keras_applications_archive//:keras_applications",
     ],
 )
 
@@ -145,6 +148,7 @@ py_library(
         "utils/data_utils.py",
         "utils/io_utils.py",
         "utils/losses_utils.py",
+        "utils/metrics_utils.py",
     ],
     srcs_version = "PY2AND3",
     deps = [
diff --git a/tensorflow/python/keras/__init__.py b/tensorflow/python/keras/__init__.py
index be46a894e1..2a6de2de88 100644
--- a/tensorflow/python/keras/__init__.py
+++ b/tensorflow/python/keras/__init__.py
@@ -41,6 +41,8 @@ from tensorflow.python.keras import wrappers
 from tensorflow.python.keras.layers import Input
 from tensorflow.python.keras.models import Model
 from tensorflow.python.keras.models import Sequential
+from tensorflow.python.keras.saving.saved_model import export
+from tensorflow.python.keras.saving.saved_model import load_from_saved_model
 
 from tensorflow.python.util.tf_export import tf_export
 
diff --git a/tensorflow/python/keras/saving/BUILD b/tensorflow/python/keras/saving/BUILD
new file mode 100644
index 0000000000..1ab7aca58e
--- /dev/null
+++ b/tensorflow/python/keras/saving/BUILD
@@ -0,0 +1,69 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# Description:
+#   Keras saving and loading libraries.
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+package(default_visibility = ["//tensorflow:__subpackages__"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+py_library(
+    name = "saving",
+    srcs = ["__init__.py"],
+    deps = [":saved_model"],
+)
+
+py_library(
+    name = "saved_model",
+    srcs = ["saved_model.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:lib",
+        "//tensorflow/python:metrics",
+        "//tensorflow/python:mode_keys",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:saver",
+        "//tensorflow/python:util",
+        "//tensorflow/python/keras:engine",
+        "//tensorflow/python/saved_model",
+        "//tensorflow/python/saved_model/model_utils",
+    ],
+)
+
+py_test(
+    name = "saved_model_test",
+    size = "medium",
+    srcs = ["saved_model_test.py"],
+    srcs_version = "PY2AND3",
+    tags = [
+        "no_oss",  # TODO(b/119349471): Re-enable
+        "no_windows",
+    ],
+    deps = [
+        ":saved_model",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:mode_keys",
+        "//tensorflow/python/keras",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
diff --git a/tensorflow/python/keras/saving/__init__.py b/tensorflow/python/keras/saving/__init__.py
new file mode 100644
index 0000000000..8ff9f3b74e
--- /dev/null
+++ b/tensorflow/python/keras/saving/__init__.py
@@ -0,0 +1,21 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utils for saving a Keras Model or Estimator to the SavedModel format."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.keras.saving.saved_model import export
+from tensorflow.python.keras.saving.saved_model import load_from_saved_model
diff --git a/tensorflow/python/keras/saving/saved_model.py b/tensorflow/python/keras/saving/saved_model.py
new file mode 100644
index 0000000000..2b83f321c2
--- /dev/null
+++ b/tensorflow/python/keras/saving/saved_model.py
@@ -0,0 +1,418 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# pylint: disable=protected-access
+"""Utility functions to save/load keras Model to/from SavedModel."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import six
+
+from tensorflow.python.client import session
+from tensorflow.python.framework import ops
+from tensorflow.python.keras import backend as K
+from tensorflow.python.keras import models as models_lib
+from tensorflow.python.keras import optimizers
+from tensorflow.python.keras.engine import sequential
+from tensorflow.python.keras.engine import training_utils
+from tensorflow.python.keras.metrics import Metric
+from tensorflow.python.keras.models import model_from_json
+from tensorflow.python.keras.utils import metrics_utils
+from tensorflow.python.lib.io import file_io
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import builder as saved_model_builder
+from tensorflow.python.saved_model import constants
+from tensorflow.python.saved_model import model_utils
+from tensorflow.python.saved_model import save as save_lib
+from tensorflow.python.saved_model import utils_impl as saved_model_utils
+from tensorflow.python.training import mode_keys
+from tensorflow.python.training import saver as saver_lib
+from tensorflow.python.training.checkpointable import util as checkpointable_utils
+from tensorflow.python.util import compat
+from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import tf_export
+
+
+@tf_export('keras.experimental.export')
+def export(
+    model, saved_model_path, custom_objects=None, as_text=None,
+    input_signature=None, serving_only=False):
+  """Saves a `tf.keras.Model` into Tensorflow SavedModel format.
+
+  `save_model` generates new files/folders under the `saved_model_path` folder:
+  1) a checkpoint containing the model weights.
+  2) a saved_model.pb file containing the model's MetaGraphs. The prediction
+     graph is always exported. The evaluaton and training graphs are exported
+     if the following conditions are met:
+     - Evaluation: model loss is defined.
+     - Training: model is compiled with an optimizer defined under `tf.train`.
+       This is because `tf.keras.optimizers.Optimizer` instances cannot be
+       saved to checkpoints.
+  3) Model's json configuration, if model.get_config() has been implemented.
+     This file can be used to reload the model using
+     tf.keras.models.model_from_json(). Note that if any custom objects were
+     used, they should be passed to the `custom_object` argument when loading
+     the model.
+
+  Model limitations:
+  - Sequential and functional models can always be saved.
+  - Subclassed models can only be saved when `serving_only=True`. This is due to
+    the current implementation copying the model in order to export the training
+    and evaluation graphs. Because the topology of subclassed models cannot be
+    determined, the subclassed models cannot be cloned. Subclassed models will
+    be entirely exportable in the future.
+
+  Note that each mode is exported in separate graphs, so different modes do not
+  share variables. To use the train graph with evaluation or prediction graphs,
+  create a new checkpoint if variable values have been updated.
+
+  Example:
+
+  ```python
+  import tensorflow as tf
+
+  # Create a tf.keras model.
+  model = tf.keras.Sequential()
+  model.add(tf.keras.layers.Dense(1, input_shape=[10]))
+  model.summary()
+
+  # Save the tf.keras model in the SavedModel format.
+  saved_to_path = tf.keras.experimental.export(
+        model, '/tmp/my_simple_tf_keras_saved_model')
+
+  # Load the saved keras model back.
+  model_prime = tf.keras.experimental.load_from_saved_model(saved_to_path)
+  model_prime.summary()
+  ```
+
+  Args:
+    model: A `tf.keras.Model` to be saved. If the model is subclassed, the flag
+      `serving_only` must be set to True.
+    saved_model_path: a string specifying the path to the SavedModel directory.
+      The SavedModel will be saved to a timestamped folder created within this
+      directory.
+    custom_objects: Optional dictionary mapping string names to custom classes
+      or functions (e.g. custom loss functions).
+    as_text: whether to write the `SavedModel` proto in text format. Currently
+      unavailable in serving-only mode.
+    input_signature: A possibly nested sequence of `tf.TensorSpec` objects, used
+      to specify the expected model inputs. `input_signature`'s nested structure
+      should match the expected nested structure of the inputs to the model. If
+      this is not set, this function will attempt to infer the input shapes and
+      dtypes from the model. Note that if the model is subclassed, the tensor
+      inputs to the call function should be nested in the first argument (this
+      is a general requirement for using subclassed models with Keras functions
+      .fit(), .predict(), etc.).
+    serving_only: Export only the outputs produced from calling the model in
+      predict mode. The losses, optimizer, and other training configurations are
+      not saved. If the SavedModel will only be used for serving (rather than
+      retraining), or if the model is subclassed, this can be set to True.
+
+  Returns:
+    String path to the SavedModel folder, a subdirectory of `saved_model_path`.
+
+  Raises:
+    NotImplementedError: If the model is a subclassed model, and serving_only is
+      False.
+    ValueError: If the input signature cannot be inferred from the model.
+  """
+  export_dir = model_utils.get_timestamped_export_dir(saved_model_path)
+
+  if serving_only:
+    save_lib.save(
+        model, export_dir,
+        signatures=training_utils.trace_model_call(model, input_signature))
+  else:
+    _save_v1_format(model, export_dir, custom_objects, as_text, input_signature)
+
+  try:
+    _export_model_json(model, export_dir)
+  except NotImplementedError:
+    logging.warning('Skipped saving model JSON, subclassed model does not have '
+                    'get_config() defined.')
+
+  return export_dir
+
+
+def _export_model_json(model, saved_model_path):
+  """Saves model configuration as a json string under assets folder."""
+  model_json = model.to_json()
+  model_json_filepath = os.path.join(
+      saved_model_utils.get_or_create_assets_dir(saved_model_path),
+      compat.as_text(constants.SAVED_MODEL_FILENAME_JSON))
+  file_io.write_string_to_file(model_json_filepath, model_json)
+
+
+def _export_model_variables(model, saved_model_path):
+  """Saves model weights in checkpoint format under variables folder."""
+  saved_model_utils.get_or_create_variables_dir(saved_model_path)
+  checkpoint_prefix = saved_model_utils.get_variables_path(saved_model_path)
+  model.save_weights(checkpoint_prefix, save_format='tf', overwrite=True)
+  return checkpoint_prefix
+
+
+def _save_v1_format(model, path, custom_objects, as_text, input_signature):
+  """Exports model to v1 SavedModel format."""
+  if not model._is_graph_network:
+    if isinstance(model, sequential.Sequential):
+      # If input shape is not directly set in the model, the exported model
+      # will infer the expected shapes of the input from the model.
+      if not model.built and input_signature is None:
+        raise ValueError(
+            'Sequential model\'s input shape is unknown. Please build the '
+            'model, or use the input_signature argument to specify the '
+            'model inputs.')
+    else:
+      raise NotImplementedError(
+          'Subclassed models can only be exported for serving. Please set '
+          'argument serving_only=True.')
+
+  builder = saved_model_builder._SavedModelBuilder(path)
+
+  # Manually save variables to export them in an object-based checkpoint. This
+  # skips the `builder.add_meta_graph_and_variables()` step, which saves a
+  # named-based checkpoint.
+  # TODO(b/113134168): Add fn to Builder to save with object-based saver.
+  # TODO(b/113178242): This should only export the model json structure. Only
+  # one save is needed once the weights can be copied from the model to clone.
+  checkpoint_path = _export_model_variables(model, path)
+
+  # Export each mode. Use ModeKeys enums defined for `Estimator` to ensure that
+  # Keras models and `Estimator`s are exported with the same format.
+  # Every time a mode is exported, the code checks to see if new variables have
+  # been created (e.g. optimizer slot variables). If that is the case, the
+  # checkpoint is re-saved to include the new variables.
+  export_args = {'builder': builder,
+                 'model': model,
+                 'custom_objects': custom_objects,
+                 'checkpoint_path': checkpoint_path,
+                 'input_signature': input_signature}
+
+  has_saved_vars = False
+  if model.optimizer:
+    # TODO(kathywu): Verify this works with v2 optimizer.
+    if isinstance(model.optimizer, optimizers.TFOptimizer):
+      _export_mode(mode_keys.ModeKeys.TRAIN, has_saved_vars, **export_args)
+      has_saved_vars = True
+      _export_mode(mode_keys.ModeKeys.TEST, has_saved_vars, **export_args)
+    else:
+      logging.warning(
+          'Model was compiled with an optimizer, but the optimizer is not from '
+          '`tf.train` (e.g. `tf.train.AdagradOptimizer`). Only the serving '
+          'graph was exported. The train and evaluate graphs were not added to '
+          'the SavedModel.')
+  _export_mode(mode_keys.ModeKeys.PREDICT, has_saved_vars, **export_args)
+
+  builder.save(as_text)
+
+
+def _get_var_list(model):
+  """Returns list of all checkpointed saveable objects in the model."""
+  return checkpointable_utils.named_saveables(model)
+
+
+def create_placeholder(spec):
+  return K.placeholder(shape=spec.shape, dtype=spec.dtype, name=spec.name)
+
+
+def _export_mode(
+    mode, has_saved_vars, builder, model, custom_objects, checkpoint_path,
+    input_signature):
+  """Exports a model, and optionally saves new vars from the clone model.
+
+  Args:
+    mode: A `tf.estimator.ModeKeys` string.
+    has_saved_vars: A `boolean` indicating whether the SavedModel has already
+      exported variables.
+    builder: A `SavedModelBuilder` object.
+    model: A `tf.keras.Model` object.
+    custom_objects: A dictionary mapping string names to custom classes
+      or functions.
+    checkpoint_path: String path to checkpoint.
+    input_signature: Nested TensorSpec containing the expected inputs. Can be
+      `None`, in which case the signature will be inferred from the model.
+
+  Raises:
+    ValueError: If the train/eval mode is being exported, but the model does
+      not have an optimizer.
+  """
+  compile_clone = (mode != mode_keys.ModeKeys.PREDICT)
+  if compile_clone and not model.optimizer:
+    raise ValueError(
+        'Model does not have an optimizer. Cannot export mode %s' % mode)
+
+  model_graph = ops.get_default_graph()
+  with ops.Graph().as_default() as g:
+
+    K.set_learning_phase(mode == mode_keys.ModeKeys.TRAIN)
+
+    if input_signature is None:
+      input_tensors = None
+    else:
+      input_tensors = nest.map_structure(create_placeholder, input_signature)
+
+    # Clone the model into blank graph. This will create placeholders for inputs
+    # and targets.
+    clone = models_lib.clone_and_build_model(
+        model, input_tensors=input_tensors, custom_objects=custom_objects,
+        compile_clone=compile_clone)
+
+    # Make sure that iterations variable is added to the global step collection,
+    # to ensure that, when the SavedModel graph is loaded, the iterations
+    # variable is returned by `tf.train.get_global_step()`. This is required for
+    # compatibility with the SavedModelEstimator.
+    if compile_clone:
+      g.add_to_collection(ops.GraphKeys.GLOBAL_STEP, clone.optimizer.iterations)
+
+    # Extract update and train ops from train/test/predict functions.
+    train_op = None
+    if mode == mode_keys.ModeKeys.TRAIN:
+      clone._make_train_function()
+      train_op = clone.train_function.updates_op
+    elif mode == mode_keys.ModeKeys.TEST:
+      clone._make_test_function()
+    else:
+      clone._make_predict_function()
+    g.get_collection_ref(ops.GraphKeys.UPDATE_OPS).extend(clone.state_updates)
+
+    clone_var_list = checkpointable_utils.named_saveables(clone)
+
+    with session.Session().as_default():
+      if has_saved_vars:
+        # Confirm all variables in the clone have an entry in the checkpoint.
+        status = clone.load_weights(checkpoint_path)
+        status.assert_existing_objects_matched()
+      else:
+        # Confirm that variables between the clone and model match up exactly,
+        # not counting optimizer objects. Optimizer objects are ignored because
+        # if the model has not trained, the slot variables will not have been
+        # created yet.
+        # TODO(b/113179535): Replace with checkpointable equivalence.
+        _assert_same_non_optimizer_objects(model, model_graph, clone, g)
+
+        # TODO(b/113178242): Use value transfer for checkpointable objects.
+        clone.load_weights(checkpoint_path)
+
+        # Add graph and variables to SavedModel.
+        # TODO(b/113134168): Switch to add_meta_graph_and_variables.
+        clone.save_weights(checkpoint_path, save_format='tf', overwrite=True)
+        builder._has_saved_variables = True
+
+    # Add graph to the SavedModel builder.
+    builder.add_meta_graph(
+        model_utils.EXPORT_TAG_MAP[mode],
+        signature_def_map=_create_signature_def_map(clone, mode),
+        saver=saver_lib.Saver(clone_var_list),
+        init_op=variables.local_variables_initializer(),
+        train_op=train_op)
+    return None
+
+
+def _create_signature_def_map(model, mode):
+  """Creates a SignatureDef map from a Keras model."""
+  inputs_dict = {name: x for name, x in zip(model.input_names, model.inputs)}
+  if model.optimizer:
+    targets_dict = {x.name.split(':')[0]: x
+                    for x in model.targets if x is not None}
+    inputs_dict.update(targets_dict)
+  outputs_dict = {name: x
+                  for name, x in zip(model.output_names, model.outputs)}
+  metrics = metrics_utils.extract_model_metrics_as_v1_metrics(model)
+
+  # Add metric variables to the `LOCAL_VARIABLES` collection. Metric variables
+  # are by default not added to any collections. We are doing this here, so
+  # that metric variables get initialized.
+  local_vars = set(ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES))
+  vars_to_add = set()
+  if metrics is not None:
+    for key, value in six.iteritems(metrics):
+      if isinstance(value, Metric):
+        vars_to_add.update(value.variables)
+        # Convert Metric instances to (value_tensor, update_op) tuple.
+        metrics[key] = (value.result(), value.updates[0])
+  # Remove variables that are in the local variables collection already.
+  vars_to_add = vars_to_add.difference(local_vars)
+  for v in vars_to_add:
+    ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, v)
+
+  export_outputs = model_utils.export_outputs_for_mode(
+      mode,
+      predictions=outputs_dict,
+      loss=model.total_loss if model.optimizer else None,
+      metrics=metrics)
+  return model_utils.build_all_signature_defs(
+      inputs_dict,
+      export_outputs=export_outputs,
+      serving_only=(mode == mode_keys.ModeKeys.PREDICT))
+
+
+def _assert_same_non_optimizer_objects(model, model_graph, clone, clone_graph):  # pylint: disable=unused-argument
+  """Asserts model and clone contain the same checkpointable objects."""
+
+  # TODO(fchollet, kathywu): make sure this works in eager mode.
+  return True
+
+
+@tf_export('keras.experimental.load_from_saved_model')
+def load_from_saved_model(saved_model_path):
+  """Loads a keras.Model from a SavedModel created by keras export().
+
+  This function reinstantiates model state by:
+  1) loading model topology from json (this will eventually come
+     from metagraph).
+  2) loading model weights from checkpoint.
+
+  Example:
+
+  ```python
+  import tensorflow as tf
+
+  # Create a tf.keras model.
+  model = tf.keras.Sequential()
+  model.add(tf.keras.layers.Dense(1, input_shape=[10]))
+  model.summary()
+
+  # Save the tf.keras model in the SavedModel format.
+  saved_to_path = tf.keras.experimental.export(
+        model, '/tmp/my_simple_tf_keras_saved_model')
+
+  # Load the saved keras model back.
+  model_prime = tf.keras.experimental.load_from_saved_model(saved_to_path)
+  model_prime.summary()
+  ```
+
+  Args:
+    saved_model_path: a string specifying the path to an existing SavedModel.
+
+  Returns:
+    a keras.Model instance.
+  """
+  # restore model topology from json string
+  model_json_filepath = os.path.join(
+      compat.as_bytes(saved_model_path),
+      compat.as_bytes(constants.ASSETS_DIRECTORY),
+      compat.as_bytes(constants.SAVED_MODEL_FILENAME_JSON))
+  model_json = file_io.read_file_to_string(model_json_filepath)
+  model = model_from_json(model_json)
+
+  # restore model weights
+  checkpoint_prefix = os.path.join(
+      compat.as_text(saved_model_path),
+      compat.as_text(constants.VARIABLES_DIRECTORY),
+      compat.as_text(constants.VARIABLES_FILENAME))
+  model.load_weights(checkpoint_prefix)
+  return model
diff --git a/tensorflow/python/keras/saving/saved_model_test.py b/tensorflow/python/keras/saving/saved_model_test.py
new file mode 100644
index 0000000000..8063b8af4d
--- /dev/null
+++ b/tensorflow/python/keras/saving/saved_model_test.py
@@ -0,0 +1,539 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# pylint: disable=protected-access
+"""Tests for saving/loading function for keras Model."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import shutil
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python import keras
+from tensorflow.python.client import session
+from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras.engine import training
+from tensorflow.python.keras.saving import saved_model as keras_saved_model
+from tensorflow.python.keras.utils import tf_utils
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import loader_impl
+from tensorflow.python.saved_model import model_utils
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.training import mode_keys
+from tensorflow.python.training import training as training_module
+
+
+class TestModelSavingandLoading(test.TestCase):
+
+  def _save_model_dir(self, dirname='saved_model'):
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+    return os.path.join(temp_dir, dirname)
+
+  def test_saving_sequential_model(self):
+    with self.cached_session():
+      model = keras.models.Sequential()
+      model.add(keras.layers.Dense(2, input_shape=(3,)))
+      model.add(keras.layers.RepeatVector(3))
+      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
+      model.compile(
+          loss=keras.losses.MSE,
+          optimizer=keras.optimizers.RMSprop(lr=0.0001),
+          metrics=[keras.metrics.categorical_accuracy],
+          sample_weight_mode='temporal')
+      x = np.random.random((1, 3))
+      y = np.random.random((1, 3, 3))
+      model.train_on_batch(x, y)
+
+      ref_y = model.predict(x)
+
+      temp_saved_model = self._save_model_dir()
+      output_path = keras_saved_model.export(model, temp_saved_model)
+
+      loaded_model = keras_saved_model.load_from_saved_model(output_path)
+      y = loaded_model.predict(x)
+      self.assertAllClose(ref_y, y, atol=1e-05)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_saving_sequential_model_without_compile(self):
+    with self.cached_session():
+      model = keras.models.Sequential()
+      model.add(keras.layers.Dense(2, input_shape=(3,)))
+      model.add(keras.layers.RepeatVector(3))
+      model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
+
+      x = np.random.random((1, 3))
+      ref_y = model.predict(x)
+
+      temp_saved_model = self._save_model_dir()
+      output_path = keras_saved_model.export(model, temp_saved_model)
+      loaded_model = keras_saved_model.load_from_saved_model(output_path)
+
+      y = loaded_model.predict(x)
+      self.assertAllClose(ref_y, y, atol=1e-05)
+
+  def test_saving_functional_model(self):
+    with self.cached_session():
+      inputs = keras.layers.Input(shape=(3,))
+      x = keras.layers.Dense(2)(inputs)
+      output = keras.layers.Dense(3)(x)
+
+      model = keras.models.Model(inputs, output)
+      model.compile(
+          loss=keras.losses.MSE,
+          optimizer=keras.optimizers.RMSprop(lr=0.0001),
+          metrics=[keras.metrics.categorical_accuracy])
+      x = np.random.random((1, 3))
+      y = np.random.random((1, 3))
+      model.train_on_batch(x, y)
+
+      ref_y = model.predict(x)
+
+      temp_saved_model = self._save_model_dir()
+      output_path = keras_saved_model.export(model, temp_saved_model)
+      loaded_model = keras_saved_model.load_from_saved_model(output_path)
+
+      y = loaded_model.predict(x)
+      self.assertAllClose(ref_y, y, atol=1e-05)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_saving_functional_model_without_compile(self):
+    with self.cached_session():
+      inputs = keras.layers.Input(shape=(3,))
+      x = keras.layers.Dense(2)(inputs)
+      output = keras.layers.Dense(3)(x)
+
+      model = keras.models.Model(inputs, output)
+
+      x = np.random.random((1, 3))
+      y = np.random.random((1, 3))
+
+      ref_y = model.predict(x)
+
+      temp_saved_model = self._save_model_dir()
+      output_path = keras_saved_model.export(model, temp_saved_model)
+      loaded_model = keras_saved_model.load_from_saved_model(output_path)
+
+      y = loaded_model.predict(x)
+      self.assertAllClose(ref_y, y, atol=1e-05)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_saving_with_tf_optimizer(self):
+    with self.cached_session():
+      model = keras.models.Sequential()
+      model.add(keras.layers.Dense(2, input_shape=(3,)))
+      model.add(keras.layers.Dense(3))
+      model.compile(
+          loss='mse',
+          optimizer=training_module.RMSPropOptimizer(0.1),
+          metrics=['acc'])
+
+      x = np.random.random((1, 3))
+      y = np.random.random((1, 3))
+      model.train_on_batch(x, y)
+      ref_y = model.predict(x)
+
+      temp_saved_model = self._save_model_dir()
+      output_path = keras_saved_model.export(model, temp_saved_model)
+      loaded_model = keras_saved_model.load_from_saved_model(output_path)
+      loaded_model.compile(
+          loss='mse',
+          optimizer=training_module.RMSPropOptimizer(0.1),
+          metrics=['acc'])
+      y = loaded_model.predict(x)
+      self.assertAllClose(ref_y, y, atol=1e-05)
+
+      # test that new updates are the same with both models
+      x = np.random.random((1, 3))
+      y = np.random.random((1, 3))
+
+      ref_loss = model.train_on_batch(x, y)
+      loss = loaded_model.train_on_batch(x, y)
+      self.assertAllClose(ref_loss, loss, atol=1e-05)
+
+      ref_y = model.predict(x)
+      y = loaded_model.predict(x)
+      self.assertAllClose(ref_y, y, atol=1e-05)
+
+      # test saving/loading again
+      temp_saved_model2 = self._save_model_dir('saved_model_2')
+      output_path2 = keras_saved_model.export(
+          loaded_model, temp_saved_model2)
+      loaded_model = keras_saved_model.load_from_saved_model(output_path2)
+      y = loaded_model.predict(x)
+      self.assertAllClose(ref_y, y, atol=1e-05)
+
+  def test_saving_subclassed_model_raise_error(self):
+    # For now, saving subclassed model should raise an error. It should be
+    # avoided later with loading from SavedModel.pb.
+
+    class SubclassedModel(training.Model):
+
+      def __init__(self):
+        super(SubclassedModel, self).__init__()
+        self.layer1 = keras.layers.Dense(3)
+        self.layer2 = keras.layers.Dense(1)
+
+      def call(self, inp):
+        return self.layer2(self.layer1(inp))
+
+    model = SubclassedModel()
+
+    temp_saved_model = self._save_model_dir()
+    with self.assertRaises(NotImplementedError):
+      keras_saved_model.export(model, temp_saved_model)
+
+
+class LayerWithLearningPhase(keras.engine.base_layer.Layer):
+
+  def call(self, x):
+    phase = keras.backend.learning_phase()
+    output = tf_utils.smart_cond(
+        phase, lambda: x * 0, lambda: array_ops.identity(x))
+    if not context.executing_eagerly():
+      output._uses_learning_phase = True  # pylint: disable=protected-access
+    return output
+
+  def compute_output_shape(self, input_shape):
+    return input_shape
+
+
+def functional_model(uses_learning_phase=True):
+  inputs = keras.layers.Input(shape=(3,))
+  x = keras.layers.Dense(2)(inputs)
+  x = keras.layers.Dense(3)(x)
+  if uses_learning_phase:
+    x = LayerWithLearningPhase()(x)
+  return keras.models.Model(inputs, x)
+
+
+def sequential_model(uses_learning_phase=True):
+  model = keras.models.Sequential()
+  model.add(keras.layers.Dense(2, input_shape=(3,)))
+  model.add(keras.layers.Dense(3))
+  if uses_learning_phase:
+    model.add(LayerWithLearningPhase())
+  return model
+
+
+def sequential_model_without_input_shape(uses_learning_phase=True):
+  model = keras.models.Sequential()
+  model.add(keras.layers.Dense(2))
+  model.add(keras.layers.Dense(3))
+  if uses_learning_phase:
+    model.add(LayerWithLearningPhase())
+  return model
+
+
+class Subclassed(keras.models.Model):
+
+  def __init__(self):
+    super(Subclassed, self).__init__()
+    self.dense1 = keras.layers.Dense(2)
+    self.dense2 = keras.layers.Dense(3)
+
+  def call(self, inputs):
+    x = self.dense1(inputs)
+    x = self.dense2(x)
+    return x
+
+
+def subclassed_model():
+  return Subclassed()
+
+
+def load_model(sess, path, mode):
+  tags = model_utils.EXPORT_TAG_MAP[mode]
+  if mode == mode_keys.ModeKeys.PREDICT:
+    sig_def_key = signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+  else:
+    sig_def_key = mode
+
+  meta_graph_def = loader_impl.load(sess, tags, path)
+  inputs = {
+      k: sess.graph.get_tensor_by_name(v.name)
+      for k, v in meta_graph_def.signature_def[sig_def_key].inputs.items()}
+  outputs = {
+      k: sess.graph.get_tensor_by_name(v.name)
+      for k, v in meta_graph_def.signature_def[sig_def_key].outputs.items()}
+  return inputs, outputs, meta_graph_def
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class TestModelSavedModelExport(test.TestCase, parameterized.TestCase):
+
+  def _save_model_dir(self, dirname='saved_model'):
+    temp_dir = self.get_temp_dir()
+    self.addCleanup(shutil.rmtree, temp_dir, ignore_errors=True)
+    return os.path.join(temp_dir, dirname)
+
+  @parameterized.parameters(
+      {
+          'model_builder': functional_model,
+          'uses_learning_phase': True,
+          'optimizer': training_module.AdadeltaOptimizer(),
+          'train_before_export': True},
+      {
+          'model_builder': functional_model,
+          'uses_learning_phase': True,
+          'optimizer': training_module.AdadeltaOptimizer(),
+          'train_before_export': False},
+      {
+          'model_builder': functional_model,
+          'uses_learning_phase': False,
+          'optimizer': None,
+          'train_before_export': False},
+      {
+          'model_builder': sequential_model,
+          'uses_learning_phase': True,
+          'optimizer': training_module.AdadeltaOptimizer(),
+          'train_before_export': True},
+      {
+          'model_builder': sequential_model,
+          'uses_learning_phase': True,
+          'optimizer': training_module.AdadeltaOptimizer(),
+          'train_before_export': False},
+      {
+          'model_builder': sequential_model,
+          'uses_learning_phase': False,
+          'optimizer': None,
+          'train_before_export': False},
+      {
+          'model_builder': sequential_model_without_input_shape,
+          'uses_learning_phase': True,
+          'optimizer': training_module.AdadeltaOptimizer(),
+          'train_before_export': False})
+  def testSaveAndLoadSavedModelExport(
+      self, model_builder, uses_learning_phase, optimizer, train_before_export):
+    saved_model_path = self._save_model_dir()
+    with self.session(graph=ops.Graph()):
+      np.random.seed(130)
+      input_arr = np.random.random((1, 3))
+      target_arr = np.random.random((1, 3))
+
+      model = model_builder(uses_learning_phase)
+      if optimizer is not None:
+        model.compile(
+            loss='mse',
+            optimizer=optimizer,
+            metrics=['mae'])
+        if train_before_export:
+          model.train_on_batch(input_arr, target_arr)
+
+        ref_loss, ref_mae = model.evaluate(input_arr, target_arr)
+
+      ref_predict = model.predict(input_arr)
+
+      # Export SavedModel
+      output_path = keras_saved_model.export(model, saved_model_path)
+
+    input_name = model.input_names[0]
+    output_name = model.output_names[0]
+    target_name = output_name + '_target'
+
+    # Load predict graph, and test predictions
+    with session.Session(graph=ops.Graph()) as sess:
+      inputs, outputs, _ = load_model(sess, output_path,
+                                      mode_keys.ModeKeys.PREDICT)
+
+      predictions = sess.run(outputs[output_name],
+                             {inputs[input_name]: input_arr})
+      self.assertAllClose(ref_predict, predictions, atol=1e-05)
+
+    if optimizer:
+      # Load eval graph, and test predictions, loss and metric values
+      with session.Session(graph=ops.Graph()) as sess:
+        inputs, outputs, _ = load_model(sess, output_path,
+                                        mode_keys.ModeKeys.TEST)
+
+        # First obtain the loss and predictions, and run the metric update op by
+        # feeding in the inputs and targets.
+        loss, predictions, _ = sess.run(
+            (outputs['loss'], outputs['predictions/' + output_name],
+             outputs['metrics/mean_absolute_error/update_op']), {
+                 inputs[input_name]: input_arr,
+                 inputs[target_name]: target_arr
+             })
+
+        # The metric value should be run after the update op, to ensure that it
+        # reflects the correct value.
+        metric_value = sess.run(outputs['metrics/mean_absolute_error/value'])
+
+        self.assertEqual(int(train_before_export),
+                         sess.run(training_module.get_global_step()))
+        self.assertAllClose(ref_loss, loss, atol=1e-05)
+        self.assertAllClose(ref_mae, metric_value, atol=1e-05)
+        self.assertAllClose(ref_predict, predictions, atol=1e-05)
+
+      # Load train graph, and check for the train op, and prediction values
+      with session.Session(graph=ops.Graph()) as sess:
+        inputs, outputs, meta_graph_def = load_model(
+            sess, output_path, mode_keys.ModeKeys.TRAIN)
+        self.assertEqual(int(train_before_export),
+                         sess.run(training_module.get_global_step()))
+        self.assertIn('loss', outputs)
+        self.assertIn('metrics/mean_absolute_error/update_op', outputs)
+        self.assertIn('metrics/mean_absolute_error/value', outputs)
+        self.assertIn('predictions/' + output_name, outputs)
+
+        # Train for a step
+        train_op = loader_impl.get_train_op(meta_graph_def)
+        train_outputs, _ = sess.run(
+            [outputs, train_op], {inputs[input_name]: input_arr,
+                                  inputs[target_name]: target_arr})
+        self.assertEqual(int(train_before_export) + 1,
+                         sess.run(training_module.get_global_step()))
+
+        if uses_learning_phase:
+          self.assertAllClose(
+              [[0, 0, 0]], train_outputs['predictions/' + output_name],
+              atol=1e-05)
+        else:
+          self.assertNotAllClose(
+              [[0, 0, 0]], train_outputs['predictions/' + output_name],
+              atol=1e-05)
+
+  def testSaveAndLoadSavedModelWithCustomObject(self):
+    saved_model_path = self._save_model_dir()
+    with session.Session(graph=ops.Graph()) as sess:
+      def relu6(x):
+        return keras.backend.relu(x, max_value=6)
+      inputs = keras.layers.Input(shape=(1,))
+      outputs = keras.layers.Activation(relu6)(inputs)
+      model = keras.models.Model(inputs, outputs)
+      output_path = keras_saved_model.export(
+          model, saved_model_path, custom_objects={'relu6': relu6})
+    with session.Session(graph=ops.Graph()) as sess:
+      inputs, outputs, _ = load_model(sess, output_path,
+                                      mode_keys.ModeKeys.PREDICT)
+      input_name = model.input_names[0]
+      output_name = model.output_names[0]
+      predictions = sess.run(
+          outputs[output_name], {inputs[input_name]: [[7], [-3], [4]]})
+      self.assertAllEqual([[6], [0], [4]], predictions)
+
+  def testAssertModelCloneSameObjectsIgnoreOptimizer(self):
+    input_arr = np.random.random((1, 3))
+    target_arr = np.random.random((1, 3))
+
+    model_graph = ops.Graph()
+    clone_graph = ops.Graph()
+
+    # Create two models with the same layers but different optimizers.
+    with session.Session(graph=model_graph):
+      inputs = keras.layers.Input(shape=(3,))
+      x = keras.layers.Dense(2)(inputs)
+      x = keras.layers.Dense(3)(x)
+      model = keras.models.Model(inputs, x)
+
+      model.compile(loss='mse', optimizer=training_module.AdadeltaOptimizer())
+      model.train_on_batch(input_arr, target_arr)
+
+    with session.Session(graph=clone_graph):
+      inputs = keras.layers.Input(shape=(3,))
+      x = keras.layers.Dense(2)(inputs)
+      x = keras.layers.Dense(3)(x)
+      clone = keras.models.Model(inputs, x)
+      clone.compile(loss='mse', optimizer=keras.optimizers.RMSprop(lr=0.0001))
+      clone.train_on_batch(input_arr, target_arr)
+
+    keras_saved_model._assert_same_non_optimizer_objects(
+        model, model_graph, clone, clone_graph)
+
+  def testAssertModelCloneSameObjectsThrowError(self):
+    input_arr = np.random.random((1, 3))
+    target_arr = np.random.random((1, 3))
+
+    model_graph = ops.Graph()
+    clone_graph = ops.Graph()
+
+    # Create two models with the same layers but different optimizers.
+    with session.Session(graph=model_graph):
+      inputs = keras.layers.Input(shape=(3,))
+      x = keras.layers.Dense(2)(inputs)
+      x = keras.layers.Dense(3)(x)
+      model = keras.models.Model(inputs, x)
+
+      model.compile(loss='mse', optimizer=training_module.AdadeltaOptimizer())
+      model.train_on_batch(input_arr, target_arr)
+
+    with session.Session(graph=clone_graph):
+      inputs = keras.layers.Input(shape=(3,))
+      x = keras.layers.Dense(2)(inputs)
+      x = keras.layers.Dense(4)(x)
+      x = keras.layers.Dense(3)(x)
+      clone = keras.models.Model(inputs, x)
+      clone.compile(loss='mse', optimizer=keras.optimizers.RMSprop(lr=0.0001))
+      clone.train_on_batch(input_arr, target_arr)
+
+  def testSaveSequentialModelWithoutInputShapes(self):
+    model = sequential_model_without_input_shape(True)
+    # A Sequential model that hasn't been built should raise an error.
+    with self.assertRaisesRegexp(ValueError, 'Please build the model'):
+      keras_saved_model.export(model, '')
+
+    saved_model_path = self._save_model_dir()
+    output_path = keras_saved_model.export(
+        model, saved_model_path,
+        input_signature=tensor_spec.TensorSpec(shape=(10, 11, 12, 13, 14),
+                                               dtype=dtypes.float32,
+                                               name='spec_input'))
+
+    with session.Session(graph=ops.Graph()) as sess:
+      inputs, outputs, _ = load_model(sess, output_path,
+                                      mode_keys.ModeKeys.PREDICT)
+      self.assertEqual(5, inputs[next(iter(inputs.keys()))].shape.ndims)
+      self.assertEqual(5, outputs[next(iter(outputs.keys()))].shape.ndims)
+      self.assertEqual(3, outputs[next(iter(outputs.keys()))].shape[-1])
+
+  @parameterized.parameters(
+      {
+          'model_builder': sequential_model_without_input_shape,
+          'input_signature': [tensor_spec.TensorSpec(shape=[None, 3],
+                                                     dtype=dtypes.float32)]},
+      {
+          'model_builder': subclassed_model,
+          'input_signature': [tensor_spec.TensorSpec(shape=[None, 3],
+                                                     dtype=dtypes.float32)]})
+  def testServingOnly(self, model_builder, input_signature):
+    if context.executing_eagerly():
+      saved_model_path = self._save_model_dir()
+      input_arr = np.random.random((5, 3)).astype(np.float32)
+      model = model_builder()
+      ref_predict = model.predict(input_arr)
+
+      output_path = keras_saved_model.export(
+          model, saved_model_path, serving_only=True,
+          input_signature=input_signature)
+
+      # Load predict graph, and test predictions
+      with session.Session(graph=ops.Graph()) as sess:
+        inputs, outputs, _ = load_model(sess, output_path,
+                                        mode_keys.ModeKeys.PREDICT)
+        predictions = sess.run(outputs[next(iter(outputs.keys()))],
+                               {inputs[next(iter(inputs.keys()))]: input_arr})
+        self.assertAllClose(ref_predict, predictions, atol=1e-05)
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/keras/utils/metrics_utils.py b/tensorflow/python/keras/utils/metrics_utils.py
new file mode 100644
index 0000000000..431d107091
--- /dev/null
+++ b/tensorflow/python/keras/utils/metrics_utils.py
@@ -0,0 +1,77 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# pylint: disable=protected-access
+"""Utils related to keras metrics.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import six
+
+from tensorflow.python.keras import metrics
+from tensorflow.python.ops import metrics as metrics_module
+
+
+def extract_model_metrics_as_v1_metrics(model):
+  """Convert metrics from a Keras model to (value, update) ops.
+
+  This is used for converting Keras models to Estimators and SavedModels.
+
+  Args:
+    model: A `tf.keras.Model` object.
+
+  Returns:
+    Dictionary mapping metric names to tuples of (value, update) ops. May return
+    `None` if the model does not contain any metrics.
+  """
+  if not getattr(model, 'metrics', None):
+    return None
+
+  eval_metric_ops = {}
+
+  def get_metric_name(metric):
+    if isinstance(metric, metrics.Metric):
+      return metric.name
+    if callable(metric):
+      return metric.__name__
+    assert isinstance(metric, six.string_types)
+    return metric
+
+  # When each metric maps to an output
+  if isinstance(model.metrics, dict):
+    for i, output_name in enumerate(model.metrics.keys()):
+      # `metric` is the user given metric value in `compile`. This can be
+      # metric name (`acc`), metric function (binary_accuracy) or a metric
+      # object (BinaryAccuracy()).
+      metric = model.metrics[output_name]
+      metric_name = get_metric_name(metric)
+      # When some outputs use the same metric
+      if list(model.metrics.values()).count(metric_name) > 1:
+        metric_name += '_' + output_name
+      if isinstance(metric, metrics.Metric):
+        eval_metric_ops[metric_name] = metric
+      else:
+        eval_metric_ops[metric_name] = metrics_module.mean(
+            model.metrics_tensors[i - len(model.metrics)])
+  else:
+    for i, metric in enumerate(model.metrics):
+      metric_name = get_metric_name(metric)
+      if isinstance(metric, metrics.Metric):
+        eval_metric_ops[metric_name] = metric
+      else:
+        eval_metric_ops[metric_name] = metrics_module.mean(
+            model.metrics_tensors[i])
+  return eval_metric_ops
diff --git a/tensorflow/python/saved_model/model_utils/BUILD b/tensorflow/python/saved_model/model_utils/BUILD
new file mode 100644
index 0000000000..192a610fd2
--- /dev/null
+++ b/tensorflow/python/saved_model/model_utils/BUILD
@@ -0,0 +1,100 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# Description:
+#   Keras saving and loading libraries.
+load("//tensorflow:tensorflow.bzl", "py_test")
+
+package(default_visibility = ["//tensorflow:__subpackages__"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+py_library(
+    name = "model_utils",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":export_output",
+        ":export_utils",
+    ],
+)
+
+py_library(
+    name = "export_output",
+    srcs = ["export_output.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python/saved_model:signature_def_utils",
+    ],
+)
+
+py_test(
+    name = "export_output_test",
+    srcs = ["export_output_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":export_output",
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/keras",
+        "//tensorflow/python/saved_model:signature_constants",
+    ],
+)
+
+py_library(
+    name = "export_utils",
+    srcs = ["export_utils.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":export_output",
+        "//tensorflow/python:mode_keys",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:util",
+        "//tensorflow/python/saved_model:signature_constants",
+        "//tensorflow/python/saved_model:signature_def_utils",
+        "//tensorflow/python/saved_model:tag_constants",
+    ],
+)
+
+py_test(
+    name = "export_test",
+    srcs = ["export_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":export_utils",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:constant_op",
+        "//tensorflow/python:control_flow_ops",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python/keras",
+        "//tensorflow/python/saved_model:signature_constants",
+        "//tensorflow/python/saved_model:signature_def_utils",
+    ],
+)
diff --git a/tensorflow/python/saved_model/model_utils/__init__.py b/tensorflow/python/saved_model/model_utils/__init__.py
new file mode 100644
index 0000000000..84540badb4
--- /dev/null
+++ b/tensorflow/python/saved_model/model_utils/__init__.py
@@ -0,0 +1,28 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utils for saving a Keras Model or Estimator to the SavedModel format."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+# pylint: disable=wildcard-import
+from tensorflow.python.saved_model.model_utils.export_output import *
+from tensorflow.python.saved_model.model_utils.export_utils import build_all_signature_defs
+from tensorflow.python.saved_model.model_utils.export_utils import export_outputs_for_mode
+from tensorflow.python.saved_model.model_utils.export_utils import EXPORT_TAG_MAP
+from tensorflow.python.saved_model.model_utils.export_utils import get_export_outputs
+from tensorflow.python.saved_model.model_utils.export_utils import get_temp_export_dir
+from tensorflow.python.saved_model.model_utils.export_utils import get_timestamped_export_dir
+# pylint: enable=wildcard-import
diff --git a/tensorflow/python/saved_model/model_utils/export_output.py b/tensorflow/python/saved_model/model_utils/export_output.py
new file mode 100644
index 0000000000..b571bad067
--- /dev/null
+++ b/tensorflow/python/saved_model/model_utils/export_output.py
@@ -0,0 +1,407 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Classes for different types of export output."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+
+import six
+
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.saved_model import signature_def_utils
+
+
+class ExportOutput(object):
+  """Represents an output of a model that can be served.
+
+  These typically correspond to model heads.
+  """
+
+  __metaclass__ = abc.ABCMeta
+
+  _SEPARATOR_CHAR = '/'
+
+  @abc.abstractmethod
+  def as_signature_def(self, receiver_tensors):
+    """Generate a SignatureDef proto for inclusion in a MetaGraphDef.
+
+    The SignatureDef will specify outputs as described in this ExportOutput,
+    and will use the provided receiver_tensors as inputs.
+
+    Args:
+      receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying
+        input nodes that will be fed.
+    """
+    pass
+
+  def _check_output_key(self, key, error_label):
+    # For multi-head models, the key can be a tuple.
+    if isinstance(key, tuple):
+      key = self._SEPARATOR_CHAR.join(key)
+
+    if not isinstance(key, six.string_types):
+      raise ValueError(
+          '{} output key must be a string; got {}.'.format(error_label, key))
+    return key
+
+  def _wrap_and_check_outputs(
+      self, outputs, single_output_default_name, error_label=None):
+    """Wraps raw tensors as dicts and checks type.
+
+    Note that we create a new dict here so that we can overwrite the keys
+    if necessary.
+
+    Args:
+      outputs: A `Tensor` or a dict of string to `Tensor`.
+      single_output_default_name: A string key for use in the output dict
+        if the provided `outputs` is a raw tensor.
+      error_label: descriptive string for use in error messages. If none,
+        single_output_default_name will be used.
+
+    Returns:
+      A dict of tensors
+
+    Raises:
+      ValueError: if the outputs dict keys are not strings or tuples of strings
+        or the values are not Tensors.
+    """
+    if not isinstance(outputs, dict):
+      outputs = {single_output_default_name: outputs}
+
+    output_dict = {}
+    for key, value in outputs.items():
+      error_name = error_label or single_output_default_name
+      key = self._check_output_key(key, error_name)
+      if not isinstance(value, ops.Tensor):
+        raise ValueError(
+            '{} output value must be a Tensor; got {}.'.format(
+                error_name, value))
+
+      output_dict[key] = value
+    return output_dict
+
+
+class ClassificationOutput(ExportOutput):
+  """Represents the output of a classification head.
+
+  Either classes or scores or both must be set.
+
+  The classes `Tensor` must provide string labels, not integer class IDs.
+
+  If only classes is set, it is interpreted as providing top-k results in
+  descending order.
+
+  If only scores is set, it is interpreted as providing a score for every class
+  in order of class ID.
+
+  If both classes and scores are set, they are interpreted as zipped, so each
+  score corresponds to the class at the same index.  Clients should not depend
+  on the order of the entries.
+  """
+
+  def __init__(self, scores=None, classes=None):
+    """Constructor for `ClassificationOutput`.
+
+    Args:
+      scores: A float `Tensor` giving scores (sometimes but not always
+          interpretable as probabilities) for each class.  May be `None`, but
+          only if `classes` is set.  Interpretation varies-- see class doc.
+      classes: A string `Tensor` giving predicted class labels.  May be `None`,
+          but only if `scores` is set.  Interpretation varies-- see class doc.
+
+    Raises:
+      ValueError: if neither classes nor scores is set, or one of them is not a
+          `Tensor` with the correct dtype.
+    """
+    if (scores is not None
+        and not (isinstance(scores, ops.Tensor)
+                 and scores.dtype.is_floating)):
+      raise ValueError('Classification scores must be a float32 Tensor; '
+                       'got {}'.format(scores))
+    if (classes is not None
+        and not (isinstance(classes, ops.Tensor)
+                 and dtypes.as_dtype(classes.dtype) == dtypes.string)):
+      raise ValueError('Classification classes must be a string Tensor; '
+                       'got {}'.format(classes))
+    if scores is None and classes is None:
+      raise ValueError('At least one of scores and classes must be set.')
+
+    self._scores = scores
+    self._classes = classes
+
+  @property
+  def scores(self):
+    return self._scores
+
+  @property
+  def classes(self):
+    return self._classes
+
+  def as_signature_def(self, receiver_tensors):
+    if len(receiver_tensors) != 1:
+      raise ValueError('Classification input must be a single string Tensor; '
+                       'got {}'.format(receiver_tensors))
+    (_, examples), = receiver_tensors.items()
+    if dtypes.as_dtype(examples.dtype) != dtypes.string:
+      raise ValueError('Classification input must be a single string Tensor; '
+                       'got {}'.format(receiver_tensors))
+    return signature_def_utils.classification_signature_def(
+        examples, self.classes, self.scores)
+
+
+class RegressionOutput(ExportOutput):
+  """Represents the output of a regression head."""
+
+  def __init__(self, value):
+    """Constructor for `RegressionOutput`.
+
+    Args:
+      value: a float `Tensor` giving the predicted values.  Required.
+
+    Raises:
+      ValueError: if the value is not a `Tensor` with dtype tf.float32.
+    """
+    if not (isinstance(value, ops.Tensor) and value.dtype.is_floating):
+      raise ValueError('Regression output value must be a float32 Tensor; '
+                       'got {}'.format(value))
+    self._value = value
+
+  @property
+  def value(self):
+    return self._value
+
+  def as_signature_def(self, receiver_tensors):
+    if len(receiver_tensors) != 1:
+      raise ValueError('Regression input must be a single string Tensor; '
+                       'got {}'.format(receiver_tensors))
+    (_, examples), = receiver_tensors.items()
+    if dtypes.as_dtype(examples.dtype) != dtypes.string:
+      raise ValueError('Regression input must be a single string Tensor; '
+                       'got {}'.format(receiver_tensors))
+    return signature_def_utils.regression_signature_def(examples, self.value)
+
+
+class PredictOutput(ExportOutput):
+  """Represents the output of a generic prediction head.
+
+  A generic prediction need not be either a classification or a regression.
+
+  Named outputs must be provided as a dict from string to `Tensor`,
+  """
+  _SINGLE_OUTPUT_DEFAULT_NAME = 'output'
+
+  def __init__(self, outputs):
+    """Constructor for PredictOutput.
+
+    Args:
+      outputs: A `Tensor` or a dict of string to `Tensor` representing the
+        predictions.
+
+    Raises:
+      ValueError: if the outputs is not dict, or any of its keys are not
+          strings, or any of its values are not `Tensor`s.
+    """
+
+    self._outputs = self._wrap_and_check_outputs(
+        outputs, self._SINGLE_OUTPUT_DEFAULT_NAME, error_label='Prediction')
+
+  @property
+  def outputs(self):
+    return self._outputs
+
+  def as_signature_def(self, receiver_tensors):
+    return signature_def_utils.predict_signature_def(receiver_tensors,
+                                                     self.outputs)
+
+
+class _SupervisedOutput(ExportOutput):
+  """Represents the output of a supervised training or eval process."""
+  __metaclass__ = abc.ABCMeta
+
+  LOSS_NAME = 'loss'
+  PREDICTIONS_NAME = 'predictions'
+  METRICS_NAME = 'metrics'
+
+  METRIC_VALUE_SUFFIX = 'value'
+  METRIC_UPDATE_SUFFIX = 'update_op'
+
+  _loss = None
+  _predictions = None
+  _metrics = None
+
+  def __init__(self, loss=None, predictions=None, metrics=None):
+    """Constructor for SupervisedOutput (ie, Train or Eval output).
+
+    Args:
+      loss: dict of Tensors or single Tensor representing calculated loss.
+      predictions: dict of Tensors or single Tensor representing model
+        predictions.
+      metrics: Dict of metric results keyed by name.
+        The values of the dict can be one of the following:
+        (1) instance of `Metric` class.
+        (2) (metric_value, update_op) tuples, or a single tuple.
+        metric_value must be a Tensor, and update_op must be a Tensor or Op.
+
+    Raises:
+      ValueError: if any of the outputs' dict keys are not strings or tuples of
+        strings or the values are not Tensors (or Operations in the case of
+        update_op).
+    """
+
+    if loss is not None:
+      loss_dict = self._wrap_and_check_outputs(loss, self.LOSS_NAME)
+      self._loss = self._prefix_output_keys(loss_dict, self.LOSS_NAME)
+    if predictions is not None:
+      pred_dict = self._wrap_and_check_outputs(
+          predictions, self.PREDICTIONS_NAME)
+      self._predictions = self._prefix_output_keys(
+          pred_dict, self.PREDICTIONS_NAME)
+    if metrics is not None:
+      self._metrics = self._wrap_and_check_metrics(metrics)
+
+  def _prefix_output_keys(self, output_dict, output_name):
+    """Prepend output_name to the output_dict keys if it doesn't exist.
+
+    This produces predictable prefixes for the pre-determined outputs
+    of SupervisedOutput.
+
+    Args:
+      output_dict: dict of string to Tensor, assumed valid.
+      output_name: prefix string to prepend to existing keys.
+
+    Returns:
+      dict with updated keys and existing values.
+    """
+
+    new_outputs = {}
+    for key, val in output_dict.items():
+      key = self._prefix_key(key, output_name)
+      new_outputs[key] = val
+    return new_outputs
+
+  def _prefix_key(self, key, output_name):
+    if key.find(output_name) != 0:
+      key = output_name + self._SEPARATOR_CHAR + key
+    return key
+
+  def _wrap_and_check_metrics(self, metrics):
+    """Handle the saving of metrics.
+
+    Metrics is either a tuple of (value, update_op), or a dict of such tuples.
+    Here, we separate out the tuples and create a dict with names to tensors.
+
+    Args:
+      metrics: Dict of metric results keyed by name.
+        The values of the dict can be one of the following:
+        (1) instance of `Metric` class.
+        (2) (metric_value, update_op) tuples, or a single tuple.
+        metric_value must be a Tensor, and update_op must be a Tensor or Op.
+
+    Returns:
+      dict of output_names to tensors
+
+    Raises:
+      ValueError: if the dict key is not a string, or the metric values or ops
+        are not tensors.
+    """
+    if not isinstance(metrics, dict):
+      metrics = {self.METRICS_NAME: metrics}
+
+    outputs = {}
+    for key, value in metrics.items():
+      if isinstance(value, tuple):
+        metric_val, metric_op = value
+      else:  # value is a keras.Metrics object
+        metric_val = value.result()
+        assert len(value.updates) == 1  # We expect only one update op.
+        metric_op = value.updates[0]
+      key = self._check_output_key(key, self.METRICS_NAME)
+      key = self._prefix_key(key, self.METRICS_NAME)
+
+      val_name = key + self._SEPARATOR_CHAR + self.METRIC_VALUE_SUFFIX
+      op_name = key + self._SEPARATOR_CHAR + self.METRIC_UPDATE_SUFFIX
+      if not isinstance(metric_val, ops.Tensor):
+        raise ValueError(
+            '{} output value must be a Tensor; got {}.'.format(
+                key, metric_val))
+      if (not isinstance(metric_op, ops.Tensor) and
+          not isinstance(metric_op, ops.Operation)):
+        raise ValueError(
+            '{} update_op must be a Tensor or Operation; got {}.'.format(
+                key, metric_op))
+
+      # We must wrap any ops in a Tensor before export, as the SignatureDef
+      # proto expects tensors only. See b/109740581
+      metric_op_tensor = metric_op
+      if isinstance(metric_op, ops.Operation):
+        with ops.control_dependencies([metric_op]):
+          metric_op_tensor = constant_op.constant([], name='metric_op_wrapper')
+
+      outputs[val_name] = metric_val
+      outputs[op_name] = metric_op_tensor
+
+    return outputs
+
+  @property
+  def loss(self):
+    return self._loss
+
+  @property
+  def predictions(self):
+    return self._predictions
+
+  @property
+  def metrics(self):
+    return self._metrics
+
+  @abc.abstractmethod
+  def _get_signature_def_fn(self):
+    """Returns a function that produces a SignatureDef given desired outputs."""
+    pass
+
+  def as_signature_def(self, receiver_tensors):
+    signature_def_fn = self._get_signature_def_fn()
+    return signature_def_fn(
+        receiver_tensors, self.loss, self.predictions, self.metrics)
+
+
+class TrainOutput(_SupervisedOutput):
+  """Represents the output of a supervised training process.
+
+  This class generates the appropriate signature def for exporting
+  training output by type-checking and wrapping loss, predictions, and metrics
+  values.
+  """
+
+  def _get_signature_def_fn(self):
+    return signature_def_utils.supervised_train_signature_def
+
+
+class EvalOutput(_SupervisedOutput):
+  """Represents the output of a supervised eval process.
+
+  This class generates the appropriate signature def for exporting
+  eval output by type-checking and wrapping loss, predictions, and metrics
+  values.
+  """
+
+  def _get_signature_def_fn(self):
+    return signature_def_utils.supervised_eval_signature_def
diff --git a/tensorflow/python/saved_model/model_utils/export_output_test.py b/tensorflow/python/saved_model/model_utils/export_output_test.py
new file mode 100644
index 0000000000..5262e9fa1e
--- /dev/null
+++ b/tensorflow/python/saved_model/model_utils/export_output_test.py
@@ -0,0 +1,405 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for export."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.core.framework import tensor_shape_pb2
+from tensorflow.core.framework import types_pb2
+from tensorflow.core.protobuf import meta_graph_pb2
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.keras import metrics as metrics_module
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model.model_utils import export_output as export_output_lib
+
+
+class ExportOutputTest(test.TestCase):
+
+  def test_regress_value_must_be_float(self):
+    with context.graph_mode():
+      value = array_ops.placeholder(dtypes.string, 1, name='output-tensor-1')
+      with self.assertRaisesRegexp(
+          ValueError, 'Regression output value must be a float32 Tensor'):
+        export_output_lib.RegressionOutput(value)
+
+  def test_classify_classes_must_be_strings(self):
+    with context.graph_mode():
+      classes = array_ops.placeholder(dtypes.float32, 1, name='output-tensor-1')
+      with self.assertRaisesRegexp(
+          ValueError, 'Classification classes must be a string Tensor'):
+        export_output_lib.ClassificationOutput(classes=classes)
+
+  def test_classify_scores_must_be_float(self):
+    with context.graph_mode():
+      scores = array_ops.placeholder(dtypes.string, 1, name='output-tensor-1')
+      with self.assertRaisesRegexp(
+          ValueError, 'Classification scores must be a float32 Tensor'):
+        export_output_lib.ClassificationOutput(scores=scores)
+
+  def test_classify_requires_classes_or_scores(self):
+    with self.assertRaisesRegexp(
+        ValueError, 'At least one of scores and classes must be set.'):
+      export_output_lib.ClassificationOutput()
+
+  def test_build_standardized_signature_def_regression(self):
+    with context.graph_mode():
+      input_tensors = {
+          'input-1':
+              array_ops.placeholder(
+                  dtypes.string, 1, name='input-tensor-1')
+      }
+      value = array_ops.placeholder(dtypes.float32, 1, name='output-tensor-1')
+
+      export_output = export_output_lib.RegressionOutput(value)
+      actual_signature_def = export_output.as_signature_def(input_tensors)
+
+      expected_signature_def = meta_graph_pb2.SignatureDef()
+      shape = tensor_shape_pb2.TensorShapeProto(
+          dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
+      dtype_float = types_pb2.DataType.Value('DT_FLOAT')
+      dtype_string = types_pb2.DataType.Value('DT_STRING')
+      expected_signature_def.inputs[
+          signature_constants.REGRESS_INPUTS].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='input-tensor-1:0',
+                                        dtype=dtype_string,
+                                        tensor_shape=shape))
+      expected_signature_def.outputs[
+          signature_constants.REGRESS_OUTPUTS].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='output-tensor-1:0',
+                                        dtype=dtype_float,
+                                        tensor_shape=shape))
+
+      expected_signature_def.method_name = (
+          signature_constants.REGRESS_METHOD_NAME)
+      self.assertEqual(actual_signature_def, expected_signature_def)
+
+  def test_build_standardized_signature_def_classify_classes_only(self):
+    """Tests classification with one output tensor."""
+    with context.graph_mode():
+      input_tensors = {
+          'input-1':
+              array_ops.placeholder(
+                  dtypes.string, 1, name='input-tensor-1')
+      }
+      classes = array_ops.placeholder(dtypes.string, 1, name='output-tensor-1')
+
+      export_output = export_output_lib.ClassificationOutput(classes=classes)
+      actual_signature_def = export_output.as_signature_def(input_tensors)
+
+      expected_signature_def = meta_graph_pb2.SignatureDef()
+      shape = tensor_shape_pb2.TensorShapeProto(
+          dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
+      dtype_string = types_pb2.DataType.Value('DT_STRING')
+      expected_signature_def.inputs[
+          signature_constants.CLASSIFY_INPUTS].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='input-tensor-1:0',
+                                        dtype=dtype_string,
+                                        tensor_shape=shape))
+      expected_signature_def.outputs[
+          signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='output-tensor-1:0',
+                                        dtype=dtype_string,
+                                        tensor_shape=shape))
+
+      expected_signature_def.method_name = (
+          signature_constants.CLASSIFY_METHOD_NAME)
+      self.assertEqual(actual_signature_def, expected_signature_def)
+
+  def test_build_standardized_signature_def_classify_both(self):
+    """Tests multiple output tensors that include classes and scores."""
+    with context.graph_mode():
+      input_tensors = {
+          'input-1':
+              array_ops.placeholder(
+                  dtypes.string, 1, name='input-tensor-1')
+      }
+      classes = array_ops.placeholder(dtypes.string, 1,
+                                      name='output-tensor-classes')
+      scores = array_ops.placeholder(dtypes.float32, 1,
+                                     name='output-tensor-scores')
+
+      export_output = export_output_lib.ClassificationOutput(
+          scores=scores, classes=classes)
+      actual_signature_def = export_output.as_signature_def(input_tensors)
+
+      expected_signature_def = meta_graph_pb2.SignatureDef()
+      shape = tensor_shape_pb2.TensorShapeProto(
+          dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
+      dtype_float = types_pb2.DataType.Value('DT_FLOAT')
+      dtype_string = types_pb2.DataType.Value('DT_STRING')
+      expected_signature_def.inputs[
+          signature_constants.CLASSIFY_INPUTS].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='input-tensor-1:0',
+                                        dtype=dtype_string,
+                                        tensor_shape=shape))
+      expected_signature_def.outputs[
+          signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='output-tensor-classes:0',
+                                        dtype=dtype_string,
+                                        tensor_shape=shape))
+      expected_signature_def.outputs[
+          signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='output-tensor-scores:0',
+                                        dtype=dtype_float,
+                                        tensor_shape=shape))
+
+      expected_signature_def.method_name = (
+          signature_constants.CLASSIFY_METHOD_NAME)
+      self.assertEqual(actual_signature_def, expected_signature_def)
+
+  def test_build_standardized_signature_def_classify_scores_only(self):
+    """Tests classification without classes tensor."""
+    with context.graph_mode():
+      input_tensors = {
+          'input-1':
+              array_ops.placeholder(
+                  dtypes.string, 1, name='input-tensor-1')
+      }
+
+      scores = array_ops.placeholder(dtypes.float32, 1,
+                                     name='output-tensor-scores')
+
+      export_output = export_output_lib.ClassificationOutput(
+          scores=scores)
+      actual_signature_def = export_output.as_signature_def(input_tensors)
+
+      expected_signature_def = meta_graph_pb2.SignatureDef()
+      shape = tensor_shape_pb2.TensorShapeProto(
+          dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)])
+      dtype_float = types_pb2.DataType.Value('DT_FLOAT')
+      dtype_string = types_pb2.DataType.Value('DT_STRING')
+      expected_signature_def.inputs[
+          signature_constants.CLASSIFY_INPUTS].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='input-tensor-1:0',
+                                        dtype=dtype_string,
+                                        tensor_shape=shape))
+      expected_signature_def.outputs[
+          signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom(
+              meta_graph_pb2.TensorInfo(name='output-tensor-scores:0',
+                                        dtype=dtype_float,
+                                        tensor_shape=shape))
+
+      expected_signature_def.method_name = (
+          signature_constants.CLASSIFY_METHOD_NAME)
+      self.assertEqual(actual_signature_def, expected_signature_def)
+
+  def test_predict_outputs_valid(self):
+    """Tests that no errors are raised when provided outputs are valid."""
+    outputs = {
+        'output0': constant_op.constant([0]),
+        u'output1': constant_op.constant(['foo']),
+    }
+    export_output_lib.PredictOutput(outputs)
+
+    # Single Tensor is OK too
+    export_output_lib.PredictOutput(constant_op.constant([0]))
+
+  def test_predict_outputs_invalid(self):
+    with self.assertRaisesRegexp(
+        ValueError,
+        'Prediction output key must be a string'):
+      export_output_lib.PredictOutput({1: constant_op.constant([0])})
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        'Prediction output value must be a Tensor'):
+      export_output_lib.PredictOutput({
+          'prediction1': sparse_tensor.SparseTensor(
+              indices=[[0, 0]], values=[1], dense_shape=[1, 1]),
+      })
+
+
+class MockSupervisedOutput(export_output_lib._SupervisedOutput):
+  """So that we can test the abstract class methods directly."""
+
+  def _get_signature_def_fn(self):
+    pass
+
+
+class SupervisedOutputTest(test.TestCase):
+
+  def test_supervised_outputs_valid(self):
+    """Tests that no errors are raised when provided outputs are valid."""
+    with context.graph_mode():
+      loss = {'my_loss': constant_op.constant([0])}
+      predictions = {u'output1': constant_op.constant(['foo'])}
+      metric_obj = metrics_module.Mean()
+      metric_obj.update_state(constant_op.constant([0]))
+      metrics = {
+          'metrics': metric_obj,
+          'metrics2': (constant_op.constant([0]), constant_op.constant([10]))
+      }
+
+      outputter = MockSupervisedOutput(loss, predictions, metrics)
+      self.assertEqual(outputter.loss['loss/my_loss'], loss['my_loss'])
+      self.assertEqual(
+          outputter.predictions['predictions/output1'], predictions['output1'])
+      self.assertEqual(outputter.metrics['metrics/update_op'].name,
+                       'metric_op_wrapper:0')
+      self.assertEqual(
+          outputter.metrics['metrics2/update_op'], metrics['metrics2'][1])
+
+      # Single Tensor is OK too
+      outputter = MockSupervisedOutput(
+          loss['my_loss'], predictions['output1'], metrics['metrics'])
+      self.assertEqual(outputter.loss, {'loss': loss['my_loss']})
+      self.assertEqual(
+          outputter.predictions, {'predictions': predictions['output1']})
+      self.assertEqual(outputter.metrics['metrics/update_op'].name,
+                       'metric_op_wrapper_1:0')
+
+  def test_supervised_outputs_none(self):
+    outputter = MockSupervisedOutput(
+        constant_op.constant([0]), None, None)
+    self.assertEqual(len(outputter.loss), 1)
+    self.assertEqual(outputter.predictions, None)
+    self.assertEqual(outputter.metrics, None)
+
+  def test_supervised_outputs_invalid(self):
+    with self.assertRaisesRegexp(ValueError, 'predictions output value must'):
+      MockSupervisedOutput(constant_op.constant([0]), [3], None)
+    with self.assertRaisesRegexp(ValueError, 'loss output value must'):
+      MockSupervisedOutput('str', None, None)
+    with self.assertRaisesRegexp(ValueError, 'metrics output value must'):
+      MockSupervisedOutput(None, None, (15.3, 4))
+    with self.assertRaisesRegexp(ValueError, 'loss output key must'):
+      MockSupervisedOutput({25: 'Tensor'}, None, None)
+
+  def test_supervised_outputs_tuples(self):
+    """Tests that no errors are raised when provided outputs are valid."""
+    with context.graph_mode():
+      loss = {('my', 'loss'): constant_op.constant([0])}
+      predictions = {(u'output1', '2'): constant_op.constant(['foo'])}
+      metric_obj = metrics_module.Mean()
+      metric_obj.update_state(constant_op.constant([0]))
+      metrics = {
+          ('metrics', '1'):
+              metric_obj,
+          ('metrics', '2'): (constant_op.constant([0]),
+                             constant_op.constant([10]))
+      }
+
+      outputter = MockSupervisedOutput(loss, predictions, metrics)
+      self.assertEqual(set(outputter.loss.keys()), set(['loss/my/loss']))
+      self.assertEqual(set(outputter.predictions.keys()),
+                       set(['predictions/output1/2']))
+      self.assertEqual(
+          set(outputter.metrics.keys()),
+          set([
+              'metrics/1/value', 'metrics/1/update_op', 'metrics/2/value',
+              'metrics/2/update_op'
+          ]))
+
+  def test_supervised_outputs_no_prepend(self):
+    """Tests that no errors are raised when provided outputs are valid."""
+    with context.graph_mode():
+      loss = {'loss': constant_op.constant([0])}
+      predictions = {u'predictions': constant_op.constant(['foo'])}
+      metric_obj = metrics_module.Mean()
+      metric_obj.update_state(constant_op.constant([0]))
+      metrics = {
+          'metrics_1': metric_obj,
+          'metrics_2': (constant_op.constant([0]), constant_op.constant([10]))
+      }
+
+      outputter = MockSupervisedOutput(loss, predictions, metrics)
+      self.assertEqual(set(outputter.loss.keys()), set(['loss']))
+      self.assertEqual(set(outputter.predictions.keys()), set(['predictions']))
+      self.assertEqual(
+          set(outputter.metrics.keys()),
+          set([
+              'metrics_1/value', 'metrics_1/update_op', 'metrics_2/update_op',
+              'metrics_2/value'
+          ]))
+
+  def test_train_signature_def(self):
+    with context.graph_mode():
+      loss = {'my_loss': constant_op.constant([0])}
+      predictions = {u'output1': constant_op.constant(['foo'])}
+      metric_obj = metrics_module.Mean()
+      metric_obj.update_state(constant_op.constant([0]))
+      metrics = {
+          'metrics_1': metric_obj,
+          'metrics_2': (constant_op.constant([0]), constant_op.constant([10]))
+      }
+
+      outputter = export_output_lib.TrainOutput(loss, predictions, metrics)
+
+      receiver = {u'features': constant_op.constant(100, shape=(100, 2)),
+                  'labels': constant_op.constant(100, shape=(100, 1))}
+      sig_def = outputter.as_signature_def(receiver)
+
+      self.assertTrue('loss/my_loss' in sig_def.outputs)
+      self.assertTrue('metrics_1/value' in sig_def.outputs)
+      self.assertTrue('metrics_2/value' in sig_def.outputs)
+      self.assertTrue('predictions/output1' in sig_def.outputs)
+      self.assertTrue('features' in sig_def.inputs)
+
+  def test_eval_signature_def(self):
+    with context.graph_mode():
+      loss = {'my_loss': constant_op.constant([0])}
+      predictions = {u'output1': constant_op.constant(['foo'])}
+
+      outputter = export_output_lib.EvalOutput(loss, predictions, None)
+
+      receiver = {u'features': constant_op.constant(100, shape=(100, 2)),
+                  'labels': constant_op.constant(100, shape=(100, 1))}
+      sig_def = outputter.as_signature_def(receiver)
+
+      self.assertTrue('loss/my_loss' in sig_def.outputs)
+      self.assertFalse('metrics/value' in sig_def.outputs)
+      self.assertTrue('predictions/output1' in sig_def.outputs)
+      self.assertTrue('features' in sig_def.inputs)
+
+  def test_metric_op_is_tensor(self):
+    """Tests that ops.Operation is wrapped by a tensor for metric_ops."""
+    with context.graph_mode():
+      loss = {'my_loss': constant_op.constant([0])}
+      predictions = {u'output1': constant_op.constant(['foo'])}
+      metric_obj = metrics_module.Mean()
+      metric_obj.update_state(constant_op.constant([0]))
+      metrics = {
+          'metrics_1': metric_obj,
+          'metrics_2': (constant_op.constant([0]), control_flow_ops.no_op())
+      }
+
+      outputter = MockSupervisedOutput(loss, predictions, metrics)
+
+      self.assertTrue(outputter.metrics['metrics_1/update_op'].name.startswith(
+          'metric_op_wrapper'))
+      self.assertTrue(
+          isinstance(outputter.metrics['metrics_1/update_op'], ops.Tensor))
+      self.assertTrue(
+          isinstance(outputter.metrics['metrics_1/value'], ops.Tensor))
+
+      self.assertEqual(outputter.metrics['metrics_2/value'],
+                       metrics['metrics_2'][0])
+      self.assertTrue(outputter.metrics['metrics_2/update_op'].name.startswith(
+          'metric_op_wrapper'))
+      self.assertTrue(
+          isinstance(outputter.metrics['metrics_2/update_op'], ops.Tensor))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/python/saved_model/model_utils/export_test.py b/tensorflow/python/saved_model/model_utils/export_test.py
new file mode 100644
index 0000000000..776bfff886
--- /dev/null
+++ b/tensorflow/python/saved_model/model_utils/export_test.py
@@ -0,0 +1,257 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for export utils."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import tempfile
+import time
+
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import signature_def_utils
+from tensorflow.python.saved_model.model_utils import export_output
+from tensorflow.python.saved_model.model_utils import export_utils
+
+
+class LabeledTensorMock(object):
+  """Mock class emulating LabeledTensor."""
+
+  def __init__(self):
+    self.tensor = constant_op.constant([1])
+
+
+def _convert_labeled_tensor_mock_to_tensor(value, *args, **kwargs):
+  return ops.internal_convert_to_tensor(value.tensor, *args, **kwargs)
+
+
+ops.register_tensor_conversion_function(LabeledTensorMock,
+                                        _convert_labeled_tensor_mock_to_tensor)
+
+
+class ExportTest(test_util.TensorFlowTestCase):
+
+  def test_build_all_signature_defs_without_receiver_alternatives(self):
+    with context.graph_mode():
+      receiver_tensor = array_ops.placeholder(dtypes.string)
+      output_1 = constant_op.constant([1.])
+      output_2 = constant_op.constant(["2"])
+      output_3 = constant_op.constant(["3"])
+      export_outputs = {
+          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+              export_output.RegressionOutput(value=output_1),
+          "head-2": export_output.ClassificationOutput(classes=output_2),
+          "head-3": export_output.PredictOutput(outputs={
+              "some_output_3": output_3
+          }),
+      }
+
+      signature_defs = export_utils.build_all_signature_defs(
+          receiver_tensor, export_outputs)
+
+      expected_signature_defs = {
+          "serving_default":
+              signature_def_utils.regression_signature_def(receiver_tensor,
+                                                           output_1),
+          "head-2":
+              signature_def_utils.classification_signature_def(receiver_tensor,
+                                                               output_2, None),
+          "head-3":
+              signature_def_utils.predict_signature_def({
+                  "input": receiver_tensor
+              }, {"some_output_3": output_3})
+      }
+
+      self.assertDictEqual(expected_signature_defs, signature_defs)
+
+  def test_build_all_signature_defs_with_dict_alternatives(self):
+    with context.graph_mode():
+      receiver_tensor = array_ops.placeholder(dtypes.string)
+      receiver_tensors_alternative_1 = {
+          "foo": array_ops.placeholder(dtypes.int64),
+          "bar": array_ops.sparse_placeholder(dtypes.float32)}
+      receiver_tensors_alternatives = {"other": receiver_tensors_alternative_1}
+      output_1 = constant_op.constant([1.])
+      output_2 = constant_op.constant(["2"])
+      output_3 = constant_op.constant(["3"])
+      export_outputs = {
+          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+              export_output.RegressionOutput(value=output_1),
+          "head-2": export_output.ClassificationOutput(classes=output_2),
+          "head-3": export_output.PredictOutput(outputs={
+              "some_output_3": output_3
+          }),
+      }
+
+      signature_defs = export_utils.build_all_signature_defs(
+          receiver_tensor, export_outputs, receiver_tensors_alternatives)
+
+      expected_signature_defs = {
+          "serving_default":
+              signature_def_utils.regression_signature_def(
+                  receiver_tensor,
+                  output_1),
+          "head-2":
+              signature_def_utils.classification_signature_def(
+                  receiver_tensor,
+                  output_2, None),
+          "head-3":
+              signature_def_utils.predict_signature_def(
+                  {"input": receiver_tensor},
+                  {"some_output_3": output_3}),
+          "other:head-3":
+              signature_def_utils.predict_signature_def(
+                  receiver_tensors_alternative_1,
+                  {"some_output_3": output_3})
+
+          # Note that the alternatives 'other:serving_default' and
+          # 'other:head-2' are invalid, because regession and classification
+          # signatures must take a single string input.  Here we verify that
+          # these invalid signatures are not included in the export_utils.
+      }
+
+      self.assertDictEqual(expected_signature_defs, signature_defs)
+
+  def test_build_all_signature_defs_with_single_alternatives(self):
+    with context.graph_mode():
+      receiver_tensor = array_ops.placeholder(dtypes.string)
+      receiver_tensors_alternative_1 = array_ops.placeholder(dtypes.int64)
+      receiver_tensors_alternative_2 = array_ops.sparse_placeholder(
+          dtypes.float32)
+      # Note we are passing single Tensors as values of
+      # receiver_tensors_alternatives, where normally that is a dict.
+      # In this case a dict will be created using the default receiver tensor
+      # name "input".
+      receiver_tensors_alternatives = {"other1": receiver_tensors_alternative_1,
+                                       "other2": receiver_tensors_alternative_2}
+      output_1 = constant_op.constant([1.])
+      output_2 = constant_op.constant(["2"])
+      output_3 = constant_op.constant(["3"])
+      export_outputs = {
+          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+              export_output.RegressionOutput(value=output_1),
+          "head-2": export_output.ClassificationOutput(classes=output_2),
+          "head-3": export_output.PredictOutput(outputs={
+              "some_output_3": output_3
+          }),
+      }
+
+    signature_defs = export_utils.build_all_signature_defs(
+        receiver_tensor, export_outputs, receiver_tensors_alternatives)
+
+    expected_signature_defs = {
+        "serving_default":
+            signature_def_utils.regression_signature_def(
+                receiver_tensor,
+                output_1),
+        "head-2":
+            signature_def_utils.classification_signature_def(
+                receiver_tensor,
+                output_2, None),
+        "head-3":
+            signature_def_utils.predict_signature_def(
+                {"input": receiver_tensor},
+                {"some_output_3": output_3}),
+        "other1:head-3":
+            signature_def_utils.predict_signature_def(
+                {"input": receiver_tensors_alternative_1},
+                {"some_output_3": output_3}),
+        "other2:head-3":
+            signature_def_utils.predict_signature_def(
+                {"input": receiver_tensors_alternative_2},
+                {"some_output_3": output_3})
+
+        # Note that the alternatives 'other:serving_default' and 'other:head-2'
+        # are invalid, because regession and classification signatures must take
+        # a single string input.  Here we verify that these invalid signatures
+        # are not included in the export_utils.
+    }
+
+    self.assertDictEqual(expected_signature_defs, signature_defs)
+
+  def test_build_all_signature_defs_export_outputs_required(self):
+    receiver_tensor = constant_op.constant(["11"])
+
+    with self.assertRaises(ValueError) as e:
+      export_utils.build_all_signature_defs(receiver_tensor, None)
+
+    self.assertTrue(str(e.exception).startswith(
+        "export_outputs must be a dict"))
+
+  def test_get_timestamped_export_dir(self):
+    export_dir_base = tempfile.mkdtemp() + "export/"
+    export_dir_1 = export_utils.get_timestamped_export_dir(
+        export_dir_base)
+    time.sleep(2)
+    export_dir_2 = export_utils.get_timestamped_export_dir(
+        export_dir_base)
+    time.sleep(2)
+    export_dir_3 = export_utils.get_timestamped_export_dir(
+        export_dir_base)
+
+    # Export directories should be named using a timestamp that is seconds
+    # since epoch.  Such a timestamp is 10 digits long.
+    time_1 = os.path.basename(export_dir_1)
+    self.assertEqual(10, len(time_1))
+    time_2 = os.path.basename(export_dir_2)
+    self.assertEqual(10, len(time_2))
+    time_3 = os.path.basename(export_dir_3)
+    self.assertEqual(10, len(time_3))
+
+    self.assertTrue(int(time_1) < int(time_2))
+    self.assertTrue(int(time_2) < int(time_3))
+
+  def test_build_all_signature_defs_serving_only(self):
+    with context.graph_mode():
+      receiver_tensor = {"input": array_ops.placeholder(dtypes.string)}
+      output_1 = constant_op.constant([1.])
+      export_outputs = {
+          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+              export_output.PredictOutput(outputs=output_1),
+          "train": export_output.TrainOutput(loss=output_1),
+      }
+
+      signature_defs = export_utils.build_all_signature_defs(
+          receiver_tensor, export_outputs)
+
+      expected_signature_defs = {
+          "serving_default": signature_def_utils.predict_signature_def(
+              receiver_tensor, {"output": output_1})
+      }
+
+      self.assertDictEqual(expected_signature_defs, signature_defs)
+
+      signature_defs = export_utils.build_all_signature_defs(
+          receiver_tensor, export_outputs, serving_only=False)
+
+      expected_signature_defs.update({
+          "train": signature_def_utils.supervised_train_signature_def(
+              receiver_tensor, loss={"loss": output_1})
+      })
+
+      self.assertDictEqual(expected_signature_defs, signature_defs)
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/saved_model/model_utils/export_utils.py b/tensorflow/python/saved_model/model_utils/export_utils.py
new file mode 100644
index 0000000000..4f8933758d
--- /dev/null
+++ b/tensorflow/python/saved_model/model_utils/export_utils.py
@@ -0,0 +1,340 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for creating SavedModels."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import os
+import time
+
+import six
+
+from tensorflow.python.platform import gfile
+from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import signature_constants
+from tensorflow.python.saved_model import signature_def_utils
+from tensorflow.python.saved_model import tag_constants
+from tensorflow.python.saved_model.model_utils import export_output as export_output_lib
+from tensorflow.python.training import mode_keys
+from tensorflow.python.util import compat
+
+
+# Mapping of the modes to appropriate MetaGraph tags in the SavedModel.
+EXPORT_TAG_MAP = {
+    mode_keys.ModeKeys.PREDICT: [tag_constants.SERVING],
+    mode_keys.ModeKeys.TRAIN: [tag_constants.TRAINING],
+    mode_keys.ModeKeys.TEST: [tag_constants.EVAL],
+}
+
+
+_SINGLE_FEATURE_DEFAULT_NAME = 'feature'
+_SINGLE_RECEIVER_DEFAULT_NAME = 'input'
+_SINGLE_LABEL_DEFAULT_NAME = 'label'
+
+### Below utilities are specific to SavedModel exports.
+
+
+def build_all_signature_defs(receiver_tensors,
+                             export_outputs,
+                             receiver_tensors_alternatives=None,
+                             serving_only=True):
+  """Build `SignatureDef`s for all export outputs.
+
+  Args:
+    receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying
+      input nodes where this receiver expects to be fed by default.  Typically,
+      this is a single placeholder expecting serialized `tf.Example` protos.
+    export_outputs: a dict of ExportOutput instances, each of which has
+      an as_signature_def instance method that will be called to retrieve
+      the signature_def for all export output tensors.
+    receiver_tensors_alternatives: a dict of string to additional
+      groups of receiver tensors, each of which may be a `Tensor` or a dict of
+      string to `Tensor`.  These named receiver tensor alternatives generate
+      additional serving signatures, which may be used to feed inputs at
+      different points within the input receiver subgraph.  A typical usage is
+      to allow feeding raw feature `Tensor`s *downstream* of the
+      tf.parse_example() op.  Defaults to None.
+    serving_only: boolean; if true, resulting signature defs will only include
+      valid serving signatures. If false, all requested signatures will be
+      returned.
+
+  Returns:
+    signature_def representing all passed args.
+
+  Raises:
+    ValueError: if export_outputs is not a dict
+  """
+  if not isinstance(receiver_tensors, dict):
+    receiver_tensors = {_SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors}
+  if export_outputs is None or not isinstance(export_outputs, dict):
+    raise ValueError('export_outputs must be a dict and not'
+                     '{}'.format(type(export_outputs)))
+
+  signature_def_map = {}
+  excluded_signatures = {}
+  for output_key, export_output in export_outputs.items():
+    signature_name = '{}'.format(output_key or 'None')
+    try:
+      signature = export_output.as_signature_def(receiver_tensors)
+      signature_def_map[signature_name] = signature
+    except ValueError as e:
+      excluded_signatures[signature_name] = str(e)
+
+  if receiver_tensors_alternatives:
+    for receiver_name, receiver_tensors_alt in (
+        six.iteritems(receiver_tensors_alternatives)):
+      if not isinstance(receiver_tensors_alt, dict):
+        receiver_tensors_alt = {
+            _SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors_alt
+        }
+      for output_key, export_output in export_outputs.items():
+        signature_name = '{}:{}'.format(receiver_name or 'None', output_key or
+                                        'None')
+        try:
+          signature = export_output.as_signature_def(receiver_tensors_alt)
+          signature_def_map[signature_name] = signature
+        except ValueError as e:
+          excluded_signatures[signature_name] = str(e)
+
+  _log_signature_report(signature_def_map, excluded_signatures)
+
+  # The above calls to export_output_lib.as_signature_def should return only
+  # valid signatures; if there is a validity problem, they raise a ValueError,
+  # in which case we exclude that signature from signature_def_map above.
+  # The is_valid_signature check ensures that the signatures produced are
+  # valid for serving, and acts as an additional sanity check for export
+  # signatures produced for serving. We skip this check for training and eval
+  # signatures, which are not intended for serving.
+  if serving_only:
+    signature_def_map = {
+        k: v
+        for k, v in signature_def_map.items()
+        if signature_def_utils.is_valid_signature(v)
+    }
+  return signature_def_map
+
+
+_FRIENDLY_METHOD_NAMES = {
+    signature_constants.CLASSIFY_METHOD_NAME: 'Classify',
+    signature_constants.REGRESS_METHOD_NAME: 'Regress',
+    signature_constants.PREDICT_METHOD_NAME: 'Predict',
+    signature_constants.SUPERVISED_TRAIN_METHOD_NAME: 'Train',
+    signature_constants.SUPERVISED_EVAL_METHOD_NAME: 'Eval',
+}
+
+
+def _log_signature_report(signature_def_map, excluded_signatures):
+  """Log a report of which signatures were produced."""
+  sig_names_by_method_name = collections.defaultdict(list)
+
+  # We'll collect whatever method_names are present, but also we want to make
+  # sure to output a line for each of the three standard methods even if they
+  # have no signatures.
+  for method_name in _FRIENDLY_METHOD_NAMES:
+    sig_names_by_method_name[method_name] = []
+
+  for signature_name, sig in signature_def_map.items():
+    sig_names_by_method_name[sig.method_name].append(signature_name)
+
+  # TODO(b/67733540): consider printing the full signatures, not just names
+  for method_name, sig_names in sig_names_by_method_name.items():
+    if method_name in _FRIENDLY_METHOD_NAMES:
+      method_name = _FRIENDLY_METHOD_NAMES[method_name]
+    logging.info('Signatures INCLUDED in export for {}: {}'.format(
+        method_name, sig_names if sig_names else 'None'))
+
+  if excluded_signatures:
+    logging.info('Signatures EXCLUDED from export because they cannot be '
+                 'be served via TensorFlow Serving APIs:')
+    for signature_name, message in excluded_signatures.items():
+      logging.info('\'{}\' : {}'.format(signature_name, message))
+
+  if not signature_def_map:
+    logging.warn('Export includes no signatures!')
+  elif (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY not in
+        signature_def_map):
+    logging.warn('Export includes no default signature!')
+
+
+# When we create a timestamped directory, there is a small chance that the
+# directory already exists because another process is also creating these
+# directories. In this case we just wait one second to get a new timestamp and
+# try again. If this fails several times in a row, then something is seriously
+# wrong.
+MAX_DIRECTORY_CREATION_ATTEMPTS = 10
+
+
+def get_timestamped_export_dir(export_dir_base):
+  """Builds a path to a new subdirectory within the base directory.
+
+  Each export is written into a new subdirectory named using the
+  current time.  This guarantees monotonically increasing version
+  numbers even across multiple runs of the pipeline.
+  The timestamp used is the number of seconds since epoch UTC.
+
+  Args:
+    export_dir_base: A string containing a directory to write the exported
+        graph and checkpoints.
+  Returns:
+    The full path of the new subdirectory (which is not actually created yet).
+
+  Raises:
+    RuntimeError: if repeated attempts fail to obtain a unique timestamped
+      directory name.
+  """
+  attempts = 0
+  while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS:
+    timestamp = int(time.time())
+
+    result_dir = os.path.join(
+        compat.as_bytes(export_dir_base), compat.as_bytes(str(timestamp)))
+    if not gfile.Exists(result_dir):
+      # Collisions are still possible (though extremely unlikely): this
+      # directory is not actually created yet, but it will be almost
+      # instantly on return from this function.
+      return result_dir
+    time.sleep(1)
+    attempts += 1
+    logging.warn('Directory {} already exists; retrying (attempt {}/{})'.format(
+        result_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS))
+  raise RuntimeError('Failed to obtain a unique export directory name after '
+                     '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS))
+
+
+def get_temp_export_dir(timestamped_export_dir):
+  """Builds a directory name based on the argument but starting with 'temp-'.
+
+  This relies on the fact that TensorFlow Serving ignores subdirectories of
+  the base directory that can't be parsed as integers.
+
+  Args:
+    timestamped_export_dir: the name of the eventual export directory, e.g.
+      /foo/bar/<timestamp>
+
+  Returns:
+    A sister directory prefixed with 'temp-', e.g. /foo/bar/temp-<timestamp>.
+  """
+  (dirname, basename) = os.path.split(timestamped_export_dir)
+  temp_export_dir = os.path.join(
+      compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename)))
+  return temp_export_dir
+
+
+def export_outputs_for_mode(
+    mode, serving_export_outputs=None, predictions=None, loss=None,
+    metrics=None):
+  """Util function for constructing a `ExportOutput` dict given a mode.
+
+  The returned dict can be directly passed to `build_all_signature_defs` helper
+  function as the `export_outputs` argument, used for generating a SignatureDef
+  map.
+
+  Args:
+    mode: A `ModeKeys` specifying the mode.
+    serving_export_outputs: Describes the output signatures to be exported to
+      `SavedModel` and used during serving. Should be a dict or None.
+    predictions: A dict of Tensors or single Tensor representing model
+        predictions. This argument is only used if serving_export_outputs is not
+        set.
+    loss: A dict of Tensors or single Tensor representing calculated loss.
+    metrics: A dict of (metric_value, update_op) tuples, or a single tuple.
+      metric_value must be a Tensor, and update_op must be a Tensor or Op
+
+  Returns:
+    Dictionary mapping the a key to an `tf.estimator.export.ExportOutput` object
+    The key is the expected SignatureDef key for the mode.
+
+  Raises:
+    ValueError: if an appropriate ExportOutput cannot be found for the mode.
+  """
+  # TODO(b/113185250): move all model export helper functions into an util file.
+  if mode == mode_keys.ModeKeys.PREDICT:
+    return get_export_outputs(serving_export_outputs, predictions)
+  elif mode == mode_keys.ModeKeys.TRAIN:
+    return {mode: export_output_lib.TrainOutput(
+        loss=loss, predictions=predictions, metrics=metrics)}
+  elif mode == mode_keys.ModeKeys.TEST:
+    return {mode: export_output_lib.EvalOutput(
+        loss=loss, predictions=predictions, metrics=metrics)}
+  else:
+    raise ValueError(
+        'Export output type not found for mode: {}'.format(mode))
+
+
+def get_export_outputs(export_outputs, predictions):
+  """Validate export_outputs or create default export_outputs.
+
+  Args:
+    export_outputs: Describes the output signatures to be exported to
+      `SavedModel` and used during serving. Should be a dict or None.
+    predictions:  Predictions `Tensor` or dict of `Tensor`.
+
+  Returns:
+    Valid export_outputs dict
+
+  Raises:
+    TypeError: if export_outputs is not a dict or its values are not
+      ExportOutput instances.
+  """
+  if export_outputs is None:
+    default_output = export_output_lib.PredictOutput(predictions)
+    export_outputs = {
+        signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: default_output}
+
+  if not isinstance(export_outputs, dict):
+    raise TypeError('export_outputs must be dict, given: {}'.format(
+        export_outputs))
+  for v in six.itervalues(export_outputs):
+    if not isinstance(v, export_output_lib.ExportOutput):
+      raise TypeError(
+          'Values in export_outputs must be ExportOutput objects. '
+          'Given: {}'.format(export_outputs))
+
+  _maybe_add_default_serving_output(export_outputs)
+
+  return export_outputs
+
+
+def _maybe_add_default_serving_output(export_outputs):
+  """Add a default serving output to the export_outputs if not present.
+
+  Args:
+    export_outputs: Describes the output signatures to be exported to
+      `SavedModel` and used during serving. Should be a dict.
+
+  Returns:
+    export_outputs dict with default serving signature added if necessary
+
+  Raises:
+    ValueError: if multiple export_outputs were provided without a default
+      serving key.
+  """
+  if len(export_outputs) == 1:
+    (key, value), = export_outputs.items()
+    if key != signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
+      export_outputs[
+          signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = value
+  if len(export_outputs) > 1:
+    if (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
+        not in export_outputs):
+      raise ValueError(
+          'Multiple export_outputs were provided, but none of them is '
+          'specified as the default.  Do this by naming one of them with '
+          'signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY.')
+
+  return export_outputs
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.pbtxt
index 164edbd66a..5cd6851278 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.experimental.pbtxt
@@ -4,4 +4,12 @@ tf_module {
     name: "PeepholeLSTMCell"
     mtype: "<type \'type\'>"
   }
+  member_method {
+    name: "export"
+    argspec: "args=[\'model\', \'saved_model_path\', \'custom_objects\', \'as_text\', \'input_signature\', \'serving_only\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\'], "
+  }
+  member_method {
+    name: "load_from_saved_model"
+    argspec: "args=[\'saved_model_path\'], varargs=None, keywords=None, defaults=None"
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.pbtxt
index 164edbd66a..5cd6851278 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.experimental.pbtxt
@@ -4,4 +4,12 @@ tf_module {
     name: "PeepholeLSTMCell"
     mtype: "<type \'type\'>"
   }
+  member_method {
+    name: "export"
+    argspec: "args=[\'model\', \'saved_model_path\', \'custom_objects\', \'as_text\', \'input_signature\', \'serving_only\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'False\'], "
+  }
+  member_method {
+    name: "load_from_saved_model"
+    argspec: "args=[\'saved_model_path\'], varargs=None, keywords=None, defaults=None"
+  }
 }
-- 
GitLab


From 89ec7e3612ec68a62227cf91b5566df46440ab26 Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Fri, 14 Dec 2018 17:00:34 -0800
Subject: [PATCH 640/873] Address PR comments

---
 tensorflow/compiler/jit/xla_gpu_device.cc       | 16 +++++++---------
 tensorflow/compiler/xla/client/client_library.h |  3 ++-
 tensorflow/compiler/xla/service/backend.cc      |  2 +-
 tensorflow/compiler/xla/service/backend.h       |  3 ++-
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index 0350e8b36d..a0899ffed9 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -44,7 +44,7 @@ xla::StatusOr<absl::optional<std::set<int>>>
 XlaGpuDeviceFactory::ParseVisibleDeviceList(const string& visible_device_list) {
   std::set<int> gpu_ids;
   if (visible_device_list.empty()) {
-    return absl::optional<std::set<int>>(absl::nullopt);
+    return {{absl::nullopt}};
   }
   const std::vector<string> visible_devices =
       absl::StrSplit(visible_device_list, ',');
@@ -58,7 +58,7 @@ XlaGpuDeviceFactory::ParseVisibleDeviceList(const string& visible_device_list) {
     }
     gpu_ids.insert(platform_gpu_id);
   }
-  return absl::optional<std::set<int>>(gpu_ids);
+  return {{gpu_ids}};
 }
 
 Status XlaGpuDeviceFactory::CreateDevices(
@@ -83,18 +83,16 @@ Status XlaGpuDeviceFactory::CreateDevices(
   }
   string allowed_gpus =
       session_options.config.gpu_options().visible_device_list();
-  auto parsed_gpus = ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
+  absl::optional<std::set<int>> gpu_ids = ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
   // We want to fill the gpu_ids set with all devices if config string is empty.
-  std::set<int> gpu_ids;
   int num_visible_devices = platform.ValueOrDie()->VisibleDeviceCount();
-  if (!parsed_gpus) {
+  if (!gpu_ids) {
+    gpu_ids.emplace();
     for (int i = 0; i < num_visible_devices; ++i) {
-      gpu_ids.insert(i);
+      gpu_ids->insert(i);
     }
-  } else {
-    gpu_ids = *parsed_gpus;
   }
-  for (int i : gpu_ids) {
+  for (int i : *gpu_ids) {
     XlaDevice::Options options;
     options.platform = platform.ValueOrDie();
     options.device_name_prefix = name_prefix;
diff --git a/tensorflow/compiler/xla/client/client_library.h b/tensorflow/compiler/xla/client/client_library.h
index c02dcf7793..62d225c6c2 100644
--- a/tensorflow/compiler/xla/client/client_library.h
+++ b/tensorflow/compiler/xla/client/client_library.h
@@ -63,7 +63,8 @@ class LocalClientOptions {
   LocalClientOptions& set_intra_op_parallelism_threads(int num_threads);
   int intra_op_parallelism_threads() const;
 
-  // Sets the allowed_devices set for creation of stream executors.
+  // Sets the allowed_devices set for selectively constructing stream executors
+  // on the platform.
   LocalClientOptions& set_allowed_devices(
       const absl::optional<std::set<int>>& allowed_devices);
   const absl::optional<std::set<int>>& allowed_devices() const;
diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc
index cf4a24d7b4..cdfee66c8e 100644
--- a/tensorflow/compiler/xla/service/backend.cc
+++ b/tensorflow/compiler/xla/service/backend.cc
@@ -183,7 +183,7 @@ StatusOr<se::StreamExecutor*> Backend::stream_executor(
         device_ordinal, stream_executors_.back()->device_ordinal());
   }
   for (auto* executor : stream_executors_) {
-    if (executor && executor->device_ordinal() == device_ordinal) {
+    if (executor->device_ordinal() == device_ordinal) {
       return executor;
     }
   }
diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h
index 97ac02fad3..7ca993fb26 100644
--- a/tensorflow/compiler/xla/service/backend.h
+++ b/tensorflow/compiler/xla/service/backend.h
@@ -54,7 +54,8 @@ class BackendOptions {
   BackendOptions& set_intra_op_parallelism_threads(int num_threads);
   int intra_op_parallelism_threads() const;
 
-  // Sets the allowed_devices set for creation of stream executors.
+  // Sets the allowed_devices for selectively constructing stream executors
+  // on the platform.
   BackendOptions& set_allowed_devices(
       const absl::optional<std::set<int>>& allowed_devices);
   const absl::optional<std::set<int>>& allowed_devices() const;
-- 
GitLab


From aab318560cb26ea7e9e9ea5676b215f7b5e5019e Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Fri, 14 Dec 2018 17:10:41 -0800
Subject: [PATCH 641/873] [XLA] Instead of ever recomputing reachability during
 fusion, use the original reachability as a way to filter out definite
 reachability.

PiperOrigin-RevId: 225627515
---
 tensorflow/compiler/xla/service/BUILD         |  1 +
 .../xla/service/instruction_fusion.cc         | 48 ++++++++++++++-----
 2 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index f20121e490..55cadfdec6 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1415,6 +1415,7 @@ cc_library(
         "//tensorflow/core:lib",
         "@com_google_absl//absl/algorithm:container",
         "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/memory",
     ],
 )
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index 7559ed1bab..3ea0b81d0d 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "absl/algorithm/container.h"
 #include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
 #include "absl/memory/memory.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/service/fusion_queue.h"
@@ -570,19 +571,42 @@ HloInstruction* InstructionFusion::FuseIntoMultiOutput(
 
 bool InstructionFusion::MultiOutputFusionCreatesCycle(
     HloInstruction* producer, HloInstruction* consumer) {
-  auto is_reachable = [&](const HloInstruction* a, const HloInstruction* b) {
-    // A consumer operand may have been multi-output fused into a parallel
-    // consumer and thus be missing from the original reachability map.
-    if (!reachability_->IsPresent(a) || !reachability_->IsPresent(b)) {
-      reachability_ = HloReachabilityMap::Build(consumer->parent());
+  absl::flat_hash_set<int> operands;
+  for (const HloInstruction* operand : consumer->operands()) {
+    if (operand == producer) {
+      continue;
+    }
+
+    // If the reachability map already contains the producer and the operand of
+    // the consumer, and the producer can reach the operand, then we know for
+    // sure MultiOutputFusion would create a cycle. If not, we need to do a DFS
+    // traversal of the computation to verify that this multioutput fusion would
+    // not create a cycle.
+    if (reachability_->IsPresent(producer) &&
+        reachability_->IsPresent(operand) &&
+        reachability_->IsReachable(producer, operand)) {
+      return true;
     }
-    return reachability_->IsReachable(a, b);
-  };
-  return absl::c_any_of(consumer->operands(),
-                        [&](const HloInstruction* consumer_operand) {
-                          return consumer_operand != producer &&
-                                 is_reachable(producer, consumer_operand);
-                        });
+    operands.insert(operand->unique_id());
+  }
+
+  // Do a DFS on the producer to see if any of the other consumer operands are
+  // reachable in the current state of the graph.
+  std::vector<HloInstruction*> worklist = producer->users();
+  absl::flat_hash_set<int> visits;
+  while (!worklist.empty()) {
+    const HloInstruction* user = worklist.back();
+    worklist.pop_back();
+    if (operands.count(user->unique_id()) != 0) {
+      return true;
+    }
+    if (visits.count(user->unique_id()) == 0) {
+      visits.insert(user->unique_id());
+      worklist.insert(worklist.end(), user->users().begin(),
+                      user->users().end());
+    }
+  }
+  return false;
 }
 
 bool InstructionFusion::ShouldFuse(HloInstruction* consumer,
-- 
GitLab


From 2a0561d6875e43410275b8b9d3edb99e1bb14fab Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 17:14:20 -0800
Subject: [PATCH 642/873] Use logits directly when using sparse categorical
 cross entropy loss function followed by softmax activation function.

PiperOrigin-RevId: 225627871
---
 tensorflow/python/keras/backend.py            | 65 ++++++++++++-------
 .../python/keras/engine/training_test.py      | 65 +++++++++++++++++++
 tensorflow/python/keras/losses_test.py        | 39 +++++++++++
 tensorflow/python/keras/optimizers_test.py    |  4 +-
 4 files changed, 147 insertions(+), 26 deletions(-)

diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 095273071f..23623e6e2c 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -3856,19 +3856,23 @@ def categorical_crossentropy(target, output, from_logits=False, axis=-1):
   Raises:
       ValueError: if `axis` is neither -1 nor one of the axes of `output`.
   """
-  rank = len(output.shape)
-  axis = axis % rank
-  # Note: nn.softmax_cross_entropy_with_logits_v2
-  # expects logits, Keras expects probabilities.
   if not from_logits:
-    # scale preds so that the class probas of each sample sum to 1
-    output = output / math_ops.reduce_sum(output, axis, True)
-    # manual computation of crossentropy
-    epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype)
-    output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)
-    return -math_ops.reduce_sum(target * math_ops.log(output), axis)
-  else:
-    return nn.softmax_cross_entropy_with_logits_v2(labels=target, logits=output)
+    if context.executing_eagerly() or output.op.type != 'Softmax':
+      axis = axis % len(output.shape)
+      # scale preds so that the class probas of each sample sum to 1
+      output = output / math_ops.reduce_sum(output, axis, True)
+      # manual computation of crossentropy
+      epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype)
+      output = clip_ops.clip_by_value(output, epsilon_, 1. - epsilon_)
+      return -math_ops.reduce_sum(target * math_ops.log(output), axis)
+    else:
+      # When softmax activation function is used for output operation, we
+      # use logits from the softmax function directly to compute loss in order
+      # to prevent collapsing zero when training.
+      # See b/117284466
+      assert len(output.op.inputs) == 1
+      output = output.op.inputs[0]
+  return nn.softmax_cross_entropy_with_logits_v2(labels=target, logits=output)
 
 
 @tf_export('keras.backend.sparse_categorical_crossentropy')
@@ -3892,19 +3896,25 @@ def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1):
   Raises:
       ValueError: if `axis` is neither -1 nor one of the axes of `output`.
   """
+  if not from_logits:
+    if context.executing_eagerly() or output.op.type != 'Softmax':
+      epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype)
+      output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_)
+      output = math_ops.log(output)
+    else:
+      # When softmax activation function is used for output operation, we
+      # use logits from the softmax function directly to compute loss in order
+      # to prevent collapsing zero when training.
+      # See b/117284466
+      assert len(output.op.inputs) == 1
+      output = output.op.inputs[0]
+
   rank = len(output.shape)
   axis = axis % rank
   if axis != rank - 1:
     permutation = list(range(axis)) + list(range(axis + 1, rank)) + [axis]
     output = array_ops.transpose(output, perm=permutation)
 
-  # Note: nn.sparse_softmax_cross_entropy_with_logits
-  # expects logits, Keras expects probabilities.
-  if not from_logits:
-    epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype)
-    output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_)
-    output = math_ops.log(output)
-
   output_shape = output.shape
   targets = cast(flatten(target), 'int64')
   logits = array_ops.reshape(output, [-1, int(output_shape[-1])])
@@ -3931,13 +3941,18 @@ def binary_crossentropy(target, output, from_logits=False):
   Returns:
       A tensor.
   """
-  # Note: nn.sigmoid_cross_entropy_with_logits
-  # expects logits, Keras expects probabilities.
   if not from_logits:
-    # transform back to logits
-    epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype)
-    output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_)
-    output = math_ops.log(output / (1 - output))
+    if context.executing_eagerly() or output.op.type != 'Sigmoid':
+      # transform back to logits
+      epsilon_ = _to_tensor(epsilon(), output.dtype.base_dtype)
+      output = clip_ops.clip_by_value(output, epsilon_, 1 - epsilon_)
+      output = math_ops.log(output / (1 - output))
+    else:
+      # When sigmoid activation function is used for output operation, we
+      # use logits from the sigmoid function directly to compute loss in order
+      # to prevent collapsing zero when training.
+      assert len(output.op.inputs) == 1
+      output = output.op.inputs[0]
   return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
 
 
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index a39d433982..887e3b84b5 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -732,6 +732,71 @@ class TrainingTest(keras_parameterized.TestCase):
     self.assertAllEqual([[6], [8], [10], [12]],
                         model.predict(dataset_two, steps=2))
 
+  def test_training_on_sparse_categorical_crossentropy_loss_with_softmax(self):
+    with context.eager_mode():
+      np.random.seed(1337)
+      train_x = np.ones((100, 4))
+      train_y = np.random.randint(0, 1, size=(100, 1))
+
+      reference_model = testing_utils.get_small_sequential_mlp(16, 2,
+                                                               input_dim=4)
+      reference_model.compile(loss='sparse_categorical_crossentropy',
+                              optimizer=RMSPropOptimizer(learning_rate=0.001),
+                              run_eagerly=True)
+      fixed_weights = reference_model.get_weights()
+      reference_model_loss = reference_model.train_on_batch(train_x, train_y)
+
+      test_model = testing_utils.get_small_sequential_mlp(16, 2, input_dim=4)
+      test_model.compile(loss='sparse_categorical_crossentropy',
+                         optimizer=RMSPropOptimizer(learning_rate=0.001),
+                         run_eagerly=False)
+      test_model.set_weights(fixed_weights)
+      test_model_loss = test_model.train_on_batch(train_x, train_y)
+      self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4)
+
+  def test_training_on_categorical_crossentropy_loss_with_softmax(self):
+    with context.eager_mode():
+      np.random.seed(1337)
+      train_x = np.ones((100, 4))
+      train_y = keras.utils.to_categorical(np.random.randint(0, 1,
+                                                             size=(100, 1)), 2)
+
+      reference_model = testing_utils.get_small_sequential_mlp(16, 2,
+                                                               input_dim=4)
+      reference_model.compile(loss='categorical_crossentropy',
+                              optimizer=RMSPropOptimizer(learning_rate=0.001),
+                              run_eagerly=True)
+      fixed_weights = reference_model.get_weights()
+      reference_model_loss = reference_model.train_on_batch(train_x, train_y)
+
+      test_model = testing_utils.get_small_sequential_mlp(16, 2, input_dim=4)
+      test_model.compile(loss='categorical_crossentropy',
+                         optimizer=RMSPropOptimizer(learning_rate=0.001),
+                         run_eagerly=False)
+      test_model.set_weights(fixed_weights)
+      test_model_loss = test_model.train_on_batch(train_x, train_y)
+      self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4)
+
+  def test_training_on_binary_crossentropy_loss(self):
+    with context.eager_mode():
+      train_x = np.ones((100, 4), dtype=np.float32)
+      train_y = np.ones((100, 1), dtype=np.float32)
+      reference_model = testing_utils.get_small_sequential_mlp(16, 1,
+                                                               input_dim=4)
+      reference_model.compile(loss='binary_crossentropy',
+                              optimizer=RMSPropOptimizer(learning_rate=0.001),
+                              run_eagerly=True)
+      fixed_weights = reference_model.get_weights()
+      reference_model_loss = reference_model.train_on_batch(train_x, train_y)
+
+      test_model = testing_utils.get_small_sequential_mlp(16, 1, input_dim=4)
+      test_model.compile(loss='binary_crossentropy',
+                         optimizer=RMSPropOptimizer(learning_rate=0.001),
+                         run_eagerly=False)
+      test_model.set_weights(fixed_weights)
+      test_model_loss = test_model.train_on_batch(train_x, train_y)
+      self.assertAlmostEqual(test_model_loss, reference_model_loss, places=4)
+
 
 class TestExceptionsAndWarnings(keras_parameterized.TestCase):
 
diff --git a/tensorflow/python/keras/losses_test.py b/tensorflow/python/keras/losses_test.py
index d2791cdcd3..bc040fb685 100644
--- a/tensorflow/python/keras/losses_test.py
+++ b/tensorflow/python/keras/losses_test.py
@@ -95,6 +95,45 @@ class KerasLossesTest(test.TestCase):
       objective_output = keras.losses.sparse_categorical_crossentropy(y_a, y_b)
       assert keras.backend.eval(objective_output).shape == (6,)
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_categorical_crossentropy_loss(self):
+    target = keras.backend.variable(np.random.randint(0, 1, (5, 1)))
+    logits = keras.backend.variable(np.random.random((5, 1)))
+    softmax_output = keras.backend.softmax(logits)
+    output_from_logit = keras.losses.categorical_crossentropy(
+        target, logits, from_logits=True)
+    output_from_softmax = keras.losses.categorical_crossentropy(
+        target, softmax_output)
+    np.testing.assert_allclose(
+        keras.backend.eval(output_from_logit),
+        keras.backend.eval(output_from_softmax), atol=1e-5)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_sparse_categorical_crossentropy_loss(self):
+    target = keras.backend.variable(np.random.randint(0, 1, (5, 1)))
+    logits = keras.backend.variable(np.random.random((5, 1)))
+    softmax_output = keras.backend.softmax(logits)
+    output_from_logit = keras.losses.sparse_categorical_crossentropy(
+        target, logits, from_logits=True)
+    output_from_softmax = keras.losses.sparse_categorical_crossentropy(
+        target, softmax_output)
+    np.testing.assert_allclose(
+        keras.backend.eval(output_from_logit),
+        keras.backend.eval(output_from_softmax), atol=1e-5)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_binary_crossentropy_loss(self):
+    target = keras.backend.variable(np.random.randint(0, 1, (5, 1)))
+    logits = keras.backend.variable(np.random.random((5, 1)))
+    sigmoid_output = keras.backend.sigmoid(logits)
+    output_from_logit = keras.losses.binary_crossentropy(
+        target, logits, from_logits=True)
+    output_from_sigmoid = keras.losses.binary_crossentropy(
+        target, sigmoid_output)
+    np.testing.assert_allclose(
+        keras.backend.eval(output_from_logit),
+        keras.backend.eval(output_from_sigmoid), atol=1e-5)
+
   def test_serialization(self):
     fn = keras.losses.get('mse')
     config = keras.losses.serialize(fn)
diff --git a/tensorflow/python/keras/optimizers_test.py b/tensorflow/python/keras/optimizers_test.py
index 33d65e690a..18a20567ce 100644
--- a/tensorflow/python/keras/optimizers_test.py
+++ b/tensorflow/python/keras/optimizers_test.py
@@ -127,7 +127,9 @@ class KerasOptimizersTest(test.TestCase):
   def test_adam(self):
     with self.cached_session():
       _test_optimizer(keras.optimizers.Adam())
-      _test_optimizer(keras.optimizers.Adam(decay=1e-3))
+      # Accuracy seems dependent on the seed initialization.
+      # TODO(b/121051441): fix test flakiness.
+      _test_optimizer(keras.optimizers.Adam(decay=1e-3), target=0.73)
       _test_optimizer(keras.optimizers.Adam(amsgrad=True))
 
   def test_adamax(self):
-- 
GitLab


From c7c0a76f1d6b8ac2057434fbf638b77993c6b88e Mon Sep 17 00:00:00 2001
From: Mingsheng Hong <hongm@google.com>
Date: Fri, 14 Dec 2018 17:19:41 -0800
Subject: [PATCH 643/873] Extended experimental API to allow the configuration
 of the number of CPU devices.

PiperOrigin-RevId: 225628420
---
 tensorflow/c/c_api_experimental.cc | 10 +++++++---
 tensorflow/c/c_api_experimental.h  |  5 +++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index 38e29aa74a..81343f7bc0 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -66,7 +66,8 @@ void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable) {
 }
 
 TF_Buffer* TF_CreateConfig(unsigned char enable_xla_compilation,
-                           unsigned char gpu_memory_allow_growth) {
+                           unsigned char gpu_memory_allow_growth,
+                           unsigned int num_cpu_devices) {
   tensorflow::ConfigProto config;
   auto* optimizer_options =
       config.mutable_graph_options()->mutable_optimizer_options();
@@ -87,6 +88,8 @@ TF_Buffer* TF_CreateConfig(unsigned char enable_xla_compilation,
   auto* gpu_options = config.mutable_gpu_options();
   gpu_options->set_allow_growth(gpu_memory_allow_growth);
 
+  (*config.mutable_device_count())["CPU"] = num_cpu_devices;
+
   // TODO(b/113217601): This is needed for EagerContext::runner_ to use a
   // threadpool, so that we avoid the possibility of running the runner_ in the
   // threadpool of GPU event mgr, as that can trigger more callbacks to be
@@ -8535,8 +8538,9 @@ TFE_Context* TFE_CreateContextFromSession(TF_Session* session,
 
   // Reduce GPU memory allocation, and set appropriate config options for TFE
   // context.
-  auto* config =
-      TF_CreateConfig(/*xla*/ false, /* gpu_memory_allow_growth */ true);
+  auto* config = TF_CreateConfig(
+      /*xla*/ false, /* gpu_memory_allow_growth */ true, /* num_cpu_devices */
+      10);
   TFE_ContextOptionsSetConfig(opts, config->data, config->length, status);
   if (!status->status.ok()) {
     CHECK(!config);
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index 3e3a485eb7..cb7a146846 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -67,9 +67,10 @@ TF_CAPI_EXPORT extern void TF_EnableXLACompilation(TF_SessionOptions* options,
 // a) ConfigProto.optimizer_options.global_jit_level is set to to ON_1 if
 // `enable_xla_compilation` is non-zero, and OFF otherwise.
 // b) ConfigProto.gpu_options.allow_growth is set to `gpu_memory_allow_growth`.
+// c) ConfigProto.device_count is set to `num_cpu_devices`.
 TF_CAPI_EXPORT extern TF_Buffer* TF_CreateConfig(
-    unsigned char enable_xla_compilation,
-    unsigned char gpu_memory_allow_growth);
+    unsigned char enable_xla_compilation, unsigned char gpu_memory_allow_growth,
+    unsigned int num_cpu_devices);
 
 // Create a serialized tensorflow.RunOptions proto, where RunOptions.trace_level
 // is set to FULL_TRACE if `enable_full_trace` is non-zero, and NO_TRACE
-- 
GitLab


From 5138caa12031a953ff877b0a785233b957e0993a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 14 Dec 2018 19:03:12 -0800
Subject: [PATCH 644/873] This CL builds the capability of traversing the
 python graph to determine the original input nodes. We also make the error
 message more readable by removing common path prefix from stack traces

PiperOrigin-RevId: 225636415
---
 .../python/framework/error_interpolation.py   | 148 ++++++++++++++++--
 .../framework/error_interpolation_test.py     |  40 +++++
 2 files changed, 174 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/framework/error_interpolation.py b/tensorflow/python/framework/error_interpolation.py
index 37a634d806..557f947291 100644
--- a/tensorflow/python/framework/error_interpolation.py
+++ b/tensorflow/python/framework/error_interpolation.py
@@ -29,6 +29,7 @@ import re
 
 import six
 
+from tensorflow.python.framework.ops import Tensor
 from tensorflow.python.util import tf_stack
 
 _NAME_REGEX = r"[A-Za-z0-9.][A-Za-z0-9_.\-/]*?"
@@ -216,11 +217,13 @@ def _get_defining_frame_from_op(op):
   return frame
 
 
-def compute_field_dict(op):
+def compute_field_dict(op, strip_file_prefix=""):
   """Return a dictionary mapping interpolation tokens to values.
 
   Args:
     op: op.Operation object having a _traceback member.
+    strip_file_prefix: The common path in the stacktrace. We remove the prefix
+    from the file names.
 
   Returns:
     A dictionary mapping string tokens to string values.  The keys are shown
@@ -248,6 +251,8 @@ def compute_field_dict(op):
   """
   frame = _get_defining_frame_from_op(op)
   filename = frame[tf_stack.TB_FILENAME]
+  if filename.startswith(strip_file_prefix):
+    filename = filename[len(strip_file_prefix):]
   lineno = frame[tf_stack.TB_LINENO]
   defined_at = " (defined at %s:%d)" % (filename, lineno)
   colocation_summary = _compute_colocation_summary_from_op(op)
@@ -265,11 +270,110 @@ def compute_field_dict(op):
   return field_dict
 
 
+def _common_prefix(all_ops):
+  """Determines the common prefix from the paths of the stacktrace of 'all_ops'.
+
+  For example, if the paths are '/foo/bar/baz/' and '/foo/car', this would
+  return '/foo'.
+
+  Args:
+    all_ops: All the input nodes in the form of a list of lists of ops.
+
+  Returns:
+    The common prefix.
+  """
+  files = set()
+  for ops in all_ops:
+    if ops is None:
+      continue
+    for op in ops:
+      # pylint: disable=protected-access
+      tf_traceback = tf_stack.convert_stack(op._traceback)
+      # pylint: enable=protected-access
+      for frame in tf_traceback:
+        filename = frame[tf_stack.TB_FILENAME]
+        if "<embedded" not in filename:
+          files.add(filename)
+  return os.path.split(os.path.commonprefix(list(files)))[0]
+
+
+def _sources_for_node(name, graph):
+  """Gets the top-level root input nodes for 'name' node.
+
+  We recursively traverse the graph from 'name' node to its inputs and collect
+  all the nodes which don't have any inputs.
+
+  Args:
+    name: The name of the node.
+    graph: The graph containing the node.
+
+  Returns:
+    The unique top-level root input nodes.
+  """
+  def _helper(name, graph, seen_names, inputs):
+    """Recursive helper. 'seen_names' and 'inputs' are mutated."""
+    if name.startswith("^"):
+      name = name[1:]
+    try:
+      op = graph.as_graph_element(name)
+    except KeyError:
+      return
+    if isinstance(op, Tensor):
+      op = op.op
+    name = op.name
+    if name in seen_names:
+      return
+    seen_names.add(name)
+    if not op.node_def.input:
+      inputs.add(op)
+      return
+    for n in op.node_def.input:
+      _helper(n, graph, seen_names, inputs)
+
+  names = set()
+  inputs = set()
+  _helper(name, graph, names, inputs)
+  return list(inputs)
+
+
+def _build_error_message(op, input_ops, common_prefix):
+  """Returns the formatted error message for the given op.
+
+  Args:
+    op: The node.
+    input_ops: The input nodes to the 'op' node
+    common_prefix: The prefix path common to the stacktrace of inputs.
+
+  Returns:
+    The formatted error message for the given op. The error message also
+    includes the information about the input sources for the given op.
+  """
+  field_dict = compute_field_dict(op, common_prefix)
+  msg = "node %s%s " % (op.name, field_dict["defined_at"])
+  input_debug_info = []
+  # This stores the line numbers that we have already printed.
+  done = set()
+  done.add(field_dict["defined_at"])
+  for op_inp in input_ops:
+    field_dict_inp = compute_field_dict(op_inp, common_prefix)
+    if field_dict_inp["defined_at"] not in done:
+      input_debug_info.append(
+          " %s%s" % (op_inp.name, field_dict_inp["defined_at"]))
+      done.add(field_dict_inp["defined_at"])
+  if input_debug_info:
+    end_msg = ("\nInput Source operations connected to node %s:\n") % (op.name)
+    end_msg += "\t\n".join(input_debug_info)
+  else:
+    end_msg = ""
+  return msg, end_msg
+
+
 def interpolate(error_message, graph):
   """Interpolates an error message.
 
   The error message can contain tags of the form `{{type name}}` which will be
-  replaced.
+  replaced. For example: "{{node <name>}}" would get expanded to:
+  "node <name>(defined at <path>)".
 
   Args:
     error_message: A string to interpolate.
@@ -281,25 +385,41 @@ def interpolate(error_message, graph):
   """
   seps, tags = _parse_message(error_message)
   subs = []
-  end_msg = ""
+  end_msg = collections.defaultdict(list)
+  tagged_ops = []
 
   for t in tags:
     try:
       op = graph.get_operation_by_name(t.name)
     except KeyError:
       op = None
-
-    msg = "{{%s %s}}" % (t.type, t.name)
-    if op is not None:
-      field_dict = compute_field_dict(op)
-      if t.type == "node":
-        msg = "node %s%s " % (t.name, field_dict["defined_at"])
-      elif t.type == "colocation_node":
-        msg = "node %s%s having device %s " % (t.name, field_dict["defined_at"],
-                                               field_dict["devices"])
-        end_msg += "\n\n" + field_dict["devs_and_colocs"]
+    if op is None:
+      tagged_ops.append(None)
+    else:
+      tagged_ops.append([op] + _sources_for_node(op.name, graph))
+
+  common_prefix = _common_prefix(tagged_ops)
+  for tag, ops in zip(tags, tagged_ops):
+    msg = "{{%s %s}}" % (tag.type, tag.name)
+    if ops is not None:
+      if tag.type == "node":
+        msg, source_msg = _build_error_message(ops[0], ops[1:], common_prefix)
+        if source_msg:
+          end_msg["source_nodes"].append(source_msg)
+      elif tag.type == "colocation_node":
+        field_dict = compute_field_dict(ops[0], common_prefix)
+        msg = "node %s%s placed on device %s " % (
+            ops[0].name, field_dict["defined_at"], field_dict["devices"])
+        end_msg["colocations"].append(field_dict["devs_and_colocs"])
     subs.append(msg)
-  subs.append(end_msg)
+
+  if "source_nodes" in end_msg:
+    subs.append("\n\nErrors may have originated from an input operation.")
+    subs.append("\n".join(end_msg["source_nodes"]))
+    end_msg.pop("source_nodes", None)
+  for k, messages in end_msg.items():
+    subs.append("Additional information about %s:" % k)
+    subs.append("\n".join(messages))
 
   return "".join(
       itertools.chain(*six.moves.zip_longest(seps, subs, fillvalue="")))
diff --git a/tensorflow/python/framework/error_interpolation_test.py b/tensorflow/python/framework/error_interpolation_test.py
index 9eaa4a5f2d..d835ada086 100644
--- a/tensorflow/python/framework/error_interpolation_test.py
+++ b/tensorflow/python/framework/error_interpolation_test.py
@@ -19,12 +19,14 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+import re
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import error_interpolation
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.framework import traceable_stack
+from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 from tensorflow.python.util import tf_stack
 
@@ -195,6 +197,44 @@ class InterpolateFilenamesAndLineNumbersTest(test.TestCase):
     self.assertRegexpMatches(interpolated_string, "constant_op.py:[0-9]+.*")
 
 
+@test_util.run_v1_only("b/120545219")
+class InputNodesTest(test.TestCase):
+
+  def setUp(self):
+    # Add nodes to the graph for retrieval by name later.
+    one = constant_op.constant(1, name="One")
+    two = constant_op.constant(2, name="Two")
+    three = math_ops.add(one, two, name="Three")
+    self.graph = three.graph
+
+    # Change the list of bad file substrings so that constant_op.py is chosen
+    # as the defining stack frame for constant_op.constant ops.
+    self.old_bad_strings = error_interpolation._BAD_FILE_SUBSTRINGS
+    error_interpolation._BAD_FILE_SUBSTRINGS = [
+        "%sops.py" % os.sep,
+        "%sutil" % os.sep,
+    ]
+
+  def tearDown(self):
+    error_interpolation._BAD_FILE_SUBSTRINGS = self.old_bad_strings
+
+  def testNoInputs(self):
+    two_tags_with_seps = ";;;{{node One}},,,{{node Two}};;;"
+    interpolated_string = error_interpolation.interpolate(
+        two_tags_with_seps, self.graph)
+    expected_regex = (
+        r"^;;;.*constant_op.py:[0-9]+\) ,,,.*constant_op.py:[0-9]+\) ;;;$")
+    self.assertRegexpMatches(interpolated_string, expected_regex)
+
+  def testBasicInputs(self):
+    tag = ";;;{{node Three}};;;"
+    interpolated_string = error_interpolation.interpolate(tag, self.graph)
+    expected_regex = re.compile(
+        r"^;;;.*op_def_library.py:[0-9]+\) ;;;.*Input.*constant_op.py:[0-9]+\)",
+        re.DOTALL)
+    self.assertRegexpMatches(interpolated_string, expected_regex)
+
+
 @test_util.run_v1_only("b/120545219")
 class InterpolateDeviceSummaryTest(test.TestCase):
 
-- 
GitLab


From 65470e7364d9566accafc9df52349533cdc3ea0a Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Fri, 14 Dec 2018 19:50:30 -0800
Subject: [PATCH 645/873] Merge status values when aggregating status from
 executor or multi-graph runner.

PiperOrigin-RevId: 225639123
---
 tensorflow/compiler/tests/xla_ops_test.py     |   4 +-
 tensorflow/core/common_runtime/executor.h     |  28 +--
 .../distributed_runtime/master_session.cc     |  13 +-
 tensorflow/core/distributed_runtime/worker.cc |   2 +-
 tensorflow/core/lib/core/status.cc            | 214 +++++++++++++-----
 tensorflow/core/lib/core/status.h             |  20 ++
 tensorflow/core/lib/core/status_test.cc       |  44 ++++
 7 files changed, 234 insertions(+), 91 deletions(-)

diff --git a/tensorflow/compiler/tests/xla_ops_test.py b/tensorflow/compiler/tests/xla_ops_test.py
index 4cf88fc523..28274ff799 100644
--- a/tensorflow/compiler/tests/xla_ops_test.py
+++ b/tensorflow/compiler/tests/xla_ops_test.py
@@ -319,7 +319,7 @@ class XlaOpsTest(xla_test.XLATestCase, parameterized.TestCase):
         session.run(output)
       self.assertRegexpMatches(
           invalid_arg_error.exception.message,
-          (r'^start_indices must be a vector with length equal to input rank, '
+          (r'start_indices must be a vector with length equal to input rank, '
            r'but input rank is 3 and start_indices has shape \[2\].*'))
 
   def testDynamicSliceWithIncorrectSizeIndicesShape(self):
@@ -332,7 +332,7 @@ class XlaOpsTest(xla_test.XLATestCase, parameterized.TestCase):
         session.run(output)
       self.assertRegexpMatches(
           invalid_arg_error.exception.message,
-          (r'^size_indices must be a vector with length equal to input rank, '
+          (r'size_indices must be a vector with length equal to input rank, '
            r'but input rank is 3 and size_indices has shape \[2\].*'))
 
 
diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h
index 3c0f18d50a..02930168a4 100644
--- a/tensorflow/core/common_runtime/executor.h
+++ b/tensorflow/core/common_runtime/executor.h
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/framework/session_state.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/notification.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/logging.h"
@@ -171,20 +172,7 @@ class ExecutorBarrier {
 
   mutable mutex mu_;
   int pending_ GUARDED_BY(mu_) = 0;
-  Status status_ GUARDED_BY(mu_);
-
-  void MergeStatusLocked(const Status& s) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    if (s.ok()) {
-      return;
-    }
-
-    // Prefer primary failures over cancellations.  A cancellation may finish
-    // _before_ the original status is propagated; we override it in this case.
-    if (status_.ok() ||
-        str_util::StrContains(status_.error_message(), "[CHILD]")) {
-      status_ = s;
-    }
-  }
+  StatusGroup status_group_ GUARDED_BY(mu_);
 
   void WhenDone(const Status& s) {
     Rendezvous* error_rendez = nullptr;
@@ -196,27 +184,25 @@ class ExecutorBarrier {
 
       // If we are the first error encountered, trigger an abort of the
       // Rendezvous object by this thread only.
-      if (status_.ok() && !s.ok()) {
+      if (status_group_.ok() && !s.ok()) {
         error_rendez = rendez_;
         error_rendez->Ref();
       }
 
-      MergeStatusLocked(s);
-
-      if (!status_.ok()) {
-        status = status_;
-      }
+      status_group_.Update(s);
 
       // If this is the last call to WhenDone, call the final callback
       // below.
       if (--pending_ == 0) {
         CHECK(done_cb_ != nullptr);
         std::swap(done, done_cb_);
+        status = status_group_.as_status();
       }
     }
 
     if (error_rendez != nullptr) {
-      error_rendez->StartAbort(status);
+      error_rendez->StartAbort(
+          errors::Aborted("Stopping remaining executors."));
       error_rendez->Unref();
     }
 
diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc
index 47f567ea8d..5a524eba76 100644
--- a/tensorflow/core/distributed_runtime/master_session.cc
+++ b/tensorflow/core/distributed_runtime/master_session.cc
@@ -530,7 +530,7 @@ class RunManyGraphs {
 
   Status status() const {
     mutex_lock l(mu_);
-    return status_;
+    return status_group_.as_status();
   }
 
  private:
@@ -538,22 +538,17 @@ class RunManyGraphs {
 
   BlockingCounter pending_;
   mutable mutex mu_;
-  Status status_ GUARDED_BY(mu_);
+  StatusGroup status_group_ GUARDED_BY(mu_);
 
   void ReportBadStatus(const Status& s) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     // Start cancellation if we aren't already in an error state.
-    if (status_.ok()) {
+    if (status_group_.ok()) {
       for (Call& call : calls_) {
         call.opts.StartCancel();
       }
     }
 
-    // Prefer primary failures over cancellations.  A cancellation may finish
-    // _before_ the original status is propagated; we override it in this case.
-    if (status_.ok() ||
-        str_util::StrContains(status_.error_message(), "[CHILD]")) {
-      status_ = s;
-    }
+    status_group_.Update(s);
   }
 
   TF_DISALLOW_COPY_AND_ASSIGN(RunManyGraphs);
diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc
index 8f41856f32..c6e34c568e 100644
--- a/tensorflow/core/distributed_runtime/worker.cc
+++ b/tensorflow/core/distributed_runtime/worker.cc
@@ -104,7 +104,7 @@ void Worker::AbortStep(int64 step_id) {
     // Delay a bit before aborting the step. This way, the root
     // cause may return first back to the client instead of this
     // cancellation generated abort error.
-    rendez->StartAbort(errors::Aborted("[CHILD] Step ", step_id,
+    rendez->StartAbort(errors::Aborted("Step ", step_id,
                                        " cancelled.  Cancelling rendezvous."));
     rendez->Unref();
   });
diff --git a/tensorflow/core/lib/core/status.cc b/tensorflow/core/lib/core/status.cc
index cb2a06e620..7be5b9b513 100644
--- a/tensorflow/core/lib/core/status.cc
+++ b/tensorflow/core/lib/core/status.cc
@@ -15,6 +15,11 @@ limitations under the License.
 
 #include "tensorflow/core/lib/core/status.h"
 #include <stdio.h>
+#include <map>
+#include "tensorflow/core/lib/core/error_codes.pb.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 
 namespace tensorflow {
 
@@ -44,68 +49,72 @@ const string& Status::empty_string() {
   return *empty;
 }
 
+string error_name(error::Code code) {
+  switch (code) {
+    case tensorflow::error::OK:
+      return "OK";
+      break;
+    case tensorflow::error::CANCELLED:
+      return "Cancelled";
+      break;
+    case tensorflow::error::UNKNOWN:
+      return "Unknown";
+      break;
+    case tensorflow::error::INVALID_ARGUMENT:
+      return "Invalid argument";
+      break;
+    case tensorflow::error::DEADLINE_EXCEEDED:
+      return "Deadline exceeded";
+      break;
+    case tensorflow::error::NOT_FOUND:
+      return "Not found";
+      break;
+    case tensorflow::error::ALREADY_EXISTS:
+      return "Already exists";
+      break;
+    case tensorflow::error::PERMISSION_DENIED:
+      return "Permission denied";
+      break;
+    case tensorflow::error::UNAUTHENTICATED:
+      return "Unauthenticated";
+      break;
+    case tensorflow::error::RESOURCE_EXHAUSTED:
+      return "Resource exhausted";
+      break;
+    case tensorflow::error::FAILED_PRECONDITION:
+      return "Failed precondition";
+      break;
+    case tensorflow::error::ABORTED:
+      return "Aborted";
+      break;
+    case tensorflow::error::OUT_OF_RANGE:
+      return "Out of range";
+      break;
+    case tensorflow::error::UNIMPLEMENTED:
+      return "Unimplemented";
+      break;
+    case tensorflow::error::INTERNAL:
+      return "Internal";
+      break;
+    case tensorflow::error::UNAVAILABLE:
+      return "Unavailable";
+      break;
+    case tensorflow::error::DATA_LOSS:
+      return "Data loss";
+      break;
+    default:
+      char tmp[30];
+      snprintf(tmp, sizeof(tmp), "Unknown code(%d)", static_cast<int>(code));
+      return tmp;
+      break;
+  }
+}
+
 string Status::ToString() const {
   if (state_ == nullptr) {
     return "OK";
   } else {
-    char tmp[30];
-    const char* type;
-    switch (code()) {
-      case tensorflow::error::CANCELLED:
-        type = "Cancelled";
-        break;
-      case tensorflow::error::UNKNOWN:
-        type = "Unknown";
-        break;
-      case tensorflow::error::INVALID_ARGUMENT:
-        type = "Invalid argument";
-        break;
-      case tensorflow::error::DEADLINE_EXCEEDED:
-        type = "Deadline exceeded";
-        break;
-      case tensorflow::error::NOT_FOUND:
-        type = "Not found";
-        break;
-      case tensorflow::error::ALREADY_EXISTS:
-        type = "Already exists";
-        break;
-      case tensorflow::error::PERMISSION_DENIED:
-        type = "Permission denied";
-        break;
-      case tensorflow::error::UNAUTHENTICATED:
-        type = "Unauthenticated";
-        break;
-      case tensorflow::error::RESOURCE_EXHAUSTED:
-        type = "Resource exhausted";
-        break;
-      case tensorflow::error::FAILED_PRECONDITION:
-        type = "Failed precondition";
-        break;
-      case tensorflow::error::ABORTED:
-        type = "Aborted";
-        break;
-      case tensorflow::error::OUT_OF_RANGE:
-        type = "Out of range";
-        break;
-      case tensorflow::error::UNIMPLEMENTED:
-        type = "Unimplemented";
-        break;
-      case tensorflow::error::INTERNAL:
-        type = "Internal";
-        break;
-      case tensorflow::error::UNAVAILABLE:
-        type = "Unavailable";
-        break;
-      case tensorflow::error::DATA_LOSS:
-        type = "Data loss";
-        break;
-      default:
-        snprintf(tmp, sizeof(tmp), "Unknown code(%d)",
-                 static_cast<int>(code()));
-        type = tmp;
-        break;
-    }
-    string result(type);
+    string result(error_name(code()));
     result += ": ";
     result += state_->msg;
     return result;
@@ -131,4 +140,93 @@ string* TfCheckOpHelperOutOfLine(const ::tensorflow::Status& v,
   return new string(r);
 }
 
+void StatusGroup::Update(const Status& s) {
+  if (s.ok()) {
+    ++num_ok_;
+  } else {
+    ok_ = false;
+    children_.push_back(s);
+  }
+}
+
+const int kMaxChildMessageSize = 2048;
+
+Status StatusGroup::as_status() const {
+  if (ok_) {
+    return Status::OK();
+  }
+
+  // If there is only one message, or all of the messages are identical, return
+  // the original status.  This reduces verbosity and preserves existing
+  // behavior when possible.
+  bool single_status = true;
+  for (const Status& s : children_) {
+    if (s != children_[0]) {
+      single_status = false;
+      break;
+    }
+  }
+
+  if (single_status) {
+    return children_[0];
+  }
+
+  std::vector<string> fmt;
+
+  // Compute a final output string with status codes sorted by frequency in
+  // increasing order.  This prefers more "interesting" messages over child
+  // messages that may come from cancellation.
+  std::map<error::Code, std::vector<Status>> code_to_status;
+  for (const Status& s : children_) {
+    code_to_status[s.code()].push_back(s);
+  }
+
+  std::vector<std::pair<error::Code, int>> count_vec;
+  count_vec.reserve(code_to_status.size());
+  for (auto& p : code_to_status) {
+    count_vec.push_back(std::make_pair(p.first, p.second.size()));
+  }
+
+  std::sort(
+      count_vec.begin(), count_vec.end(),
+      [](const std::pair<error::Code, int>& a,
+         const std::pair<error::Code, int>& b) { return a.second < b.second; });
+
+  fmt.push_back(
+      strings::Printf("Combined status information from %lu operations:\n",
+                      num_ok_ + children_.size()));
+
+  for (const auto& p : count_vec) {
+    // Deduplicate error messages
+    std::map<string, int> child_errors;
+    for (const Status& s : code_to_status[p.first]) {
+      ++child_errors[s.error_message()];
+    }
+
+    string child_fmt;
+    for (auto& m : child_errors) {
+      child_fmt.append(strings::Printf(
+          "  %s [%dx]",
+          str_util::StringReplace(m.first, "\n", "\n  ", true).c_str(),
+          m.second));
+      child_fmt.append("\n");
+    }
+    // Strip last newline.
+    child_fmt = child_fmt.substr(0, child_fmt.size() - 1);
+
+    if (child_fmt.size() > kMaxChildMessageSize) {
+      child_fmt =
+          strings::StrCat(child_fmt.substr(0, kMaxChildMessageSize), "...");
+    }
+    fmt.push_back(strings::Printf("Status code: %s [%dx]\n%s",
+                                  error_name(p.first).c_str(), p.second,
+                                  child_fmt.c_str()));
+  }
+
+  fmt.push_back(strings::Printf("(%zd successful operations.)", num_ok_));
+
+  // TODO(power): use the least-frequently occurring status for the return code
+  return Status(children_[0].code(), str_util::Join(fmt, "\n"));
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h
index eb0ff555a5..fe3eec1be0 100644
--- a/tensorflow/core/lib/core/status.h
+++ b/tensorflow/core/lib/core/status.h
@@ -97,6 +97,26 @@ class Status {
   void SlowCopyFrom(const State* src);
 };
 
+// Helper class to manage multiple child status values.
+class StatusGroup {
+ public:
+  // Return a merged status with combined child status messages.
+  //
+  // The status code returned is OK if all children were successful, otherwise
+  // the first non-OK child status code is reported.
+  Status as_status() const;
+
+  bool ok() const { return ok_; }
+
+  // Augment this group with the child status `status`.
+  void Update(const Status& status);
+
+ private:
+  bool ok_ = true;
+  size_t num_ok_ = 0;
+  std::vector<Status> children_;
+};
+
 inline Status::Status(const Status& s)
     : state_((s.state_ == NULL) ? NULL : new State(*s.state_)) {}
 
diff --git a/tensorflow/core/lib/core/status_test.cc b/tensorflow/core/lib/core/status_test.cc
index d95d8f20aa..d3296b4fac 100644
--- a/tensorflow/core/lib/core/status_test.cc
+++ b/tensorflow/core/lib/core/status_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/test_benchmark.h"
 
@@ -97,6 +98,49 @@ TEST(Status, EqualsDifferentMessage) {
   ASSERT_NE(a, b);
 }
 
+TEST(StatusGroup, AcceptsFirstCode) {
+  StatusGroup c;
+  const Status internal(errors::Internal("Original error."));
+  c.Update(internal);
+  c.Update(Status::OK());
+  c.Update(Status::OK());
+  c.Update(Status::OK());
+  ASSERT_EQ(c.as_status().code(), internal.code());
+  ASSERT_EQ(c.ok(), false);
+}
+
+TEST(StatusGroup, ContainsChildMessages) {
+  StatusGroup c;
+  const Status internal(errors::Internal("Original error."));
+  const Status cancelled(errors::Cancelled("Cancelled after 10 steps."));
+  const Status aborted(errors::Aborted("Aborted after 10 steps."));
+  c.Update(internal);
+  for (size_t i = 0; i < 5; ++i) {
+    c.Update(cancelled);
+  }
+  for (size_t i = 0; i < 10; ++i) {
+    c.Update(aborted);
+  }
+  for (size_t i = 0; i < 100; ++i) {
+    c.Update(Status::OK());
+  }
+
+  ASSERT_EQ(c.as_status().code(), internal.code());
+  EXPECT_TRUE(str_util::StrContains(c.as_status().error_message(),
+                                    internal.error_message()));
+  EXPECT_TRUE(str_util::StrContains(c.as_status().error_message(),
+                                    cancelled.error_message()));
+  EXPECT_TRUE(str_util::StrContains(c.as_status().error_message(),
+                                    aborted.error_message()));
+  StatusGroup d;
+  d.Update(c.as_status());
+  c.Update(errors::FailedPrecondition("Failed!"));
+  d.Update(c.as_status());
+  c.Update(errors::DataLoss("Data loss!"));
+  d.Update(c.as_status());
+  LOG(INFO) << d.as_status();
+}
+
 static void BM_TF_CHECK_OK(int iters) {
   tensorflow::Status s =
       (iters < 0) ? errors::InvalidArgument("Invalid") : Status::OK();
-- 
GitLab


From 65a0a3a05b53477e09cff1bd027d4942231893c3 Mon Sep 17 00:00:00 2001
From: Tong Shen <endlessroad@google.com>
Date: Fri, 14 Dec 2018 20:49:10 -0800
Subject: [PATCH 646/873] Return a clear error message when outside compilation
 has int64 input/output.

PiperOrigin-RevId: 225642077
---
 .../jit/extract_outside_compilation_pass.cc   | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
index 1906f1ac85..8b01768c49 100644
--- a/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
+++ b/tensorflow/compiler/jit/extract_outside_compilation_pass.cc
@@ -330,6 +330,38 @@ xla::StatusOr<NodeDef> BuildXlaHostComputeNodeDef(
   return new_def;
 }
 
+Status ValidateOutsideCompilationCallNode(Node* call_node) {
+  // DT_INT64 as input/output for outside compilation is not supported yet:
+  // b/120809951.
+  for (const Edge* e : call_node->in_edges()) {
+    if (e->IsControlEdge()) {
+      continue;
+    }
+    DataType dtype = e->src()->output_type(e->src_output());
+    if (dtype == DT_INT64) {
+      return errors::Unimplemented(
+          "int64 input for outside compilation is not supported yet: "
+          "b/120809951. Please cast output of node ",
+          e->src()->DebugString(),
+          " to int32 before feeding it into outside compilation.");
+    }
+  }
+  for (const Edge* e : call_node->out_edges()) {
+    if (e->IsControlEdge()) {
+      continue;
+    }
+    DataType dtype = e->dst()->input_type(e->dst_input());
+    if (dtype == DT_INT64) {
+      return errors::Unimplemented(
+          "int64 output for outside compilation is not supported yet: "
+          "b/120809951. Please cast input of node ",
+          e->dst()->DebugString(),
+          " to int32 before returning it from outside compilation.");
+    }
+  }
+  return Status::OK();
+}
+
 // Replace outside compilation function call node with XlaHostCompute node.
 // If the function call node has no input/output edges, we will just remove it
 // and not create a XlaHostCompute node.
@@ -1517,6 +1549,7 @@ Status ExtractOutsideCompilationForFunction(
     }
   }
   for (Node* n : outside_compilation_nodes) {
+    TF_RETURN_IF_ERROR(ValidateOutsideCompilationCallNode(n));
     TF_RETURN_IF_ERROR(ReplaceOrRemoveOutsideCompilationCallNode(
         graph_out.get(), n, host_compute_core));
   }
-- 
GitLab


From 9cc7cfb5b6aabf105266a4a2fafc4d445081571d Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Fri, 14 Dec 2018 22:46:01 -0800
Subject: [PATCH 647/873] Make ParseVisibleDeviceList static function

---
 tensorflow/compiler/jit/xla_gpu_device.cc | 29 ++++++++++-------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index a0899ffed9..0388be516b 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -29,19 +29,11 @@ limitations under the License.
 
 namespace tensorflow {
 
-class XlaGpuDeviceFactory : public DeviceFactory {
- public:
-  Status CreateDevices(const SessionOptions& options, const string& name_prefix,
-                       std::vector<std::unique_ptr<Device>>* devices) override;
-  // Returns a set containing the device ids contained in visible_device_list or
-  // nullopt if it is empty. It returns error in case of malformed configuration
-  // string.
-  static xla::StatusOr<absl::optional<std::set<int>>> ParseVisibleDeviceList(
-      const string& visible_device_list);
-};
-
-xla::StatusOr<absl::optional<std::set<int>>>
-XlaGpuDeviceFactory::ParseVisibleDeviceList(const string& visible_device_list) {
+// Returns a set containing the device ids contained in visible_device_list or
+// nullopt if it is empty. It returns error in case of malformed configuration
+// string.
+static xla::StatusOr<absl::optional<std::set<int>>> ParseVisibleDeviceList(
+    const string& visible_device_list) {
   std::set<int> gpu_ids;
   if (visible_device_list.empty()) {
     return {{absl::nullopt}};
@@ -61,6 +53,12 @@ XlaGpuDeviceFactory::ParseVisibleDeviceList(const string& visible_device_list) {
   return {{gpu_ids}};
 }
 
+class XlaGpuDeviceFactory : public DeviceFactory {
+ public:
+  Status CreateDevices(const SessionOptions& options, const string& name_prefix,
+                       std::vector<std::unique_ptr<Device>>* devices) override;
+};
+
 Status XlaGpuDeviceFactory::CreateDevices(
     const SessionOptions& session_options, const string& name_prefix,
     std::vector<std::unique_ptr<Device>>* devices) {
@@ -84,11 +82,10 @@ Status XlaGpuDeviceFactory::CreateDevices(
   string allowed_gpus =
       session_options.config.gpu_options().visible_device_list();
   absl::optional<std::set<int>> gpu_ids = ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
-  // We want to fill the gpu_ids set with all devices if config string is empty.
-  int num_visible_devices = platform.ValueOrDie()->VisibleDeviceCount();
   if (!gpu_ids) {
     gpu_ids.emplace();
-    for (int i = 0; i < num_visible_devices; ++i) {
+    // Fill the gpu_ids set with all devices if config string is empty.
+    for (int i = 0; i < platform.ValueOrDie()->VisibleDeviceCount(); ++i) {
       gpu_ids->insert(i);
     }
   }
-- 
GitLab


From 9dc824fd03ccb8b658495ab64619e0c7acad30bc Mon Sep 17 00:00:00 2001
From: Sami Kama <skama@nvidia.com>
Date: Fri, 14 Dec 2018 23:51:10 -0800
Subject: [PATCH 648/873] Fix clang-format check. Somehow clang-format used in
 the CI produces a different output than clang-format-5

---
 tensorflow/compiler/jit/xla_gpu_device.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/jit/xla_gpu_device.cc b/tensorflow/compiler/jit/xla_gpu_device.cc
index 0388be516b..1d8aacf629 100644
--- a/tensorflow/compiler/jit/xla_gpu_device.cc
+++ b/tensorflow/compiler/jit/xla_gpu_device.cc
@@ -81,7 +81,8 @@ Status XlaGpuDeviceFactory::CreateDevices(
   }
   string allowed_gpus =
       session_options.config.gpu_options().visible_device_list();
-  absl::optional<std::set<int>> gpu_ids = ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
+  absl::optional<std::set<int>> gpu_ids =
+      ParseVisibleDeviceList(allowed_gpus).ValueOrDie();
   if (!gpu_ids) {
     gpu_ids.emplace();
     // Fill the gpu_ids set with all devices if config string is empty.
-- 
GitLab


From b37a329a6fc4aaf94767ebf5158689a3ee233b89 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 15 Dec 2018 01:03:24 -0800
Subject: [PATCH 649/873] compat: Update forward compatibility horizon to
 2018-12-15

PiperOrigin-RevId: 225654735
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index bd0e38c823..dadf72b9ab 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 14)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 15)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From eff38f2ee70b44b2dd87c2dc0a983f2aadd5f77b Mon Sep 17 00:00:00 2001
From: jcf94 <xff252595680@gmail.com>
Date: Sat, 15 Dec 2018 19:40:43 +0800
Subject: [PATCH 650/873] Move REGISTER_MEM_ALLOCATOR to VerbsServer
 construction

---
 tensorflow/contrib/verbs/rdma_mgr.cc         |  6 +++++-
 tensorflow/contrib/verbs/rdma_mgr.h          |  1 +
 tensorflow/contrib/verbs/verbs_server_lib.cc | 11 ++++++-----
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 7e821d6be2..4a78074758 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -270,7 +270,11 @@ class BFCRdmaAllocatorFactory : public AllocatorFactory {
     return new BasicCPUAllocator(numa_node, ProcessState::singleton()->GetCPUAllocatorVisitor(), ProcessState::singleton()->GetCPUFreeVisitor());
   }
 };
-REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory);
+
+/*static*/ void RdmaMgr::RegMemAllocator() {
+    VLOG(1) << "Register Rdma capable Allocator when using grpc+verbs";
+    REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory);
+}
 
 void RdmaMgr::InitAllocators() {
   static std::once_flag flag;
diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h
index 74b92cc9a6..11bef527c1 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.h
+++ b/tensorflow/contrib/verbs/rdma_mgr.h
@@ -39,6 +39,7 @@ class RdmaMgr {
   void SetupChannels();
   bool ConnectivityCheck();
   void InitAllocators();
+  static void RegMemAllocator();
   static void RegMemVisitors();
   const string& local_worker() { return local_worker_; }
 
diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index 5b72b1604a..641b47f38a 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -33,10 +33,15 @@ RendezvousMgrInterface* NewRdmaRendezvousMgr(const WorkerEnv* env) {
   return new RdmaRendezvousMgr(env);
 }
 
+std::once_flag reg_mem_allocator_call;
+std::once_flag reg_mem_visitors_call;
+
 }  // namespace
 
 VerbsServer::VerbsServer(const ServerDef& server_def, Env* env)
-    : GrpcServer(server_def, env), verbs_state_(DISCONNECTED) {}
+    : GrpcServer(server_def, env), verbs_state_(DISCONNECTED) {
+  std::call_once(reg_mem_allocator_call, []() { RdmaMgr::RegMemAllocator(); });
+}
 
 VerbsServer::~VerbsServer() {
   TF_CHECK_OK(Stop());
@@ -76,10 +81,6 @@ Status VerbsServer::ChannelCacheFactory(const ServerDef& server_def,
   return Status::OK();
 }
 
-namespace {
-std::once_flag reg_mem_visitors_call;
-}  // namespace
-
 Status VerbsServer::Init(ServiceInitFunction service_func,
                          RendezvousMgrCreationFunction rendezvous_mgr_func) {
   std::call_once(reg_mem_visitors_call, []() { RdmaMgr::RegMemVisitors(); });
-- 
GitLab


From 39d9c9e0a28231085fb86090c853de4535fecd1b Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Sat, 15 Dec 2018 17:05:15 -0800
Subject: [PATCH 651/873] Fix path name in the non gpu case.

Extension shouldn't be included in either case.

PiperOrigin-RevId: 225695531
---
 .../lite/examples/ios/camera/CameraExampleViewController.mm     | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm
index 48cd313c9d..4d5ea40cd0 100644
--- a/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm
+++ b/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm
@@ -48,7 +48,7 @@ namespace {
 // GPU Delegate only supports float model now.
 NSString* model_file_name = @"mobilenet_v1_1.0_224";
 #else
-NSString* model_file_name = @"mobilenet_quant_v1_224.tflite";
+NSString* model_file_name = @"mobilenet_quant_v1_224";
 #endif
 NSString* model_file_type = @"tflite";
 // If you have your own model, point this to the labels file.
-- 
GitLab


From ebe98d94a5340386429da4b6e930b8cda9c7e854 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 16 Dec 2018 01:04:03 -0800
Subject: [PATCH 652/873] compat: Update forward compatibility horizon to
 2018-12-16

PiperOrigin-RevId: 225715604
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index dadf72b9ab..43b9e38868 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 15)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 16)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From feaaa82259a204b037eb9bd72ab2f74f9f4fe431 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Sun, 16 Dec 2018 21:15:20 -0800
Subject: [PATCH 653/873] [XLA] Improve runtime of MakeInstructionPostOrder by
 reserving the flat set upfront.

PiperOrigin-RevId: 225769332
---
 tensorflow/compiler/xla/service/hlo_computation.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index ca663b8b4a..f6867b4006 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -15,8 +15,8 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 
-#include <stddef.h>
 #include <algorithm>
+#include <cstddef>
 #include <functional>
 #include <list>
 #include <queue>
@@ -396,6 +396,7 @@ std::vector<HloInstruction*> HloComputation::MakeInstructionPostOrder() const {
   post_order.reserve(instruction_count());
   std::vector<HloInstruction*> trace_instructions;
   absl::flat_hash_map<HloInstruction*, VisitState> visited;
+  visited.reserve(instruction_count());
   for (auto& instruction : instructions_) {
     if (instruction->opcode() == HloOpcode::kTrace) {
       // Trace instructions aren't handled by the DFS visitor. Add trace
-- 
GitLab


From 1a85cf6170cdcf86c8a88595e07d237bc24ac3ea Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 01:03:31 -0800
Subject: [PATCH 654/873] compat: Update forward compatibility horizon to
 2018-12-17

PiperOrigin-RevId: 225785021
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 43b9e38868..877e5c8947 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 16)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 17)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 06303a8ea057dbfcc639303557a7ab87362e6c09 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 02:28:33 -0800
Subject: [PATCH 655/873] Refactoring to make the polymorphic code easier to
 read.

Changes:
1. Remove "self._created_variables = []" as this shouldn't matter.
2. Remove self._concrete_stateful_fn -> this is used only for the first time we call __call__.
3. Make _initialize not return canonicalized arguments, instead create a function for it.
PiperOrigin-RevId: 225794015
---
 tensorflow/python/eager/def_function.py | 37 +++++++++++++++++++------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 5e7e866fd8..5a010e1880 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -263,8 +263,33 @@ class PolymorphicFunction(object):
         input_signature=self._input_signature,
         autograph=self._autograph)
 
+  def _canonicalize_function_inputs(self, args, kwds):
+    """Canonicalize the inputs to the Python function."""
+    if not self._stateful_fn:
+      raise ValueError(
+          "_canonicalize_function_inputs must be called only after _initialize "
+          "has run.")
+    if self._input_signature is None or args or kwds:
+      return self._stateful_fn._canonicalize_function_inputs(*args, **kwds)  # pylint: disable=protected-access
+    # If an input signature is defined, we may need to fetch a concrete function
+    # without any inputs specified. In this case args and kwds should be ignored
+    # but running _canonicalize_function_inputs would raise an exception.
+    return (), {}
+
   def _initialize(self, args, kwds, add_initializers_to=None):
-    """Initializes, on the first call."""
+    """Initializes, on the first call.
+
+    Creates two polymorphic functions, one that will allow creation of variables
+    and one that won't.
+
+    Additionally runs a trace for the polymorphic function that allows creation
+    of variables.
+
+    Args:
+      args: Arguments to the underlying python callable.
+      kwds: Keyword arguments to the python callable.
+      add_initializers_to: Where to collect variable initializers, if not None.
+    """
 
     created_variables = []
 
@@ -291,12 +316,6 @@ class PolymorphicFunction(object):
 
     self._stateless_fn = self._defun_with_scope(invalid_creator_scope)
     self._stateless_fn._name = self._name  # pylint: disable=protected-access
-    if self._input_signature is None or args or kwds:
-      return self._stateful_fn._canonicalize_function_inputs(*args, **kwds)  # pylint: disable=protected-access
-    # If an input signature is defined, we may need to fetch a concrete function
-    # without any inputs specified. In this case args and kwds should be ignored
-    # but running _canonicalize_function_inputs would raise an exception.
-    return (), {}
 
   def __call__(self, *args, **kwds):
     """Calls the graph function."""
@@ -313,7 +332,9 @@ class PolymorphicFunction(object):
                          " decorated with tf.function.")
       return results
 
-    canon_args, canon_kwds = self._initialize(args, kwds)
+    # This is the first call of __call__, so we have to initialize.
+    self._initialize(args, kwds)
+    canon_args, canon_kwds = self._canonicalize_function_inputs(args, kwds)
 
     if not self._created_variables:
       # If we did not create any variables the trace we have is good enough.
-- 
GitLab


From 6decf0842b1f7ec17c7d8957d453cd5132b7a128 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 02:55:37 -0800
Subject: [PATCH 656/873] Refactor deserialization of functions in object-based
 SavedModel.

Restore a FunctionDef into a FuncGraph and wrap it into Function object.
Replace calling of all functions until something fits with a input signature
match code structure.

PiperOrigin-RevId: 225796290
---
 .../saved_model/function_deserialization.py   | 48 ++++++++++++++-----
 tensorflow/python/saved_model/load.py         | 23 +++++----
 2 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/tensorflow/python/saved_model/function_deserialization.py b/tensorflow/python/saved_model/function_deserialization.py
index 46bd69ad03..51e23574ca 100644
--- a/tensorflow/python/saved_model/function_deserialization.py
+++ b/tensorflow/python/saved_model/function_deserialization.py
@@ -19,28 +19,50 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.eager import def_function
+from tensorflow.python.util import nest
+
+
+def _inputs_compatible(args, function):
+  # TODO(vbardiovsky): The compatibility check should be about the signature,
+  # not the flattened version of it.
+  flattened_inputs = nest.flatten(args)
+  if len(flattened_inputs) != len(function.inputs):
+    return False
+  for a, b in zip(flattened_inputs, function.inputs):
+    if a.dtype != b.dtype or not b.shape.is_compatible_with(a.shape):
+      return False
+  return True
 
 
 def recreate_polymorphic_function(
-    saved_polymorphic_function, defined_functions):
-  """Creates a PolymorphicFunction which runs restored function definitions."""
+    saved_polymorphic_function, functions):
+  """Creates a PolymorphicFunction from a SavedPolymorphicFunction.
+
+  Args:
+    saved_polymorphic_function: SavedPolymorphicFunction proto.
+    functions: map from function name to Function.
+
+  Returns:
+    A PolymorphicFunction.
+  """
+  # TODO(andresp): Construct a PolymorphicFunction with the cache populated
+  # instead of creating a new PolymorphicFunction backed by a Python layer to
+  # glue things together. Current approach is nesting functions deeper for each
+  # serialization cycle.
   @def_function.function
   def restored_function(*args):
     """Calls a restored function."""
-    # Try calling each function, return a value from the first one whose
-    # signature matches.
-    # TODO(allenl): Consider re-populating the function cache directly.
     # TODO(allenl): Functions saved with input_signatures should revive with
     # input_signatures.
     for monomorphic_function in saved_polymorphic_function.monomorphic_function:
-      try:
-        # TODO(allenl): Passing an explicit name here prevents invalid name
-        # errors. We should replace this with something based on the actual
-        # Python function name.
-        return defined_functions[monomorphic_function.concrete_function](
-            *args, name="imported_function")
-      except ValueError:
-        continue
+      function_obj = functions[monomorphic_function.concrete_function]
+      if _inputs_compatible(args, function_obj):
+        flattened_inputs = nest.flatten(args)
+        flattened_outputs = function_obj._call_flat(flattened_inputs)  # pylint: disable=protected-access
+        # TODO(vbardiovsky): rebuild output structure.
+        single_output, = flattened_outputs
+        return single_output
+
     raise AssertionError(
         "Could not find matching function to call for arguments: %s" % (args,))
   return restored_function
diff --git a/tensorflow/python/saved_model/load.py b/tensorflow/python/saved_model/load.py
index 9d9f60c69d..3ebc08caef 100644
--- a/tensorflow/python/saved_model/load.py
+++ b/tensorflow/python/saved_model/load.py
@@ -20,7 +20,8 @@ from __future__ import print_function
 
 import os
 
-from tensorflow.python.framework import function as function_lib
+from tensorflow.python.eager import function
+from tensorflow.python.framework import function_def_to_graph as function_def_lib
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import variables
@@ -42,16 +43,20 @@ class _Loader(object):
     self._asset_file_def = meta_graph.asset_file_def
     self._proto = object_graph_proto
     self._export_dir = export_dir
-    self._defined_functions = {}
-    for defined_function in function_lib.from_library(
-        meta_graph.graph_def.library):
-      # TODO(allenl): Do we need to do name mapping here? Not quite sure what
-      # happens when loaded names collide with existing names.
-      defined_function.add_to_graph(None)
-      self._defined_functions[defined_function.name] = defined_function
+    self._load_func_graphs(meta_graph.graph_def.library)
     self._load_all()
     self._restore_checkpoint()
 
+  def _load_func_graphs(self, function_library):
+    # TODO(allenl): Do we need to do name mapping here? Not quite sure what
+    # happens when loaded names collide with existing names.
+    # TODO(andresp): Look into gradient functions and the need to restore
+    # functions in the right order.
+    self._functions = {}
+    for fdef in function_library.function:
+      self._functions[fdef.signature.name] = function.Function(
+          function_def_lib.function_def_to_graph(fdef))
+
   def _load_all(self):
     self._nodes = [self._recreate(proto) for proto in self._proto.nodes]
     # After creating the objects, construct the edges between the objects.
@@ -92,7 +97,7 @@ class _Loader(object):
 
   def _recreate_function(self, proto):
     return function_deserialization.recreate_polymorphic_function(
-        proto, self._defined_functions)
+        proto, self._functions)
 
   def _recreate_variable(self, proto):
     # TODO(andresp): Can we use the checkpointed value as initializer?
-- 
GitLab


From 2a067cb0b107d513345a68aacdcd5a58c041c977 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 07:00:45 -0800
Subject: [PATCH 657/873] Some test cleanups.

PiperOrigin-RevId: 225819680
---
 .../python/estimator/python/gan_estimator_test.py    | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
index bc9021050b..5a3d29cf0b 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py
@@ -75,8 +75,8 @@ class GetGANModelTest(test.TestCase, parameterized.TestCase):
   def test_get_gan_model(self, mode):
     with ops.Graph().as_default():
       generator_inputs = {'x': array_ops.ones([3, 4])}
-      real_data = (array_ops.zeros([3, 4]) if
-                   mode != model_fn_lib.ModeKeys.PREDICT else None)
+      is_predict = mode == model_fn_lib.ModeKeys.PREDICT
+      real_data = array_ops.zeros([3, 4]) if not is_predict else None
       gan_model = estimator._get_gan_model(
           mode, generator_fn, discriminator_fn, real_data, generator_inputs,
           add_summaries=False)
@@ -139,6 +139,7 @@ class GetEstimatorSpecTest(test.TestCase, parameterized.TestCase):
 
   @classmethod
   def setUpClass(cls):
+    super(GetEstimatorSpecTest, cls).setUpClass()
     cls._generator_optimizer = training.GradientDescentOptimizer(1.0)
     cls._discriminator_optimizer = training.GradientDescentOptimizer(1.0)
 
@@ -200,7 +201,6 @@ class GetEstimatorSpecTest(test.TestCase, parameterized.TestCase):
       self.assertSetEqual(frozenset(sync_opts), frozenset((g_opt, d_opt)))
 
 
-# TODO(joelshor): Add pandas test.
 class GANEstimatorIntegrationTest(test.TestCase):
 
   def setUp(self):
@@ -231,11 +231,11 @@ class GANEstimatorIntegrationTest(test.TestCase):
         get_eval_metric_ops_fn=get_metrics,
         model_dir=self._model_dir)
 
-    # TRAIN
+    # Train.
     num_steps = 10
     est.train(train_input_fn, steps=num_steps)
 
-    # EVALUTE
+    # Evaluate.
     scores = est.evaluate(eval_input_fn)
     self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
     self.assertIn('loss', six.iterkeys(scores))
@@ -243,7 +243,7 @@ class GANEstimatorIntegrationTest(test.TestCase):
                      scores['loss'])
     self.assertIn('mse_custom_metric', six.iterkeys(scores))
 
-    # PREDICT
+    # Predict.
     predictions = np.array([x for x in est.predict(predict_input_fn)])
 
     self.assertAllEqual(prediction_size, predictions.shape)
-- 
GitLab


From 5f1c0c1c8d85ec0a4b9746e61ccf3dcb6c4a4220 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 07:12:31 -0800
Subject: [PATCH 658/873] Internal change.

PiperOrigin-RevId: 225821151
---
 tensorflow/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 449a1372ed..d5d9e30d9e 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -368,6 +368,7 @@ package_group(
     name = "internal",
     packages = [
         "-//third_party/tensorflow/python/estimator",
+        "//learning/deepmind/...",
         "//learning/meta_rank/...",
         "//tensorflow/...",
         "//tensorflow_estimator/contrib/...",
-- 
GitLab


From fa7b38a5eae1e0818c8a1ac6a099fbbefa53727e Mon Sep 17 00:00:00 2001
From: Jian Li <jianlijianli@google.com>
Date: Mon, 17 Dec 2018 10:05:05 -0800
Subject: [PATCH 659/873] Update the checks in (bidi/uni) lstm to use nullptr.

PiperOrigin-RevId: 225844636
---
 .../lite/kernels/bidirectional_sequence_lstm.cc    | 14 +++++++-------
 .../lite/kernels/unidirectional_sequence_lstm.cc   | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
index 2c345bba69..1cd927a305 100644
--- a/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
@@ -182,7 +182,7 @@ TfLiteStatus CheckLstmTensorDimensionsAndTypes(
 
   const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, input_to_input_weights_tensor);
-  if (input_to_input_weights) {
+  if (input_to_input_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input);
@@ -208,7 +208,7 @@ TfLiteStatus CheckLstmTensorDimensionsAndTypes(
 
   const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, recurrent_to_input_weights_tensor);
-  if (recurrent_to_input_weights) {
+  if (recurrent_to_input_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0],
                       n_cell);
@@ -248,7 +248,7 @@ TfLiteStatus CheckLstmTensorDimensionsAndTypes(
 
   const TfLiteTensor* cell_to_input_weights =
       GetOptionalInputTensor(context, node, cell_to_input_weights_tensor);
-  if (cell_to_input_weights) {
+  if (cell_to_input_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell);
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->type,
@@ -257,7 +257,7 @@ TfLiteStatus CheckLstmTensorDimensionsAndTypes(
 
   const TfLiteTensor* cell_to_forget_weights =
       GetOptionalInputTensor(context, node, cell_to_forget_weights_tensor);
-  if (cell_to_forget_weights) {
+  if (cell_to_forget_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell);
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->type,
@@ -266,7 +266,7 @@ TfLiteStatus CheckLstmTensorDimensionsAndTypes(
 
   const TfLiteTensor* cell_to_output_weights =
       GetOptionalInputTensor(context, node, cell_to_output_weights_tensor);
-  if (cell_to_output_weights) {
+  if (cell_to_output_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell);
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->type,
@@ -315,7 +315,7 @@ TfLiteStatus CheckLstmTensorDimensionsAndTypes(
 
   const TfLiteTensor* projection_weights =
       GetOptionalInputTensor(context, node, projection_weights_tensor);
-  if (projection_weights) {
+  if (projection_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output);
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell);
@@ -325,7 +325,7 @@ TfLiteStatus CheckLstmTensorDimensionsAndTypes(
 
   const TfLiteTensor* projection_bias =
       GetOptionalInputTensor(context, node, projection_bias_tensor);
-  if (projection_bias) {
+  if (projection_bias != nullptr) {
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output);
     TF_LITE_ENSURE_EQ(context, projection_bias->type, kTfLiteFloat32);
diff --git a/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc
index 497777b9af..08e56b0ebd 100644
--- a/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc
@@ -110,7 +110,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
-  if (input_to_input_weights) {
+  if (input_to_input_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input);
@@ -130,7 +130,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor);
-  if (recurrent_to_input_weights) {
+  if (recurrent_to_input_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[0],
                       n_cell);
@@ -164,21 +164,21 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   const TfLiteTensor* cell_to_input_weights =
       GetOptionalInputTensor(context, node, kCellToInputWeightsTensor);
-  if (cell_to_input_weights) {
+  if (cell_to_input_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell);
   }
 
   const TfLiteTensor* cell_to_forget_weights =
       GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor);
-  if (cell_to_forget_weights) {
+  if (cell_to_forget_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell);
   }
 
   const TfLiteTensor* cell_to_output_weights =
       GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor);
-  if (cell_to_output_weights) {
+  if (cell_to_output_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell);
   }
@@ -220,7 +220,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   const TfLiteTensor* projection_weights =
       GetOptionalInputTensor(context, node, kProjectionWeightsTensor);
-  if (projection_weights) {
+  if (projection_weights != nullptr) {
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output);
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell);
@@ -228,7 +228,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   const TfLiteTensor* projection_bias =
       GetOptionalInputTensor(context, node, kProjectionBiasTensor);
-  if (projection_bias) {
+  if (projection_bias != nullptr) {
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output);
   }
-- 
GitLab


From a1c66b9f1577cf9668cde221d631a7de5a18d57a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 10:32:16 -0800
Subject: [PATCH 660/873] Fix docstring typo

PiperOrigin-RevId: 225849454
---
 tensorflow/python/ops/ragged/ragged_string_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/ragged/ragged_string_ops.py b/tensorflow/python/ops/ragged/ragged_string_ops.py
index 80216376f3..4b22c23d0b 100644
--- a/tensorflow/python/ops/ragged/ragged_string_ops.py
+++ b/tensorflow/python/ops/ragged/ragged_string_ops.py
@@ -239,7 +239,7 @@ def unicode_split(input,
                   name=None):
   r"""Splits each string in `input` into a sequence of Unicode code points.
 
-  `result[i1...iN, j]` is the substring of `input[i1...iN] that encodes its
+  `result[i1...iN, j]` is the substring of `input[i1...iN]` that encodes its
   `j`th character, when decoded using `input_encoding`.
 
   Args:
@@ -293,7 +293,7 @@ def unicode_split_with_offsets(input,
 
   Returns a tuple `(chars, start_offsets)` where:
 
-  * `chars[i1...iN, j]` is the substring of `input[i1...iN] that encodes its
+  * `chars[i1...iN, j]` is the substring of `input[i1...iN]` that encodes its
     `j`th character, when decoded using `input_encoding`.
   * `start_offsets[i1...iN, j]` is the start byte offset for the `j`th
     character in `input[i1...iN]`, when decoded using `input_encoding`.
-- 
GitLab


From d91462836715e74dfce2238ac02fc52b9532dc66 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 10:37:49 -0800
Subject: [PATCH 661/873] Fix weakref unwrapping in AutoGraph conversion

PiperOrigin-RevId: 225850437
---
 tensorflow/python/autograph/core/converter.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/python/autograph/core/converter.py b/tensorflow/python/autograph/core/converter.py
index 4543b11398..e6d626f215 100644
--- a/tensorflow/python/autograph/core/converter.py
+++ b/tensorflow/python/autograph/core/converter.py
@@ -218,6 +218,11 @@ class ConversionOptions(object):
     def as_qualified_name(o):
       name = inspect_utils.getqualifiedname(ctx.info.namespace, o, max_depth=1)
       if not name:
+        if isinstance(o, weakref.ref):
+          # `o` might already be a weak reference, if this object was
+          # constructed from code generated by `to_ast` itself.
+          # If so, unpack it.
+          o = o()
         # TODO(mdan): This needs to account for the symbols defined locally.
         name = ctx.namer.new_symbol(o.__name__, ())
         ctx.program.add_symbol(name, weakref.ref(o))
-- 
GitLab


From f2a08fbecdd5ad4bd826c5efd7df60126ab716be Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Mon, 17 Dec 2018 10:43:38 -0800
Subject: [PATCH 662/873] Add Go wrapper around Eager C API's Context.

This is part of a series of changes to provide a thin Go wrapper around the Eager C API.

PiperOrigin-RevId: 225851549
---
 tensorflow/go/BUILD           |   1 +
 tensorflow/go/context.go      | 109 ++++++++++++++++++++++++++++++++++
 tensorflow/go/context_test.go |  57 ++++++++++++++++++
 3 files changed, 167 insertions(+)
 create mode 100644 tensorflow/go/context.go
 create mode 100644 tensorflow/go/context_test.go

diff --git a/tensorflow/go/BUILD b/tensorflow/go/BUILD
index f16cffac99..62d6b4f57c 100644
--- a/tensorflow/go/BUILD
+++ b/tensorflow/go/BUILD
@@ -17,6 +17,7 @@ sh_test(
         ":all_files",  # Go sources
         "//tensorflow:libtensorflow.so",  # C library
         "//tensorflow/c:headers",  # C library header
+        "//tensorflow/c/eager:headers",  # Eager C library header
         "//tensorflow/cc/saved_model:saved_model_half_plus_two",  # Testdata for LoadSavedModel
     ],
 )
diff --git a/tensorflow/go/context.go b/tensorflow/go/context.go
new file mode 100644
index 0000000000..04f86282af
--- /dev/null
+++ b/tensorflow/go/context.go
@@ -0,0 +1,109 @@
+/*
+Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tensorflow
+
+// #include <stdlib.h>
+// #include "tensorflow/c/c_api.h"
+// #include "tensorflow/c/eager/c_api.h"
+import "C"
+import (
+	"fmt"
+	"runtime"
+)
+
+// ContextOptions contains configuration information for a session
+type ContextOptions struct {
+	// Config is a binary-serialized representation of the
+	// tensorflow.ConfigProto protocol message
+	// (https://www.tensorflow.org/code/tensorflow/core/protobuf/config.proto).
+	Config []byte
+
+	// Sets the default execution mode
+	Async bool
+}
+
+// c converts the ContextOptions to the C API's TF_ContextOptions.
+// Caller takes ownership of returned object.
+func (o *ContextOptions) c() (*C.TFE_ContextOptions, error) {
+	opt := C.TFE_NewContextOptions()
+	if o == nil {
+		return opt, nil
+	}
+
+	if sz := len(o.Config); sz > 0 {
+		status := newStatus()
+		cConfig := C.CBytes(o.Config)
+		C.TFE_ContextOptionsSetConfig(opt, cConfig, C.size_t(sz), status.c)
+		C.free(cConfig)
+		if err := status.Err(); err != nil {
+			C.TFE_DeleteContextOptions(opt)
+			return nil, fmt.Errorf("invalid ContextOptions.Config: %v", err)
+		}
+	}
+
+	var async uint8
+	if o.Async {
+		async = 1
+	}
+	C.TFE_ContextOptionsSetAsync(opt, C.uchar(async))
+
+	return opt, nil
+}
+
+// Context for executing operations eagerly.
+//
+// A Context allows operations to be executed immediately. It encapsulates
+// information such as the available devices, resource manager etc. It also
+// allows the user to configure execution using a ConfigProto, as they can
+// configure a Session when executing a Graph.
+type Context struct {
+	c *C.TFE_Context
+}
+
+// NewContext creates a new context for eager execution.
+// options may be nil to use the default options.
+func NewContext(options *ContextOptions) (*Context, error) {
+	status := newStatus()
+	cOpt, err := options.c()
+	if err != nil {
+		return nil, err
+	}
+	defer C.TFE_DeleteContextOptions(cOpt)
+	cContext := C.TFE_NewContext(cOpt, status.c)
+	if err := status.Err(); err != nil {
+		return nil, err
+	}
+
+	c := &Context{c: cContext}
+	runtime.SetFinalizer(c, (*Context).finalizer)
+	return c, nil
+}
+
+func (c *Context) finalizer() {
+	C.TFE_DeleteContext(c.c)
+}
+
+// ListDevices returns the list of devices associated with a Context.
+func (c *Context) ListDevices() ([]Device, error) {
+	status := newStatus()
+	devicesList := C.TFE_ContextListDevices(c.c, status.c)
+	if err := status.Err(); err != nil {
+		return nil, fmt.Errorf("SessionListDevices() failed: %v", err)
+	}
+	defer C.TF_DeleteDeviceList(devicesList)
+	return deviceSliceFromDeviceList(devicesList)
+}
diff --git a/tensorflow/go/context_test.go b/tensorflow/go/context_test.go
new file mode 100644
index 0000000000..ce4005da24
--- /dev/null
+++ b/tensorflow/go/context_test.go
@@ -0,0 +1,57 @@
+/*
+Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tensorflow
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestContextConfigSetAsync(t *testing.T) {
+	tests := []bool{false, true}
+	for _, test := range tests {
+		t.Run(fmt.Sprint(test), func(t *testing.T) {
+			opt := &ContextOptions{Async: test}
+			if _, err := NewContext(opt); err != nil {
+				t.Fatal(err)
+			}
+		})
+	}
+}
+
+func TestContextConfigListDevices(t *testing.T) {
+	c, err := NewContext(nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	devs, err := c.ListDevices()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(devs) < 1 {
+		t.Fatalf("No devices found using ListDevices()")
+	}
+	foundCPUDevice := false
+	for _, d := range devs {
+		if d.Type == "CPU" {
+			foundCPUDevice = true
+		}
+	}
+	if !foundCPUDevice {
+		t.Error("Failed to find CPU device using ListDevices()")
+	}
+}
-- 
GitLab


From b8fa20009506d6b4ebb261fa8db37e91dda6870a Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Mon, 17 Dec 2018 10:49:19 -0800
Subject: [PATCH 663/873] Distribution Strategy: Fix batch splitting in case of
 map_and_batch. Also add unittests.

PiperOrigin-RevId: 225852554
---
 .../contrib/distribute/python/values_test.py  | 29 +++++++++++++++++++
 tensorflow/python/distribute/values.py        |  8 +++--
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index 538b859f3d..a91fe7c945 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -24,6 +24,7 @@ from absl.testing import parameterized
 from tensorflow.contrib.distribute.python import combinations
 from tensorflow.contrib.distribute.python import multi_worker_test_base
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.experimental.ops import batching
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.distribute import device_util
 from tensorflow.python.distribute import distribute_lib
@@ -748,6 +749,34 @@ class InputIteratorMultiWorkerTest(
                           expected_values, sess)
 
 
+class SplitDatasetBatchTest(test.TestCase):
+
+  def testBatchDataset(self):
+    dataset = dataset_ops.Dataset.range(100).batch(20)
+    split_batch_by = 2
+    result_dataset = values._split_dataset_batch(dataset, split_batch_by)
+    expected_values = [range(i, i+10) for i in range(0, 100, 10)]
+    result = [self.evaluate(el) for el in result_dataset]
+    self.assertAllEqual(expected_values, result)
+
+  def testMapAndBatchDataset(self):
+    dataset = dataset_ops.Dataset.range(100)
+    dataset = dataset.apply(batching.map_and_batch(lambda x: x, 20))
+    split_batch_by = 2
+    result_dataset = values._split_dataset_batch(dataset, split_batch_by)
+    expected_values = [range(i, i+10) for i in range(0, 100, 10)]
+    result = [self.evaluate(el) for el in result_dataset]
+    self.assertAllEqual(expected_values, result)
+
+  def testPrefetchDataset(self):
+    dataset = dataset_ops.Dataset.range(100).batch(20).prefetch(1)
+    split_batch_by = 2
+    result_dataset = values._split_dataset_batch(dataset, split_batch_by)
+    expected_values = [range(i, i+10) for i in range(0, 100, 10)]
+    result = [self.evaluate(el) for el in result_dataset]
+    self.assertAllEqual(expected_values, result)
+
+
 class MirroredVariableTest(test.TestCase, parameterized.TestCase):
 
   config = config_pb2.ConfigProto()
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index a5918b7b73..e4cc8bb3ea 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -1602,8 +1602,12 @@ def _split_dataset_batch(dataset, split_batch_by):
         "The batch operations can be followed by a prefetch.")
 
   batched_dataset = _get_batch_dataset(dataset)
-  batch_size = batched_dataset._batch_size
-  drop_remainder = batched_dataset._drop_remainder
+  if isinstance(batched_dataset, dataset_ops.BatchDataset):
+    batch_size = batched_dataset._batch_size
+    drop_remainder = batched_dataset._drop_remainder
+  elif isinstance(batched_dataset, batching._MapAndBatchDataset):
+    batch_size = batched_dataset._batch_size_t
+    drop_remainder = batched_dataset._drop_remainder_t
   # pylint: enable=protected-access
 
   if tensor_util.is_tensor(batch_size):
-- 
GitLab


From 24cad5cdc576ca1ae3b2b0a1d3d45634467aac35 Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Mon, 17 Dec 2018 10:50:50 -0800
Subject: [PATCH 664/873] Add Go wrapper around Eager C API's TensorHandle.

This is part of a series of changes to provide a thin Go wrapper around the Eager C API.

PiperOrigin-RevId: 225852836
---
 tensorflow/go/tensor_handle.go      | 154 ++++++++++++++++++++++++++++
 tensorflow/go/tensor_handle_test.go | 127 +++++++++++++++++++++++
 2 files changed, 281 insertions(+)
 create mode 100644 tensorflow/go/tensor_handle.go
 create mode 100644 tensorflow/go/tensor_handle_test.go

diff --git a/tensorflow/go/tensor_handle.go b/tensorflow/go/tensor_handle.go
new file mode 100644
index 0000000000..befc1c43ba
--- /dev/null
+++ b/tensorflow/go/tensor_handle.go
@@ -0,0 +1,154 @@
+/*
+Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tensorflow
+
+// #include <stdlib.h>
+// #include "tensorflow/c/c_api.h"
+// #include "tensorflow/c/eager/c_api.h"
+import "C"
+import (
+	"runtime"
+	"unsafe"
+)
+
+// TensorHandle is a handle to a tensor on a device.
+//
+// A Tensor referenced by a TensorHandle may be on any device, whereas a Tensor
+// always resides in the host CPU's memory.
+//
+// A Tensor referenced by a TensorHandle may not have been computed yet. For
+// example, a TensorHandle might reference the output of an operation that has
+// not finished executing. Because of this, various methods, such as Shape() may
+// block until the tensor has been instantiated.
+//
+// This allows multiple operations to be performed on tensors on a device
+// (e.g. a GPU) without sending these values back to the host CPU in between
+// every operation.
+type TensorHandle struct {
+	c *C.TFE_TensorHandle
+}
+
+// NewTensorHandle creates a new tensor handle from a tensor.
+func NewTensorHandle(t *Tensor) (*TensorHandle, error) {
+	status := newStatus()
+	cHandle := C.TFE_NewTensorHandle(t.c, status.c)
+	if err := status.Err(); err != nil {
+		return nil, err
+	}
+
+	th := &TensorHandle{c: cHandle}
+	runtime.SetFinalizer(th, (*TensorHandle).finalizer)
+	return th, nil
+}
+
+func (th *TensorHandle) finalizer() {
+	C.TFE_DeleteTensorHandle(th.c)
+}
+
+// DataType returns the TensorHandle's datatype.
+func (th *TensorHandle) DataType() DataType {
+	return DataType(C.TFE_TensorHandleDataType(th.c))
+}
+
+// Shape returns the shape of the Tensor referenced by th.
+func (th *TensorHandle) Shape() ([]int64, error) {
+	n, err := th.numDims()
+	if err != nil {
+		return nil, err
+	}
+	r := make([]int64, n)
+	for i := 0; i < n; i++ {
+		if r[i], err = th.dim(i); err != nil {
+			return nil, err
+		}
+	}
+	return r, nil
+}
+
+// numDims returns the number of dimensions of the TensorHandle. It blocks
+// until the operation that produces the handle has completed.
+func (th *TensorHandle) numDims() (int, error) {
+	status := newStatus()
+	n := int(C.TFE_TensorHandleNumDims(th.c, status.c))
+	return n, status.Err()
+}
+
+// dim returns the size of the index'th dimension of the TensorHandle. It
+// blocks until the operation that produces the handle has completed.
+func (th *TensorHandle) dim(index int) (int64, error) {
+	status := newStatus()
+	n := int64(C.TFE_TensorHandleDim(th.c, C.int(index), status.c))
+	if err := status.Err(); err != nil {
+		return 0, err
+	}
+	return n, nil
+}
+
+// DeviceName returns the name of the device of the operation that produced the
+// TensorHandle. If the handle was produced by a copy, it returns the
+// destination device of the copy. Note that returned device name is not always
+// the device holding the tensor handle's memory. If you want the latter, use
+// BackingDeviceName. This function will block till the operation that produces
+// th has completed.
+func (th *TensorHandle) DeviceName() (string, error) {
+	status := newStatus()
+	name := C.TFE_TensorHandleDeviceName(th.c, status.c)
+	if err := status.Err(); err != nil {
+		return "", err
+	}
+	return C.GoString(name), nil
+}
+
+// BackingDeviceName returns the name of the device in whose memory the tensor
+// handle resides. This function will block till the operation that produces
+// `h` has completed.
+func (th *TensorHandle) BackingDeviceName() (string, error) {
+	status := newStatus()
+	name := C.TFE_TensorHandleBackingDeviceName(th.c, status.c)
+	if err := status.Err(); err != nil {
+		return "", err
+	}
+	return C.GoString(name), nil
+}
+
+// ToTensor returns the Tensor referenced by th. It may block if this tensor is
+// not yet computed.
+func (th *TensorHandle) ToTensor() (*Tensor, error) {
+	status := newStatus()
+	cTensor := C.TFE_TensorHandleResolve(th.c, status.c)
+	if err := status.Err(); err != nil {
+		return nil, err
+	}
+	return newTensorFromC(cTensor), nil
+}
+
+// CopyToDevice creates a new TensorHandle with the same contents as this
+// TensorHandle but placed in the memory of the device 'deviceName'. If source
+// and destination are the same device, then this creates a new handle that
+// shares the underlying buffer. Otherwise, it currently requires at least one
+// of the source or destination devices to be CPU (i.e., for the source or
+// destination tensor to be placed in host memory).
+func (th *TensorHandle) CopyToDevice(c *Context, deviceName string) (*TensorHandle, error) {
+	status := newStatus()
+	n := C.CString(deviceName)
+	newTh := C.TFE_TensorHandleCopyToDevice(th.c, c.c, n, status.c)
+	C.free(unsafe.Pointer(n))
+	if err := status.Err(); err != nil {
+		return nil, err
+	}
+	return &TensorHandle{c: newTh}, nil
+}
diff --git a/tensorflow/go/tensor_handle_test.go b/tensorflow/go/tensor_handle_test.go
new file mode 100644
index 0000000000..15dea64b08
--- /dev/null
+++ b/tensorflow/go/tensor_handle_test.go
@@ -0,0 +1,127 @@
+/*
+Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package tensorflow
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+)
+
+func TestNewTensorHandle(t *testing.T) {
+	vals := [][]float32{{1.0, 2.0}, {3.0, 4.0}}
+	tensor, err := NewTensor(vals)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err = NewTensorHandle(tensor); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestTensorHandleDataType(t *testing.T) {
+	vals := [][]float32{{1.0, 2.0}, {3.0, 4.0}}
+	tensor, err := NewTensor(vals)
+	if err != nil {
+		t.Fatal(err)
+	}
+	th, err := NewTensorHandle(tensor)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if got, want := th.DataType(), Float; got != want {
+		t.Errorf("Got %v, want %v", got, want)
+	}
+}
+
+func TestTensorHandleShape(t *testing.T) {
+	vals := [][]float32{{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}}
+	tensor, err := NewTensor(vals)
+	if err != nil {
+		t.Fatal(err)
+	}
+	th, err := NewTensorHandle(tensor)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	got, err := th.Shape()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if want := []int64{2, 3}; !reflect.DeepEqual(got, want) {
+		t.Errorf("Got %#v, want %#v", got, want)
+	}
+}
+
+func TestTensorHandleDeviceName(t *testing.T) {
+	vals := [][]float32{{1.0, 2.0}, {3.0, 4.0}}
+	tensor, err := NewTensor(vals)
+	if err != nil {
+		t.Fatal(err)
+	}
+	th, err := NewTensorHandle(tensor)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	d, err := th.DeviceName()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !strings.Contains(d, "CPU") {
+		t.Errorf("DeviceName() did not return a CPU device; got: %s", d)
+	}
+}
+
+func TestTensorHandleBackingDeviceName(t *testing.T) {
+	vals := [][]float32{{1.0, 2.0}, {3.0, 4.0}}
+	tensor, err := NewTensor(vals)
+	if err != nil {
+		t.Fatal(err)
+	}
+	th, err := NewTensorHandle(tensor)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	d, err := th.BackingDeviceName()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !strings.Contains(d, "CPU") {
+		t.Errorf("BackingDeviceName() did not return a CPU device; got: %s", d)
+	}
+}
+
+func TestTensorHandleToTensor(t *testing.T) {
+	initialVals := [][]float32{{1.0, 2.0}, {3.0, 4.0}}
+	initialTensor, err := NewTensor(initialVals)
+	if err != nil {
+		t.Fatal(err)
+	}
+	th, err := NewTensorHandle(initialTensor)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	tensor, err := th.ToTensor()
+	if v := tensor.Value().([][]float32); !reflect.DeepEqual(v, initialVals) {
+		t.Errorf("Got %#v, want %#v", v, initialVals)
+	}
+}
-- 
GitLab


From 5c3353a01d787e4d199380102d8f975740056149 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 10:58:57 -0800
Subject: [PATCH 665/873] Support specifying sharding attribute for tensors
 with unknown dimension

There is nothing in the sharding API what would require a known
dimension so there is no reason to require it.

PiperOrigin-RevId: 225854305
---
 .../compiler/xla/experimental/xla_sharding/xla_sharding.py    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py b/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py
index 1fea816a80..c34e84efc8 100644
--- a/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py
+++ b/tensorflow/compiler/xla/experimental/xla_sharding/xla_sharding.py
@@ -104,9 +104,9 @@ class Sharding(object):
       ValueError: The tensor to split was smaller in the split dimension than
         the number of devices to split over.
     """
-    tensor.shape.assert_is_fully_defined()
     shape = tensor.shape.as_list()
-    if shape[split_dimension] < num_devices:
+    if (shape[split_dimension] is not None and
+        shape[split_dimension] < num_devices):
       raise ValueError('Split dimension was smaller than the required number '
                        'of splits: shape=%r, dimension=%r, num_devices=%r' %
                        (shape, split_dimension, num_devices))
-- 
GitLab


From d26205bd53e112095c1a6e583d3b3e252dce73c7 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Mon, 17 Dec 2018 11:02:43 -0800
Subject: [PATCH 666/873] Update @run_v1_only annotation for symbols removed
 from v2

PiperOrigin-RevId: 225855005
---
 .../distribute/distribute_coordinator_test.py    |  4 ++--
 .../python/kernel_tests/fifo_queue_test.py       |  9 +++++----
 .../kernel_tests/padding_fifo_queue_test.py      |  2 +-
 .../python/kernel_tests/priority_queue_test.py   |  7 +------
 .../random/random_shuffle_queue_test.py          |  3 +--
 tensorflow/python/training/queue_runner_test.py  | 16 +---------------
 6 files changed, 11 insertions(+), 30 deletions(-)

diff --git a/tensorflow/python/distribute/distribute_coordinator_test.py b/tensorflow/python/distribute/distribute_coordinator_test.py
index dbed3e7f59..ceb4483ebb 100644
--- a/tensorflow/python/distribute/distribute_coordinator_test.py
+++ b/tensorflow/python/distribute/distribute_coordinator_test.py
@@ -427,7 +427,7 @@ class DistributeCoordinatorTestStandaloneMode(DistributeCoordinatorTestBase):
     # Each finished worker will increment self._result_correct.
     self.assertEqual(self._result_correct, NUM_WORKERS)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only("MonitoredSession removed from v2")
   def testBetweenGraphWithMonitoredSession(self):
     """Test monitored session in standalone client mode."""
     distribute_coordinator.run_distribute_coordinator(
@@ -601,7 +601,7 @@ class DistributeCoordinatorTestInpendentWorkerMode(
     # Each finished worker will increment self._result_correct.
     self.assertEqual(self._result_correct, NUM_WORKERS)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only("MonitoredSession removed from v2")
   def testBetweenGraphWithMonitoredSession(self):
     cluster_spec = self._create_cluster_spec(
         num_workers=NUM_WORKERS, num_ps=NUM_PS)
diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py
index 0579dddb70..b88b43ff50 100644
--- a/tensorflow/python/kernel_tests/fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/fifo_queue_test.py
@@ -39,7 +39,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_v1_only("FIFOQueue removed from v2")
 class FIFOQueueTest(test.TestCase):
 
   def testConstructor(self):
@@ -1424,7 +1424,7 @@ class FIFOQueueTest(test.TestCase):
         session.run([a, c])
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_v1_only("FIFOQueue removed from v2")
 class FIFOQueueDictTest(test.TestCase):
 
   def testConstructor(self):
@@ -1585,7 +1585,7 @@ class FIFOQueueDictTest(test.TestCase):
       self.assertTrue([compat.as_bytes("dd"), compat.as_bytes("ee")], list(s))
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_v1_only("FIFOQueue removed from v2")
 class FIFOQueueWithTimeoutTest(test.TestCase):
 
   def testDequeueWithTimeout(self):
@@ -1620,7 +1620,7 @@ class FIFOQueueWithTimeoutTest(test.TestCase):
       self.assertEqual(37, self.evaluate(dequeued_t))
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_v1_only("FIFOQueue removed from v2")
 class QueueContainerTest(test.TestCase):
 
   def testContainer(self):
@@ -1631,6 +1631,7 @@ class QueueContainerTest(test.TestCase):
         compat.as_bytes("test"), q.queue_ref.op.get_attr("container"))
 
 
+@test_util.run_v1_only("FIFOQueue removed from v2")
 class FIFOQueueBenchmark(test.Benchmark):
   """Benchmark FIFOQueue operations."""
 
diff --git a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
index e3999695d0..214eaa0160 100644
--- a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
+++ b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py
@@ -35,7 +35,7 @@ from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.platform import test
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_v1_only("PaddingFIFOQueue removed from v2")
 class PaddingFIFOQueueTest(test.TestCase):
 
   def testConstructor(self):
diff --git a/tensorflow/python/kernel_tests/priority_queue_test.py b/tensorflow/python/kernel_tests/priority_queue_test.py
index 49ec7ee483..84f395dd34 100644
--- a/tensorflow/python/kernel_tests/priority_queue_test.py
+++ b/tensorflow/python/kernel_tests/priority_queue_test.py
@@ -34,9 +34,9 @@ import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
 from tensorflow.python.platform import test
 
 
+@test_util.run_v1_only("PriorityQueue removed from v2")
 class PriorityQueueTest(test.TestCase):
 
-  @test_util.run_v1_only("b/120545219")
   def testRoundTripInsertReadOnceSorts(self):
     with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), (
@@ -114,7 +114,6 @@ class PriorityQueueTest(test.TestCase):
         missed.remove((dv0, dv1))
       self.assertEqual(missed, set())
 
-  @test_util.run_v1_only("b/120545219")
   def testRoundTripFillsCapacityMultiThreadedEnqueueAndDequeue(self):
     with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(10, (dtypes.int64), (()))
@@ -270,7 +269,6 @@ class PriorityQueueTest(test.TestCase):
         missed.remove((dv0, dv1))
       self.assertEqual(missed, set())
 
-  @test_util.run_v1_only("b/120545219")
   def testRoundTripInsertOnceReadOnceSorts(self):
     with self.cached_session() as sess:
       q = data_flow_ops.PriorityQueue(2000, (dtypes.string, dtypes.string), (
@@ -292,7 +290,6 @@ class PriorityQueueTest(test.TestCase):
       for e, dv0, dv1 in zip(deq_elem, deq_value_0, deq_value_1):
         self.assertTrue((dv0, dv1) in allowed[e])
 
-  @test_util.run_v1_only("b/120545219")
   def testRoundTripInsertOnceReadManySorts(self):
     with self.cached_session():
       q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (()))
@@ -301,7 +298,6 @@ class PriorityQueueTest(test.TestCase):
       deq_values = np.hstack((q.dequeue_many(100)[0].eval() for _ in range(10)))
       self.assertAllEqual(deq_values, sorted(elem))
 
-  @test_util.run_v1_only("b/120545219")
   def testRoundTripInsertOnceReadOnceLotsSorts(self):
     with self.cached_session():
       q = data_flow_ops.PriorityQueue(2000, (dtypes.int64), (()))
@@ -317,7 +313,6 @@ class PriorityQueueTest(test.TestCase):
       with self.assertRaises(TypeError):
         q.enqueue_many((["a", "b", "c"], ["a", "b", "c"])).run()
 
-  @test_util.run_v1_only("b/120545219")
   def testInsertingNonScalarFails(self):
     with self.cached_session() as sess:
       input_priority = array_ops.placeholder(dtypes.int64)
diff --git a/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py b/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
index dd814a22b4..4a8144fadb 100644
--- a/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
+++ b/tensorflow/python/kernel_tests/random/random_shuffle_queue_test.py
@@ -35,7 +35,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_v1_only("RandomShuffleQueue removed from v2")
 class RandomShuffleQueueTest(test.TestCase):
 
   def setUp(self):
@@ -1417,7 +1417,6 @@ class RandomShuffleQueueTest(test.TestCase):
 
       self.assertItemsEqual(elem, results)
 
-  @test_util.run_v1_only("b/120545219")
   def testBigDequeueMany(self):
     with self.cached_session() as sess:
       q = data_flow_ops.RandomShuffleQueue(2, 0, dtypes_lib.int32, ((),))
diff --git a/tensorflow/python/training/queue_runner_test.py b/tensorflow/python/training/queue_runner_test.py
index c5085079b7..2868e7bcc6 100644
--- a/tensorflow/python/training/queue_runner_test.py
+++ b/tensorflow/python/training/queue_runner_test.py
@@ -39,9 +39,9 @@ from tensorflow.python.training import queue_runner_impl
 _MockOp = collections.namedtuple("MockOp", ["name"])
 
 
+@test_util.run_v1_only("QueueRunner removed from v2")
 class QueueRunnerTest(test.TestCase):
 
-  @test_util.run_v1_only("b/120545219")
   def testBasic(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
@@ -62,7 +62,6 @@ class QueueRunnerTest(test.TestCase):
       # The variable should be 3.
       self.assertEqual(3, self.evaluate(var))
 
-  @test_util.run_v1_only("b/120545219")
   def testTwoOps(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
@@ -86,7 +85,6 @@ class QueueRunnerTest(test.TestCase):
       self.assertEqual(3, self.evaluate(var0))
       self.assertEqual(30, self.evaluate(var1))
 
-  @test_util.run_deprecated_v1
   def testExceptionsCaptured(self):
     with self.cached_session() as sess:
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
@@ -103,7 +101,6 @@ class QueueRunnerTest(test.TestCase):
       self.assertTrue("Operation not in the graph" in str(exceptions[0]))
       self.assertTrue("Operation not in the graph" in str(exceptions[1]))
 
-  @test_util.run_deprecated_v1
   def testRealDequeueEnqueue(self):
     with self.cached_session() as sess:
       q0 = data_flow_ops.FIFOQueue(3, dtypes.float32)
@@ -132,7 +129,6 @@ class QueueRunnerTest(test.TestCase):
       with self.assertRaisesRegexp(errors_impl.OutOfRangeError, "is closed"):
         self.evaluate(dequeue1)
 
-  @test_util.run_v1_only("b/120545219")
   def testRespectCoordShouldStop(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
@@ -157,7 +153,6 @@ class QueueRunnerTest(test.TestCase):
       # The variable should be 0.
       self.assertEqual(0, self.evaluate(var))
 
-  @test_util.run_deprecated_v1
   def testRequestStopOnException(self):
     with self.cached_session() as sess:
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32)
@@ -170,7 +165,6 @@ class QueueRunnerTest(test.TestCase):
       with self.assertRaisesRegexp(ValueError, "Operation not in the graph"):
         coord.join()
 
-  @test_util.run_deprecated_v1
   def testGracePeriod(self):
     with self.cached_session() as sess:
       # The enqueue will quickly block.
@@ -188,7 +182,6 @@ class QueueRunnerTest(test.TestCase):
       # the queue to be closed and the enqueue to terminate.
       coord.join(stop_grace_period_secs=1.0)
 
-  @test_util.run_deprecated_v1
   def testMultipleSessions(self):
     with self.cached_session() as sess:
       with session.Session() as other_sess:
@@ -204,7 +197,6 @@ class QueueRunnerTest(test.TestCase):
         other_threads = qr.create_threads(other_sess, coord=coord)
         self.assertEqual(len(threads), len(other_threads))
 
-  @test_util.run_deprecated_v1
   def testIgnoreMultiStarts(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
@@ -221,7 +213,6 @@ class QueueRunnerTest(test.TestCase):
       new_threads = qr.create_threads(sess, coord=coord)
       self.assertEqual([], new_threads)
 
-  @test_util.run_v1_only("b/120545219")
   def testThreads(self):
     with self.cached_session() as sess:
       # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
@@ -249,7 +240,6 @@ class QueueRunnerTest(test.TestCase):
       self.assertEqual(1, len(exceptions))
       self.assertTrue("Operation not in the graph" in str(exceptions[0]))
 
-  @test_util.run_deprecated_v1
   def testName(self):
     with ops.name_scope("scope"):
       queue = data_flow_ops.FIFOQueue(10, dtypes.float32, name="queue")
@@ -259,7 +249,6 @@ class QueueRunnerTest(test.TestCase):
     self.assertEqual(
         1, len(ops.get_collection(ops.GraphKeys.QUEUE_RUNNERS, "scope")))
 
-  @test_util.run_deprecated_v1
   def testStartQueueRunners(self):
     # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
     zero64 = constant_op.constant(0, dtype=dtypes.int64)
@@ -278,7 +267,6 @@ class QueueRunnerTest(test.TestCase):
       # The variable should be 3.
       self.assertEqual(3, self.evaluate(var))
 
-  @test_util.run_deprecated_v1
   def testStartQueueRunnersRaisesIfNotASession(self):
     zero64 = constant_op.constant(0, dtype=dtypes.int64)
     var = variables.VariableV1(zero64)
@@ -292,7 +280,6 @@ class QueueRunnerTest(test.TestCase):
       with self.assertRaisesRegexp(TypeError, "tf.Session"):
         queue_runner_impl.start_queue_runners("NotASession")
 
-  @test_util.run_deprecated_v1
   def testStartQueueRunnersIgnoresMonitoredSession(self):
     zero64 = constant_op.constant(0, dtype=dtypes.int64)
     var = variables.VariableV1(zero64)
@@ -307,7 +294,6 @@ class QueueRunnerTest(test.TestCase):
           monitored_session.MonitoredSession())
       self.assertFalse(threads)
 
-  @test_util.run_deprecated_v1
   def testStartQueueRunnersNonDefaultGraph(self):
     # CountUpTo will raise OUT_OF_RANGE when it reaches the count.
     graph = ops.Graph()
-- 
GitLab


From 86a9610e8ac3340fdfbd5fceab5a6b3f402edc93 Mon Sep 17 00:00:00 2001
From: Jian Li <jianlijianli@google.com>
Date: Mon, 17 Dec 2018 11:10:18 -0800
Subject: [PATCH 667/873] Internal Change.

PiperOrigin-RevId: 225856603
---
 .../lite/kernels/bidirectional_sequence_lstm_test.cc | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc b/tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc
index b865322682..4d6f91ec74 100644
--- a/tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc
+++ b/tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc
@@ -463,6 +463,8 @@ TEST_P(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
           {n_batch, n_output},  // activation_state tensor
           {n_batch, n_cell},    // cell_state tensor
 
+          // TODO(b/121134029): Update tests so tensor shapes after state tensor
+          // are used. They are currently ignored by test_util.
           {n_batch, sequence_length, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
@@ -625,6 +627,8 @@ TEST_P(LSTMOpTest, BlackBoxTestMergedOutput) {
           {n_batch, n_output},  // activation_state tensor
           {n_batch, n_cell},    // cell_state tensor
 
+          // TODO(b/121134029): Update tests so tensor shapes after state tensor
+          // are used. They are currently ignored by test_util.
           {n_batch, sequence_length, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
@@ -786,6 +790,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
           {n_batch, n_output},  // activation_state tensor
           {n_batch, n_cell},    // cell_state tensor
 
+          // TODO(b/121134029): Update tests so tensor shapes after state tensor
+          // are used. They are currently ignored by test_util.
           {n_batch, sequence_length, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
@@ -944,6 +950,8 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
           {n_batch, n_output},  // activation_state tensor
           {n_batch, n_cell},    // cell_state tensor
 
+          // TODO(b/121134029): Update tests so tensor shapes after state tensor
+          // are used. They are currently ignored by test_util.
           {n_batch, sequence_length, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
@@ -1094,6 +1102,8 @@ TEST(LSTMOpTest,
           {n_batch, n_output},  // activation_state tensor
           {n_batch, n_cell},    // cell_state tensor
 
+          // TODO(b/121134029): Update tests so tensor shapes after state tensor
+          // are used. They are currently ignored by test_util.
           {n_batch, sequence_length, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
@@ -1244,6 +1254,8 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
           {n_batch, n_output},  // activation_state tensor
           {n_batch, n_cell},    // cell_state tensor
 
+          // TODO(b/121134029): Update tests so tensor shapes after state tensor
+          // are used. They are currently ignored by test_util.
           {n_batch, sequence_length, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
-- 
GitLab


From 7dea8383bbb97a1e78cdece876e083b22191974e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 11:18:45 -0800
Subject: [PATCH 668/873] Clean up cpu_allocator() uses to use ProcessState
 where possible.

1. Rename cpu_allocator() to cpu_allocator_base().
2. Add new definition of cpu_allocator() that forwards the request
  to ProcessState::GetCPUAllocator() when possible.

The legacy cpu_allocator() definition is an artifact of a simpler era
in TensorFlow design.  It continues to have a use in compilation and
execution contexts where either ProcessState is unavailable or a very
simple need is to be met.  However, its use can cause performance or
correctness problems in contexts where NUMA affinity is desired or
pre-registered memory for RDMA is in use.

This change makes the former semantics of cpu_allocator() available
via the new function cpu_allocator_base().  The new cpu_allocator()
function takes an optional numa_node parameter and calls the
ProcessState::GetCPUAllocator() when available (which it should always
be in a real model execution, but may not be in e.g. some test
contexts).  Some existing uses of cpu_allocator are changed to provide
a NUMA-node argument.

Also fixes creation of GPUCompatibleCPUDeviceFactory for optional
NUMA-aware CPU devices.

Also ensures that ProcessState generates unique CPU allocators per NUMA
node and does not fall back to cpu_allocator_base() when optional
NUMA affinity is turned on.

PiperOrigin-RevId: 225858100
---
 .../core/common_runtime/gpu/gpu_device.cc     |  3 ++-
 .../common_runtime/gpu/gpu_device_factory.cc  | 16 ++++++++++++---
 .../core/common_runtime/process_state.cc      | 16 +++++++++------
 .../core/common_runtime/process_state.h       |  3 ++-
 tensorflow/core/framework/allocator.cc        | 20 ++++++++++++++++++-
 tensorflow/core/framework/allocator.h         | 15 ++++++++++----
 .../core/framework/allocator_registry.h       | 13 ++++++++++++
 tensorflow/core/framework/op_kernel.cc        |  6 ++++++
 tensorflow/core/framework/op_kernel.h         |  2 ++
 tensorflow/core/kernels/constant_op.cc        |  3 ++-
 tensorflow/core/kernels/cwise_ops_common.h    |  3 ++-
 11 files changed, 82 insertions(+), 18 deletions(-)

diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 14b57cc337..010fdff4e9 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -631,7 +631,8 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
 
   if (parsed.dtype() == DT_VARIANT) {
     const Variant* from = parsed.flat<Variant>().data();
-    Tensor copy(cpu_allocator(), DT_VARIANT, parsed.shape());
+    int numa_node = attributes().locality().numa_node();
+    Tensor copy(cpu_allocator(numa_node), DT_VARIANT, parsed.shape());
     Variant* copy_variant = copy.flat<Variant>().data();
 
     std::list<Notification> notifications;
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
index 8dc7197329..962891894a 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc
@@ -21,6 +21,7 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/threadpool_device.h"
+#include "tensorflow/core/platform/numa.h"
 
 namespace tensorflow {
 
@@ -81,7 +82,8 @@ class GPUCompatibleCPUDevice : public ThreadPoolDevice {
   GPUCompatibleCPUDevice(const SessionOptions& options, const string& name,
                          Bytes memory_limit, const DeviceLocality& locality,
                          Allocator* allocator)
-      : ThreadPoolDevice(options, name, memory_limit, locality, allocator) {
+      : ThreadPoolDevice(options, name, memory_limit, locality, allocator),
+        numa_node_(locality.numa_node()) {
     if (options.config.has_gpu_options()) {
       force_gpu_compatible_ =
           options.config.gpu_options().force_gpu_compatible();
@@ -92,7 +94,7 @@ class GPUCompatibleCPUDevice : public ThreadPoolDevice {
   Allocator* GetAllocator(AllocatorAttributes attr) override {
     GPUProcessState* ps = GPUProcessState::singleton();
     if (attr.gpu_compatible() || force_gpu_compatible_) {
-      return ps->GetCUDAHostAllocator(0);
+      return ps->GetCUDAHostAllocator(numa_node_);
     } else {
       // Call the parent's implementation.
       return ThreadPoolDevice::GetAllocator(attr);
@@ -101,6 +103,7 @@ class GPUCompatibleCPUDevice : public ThreadPoolDevice {
 
  private:
   bool force_gpu_compatible_ = false;
+  int numa_node_ = port::kNUMANoAffinity;
 };
 
 // The associated factory.
@@ -113,10 +116,17 @@ class GPUCompatibleCPUDeviceFactory : public DeviceFactory {
     if (iter != options.config.device_count().end()) {
       n = iter->second;
     }
+    int num_numa_nodes = options.config.experimental().use_numa_affinity()
+                             ? port::NUMANumNodes()
+                             : 1;
     for (int i = 0; i < n; i++) {
       string name = strings::StrCat(name_prefix, "/device:CPU:", i);
+      int numa_node = i % num_numa_nodes;
+      DeviceLocality locality;
+      locality.set_numa_node(numa_node);
       devices->push_back(absl::make_unique<GPUCompatibleCPUDevice>(
-          options, name, Bytes(256 << 20), DeviceLocality(), cpu_allocator()));
+          options, name, Bytes(256 << 20), DeviceLocality(),
+          ProcessState::singleton()->GetCPUAllocator(numa_node)));
     }
 
     return Status::OK();
diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc
index 3d8ac9b134..fdb79767ec 100644
--- a/tensorflow/core/common_runtime/process_state.cc
+++ b/tensorflow/core/common_runtime/process_state.cc
@@ -34,11 +34,15 @@ namespace tensorflow {
 
 /*static*/ ProcessState* ProcessState::singleton() {
   static ProcessState* instance = new ProcessState;
+  static std::once_flag f;
+  std::call_once(f, []() {
+    AllocatorFactoryRegistry::singleton()->process_state_ = instance;
+  });
+
   return instance;
 }
 
-ProcessState::ProcessState() : numa_enabled_(false) {
-}
+ProcessState::ProcessState() : numa_enabled_(false) {}
 
 string ProcessState::MemDesc::DebugString() {
   return strings::StrCat((loc == CPU ? "CPU " : "GPU "), dev_index,
@@ -72,7 +76,7 @@ Allocator* ProcessState::GetCPUAllocator(int numa_node) {
     }
     Allocator* allocator = nullptr;
     SubAllocator* sub_allocator =
-        (alloc_visitors_defined || use_bfc_allocator)
+        (numa_enabled_ || alloc_visitors_defined || use_bfc_allocator)
             ? new BasicCPUAllocator(
                   numa_enabled_ ? numa_node : port::kNUMANoAffinity,
                   cpu_alloc_visitors_, cpu_free_visitors_)
@@ -93,7 +97,7 @@ Allocator* ProcessState::GetCPUAllocator(int numa_node) {
                            "bfc_cpu_allocator_for_gpu" /*name*/);
       VLOG(2) << "Using BFCAllocator with memory limit of "
               << cpu_mem_limit_in_mb << " MB for ProcessState CPU allocator";
-    } else if (alloc_visitors_defined) {
+    } else if (sub_allocator) {
       DCHECK(sub_allocator);
       allocator =
           new PoolAllocator(100 /*pool_size_limit*/, true /*auto_resize*/,
@@ -103,7 +107,7 @@ Allocator* ProcessState::GetCPUAllocator(int numa_node) {
               << " numa_node=" << numa_node;
     } else {
       DCHECK(!sub_allocator);
-      allocator = cpu_allocator();
+      allocator = cpu_allocator_base();
     }
     if (LogMemory::IsEnabled() && !allocator->TracksAllocationSizes()) {
       // Wrap the allocator to track allocation ids for better logging
@@ -138,7 +142,7 @@ void ProcessState::AddCPUFreeVisitor(SubAllocator::Visitor visitor) {
 void ProcessState::TestOnlyReset() {
   mutex_lock lock(mu_);
   // Don't delete this value because it's static.
-  Allocator* default_cpu_allocator = cpu_allocator();
+  Allocator* default_cpu_allocator = cpu_allocator_base();
   mem_desc_map_.clear();
   for (Allocator* a : cpu_allocators_) {
     if (a != default_cpu_allocator) delete a;
diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h
index 6849d305b3..f30e440c29 100644
--- a/tensorflow/core/common_runtime/process_state.h
+++ b/tensorflow/core/common_runtime/process_state.h
@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/allocator_registry.h"
 #include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/core/platform/thread_annotations.h"
 #include "tensorflow/core/platform/types.h"
@@ -34,7 +35,7 @@ class PoolAllocator;
 
 // Singleton that manages per-process state, e.g. allocation of
 // shared resources.
-class ProcessState {
+class ProcessState : public ProcessStateInterface {
  public:
   static ProcessState* singleton();
 
diff --git a/tensorflow/core/framework/allocator.cc b/tensorflow/core/framework/allocator.cc
index 89c49a2ad0..e942191efe 100644
--- a/tensorflow/core/framework/allocator.cc
+++ b/tensorflow/core/framework/allocator.cc
@@ -216,15 +216,33 @@ class CPUAllocatorFactory : public AllocatorFactory {
 REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocatorFactory);
 }  // namespace
 
-Allocator* cpu_allocator() {
+Allocator* cpu_allocator_base() {
   static Allocator* cpu_alloc =
       AllocatorFactoryRegistry::singleton()->GetAllocator();
+  // TODO(tucker): This really seems wrong.  It's only going to be effective on
+  // the first call in a process (but the desired effect is associated with a
+  // session), and we probably ought to be tracking the highest level Allocator,
+  // not the lowest.  Revisit the advertised semantics of the triggering option.
   if (cpu_allocator_collect_full_stats && !cpu_alloc->TracksAllocationSizes()) {
     cpu_alloc = new TrackingAllocator(cpu_alloc, true);
   }
   return cpu_alloc;
 }
 
+Allocator* cpu_allocator(int numa_node) {
+  // Correctness relies on devices being created prior to the first call
+  // to cpu_allocator, if devices are ever to be created in the process.
+  // Device creation in turn triggers ProcessState creation and the availability
+  // of the correct access pointer via this function call.
+  static ProcessStateInterface* ps =
+      AllocatorFactoryRegistry::singleton()->process_state();
+  if (ps) {
+    return ps->GetCPUAllocator(numa_node);
+  } else {
+    return cpu_allocator_base();
+  }
+}
+
 SubAllocator::SubAllocator(const std::vector<Visitor>& alloc_visitors,
                            const std::vector<Visitor>& free_visitors)
     : alloc_visitors_(alloc_visitors), free_visitors_(free_visitors) {}
diff --git a/tensorflow/core/framework/allocator.h b/tensorflow/core/framework/allocator.h
index 531ea73e89..3ded86e8e9 100644
--- a/tensorflow/core/framework/allocator.h
+++ b/tensorflow/core/framework/allocator.h
@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/framework/type_traits.h"
 #include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/numa.h"
 #include "tensorflow/core/platform/types.h"
 
 namespace tensorflow {
@@ -375,10 +376,16 @@ struct AllocatorAttributes {
 };
 
 // Returns a trivial implementation of Allocator, which is a process singleton.
-// Access through this function is only intended for use in tests and auxiliary
-// processing.  Performance sensitive uses should always obtain allocators from
-// ProcessState.
-Allocator* cpu_allocator();
+// Access through this function is only intended for use by restricted parts
+// of the infrastructure.
+Allocator* cpu_allocator_base();
+
+// If available, calls ProcessState::GetCPUAllocator(numa_node).
+// If not, falls back to cpu_allocator_base().
+// Intended for use in contexts where ProcessState is not visible at
+// compile time. Where ProcessState is visible, it's preferable to
+// call it directly.
+Allocator* cpu_allocator(int numa_node = port::kNUMANoAffinity);
 
 // If 'enable' is true, the default CPU allocator implementation will collect
 // AllocatorStats. By default, it's disabled.
diff --git a/tensorflow/core/framework/allocator_registry.h b/tensorflow/core/framework/allocator_registry.h
index e907c52ba9..9dc74345da 100644
--- a/tensorflow/core/framework/allocator_registry.h
+++ b/tensorflow/core/framework/allocator_registry.h
@@ -43,6 +43,13 @@ class AllocatorFactory {
   virtual SubAllocator* CreateSubAllocator(int numa_node) = 0;
 };
 
+// ProcessState is defined in a package that cannot be a dependency of
+// framework.  This definition allows us to access the one method we need.
+class ProcessStateInterface {
+ public:
+  virtual Allocator* GetCPUAllocator(int numa_node) = 0;
+};
+
 // A singleton registry of AllocatorFactories.
 //
 // Allocators should be obtained through ProcessState or cpu_allocator()
@@ -72,6 +79,12 @@ class AllocatorFactoryRegistry {
   // Returns the singleton value.
   static AllocatorFactoryRegistry* singleton();
 
+  ProcessStateInterface* process_state() const { return process_state_; }
+
+ protected:
+  friend class ProcessState;
+  ProcessStateInterface* process_state_ = nullptr;
+
  private:
   mutex mu_;
   bool first_alloc_made_ = false;
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index e3cb4a40ec..692da603f1 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -122,6 +122,12 @@ const string& OpKernel::type_string() const { return def_->op(); }
 const string& OpKernel::requested_device() const { return def_->device(); }
 const string& OpKernel::requested_input(int i) const { return def_->input(i); }
 
+// This static function exists only because device_attributes.pb.h is
+// already included here, and it can't be introduced elsewhere.
+/*static*/ int OpKernel::DeviceNumaNode(const DeviceBase* device) {
+  return device->attributes().locality().numa_node();
+}
+
 Status OpKernel::InputRange(StringPiece input_name, int* start,
                             int* stop) const {
   const auto result = input_name_map_.find(input_name);
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 19a0c5e5be..6a25d2b92f 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -171,6 +171,8 @@ class OpKernel {
   // TODO(irving): Move to TensorShapeUtils once !allow_legacy_scalars
   Status MakeShape(const Tensor& shape, TensorShape* out) const;
 
+  static int DeviceNumaNode(const DeviceBase* device);
+
  private:
   const std::unique_ptr<const NodeDef> def_;
   const DataTypeVector input_types_;
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index 33b9243dfe..75ca77fad5 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -262,7 +262,8 @@ class ZerosLikeOp : public OpKernel {
       const Variant& v = input.scalar<Variant>()();
       // DT_VARIANT tensors must be allocated on CPU since they wrap C++
       // objects which can not be efficiently represented in GPU memory.
-      Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({}));
+      int numa_node = DeviceNumaNode(ctx->device());
+      Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape({}));
       Variant* out_v = &(out.scalar<Variant>()());
       OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                               ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h
index f77d7238af..07fe44778a 100644
--- a/tensorflow/core/kernels/cwise_ops_common.h
+++ b/tensorflow/core/kernels/cwise_ops_common.h
@@ -264,7 +264,8 @@ class UnaryVariantOp : public OpKernel {
     const Variant& v = inp.scalar<Variant>()();
     Variant v_out;
     OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(ctx, OpEnum, v, &v_out));
-    Tensor out(cpu_allocator(), DT_VARIANT, TensorShape());
+    int numa_node = DeviceNumaNode(ctx->device());
+    Tensor out(cpu_allocator(numa_node), DT_VARIANT, TensorShape());
     out.scalar<Variant>()() = std::move(v_out);
     ctx->set_output(0, std::move(out));
   }
-- 
GitLab


From f9699b8d405aaeaa512fb8e87d075956941ac086 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 11:24:52 -0800
Subject: [PATCH 669/873] Fixing build due to ambiguous vector constructor.

PiperOrigin-RevId: 225859201
---
 tensorflow/core/graph/graph.cc | 17 ++++++-----------
 tensorflow/core/graph/graph.h  |  2 +-
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc
index 623dc855c4..3ea222c13c 100644
--- a/tensorflow/core/graph/graph.cc
+++ b/tensorflow/core/graph/graph.cc
@@ -306,17 +306,12 @@ Status Node::input_tensor(int idx, OutputTensor* t) const {
 // NodeDebugInfo
 
 NodeDebugInfo::NodeDebugInfo(const Node& n) : NodeDebugInfo(n.def()) {}
-NodeDebugInfo::NodeDebugInfo(const NodeDef& ndef)
-    : name(ndef.name()),
-      original_node_names(
-          ndef.has_experimental_debug_info()
-              ? std::vector<string>({ndef.experimental_debug_info()
-                                         .original_node_names()
-                                         .begin(),
-                                     ndef.experimental_debug_info()
-                                         .original_node_names()
-                                         .end()})
-              : std::vector<string>()) {}
+NodeDebugInfo::NodeDebugInfo(const NodeDef& ndef) : name(ndef.name()) {
+  if (ndef.has_experimental_debug_info()) {
+    const auto& names = ndef.experimental_debug_info().original_node_names();
+    original_node_names.assign(names.begin(), names.end());
+  }
+}
 
 // InputTensor
 
diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h
index 0b31219d5f..289a3d2a23 100644
--- a/tensorflow/core/graph/graph.h
+++ b/tensorflow/core/graph/graph.h
@@ -297,7 +297,7 @@ class Node {
 // Stores debug information associated with the Node.
 struct NodeDebugInfo {
   const string name;
-  const std::vector<string> original_node_names;
+  std::vector<string> original_node_names;
 
   NodeDebugInfo(const Node& n);
   NodeDebugInfo(const NodeDef& ndef);
-- 
GitLab


From 10ef7edc881ee715eaae48656fcb431fe128441f Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Mon, 17 Dec 2018 11:30:55 -0800
Subject: [PATCH 670/873] Internal change.

PiperOrigin-RevId: 225860335
---
 .bazelrc                                      |  3 ++
 tensorflow/BUILD                              |  6 +++
 tensorflow/core/kernels/BUILD                 | 40 +++++++++++++------
 .../python/kernel_tests/linalg_grad_test.py   |  3 +-
 4 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/.bazelrc b/.bazelrc
index cd7e13ddfc..ceba7bfdba 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -93,6 +93,9 @@ build --define=PREFIX=/usr
 build --define=LIBDIR=$(PREFIX)/lib
 build --define=INCLUDEDIR=$(PREFIX)/include
 
+# Disable MKL-DNN contraction kernels by default.
+build --define=tensorflow_mkldnn_contraction_kernel=0
+
 # Default options should come above this line
 
 # Options from ./configure
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index d5d9e30d9e..10c83e8e4b 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -202,6 +202,12 @@ config_setting(
     visibility = ["//visibility:public"],
 )
 
+config_setting(
+    name = "arm",
+    values = {"cpu": "arm"},
+    visibility = ["//visibility:public"],
+)
+
 config_setting(
     name = "freebsd",
     values = {"cpu": "freebsd"},
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index d519b2426e..73c11dab92 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -94,13 +94,14 @@ config_setting(
 )
 
 config_setting(
-    # Add "--define tensorflow_mkldnn_contraction_kernel=1" to your build command to use mkldnn
+    # Add "--define tensorflow_mkldnn_contraction_kernel=0" to your build command to disable mkldnn
     # sgemm in Eigen tensor contractions (matrix multiplications and convolutions). The mkldnn
     # kernels are generated at runtime and use avx/avx2/fma/avx512 based on cpu status registers
-    # (https://en.wikipedia.org/wiki/CPUID).
-    name = "mkldnn_contraction_kernel",
+    # (https://en.wikipedia.org/wiki/CPUID). Default Eigen contraction kernel is
+    # Eigen::internal::gebp_kernel (general block-panel kernel).
+    name = "no_mkldnn_contraction_kernel",
     values = {
-        "define": "tensorflow_mkldnn_contraction_kernel=1",
+        "define": "tensorflow_mkldnn_contraction_kernel=0",
     },
 )
 
@@ -578,12 +579,13 @@ cc_library(
 # tensor contractions (small matrix multiplication kernel used to multiple together
 # blocks of the original tensors).
 #
-# 0) Default contraction kernel is Eigen::internal::gebp_kernel.
-#
-# 1) --define tensorflow_mkldnn_contraction_kernel=1
+# 1) Default:
 #    Use Mkldnn single threaded sgemm. The mkldnn kernels are generated at runtime and
 #    use avx/avx2/fma/avx512 based on cpu status registers (https://en.wikipedia.org/wiki/CPUID).
 #
+# 2) Eigen: --define tensorflow_mkldnn_contraction_kernel=0 (disable mkldnn)
+#    Use Eigen contraction kernel: Eigen::internal::gebp_kernel.
+#
 # If you use `tensor.contract(other_tensor)` in your code, you must include additional header
 # to get the benefit of custom contraction kernel:
 #
@@ -595,17 +597,25 @@ cc_library(
     srcs = ["eigen_contraction_kernel.cc"],
     hdrs = ["eigen_contraction_kernel.h"],
     defines = select({
-        ":mkldnn_contraction_kernel": [
+        "//tensorflow:android": [],
+        "//tensorflow:arm": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_ppc64le": [],
+        ":no_mkldnn_contraction_kernel": [],
+        "//conditions:default": [
             "TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL",
             "TENSORFLOW_USE_MKLDNN_CONTRACTION_KERNEL",
         ],
-        "//conditions:default": [],
     }),
     deps = [
         "//third_party/eigen3",
     ] + select({
-        ":mkldnn_contraction_kernel": ["@mkl_dnn//:mkldnn_single_threaded"],
-        "//conditions:default": [],
+        "//tensorflow:android": [],
+        "//tensorflow:arm": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_ppc64le": [],
+        ":no_mkldnn_contraction_kernel": [],
+        "//conditions:default": ["@mkl_dnn//:mkldnn_single_threaded"],
     }),
 )
 
@@ -2484,8 +2494,12 @@ tf_cc_test(
     name = "eigen_mkldnn_contraction_kernel_test",
     size = "small",
     srcs = select({
-        ":mkldnn_contraction_kernel": ["eigen_mkldnn_contraction_kernel_test.cc"],
-        "//conditions:default": [],
+        "//tensorflow:android": [],
+        "//tensorflow:arm": [],
+        "//tensorflow:ios": [],
+        "//tensorflow:linux_ppc64le": [],
+        ":no_mkldnn_contraction_kernel": [],
+        "//conditions:default": ["eigen_mkldnn_contraction_kernel_test.cc"],
     }),
     tags = ["mkldnn_contraction_kernel"],
     deps = [
diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py
index ff84221611..1494329f80 100644
--- a/tensorflow/python/kernel_tests/linalg_grad_test.py
+++ b/tensorflow/python/kernel_tests/linalg_grad_test.py
@@ -216,6 +216,7 @@ if __name__ == '__main__':
           shape = (rows, cols)
           name = '%s_%s_%s' % (dtype.__name__, '_'.join(map(str, shape)),
                                l2_regularization)
+          float32_tol_fudge = 5.1 if l2_regularization == 1e-6 else 4.0
           _AddTest(
               MatrixBinaryFunctorGradientTest,
               'MatrixSolveLsGradient',
@@ -226,6 +227,6 @@ if __name__ == '__main__':
                    linalg_ops.matrix_solve_ls(a, b, l)),
                   dtype,
                   shape,
-                  float32_tol_fudge=4.0))
+                  float32_tol_fudge))
 
   test_lib.main()
-- 
GitLab


From be15ecc9c1e692b1b562d7d23f19bd1263896eef Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 11:33:01 -0800
Subject: [PATCH 671/873] Add feature columns for TPUs.

PiperOrigin-RevId: 225860767
---
 tensorflow/contrib/tpu/BUILD                  |  41 +-
 .../contrib/tpu/python/tpu/feature_column.py  | 429 ++++++++++++++++++
 .../tpu/python/tpu/feature_column_test.py     | 286 ++++++++++++
 3 files changed, 753 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/contrib/tpu/python/tpu/feature_column.py
 create mode 100644 tensorflow/contrib/tpu/python/tpu/feature_column_test.py

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 007aeaec15..563a036ab5 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -1,15 +1,15 @@
 # Description: Operations defined for Cloud TPUs
 
-licenses(["notice"])  # Apache 2.0
-
 load(
     "//tensorflow:tensorflow.bzl",
     "tf_custom_op_library",
     "tf_gen_op_libs",
     "tf_gen_op_wrapper_py",
+    "tf_py_test",
 )
 load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library")
-load("//tensorflow:tensorflow.bzl", "tf_py_test")
+
+licenses(["notice"])  # Apache 2.0
 
 package(
     default_visibility = [
@@ -201,6 +201,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":feature_column",
         ":keras_support",  # split out to avoid cycle with tpu_strategy
         ":tpu_embedding",
         ":tpu_estimator",
@@ -420,3 +421,37 @@ py_library(
         "@six_archive//:six",
     ],
 )
+
+py_library(
+    name = "feature_column",
+    srcs = ["python/tpu/feature_column.py"],
+    deps = [
+        ":tpu_lib",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:init_ops",
+        "//tensorflow/python:variable_scope",
+        "//tensorflow/python/feature_column",
+        "//tensorflow/python/feature_column:feature_column_py",
+    ],
+)
+
+tf_py_test(
+    name = "feature_column_test",
+    srcs = [
+        "python/tpu/feature_column_test.py",
+    ],
+    additional_deps = [
+        ":feature_column",
+        "//third_party/py/numpy",
+        "//tensorflow/python:dtypes",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:lookup_ops",
+        "//tensorflow/python:parsing_ops",
+        "//tensorflow/python:session",
+        "//tensorflow/python:sparse_tensor",
+        "//tensorflow/python:variables",
+        "//tensorflow/python/feature_column",
+        "//tensorflow/python/feature_column:feature_column_py",
+    ],
+    main = "python/tpu/feature_column_test.py",
+)
diff --git a/tensorflow/contrib/tpu/python/tpu/feature_column.py b/tensorflow/contrib/tpu/python/tpu/feature_column.py
new file mode 100644
index 0000000000..8edf131bc2
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/feature_column.py
@@ -0,0 +1,429 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""TPU Feature Column Library."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+from tensorflow.contrib.tpu.python.tpu import tpu
+from tensorflow.contrib.tpu.python.tpu import tpu_function
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.feature_column import feature_column_lib as fc_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import variable_scope
+# pylint: disable=protected-access
+
+
+_TPU_FC_TO_SCOPE = '_tpu_feature_column_scope'
+_SUPPORTED_CATEGORICAL_COLUMNS = (fc._IdentityCategoricalColumn,
+                                  fc._VocabularyFileCategoricalColumn,
+                                  fc._VocabularyListCategoricalColumn,
+                                  fc._WeightedCategoricalColumn,
+                                  fc_lib.IdentityCategoricalColumn,
+                                  fc_lib.VocabularyFileCategoricalColumn,
+                                  fc_lib.VocabularyListCategoricalColumn,
+                                  fc_lib.WeightedCategoricalColumn)
+
+
+def embedding_column(categorical_column,
+                     dimension,
+                     combiner='mean',
+                     initializer=None):
+  """TPU embedding_column for `tf.feature_column.embedding_column`.
+
+  Note that the interface for TPU embedding_column is different from the non-TPU
+  version. The following args available for the non-TPU version are NOT
+  supported: ckpt_to_load_from, tensor_name_in_ckp, max_norm and trainable.
+
+  Args:
+    categorical_column: A categorical_column returned from
+        categorical_column_with_identity,  weighted_categorical_column,
+        categorical_column_with_vocabulary_list or
+        categorical_column_with_vocabulary_file.
+    dimension: An integer specifying dimension of the embedding, must be > 0.
+    combiner: A string specifying how to reduce if there are multiple entries
+      in a single row. For more information, see
+      `tf.feature_column.embedding_column`.
+    initializer: A variable initializer function to be used in embedding
+      variable initialization. If not specified, defaults to
+      `tf.truncated_normal_initializer` with mean `0.0` and standard deviation
+      `1/sqrt(dimension)`.
+
+  Returns:
+    A  _TPUEmbeddingColumn.
+
+  Raises:
+    ValueError: if `dimension` not > 0.
+    ValueError: if `initializer` is specified but not callable.
+  """
+  if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
+    raise TypeError(
+        'categorical_column for tpu '
+        ' embedding_column must be type %s, got %s.' % (' or '.join([
+            cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
+        ]), type(categorical_column)))
+  if (dimension is None) or (dimension < 1):
+    raise ValueError('Invalid dimension {}.'.format(dimension))
+
+  if (initializer is not None) and (not callable(initializer)):
+    raise ValueError('initializer must be callable if specified. '
+                     'Embedding of column_name: {}'.format(
+                         categorical_column.name))
+  if initializer is None:
+    initializer = init_ops.truncated_normal_initializer(
+        mean=0.0, stddev=1 / math.sqrt(dimension))
+
+  embedding_shape = categorical_column._num_buckets, dimension  # pylint: disable=protected-access
+
+  def _creator(weight_collections, scope):
+    embedding_column_layer = fc._EmbeddingColumnLayer(
+        embedding_shape=embedding_shape,
+        initializer=initializer,
+        weight_collections=weight_collections,
+        trainable=True,
+        name='embedding_column_layer')
+    return embedding_column_layer(None, scope=scope)  # pylint: disable=not-callable
+
+  column = _TPUEmbeddingColumn(
+      categorical_column=categorical_column,
+      dimension=dimension,
+      combiner=combiner,
+      layer_creator=_creator,
+      ckpt_to_load_from=None,
+      tensor_name_in_ckpt=None,
+      max_norm=None,
+      trainable=True)
+  # For Embedding column, the initializer is hidden inside the creator Fn, which
+  # is not accessiable later. So, we attach it to a speicial field. Also note
+  # that non-TPU Embedding column and non-TPU shared Embedding column handle the
+  # initializer differently. See shared_embedding_columns for details.
+  column._tpu_initializer = initializer
+  return column
+
+
+def shared_embedding_columns(categorical_columns,
+                             dimension,
+                             combiner='mean',
+                             initializer=None,
+                             shared_embedding_collection_name=None):
+  """List of dense columns that convert from sparse, categorical input."""
+  for categorical_column in categorical_columns:
+    if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
+      raise TypeError(
+          'categorical_column for tpu '
+          ' shared_embedding_columns must be type %s, got %s.' % (' or '.join([
+              cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
+          ]), type(categorical_column)))
+  columns = fc_lib.shared_embedding_columns(
+      categorical_columns,
+      dimension,
+      combiner=combiner,
+      initializer=initializer,
+      shared_embedding_collection_name=shared_embedding_collection_name,
+      ckpt_to_load_from=None,
+      tensor_name_in_ckpt=None,
+      max_norm=None,
+      trainable=True)
+
+  # Use the initializer and shared_embedding_collection_name to create TPU
+  # version
+  initializer = columns[0].initializer
+  shared_embedding_collection_name = columns[0].shared_embedding_collection_name
+  tpu_columns = []
+
+  # Create the state (_SharedEmbeddingColumnLayer) here.
+  for categorical_column in categorical_columns:
+    column = _TPUSharedEmbeddingColumn(
+        categorical_column=categorical_column,
+        dimension=dimension,
+        combiner=combiner,
+        initializer=initializer,
+        shared_embedding_collection_name=shared_embedding_collection_name,
+        ckpt_to_load_from=None,
+        tensor_name_in_ckpt=None,
+        max_norm=None,
+        trainable=True)
+    tpu_columns.append(column)
+
+  return tpu_columns
+
+
+class _TPUBaseEmbeddingColumn(object):
+  """Base class for TPU Embedding Column."""
+
+  def __init__(self, categorical_column):
+    self._tpu_categorical_column = categorical_column
+
+  def get_combiner(self):
+    """Returns the embedding combiner."""
+    raise NotImplementedError('not implemented')
+
+  def get_embedding_table_size(self):
+    """Returns the embedding table size, tuple of vocab size and dimension."""
+    raise NotImplementedError('not implemented')
+
+  def get_feature_key_name(self):
+    """Returns the feature key name in the features dict."""
+    raise NotImplementedError('not impl')
+
+  def get_weight_key_name(self):
+    """Return the key name for weights."""
+    raise NotImplementedError('not impl')
+
+  def get_embedding_var_name(self):
+    """Returns the embedding variable name.
+
+    Feature key name and embedding variable name are usually one-to-one mapping.
+    But for shared embedding columns, it is many-to-one mapping.
+    """
+    raise NotImplementedError('not impl')
+
+  def get_initializer(self):
+    """Returns the initializer."""
+    raise NotImplementedError('not impl')
+
+  def is_categorical_column_weighted(self):
+    """Check if the categorical column of the embedding column is weighted."""
+    raise NotImplementedError('not impl')
+
+
+class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
+  """Core Embedding Column."""
+
+  def __new__(cls,
+              categorical_column,
+              dimension,
+              combiner='mean',
+              layer_creator=None,
+              ckpt_to_load_from=None,
+              tensor_name_in_ckpt=None,
+              max_norm=None,
+              trainable=True):
+    # Note, args ckpt_to_load_from, tensor_name_in_ckpt, max_norm and trainable
+    # are not supported on TPU. They are solely for matching the signature of
+    # __new__ of parent class fc._EmbeddingColumn.
+    return fc._EmbeddingColumn.__new__(
+        cls,
+        categorical_column,
+        dimension,
+        combiner=combiner,
+        layer_creator=layer_creator,
+        ckpt_to_load_from=ckpt_to_load_from,
+        tensor_name_in_ckpt=tensor_name_in_ckpt,
+        max_norm=max_norm,
+        trainable=trainable)
+
+  def __init__(self,
+               categorical_column,
+               dimension,
+               combiner='mean',
+               layer_creator=None,
+               ckpt_to_load_from=None,
+               tensor_name_in_ckpt=None,
+               max_norm=None,
+               trainable=True):
+    _TPUBaseEmbeddingColumn.__init__(self, categorical_column)
+    self._key = None
+
+  def get_combiner(self):
+    return self.combiner
+
+  def get_embedding_table_size(self):
+    """Returns num_ids and width."""
+    return (self.categorical_column._num_buckets, self.dimension)
+
+  def get_feature_key_name(self):
+    """get_feature_key_name."""
+    if self.is_categorical_column_weighted():
+      return self.categorical_column.categorical_column.name
+    return self.categorical_column.name
+
+  def get_weight_key_name(self):
+    """get_weight_key_name."""
+    if self.is_categorical_column_weighted():
+      return self.categorical_column.weight_feature_key
+    return None
+
+  def get_embedding_var_name(self):
+    """get_embedding_var_name."""
+    return self.categorical_column.name
+
+  def get_initializer(self):
+    return self._tpu_initializer
+
+  def is_categorical_column_weighted(self):
+    """Check if the categorical column of the embedding column is weighted."""
+    if isinstance(
+        self.categorical_column,
+        (
+            fc._WeightedCategoricalColumn,  # pylint: disable=protected-access
+            fc_lib.WeightedCategoricalColumn)):
+      return True
+    return False
+
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    if tpu.under_tpu_inference_context():
+      def host_computation():
+        return fc._EmbeddingColumn._get_dense_tensor(
+            self, inputs, weight_collections, trainable)
+      return tpu.outside_compilation(host_computation)
+
+    if _is_running_on_cpu():
+      return fc._EmbeddingColumn._get_dense_tensor(
+          self, inputs, weight_collections, trainable)
+
+    # TPU mode
+    # Get the embeddings from the LazyBuilder.
+    tensor = inputs.get(self.get_feature_key_name())
+
+    # Add to collection for _create_tpu_embedding_variables_and_ops
+    _record_variable_scope_and_name(self.get_embedding_var_name(),
+                                    'embedding_weights')
+
+    return tensor
+
+
+class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
+                                fc._SharedEmbeddingColumn):
+  """Core Shared Embedding Column."""
+
+  def __new__(cls,
+              categorical_column,
+              dimension,
+              combiner='mean',
+              initializer=None,
+              shared_embedding_collection_name=None,
+              ckpt_to_load_from=None,
+              tensor_name_in_ckpt=None,
+              max_norm=None,
+              trainable=True):
+    return fc._SharedEmbeddingColumn.__new__(
+        cls,
+        categorical_column,
+        dimension,
+        combiner=combiner,
+        initializer=initializer,
+        shared_embedding_collection_name=shared_embedding_collection_name,
+        ckpt_to_load_from=ckpt_to_load_from,
+        tensor_name_in_ckpt=tensor_name_in_ckpt,
+        max_norm=max_norm,
+        trainable=trainable)
+
+  def __init__(self,
+               categorical_column,
+               dimension,
+               combiner='mean',
+               initializer=None,
+               shared_embedding_collection_name=None,
+               ckpt_to_load_from=None,
+               tensor_name_in_ckpt=None,
+               max_norm=None,
+               trainable=True):
+
+    _TPUBaseEmbeddingColumn.__init__(self, categorical_column)
+    self._key = None
+
+  def get_combiner(self):
+    return self.combiner
+
+  def get_embedding_table_size(self):
+    """Returns num_ids and width."""
+    return (self.categorical_column._num_buckets, self.dimension)
+
+  def get_feature_key_name(self):
+    """get_feature_key_name."""
+    if self.is_categorical_column_weighted():
+      return self.categorical_column.categorical_column.name
+    return self.categorical_column.name
+
+  def get_weight_key_name(self):
+    """get_weight_key_name."""
+    if self.is_categorical_column_weighted():
+      return self.categorical_column.weight_feature_key
+    return None
+
+  def get_embedding_var_name(self):
+    """get_embedding_var_name."""
+    return self.shared_embedding_collection_name
+
+  def get_initializer(self):
+    return self.initializer
+
+  def is_categorical_column_weighted(self):
+    """Check if the categorical column of the embedding column is weighted."""
+    if isinstance(
+        self.categorical_column,
+        (
+            fc._WeightedCategoricalColumn,  # pylint: disable=protected-access
+            fc_lib.WeightedCategoricalColumn)):
+      return True
+    return False
+
+  def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
+    if tpu.under_tpu_inference_context():
+      def host_computation():
+        return fc._SharedEmbeddingColumn._get_dense_tensor(
+            self, inputs, weight_collections, trainable)
+      return tpu.outside_compilation(host_computation)
+
+    if _is_running_on_cpu():
+      return fc._SharedEmbeddingColumn._get_dense_tensor(
+          self, inputs, weight_collections, trainable)
+
+    # TPU mode
+    # Get the embeddings from the LazyBuilder.
+    tensor = inputs.get(self.get_feature_key_name())
+
+    # Add to collection for _create_tpu_embedding_variables_and_ops
+    _record_variable_scope_and_name(
+        self.get_embedding_var_name(),
+        'embedding_weights',
+        is_shared_embedding=True)
+    return tensor
+
+
+def _record_variable_scope_and_name(embedding_var_name,
+                                    embedding_var_name_in_fc,
+                                    is_shared_embedding=False):
+  """Add embedding variable name and scope to collection."""
+  g = ops.get_default_graph()
+  collection = g.get_collection_ref(_TPU_FC_TO_SCOPE)
+  if not collection:
+    collection.append({})
+
+  var_def_dict = collection[0]
+
+  captured_scope = None
+
+  if is_shared_embedding and (embedding_var_name in var_def_dict):
+    if var_def_dict[embedding_var_name][1] != embedding_var_name_in_fc:
+      raise ValueError(
+          'For embedding var name {}, the shared embedding name is different, '
+          'got {}; expected {}'.format(embedding_var_name,
+                                       embedding_var_name_in_fc,
+                                       var_def_dict[embedding_var_name][1]))
+  else:
+    # scope contains var_scope_name.
+    captured_scope = variable_scope.get_variable_scope()
+    var_def_dict[embedding_var_name] = (captured_scope,
+                                        embedding_var_name_in_fc)
+
+
+def _is_running_on_cpu():
+  """Returns True if the current context is CPU model."""
+  return tpu_function.get_tpu_context().number_of_shards is None
diff --git a/tensorflow/contrib/tpu/python/tpu/feature_column_test.py b/tensorflow/contrib/tpu/python/tpu/feature_column_test.py
new file mode 100644
index 0000000000..75164cce4c
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/feature_column_test.py
@@ -0,0 +1,286 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===================================================================
+"""Tests for contrib.tpu.python.tpu.feature_column."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.tpu.python.tpu import feature_column as tpu_fc
+from tensorflow.python.client import session
+from tensorflow.python.feature_column import feature_column as fc
+from tensorflow.python.feature_column import feature_column_lib as fc_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import lookup_ops
+from tensorflow.python.ops import parsing_ops
+from tensorflow.python.ops import variables as variables_lib
+from tensorflow.python.platform import test
+
+
+def _initialized_session():
+  sess = session.Session()
+  sess.run(variables_lib.global_variables_initializer())
+  sess.run(lookup_ops.tables_initializer())
+  return sess
+
+
+class EmbeddingColumnTest(test.TestCase):
+
+  def test_defaults(self):
+    categorical_column = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column = tpu_fc.embedding_column(
+        categorical_column, dimension=embedding_dimension)
+    self.assertIs(categorical_column, embedding_column.categorical_column)
+    self.assertEqual(embedding_dimension, embedding_column.dimension)
+    self.assertEqual('mean', embedding_column.combiner)
+    self.assertEqual('aaa_embedding', embedding_column.name)
+    self.assertEqual('aaa_embedding', embedding_column._var_scope_name)
+    self.assertEqual((embedding_dimension,), embedding_column._variable_shape)
+    self.assertEqual({
+        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column._parse_example_spec)
+
+  def test_all_constructor_args(self):
+    categorical_column = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column = tpu_fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        combiner='my_combiner',
+        initializer=lambda: 'my_initializer')
+    self.assertIs(categorical_column, embedding_column.categorical_column)
+    self.assertEqual(embedding_dimension, embedding_column.dimension)
+    self.assertEqual('my_combiner', embedding_column.combiner)
+    self.assertEqual('aaa_embedding', embedding_column.name)
+    self.assertEqual('aaa_embedding', embedding_column._var_scope_name)
+    self.assertEqual((embedding_dimension,), embedding_column._variable_shape)
+    self.assertEqual({
+        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column._parse_example_spec)
+
+  def test_get_dense_tensor(self):
+    # Inputs.
+    vocabulary_size = 3
+    sparse_input = sparse_tensor.SparseTensorValue(
+        # example 0, ids [2]
+        # example 1, ids [0, 1]
+        # example 2, ids []
+        # example 3, ids [1]
+        indices=((0, 0), (1, 0), (1, 4), (3, 0)),
+        values=(2, 0, 1, 1),
+        dense_shape=(4, 5))
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups = (
+        # example 0, ids [2], embedding = [7, 11]
+        (7., 11.),
+        # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+        (2., 3.5),
+        # example 2, ids [], embedding = [0, 0]
+        (0., 0.),
+        # example 3, ids [1], embedding = [3, 5]
+        (3., 5.),
+    )
+
+    # Build columns.
+    categorical_column = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    embedding_column = tpu_fc.embedding_column(
+        categorical_column,
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup = embedding_column._get_dense_tensor(
+        fc._LazyBuilder({
+            'aaa': sparse_input
+        }))
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, global_vars[0].eval())
+      self.assertAllEqual(expected_lookups, embedding_lookup.eval())
+
+
+class SharedEmbeddingColumnTest(test.TestCase):
+
+  def test_defaults(self):
+    categorical_column_a = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc_lib.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column_b, embedding_column_a = tpu_fc.shared_embedding_columns(
+        [categorical_column_b, categorical_column_a],
+        dimension=embedding_dimension)
+    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
+    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
+    self.assertEqual(embedding_dimension, embedding_column_a.dimension)
+    self.assertEqual(embedding_dimension, embedding_column_b.dimension)
+    self.assertEqual('mean', embedding_column_a.combiner)
+    self.assertEqual('mean', embedding_column_b.combiner)
+    self.assertIsNotNone(embedding_column_a.initializer)
+    self.assertIsNotNone(embedding_column_b.initializer)
+    self.assertEqual('aaa_bbb_shared_embedding',
+                     embedding_column_a.shared_embedding_collection_name)
+    self.assertEqual('aaa_bbb_shared_embedding',
+                     embedding_column_b.shared_embedding_collection_name)
+    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
+    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
+    self.assertEqual('aaa_bbb_shared_embedding',
+                     embedding_column_a._var_scope_name)
+    self.assertEqual('aaa_bbb_shared_embedding',
+                     embedding_column_b._var_scope_name)
+    self.assertEqual((embedding_dimension,), embedding_column_a._variable_shape)
+    self.assertEqual((embedding_dimension,), embedding_column_b._variable_shape)
+    self.assertEqual({
+        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column_a._parse_example_spec)
+    self.assertEqual({
+        'bbb': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column_b._parse_example_spec)
+
+  def test_all_constructor_args(self):
+    categorical_column_a = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=3)
+    categorical_column_b = fc_lib.categorical_column_with_identity(
+        key='bbb', num_buckets=3)
+    embedding_dimension = 2
+    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension,
+        combiner='my_combiner',
+        initializer=lambda: 'my_initializer',
+        shared_embedding_collection_name='var_scope_name')
+    self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
+    self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
+    self.assertEqual(embedding_dimension, embedding_column_a.dimension)
+    self.assertEqual(embedding_dimension, embedding_column_b.dimension)
+    self.assertEqual('my_combiner', embedding_column_a.combiner)
+    self.assertEqual('my_combiner', embedding_column_b.combiner)
+    self.assertEqual('my_initializer', embedding_column_a.initializer())
+    self.assertEqual('my_initializer', embedding_column_b.initializer())
+    self.assertEqual('var_scope_name',
+                     embedding_column_a.shared_embedding_collection_name)
+    self.assertEqual('var_scope_name',
+                     embedding_column_b.shared_embedding_collection_name)
+    self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
+    self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
+    self.assertEqual('var_scope_name', embedding_column_a._var_scope_name)
+    self.assertEqual('var_scope_name', embedding_column_b._var_scope_name)
+    self.assertEqual((embedding_dimension,), embedding_column_a._variable_shape)
+    self.assertEqual((embedding_dimension,), embedding_column_b._variable_shape)
+    self.assertEqual({
+        'aaa': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column_a._parse_example_spec)
+    self.assertEqual({
+        'bbb': parsing_ops.VarLenFeature(dtypes.int64)
+    }, embedding_column_b._parse_example_spec)
+
+  def test_get_dense_tensor(self):
+    # Inputs.
+    vocabulary_size = 3
+    # -1 values are ignored.
+    input_a = np.array([
+        [2, -1, -1],  # example 0, ids [2]
+        [0, 1, -1]
+    ])  # example 1, ids [0, 1]
+    input_b = np.array([
+        [0, -1, -1],  # example 0, ids [0]
+        [-1, -1, -1]
+    ])  # example 1, ids []
+    input_features = {'aaa': input_a, 'bbb': input_b}
+
+    # Embedding variable.
+    embedding_dimension = 2
+    embedding_values = (
+        (1., 2.),  # id 0
+        (3., 5.),  # id 1
+        (7., 11.)  # id 2
+    )
+
+    def _initializer(shape, dtype, partition_info):
+      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
+      self.assertEqual(dtypes.float32, dtype)
+      self.assertIsNone(partition_info)
+      return embedding_values
+
+    # Expected lookup result, using combiner='mean'.
+    expected_lookups_a = (
+        # example 0:
+        (7., 11.),  # ids [2], embedding = [7, 11]
+        # example 1:
+        (2., 3.5),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
+    )
+    expected_lookups_b = (
+        # example 0:
+        (1., 2.),  # ids [0], embedding = [1, 2]
+        # example 1:
+        (0., 0.),  # ids [], embedding = [0, 0]
+    )
+
+    # Build columns.
+    categorical_column_a = fc_lib.categorical_column_with_identity(
+        key='aaa', num_buckets=vocabulary_size)
+    categorical_column_b = fc_lib.categorical_column_with_identity(
+        key='bbb', num_buckets=vocabulary_size)
+    embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns(
+        [categorical_column_a, categorical_column_b],
+        dimension=embedding_dimension,
+        initializer=_initializer)
+
+    # Provide sparse input and get dense result.
+    embedding_lookup_a = embedding_column_a._get_dense_tensor(
+        fc._LazyBuilder(input_features))
+    embedding_lookup_b = embedding_column_b._get_dense_tensor(
+        fc._LazyBuilder(input_features))
+
+    # Assert expected embedding variable and lookups.
+    global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
+    self.assertItemsEqual(('embedding_weights:0',),
+                          tuple([v.name for v in global_vars]))
+    embedding_var = global_vars[0]
+    with _initialized_session():
+      self.assertAllEqual(embedding_values, embedding_var.eval())
+      self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
+      self.assertAllEqual(expected_lookups_b, embedding_lookup_b.eval())
+
+
+if __name__ == '__main__':
+  test.main()
-- 
GitLab


From c62017e7e22a1fc0f81253d176068ddb474a0770 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 11:34:38 -0800
Subject: [PATCH 672/873] Fix demo app gradle build for linux and mac

PiperOrigin-RevId: 225861051
---
 tensorflow/lite/examples/android/app/build.gradle | 4 ----
 tensorflow/lite/examples/android/build.gradle     | 1 +
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/lite/examples/android/app/build.gradle b/tensorflow/lite/examples/android/app/build.gradle
index 35e7887852..e5f5c7efd1 100644
--- a/tensorflow/lite/examples/android/app/build.gradle
+++ b/tensorflow/lite/examples/android/app/build.gradle
@@ -10,10 +10,6 @@ android {
         versionCode 1
         versionName "1.0"
 
-        // Remove this block.
-        jackOptions {
-            enabled true
-        }
     }
     lintOptions {
         abortOnError false
diff --git a/tensorflow/lite/examples/android/build.gradle b/tensorflow/lite/examples/android/build.gradle
index 74dacbcddb..7c79358e45 100644
--- a/tensorflow/lite/examples/android/build.gradle
+++ b/tensorflow/lite/examples/android/build.gradle
@@ -2,6 +2,7 @@
 
 buildscript {
     repositories {
+        google()
         jcenter()
     }
     dependencies {
-- 
GitLab


From 7da4b01db602c3c1d97ce3d3dc064d3eb7407773 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 17 Dec 2018 11:44:36 -0800
Subject: [PATCH 673/873] Preparation for landing the ROCM patch.

PiperOrigin-RevId: 225862702
---
 tensorflow/stream_executor/cuda/cuda_driver.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h
index 3713a5b7b9..447422739d 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.h
+++ b/tensorflow/stream_executor/cuda/cuda_driver.h
@@ -514,6 +514,10 @@ class CudaContext {
   const int64 id_;
 };
 
+inline CUcontext CurrentContextOrDie() {
+  return CUDADriver::CurrentContextOrDie();
+}
+
 }  // namespace cuda
 }  // namespace stream_executor
 
-- 
GitLab


From 33cbbf4f68a68a99dd822c6a4999f0f254dbe1e2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 11:51:14 -0800
Subject: [PATCH 674/873] Internal change

PiperOrigin-RevId: 225863886
---
 tensorflow/python/ops/nn_impl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 48dcab4842..841bac8bea 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -398,7 +398,7 @@ def _count_nonzero(input_tensor, dtype=dtypes.int64):
   Returns:
       number of nonzero values with type dtype
   """
-  with ops.name_scope("count_nonzero", [input_tensor]):
+  with ops.name_scope("count_nonzero", values=[input_tensor]):
     zero = array_ops.zeros([], dtype=input_tensor.dtype)
     nonzero_count = math_ops.reduce_sum(
         math_ops.cast(
-- 
GitLab


From a333da7c6fb2e1f8f1c90eb03885e05c21959167 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 17 Dec 2018 11:51:19 -0800
Subject: [PATCH 675/873] Introduce a logger overriding mechanism.

Before: the whole program has to link against a unique
Logger::Singleton(), and there is no default. It's hard to control when
to use the custom logger vs the default.

After: By default always use the default logger. The pubsub logger
overrides via REGISTER_MODULE_INITIALIZER. Multiple implementations can
co-exist.

This simplifies the registration management and dependency management.

PiperOrigin-RevId: 225863909
---
 tensorflow/core/BUILD                         | 20 ++-----------------
 .../core/platform/default/build_config/BUILD  |  5 -----
 .../core/platform/{default => }/logger.cc     |  7 +++----
 tensorflow/core/platform/logger.h             | 19 +++++++++++++++++-
 4 files changed, 23 insertions(+), 28 deletions(-)
 rename tensorflow/core/platform/{default => }/logger.cc (91%)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 8bf1480d33..258c46fbcb 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -445,7 +445,8 @@ cc_library(
 )
 
 cc_library(
-    name = "logger_interface",
+    name = "logger",
+    srcs = ["platform/logger.cc"],
     hdrs = ["platform/logger.h"],
     copts = tf_copts(),
     visibility = ["//visibility:public"],
@@ -455,23 +456,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "default_logger",
-    srcs = ["platform/default/logger.cc"],
-    hdrs = ["platform/logger.h"],
-    deps = [
-        "//tensorflow/core:lib_proto_parsing",
-        "//tensorflow/core:logger_interface",
-    ],
-)
-
-cc_library(
-    name = "logger",
-    hdrs = ["platform/logger.h"],
-    visibility = ["//visibility:public"],
-    deps = ["//tensorflow/core/platform/default/build_config:logger"],
-)
-
 filegroup(
     name = "platform_env_hdrs",
     srcs = [
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index ee6936b372..da1f66dc67 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -275,8 +275,3 @@ alias(
     actual = ":mobile_srcs",
     visibility = ["//visibility:public"],
 )
-
-alias(
-    name = "logger",
-    actual = "//tensorflow/core:default_logger",
-)
diff --git a/tensorflow/core/platform/default/logger.cc b/tensorflow/core/platform/logger.cc
similarity index 91%
rename from tensorflow/core/platform/default/logger.cc
rename to tensorflow/core/platform/logger.cc
index 54b1a1a67c..202840c808 100644
--- a/tensorflow/core/platform/default/logger.cc
+++ b/tensorflow/core/platform/logger.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-Logger* Logger::Singleton() {
+Logger::FactoryFunc Logger::singleton_factory_ = []() -> Logger* {
   class DefaultLogger : public Logger {
    private:
     void DoLogProto(google::protobuf::Any* proto) override {
@@ -27,8 +27,7 @@ Logger* Logger::Singleton() {
     }
     void DoFlush() override {}
   };
-  static Logger* instance = new DefaultLogger();
-  return instance;
-}
+  return new DefaultLogger();
+};
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/logger.h b/tensorflow/core/platform/logger.h
index 5d304bea63..f0bfef4f2d 100644
--- a/tensorflow/core/platform/logger.h
+++ b/tensorflow/core/platform/logger.h
@@ -26,7 +26,22 @@ namespace tensorflow {
 // log anything to a non-local place, e.g. a database.
 class Logger {
  public:
-  static Logger* Singleton();
+  // The singleton is supposed to be used in the following steps:
+  // * At program start time, REGISTER_MOUDLE_INITIALIZER calls
+  //   SetSingletonFactory.
+  // * At some point in the program execution, Singleton() is called for the
+  //   first time, initializing the logger.
+  // * Succeeding calls to Singleton() return the initiailized logger.
+  using FactoryFunc = Logger* (*)();
+
+  static void SetSingletonFactory(FactoryFunc factory) {
+    singleton_factory_ = factory;
+  }
+
+  static Logger* Singleton() {
+    static Logger* instance = singleton_factory_();
+    return instance;
+  }
 
   virtual ~Logger() = default;
 
@@ -44,6 +59,8 @@ class Logger {
  private:
   virtual void DoLogProto(google::protobuf::Any* proto) = 0;
   virtual void DoFlush() = 0;
+
+  static FactoryFunc singleton_factory_;
 };
 
 }  // namespace tensorflow
-- 
GitLab


From dc0aa75a0514ca51c1c70c27a37c77f893439561 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 12:07:32 -0800
Subject: [PATCH 676/873] monitored_session: Merge run options that report
 tensor allocations from hooks.

PiperOrigin-RevId: 225866692
---
 .../python/training/monitored_session.py      |  4 +-
 .../python/training/monitored_session_test.py | 57 ++++++++++---------
 2 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index 6a7d27df5c..072dbc1730 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -1392,9 +1392,11 @@ class _HookedSession(_WrappedSession):
     options.output_partition_graphs = max(
         options.output_partition_graphs,
         incoming_options.output_partition_graphs)
-
     options.debug_options.debug_tensor_watch_opts.extend(
         incoming_options.debug_options.debug_tensor_watch_opts)
     options.debug_options.reset_disk_byte_usage = (
         options.debug_options.reset_disk_byte_usage or
         incoming_options.debug_options.reset_disk_byte_usage)
+    options.report_tensor_allocations_upon_oom = (
+        options.report_tensor_allocations_upon_oom or
+        incoming_options.report_tensor_allocations_upon_oom)
diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py
index 99ee9ea7e2..6d24f8e17e 100644
--- a/tensorflow/python/training/monitored_session_test.py
+++ b/tensorflow/python/training/monitored_session_test.py
@@ -1364,11 +1364,13 @@ class RunOptionsMetadataHook(session_run_hook.SessionRunHook):
   """A hook that observes & optionally modifies RunOptions and RunMetadata."""
 
   def __init__(self, trace_level, timeout_in_ms, output_partition_graphs,
-               debug_tensor_watch):
+               debug_tensor_watch, report_tensor_allocations_upon_oom):
     self._trace_level = trace_level
     self._timeout_in_ms = timeout_in_ms
     self._output_partition_graphs = output_partition_graphs
     self._debug_tensor_watch = debug_tensor_watch
+    self._report_tensor_allocations_upon_oom = (
+        report_tensor_allocations_upon_oom)
 
     self.run_options_list = []
     self.run_metadata_list = []
@@ -1377,7 +1379,9 @@ class RunOptionsMetadataHook(session_run_hook.SessionRunHook):
     options = config_pb2.RunOptions(
         trace_level=self._trace_level,
         timeout_in_ms=self._timeout_in_ms,
-        output_partition_graphs=self._output_partition_graphs)
+        output_partition_graphs=self._output_partition_graphs,
+        report_tensor_allocations_upon_oom=self
+        ._report_tensor_allocations_upon_oom)
     options.debug_options.debug_tensor_watch_opts.extend(
         [self._debug_tensor_watch])
     return session_run_hook.SessionRunArgs(None, None, options=options)
@@ -1746,13 +1750,13 @@ class MonitoredSessionTest(test.TestCase):
           output_slot=0,
           debug_ops=['DebugIdentity'],
           debug_urls=[])
-      hook_a = RunOptionsMetadataHook(2, 30000, False, watch_a)
+      hook_a = RunOptionsMetadataHook(2, 30000, False, watch_a, False)
       watch_b = debug_pb2.DebugTensorWatch(
           node_name='my_const_2',
           output_slot=0,
           debug_ops=['DebugIdentity'],
           debug_urls=[])
-      hook_b = RunOptionsMetadataHook(3, 60000, True, watch_b)
+      hook_b = RunOptionsMetadataHook(3, 60000, True, watch_b, True)
       with monitored_session.MonitoredSession(
           hooks=[hook_a, hook_b]) as session:
         self.assertEqual(42, session.run(my_const))
@@ -1761,16 +1765,15 @@ class MonitoredSessionTest(test.TestCase):
         # timeout_in_ms=60000 should have overridden 30000;
         # output_partition_graphs=True should have overridden False.
         # The two debug tensor watches should have been merged.
-        self.assertEqual(
-            [
-                config_pb2.RunOptions(
-                    trace_level=3,
-                    timeout_in_ms=60000,
-                    output_partition_graphs=True,
-                    debug_options=debug_pb2.DebugOptions(
-                        debug_tensor_watch_opts=[watch_a, watch_b]))
-            ],
-            hook_b.run_options_list)
+        self.assertEqual([
+            config_pb2.RunOptions(
+                trace_level=3,
+                timeout_in_ms=60000,
+                output_partition_graphs=True,
+                debug_options=debug_pb2.DebugOptions(
+                    debug_tensor_watch_opts=[watch_a, watch_b]),
+                report_tensor_allocations_upon_oom=True),
+        ], hook_b.run_options_list)
         self.assertEqual(1, len(hook_b.run_metadata_list))
         self.assertTrue(
             isinstance(hook_b.run_metadata_list[0], config_pb2.RunMetadata))
@@ -1788,7 +1791,7 @@ class MonitoredSessionTest(test.TestCase):
           output_slot=0,
           debug_ops=['DebugIdentity'],
           debug_urls=[])
-      hook = RunOptionsMetadataHook(2, 60000, False, hook_watch)
+      hook = RunOptionsMetadataHook(2, 60000, False, hook_watch, False)
       with monitored_session.MonitoredSession(hooks=[hook]) as session:
         caller_watch = debug_pb2.DebugTensorWatch(
             node_name='my_const',
@@ -1796,7 +1799,10 @@ class MonitoredSessionTest(test.TestCase):
             debug_ops=['DebugIdentity'],
             debug_urls=[])
         caller_options = config_pb2.RunOptions(
-            trace_level=3, timeout_in_ms=30000, output_partition_graphs=True)
+            trace_level=3,
+            timeout_in_ms=30000,
+            output_partition_graphs=True,
+            report_tensor_allocations_upon_oom=True)
         caller_options.debug_options.debug_tensor_watch_opts.extend(
             [caller_watch])
         self.assertEqual(42, session.run(my_const, options=caller_options))
@@ -1807,16 +1813,15 @@ class MonitoredSessionTest(test.TestCase):
         # from the hook.
         # The two debug watches from the caller and the hook should be merged,
         # in that order.
-        self.assertEqual(
-            [
-                config_pb2.RunOptions(
-                    trace_level=3,
-                    timeout_in_ms=60000,
-                    output_partition_graphs=True,
-                    debug_options=debug_pb2.DebugOptions(
-                        debug_tensor_watch_opts=[caller_watch, hook_watch]))
-            ],
-            hook.run_options_list)
+        self.assertEqual([
+            config_pb2.RunOptions(
+                trace_level=3,
+                timeout_in_ms=60000,
+                output_partition_graphs=True,
+                debug_options=debug_pb2.DebugOptions(
+                    debug_tensor_watch_opts=[caller_watch, hook_watch]),
+                report_tensor_allocations_upon_oom=True),
+        ], hook.run_options_list)
         self.assertEqual(1, len(hook.run_metadata_list))
         self.assertTrue(
             isinstance(hook.run_metadata_list[0], config_pb2.RunMetadata))
-- 
GitLab


From ccf374d142c8c53364f5f11602ce44ab2f6b2ba7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 12:24:55 -0800
Subject: [PATCH 677/873] In BROADCAST mode (used by Mesh-TensorFlow), assume
 that all cores return identical copies of the full output.  Return the output
 from the first core instead of concatenating the outputs on axis 0. Since we
 have broken the assumption that the input is batch-split on axis 0, it makes
 no sense to assume that the output will emerge batch-split on axis 0. In the
 future, we may want to provide more explicit options about how to combine the
 outfeeds.

PiperOrigin-RevId: 225869483
---
 .../contrib/tpu/python/tpu/tpu_estimator.py   | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index 44a8f7ce0e..e44b51f3e7 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -1778,9 +1778,22 @@ class _OutfeedHostCall(object):
             raise RuntimeError(
                 'All tensors outfed from TPU should preserve batch size '
                 'dimension, but got scalar {}'.format(dequeue_ops[i][0]))
-          # TODO(xiejw): Allow users to specify the axis for batch size
-          # dimension.
-          dequeue_ops[i] = array_ops.concat(dequeue_ops[i], axis=0)
+          # TODO(xiejw): Make the specification of the outfeed combinaton
+          # function more explicit and well-documented.  We may want to give the
+          # user the option of concatenating along any axis.
+          if (self._ctx.config.tpu_config.per_host_input_for_training is
+              tpu_config.InputPipelineConfig.BROADCAST):
+            # If the infeed is in BROADCAST mode (each core recieving the same
+            # input), then we assume that the cores also produce identical
+            # copies of the same output, and we simply take the output from
+            # the first core.  This mode is used by Mesh-TensorFlow.
+            with ops.control_dependencies(dequeue_ops[i]):
+              dequeue_ops[i] = array_ops.identity(dequeue_ops[i][0])
+          else:
+            # Assume that the input has been batch-split and that axis 0 of the
+            # output tensors represents the batch size.  Concatenate along
+            # the axis 0 to re-combine the batch.
+            dequeue_ops[i] = array_ops.concat(dequeue_ops[i], axis=0)
 
         if self._tensor_keys[name] is not None:
           # The user-provided eval_metrics[1] is a dict.
-- 
GitLab


From 01c44afaea3e5a3453903dedd07ccf4a1cb577f4 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 17 Dec 2018 12:41:48 -0800
Subject: [PATCH 678/873] Obey the semantics of @tf.function even if the target
 is whitelisted. The implementation relies on this assumption. This triggered
 autograph on a number of internal functions, where it will be disabled for
 now.

PiperOrigin-RevId: 225872049
---
 tensorflow/python/autograph/impl/api.py       |  6 +++
 .../python/eager/function_gradients_test.py   |  6 ++-
 tensorflow/python/eager/function_test.py      | 40 +++++++++++++------
 tensorflow/python/framework/func_graph.py     |  7 ++++
 tensorflow/python/framework/test_util.py      |  3 +-
 .../python/keras/engine/training_utils.py     |  3 +-
 .../saved_model/function_deserialization.py   |  3 +-
 7 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/autograph/impl/api.py b/tensorflow/python/autograph/impl/api.py
index a98c1dfe9a..b1c16b1169 100644
--- a/tensorflow/python/autograph/impl/api.py
+++ b/tensorflow/python/autograph/impl/api.py
@@ -183,6 +183,12 @@ def converted_call(f, owner, options, *args, **kwargs):
   # In particular, we may want to avoid renaming functions altogether.
   if not options.force_conversion and conversion.is_whitelisted_for_graph(f):
 
+    # TODO(mdan): This may be inconsistent in certain situations.
+    # If the function had already been annotated with @tf.function, it
+    # may be bound to the incorrect object. It's unclear if those situations
+    # are possible, but if they happen, we need to check if f is bound
+    # to a shim like WeakrefSelf and unpack it.
+
     # Args typically include `self`, as required by the conversion process.
     # When conversion is skipped, `self` is not necessary, because the
     # original bound method is being executed. This code removes it.
diff --git a/tensorflow/python/eager/function_gradients_test.py b/tensorflow/python/eager/function_gradients_test.py
index 98dec0b361..7cf77570e5 100644
--- a/tensorflow/python/eager/function_gradients_test.py
+++ b/tensorflow/python/eager/function_gradients_test.py
@@ -226,7 +226,8 @@ class FunctionGradientsTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(g, 1.0)
 
   def testGradient(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     def sq(x):
       return matmul(x, x, transpose_a=True)
@@ -696,7 +697,8 @@ class FunctionGradientsTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(g2, 2.0)
 
   def testGradientWithKeywordArguments(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     def sq(x):
       return matmul(a=x, b=x, transpose_a=True)
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index c7959441d8..55a9cc4e92 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -86,7 +86,8 @@ class DefunnedMiniModel(MiniModel):
 class FunctionTest(test.TestCase, parameterized.TestCase):
 
   def testBasic(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
     t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
     sq = matmul(t, t, transpose_a=True)
     sq2 = matmul(sq, t, transpose_a=True)
@@ -123,7 +124,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(add_2._name, 'add_2')
 
   def testBasicGraphMode(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     @def_function.function
     def sq(a):
@@ -134,7 +136,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
 
   def testNestedInputsGraphMode(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     pair = collections.namedtuple('pair', ['a', 'b'])
 
@@ -148,7 +151,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
 
   def testNestedOutputsGraphMode(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     pair = collections.namedtuple('pair', ['a', 'b'])
 
@@ -177,7 +181,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       self.assertEqual(f().shape, ())
 
   def testBasicGraphFunction(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     @def_function.function
     def sq(a):
@@ -191,7 +196,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
 
   def testInputSpecGraphFunction(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     @def_function.function
     def sq(a):
@@ -210,7 +216,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(out2, math_ops.matmul(t2, t2).numpy())
 
   def testNestedInputSpecGraphFunction(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     @def_function.function
     def sq(mats):
@@ -304,7 +311,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(f(), x)
 
   def testNestedInputsGraphFunction(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     pair = collections.namedtuple('pair', ['a', 'b'])
 
@@ -321,7 +329,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(out, math_ops.matmul(t, t).numpy())
 
   def testNestedOutputGraphFunction(self):
-    matmul = def_function.function(math_ops.matmul)
+    # TODO(b/121134877): Remove the autograph override.
+    matmul = def_function.function(math_ops.matmul, autograph=False)
 
     @def_function.function
     def sq(a):
@@ -726,7 +735,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       self.skipTest('No GPUs found')
 
     x = constant_op.constant([1.]).gpu()
-    f = def_function.function(math_ops.add)
+    # TODO(b/121134877): Remove the autograph override.
+    f = def_function.function(math_ops.add, autograph=False)
     y = f(x, x).cpu()
     self.assertAllEqual(y, [2.])
 
@@ -795,7 +805,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       self.skipTest('No GPUs found')
 
     # The Reshape op requires the shape tensor to be placed in host memory.
-    reshape = def_function.function(array_ops.reshape)
+    # TODO(b/121134877): Remove the autograph override.
+    reshape = def_function.function(array_ops.reshape, autograph=False)
     value = constant_op.constant([1., 2.]).gpu()
     shape = constant_op.constant([2, 1])
     reshaped = reshape(value, shape).cpu()
@@ -806,7 +817,8 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       self.skipTest('No GPUs found')
 
     # The Reshape op requires the shape tensor to be placed in host memory.
-    reshape = def_function.function(array_ops.reshape)
+    # TODO(b/121134877): Remove the autograph override.
+    reshape = def_function.function(array_ops.reshape, autograph=False)
     value = constant_op.constant([1., 2.])
     shape = constant_op.constant([2, 1]).gpu()
     reshape(value, shape)  # No error is raised
@@ -865,7 +877,9 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       self.assertEqual(1, int(self.evaluate(read())))
 
   def testSequenceInputs(self):
-    clip_by_global_norm = def_function.function(clip_ops.clip_by_global_norm)
+    # TODO(b/121134877): Remove the autograph override.
+    clip_by_global_norm = def_function.function(
+        clip_ops.clip_by_global_norm, autograph=False)
     t_list = [constant_op.constant(1.0), constant_op.constant(2.0)]
     clipped_list, global_norm = clip_by_global_norm(t_list,
                                                     constant_op.constant(.2))
diff --git a/tensorflow/python/framework/func_graph.py b/tensorflow/python/framework/func_graph.py
index f8be5e9edf..9528a24b46 100644
--- a/tensorflow/python/framework/func_graph.py
+++ b/tensorflow/python/framework/func_graph.py
@@ -432,6 +432,12 @@ def func_graph_from_py_func(name,
         _, original_func = tf_decorator.unwrap(python_func)
 
         def wrapper(*args, **kwargs):
+          # Note: functions annotated with @tf.function should always be
+          # converted even though they would meet autograph's whitelisting
+          # criteria.
+          # If this assumption is ever broken, converted_call will need to
+          # handle the possibility of original_func still being a shim, e.g.
+          # bound to WeakrefSelf.
           return autograph.converted_call(
               original_func, None,
               autograph.ConversionOptions(
@@ -439,6 +445,7 @@ def func_graph_from_py_func(name,
                   recursive=True,
                   strip_decorators=(def_function.function,),
                   optional_features=(),
+                  force_conversion=True,
               ), *args, **kwargs)
 
         # Wrapping around a decorator allows checks like tf_inspect.getargspec
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index ffab93c84e..73713cb10c 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1028,7 +1028,8 @@ def also_run_as_tf_function(f):
       # Running in eager mode
       bound_f()
       # Running as TF function
-      def_function.function(bound_f)()
+      # TODO(b/121143941): Remove the autograph override.
+      def_function.function(bound_f, autograph=False)()
 
   return decorated
 
diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py
index 64c6f727c9..8196376d18 100644
--- a/tensorflow/python/keras/engine/training_utils.py
+++ b/tensorflow/python/keras/engine/training_utils.py
@@ -1229,7 +1229,8 @@ def trace_model_call(model, input_signature=None):
     # all tensor inputs must be passed in as the first argument.
     input_signature = [input_specs] if len(input_specs) > 1 else input_specs
 
-  @def_function.function(input_signature=input_signature)
+  # TODO(mdan): Should the model's call be autographed by default?
+  @def_function.function(input_signature=input_signature, autograph=False)
   def _wrapped_model(*args):
     """A concrete tf.function that wraps the model's call function."""
     # When given a single input, Keras models will call the model on the tensor
diff --git a/tensorflow/python/saved_model/function_deserialization.py b/tensorflow/python/saved_model/function_deserialization.py
index 51e23574ca..b121af62bd 100644
--- a/tensorflow/python/saved_model/function_deserialization.py
+++ b/tensorflow/python/saved_model/function_deserialization.py
@@ -49,7 +49,8 @@ def recreate_polymorphic_function(
   # instead of creating a new PolymorphicFunction backed by a Python layer to
   # glue things together. Current approach is nesting functions deeper for each
   # serialization cycle.
-  @def_function.function
+  # TODO(mdan): We may enable autograph once exceptions are supported.
+  @def_function.function(autograph=False)
   def restored_function(*args):
     """Calls a restored function."""
     # TODO(allenl): Functions saved with input_signatures should revive with
-- 
GitLab


From 7dfd1fb42b05b567699a76ea96001aea39e0f9f6 Mon Sep 17 00:00:00 2001
From: Michael Kuperstein <mkuper@google.com>
Date: Mon, 17 Dec 2018 12:52:05 -0800
Subject: [PATCH 679/873] [XLA:CPU] EmitBufferPointer should cast constants to
 the right type.

PiperOrigin-RevId: 225873601
---
 .../compiler/xla/service/cpu/ir_emitter.cc    |  4 ++-
 tensorflow/compiler/xla/tests/BUILD           |  1 +
 .../compiler/xla/tests/constants_test.cc      | 30 +++++++++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 62a4e8d350..2c3e5d04cb 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -2862,7 +2862,9 @@ llvm::Value* IrEmitter::EmitBufferPointer(const BufferAllocation::Slice& slice,
   if (slice.allocation()->is_thread_local()) {
     return EmitThreadLocalBufferPointer(slice, target_shape);
   } else if (slice.allocation()->is_constant()) {
-    return FindOrDie(constant_buffer_to_global_, slice.allocation()->index());
+    return BitCast(
+        FindOrDie(constant_buffer_to_global_, slice.allocation()->index()),
+        IrShapeType(target_shape)->getPointerTo());
   } else {
     return EmitGlobalBufferPointer(slice, target_shape);
   }
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index 0300b64ed5..face72a066 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -842,6 +842,7 @@ xla_test(
         "//tensorflow/compiler/xla/client:xla_builder",
         "//tensorflow/compiler/xla/client:xla_computation",
         "//tensorflow/compiler/xla/tests:client_library_test_base",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
         "//tensorflow/compiler/xla/tests:literal_test_util",
         "//tensorflow/compiler/xla/tests:xla_internal_test_main",
         "//tensorflow/core:lib",
diff --git a/tensorflow/compiler/xla/tests/constants_test.cc b/tensorflow/compiler/xla/tests/constants_test.cc
index 72ff1e74a4..9174f2651c 100644
--- a/tensorflow/compiler/xla/tests/constants_test.cc
+++ b/tensorflow/compiler/xla/tests/constants_test.cc
@@ -25,7 +25,9 @@ limitations under the License.
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/tests/client_library_test_base.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
 #include "tensorflow/compiler/xla/tests/literal_test_util.h"
+#include "tensorflow/compiler/xla/tests/test_macros.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
 #include "tensorflow/core/platform/test.h"
 #include "tensorflow/core/platform/types.h"
@@ -178,5 +180,33 @@ TEST_F(ConstantsTest, Token) {
   TF_ASSERT_OK(Execute(&builder, {}).status());
 }
 
+class ConstantsHloTest : public HloTestBase {};
+
+// TODO(b/121147351): Fails on GPU. Not clear if this is expected behavior.
+XLA_TEST_F(ConstantsHloTest, DISABLED_ON_GPU(BitcastOfConstant)) {
+  const char* testcase = R"(
+    HloModule module, is_scheduled=true
+
+    func {
+      lhs = s32[] parameter(0)
+      rhs = s32[] parameter(1)
+      ROOT mul = s32[] add(lhs, rhs)
+    }
+
+    ENTRY test {
+      constant.0 = s32[1]{0} constant({0})
+      parameter.0 = s32[] parameter(0)
+      constant-as-scalar = s32[] bitcast(constant.0)
+      ROOT result = s32[] call(parameter.0, constant-as-scalar), to_apply=func
+    }
+  )";
+  auto module =
+      HloRunner::CreateModuleFromString(testcase, GetDebugOptionsForTest())
+          .ValueOrDie();
+  auto param = LiteralUtil::CreateR0<int32>(1);
+  auto result = ExecuteNoHloPasses(std::move(module), {&param});
+  EXPECT_TRUE(LiteralTestUtil::Equal(param, result));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From ca08819d37e71a27285f6379f2e9ca96340af5d1 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 17 Dec 2018 13:02:07 -0800
Subject: [PATCH 680/873] Fix conditions in pip_package/BUILD file for windows.
 Multiple conditions don't work together.

PiperOrigin-RevId: 225875050
---
 tensorflow/tools/pip_package/BUILD | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 4ed2f6ce34..2de00ea957 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -242,16 +242,17 @@ sh_binary(
     name = "build_pip_package",
     srcs = ["build_pip_package.sh"],
     data = select({
-        "//tensorflow:windows": [
-            ":simple_console_for_windows",
-        ],
-        "api_version_2": COMMON_PIP_DEPS + [
-            ":simple_console",
-        ],
-        "//conditions:default": COMMON_PIP_DEPS_V1 + [
-            ":simple_console",
-        ],
-    }) + if_mkl_ml(["//third_party/mkl:intel_binary_blob"]),
+               "api_version_2": COMMON_PIP_DEPS,
+               "//conditions:default": COMMON_PIP_DEPS_V1,
+           }) +
+           select({
+               "//tensorflow:windows": [
+                   ":simple_console_for_windows",
+               ],
+               "//conditions:default": [
+                   ":simple_console",
+               ],
+           }) + if_mkl_ml(["//third_party/mkl:intel_binary_blob"]),
 )
 
 # A genrule for generating a marker file for the pip package on Windows
-- 
GitLab


From ab725544b0a87b4785fb9d7505ec267488104838 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 17 Dec 2018 13:02:53 -0800
Subject: [PATCH 681/873] Faster is_layer check

PiperOrigin-RevId: 225875278
---
 tensorflow/python/keras/engine/base_layer.py             | 6 ++++++
 tensorflow/python/training/checkpointable/layer_utils.py | 4 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index aeed750652..80ae99475e 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -1644,6 +1644,12 @@ class Layer(checkpointable.CheckpointableBase):
           getattr(layer, attribute) for layer in self._layers))
     return []
 
+  # This is a hack so that the is_layer (within
+  # training/checkpointable/layer_utils.py) check doesn't get the weights attr.
+  # TODO(b/110718070): Remove when fixed.
+  def _is_layer(self):
+    return True
+
 
 class Node(object):
   """A `Node` describes the connectivity between two layers.
diff --git a/tensorflow/python/training/checkpointable/layer_utils.py b/tensorflow/python/training/checkpointable/layer_utils.py
index ec764bca89..9d45c4883e 100644
--- a/tensorflow/python/training/checkpointable/layer_utils.py
+++ b/tensorflow/python/training/checkpointable/layer_utils.py
@@ -25,9 +25,7 @@ from __future__ import print_function
 def is_layer(obj):
   """Implicit check for Layer-like objects."""
   # TODO(b/110718070): Replace with isinstance(obj, base_layer.Layer).
-  return (hasattr(obj, "call")
-          and hasattr(obj, "build")
-          and hasattr(obj, "variables"))
+  return hasattr(obj, "_is_layer")
 
 
 def has_weights(obj):
-- 
GitLab


From 951d1854fada4653901779ba0d309f4ff1a8232d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 13:03:29 -0800
Subject: [PATCH 682/873] Append event tags to profile summaries.

PiperOrigin-RevId: 225875421
---
 tensorflow/lite/profiling/profile_buffer.h      |  5 +++++
 tensorflow/lite/profiling/profile_summarizer.cc | 10 ++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/tensorflow/lite/profiling/profile_buffer.h b/tensorflow/lite/profiling/profile_buffer.h
index 247ebb37c5..9aa9e41131 100644
--- a/tensorflow/lite/profiling/profile_buffer.h
+++ b/tensorflow/lite/profiling/profile_buffer.h
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <cstddef>
 #include <cstdint>
+#include <cstdio>
 
 #include "tensorflow/lite/profiling/time.h"
 
@@ -78,6 +79,9 @@ class ProfileBuffer {
     }
     uint64_t timestamp = time::NowMicros();
     int index = current_index_ % event_buffer_.size();
+    if (current_index_ != 0 && index == 0) {
+      fprintf(stderr, "Warning: ProfileBuffer wrapping.\n");
+    }
     event_buffer_[index].tag = tag;
     event_buffer_[index].event_type = event_type;
     event_buffer_[index].event_metadata = event_metadata;
@@ -101,6 +105,7 @@ class ProfileBuffer {
     const uint32_t max_size = event_buffer_.size();
     if (current_index_ > (max_size + event_handle)) {
       // Ignore, buffer has already overflowed.
+      fprintf(stderr, "Warning: Dropping ProfileBuffer event.\n");
       return;
     }
 
diff --git a/tensorflow/lite/profiling/profile_summarizer.cc b/tensorflow/lite/profiling/profile_summarizer.cc
index 64b1bd7ad7..aaf35d64c4 100644
--- a/tensorflow/lite/profiling/profile_summarizer.cc
+++ b/tensorflow/lite/profiling/profile_summarizer.cc
@@ -123,14 +123,20 @@ void ProfileSummarizer::ProcessProfiles(
   int64_t base_start_us = events[0]->begin_timestamp_us;
   int node_num = 0;
   int64_t curr_total_us = 0;
+  auto tag_string = [](const string& s, const string& t) {
+    return t == "OpInvoke" ? s : s + "/" + t;
+  };
   for (auto event : events) {
     auto op_details = GetOperatorDetails(interpreter, event->event_metadata);
     auto node_name = ToString(op_details.outputs);
     int64_t start_us = event->begin_timestamp_us - base_start_us;
     int64_t node_exec_time =
         event->end_timestamp_us - event->begin_timestamp_us;
-    stats_calculator_->AddNodeStats(node_name, op_details.name, node_num,
-                                    start_us, node_exec_time, 0 /*memory */);
+    stats_calculator_->AddNodeStats(tag_string(node_name, event->tag),
+                                    tag_string(op_details.name, event->tag),
+                                    node_num, start_us, node_exec_time,
+                                    0 /*memory */);
+
     curr_total_us += node_exec_time;
     ++node_num;
   }
-- 
GitLab


From ec3f67d9673e77a0b7078f2a988cd09141c0a741 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 17 Dec 2018 13:05:01 -0800
Subject: [PATCH 683/873] [XLA:Python] Add support for registering CPU custom
 call targets from Cython. Add a test case for calling a Cython target via a
 CPU custom call. Fix a couple of Python 3 compatibility bugs in
 xla_client.py.

[TF] Add testonly attribute to pyx_library() rule. Prefix python header path with @org_tensorflow so the rule works when TF is a submodule.

PiperOrigin-RevId: 225875684
---
 tensorflow/compiler/xla/python/BUILD          | 10 +++++++++
 .../xla/python/custom_call_for_test.pyx       | 21 +++++++++++++++++++
 .../xla/python/local_computation_builder.cc   | 15 +++++++++++++
 .../xla/python/local_computation_builder.h    |  7 +++++++
 .../xla/python/local_computation_builder.i    |  1 +
 tensorflow/compiler/xla/python/xla_client.py  | 14 +++++++++++--
 .../compiler/xla/python/xla_client_test.py    | 15 +++++++++++++
 .../core/platform/default/build_config.bzl    |  6 +++++-
 8 files changed, 86 insertions(+), 3 deletions(-)
 create mode 100644 tensorflow/compiler/xla/python/custom_call_for_test.pyx

diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD
index 4a57b1051e..ddffafa901 100644
--- a/tensorflow/compiler/xla/python/BUILD
+++ b/tensorflow/compiler/xla/python/BUILD
@@ -4,6 +4,7 @@ package(default_visibility = ["//tensorflow:internal"])
 
 load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc")
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured")
+load("//tensorflow/core:platform/default/build_config.bzl", "pyx_library")
 
 py_library(
     name = "xla_client",
@@ -17,6 +18,12 @@ py_library(
     ],
 )
 
+pyx_library(
+    name = "custom_call_for_test",
+    testonly = True,
+    srcs = ["custom_call_for_test.pyx"],
+)
+
 py_test(
     name = "xla_client_test",
     srcs = ["xla_client_test.py"],
@@ -24,6 +31,7 @@ py_test(
     srcs_version = "PY2AND3",
     tags = ["no_oss"],
     deps = [
+        ":custom_call_for_test",
         ":xla_client",
         "//tensorflow/python:platform_test",
     ],
@@ -72,10 +80,12 @@ cc_library(
         "//tensorflow/compiler/xla/client/lib:triangular_solve",
         "//tensorflow/compiler/xla/service:platform_util",
         "//tensorflow/compiler/xla/service:shaped_buffer",
+        "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry",
         "//tensorflow/compiler/xrt:xrt_proto",
         "//tensorflow/compiler/xrt/cc:xrt_ops",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//third_party/python_runtime:headers",
         "@com_google_absl//absl/memory",
         "@com_google_absl//absl/types:span",
     ],
diff --git a/tensorflow/compiler/xla/python/custom_call_for_test.pyx b/tensorflow/compiler/xla/python/custom_call_for_test.pyx
new file mode 100644
index 0000000000..530dffd175
--- /dev/null
+++ b/tensorflow/compiler/xla/python/custom_call_for_test.pyx
@@ -0,0 +1,21 @@
+# distutils: language = c++
+
+# Test case for defining a XLA custom call target in Cython, and registering
+# it via the xla_client SWIG API.
+
+from cpython.pycapsule cimport PyCapsule_New
+
+cdef void test_subtract_f32(void* out_ptr, void** data_ptr) nogil:
+  cdef float a = (<float*>(data_ptr[0]))[0]
+  cdef float b = (<float*>(data_ptr[1]))[0]
+  cdef float* out = <float*>(out_ptr)
+  out[0] = a - b
+
+
+cpu_custom_call_targets = {}
+
+cdef register_custom_call_target(fn_name, void* fn):
+  cdef const char* name = "xla._CPU_CUSTOM_CALL_TARGET"
+  cpu_custom_call_targets[fn_name] = PyCapsule_New(fn, name, NULL)
+
+register_custom_call_target(b"test_subtract_f32", <void*>(test_subtract_f32))
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index 5d191f5a18..20e9a14722 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -33,6 +33,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/executable_run_options.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
+#include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h"
 #include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/util.h"
@@ -115,6 +116,20 @@ LocalClient* GetOrCreateLocalClient() {
   return g_local_client;
 }
 
+Status RegisterCpuCustomCallTarget(const string& fn_name, PyObject* capsule) {
+  const char* name = "xla._CPU_CUSTOM_CALL_TARGET";
+  if (!PyCapsule_IsValid(capsule, name)) {
+    return InvalidArgument(
+        "Argument to RegisterCpuCustomCallTargetRegistry was not a "
+        "xla._CPU_CUSTOM_CALL_TARGET capsule.");
+  }
+  void* fn_ptr = PyCapsule_GetPointer(capsule, name);
+  CHECK(fn_ptr != nullptr);
+  cpu::CustomCallTargetRegistry::Global()->Register(
+      std::string(fn_name.begin(), fn_name.end()), fn_ptr);
+  return Status::OK();
+}
+
 Status TransferToInfeedLocal(const Literal& literal) {
   VLOG(1) << "Infeeding literal without replica number; shape: "
           << literal.shape();
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h
index c6e58ac971..5e83415921 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.h
+++ b/tensorflow/compiler/xla/python/local_computation_builder.h
@@ -19,6 +19,8 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include <Python.h>
+
 #include "absl/types/span.h"
 #include "tensorflow/cc/framework/ops.h"
 #include "tensorflow/cc/framework/scope.h"
@@ -49,6 +51,11 @@ Status InitializePlatformName(const string& platform_name);
 // local XLA service has been instantiated yet or not.
 int GetReplicaCount();
 
+// Registers a 'fn_capsule' as a CPU custom call target.
+// 'fn_capsule' is a void* pointer encapsulated in a PyCapsule object, with name
+// "xla._CPU_CUSTOM_CALL_TARGET".
+Status RegisterCpuCustomCallTarget(const string& name, PyObject* fn_capsule);
+
 // Wraps the local client's infeed-transfer function.
 //
 // The default device ordinal (0) is used.
diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i
index 11fb00e616..bf5d667c6a 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.i
+++ b/tensorflow/compiler/xla/python/local_computation_builder.i
@@ -1010,6 +1010,7 @@ tensorflow::ImportNumpy();
 %unignore xla::swig::InitializeReplicaCount;
 %unignore xla::swig::InitializePlatformName;
 %unignore xla::swig::GetReplicaCount;
+%unignore xla::swig::RegisterCpuCustomCallTarget;
 %unignore xla::swig::TransferToInfeedLocal;
 %unignore xla::swig::TransferToInfeedLocalReplica;
 %unignore xla::swig::TransferFromOutfeedLocalReplica;
diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py
index 4166fa0327..378bbdcb17 100644
--- a/tensorflow/compiler/xla/python/xla_client.py
+++ b/tensorflow/compiler/xla/python/xla_client.py
@@ -415,7 +415,7 @@ class Shape(object):
       assert mtm is None, self
     if mtm is not None:
       assert self.rank() == len(mtm), self
-      assert sorted(mtm) == range(len(mtm)), self
+      assert sorted(mtm) == list(range(len(mtm))), self
 
   def update_minor_to_major(self, minor_to_major):
     if not self.is_array():
@@ -1149,7 +1149,7 @@ class ComputationBuilder(object):
     Returns:
       A LocalOp representing the added custom call op.
     """
-    opaque = opaque or ''
+    opaque = opaque or b''
     return self._client.CustomCall(call_target_name, operands,
                                    shape_with_layout,
                                    operand_shapes_with_layout, opaque)
@@ -1552,6 +1552,16 @@ def get_replica_count():
   return c_api.GetReplicaCount()
 
 
+def register_cpu_custom_call_target(name, fn):
+  """Registers a CPU custom call target.
+
+  Args:
+    name: bytes containing the name of the function.
+    fn: a PyCapsule object containing the function pointer.
+  """
+  c_api.RegisterCpuCustomCallTarget(name, fn)
+
+
 def GetPaddingConfigFromTriples(triples):
   """Create PaddingConfig proto from list of triples of integers."""
   padding_config = xla_data_pb2.PaddingConfig()
diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py
index 95c6dc8c45..002a20e60a 100644
--- a/tensorflow/compiler/xla/python/xla_client_test.py
+++ b/tensorflow/compiler/xla/python/xla_client_test.py
@@ -24,6 +24,7 @@ import threading
 
 import numpy as np
 
+from tensorflow.compiler.xla.python import custom_call_for_test
 from tensorflow.compiler.xla.python import xla_client
 import unittest
 
@@ -282,6 +283,20 @@ class ComputationsWithConstantsTest(LocalComputationTest):
         c.Constant(NumpyArrayF64([100, -100, 200, -200])))
     self._ExecuteAndCompareClose(c, expected=[104.4, -93.4, 208.8, -189])
 
+  def testCustomCall(self):
+    c = self._NewComputation()
+    for name, fn in custom_call_for_test.cpu_custom_call_targets.items():
+      xla_client.register_cpu_custom_call_target(name, fn)
+    c.CustomCall(
+        b"test_subtract_f32",
+        operands=(c.ConstantF32Scalar(1.25), c.ConstantF32Scalar(0.5)),
+        shape_with_layout=xla_client.Shape.array_shape(np.float32, (), ()),
+        operand_shapes_with_layout=(
+            xla_client.Shape.array_shape(np.float32, (), ()),
+            xla_client.Shape.array_shape(np.float32, (), ()),
+        ))
+    self._ExecuteAndCompareClose(c, expected=0.75)
+
 
 class ParametersTest(LocalComputationTest):
   """Tests focusing on Parameter ops and argument-passing."""
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index 3a4415f229..769e289025 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -34,6 +34,7 @@ def pyx_library(
         deps = [],
         py_deps = [],
         srcs = [],
+        testonly = None,
         **kwargs):
     """Compiles a group of .pyx / .pxd / .py files.
 
@@ -75,6 +76,7 @@ def pyx_library(
             # Optionally use PYTHON_BIN_PATH on Linux platforms so that python 3
             # works. Windows has issues with cython_binary so skip PYTHON_BIN_PATH.
             cmd = "PYTHONHASHSEED=0 $(location @cython//:cython_binary) --cplus $(SRCS) --output-file $(OUTS)",
+            testonly = testonly,
             tools = ["@cython//:cython_binary"] + pxd_srcs,
         )
 
@@ -85,8 +87,9 @@ def pyx_library(
         native.cc_binary(
             name = shared_object_name,
             srcs = [stem + ".cpp"],
-            deps = deps + ["//third_party/python_runtime:headers"],
+            deps = deps + ["@org_tensorflow//third_party/python_runtime:headers"],
             linkshared = 1,
+            testonly = testonly,
         )
         shared_objects.append(shared_object_name)
 
@@ -97,6 +100,7 @@ def pyx_library(
         deps = py_deps,
         srcs_version = "PY2AND3",
         data = shared_objects,
+        testonly = testonly,
         **kwargs
     )
 
-- 
GitLab


From 66f83978fed9a9292a5e4524206328734d006ac7 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Mon, 17 Dec 2018 13:05:17 -0800
Subject: [PATCH 684/873] Raising an error when different transformations in a
 dataset pipeline are in different graphs. This is in preparation for the next
 TF release where this warning would become an error.

PiperOrigin-RevId: 225875718
---
 .../python/data/kernel_tests/dataset_test.py  | 31 +++++++++++++
 tensorflow/python/data/ops/dataset_ops.py     | 46 ++++++++++++++++++-
 .../data/ops/multi_device_iterator_ops.py     |  1 +
 .../golden/v2/tensorflow.data.-dataset.pbtxt  |  1 +
 4 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/dataset_test.py b/tensorflow/python/data/kernel_tests/dataset_test.py
index 820bc8e4e2..3926be9550 100644
--- a/tensorflow/python/data/kernel_tests/dataset_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_test.py
@@ -30,10 +30,12 @@ from tensorflow.python.data.util import nest
 from tensorflow.python.data.util import structure
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
 from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
 
 
 @test_util.run_all_in_graph_and_eager_modes
@@ -266,6 +268,35 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
           round_trip_dataset, [self.evaluate(tf_value_fn())],
           requires_initialization=True)
 
+  @test_util.run_deprecated_v1
+  def testSkipEagerSameGraphErrorOneShot(self):
+    dataset = dataset_ops.Dataset.range(10)
+    with ops.Graph().as_default():
+      dataset = dataset.batch(2)
+      with test.mock.patch.object(logging, "warning") as mock_log:
+        _ = dataset.make_one_shot_iterator()
+        self.assertRegexpMatches(
+            str(mock_log.call_args), "Please ensure that all datasets in the "
+            "pipeline are created in the same graph as the iterator.")
+
+  @test_util.run_deprecated_v1
+  def testSkipEagerSameGraphErrorOneShotSimple(self):
+    dataset = dataset_ops.Dataset.range(10)
+    with ops.Graph().as_default():
+      with test.mock.patch.object(logging, "warning") as mock_log:
+        _ = dataset.make_one_shot_iterator()
+        self.assertRegexpMatches(
+            str(mock_log.call_args), "Please ensure that all datasets in the "
+            "pipeline are created in the same graph as the iterator.")
+
+  @test_util.run_deprecated_v1
+  def testSkipEagerSameGraphErrorInitializable(self):
+    dataset = dataset_ops.Dataset.range(10)
+    with ops.Graph().as_default():
+      dataset = dataset.batch(2)
+      with self.assertRaisesRegexp(ValueError, "must be from the same graph"):
+        _ = dataset.make_initializable_iterator()
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 904c5b4b64..2c1f69de60 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -24,6 +24,8 @@ import warnings
 
 import numpy as np
 import six
+from six.moves import queue as Queue  # pylint: disable=redefined-builtin
+
 
 from tensorflow.python.compat import compat
 from tensorflow.python.data.experimental.ops import filter_for_shard_ops
@@ -54,6 +56,7 @@ from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import string_ops
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import deprecation
 from tensorflow.python.util import function_utils
 from tensorflow.python.util.tf_export import tf_export
@@ -72,6 +75,9 @@ class DatasetV2(object):
   plan" of transformations that act on those elements.
   """
 
+  def __init__(self):
+    self._graph_attr = ops.get_default_graph()
+
   def _as_serialized_graph(self):
     """Produces serialized graph representation of the dataset.
 
@@ -96,6 +102,14 @@ class DatasetV2(object):
 
     raise NotImplementedError("Dataset._inputs")
 
+  @property
+  def _graph(self):
+    return self._graph_attr
+
+  @_graph.setter
+  def _graph(self, _):
+    raise ValueError("The _graph property is read-only")
+
   def _has_captured_ref(self):
     """Whether this dataset uses a function that captures ref variables.
 
@@ -1300,8 +1314,8 @@ class DatasetV1(DatasetV2):
   plan" of transformations that act on those elements.
   """
 
-  def __init__(self):
-    pass
+  def __init__(self):  # pylint: disable=useless-super-delegation
+    super(DatasetV1, self).__init__()
 
   @deprecation.deprecated(
       None, "Use `for ... in dataset:` to iterate over a dataset. If using "
@@ -1320,6 +1334,7 @@ class DatasetV1(DatasetV2):
     if context.executing_eagerly():
       return iterator_ops.EagerIterator(self)
 
+    _ensure_same_dataset_graph(self)
     graph_level_seed, op_level_seed = core_random_seed.get_seed(None)
 
     # NOTE(mrry): We capture by value here to ensure that `_make_dataset()` is
@@ -1389,6 +1404,7 @@ class DatasetV1(DatasetV2):
       raise RuntimeError(
           "dataset.make_initializable_iterator is not supported when eager "
           "execution is enabled.")
+    _ensure_same_dataset_graph(self)
     dataset = self._apply_options()
     if shared_name is None:
       shared_name = ""
@@ -1628,6 +1644,32 @@ class DatasetV1Adapter(DatasetV1):
     return iter(self._dataset)
 
 
+def _ensure_same_dataset_graph(dataset):
+  """Walks the dataset graph to ensure all datasets come from the same graph."""
+  current_graph = ops.get_default_graph()
+  bfs_q = Queue.Queue()
+  bfs_q.put(dataset)  # pylint: disable=protected-access
+  visited = []
+  while not bfs_q.empty():
+    ds = bfs_q.get()
+    visited.append(ds)
+    ds_graph = ds._graph  # pylint: disable=protected-access
+    if current_graph != ds_graph:
+      logging.warning("The graph (" + str(current_graph) + ") of the iterator "
+                      "is different from the graph (" + str(ds_graph) + ") "
+                      "the dataset: " + str(ds) + " was created in. "
+                      "If you are using the Estimator API, make sure that no "
+                      "part of the dataset returned by the `input_fn` function "
+                      "is defined outside the `input_fn` function."
+                      "Please ensure that all datasets in the pipeline are "
+                      "created in the same graph as the iterator. NOTE: This "
+                      "warning will become an error in future versions of "
+                      "TensorFlow.")
+    for input_ds in ds._inputs():  # pylint: disable=protected-access
+      if input_ds not in visited:
+        bfs_q.put(input_ds)
+
+
 @tf_export(v1=["data.make_one_shot_iterator"])
 def make_one_shot_iterator(dataset):
   """Creates a `tf.data.Iterator` for enumerating the elements of a dataset.
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index 45d0156479..876b77b853 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -35,6 +35,7 @@ class _PerDeviceGenerator(dataset_ops.Dataset):
 
   def __init__(self, shard_num, multi_device_iterator_resource, incarnation_id,
                source_device, target_device, element_structure):
+    super(_PerDeviceGenerator, self).__init__()
     self._target_device = target_device
     self._structure = element_structure
 
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
index d877339409..3ecac329aa 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
@@ -16,6 +16,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
+    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "apply"
-- 
GitLab


From e298b2b0b0313ec67ce439145b007a084720369e Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Mon, 17 Dec 2018 13:05:24 -0800
Subject: [PATCH 685/873] [tf.data] Eager coverage for coordinated sloppy
 tests.

PiperOrigin-RevId: 225875741
---
 .../data/kernel_tests/interleave_test.py      | 79 ++++++++++---------
 .../python/data/kernel_tests/map_test.py      | 70 ++++++++--------
 2 files changed, 74 insertions(+), 75 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/interleave_test.py b/tensorflow/python/data/kernel_tests/interleave_test.py
index 05a211afcc..4fb61b2daf 100644
--- a/tensorflow/python/data/kernel_tests/interleave_test.py
+++ b/tensorflow/python/data/kernel_tests/interleave_test.py
@@ -22,7 +22,7 @@ import threading
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.experimental.ops import threading_options
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import errors
@@ -116,9 +116,7 @@ def _make_coordinated_sloppy_dataset(input_values, cycle_length, block_length,
   dataset = dataset_ops.Dataset.from_tensor_slices(input_values).repeat(
       2).interleave(interleave_fn, cycle_length, block_length,
                     num_parallel_calls).with_options(options)
-  iterator = dataset_ops.make_one_shot_iterator(dataset)
-  get_next = iterator.get_next()
-  return get_next, coordination_events
+  return dataset, coordination_events
 
 
 def _repeat(values, count):
@@ -264,22 +262,24 @@ class InterleaveTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("8", np.int64([4, 0, 6]), 2, 3, 1),
       ("9", np.int64([4, 0, 6]), 2, 3, 2),
   )
-  @test_util.run_v1_only("b/120545219")
-  def testSkipEagerSloppyInterleaveInOrder(self, input_values, cycle_length,
-                                           block_length, num_parallel_calls):
-    get_next, coordination_events = _make_coordinated_sloppy_dataset(
+  def testSloppyInterleaveInOrder(self, input_values, cycle_length,
+                                  block_length, num_parallel_calls):
+    dataset, coordination_events = _make_coordinated_sloppy_dataset(
         input_values, cycle_length, block_length, num_parallel_calls)
-    config = config_pb2.ConfigProto(
-        inter_op_parallelism_threads=num_parallel_calls + 1,
-        use_per_session_threads=True)
-    with self.cached_session(config=config) as sess:
-      for expected_element in _interleave(
-          _repeat(input_values, 2), cycle_length, block_length):
-        coordination_events[expected_element].set()
-        self.assertEqual(expected_element * expected_element,
-                         self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    options = dataset_ops.Options()
+    options.experimental_threading = threading_options.ThreadingOptions()
+    options.experimental_threading.private_threadpool_size = (
+        num_parallel_calls + 1)
+    dataset = dataset.with_options(options)
+
+    get_next = self.getNext(dataset, requires_initialization=True)
+    for expected_element in _interleave(
+        _repeat(input_values, 2), cycle_length, block_length):
+      coordination_events[expected_element].set()
+      self.assertEqual(expected_element * expected_element,
+                       self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   @parameterized.named_parameters(
       ("1", np.int64([4, 5, 6]), 2, 1, 2),
@@ -287,27 +287,28 @@ class InterleaveTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("3", np.int64([4, 5, 6]), 3, 2, 3),
       ("4", np.int64([4, 0, 6]), 2, 3, 2),
   )
-  @test_util.run_v1_only("b/120545219")
-  def testSkipEagerSloppyInterleaveOutOfOrder(self, input_values, cycle_length,
-                                              block_length, num_parallel_calls):
-    get_next, coordination_events = _make_coordinated_sloppy_dataset(
+  def testSloppyInterleaveOutOfOrder(self, input_values, cycle_length,
+                                     block_length, num_parallel_calls):
+    dataset, coordination_events = _make_coordinated_sloppy_dataset(
         input_values, cycle_length, block_length, num_parallel_calls)
-    config = config_pb2.ConfigProto(
-        inter_op_parallelism_threads=num_parallel_calls + 1,
-        use_per_session_threads=True)
-    with self.cached_session(config=config) as sess:
-      elements = [
-          x for x in _interleave(
-              _repeat(input_values, 2), cycle_length, block_length)
-      ]
-      for i in [1, 4, 7]:
-        elements[i], elements[i + 1] = elements[i + 1], elements[i]
-
-      for element in elements:
-        coordination_events[element].set()
-        self.assertEqual(element * element, self.evaluate(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    options = dataset_ops.Options()
+    options.experimental_threading = threading_options.ThreadingOptions()
+    options.experimental_threading.private_threadpool_size = (
+        num_parallel_calls + 1)
+    dataset = dataset.with_options(options)
+    get_next = self.getNext(dataset, requires_initialization=True)
+    elements = [
+        x for x in _interleave(
+            _repeat(input_values, 2), cycle_length, block_length)
+    ]
+    for i in [1, 4, 7]:
+      elements[i], elements[i + 1] = elements[i + 1], elements[i]
+
+    for element in elements:
+      coordination_events[element].set()
+      self.assertEqual(element * element, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/map_test.py b/tensorflow/python/data/kernel_tests/map_test.py
index 67ef98f9fe..e84391946f 100644
--- a/tensorflow/python/data/kernel_tests/map_test.py
+++ b/tensorflow/python/data/kernel_tests/map_test.py
@@ -25,7 +25,7 @@ from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.core.framework import attr_value_pb2
-from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.data.experimental.ops import threading_options
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
@@ -80,9 +80,7 @@ def _make_coordinated_sloppy_dataset(num_elements, num_parallel_calls):
   options.experimental_deterministic = False
   dataset = dataset_ops.Dataset.range(num_elements).map(
       map_fn, num_parallel_calls).with_options(options)
-  iterator = dataset_ops.make_one_shot_iterator(dataset)
-  next_element = iterator.get_next()
-  return next_element, coordination_events
+  return dataset, coordination_events
 
 
 @test_util.run_all_in_graph_and_eager_modes
@@ -892,7 +890,6 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
 
     self.assertEqual(42, self.evaluate(get_next()))
 
-  # TODO(b/117581999): Add eager coverage.
   @parameterized.named_parameters(
       ("1", 1, 1),
       ("2", 10, 1),
@@ -901,45 +898,46 @@ class MapDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("5", 100, 10),
       ("6", 100, 100),
   )
-  @test_util.run_v1_only("b/120545219")
-  def testSkipEagerSloppyInterleaveInOrder(self, num_elements,
-                                           num_parallel_calls):
-    get_next, coordination_events = _make_coordinated_sloppy_dataset(
+  def testSloppyInterleaveInOrder(self, num_elements, num_parallel_calls):
+    dataset, coordination_events = _make_coordinated_sloppy_dataset(
         num_elements, num_parallel_calls)
-    config = config_pb2.ConfigProto(
-        inter_op_parallelism_threads=num_parallel_calls + 1,
-        use_per_session_threads=True)
-    with self.cached_session(config=config) as sess:
-      for i in range(num_elements):
-        coordination_events[i].set()
-        self.assertEqual(i * i, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    options = dataset_ops.Options()
+    options.experimental_threading = threading_options.ThreadingOptions()
+    options.experimental_threading.private_threadpool_size = (
+        num_parallel_calls + 1)
+    dataset = dataset.with_options(options)
+    get_next = self.getNext(dataset, requires_initialization=True)
+    for i in range(num_elements):
+      coordination_events[i].set()
+      self.assertEqual(i * i, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
-  # TODO(b/117581999): Add eager coverage.
   @parameterized.named_parameters(
       ("1", 10, 10),
       ("2", 100, 10),
       ("3", 100, 100),
   )
-  @test_util.run_v1_only("b/120545219")
-  def testSkipEagerSloppyInterleaveOutOfOrder(self, num_elements,
-                                              num_parallel_calls):
-    get_next, coordination_events = _make_coordinated_sloppy_dataset(
+  def testSloppyInterleaveOutOfOrder(self, num_elements, num_parallel_calls):
+    dataset, coordination_events = _make_coordinated_sloppy_dataset(
         num_elements, num_parallel_calls)
-    config = config_pb2.ConfigProto(
-        inter_op_parallelism_threads=num_parallel_calls + 1,
-        use_per_session_threads=True)
-    with self.cached_session(config=config) as sess:
-      elements = [x for x in range(num_elements)]
-      for i in [1, 4, 7]:
-        elements[i], elements[i + 1] = elements[i + 1], elements[i]
-
-      for element in elements:
-        coordination_events[element].set()
-        self.assertEqual(element * element, sess.run(get_next))
-      with self.assertRaises(errors.OutOfRangeError):
-        sess.run(get_next)
+    options = dataset_ops.Options()
+    options.experimental_threading = threading_options.ThreadingOptions()
+    options.experimental_threading.private_threadpool_size = (
+        num_parallel_calls + 1)
+    dataset = dataset.with_options(options)
+
+    get_next = self.getNext(dataset, requires_initialization=True)
+
+    elements = [x for x in range(num_elements)]
+    for i in [1, 4, 7]:
+      elements[i], elements[i + 1] = elements[i + 1], elements[i]
+
+    for element in elements:
+      coordination_events[element].set()
+      self.assertEqual(element * element, self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
 
   @parameterized.named_parameters(
       ("Map", None),
-- 
GitLab


From e1d74e4478f71d79b4b9b91b4f4a2735011eecff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 13:08:15 -0800
Subject: [PATCH 686/873] Fix lint warning about long ternary expression.

PiperOrigin-RevId: 225876176
---
 .../gan/python/estimator/python/gan_estimator_impl.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
index 3593b501bb..adb7222821 100644
--- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
+++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py
@@ -233,13 +233,14 @@ def _get_estimator_spec(
       estimator_spec = _get_eval_estimator_spec(
           gan_model, gan_loss, get_eval_metric_ops_fn)
     else:  # model_fn_lib.ModeKeys.TRAIN:
-      gopt = (generator_optimizer() if callable(generator_optimizer) else
-              generator_optimizer)
-      dopt = (discriminator_optimizer() if callable(discriminator_optimizer)
-              else discriminator_optimizer)
+      if callable(generator_optimizer):
+        generator_optimizer = generator_optimizer()
+      if callable(discriminator_optimizer):
+        discriminator_optimizer = discriminator_optimizer()
       get_hooks_fn = get_hooks_fn or tfgan_train.get_sequential_train_hooks()
       estimator_spec = _get_train_estimator_spec(
-          gan_model, gan_loss, gopt, dopt, get_hooks_fn, is_chief=is_chief)
+          gan_model, gan_loss, generator_optimizer, discriminator_optimizer,
+          get_hooks_fn, is_chief=is_chief)
 
   return estimator_spec
 
-- 
GitLab


From d5b902c64885206c45f6c3ed0c891c444d6f2985 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 17 Dec 2018 13:08:33 -0800
Subject: [PATCH 687/873] Change api under
 tensorflow_estimator/python/estimator/api/ to generate 2.0 API as well.

PiperOrigin-RevId: 225876218
---
 tensorflow/api_template.__init__.py    | 15 +++++++++++++--
 tensorflow/api_template_v1.__init__.py | 16 +++++++++++++---
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index 4eba763129..ce5df0f9d4 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -19,6 +19,7 @@ from __future__ import division as _division
 from __future__ import print_function as _print_function
 
 import os as _os
+import sys as _sys
 
 # API IMPORTS PLACEHOLDER
 
@@ -26,13 +27,23 @@ import os as _os
 from tensorflow.python.tools import component_api_helper as _component_api_helper
 _component_api_helper.package_hook(
     parent_package_str=__name__,
-    child_package_str=('tensorflow_estimator.python.estimator.api.estimator'))
+    child_package_str=(
+        'tensorflow_estimator.python.estimator.api._v2.estimator'))
+
+_current_module = _sys.modules[__name__]
+if not hasattr(_current_module, 'estimator'):
+  _component_api_helper.package_hook(
+      parent_package_str=__name__,
+      child_package_str=(
+          'tensorflow_estimator.python.estimator.api.estimator'))
 
 # Make sure directory containing top level submodules is in
 # the __path__ so that "from tensorflow.foo import bar" works.
 # We're using bitwise, but there's nothing special about that.
 _tf_api_dir = _os.path.dirname(_os.path.dirname(bitwise.__file__))  # pylint: disable=undefined-variable
-if _tf_api_dir not in __path__:
+if not hasattr(_current_module, '__path__'):
+  __path__ = [_tf_api_dir]
+elif _tf_api_dir not in __path__:
   __path__.append(_tf_api_dir)
 
 # Enable TF2 behaviors
diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index 21b5277614..89bae4a4e9 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -19,6 +19,7 @@ from __future__ import division as _division
 from __future__ import print_function as _print_function
 
 import os as _os
+import sys as _sys
 
 # pylint: disable=g-bad-import-order
 from tensorflow.python import pywrap_tensorflow  # pylint: disable=unused-import
@@ -28,7 +29,15 @@ from tensorflow.python import pywrap_tensorflow  # pylint: disable=unused-import
 from tensorflow.python.tools import component_api_helper as _component_api_helper
 _component_api_helper.package_hook(
     parent_package_str=__name__,
-    child_package_str=('tensorflow_estimator.python.estimator.api.estimator'))
+    child_package_str=(
+        'tensorflow_estimator.python.estimator.api._v1.estimator'))
+
+_current_module = _sys.modules[__name__]
+if not hasattr(_current_module, 'estimator'):
+  _component_api_helper.package_hook(
+      parent_package_str=__name__,
+      child_package_str=(
+          'tensorflow_estimator.python.estimator.api.estimator'))
 
 from tensorflow.python.util.lazy_loader import LazyLoader  # pylint: disable=g-import-not-at-top
 contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib')
@@ -45,10 +54,11 @@ app.flags = flags  # pylint: disable=undefined-variable
 # Make sure directory containing top level submodules is in
 # the __path__ so that "from tensorflow.foo import bar" works.
 _tf_api_dir = _os.path.dirname(_os.path.dirname(app.__file__))  # pylint: disable=undefined-variable
-if _tf_api_dir not in __path__:
+if not hasattr(_current_module, '__path__'):
+  __path__ = [_tf_api_dir]
+elif _tf_api_dir not in __path__:
   __path__.append(_tf_api_dir)
 
-
 # These symbols appear because we import the python package which
 # in turn imports from tensorflow.core and tensorflow.python. They
 # must come from this module. So python adds these symbols for the
-- 
GitLab


From 02de6702dca9f2a9183ca14f66faeb25ad6fc1a1 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 17 Dec 2018 13:11:17 -0800
Subject: [PATCH 688/873] Automated rollback of commit
 33cbbf4f68a68a99dd822c6a4999f0f254dbe1e2

PiperOrigin-RevId: 225876601
---
 tensorflow/python/ops/nn_impl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 841bac8bea..48dcab4842 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -398,7 +398,7 @@ def _count_nonzero(input_tensor, dtype=dtypes.int64):
   Returns:
       number of nonzero values with type dtype
   """
-  with ops.name_scope("count_nonzero", values=[input_tensor]):
+  with ops.name_scope("count_nonzero", [input_tensor]):
     zero = array_ops.zeros([], dtype=input_tensor.dtype)
     nonzero_count = math_ops.reduce_sum(
         math_ops.cast(
-- 
GitLab


From d8c9faf5f79aa947cfc0378063be914d436d442a Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 17 Dec 2018 13:24:50 -0800
Subject: [PATCH 689/873] Minor speedup of the sequential model run eagerly

PiperOrigin-RevId: 225878747
---
 tensorflow/python/keras/engine/sequential.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py
index 5a42afe847..8c388cbafc 100644
--- a/tensorflow/python/keras/engine/sequential.py
+++ b/tensorflow/python/keras/engine/sequential.py
@@ -103,6 +103,8 @@ class Sequential(Model):
     self._build_input_shape = None
     self._compute_output_and_mask_jointly = True
 
+    self._layer_call_argspecs = {}
+
     # Add to the model any layers passed to the constructor.
     if layers:
       for layer in layers:
@@ -192,6 +194,8 @@ class Sequential(Model):
     if self._layers:
       self._track_layers(self._layers)
 
+    self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(layer.call)
+
   @checkpointable.no_automatic_dependency_tracking
   def pop(self):
     """Removes the last layer in the model.
@@ -202,7 +206,8 @@ class Sequential(Model):
     if not self.layers:
       raise TypeError('There are no layers in the model.')
 
-    self._layers.pop()
+    layer = self._layers.pop()
+    self._layer_call_argspecs.pop(layer)
     if not self.layers:
       self.outputs = None
       self.inputs = None
@@ -240,9 +245,10 @@ class Sequential(Model):
     x = inputs
     for layer in self.layers:
       kwargs = {}
-      if 'mask' in tf_inspect.getfullargspec(layer.call).args:
+      argspec = self._layer_call_argspecs[layer].args
+      if 'mask' in argspec:
         kwargs['mask'] = mask
-      if 'training' in tf_inspect.getfullargspec(layer.call).args:
+      if 'training' in argspec:
         kwargs['training'] = training
 
       if isinstance(layer, Network) and layer._compute_output_and_mask_jointly:
-- 
GitLab


From 5e70a4059bcb9260962231530ecd9fe44fbac579 Mon Sep 17 00:00:00 2001
From: Dan Moldovan <mdan@google.com>
Date: Mon, 17 Dec 2018 13:32:24 -0800
Subject: [PATCH 690/873] Relax the strict verification of the function's in
 cases when the converted function is decorated. This helps autograph be more
 compatible with libraries such as sonnet, that in turn use wrapper libraries
 like wrapt.

PiperOrigin-RevId: 225879972
---
 .../python/autograph/impl/conversion.py       |  5 +--
 tensorflow/python/autograph/pyct/ast_util.py  | 33 +++++++++++++++++--
 .../python/autograph/pyct/ast_util_test.py    | 29 ++++++++++++++++
 3 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/autograph/impl/conversion.py b/tensorflow/python/autograph/impl/conversion.py
index 733d4f1c71..0ca84b1f7a 100644
--- a/tensorflow/python/autograph/impl/conversion.py
+++ b/tensorflow/python/autograph/impl/conversion.py
@@ -352,10 +352,11 @@ def function_to_graph(f,
           ' in a separate expression.'.format(f, source))
     else:
       raise ValueError(
-          'Unable to identify source code of function {}. The source code'
+          'Unable to identify source code of function {}({}). The source code'
           ' reported by Python did not include exactly one matching signature:'
           '\n{}\n. This is an extremely rare occurrence. Please report it to'
-          ' the TensorFlow team.'.format(f, source))
+          ' the TensorFlow team.'.format(f, tf_inspect.getfullargspec(f),
+                                         source))
   node, = nodes
 
   # TODO(znado): Place inside standard_analysis.
diff --git a/tensorflow/python/autograph/pyct/ast_util.py b/tensorflow/python/autograph/pyct/ast_util.py
index 3dc10cf349..f7723412ab 100644
--- a/tensorflow/python/autograph/pyct/ast_util.py
+++ b/tensorflow/python/autograph/pyct/ast_util.py
@@ -349,6 +349,26 @@ class FunctionDefMatcher(gast.NodeVisitor):
 
     return True
 
+  def _argspec_compatible(self, node):
+    arg_spec = tf_inspect.getfullargspec(self.fn)
+
+    node_args = tuple(self._arg_name(arg) for arg in node.args.args)
+    if len(node_args) != len(arg_spec.args) and node.args.vararg is None:
+      return False
+
+    if arg_spec.varargs is not None and node.args.vararg is None:
+      return False
+
+    if arg_spec.varkw is not None and node.args.kwarg is None:
+      return False
+
+    node_kwonlyargs = tuple(self._arg_name(arg) for arg in node.args.kwonlyargs)
+    if (len(node_kwonlyargs) != len(arg_spec.kwonlyargs) and
+        node.args.kwarg is None):
+      return False
+
+    return True
+
   def visit_Lambda(self, node):
     self.generic_visit(node)
 
@@ -364,8 +384,17 @@ class FunctionDefMatcher(gast.NodeVisitor):
 
     if self.fn.__name__ != node.name:
       return
-    if not self._argspec_matches(node):
-      return
+
+    # Decorators have the ability to modify a function's signature. They usually
+    # claim that the result is indistinguishable from the original function,
+    # but it's very difficult to fool this test. As a consequence, we relax the
+    # verification and just check that the arguments are compatible.
+    if node.decorator_list:
+      if not self._argspec_compatible(node):
+        return
+    else:
+      if not self._argspec_matches(node):
+        return
 
     self.matching_nodes.append(node)
 
diff --git a/tensorflow/python/autograph/pyct/ast_util_test.py b/tensorflow/python/autograph/pyct/ast_util_test.py
index 9fcbbe646c..d4f1e1c7cd 100644
--- a/tensorflow/python/autograph/pyct/ast_util_test.py
+++ b/tensorflow/python/autograph/pyct/ast_util_test.py
@@ -294,6 +294,35 @@ class AstUtilTest(test.TestCase):
     nodes = ast_util.find_matching_definitions(node, f)
     self.assertFunctionDefNodes(nodes, ('return 1', 'return 2'))
 
+  def test_find_matching_definitions_decorated_compatible(self):
+    node = parser.parse_str(
+        textwrap.dedent("""
+      @sneaky_decorator
+      def f(x, *args, **kwargs):
+        return 1
+    """))
+
+    def f(a, b, c, d=1):
+      return a + b + c + d
+
+    nodes = ast_util.find_matching_definitions(node, f)
+    self.assertFunctionDefNodes(nodes, ('return 1',))
+
+  def test_find_matching_definitions_decorated_incompatible(self):
+    node = parser.parse_str(
+        textwrap.dedent("""
+      @sneaky_decorator
+      def f(x, y, z):
+        return 1
+    """))
+
+    def f(a, b, c, d, *args):
+      del args
+      return a + b + c + d
+
+    nodes = ast_util.find_matching_definitions(node, f)
+    self.assertFunctionDefNodes(nodes, ())
+
 
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From e861541e5c662083a2c56986a8bbe94cf8882747 Mon Sep 17 00:00:00 2001
From: Davide Libenzi <dlibenzi@google.com>
Date: Mon, 17 Dec 2018 13:33:41 -0800
Subject: [PATCH 691/873] Remove the device ordinal from the XRTAllocate proto,
 and use the device specified within the kernel context.

PiperOrigin-RevId: 225880160
---
 .../compiler/xrt/kernels/xrt_state_ops.h      |  4 +---
 tensorflow/compiler/xrt/tests/raw_api_test.cc | 19 -------------------
 tensorflow/compiler/xrt/xrt.proto             |  2 +-
 tensorflow/compiler/xrt/xrt_device.cc         |  8 ++++++++
 tensorflow/compiler/xrt/xrt_device.h          |  2 ++
 5 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/tensorflow/compiler/xrt/kernels/xrt_state_ops.h b/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
index e3b292e790..2e2f3ff116 100644
--- a/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
+++ b/tensorflow/compiler/xrt/kernels/xrt_state_ops.h
@@ -183,9 +183,7 @@ class XRTAllocateOp : public OpKernel {
     // We are guaranteed that the underlying device object won't be deleted out
     // from under us, while the ScopedRef is live.
     class DeviceAccessor::ScopedRef device_ref;
-    OP_REQUIRES_OK(ctx,
-                   DeviceAccessor::InitScopedRef(
-                       ctx, allocation_proto.device_ordinal(), &device_ref));
+    OP_REQUIRES_OK(ctx, DeviceAccessor::InitScopedRef(ctx, &device_ref));
 
     XRTTupleAllocation* allocation;
     OP_REQUIRES_OK(ctx, XRTTupleAllocation::CreateAndTransfer(
diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc
index 730a227167..5f8121703e 100644
--- a/tensorflow/compiler/xrt/tests/raw_api_test.cc
+++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc
@@ -217,7 +217,6 @@ xla::ProgramShape XlaCompiledProgramShape(
 
 TEST(RawApiTest, AllocAndRewrite) {
   xrt::XLAAllocation alloc;
-  alloc.set_device_ordinal(0);
   *alloc.mutable_value() =
       xla::LiteralUtil::CreateR2({{4, 5}, {6, 7}}).ToProto();
 
@@ -267,7 +266,6 @@ TEST(RawApiTest, AllocAndRewrite) {
 
 TEST(RawApiTest, AllocAndClearAll) {
   xrt::XLAAllocation alloc;
-  alloc.set_device_ordinal(0);
   *alloc.mutable_value() =
       xla::LiteralUtil::CreateR2({{4, 5}, {6, 7}}).ToProto();
 
@@ -298,7 +296,6 @@ TEST(RawApiTest, AllocAndClearAll) {
 
 TEST(RawApiTest, ReadAndWriteState) {
   xrt::XLAAllocation alloc;
-  alloc.set_device_ordinal(0);
   *alloc.mutable_value() = TwoElementTuple();
 
   Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
@@ -323,7 +320,6 @@ TEST(RawApiTest, ReadAndWriteState) {
 
 TEST(RawApiTest, ReadAndWriteStateAutoFree) {
   xrt::XLAAllocation alloc;
-  alloc.set_device_ordinal(0);
   *alloc.mutable_value() = TwoElementTuple();
 
   Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
@@ -344,7 +340,6 @@ TEST(RawApiTest, ReadAndWriteStateAutoFree) {
 
 TEST(RawApiTest, SubBuffer) {
   xrt::XLAAllocation alloc;
-  alloc.set_device_ordinal(0);
   *alloc.mutable_value() = NestedTuple();
 
   Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
@@ -385,10 +380,8 @@ TEST(RawApiTest, SubBuffer) {
 
 TEST(RawApiTest, MakeTuple) {
   xrt::XLAAllocation alloc_0;
-  alloc_0.set_device_ordinal(0);
   *alloc_0.mutable_value() = TwoElementTuple();
   xrt::XLAAllocation alloc_1;
-  alloc_1.set_device_ordinal(0);
   *alloc_1.mutable_value() = ScalarLiteral();
 
   // The trivial tuple that just forwards its input and releases it.
@@ -459,10 +452,8 @@ TEST(RawApiTest, MakeTuple) {
 
 TEST(RawApiTest, CompileAndExecute) {
   xrt::XLAAllocation p0;
-  p0.set_device_ordinal(0);
   *p0.mutable_value() = FloatVector({1.0f, 2.0f});
   xrt::XLAAllocation p1;
-  p1.set_device_ordinal(0);
   *p1.mutable_value() = FloatVector({8.0f, 5.0f});
 
   xrt::XLAComputation c;
@@ -514,10 +505,8 @@ TEST(RawApiTest, CompileAndExecute) {
 
 TEST(RawApiTest, CompileAndExecuteWithArgumentVector) {
   xrt::XLAAllocation p0;
-  p0.set_device_ordinal(0);
   *p0.mutable_value() = FloatVector({1.0f, 2.0f});
   xrt::XLAAllocation p1;
-  p1.set_device_ordinal(0);
   *p1.mutable_value() = FloatVector({8.0f, 5.0f});
 
   xrt::XLAComputation c;
@@ -637,10 +626,8 @@ TEST(RawApiTest, DotGeneralWithLayoutTest) {
   auto layout = xla::LayoutUtil::MakeLayout({0, 1});
 
   xrt::XLAAllocation p0;
-  p0.set_device_ordinal(0);
   *p0.mutable_value() = FloatMatrix({{1.0f, 2.0f}, {3.0f, 4.0f}}, layout);
   xrt::XLAAllocation p1;
-  p1.set_device_ordinal(0);
   *p1.mutable_value() = FloatMatrix({{8.0f}, {5.0f}}, layout);
 
   xrt::XLAComputation c;
@@ -723,10 +710,8 @@ TEST(RawApiTest, CompileAndExecuteZeroArg) {
 
 TEST(RawApiTest, CompileAndExecuteReturnTuple) {
   xrt::XLAAllocation p0;
-  p0.set_device_ordinal(0);
   *p0.mutable_value() = FloatVector({1.0f, 2.0f});
   xrt::XLAAllocation p1;
-  p1.set_device_ordinal(0);
   *p1.mutable_value() = FloatVector({8.0f, 5.0f});
 
   xrt::XLAComputation c;
@@ -776,11 +761,9 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) {
 
 TEST(RawApiTest, CompileAndExecuteReturnExplodedTuple) {
   xrt::XLAAllocation p0;
-  p0.set_device_ordinal(0);
   *p0.mutable_value() = xla::LiteralUtil::CreateR0<float>(12.0f).ToProto();
 
   xrt::XLAAllocation p1;
-  p1.set_device_ordinal(0);
   *p1.mutable_value() = xla::LiteralUtil::CreateR0<float>(3.0f).ToProto();
 
   xrt::XLAComputation c;
@@ -864,10 +847,8 @@ TEST(RawApiTest, LeakCompilationReference) {
 
 TEST(RawApiTest, CompileAndExecuteWithS64Argument) {
   xrt::XLAAllocation p0;
-  p0.set_device_ordinal(0);
   *p0.mutable_value() = xla::LiteralUtil::CreateR0<int64>(11031965).ToProto();
   xrt::XLAAllocation p1;
-  p1.set_device_ordinal(0);
   *p1.mutable_value() = xla::LiteralUtil::CreateR0<int64>(4091934).ToProto();
 
   xrt::XLAComputation c;
diff --git a/tensorflow/compiler/xrt/xrt.proto b/tensorflow/compiler/xrt/xrt.proto
index 378bb9246f..84adee7392 100644
--- a/tensorflow/compiler/xrt/xrt.proto
+++ b/tensorflow/compiler/xrt/xrt.proto
@@ -59,7 +59,7 @@ message XLAComputation {
 
 // Literal to allocate space for, and transfer to, device memory.
 message XLAAllocation {
-  int32 device_ordinal = 1;
+  reserved 1;
   xla.LiteralProto value = 2;
 }
 
diff --git a/tensorflow/compiler/xrt/xrt_device.cc b/tensorflow/compiler/xrt/xrt_device.cc
index ea40e6c895..34cb64742a 100644
--- a/tensorflow/compiler/xrt/xrt_device.cc
+++ b/tensorflow/compiler/xrt/xrt_device.cc
@@ -43,4 +43,12 @@ namespace tensorflow {
   return Status::OK();
 }
 
+/*static*/ Status XRTGenericDeviceAccessor::InitScopedRef(
+    OpKernelContext* ctx, ScopedRef* scoped_ref) {
+  const XlaDevice::Metadata* metadata;
+  TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata));
+  scoped_ref->Acquire(metadata->client());
+  return Status::OK();
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/xrt/xrt_device.h b/tensorflow/compiler/xrt/xrt_device.h
index 1e3fddd2a7..fb010651d9 100644
--- a/tensorflow/compiler/xrt/xrt_device.h
+++ b/tensorflow/compiler/xrt/xrt_device.h
@@ -59,6 +59,8 @@ class XRTGenericDeviceAccessor {
 
   static Status InitScopedRef(OpKernelContext* ctx, int device_ordinal,
                               ScopedRef* scoped_ref);
+
+  static Status InitScopedRef(OpKernelContext* ctx, ScopedRef* scoped_ref);
 };
 
 }  // namespace tensorflow
-- 
GitLab


From 9d59b574a3ebfe3b7372b1291fd0df5b1ef05237 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 17 Dec 2018 13:39:24 -0800
Subject: [PATCH 692/873] Internal change.

PiperOrigin-RevId: 225881196
---
 tensorflow/tools/pip_package/setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 3927540cc7..57ebbb2253 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,7 +45,8 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.12.0'
+# TODO(annarev): switch this back to 1.12.0.
+_VERSION = '1.12.0a'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From 0bb240942c35aaa754e9b00ab71de6a18a017ce0 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Mon, 17 Dec 2018 13:47:56 -0800
Subject: [PATCH 693/873] Update cudnn_recurrent_test to run under tf 2.0 mode.

PiperOrigin-RevId: 225882588
---
 .../keras/layers/cudnn_recurrent_test.py      | 749 +++++++++---------
 1 file changed, 381 insertions(+), 368 deletions(-)

diff --git a/tensorflow/python/keras/layers/cudnn_recurrent_test.py b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
index cc93364aae..36f2d2fa38 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent_test.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent_test.py
@@ -25,278 +25,294 @@ import numpy as np
 
 from tensorflow.python import keras
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
+from tensorflow.python.keras.optimizer_v2.rmsprop import RMSprop
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
 from tensorflow.python.training.rmsprop import RMSPropOptimizer
 
 
-class CuDNNTest(test.TestCase, parameterized.TestCase):
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_cudnn_rnn_basics(self):
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_size = 10
-        timesteps = 6
-        units = 2
-        num_samples = 32
-        for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
-          for return_sequences in [True, False]:
-            with keras.utils.CustomObjectScope(
-                {'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU,
-                 'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM}):
-              testing_utils.layer_test(
-                  layer_class,
-                  kwargs={'units': units,
-                          'return_sequences': return_sequences},
-                  input_shape=(num_samples, timesteps, input_size))
-          for go_backwards in [True, False]:
-            with keras.utils.CustomObjectScope(
-                {'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU,
-                 'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM}):
-              testing_utils.layer_test(
-                  layer_class,
-                  kwargs={'units': units,
-                          'go_backwards': go_backwards},
-                  input_shape=(num_samples, timesteps, input_size))
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_trainability(self):
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_size = 10
-        units = 2
-        for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
-          layer = layer_class(units)
-          layer.build((None, None, input_size))
-          self.assertEqual(len(layer.weights), 3)
-          self.assertEqual(len(layer.trainable_weights), 3)
-          self.assertEqual(len(layer.non_trainable_weights), 0)
-          layer.trainable = False
-          self.assertEqual(len(layer.weights), 3)
-          self.assertEqual(len(layer.non_trainable_weights), 3)
-          self.assertEqual(len(layer.trainable_weights), 0)
-          layer.trainable = True
-          self.assertEqual(len(layer.weights), 3)
-          self.assertEqual(len(layer.trainable_weights), 3)
-          self.assertEqual(len(layer.non_trainable_weights), 0)
+@keras_parameterized.run_all_keras_modes
+class CuDNNTest(keras_parameterized.TestCase):
 
   @parameterized.named_parameters(
-      ('cudnngru', keras.layers.CuDNNGRU),
-      ('cudnnlstm', keras.layers.CuDNNLSTM),
-  )
-  def test_regularizer(self, layer_class):
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_size = 10
-        timesteps = 6
-        units = 2
-        num_samples = 32
-        layer = layer_class(
-            units,
-            return_sequences=False,
-            input_shape=(timesteps, input_size),
-            kernel_regularizer=keras.regularizers.l1(0.01),
-            recurrent_regularizer=keras.regularizers.l1(0.01),
-            bias_regularizer='l2')
-        layer.build((None, None, input_size))
-        self.assertEqual(len(layer.losses), 3)
-
-        layer = layer_class(
-            units,
-            return_sequences=False,
-            input_shape=(timesteps, input_size),
-            activity_regularizer='l2')
-        self.assertTrue(layer.activity_regularizer)
-        x = keras.backend.variable(
-            np.ones((num_samples, timesteps, input_size)))
-        layer(x)
-        self.assertEqual(len(layer.get_losses_for(x)), 1)
+      *test_util.generate_combinations_with_testcase_name(
+          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
+          return_sequences=[True, False]))
+  @test_util.run_gpu_only
+  def test_cudnn_rnn_return_sequence(self, layer_class, return_sequences):
+    input_size = 10
+    timesteps = 6
+    units = 2
+    num_samples = 32
+    testing_utils.layer_test(
+        layer_class,
+        kwargs={'units': units,
+                'return_sequences': return_sequences},
+        input_shape=(num_samples, timesteps, input_size))
+
+  @parameterized.named_parameters(
+      *test_util.generate_combinations_with_testcase_name(
+          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
+          go_backwards=[True, False]))
+  @test_util.run_gpu_only
+  def test_cudnn_rnn_go_backward(self, layer_class, go_backwards):
+    input_size = 10
+    timesteps = 6
+    units = 2
+    num_samples = 32
+    testing_utils.layer_test(
+        layer_class,
+        kwargs={'units': units,
+                'go_backwards': go_backwards},
+        input_shape=(num_samples, timesteps, input_size))
 
   @parameterized.named_parameters(
       ('cudnngru', keras.layers.CuDNNGRU),
       ('cudnnlstm', keras.layers.CuDNNLSTM),
   )
+  @test_util.run_gpu_only
   def test_return_state(self, layer_class):
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_size = 10
-        timesteps = 6
-        units = 2
-        num_samples = 32
-        num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1
-
-        inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size))
-        layer = layer_class(units, return_state=True, stateful=True)
-        outputs = layer(inputs)
-        _, state = outputs[0], outputs[1:]
-        self.assertEqual(len(state), num_states)
-        model = keras.models.Model(inputs, state[0])
-
-        inputs = np.random.random((num_samples, timesteps, input_size))
-        state = model.predict(inputs)
-        np.testing.assert_allclose(
-            keras.backend.eval(layer.states[0]), state, atol=1e-4)
+    input_size = 10
+    timesteps = 6
+    units = 2
+    num_samples = 32
+    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1
+
+    inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size))
+    layer = layer_class(units, return_state=True, stateful=True)
+    outputs = layer(inputs)
+    _, state = outputs[0], outputs[1:]
+    self.assertEqual(len(state), num_states)
+    model = keras.models.Model(inputs, state[0])
+    model.run_eagerly = testing_utils.should_run_eagerly()
+
+    inputs = np.random.random((num_samples, timesteps, input_size))
+    state = model.predict(inputs)
+    np.testing.assert_allclose(
+        keras.backend.eval(layer.states[0]), state, atol=1e-4)
 
   @parameterized.named_parameters(
       ('cudnngru', keras.layers.CuDNNGRU),
       ('cudnnlstm', keras.layers.CuDNNLSTM),
   )
+  @test_util.run_gpu_only
   def test_time_major_input(self, layer_class):
-    if test.is_gpu_available(cuda_only=True):
-      with self.test_session(use_gpu=True):
-        input_size = 10
-        timesteps = 6
-        units = 2
-        num_samples = 32
-
-        model = keras.models.Sequential()
-        model.add(
-            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
-        layer = layer_class(units, time_major=True, return_sequences=True)
-        model.add(layer)
-        model.add(
-            keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
-        model.compile(loss='categorical_crossentropy', optimizer='adam')
-        model.fit(
-            np.ones((num_samples, timesteps, input_size)),
-            np.ones((num_samples, timesteps, units)))
-        out = model.predict(np.ones((num_samples, timesteps, input_size)))
-        self.assertEqual(out.shape, (num_samples, timesteps, units))
+    input_size = 10
+    timesteps = 6
+    units = 2
+    num_samples = 32
+
+    model = keras.models.Sequential()
+    model.add(
+        keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
+    layer = layer_class(units, time_major=True, return_sequences=True)
+    model.add(layer)
+    model.add(
+        keras.layers.Lambda(lambda t: array_ops.transpose(t, [1, 0, 2])))
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=RMSprop(learning_rate=0.001))
+    model.fit(
+        np.ones((num_samples, timesteps, input_size)),
+        np.ones((num_samples, timesteps, units)))
+    out = model.predict(np.ones((num_samples, timesteps, input_size)))
+    self.assertEqual(out.shape, (num_samples, timesteps, units))
 
   @parameterized.named_parameters(
       ('cudnngru', keras.layers.CuDNNGRU),
       ('cudnnlstm', keras.layers.CuDNNLSTM),
   )
+  @test_util.run_gpu_only
   def test_specify_initial_state_keras_tensor(self, layer_class):
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_size = 10
-        timesteps = 6
-        units = 2
-        num_samples = 32
-        num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1
-
-        inputs = keras.Input((timesteps, input_size))
-        initial_state = [keras.Input((units,)) for _ in range(num_states)]
-        layer = layer_class(units)
-        if len(initial_state) == 1:
-          output = layer(inputs, initial_state=initial_state[0])
-        else:
-          output = layer(inputs, initial_state=initial_state)
-        self.assertIn(initial_state[0], layer._inbound_nodes[0].input_tensors)
-
-        model = keras.models.Model([inputs] + initial_state, output)
-        model.compile(loss='categorical_crossentropy', optimizer='adam')
-
-        inputs = np.random.random((num_samples, timesteps, input_size))
-        initial_state = [
-            np.random.random((num_samples, units)) for _ in range(num_states)
-        ]
-        targets = np.random.random((num_samples, units))
-        model.fit([inputs] + initial_state, targets)
+    input_size = 10
+    timesteps = 6
+    units = 2
+    num_samples = 32
+    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1
+
+    inputs = keras.Input((timesteps, input_size))
+    initial_state = [keras.Input((units,)) for _ in range(num_states)]
+    layer = layer_class(units)
+    if len(initial_state) == 1:
+      output = layer(inputs, initial_state=initial_state[0])
+    else:
+      output = layer(inputs, initial_state=initial_state)
+    self.assertIn(initial_state[0], layer._inbound_nodes[0].input_tensors)
+
+    model = keras.models.Model([inputs] + initial_state, output)
+    model.compile(loss='categorical_crossentropy',
+                  optimizer=RMSprop(learning_rate=0.001),
+                  run_eagerly=testing_utils.should_run_eagerly())
+
+    inputs = np.random.random((num_samples, timesteps, input_size))
+    initial_state = [
+        np.random.random((num_samples, units)) for _ in range(num_states)
+    ]
+    targets = np.random.random((num_samples, units))
+    model.fit([inputs] + initial_state, targets)
+
+
+class CuDNNGraphOnlyTest(keras_parameterized.TestCase):
 
   @parameterized.named_parameters(
       ('cudnngru', keras.layers.CuDNNGRU),
       ('cudnnlstm', keras.layers.CuDNNLSTM),
   )
+  @test_util.run_deprecated_v1
+  @test_util.run_gpu_only
+  def test_regularizer(self, layer_class):
+    input_size = 10
+    timesteps = 6
+    units = 2
+    num_samples = 32
+    layer = layer_class(
+        units,
+        return_sequences=False,
+        input_shape=(timesteps, input_size),
+        kernel_regularizer=keras.regularizers.l1(0.01),
+        recurrent_regularizer=keras.regularizers.l1(0.01),
+        bias_regularizer='l2')
+    layer.build((None, None, input_size))
+    self.assertEqual(len(layer.losses), 3)
+
+    layer = layer_class(
+        units,
+        return_sequences=False,
+        input_shape=(timesteps, input_size),
+        activity_regularizer='l2')
+    self.assertTrue(layer.activity_regularizer)
+    x = keras.backend.variable(
+        np.ones((num_samples, timesteps, input_size)))
+    layer(x)
+    self.assertEqual(len(layer.get_losses_for(x)), 1)
+
+  @parameterized.named_parameters(
+      ('cudnngru', keras.layers.CuDNNGRU),
+      ('cudnnlstm', keras.layers.CuDNNLSTM),
+  )
+  @test_util.run_gpu_only
+  @test_util.run_v1_only('b/120941292')
   def test_statefulness(self, layer_class):
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_size = 10
-        timesteps = 6
-        units = 2
-        num_samples = 32
-
-        model = keras.models.Sequential()
-        model.add(
-            keras.layers.Embedding(
-                10,
-                input_size,
-                input_length=timesteps,
-                batch_input_shape=(num_samples, timesteps)))
-        layer = layer_class(
-            units, return_sequences=False, stateful=True, weights=None)
-        model.add(layer)
-        model.compile(optimizer='sgd', loss='mse')
-        out1 = model.predict(np.ones((num_samples, timesteps)))
-        self.assertEqual(out1.shape, (num_samples, units))
-
-        # train once so that the states change
-        model.train_on_batch(
-            np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
-        out2 = model.predict(np.ones((num_samples, timesteps)))
-
-        # if the state is not reset, output should be different
-        self.assertNotEqual(out1.max(), out2.max())
-
-        # check that output changes after states are reset
-        # (even though the model itself didn't change)
-        layer.reset_states()
-        out3 = model.predict(np.ones((num_samples, timesteps)))
-        self.assertNotEqual(out2.max(), out3.max())
-
-        # check that container-level reset_states() works
-        model.reset_states()
-        out4 = model.predict(np.ones((num_samples, timesteps)))
-        self.assertAllClose(out3, out4, atol=1e-5)
-
-        # check that the call to `predict` updated the states
-        out5 = model.predict(np.ones((num_samples, timesteps)))
-        self.assertNotEqual(out4.max(), out5.max())
+    input_size = 10
+    timesteps = 6
+    units = 2
+    num_samples = 32
+
+    with self.cached_session(use_gpu=True):
+      model = keras.models.Sequential()
+      model.add(
+          keras.layers.Embedding(
+              10,
+              input_size,
+              input_length=timesteps,
+              batch_input_shape=(num_samples, timesteps)))
+      layer = layer_class(
+          units, return_sequences=False, stateful=True, weights=None)
+      model.add(layer)
+      model.compile(optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                    loss='mse')
+      out1 = model.predict(np.ones((num_samples, timesteps)))
+      self.assertEqual(out1.shape, (num_samples, units))
+
+      # train once so that the states change
+      model.train_on_batch(
+          np.ones((num_samples, timesteps)), np.ones((num_samples, units)))
+      out2 = model.predict(np.ones((num_samples, timesteps)))
+
+      # if the state is not reset, output should be different
+      self.assertNotEqual(out1.max(), out2.max())
+
+      # check that output changes after states are reset
+      # (even though the model itself didn't change)
+      layer.reset_states()
+      out3 = model.predict(np.ones((num_samples, timesteps)))
+      self.assertNotEqual(out2.max(), out3.max())
+
+      # check that container-level reset_states() works
+      model.reset_states()
+      out4 = model.predict(np.ones((num_samples, timesteps)))
+      self.assertAllClose(out3, out4, atol=1e-5)
+
+      # check that the call to `predict` updated the states
+      out5 = model.predict(np.ones((num_samples, timesteps)))
+      self.assertNotEqual(out4.max(), out5.max())
+
+
+@test_util.run_all_in_graph_and_eager_modes
+class CuDNNV1OnlyTest(keras_parameterized.TestCase):
+
+  @test_util.run_gpu_only
+  def test_trainability(self):
+    input_size = 10
+    units = 2
+    for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
+      layer = layer_class(units)
+      layer.build((None, None, input_size))
+      self.assertEqual(len(layer.weights), 3)
+      self.assertEqual(len(layer.trainable_weights), 3)
+      self.assertEqual(len(layer.non_trainable_weights), 0)
+      layer.trainable = False
+      self.assertEqual(len(layer.weights), 3)
+      self.assertEqual(len(layer.non_trainable_weights), 3)
+      self.assertEqual(len(layer.trainable_weights), 0)
+      layer.trainable = True
+      self.assertEqual(len(layer.weights), 3)
+      self.assertEqual(len(layer.trainable_weights), 3)
+      self.assertEqual(len(layer.non_trainable_weights), 0)
 
   @parameterized.named_parameters(
       *test_util.generate_combinations_with_testcase_name(
           rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False],
           bidirectional=[True, False], implementation=[1, 2],
           model_nest_level=[1, 2], model_type=['seq', 'func']))
+  @test_util.run_v1_only('b/120911602, b/112083752')
+  @test_util.run_gpu_only
   def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                              bidirectional, implementation,
                                              model_nest_level, model_type):
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_size = 10
-        timesteps = 6
-        input_shape = (timesteps, input_size)
-        units = 2
-        num_samples = 32
-        inputs = np.random.random((num_samples, timesteps, input_size))
-
-        rnn_layer_kwargs = {
-            'recurrent_activation': 'sigmoid',
-            # ensure biases are non-zero and properly converted
-            'bias_initializer': 'random_uniform',
-            'implementation': implementation
-        }
-        if rnn_type == 'LSTM':
-          rnn_layer_class = keras.layers.LSTM
-          cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
-        else:
-          rnn_layer_class = keras.layers.GRU
-          cudnn_rnn_layer_class = keras.layers.CuDNNGRU
-          rnn_layer_kwargs['reset_after'] = True
-
-        layer = rnn_layer_class(units, **rnn_layer_kwargs)
-        if bidirectional:
-          layer = keras.layers.Bidirectional(layer)
-
-        cudnn_layer = cudnn_rnn_layer_class(units)
-        if bidirectional:
-          cudnn_layer = keras.layers.Bidirectional(cudnn_layer)
-
-        model = self._make_nested_model(input_shape, layer, model_nest_level,
-                                        model_type)
-        cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
-                                              model_nest_level, model_type)
-
-        if to_cudnn:
-          self._convert_model_weights(model, cudnn_model)
-        else:
-          self._convert_model_weights(cudnn_model, model)
-
-        self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
-                            atol=1e-4)
+    input_size = 10
+    timesteps = 6
+    input_shape = (timesteps, input_size)
+    units = 2
+    num_samples = 32
+    inputs = np.random.random((num_samples, timesteps, input_size))
+
+    rnn_layer_kwargs = {
+        'recurrent_activation': 'sigmoid',
+        # ensure biases are non-zero and properly converted
+        'bias_initializer': 'random_uniform',
+        'implementation': implementation
+    }
+    if rnn_type == 'LSTM':
+      rnn_layer_class = keras.layers.LSTM
+      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
+    else:
+      rnn_layer_class = keras.layers.GRU
+      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
+      rnn_layer_kwargs['reset_after'] = True
+
+    layer = rnn_layer_class(units, **rnn_layer_kwargs)
+    if bidirectional:
+      layer = keras.layers.Bidirectional(layer)
+
+    cudnn_layer = cudnn_rnn_layer_class(units)
+    if bidirectional:
+      cudnn_layer = keras.layers.Bidirectional(cudnn_layer)
+
+    model = self._make_nested_model(input_shape, layer, model_nest_level,
+                                    model_type)
+    cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
+                                          model_nest_level, model_type)
+
+    if to_cudnn:
+      self._convert_model_weights(model, cudnn_model)
+    else:
+      self._convert_model_weights(cudnn_model, model)
+
+    self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
+                        atol=1e-4)
 
   def _make_nested_model(self, input_shape, layer, level=1, model_type='func'):
     # example: make_nested_seq_model((1,), Dense(10), level=2).summary()
@@ -330,153 +346,150 @@ class CuDNNTest(test.TestCase, parameterized.TestCase):
   @parameterized.named_parameters(
       *test_util.generate_combinations_with_testcase_name(
           rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
+  @test_util.run_v1_only('b/120911602')
+  @test_util.run_gpu_only
   def test_load_weights_between_noncudnn_rnn_time_distributed(self, rnn_type,
                                                               to_cudnn):
     # Similar test as test_load_weights_between_noncudnn_rnn() but has different
     # rank of input due to usage of TimeDistributed. Issue: #10356.
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_size = 10
-        steps = 6
-        timesteps = 6
-        input_shape = (timesteps, steps, input_size)
-        units = 2
-        num_samples = 32
-        inputs = np.random.random((num_samples, timesteps, steps, input_size))
-
-        rnn_layer_kwargs = {
-            'recurrent_activation': 'sigmoid',
-            # ensure biases are non-zero and properly converted
-            'bias_initializer': 'random_uniform',
-        }
-        if rnn_type == 'LSTM':
-          rnn_layer_class = keras.layers.LSTM
-          cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
-        else:
-          rnn_layer_class = keras.layers.GRU
-          cudnn_rnn_layer_class = keras.layers.CuDNNGRU
-          rnn_layer_kwargs['reset_after'] = True
-
-        layer = rnn_layer_class(units, **rnn_layer_kwargs)
-        layer = keras.layers.TimeDistributed(layer)
-
-        cudnn_layer = cudnn_rnn_layer_class(units)
-        cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)
-
-        model = self._make_nested_model(input_shape, layer)
-        cudnn_model = self._make_nested_model(input_shape, cudnn_layer)
-
-        if to_cudnn:
-          self._convert_model_weights(model, cudnn_model)
-        else:
-          self._convert_model_weights(cudnn_model, model)
-
-        self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
-                            atol=1e-4)
-
-  @test_util.run_in_graph_and_eager_modes
+    input_size = 10
+    steps = 6
+    timesteps = 6
+    input_shape = (timesteps, steps, input_size)
+    units = 2
+    num_samples = 32
+    inputs = np.random.random((num_samples, timesteps, steps, input_size))
+
+    rnn_layer_kwargs = {
+        'recurrent_activation': 'sigmoid',
+        # ensure biases are non-zero and properly converted
+        'bias_initializer': 'random_uniform',
+    }
+    if rnn_type == 'LSTM':
+      rnn_layer_class = keras.layers.LSTM
+      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
+    else:
+      rnn_layer_class = keras.layers.GRU
+      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
+      rnn_layer_kwargs['reset_after'] = True
+
+    layer = rnn_layer_class(units, **rnn_layer_kwargs)
+    layer = keras.layers.TimeDistributed(layer)
+
+    cudnn_layer = cudnn_rnn_layer_class(units)
+    cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)
+
+    model = self._make_nested_model(input_shape, layer)
+    cudnn_model = self._make_nested_model(input_shape, cudnn_layer)
+
+    if to_cudnn:
+      self._convert_model_weights(model, cudnn_model)
+    else:
+      self._convert_model_weights(cudnn_model, model)
+
+    self.assertAllClose(model.predict(inputs), cudnn_model.predict(inputs),
+                        atol=1e-4)
+
+  @test_util.run_gpu_only
   def test_cudnnrnn_bidirectional(self):
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        rnn = keras.layers.CuDNNGRU
-        samples = 2
-        dim = 2
-        timesteps = 2
-        output_dim = 2
-        mode = 'concat'
-
-        x = np.random.random((samples, timesteps, dim))
-        target_dim = 2 * output_dim if mode == 'concat' else output_dim
-        y = np.random.random((samples, target_dim))
-
-        # test with Sequential model
-        model = keras.Sequential()
-        model.add(
-            keras.layers.Bidirectional(
-                rnn(output_dim), merge_mode=mode, input_shape=(None, dim)))
-        model.compile(
-            loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
-        model.fit(x, y, epochs=1, batch_size=1)
-
-        # test config
-        model.get_config()
-        model = keras.models.model_from_json(model.to_json())
-        model.summary()
-
-        # test stacked bidirectional layers
-        model = keras.Sequential()
-        model.add(
-            keras.layers.Bidirectional(
-                rnn(output_dim, return_sequences=True),
-                merge_mode=mode,
-                input_shape=(None, dim)))
-        model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode))
-        model.compile(
-            loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
-        model.fit(x, y, epochs=1, batch_size=1)
-
-        # test with functional API
-        inputs = keras.Input((timesteps, dim))
-        outputs = keras.layers.Bidirectional(
-            rnn(output_dim), merge_mode=mode)(
-                inputs)
-        model = keras.Model(inputs, outputs)
-        model.compile(
-            loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
-        model.fit(x, y, epochs=1, batch_size=1)
-
-        # Bidirectional and stateful
-        inputs = keras.Input(batch_shape=(1, timesteps, dim))
-        outputs = keras.layers.Bidirectional(
-            rnn(output_dim, stateful=True), merge_mode=mode)(
-                inputs)
-        model = keras.Model(inputs, outputs)
-        model.compile(
-            loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
-        model.fit(x, y, epochs=1, batch_size=1)
-
+    rnn = keras.layers.CuDNNGRU
+    samples = 2
+    dim = 2
+    timesteps = 2
+    output_dim = 2
+    mode = 'concat'
+
+    x = np.random.random((samples, timesteps, dim))
+    target_dim = 2 * output_dim if mode == 'concat' else output_dim
+    y = np.random.random((samples, target_dim))
+
+    # test with Sequential model
+    model = keras.Sequential()
+    model.add(
+        keras.layers.Bidirectional(
+            rnn(output_dim), merge_mode=mode, input_shape=(None, dim)))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.fit(x, y, epochs=1, batch_size=1)
+
+    # test config
+    model.get_config()
+    model = keras.models.model_from_json(model.to_json())
+    model.summary()
+
+    # test stacked bidirectional layers
+    model = keras.Sequential()
+    model.add(
+        keras.layers.Bidirectional(
+            rnn(output_dim, return_sequences=True),
+            merge_mode=mode,
+            input_shape=(None, dim)))
+    model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode))
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.fit(x, y, epochs=1, batch_size=1)
+
+    # test with functional API
+    inputs = keras.Input((timesteps, dim))
+    outputs = keras.layers.Bidirectional(
+        rnn(output_dim), merge_mode=mode)(
+            inputs)
+    model = keras.Model(inputs, outputs)
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.fit(x, y, epochs=1, batch_size=1)
+
+    # Bidirectional and stateful
+    inputs = keras.Input(batch_shape=(1, timesteps, dim))
+    outputs = keras.layers.Bidirectional(
+        rnn(output_dim, stateful=True), merge_mode=mode)(
+            inputs)
+    model = keras.Model(inputs, outputs)
+    model.compile(
+        loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001))
+    model.fit(x, y, epochs=1, batch_size=1)
+
+  @test_util.run_gpu_only
   def test_preprocess_weights_for_loading_gru_incompatible(self):
     """Test loading weights between incompatible layers.
 
     Should fail fast with an exception.
     """
-    if test.is_gpu_available(cuda_only=True):
-      with self.session(use_gpu=True):
-        input_shape = (3, 5)
-
-        def gru(cudnn=False, **kwargs):
-          layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRU
-          return layer_class(2, input_shape=input_shape, **kwargs)
-
-        def get_layer_weights(layer):
-          layer.build(input_shape=input_shape)
-          return layer.get_weights()
-
-        def assert_not_compatible(src, dest, message):
-          with self.assertRaises(ValueError) as ex:
-            keras.engine.saving.preprocess_weights_for_loading(
-                dest,
-                get_layer_weights(src))
-          self.assertIn(message, str(ex.exception))
-
-        assert_not_compatible(
-            gru(),
-            gru(cudnn=True),
-            'GRU(reset_after=False) is not compatible with CuDNNGRU')
-        assert_not_compatible(
-            gru(cudnn=True),
-            gru(),
-            'CuDNNGRU is not compatible with GRU(reset_after=False)')
-        assert_not_compatible(
-            gru(),
-            gru(reset_after=True),
-            'GRU(reset_after=False) is not compatible with '
-            'GRU(reset_after=True)')
-        assert_not_compatible(
-            gru(reset_after=True),
-            gru(),
-            'GRU(reset_after=True) is not compatible with '
-            'GRU(reset_after=False)')
+    input_shape = (3, 5)
+
+    def gru(cudnn=False, **kwargs):
+      layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRU
+      return layer_class(2, input_shape=input_shape, **kwargs)
+
+    def get_layer_weights(layer):
+      layer.build(input_shape=input_shape)
+      return layer.get_weights()
+
+    def assert_not_compatible(src, dest, message):
+      with self.assertRaises(ValueError) as ex:
+        keras.engine.saving.preprocess_weights_for_loading(
+            dest,
+            get_layer_weights(src))
+      self.assertIn(message, str(ex.exception))
+
+    assert_not_compatible(
+        gru(),
+        gru(cudnn=True),
+        'GRU(reset_after=False) is not compatible with CuDNNGRU')
+    assert_not_compatible(
+        gru(cudnn=True),
+        gru(),
+        'CuDNNGRU is not compatible with GRU(reset_after=False)')
+    assert_not_compatible(
+        gru(),
+        gru(reset_after=True),
+        'GRU(reset_after=False) is not compatible with '
+        'GRU(reset_after=True)')
+    assert_not_compatible(
+        gru(reset_after=True),
+        gru(),
+        'GRU(reset_after=True) is not compatible with '
+        'GRU(reset_after=False)')
 
 
 if __name__ == '__main__':
-- 
GitLab


From f7e1f72cf63e3df0698ab1ce471cc014e057544f Mon Sep 17 00:00:00 2001
From: James Qin <jamesqin@google.com>
Date: Mon, 17 Dec 2018 14:01:01 -0800
Subject: [PATCH 694/873] Fix an issue in keras input_layer.Input() default
 dtype.

PiperOrigin-RevId: 225885182
---
 tensorflow/python/keras/engine/input_layer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/keras/engine/input_layer.py b/tensorflow/python/keras/engine/input_layer.py
index 9874efe2bc..a8303dc663 100644
--- a/tensorflow/python/keras/engine/input_layer.py
+++ b/tensorflow/python/keras/engine/input_layer.py
@@ -77,6 +77,8 @@ class InputLayer(base_layer.Layer):
         dtype = backend.floatx()
       else:
         dtype = backend.dtype(input_tensor)
+    elif input_tensor and input_tensor.dtype != dtype:
+      raise ValueError('`input_tensor.dtype` differs from `dtype`.')
     super(InputLayer, self).__init__(dtype=dtype, name=name)
     self.built = True
     self.sparse = sparse
@@ -215,8 +217,6 @@ def Input(  # pylint: disable=invalid-name
   if kwargs:
     raise ValueError('Unrecognized keyword arguments:', kwargs.keys())
 
-  if dtype is None:
-    dtype = backend.floatx()
   if shape is None and tensor is None:
     raise ValueError('Please provide to Input either a `shape`'
                      ' or a `tensor` argument. Note that '
-- 
GitLab


From 6229528cb5b9c5b2a9bffe3f358631af4ef9b417 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Mon, 17 Dec 2018 14:04:24 -0800
Subject: [PATCH 695/873] Roll-forward CUDA logging.

PiperOrigin-RevId: 225886083
---
 tensorflow/stream_executor/BUILD              |  3 ++
 .../stream_executor/stream_executor_pimpl.cc  | 30 ++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index 00c23b8d17..c43efc799c 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -54,7 +54,9 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc_impl",
+        ":logging_proto_cc_impl",
         "//tensorflow/core:lib",
+        "//tensorflow/core:logger",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
@@ -71,6 +73,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc",
+        ":logging_proto_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/strings",
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index d1d0bd9bc2..7b3e5c6397 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/strings/str_cat.h"
+#include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/util/env_var.h"
 #include "tensorflow/stream_executor/blas.h"
 #include "tensorflow/stream_executor/fft.h"
@@ -33,6 +34,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 #include "tensorflow/stream_executor/lib/threadpool.h"
+#include "tensorflow/stream_executor/logging.pb.h"
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/rng.h"
 #include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -192,6 +194,8 @@ StreamExecutor::StreamExecutor(
     platform_kind_ = PlatformKind::kOpenCL;
   } else if (port::Lowercase(platform_->Name()) == "host") {
     platform_kind_ = PlatformKind::kHost;
+  } else {
+    platform_kind_ = PlatformKind::kInvalid;
   }
 }
 
@@ -217,7 +221,31 @@ StreamExecutor::~StreamExecutor() {
 port::Status StreamExecutor::Init(int device_ordinal,
                                   DeviceOptions device_options) {
   device_ordinal_ = device_ordinal;
-  return implementation_->Init(device_ordinal, std::move(device_options));
+  TF_RETURN_IF_ERROR(
+      implementation_->Init(device_ordinal, std::move(device_options)));
+
+  if (platform_kind_ == PlatformKind::kCuda) {
+    CudaInfo info;
+
+    int cc_major, cc_minor;
+    GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor);
+    info.mutable_compute_capability()->set_major(cc_major);
+    info.mutable_compute_capability()->set_minor(cc_minor);
+
+    if (auto *dnn = AsDnn()) {
+      port::StatusOr<dnn::VersionInfo> version_or = dnn->GetVersion();
+      if (version_or.ok()) {
+        const auto &version = version_or.ValueOrDie();
+        info.mutable_cudnn_version()->set_major(version.major_version());
+        info.mutable_cudnn_version()->set_minor(version.minor_version());
+        info.mutable_cudnn_version()->set_patch(version.patch());
+      }
+    }
+
+    tensorflow::Logger::Singleton()->LogProto(info);
+  }
+
+  return port::Status::OK();
 }
 
 port::Status StreamExecutor::Init() {
-- 
GitLab


From f73bdd710eb43285b3df374fd98a3af5e2e544de Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 14:07:40 -0800
Subject: [PATCH 696/873] Allow op-level profiling of Flex ops.

PiperOrigin-RevId: 225886758
---
 tensorflow/lite/c/c_api_internal.h       | 3 +++
 tensorflow/lite/core/subgraph.cc         | 1 +
 tensorflow/lite/core/subgraph.h          | 5 ++++-
 tensorflow/lite/delegates/flex/BUILD     | 1 +
 tensorflow/lite/delegates/flex/kernel.cc | 7 +++++++
 tensorflow/lite/profiling/profiler.h     | 9 ++++++---
 6 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/tensorflow/lite/c/c_api_internal.h b/tensorflow/lite/c/c_api_internal.h
index 1cd84eff5c..1b1bc6db8f 100644
--- a/tensorflow/lite/c/c_api_internal.h
+++ b/tensorflow/lite/c/c_api_internal.h
@@ -397,6 +397,9 @@ typedef struct TfLiteContext {
   // default: false.
   // WARNING: This is an experimental API and subject to change.
   bool allow_fp32_relax_to_fp16;
+
+  // Pointer to the op-level profiler, if set; nullptr otherwise.
+  void* profiler;
 } TfLiteContext;
 
 typedef struct _TfLiteRegistration {
diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc
index 90361faeae..855c94e206 100644
--- a/tensorflow/lite/core/subgraph.cc
+++ b/tensorflow/lite/core/subgraph.cc
@@ -126,6 +126,7 @@ Subgraph::Subgraph(ErrorReporter* error_reporter,
   context_->recommended_num_threads = -1;
   context_->GetExternalContext = GetExternalContext;
   context_->SetExternalContext = SetExternalContext;
+  context_->profiler = nullptr;
 
   // Reserve some space for the tensors to avoid excessive resizing.
   tensors_.reserve(kTensorsReservedCapacity);
diff --git a/tensorflow/lite/core/subgraph.h b/tensorflow/lite/core/subgraph.h
index 2a7c3a7c32..8fda701415 100644
--- a/tensorflow/lite/core/subgraph.h
+++ b/tensorflow/lite/core/subgraph.h
@@ -242,7 +242,10 @@ class Subgraph {
   // WARNING: This is an experimental API and subject to change.
   TfLiteStatus ResetVariableTensors();
 
-  void SetProfiler(profiling::Profiler* profiler) { profiler_ = profiler; }
+  void SetProfiler(profiling::Profiler* profiler) {
+    profiler_ = profiler;
+    context_->profiler = profiler;
+  }
 
   profiling::Profiler* GetProfiler() { return profiler_; }
 
diff --git a/tensorflow/lite/delegates/flex/BUILD b/tensorflow/lite/delegates/flex/BUILD
index 75083bf95a..bca8e514fe 100644
--- a/tensorflow/lite/delegates/flex/BUILD
+++ b/tensorflow/lite/delegates/flex/BUILD
@@ -157,6 +157,7 @@ cc_library(
         "//tensorflow/lite:kernel_api",
         "//tensorflow/lite:string",
         "//tensorflow/lite/kernels:kernel_util",
+        "//tensorflow/lite/profiling:profiler",
         "//tensorflow/core/common_runtime/eager:context",
         "//tensorflow/core/common_runtime/eager:execute",
         "//tensorflow/core/common_runtime/eager:tensor_handle",
diff --git a/tensorflow/lite/delegates/flex/kernel.cc b/tensorflow/lite/delegates/flex/kernel.cc
index 02da1d1a22..4e66921146 100644
--- a/tensorflow/lite/delegates/flex/kernel.cc
+++ b/tensorflow/lite/delegates/flex/kernel.cc
@@ -27,6 +27,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/flex/delegate_data.h"
 #include "tensorflow/lite/delegates/flex/util.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/profiling/profiler.h"
 #include "tensorflow/lite/string.h"
 
 // Note: this is part of TF Lite's Flex delegation code which is to be
@@ -131,6 +132,8 @@ tensorflow::Status ExecuteFlexOp(tensorflow::EagerContext* eager_context,
 struct OpNode {
   // The name of the TensorFlow op to execute.
   string name;
+  // Index of this node into TF Lite's operator list.
+  int index;
   // The corresponding NodeDef, containing the attributes for the op.
   tensorflow::NodeDef nodedef;
   // List of inputs, as TF Lite tensor indices.
@@ -181,6 +184,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
     op_data->nodes.push_back(OpNode());
     OpNode& node_data = op_data->nodes.back();
 
+    node_data.index = node_index;
     node_data.name = "";
     if (node->custom_initial_data) {
       // The flexbuffer contains a vector where the first elements is the
@@ -270,6 +274,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   // Execute the TensorFlow Ops sequentially.
   for (const auto& node_data : op_data->nodes) {
+    SCOPED_TAGGED_OPERATOR_PROFILE(
+        reinterpret_cast<profiling::Profiler*>(context->profiler),
+        node_data.name.c_str(), node_data.index);
     if (node_data.nodedef.op().empty()) {
       context->ReportError(context, "Invalid NodeDef in Flex op '%s'",
                            node_data.name.c_str());
diff --git a/tensorflow/lite/profiling/profiler.h b/tensorflow/lite/profiling/profiler.h
index 89c05cba37..dd45518b5b 100644
--- a/tensorflow/lite/profiling/profiler.h
+++ b/tensorflow/lite/profiling/profiler.h
@@ -153,9 +153,11 @@ class ScopedOperatorProfile {
 
 #define VARNAME_UNIQ(name, ctr) name##ctr
 
-#define SCOPED_OPERATOR_PROFILE(profiler, node_index)    \
-  tflite::profiling::ScopedOperatorProfile VARNAME_UNIQ( \
-      _profile_, __COUNTER__)((profiler), "OpInvoke", (node_index))
+#define SCOPED_TAGGED_OPERATOR_PROFILE(profiler, tag, node_index) \
+  tflite::profiling::ScopedOperatorProfile VARNAME_UNIQ(          \
+      _profile_, __COUNTER__)((profiler), (tag), (node_index))
+#define SCOPED_OPERATOR_PROFILE(profiler, node_index) \
+  SCOPED_TAGGED_OPERATOR_PROFILE((profiler), "OpInvoke", (node_index))
 #else
 
 namespace tflite {
@@ -172,6 +174,7 @@ class Profiler {
 }  // namespace profiling
 }  // namespace tflite
 
+#define SCOPED_TAGGED_OPERATOR_PROFILE(profiler, tag, node_index)
 #define SCOPED_OPERATOR_PROFILE(profiler, node_index)
 
 #endif  // TFLITE_PROFILING_ENABLED
-- 
GitLab


From 282329c55647546f6a8be2781cfbb5f26140a402 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 14:11:19 -0800
Subject: [PATCH 697/873] Automated rollback of commit
 33cbbf4f68a68a99dd822c6a4999f0f254dbe1e2

PiperOrigin-RevId: 225887383
---
 tensorflow/python/ops/nn_impl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py
index 48dcab4842..841bac8bea 100644
--- a/tensorflow/python/ops/nn_impl.py
+++ b/tensorflow/python/ops/nn_impl.py
@@ -398,7 +398,7 @@ def _count_nonzero(input_tensor, dtype=dtypes.int64):
   Returns:
       number of nonzero values with type dtype
   """
-  with ops.name_scope("count_nonzero", [input_tensor]):
+  with ops.name_scope("count_nonzero", values=[input_tensor]):
     zero = array_ops.zeros([], dtype=input_tensor.dtype)
     nonzero_count = math_ops.reduce_sum(
         math_ops.cast(
-- 
GitLab


From 36304bc4ceb6140e470420b65ce470092fc47ab2 Mon Sep 17 00:00:00 2001
From: Scott Zhu <scottzhu@google.com>
Date: Mon, 17 Dec 2018 14:25:40 -0800
Subject: [PATCH 698/873] Add dtype check for RNN cells' inputs.

For Basic RNN, GRU and LSTM, those cells use tanh and sigmoid activation
function, which only support floats and complex as input types.

See #23878 for more context.

PiperOrigin-RevId: 225890009
---
 tensorflow/python/kernel_tests/rnn_test.py | 20 ++++++++++
 tensorflow/python/ops/rnn_cell_impl.py     | 46 ++++++++++++++++++++--
 2 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py
index a49496e4ef..12b69da6c2 100644
--- a/tensorflow/python/kernel_tests/rnn_test.py
+++ b/tensorflow/python/kernel_tests/rnn_test.py
@@ -172,6 +172,26 @@ class RNNTest(test.TestCase):
           dtype=dtypes.float32,
           sequence_length=[[4]])
 
+  @test_util.run_in_graph_and_eager_modes
+  def testInvalidDtype(self):
+    if context.executing_eagerly():
+      inputs = np.zeros((3, 4, 5), dtype=np.int32)
+    else:
+      inputs = array_ops.placeholder(dtypes.int32, shape=(3, 4, 5))
+
+    cells = [
+        rnn_cell_impl.BasicRNNCell,
+        rnn_cell_impl.GRUCell,
+        rnn_cell_impl.BasicLSTMCell,
+        rnn_cell_impl.LSTMCell,
+    ]
+    for cell_cls in cells:
+      with self.cached_session():
+        with self.assertRaisesRegexp(
+            ValueError, "RNN cell only supports floating"):
+          cell = cell_cls(2, dtype=dtypes.int32)
+          rnn.dynamic_rnn(cell, inputs, dtype=dtypes.int32)
+
   @test_util.run_in_graph_and_eager_modes
   def testBatchSizeFromInput(self):
     cell = Plus1RNNCell()
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index ffc45619a7..d808a0c56e 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -405,6 +405,7 @@ class BasicRNNCell(LayerRNNCell):
                **kwargs):
     super(BasicRNNCell, self).__init__(
         _reuse=reuse, name=name, dtype=dtype, **kwargs)
+    _check_supported_dtypes(self.dtype)
     if context.executing_eagerly() and context.num_gpus() > 0:
       logging.warn("%s: Note that this cell is not optimized for performance. "
                    "Please use tf.contrib.cudnn_rnn.CudnnRNNTanh for better "
@@ -432,6 +433,7 @@ class BasicRNNCell(LayerRNNCell):
     if inputs_shape[-1] is None:
       raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                        % str(inputs_shape))
+    _check_supported_dtypes(self.dtype)
 
     input_depth = inputs_shape[-1]
     self._kernel = self.add_variable(
@@ -446,7 +448,7 @@ class BasicRNNCell(LayerRNNCell):
 
   def call(self, inputs, state):
     """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
-
+    _check_rnn_cell_input_dtypes([inputs, state])
     gate_inputs = math_ops.matmul(
         array_ops.concat([inputs, state], 1), self._kernel)
     gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
@@ -502,6 +504,7 @@ class GRUCell(LayerRNNCell):
                **kwargs):
     super(GRUCell, self).__init__(
         _reuse=reuse, name=name, dtype=dtype, **kwargs)
+    _check_supported_dtypes(self.dtype)
 
     if context.executing_eagerly() and context.num_gpus() > 0:
       logging.warn("%s: Note that this cell is not optimized for performance. "
@@ -531,7 +534,7 @@ class GRUCell(LayerRNNCell):
     if inputs_shape[-1] is None:
       raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                        % str(inputs_shape))
-
+    _check_supported_dtypes(self.dtype)
     input_depth = inputs_shape[-1]
     self._gate_kernel = self.add_variable(
         "gates/%s" % _WEIGHTS_VARIABLE_NAME,
@@ -560,6 +563,7 @@ class GRUCell(LayerRNNCell):
 
   def call(self, inputs, state):
     """Gated recurrent unit (GRU) with nunits cells."""
+    _check_rnn_cell_input_dtypes([inputs, state])
 
     gate_inputs = math_ops.matmul(
         array_ops.concat([inputs, state], 1), self._gate_kernel)
@@ -675,6 +679,7 @@ class BasicLSTMCell(LayerRNNCell):
     """
     super(BasicLSTMCell, self).__init__(
         _reuse=reuse, name=name, dtype=dtype, **kwargs)
+    _check_supported_dtypes(self.dtype)
     if not state_is_tuple:
       logging.warn("%s: Using a concatenated state is slower and will soon be "
                    "deprecated.  Use state_is_tuple=True.", self)
@@ -708,7 +713,7 @@ class BasicLSTMCell(LayerRNNCell):
     if inputs_shape[-1] is None:
       raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                        % str(inputs_shape))
-
+    _check_supported_dtypes(self.dtype)
     input_depth = inputs_shape[-1]
     h_depth = self._num_units
     self._kernel = self.add_variable(
@@ -736,6 +741,8 @@ class BasicLSTMCell(LayerRNNCell):
         `LSTMStateTuple` or a concatenated state, depending on
         `state_is_tuple`).
     """
+    _check_rnn_cell_input_dtypes([inputs, state])
+
     sigmoid = math_ops.sigmoid
     one = constant_op.constant(1, dtype=dtypes.int32)
     # Parameters of gates are concatenated into one multiply for efficiency.
@@ -858,6 +865,7 @@ class LSTMCell(LayerRNNCell):
     """
     super(LSTMCell, self).__init__(
         _reuse=reuse, name=name, dtype=dtype, **kwargs)
+    _check_supported_dtypes(self.dtype)
     if not state_is_tuple:
       logging.warn("%s: Using a concatenated state is slower and will soon be "
                    "deprecated.  Use state_is_tuple=True.", self)
@@ -913,7 +921,7 @@ class LSTMCell(LayerRNNCell):
     if inputs_shape[-1] is None:
       raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                        % str(inputs_shape))
-
+    _check_supported_dtypes(self.dtype)
     input_depth = inputs_shape[-1]
     h_depth = self._num_units if self._num_proj is None else self._num_proj
     maybe_partitioner = (
@@ -979,6 +987,8 @@ class LSTMCell(LayerRNNCell):
       ValueError: If input size cannot be inferred from inputs via
         static shape inference.
     """
+    _check_rnn_cell_input_dtypes([inputs, state])
+
     num_proj = self._num_units if self._num_proj is None else self._num_proj
     sigmoid = math_ops.sigmoid
 
@@ -1519,3 +1529,31 @@ class MultiRNNCell(RNNCell):
                   array_ops.concat(new_states, 1))
 
     return cur_inp, new_states
+
+
+def _check_rnn_cell_input_dtypes(inputs):
+  """Check whether the input tensors are with supported dtypes.
+
+  Default RNN cells only support floats and complex as its dtypes since the
+  activation function (tanh and sigmoid) only allow those types. This function
+  will throw a proper error message if the inputs is not in a supported type.
+
+  Args:
+    inputs: tensor or nested structure of tensors that are feed to RNN cell as
+      input or state.
+
+  Raises:
+    ValueError: if any of the input tensor are not having dtypes of float or
+      complex.
+  """
+  for t in nest.flatten(inputs):
+    _check_supported_dtypes(t.dtype)
+
+
+def _check_supported_dtypes(dtype):
+  if dtype is None:
+    return
+  dtype = dtypes.as_dtype(dtype)
+  if not (dtype.is_floating or dtype.is_complex):
+    raise ValueError("RNN cell only supports floating point inputs, "
+                     "but saw dtype: %s" % dtype)
-- 
GitLab


From 9bbaf410b220c4340ee0fe2b07bedf42516c1ec5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 15:13:01 -0800
Subject: [PATCH 699/873] Typo fix in StoreDatasetInVariantTensor.

PiperOrigin-RevId: 225898557
---
 tensorflow/core/framework/dataset.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/framework/dataset.cc b/tensorflow/core/framework/dataset.cc
index 6e21433271..5fed06ed6e 100644
--- a/tensorflow/core/framework/dataset.cc
+++ b/tensorflow/core/framework/dataset.cc
@@ -349,7 +349,7 @@ Status GetDatasetFromVariantTensor(const Tensor& tensor,
 }
 
 Status StoreDatasetInVariantTensor(DatasetBase* dataset, Tensor* tensor) {
-  if (!(tensor->dtype() == DT_VARIANT ||
+  if (!(tensor->dtype() == DT_VARIANT &&
         TensorShapeUtils::IsScalar(tensor->shape()))) {
     return errors::InvalidArgument(
         "Dataset tensor must be a scalar of dtype DT_VARIANT.");
-- 
GitLab


From 09a61c8fcd189557073fe3e38a3ee547158ef05c Mon Sep 17 00:00:00 2001
From: Davide Libenzi <dlibenzi@google.com>
Date: Mon, 17 Dec 2018 15:17:14 -0800
Subject: [PATCH 700/873] Fix missed set_device_ordinal() as it has been
 removed from the XRT alloc proto.

PiperOrigin-RevId: 225899141
---
 tensorflow/compiler/xla/python/local_computation_builder.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc
index 20e9a14722..657a09f92a 100644
--- a/tensorflow/compiler/xla/python/local_computation_builder.cc
+++ b/tensorflow/compiler/xla/python/local_computation_builder.cc
@@ -260,7 +260,6 @@ XrtAllocation::~XrtAllocation() {
 StatusOr<XrtAllocation*> XrtAllocation::FromLiteral(
     const Literal& argument, const string& session_target) {
   xrt::XLAAllocation alloc;
-  alloc.set_device_ordinal(0);
   *alloc.mutable_value() = argument.ToProto();
 
   tensorflow::Scope root = tensorflow::Scope::NewRootScope();
-- 
GitLab


From c66d9edb560f53fb4e8c43ce27d2490e611c9480 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 17 Dec 2018 15:20:30 -0800
Subject: [PATCH 701/873] Internal change.

PiperOrigin-RevId: 225899682
---
 tensorflow/tools/ci_build/update_version.py | 2 +-
 tensorflow/tools/pip_package/setup.py       | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorflow/tools/ci_build/update_version.py b/tensorflow/tools/ci_build/update_version.py
index 4373d464b6..1a14829fae 100755
--- a/tensorflow/tools/ci_build/update_version.py
+++ b/tensorflow/tools/ci_build/update_version.py
@@ -304,7 +304,7 @@ def main():
     new_version = Version(old_version.major,
                           str(nightly_minor_ver),
                           old_version.patch,
-                          "-dev" + time.strftime("%Y%m%d"),
+                          "a-dev" + time.strftime("%Y%m%d"),  # TODO(annarev): remove 'a'
                           NIGHTLY_VERSION)
   else:
     new_version = Version.parse_from_string(args.version, REGULAR_VERSION)
diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py
index 57ebbb2253..3927540cc7 100644
--- a/tensorflow/tools/pip_package/setup.py
+++ b/tensorflow/tools/pip_package/setup.py
@@ -45,8 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-# TODO(annarev): switch this back to 1.12.0.
-_VERSION = '1.12.0a'
+_VERSION = '1.12.0'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
-- 
GitLab


From 1f9f001f98c51afa61f46ffe7b8281b8a38f37a1 Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Mon, 17 Dec 2018 15:33:28 -0800
Subject: [PATCH 702/873] Use ModeKeys constants in
 Model._make_execution_function.

PiperOrigin-RevId: 225901633
---
 tensorflow/python/keras/engine/training.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index 320e76162e..bc85cc6fd1 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -48,6 +48,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import optimizer as tf_optimizer_module
 from tensorflow.python.training.checkpointable import base as checkpointable
+from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
@@ -2019,13 +2020,13 @@ class Model(Network):
             **kwargs)
 
   def _make_execution_function(self, mode):
-    if mode == 'train':
+    if mode == ModeKeys.TRAIN:
       self._make_fit_function()
       return self._fit_function
-    if mode == 'test':
+    if mode == ModeKeys.TEST:
       self._make_eval_function()
       return self._eval_function
-    if mode == 'predict':
+    if mode == ModeKeys.PREDICT:
       self._make_predict_function()
       return self.predict_function
 
-- 
GitLab


From 84bed97c3031738d6b7b14603a00377d1f10589e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 15:39:28 -0800
Subject: [PATCH 703/873] Set use_resource to None instead of False if the
 primary of a slot is not considered a variable.

PiperOrigin-RevId: 225902728
---
 tensorflow/python/training/slot_creator.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py
index ecf5a96ed4..bc1137e200 100644
--- a/tensorflow/python/training/slot_creator.py
+++ b/tensorflow/python/training/slot_creator.py
@@ -58,10 +58,19 @@ def _create_slot_var(primary, val, scope, validate_shape, shape, dtype):
   # When init from val instead of callable initializer, the shape is expected to
   # be None, not <unknown> or any fully defined shape.
   shape = shape if callable(val) else None
+  if resource_variable_ops.is_resource_variable(primary):
+    use_resource = True
+  elif isinstance(primary, variables.RefVariable):
+    use_resource = False
+  else:
+    use_resource = None
   slot = variable_scope.get_variable(
-      scope, initializer=val, trainable=False,
-      use_resource=resource_variable_ops.is_resource_variable(primary),
-      shape=shape, dtype=dtype,
+      scope,
+      initializer=val,
+      trainable=False,
+      use_resource=use_resource,
+      shape=shape,
+      dtype=dtype,
       validate_shape=validate_shape)
   variable_scope.get_variable_scope().set_partitioner(current_partitioner)
 
-- 
GitLab


From 48a8b3882a0dc2ef9c0d20c4169a2ffb21eb54c8 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Mon, 17 Dec 2018 15:51:31 -0800
Subject: [PATCH 704/873] [TF:XLA] Bump open source abseil revision to
 111ca7060a6ff50115ca85b59f6b5d8c8c5e9105

PiperOrigin-RevId: 225904571
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index aefab03b6d..6378d9ea95 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -125,11 +125,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "com_google_absl",
         build_file = clean_dep("//third_party:com_google_absl.BUILD"),
-        sha256 = "ab499df1dc1ee5f9bf95f327adc22a7bd327ae5e7c023309cddccd0763ba1043",
-        strip_prefix = "abseil-cpp-389ec3f906f018661a5308458d623d01f96d7b23",
+        sha256 = "583e5801372a0bb12eb561858532e3bb9a3528f15f65cfc87b2c0f4c1ab1a0ca",
+        strip_prefix = "abseil-cpp-111ca7060a6ff50115ca85b59f6b5d8c8c5e9105",
         urls = [
-            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/389ec3f906f018661a5308458d623d01f96d7b23.tar.gz",
-            "https://github.com/abseil/abseil-cpp/archive/389ec3f906f018661a5308458d623d01f96d7b23.tar.gz",
+            "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/111ca7060a6ff50115ca85b59f6b5d8c8c5e9105.tar.gz",
+            "https://github.com/abseil/abseil-cpp/archive/111ca7060a6ff50115ca85b59f6b5d8c8c5e9105.tar.gz",
         ],
     )
 
-- 
GitLab


From 43fb1b497d2488e1460a50a3bffce9f3ead6df09 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 15:55:13 -0800
Subject: [PATCH 705/873] Build file changes.

PiperOrigin-RevId: 225905061
---
 tensorflow/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 10c83e8e4b..93d45206cb 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -376,6 +376,7 @@ package_group(
         "-//third_party/tensorflow/python/estimator",
         "//learning/deepmind/...",
         "//learning/meta_rank/...",
+        "//learning/pathways/...",  # While dataset C++ api requires internals
         "//tensorflow/...",
         "//tensorflow_estimator/contrib/...",
         "//tensorflow_fold/llgtm/...",
-- 
GitLab


From 146c2da5d13c2265ca2e15ff7a0f6818a827beec Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 15:59:15 -0800
Subject: [PATCH 706/873] Adds Apple+Swift Bazel dependencies to TensorFlow
 workspace.

PiperOrigin-RevId: 225905634
---
 WORKSPACE | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/WORKSPACE b/WORKSPACE
index 7057d3f149..2277e83a3f 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -1,6 +1,6 @@
 workspace(name = "org_tensorflow")
 
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file")
 
 http_archive(
     name = "io_bazel_rules_closure",
@@ -40,6 +40,35 @@ load("//third_party/toolchains/preconfig/generate:workspace.bzl",
 
 remote_config_workspace()
 
+# Apple and Swift rules.
+http_archive(
+    name = "build_bazel_rules_apple",
+    sha256 = "4fe4ee824200b48821730f89ff260984332dc3551db587c24691235d1d96a8a7",
+    strip_prefix = "rules_apple-0.10.0",
+    urls = ["https://github.com/bazelbuild/rules_apple/archive/0.10.0.tar.gz"],
+)
+http_archive(
+    name = "build_bazel_rules_swift",
+    sha256 = "6544ff5615febec0342de1127144d2f3e43ea80fb7f9b1ade65e6a184e39e618",
+    strip_prefix = "rules_swift-0.5.0",
+    urls = ["https://github.com/bazelbuild/rules_swift/archive/0.5.0.tar.gz"],
+)
+http_archive(
+    name = "bazel_skylib",
+    sha256 = "eb5c57e4c12e68c0c20bc774bfbc60a568e800d025557bc4ea022c6479acc867",
+    strip_prefix = "bazel-skylib-0.6.0",
+    urls = ["https://github.com/bazelbuild/bazel-skylib/archive/0.6.0.tar.gz"],
+)
+http_file(
+    name = "xctestrunner",
+    executable = 1,
+    urls = ["https://github.com/google/xctestrunner/releases/download/0.2.5/ios_test_runner.par"],
+)
+load("@build_bazel_rules_apple//apple:repositories.bzl", "apple_rules_dependencies")
+apple_rules_dependencies(ignore_version_differences = True)
+load("@build_bazel_rules_swift//swift:repositories.bzl", "swift_rules_dependencies")
+swift_rules_dependencies()
+
 # We must check the bazel version before trying to parse any other BUILD
 # files, in case the parsing of those build files depends on the bazel
 # version we require here.
-- 
GitLab


From bf46fa4c5225e853e602b0e18951884b08729163 Mon Sep 17 00:00:00 2001
From: kaixih <kaixih@nvidia.com>
Date: Mon, 22 Oct 2018 14:09:42 -0700
Subject: [PATCH 707/873] Support the new extended rnn ops of CUDNN, which can
 accept sequence_lengths as a parameter. (Still contain many debugging codes;
 to be removed after correctness check)

Use async memcpy when copying sequence lengths data from GPU to CPU (which is required when creating cudnnRNNDataDescriptor_t

Remove the debugging prints

Reorder the parameters for better back compatibility

Use sequence_lengths as the new parameter

New unittest for the extend cudnn rnn ops added

Unittest for the extended cuDNN-RNN (supporting variable sequences) ops added; remove some debugging statements in its grad ops

Add user-inputs checks (int array required for sequence_lengths); Changed new ops name to CudnnRNNVarSeqLen

Add pbtxt files for VarSeqLen RNN APIs

Rename functions/classes/variables to replace Ex/ex to VarSeqLen/var_seq_len

Fix formatting

Enable the bazel test for the new var_len_seq ops

Minor changes from self.built to self._built with a initializer

Google Feedback: remove the manual cudaMemcpy() in the descriptor class

Google Feedback: remove the new rnn descriptor class and augment the existing one

Google Feedback: renaming the VarSeqLen to V3

Google Feedback: removing unnecessary spaces

Google feedback: redesign the cudnn_rnn_ops_test.py to include the v3 ops

Remove previous cudnn_rnn_ops_var_seq_len_test.py

Test Macro branches (unfinished)

Change the version number back to 7.2.1 for new cudnn rnn ops

add a dedicated ExtractForwardInput to process sequence lengths; seq_length -> max_seq_length; raise error if cudnn_version < 7.2.1 and is_var_seq_lengths()==true

add 'host_reserved' to v3; change v3 to subclass CudnnRNNForwardOp/CudnnRNNBackwardOp

Used 'absl::Span' instead of passing an int pointer

Changed 'seq_length' to 'max_seq_length' and 'seq_lens' to 'seq_lengths'

Changed to use only one omnipotent constructor to create 'CudnnRnnSequenceTensorDescriptor'

Separated 'CudnnSupport::createRnnSequenceTensorDescriptor()' for the extra 'seq_lengths' param. Other corresponding wrappers are changed as well.

Removed branches to check the sequence_lengths param; Added some comments on autotuning for V3

Changed the order of sequence_lengths for _cudnn_rnn()

Made the 'ExtractForwardInput()' share codes with the original function

Changed the order of 'sequence_lengths' for all classes/ops; Changed '_var_seq_len' to 'v3'

Updated test cases of new cudnn RNN API using 'ExpandNamedTestCases'

Changed to use unified 'CreateForwardAndBackwardIODescriptors' and 'DoForward' and 'DoBackward'

Changed the order of 'sequence_lengths' in '_forward()'; Changed all 'equal to' to 'equal'

Made Span argument to const reference; Removed seq_lengths_ field in 'CudnnRnnSequenceTensorDescriptor'

Changed handle() to data_handle()

Some sentences changed and V3 shape test added

Changed orders of 'sequence_lengths'; Changed the way to get pointer from Tensor

Changed the ops register

Changed to use seq_length for v1 and v2

Fixed some formating issues of python scripts and syntax issue in cudnn_rnn_ops_test.cc

Added macro check over the RNNDataDescriptorDeleter

Changed the init order to be in the order of declaration

Turn RNNDataDescriptor to be void pointer when cuDNN version < 7.2.1

Avoid the RNNDataDescriptor when cudnn < 7.2.1

Delete the unreached return

Another place needs to avoid the RNNDataDescriptor

Avoid data_handle() function when cudnn < 7.2.1
---
 .../python/kernel_tests/cudnn_rnn_ops_test.py | 177 +++++++--
 .../cudnn_rnn/python/layers/cudnn_rnn.py      |  12 +-
 .../cudnn_rnn/python/ops/cudnn_rnn_ops.py     |  80 +++-
 .../base_api/api_def_CudnnRNNBackpropV3.pbtxt |  49 +++
 .../api_def/base_api/api_def_CudnnRNNV3.pbtxt |  39 ++
 tensorflow/core/kernels/cudnn_rnn_ops.cc      | 260 ++++++++++---
 tensorflow/core/ops/cudnn_rnn_ops.cc          |  83 +++++
 tensorflow/core/ops/cudnn_rnn_ops_test.cc     |  35 ++
 tensorflow/python/ops/cudnn_rnn_grad.py       |  30 ++
 tensorflow/stream_executor/cuda/cuda_dnn.cc   | 347 ++++++++++++++----
 tensorflow/stream_executor/cuda/cuda_dnn.h    |   8 +-
 tensorflow/stream_executor/dnn.h              |  17 +-
 .../stream_executor/stream_executor_pimpl.cc  |  22 +-
 .../stream_executor/stream_executor_pimpl.h   |  11 +-
 14 files changed, 984 insertions(+), 186 deletions(-)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_CudnnRNNBackpropV3.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_CudnnRNNV3.pbtxt

diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
index a268415f0e..3bda5aabdb 100644
--- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
+++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_test.py
@@ -68,6 +68,7 @@ def RunLSTM(sess,
             batch_size,
             time,
             num_layers=1,
+            variable_seq_lengths=False,
             is_training=True,
             dropout=0.,
             num_dirs=True,
@@ -99,6 +100,13 @@ def RunLSTM(sess,
                                  num_units).astype(dtype.as_numpy_dtype),
       dtype=dtype)
 
+  if variable_seq_lengths:
+    lengths_v = np.random.randint(low=1, high=time+1, size=batch_size)
+    lengths_v[0] = time # make sure the max sequence has 'time' elems
+    lengths = ops.convert_to_tensor(lengths_v.astype(np.int32))
+  else:
+    lengths = None
+
   initializer = init_ops.random_uniform_initializer(
       -0.01, 0.01, dtype=dtype, seed=19980904)
 
@@ -115,6 +123,7 @@ def RunLSTM(sess,
     outputs_op, state_tuple_op = rnn.dynamic_rnn(
         cell,
         inputs,
+        sequence_length=lengths,
         initial_state=rnn_cell_impl.LSTMStateTuple(
             h=initial_h_op, c=initial_c_op),
         dtype=dtype,
@@ -133,6 +142,7 @@ def RunLSTM(sess,
       cu_initial_h_op,
       cu_initial_c_op,
       opaque_params,
+      sequence_lengths=lengths,
       dropout=dropout,
       is_training=is_training,
       rnn_mode=cudnn_rnn_ops.CUDNN_LSTM)
@@ -325,12 +335,14 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
                             time,
                             num_layers,
                             dtype,
-                            rtol=2e-6,
-                            atol=2e-6):
+                            variable_seq_lengths,
+                            rtol=3e-6,
+                            atol=3e-6):
     with self.session(use_gpu=True) as sess:
       (outputs, cu_outputs, state_tuple, cu_state_tuple, inp_grad, cu_inp_grad,
        state_grad, cu_state_grad, wgrad, bgrad, cu_wgrad, cu_bgrad) = RunLSTM(
-           sess, num_units, input_size, batch_size, time, num_layers)
+           sess, num_units, input_size, batch_size, time, num_layers,
+           variable_seq_lengths=variable_seq_lengths)
 
       self.assertAllClose(outputs, cu_outputs, rtol=rtol, atol=atol)
       for s, cu_s in zip(state_tuple, cu_state_tuple):
@@ -341,20 +353,34 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
       self.assertAllClose(bgrad, cu_bgrad, rtol=rtol, atol=atol)
       self.assertAllClose(wgrad, cu_wgrad, rtol=rtol, atol=atol)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
-  def test_training(self, num_units, input_size, batch_size, time, num_layers):
+  def test_training(self, num_units, input_size, batch_size, time, num_layers,
+                    variable_seq_lengths):
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
     self._test_training_helper(num_units, input_size, batch_size, time,
-                               num_layers, dtypes.float32)
+                               num_layers, dtypes.float32,
+                               variable_seq_lengths=variable_seq_lengths)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   def test_training_fp16(self, num_units, input_size, batch_size, time,
-                         num_layers):
+                         num_layers, variable_seq_lengths):
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
     self._test_training_helper(
@@ -365,12 +391,20 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
         num_layers,
         dtypes.float16,
         rtol=5e-3,
-        atol=5e-4)
+        atol=5e-4,
+        variable_seq_lengths=variable_seq_lengths)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
-  def test_inference(self, num_units, input_size, batch_size, time, num_layers):
+  def test_inference(self, num_units, input_size, batch_size, time, num_layers,
+                     variable_seq_lengths):
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
     with self.session(use_gpu=True) as sess:
@@ -381,7 +415,8 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
           batch_size,
           time,
           num_layers,
-          is_training=False)
+          is_training=False,
+          variable_seq_lengths=variable_seq_lengths)
 
       self.assertAllClose(outputs, cu_outputs)
       # h
@@ -389,11 +424,17 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
       # c
       self.assertAllClose(state_tuple.c, cu_state_tuple.c)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   def test_inference_fp16(self, num_units, input_size, batch_size, time,
-                          num_layers):
+                          num_layers, variable_seq_lengths):
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
     with self.session(use_gpu=True) as sess:
@@ -405,7 +446,8 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
           time,
           num_layers,
           is_training=False,
-          dtype=dtypes.float16)
+          dtype=dtypes.float16,
+          variable_seq_lengths=variable_seq_lengths)
 
       rtol, atol = 5e-3, 5e-4
       self.assertAllClose(outputs, cu_outputs, rtol=rtol, atol=atol)
@@ -416,11 +458,17 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
       self.assertAllClose(
           state_tuple.c, cu_state_tuple.c, rtol=rtol, atol=atol)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   def test_inference_with_dropout(self, num_units, input_size, batch_size, time,
-                                  num_layers):
+                                  num_layers, variable_seq_lengths):
     """Validates that dropout does not affect Cudnn Rnn inference."""
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
@@ -436,7 +484,8 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
             time,
             num_layers,
             is_training=False,
-            dropout=0.)
+            dropout=0.,
+            variable_seq_lengths=variable_seq_lengths)
 
     with ops.Graph().as_default() as g:
       with self.session(use_gpu=True, graph=g) as sess:
@@ -448,7 +497,8 @@ class CudnnLSTMTest(TensorFlowTestCase, parameterized.TestCase):
             time,
             num_layers,
             is_training=False,
-            dropout=1.)
+            dropout=1.,
+            variable_seq_lengths=variable_seq_lengths)
 
     self.assertAllClose(cu_outputs, cu_outputs2)
     # h
@@ -464,6 +514,7 @@ def RunGRU(sess,
            time,
            num_layers=1,
            is_training=True,
+           variable_seq_lengths=False,
            dropout=0.,
            num_dirs=True,
            dtype=dtypes.float32):
@@ -489,6 +540,13 @@ def RunGRU(sess,
                                  num_units).astype(dtype.as_numpy_dtype),
       dtype=dtype)
 
+  if variable_seq_lengths:
+    lengths_v = np.random.randint(low=1, high=time+1, size=batch_size)
+    lengths_v[0] = time # make sure the max sequence has 'time' elems
+    lengths = ops.convert_to_tensor(lengths_v.astype(np.int32))
+  else:
+    lengths = None
+
   initializer = init_ops.random_uniform_initializer(
       -0.01, 0.01, dtype=dtype, seed=19980904)
   with variable_scope.variable_scope("test", initializer=initializer):
@@ -521,6 +579,7 @@ def RunGRU(sess,
     outputs_op, h_op = rnn.dynamic_rnn(
         cell,
         inputs,
+        sequence_length=lengths,
         initial_state=initial_h_op,
         dtype=dtype,
         time_major=True,
@@ -533,12 +592,14 @@ def RunGRU(sess,
       num_layers, num_units, input_size)
   opaque_params = format_converter.tf_canonical_to_opaque(ws + bs)
 
+
   cu_initial_h_op = array_ops.expand_dims(initial_h_op, axis=0)
   cu_outputs_op, cu_h_op, _ = cudnn_rnn_ops._cudnn_rnn(
       inputs,
       cu_initial_h_op,
       array_ops.zeros_like(cu_initial_h_op),  # not used
       opaque_params,
+      sequence_lengths=lengths,
       dropout=dropout,
       is_training=is_training,
       rnn_mode=cudnn_rnn_ops.CUDNN_GRU)
@@ -615,12 +676,14 @@ class CudnnGRUTest(TensorFlowTestCase, parameterized.TestCase):
                             time,
                             num_layers,
                             dtype,
-                            rtol=2e-6,
-                            atol=2e-6):
+                            variable_seq_lengths,
+                            rtol=3e-6,
+                            atol=3e-6):
     with self.session(use_gpu=True) as sess:
       (outputs, cu_outputs, h, cu_h, inp_grad, cu_inp_grad, hgrad,
        cu_hgrad, wgrad, bgrad, cu_wgrad, cu_bgrad) = RunGRU(
-           sess, num_units, input_size, batch_size, time, num_layers)
+           sess, num_units, input_size, batch_size, time, num_layers,
+           variable_seq_lengths=variable_seq_lengths)
 
       self.assertAllClose(outputs, cu_outputs, rtol=rtol, atol=atol)
       self.assertAllClose(h, cu_h, rtol=rtol, atol=atol)
@@ -631,20 +694,34 @@ class CudnnGRUTest(TensorFlowTestCase, parameterized.TestCase):
       for wg, cu_wg in zip(wgrad, cu_wgrad):
         self.assertAllClose(wg, cu_wg, rtol=rtol, atol=atol)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
-  def test_training(self, num_units, input_size, batch_size, time, num_layers):
+  def test_training(self, num_units, input_size, batch_size, time, num_layers,
+                    variable_seq_lengths):
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
     self._test_training_helper(num_units, input_size, batch_size, time,
-                               num_layers, dtypes.float32)
+                               num_layers, dtypes.float32,
+                               variable_seq_lengths=variable_seq_lengths)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   def test_training_fp16(self, num_units, input_size, batch_size, time,
-                         num_layers):
+                         num_layers, variable_seq_lengths):
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
     self._test_training_helper(
@@ -655,12 +732,20 @@ class CudnnGRUTest(TensorFlowTestCase, parameterized.TestCase):
         num_layers,
         dtypes.float16,
         rtol=5e-3,
-        atol=5e-4)
+        atol=5e-4,
+        variable_seq_lengths=variable_seq_lengths)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
-  def test_inference(self, num_units, input_size, batch_size, time, num_layers):
+  def test_inference(self, num_units, input_size, batch_size, time, num_layers,
+                     variable_seq_lengths):
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
     with self.session(use_gpu=True) as sess:
@@ -671,15 +756,22 @@ class CudnnGRUTest(TensorFlowTestCase, parameterized.TestCase):
           batch_size,
           time,
           num_layers,
-          is_training=False)
+          is_training=False,
+          variable_seq_lengths=variable_seq_lengths)
       self.assertAllClose(outputs, cu_outputs)
       self.assertAllClose(h, cu_h)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   def test_inference_fp16(self, num_units, input_size, batch_size, time,
-                          num_layers):
+                          num_layers, variable_seq_lengths):
     if not context.context().num_gpus():
       self.skipTest("No GPUs found")
     with self.session(use_gpu=True) as sess:
@@ -691,17 +783,24 @@ class CudnnGRUTest(TensorFlowTestCase, parameterized.TestCase):
           time,
           num_layers,
           is_training=False,
-          dtype=dtypes.float16)
+          dtype=dtypes.float16,
+          variable_seq_lengths=variable_seq_lengths)
 
       rtol, atol = 5e-3, 5e-4
       self.assertAllClose(outputs, cu_outputs, rtol=rtol, atol=atol)
       self.assertAllClose(h, cu_h, rtol=rtol, atol=atol)
 
-  @parameterized.named_parameters(*NAMED_RNN_TESTCASES)
+  @parameterized.named_parameters(
+      ExpandNamedTestCases(
+          NAMED_RNN_TESTCASES, **{
+              "variable_seq_lengths": [
+                  True, False
+              ],
+          }))
   @unittest.skipUnless(test.is_built_with_cuda(),
                        "Test only applicable when running on GPUs")
   def test_inference_with_dropout(self, num_units, input_size, batch_size, time,
-                                  num_layers):
+                                  num_layers, variable_seq_lengths):
     """Validates that dropout does not affect Cudnn Rnn inference."""
     # Hand-picked dropouts are used below (0. and 1.)
     if not context.context().num_gpus():
@@ -717,7 +816,8 @@ class CudnnGRUTest(TensorFlowTestCase, parameterized.TestCase):
             time,
             num_layers,
             is_training=False,
-            dropout=0.)
+            dropout=0.,
+            variable_seq_lengths=variable_seq_lengths)
 
     with ops.Graph().as_default() as g:
       with self.session(use_gpu=True, graph=g) as sess:
@@ -729,7 +829,8 @@ class CudnnGRUTest(TensorFlowTestCase, parameterized.TestCase):
             time,
             num_layers,
             is_training=False,
-            dropout=1.)
+            dropout=1.,
+            variable_seq_lengths=variable_seq_lengths)
 
     self.assertAllClose(cu_outputs, cu_outputs2)
     self.assertAllClose(cu_h[0], cu_h2[0])
diff --git a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
index 8e25637ed9..7d3d55ce9a 100644
--- a/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
+++ b/tensorflow/contrib/cudnn_rnn/python/layers/cudnn_rnn.py
@@ -374,7 +374,8 @@ class _CudnnRNN(base_layer.Layer):
         "This cell does not yet support object-based saving. File a feature "
         "request if this limitation bothers you.")
 
-  def call(self, inputs, initial_state=None, training=True):
+  def call(self, inputs, initial_state=None, sequence_lengths=None,
+           training=True):
     """Runs the forward step for the RNN model.
 
     Args:
@@ -382,6 +383,9 @@ class _CudnnRNN(base_layer.Layer):
       initial_state: a tuple of tensor(s) of shape
         `[num_layers * num_dirs, batch_size, num_units]`. If not provided, use
         zero initial states. The tuple size is 2 for LSTM and 1 for other RNNs.
+      sequence_lengths: an int32 array representing the variable sequence
+        lengths in a batch. The size of the array has to equal the
+        batch_size. If not provided, the same sequence length will be assumed.
       training: whether this operation will be used in training or inference.
     Returns:
       output: a tensor of shape `[time_len, batch_size, num_dirs * num_units]`.
@@ -411,7 +415,7 @@ class _CudnnRNN(base_layer.Layer):
       # For model that doesn't take input_c, replace with a dummy tensor.
       c = array_ops.constant([], dtype=dtype)
     outputs, (output_h, output_c) = self._forward(inputs, h, c, self.kernel,
-                                                  training)
+                                                  sequence_lengths, training)
     if self._rnn_mode == CUDNN_LSTM:
       return outputs, (output_h, output_c)
     else:
@@ -475,7 +479,8 @@ class _CudnnRNN(base_layer.Layer):
           dropout=self._dropout,
           direction=self._direction)
 
-  def _forward(self, inputs, h, c, opaque_params, training):
+  def _forward(self, inputs, h, c, opaque_params, sequence_lengths,
+               training):
     output, output_h, output_c = cudnn_rnn_ops._cudnn_rnn(  # pylint:disable=protected-access
         inputs,
         h,
@@ -483,6 +488,7 @@ class _CudnnRNN(base_layer.Layer):
         opaque_params,
         training,
         self._rnn_mode,
+        sequence_lengths=sequence_lengths,
         input_mode=self._input_mode,
         direction=self._direction,
         dropout=self._dropout,
diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
index 1ce29b42d5..030636af5a 100644
--- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
+++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py
@@ -955,6 +955,7 @@ def _cudnn_rnn(inputs,
                params,
                is_training,
                rnn_mode,
+               sequence_lengths=None,
                input_mode=CUDNN_INPUT_LINEAR_MODE,
                direction=CUDNN_RNN_UNIDIRECTION,
                dropout=0.,
@@ -972,6 +973,10 @@ def _cudnn_rnn(inputs,
     params: the parameter buffer created for this model.
     is_training: whether this operation will be used in training or inference
     rnn_mode: one of ('lstm', 'gru', 'rnn_relu', 'rnn_tanh').
+    sequence_lengths: an int32 array representing the variable sequence lengths
+      in a batch. The size of the array has to equal the batch_size. Default to
+      None, in which case sequences in the batch are assumed to have the same
+      length, which is inferred from inputs.
     input_mode: indicate whether there is a linear projection between the
       input and the actual computation before the first layer. It could be
       'linear_input', 'skip_input' or 'auto_select'.
@@ -1010,7 +1015,11 @@ def _cudnn_rnn(inputs,
       "seed2": seed2,
       "name": name
   }
-  if use_cudnn_v2 != "1":
+  if sequence_lengths is not None:
+    args["sequence_lengths"] = sequence_lengths
+    outputs, output_h, output_c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(
+        **args)
+  elif use_cudnn_v2 != "1":
     outputs, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn(**args)
   else:
     outputs, output_h, output_c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv2(**args)
@@ -1022,6 +1031,7 @@ def cudnn_lstm(inputs,
                input_c,
                params,
                is_training,
+               sequence_lengths=None,
                input_mode=CUDNN_INPUT_LINEAR_MODE,
                direction=CUDNN_RNN_UNIDIRECTION,
                dropout=0.,
@@ -1051,12 +1061,17 @@ def cudnn_lstm(inputs,
     dropout: whether to enable dropout. With it is 0, dropout is disabled.
     seed: the op seed used for initializing dropout. See `tf.set_random_seed`
         for behavior.
+    sequence_lengths: an int32 array representing the variable sequence lengths
+      in a batch. The size of the array has to equal the batch_size. Default to
+      None, in which case sequences in the batch are assumed to have the same
+      length, which is inferred from inputs.
     name: name of the operation.
   Returns:
     outputs, output_h, output_c
   """
   return _cudnn_rnn(inputs, input_h, input_c, params, is_training, CUDNN_LSTM,
-                    input_mode, direction, dropout, seed, name)
+                    sequence_lengths, input_mode, direction, dropout, seed,
+                    name)
 
 
 def _cudnn_rnn_no_input_c(inputs,
@@ -1064,6 +1079,7 @@ def _cudnn_rnn_no_input_c(inputs,
                           params,
                           is_training,
                           rnn_mode,
+                          sequence_lengths=None,
                           input_mode=CUDNN_INPUT_LINEAR_MODE,
                           direction=CUDNN_RNN_UNIDIRECTION,
                           dropout=0.,
@@ -1079,6 +1095,10 @@ def _cudnn_rnn_no_input_c(inputs,
     params: the parameter buffer created for this model.
     is_training: whether this operation will be used in training or inference
     rnn_mode: one of ('lstm', 'gru', 'rnn_relu', 'rnn_tanh').
+    sequence_lengths: an int32 array representing the variable sequence lengths
+      in a batch. The size of the array has to equal the batch_size. Default to
+      None, in which case sequences in the batch are assumed to have the same
+      length, which is inferred from inputs.
     input_mode: indicate whether there is a linear projection between the
       input and the actual computation before the first layer. It could be
       'linear_input', 'skip_input' or 'auto_select'.
@@ -1098,8 +1118,9 @@ def _cudnn_rnn_no_input_c(inputs,
   """
   input_c = array_ops.constant([], dtype=input_h.dtype)
   outputs, output_h, _ = _cudnn_rnn(inputs, input_h, input_c, params,
-                                    is_training, rnn_mode, input_mode,
-                                    direction, dropout, seed, name)
+                                    is_training, rnn_mode, sequence_lengths,
+                                    input_mode, direction, dropout, seed,
+                                    name)
   return outputs, output_h
 
 
@@ -1107,6 +1128,7 @@ def cudnn_gru(inputs,
               input_h,
               params,
               is_training,
+              sequence_lengths=None,
               input_mode=CUDNN_INPUT_LINEAR_MODE,
               direction=CUDNN_RNN_UNIDIRECTION,
               dropout=0.,
@@ -1129,6 +1151,10 @@ def cudnn_gru(inputs,
         'skip_input' is only allowed when input_size == num_units;
         'auto_select' implies 'skip_input' when input_size == num_units;
         otherwise, it implies 'linear_input'.
+    sequence_lengths: an int32 array representing the variable sequence lengths
+      in a batch. The size of the array has to equal the batch_size. Default to
+      None, in which case sequences in the batch are assumed to have the same
+      length, which is inferred from inputs.
     direction: the direction model that the model operates. Could be either
         'unidirectional' or 'bidirectional'
     dropout: whether to enable dropout. With it is 0, dropout is disabled.
@@ -1139,7 +1165,8 @@ def cudnn_gru(inputs,
     outputs, output_h
   """
   return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training, CUDNN_GRU,
-                               input_mode, direction, dropout, seed, name)
+                               sequence_lengths, input_mode, direction, dropout,
+                               seed, name)
 
 
 def cudnn_rnn_relu(inputs,
@@ -1150,6 +1177,7 @@ def cudnn_rnn_relu(inputs,
                    direction=CUDNN_RNN_UNIDIRECTION,
                    dropout=0.,
                    seed=0,
+                   sequence_lengths=None,
                    name=None):
   """Cudnn RNN Relu.
 
@@ -1173,19 +1201,23 @@ def cudnn_rnn_relu(inputs,
     dropout: whether to enable dropout. With it is 0, dropout is disabled.
     seed: the op seed used for initializing dropout. See `tf.set_random_seed`
         for behavior.
+    sequence_lengths: an int32 array representing the variable sequence lengths
+      in a batch. The size of the array has to equal the batch_size. If not
+      provided, the same sequence length will be assumed.
     name: name of the operation.
   Returns:
     outputs, output_h
   """
   return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training,
-                               CUDNN_RNN_RELU, input_mode, direction, dropout,
-                               seed, name)
+                               CUDNN_RNN_RELU, sequence_lengths, input_mode,
+                               direction, dropout, seed, name)
 
 
 def cudnn_rnn_tanh(inputs,
                    input_h,
                    params,
                    is_training,
+                   sequence_lengths=None,
                    input_mode=CUDNN_INPUT_LINEAR_MODE,
                    direction=CUDNN_RNN_UNIDIRECTION,
                    dropout=0.,
@@ -1208,6 +1240,10 @@ def cudnn_rnn_tanh(inputs,
         'skip_input' is only allowed when input_size == num_units;
         'auto_select' implies 'skip_input' when input_size == num_units;
         otherwise, it implies 'linear_input'.
+    sequence_lengths: an int32 array representing the variable sequence lengths
+      in a batch. The size of the array has to equal the batch_size. Default to
+      None, in which case sequences in the batch are assumed to have the same
+      length, which is inferred from inputs.
     direction: the direction model that the model operates. Could be either
         'unidirectional' or 'bidirectional'
     dropout: whether to enable dropout. With it is 0, dropout is disabled.
@@ -1218,8 +1254,8 @@ def cudnn_rnn_tanh(inputs,
     outputs, output_h
   """
   return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training,
-                               CUDNN_RNN_TANH, input_mode, direction, dropout,
-                               seed, name)
+                               CUDNN_RNN_TANH, sequence_lengths, input_mode,
+                               direction, dropout, seed, name)
 
 
 def cudnn_rnn_opaque_params_to_canonical(rnn_mode,
@@ -1497,7 +1533,8 @@ class _CudnnRNN(object):
         input_mode=self._input_mode,
         direction=self._direction)
 
-  def __call__(self, input_data, input_h, input_c, params, is_training=True):
+  def __call__(self, input_data, input_h, input_c, params, is_training=True,
+               sequence_lengths=None):
     """Runs the forward step for the RNN model.
 
     Args:
@@ -1509,6 +1546,10 @@ class _CudnnRNN(object):
         A Tensor of the same shape as input_h.
       params: the parameter buffer created for this model.
       is_training: whether this operation will be used in training or inference.
+      sequence_lengths: an int32 array representing the variable sequence
+        lengths in a batch. The size of the array has to equal the batch_size.
+        Default to None, in which case sequences in the batch are assumed to
+        have the same length, which is inferred from inputs.
     Returns:
       output: the output sequence.
       output_h: the final state for h.
@@ -1521,6 +1562,7 @@ class _CudnnRNN(object):
         params,
         is_training,
         self._rnn_mode,
+        sequence_lengths=sequence_lengths,
         input_mode=self._input_mode,
         direction=self._direction,
         dropout=self._dropout,
@@ -1615,7 +1657,8 @@ class CudnnLSTM(_CudnnRNN):
         dropout=dropout,
         seed=seed)
 
-  def __call__(self, input_data, input_h, input_c, params, is_training=True):
+  def __call__(self, input_data, input_h, input_c, params,
+               sequence_lengths=None, is_training=True):
     """Runs the forward step for the Cudnn LSTM model.
 
     Args:
@@ -1626,6 +1669,10 @@ class CudnnLSTM(_CudnnRNN):
       input_c: the initial hidden state for c. A Tensor of the same shape as
         input_h.
       params: the parameter buffer created for this model.
+      sequence_lengths: an int32 array representing the variable sequence
+        lengths in a batch. The size of the array has to equal the batch_size.
+        Default to None, in which case sequences in the batch are assumed to
+        have the same length, which is inferred from inputs.
       is_training: whether this operation will be used in training or inference.
     Returns:
       output: the output sequence.
@@ -1633,7 +1680,8 @@ class CudnnLSTM(_CudnnRNN):
       output_c: the final state for c.
     """
     output, output_h, output_c = super(CudnnLSTM, self).__call__(
-        input_data, input_h, input_c, params, is_training=is_training)
+        input_data, input_h, input_c, params, sequence_lengths=sequence_lengths,
+        is_training=is_training)
     return (output, output_h, output_c)
 
 
@@ -1687,7 +1735,8 @@ class _CudnnRNNNoInputC(_CudnnRNN):
         dropout=dropout,
         seed=seed)
 
-  def __call__(self, input_data, input_h, params, is_training=True):
+  def __call__(self, input_data, input_h, params, sequence_lengths=None,
+               is_training=True):
     """Runs the forward step for the Cudnn LSTM model.
 
     Args:
@@ -1696,6 +1745,10 @@ class _CudnnRNNNoInputC(_CudnnRNN):
       input_h: the initial hidden state for h. A Tensor of shape [num_layers,
         batch_size, num_units].
       params: the parameter buffer created for this model.
+      sequence_lengths: an int32 array representing the variable sequence
+        lengths in a batch. The size of the array has to equal the batch_size.
+        Default to None, in which case sequences in the batch are assumed to
+        have the same length, which is inferred from inputs.
       is_training: whether this operation will be used in training or inference.
     Returns:
       output: the output sequence.
@@ -1707,6 +1760,7 @@ class _CudnnRNNNoInputC(_CudnnRNN):
         params,
         is_training,
         self._rnn_mode,
+        sequence_lengths=sequence_lengths,
         input_mode=self._input_mode,
         direction=self._direction,
         dropout=self._dropout,
diff --git a/tensorflow/core/api_def/base_api/api_def_CudnnRNNBackpropV3.pbtxt b/tensorflow/core/api_def/base_api/api_def_CudnnRNNBackpropV3.pbtxt
new file mode 100644
index 0000000000..7967ca7c5d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_CudnnRNNBackpropV3.pbtxt
@@ -0,0 +1,49 @@
+op {
+  graph_op_name: "CudnnRNNBackpropV3"
+  visibility: HIDDEN
+  summary: "Backprop step of CudnnRNNV3."
+  description: <<END
+Compute the backprop of both data and weights in a RNN. Takes an extra
+    "sequence_lengths" input than CudnnRNNBackprop.
+
+rnn_mode: Indicates the type of the RNN model.
+input_mode: Indicates whether there is a linear projection between the input and
+    the actual computation before the first layer. 'skip_input' is only allowed
+    when input_size == num_units; 'auto_select' implies 'skip_input' when
+    input_size == num_units; otherwise, it implies 'linear_input'.
+direction: Indicates whether a bidirectional model will be used. Should be
+  "unidirectional" or "bidirectional".
+dropout: Dropout probability. When set to 0., dropout is disabled.
+seed: The 1st part of a seed to initialize dropout.
+seed2: The 2nd part of a seed to initialize dropout.
+input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
+input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
+    num_units].
+input_c: For LSTM, a 3-D tensor with the shape of
+    [num_layer * dir, batch, num_units]. For other models, it is ignored.
+params: A 1-D tensor that contains the weights and biases in an opaque layout.
+    The size must be created through CudnnRNNParamsSize, and initialized
+    separately. Note that they might not be compatible across different
+    generations. So it is a good idea to save and restore
+sequence_lengths: a vector of lengths of each input sequence.
+output: A 3-D tensor with the shape of [seq_length, batch_size,
+    dir * num_units].
+output_h: The same shape has input_h.
+output_c: The same shape as input_c for LSTM. An empty tensor for other models.
+output_backprop: A 3-D tensor with the same shape as output in the forward pass.
+output_h_backprop: A 3-D tensor with the same shape as output_h in the forward
+    pass.
+output_c_backprop: A 3-D tensor with the same shape as output_c in the forward
+    pass.
+reserve_space: The same reserve_space produced in the forward operation.
+input_backprop: The backprop to input in the forward pass. Has the same shape
+    as input.
+input_h_backprop: The backprop to input_h in the forward pass. Has the same
+    shape as input_h.
+input_c_backprop: The backprop to input_c in the forward pass. Has the same
+    shape as input_c.
+params_backprop: The backprop to the params buffer in the forward pass. Has the
+    same shape as params.
+END
+}
+
diff --git a/tensorflow/core/api_def/base_api/api_def_CudnnRNNV3.pbtxt b/tensorflow/core/api_def/base_api/api_def_CudnnRNNV3.pbtxt
new file mode 100644
index 0000000000..9cde53684d
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_CudnnRNNV3.pbtxt
@@ -0,0 +1,39 @@
+op {
+  graph_op_name: "CudnnRNNV3"
+  visibility: HIDDEN
+  summary: "A RNN backed by cuDNN."
+  description: <<END
+Computes the RNN from the input and initial states, with respect to the params
+buffer. Accepts one extra input "sequence_lengths" than CudnnRNN.
+
+rnn_mode: Indicates the type of the RNN model.
+input_mode: Indicates whether there is a linear projection between the input and
+  the actual computation before the first layer. 'skip_input' is only allowed
+  when input_size == num_units; 'auto_select' implies 'skip_input' when
+  input_size == num_units; otherwise, it implies 'linear_input'.
+direction: Indicates whether a bidirectional model will be used. Should be
+  "unidirectional" or "bidirectional".
+dropout: Dropout probability. When set to 0., dropout is disabled.
+seed: The 1st part of a seed to initialize dropout.
+seed2: The 2nd part of a seed to initialize dropout.
+input: A 3-D tensor with the shape of [seq_length, batch_size, input_size].
+input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size,
+    num_units].
+input_c: For LSTM, a 3-D tensor with the shape of
+    [num_layer * dir, batch, num_units]. For other models, it is ignored.
+params: A 1-D tensor that contains the weights and biases in an opaque layout.
+    The size must be created through CudnnRNNParamsSize, and initialized
+    separately. Note that they might not be compatible across different
+    generations. So it is a good idea to save and restore
+sequence_lengths: a vector of lengths of each input sequence.
+output: A 3-D tensor with the shape of [seq_length, batch_size,
+    dir * num_units].
+output_h: The same shape has input_h.
+output_c: The same shape as input_c for LSTM. An empty tensor for other models.
+is_training: Indicates whether this operation is used for inferenece or
+  training.
+reserve_space: An opaque tensor that can be used in backprop calculation. It
+  is only produced if is_training is true.
+END
+}
+
diff --git a/tensorflow/core/kernels/cudnn_rnn_ops.cc b/tensorflow/core/kernels/cudnn_rnn_ops.cc
index fbd702ef14..2968161890 100644
--- a/tensorflow/core/kernels/cudnn_rnn_ops.cc
+++ b/tensorflow/core/kernels/cudnn_rnn_ops.cc
@@ -105,6 +105,12 @@ class CudnnRNNForwardOpV2;
 template <typename Device, typename T>
 class CudnnRNNBackwardOpV2;
 
+template <typename Device, typename T>
+class CudnnRNNForwardOpV3;
+
+template <typename Device, typename T>
+class CudnnRNNBackwardOpV3;
+
 enum class TFRNNInputMode {
   kRNNLinearInput = 0,
   kRNNSkipInput = 1,
@@ -142,13 +148,13 @@ uint64 HashList(const std::vector<int>& list) {
 class CudnnRnnParameters {
  public:
   CudnnRnnParameters(int num_layers, int input_size, int num_units,
-                     int seq_length, int batch_size, int dir_count,
+                     int max_seq_length, int batch_size, int dir_count,
                      bool has_dropout, bool is_training, RnnMode rnn_mode,
                      TFRNNInputMode rnn_input_mode, DataType dtype)
       : num_layers_(num_layers),
         input_size_(input_size),
         num_units_(num_units),
-        seq_length_(seq_length),
+        seq_length_(max_seq_length),
         batch_size_(batch_size),
         dir_count_(dir_count),
         has_dropout_(has_dropout),
@@ -157,7 +163,7 @@ class CudnnRnnParameters {
         rnn_input_mode_(rnn_input_mode),
         dtype_(dtype) {
     hash_code_ = HashList(
-        {num_layers, input_size, num_units, seq_length, batch_size, dir_count,
+        {num_layers, input_size, num_units, max_seq_length, batch_size, dir_count,
          static_cast<int>(has_dropout), static_cast<int>(is_training),
          static_cast<int>(rnn_mode), static_cast<int>(rnn_input_mode), dtype});
   }
@@ -493,7 +499,7 @@ struct CudnnRnnModelShapes {
   int input_size;
   int num_units;
   int dir_count;
-  int seq_length;
+  int max_seq_length;
   int batch_size;
   TensorShape input_shape;
   TensorShape output_shape;
@@ -505,9 +511,9 @@ struct CudnnRnnModelShapes {
   }
   string DebugString() const {
     return strings::Printf(
-        "[num_layers, input_size, num_units, dir_count, seq_length, "
+        "[num_layers, input_size, num_units, dir_count, max_seq_length, "
         "batch_size]: [%d, %d, %d, %d, %d, %d] ",
-        num_layers, input_size, num_units, dir_count, seq_length, batch_size);
+        num_layers, input_size, num_units, dir_count, max_seq_length, batch_size);
   }
 };
 
@@ -565,7 +571,7 @@ Status ExtractForwardInput(OpKernelContext* context,
   if ((*input)->dims() != 3) {
     return errors::InvalidArgument("RNN input must be a 3-D vector.");
   }
-  model_shapes->seq_length = (*input)->dim_size(0);
+  model_shapes->max_seq_length = (*input)->dim_size(0);
   model_shapes->batch_size = (*input)->dim_size(1);
   model_shapes->input_size = (*input)->dim_size(2);
   model_shapes->input_shape = (*input)->shape();
@@ -597,17 +603,31 @@ Status ExtractForwardInput(OpKernelContext* context,
     }
   }
   model_shapes->output_shape =
-      TensorShape({model_shapes->seq_length, model_shapes->batch_size,
+      TensorShape({model_shapes->max_seq_length, model_shapes->batch_size,
                    model_shapes->dir_count * model_shapes->num_units});
   return Status::OK();
 }
 
+// Extract and checks the sequence_lengths, forward input tensors,
+// parameters, and shapes from the OpKernelContext.
+Status ExtractForwardInput(OpKernelContext* context,
+                           const CudnnModelTypes& model_types,
+                           const Tensor** input, const Tensor** input_h,
+                           const Tensor** input_c, const Tensor** params,
+                           CudnnRnnModelShapes* model_shapes,
+                           const Tensor** sequence_lengths) {
+  TF_RETURN_IF_ERROR(context->input("sequence_lengths", sequence_lengths));
+  return ExtractForwardInput(context, model_types, input, input_h, input_c,
+                              params, model_shapes);
+}
+
 template <typename T>
 Status CreateForwardAndBackwardIODescriptors(
     OpKernelContext* context, const CudnnRnnModelShapes& model_shapes,
     std::unique_ptr<RnnSequenceTensorDescriptor>* input_desc,
     std::unique_ptr<RnnStateTensorDescriptor>* state_desc,
-    std::unique_ptr<RnnSequenceTensorDescriptor>* output_desc) {
+    std::unique_ptr<RnnSequenceTensorDescriptor>* output_desc,
+    const absl::Span<const int>& seq_lengths) {
   StreamExecutor* executor = context->op_device_context()->stream()->parent();
   se::dnn::DataType data_type = ToDataType<T>::value;
 
@@ -616,11 +636,19 @@ Status CreateForwardAndBackwardIODescriptors(
   const TensorShape& output_shape = model_shapes.output_shape;
 
   DCHECK_EQ(input_shape.dims(), 3);
-  auto input_desc_s = executor->createRnnSequenceTensorDescriptor(
-      input_shape.dim_size(0), input_shape.dim_size(1), input_shape.dim_size(2),
-      data_type);
-  TF_RETURN_IF_ERROR(input_desc_s.status());
-  *input_desc = input_desc_s.ConsumeValueOrDie();
+  if (seq_lengths.data() != nullptr) {
+    auto input_desc_s = executor->createRnnSequenceTensorDescriptor(
+        input_shape.dim_size(0), input_shape.dim_size(1), input_shape.dim_size(2),
+        seq_lengths, data_type);
+    TF_RETURN_IF_ERROR(input_desc_s.status());
+    *input_desc = input_desc_s.ConsumeValueOrDie();
+  } else {
+    auto input_desc_s = executor->createRnnSequenceTensorDescriptor(
+        input_shape.dim_size(0), input_shape.dim_size(1), input_shape.dim_size(2),
+        data_type);
+    TF_RETURN_IF_ERROR(input_desc_s.status());
+    *input_desc = input_desc_s.ConsumeValueOrDie();
+  }
 
   DCHECK_EQ(hidden_state_shape.dims(), 3);
   auto hidden_state_desc_s = executor->createRnnStateTensorDescriptor(
@@ -630,11 +658,20 @@ Status CreateForwardAndBackwardIODescriptors(
   *state_desc = hidden_state_desc_s.ConsumeValueOrDie();
 
   DCHECK_EQ(output_shape.dims(), 3);
-  auto output_desc_s = executor->createRnnSequenceTensorDescriptor(
-      output_shape.dim_size(0), output_shape.dim_size(1),
-      output_shape.dim_size(2), data_type);
-  TF_RETURN_IF_ERROR(output_desc_s.status());
-  *output_desc = output_desc_s.ConsumeValueOrDie();
+  if (seq_lengths.data() != nullptr) {
+    auto output_desc_s = executor->createRnnSequenceTensorDescriptor(
+        output_shape.dim_size(0), output_shape.dim_size(1),
+        output_shape.dim_size(2), seq_lengths, data_type);
+    TF_RETURN_IF_ERROR(output_desc_s.status());
+    *output_desc = output_desc_s.ConsumeValueOrDie();
+  } else {
+    auto output_desc_s = executor->createRnnSequenceTensorDescriptor(
+        output_shape.dim_size(0), output_shape.dim_size(1),
+        output_shape.dim_size(2), data_type);
+    TF_RETURN_IF_ERROR(output_desc_s.status());
+    *output_desc = output_desc_s.ConsumeValueOrDie();
+  }
+
   return Status::OK();
 }
 
@@ -648,6 +685,7 @@ Status DoForward(OpKernelContext* context, const RnnDescriptor& rnn_desc,
                  const bool is_training,
                  /* forward outputs, outputs of the function */
                  Tensor* output, Tensor* output_h, Tensor* output_c,
+                 const Tensor* sequence_lengths,
                  ScratchAllocator* reserve_space_allocator,
                  ScratchAllocator* workspace_allocator,
                  ProfileResult* output_profile_result) {
@@ -655,8 +693,15 @@ Status DoForward(OpKernelContext* context, const RnnDescriptor& rnn_desc,
   std::unique_ptr<RnnStateTensorDescriptor> state_desc;
   std::unique_ptr<RnnSequenceTensorDescriptor> output_desc;
 
+  absl::Span<const int> seq_lengths;
+  if (sequence_lengths != nullptr) {
+    seq_lengths = absl::Span<const int>(
+        sequence_lengths->template flat<int>().data(),
+        model_shapes.batch_size);
+  }
   TF_RETURN_IF_ERROR(CreateForwardAndBackwardIODescriptors<T>(
-      context, model_shapes, &input_desc, &state_desc, &output_desc));
+        context, model_shapes, &input_desc, &state_desc, &output_desc,
+        seq_lengths));
 
   auto input_data = AsDeviceMemory<T>(input);
   auto input_h_data = AsDeviceMemory<T>(input_h);
@@ -664,6 +709,7 @@ Status DoForward(OpKernelContext* context, const RnnDescriptor& rnn_desc,
   if (model_types.HasInputC()) {
     input_c_data = AsDeviceMemory<T>(input_c);
   }
+
   auto params_data = AsDeviceMemory<T>(params);
   auto output_data = AsDeviceMemory<T>(output);
   auto output_h_data = AsDeviceMemory<T>(output_h);
@@ -703,14 +749,22 @@ Status DoBackward(
     const Tensor* output_c_backprop, const Tensor* reserve_space,
     /* backprop outputs, output of the function */
     Tensor* input_backprop, Tensor* input_h_backprop, Tensor* input_c_backprop,
-    Tensor* params_backprop, ScratchAllocator* workspace_allocator,
+    Tensor* params_backprop, const Tensor* sequence_lengths,
+    ScratchAllocator* workspace_allocator,
     ProfileResult* output_profile_result) {
   std::unique_ptr<RnnSequenceTensorDescriptor> input_desc;
   std::unique_ptr<RnnStateTensorDescriptor> state_desc;
   std::unique_ptr<RnnSequenceTensorDescriptor> output_desc;
 
+  absl::Span<const int> seq_lengths;
+  if (sequence_lengths != nullptr) {
+    seq_lengths = absl::Span<const int>(
+        sequence_lengths->template flat<int>().data(),
+        model_shapes.batch_size);
+  }
   TF_RETURN_IF_ERROR(CreateForwardAndBackwardIODescriptors<T>(
-      context, model_shapes, &input_desc, &state_desc, &output_desc));
+        context, model_shapes, &input_desc, &state_desc, &output_desc,
+        seq_lengths));
 
   auto input_data = AsDeviceMemory<T>(input);
   auto input_h_data = AsDeviceMemory<T>(input_h);
@@ -1162,22 +1216,32 @@ class CudnnRNNForwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
 
   void Compute(OpKernelContext* context) override {
     AlgorithmConfig algo_config;
-    ComputeAndReturnAlgorithm(context, &algo_config);
+    ComputeAndReturnAlgorithm(context, &algo_config, false);
   }
 
  protected:
   virtual void ComputeAndReturnAlgorithm(OpKernelContext* context,
-                                         AlgorithmConfig* output_algo_config) {
+                                         AlgorithmConfig* output_algo_config,
+                                         bool var_seq_lengths) {
     CHECK_NE(output_algo_config, nullptr);
 
     const Tensor* input = nullptr;
     const Tensor* input_h = nullptr;
     const Tensor* input_c = nullptr;
     const Tensor* params = nullptr;
+    const Tensor* sequence_lengths = nullptr;
     CudnnRnnModelShapes model_shapes;
-    OP_REQUIRES_OK(context,
-                   ExtractForwardInput(context, model_types(), &input, &input_h,
-                                       &input_c, &params, &model_shapes));
+    if (var_seq_lengths) {
+      OP_REQUIRES_OK(
+          context,
+          ExtractForwardInput(context, model_types(), &input, &input_h, &input_c,
+                              &params, &model_shapes, &sequence_lengths));
+    } else {
+      OP_REQUIRES_OK(
+          context,
+          ExtractForwardInput(context, model_types(), &input, &input_h, &input_c,
+                              &params, &model_shapes));
+    }
     RnnInputMode input_mode;
     OP_REQUIRES_OK(context,
                    ToRNNInputMode(rnn_input_mode(), model_shapes.num_units,
@@ -1215,11 +1279,19 @@ class CudnnRNNForwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
           context, GetCachedRnnDescriptor<T>(context, model_shapes, input_mode,
                                              *output_algo_config,
                                              &rnn_state_cache_, &rnn_desc_ptr));
-      launch_status = DoForward<T>(
-          context, *rnn_desc_ptr, model_types(), model_shapes, input, input_h,
-          input_c, params, is_training_, output, output_h, output_c,
-          &reserve_space_allocator, &workspace_allocator,
-          /*output_profile_result=*/nullptr);
+      if (var_seq_lengths) {
+        launch_status = DoForward<T>(
+            context, *rnn_desc_ptr, model_types(), model_shapes, input, input_h,
+            input_c, params, is_training_, output, output_h, output_c,
+            sequence_lengths, &reserve_space_allocator, &workspace_allocator,
+            /*output_profile_result=*/nullptr);
+      } else {
+        launch_status = DoForward<T>(
+            context, *rnn_desc_ptr, model_types(), model_shapes, input, input_h,
+            input_c, params, is_training_, output, output_h, output_c,
+            nullptr, &reserve_space_allocator, &workspace_allocator,
+            /*output_profile_result=*/nullptr);
+      }
     }
     OP_REQUIRES_OK(context, launch_status);
   }
@@ -1301,7 +1373,7 @@ class CudnnRNNForwardOpV2<GPUDevice, T>
   void Compute(OpKernelContext* context) override {
     AlgorithmConfig best_algo_config;
     CudnnRNNForwardOp<GPUDevice, T>::ComputeAndReturnAlgorithm(
-        context, &best_algo_config);
+        context, &best_algo_config, false);
     if (!context->status().ok()) {
       return;
     }
@@ -1354,7 +1426,7 @@ class CudnnRNNForwardOpV2<GPUDevice, T>
     const auto& modeltypes = model_types();
     CudnnRnnParameters rnn_params(
         model_shapes.num_layers, model_shapes.input_size,
-        model_shapes.num_units, model_shapes.seq_length,
+        model_shapes.num_units, model_shapes.max_seq_length,
         model_shapes.batch_size, model_shapes.dir_count,
         /*has_dropout=*/std::abs(dropout()) > 1e-8, is_training(),
         modeltypes.rnn_mode, modeltypes.rnn_input_mode, input->dtype());
@@ -1422,7 +1494,8 @@ class CudnnRNNForwardOpV2<GPUDevice, T>
       status = DoForward<T>(
           context, *rnn_desc, model_types(), model_shapes, input, input_h,
           input_c, params, is_training(), output, output_h, output_c,
-          &reserve_space_allocator, &workspace_allocator, &fwd_profile_result);
+          nullptr, &reserve_space_allocator, &workspace_allocator,
+          &fwd_profile_result);
       if (!status.ok()) {
         continue;
       }
@@ -1435,7 +1508,8 @@ class CudnnRNNForwardOpV2<GPUDevice, T>
             input_c, params, output, output_h, output_c, &output_backprop,
             &output_h_backprop, &output_c_backprop, &reserve_space,
             &input_backprop, &input_h_backprop, &input_c_backprop,
-            &params_backprop, &workspace_allocator, &bak_profile_result);
+            &params_backprop, nullptr, &workspace_allocator,
+            &bak_profile_result);
         if (!status.ok()) {
           continue;
         }
@@ -1480,6 +1554,51 @@ TF_CALL_float(REGISTER_GPU);
 TF_CALL_double(REGISTER_GPU);
 #undef REGISTER_GPU
 
+template <typename T>
+class CudnnRNNForwardOpV3<GPUDevice, T>
+    : public CudnnRNNForwardOp<GPUDevice, T> {
+ private:
+  using CudnnRNNForwardOp<GPUDevice, T>::is_training;
+  using CudnnRNNKernelCommon::CreateRnnDescriptor;
+  using CudnnRNNKernelCommon::dropout;
+  using CudnnRNNKernelCommon::HasInputC;
+  using CudnnRNNKernelCommon::model_types;
+
+ public:
+  explicit CudnnRNNForwardOpV3(OpKernelConstruction* context)
+      : CudnnRNNForwardOp<GPUDevice, T>(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    AlgorithmConfig best_algo_config;
+    CudnnRNNForwardOp<GPUDevice, T>::ComputeAndReturnAlgorithm(
+        context, &best_algo_config, true);
+    if (!context->status().ok()) {
+      return;
+    }
+
+    Tensor* output_host_reserved = nullptr;
+    // TODO: Current V3 only uses the default standard algorithm to process 
+    // batches with variable sequences and the inputs should be padded. 
+    // Autotune is not supported yet.
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(4, {}, &output_host_reserved));
+  }
+};
+
+#define REGISTER_GPU(T)                                        \
+  REGISTER_KERNEL_BUILDER(Name("CudnnRNNV3")                   \
+                              .Device(DEVICE_GPU)              \
+                              .HostMemory("sequence_lengths")  \
+                              .HostMemory("host_reserved")     \
+                              .TypeConstraint<T>("T"),         \
+                          CudnnRNNForwardOpV3<GPUDevice, T>);
+
+TF_CALL_half(REGISTER_GPU);
+TF_CALL_float(REGISTER_GPU);
+TF_CALL_double(REGISTER_GPU);
+#undef REGISTER_GPU
+
+
 // Run the backward operation of the RNN model.
 template <typename T>
 class CudnnRNNBackwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
@@ -1488,14 +1607,29 @@ class CudnnRNNBackwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
       : CudnnRNNKernelCommon(context) {}
 
   void Compute(OpKernelContext* context) override {
+    ComputeImpl(context, false);
+  }
+
+ protected:
+  virtual void ComputeImpl(OpKernelContext* context, 
+                           bool var_seq_lengths) {
     const Tensor* input = nullptr;
     const Tensor* input_h = nullptr;
     const Tensor* input_c = nullptr;
     const Tensor* params = nullptr;
+    const Tensor* sequence_lengths = nullptr;
     CudnnRnnModelShapes model_shapes;
-    OP_REQUIRES_OK(context,
-                   ExtractForwardInput(context, model_types(), &input, &input_h,
-                                       &input_c, &params, &model_shapes));
+    if (var_seq_lengths) {
+      OP_REQUIRES_OK(
+          context,
+          ExtractForwardInput(context, model_types(), &input, &input_h, &input_c,
+                              &params, &model_shapes, &sequence_lengths));
+    } else {
+      OP_REQUIRES_OK(
+          context,
+          ExtractForwardInput(context, model_types(), &input, &input_h, &input_c,
+                              &params, &model_shapes));
+    }
     RnnInputMode input_mode;
     OP_REQUIRES_OK(context,
                    ToRNNInputMode(rnn_input_mode(), model_shapes.num_units,
@@ -1536,12 +1670,22 @@ class CudnnRNNBackwardOp<GPUDevice, T> : public CudnnRNNKernelCommon {
           context, GetCachedRnnDescriptor<T>(context, model_shapes, input_mode,
                                              algo_config, &rnn_state_cache_,
                                              &rnn_desc_ptr));
-      launch_status = DoBackward<T>(
-          context, *rnn_desc_ptr, model_types(), model_shapes, input, input_h,
-          input_c, params, output, output_h, output_c, output_backprop,
-          output_h_backprop, output_c_backprop, reserve_space, input_backprop,
-          input_h_backprop, input_c_backprop, params_backprop,
-          &workspace_allocator, /*output_profile_result=*/nullptr);
+      if (var_seq_lengths) {
+        launch_status = DoBackward<T>(
+            context, *rnn_desc_ptr, model_types(), model_shapes, input, input_h,
+            input_c, params, output, output_h, output_c, output_backprop,
+            output_h_backprop, output_c_backprop, reserve_space, input_backprop,
+            input_h_backprop, input_c_backprop, params_backprop,
+            sequence_lengths, &workspace_allocator,
+            /*output_profile_result=*/nullptr);
+      } else {
+        launch_status = DoBackward<T>(
+            context, *rnn_desc_ptr, model_types(), model_shapes, input, input_h,
+            input_c, params, output, output_h, output_c, output_backprop,
+            output_h_backprop, output_c_backprop, reserve_space, input_backprop,
+            input_h_backprop, input_c_backprop, params_backprop,
+            nullptr, &workspace_allocator, /*output_profile_result=*/nullptr);
+      }
     }
     OP_REQUIRES_OK(context, launch_status);
   }
@@ -1685,6 +1829,32 @@ TF_CALL_float(REGISTER_GPU);
 TF_CALL_double(REGISTER_GPU);
 #undef REGISTER_GPU
 
+template <typename T>
+class CudnnRNNBackwardOpV3<GPUDevice, T>
+    : public CudnnRNNBackwardOp<GPUDevice, T> {
+ public:
+  explicit CudnnRNNBackwardOpV3(OpKernelConstruction* context)
+      : CudnnRNNBackwardOp<GPUDevice, T>(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    CudnnRNNBackwardOp<GPUDevice, T>::ComputeImpl(context, true);
+  }
+};
+
+#define REGISTER_GPU(T)                                        \
+  REGISTER_KERNEL_BUILDER(Name("CudnnRNNBackpropV3")           \
+                              .Device(DEVICE_GPU)              \
+                              .HostMemory("sequence_lengths")  \
+                              .HostMemory("host_reserved")     \
+                              .TypeConstraint<T>("T"),         \
+                          CudnnRNNBackwardOpV3<GPUDevice, T>);
+
+TF_CALL_half(REGISTER_GPU);
+TF_CALL_float(REGISTER_GPU);
+TF_CALL_double(REGISTER_GPU);
+#undef REGISTER_GPU
+
+
 // TODO(zhengxq): Add the conversion of Cudnn RNN Params from and to
 // its canonical form.
 
diff --git a/tensorflow/core/ops/cudnn_rnn_ops.cc b/tensorflow/core/ops/cudnn_rnn_ops.cc
index f84142c992..1747973879 100644
--- a/tensorflow/core/ops/cudnn_rnn_ops.cc
+++ b/tensorflow/core/ops/cudnn_rnn_ops.cc
@@ -147,6 +147,51 @@ REGISTER_OP("CudnnRNNV2")
       return Status::OK();
     });
 
+REGISTER_OP("CudnnRNNV3")
+    .Input("input: T")
+    .Input("input_h: T")
+    .Input("input_c: T")
+    .Input("params: T")
+    .Input("sequence_lengths: int32")
+    .SetIsStateful()
+    .Output("output: T")
+    .Output("output_h: T")
+    .Output("output_c: T")
+    .Output("reserve_space: T")
+    .Output("host_reserved: int8")
+    .Attr("T: {float16, float32, float64}")
+    .Attr(kRNNModeAttrs)
+    .Attr(kRNNInputModeAttrs)
+    .Attr(kRNNDirectionAttrs)
+    .Attr("dropout: float = 0.0")
+    .Attr("seed: int = 0")
+    .Attr("seed2: int = 0")
+    .Attr("is_training: bool = true")
+    .SetShapeFn([](InferenceContext* c) {
+      auto input_shape = c->input(0);
+      auto input_h_shape = c->input(1);
+      auto max_seq_length = c->Dim(input_shape, 0);
+      auto batch_size = c->Dim(input_shape, 1);
+      auto num_units = c->Dim(input_h_shape, 2);
+      string direction;
+      TF_RETURN_IF_ERROR(c->GetAttr("direction", &direction));
+      string rnn_mode;
+      TF_RETURN_IF_ERROR(c->GetAttr("rnn_mode", &rnn_mode));
+      int dir_count = (direction == "bidirectional") ? 2 : 1;
+      DimensionHandle output_size;
+      TF_RETURN_IF_ERROR(c->Multiply(num_units, dir_count, &output_size));
+      auto output_shape = c->MakeShape({max_seq_length, batch_size, output_size});
+      auto output_h_shape = input_h_shape;
+      auto output_c_shape TF_ATTRIBUTE_UNUSED =
+          (rnn_mode == "lstm") ? output_h_shape : c->MakeShape({});
+      c->set_output(0, output_shape);
+      c->set_output(1, output_h_shape);
+      c->set_output(2, output_c_shape);
+      c->set_output(3, c->UnknownShape());
+      c->set_output(4, c->UnknownShape());
+      return Status::OK();
+    });
+
 REGISTER_OP("CudnnRNNBackprop")
     .Input("input: T")
     .Input("input_h: T")
@@ -220,6 +265,44 @@ REGISTER_OP("CudnnRNNBackpropV2")
       return Status::OK();
     });
 
+REGISTER_OP("CudnnRNNBackpropV3")
+    .Input("input: T")
+    .Input("input_h: T")
+    .Input("input_c: T")
+    .Input("params: T")
+    .Input("sequence_lengths: int32")
+    .Input("output: T")
+    .Input("output_h: T")
+    .Input("output_c: T")
+    .Input("output_backprop: T")
+    .Input("output_h_backprop: T")
+    .Input("output_c_backprop: T")
+    .Input("reserve_space: T")
+    .Input("host_reserved: int8")
+    .SetIsStateful()
+    .Output("input_backprop: T")
+    .Output("input_h_backprop: T")
+    .Output("input_c_backprop: T")
+    .Output("params_backprop: T")
+    .Attr("T: {float16, float32, float64}")
+    .Attr(kRNNModeAttrs)
+    .Attr(kRNNInputModeAttrs)
+    .Attr(kRNNDirectionAttrs)
+    .Attr("dropout: float = 0.0")
+    .Attr("seed: int = 0")
+    .Attr("seed2: int = 0")
+    .SetShapeFn([](InferenceContext* c) {
+      auto input_shape = c->input(0);
+      auto input_h_shape = c->input(1);
+      auto input_c_shape = c->input(2);
+      auto params_shape = c->input(3);
+      c->set_output(0, input_shape);
+      c->set_output(1, input_h_shape);
+      c->set_output(2, input_c_shape);
+      c->set_output(3, params_shape);
+      return Status::OK();
+    });
+
 REGISTER_OP("CudnnRNNParamsToCanonical")
     .Input("num_layers: int32")
     .Input("num_units: int32")
diff --git a/tensorflow/core/ops/cudnn_rnn_ops_test.cc b/tensorflow/core/ops/cudnn_rnn_ops_test.cc
index 13c3b933f4..25121c6484 100644
--- a/tensorflow/core/ops/cudnn_rnn_ops_test.cc
+++ b/tensorflow/core/ops/cudnn_rnn_ops_test.cc
@@ -102,4 +102,39 @@ TEST(CudnnRNNOpsTest, ForwardV2Lstm_ShapeFn) {
   INFER_OK(op, input_shapes_desc, output_shapes_desc);
 }
 
+TEST(CudnnRNNOpsTest, ForwardV3Lstm_ShapeFn) {
+  int max_seq_length = 2;
+  int batch_size = 3;
+  int num_units = 4;
+  int num_layers = 5;
+  int dir_count = 1;
+  std::vector<int> input_shape = {max_seq_length, batch_size, num_units};
+  std::vector<int> input_h_shape = {num_layers * dir_count, batch_size,
+                                    num_units};
+  std::vector<int> output_shape = {max_seq_length, batch_size,
+                                   num_units * dir_count};
+  std::vector<int> seq_lengths_shape = {batch_size};
+  auto shape_to_str = [](const std::vector<int>& v) {
+    return strings::StrCat("[", str_util::Join(v, ","), "]");
+  };
+  string input_shapes_desc = strings::StrCat(
+      shape_to_str(input_shape), ";", shape_to_str(input_h_shape), ";",
+      shape_to_str(input_h_shape), ";", "[?]", ";",
+      shape_to_str(seq_lengths_shape));
+  string output_shapes_desc = "[d0_0,d0_1,d1_2];in1;in1;?;?";
+
+  ShapeInferenceTestOp op("CudnnRNNV3");
+  TF_ASSERT_OK(NodeDefBuilder("test", "CudnnRNNV3")
+                   .Input({"input", 0, DT_FLOAT})
+                   .Input({"input_h", 0, DT_FLOAT})
+                   .Input({"input_c", 0, DT_FLOAT})
+                   .Input({"params", 0, DT_FLOAT})
+                   .Input({"sequence_lengths", 0, DT_INT32})
+                   .Attr("rnn_mode", "lstm")
+                   .Attr("input_mode", "auto_select")
+                   .Attr("direction", "unidirectional")
+                   .Finalize(&op.node_def));
+  INFER_OK(op, input_shapes_desc, output_shapes_desc);
+}
+
 }  // end namespace tensorflow
diff --git a/tensorflow/python/ops/cudnn_rnn_grad.py b/tensorflow/python/ops/cudnn_rnn_grad.py
index c618c470f2..844ccfa13a 100644
--- a/tensorflow/python/ops/cudnn_rnn_grad.py
+++ b/tensorflow/python/ops/cudnn_rnn_grad.py
@@ -71,3 +71,33 @@ def _cudnn_rnn_backward_v2(op, *grad):
       rnn_mode=op.get_attr("rnn_mode"),
       input_mode=op.get_attr("input_mode"),
       direction=op.get_attr("direction"))
+
+
+@ops.RegisterGradient("CudnnRNNV3")
+def _cudnn_rnn_backwardv3(op, *grads):
+  """Gradients for the CudnnRNNV3 op."""
+  if not op.get_attr("is_training"):
+    raise ValueError(
+        "To use CudnnRNNV3 in gradients, is_training must be set to"
+        " True.")
+  return gen_cudnn_rnn_ops.cudnn_rnn_backprop_v3(
+      input=op.inputs[0],
+      input_h=op.inputs[1],
+      input_c=op.inputs[2],
+      params=op.inputs[3],
+      sequence_lengths=op.inputs[4],
+      output=op.outputs[0],
+      output_h=op.outputs[1],
+      output_c=op.outputs[2],
+      output_backprop=grads[0],
+      output_h_backprop=grads[1],
+      output_c_backprop=grads[2],
+      reserve_space=op.outputs[3],
+      host_reserved=op.outputs[4],
+      dropout=op.get_attr("dropout"),
+      seed=op.get_attr("seed"),
+      seed2=op.get_attr("seed2"),
+      rnn_mode=op.get_attr("rnn_mode"),
+      input_mode=op.get_attr("input_mode"),
+      direction=op.get_attr("direction"))+(None,)
+
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 1f2e2f48bb..8f2cf34b5c 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -365,6 +365,13 @@ struct TensorDescriptorDeleter {
     CHECK_CUDNN_OK(cudnnDestroyTensorDescriptor(descriptor));
   }
 };
+#if CUDNN_VERSION >= 7201
+struct RNNDataDescriptorDeleter {
+  void operator()(cudnnRNNDataDescriptor_t descriptor) const {
+    CHECK_CUDNN_OK(cudnnDestroyRNNDataDescriptor(descriptor));
+  }
+};
+#endif
 struct FilterDescriptorDeleter {
   void operator()(cudnnFilterDescriptor_t descriptor) const {
     CHECK_CUDNN_OK(cudnnDestroyFilterDescriptor(descriptor));
@@ -410,6 +417,10 @@ struct PersistentRnnPlanDeleter {
 // RAII wrappers for cuDNN types.
 using TensorDescriptor =
     std::unique_ptr<cudnnTensorStruct, TensorDescriptorDeleter>;
+#if CUDNN_VERSION >= 7201
+using RNNDataDescriptor =
+    std::unique_ptr<cudnnRNNDataStruct, RNNDataDescriptorDeleter>;
+#endif
 using FilterDescriptor =
     std::unique_ptr<cudnnFilterStruct, FilterDescriptorDeleter>;
 using ConvolutionDescriptor =
@@ -431,6 +442,13 @@ TensorDescriptor CreateTensorDescriptor() {
   CHECK_CUDNN_OK(cudnnCreateTensorDescriptor(&result));
   return TensorDescriptor(result);
 }
+#if CUDNN_VERSION >= 7201
+RNNDataDescriptor CreateRNNDataDescriptor() {
+  cudnnRNNDataDescriptor_t result;
+  CHECK_CUDNN_OK(cudnnCreateRNNDataDescriptor(&result));
+  return RNNDataDescriptor(result);
+}
+#endif
 FilterDescriptor CreateFilterDescriptor() {
   cudnnFilterDescriptor_t result;
   CHECK_CUDNN_OK(cudnnCreateFilterDescriptor(&result));
@@ -1012,6 +1030,14 @@ class CudnnRnnDescriptor : public dnn::RnnDescriptor {
         /*mode=*/rnn_mode, /*algo=*/rnn_algo,
         /*dataType=*/compute_type));
 
+    // TODO: For now, we only use cudnnRNN**Ex API to process padded inputs. 
+    // But in the future if these APIs are used to process full length arrays, 
+    // we need to distinguish when to set it.
+#if CUDNN_VERSION >= 7201
+    RETURN_IF_CUDNN_ERROR(
+        cudnnSetRNNPaddingMode(rnn_desc.get(), CUDNN_RNN_PADDED_IO_ENABLED));
+#endif
+
     port::StatusOr<PersistentRnnPlan> rnn_plan_wrapper;
     PersistentRnnPlan rnn_plan;
     if (rnn_algo == CUDNN_RNN_ALGO_PERSIST_DYNAMIC) {
@@ -1196,26 +1222,32 @@ port::StatusOr<CudnnRnnParamsDescriptor> CudnnRnnParamsDescriptor::Create(
 
 class CudnnRnnSequenceTensorDescriptor
     : public dnn::RnnSequenceTensorDescriptor {
-  CudnnRnnSequenceTensorDescriptor(CUDAExecutor* parent, int seq_length,
+  CudnnRnnSequenceTensorDescriptor(CUDAExecutor* parent, int max_seq_length,
                                    int batch_size, int data_size,
                                    cudnnDataType_t data_type,
+#if CUDNN_VERSION >= 7201
+                                   RNNDataDescriptor data_handle,
+#endif
                                    TensorDescriptor handle)
       : parent_(parent),
-        seq_length_(seq_length),
+        max_seq_length_(max_seq_length),
         batch_size_(batch_size),
         data_size_(data_size),
         data_type_(data_type),
         handle_(std::move(handle)),
-        handles_(seq_length, handle_.get()) {}
+#if CUDNN_VERSION >= 7201
+        rnn_data_handle_(std::move(data_handle)),
+#endif
+        handles_(max_seq_length, handle_.get()) {}
 
  public:
   CudnnRnnSequenceTensorDescriptor(CudnnRnnSequenceTensorDescriptor&&) =
       default;
 
   static port::StatusOr<CudnnRnnSequenceTensorDescriptor> Create(
-      CUDAExecutor* parent, int seq_length, int batch_size, int data_size,
+      CUDAExecutor* parent, int max_seq_length, int batch_size, int data_size,
       cudnnDataType_t data_type) {
-    CHECK_GT(seq_length, 0);
+    CHECK_GT(max_seq_length, 0);
     int dims[] = {batch_size, data_size, 1};
     int strides[] = {dims[1] * dims[2], dims[2], 1};
     TensorDescriptor tensor_desc = CreateTensorDescriptor();
@@ -1223,26 +1255,78 @@ class CudnnRnnSequenceTensorDescriptor
         /*tensorDesc=*/tensor_desc.get(), /*dataType=*/data_type,
         /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims,
         /*strideA=*/strides));
-    return CudnnRnnSequenceTensorDescriptor(parent, seq_length, batch_size,
+    return CudnnRnnSequenceTensorDescriptor(parent, max_seq_length, batch_size,
                                             data_size, data_type,
+#if CUDNN_VERSION >= 7201
+                                            nullptr,
+#endif
                                             std::move(tensor_desc));
   }
 
+  static port::StatusOr<CudnnRnnSequenceTensorDescriptor> Create(
+      CUDAExecutor* parent, int max_seq_length, int batch_size, int data_size,
+      const absl::Span<const int>& seq_lengths,
+      cudnnDataType_t data_type) {
+    CHECK_GT(max_seq_length, 0);
+    int dims[] = {batch_size, data_size, 1};
+    int strides[] = {dims[1] * dims[2], dims[2], 1};
+    TensorDescriptor tensor_desc = CreateTensorDescriptor();
+    RETURN_IF_CUDNN_ERROR(cudnnSetTensorNdDescriptor(
+        /*tensorDesc=*/tensor_desc.get(), /*dataType=*/data_type,
+        /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims,
+        /*strideA=*/strides));
+    const int* seq_lengths_array = seq_lengths.data();
+#if CUDNN_VERSION >= 7201
+    RNNDataDescriptor data_desc = CreateRNNDataDescriptor();
+    float padding_fill = 0.0f;
+    RETURN_IF_CUDNN_ERROR(cudnnSetRNNDataDescriptor(
+        /*RNNDataDesc=*/data_desc.get(), /*dataType*/ data_type,
+        /*layout=*/CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED,
+        /*maxSeqLength=*/max_seq_length,
+        /*batchSize=*/batch_size, /*vectorSize=*/data_size,
+        /*seqLengthArray=*/seq_lengths_array,
+        /*paddingFill*/ (void*)&padding_fill));
+    return CudnnRnnSequenceTensorDescriptor(parent, max_seq_length, batch_size,
+                                            data_size, data_type,
+                                            std::move(data_desc),
+                                            std::move(tensor_desc));
+#else
+    return port::Status(port::error::INVALID_ARGUMENT,
+                        "No supported cudnnSetRNNDataDescriptor when "
+                        "CUDNN_VERSION < 7.2.1");
+#endif
+  }
+
   const cudnnTensorDescriptor_t* handles() const {
     return handles_.data();
   }
+#if CUDNN_VERSION >= 7201
+  const cudnnRNNDataDescriptor_t data_handle() const {
+    return rnn_data_handle_.get(); 
+  }
+#endif
 
-  int seq_length() const { return seq_length_; }
+  int max_seq_length() const { return max_seq_length_; }
   int batch_size() const { return batch_size_; }
   int data_size() const { return data_size_; }
+  bool is_var_seq_lengths() const {
+#if CUDNN_VERSION >= 7201
+      return rnn_data_handle_ != nullptr;
+#else
+      return false;
+#endif
+  }
 
  private:
   CUDAExecutor* parent_;
-  int seq_length_;
+  int max_seq_length_;
   int batch_size_;
   int data_size_;
   cudnnDataType_t data_type_;
   TensorDescriptor handle_;
+#if CUDNN_VERSION >= 7201
+  RNNDataDescriptor rnn_data_handle_;
+#endif
   std::vector<cudnnTensorDescriptor_t> handles_;  // Copies of handle_.
   SE_DISALLOW_COPY_AND_ASSIGN(CudnnRnnSequenceTensorDescriptor);
 };
@@ -1287,7 +1371,7 @@ namespace {
 struct RnnModelDims {
   int num_layers = 0;
   int batch_size = 0;
-  int seq_length = 0;
+  int max_seq_length = 0;
   int hidden_size = 0;
   int input_size = 0;
   int dir_count = 0;
@@ -1312,7 +1396,7 @@ port::StatusOr<RnnModelDims> ExtractAndCheckRnnForward(
   RnnModelDims model_dims;
   model_dims.num_layers = rnn_desc.num_layers();
   model_dims.batch_size = input_desc.batch_size();
-  model_dims.seq_length = input_desc.seq_length();
+  model_dims.max_seq_length = input_desc.max_seq_length();
   model_dims.hidden_size = rnn_desc.hidden_size();
   model_dims.input_size = input_desc.data_size();
   model_dims.dir_count =
@@ -1330,7 +1414,7 @@ port::StatusOr<RnnModelDims> ExtractAndCheckRnnForward(
         input_h_desc.data_size() == input_c_desc.data_size())) {
     return port::Status(port::error::INVALID_ARGUMENT, "Invalid input_c shape");
   }
-  if (!(output_desc.seq_length() == model_dims.seq_length &&
+  if (!(output_desc.max_seq_length() == model_dims.max_seq_length &&
         output_desc.batch_size() == model_dims.batch_size &&
         output_desc.data_size() ==
             model_dims.hidden_size * model_dims.dir_count)) {
@@ -1377,7 +1461,7 @@ port::StatusOr<DeviceMemory<uint8>> CreateRnnWorkspace(
   size_t workspace_size_in_bytes = 0;
   RETURN_IF_CUDNN_ERROR(cudnnGetRNNWorkspaceSize(
       /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
-      /*seqLength=*/input_desc.seq_length(), /*xDesc=*/input_desc.handles(),
+      /*seqLength=*/input_desc.max_seq_length(), /*xDesc=*/input_desc.handles(),
       /*sizeInBytes=*/&workspace_size_in_bytes));
   // Allocate the workspace.
   if (workspace_size_in_bytes == 0) {
@@ -1427,7 +1511,7 @@ port::Status CudnnSupport::DoRnnForwardImpl(
     size_t reserve_space_size_in_bytes = 0;
     RETURN_IF_CUDNN_ERROR(cudnnGetRNNTrainingReserveSize(
         /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
-        /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(),
+        /*seqLength=*/model_dims.max_seq_length, /*xDesc=*/input_desc.handles(),
         /*sizeInBytes=*/&reserve_space_size_in_bytes));
 
     if (reserve_space_size_in_bytes > 0) {
@@ -1450,31 +1534,78 @@ port::Status CudnnSupport::DoRnnForwardImpl(
   }
 
   if (!is_training) {
-    RETURN_IF_CUDNN_ERROR(cudnnRNNForwardInference(
-        /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
-        /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(),
-        /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
-        /*hx=*/input_h_data.opaque(), /*cxDesc=*/input_c_desc.handle(),
-        /*cx=*/input_c_data.opaque(), /*wDesc=*/rnn_desc.params_handle(),
-        /*w=*/params.opaque(), /*yDesc=*/output_desc.handles(),
-        /*y=*/output_data->opaque(), /*hyDesc=*/output_h_desc.handle(),
-        /*hy=*/output_h_data->opaque(), /*cyDesc=*/output_c_desc.handle(),
-        /*cy=*/output_c_data->opaque(), /*workspace=*/workspace.opaque(),
-        /*workSpaceSizeInBytes=*/workspace.size()));
+    if (input_desc.is_var_seq_lengths()) {
+#if CUDNN_VERSION >= 7201
+      RETURN_IF_CUDNN_ERROR(cudnnRNNForwardInferenceEx(
+          /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+          /*xDesc=*/input_desc.data_handle(), /*x=*/input_data.opaque(),
+          /*hxDesc=*/input_h_desc.handle(), /*hx=*/input_h_data.opaque(),
+          /*cxDesc=*/input_c_desc.handle(), /*cx=*/input_c_data.opaque(),
+          /*wDesc=*/rnn_desc.params_handle(), /*w=*/params.opaque(),
+          /*yDesc=*/output_desc.data_handle(),
+          /*y=*/output_data->opaque(),
+          /*hyDesc=*/output_h_desc.handle(), /*hy=*/output_h_data->opaque(),
+          /*cyDesc=*/output_c_desc.handle(), /*cy=*/output_c_data->opaque(),
+          NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+          /*workspace=*/workspace.opaque(),
+          /*workSpaceSizeInBytes=*/workspace.size()));
+#else
+      return port::Status(port::error::INVALID_ARGUMENT,
+                          "No supported cudnnRNNForwardInferenceEx when "
+                          "CUDNN_VERSION < 7.2.1");
+#endif
+    } else {
+      RETURN_IF_CUDNN_ERROR(cudnnRNNForwardInference(
+          /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+          /*seqLength=*/model_dims.max_seq_length, /*xDesc=*/input_desc.handles(),
+          /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
+          /*hx=*/input_h_data.opaque(), /*cxDesc=*/input_c_desc.handle(),
+          /*cx=*/input_c_data.opaque(), /*wDesc=*/rnn_desc.params_handle(),
+          /*w=*/params.opaque(), /*yDesc=*/output_desc.handles(),
+          /*y=*/output_data->opaque(), /*hyDesc=*/output_h_desc.handle(),
+          /*hy=*/output_h_data->opaque(), /*cyDesc=*/output_c_desc.handle(),
+          /*cy=*/output_c_data->opaque(), /*workspace=*/workspace.opaque(),
+          /*workSpaceSizeInBytes=*/workspace.size()));
+    }
   } else {
-    RETURN_IF_CUDNN_ERROR(cudnnRNNForwardTraining(
-        /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
-        /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(),
-        /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
-        /*hx=*/input_h_data.opaque(), /*cxDesc=*/input_c_desc.handle(),
-        /*cx=*/input_c_data.opaque(), /*wDesc=*/rnn_desc.params_handle(),
-        /*w=*/params.opaque(), /*yDesc=*/output_desc.handles(),
-        /*y=*/output_data->opaque(), /*hyDesc=*/output_h_desc.handle(),
-        /*hy=*/output_h_data->opaque(), /*cyDesc=*/output_c_desc.handle(),
-        /*cy=*/output_c_data->opaque(), /*workspace=*/workspace.opaque(),
-        /*workSpaceSizeInBytes=*/workspace.size(),
-        /*reserveSpace=*/reserve_space.opaque(),
-        /*reserveSpaceSizeInBytes=*/reserve_space.size()));
+    if (input_desc.is_var_seq_lengths()) {
+#if CUDNN_VERSION >= 7201
+      // cudnnSetRNNPaddingMode(rnn_desc.handle(), CUDNN_RNN_PADDED_IO_ENABLED);
+      RETURN_IF_CUDNN_ERROR(cudnnRNNForwardTrainingEx(
+          /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+          /*xDesc=*/input_desc.data_handle(), /*x=*/input_data.opaque(),
+          /*hxDesc=*/input_h_desc.handle(), /*hx=*/input_h_data.opaque(),
+          /*cxDesc=*/input_c_desc.handle(), /*cx=*/input_c_data.opaque(),
+          /*wDesc=*/rnn_desc.params_handle(), /*w=*/params.opaque(),
+          /*yDesc=*/output_desc.data_handle(),
+          /*y=*/output_data->opaque(),
+          /*hyDesc=*/output_h_desc.handle(), /*hy=*/output_h_data->opaque(),
+          /*cyDesc=*/output_c_desc.handle(), /*cy=*/output_c_data->opaque(),
+          NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+          /*workspace=*/workspace.opaque(),
+          /*workSpaceSizeInBytes=*/workspace.size(),
+          /*reserveSpace=*/reserve_space.opaque(),
+          /*reserveSpaceSizeInBytes=*/reserve_space.size()));
+#else
+      return port::Status(port::error::INVALID_ARGUMENT,
+                          "No supported cudnnRNNForwardTrainingEx when "
+                          "CUDNN_VERSION < 7.2.1");
+#endif
+    } else {
+      RETURN_IF_CUDNN_ERROR(cudnnRNNForwardTraining(
+          /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+          /*seqLength=*/model_dims.max_seq_length, /*xDesc=*/input_desc.handles(),
+          /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
+          /*hx=*/input_h_data.opaque(), /*cxDesc=*/input_c_desc.handle(),
+          /*cx=*/input_c_data.opaque(), /*wDesc=*/rnn_desc.params_handle(),
+          /*w=*/params.opaque(), /*yDesc=*/output_desc.handles(),
+          /*y=*/output_data->opaque(), /*hyDesc=*/output_h_desc.handle(),
+          /*hy=*/output_h_data->opaque(), /*cyDesc=*/output_c_desc.handle(),
+          /*cy=*/output_c_data->opaque(), /*workspace=*/workspace.opaque(),
+          /*workSpaceSizeInBytes=*/workspace.size(),
+          /*reserveSpace=*/reserve_space.opaque(),
+          /*reserveSpaceSizeInBytes=*/reserve_space.size()));
+    }
   }
 
   if (is_profiling) {
@@ -1541,45 +1672,97 @@ port::Status CudnnSupport::DoRnnBackwardImpl(
     }
   }
 
-  RETURN_IF_CUDNN_ERROR(cudnnRNNBackwardData(
-      /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
-      /*seqLength=*/model_dims.seq_length, /*yDesc=*/output_desc.handles(),
-      /*y=*/output_data.opaque(), /*dyDesc=*/output_desc.handles(),
-      /*dy=*/output_backprop_data.opaque(),
-      /*dhyDesc=*/output_h_desc.handle(),
-      /*dhy=*/output_h_backprop_data.opaque(),
-      /*dcyDesc=*/output_c_desc.handle(),
-      /*dcy=*/output_c_backprop_data.opaque(),
-      /*wDesc=*/rnn_desc.params_handle(), /*w=*/params.opaque(),
-      /*hxDesc=*/input_h_desc.handle(), /*hx=*/input_h_data.opaque(),
-      /*cxDesc=*/input_c_desc.handle(), /*cx=*/input_c_data.opaque(),
-      /*dxDesc=*/input_desc.handles(), /*dx=*/input_backprop_data->opaque(),
-      /*dhxDesc=*/input_h_desc.handle(),
-      /*dhx=*/input_h_backprop_data->opaque(),
-      /*dcxDesc=*/input_c_desc.handle(),
-      /*dcx=*/input_c_backprop_data->opaque(),
-      /*workspace=*/workspace.opaque(),
-      /*workSpaceSizeInBytes=*/workspace.size(),
-      /*reserveSpace=*/reserve_space_data->opaque(),
-      /*reserveSpaceSizeInBytes=*/reserve_space_data->size()));
-
-  if (params_backprop_data != nullptr) {
-    // Clear the dw to zeros.
-    stream->ThenMemZero(params_backprop_data, params_backprop_data->size());
-    // make the backward weight call
-    RETURN_IF_CUDNN_ERROR(cudnnRNNBackwardWeights(
+  if (input_desc.is_var_seq_lengths()) {
+#if CUDNN_VERSION >= 7201
+    RETURN_IF_CUDNN_ERROR(cudnnRNNBackwardDataEx(
+        /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+        /*yDesc=*/output_desc.data_handle(), /*y=*/output_data.opaque(),
+        /*dyDesc=*/output_desc.data_handle(),
+        /*dy=*/output_backprop_data.opaque(), NULL, NULL,
+        /*dhyDesc=*/output_h_desc.handle(),
+        /*dhy=*/output_h_backprop_data.opaque(),
+        /*dcyDesc=*/output_c_desc.handle(),
+        /*dcy=*/output_c_backprop_data.opaque(),
+        /*wDesc=*/rnn_desc.params_handle(), /*w=*/params.opaque(),
+        /*hxDesc=*/input_h_desc.handle(), /*hx=*/input_h_data.opaque(),
+        /*cxDesc=*/input_c_desc.handle(), /*cx=*/input_c_data.opaque(),
+        /*dxDesc=*/input_desc.data_handle(),
+        /*dx=*/input_backprop_data->opaque(),
+        /*dhxDesc=*/input_h_desc.handle(),
+        /*dhx=*/input_h_backprop_data->opaque(),
+        /*dcxDesc=*/input_c_desc.handle(),
+        /*dcx=*/input_c_backprop_data->opaque(), NULL, NULL,
+        /*workspace=*/workspace.opaque(),
+        /*workSpaceSizeInBytes=*/workspace.size(),
+        /*reserveSpace=*/reserve_space_data->opaque(),
+        /*reserveSpaceSizeInBytes=*/reserve_space_data->size()));
+#else
+    return port::Status(port::error::INVALID_ARGUMENT,
+                        "No supported cudnnRNNBackwardDataEx when "
+                        "CUDNN_VERSION < 7.2.1");
+#endif
+  } else {
+    RETURN_IF_CUDNN_ERROR(cudnnRNNBackwardData(
         /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
-        /*seqLength=*/model_dims.seq_length, /*xDesc=*/input_desc.handles(),
-        /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
-        /*hx=*/input_h_data.opaque(), /*yDesc=*/output_desc.handles(),
-        /*y=*/output_data.opaque(), /*workspace=*/workspace.opaque(),
+        /*seqLength=*/model_dims.max_seq_length, /*yDesc=*/output_desc.handles(),
+        /*y=*/output_data.opaque(), /*dyDesc=*/output_desc.handles(),
+        /*dy=*/output_backprop_data.opaque(),
+        /*dhyDesc=*/output_h_desc.handle(),
+        /*dhy=*/output_h_backprop_data.opaque(),
+        /*dcyDesc=*/output_c_desc.handle(),
+        /*dcy=*/output_c_backprop_data.opaque(),
+        /*wDesc=*/rnn_desc.params_handle(), /*w=*/params.opaque(),
+        /*hxDesc=*/input_h_desc.handle(), /*hx=*/input_h_data.opaque(),
+        /*cxDesc=*/input_c_desc.handle(), /*cx=*/input_c_data.opaque(),
+        /*dxDesc=*/input_desc.handles(), /*dx=*/input_backprop_data->opaque(),
+        /*dhxDesc=*/input_h_desc.handle(),
+        /*dhx=*/input_h_backprop_data->opaque(),
+        /*dcxDesc=*/input_c_desc.handle(),
+        /*dcx=*/input_c_backprop_data->opaque(),
+        /*workspace=*/workspace.opaque(),
         /*workSpaceSizeInBytes=*/workspace.size(),
-        /*dwDesc=*/rnn_desc.params_handle(),
-        /*dw=*/params_backprop_data->opaque(),
         /*reserveSpace=*/reserve_space_data->opaque(),
         /*reserveSpaceSizeInBytes=*/reserve_space_data->size()));
   }
 
+  if (params_backprop_data != nullptr) {
+    // Clear the dw to zeros.
+    stream->ThenMemZero(params_backprop_data, params_backprop_data->size());
+    if (input_desc.is_var_seq_lengths()) {
+#if CUDNN_VERSION >= 7201
+      RETURN_IF_CUDNN_ERROR(cudnnRNNBackwardWeightsEx(
+          /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+          /*xDesc=*/input_desc.data_handle(), /*x=*/input_data.opaque(),
+          /*hxDesc=*/input_h_desc.handle(), /*hx=*/input_h_data.opaque(),
+          /*yDesc=*/output_desc.data_handle(),
+          /*y=*/output_data.opaque(),
+          /*workspace=*/workspace.opaque(),
+          /*workSpaceSizeInBytes=*/workspace.size(),
+          /*dwDesc=*/rnn_desc.params_handle(),
+          /*dw=*/params_backprop_data->opaque(),
+          /*reserveSpace=*/reserve_space_data->opaque(),
+          /*reserveSpaceSizeInBytes=*/reserve_space_data->size()));
+#else
+      return port::Status(port::error::INVALID_ARGUMENT,
+                          "No supported cudnnRNNBackwardWeightsEx when "
+                          "CUDNN_VERSION < 7.2.1");
+#endif
+    } else {
+      // make the backward weight call
+      RETURN_IF_CUDNN_ERROR(cudnnRNNBackwardWeights(
+          /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc.handle(),
+          /*seqLength=*/model_dims.max_seq_length, /*xDesc=*/input_desc.handles(),
+          /*x=*/input_data.opaque(), /*hxDesc=*/input_h_desc.handle(),
+          /*hx=*/input_h_data.opaque(), /*yDesc=*/output_desc.handles(),
+          /*y=*/output_data.opaque(), /*workspace=*/workspace.opaque(),
+          /*workSpaceSizeInBytes=*/workspace.size(),
+          /*dwDesc=*/rnn_desc.params_handle(),
+          /*dw=*/params_backprop_data->opaque(),
+          /*reserveSpace=*/reserve_space_data->opaque(),
+          /*reserveSpaceSizeInBytes=*/reserve_space_data->size()));
+    }
+  }
+
   if (is_profiling) {
     if (!timer->Stop(AsCUDAStream(stream))) {
       return port::Status(port::error::INTERNAL, "Failed to stop timer");
@@ -1616,12 +1799,26 @@ CudnnSupport::createRnnDescriptor(
 }
 
 port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
-CudnnSupport::createRnnSequenceTensorDescriptor(int seq_length, int batch_size,
+CudnnSupport::createRnnSequenceTensorDescriptor(int max_seq_length, int batch_size,
                                                 int data_size,
                                                 dnn::DataType data_type) {
   SE_ASSIGN_OR_RETURN(CudnnRnnSequenceTensorDescriptor descriptor,
                       CudnnRnnSequenceTensorDescriptor::Create(
-                          parent_, seq_length, batch_size, data_size,
+                          parent_, max_seq_length, batch_size, data_size,
+                          ToCudnnDataType(data_type)));
+  return std::unique_ptr<dnn::RnnSequenceTensorDescriptor>(
+      new CudnnRnnSequenceTensorDescriptor(std::move(descriptor)));
+}
+
+port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
+CudnnSupport::createRnnSequenceTensorDescriptor(
+    int max_seq_length, int batch_size, int data_size,
+    const absl::Span<const int>& seq_lengths,
+    dnn::DataType data_type) {
+  SE_ASSIGN_OR_RETURN(CudnnRnnSequenceTensorDescriptor descriptor,
+                      CudnnRnnSequenceTensorDescriptor::Create(
+                          parent_, max_seq_length, batch_size, data_size,
+                          seq_lengths,
                           ToCudnnDataType(data_type)));
   return std::unique_ptr<dnn::RnnSequenceTensorDescriptor>(
       new CudnnRnnSequenceTensorDescriptor(std::move(descriptor)));
@@ -1668,7 +1865,6 @@ bool CudnnSupport::DoRnnForward(
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_h_desc);
   const CudnnRnnStateTensorDescriptor& cudnn_output_c_desc =
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_c_desc);
-
   return IsStatusOk(
       DoRnnForwardImpl<Eigen::half>(
           stream, cudnn_rnn_desc, cudnn_input_desc, input_data,
@@ -1710,7 +1906,6 @@ bool CudnnSupport::DoRnnForward(
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_h_desc);
   const CudnnRnnStateTensorDescriptor& cudnn_output_c_desc =
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_c_desc);
-
   return IsStatusOk(
       DoRnnForwardImpl<float>(
           stream, cudnn_rnn_desc, cudnn_input_desc, input_data,
@@ -1753,7 +1948,6 @@ bool CudnnSupport::DoRnnForward(
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_h_desc);
   const CudnnRnnStateTensorDescriptor& cudnn_output_c_desc =
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_c_desc);
-
   return IsStatusOk(
       DoRnnForwardImpl<double>(
           stream, cudnn_rnn_desc, cudnn_input_desc, input_data,
@@ -1803,7 +1997,6 @@ bool CudnnSupport::DoRnnBackward(
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_h_desc);
   const CudnnRnnStateTensorDescriptor& cudnn_output_c_desc =
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_c_desc);
-
   return IsStatusOk(
       DoRnnBackwardImpl<Eigen::half>(
           stream, cudnn_rnn_desc, cudnn_input_desc, input_data,
@@ -1855,7 +2048,6 @@ bool CudnnSupport::DoRnnBackward(
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_h_desc);
   const CudnnRnnStateTensorDescriptor& cudnn_output_c_desc =
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_c_desc);
-
   return IsStatusOk(
       DoRnnBackwardImpl<float>(
           stream, cudnn_rnn_desc, cudnn_input_desc, input_data,
@@ -1908,7 +2100,6 @@ bool CudnnSupport::DoRnnBackward(
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_h_desc);
   const CudnnRnnStateTensorDescriptor& cudnn_output_c_desc =
       static_cast<const CudnnRnnStateTensorDescriptor&>(output_c_desc);
-
   return IsStatusOk(
       DoRnnBackwardImpl<double>(
           stream, cudnn_rnn_desc, cudnn_input_desc, input_data,
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index 0641be140d..044ed54514 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -55,10 +55,16 @@ class CudnnSupport : public dnn::DnnSupport {
       ScratchAllocator* state_allocator) override;
 
   port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
-  createRnnSequenceTensorDescriptor(int seq_length, int batch_size,
+  createRnnSequenceTensorDescriptor(int max_seq_length, int batch_size,
                                     int data_size,
                                     dnn::DataType data_type) override;
 
+  port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
+  createRnnSequenceTensorDescriptor(int max_seq_length, int batch_size,
+                                    int data_size,
+                                    const absl::Span<const int>& seq_lengths,
+                                    dnn::DataType data_type) override;
+
   port::StatusOr<std::unique_ptr<dnn::RnnStateTensorDescriptor>>
   createRnnStateTensorDescriptor(int num_layer, int batch_size, int data_size,
                                  dnn::DataType data_type) override;
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 43738d2d1d..786b126260 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -2081,13 +2081,24 @@ class DnnSupport {
   // sequence. The caller retains the ownership of the returned descriptor.
   //
   // Arguments:
-  //  seq_length: the length of the sequence.
+  //  max_seq_length: the max length of the sequences.
   //  batch_size: the size of a minibatch.
   //  data_size: the size of the state.
+  //  seq_lenghs: the lengths of sequences in a batch.
   //  data_type: an enum to specify the type for the underlying data.
   virtual port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
-  createRnnSequenceTensorDescriptor(int seq_length, int batch_size,
-                                    int data_size, dnn::DataType data_type) {
+  createRnnSequenceTensorDescriptor(int max_seq_length, int batch_size,
+                                    int data_size,
+                                    dnn::DataType data_type) {
+    return port::Status(port::error::UNIMPLEMENTED,
+                        "createRnnSequenceTensorDescriptor is unimplemented");
+  }
+
+  virtual port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
+  createRnnSequenceTensorDescriptor(int max_seq_length, int batch_size,
+                                    int data_size,
+                                    const absl::Span<const int>& seq_lengths,
+                                    dnn::DataType data_type) {
     return port::Status(port::error::UNIMPLEMENTED,
                         "createRnnSequenceTensorDescriptor is unimplemented");
   }
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index d1d0bd9bc2..e4b3652e46 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -389,7 +389,7 @@ StreamExecutor::createRnnDescriptor(
 }
 
 port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
-StreamExecutor::createRnnSequenceTensorDescriptor(int seq_length,
+StreamExecutor::createRnnSequenceTensorDescriptor(int max_seq_length,
                                                   int batch_size, int data_size,
                                                   dnn::DataType data_type) {
   dnn::DnnSupport *dnn_support = AsDnn();
@@ -397,8 +397,24 @@ StreamExecutor::createRnnSequenceTensorDescriptor(int seq_length,
     return port::Status(port::error::UNKNOWN,
                         "Fail to find the dnn implementation.");
   }
-  return dnn_support->createRnnSequenceTensorDescriptor(seq_length, batch_size,
-                                                        data_size, data_type);
+  return dnn_support->createRnnSequenceTensorDescriptor(max_seq_length, batch_size,
+                                                        data_size,
+                                                        data_type);
+}
+
+port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
+StreamExecutor::createRnnSequenceTensorDescriptor(
+    int max_seq_length, int batch_size, int data_size,
+    const absl::Span<const int>& seq_lengths, dnn::DataType data_type) {
+  dnn::DnnSupport *dnn_support = AsDnn();
+  if (!dnn_support) {
+    return port::Status(port::error::UNKNOWN,
+                        "Fail to find the dnn implementation.");
+  }
+  return dnn_support->createRnnSequenceTensorDescriptor(max_seq_length, batch_size,
+                                                        data_size,
+                                                        seq_lengths,
+                                                        data_type);
 }
 
 port::StatusOr<std::unique_ptr<dnn::RnnStateTensorDescriptor>>
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index d259a4ab63..36d6be0e6c 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -413,8 +413,15 @@ class StreamExecutor {
   // Create a RNN sequence descriptor that specifies either the input or output
   // sequence. The caller retains the ownership of the returned descriptor.
   port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
-  createRnnSequenceTensorDescriptor(int seq_length, int batch_size,
-                                    int data_size, dnn::DataType data_type);
+  createRnnSequenceTensorDescriptor(int max_seq_length, int batch_size,
+                                    int data_size,
+                                    dnn::DataType data_type);
+
+  port::StatusOr<std::unique_ptr<dnn::RnnSequenceTensorDescriptor>>
+  createRnnSequenceTensorDescriptor(int max_seq_length, int batch_size,
+                                    int data_size,
+                                    const absl::Span<const int>& seq_lengths,
+                                    dnn::DataType data_type);
 
   // Create an RNN state descriptor that specifies the input or hidden state.
   // The caller retains the ownership of the returned descriptor.
-- 
GitLab


From 43a1ee8ff24925c342533afd322b59271d396596 Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Mon, 17 Dec 2018 16:00:34 -0800
Subject: [PATCH 708/873] Check whether we are running in Google Cloud and
 raise a RuntimeError if we are and no TPU names are specified.

PiperOrigin-RevId: 225905831
---
 .../cluster_resolver/tpu_cluster_resolver.py  | 23 ++++++++
 .../tpu_cluster_resolver_test.py              | 53 +++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
index 52ac07d7ea..529a443412 100644
--- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
+++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver.py
@@ -22,6 +22,8 @@ import collections
 import os
 import re
 
+from six.moves import urllib
+from six.moves.urllib.error import URLError
 from six.moves.urllib.request import Request
 from six.moves.urllib.request import urlopen
 
@@ -160,6 +162,20 @@ class TPUClusterResolver(ClusterResolver):
   def _environmentDiscoveryUrl():
     return os.environ.get(_DISCOVERY_SERVICE_URL_ENV_VARIABLE)
 
+  @staticmethod
+  def _isRunningInGCE():
+    """Checks for GCE presence by attempting to query the metadata service."""
+    try:
+      req = Request('http://metadata.google.internal/computeMetadata/v1',
+                    headers={'Metadata-Flavor': 'Google'})
+      resp = urllib.request.urlopen(req, timeout=1)
+      info = resp.info()
+      if 'Metadata-Flavor' in info and info['Metadata-Flavor'] == 'Google':
+        return True
+    except URLError:
+      pass
+    return False
+
   def __init__(self,
                tpu=None,
                zone=None,
@@ -208,6 +224,8 @@ class TPUClusterResolver(ClusterResolver):
     Raises:
       ImportError: If the googleapiclient is not installed.
       ValueError: If no TPUs are specified.
+      RuntimeError: If an empty TPU name is specified and this is running in a
+        Google Cloud environment.
     """
     if isinstance(tpu, list):
       if not tpu:
@@ -230,6 +248,11 @@ class TPUClusterResolver(ClusterResolver):
 
     self._tpu = compat.as_bytes(tpu)  # self._tpu is always bytes
 
+    # If we are running in Cloud and don't specify a TPU name
+    if self._isRunningInGCE() and not self._tpu:
+      raise RuntimeError('You need to specify a TPU Name if you are running in '
+                         'the Google Cloud environment.')
+
     # By default the task_type is 'worker` and the task_index is 0 (which is the
     # first worker in the task).
     self.task_type = job_name
diff --git a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py
index 27d92608fa..58c332a509 100644
--- a/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py
+++ b/tensorflow/python/distribute/cluster_resolver/tpu_cluster_resolver_test.py
@@ -20,6 +20,9 @@ from __future__ import print_function
 
 import os
 
+import six
+from six.moves.urllib.error import URLError
+
 from tensorflow.python.client import session
 from tensorflow.python.distribute import cluster_resolver
 from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
@@ -64,6 +67,28 @@ def mock_request_compute_metadata(cls, *args, **kwargs):
   return ''
 
 
+def mock_is_running_in_gce(cls, *args, **kwargs):
+  del cls, args, kwargs  # Unused.
+  return True
+
+
+def mock_is_not_running_in_gce(cls, *args, **kwargs):
+  del cls, args, kwargs  # Unused.
+  return False
+
+
+def mock_running_in_gce_urlopen(cls, *args, **kwargs):
+  del cls, args, kwargs  # Unused.
+  mock_response = mock.MagicMock()
+  mock_response.info.return_value = {'Metadata-Flavor': 'Google'}
+  return mock_response
+
+
+def mock_not_running_in_gce_urlopen(cls, *args, **kwargs):
+  del cls, args, kwargs  # Unused.
+  raise URLError(reason='Host does not exist.')
+
+
 class TPUClusterResolverTest(test.TestCase):
 
   def _verifyClusterSpecEquality(self, cluster_spec, expected_proto):
@@ -104,6 +129,25 @@ class TPUClusterResolverTest(test.TestCase):
 
     return mock_client
 
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_isRunningInGCE',
+                     mock_is_running_in_gce)
+  def testCheckRunningInGceWithNoTpuName(self):
+    with self.assertRaisesRegexp(RuntimeError, '.*Google Cloud.*'):
+      cluster_resolver.TPUClusterResolver(tpu='')
+
+  @mock.patch.object(six.moves.urllib.request,
+                     'urlopen',
+                     mock_running_in_gce_urlopen)
+  def testIsRunningInGce(self):
+    self.assertTrue(cluster_resolver.TPUClusterResolver._isRunningInGCE())
+
+  @mock.patch.object(six.moves.urllib.request,
+                     'urlopen',
+                     mock_not_running_in_gce_urlopen)
+  def testIsNotRunningInGce(self):
+    self.assertFalse(cluster_resolver.TPUClusterResolver._isRunningInGCE())
+
   @mock.patch.object(cluster_resolver.TPUClusterResolver,
                      '_requestComputeMetadata',
                      mock_request_compute_metadata)
@@ -388,6 +432,9 @@ class TPUClusterResolverTest(test.TestCase):
     self.assertEqual(should_resolve, resolver._shouldResolve(),
                      "TPU: '%s'" % tpu)
 
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_isRunningInGCE',
+                     mock_is_not_running_in_gce)
   def testShouldResolveNoName(self):
     self.verifyShouldResolve('', False)
 
@@ -595,6 +642,9 @@ class TPUClusterResolverTest(test.TestCase):
           {0: [0], 1: [1, 2]})
 
   @mock.patch.object(session.BaseSession, 'list_devices')
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_isRunningInGCE',
+                     mock_is_not_running_in_gce)
   def testNumAcceleratorsSuccess(self, mock_list_devices):
     device_names = [
         '/job:tpu_worker/task:0/device:TPU:0',
@@ -616,6 +666,9 @@ class TPUClusterResolverTest(test.TestCase):
     self.assertEqual(resolver.num_accelerators(), 2)
 
   @mock.patch.object(session.BaseSession, 'list_devices')
+  @mock.patch.object(cluster_resolver.TPUClusterResolver,
+                     '_isRunningInGCE',
+                     mock_is_not_running_in_gce)
   def testNumAcceleratorsRetryFailure(self, mock_list_devices):
     resolver = cluster_resolver.TPUClusterResolver(tpu='')
     mock_list_devices.side_effect = errors.DeadlineExceededError(
-- 
GitLab


From d23ca91a256c82d19f6d08945a800f53f15460b2 Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Mon, 17 Dec 2018 16:03:05 -0800
Subject: [PATCH 709/873] Create ModeKeysV2 class which reference TensorFlow
 ModeKeys constants for consistency. Export ModeKeysV2 as
 tf.estimator.ModeKeys in V2. Keep existing ModeKeys only exported in V1.

PiperOrigin-RevId: 225906469
---
 .../tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
index bf7c1abcd8..f3dfe7296f 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.-mode-keys.pbtxt
@@ -1,6 +1,6 @@
 path: "tensorflow.estimator.ModeKeys"
 tf_class {
-  is_instance: "<class \'tensorflow_estimator.python.estimator.model_fn.ModeKeys\'>"
+  is_instance: "<class \'tensorflow_estimator.python.estimator.mode_keys.ModeKeysV2\'>"
   is_instance: "<type \'object\'>"
   member {
     name: "EVAL"
-- 
GitLab


From 6a4ade1924ebcef3e60d626ab5738d0d0fd3c704 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Mon, 17 Dec 2018 16:19:34 -0800
Subject: [PATCH 710/873] Add support for TPUStrategy to be able to unwrap
 TPUMirroredVariable.

PiperOrigin-RevId: 225909038
---
 tensorflow/contrib/distribute/python/tpu_strategy.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index 7ea245eb6e..d16166f627 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -521,6 +521,11 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
       # be represented using a PerReplica wrapper instead of a list with
       # one entry per device.
       return tuple(val)
+    elif isinstance(val, values.TPUMirroredVariable):
+      # pylint: disable=protected-access
+      if values._enclosing_tpu_context() is not None:
+        return (val,)
+      return tuple(val._get(device=d) for d in sorted(val._index.keys()))
     return (val,)
 
   def value_container(self, value):
-- 
GitLab


From fc26aad82986ccbb0fa019470651038d09d23e7a Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Mon, 17 Dec 2018 16:27:31 -0800
Subject: [PATCH 711/873] Allow delegate application after tensor allocation

PiperOrigin-RevId: 225910199
---
 tensorflow/lite/arena_planner.cc      |  5 ++++
 tensorflow/lite/arena_planner_test.cc | 41 +++++++++++++++++++++++++++
 tensorflow/lite/core/subgraph.cc      | 20 +++++++++++++
 tensorflow/lite/core/subgraph.h       |  4 +++
 tensorflow/lite/interpreter_test.cc   | 30 +++++++++++++++++++-
 5 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/arena_planner.cc b/tensorflow/lite/arena_planner.cc
index 8200b6adaa..8a5ef11312 100644
--- a/tensorflow/lite/arena_planner.cc
+++ b/tensorflow/lite/arena_planner.cc
@@ -55,12 +55,17 @@ TfLiteStatus ArenaPlanner::ResetAllocations() {
   TF_LITE_ENSURE_STATUS(persistent_arena_.Clear());
   allocs_.clear();
   allocs_.resize(graph_info_->num_tensors());
+  // Note that we only clear the alloc_queue_ when re-planning allocations, as
+  // it should only change when the graph topology itself changes.
   return kTfLiteOk;
 }
 
 TfLiteStatus ArenaPlanner::PlanAllocations() {
   // Invalidate any existing data.
   TF_LITE_ENSURE_STATUS(ResetAllocations());
+  // The alloc_queue_ is specific to the graph topology, and will be
+  // completely reconstructed from graph data here.
+  alloc_queue_.clear();
 
   // Keeps track of references to each tensor.
   std::vector<int> refcounts(graph_info_->num_tensors(), 0);
diff --git a/tensorflow/lite/arena_planner_test.cc b/tensorflow/lite/arena_planner_test.cc
index 479f25cafe..d02d8b34c0 100644
--- a/tensorflow/lite/arena_planner_test.cc
+++ b/tensorflow/lite/arena_planner_test.cc
@@ -108,6 +108,14 @@ class TestGraph {
     variables_ = variables;
   }
 
+  void Swap(TestGraph* other) {
+    std::swap(nodes_, other->nodes_);
+    std::swap(tensors_, other->tensors_);
+    std::swap(inputs_, other->inputs_);
+    std::swap(outputs_, other->outputs_);
+    std::swap(variables_, other->variables_);
+  }
+
  private:
   std::vector<TfLiteNode> nodes_;
   std::vector<TfLiteTensor> tensors_;
@@ -163,6 +171,11 @@ class ArenaPlannerTest : public ::testing::Test {
     CHECK(planner_->PlanAllocations() == kTfLiteOk);
   }
 
+  void SwapGraph(TestGraph* graph) {
+    graph_->Swap(graph);
+    CHECK(planner_->PlanAllocations() == kTfLiteOk);
+  }
+
   void Execute(int start, int end) {
     CHECK(planner_->ExecuteAllocations(start, end) == kTfLiteOk);
   }
@@ -493,6 +506,34 @@ TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
   EXPECT_EQ(GetOffset(10), 0);
 }
 
+TEST_F(ArenaPlannerTest, ModifiedGraph) {
+  TestGraph graph({0, 1},
+                  {
+                      /* in, out, tmp */
+                      {{0, 1}, {2}, {}},     // First op
+                      {{2, 0}, {4, 5}, {}},  // Second op
+                      {{4, 5}, {3}, {}}      // Third op
+                  },
+                  {3});
+  SetGraph(&graph, /*preserve_inputs=*/true);
+  Execute(0, 10);
+
+  // Now update the graph data used by the existing allocator. It should behave
+  // as if it had been recreated with the new graph.
+  TestGraph pruned_graph({0, 1},
+                         {
+                             /* in, out, tmp */
+                             {{0, 1}, {3}, {}},  // First op
+                         },
+                         {3});
+  SwapGraph(&pruned_graph);
+  Execute(0, 10);
+
+  EXPECT_EQ(GetOffset(0), 0);
+  EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+  EXPECT_EQ(GetOffset(3), GetOffsetAfter(1));
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/core/subgraph.cc b/tensorflow/lite/core/subgraph.cc
index 855c94e206..763ab56575 100644
--- a/tensorflow/lite/core/subgraph.cc
+++ b/tensorflow/lite/core/subgraph.cc
@@ -932,6 +932,12 @@ void Subgraph::SwitchToKernelContext() {
 }
 
 TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
+  if (state_ == kStateInvokableAndImmutable) {
+    ReportError(
+        "ModifyGraphWithDelegate is disallowed when graph is immutable.");
+    return kTfLiteError;
+  }
+
   if (!(delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors)) {
     int last_execution_plan_index_prepared;
     TF_LITE_ENSURE_OK(&context_, PrepareOpsStartingAt(
@@ -944,6 +950,8 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
     }
   }
 
+  const bool was_invokable_before_delegate = state_ == kStateInvokable;
+
   // TODO(aselle): Consider if it is worth storing pointers to delegates.
   // Setup additional context interface.
   SwitchToDelegateContext();
@@ -955,6 +963,13 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
 
   TF_LITE_ENSURE_OK(context_, status);
 
+  // If the memory planner has already been created, we need to execute
+  // planning again to account for the updated graph topology.
+  if (memory_planner_) {
+    state_ = kStateUninvokable;
+    TF_LITE_ENSURE_OK(context_, memory_planner_->PlanAllocations());
+  }
+
   if (!(delegate->flags & kTfLiteDelegateFlagsAllowDynamicTensors)) {
     // Reset the state to force tensor/op reallocation.
     state_ = kStateUninvokable;
@@ -963,6 +978,11 @@ TfLiteStatus Subgraph::ModifyGraphWithDelegate(TfLiteDelegate* delegate) {
     // After using a delegate which doesn't support dynamic tensors, make the
     // entire graph immutable.
     state_ = kStateInvokableAndImmutable;
+  } else if (was_invokable_before_delegate) {
+    // If the graph was invokable prior to delegate application, flush
+    // allocation now to leave it in a consistent state.
+    TF_LITE_ENSURE_OK(context_, AllocateTensors());
+    TF_LITE_ENSURE_EQ(context_, state_, kStateInvokable);
   }
 
   return status;
diff --git a/tensorflow/lite/core/subgraph.h b/tensorflow/lite/core/subgraph.h
index 8fda701415..979226b864 100644
--- a/tensorflow/lite/core/subgraph.h
+++ b/tensorflow/lite/core/subgraph.h
@@ -393,6 +393,10 @@ class Subgraph {
   // Allow a delegate to look at the graph and modify the graph to handle
   // parts of the graph themselves. After this is called, the graph may
   // contain new nodes that replace 1 more nodes.
+  // NOTE: If tensors were allocated prior to delegate application, they will
+  // be reallocated if the graph was modified (i.e., the caller does *not* need
+  // to explicitly call |AllocateTensors()| again). If tensors were unallocated,
+  // they will remain unallocated after delegate application.
   // WARNING: This is an experimental API and subject to change.
   TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegate* delegate);
 
diff --git a/tensorflow/lite/interpreter_test.cc b/tensorflow/lite/interpreter_test.cc
index 78b5d1b887..40e5134bc7 100644
--- a/tensorflow/lite/interpreter_test.cc
+++ b/tensorflow/lite/interpreter_test.cc
@@ -1155,7 +1155,22 @@ TEST_F(TestDelegate, BasicDelegate) {
   EXPECT_EQ(params->output_tensors->data[1], 4);
 }
 
-TEST_F(TestDelegate, ComplexDeligate) {
+TEST_F(TestDelegate, StaticDelegateMakesGraphImmutable) {
+  delegate_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate({0, 1, 2}));
+  ASSERT_EQ(
+      interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
+      kTfLiteOk);
+  ASSERT_EQ(interpreter_->execution_plan().size(), 1);
+
+  // As the delegate doesn't support dynamic resizing, further graph mutation is
+  // prohibited.
+  ASSERT_NE(interpreter_->ResizeInputTensor(0, {0}), kTfLiteOk);
+  ASSERT_NE(
+      interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()),
+      kTfLiteOk);
+}
+
+TEST_F(TestDelegate, ComplexDelegate) {
   delegate_ = std::unique_ptr<SimpleDelegate>(new SimpleDelegate({1, 2}));
   interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate());
 
@@ -1316,6 +1331,19 @@ TEST_F(TestDelegateWithDynamicTensors, AllowDynamicTensors) {
   ASSERT_EQ(interpreter_->execution_plan()[0], 1);
 }
 
+TEST_F(TestDelegateWithDynamicTensors, ModifyGraphAfterAllocate) {
+  // Trigger allocation *before* delegate application.
+  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+
+  delegate_.flags = kTfLiteDelegateFlagsAllowDynamicTensors;
+  ASSERT_EQ(interpreter_->ModifyGraphWithDelegate(&delegate_), kTfLiteOk);
+  ASSERT_EQ(interpreter_->execution_plan().size(), 1);
+  ASSERT_EQ(interpreter_->execution_plan()[0], 1);
+
+  // Allocation should still succeed.
+  ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk);
+}
+
 TEST(TestDelegateOwnership, ProperlyDisposed) {
   struct TfLiteInterpreterOwnedDelegate : public TfLiteDelegate {
     TfLiteInterpreterOwnedDelegate(bool* destroyed, bool* prepared)
-- 
GitLab


From 901a8ec928d0173ea90e834dfdf492acaad0fad2 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 17 Dec 2018 16:38:12 -0800
Subject: [PATCH 712/873] Support captures in
 tf.compat.v1.wrap_function.prune()

Also sets the output structure instead of returning the flattened structure

PiperOrigin-RevId: 225911821
---
 tensorflow/python/eager/wrap_function.py      |  8 +++++++-
 tensorflow/python/eager/wrap_function_test.py | 16 ++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/wrap_function.py b/tensorflow/python/eager/wrap_function.py
index 2b39e99a4e..a3606c0eba 100644
--- a/tensorflow/python/eager/wrap_function.py
+++ b/tensorflow/python/eager/wrap_function.py
@@ -69,9 +69,15 @@ class WrappedFunction(function.Function):
       pruned_graph = func_graph.FuncGraph("pruned")
       sink_tensor = array_ops.identity_n(flat_fetches)[0]
     lift_map = lift_to_graph.lift_to_graph(
-        sink_tensor, pruned_graph, sources=flat_feeds)
+        sink_tensor, pruned_graph,
+        sources=flat_feeds + self.graph.internal_captures)
     pruned_graph.outputs.extend(lift_map[x] for x in flat_fetches)
+    for external_capture, internal_capture in self.graph.captures.items():
+      pruned_graph.captures[external_capture] = lift_map[internal_capture]
     pruned_graph.inputs.extend(lift_map[x] for x in flat_feeds)
+    pruned_graph.inputs.extend(pruned_graph.captures.values())
+    pruned_graph.structured_outputs = nest.map_structure(
+        lambda node: lift_map[node], fetches)
     pruned_fn = WrappedFunction(
         pruned_graph, variable_holder=self._variable_holder)
     pruned_fn._num_positional_args = len(flat_feeds)  # pylint: disable=protected-access
diff --git a/tensorflow/python/eager/wrap_function_test.py b/tensorflow/python/eager/wrap_function_test.py
index d34e9228f3..0c70a1221c 100644
--- a/tensorflow/python/eager/wrap_function_test.py
+++ b/tensorflow/python/eager/wrap_function_test.py
@@ -23,6 +23,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
@@ -79,6 +80,21 @@ class WrapFunctionTest(test.TestCase):
     f_wrapped = wrap_function.wrap_function(f, [])
     self.assertAllEqual(1.0, f_wrapped())
 
+  def testCaptures(self):
+
+    v1 = variables.Variable(2.)
+
+    def f():
+      v2 = variables.Variable(3.)
+      return array_ops.identity(v1 * v2 * constant_op.constant(1.), 'fetch')
+
+    f_wrapped = wrap_function.wrap_function(f, [])
+    self.assertAllEqual(6.0, f_wrapped())
+    pruned = f_wrapped.prune(
+        feeds=(),
+        fetches=f_wrapped.graph.get_tensor_by_name('fetch:0'))
+    self.assertAllEqual(6.0, pruned())
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
-- 
GitLab


From 8714717ef5833140c56689e5e0b4afc257164d97 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 16:44:17 -0800
Subject: [PATCH 713/873] Remove tf.estimator.inputs from TF 2.0 and add rename
 of tf.estimator.inputs to tf.compat.v1.estimator.inputs to tf_upgrade_v2.py
 script

PiperOrigin-RevId: 225912626
---
 .../api/golden/v2/tensorflow.estimator.inputs.pbtxt   | 11 -----------
 .../tools/api/golden/v2/tensorflow.estimator.pbtxt    |  4 ----
 tensorflow/tools/compatibility/tf_upgrade_v2.py       |  2 ++
 tensorflow/tools/compatibility/tf_upgrade_v2_test.py  | 11 +++++++++++
 4 files changed, 13 insertions(+), 15 deletions(-)
 delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.estimator.inputs.pbtxt

diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.inputs.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.inputs.pbtxt
deleted file mode 100644
index b318fea1f8..0000000000
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.inputs.pbtxt
+++ /dev/null
@@ -1,11 +0,0 @@
-path: "tensorflow.estimator.inputs"
-tf_module {
-  member_method {
-    name: "numpy_input_fn"
-    argspec: "args=[\'x\', \'y\', \'batch_size\', \'num_epochs\', \'shuffle\', \'queue_capacity\', \'num_threads\'], varargs=None, keywords=None, defaults=[\'None\', \'128\', \'1\', \'None\', \'1000\', \'1\'], "
-  }
-  member_method {
-    name: "pandas_input_fn"
-    argspec: "args=[\'x\', \'y\', \'batch_size\', \'num_epochs\', \'shuffle\', \'queue_capacity\', \'num_threads\', \'target_column\'], varargs=None, keywords=None, defaults=[\'None\', \'128\', \'1\', \'None\', \'1000\', \'1\', \'target\'], "
-  }
-}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
index 6f57505afe..add8ef5e65 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.pbtxt
@@ -180,10 +180,6 @@ tf_module {
     name: "export"
     mtype: "<type \'module\'>"
   }
-  member {
-    name: "inputs"
-    mtype: "<type \'module\'>"
-  }
   member_method {
     name: "add_metrics"
     argspec: "args=[\'estimator\', \'metric_fn\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py
index 06a7bb781d..2662889a60 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py
@@ -360,6 +360,8 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
             "tf.space_to_batch",
         "tf.nn.space_to_batch":
             "tf.space_to_batch",
+        "tf.estimator.inputs":
+            "tf.compat.v1.estimator.inputs",
         "tf.extract_image_patches":
             "tf.image.extract_image_patches",
         "tf.gfile.Copy":
diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
index d5428e7536..270d93065e 100644
--- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
+++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py
@@ -529,6 +529,17 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map
     _, unused_report, unused_errors, new_text = self._upgrade(text)
     self.assertEqual(new_text, expected_text)
 
+  def testEstimatorInputs(self):
+    text = "tf.estimator.inputs.numpy_input_fn(0)"
+    expected_text = "tf.compat.v1.estimator.inputs.numpy_input_fn(0)"
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
+    text = "tf.estimator.inputs.pandas_input_fn(0)"
+    expected_text = "tf.compat.v1.estimator.inputs.pandas_input_fn(0)"
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    self.assertEqual(new_text, expected_text)
+
   def testBatchToSpace(self):
     text = "tf.batch_to_space_nd(input, block_shape, crops, name)"
     expected_text = "tf.batch_to_space(input, block_shape, crops, name)"
-- 
GitLab


From 2d28cb50cffd8360dbab52779b13320bd9fc0127 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Mon, 17 Dec 2018 16:53:42 -0800
Subject: [PATCH 714/873] Move test from while_v2_test.py to
 control_flow_ops_py_test.py.

This is to make sure we have coverage of explicitly instantiating a
TensorArray and passing it to a while_loop in both v1 and v2 control
flow, in a way they're actually called (while_v2_test.py calls
while_v2 directly, instead of going through
control_flow_ops.while_loop).

PiperOrigin-RevId: 225913951
---
 .../kernel_tests/control_flow_ops_py_test.py  | 34 +++++++++++++++++++
 .../python/kernel_tests/while_v2_test.py      | 34 -------------------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 39ceb0d749..256b9c3166 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -63,6 +63,8 @@ from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import script_ops
 from tensorflow.python.ops import state_ops
+from tensorflow.python.ops import tensor_array_grad  # pylint: disable=unused-import
+from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.ops import while_v2  # pylint: disable=unused-import
@@ -2919,6 +2921,38 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r, param)[0]
       self.assertAllClose(107520.0, self.evaluate(r))
 
+  @test_util.run_deprecated_v1
+  def testNestedWhileAndTensorArray(self):
+    n = constant_op.constant(3.0)
+
+    def Body(row, ta, n):
+
+      def InnerBody(row, col, ta, n):
+        # Note: row and col are 1-based.
+        ta = ta.write(
+            math_ops.cast(n * (row - 1.) + col - 1., dtypes.int32), row * col)
+        return row, col + 1., ta, n
+
+      # TODO(b/118457764): Remove n from loop_vars from both loops once fixed.
+      ta = control_flow_ops.while_loop(
+          lambda _, col, _1, n: col <= n,
+          InnerBody, [row, constant_op.constant(1.), ta, n],
+          return_same_structure=False)[2]
+      return row + 1., ta, n
+
+    ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=9)
+    ta = control_flow_ops.while_loop(
+        lambda row, _, _1: row <= n,
+        Body, [constant_op.constant(1.), ta, n],
+        return_same_structure=False)[1]
+
+    output = array_ops.reshape(ta.stack(), [3, 3])
+    self.assertAllEqual(
+        self.evaluate(output), [[1., 2., 3.], [2., 4., 6.], [3., 6., 9.]])
+    # TODO(b/117675481): This does not work with current TA. Enable with new TA.
+    # grad = gradients_impl.gradients(output, [n])
+    # self.assertEqual(self.evaluate(grad), 3.5)
+
   @test_util.run_deprecated_v1
   def testWhileGrad_StopGrad(self):
     with self.cached_session():
diff --git a/tensorflow/python/kernel_tests/while_v2_test.py b/tensorflow/python/kernel_tests/while_v2_test.py
index cae459a34e..6567ac9429 100644
--- a/tensorflow/python/kernel_tests/while_v2_test.py
+++ b/tensorflow/python/kernel_tests/while_v2_test.py
@@ -33,8 +33,6 @@ from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import list_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import tensor_array_grad  # pylint: disable=unused-import
-from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import while_v2
 from tensorflow.python.ops.control_flow_ops import while_loop as while_loop_v1
 from tensorflow.python.ops.while_v2 import while_loop as while_loop_v2
@@ -410,38 +408,6 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
       self.assertEqual(self.evaluate(ret), 16.)
       self.assertSequenceEqual(self.evaluate(grad), [32.])
 
-  @test_util.run_deprecated_v1
-  def testNestedWhileAndTensorArray(self):
-    n = constant_op.constant(3.0)
-
-    def Body(row, ta, n):
-
-      def InnerBody(row, col, ta, n):
-        # Note: row and col are 1-based.
-        ta = ta.write(
-            math_ops.cast(n * (row - 1.) + col - 1., dtypes.int32), row * col)
-        return row, col + 1., ta, n
-
-      # TODO(b/118457764): Remove n from loop_vars from both loops once fixed.
-      ta = while_loop_v2(
-          lambda _, col, _1, n: col <= n,
-          InnerBody, [row, constant_op.constant(1.), ta, n],
-          return_same_structure=False)[2]
-      return row + 1., ta, n
-
-    ta = tensor_array_ops.TensorArray(dtype=dtypes.float32, size=9)
-    ta = while_loop_v2(
-        lambda row, _, _1: row <= n,
-        Body, [constant_op.constant(1.), ta, n],
-        return_same_structure=False)[1]
-
-    output = array_ops.reshape(ta.stack(), [3, 3])
-    self.assertAllEqual(
-        self.evaluate(output), [[1., 2., 3.], [2., 4., 6.], [3., 6., 9.]])
-    # TODO(b/117675481): This does not work with current TA. Enable with new TA.
-    # grad = gradients_impl.gradients(output, [n])
-    # self.assertEqual(self.evaluate(grad), 3.5)
-
   @test_util.run_deprecated_v1
   def testForwardPassRewrite(self):
     x = constant_op.constant(1.0, name="x")
-- 
GitLab


From 3649760af5d94ce087f6eb90659e40ce546cdc61 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Mon, 17 Dec 2018 17:01:49 -0800
Subject: [PATCH 715/873] Internal Change

PiperOrigin-RevId: 225915018
---
 tensorflow/tools/ci_build/install/install_pip_packages.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 3470488cc5..eeadabaa73 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -128,5 +128,5 @@ pip2 install --upgrade h5py==2.8.0
 pip3 install --upgrade h5py==2.8.0
 
 # Estimator
-pip2 install tensorflow_estimator --no-deps
-pip3 install tensorflow_estimator --no-deps
+pip2 install tf-estimator-nightly==1.12.0.dev20181203 --no-deps
+pip3 install tf-estimator-nightly==1.12.0.dev20181203 --no-deps
-- 
GitLab


From 4e7bce616baff6ed27add562ef91319742d341a0 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 17 Dec 2018 17:07:45 -0800
Subject: [PATCH 716/873] Automated rollback of commit
 6229528cb5b9c5b2a9bffe3f358631af4ef9b417

PiperOrigin-RevId: 225915955
---
 tensorflow/stream_executor/BUILD              |  3 --
 .../stream_executor/stream_executor_pimpl.cc  | 30 +------------------
 2 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index c43efc799c..00c23b8d17 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -54,9 +54,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc_impl",
-        ":logging_proto_cc_impl",
         "//tensorflow/core:lib",
-        "//tensorflow/core:logger",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
@@ -73,7 +71,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc",
-        ":logging_proto_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/strings",
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index 7b3e5c6397..d1d0bd9bc2 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -23,7 +23,6 @@ limitations under the License.
 #include <utility>
 
 #include "absl/strings/str_cat.h"
-#include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/util/env_var.h"
 #include "tensorflow/stream_executor/blas.h"
 #include "tensorflow/stream_executor/fft.h"
@@ -34,7 +33,6 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 #include "tensorflow/stream_executor/lib/threadpool.h"
-#include "tensorflow/stream_executor/logging.pb.h"
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/rng.h"
 #include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -194,8 +192,6 @@ StreamExecutor::StreamExecutor(
     platform_kind_ = PlatformKind::kOpenCL;
   } else if (port::Lowercase(platform_->Name()) == "host") {
     platform_kind_ = PlatformKind::kHost;
-  } else {
-    platform_kind_ = PlatformKind::kInvalid;
   }
 }
 
@@ -221,31 +217,7 @@ StreamExecutor::~StreamExecutor() {
 port::Status StreamExecutor::Init(int device_ordinal,
                                   DeviceOptions device_options) {
   device_ordinal_ = device_ordinal;
-  TF_RETURN_IF_ERROR(
-      implementation_->Init(device_ordinal, std::move(device_options)));
-
-  if (platform_kind_ == PlatformKind::kCuda) {
-    CudaInfo info;
-
-    int cc_major, cc_minor;
-    GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor);
-    info.mutable_compute_capability()->set_major(cc_major);
-    info.mutable_compute_capability()->set_minor(cc_minor);
-
-    if (auto *dnn = AsDnn()) {
-      port::StatusOr<dnn::VersionInfo> version_or = dnn->GetVersion();
-      if (version_or.ok()) {
-        const auto &version = version_or.ValueOrDie();
-        info.mutable_cudnn_version()->set_major(version.major_version());
-        info.mutable_cudnn_version()->set_minor(version.minor_version());
-        info.mutable_cudnn_version()->set_patch(version.patch());
-      }
-    }
-
-    tensorflow::Logger::Singleton()->LogProto(info);
-  }
-
-  return port::Status::OK();
+  return implementation_->Init(device_ordinal, std::move(device_options));
 }
 
 port::Status StreamExecutor::Init() {
-- 
GitLab


From 8854a473a625ab59c66101573d770f0710a3076f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 17:08:40 -0800
Subject: [PATCH 717/873] This CL fixes gradient tapes involving strings, to
 make sure they return None for string targets instead of erroring.

PiperOrigin-RevId: 225916061
---
 tensorflow/python/eager/backprop.py           |  5 ++-
 tensorflow/python/eager/backprop_test.py      | 32 +++++++++++++++++++
 .../python/keras/layers/unified_gru_test.py   | 31 ++++++++++++++++++
 3 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 481f680f56..2c0f68365c 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -595,7 +595,7 @@ def _fast_fill(value, shape, dtype):
 
 def _zeros(shape, dtype):
   """Helper to return (possibly cached) zero tensors in eager mode."""
-  if dtype == dtypes.variant:
+  if dtype == dtypes.variant or dtype == dtypes.string:
     # TODO(apassos): need to save enough information about variant tensors to do
     # a zeros
     return None
@@ -618,6 +618,9 @@ def _zeros(shape, dtype):
 
 
 def _ones(shape, dtype):
+  if dtypes.as_dtype(dtype) == dtypes.string:
+    return None
+
   if not context.context().executing_eagerly():
     return array_ops.ones(shape, dtype)
 
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 477d18e214..22ae6f74cb 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -270,6 +270,38 @@ class BackpropTest(test.TestCase):
       z = y * y
     self.assertAllEqual(t.gradient([x, y, z], [x, y]), [1.0, 11.0])
 
+  def testTapeGradientStringTarget(self):
+    s = constant_op.constant('unknown', dtype=dtypes.string)
+    x = constant_op.constant(3.0)
+
+    with backprop.GradientTape() as t:
+      t.watch(x)
+      t.watch(s)
+    grads = t.gradient(s, x)
+    self.assertEqual(grads, None)
+
+  def testTapeNoOpGradientStringSourceAndTarget(self):
+    s = constant_op.constant('unknown', dtype=dtypes.string)
+
+    with backprop.GradientTape() as t:
+      t.watch(s)
+    grads = t.gradient(s, s)
+    self.assertEqual(grads, None)
+
+  def testTapeNoOpGradientWithMultiTargetMultiSourceIncludeString(self):
+    x = constant_op.constant(3.0)
+    y = constant_op.constant(5.0)
+    s = constant_op.constant('unknown', dtype=dtypes.string)
+
+    with backprop.GradientTape() as t:
+      t.watch(x)
+      t.watch(y)
+      t.watch(s)
+      z = y * y
+    grads = t.gradient([x, y, z, s], [x, y, s])
+    self.assertAllEqual(grads[:2], [1.0, 11.0])
+    self.assertEqual(grads[2], None)
+
   def testTapeNoOpOnVariableIsIdentity(self):
     v0 = resource_variable_ops.ResourceVariable(1.0)
     with backprop.GradientTape() as t:
diff --git a/tensorflow/python/keras/layers/unified_gru_test.py b/tensorflow/python/keras/layers/unified_gru_test.py
index 5d482b866d..87587228b6 100644
--- a/tensorflow/python/keras/layers/unified_gru_test.py
+++ b/tensorflow/python/keras/layers/unified_gru_test.py
@@ -27,6 +27,7 @@ import numpy as np
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python import keras
+from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -36,6 +37,8 @@ from tensorflow.python.keras import testing_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import test
@@ -135,6 +138,34 @@ class UnifiedGRUTest(keras_parameterized.TestCase):
       assert l1.get_config() == l2.get_config()
 
 
+class GRULayerGradientTapeTest(test.TestCase):
+
+  @test_util.run_in_graph_and_eager_modes(config=_config)
+  def test_in_tape(self):
+    if not context.executing_eagerly():
+      self.skipTest('bloo')
+    time_steps = 10
+    embedding_size = 11
+    gru_unit_size = 12
+
+    gru = keras.layers.UnifiedGRU(gru_unit_size,
+                                  return_sequences=True,
+                                  return_state=True,
+                                  recurrent_activation='sigmoid',
+                                  recurrent_initializer='glorot_uniform')
+
+    x = random_ops.random_uniform([1, time_steps, embedding_size])
+    y = random_ops.random_uniform([1, gru_unit_size])
+
+    with backprop.GradientTape() as tape:
+      hidden_state = array_ops.zeros([1, gru_unit_size], dtype=dtypes.float32)
+      _, state = gru(x, initial_state=hidden_state)
+
+      loss = math_ops.reduce_mean(math_ops.square(state - y))
+
+    tape.gradient(loss, gru.variables)
+
+
 # TODO(scottzhu): Re-enable those tests in v2 mode once bugs attached are fixed.
 @test_util.run_v1_only
 class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
-- 
GitLab


From 087abe0a21151294edf409d3e0ea1bd7208839f0 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 17 Dec 2018 17:14:18 -0800
Subject: [PATCH 718/873] Automated rollback of commit
 a333da7c6fb2e1f8f1c90eb03885e05c21959167

PiperOrigin-RevId: 225916734
---
 tensorflow/core/BUILD                         | 20 +++++++++++++++++--
 .../core/platform/default/build_config/BUILD  |  5 +++++
 .../core/platform/{ => default}/logger.cc     |  7 ++++---
 tensorflow/core/platform/logger.h             | 19 +-----------------
 4 files changed, 28 insertions(+), 23 deletions(-)
 rename tensorflow/core/platform/{ => default}/logger.cc (91%)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 258c46fbcb..8bf1480d33 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -445,8 +445,7 @@ cc_library(
 )
 
 cc_library(
-    name = "logger",
-    srcs = ["platform/logger.cc"],
+    name = "logger_interface",
     hdrs = ["platform/logger.h"],
     copts = tf_copts(),
     visibility = ["//visibility:public"],
@@ -456,6 +455,23 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "default_logger",
+    srcs = ["platform/default/logger.cc"],
+    hdrs = ["platform/logger.h"],
+    deps = [
+        "//tensorflow/core:lib_proto_parsing",
+        "//tensorflow/core:logger_interface",
+    ],
+)
+
+cc_library(
+    name = "logger",
+    hdrs = ["platform/logger.h"],
+    visibility = ["//visibility:public"],
+    deps = ["//tensorflow/core/platform/default/build_config:logger"],
+)
+
 filegroup(
     name = "platform_env_hdrs",
     srcs = [
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index da1f66dc67..ee6936b372 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -275,3 +275,8 @@ alias(
     actual = ":mobile_srcs",
     visibility = ["//visibility:public"],
 )
+
+alias(
+    name = "logger",
+    actual = "//tensorflow/core:default_logger",
+)
diff --git a/tensorflow/core/platform/logger.cc b/tensorflow/core/platform/default/logger.cc
similarity index 91%
rename from tensorflow/core/platform/logger.cc
rename to tensorflow/core/platform/default/logger.cc
index 202840c808..54b1a1a67c 100644
--- a/tensorflow/core/platform/logger.cc
+++ b/tensorflow/core/platform/default/logger.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-Logger::FactoryFunc Logger::singleton_factory_ = []() -> Logger* {
+Logger* Logger::Singleton() {
   class DefaultLogger : public Logger {
    private:
     void DoLogProto(google::protobuf::Any* proto) override {
@@ -27,7 +27,8 @@ Logger::FactoryFunc Logger::singleton_factory_ = []() -> Logger* {
     }
     void DoFlush() override {}
   };
-  return new DefaultLogger();
-};
+  static Logger* instance = new DefaultLogger();
+  return instance;
+}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/logger.h b/tensorflow/core/platform/logger.h
index f0bfef4f2d..5d304bea63 100644
--- a/tensorflow/core/platform/logger.h
+++ b/tensorflow/core/platform/logger.h
@@ -26,22 +26,7 @@ namespace tensorflow {
 // log anything to a non-local place, e.g. a database.
 class Logger {
  public:
-  // The singleton is supposed to be used in the following steps:
-  // * At program start time, REGISTER_MOUDLE_INITIALIZER calls
-  //   SetSingletonFactory.
-  // * At some point in the program execution, Singleton() is called for the
-  //   first time, initializing the logger.
-  // * Succeeding calls to Singleton() return the initiailized logger.
-  using FactoryFunc = Logger* (*)();
-
-  static void SetSingletonFactory(FactoryFunc factory) {
-    singleton_factory_ = factory;
-  }
-
-  static Logger* Singleton() {
-    static Logger* instance = singleton_factory_();
-    return instance;
-  }
+  static Logger* Singleton();
 
   virtual ~Logger() = default;
 
@@ -59,8 +44,6 @@ class Logger {
  private:
   virtual void DoLogProto(google::protobuf::Any* proto) = 0;
   virtual void DoFlush() = 0;
-
-  static FactoryFunc singleton_factory_;
 };
 
 }  // namespace tensorflow
-- 
GitLab


From d2f49a097b245ec8fd2a6cb36825faebb75fcc47 Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Mon, 17 Dec 2018 17:17:30 -0800
Subject: [PATCH 719/873] Add default to the cluster resolver parameter in
 TPUStrategy

PiperOrigin-RevId: 225917115
---
 .../contrib/distribute/python/tpu_strategy.py | 24 ++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index d16166f627..4d2e1540eb 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -35,6 +35,7 @@ from tensorflow.python.distribute import device_util
 from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import reduce_util
 from tensorflow.python.distribute import values
+from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver as resolver_lib
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape
 from tensorflow.python.framework import constant_op
@@ -119,7 +120,10 @@ def _create_tpu_mirrored_variable(devices, real_mirrored_creator, *args,
 class TPUStrategy(distribute_lib.DistributionStrategy):
   """TPU distribution strategy implementation."""
 
-  def __init__(self, tpu_cluster_resolver, steps_per_run, num_cores=None):
+  def __init__(self,
+               tpu_cluster_resolver=None,
+               steps_per_run=None,
+               num_cores=None):
     """Initializes the TPUStrategy object.
 
     Args:
@@ -145,12 +149,26 @@ class TPUStrategy(distribute_lib.DistributionStrategy):
 class TPUExtended(distribute_lib.DistributionStrategyExtended):
   """Implementation of TPUStrategy."""
 
-  # Track what TPU devices have been initialized.
+  # Track what TPU devices have been initialized. This is *intentionally*
+  # shared across all instances of TPUExtended as we want to keep track of which
+  # devices are initialized globally.
   _initialized_devices = []
 
-  def __init__(self, container_strategy, tpu_cluster_resolver, steps_per_run,
+  def __init__(self,
+               container_strategy,
+               tpu_cluster_resolver=None,
+               steps_per_run=None,
                num_cores=None):
     super(TPUExtended, self).__init__(container_strategy)
+
+    if tpu_cluster_resolver is None:
+      tpu_cluster_resolver = resolver_lib.TPUClusterResolver("")
+
+    if steps_per_run is None:
+      # TODO(frankchn): Warn when we are being used by DS/Keras and this is
+      # not specified.
+      steps_per_run = 1
+
     self._tpu_cluster_resolver = tpu_cluster_resolver
     self._tpu_metadata = get_tpu_system_metadata(self._tpu_cluster_resolver)
     # TODO(sourabhbajaj): Change this from num_cores to metadata_override
-- 
GitLab


From 601a65952851d4ddb959a8481d9ea6e00c76bb04 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 18:15:28 -0800
Subject: [PATCH 720/873] Explicitly pass values kwarg as it is currently being
 treated as the default_name kwarg instead. This causes an exception to be
 thrown in eager mode.

PiperOrigin-RevId: 225923506
---
 tensorflow/contrib/distributions/python/ops/sample_stats.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/distributions/python/ops/sample_stats.py b/tensorflow/contrib/distributions/python/ops/sample_stats.py
index 978e627d66..19e99e0380 100644
--- a/tensorflow/contrib/distributions/python/ops/sample_stats.py
+++ b/tensorflow/contrib/distributions/python/ops/sample_stats.py
@@ -300,7 +300,7 @@ def percentile(x,
       raise ValueError("Argument 'interpolation' must be in %s.  Found %s" %
                        (allowed_interpolations, interpolation))
 
-  with ops.name_scope(name, [x, q]):
+  with ops.name_scope(name, values=[x, q]):
     x = ops.convert_to_tensor(x, name="x")
     # Double is needed here and below, else we get the wrong index if the array
     # is huge along axis.
-- 
GitLab


From 1de6fc1d15f226ec9fb9c36f21d3f0a4c6c480e3 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Mon, 17 Dec 2018 18:31:16 -0800
Subject: [PATCH 721/873] Add a flag in clone model to share the non-input
 layers.

PiperOrigin-RevId: 225924868
---
 tensorflow/python/keras/models.py      |  48 +++++---
 tensorflow/python/keras/models_test.py | 148 +++++++++++++++++++++++++
 2 files changed, 182 insertions(+), 14 deletions(-)

diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py
index 68d58bf66b..00ec5978be 100644
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@@ -43,7 +43,11 @@ model_from_yaml = saving.model_from_yaml
 model_from_json = saving.model_from_json
 
 
-def _clone_functional_model(model, input_tensors=None):
+def clone_layer(layer):
+  return layer.__class__.from_config(layer.get_config())
+
+
+def _clone_functional_model(model, input_tensors=None, share_weights=False):
   """Clone a functional `Model` instance.
 
   Model cloning is similar to calling a model on new inputs,
@@ -55,6 +59,11 @@ def _clone_functional_model(model, input_tensors=None):
       input_tensors: optional list of input tensors
           to build the model upon. If not provided,
           placeholders will be created.
+      share_weights: flag to enable sharing of non-input layers between the
+          cloned and original model. Note this still clones the input layers.
+          This is required when we create a per-replica copy of the model with
+          distribution strategy; we want the weights to be shared but still
+          feed inputs separately so we create new input layers.
 
   Returns:
       An instance of `Model` reproducing the behavior
@@ -127,10 +136,11 @@ def _clone_functional_model(model, input_tensors=None):
 
       # Get or create layer.
       if layer not in layer_map:
-        # Clone layer.
-        new_layer = layer.__class__.from_config(layer.get_config())
-        layer_map[layer] = new_layer
-        layer = new_layer
+        if not share_weights:
+          # Clone layer.
+          new_layer = clone_layer(layer)
+          layer_map[layer] = new_layer
+          layer = new_layer
       else:
         # Reuse previously cloned layer.
         layer = layer_map[layer]
@@ -177,7 +187,7 @@ def _clone_functional_model(model, input_tensors=None):
   return Model(input_tensors, output_tensors, name=model.name)
 
 
-def _clone_sequential_model(model, input_tensors=None):
+def _clone_sequential_model(model, input_tensors=None, share_weights=False):
   """Clone a `Sequential` model instance.
 
   Model cloning is similar to calling a model on new inputs,
@@ -189,6 +199,11 @@ def _clone_sequential_model(model, input_tensors=None):
       input_tensors: optional list of input tensors
           to build the model upon. If not provided,
           placeholders will be created.
+      share_weights: flag to enable sharing of non-input layers between the
+          cloned and original model. Note this still clones the input layers.
+          This is required when we create a per-replica copy of the model with
+          distribution strategy; we want the weights to be shared but still
+          feed inputs separately so we create new input layers.
 
   Returns:
       An instance of `Sequential` reproducing the behavior
@@ -203,23 +218,28 @@ def _clone_sequential_model(model, input_tensors=None):
                      'to be a `Sequential` model instance, '
                      'but got:', model)
 
-  def clone(layer):
-    return layer.__class__.from_config(layer.get_config())
-
   # Use model._layers to ensure that all layers are cloned. The model's layers
   # property will exclude the initial InputLayer (if it exists) in the model,
   # resulting in a different Sequential model structure.
   if input_tensors is None:
-    layers = [clone(layer) for layer in model._layers]
+    if share_weights:
+      # In preserve weights case we still want the input layers to be cloned.
+      layers = []
+      for layer in model._layers:
+        if isinstance(layer, InputLayer):
+          layers.append(clone_layer(layer))
+        else:
+          layers.append(layer)
+    else:
+      layers = [clone_layer(layer) for layer in model._layers]
     return Sequential(layers=layers, name=model.name)
   else:
     # If input tensors are provided, the original model's InputLayer is
     # overwritten with a different InputLayer.
     layers = [
-        clone(layer)
-        for layer in model._layers
-        if not isinstance(layer, InputLayer)
-    ]
+        layer for layer in model._layers if not isinstance(layer, InputLayer)]
+    if not share_weights:
+      layers = [clone_layer(layer) for layer in layers]
     if len(generic_utils.to_list(input_tensors)) != 1:
       raise ValueError('To clone a `Sequential` model, we expect '
                        ' at most one tensor '
diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py
index c466d94fed..b497a9fc55 100644
--- a/tensorflow/python/keras/models_test.py
+++ b/tensorflow/python/keras/models_test.py
@@ -249,6 +249,154 @@ class TestModelCloning(test.TestCase):
       self.assertFalse(has_placeholder)
 
 
+class TestModelCloningLayerPreserveWeights(test.TestCase):
+
+  @test_util.run_deprecated_v1
+  def test_clone_sequential_model(self):
+    with self.cached_session():
+      val_a = np.random.random((10, 4))
+      val_out = np.random.random((10, 4))
+
+      model = sequential_model(False)
+
+    # Everything should work in a new session.
+    keras.backend.clear_session()
+
+    with self.cached_session():
+      # With placeholder creation
+      new_model = keras.models._clone_sequential_model(
+          model, share_weights=True)
+      # update ops from batch norm needs to be included
+      self.assertEqual(len(new_model.get_updates_for(new_model.inputs)), 2)
+      new_model.compile('rmsprop', 'mse')
+      new_model.train_on_batch(val_a, val_out)
+
+      # On top of new tensor
+      input_a = keras.Input(shape=(4,))
+      new_model = keras.models._clone_sequential_model(
+          model, input_tensors=input_a, share_weights=True)
+      self.assertEqual(len(new_model.get_updates_for(new_model.inputs)), 2)
+      new_model.compile('rmsprop', 'mse')
+      new_model.train_on_batch(val_a, val_out)
+
+      # On top of new, non-Keras tensor
+      input_a = keras.backend.variable(val_a)
+      new_model = keras.models._clone_sequential_model(
+          model, input_tensors=input_a, share_weights=True)
+      self.assertEqual(len(new_model.get_updates_for(new_model.inputs)), 2)
+      new_model.compile('rmsprop', 'mse')
+      new_model.train_on_batch(None, val_out)
+
+  @test_util.run_deprecated_v1
+  def test_clone_sequential_model_input_layer(self):
+
+    @test_util.run_deprecated_v1
+    def test_input_layer(include_inputs):
+      with self.cached_session():
+        val_a = np.random.random((10, 4))
+        model = sequential_model(include_inputs, include_inputs)
+        # Sanity check
+        self.assertEqual(
+            isinstance(model._layers[0], keras.layers.InputLayer),
+            include_inputs)
+        self.assertEqual(model._is_graph_network, include_inputs)
+
+      keras.backend.clear_session()
+      with self.cached_session():
+        # With placeholder creation -- clone model should have an InputLayer
+        # if the original model has one.
+        new_model = keras.models._clone_sequential_model(
+            model, share_weights=True)
+        self.assertEqual(
+            isinstance(new_model._layers[0], keras.layers.InputLayer),
+            include_inputs)
+        self.assertEqual(new_model._is_graph_network, model._is_graph_network)
+
+        # On top of new tensor  -- clone model should always have an InputLayer.
+        input_a = keras.Input(shape=(4,))
+        new_model = keras.models._clone_sequential_model(
+            model, input_tensors=input_a, share_weights=True)
+        self.assertIsInstance(new_model._layers[0], keras.layers.InputLayer)
+        self.assertTrue(new_model._is_graph_network)
+
+        # On top of new, non-Keras tensor  -- clone model should always have an
+        # InputLayer.
+        input_a = keras.backend.variable(val_a)
+        new_model = keras.models._clone_sequential_model(
+            model, input_tensors=input_a, share_weights=True)
+        self.assertIsInstance(new_model._layers[0], keras.layers.InputLayer)
+        self.assertTrue(new_model._is_graph_network)
+
+    test_input_layer(True)
+    test_input_layer(False)
+
+  @test_util.run_deprecated_v1
+  def test_clone_functional_model(self):
+    with self.cached_session():
+      val_a = np.random.random((10, 4))
+      val_b = np.random.random((10, 4))
+      val_out = np.random.random((10, 4))
+
+      input_a = keras.Input(shape=(4,))
+      input_b = keras.Input(shape=(4,))
+      dense_1 = keras.layers.Dense(4,)
+      dense_2 = keras.layers.Dense(4,)
+
+      x_a = dense_1(input_a)
+      x_a = keras.layers.Dropout(0.5)(x_a)
+      x_a = keras.layers.BatchNormalization()(x_a)
+      x_b = dense_1(input_b)
+      x_a = dense_2(x_a)
+      outputs = keras.layers.add([x_a, x_b])
+      model = keras.models.Model([input_a, input_b], outputs)
+
+    # Everything should work in a new session.
+    keras.backend.clear_session()
+
+    with self.cached_session():
+      # With placeholder creation
+      new_model = keras.models._clone_functional_model(
+          model, share_weights=True)
+      self.assertEqual(len(new_model.get_updates_for(new_model.inputs)), 2)
+      new_model.compile('rmsprop', 'mse')
+      new_model.train_on_batch([val_a, val_b], val_out)
+
+      # On top of new tensors
+      input_a = keras.Input(shape=(4,), name='a')
+      input_b = keras.Input(shape=(4,), name='b')
+      new_model = keras.models._clone_functional_model(
+          model, input_tensors=[input_a, input_b], share_weights=True)
+      self.assertEqual(len(new_model.get_updates_for(new_model.inputs)), 2)
+      new_model.compile('rmsprop', 'mse')
+      new_model.train_on_batch([val_a, val_b], val_out)
+
+      # On top of new, non-Keras tensors
+      input_a = keras.backend.variable(val_a)
+      input_b = keras.backend.variable(val_b)
+      new_model = keras.models._clone_functional_model(
+          model, input_tensors=[input_a, input_b], share_weights=True)
+      self.assertEqual(len(new_model.get_updates_for(new_model.inputs)), 2)
+      new_model.compile('rmsprop', 'mse')
+      new_model.train_on_batch(None, val_out)
+
+  @test_util.run_in_graph_and_eager_modes
+  def test_clone_functional_model_with_masking(self):
+    with self.cached_session():
+      x = np.array([[[1], [1]], [[0], [0]]])
+      inputs = keras.Input((2, 1))
+      outputs = keras.layers.Masking(mask_value=0)(inputs)
+      outputs = keras.layers.TimeDistributed(
+          keras.layers.Dense(1, kernel_initializer='one'))(outputs)
+      model = keras.Model(inputs, outputs)
+
+      model = keras.models._clone_functional_model(
+          model, share_weights=True)
+      model.compile(loss='mse', optimizer=adam.AdamOptimizer(0.01))
+      y = np.array([[[1], [1]], [[1], [1]]])
+      loss = model.train_on_batch(x, y)
+      self.assertEqual(float(loss), 0.)
+
+
 def _has_placeholder(graph):
   ops_types = [op.type for op in graph.get_operations()]
   return any('Placeholder' in s for s in ops_types)
-- 
GitLab


From b447374e6646498b18e9fe945af633dc28d566e3 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Mon, 17 Dec 2018 18:55:42 -0800
Subject: [PATCH 722/873] Allow setting an RpcCollectiveExecutorMgr.

This requires starting a correctly specified server, but not starting any
remote contexts.

At the moment, this supports multiple workers, and requires one-time setup before starting computation.

PiperOrigin-RevId: 225926944
---
 tensorflow/c/BUILD                            |  1 -
 tensorflow/c/c_api_experimental.cc            | 51 +++++++++++++++++++
 tensorflow/c/c_api_experimental.h             |  8 +++
 tensorflow/core/common_runtime/eager/BUILD    |  4 ++
 .../core/common_runtime/eager/context.cc      | 33 ++++++++++++
 .../core/common_runtime/eager/context.h       | 13 ++++-
 tensorflow/python/BUILD                       |  1 +
 tensorflow/python/eager/context.py            | 32 ++++++++++++
 tensorflow/python/pywrap_tfe.i                |  4 ++
 9 files changed, 145 insertions(+), 2 deletions(-)

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 25df970eca..9f00cc5de4 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -123,7 +123,6 @@ tf_cuda_library(
         "//tensorflow/c/eager:c_api",
         "//tensorflow/c/eager:c_api_internal",
         "//tensorflow/compiler/jit:flags",
-        "//tensorflow/contrib/tpu:all_ops",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index 81343f7bc0..f04b285037 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -8890,3 +8890,54 @@ TFE_TensorHandle* TFE_NewTensorHandleFromScalar(TF_DataType dtype_arg,
   std::memcpy(tensorflow::TensorCApi::Buffer(tensor)->data(), data, len);
   return new TFE_TensorHandle(tensor, nullptr, nullptr);
 }
+
+namespace {
+tensorflow::Status EnableCollectiveOps(const tensorflow::ServerDef& server_def,
+                                       TFE_Context* ctx) {
+  // We don't use the TF_RETURN_IF_ERROR macro directly since that destroys the
+  // server object (which currently CHECK-fails) and we miss the error, instead,
+  // we log the error, and then return to allow the user to see the error
+  // message.
+#define LOG_AND_RETURN_IF_ERROR(...)                    \
+  do {                                                  \
+    const ::tensorflow::Status _status = (__VA_ARGS__); \
+    if (TF_PREDICT_FALSE(!_status.ok())) {              \
+      LOG(ERROR) << _status.error_message();            \
+      return _status;                                   \
+    }                                                   \
+  } while (0);
+
+  std::unique_ptr<tensorflow::ServerInterface> server;
+  LOG_AND_RETURN_IF_ERROR(tensorflow::NewServer(server_def, &server));
+
+  tensorflow::GrpcServer* grpc_server =
+      dynamic_cast<tensorflow::GrpcServer*>(server.get());
+  if (grpc_server == nullptr) {
+    LOG_AND_RETURN_IF_ERROR(tensorflow::errors::Internal(
+        "Currently, TFE_NewContext only supports tensorflow::GrpcServer."));
+  }
+
+  LOG_AND_RETURN_IF_ERROR(grpc_server->Start());
+
+  LOG_AND_RETURN_IF_ERROR(ctx->context.StoreCollectiveOpsServer(
+      std::move(server), grpc_server->worker_env()->device_mgr,
+      grpc_server->worker_env()->collective_executor_mgr));
+
+  return tensorflow::Status::OK();
+#undef LOG_AND_RETURN_IF_ERROR
+}
+}  // namespace
+
+// Set server_def on the context, possibly updating it.
+TF_CAPI_EXPORT extern void TFE_EnableCollectiveOps(TFE_Context* ctx,
+                                                   const void* proto,
+                                                   size_t proto_len,
+                                                   TF_Status* status) {
+  tensorflow::ServerDef server_def;
+  if (!server_def.ParseFromArray(proto, proto_len)) {
+    status->status = tensorflow::errors::InvalidArgument(
+        "Invalid tensorflow.ServerDef protocol buffer");
+    return;
+  }
+  status->status = EnableCollectiveOps(server_def, ctx);
+}
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index cb7a146846..e6d04d0c2b 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -247,6 +247,14 @@ TF_CAPI_EXPORT int TF_PickUnusedPortOrDie(void);
 TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandleFromScalar(
     TF_DataType dtype, void* scalar, size_t len);
 
+// Specify the server_def that enables collective ops.
+// This is different to the above function in that it doesn't create remote
+// contexts, and remotely executing ops is not possible. It just enables
+// communication for collective ops.
+TF_CAPI_EXPORT extern void TFE_EnableCollectiveOps(TFE_Context* ctx,
+                                                   const void* proto,
+                                                   size_t proto_len,
+                                                   TF_Status* status);
 #ifdef __cplusplus
 } /* end extern "C" */
 #endif
diff --git a/tensorflow/core/common_runtime/eager/BUILD b/tensorflow/core/common_runtime/eager/BUILD
index 86890ba07d..77e3246df0 100644
--- a/tensorflow/core/common_runtime/eager/BUILD
+++ b/tensorflow/core/common_runtime/eager/BUILD
@@ -61,6 +61,10 @@ tf_cuda_library(
             "//tensorflow/core:lib_internal",
             "//tensorflow/core:protos_all_cc",
             "//tensorflow/core:session_options",
+            "//tensorflow/core/distributed_runtime:collective_param_resolver_distributed",
+            "//tensorflow/core/distributed_runtime:device_resolver_distributed",
+            "//tensorflow/core/distributed_runtime:rpc_collective_executor_mgr",
+            "//tensorflow/core/distributed_runtime:worker_cache",
             "//tensorflow/core/distributed_runtime:server_lib",
             "//tensorflow/core/distributed_runtime:worker_session",
             "//tensorflow/core/distributed_runtime/eager:eager_client",
diff --git a/tensorflow/core/common_runtime/eager/context.cc b/tensorflow/core/common_runtime/eager/context.cc
index 1727c04560..2212bda534 100644
--- a/tensorflow/core/common_runtime/eager/context.cc
+++ b/tensorflow/core/common_runtime/eager/context.cc
@@ -20,6 +20,9 @@ limitations under the License.
 #include "tensorflow/core/common_runtime/device_resolver_local.h"
 #include "tensorflow/core/common_runtime/device_set.h"
 #include "tensorflow/core/common_runtime/process_util.h"
+#include "tensorflow/core/distributed_runtime/collective_param_resolver_distributed.h"
+#include "tensorflow/core/distributed_runtime/device_resolver_distributed.h"
+#include "tensorflow/core/distributed_runtime/rpc_collective_executor_mgr.h"
 #include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/lib/core/blocking_counter.h"
 #include "tensorflow/core/util/env_var.h"
@@ -364,6 +367,36 @@ Status EagerContext::GetClientAndContextID(Device* device,
   return Status::OK();
 }
 
+Status EagerContext::StoreCollectiveOpsServer(
+    std::unique_ptr<ServerInterface> server, DeviceMgr* device_mgr,
+    CollectiveExecutorMgrInterface* rpc_collective_executor_mgr) {
+  collective_executor_mgr_.reset(nullptr);
+  unowned_collective_executor_mgr_ = rpc_collective_executor_mgr;
+
+  local_device_manager_.reset(nullptr);
+  local_unowned_device_manager_ = device_mgr;
+
+  devices_ = local_unowned_device_manager_->ListDevices();
+  devices_map_.clear();
+
+  InitDeviceMapAndAsync();
+  ClearCaches();
+
+  pflr_.reset(new ProcessFunctionLibraryRuntime(
+      local_unowned_device_manager_, env_, TF_GRAPH_DEF_VERSION, &func_lib_def_,
+      {}, thread_pool_.get()));
+
+  // Memory leak!
+  if (server_ != nullptr) {
+    LOG(WARNING) << "Unable to destroy server_ object, so releasing instead. "
+                    "Servers don't support clean shutdown.";
+    server_.release();
+  }
+  server_ = std::move(server);
+
+  return Status::OK();
+}
+
 void EagerContext::InitializeRemote(
     std::unique_ptr<ServerInterface> server,
     std::unique_ptr<eager::EagerClientCache> remote_eager_workers,
diff --git a/tensorflow/core/common_runtime/eager/context.h b/tensorflow/core/common_runtime/eager/context.h
index cdef947893..5ff6b3ffbd 100644
--- a/tensorflow/core/common_runtime/eager/context.h
+++ b/tensorflow/core/common_runtime/eager/context.h
@@ -32,6 +32,7 @@ limitations under the License.
 #ifndef __ANDROID__
 #include "tensorflow/core/distributed_runtime/eager/eager_client.h"
 #include "tensorflow/core/distributed_runtime/server_lib.h"
+#include "tensorflow/core/distributed_runtime/worker_cache.h"
 #endif
 #include "tensorflow/core/framework/collective.h"
 #include "tensorflow/core/framework/log_memory.h"
@@ -148,10 +149,15 @@ class EagerContext {
   bool LogMemory() { return log_memory_; }
 
   Rendezvous* GetRendezvous() { return rendezvous_; }
+  CollectiveExecutorMgrInterface* collective_executor_mgr() {
+    return (collective_executor_mgr_ != nullptr)
+               ? collective_executor_mgr_.get()
+               : unowned_collective_executor_mgr_;
+  }
   std::unique_ptr<CollectiveExecutor::Handle> GetCollectiveExecutorHandle() {
     return std::unique_ptr<CollectiveExecutor::Handle>(
         new CollectiveExecutor::Handle(
-            collective_executor_mgr_->FindOrCreate(0), true /*inherit_ref*/));
+            collective_executor_mgr()->FindOrCreate(0), true /*inherit_ref*/));
   }
 
   const tensorflow::DeviceMgr* local_device_mgr() const {
@@ -204,6 +210,10 @@ class EagerContext {
     return active_remote_contexts_.find(context_id) !=
            active_remote_contexts_.end();
   }
+
+  Status StoreCollectiveOpsServer(
+      std::unique_ptr<ServerInterface> server, DeviceMgr* device_mgr,
+      CollectiveExecutorMgrInterface* rpc_collective_executor_mgr);
 #endif
 
   // If true, then tensors should be shipped across processes via the
@@ -280,6 +290,7 @@ class EagerContext {
   Env* const env_;
 
   std::unique_ptr<CollectiveExecutorMgrInterface> collective_executor_mgr_;
+  CollectiveExecutorMgrInterface* unowned_collective_executor_mgr_ = nullptr;
 
 #ifndef __ANDROID__
   void CloseRemoteContexts();
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index d851c229ac..156b4045b5 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -4112,6 +4112,7 @@ tf_py_wrap_cc(
         ":tf_session_helper",
         "//third_party/python_runtime:headers",
         "//tensorflow/c:c_api",
+        "//tensorflow/c:c_api_experimental",
         "//tensorflow/c:checkpoint_reader",
         "//tensorflow/c:python_api",
         "//tensorflow/c:tf_status_helper",
diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py
index 848b300eba..cd43dc7ab2 100644
--- a/tensorflow/python/eager/context.py
+++ b/tensorflow/python/eager/context.py
@@ -265,6 +265,7 @@ class Context(object):
       execution_mode = SYNC
     self._execution_mode = execution_mode
     self._server_def = server_def
+    self._collective_ops_server_def = None
 
   # pylint: enable=redefined-outer-name
 
@@ -325,10 +326,17 @@ class Context(object):
         self._context_handle = pywrap_tensorflow.TFE_NewContext(opts)
       finally:
         pywrap_tensorflow.TFE_DeleteContextOptions(opts)
+      assert not (self._server_def and self._collective_ops_server_def), (
+          "Cannot enable remote execution as well as collective ops at the "
+          "moment. If this is important to you, please file an issue.")
       if self._server_def is not None:
         server_def_str = self._server_def.SerializeToString()
         pywrap_tensorflow.TFE_ContextSetServerDef(self._context_handle, 600,
                                                   server_def_str)
+      elif self._collective_ops_server_def is not None:
+        server_def_str = self._collective_ops_server_def.SerializeToString()
+        pywrap_tensorflow.TFE_EnableCollectiveOps(self._context_handle,
+                                                  server_def_str)
 
       self._initialize_devices()
 
@@ -370,6 +378,30 @@ class Context(object):
 
       self._initialize_devices()
 
+  def enable_collective_ops(self, server_def):
+    """Enable collective ops with an appropriate server_def.
+
+    If previously enabled, this cannot be re-enabled.
+
+    Args:
+      server_def: A tensorflow::ServerDef proto. Enables execution on remote
+        devices.
+
+    Raises:
+      ValueError: if server_def is None.
+    """
+    if not server_def:
+      raise ValueError("server_def is None.")
+    if not self._context_handle:
+      self._collective_ops_server_def = server_def
+    else:
+      server_def_str = server_def.SerializeToString()
+      pywrap_tensorflow.TFE_EnableCollectiveOps(self._context_handle,
+                                                server_def_str)
+
+      self._clear_caches()
+      self._initialize_devices()
+
   @property
   def _handle(self):
     ctx = self._context_handle
diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i
index adbce95c6f..733d471ca2 100755
--- a/tensorflow/python/pywrap_tfe.i
+++ b/tensorflow/python/pywrap_tfe.i
@@ -68,9 +68,12 @@ limitations under the License.
 %rename("%s") TFE_ContextEndStep;
 %rename("%s") TFE_Py_RegisterVSpace;
 %rename("%s") TFE_Py_EncodeArg;
+%rename("%s") TFE_EnableCollectiveOps;
+%rename("%s") TF_PickUnusedPortOrDie;
 
 %{
 #include "tensorflow/python/eager/pywrap_tfe.h"
+#include "tensorflow/c/c_api_experimental.h"
 %}
 
 %typemap(in) (const void* proto) {
@@ -226,6 +229,7 @@ limitations under the License.
 %native(TFE_Py_FastPathExecute) TFE_Py_FastPathExecute_C;
 
 %include "tensorflow/python/eager/pywrap_tfe.h"
+%include "tensorflow/c/c_api_experimental.h"
 
 // Clear all typemaps.
 %typemap(out) TF_DataType;
-- 
GitLab


From de84436a995549c63682e53f403421e1f0772479 Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Mon, 17 Dec 2018 19:41:10 -0800
Subject: [PATCH 723/873] Fix problems in env.h and kernels.h when included
 from a C translation unit

PiperOrigin-RevId: 225930499
---
 tensorflow/c/env.h     | 9 +++++----
 tensorflow/c/kernels.h | 6 +++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tensorflow/c/env.h b/tensorflow/c/env.h
index 15652353cd..73078fcbbc 100644
--- a/tensorflow/c/env.h
+++ b/tensorflow/c/env.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -24,14 +25,14 @@ limitations under the License.
 // --------------------------------------------------------------------------
 // C API for tensorflow::Env.
 
-struct TF_WritableFileHandle;
-struct TF_StringStream;
-struct TF_Thread;
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+typedef struct TF_WritableFileHandle TF_WritableFileHandle;
+typedef struct TF_StringStream TF_StringStream;
+typedef struct TF_Thread TF_Thread;
+
 typedef struct TF_FileStatistics {
   // The length of the file in bytes.
   int64_t length;
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index 1a91aa184f..cefc30bcdf 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -35,9 +35,9 @@ extern "C" {
 // `TF_RegisterKernelBuilder`, which will allow TF to construct user-provided
 // kernels when necessary.
 
-struct TF_KernelBuilder;
-struct TF_OpKernelConstruction;
-struct TF_OpKernelContext;
+typedef struct TF_KernelBuilder TF_KernelBuilder;
+typedef struct TF_OpKernelConstruction TF_OpKernelConstruction;
+typedef struct TF_OpKernelContext TF_OpKernelContext;
 
 // Allocates a new kernel builder and returns a pointer to it.
 //
-- 
GitLab


From a2b72676c51e1fb4186e4d6978877e5eb3bc204a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 19:41:42 -0800
Subject: [PATCH 724/873] Add HLO field for batch_group_converter

PiperOrigin-RevId: 225930532
---
 .../tf2xla/kernels/conv_op_helpers.cc         | 145 +++-------
 .../compiler/tf2xla/kernels/xla_conv_op.cc    |   2 +-
 tensorflow/compiler/xla/client/xla_builder.cc |  56 ++--
 tensorflow/compiler/xla/client/xla_builder.h  |  30 +-
 .../compiler/xla/g3doc/operation_semantics.md |  41 ++-
 tensorflow/compiler/xla/reference_util.cc     |  11 +-
 tensorflow/compiler/xla/service/BUILD         |   8 +-
 .../xla/service/algebraic_simplifier_test.cc  |  21 +-
 ...onvolution_feature_group_converter_test.cc |  14 +-
 ...rter.cc => convolution_group_converter.cc} | 258 ++++++++++++++++--
 ...verter.h => convolution_group_converter.h} |  25 +-
 tensorflow/compiler/xla/service/cpu/BUILD     |   2 +-
 .../xla/service/cpu/conv_canonicalization.cc  |   3 +-
 .../service/cpu/conv_canonicalization_test.cc |   8 +-
 .../compiler/xla/service/cpu/cpu_compiler.cc  |  13 +-
 .../dynamic_dimension_inference_test.cc       |   3 +-
 tensorflow/compiler/xla/service/gpu/BUILD     |   1 +
 .../service/gpu/cudnn_conv_rewriter_test.cc   |  87 +++---
 .../xla/service/gpu/nvptx_compiler.cc         |   8 +
 tensorflow/compiler/xla/service/hlo.proto     |   4 +-
 .../xla/service/hlo_creation_utils.cc         |   6 +-
 .../xla/service/hlo_evaluator_test.cc         |  17 +-
 .../xla/service/hlo_evaluator_typed_visitor.h |  11 +-
 .../compiler/xla/service/hlo_instruction.cc   |  24 +-
 .../compiler/xla/service/hlo_instruction.h    |   7 +-
 .../compiler/xla/service/hlo_instructions.cc  |   8 +-
 .../compiler/xla/service/hlo_instructions.h   |  14 +-
 tensorflow/compiler/xla/service/hlo_parser.cc |   9 +-
 .../compiler/xla/service/hlo_verifier.cc      |   4 +-
 .../compiler/xla/service/shape_inference.cc   |  23 +-
 .../compiler/xla/service/shape_inference.h    |   2 +-
 .../xla/service/shape_inference_test.cc       |  12 +-
 .../compiler/xla/service/transpose_folding.cc |   3 +-
 .../xla/service/transpose_folding_test.cc     |  28 +-
 .../compiler/xla/tests/convolution_test.cc    |   2 +-
 35 files changed, 609 insertions(+), 301 deletions(-)
 rename tensorflow/compiler/xla/service/{convolution_feature_group_converter.cc => convolution_group_converter.cc} (61%)
 rename tensorflow/compiler/xla/service/{convolution_feature_group_converter.h => convolution_group_converter.h} (58%)

diff --git a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
index 4124b258c7..b0bc764030 100644
--- a/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
+++ b/tensorflow/compiler/tf2xla/kernels/conv_op_helpers.cc
@@ -428,98 +428,47 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
   int n_dim = GetTensorBatchDimIndex(num_dims, attrs.data_format);
   int c_dim = GetTensorFeatureDimIndex(num_dims, attrs.data_format);
 
-  int64 total_spatial_size = 1;
-  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
-    total_spatial_size *= dims.input_size(i);
-  }
+  // The conversion logic below assumes that the data format is NHWC, so we also
+  // check that here.
+  bool use_batch_group_count =
+      filter_tensor_shape.dim_size(num_dims - 1) == 1 && attrs.depthwise &&
+      attrs.data_format == FORMAT_NHWC;
 
-  // We use this approach only for depthwise convolutions where feature counts
-  // are large but space dimensions are small. The conversion logic below
-  // assumes that the data format is NHWC, so we also check that here.
-  bool should_perform_depthwise_conv =
-      attrs.data_format == FORMAT_NHWC &&
-      (total_spatial_size < dims.in_depth) &&
-      filter_tensor_shape.dim_size(num_dims - 1) == 1 && attrs.depthwise;
-
-  int64 num_spatial_dims =
-      attrs.num_spatial_dims + (should_perform_depthwise_conv ? 1 : 0);
-
-  std::vector<std::pair<int64, int64>> padding(num_spatial_dims);
-  std::vector<int64> rhs_dilation(num_spatial_dims);
-  std::vector<int64> window_strides(num_spatial_dims);
-  std::vector<int64> ones(num_spatial_dims, 1);
-
-  if (should_perform_depthwise_conv) {
-    // This approach is similar to handling of grouped convolutions in
-    // the convolution_feature_group_converter.cc. Please refer to it for
-    // details.
-
-    // Add spatial dimension to the activation, and reshape.
-    std::vector<int64> activations_reshape_sizes, gradients_reshape_sizes;
-
-    activations_reshape_sizes.push_back(dims.batch_size);
-    gradients_reshape_sizes.push_back(dims.batch_size);
-    for (int i = 0; i < attrs.num_spatial_dims; i++) {
-      activations_reshape_sizes.push_back(dims.input_size(i));
-      gradients_reshape_sizes.push_back(dims.output_size(i));
-    }
-    activations_reshape_sizes.push_back(dims.in_depth);
-    activations_reshape_sizes.push_back(1);
-    gradients_reshape_sizes.push_back(dims.out_depth);
-    gradients_reshape_sizes.push_back(1);
-
-    activations = xla::Reshape(activations, activations_reshape_sizes);
-    gradients = xla::Reshape(gradients, gradients_reshape_sizes);
-
-    int64 new_spatial_dim = activations_reshape_sizes.size() - 1;
-
-    // Set the newly added dimension to be the batch.
-    dnums.set_input_batch_dimension(new_spatial_dim);
-    dnums.set_input_feature_dimension(c_dim);
-
-    // The gradients become the RHS of the convolution.
-    // The gradients have shape [batch, out_rows, out_cols, ..., out_depth, 1]
-    // where the batch becomes a spatial dimension, and 1 becomes
-    // the input feature for the convolution.
-    dnums.set_kernel_input_feature_dimension(new_spatial_dim);
-    dnums.set_kernel_output_feature_dimension(c_dim);
-
-    // Treat original batch dimension as a spatial dimension.
-    dnums.add_input_spatial_dimensions(n_dim);
-    dnums.add_kernel_spatial_dimensions(n_dim);
+  std::vector<std::pair<int64, int64>> padding(attrs.num_spatial_dims);
+  std::vector<int64> rhs_dilation(attrs.num_spatial_dims);
+  std::vector<int64> window_strides(attrs.num_spatial_dims);
+  std::vector<int64> ones(attrs.num_spatial_dims, 1);
+
+  // The activations (inputs) form the LHS of the convolution.
+  // Activations have shape: [batch, in_rows, in_cols, ..., in_depth]
+  // For the gradient computation, we flip the roles of the batch and
+  // feature dimensions.
+  // Each spatial entry has size in_depth * batch
+
+  // Swap n_dim and c_dim in the activations.
+  dnums.set_input_batch_dimension(c_dim);
+  dnums.set_input_feature_dimension(n_dim);
+
+  // The gradients become the RHS of the convolution.
+  // The gradients have shape [batch, out_rows, out_cols, ..., out_depth]
+  // where the batch becomes the input feature for the convolution.
+  dnums.set_kernel_input_feature_dimension(n_dim);
+  dnums.set_kernel_output_feature_dimension(c_dim);
+
+  // The dimension swap below is needed because filter shape is KH,KW,F,DM.
+  if (use_batch_group_count) {
+    dnums.set_output_batch_dimension(attrs.num_spatial_dims + 1);
+    dnums.set_output_feature_dimension(attrs.num_spatial_dims);
   } else {
-    // The activations (inputs) form the LHS of the convolution.
-    // Activations have shape: [batch, in_rows, in_cols, ..., in_depth]
-    // For the gradient computation, we flip the roles of the batch and
-    // feature dimensions.
-    // Each spatial entry has size in_depth * batch
-
-    // Swap n_dim and c_dim in the activations.
-    dnums.set_input_batch_dimension(c_dim);
-    dnums.set_input_feature_dimension(n_dim);
-
-    // The gradients become the RHS of the convolution.
-    // The gradients have shape [batch, out_rows, out_cols, ..., out_depth]
-    // where the batch becomes the input feature for the convolution.
-    dnums.set_kernel_input_feature_dimension(n_dim);
-    dnums.set_kernel_output_feature_dimension(c_dim);
+    dnums.set_output_batch_dimension(attrs.num_spatial_dims);
+    dnums.set_output_feature_dimension(attrs.num_spatial_dims + 1);
   }
 
-  dnums.set_output_batch_dimension(num_spatial_dims);
-  dnums.set_output_feature_dimension(num_spatial_dims + 1);
-
   // Tensorflow filter shape is [ H, W, ..., inC, outC ].
-  for (int i = 0; i < num_spatial_dims; ++i) {
+  for (int i = 0; i < attrs.num_spatial_dims; ++i) {
     dnums.add_output_spatial_dimensions(i);
   }
 
-  if (should_perform_depthwise_conv) {
-    // Set the right parameters for the newly created spatial dimension.
-    padding[0] = {0, 0};
-    rhs_dilation[0] = 1;
-    window_strides[0] = 1;
-  }
-
   for (int64 i = 0; i < attrs.num_spatial_dims; ++i) {
     int64 dim = GetTensorSpatialDimIndex(num_dims, attrs.data_format, i);
     dnums.add_input_spatial_dimensions(dim);
@@ -561,10 +510,9 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
     const int64 pad_before =
         attrs.padding == Padding::SAME ? std::max<int64>(pad_total / 2, 0) : 0;
 
-    int64 dim_being_operated = should_perform_depthwise_conv ? i + 1 : i;
-    padding[dim_being_operated] = {pad_before, pad_total - pad_before};
-    rhs_dilation[dim_being_operated] = dims.spatial_dims[i].stride;
-    window_strides[dim_being_operated] = attrs.dilations[dim];
+    padding[i] = {pad_before, pad_total - pad_before};
+    rhs_dilation[i] = dims.spatial_dims[i].stride;
+    window_strides[i] = attrs.dilations[dim];
   }
 
   // Besides padding the input, we will also expand output_rows to
@@ -575,19 +523,16 @@ xla::StatusOr<xla::XlaOp> MakeXlaBackpropFilterConvOp(
   //
   // This is done by specifying the window dilation factors in the
   // convolution HLO below.
-  filter_backprop = xla::ConvGeneralDilated(
-      activations, gradients, window_strides, padding,
-      /*lhs_dilation=*/ones, rhs_dilation, dnums,
-      /*feature_group_count=*/
-      should_perform_depthwise_conv ? dims.in_depth : 1);
 
-  if (should_perform_depthwise_conv) {
-    filter_backprop = xla::Reshape(filter_backprop, filter_shape.dimensions());
-  } else {
-    if (attrs.depthwise) {
-      filter_backprop = ContractFilterForDepthwiseBackprop(
-          filter_shape, filter_backprop, activations.builder());
-    }
+  filter_backprop = xla::ConvGeneralDilated(
+      activations, gradients, window_strides, padding, /*lhs_dilation=*/ones,
+      rhs_dilation, dnums,
+      /*feature_group_count=*/1,
+      /*batch_group_count=*/use_batch_group_count ? dims.in_depth : 1);
+
+  if (!use_batch_group_count && attrs.depthwise) {
+    filter_backprop = ContractFilterForDepthwiseBackprop(
+        filter_shape, filter_backprop, activations.builder());
   }
 
   return filter_backprop;
diff --git a/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc
index 4612f19971..b20adc592a 100644
--- a/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/xla_conv_op.cc
@@ -78,7 +78,7 @@ class XlaConvOp : public XlaOpKernel {
     xla::XlaOp output = xla::ConvGeneralDilated(
         context->Input(0), context->Input(1), window_strides, padding,
         lhs_dilation, rhs_dilation, dnums_, feature_group_count,
-        &precision_config_);
+        /*batch_group_count=*/1, &precision_config_);
     context->SetOutput(0, output);
   }
 
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 60df2ec395..6653ae0ca3 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -959,27 +959,29 @@ Status XlaBuilder::VerifyConvolution(
 
 XlaOp XlaBuilder::Conv(const XlaOp& lhs, const XlaOp& rhs,
                        absl::Span<const int64> window_strides, Padding padding,
-                       int64 feature_group_count,
+                       int64 feature_group_count, int64 batch_group_count,
                        const PrecisionConfig* precision_config) {
   return ConvWithGeneralDimensions(
       lhs, rhs, window_strides, padding,
       CreateDefaultConvDimensionNumbers(window_strides.size()),
-      feature_group_count, precision_config);
+      feature_group_count, batch_group_count, precision_config);
 }
 
 XlaOp XlaBuilder::ConvWithGeneralPadding(
     const XlaOp& lhs, const XlaOp& rhs, absl::Span<const int64> window_strides,
     absl::Span<const std::pair<int64, int64>> padding,
-    int64 feature_group_count, const PrecisionConfig* precision_config) {
+    int64 feature_group_count, int64 batch_group_count,
+    const PrecisionConfig* precision_config) {
   return ConvGeneral(lhs, rhs, window_strides, padding,
                      CreateDefaultConvDimensionNumbers(window_strides.size()),
-                     feature_group_count, precision_config);
+                     feature_group_count, batch_group_count, precision_config);
 }
 
 XlaOp XlaBuilder::ConvWithGeneralDimensions(
     const XlaOp& lhs, const XlaOp& rhs, absl::Span<const int64> window_strides,
     Padding padding, const ConvolutionDimensionNumbers& dimension_numbers,
-    int64 feature_group_count, const PrecisionConfig* precision_config) {
+    int64 feature_group_count, int64 batch_group_count,
+    const PrecisionConfig* precision_config) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, GetShape(lhs));
     TF_ASSIGN_OR_RETURN(const Shape& rhs_shape, GetShape(rhs));
@@ -1007,7 +1009,7 @@ XlaOp XlaBuilder::ConvWithGeneralDimensions(
                        MakePadding(base_area_dimensions, window_dimensions,
                                    window_strides, padding),
                        dimension_numbers, feature_group_count,
-                       precision_config);
+                       batch_group_count, precision_config);
   });
 }
 
@@ -1015,10 +1017,11 @@ XlaOp XlaBuilder::ConvGeneral(
     const XlaOp& lhs, const XlaOp& rhs, absl::Span<const int64> window_strides,
     absl::Span<const std::pair<int64, int64>> padding,
     const ConvolutionDimensionNumbers& dimension_numbers,
-    int64 feature_group_count, const PrecisionConfig* precision_config) {
+    int64 feature_group_count, int64 batch_group_count,
+    const PrecisionConfig* precision_config) {
   return ConvGeneralDilated(lhs, rhs, window_strides, padding, {}, {},
                             dimension_numbers, feature_group_count,
-                            precision_config);
+                            batch_group_count, precision_config);
 }
 
 XlaOp XlaBuilder::ConvGeneralDilated(
@@ -1026,7 +1029,8 @@ XlaOp XlaBuilder::ConvGeneralDilated(
     absl::Span<const std::pair<int64, int64>> padding,
     absl::Span<const int64> lhs_dilation, absl::Span<const int64> rhs_dilation,
     const ConvolutionDimensionNumbers& dimension_numbers,
-    int64 feature_group_count, const PrecisionConfig* precision_config) {
+    int64 feature_group_count, int64 batch_group_count,
+    const PrecisionConfig* precision_config) {
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
     TF_ASSIGN_OR_RETURN(const Shape& lhs_shape, GetShape(lhs));
@@ -1045,14 +1049,15 @@ XlaOp XlaBuilder::ConvGeneralDilated(
                         MakeWindow(window_dimensions, window_strides, padding,
                                    lhs_dilation, rhs_dilation));
 
-    TF_ASSIGN_OR_RETURN(Shape shape,
-                        ShapeInference::InferConvolveShape(
-                            lhs_shape, rhs_shape, feature_group_count,
-                            instr.window(), dimension_numbers));
+    TF_ASSIGN_OR_RETURN(
+        Shape shape, ShapeInference::InferConvolveShape(
+                         lhs_shape, rhs_shape, feature_group_count,
+                         batch_group_count, instr.window(), dimension_numbers));
     *instr.mutable_shape() = shape.ToProto();
 
     *instr.mutable_convolution_dimension_numbers() = dimension_numbers;
     instr.set_feature_group_count(feature_group_count);
+    instr.set_batch_group_count(batch_group_count);
 
     if (precision_config != nullptr) {
       *instr.mutable_precision_config() = *precision_config;
@@ -2786,38 +2791,42 @@ XlaOp DotGeneral(const XlaOp& lhs, const XlaOp& rhs,
 
 XlaOp Conv(const XlaOp& lhs, const XlaOp& rhs,
            absl::Span<const int64> window_strides, Padding padding,
-           int64 feature_group_count, const PrecisionConfig* precision_config) {
+           int64 feature_group_count, int64 batch_group_count,
+           const PrecisionConfig* precision_config) {
   return lhs.builder()->Conv(lhs, rhs, window_strides, padding,
-                             feature_group_count, precision_config);
+                             feature_group_count, batch_group_count,
+                             precision_config);
 }
 
 XlaOp ConvWithGeneralPadding(const XlaOp& lhs, const XlaOp& rhs,
                              absl::Span<const int64> window_strides,
                              absl::Span<const std::pair<int64, int64>> padding,
-                             int64 feature_group_count,
+                             int64 feature_group_count, int64 batch_group_count,
                              const PrecisionConfig* precision_config) {
   return lhs.builder()->ConvWithGeneralPadding(
-      lhs, rhs, window_strides, padding, feature_group_count, precision_config);
+      lhs, rhs, window_strides, padding, feature_group_count, batch_group_count,
+      precision_config);
 }
 
 XlaOp ConvWithGeneralDimensions(
     const XlaOp& lhs, const XlaOp& rhs, absl::Span<const int64> window_strides,
     Padding padding, const ConvolutionDimensionNumbers& dimension_numbers,
-    int64 feature_group_count, const PrecisionConfig* precision_config) {
+    int64 feature_group_count, int64 batch_group_count,
+    const PrecisionConfig* precision_config) {
   return lhs.builder()->ConvWithGeneralDimensions(
       lhs, rhs, window_strides, padding, dimension_numbers, feature_group_count,
-      precision_config);
+      batch_group_count, precision_config);
 }
 
 XlaOp ConvGeneral(const XlaOp& lhs, const XlaOp& rhs,
                   absl::Span<const int64> window_strides,
                   absl::Span<const std::pair<int64, int64>> padding,
                   const ConvolutionDimensionNumbers& dimension_numbers,
-                  int64 feature_group_count,
+                  int64 feature_group_count, int64 batch_group_count,
                   const PrecisionConfig* precision_config) {
   return lhs.builder()->ConvGeneral(lhs, rhs, window_strides, padding,
                                     dimension_numbers, feature_group_count,
-                                    precision_config);
+                                    batch_group_count, precision_config);
 }
 
 XlaOp ConvGeneralDilated(const XlaOp& lhs, const XlaOp& rhs,
@@ -2826,11 +2835,12 @@ XlaOp ConvGeneralDilated(const XlaOp& lhs, const XlaOp& rhs,
                          absl::Span<const int64> lhs_dilation,
                          absl::Span<const int64> rhs_dilation,
                          const ConvolutionDimensionNumbers& dimension_numbers,
-                         int64 feature_group_count,
+                         int64 feature_group_count, int64 batch_group_count,
                          const PrecisionConfig* precision_config) {
   return lhs.builder()->ConvGeneralDilated(
       lhs, rhs, window_strides, padding, lhs_dilation, rhs_dilation,
-      dimension_numbers, feature_group_count, precision_config);
+      dimension_numbers, feature_group_count, batch_group_count,
+      precision_config);
 }
 
 XlaOp Fft(const XlaOp& operand, FftType fft_type,
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 098efb60f9..6e9b025e5d 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -387,28 +387,28 @@ class XlaBuilder {
 
   XlaOp Conv(const XlaOp& lhs, const XlaOp& rhs,
              absl::Span<const int64> window_strides, Padding padding,
-             int64 feature_group_count = 1,
+             int64 feature_group_count = 1, int64 batch_group_count = 1,
              const PrecisionConfig* precision_config = nullptr);
 
   XlaOp ConvWithGeneralPadding(
       const XlaOp& lhs, const XlaOp& rhs,
       absl::Span<const int64> window_strides,
       absl::Span<const std::pair<int64, int64>> padding,
-      int64 feature_group_count = 1,
+      int64 feature_group_count = 1, int64 batch_group_count = 1,
       const PrecisionConfig* precision_config = nullptr);
 
   XlaOp ConvWithGeneralDimensions(
       const XlaOp& lhs, const XlaOp& rhs,
       absl::Span<const int64> window_strides, Padding padding,
       const ConvolutionDimensionNumbers& dimension_numbers,
-      int64 feature_group_count = 1,
+      int64 feature_group_count = 1, int64 batch_group_count = 1,
       const PrecisionConfig* precision_config = nullptr);
 
   XlaOp ConvGeneral(const XlaOp& lhs, const XlaOp& rhs,
                     absl::Span<const int64> window_strides,
                     absl::Span<const std::pair<int64, int64>> padding,
                     const ConvolutionDimensionNumbers& dimension_numbers,
-                    int64 feature_group_count = 1,
+                    int64 feature_group_count = 1, int64 batch_group_count = 1,
                     const PrecisionConfig* precision_config = nullptr);
 
   XlaOp ConvGeneralDilated(const XlaOp& lhs, const XlaOp& rhs,
@@ -418,6 +418,7 @@ class XlaBuilder {
                            absl::Span<const int64> rhs_dilation,
                            const ConvolutionDimensionNumbers& dimension_numbers,
                            int64 feature_group_count = 1,
+                           int64 batch_group_count = 1,
                            const PrecisionConfig* precision_config = nullptr);
 
   XlaOp Fft(const XlaOp& operand, FftType fft_type,
@@ -881,23 +882,25 @@ class XlaBuilder {
                           const PrecisionConfig* precision_config);
   friend XlaOp Conv(const XlaOp& lhs, const XlaOp& rhs,
                     absl::Span<const int64> window_strides, Padding padding,
-                    int64 feature_group_count,
+                    int64 feature_group_count, int64 batch_group_count,
                     const PrecisionConfig* precision_config);
   friend XlaOp ConvWithGeneralPadding(
       const XlaOp& lhs, const XlaOp& rhs,
       absl::Span<const int64> window_strides,
       absl::Span<const std::pair<int64, int64>> padding,
-      int64 feature_group_count, const PrecisionConfig* precision_config);
+      int64 feature_group_count, int64 batch_group_count,
+      const PrecisionConfig* precision_config);
   friend XlaOp ConvWithGeneralDimensions(
       const XlaOp& lhs, const XlaOp& rhs,
       absl::Span<const int64> window_strides, Padding padding,
       const ConvolutionDimensionNumbers& dimension_numbers,
-      int64 feature_group_count, const PrecisionConfig* precision_config);
+      int64 feature_group_count, int64 batch_group_count,
+      const PrecisionConfig* precision_config);
   friend XlaOp ConvGeneral(const XlaOp& lhs, const XlaOp& rhs,
                            absl::Span<const int64> window_strides,
                            absl::Span<const std::pair<int64, int64>> padding,
                            const ConvolutionDimensionNumbers& dimension_numbers,
-                           int64 feature_group_count,
+                           int64 feature_group_count, int64 batch_group_count,
                            const PrecisionConfig* precision_config);
   friend XlaOp ConvGeneralDilated(
       const XlaOp& lhs, const XlaOp& rhs,
@@ -906,7 +909,8 @@ class XlaBuilder {
       absl::Span<const int64> lhs_dilation,
       absl::Span<const int64> rhs_dilation,
       const ConvolutionDimensionNumbers& dimension_numbers,
-      int64 feature_group_count, const PrecisionConfig* precision_config);
+      int64 feature_group_count, int64 batch_group_count,
+      const PrecisionConfig* precision_config);
   friend XlaOp Fft(const XlaOp& operand, FftType fft_type,
                    absl::Span<const int64> fft_length);
   friend XlaOp Infeed(XlaBuilder* builder, const Shape& shape,
@@ -1372,7 +1376,7 @@ XlaOp DotGeneral(const XlaOp& lhs, const XlaOp& rhs,
 // default convolution dimension numbers.
 XlaOp Conv(const XlaOp& lhs, const XlaOp& rhs,
            absl::Span<const int64> window_strides, Padding padding,
-           int64 feature_group_count = 1,
+           int64 feature_group_count = 1, int64 batch_group_count = 1,
            const PrecisionConfig* precision_config = nullptr);
 
 // Enqueues a convolution instruction onto the computation, with the caller
@@ -1381,6 +1385,7 @@ XlaOp ConvWithGeneralPadding(const XlaOp& lhs, const XlaOp& rhs,
                              absl::Span<const int64> window_strides,
                              absl::Span<const std::pair<int64, int64>> padding,
                              int64 feature_group_count = 1,
+                             int64 batch_group_count = 1,
                              const PrecisionConfig* precision_config = nullptr);
 
 // Enqueues a convolution instruction onto the computation, with the caller
@@ -1388,7 +1393,7 @@ XlaOp ConvWithGeneralPadding(const XlaOp& lhs, const XlaOp& rhs,
 XlaOp ConvWithGeneralDimensions(
     const XlaOp& lhs, const XlaOp& rhs, absl::Span<const int64> window_strides,
     Padding padding, const ConvolutionDimensionNumbers& dimension_numbers,
-    int64 feature_group_count = 1,
+    int64 feature_group_count = 1, int64 batch_group_count = 1,
     const PrecisionConfig* precision_config = nullptr);
 
 // Enqueues a convolution instruction onto the computation, with the caller
@@ -1397,7 +1402,7 @@ XlaOp ConvGeneral(const XlaOp& lhs, const XlaOp& rhs,
                   absl::Span<const int64> window_strides,
                   absl::Span<const std::pair<int64, int64>> padding,
                   const ConvolutionDimensionNumbers& dimension_numbers,
-                  int64 feature_group_count = 1,
+                  int64 feature_group_count = 1, int64 batch_group_count = 1,
                   const PrecisionConfig* precision_config = nullptr);
 
 // Enqueues a convolution instruction onto the computation, with the caller
@@ -1409,6 +1414,7 @@ XlaOp ConvGeneralDilated(const XlaOp& lhs, const XlaOp& rhs,
                          absl::Span<const int64> rhs_dilation,
                          const ConvolutionDimensionNumbers& dimension_numbers,
                          int64 feature_group_count = 1,
+                         int64 batch_group_count = 1,
                          const PrecisionConfig* precision_config = nullptr);
 
 // Enqueues an FFT instruction onto the computation, of the given type and
diff --git a/tensorflow/compiler/xla/g3doc/operation_semantics.md b/tensorflow/compiler/xla/g3doc/operation_semantics.md
index 002ebc31b9..9a9cd08c30 100644
--- a/tensorflow/compiler/xla/g3doc/operation_semantics.md
+++ b/tensorflow/compiler/xla/g3doc/operation_semantics.md
@@ -548,17 +548,23 @@ Computes a convolution of the kind used in neural networks. Here, a convolution
 can be thought of as a n-dimensional window moving across a n-dimensional base
 area and a computation is performed for each possible position of the window.
 
-| Arguments             | Type                 | Semantics                     |
-| --------------------- | -------------------- | ----------------------------- |
-| `lhs`                 | `XlaOp`              | rank n+2 array of inputs      |
-| `rhs`                 | `XlaOp`              | rank n+2 array of kernel      |
-:                       :                      : weights                       :
-| `window_strides`      | `ArraySlice<int64>`  | n-d array of kernel strides   |
-| `padding`             | `ArraySlice<         | n-d array of (low, high)      |
-:                       : pair<int64, int64>>` : padding                       :
-| `lhs_dilation`        | `ArraySlice<int64>`  | n-d lhs dilation factor array |
-| `rhs_dilation`        | `ArraySlice<int64>`  | n-d rhs dilation factor array |
-| `feature_group_count` | int64                | the number of feature groups  |
+| Arguments             | Type                     | Semantics                |
+| --------------------- | ------------------------ | ------------------------ |
+| `lhs`                 | `XlaOp`                  | rank n+2 array of inputs |
+| `rhs`                 | `XlaOp`                  | rank n+2 array of kernel |
+:                       :                          : weights                  :
+| `window_strides`      | `ArraySlice<int64>`      | n-d array of kernel      |
+:                       :                          : strides                  :
+| `padding`             | `ArraySlice< pair<int64, | n-d array of (low, high) |
+:                       : int64>>`                 : padding                  :
+| `lhs_dilation`        | `ArraySlice<int64>`      | n-d lhs dilation factor  |
+:                       :                          : array                    :
+| `rhs_dilation`        | `ArraySlice<int64>`      | n-d rhs dilation factor  |
+:                       :                          : array                    :
+| `feature_group_count` | int64                    | the number of feature    |
+:                       :                          : groups                   :
+| `batch_group_count`   | int64                    | the number of batch      |
+:                       :                          : groups                   :
 
 Let n be the number of spatial dimensions. The `lhs` argument is a rank n+2
 array describing the base area. This is called the input, even though of course
@@ -628,12 +634,21 @@ input feature dimension, and the filter would be reshaped from
 `[filter_height, filter_width, 1, in_channels * channel_multiplier]`. For more
 details, see `tf.nn.depthwise_conv2d`.
 
+The `batch_group_count` (default value 1) argument can be used for depthwise
+filters during backpropagation. `batch_group_count` needs to be a divisor of the
+size of the `lhs` batch dimension. If `batch_group_count` is greater than 1, it
+means that conceptually the output batch dimension is split evenely in
+`batch_group_count` groups, such that each group consists of a consecutive
+subsequence of batches. Each output batch element is the reduced value of the
+batch group size.
+
 The output shape has these dimensions, in this order:
 
-*   `batch`: Same size as `batch` on the input (`lhs`).
+*   `batch`: The size of this dimension times `batch_group_count` should equal
+    the size of the `batch` dimension in lhs.
 *   `z`: Same size as `output-z` on the kernel (`rhs`).
 *   `spatial_dims`: One value for each valid placement of the convolutional
-window.
+    window.
 
 The valid placements of the convolutional window are determined by the strides
 and the size of the base area after padding.
diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc
index a27e2005da..92f28a9f8a 100644
--- a/tensorflow/compiler/xla/reference_util.cc
+++ b/tensorflow/compiler/xla/reference_util.cc
@@ -527,10 +527,11 @@ ReferenceUtil::ConvArray4DGeneralDimensionsDilated(
   dim2.set_base_dilation(lhs_dilation.second);
   *window.add_dimensions() = dim2;
 
-  const Shape& shape = ShapeInference::InferConvolveShape(
-                           lhs_literal.shape(), rhs_literal.shape(),
-                           /*feature_group_count=*/1, window, dnums)
-                           .ConsumeValueOrDie();
+  const Shape& shape =
+      ShapeInference::InferConvolveShape(
+          lhs_literal.shape(), rhs_literal.shape(),
+          /*feature_group_count=*/1, /*batch_group_count=*/1, window, dnums)
+          .ConsumeValueOrDie();
 
   HloInstruction* lhs_instruction =
       b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs_literal)));
@@ -542,7 +543,7 @@ ReferenceUtil::ConvArray4DGeneralDimensionsDilated(
       /*new_size=*/2, PrecisionConfig::DEFAULT);
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, /*feature_group_count=*/1,
-      window, dnums, precision_config));
+      /*batch_group_count=*/1, window, dnums, precision_config));
   HloModuleConfig config;
   HloModule module("ReferenceUtil", config);
   auto computation = module.AddEntryComputation(b.Build());
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 55cadfdec6..d8736c8196 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1700,9 +1700,9 @@ tf_cc_test(
 )
 
 cc_library(
-    name = "convolution_feature_group_converter",
-    srcs = ["convolution_feature_group_converter.cc"],
-    hdrs = ["convolution_feature_group_converter.h"],
+    name = "convolution_group_converter",
+    srcs = ["convolution_group_converter.cc"],
+    hdrs = ["convolution_group_converter.h"],
     deps = [
         ":hlo",
         ":hlo_pass",
@@ -1724,7 +1724,7 @@ tf_cc_test(
     size = "small",
     srcs = ["convolution_feature_group_converter_test.cc"],
     deps = [
-        ":convolution_feature_group_converter",
+        ":convolution_group_converter",
         ":hlo",
         ":hlo_matchers",
         ":hlo_parser",
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index cfb4c48277..e6015370aa 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -1273,7 +1273,7 @@ TEST_F(AlgebraicSimplifierTest, ZeroSizedConvolution) {
   // Create add computation.
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {3, 3, 3}), lhs, rhs, /*feature_group_count=*/1,
-      window, dnums, DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
   m->AddEntryComputation(builder.Build());
   HloPassFix<AlgebraicSimplifier> simplifier(default_options_);
   EXPECT_THAT(m->entry_computation()->root_instruction(),
@@ -2971,11 +2971,11 @@ TEST_P(ConvInputPaddingTest, DoTest) {
           .ValueOrDie();
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeInference::InferConvolveShape(lhs_pad->shape(), filter->shape(),
-                                         /*feature_group_count=*/1, window,
-                                         dnums)
+                                         /*feature_group_count=*/1,
+                                         /*batch_group_count=*/1, window, dnums)
           .ValueOrDie(),
-      lhs_pad, filter, /*feature_group_count=*/1, window, dnums,
-      DefaultPrecisionConfig(2)));
+      lhs_pad, filter, /*feature_group_count=*/1, /*batch_group_count=*/1,
+      window, dnums, DefaultPrecisionConfig(2)));
   auto module = CreateNewVerifiedModule();
   module->AddEntryComputation(builder.Build());
 
@@ -3088,11 +3088,11 @@ TEST_P(ConvFilterPaddingTest, DoIt) {
 
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeInference::InferConvolveShape(input->shape(), rhs_pad->shape(),
-                                         /*feature_group_count=*/1, window,
-                                         dnums)
+                                         /*feature_group_count=*/1,
+                                         /*batch_group_count=*/1, window, dnums)
           .ValueOrDie(),
-      input, rhs_pad, /*feature_group_count=*/1, window, dnums,
-      precision_config));
+      input, rhs_pad, /*feature_group_count=*/1, /*batch_group_count=*/1,
+      window, dnums, precision_config));
 
   auto module = CreateNewVerifiedModule();
   module->AddEntryComputation(builder.Build());
@@ -3240,7 +3240,8 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) {
 
     b.AddInstruction(HloInstruction::CreateConvolve(
         out_shape, input, filter,
-        /*feature_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
+        /*feature_group_count=*/1, /*batch_group_count=*/1, window, dnums,
+        DefaultPrecisionConfig(2)));
 
     // TODO(b/80488902): verify this module.
     auto module = CreateNewUnverifiedModule();
diff --git a/tensorflow/compiler/xla/service/convolution_feature_group_converter_test.cc b/tensorflow/compiler/xla/service/convolution_feature_group_converter_test.cc
index e6bf2143a2..d58f157242 100644
--- a/tensorflow/compiler/xla/service/convolution_feature_group_converter_test.cc
+++ b/tensorflow/compiler/xla/service/convolution_feature_group_converter_test.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/convolution_feature_group_converter.h"
+#include "tensorflow/compiler/xla/service/convolution_group_converter.h"
 
 #include <memory>
 #include <string>
@@ -30,10 +30,10 @@ limitations under the License.
 namespace xla {
 namespace {
 
-using ConvolutionFeatureGroupConverterTest = HloTestBase;
+using ConvolutionGroupConverterTest = HloTestBase;
 namespace op = testing::opcode_matchers;
 
-TEST_F(ConvolutionFeatureGroupConverterTest,
+TEST_F(ConvolutionGroupConverterTest,
        ConvertFeatureGroupCountEqualToInputFeatureDim) {
   string hlo_string = R"(HloModule Convolve1D1Window_0_module
 
@@ -49,7 +49,8 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,2], filter: f32[1,1,2]) -> f32[1,2
   auto computation = module->entry_computation();
   HloInstruction* root = computation->root_instruction();
   EXPECT_EQ(root->opcode(), HloOpcode::kConvolution);
-  ConvolutionFeatureGroupConverter converter;
+  ConvolutionGroupConverter converter(nullptr, /*convert_batch_groups_only=*/
+                                      false);
   ASSERT_TRUE(converter.Run(module.get()).ValueOrDie());
   root = computation->root_instruction();
   // Make sure the convolution is converted to one with feature_group_count = 1.
@@ -63,7 +64,7 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,2], filter: f32[1,1,2]) -> f32[1,2
                          op::Broadcast(op::Constant())));
 }
 
-TEST_F(ConvolutionFeatureGroupConverterTest,
+TEST_F(ConvolutionGroupConverterTest,
        ConvertFeatureGroupCountDivisorOfInputFeatureDim) {
   string hlo_string = R"(HloModule Convolve1D1Window_0_module
 
@@ -79,7 +80,8 @@ ENTRY %Convolve1D1Window_0.v3 (input: f32[1,2,4], filter: f32[1,2,2]) -> f32[1,2
   auto computation = module->entry_computation();
   HloInstruction* root = computation->root_instruction();
   EXPECT_EQ(root->opcode(), HloOpcode::kConvolution);
-  ConvolutionFeatureGroupConverter converter;
+  ConvolutionGroupConverter converter(nullptr, /*convert_batch_groups_only=*/
+                                      false);
   ASSERT_TRUE(converter.Run(module.get()).ValueOrDie());
   root = computation->root_instruction();
   // Make sure the convolution is replaced with a concatenate.
diff --git a/tensorflow/compiler/xla/service/convolution_feature_group_converter.cc b/tensorflow/compiler/xla/service/convolution_group_converter.cc
similarity index 61%
rename from tensorflow/compiler/xla/service/convolution_feature_group_converter.cc
rename to tensorflow/compiler/xla/service/convolution_group_converter.cc
index 95c7724c3c..7a24faec17 100644
--- a/tensorflow/compiler/xla/service/convolution_feature_group_converter.cc
+++ b/tensorflow/compiler/xla/service/convolution_group_converter.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/xla/service/convolution_feature_group_converter.h"
+#include "tensorflow/compiler/xla/service/convolution_group_converter.h"
 
 #include <memory>
 #include <vector>
@@ -50,8 +50,12 @@ class ConvolutionVisitor : public DfsHloVisitorWithDefault {
 
   Status HandleConvolution(HloInstruction* convolution) override;
 
+  Status HandleBatchGroupCount(HloInstruction* convolution);
+
   // Runs the visitor on a computation.
   static bool Run(HloComputation* computation,
+                  std::function<bool(HloInstruction*)> is_cost_viable,
+                  bool convert_batch_groups_only,
                   bool canonicalize_depthwise_filter);
 
   // Returns whether any convolution ops were rewritten.
@@ -60,10 +64,15 @@ class ConvolutionVisitor : public DfsHloVisitorWithDefault {
   ~ConvolutionVisitor() override = default;
 
  private:
-  explicit ConvolutionVisitor(HloComputation* computation,
-                              bool canonicalize_depthwise_filter = false)
+  explicit ConvolutionVisitor(
+      HloComputation* computation,
+      std::function<bool(HloInstruction*)> is_cost_viable,
+      bool convert_batch_groups_only,
+      bool canonicalize_depthwise_filter = false)
       : computation_(computation),
-        filter_expansion_(!canonicalize_depthwise_filter) {}
+        filter_expansion_(!canonicalize_depthwise_filter),
+        convert_batch_groups_only_(convert_batch_groups_only),
+        is_cost_viable_(is_cost_viable) {}
 
   // Current HloComputation instance the ConvolutionVisitor is traversing.
   HloComputation* computation_;
@@ -73,11 +82,21 @@ class ConvolutionVisitor : public DfsHloVisitorWithDefault {
 
   // Whether filter expansion is required.
   bool filter_expansion_;
+
+  // Decides whether to convert batch groups or feature groups.
+  bool convert_batch_groups_only_;
+
+  // std::function<std::vector<LloValue*>(int64, int64)> chunk_fetcher
+  std::function<bool(HloInstruction*)> is_cost_viable_;
 };
 
-bool ConvolutionVisitor::Run(HloComputation* computation,
-                             bool canonicalize_depthwise_filter) {
-  ConvolutionVisitor visitor(computation, canonicalize_depthwise_filter);
+bool ConvolutionVisitor::Run(
+    HloComputation* computation,
+    std::function<bool(HloInstruction*)> is_cost_viable,
+    bool convert_batch_groups_only, bool canonicalize_depthwise_filter) {
+  ConvolutionVisitor visitor(computation, is_cost_viable,
+                             convert_batch_groups_only,
+                             canonicalize_depthwise_filter);
   TF_CHECK_OK(computation->Accept(&visitor));
   return visitor.changed_;
 }
@@ -176,18 +195,206 @@ HloInstruction* GetExpandedFilterMask(
       predicate_shape, HloOpcode::kEq, broadcasted_mask1, broadcasted_mask2));
 }
 
-Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
-  int64 group_count = convolution->feature_group_count();
-  if (group_count == 1) {
+// This function handles batch_group_counts which are relevant only for
+// depthwise backprop filter convolutions.
+Status ConvolutionVisitor::HandleBatchGroupCount(HloInstruction* convolution) {
+  auto dim_numbers = convolution->convolution_dimension_numbers();
+  auto activation = convolution->mutable_operand(0);
+  auto filter = convolution->mutable_operand(1);
+  int64 batch_group_count = convolution->batch_group_count();
+
+  if (batch_group_count == 1) {
     return Status::OK();
   }
-  auto filter = convolution->mutable_operand(1);
-  changed_ = true;
+
+  VLOG(2) << "Dealing with batch_group_count " << batch_group_count << "\n";
+
   auto add = [&](std::unique_ptr<HloInstruction> inst) {
     return computation_->AddInstruction(std::move(inst));
   };
 
+  int64 input_batch_dimension = dim_numbers.input_batch_dimension();
+  int64 input_feature_dimension = dim_numbers.input_feature_dimension();
+  int64 output_batch_dimension = dim_numbers.output_batch_dimension();
+  int64 output_feature_dimension = dim_numbers.output_feature_dimension();
+  int64 kernel_input_feature_dimension =
+      dim_numbers.kernel_input_feature_dimension();
+
+  int64 input_batch = activation->shape().dimensions(input_batch_dimension);
+
+  // We are not yet supporting batch_group of sizes greater than 1.
+  TF_RET_CHECK(input_batch == batch_group_count);
+
+  if (is_cost_viable_(convolution)) {
+    // Add a dimension to the activation, and reshape.
+    Shape reshaped_activation_shape = activation->shape();
+    ShapeUtil::AppendMajorDimension(1, &reshaped_activation_shape);
+
+    activation = add(
+        HloInstruction::CreateReshape(reshaped_activation_shape, activation));
+
+    // Add a dimension to the filter, and reshape.
+    Shape reshaped_filter_shape = filter->shape();
+    ShapeUtil::AppendMajorDimension(1, &reshaped_filter_shape);
+
+    filter = add(HloInstruction::CreateReshape(reshaped_filter_shape, filter));
+
+    int64 new_spatial_dim = reshaped_activation_shape.dimensions().size() - 1;
+
+    Shape new_output_shape = convolution->shape();
+    ShapeUtil::AppendMajorDimension(1, &new_output_shape);
+
+    int64 input_feature =
+        activation->shape().dimensions(input_feature_dimension);
+
+    // The code below edits convolution dimension numbers. Please refer to
+    // conv_op_helpers.cc to find how the dimensions were set up originally.
+
+    // Effectively, the new input batch becomes 1, and so does the kernel
+    // input feature. The original input batch now becomes a spatial dimension.
+    // The output batch (remember that the output is the new kernel for in
+    // backprop) becomes a spatial dimension too.
+
+    dim_numbers.set_input_batch_dimension(new_spatial_dim);
+    dim_numbers.set_input_feature_dimension(input_batch_dimension);
+    dim_numbers.set_kernel_input_feature_dimension(new_spatial_dim);
+
+    dim_numbers.add_input_spatial_dimensions(input_feature_dimension);
+    dim_numbers.add_kernel_spatial_dimensions(kernel_input_feature_dimension);
+
+    dim_numbers.add_output_spatial_dimensions(output_batch_dimension);
+    dim_numbers.set_output_batch_dimension(new_spatial_dim);
+
+    // Add window for the new spatial dimension.
+    Window new_window = convolution->window();
+    auto* dim = new_window.add_dimensions();
+    dim->set_window_dilation(1);
+    dim->set_base_dilation(1);
+    dim->set_stride(1);
+    dim->set_size(input_feature);
+
+    auto new_convolution = add(HloInstruction::CreateConvolve(
+        new_output_shape, activation, filter,
+        /*feature_group_count=*/batch_group_count, /*batch_group_count=*/1,
+        new_window, dim_numbers, convolution->precision_config()));
+
+    // Delete the extra spatial dimension, and reshape.
+    Shape reshaped_convolution_shape = ShapeUtil::DeleteDimension(
+        new_spatial_dim - 1, new_convolution->shape());
+    auto reshaped_convolution = HloInstruction::CreateReshape(
+        reshaped_convolution_shape, new_convolution);
+
+    TF_RETURN_IF_ERROR(computation_->ReplaceWithNewInstruction(
+        convolution, std::move(reshaped_convolution)));
+
+    changed_ = true;
+  } else {
+    // We first obtain the expanded the filter (which is the convolution
+    // output). The batch dimension is the expanded one (which originally
+    // represents kernel input feature dimension). We mask the filter to zero
+    // out the expanded regions. Next we reduce the filter in the batch
+    // dimension to obtain the original filter size.
+
+    HloInstruction* filter_mask =
+        GetExpandedFilterMask(convolution->shape(), output_batch_dimension,
+                              output_feature_dimension, batch_group_count, add);
+    auto expanded_filter_shape = ExpandedFilterShape(
+        convolution->shape(), batch_group_count, output_batch_dimension);
+
+    auto new_convolution = add(HloInstruction::CreateConvolve(
+        expanded_filter_shape, activation, filter,
+        /*feature_group_count=*/1, /*batch_group_count=*/1,
+        convolution->window(), dim_numbers, convolution->precision_config()));
+
+    auto zero = add(HloInstruction::CreateConstant(
+        LiteralUtil::Zero(expanded_filter_shape.element_type())));
+    auto zero_filter =
+        add(HloInstruction::CreateBroadcast(expanded_filter_shape, zero, {}));
+
+    auto new_filter = add(HloInstruction::CreateTernary(
+        expanded_filter_shape, HloOpcode::kSelect, filter_mask, new_convolution,
+        zero_filter));
+
+    auto zero_literal = LiteralUtil::CreateR0(0.0f);
+    TF_ASSIGN_OR_RETURN(zero_literal, zero_literal.Convert(F32));
+    auto zero_scalar =
+        add(HloInstruction::CreateConstant(std::move(zero_literal)));
+
+    auto reduce_function = [&]() -> HloComputation* {
+      HloComputation::Builder b("add_computation");
+      Shape shape = ShapeUtil::MakeShape(F32, {});
+      auto lhs =
+          b.AddInstruction(HloInstruction::CreateParameter(0, shape, "lhs"));
+      auto rhs =
+          b.AddInstruction(HloInstruction::CreateParameter(1, shape, "rhs"));
+      auto scalar_op = b.AddInstruction(
+          HloInstruction::CreateBinary(shape, HloOpcode::kAdd, lhs, rhs));
+      return computation_->parent()->AddEmbeddedComputation(b.Build(scalar_op));
+    };
+
+    // Ensure that data input to reduce window is of type F32.
+    if (primitive_util::BitWidth(new_filter->shape().element_type()) <
+        primitive_util::BitWidth(F32)) {
+      Shape convert_shape = new_filter->shape();
+      convert_shape.set_element_type(F32);
+      new_filter =
+          add(HloInstruction::CreateBitcastConvert(convert_shape, new_filter));
+    }
+
+    auto reduce_window_shape = new_convolution->shape();
+    reduce_window_shape.set_dimensions(output_batch_dimension, 1);
+
+    // Create the reduce window.
+    Window window;
+    for (int64 i = 0; i < new_convolution->shape().dimensions_size(); ++i) {
+      auto* dim = window.add_dimensions();
+      dim->set_padding_low(0);
+      dim->set_padding_high(0);
+      dim->set_window_dilation(1);
+      dim->set_base_dilation(1);
+      if (i == output_batch_dimension) {
+        dim->set_stride(batch_group_count);
+        dim->set_size(batch_group_count);
+      } else {
+        dim->set_stride(1);
+        dim->set_size(1);
+      }
+    }
+    auto reduce_window = add(HloInstruction::CreateReduceWindow(
+        reduce_window_shape, new_filter, zero_scalar, window,
+        reduce_function()));
+
+    Shape convert_back_shape = reduce_window->shape();
+    convert_back_shape.set_element_type(activation->shape().element_type());
+
+    // Convert reduced data back to the original data type.
+    auto reduce_window_converted =
+        HloInstruction::CreateBitcastConvert(convert_back_shape, reduce_window);
+
+    TF_RETURN_IF_ERROR(computation_->ReplaceWithNewInstruction(
+        convolution, std::move(reduce_window_converted)));
+  }
+
+  return Status::OK();
+}
+
+Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
+  if (convert_batch_groups_only_) {
+    return HandleBatchGroupCount(convolution);
+  }
+
+  auto add = [&](std::unique_ptr<HloInstruction> inst) {
+    return computation_->AddInstruction(std::move(inst));
+  };
+
+  int64 group_count = convolution->feature_group_count();
+  if (group_count == 1) {
+    return Status::OK();
+  }
+
+  changed_ = true;
   auto dim_numbers = convolution->convolution_dimension_numbers();
+  auto filter = convolution->mutable_operand(1);
   int64 kernel_input_feature_dim = dim_numbers.kernel_input_feature_dimension();
   int64 group_size = filter->shape().dimensions(kernel_input_feature_dim);
   int64 kernel_output_feature_dim =
@@ -205,6 +412,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
     // If the code generator handles depthwise separable convolutions
     // inherently, then no filter expansion is needed.
     if (!filter_expansion_ && depthwise_separable) {
+      changed_ = false;
       return Status::OK();
     }
     // We want to repeat 'filter' in the 'input_feature_dim' dimension
@@ -233,8 +441,8 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
 
     auto new_convolution = HloInstruction::CreateConvolve(
         convolution->shape(), convolution->mutable_operand(0), new_filter,
-        /*feature_group_count=*/1, convolution->window(), dim_numbers,
-        convolution->precision_config());
+        /*feature_group_count=*/1, /*batch_group_count=*/1,
+        convolution->window(), dim_numbers, convolution->precision_config());
     TF_RETURN_IF_ERROR(computation_->ReplaceWithNewInstruction(
         convolution, std::move(new_convolution)));
   } else {
@@ -294,8 +502,9 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
       dim->set_size(group_size);
 
       auto new_convolution = add(HloInstruction::CreateConvolve(
-          new_output_shape, activation, filter, group_count, new_window,
-          dim_numbers, convolution->precision_config()));
+          new_output_shape, activation, filter, group_count,
+          /*batch_group_count=*/1, new_window, dim_numbers,
+          convolution->precision_config()));
 
       // Delete the extra spatial dimension, and reshape.
       Shape reshaped_convolution_shape =
@@ -372,7 +581,8 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
 
         auto new_convolution = add(HloInstruction::CreateConvolve(
             conv_slice_shape, activation_slice, filter_slice,
-            /*feature_group_count=*/1, convolution->window(), dim_numbers,
+            /*feature_group_count=*/1, /*batch_group_count=*/1,
+            convolution->window(), dim_numbers,
             convolution->precision_config()));
 
         sliced_convolutions.push_back(new_convolution);
@@ -390,17 +600,19 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
 
 }  // namespace
 
-StatusOr<bool> ConvolutionFeatureGroupConverter::Run(HloModule* module) {
-  XLA_VLOG_LINES(2, "ConvolutionFeatureGroupConverter::Run(), before:\n" +
-                        module->ToString());
+StatusOr<bool> ConvolutionGroupConverter::Run(HloModule* module) {
+  XLA_VLOG_LINES(
+      2, "ConvolutionGroupConverter::Run(), before:\n" + module->ToString());
   bool changed = false;
   for (auto* comp : module->MakeNonfusionComputations()) {
-    if (ConvolutionVisitor::Run(comp, filter_expansion_)) {
+    if (ConvolutionVisitor::Run(comp, is_cost_viable_,
+                                convert_batch_groups_only_,
+                                filter_expansion_)) {
       changed = true;
     }
   }
-  XLA_VLOG_LINES(2, "ConvolutionFeatureGroupConverter::Run(), after:\n" +
-                        module->ToString());
+  XLA_VLOG_LINES(
+      2, "ConvolutionGroupConverter::Run(), after:\n" + module->ToString());
   return changed;
 }
 
diff --git a/tensorflow/compiler/xla/service/convolution_feature_group_converter.h b/tensorflow/compiler/xla/service/convolution_group_converter.h
similarity index 58%
rename from tensorflow/compiler/xla/service/convolution_feature_group_converter.h
rename to tensorflow/compiler/xla/service/convolution_group_converter.h
index cb6bc04c00..1caf184111 100644
--- a/tensorflow/compiler/xla/service/convolution_feature_group_converter.h
+++ b/tensorflow/compiler/xla/service/convolution_group_converter.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CONVOLUTION_FEATURE_GROUP_CONVERTER_H_
-#define TENSORFLOW_COMPILER_XLA_SERVICE_CONVOLUTION_FEATURE_GROUP_CONVERTER_H_
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CONVOLUTION_GROUP_CONVERTER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CONVOLUTION_GROUP_CONVERTER_H_
 
 #include "absl/strings/string_view.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
@@ -25,23 +25,34 @@ namespace xla {
 
 // A pass which rewrites convolutions with feature_group_count > 1 into
 // convolutions with feature_group_count = 1.
-class ConvolutionFeatureGroupConverter : public HloModulePass {
+class ConvolutionGroupConverter : public HloModulePass {
  public:
-  ConvolutionFeatureGroupConverter(bool canonicalize_depthwise_filter = false)
-      : filter_expansion_(canonicalize_depthwise_filter) {}
+  ConvolutionGroupConverter(std::function<bool(HloInstruction*)> is_cost_viable,
+                            bool convert_batch_groups_only,
+                            bool canonicalize_depthwise_filter = false)
+      : is_cost_viable_(is_cost_viable),
+        convert_batch_groups_only_(convert_batch_groups_only),
+        filter_expansion_(canonicalize_depthwise_filter) {}
 
   absl::string_view name() const override {
-    return "convolution-feature-group-converter";
+    return "convolution-group-converter";
   }
 
   // Run convolution rewriting on the given computation. Returns whether the
   // computation was changed.
   StatusOr<bool> Run(HloModule* module) override;
 
+  // Lambda containing cost model that decides whether to expand
+  // batch_group_count.
+  std::function<bool(HloInstruction*)> is_cost_viable_;
+
+  // Decides whether to convert batch groups or feature groups.
+  bool convert_batch_groups_only_;
+
   // Tells whether filter expansion is required.
   bool filter_expansion_;
 };
 
 }  // namespace xla
 
-#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CONVOLUTION_FEATURE_GROUP_CONVERTER_H_
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CONVOLUTION_GROUP_CONVERTER_H_
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 4173af5179..7e4447a9dd 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -112,7 +112,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
         "//tensorflow/compiler/xla/service:conditional_simplifier",
-        "//tensorflow/compiler/xla/service:convolution_feature_group_converter",
+        "//tensorflow/compiler/xla/service:convolution_group_converter",
         "//tensorflow/compiler/xla/service:dot_decomposer",
         "//tensorflow/compiler/xla/service:executable",
         "//tensorflow/compiler/xla/service:flatten_call_graph",
diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
index 2d9978404c..8e55267a67 100644
--- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
+++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization.cc
@@ -132,7 +132,8 @@ StatusOr<bool> ConvCanonicalization::Run(HloModule* module) {
       HloInstruction* new_conv = module->entry_computation()->AddInstruction(
           HloInstruction::CreateConvolve(
               new_conv_shape, new_input, new_kernel, hlo->feature_group_count(),
-              hlo->window(), new_dnums, hlo->precision_config()));
+              hlo->batch_group_count(), hlo->window(), new_dnums,
+              hlo->precision_config()));
 
       // Reshape the output back to the shape of the original convolution.
       TF_RETURN_IF_ERROR(module->entry_computation()->ReplaceWithNewInstruction(
diff --git a/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc b/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc
index c58175428f..02085108a0 100644
--- a/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/conv_canonicalization_test.cc
@@ -84,8 +84,8 @@ TEST_F(ConvCanonicalizationTest, NonCanonicalToCanonical) {
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(
           F32, {kOutputFeatureCount, kBatchSize, output_size, output_size}),
-      input, kernel, /*feature_group_count=*/1, conv_window_, dnums,
-      DefaultPrecisionConfig(2)));
+      input, kernel, /*feature_group_count=*/1, /*batch_group_count=*/1,
+      conv_window_, dnums, DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule();
   HloComputation* entry_computation =
@@ -147,8 +147,8 @@ TEST_F(ConvCanonicalizationTest, CanonicalStaysTheSame) {
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(
           F32, {kBatchSize, output_size, output_size, kOutputFeatureCount}),
-      input, kernel, /*feature_group_count=*/1, conv_window_, dnums,
-      DefaultPrecisionConfig(2)));
+      input, kernel, /*feature_group_count=*/1, /*batch_group_count=*/1,
+      conv_window_, dnums, DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule();
   module->AddEntryComputation(builder.Build());
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
index f3dfa4d642..ba7dcde5c3 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -51,7 +51,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
 #include "tensorflow/compiler/xla/service/conditional_simplifier.h"
-#include "tensorflow/compiler/xla/service/convolution_feature_group_converter.h"
+#include "tensorflow/compiler/xla/service/convolution_group_converter.h"
 #include "tensorflow/compiler/xla/service/cpu/buffer_info_util.h"
 #include "tensorflow/compiler/xla/service/cpu/compiler_functor.h"
 #include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h"
@@ -257,7 +257,16 @@ Status CpuCompiler::RunHloPassesThroughLayoutAssn(
   pipeline.AddPass<CallInliner>();
   pipeline.AddPass<BatchDotSimplification>();
   pipeline.AddPass<DotDecomposer>();
-  pipeline.AddPass<ConvolutionFeatureGroupConverter>();
+  auto cost_model = [](HloInstruction* conv) {
+    // We need a cost model for CPUs. Currently, do nothing.
+    return false;
+  };
+  pipeline.AddPass<ConvolutionGroupConverter>(
+      cost_model,
+      /*convert_batch_groups_only=*/true);
+  pipeline.AddPass<ConvolutionGroupConverter>(
+      cost_model,
+      /*convert_batch_groups_only=*/false);
   pipeline.AddPass<ConvCanonicalization>(target_machine_features);
   {
     auto& pass =
diff --git a/tensorflow/compiler/xla/service/dynamic_dimension_inference_test.cc b/tensorflow/compiler/xla/service/dynamic_dimension_inference_test.cc
index ea9ebed45d..1dd196821c 100644
--- a/tensorflow/compiler/xla/service/dynamic_dimension_inference_test.cc
+++ b/tensorflow/compiler/xla/service/dynamic_dimension_inference_test.cc
@@ -292,7 +292,8 @@ TEST_F(DynamicDimensionInferenceTest, ConvolutionTest) {
   Window window;
 
   auto* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
-      zx_shape, a_param, b_param, /*feature_group_count=*/1, window, dnums,
+      zx_shape, a_param, b_param, /*feature_group_count=*/1,
+      /*batch_group_count=*/1, window, dnums,
       HloTestBase::DefaultPrecisionConfig(2)));
 
   module_->AddEntryComputation(builder.Build());
diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index bfd1b6cb14..6c23f921f4 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -694,6 +694,7 @@ cc_library(
         "//tensorflow/compiler/xla/service:buffer_liveness",
         "//tensorflow/compiler/xla/service:call_inliner",
         "//tensorflow/compiler/xla/service:conditional_simplifier",
+        "//tensorflow/compiler/xla/service:convolution_group_converter",
         "//tensorflow/compiler/xla/service:executable",
         "//tensorflow/compiler/xla/service:flatten_call_graph",
         "//tensorflow/compiler/xla/service:hlo",
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
index 73af18f87a..dbcdc2b075 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc
@@ -109,9 +109,11 @@ TEST_F(CudnnConvRewriterTest, BackwardFilterConvolve) {
   auto* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeInference::InferConvolveShape(
           activations->shape(), gradients->shape(), /*feature_group_count=*/1,
-          conv_window, tf_default_dnums_for_backward_filter_)
+          /*batch_group_count=*/1, conv_window,
+          tf_default_dnums_for_backward_filter_)
           .ConsumeValueOrDie(),
-      activations, gradients, /*feature_group_count=*/1, conv_window,
+      activations, gradients, /*feature_group_count=*/1,
+      /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_filter_, DefaultPrecisionConfig(2)));
 
   OpMetadata metadata;
@@ -147,9 +149,11 @@ TEST_F(CudnnConvRewriterTest,
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeInference::InferConvolveShape(
           activations->shape(), gradients->shape(), /*feature_group_count=*/1,
-          conv_window, tf_default_dnums_for_backward_filter_)
+          /*batch_group_count=*/1, conv_window,
+          tf_default_dnums_for_backward_filter_)
           .ConsumeValueOrDie(),
-      activations, gradients, /*feature_group_count=*/1, conv_window,
+      activations, gradients, /*feature_group_count=*/1,
+      /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_filter_, DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule();
@@ -179,7 +183,7 @@ TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithPaddedActivations) {
   }
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {32, 3, 3, 32}), activations, gradients,
-      /*feature_group_count=*/1, conv_window,
+      /*feature_group_count=*/1, /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_filter_, DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule();
@@ -209,7 +213,7 @@ TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithPaddedGradients) {
   }
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {320, 3, 3, 192}), activations, gradients,
-      /*feature_group_count=*/1, conv_window,
+      /*feature_group_count=*/1, /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_filter_, DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule();
@@ -238,7 +242,7 @@ TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithUnevenPadding) {
   }
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {32, 2, 2, 32}), activations, gradients,
-      /*feature_group_count=*/1, conv_window,
+      /*feature_group_count=*/1, /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_filter_, DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule();
@@ -283,13 +287,15 @@ TEST_F(CudnnConvRewriterTest, BackwardInputConvolveEvenPadding) {
 
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {4, 3, 16, 16}), /*lhs=*/output,
-      /*rhs=*/reverse_kernel, /*feature_group_count=*/1, conv_window,
-      conv_dnums, DefaultPrecisionConfig(2)));
+      /*rhs=*/reverse_kernel, /*feature_group_count=*/1,
+      /*batch_group_count=*/1, conv_window, conv_dnums,
+      DefaultPrecisionConfig(2)));
   // Verify the convolution's shape is consistent with ShapeInference.
   CHECK(ShapeUtil::Compatible(
       conv->shape(), ShapeInference::InferConvolveShape(
                          output->shape(), reverse_kernel->shape(),
-                         /*feature_group_count=*/1, conv_window, conv_dnums)
+                         /*feature_group_count=*/1, /*batch_group_count=*/1,
+                         conv_window, conv_dnums)
                          .ValueOrDie()));
 
   auto module = CreateNewVerifiedModule();
@@ -332,10 +338,12 @@ TEST_F(CudnnConvRewriterTest, BackwardInputConvolve1x1Filter) {
 
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeInference::InferConvolveShape(output->shape(), kernel->shape(),
-                                         /*feature_group_count=*/1, conv_window,
+                                         /*feature_group_count=*/1,
+                                         /*batch_group_count=*/1, conv_window,
                                          tf_default_dnums_for_backward_input_)
           .ConsumeValueOrDie(),
-      /*lhs=*/output, /*rhs=*/kernel, /*feature_group_count=*/1, conv_window,
+      /*lhs=*/output, /*rhs=*/kernel, /*feature_group_count=*/1,
+      /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_input_, DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule();
@@ -365,11 +373,12 @@ TEST_F(CudnnConvRewriterTest,
   builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeInference::InferConvolveShape(
           output->shape(), kernel->shape(), /*feature_group_count=*/1,
-          default_conv_window_, tf_default_dnums_for_backward_input_)
+          /*batch_group_count=*/1, default_conv_window_,
+          tf_default_dnums_for_backward_input_)
           .ConsumeValueOrDie(),
       /*lhs=*/output, /*rhs=*/kernel, /*feature_group_count=*/1,
-      default_conv_window_, tf_default_dnums_for_backward_input_,
-      DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, default_conv_window_,
+      tf_default_dnums_for_backward_input_, DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule();
   HloComputation* entry_computation =
@@ -415,15 +424,15 @@ TEST_F(CudnnConvRewriterTest, BackwardInputConvolveUnevenPaddingOnGradients) {
   }
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {20, 10, 10, 192}), output, reverse_kernel,
-      /*feature_group_count=*/1, conv_window,
+      /*feature_group_count=*/1, /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_input_, DefaultPrecisionConfig(2)));
   // Verify the convolution's shape is consistent with ShapeInference.
   CHECK(ShapeUtil::Compatible(
-      conv->shape(),
-      ShapeInference::InferConvolveShape(
-          output->shape(), reverse_kernel->shape(), /*feature_group_count=*/1,
-          conv_window, tf_default_dnums_for_backward_input_)
-          .ValueOrDie()));
+      conv->shape(), ShapeInference::InferConvolveShape(
+                         output->shape(), reverse_kernel->shape(),
+                         /*feature_group_count=*/1, /*batch_group_count=*/1,
+                         conv_window, tf_default_dnums_for_backward_input_)
+                         .ValueOrDie()));
 
   auto module = CreateNewVerifiedModule();
   HloComputation* entry_computation =
@@ -465,15 +474,15 @@ TEST_F(CudnnConvRewriterTest, BackwardInputConvolveLowPaddingTooLarge) {
   }
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {20, 10, 10, 192}), output, reverse_kernel,
-      /*feature_group_count=*/1, conv_window,
+      /*feature_group_count=*/1, /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_input_, DefaultPrecisionConfig(2)));
   // Verify the convolution's shape is consistent with ShapeInference.
   CHECK(ShapeUtil::Compatible(
-      conv->shape(),
-      ShapeInference::InferConvolveShape(
-          output->shape(), reverse_kernel->shape(), /*feature_group_count=*/1,
-          conv_window, tf_default_dnums_for_backward_input_)
-          .ValueOrDie()));
+      conv->shape(), ShapeInference::InferConvolveShape(
+                         output->shape(), reverse_kernel->shape(),
+                         /*feature_group_count=*/1, /*batch_group_count=*/1,
+                         conv_window, tf_default_dnums_for_backward_input_)
+                         .ValueOrDie()));
 
   auto module = CreateNewVerifiedModule();
   HloComputation* entry_computation =
@@ -519,15 +528,15 @@ TEST_F(CudnnConvRewriterTest, BackwardInputConvolveUnevenPaddingOnActivations) {
   forward_conv_col_dim->set_base_dilation(2);
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {1, 1, 14, 1}), output, reverse_kernel,
-      /*feature_group_count=*/1, conv_window,
+      /*feature_group_count=*/1, /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_input_, DefaultPrecisionConfig(2)));
   // Verify the convolution's shape is consistent with ShapeInference.
   CHECK(ShapeUtil::Compatible(
-      conv->shape(),
-      ShapeInference::InferConvolveShape(
-          output->shape(), reverse_kernel->shape(), /*feature_group_count=*/1,
-          conv_window, tf_default_dnums_for_backward_input_)
-          .ValueOrDie()));
+      conv->shape(), ShapeInference::InferConvolveShape(
+                         output->shape(), reverse_kernel->shape(),
+                         /*feature_group_count=*/1, /*batch_group_count=*/1,
+                         conv_window, tf_default_dnums_for_backward_input_)
+                         .ValueOrDie()));
 
   auto module = CreateNewVerifiedModule();
   const HloComputation* entry_computation =
@@ -574,15 +583,15 @@ TEST_F(CudnnConvRewriterTest,
   forward_conv_col_dim->set_padding_high(2);
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       ShapeUtil::MakeShape(F32, {1, 1, 4, 1}), output, reverse_kernel,
-      /*feature_group_count=*/1, conv_window,
+      /*feature_group_count=*/1, /*batch_group_count=*/1, conv_window,
       tf_default_dnums_for_backward_input_, DefaultPrecisionConfig(2)));
   // Verify the convolution's shape is consistent with ShapeInference.
   CHECK(ShapeUtil::Compatible(
-      conv->shape(),
-      ShapeInference::InferConvolveShape(
-          output->shape(), reverse_kernel->shape(), /*feature_group_count=*/1,
-          conv_window, tf_default_dnums_for_backward_input_)
-          .ValueOrDie()));
+      conv->shape(), ShapeInference::InferConvolveShape(
+                         output->shape(), reverse_kernel->shape(),
+                         /*feature_group_count=*/1, /*batch_group_count=*/1,
+                         conv_window, tf_default_dnums_for_backward_input_)
+                         .ValueOrDie()));
 
   auto module = CreateNewVerifiedModule();
   HloComputation* entry_computation =
diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
index 60f2116e60..cd369d5598 100644
--- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc
@@ -36,6 +36,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/buffer_liveness.h"
 #include "tensorflow/compiler/xla/service/call_inliner.h"
 #include "tensorflow/compiler/xla/service/conditional_simplifier.h"
+#include "tensorflow/compiler/xla/service/convolution_group_converter.h"
 #include "tensorflow/compiler/xla/service/flatten_call_graph.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h"
 #include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h"
@@ -158,6 +159,13 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
 
     // TODO(b/64094172): make Call work on GPU instead of inlining.
     pipeline.AddPass<CallInliner>();
+    auto cost_model = [](HloInstruction* conv) {
+      // We need a cost model for GPUs. Currently, do nothing.
+      return false;
+    };
+    pipeline.AddPass<ConvolutionGroupConverter>(
+        cost_model,
+        /*convert_batch_groups_only=*/true);
     // Convert BF16 operations to F32 operations so that the GPU backend can
     // support BF16 operations without directly implementing a BF16 lowering for
     // most ops.
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 414c632712..faced059ac 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto";
 option cc_enable_arenas = true;
 
 // Serialization of HloInstruction.
-// Next ID: 58
+// Next ID: 59
 message HloInstructionProto {
   reserved 10;
   reserved "parameter_name";
@@ -82,6 +82,8 @@ message HloInstructionProto {
   // it will use a default value of 1.
   int64 feature_group_count = 50;
 
+  int64 batch_group_count = 58;
+
   // Describes the [begin, end) index range and stride for slices.
   message SliceDimensions {
     int64 start = 1;
diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc
index b2005d3c21..e41aeab19e 100644
--- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc
+++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc
@@ -69,11 +69,11 @@ StatusOr<HloInstruction*> MakeConvolveHlo(
   CHECK_EQ(computation, rhs->parent());
   TF_ASSIGN_OR_RETURN(Shape convolve_shape,
                       ShapeInference::InferConvolveShape(
-                          lhs->shape(), rhs->shape(), feature_group_count,
+                          lhs->shape(), rhs->shape(), feature_group_count, 1,
                           window, dimension_numbers));
   return computation->AddInstruction(HloInstruction::CreateConvolve(
-      convolve_shape, lhs, rhs, feature_group_count, window, dimension_numbers,
-      precision_config));
+      convolve_shape, lhs, rhs, feature_group_count, 1, window,
+      dimension_numbers, precision_config));
 }
 
 StatusOr<HloInstruction*> MakeTransposeHlo(HloInstruction* operand,
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
index 4eaaab20ea..8fa493a873 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc
@@ -804,7 +804,7 @@ TEST_P(HloEvaluatorTest, SimpleConv1D) {
   Shape shape = ShapeUtil::MakeShape(F32, {1, 1, 3});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, /*feature_group_count=*/1,
-      window, dnums, DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
   m_->AddEntryComputation(b.Build());
 
   Literal result = Evaluate();
@@ -859,7 +859,7 @@ TEST_P(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   Shape shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, /*feature_group_count=*/1,
-      window, dnums, DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
   m_->AddEntryComputation(b.Build());
 
   Literal result = Evaluate();
@@ -943,7 +943,7 @@ TEST_P(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
   Shape shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, /*feature_group_count=*/1,
-      window, dnums, DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
   m_->AddEntryComputation(b.Build());
 
   Literal result = Evaluate();
@@ -1021,7 +1021,7 @@ TEST_P(HloEvaluatorTest, Conv2DGeneralDimensions) {
   Shape shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, /*feature_group_count=*/1,
-      window, dnums, DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
   m_->AddEntryComputation(b.Build());
 
   Literal result = Evaluate();
@@ -1081,7 +1081,7 @@ TEST_P(HloEvaluatorTest, DilatedBaseConv2DWithHighPadding) {
   Shape shape = ShapeUtil::MakeShape(F32, {1, 1, 7, 7});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, /*feature_group_count=*/1,
-      window, dnums, DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
   m_->AddEntryComputation(b.Build());
 
   Literal result = Evaluate();
@@ -1145,7 +1145,7 @@ TEST_P(HloEvaluatorTest, DilatedBaseConv2DWithLowAndHighPadding) {
   Shape shape = ShapeUtil::MakeShape(F32, {1, 1, 8, 8});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, /*feature_group_count=*/1,
-      window, dnums, DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
   m_->AddEntryComputation(b.Build());
 
   Literal result = Evaluate();
@@ -1217,7 +1217,7 @@ TEST_P(HloEvaluatorTest,
   Shape shape = ShapeUtil::MakeShape(F32, {1, 1, 9, 3});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction, /*feature_group_count=*/1,
-      window, dnums, DefaultPrecisionConfig(2)));
+      /*batch_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
   m_->AddEntryComputation(b.Build());
 
   Literal result = Evaluate();
@@ -1288,7 +1288,8 @@ TEST_P(HloEvaluatorTest, Conv2DGroupedConvolution) {
   Shape shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 8});
   b.AddInstruction(HloInstruction::CreateConvolve(
       shape, lhs_instruction, rhs_instruction,
-      /*feature_group_count=*/2, window, dnums, DefaultPrecisionConfig(2)));
+      /*feature_group_count=*/2, /*batch_group_count=*/1, window, dnums,
+      DefaultPrecisionConfig(2)));
   m_->AddEntryComputation(b.Build());
 
   Literal result = Evaluate();
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
index 03d42990ce..3ace2f5443 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
+++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h
@@ -1011,10 +1011,10 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     CHECK_EQ(num_spatial_dims + 2, lhs_rank);
     CHECK_EQ(num_spatial_dims + 2, rhs_rank);
 
-    TF_ASSIGN_OR_RETURN(
-        auto inferred_return_shape,
-        ShapeInference::InferConvolveShape(
-            lhs_shape, rhs_shape, conv->feature_group_count(), window, dnums));
+    TF_ASSIGN_OR_RETURN(auto inferred_return_shape,
+                        ShapeInference::InferConvolveShape(
+                            lhs_shape, rhs_shape, conv->feature_group_count(),
+                            conv->batch_group_count(), window, dnums));
     CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape))
         << "return shape set to: " << ShapeUtil::HumanString(result_shape)
         << " but is inferred to be: "
@@ -1038,7 +1038,10 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     auto rhs_literal_data = rhs_literal.data<ReturnT>();
 
     int64 feature_group_count = conv->feature_group_count();
+    int64 batch_group_count = conv->batch_group_count();
 
+    // The batch count > 1 case is unimplemented in the HLO evaluator so far.
+    TF_RET_CHECK(batch_group_count == 1);
     auto func = [&window_shape, &dnums, &lhs_shape, &rhs_shape, &window,
                  &lhs_dim_multipliers, &rhs_dim_multipliers, lhs_literal_data,
                  rhs_literal_data,
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 8b2ace1e82..013b5dff5e 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -383,7 +383,8 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
           proto.operand_ids_size(), PrecisionConfig::DEFAULT);
       instruction = CreateConvolve(
           shape, operands(0), operands(1),
-          std::max<int64>(proto.feature_group_count(), 1), proto.window(),
+          std::max<int64>(proto.feature_group_count(), 1),
+          std::max<int64>(proto.batch_group_count(), 1), proto.window(),
           proto.convolution_dimension_numbers(), precision_config);
       break;
     }
@@ -438,6 +439,9 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
       static_cast<HloCustomCallInstruction*>(instruction.get())
           ->set_feature_group_count(
               std::max(static_cast<int64>(proto.feature_group_count()), 1LL));
+      static_cast<HloCustomCallInstruction*>(instruction.get())
+          ->set_batch_group_count(
+              std::max(static_cast<int64>(proto.batch_group_count()), 1LL));
       break;
     case HloOpcode::kPad:
       TF_RET_CHECK(proto.operand_ids_size() == 2)
@@ -734,12 +738,12 @@ HloInstruction::CreateGetTupleElement(const Shape& shape,
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateConvolve(
     const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
-    int64 feature_group_count, const Window& window,
+    int64 feature_group_count, int64 batch_group_count, const Window& window,
     const ConvolutionDimensionNumbers& dimension_numbers,
     const PrecisionConfig& precision_config) {
   return absl::make_unique<HloConvolutionInstruction>(
-      shape, lhs, rhs, feature_group_count, window, dimension_numbers,
-      precision_config);
+      shape, lhs, rhs, feature_group_count, batch_group_count, window,
+      dimension_numbers, precision_config);
 }
 
 /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateFft(
@@ -3323,6 +3327,18 @@ void HloInstruction::set_feature_group_count(int64 feature_group_count) {
       feature_group_count);
 }
 
+int64 HloInstruction::batch_group_count() const {
+  if (auto convolution = DynCast<HloConvolutionInstruction>(this)) {
+    return convolution->batch_group_count();
+  }
+  return Cast<HloCustomCallInstruction>(this)->batch_group_count();
+}
+
+void HloInstruction::set_batch_group_count(int64 batch_group_count) {
+  Cast<HloCustomCallInstruction>(this)->set_batch_group_count(
+      batch_group_count);
+}
+
 HloComputation* HloInstruction::select() const {
   return Cast<HloSelectAndScatterInstruction>(this)->select();
 }
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index dd77f101a0..e1294c37d3 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -426,7 +426,7 @@ class HloInstruction {
   // and window describes how the filter is applied to lhs.
   static std::unique_ptr<HloInstruction> CreateConvolve(
       const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
-      int64 feature_group_count, const Window& window,
+      int64 feature_group_count, int64 batch_group_count, const Window& window,
       const ConvolutionDimensionNumbers& dimension_numbers,
       const PrecisionConfig& precision_config);
 
@@ -1495,6 +1495,11 @@ class HloInstruction {
 
   void set_feature_group_count(int64 feature_group_count);
 
+  // The number of batch groups. Must be a divisor of the input batch dimension
+  int64 batch_group_count() const;
+
+  void set_batch_group_count(int64 batch_group_count);
+
   // Delegates to HloSelectAndScatterInstruction::select.
   HloComputation* select() const;
 
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index 5521e5bd9a..a47d33314a 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1655,11 +1655,12 @@ std::unique_ptr<HloInstruction> HloOutfeedInstruction::CloneWithNewOperandsImpl(
 
 HloConvolutionInstruction::HloConvolutionInstruction(
     const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
-    int64 feature_group_count, const Window& window,
+    int64 feature_group_count, int64 batch_group_count, const Window& window,
     const ConvolutionDimensionNumbers& dimension_numbers,
     const PrecisionConfig& precision_config)
     : HloInstruction(HloOpcode::kConvolution, shape),
       feature_group_count_(feature_group_count),
+      batch_group_count_(batch_group_count),
       window_(window),
       convolution_dimension_numbers_(dimension_numbers),
       precision_config_(precision_config) {
@@ -1737,8 +1738,9 @@ HloConvolutionInstruction::CloneWithNewOperandsImpl(
     HloCloneContext* context) const {
   CHECK_EQ(new_operands.size(), 2);
   return absl::make_unique<HloConvolutionInstruction>(
-      shape, new_operands[0], new_operands[1], feature_group_count_, window(),
-      convolution_dimension_numbers_, precision_config_);
+      shape, new_operands[0], new_operands[1], feature_group_count_,
+      batch_group_count_, window(), convolution_dimension_numbers_,
+      precision_config_);
 }
 
 HloReduceWindowInstruction::HloReduceWindowInstruction(
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 5420d4ce11..d875b34b62 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -933,7 +933,7 @@ class HloConvolutionInstruction : public HloInstruction {
  public:
   explicit HloConvolutionInstruction(
       const Shape& shape, HloInstruction* lhs, HloInstruction* rhs,
-      int64 feature_group_count, const Window& window,
+      int64 feature_group_count, int64 batch_group_count, const Window& window,
       const ConvolutionDimensionNumbers& dimension_numbers,
       const PrecisionConfig& precision_config);
   const Window& window() const override { return window_; }
@@ -949,6 +949,10 @@ class HloConvolutionInstruction : public HloInstruction {
   // dimension and output feature dimension.
   int64 feature_group_count() const { return feature_group_count_; }
 
+  // The number of feature groups. Must be a divisor of the input batch
+  // dimension.
+  int64 batch_group_count() const { return batch_group_count_; }
+
   // Returns the information used to tell the implementation information about
   // what sort of precision is requested. The meaning of the field is backend
   // specific. At the moment, it is only supported for kConvolution and kDot.
@@ -977,6 +981,9 @@ class HloConvolutionInstruction : public HloInstruction {
   // The number of feature groups. Must be a divisor of the input feature
   // dimension and output feature dimension.
   int64 feature_group_count_;
+  // The number of feature groups. Must be a divisor of the input batch
+  // dimension.
+  int64 batch_group_count_;
   // Describes the window used for a convolution.
   Window window_;
   // Describes the dimension numbers used for a convolution.
@@ -1099,7 +1106,11 @@ class HloCustomCallInstruction : public HloInstruction {
   void set_feature_group_count(int64 feature_group_count) {
     feature_group_count_ = feature_group_count;
   }
+  void set_batch_group_count(int64 batch_group_count) {
+    batch_group_count_ = batch_group_count;
+  }
   int64 feature_group_count() const { return feature_group_count_; }
+  int64 batch_group_count() const { return batch_group_count_; }
   // Returns a serialized representation of this instruction.
   HloInstructionProto ToProto() const override;
 
@@ -1134,6 +1145,7 @@ class HloCustomCallInstruction : public HloInstruction {
   std::unique_ptr<ConvolutionDimensionNumbers> convolution_dimension_numbers_;
   // The number of feature groups. This is used for grouped convolutions.
   int64 feature_group_count_;
+  int64 batch_group_count_;
   // Whether the result and operand layouts are constrained.
   bool layout_constrained_;
   // For layout-constrained custom calls, this vector holds the shape with
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 29bb088f6d..51f7d943a2 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -1007,11 +1007,14 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder,
       optional<Window> window;
       optional<ConvolutionDimensionNumbers> dnums;
       optional<int64> feature_group_count;
+      optional<int64> batch_group_count;
       attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window};
       attrs["dim_labels"] = {/*required=*/true,
                              AttrTy::kConvolutionDimensionNumbers, &dnums};
       attrs["feature_group_count"] = {/*required=*/false, AttrTy::kInt64,
                                       &feature_group_count};
+      attrs["batch_group_count"] = {/*required=*/false, AttrTy::kInt64,
+                                    &batch_group_count};
       optional<std::vector<PrecisionConfig::Precision>> operand_precision;
       attrs["operand_precision"] = {/*required=*/false, AttrTy::kPrecisionList,
                                     &operand_precision};
@@ -1025,6 +1028,9 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder,
       if (!feature_group_count) {
         feature_group_count = 1;
       }
+      if (!batch_group_count) {
+        batch_group_count = 1;
+      }
       PrecisionConfig precision_config;
       if (operand_precision) {
         *precision_config.mutable_operand_precision() = {
@@ -1035,7 +1041,8 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder,
       }
       instruction = builder->AddInstruction(HloInstruction::CreateConvolve(
           shape, /*lhs=*/operands[0], /*rhs=*/operands[1],
-          feature_group_count.value(), *window, *dnums, precision_config));
+          feature_group_count.value(), batch_group_count.value(), *window,
+          *dnums, precision_config));
       break;
     }
     case HloOpcode::kFft: {
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index ace854ed6a..3a5d5d17a0 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -153,8 +153,8 @@ Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) {
       const Shape expected,
       ShapeInference::InferConvolveShape(
           convolution->operand(0)->shape(), convolution->operand(1)->shape(),
-          convolution->feature_group_count(), convolution->window(),
-          convolution->convolution_dimension_numbers()));
+          convolution->feature_group_count(), convolution->batch_group_count(),
+          convolution->window(), convolution->convolution_dimension_numbers()));
   return CheckShape(convolution, expected);
 }
 
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index 7e7282a737..e6e118d0d2 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -1556,7 +1556,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
 
 /* static */ StatusOr<Shape> ShapeInference::InferConvolveShape(
     const Shape& lhs, const Shape& rhs, int64 feature_group_count,
-    const Window& window, const ConvolutionDimensionNumbers& dnums) {
+    int64 batch_group_count, const Window& window,
+    const ConvolutionDimensionNumbers& dnums) {
   TF_RETURN_IF_ERROR(ExpectArray(lhs, "lhs of convolution"));
   TF_RETURN_IF_ERROR(ExpectArray(rhs, "rhs of convolution"));
 
@@ -1565,6 +1566,13 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
         "feature_group_count must be a positive number, got %d",
         feature_group_count);
   }
+
+  if (batch_group_count <= 0) {
+    return InvalidArgument(
+        "batch_group_count must be a positive number, got %d",
+        batch_group_count);
+  }
+
   if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) {
     return InvalidArgument(
         "Convolution with different element types: %s and %s.",
@@ -1700,6 +1708,17 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
         ShapeUtil::HumanString(lhs), ShapeUtil::HumanString(rhs),
         dnums.DebugString());
   }
+
+  if (input_batch % batch_group_count > 0) {
+    return InvalidArgument(
+        "Expected input batch dimension (value %d) to be divisible by "
+        "batch_group_count (value %d); "
+        "got <conv>(%s, %s)\n"
+        "Dimension numbers: {%s}.",
+        input_batch, batch_group_count, ShapeUtil::HumanString(lhs),
+        ShapeUtil::HumanString(rhs), dnums.DebugString());
+  }
+
   std::vector<int64> window_dims(num_spatial_dims);
   for (int i = 0; i < num_spatial_dims; ++i) {
     window_dims[i] = window.dimensions(i).size();
@@ -1722,7 +1741,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
                              /*allow_negative_padding=*/true));
 
   std::vector<int64> dimensions(num_dims);
-  dimensions[dnums.output_batch_dimension()] = input_batch;
+  dimensions[dnums.output_batch_dimension()] = input_batch / batch_group_count;
   dimensions[dnums.output_feature_dimension()] = kernel_output_features;
   for (int i = 0; i < num_spatial_dims; ++i) {
     dimensions[dnums.output_spatial_dimensions(i)] =
diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h
index d94385a04d..9f56415599 100644
--- a/tensorflow/compiler/xla/service/shape_inference.h
+++ b/tensorflow/compiler/xla/service/shape_inference.h
@@ -109,7 +109,7 @@ class ShapeInference {
   // filter (rhs) to lhs in the way specified by the fields on window.
   static StatusOr<Shape> InferConvolveShape(
       const Shape& lhs, const Shape& rhs, int64 feature_group_count,
-      const Window& window,
+      int64 batch_group_count, const Window& window,
       const ConvolutionDimensionNumbers& dimension_numbers);
 
   // Infers the shape produced by the given FFT type on the given operand.
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index 4639e32db4..0a870808d4 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -420,7 +420,8 @@ TEST_F(ShapeInferenceTest, Convolve) {
   dim1->set_window_dilation(1);
   dim1->set_base_dilation(1);
   auto inferred_status = ShapeInference::InferConvolveShape(
-      lhs_shape, rhs_shape, /*feature_group_count=*/1, window, dnums);
+      lhs_shape, rhs_shape, /*feature_group_count=*/1, /*batch_group_count=*/1,
+      window, dnums);
   ASSERT_IS_OK(inferred_status.status());
   Shape inferred_shape = inferred_status.ValueOrDie();
   ASSERT_TRUE(ShapeUtil::Equal(ShapeUtil::MakeShape(F32, {10, 12, 2, 3}),
@@ -465,7 +466,8 @@ TEST_F(ShapeInferenceTest, ConvolveWithWindowDilation) {
   dim1->set_window_dilation(2);
   dim1->set_base_dilation(1);
   auto inferred_status = ShapeInference::InferConvolveShape(
-      lhs_shape, rhs_shape, /*feature_group_count=*/1, window, dnums);
+      lhs_shape, rhs_shape, /*feature_group_count=*/1, /*batch_group_count=*/1,
+      window, dnums);
   ASSERT_IS_OK(inferred_status.status());
   Shape inferred_shape = inferred_status.ValueOrDie();
   ASSERT_TRUE(ShapeUtil::Equal(ShapeUtil::MakeShape(F32, {10, 12, 31, 5}),
@@ -510,7 +512,8 @@ TEST_F(ShapeInferenceTest, ConvolveWithBaseDilation) {
   dim1->set_window_dilation(1);
   dim1->set_base_dilation(2);
   auto inferred_status = ShapeInference::InferConvolveShape(
-      lhs_shape, rhs_shape, /*feature_group_count=*/1, window, dnums);
+      lhs_shape, rhs_shape, /*feature_group_count=*/1, /*batch_group_count=*/1,
+      window, dnums);
   ASSERT_IS_OK(inferred_status.status());
   Shape inferred_shape = inferred_status.ValueOrDie();
   ASSERT_TRUE(ShapeUtil::Equal(ShapeUtil::MakeShape(F32, {10, 12, 4, 9}),
@@ -548,7 +551,8 @@ TEST_F(ShapeInferenceTest, ConvolveDimensionNumbersOverlapError) {
   dim1->set_padding_low(1);
   dim1->set_padding_high(1);
   auto inferred_status = ShapeInference::InferConvolveShape(
-      lhs_shape, rhs_shape, /*feature_group_count=*/1, window, dnums);
+      lhs_shape, rhs_shape, /*feature_group_count=*/1, /*batch_group_count=*/1,
+      window, dnums);
   ASSERT_FALSE(inferred_status.ok());
   ASSERT_THAT(inferred_status.status().error_message(),
               HasSubstr("each dimension exactly once"));
diff --git a/tensorflow/compiler/xla/service/transpose_folding.cc b/tensorflow/compiler/xla/service/transpose_folding.cc
index 7c1f4b5cc6..eaf4f28b87 100644
--- a/tensorflow/compiler/xla/service/transpose_folding.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding.cc
@@ -178,7 +178,8 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) {
 
   auto new_conv = HloInstruction::CreateConvolve(
       convolution.shape(), new_lhs, new_rhs, convolution.feature_group_count(),
-      convolution.window(), new_dnums, convolution.precision_config());
+      convolution.batch_group_count(), convolution.window(), new_dnums,
+      convolution.precision_config());
   TF_CHECK_OK(convolution.parent()->ReplaceWithNewInstruction(
       &convolution, std::move(new_conv)));
 
diff --git a/tensorflow/compiler/xla/service/transpose_folding_test.cc b/tensorflow/compiler/xla/service/transpose_folding_test.cc
index 3ca53edc81..f8a5fa0215 100644
--- a/tensorflow/compiler/xla/service/transpose_folding_test.cc
+++ b/tensorflow/compiler/xla/service/transpose_folding_test.cc
@@ -240,12 +240,13 @@ TEST_F(TransposeFoldingTest, FoldConvDimSwapTransposeRhs) {
         transpose_y->shape().dimensions(dnums.kernel_spatial_dimensions(i)));
   }
   StatusOr<Shape> conv_shape = ShapeInference::InferConvolveShape(
-      x->shape(), transpose_y->shape(), /*feature_group_count=*/1, window,
-      dnums);
+      x->shape(), transpose_y->shape(), /*feature_group_count=*/1,
+      /*batch_group_count=*/1, window, dnums);
   EXPECT_IS_OK(conv_shape);
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       conv_shape.ValueOrDie(), x, transpose_y,
-      /*feature_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
+      /*feature_group_count=*/1, /*batch_group_count=*/1, window, dnums,
+      DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule("test_module");
   HloComputation* entry_computation =
@@ -295,12 +296,13 @@ TEST_F(TransposeFoldingTest, FoldConvComplexTransposeRhs) {
         transpose_y->shape().dimensions(dnums.kernel_spatial_dimensions(i)));
   }
   StatusOr<Shape> conv_shape = ShapeInference::InferConvolveShape(
-      x->shape(), transpose_y->shape(), /*feature_group_count=*/1, window,
-      dnums);
+      x->shape(), transpose_y->shape(), /*feature_group_count=*/1,
+      /*batch_group_count=*/1, window, dnums);
   EXPECT_IS_OK(conv_shape);
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       conv_shape.ValueOrDie(), x, transpose_y,
-      /*feature_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
+      /*feature_group_count=*/1, /*batch_group_count=*/1, window, dnums,
+      DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule("test_module");
   HloComputation* entry_computation =
@@ -355,12 +357,13 @@ TEST_F(TransposeFoldingTest, FoldConvTransposeLhs) {
     dim->set_size(y->shape().dimensions(dnums.kernel_spatial_dimensions(i)));
   }
   StatusOr<Shape> conv_shape = ShapeInference::InferConvolveShape(
-      transpose_x->shape(), y->shape(), /*feature_group_count=*/1, window,
-      dnums);
+      transpose_x->shape(), y->shape(), /*feature_group_count=*/1,
+      /*batch_group_count=*/1, window, dnums);
   EXPECT_IS_OK(conv_shape);
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       conv_shape.ValueOrDie(), transpose_x, y,
-      /*feature_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
+      /*feature_group_count=*/1, /*batch_group_count=*/1, window, dnums,
+      DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule("test_module");
   HloComputation* entry_computation =
@@ -421,12 +424,13 @@ TEST_F(TransposeFoldingTest, FoldConvComplexTransposeLhs) {
     dim->set_size(y->shape().dimensions(dnums.kernel_spatial_dimensions(i)));
   }
   StatusOr<Shape> conv_shape = ShapeInference::InferConvolveShape(
-      transpose_x->shape(), y->shape(), /*feature_group_count=*/1, window,
-      dnums);
+      transpose_x->shape(), y->shape(), /*feature_group_count=*/1,
+      /*batch_group_count=*/1, window, dnums);
   EXPECT_IS_OK(conv_shape);
   HloInstruction* conv = builder.AddInstruction(HloInstruction::CreateConvolve(
       conv_shape.ValueOrDie(), transpose_x, y,
-      /*feature_group_count=*/1, window, dnums, DefaultPrecisionConfig(2)));
+      /*feature_group_count=*/1, /*batch_group_count=*/1, window, dnums,
+      DefaultPrecisionConfig(2)));
 
   auto module = CreateNewVerifiedModule("test_module");
   HloComputation* entry_computation =
diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc
index 4a58a1ed66..2496938912 100644
--- a/tensorflow/compiler/xla/tests/convolution_test.cc
+++ b/tensorflow/compiler/xla/tests/convolution_test.cc
@@ -98,7 +98,7 @@ class ForwardPassConvolution_3x3x256_256_OutputZ_Iota : public ConvolutionTest {
     precision.add_operand_precision(PrecisionConfig::HIGHEST);
     precision.add_operand_precision(PrecisionConfig::DEFAULT);
     Conv(lhs, rhs, {1, 1}, Padding::kValid, /*feature_group_count=*/1,
-         &precision);
+         /*batch_group_count=*/1, &precision);
 
     ComputeAndCompare(&builder, {}, error_spec_);
   }
-- 
GitLab


From e3223e19464c8eb2b2611583eb1f0bb3ab930e6d Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 17 Dec 2018 20:09:45 -0800
Subject: [PATCH 725/873] Add TPUPartitionedCallOp.

PiperOrigin-RevId: 225932828
---
 tensorflow/contrib/tpu/BUILD                  | 21 +++++++++++++
 tensorflow/contrib/tpu/ops/functional_ops.cc  | 31 +++++++++++++++++++
 .../contrib/tpu/python/tpu/functional.py      | 25 +++++++++++++++
 3 files changed, 77 insertions(+)
 create mode 100644 tensorflow/contrib/tpu/ops/functional_ops.cc
 create mode 100644 tensorflow/contrib/tpu/python/tpu/functional.py

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index 563a036ab5..ec8a273ea8 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -103,6 +103,7 @@ tf_gen_op_libs(
         "tpu_configuration_ops",
         "tpu_embedding_ops",
         "tpu_ordinal_selector_op",
+        "functional_ops",
     ],
     deps = [
         "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_cc",
@@ -161,6 +162,26 @@ tf_gen_op_wrapper_py(
     ],
 )
 
+tf_gen_op_wrapper_py(
+    name = "gen_functional_ops",
+    out = "python/tpu/gen_functional_ops.py",
+    hidden = [
+        "TPUPartitionedCall",
+    ],
+    deps = [":functional_ops_op_lib"],
+)
+
+py_library(
+    name = "functional",
+    srcs = ["python/tpu/functional.py"],
+    visibility = [
+        "//visibility:public",
+    ],
+    deps = [
+        ":gen_functional_ops",
+    ],
+)
+
 py_library(
     name = "profiler",
     srcs = ["python/profiler/__init__.py"],
diff --git a/tensorflow/contrib/tpu/ops/functional_ops.cc b/tensorflow/contrib/tpu/ops/functional_ops.cc
new file mode 100644
index 0000000000..aa81e8b24b
--- /dev/null
+++ b/tensorflow/contrib/tpu/ops/functional_ops.cc
@@ -0,0 +1,31 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+
+REGISTER_OP("TPUPartitionedCall")
+    .Input("args: Tin")
+    .Input("device_ordinal: int32")
+    .Output("output: Tout")
+    .Attr("Tin: list(type) >= 0")
+    .Attr("Tout: list(type) >= 0")
+    .Attr("f: func")
+    .SetShapeFn(shape_inference::UnknownShape);
+
+}  // namespace tensorflow
diff --git a/tensorflow/contrib/tpu/python/tpu/functional.py b/tensorflow/contrib/tpu/python/tpu/functional.py
new file mode 100644
index 0000000000..1ec9b5b33d
--- /dev/null
+++ b/tensorflow/contrib/tpu/python/tpu/functional.py
@@ -0,0 +1,25 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+"""Functional operations."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.contrib.tpu.python.tpu import gen_functional_ops
+
+
+TPUPartitionedCall = gen_functional_ops._tpu_partitioned_call  # pylint: disable=invalid-name,protected-access
+
-- 
GitLab


From d32dee99a1e0ed31e24ea7f0d6a08fcf69d3a474 Mon Sep 17 00:00:00 2001
From: Martin Wicke <wicke@google.com>
Date: Mon, 17 Dec 2018 20:16:37 -0800
Subject: [PATCH 726/873] Add a warning message which happens when contrib is
 imported.

PiperOrigin-RevId: 225933307
---
 tensorflow/api_template_v1.__init__.py | 10 +++++++++-
 tensorflow/python/util/lazy_loader.py  | 10 +++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index 89bae4a4e9..f88e046873 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -40,7 +40,15 @@ if not hasattr(_current_module, 'estimator'):
           'tensorflow_estimator.python.estimator.api.estimator'))
 
 from tensorflow.python.util.lazy_loader import LazyLoader  # pylint: disable=g-import-not-at-top
-contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib')
+_CONTRIB_WARNING = """
+WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
+For more information, please see:
+  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
+  * https://github.com/tensorflow/addons
+If you depend on functionality not listed there, please file an issue.
+"""
+contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib',
+                     _CONTRIB_WARNING)
 del LazyLoader
 # The templated code that replaces the placeholder above sometimes
 # sets the __all__ variable. If it does, we have to be sure to add
diff --git a/tensorflow/python/util/lazy_loader.py b/tensorflow/python/util/lazy_loader.py
index 6d2622b1c0..a9499f8334 100644
--- a/tensorflow/python/util/lazy_loader.py
+++ b/tensorflow/python/util/lazy_loader.py
@@ -31,17 +31,25 @@ class LazyLoader(types.ModuleType):
   """
 
   # The lint error here is incorrect.
-  def __init__(self, local_name, parent_module_globals, name):  # pylint: disable=super-on-old-class
+  def __init__(self, local_name, parent_module_globals, name, warning=None):  # pylint: disable=super-on-old-class
     self._local_name = local_name
     self._parent_module_globals = parent_module_globals
+    self._warning = warning
 
     super(LazyLoader, self).__init__(name)
 
   def _load(self):
+    """Load the module and insert it into the parent's globals."""
     # Import the target module and insert it into the parent's namespace
     module = importlib.import_module(self.__name__)
     self._parent_module_globals[self._local_name] = module
 
+    # Emit a warning if one was specified
+    if self._warning:
+      print(self._warning)
+      # Make sure to only warn once.
+      self._warning = None
+
     # Update this object's dict so that if someone keeps a reference to the
     #   LazyLoader, lookups are efficient (__getattr__ is only called on lookups
     #   that fail).
-- 
GitLab


From 27cb1211103f5962d984c71c16b1655dee92fc0a Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Mon, 17 Dec 2018 20:58:54 -0800
Subject: [PATCH 727/873] Automated rollback of commit
 c66d9edb560f53fb4e8c43ce27d2490e611c9480

PiperOrigin-RevId: 225936177
---
 tensorflow/tools/ci_build/update_version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/update_version.py b/tensorflow/tools/ci_build/update_version.py
index 1a14829fae..4373d464b6 100755
--- a/tensorflow/tools/ci_build/update_version.py
+++ b/tensorflow/tools/ci_build/update_version.py
@@ -304,7 +304,7 @@ def main():
     new_version = Version(old_version.major,
                           str(nightly_minor_ver),
                           old_version.patch,
-                          "a-dev" + time.strftime("%Y%m%d"),  # TODO(annarev): remove 'a'
+                          "-dev" + time.strftime("%Y%m%d"),
                           NIGHTLY_VERSION)
   else:
     new_version = Version.parse_from_string(args.version, REGULAR_VERSION)
-- 
GitLab


From 71ac1a92dadc183d6c3e08cd28e60ea763fbf0e0 Mon Sep 17 00:00:00 2001
From: jcf94 <xff252595680@gmail.com>
Date: Tue, 18 Dec 2018 13:10:29 +0800
Subject: [PATCH 728/873] Remove the use of REGISTER_MEM_ALLOCTOR

---
 tensorflow/contrib/verbs/rdma_mgr.cc          | 23 ++-----------------
 tensorflow/contrib/verbs/rdma_mgr.h           |  1 -
 tensorflow/contrib/verbs/verbs_server_lib.cc  |  5 +---
 .../core/common_runtime/process_state.cc      |  9 --------
 .../core/common_runtime/process_state.h       |  3 ---
 5 files changed, 3 insertions(+), 38 deletions(-)

diff --git a/tensorflow/contrib/verbs/rdma_mgr.cc b/tensorflow/contrib/verbs/rdma_mgr.cc
index 4a78074758..27215858e6 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.cc
+++ b/tensorflow/contrib/verbs/rdma_mgr.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include <vector>
 #include "tensorflow/contrib/verbs/grpc_verbs_client.h"
 #include "tensorflow/contrib/verbs/verbs_service.pb.h"
-#include "tensorflow/core/common_runtime/bfc_allocator.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
 #include "tensorflow/core/common_runtime/gpu/gpu_util.h"
 #include "tensorflow/core/common_runtime/pool_allocator.h"
@@ -257,25 +256,6 @@ void MRDeleter(ibv_mr* mr) {
   }
 }
 
-// TODO: This is to fix the bug of "local protection error when doing rdma send"
-//       Bug caused by commit 33170cc. The new design of Allocator/SubAllocator is
-//       good but not working correctly with this part.
-//       Waiting to migrate all the "cpu_allocator()" to "ProcessState::singleton()",
-//       and this patch will nolonger be needed.
-class BFCRdmaAllocatorFactory : public AllocatorFactory {
- public:
-  Allocator* CreateAllocator() { return ProcessState::singleton()->GetCPUAllocator(port::kNUMANoAffinity); }
-
-  SubAllocator* CreateSubAllocator(int numa_node) {
-    return new BasicCPUAllocator(numa_node, ProcessState::singleton()->GetCPUAllocatorVisitor(), ProcessState::singleton()->GetCPUFreeVisitor());
-  }
-};
-
-/*static*/ void RdmaMgr::RegMemAllocator() {
-    VLOG(1) << "Register Rdma capable Allocator when using grpc+verbs";
-    REGISTER_MEM_ALLOCATOR("BFCRdmaAllocator", 101, BFCRdmaAllocatorFactory);
-}
-
 void RdmaMgr::InitAllocators() {
   static std::once_flag flag;
   std::call_once(
@@ -299,12 +279,13 @@ void RdmaMgr::InitAllocators() {
 #if GOOGLE_CUDA
   GPUProcessState::singleton()->AddCUDAHostAllocVisitor(0, alloc_visitor);
   GPUProcessState::singleton()->AddCUDAHostFreeVisitor(0, free_visitor);
+
   if (IsGDRAvailable()) {
     // Note we don't free allocated GPU memory so there is no free visitor
 
     // TODO: This is to fix the 'invalid use of member in static member function bug'.
     //       Waiting for better implementation.
-    // int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
+    //       int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1;
     int32_t bus_id = 0;
 
     SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id,
diff --git a/tensorflow/contrib/verbs/rdma_mgr.h b/tensorflow/contrib/verbs/rdma_mgr.h
index 11bef527c1..74b92cc9a6 100644
--- a/tensorflow/contrib/verbs/rdma_mgr.h
+++ b/tensorflow/contrib/verbs/rdma_mgr.h
@@ -39,7 +39,6 @@ class RdmaMgr {
   void SetupChannels();
   bool ConnectivityCheck();
   void InitAllocators();
-  static void RegMemAllocator();
   static void RegMemVisitors();
   const string& local_worker() { return local_worker_; }
 
diff --git a/tensorflow/contrib/verbs/verbs_server_lib.cc b/tensorflow/contrib/verbs/verbs_server_lib.cc
index 641b47f38a..19ef109f67 100644
--- a/tensorflow/contrib/verbs/verbs_server_lib.cc
+++ b/tensorflow/contrib/verbs/verbs_server_lib.cc
@@ -33,15 +33,12 @@ RendezvousMgrInterface* NewRdmaRendezvousMgr(const WorkerEnv* env) {
   return new RdmaRendezvousMgr(env);
 }
 
-std::once_flag reg_mem_allocator_call;
 std::once_flag reg_mem_visitors_call;
 
 }  // namespace
 
 VerbsServer::VerbsServer(const ServerDef& server_def, Env* env)
-    : GrpcServer(server_def, env), verbs_state_(DISCONNECTED) {
-  std::call_once(reg_mem_allocator_call, []() { RdmaMgr::RegMemAllocator(); });
-}
+    : GrpcServer(server_def, env), verbs_state_(DISCONNECTED) {}
 
 VerbsServer::~VerbsServer() {
   TF_CHECK_OK(Stop());
diff --git a/tensorflow/core/common_runtime/process_state.cc b/tensorflow/core/common_runtime/process_state.cc
index 2e022857f4..fdb79767ec 100644
--- a/tensorflow/core/common_runtime/process_state.cc
+++ b/tensorflow/core/common_runtime/process_state.cc
@@ -151,13 +151,4 @@ void ProcessState::TestOnlyReset() {
   gtl::STLDeleteElements(&cpu_al_);
 }
 
-const std::vector<SubAllocator::Visitor>& ProcessState::GetCPUAllocatorVisitor()
-{
-  return cpu_alloc_visitors_;
-}
-const std::vector<SubAllocator::Visitor>& ProcessState::GetCPUFreeVisitor()
-{
-  return cpu_free_visitors_;
-}
-
 }  // namespace tensorflow
diff --git a/tensorflow/core/common_runtime/process_state.h b/tensorflow/core/common_runtime/process_state.h
index e8f46f308b..f30e440c29 100644
--- a/tensorflow/core/common_runtime/process_state.h
+++ b/tensorflow/core/common_runtime/process_state.h
@@ -75,9 +75,6 @@ class ProcessState : public ProcessStateInterface {
   // REQUIRES: must be called before GetCPUAllocator.
   void AddCPUFreeVisitor(SubAllocator::Visitor v);
 
-  const std::vector<SubAllocator::Visitor>& GetCPUAllocatorVisitor();
-  const std::vector<SubAllocator::Visitor>& GetCPUFreeVisitor();
-
   typedef std::unordered_map<const void*, MemDesc> MDMap;
 
  protected:
-- 
GitLab


From 195360a81941d8f8ffe2138084d8f83b9d0b711d Mon Sep 17 00:00:00 2001
From: Dimitris Vardoulakis <dimvar@google.com>
Date: Mon, 17 Dec 2018 21:43:28 -0800
Subject: [PATCH 729/873] Rename CrossReplicaSum to AllReduce throughout the
 XLA compiler.

PiperOrigin-RevId: 225939650
---
 tensorflow/compiler/xla/client/xla_builder.cc |   9 +-
 .../compiler/xla/service/ar_crs_combiner.h    |   8 +-
 .../xla/service/ar_crs_combiner_test.cc       | 131 +++++++++---------
 .../service/bfloat16_conversion_folding.cc    |   7 +-
 .../bfloat16_conversion_folding_test.cc       |  17 ++-
 .../xla/service/bfloat16_normalization.cc     |   2 +-
 .../service/bfloat16_normalization_test.cc    |  11 +-
 .../xla/service/bfloat16_propagation.cc       |   2 +-
 .../xla/service/bfloat16_propagation_test.cc  |   2 +-
 .../compiler/xla/service/buffer_assignment.cc |   2 +-
 tensorflow/compiler/xla/service/call_graph.cc |   2 +-
 .../compiler/xla/service/cpu/ir_emitter.cc    |   6 +-
 .../compiler/xla/service/cpu/ir_emitter.h     |   2 +-
 .../compiler/xla/service/dfs_hlo_visitor.h    |   2 +-
 .../service/dfs_hlo_visitor_with_default.h    |   2 +-
 .../compiler/xla/service/gpu/ir_emitter.cc    |   4 +-
 .../compiler/xla/service/gpu/ir_emitter.h     |   2 +-
 .../xla/service/gpu/ir_emitter_unnested.cc    |   6 +-
 .../xla/service/gpu/ir_emitter_unnested.h     |   2 +-
 tensorflow/compiler/xla/service/hlo.proto     |   2 +-
 .../compiler/xla/service/hlo_computation.cc   |   6 +-
 .../compiler/xla/service/hlo_computation.h    |   2 +-
 .../compiler/xla/service/hlo_cost_analysis.cc |   2 +-
 .../compiler/xla/service/hlo_cost_analysis.h  |   2 +-
 .../xla/service/hlo_element_type_converter.cc |   7 +-
 .../compiler/xla/service/hlo_graph_dumper.cc  |   2 +-
 .../compiler/xla/service/hlo_instruction.cc   |  42 +++---
 .../compiler/xla/service/hlo_instruction.h    |  10 +-
 .../compiler/xla/service/hlo_instructions.cc  |  17 ++-
 .../compiler/xla/service/hlo_instructions.h   |  14 +-
 .../compiler/xla/service/hlo_matchers.h       |   2 +-
 tensorflow/compiler/xla/service/hlo_opcode.h  |   2 +-
 tensorflow/compiler/xla/service/hlo_parser.cc |   9 +-
 .../compiler/xla/service/hlo_parser_test.cc   |  22 +--
 .../compiler/xla/service/hlo_reachability.cc  |   2 +-
 .../xla/service/hlo_rematerialization.cc      |   2 +-
 .../compiler/xla/service/hlo_verifier.cc      |  11 +-
 .../compiler/xla/service/hlo_verifier.h       |   2 +-
 .../xla/service/instruction_fusion.cc         |   2 +-
 .../compiler/xla/service/layout_assignment.cc |   2 +-
 .../xla/service/layout_assignment_test.cc     |   4 +-
 .../compiler/xla/service/pattern_matcher.h    |   2 +-
 .../compiler/xla/service/shape_inference.cc   |   2 +-
 .../compiler/xla/service/shape_inference.h    |   2 +-
 tensorflow/compiler/xla/tests/BUILD           |   4 +-
 ...replica_sum_test.cc => all_reduce_test.cc} |   6 +-
 46 files changed, 192 insertions(+), 209 deletions(-)
 rename tensorflow/compiler/xla/tests/{cross_replica_sum_test.cc => all_reduce_test.cc} (94%)

diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 6653ae0ca3..622fc158e1 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -211,7 +211,7 @@ void XlaBuilder::IsConstantVisitor(const int64 op_handle,
 
     // Non functional ops.
     case HloOpcode::kRng:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
       // TODO(b/33009255): Implmement constant folding for cross replica sum.
     case HloOpcode::kInfeed:
     case HloOpcode::kOutfeed:
@@ -2020,8 +2020,8 @@ XlaOp XlaBuilder::CrossReplicaSum(
   return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     HloInstructionProto instr;
     TF_ASSIGN_OR_RETURN(const Shape& operand_shape, GetShape(operand));
-    TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferCrossReplicaSumShape(
-                                         {&operand_shape}));
+    TF_ASSIGN_OR_RETURN(Shape shape,
+                        ShapeInference::InferAllReduceShape({&operand_shape}));
     *instr.mutable_shape() = shape.ToProto();
 
     for (const ReplicaGroup& group : replica_groups) {
@@ -2034,8 +2034,7 @@ XlaOp XlaBuilder::CrossReplicaSum(
 
     AddCalledComputation(computation, &instr);
 
-    return AddInstruction(std::move(instr), HloOpcode::kCrossReplicaSum,
-                          {operand});
+    return AddInstruction(std::move(instr), HloOpcode::kAllReduce, {operand});
   });
 }
 
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner.h b/tensorflow/compiler/xla/service/ar_crs_combiner.h
index 6be7e1002d..6f54b97615 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner.h
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner.h
@@ -25,10 +25,10 @@ limitations under the License.
 
 namespace xla {
 
-// When the HLO graph contains an AllReduce, followed by some simple linear
-// operations, followed by a CrossReplicaSum, we can combine the AR and the CRS,
-// to use an efficient CrossReplicaSum implementation that fully utilizes the
-// interconnect bandwidth.
+// When the HLO graph contains a cross-module AllReduce, followed by some simple
+// linear operations, followed by a cross-replica AllReduce, we can combine the
+// CMAR and the CRAR, to use an efficient AllReduce implementation that fully
+// utilizes the interconnect bandwidth.
 // Such sequences appear in spatially partitioned models.
 // This pass must run right after spatial partitioning.
 class ArCrsCombiner : public HloModulePass {
diff --git a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
index 8a4fd0ee1b..caa57296f4 100644
--- a/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
+++ b/tensorflow/compiler/xla/service/ar_crs_combiner_test.cc
@@ -361,38 +361,38 @@ HloModule foobar
 ENTRY %entrycomp (p: bf16[]) -> (f32[], f32[]) {
   %p = bf16[] parameter(0)
 
-  %cross-replica-sum.ar.1 = bf16[]
-      cross-replica-sum(%p),
+  %all-reduce.ar.1 = bf16[]
+      all-reduce(%p),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.bf16,
       sharding={maximal device=0}
   %convert.1 = f32[]
-      convert(%cross-replica-sum.ar.1),
+      convert(%all-reduce.ar.1),
       sharding={maximal device=0}
-  %cross-replica-sum.1 = f32[]
-      cross-replica-sum(%convert.1),
+  %all-reduce.1 = f32[]
+      all-reduce(%convert.1),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=0}
 
-  %cross-replica-sum.ar.2 = bf16[]
-      cross-replica-sum(%p),
+  %all-reduce.ar.2 = bf16[]
+      all-reduce(%p),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.bf16,
       sharding={maximal device=1}
   %convert.2 = f32[]
-      convert(%cross-replica-sum.ar.2),
+      convert(%all-reduce.ar.2),
       sharding={maximal device=1}
-  %cross-replica-sum.2 = f32[]
-      cross-replica-sum(%convert.2),
+  %all-reduce.2 = f32[]
+      all-reduce(%convert.2),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=1}
 
   ROOT %tuple = (f32[], f32[])
-      tuple(%cross-replica-sum.1, %cross-replica-sum.2),
+      tuple(%all-reduce.1, %all-reduce.2),
       sharding={{maximal device=0}, {maximal device=1}}
 }
 )";
@@ -406,8 +406,8 @@ ENTRY %entrycomp (p: bf16[]) -> (f32[], f32[]) {
   auto changed = combiner.Run(module.get()).ValueOrDie();
   EXPECT_TRUE(changed);
   EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::CrossReplicaSum(op::Convert(op::Parameter())),
-                        op::CrossReplicaSum(op::Convert(op::Parameter()))));
+              op::Tuple(op::AllReduce(op::Convert(op::Parameter())),
+                        op::AllReduce(op::Convert(op::Parameter()))));
   auto crs_after =
       module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_after = crs_after->replica_groups();
@@ -433,34 +433,34 @@ HloModule foobar
 ENTRY %entrycomp (p: f32[2,1]) -> (f32[2], f32[2]) {
   %p = f32[2,1] parameter(0)
 
-  %cross-replica-sum.ar.1 = f32[2,1]
-      cross-replica-sum(%p),
+  %all-reduce.ar.1 = f32[2,1]
+      all-reduce(%p),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.1,
       sharding={maximal device=0}
-  %bitcast.1 = f32[2]{0} bitcast(f32[2,1]{1,0} %cross-replica-sum.ar.1)
-  %cross-replica-sum.1 = f32[2]
-      cross-replica-sum(%bitcast.1),
+  %bitcast.1 = f32[2]{0} bitcast(f32[2,1]{1,0} %all-reduce.ar.1)
+  %all-reduce.1 = f32[2]
+      all-reduce(%bitcast.1),
       replica_groups={{0,1}},
       to_apply=%sum.2,
       sharding={maximal device=0}
 
-  %cross-replica-sum.ar.2 = f32[2,1]
-      cross-replica-sum(%p),
+  %all-reduce.ar.2 = f32[2,1]
+      all-reduce(%p),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.1,
       sharding={maximal device=1}
-  %bitcast.2 = f32[2]{0} bitcast(f32[2,1]{1,0} %cross-replica-sum.ar.2)
-  %cross-replica-sum.2 = f32[2]
-      cross-replica-sum(%bitcast.2),
+  %bitcast.2 = f32[2]{0} bitcast(f32[2,1]{1,0} %all-reduce.ar.2)
+  %all-reduce.2 = f32[2]
+      all-reduce(%bitcast.2),
       replica_groups={{0,1}},
       to_apply=%sum.2,
       sharding={maximal device=1}
 
   ROOT %tuple = (f32[], f32[])
-      tuple(%cross-replica-sum.1, %cross-replica-sum.2),
+      tuple(%all-reduce.1, %all-reduce.2),
       sharding={{maximal device=0}, {maximal device=1}}
 }
 )";
@@ -474,8 +474,8 @@ ENTRY %entrycomp (p: f32[2,1]) -> (f32[2], f32[2]) {
   auto changed = combiner.Run(module.get()).ValueOrDie();
   EXPECT_TRUE(changed);
   EXPECT_THAT(module->entry_computation()->root_instruction(),
-              op::Tuple(op::CrossReplicaSum(op::Bitcast(op::Parameter())),
-                        op::CrossReplicaSum(op::Bitcast(op::Parameter()))));
+              op::Tuple(op::AllReduce(op::Bitcast(op::Parameter())),
+                        op::AllReduce(op::Bitcast(op::Parameter()))));
   auto crs_after =
       module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_after = crs_after->replica_groups();
@@ -496,38 +496,38 @@ ENTRY %entrycomp (p: f32[]) -> (f32[], f32[]) {
   %p = f32[] parameter(0)
   %constant.f32 = f32[] constant(123)
 
-  %cross-replica-sum.ar.1 = f32[]
-      cross-replica-sum(%p),
+  %all-reduce.ar.1 = f32[]
+      all-reduce(%p),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.f32,
       sharding={maximal device=0}
   %multiply.1 = f32[]
-      multiply(%cross-replica-sum.ar.1, %constant.f32),
+      multiply(%all-reduce.ar.1, %constant.f32),
       sharding={maximal device=0}
-  %cross-replica-sum.1 = f32[]
-      cross-replica-sum(%multiply.1),
+  %all-reduce.1 = f32[]
+      all-reduce(%multiply.1),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=0}
 
-  %cross-replica-sum.ar.2 = f32[]
-      cross-replica-sum(%p),
+  %all-reduce.ar.2 = f32[]
+      all-reduce(%p),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.f32,
       sharding={maximal device=1}
   %multiply.2 = f32[]
-      multiply(%cross-replica-sum.ar.2, %constant.f32),
+      multiply(%all-reduce.ar.2, %constant.f32),
       sharding={maximal device=1}
-  %cross-replica-sum.2 = f32[]
-      cross-replica-sum(%multiply.2),
+  %all-reduce.2 = f32[]
+      all-reduce(%multiply.2),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=1}
 
   ROOT %tuple = (f32[], f32[])
-      tuple(%cross-replica-sum.1, %cross-replica-sum.2),
+      tuple(%all-reduce.1, %all-reduce.2),
       sharding={{maximal device=0}, {maximal device=1}}
 }
 )";
@@ -542,9 +542,8 @@ ENTRY %entrycomp (p: f32[]) -> (f32[], f32[]) {
   EXPECT_TRUE(changed);
   EXPECT_THAT(
       module->entry_computation()->root_instruction(),
-      op::Tuple(
-          op::CrossReplicaSum(op::Multiply(op::Parameter(), op::Constant())),
-          op::CrossReplicaSum(op::Multiply(op::Parameter(), op::Constant()))));
+      op::Tuple(op::AllReduce(op::Multiply(op::Parameter(), op::Constant())),
+                op::AllReduce(op::Multiply(op::Parameter(), op::Constant()))));
   auto crs_after =
       module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_after = crs_after->replica_groups();
@@ -572,44 +571,44 @@ ENTRY %entrycomp (p: f32[]) -> (f32[], f32[]) {
   %constant.bf16 = bf16[] constant(1)
   %constant.f32 = f32[] constant(2)
 
-  %cross-replica-sum.ar.1 = bf16[]
-      cross-replica-sum(%constant.bf16),
+  %all-reduce.ar.1 = bf16[]
+      all-reduce(%constant.bf16),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.bf16,
       sharding={maximal device=0}
   %convert.1 = f32[]
-      convert(%cross-replica-sum.ar.1),
+      convert(%all-reduce.ar.1),
       sharding={maximal device=0}
   %add.1 = f32[]
       add(%constant.f32, %convert.1),
       sharding={maximal device=0}
-  %cross-replica-sum.1 = f32[]
-      cross-replica-sum(%add.1),
+  %all-reduce.1 = f32[]
+      all-reduce(%add.1),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=0}
 
-  %cross-replica-sum.ar.2 = bf16[]
-      cross-replica-sum(%constant.bf16),
+  %all-reduce.ar.2 = bf16[]
+      all-reduce(%constant.bf16),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.bf16,
       sharding={maximal device=1}
   %convert.2 = f32[]
-      convert(%cross-replica-sum.ar.2),
+      convert(%all-reduce.ar.2),
       sharding={maximal device=1}
   %add.2 = f32[]
       add(%constant.f32, %convert.2),
       sharding={maximal device=1}
-  %cross-replica-sum.2 = f32[]
-      cross-replica-sum(%add.2),
+  %all-reduce.2 = f32[]
+      all-reduce(%add.2),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=1}
 
   ROOT %tuple = (f32[], f32[])
-      tuple(%cross-replica-sum.1, %cross-replica-sum.2),
+      tuple(%all-reduce.1, %all-reduce.2),
       sharding={{maximal device=0}, {maximal device=1}}
 }
 )";
@@ -625,10 +624,10 @@ ENTRY %entrycomp (p: f32[]) -> (f32[], f32[]) {
   EXPECT_THAT(
       module->entry_computation()->root_instruction(),
       op::Tuple(
-          op::CrossReplicaSum(op::Add(
-              op::Divide(op::Constant(), op::Constant()), op::Convert())),
-          op::CrossReplicaSum(op::Add(
-              op::Divide(op::Constant(), op::Constant()), op::Convert()))));
+          op::AllReduce(op::Add(op::Divide(op::Constant(), op::Constant()),
+                                op::Convert())),
+          op::AllReduce(op::Add(op::Divide(op::Constant(), op::Constant()),
+                                op::Convert()))));
   auto crs_after =
       module->entry_computation()->root_instruction()->operands()[0];
   auto replica_groups_after = crs_after->replica_groups();
@@ -657,44 +656,44 @@ ENTRY %entrycomp (p: f32[]) -> (f32[], f32[]) {
   %constant.f32.1 = f32[] constant(2)
   %constant.f32.2 = f32[] constant(3)
 
-  %cross-replica-sum.ar.1 = bf16[]
-      cross-replica-sum(%constant.bf16),
+  %all-reduce.ar.1 = bf16[]
+      all-reduce(%constant.bf16),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.bf16,
       sharding={maximal device=0}
   %convert.1 = f32[]
-      convert(%cross-replica-sum.ar.1),
+      convert(%all-reduce.ar.1),
       sharding={maximal device=0}
   %add.1 = f32[]
       add(%constant.f32.1, %convert.1),
       sharding={maximal device=0}
-  %cross-replica-sum.1 = f32[]
-      cross-replica-sum(%add.1),
+  %all-reduce.1 = f32[]
+      all-reduce(%add.1),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=0}
 
-  %cross-replica-sum.ar.2 = bf16[]
-      cross-replica-sum(%constant.bf16),
+  %all-reduce.ar.2 = bf16[]
+      all-reduce(%constant.bf16),
       replica_groups={{0},{1}},
       all_reduce_id=1,
       to_apply=%sum.bf16,
       sharding={maximal device=1}
   %convert.2 = f32[]
-      convert(%cross-replica-sum.ar.2),
+      convert(%all-reduce.ar.2),
       sharding={maximal device=1}
   %add.2 = f32[]
       add(%constant.f32.2, %convert.2),
       sharding={maximal device=1}
-  %cross-replica-sum.2 = f32[]
-      cross-replica-sum(%add.2),
+  %all-reduce.2 = f32[]
+      all-reduce(%add.2),
       replica_groups={{0,1}},
       to_apply=%sum.f32,
       sharding={maximal device=1}
 
   ROOT %tuple = (f32[], f32[])
-      tuple(%cross-replica-sum.1, %cross-replica-sum.2),
+      tuple(%all-reduce.1, %all-reduce.2),
       sharding={{maximal device=0}, {maximal device=1}}
 }
 )";
diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc
index e9d30fc03c..6caef77ed0 100644
--- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc
@@ -34,8 +34,8 @@ class BFloat16ConversionFoldingVisitor : public DfsHloVisitorWithDefault {
 
   Status DefaultAction(HloInstruction* hlo) override;
 
-  // Special handling for cross-replica-sum which can have a tuple output.
-  Status HandleCrossReplicaSum(HloInstruction* crs) override;
+  // Special handling for all-reduce which can have a tuple output.
+  Status HandleAllReduce(HloInstruction* crs) override;
 
   static bool Run(HloComputation* computation,
                   const BFloat16Support* bfloat16_support) {
@@ -176,8 +176,7 @@ Status BFloat16ConversionFoldingVisitor::DefaultAction(HloInstruction* hlo) {
   return TryFoldBF16Conversions(hlo);
 }
 
-Status BFloat16ConversionFoldingVisitor::HandleCrossReplicaSum(
-    HloInstruction* crs) {
+Status BFloat16ConversionFoldingVisitor::HandleAllReduce(HloInstruction* crs) {
   if (crs->IsCrossModuleAllReduce()) {
     // Cross-module all-reduce has side effect.
     return Status::OK();
diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc
index 4ce351acc2..2232a2cbdf 100644
--- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc
@@ -38,7 +38,7 @@ class TestBFloat16Support : public BFloat16Support {
         hlo.opcode() == HloOpcode::kSubtract ||
         hlo.opcode() == HloOpcode::kTuple ||
         hlo.opcode() == HloOpcode::kGetTupleElement ||
-        hlo.opcode() == HloOpcode::kCrossReplicaSum) {
+        hlo.opcode() == HloOpcode::kAllReduce) {
       return true;
     }
     return false;
@@ -49,7 +49,7 @@ class TestBFloat16Support : public BFloat16Support {
         hlo.opcode() == HloOpcode::kSubtract ||
         hlo.opcode() == HloOpcode::kTuple ||
         hlo.opcode() == HloOpcode::kGetTupleElement ||
-        hlo.opcode() == HloOpcode::kCrossReplicaSum) {
+        hlo.opcode() == HloOpcode::kAllReduce) {
       return true;
     }
     return false;
@@ -58,7 +58,7 @@ class TestBFloat16Support : public BFloat16Support {
   bool SupportsMixedPrecisions(const HloInstruction& hlo) const override {
     if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kTuple ||
         hlo.opcode() == HloOpcode::kGetTupleElement ||
-        hlo.opcode() == HloOpcode::kCrossReplicaSum) {
+        hlo.opcode() == HloOpcode::kAllReduce) {
       return true;
     }
     return false;
@@ -213,7 +213,7 @@ TEST_F(BFloat16ConversionFoldingTest, DoNotFoldTuple) {
   EXPECT_EQ(tuple->operand(1), convert0);
 }
 
-TEST_F(BFloat16ConversionFoldingTest, FoldCrossReplicaSumTupleOutput) {
+TEST_F(BFloat16ConversionFoldingTest, FoldAllReduceTupleOutput) {
   auto builder = HloComputation::Builder(TestName());
 
   auto module = CreateNewVerifiedModule();
@@ -236,11 +236,10 @@ TEST_F(BFloat16ConversionFoldingTest, FoldCrossReplicaSumTupleOutput) {
   HloInstruction* b = builder.AddInstruction(
       HloInstruction::CreateParameter(1, f32_shape, "b"));
 
-  HloInstruction* crs =
-      builder.AddInstruction(HloInstruction::CreateCrossReplicaSum(
-          ShapeUtil::MakeTupleShape({f32_shape, f32_shape}), {convert_a, b},
-          sum, /*replica_groups=*/{}, /*barrier=*/"",
-          /*all_reduce_id=*/absl::nullopt));
+  HloInstruction* crs = builder.AddInstruction(HloInstruction::CreateAllReduce(
+      ShapeUtil::MakeTupleShape({f32_shape, f32_shape}), {convert_a, b}, sum,
+      /*replica_groups=*/{}, /*barrier=*/"",
+      /*all_reduce_id=*/absl::nullopt));
   HloInstruction* gte_a = builder.AddInstruction(
       HloInstruction::CreateGetTupleElement(f32_shape, crs, 0));
   HloInstruction* gte_b = builder.AddInstruction(
diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc
index b8a8f844ef..e3aefe9067 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc
@@ -362,7 +362,7 @@ Status BFloat16NormalizationVisitor::DefaultAction(HloInstruction* hlo) {
   }
   // TODO(b/112040122): Correctly normalize variadic reduce.
   if ((hlo->opcode() == HloOpcode::kSort ||
-       hlo->opcode() == HloOpcode::kCrossReplicaSum) &&
+       hlo->opcode() == HloOpcode::kAllReduce) &&
       ShapeUtil::IsTuple(hlo->shape())) {
     return HandleMultipleOutputs(hlo);
   }
diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
index 9f97d18c56..551ac4be73 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
@@ -232,7 +232,7 @@ TEST_F(BFloat16NormalizationTest, ResolveUnsupportedMixedPrecisionReduce) {
   EXPECT_EQ(reduce->operand(1)->shape().element_type(), F32);
 }
 
-TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleCrossReplicaSum) {
+TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleAllReduce) {
   auto module = CreateNewVerifiedModule();
   HloComputation::Builder sum_builder("sum");
   auto x = sum_builder.AddInstruction(HloInstruction::CreateParameter(
@@ -253,11 +253,10 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleCrossReplicaSum) {
   HloInstruction* b = builder.AddInstruction(
       HloInstruction::CreateParameter(1, bf16_shape, "b"));
 
-  HloInstruction* crs =
-      builder.AddInstruction(HloInstruction::CreateCrossReplicaSum(
-          ShapeUtil::MakeTupleShape({f32_shape, bf16_shape}), {a, b}, reduction,
-          /*replica_groups=*/{}, /*barrier=*/"",
-          /*all_reduce_id=*/absl::nullopt));
+  HloInstruction* crs = builder.AddInstruction(HloInstruction::CreateAllReduce(
+      ShapeUtil::MakeTupleShape({f32_shape, bf16_shape}), {a, b}, reduction,
+      /*replica_groups=*/{}, /*barrier=*/"",
+      /*all_reduce_id=*/absl::nullopt));
   HloInstruction* gte = builder.AddInstruction(
       HloInstruction::CreateGetTupleElement(bf16_shape, crs, 1));
 
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
index 63d4572f20..05dd4b3e91 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc
@@ -276,7 +276,7 @@ bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo,
       if (bfloat16_support_->EffectiveOperandPrecisionIsOutputPrecision(
               *use.instruction, use.operand_number)) {
         if (use.instruction->opcode() == HloOpcode::kTuple ||
-            (use.instruction->opcode() == HloOpcode::kCrossReplicaSum &&
+            (use.instruction->opcode() == HloOpcode::kAllReduce &&
              ShapeUtil::IsTuple(use.instruction->shape()))) {
           ShapeIndex use_output_index{use.operand_number};
           for (int64 i : use.operand_index) {
diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc
index 5be7141aae..a9b5d9916e 100644
--- a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc
@@ -209,7 +209,7 @@ TEST_F(BFloat16PropagationTest, DoNotChangeAllReduce) {
       rb.AddInstruction(HloInstruction::CreateParameter(1, shape, "p1"))));
   auto reduction = module->AddEmbeddedComputation(rb.Build());
   HloInstruction* all_reduce =
-      builder.AddInstruction(HloInstruction::CreateCrossReplicaSum(
+      builder.AddInstruction(HloInstruction::CreateAllReduce(
           ShapeUtil::MakeTupleShape({shape, shape}), {a, b}, reduction,
           /*replica_groups=*/{}, /*barrier=*/"", /*all_reduce_id=*/1));
   HloInstruction* gte0 = builder.AddInstruction(
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 8d7c624478..202e45e181 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -186,7 +186,7 @@ Status GatherComputationsByAllocationType(
             worklist.push_back(std::make_pair(subcomputation,
                                               false));  // Not thread local.
             break;
-          case HloOpcode::kCrossReplicaSum:
+          case HloOpcode::kAllReduce:
           case HloOpcode::kMap:
           case HloOpcode::kReduce:
           case HloOpcode::kReduceWindow:
diff --git a/tensorflow/compiler/xla/service/call_graph.cc b/tensorflow/compiler/xla/service/call_graph.cc
index 7987343bfa..173b3fc05f 100644
--- a/tensorflow/compiler/xla/service/call_graph.cc
+++ b/tensorflow/compiler/xla/service/call_graph.cc
@@ -58,7 +58,7 @@ CallContext GetInstructionCallContext(HloOpcode opcode) {
     case HloOpcode::kConditional:
     case HloOpcode::kWhile:
       return CallContext::kSequential;
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kMap:
     case HloOpcode::kReduce:
     case HloOpcode::kReduceWindow:
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
index 2c3e5d04cb..ed7fe59c80 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc
@@ -1333,11 +1333,11 @@ Status IrEmitter::HandleFft(HloInstruction* fft) {
   return Status::OK();
 }
 
-Status IrEmitter::HandleCrossReplicaSum(HloInstruction* crs) {
+Status IrEmitter::HandleAllReduce(HloInstruction* crs) {
   if (hlo_module_config_.replica_count() != 1) {
     // TODO(b/33011107): Support nontrivial cross replica sum on CPU.
     return Unimplemented(
-        "CrossReplicaSum with >1 replica is not implemented on CPU.");
+        "AllReduce with >1 replica is not implemented on CPU.");
   }
 
   // When there is a single replica, a cross replica sum is the identity
@@ -1363,7 +1363,7 @@ Status IrEmitter::HandleCrossReplicaSum(HloInstruction* crs) {
 
     const Shape& operand_shape = crs->operand(i)->shape();
     CHECK(ShapeUtil::IsArray(operand_shape))
-        << "Operands to cross-replica-sum must be arrays: " << crs->ToString();
+        << "Operands to all-reduce must be arrays: " << crs->ToString();
     operand_ptrs.push_back(EmitBufferPointer(out_slice, operand_shape));
 
     // TODO(b/63762267): Be more aggressive about specifying alignment.
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 1db75cc8be..db76de4bb2 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -134,7 +134,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
   Status HandleDot(HloInstruction* dot) override;
   Status HandleConvolution(HloInstruction* convolution) override;
   Status HandleFft(HloInstruction* fft) override;
-  Status HandleCrossReplicaSum(HloInstruction* crs) override;
+  Status HandleAllReduce(HloInstruction* crs) override;
   Status HandleInfeed(HloInstruction* infeed) override;
   Status HandleOutfeed(HloInstruction* outfeed) override;
   Status HandleSort(HloInstruction* sort) override;
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
index e84bf00153..2132468b90 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h
@@ -105,7 +105,7 @@ class DfsHloVisitorBase {
   }
   virtual Status HandleConvolution(HloInstructionPtr hlo) = 0;
   virtual Status HandleFft(HloInstructionPtr fft) = 0;
-  virtual Status HandleCrossReplicaSum(HloInstructionPtr hlo) = 0;
+  virtual Status HandleAllReduce(HloInstructionPtr hlo) = 0;
   virtual Status HandleAllToAll(HloInstructionPtr hlo) = 0;
   virtual Status HandleCollectivePermute(HloInstructionPtr hlo) = 0;
   virtual Status HandleGetDimensionSize(HloInstructionPtr hlo) = 0;
diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
index 80ea5be298..680dd256bb 100644
--- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
+++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h
@@ -91,7 +91,7 @@ class DfsHloVisitorWithDefaultBase
   Status HandleFft(HloInstructionPtr fft) override {
     return DefaultAction(fft);
   }
-  Status HandleCrossReplicaSum(HloInstructionPtr crs) override {
+  Status HandleAllReduce(HloInstructionPtr crs) override {
     return DefaultAction(crs);
   }
   Status HandleAllToAll(HloInstructionPtr hlo) override {
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index 6693f66d62..22db38ee03 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -637,9 +637,9 @@ Status IrEmitter::HandleFft(HloInstruction* fft) {
   return Unimplemented("Hit a case for fft that is not implemented on GPU.");
 }
 
-Status IrEmitter::HandleCrossReplicaSum(HloInstruction* crs) {
+Status IrEmitter::HandleAllReduce(HloInstruction* crs) {
   // TODO(b/33011107): Support cross replica sum on GPU.
-  return Unimplemented("CrossReplicaSum is not implemented on GPU.");
+  return Unimplemented("AllReduce is not implemented on GPU.");
 }
 
 Status IrEmitter::HandleParameter(HloInstruction* parameter) {
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
index 2da46c0169..f380aee9d3 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
@@ -81,7 +81,7 @@ class IrEmitter : public DfsHloVisitorWithDefault,
   Status HandleDot(HloInstruction* dot) override;
   Status HandleConvolution(HloInstruction* convolution) override;
   Status HandleFft(HloInstruction* fft) override;
-  Status HandleCrossReplicaSum(HloInstruction* crs) override;
+  Status HandleAllReduce(HloInstruction* crs) override;
   Status HandleInfeed(HloInstruction* infeed) override;
   Status HandleOutfeed(HloInstruction* outfeed) override;
   Status HandleSend(HloInstruction* send) override;
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 87d16c0afc..1472853dc4 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -1295,11 +1295,11 @@ Status IrEmitterUnnested::HandleTupleSelect(HloInstruction* tuple_select) {
   return IrEmitter::HandleTupleSelect(tuple_select);
 }
 
-Status IrEmitterUnnested::HandleCrossReplicaSum(HloInstruction* crs) {
+Status IrEmitterUnnested::HandleAllReduce(HloInstruction* crs) {
   if (hlo_module_config_.replica_count() != 1) {
     // TODO(b/33011107): Support nontrivial cross replica sum on GPU.
     return Unimplemented(
-        "CrossReplicaSum with >1 replica is not implemented on GPU.");
+        "AllReduce with >1 replica is not implemented on GPU.");
   }
 
   // CRS with one operand and one replica is simply the identity function.
@@ -1311,7 +1311,7 @@ Status IrEmitterUnnested::HandleCrossReplicaSum(HloInstruction* crs) {
   // and when it's run.
   if (crs->operand_count() == 1) {
     CHECK(ShapeUtil::IsArray(crs->operand(0)->shape()))
-        << "Operands to cross-replica-sum must be arrays: " << crs->ToString();
+        << "Operands to all-reduce must be arrays: " << crs->ToString();
     AddThunkToThunkSequence(absl::make_unique<DeviceToDeviceCopyThunk>(
         /*source_address=*/GetAllocationSlice(*crs->operand(0)),
         /*destination_buffer=*/GetAllocationSlice(*crs),
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index 1ebea7ab48..d217ee36cf 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -176,7 +176,7 @@ class IrEmitterUnnested : public IrEmitter {
   Status HandleSelect(HloInstruction* select) override;
   Status HandleSort(HloInstruction* sort) override;
   Status HandleTupleSelect(HloInstruction* tuple_select) override;
-  Status HandleCrossReplicaSum(HloInstruction* crs) override;
+  Status HandleAllReduce(HloInstruction* crs) override;
   Status HandleAfterAll(HloInstruction* after_all) override;
 
   Status EmitTargetElementLoop(
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index faced059ac..9b50f1ca5b 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -168,7 +168,7 @@ message HloInstructionProto {
   // Cross replica op fields.
   repeated ReplicaGroup replica_groups = 49;
   int64 all_reduce_id = 45;
-  string cross_replica_sum_barrier = 46;
+  string all_reduce_barrier = 46;
 
   // Whether this Send/Recv instruction transfers data to/from the host. Only
   // present for Send and Recv instructions and their SendDone and RecvDone
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index f6867b4006..7563030718 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -332,7 +332,7 @@ void HloComputation::ComputeInstructionPostOrder(
       dfs_stack.emplace_back(op);
     }
 
-    // Add inputs for send->recv_done dependencies and cross-replica-sum
+    // Add inputs for send->recv_done dependencies and all-reduce
     // dependencies.
     switch (current->opcode()) {
       case HloOpcode::kRecvDone: {
@@ -344,7 +344,7 @@ void HloComputation::ComputeInstructionPostOrder(
         }
         break;
       }
-      case HloOpcode::kCrossReplicaSum: {
+      case HloOpcode::kAllReduce: {
         auto all_reduce_id = current->all_reduce_id();
         if (all_reduce_id) {
           auto it = channel_dependency_map.find(all_reduce_id.value());
@@ -372,7 +372,7 @@ HloComputation::ComputeChannelDependencies() const {
             instruction.get());
         break;
       }
-      case HloOpcode::kCrossReplicaSum: {
+      case HloOpcode::kAllReduce: {
         auto all_reduce_id = instruction->all_reduce_id();
         if (all_reduce_id) {
           auto& dependencies = channel_dependency_map[all_reduce_id.value()];
diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h
index 5467d0a68b..a0ccbc583f 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.h
+++ b/tensorflow/compiler/xla/service/hlo_computation.h
@@ -367,7 +367,7 @@ class HloComputation {
 
   // Returns a map from channel-id to directed dependencies of the channel
   // instructions. For send&recv pairs it means the send instruction and for
-  // cross-replica-sum the union of the dependencies for all participating
+  // all-reduce the union of the dependencies for all participating
   // instructions.
   using ChannelDependencyMap =
       absl::flat_hash_map<int64, absl::InlinedVector<HloInstruction*, 1>>;
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
index df7d3826db..cb431aed47 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc
@@ -552,7 +552,7 @@ Status HloCostAnalysis::HandleFft(const HloInstruction* fft) {
   return Status::OK();
 }
 
-Status HloCostAnalysis::HandleCrossReplicaSum(const HloInstruction* crs) {
+Status HloCostAnalysis::HandleAllReduce(const HloInstruction* crs) {
   // We assume 2 replicas, so that each output element is the sum of two input
   // elements.
   //
diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
index 33983119c9..b52305626d 100644
--- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h
+++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h
@@ -71,7 +71,7 @@ class HloCostAnalysis : public ConstDfsHloVisitor {
   Status HandleDot(const HloInstruction* dot) override;
   Status HandleConvolution(const HloInstruction* convolution) override;
   Status HandleFft(const HloInstruction* fft) override;
-  Status HandleCrossReplicaSum(const HloInstruction* crs) override;
+  Status HandleAllReduce(const HloInstruction* crs) override;
   Status HandleAllToAll(const HloInstruction* hlo) override;
   Status HandleCollectivePermute(const HloInstruction* hlo) override;
   Status HandleInfeed(const HloInstruction* infeed) override;
diff --git a/tensorflow/compiler/xla/service/hlo_element_type_converter.cc b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
index 72006e17e7..a40b6d888c 100644
--- a/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
+++ b/tensorflow/compiler/xla/service/hlo_element_type_converter.cc
@@ -141,10 +141,9 @@ StatusOr<bool> HloElementTypeConverter::Run(HloModule* module) {
       // These are ops with embedded computations where it suffices to convert
       // the embedded computations instead of converting the ops themselves.
       if (opcode == HloOpcode::kWhile || opcode == HloOpcode::kCall ||
-          opcode == HloOpcode::kCrossReplicaSum ||
-          opcode == HloOpcode::kFusion || opcode == HloOpcode::kMap ||
-          opcode == HloOpcode::kReduce || opcode == HloOpcode::kReduceWindow ||
-          opcode == HloOpcode::kScatter ||
+          opcode == HloOpcode::kAllReduce || opcode == HloOpcode::kFusion ||
+          opcode == HloOpcode::kMap || opcode == HloOpcode::kReduce ||
+          opcode == HloOpcode::kReduceWindow || opcode == HloOpcode::kScatter ||
           opcode == HloOpcode::kSelectAndScatter ||
           opcode == HloOpcode::kConditional) {
         continue;
diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 5db21e47ca..07cc379ac6 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1030,7 +1030,7 @@ ColorScheme HloDotDumper::GetInstructionColor(const HloInstruction* instr) {
     case HloOpcode::kMap:
     case HloOpcode::kGetDimensionSize:
       return kGray;
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kAllToAll:
     case HloOpcode::kCollectivePermute:
     case HloOpcode::kInfeed:
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 013b5dff5e..3e8903c953 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -333,20 +333,20 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
                                   proto.outfeed_config());
       break;
     }
-    case HloOpcode::kCrossReplicaSum: {
+    case HloOpcode::kAllReduce: {
       TF_RET_CHECK(proto.called_computation_ids_size() == 1)
-          << "CrossReplicaSum should have 1 called computation but sees "
+          << "AllReduce should have 1 called computation but sees "
           << proto.called_computation_ids_size();
       absl::optional<int64> all_reduce_id;
       if (proto.all_reduce_id() > 0) {
         all_reduce_id = proto.all_reduce_id();
       }
-      instruction = CreateCrossReplicaSum(
+      instruction = CreateAllReduce(
           shape, all_operands(), computations(0),
           /*replica_groups=*/
           std::vector<ReplicaGroup>(proto.replica_groups().begin(),
                                     proto.replica_groups().end()),
-          /*barrier=*/proto.cross_replica_sum_barrier(),
+          /*barrier=*/proto.all_reduce_barrier(),
           /*all_reduce_id=*/all_reduce_id);
       break;
     }
@@ -770,8 +770,7 @@ HloInstruction::CreateReducePrecision(const Shape& shape,
       shape, operand, exponent_bits, mantissa_bits);
 }
 
-/* static */ std::unique_ptr<HloInstruction>
-HloInstruction::CreateCrossReplicaSum(
+/* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateAllReduce(
     const Shape& shape, absl::Span<HloInstruction* const> operands,
     HloComputation* reduce_computation,
     const std::vector<ReplicaGroup>& replica_groups, absl::string_view barrier,
@@ -1165,7 +1164,7 @@ bool HloInstruction::HasSideEffectNoRecurse() const {
     case HloOpcode::kOutfeed:
     case HloOpcode::kTrace:
       return true;
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
       return all_reduce_id().has_value();
     default:
       return false;
@@ -1288,7 +1287,7 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands(
     case HloOpcode::kParameter:
     case HloOpcode::kGetTupleElement:
     case HloOpcode::kReducePrecision:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kAllToAll:
     case HloOpcode::kCollectivePermute:
     case HloOpcode::kInfeed:
@@ -1745,7 +1744,7 @@ bool HloInstruction::IdenticalSlowPath(
     case HloOpcode::kReducePrecision:
     case HloOpcode::kInfeed:
     case HloOpcode::kOutfeed:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kAllToAll:
     case HloOpcode::kCollectivePermute:
     case HloOpcode::kConvolution:
@@ -1894,7 +1893,7 @@ HloComputation* HloInstruction::to_apply() const {
     case HloOpcode::kMap:
     case HloOpcode::kReduceWindow:
     case HloOpcode::kReduce:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kScatter:
       CHECK_EQ(called_computations_.size(), 1);
       return called_computations_[0];
@@ -1913,7 +1912,7 @@ void HloInstruction::set_to_apply(HloComputation* computation) {
     case HloOpcode::kMap:
     case HloOpcode::kReduceWindow:
     case HloOpcode::kReduce:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kScatter:
       CHECK_EQ(called_computations_.size(), 1);
       called_computations_[0] = computation;
@@ -2071,11 +2070,11 @@ bool HloInstruction::IsElementwiseImpl(
 }
 
 bool HloInstruction::IsCrossModuleAllReduce() const {
-  return opcode() == HloOpcode::kCrossReplicaSum && all_reduce_id();
+  return opcode() == HloOpcode::kAllReduce && all_reduce_id();
 }
 
 bool HloInstruction::IsCrossReplicaAllReduce() const {
-  return opcode() == HloOpcode::kCrossReplicaSum && !all_reduce_id();
+  return opcode() == HloOpcode::kAllReduce && !all_reduce_id();
 }
 
 string HloInstruction::ToStringWithCanonicalNameMap(
@@ -2186,7 +2185,7 @@ std::vector<string> HloInstruction::ExtraAttributesToString(
     } else if (opcode() == HloOpcode::kCall || opcode() == HloOpcode::kMap ||
                opcode() == HloOpcode::kReduceWindow ||
                opcode() == HloOpcode::kReduce ||
-               opcode() == HloOpcode::kCrossReplicaSum ||
+               opcode() == HloOpcode::kAllReduce ||
                opcode() == HloOpcode::kScatter) {
       extra.push_back(
           StrCat("to_apply=", PrintName(to_apply()->name(), options)));
@@ -2222,7 +2221,7 @@ std::vector<string> HloInstruction::ExtraAttributesToString(
       case HloOpcode::kMap:
       case HloOpcode::kReduceWindow:
       case HloOpcode::kReduce:
-      case HloOpcode::kCrossReplicaSum:
+      case HloOpcode::kAllReduce:
       case HloOpcode::kScatter:
         extra.push_back(
             StrCat("to_apply=\n", to_apply()->ToString(new_options)));
@@ -2419,8 +2418,8 @@ Status HloInstruction::Visit(DfsHloVisitorBase<HloInstructionPtr>* visitor) {
       return visitor->HandleConvolution(this);
     case HloOpcode::kFft:
       return visitor->HandleFft(this);
-    case HloOpcode::kCrossReplicaSum:
-      return visitor->HandleCrossReplicaSum(this);
+    case HloOpcode::kAllReduce:
+      return visitor->HandleAllReduce(this);
     case HloOpcode::kAllToAll:
       return visitor->HandleAllToAll(this);
     case HloOpcode::kCollectivePermute:
@@ -3275,13 +3274,12 @@ HloInstruction::source_target_pairs() const {
   return Cast<HloCollectivePermuteInstruction>(this)->source_target_pairs();
 }
 
-string HloInstruction::cross_replica_sum_barrier() const {
-  return Cast<HloAllReduceInstruction>(this)->cross_replica_sum_barrier();
+string HloInstruction::all_reduce_barrier() const {
+  return Cast<HloAllReduceInstruction>(this)->all_reduce_barrier();
 }
 
-void HloInstruction::set_cross_replica_sum_barrier(const string& barrier) {
-  return Cast<HloAllReduceInstruction>(this)->set_cross_replica_sum_barrier(
-      barrier);
+void HloInstruction::set_all_reduce_barrier(const string& barrier) {
+  return Cast<HloAllReduceInstruction>(this)->set_all_reduce_barrier(barrier);
 }
 
 absl::optional<int64> HloInstruction::all_reduce_id() const {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index e1294c37d3..36e1ab4931 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -462,9 +462,7 @@ class HloInstruction {
   // `all_reduce_id`: for Allreduce nodes from different modules, if they have
   // the same all_reduce_id, they will be 'Allreduce'd. If empty, Allreduce will
   // not be applied cross modules.
-  //
-  // TODO(b/117564385): Rename this to AllReduce.
-  static std::unique_ptr<HloInstruction> CreateCrossReplicaSum(
+  static std::unique_ptr<HloInstruction> CreateAllReduce(
       const Shape& shape, absl::Span<HloInstruction* const> operands,
       HloComputation* reduce_computation,
       const std::vector<ReplicaGroup>& replica_groups,
@@ -1459,9 +1457,9 @@ class HloInstruction {
   // Delegates to HloCollectivePermuteInstruction::source_target_pairs.
   const std::vector<std::pair<int64, int64>>& source_target_pairs() const;
 
-  // Delegates to HloAllReduceInstruction::cross_replica_sum_barrier.
-  string cross_replica_sum_barrier() const;
-  void set_cross_replica_sum_barrier(const string& barrier);
+  // Delegates to HloAllReduceInstruction::all_reduce_barrier.
+  string all_reduce_barrier() const;
+  void set_all_reduce_barrier(const string& barrier);
 
   // Delegates to HloAllReduceInstruction::all_reduce_id.
   absl::optional<int64> all_reduce_id() const;
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index a47d33314a..756e260b60 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -363,9 +363,9 @@ HloAllReduceInstruction::HloAllReduceInstruction(
     HloComputation* reduce_computation,
     const std::vector<ReplicaGroup>& replica_groups, absl::string_view barrier,
     const absl::optional<int64>& all_reduce_id)
-    : HloCollectiveInstruction(HloOpcode::kCrossReplicaSum, shape, operands,
+    : HloCollectiveInstruction(HloOpcode::kAllReduce, shape, operands,
                                replica_groups),
-      cross_replica_sum_barrier_(barrier),
+      all_reduce_barrier_(barrier),
       all_reduce_id_(all_reduce_id) {
   AppendComputation(reduce_computation);
 }
@@ -381,7 +381,7 @@ HloInstructionProto HloAllReduceInstruction::ToProto() const {
   if (all_reduce_id_) {
     proto.set_all_reduce_id(*all_reduce_id_);
   }
-  proto.set_cross_replica_sum_barrier(cross_replica_sum_barrier_);
+  proto.set_all_reduce_barrier(all_reduce_barrier_);
   return proto;
 }
 
@@ -389,8 +389,8 @@ std::vector<string> HloAllReduceInstruction::ExtraAttributesToStringImpl(
     const HloPrintOptions& options) const {
   std::vector<string> result =
       HloCollectiveInstruction::ExtraAttributesToStringImpl(options);
-  if (!cross_replica_sum_barrier().empty()) {
-    result.push_back(StrCat("barrier=\"", cross_replica_sum_barrier(), "\""));
+  if (!all_reduce_barrier().empty()) {
+    result.push_back(StrCat("barrier=\"", all_reduce_barrier(), "\""));
   }
   if (all_reduce_id_) {
     result.push_back(StrCat("all_reduce_id=", *all_reduce_id_));
@@ -405,8 +405,7 @@ bool HloAllReduceInstruction::IdenticalSlowPath(
   const auto& casted_other = static_cast<const HloAllReduceInstruction&>(other);
   return HloCollectiveInstruction::IdenticalSlowPath(other, eq_computations) &&
          eq_computations(to_apply(), casted_other.to_apply()) &&
-         cross_replica_sum_barrier() ==
-             casted_other.cross_replica_sum_barrier() &&
+         all_reduce_barrier() == casted_other.all_reduce_barrier() &&
          all_reduce_id() == casted_other.all_reduce_id();
 }
 
@@ -415,8 +414,8 @@ HloAllReduceInstruction::CloneWithNewOperandsImpl(
     const Shape& shape, absl::Span<HloInstruction* const> new_operands,
     HloCloneContext* /*context*/) const {
   return absl::make_unique<HloAllReduceInstruction>(
-      shape, new_operands, to_apply(), replica_groups(),
-      cross_replica_sum_barrier(), all_reduce_id());
+      shape, new_operands, to_apply(), replica_groups(), all_reduce_barrier(),
+      all_reduce_id());
 }
 
 HloAllToAllInstruction::HloAllToAllInstruction(
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index d875b34b62..ca212c7f2c 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -242,14 +242,10 @@ class HloAllReduceInstruction : public HloCollectiveInstruction {
       const std::vector<ReplicaGroup>& replica_groups,
       absl::string_view barrier, const absl::optional<int64>& all_reduce_id);
 
-  // Returns the barrier config used for the CrossReplicaSum implementation of
+  // Returns the barrier config used for the AllReduce implementation of
   // each backend.
-  string cross_replica_sum_barrier() const {
-    return cross_replica_sum_barrier_;
-  }
-  void set_cross_replica_sum_barrier(string barrier) {
-    cross_replica_sum_barrier_ = barrier;
-  }
+  string all_reduce_barrier() const { return all_reduce_barrier_; }
+  void set_all_reduce_barrier(string barrier) { all_reduce_barrier_ = barrier; }
 
   absl::optional<int64> all_reduce_id() const { return all_reduce_id_; }
   void set_all_reduce_id(const absl::optional<int64>& all_reduce_id);
@@ -270,8 +266,8 @@ class HloAllReduceInstruction : public HloCollectiveInstruction {
       const Shape& shape, absl::Span<HloInstruction* const> new_operands,
       HloCloneContext* context) const override;
 
-  // The string representation of the barrier config used for CrossReplicaSum.
-  string cross_replica_sum_barrier_;
+  // The string representation of the barrier config used for AllReduce.
+  string all_reduce_barrier_;
 
   // For Allreduce nodes from different modules, if they have the same
   // all_reduce_id, they will be 'Allreduce'd. If empty, Allreduce will not be
diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h
index 1fbcbdf98d..67488a6a9a 100644
--- a/tensorflow/compiler/xla/service/hlo_matchers.h
+++ b/tensorflow/compiler/xla/service/hlo_matchers.h
@@ -178,7 +178,7 @@ HLO_MATCHER(Constant);
 HLO_MATCHER(Convert);
 HLO_MATCHER(Convolution);
 HLO_MATCHER(Copy);
-HLO_MATCHER(CrossReplicaSum);
+HLO_MATCHER(AllReduce);
 HLO_MATCHER(CollectivePermute);
 HLO_MATCHER(Divide);
 HLO_MATCHER(Domain);
diff --git a/tensorflow/compiler/xla/service/hlo_opcode.h b/tensorflow/compiler/xla/service/hlo_opcode.h
index 127cfd165a..94122ac38f 100644
--- a/tensorflow/compiler/xla/service/hlo_opcode.h
+++ b/tensorflow/compiler/xla/service/hlo_opcode.h
@@ -49,6 +49,7 @@ namespace xla {
   V(kAdd, "add")                                             \
   V(kAddDependency, "add-dependency")                        \
   V(kAfterAll, "after-all", kHloOpcodeIsVariadic)            \
+  V(kAllReduce, "all-reduce")                                \
   V(kAllToAll, "all-to-all")                                 \
   V(kAtan2, "atan2")                                         \
   V(kBatchNormGrad, "batch-norm-grad")                       \
@@ -70,7 +71,6 @@ namespace xla {
   V(kConvolution, "convolution")                             \
   V(kCopy, "copy")                                           \
   V(kCos, "cosine")                                          \
-  V(kCrossReplicaSum, "cross-replica-sum")                   \
   V(kCustomCall, "custom-call")                              \
   V(kDivide, "divide")                                       \
   V(kDomain, "domain")                                       \
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index 51f7d943a2..44643951c1 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -767,7 +767,7 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder,
           HloInstruction::CreateBitcastConvert(shape, operands[0]));
       break;
     }
-    case HloOpcode::kCrossReplicaSum: {
+    case HloOpcode::kAllReduce: {
       optional<std::vector<std::vector<int64>>> tmp_groups;
       optional<HloComputation*> to_apply;
       optional<std::vector<int64>> replica_group_ids;
@@ -787,10 +787,9 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder,
       if (tmp_groups) {
         replica_groups = CreateReplicaGroups(*tmp_groups);
       }
-      instruction =
-          builder->AddInstruction(HloInstruction::CreateCrossReplicaSum(
-              shape, operands, *to_apply, replica_groups,
-              barrier ? *barrier : "", all_reduce_id));
+      instruction = builder->AddInstruction(HloInstruction::CreateAllReduce(
+          shape, operands, *to_apply, replica_groups, barrier ? *barrier : "",
+          all_reduce_id));
       break;
     }
     case HloOpcode::kAllToAll: {
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index 80882d490d..ef31cec327 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1117,9 +1117,9 @@ ENTRY Gather {
 
 )"
 },
-// cross-replica-sum
+// all-reduce
 {
-"CrossReplicaSum",
+"AllReduce",
 R"(HloModule CRS
 
 add {
@@ -1130,14 +1130,14 @@ add {
 
 ENTRY CRS {
   input = f32[8]{0} parameter(0)
-  ROOT crs = f32[8]{0} cross-replica-sum(input), replica_groups={}, to_apply=add
+  ROOT crs = f32[8]{0} all-reduce(input), replica_groups={}, to_apply=add
 }
 
 )"
 },
-// cross-replica-sum with subgroups
+// all-reduce with subgroups
 {
-"CrossReplicaSumWithSubgroups",
+"AllReduceWithSubgroups",
 R"(HloModule CRS_Subgroups
 
 add {
@@ -1146,16 +1146,16 @@ add {
   ROOT add = f32[] add(lhs, rhs)
 }
 
-ENTRY CrossReplicaSumWithSubgroups {
+ENTRY AllReduceWithSubgroups {
   input = f32[128,32]{0,1} parameter(0)
-  ROOT cross-replica-sum = f32[128,32]{0,1} cross-replica-sum(input), replica_groups={{0,1},{2,3}}, barrier="abc", to_apply=add
+  ROOT all-reduce = f32[128,32]{0,1} all-reduce(input), replica_groups={{0,1},{2,3}}, barrier="abc", to_apply=add
 }
 
 )"
 },
-// cross-replica-sum with all-reduce-id
+// all-reduce with all-reduce-id
 {
-"CrossReplicaSumAllReduce",
+"AllReduceAllReduce",
 R"(HloModule CRS
 
 add {
@@ -1166,8 +1166,8 @@ add {
 
 ENTRY CRS {
   input = f32[8]{0} parameter(0)
-  crs.1 = f32[8]{0} cross-replica-sum(input), replica_groups={{0}}, all_reduce_id=1, to_apply=add
-  ROOT crs.0 = f32[8]{0} cross-replica-sum(input), replica_groups={{0}}, all_reduce_id=1, to_apply=add
+  crs.1 = f32[8]{0} all-reduce(input), replica_groups={{0}}, all_reduce_id=1, to_apply=add
+  ROOT crs.0 = f32[8]{0} all-reduce(input), replica_groups={{0}}, all_reduce_id=1, to_apply=add
 }
 
 )"
diff --git a/tensorflow/compiler/xla/service/hlo_reachability.cc b/tensorflow/compiler/xla/service/hlo_reachability.cc
index 4aa8067752..edaa4c59e2 100644
--- a/tensorflow/compiler/xla/service/hlo_reachability.cc
+++ b/tensorflow/compiler/xla/service/hlo_reachability.cc
@@ -93,7 +93,7 @@ std::unique_ptr<HloReachabilityMap> HloReachabilityMap::Build(
         }
         break;
       }
-      case HloOpcode::kCrossReplicaSum: {
+      case HloOpcode::kAllReduce: {
         auto all_reduce_id = hlo->all_reduce_id();
         if (all_reduce_id) {
           auto it = channel_dependency_map.find(all_reduce_id.value());
diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
index 48add75523..ac74e2432f 100644
--- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc
+++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc
@@ -63,7 +63,7 @@ bool IsRematerializable(const HloInstruction* instruction) {
     case HloOpcode::kCall:
     case HloOpcode::kConstant:
     case HloOpcode::kConditional:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kCustomCall:
     case HloOpcode::kParameter:
     case HloOpcode::kWhile:
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc
index 3a5d5d17a0..e1c737132f 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.cc
+++ b/tensorflow/compiler/xla/service/hlo_verifier.cc
@@ -44,7 +44,7 @@ bool IsCallerInstruction(HloInstruction* hlo) {
     case HloOpcode::kCall:
     case HloOpcode::kConditional:
     case HloOpcode::kWhile:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kMap:
     case HloOpcode::kReduce:
     case HloOpcode::kReduceWindow:
@@ -167,13 +167,12 @@ Status ShapeVerifier::HandleFft(HloInstruction* fft) {
   return CheckShape(fft, expected);
 }
 
-Status ShapeVerifier::HandleCrossReplicaSum(HloInstruction* crs) {
+Status ShapeVerifier::HandleAllReduce(HloInstruction* crs) {
   std::vector<const Shape*> operand_shapes;
   for (const HloInstruction* operand : crs->operands()) {
     operand_shapes.push_back(&operand->shape());
   }
-  return CheckShape(crs,
-                    ShapeInference::InferCrossReplicaSumShape(operand_shapes));
+  return CheckShape(crs, ShapeInference::InferAllReduceShape(operand_shapes));
 }
 
 Status ShapeVerifier::HandleAllToAll(HloInstruction* hlo) {
@@ -685,7 +684,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) {
     case HloOpcode::kCall:
     case HloOpcode::kConditional:
     case HloOpcode::kConstant:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kCustomCall:
     case HloOpcode::kDomain:
     case HloOpcode::kFusion:
@@ -1346,7 +1345,7 @@ class InstructionVerifier : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
-  Status HandleCrossReplicaSum(HloInstruction* crs) override {
+  Status HandleAllReduce(HloInstruction* crs) override {
     if (crs->all_reduce_id().has_value()) {
       TF_RET_CHECK(crs->all_reduce_id().value() > 0)
           << "All reduce id must be greater than 0 for "
diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h
index e4d0c3d695..a1a6aba972 100644
--- a/tensorflow/compiler/xla/service/hlo_verifier.h
+++ b/tensorflow/compiler/xla/service/hlo_verifier.h
@@ -52,7 +52,7 @@ class ShapeVerifier : public DfsHloVisitor {
   Status HandleDot(HloInstruction* dot) override;
   Status HandleConvolution(HloInstruction* convolution) override;
   Status HandleFft(HloInstruction* fft) override;
-  Status HandleCrossReplicaSum(HloInstruction* crs) override;
+  Status HandleAllReduce(HloInstruction* crs) override;
   Status HandleAllToAll(HloInstruction* hlo) override;
   Status HandleCollectivePermute(HloInstruction* hlo) override;
   Status HandleReducePrecision(HloInstruction* reduce_precision) override;
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index 3ea0b81d0d..0744871529 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -127,7 +127,7 @@ bool IsAlwaysDuplicable(const HloInstruction& instruction) {
     case HloOpcode::kCall:
     case HloOpcode::kConditional:
     case HloOpcode::kConvolution:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kAllToAll:
     case HloOpcode::kCollectivePermute:
     case HloOpcode::kCustomCall:
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index eddef850cf..b9ddd9636f 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -2012,7 +2012,7 @@ bool LayoutAssignment::InstructionCanChangeLayout(
     case HloOpcode::kConditional:
     case HloOpcode::kConvert:
     case HloOpcode::kCos:
-    case HloOpcode::kCrossReplicaSum:
+    case HloOpcode::kAllReduce:
     case HloOpcode::kAllToAll:
     case HloOpcode::kCollectivePermute:
     case HloOpcode::kDivide:
diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc
index 9fe8c3accb..31d78752f0 100644
--- a/tensorflow/compiler/xla/service/layout_assignment_test.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc
@@ -894,11 +894,11 @@ TEST_F(LayoutAssignmentTest, AllReduceLayoutMissmatch) {
     ENTRY entry_computation {
       param = (f32[2,2]) parameter(0)
       gte = f32[2,2] get-tuple-element(param), index=0
-      ar.0 = f32[2,2] cross-replica-sum(gte),
+      ar.0 = f32[2,2] all-reduce(gte),
         all_reduce_id=1, replica_groups={{0}}, to_apply=add,
         sharding={maximal device=0}
       const = f32[2,2] constant({{0,1},{2,3}})
-      ROOT ar.1 = f32[2,2] cross-replica-sum(const),
+      ROOT ar.1 = f32[2,2] all-reduce(const),
         all_reduce_id=1, replica_groups={{0}}, to_apply=add,
         sharding={maximal device=1}
     })";
diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h
index 81db3bb643..fdb6a9b01b 100644
--- a/tensorflow/compiler/xla/service/pattern_matcher.h
+++ b/tensorflow/compiler/xla/service/pattern_matcher.h
@@ -2036,7 +2036,7 @@ XLA_UNOP_PATTERN(Ceil)
 XLA_UNOP_PATTERN(Convert)
 XLA_UNOP_PATTERN(Copy)
 XLA_UNOP_PATTERN(Cos)
-XLA_UNOP_PATTERN(CrossReplicaSum)
+XLA_UNOP_PATTERN(AllReduce)
 XLA_UNOP_PATTERN(Exp)
 XLA_UNOP_PATTERN(Fft)
 XLA_UNOP_PATTERN(Floor)
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index e6e118d0d2..8e571675c7 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -1833,7 +1833,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
 #undef RET_CHECK_RANK
 }
 
-/* static */ StatusOr<Shape> ShapeInference::InferCrossReplicaSumShape(
+/* static */ StatusOr<Shape> ShapeInference::InferAllReduceShape(
     absl::Span<const Shape* const> operand_shapes) {
   for (const Shape* operand_shape : operand_shapes) {
     TF_RETURN_IF_ERROR(
diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h
index 9f56415599..1b8fd10d69 100644
--- a/tensorflow/compiler/xla/service/shape_inference.h
+++ b/tensorflow/compiler/xla/service/shape_inference.h
@@ -118,7 +118,7 @@ class ShapeInference {
 
   // Infers the shape produced by a cross replica sum with the given operand
   // shapes.
-  static StatusOr<Shape> InferCrossReplicaSumShape(
+  static StatusOr<Shape> InferAllReduceShape(
       absl::Span<const Shape* const> operand_shapes);
 
   // Infers final shape of an Alltoall operation that is created by the xla
diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD
index face72a066..ee24d4d99c 100644
--- a/tensorflow/compiler/xla/tests/BUILD
+++ b/tensorflow/compiler/xla/tests/BUILD
@@ -1635,8 +1635,8 @@ xla_test(
 )
 
 xla_test(
-    name = "cross_replica_sum_test",
-    srcs = ["cross_replica_sum_test.cc"],
+    name = "all_reduce_test",
+    srcs = ["all_reduce_test.cc"],
     deps = [
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:shape_util",
diff --git a/tensorflow/compiler/xla/tests/cross_replica_sum_test.cc b/tensorflow/compiler/xla/tests/all_reduce_test.cc
similarity index 94%
rename from tensorflow/compiler/xla/tests/cross_replica_sum_test.cc
rename to tensorflow/compiler/xla/tests/all_reduce_test.cc
index 410732c07b..7e695f829e 100644
--- a/tensorflow/compiler/xla/tests/cross_replica_sum_test.cc
+++ b/tensorflow/compiler/xla/tests/all_reduce_test.cc
@@ -41,7 +41,7 @@ XLA_TEST_F(TrivialCrossReplicaSumTest, OneOperand) {
 
   ENTRY test_computation {
     p = f32[3] parameter(0)
-    ROOT crs = f32[3] cross-replica-sum(p), to_apply=add
+    ROOT crs = f32[3] all-reduce(p), to_apply=add
   })";
   auto module =
       ParseHloString(module_str, GetModuleConfigForTest()).ValueOrDie();
@@ -62,7 +62,7 @@ XLA_TEST_F(TrivialCrossReplicaSumTest, MultipleOperands) {
   ENTRY test_computation {
     p0 = f32[3] parameter(0)
     p1 = f32[2] parameter(1)
-    ROOT crs = (f32[3], f32[2]) cross-replica-sum(p0, p1), to_apply=add
+    ROOT crs = (f32[3], f32[2]) all-reduce(p0, p1), to_apply=add
   })";
   auto module =
       ParseHloString(module_str, GetModuleConfigForTest()).ValueOrDie();
@@ -88,7 +88,7 @@ XLA_TEST_F(TrivialCrossReplicaSumTest, ConstantOperand) {
   ENTRY test_computation {
     p0 = f32[3] parameter(0)
     p1 = f32[2] constant({10, 20})
-    ROOT crs = (f32[3], f32[2]) cross-replica-sum(p0, p1), to_apply=add
+    ROOT crs = (f32[3], f32[2]) all-reduce(p0, p1), to_apply=add
   })";
   auto module =
       ParseHloString(module_str, GetModuleConfigForTest()).ValueOrDie();
-- 
GitLab


From e4d5cde00cde622794cfd375f19e1556870607d0 Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Mon, 17 Dec 2018 22:10:16 -0800
Subject: [PATCH 730/873] Add test for xla.estimator_model_fn.

PiperOrigin-RevId: 225941631
---
 tensorflow/contrib/compiler/BUILD       |  16 +-
 tensorflow/contrib/compiler/xla_test.py | 340 ++++++++++++++++++++++++
 2 files changed, 352 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/compiler/BUILD b/tensorflow/contrib/compiler/BUILD
index e4566437c6..e32097cedd 100644
--- a/tensorflow/contrib/compiler/BUILD
+++ b/tensorflow/contrib/compiler/BUILD
@@ -70,22 +70,30 @@ py_library(
     ],
 )
 
-tf_py_test(
+cuda_py_test(
     name = "xla_test",
     srcs = ["xla_test.py"],
     additional_deps = [
         ":xla",
-        "@six_archive//:six",
+        "@absl_py//absl/testing:parameterized",
+        "//tensorflow/compiler/tests:xla_test",
+        "//tensorflow/contrib/tpu:tpu_estimator",
+        "//tensorflow/contrib/tpu:tpu_lib",
+        "//tensorflow/python:client_testlib",
         "//tensorflow/python:constant_op",
         "//tensorflow/python:control_flow_ops",
         "//tensorflow/python:control_flow_util",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform",
-        "//tensorflow/contrib/tpu:tpu_lib",
         "//tensorflow/python:state_ops",
         "//tensorflow/python:summary",
         "//tensorflow/python:training",
         "//tensorflow/python:variable_scope",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+    tags = [
+        "no_mac",
+        "no_windows",
     ],
-    tags = ["no_pip"],
+    xla_enabled = True,
 )
diff --git a/tensorflow/contrib/compiler/xla_test.py b/tensorflow/contrib/compiler/xla_test.py
index 3b49755afc..a85b2dd155 100644
--- a/tensorflow/contrib/compiler/xla_test.py
+++ b/tensorflow/contrib/compiler/xla_test.py
@@ -18,11 +18,19 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import re
+from absl.testing import parameterized
+
 from tensorflow.contrib.compiler import xla
+from tensorflow.contrib.tpu.python.tpu import tpu_estimator
 from tensorflow.contrib.tpu.python.tpu import tpu_feed
+from tensorflow.contrib.training.python.training import hparam
 from tensorflow.python import summary
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import logging_ops
@@ -30,6 +38,14 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import test
+from tensorflow.python.training import training
+
+
+_TRAIN = model_fn_lib.ModeKeys.TRAIN
+_EVAL = model_fn_lib.ModeKeys.EVAL
+_EXPECTED_LOSS = 1
+_EXPECTED_FEATURE = 2
+_EXPECTED_LABEL = 3
 
 
 class XLACompileContextTest(test.TestCase):
@@ -252,5 +268,329 @@ class CheckFunctionArgumentCountTest(test.TestCase):
                      xla.check_function_argument_count(func, 0, queue))
 
 
+def _test_train_model_fn(features, labels, mode, params):
+  """A dummy model_fn for testing purpose."""
+  del features, labels, params
+  loss = constant_op.constant(_EXPECTED_LOSS)
+  return model_fn_lib.EstimatorSpec(
+      mode=mode, loss=loss, train_op=array_ops.identity(loss))
+
+
+@xla.estimator_model_fn
+def decorated_model_fn(features, labels, mode, params):
+  return _test_train_model_fn(features, labels, mode, params)
+
+
+def make_dummy_features_labels():
+  # XLA CPU/GPU backend doesn't support guaranteed constant, thus use dataset
+  # container to work around.
+  features_dataset = dataset_ops.Dataset.from_tensors(
+      constant_op.constant(_EXPECTED_FEATURE)).repeat(10)
+  features_op = features_dataset.make_one_shot_iterator().get_next()
+  labels_dataset = dataset_ops.Dataset.from_tensors(
+      constant_op.constant(_EXPECTED_LABEL)).repeat(10)
+  labels_op = labels_dataset.make_one_shot_iterator().get_next()
+  return features_op, labels_op
+
+
+class XlaDecoratorTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ('test_use_as_decorator', decorated_model_fn, None),
+      ('test_use_as_function', xla.estimator_model_fn(_test_train_model_fn),
+       None),
+      ('test_use_tpu_false_hparams', decorated_model_fn,
+       hparam.HParams(use_tpu=False)),
+      ('test_use_tpu_false_dict_params', decorated_model_fn, {
+          'use_tpu': False
+      }),
+  )
+  def test_compile(self, model_fn, params):
+    """Calls model_fn and verifies it is compiled."""
+    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      mock_xla_compile.return_value = [loss]
+
+      features, labels = make_dummy_features_labels()
+      estimator_spec = model_fn(
+          features=features, labels=labels, mode=_TRAIN, params=params or {})
+
+      mock_xla_compile.assert_called_once()
+      self.assertEqual(estimator_spec.mode, _TRAIN)
+
+      with self.test_session() as sess:
+        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
+        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))
+
+  @parameterized.named_parameters(
+      ('test_use_tpu_true_hparams', decorated_model_fn,
+       hparam.HParams(use_tpu=True)),
+      ('test_use_tpu_true_dict_params', decorated_model_fn, {
+          'use_tpu': True
+      }),
+  )
+  def test_not_compile(self, model_fn, params):
+    """Calls model_fn and verifies it is NOT compiled."""
+    with test.mock.patch.object(xla, 'compile') as mock_xla_compile:
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      mock_xla_compile.return_value = [loss]
+
+      features, labels = make_dummy_features_labels()
+      estimator_spec = model_fn(
+          features=features, labels=labels, mode=_TRAIN, params=params or {})
+
+      mock_xla_compile.assert_not_called()
+      self.assertEqual(estimator_spec.mode, _TRAIN)
+
+      with self.test_session() as sess:
+        self.assertEqual(sess.run(estimator_spec.loss), sess.run(loss))
+        self.assertEqual(sess.run(estimator_spec.train_op), sess.run(loss))
+
+  def test_model_with_summary(self):
+    """Tests that summary ops are disabled."""
+
+    @xla.estimator_model_fn
+    def model_fn_with_summary(features, labels, mode, params):
+      del features, labels, params
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      summary.scalar('loss_scalar_summary', loss)
+      summary.histogram('loss_histogram_summary', loss)
+      summary.image('loss_image_summary', loss)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode, loss=loss, train_op=array_ops.identity(loss))
+
+    features, labels = make_dummy_features_labels()
+    estimator_spec = model_fn_with_summary(
+        features=features, labels=labels, mode=_TRAIN, params={})
+
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+
+
+def _test_eval_metric_fn(eval_tensor_1, eval_tensor_2):
+  return {
+      'metric_1': (eval_tensor_1, eval_tensor_1),
+      'metric_2': (eval_tensor_2, eval_tensor_2),
+  }
+
+
+class XlaDecoratorEvaluationTest(test.TestCase):
+
+  def _verify_evaluation_result(self, eval_model_fn):
+    features, labels = make_dummy_features_labels()
+    estimator_spec = eval_model_fn(
+        features=features, labels=labels, mode=_EVAL, params={})
+
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+      self.assertEqual(
+          sess.run(estimator_spec.eval_metric_ops['metric_1'][0]),
+          _EXPECTED_FEATURE + _EXPECTED_LABEL)
+      self.assertEqual(
+          sess.run(estimator_spec.eval_metric_ops['metric_1'][1]),
+          _EXPECTED_FEATURE + _EXPECTED_LABEL)
+      self.assertEqual(
+          sess.run(estimator_spec.eval_metric_ops['metric_2'][0]),
+          _EXPECTED_FEATURE - _EXPECTED_LABEL)
+      self.assertEqual(
+          sess.run(estimator_spec.eval_metric_ops['metric_2'][1]),
+          _EXPECTED_FEATURE - _EXPECTED_LABEL)
+
+  def test_eval_base_estimator_spec_eval_metric_ops_disallowed(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn_return_estimator_spec(features, labels, mode, params):
+      del features, labels, params
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      return model_fn_lib.EstimatorSpec(
+          mode=mode,
+          loss=loss,
+          eval_metric_ops={
+              'metric': (array_ops.identity(loss), control_flow_ops.no_op())
+          })
+
+    with self.assertRaisesRegexp(
+        ValueError, 'EstimatorSpec.eval_metric_ops is not supported with XLA '
+        'compilation. Please use TPUEstimatorSpec.eval_metrics instead.'):
+      self._verify_evaluation_result(eval_model_fn_return_estimator_spec)
+
+  def test_eval_base_estimator_spec_no_eval_metric_ops(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn_no_eval_metric_ops(features, labels, mode, params):
+      del features, labels, params
+      return model_fn_lib.EstimatorSpec(
+          mode=mode, loss=constant_op.constant(_EXPECTED_LOSS))
+
+    features, labels = make_dummy_features_labels()
+    estimator_spec = eval_model_fn_no_eval_metric_ops(
+        features=features, labels=labels, mode=_EVAL, params={})
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+
+  def test_eval_no_eval_metrics(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn_no_eval_metrics(features, labels, mode, params):
+      del features, labels, params
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode, loss=constant_op.constant(_EXPECTED_LOSS))
+
+    features, labels = make_dummy_features_labels()
+    estimator_spec = eval_model_fn_no_eval_metrics(
+        features=features, labels=labels, mode=_EVAL, params={})
+
+    self.assertEqual(estimator_spec.eval_metric_ops, {})
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+
+  def test_eval_fn_missing_input_tensor(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn(features, labels, mode, params):
+      del params
+      dummy_eval_metric_fn_tensors_dict = {
+          'eval_tensor_1': features + labels,
+      }
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          eval_metrics=(_test_eval_metric_fn,
+                        dummy_eval_metric_fn_tensors_dict))
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        re.escape("Arguments ['eval_tensor_2'] are needed by metric_fn (first "
+                  'element of TPUEstimatorSpec.eval_metrics) but they are not '
+                  'provided by evaluation tensors (second element of '
+                  'TPUEstimatorSpec.eval_metrics).')):
+      self._verify_evaluation_result(eval_model_fn)
+
+  def test_eval_fn_extraneous_input_tensor(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn(features, labels, mode, params):
+      del params
+      dummy_eval_metric_fn_tensors_dict = {
+          'eval_tensor_1': features + labels,
+          'eval_tensor_2': features - labels,
+          'extra_tensor': features * 2 - labels,
+      }
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          eval_metrics=(_test_eval_metric_fn,
+                        dummy_eval_metric_fn_tensors_dict))
+
+    with self.assertRaisesRegexp(
+        ValueError,
+        re.escape("Arguments ['extra_tensor'] are provided by evaluation "
+                  'tensors (second element of TPUEstimatorSpec.eval_metrics) '
+                  'but they are not needed by metric_fn (first element of '
+                  'TPUEstimatorSpec.eval_metrics).')):
+      self._verify_evaluation_result(eval_model_fn)
+
+  def test_eval_tensors_as_list(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn(features, labels, mode, params):
+      del params
+      dummy_eval_metric_fn_tensors = [features + labels, features - labels]
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          eval_metrics=(_test_eval_metric_fn, dummy_eval_metric_fn_tensors))
+
+    self._verify_evaluation_result(eval_model_fn)
+
+  def test_eval_tensors_as_dict(self):
+
+    @xla.estimator_model_fn
+    def eval_model_fn(features, labels, mode, params):
+      del params
+      dummy_eval_metric_fn_tensors_dict = {
+          'eval_tensor_1': features + labels,
+          'eval_tensor_2': features - labels,
+      }
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          eval_metrics=(_test_eval_metric_fn,
+                        dummy_eval_metric_fn_tensors_dict))
+
+    self._verify_evaluation_result(eval_model_fn)
+
+  def test_model_with_summary(self):
+    """Tests that summary ops are disabled."""
+
+    @xla.estimator_model_fn
+    def model_fn_with_summary(features, labels, mode, params):
+      del features, labels, params
+      loss = constant_op.constant(_EXPECTED_LOSS)
+      summary.scalar('loss_scalar_summary', loss)
+      summary.histogram('loss_histogram_summary', loss)
+      summary.image('loss_image_summary', loss)
+      return tpu_estimator.TPUEstimatorSpec(mode=mode, loss=loss)
+
+    features, labels = make_dummy_features_labels()
+    estimator_spec = model_fn_with_summary(
+        features=features, labels=labels, mode=_EVAL, params={})
+
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(estimator_spec.loss), _EXPECTED_LOSS)
+
+
+class XlaDecoratorScaffoldTest(test.TestCase, parameterized.TestCase):
+
+  def _make_scaffold_fn(self, mode):
+
+    def _scaffold_fn_on_cpu():
+      scaffold = training.Scaffold()
+      self.assertNotIn(mode, self.is_scaffold_fn_called)
+      self.is_scaffold_fn_called[mode] = True
+      return scaffold
+
+    return _scaffold_fn_on_cpu
+
+  def test_scaffold_fn_return_none(self):
+
+    @xla.estimator_model_fn
+    def model_fn(features, labels, mode, params):
+      del features, labels, params
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          train_op=control_flow_ops.no_op(),
+          scaffold_fn=lambda: None)
+
+    features, labels = make_dummy_features_labels()
+    with self.assertRaisesRegexp(
+        ValueError,
+        'TPUEstimatorSpec.scaffold_fn returns None, which is not allowed'):
+      model_fn(features=features, labels=labels, mode=_TRAIN, params={})
+
+  @parameterized.named_parameters(
+      ('train_mode', _TRAIN),
+      ('eval_mode', _EVAL),
+      # TODO(ycao): Add predict_mode test after PREDICT mode is implemented.
+  )
+  def test_scaffold_fn_in_mode(self, mode):
+
+    @xla.estimator_model_fn
+    def model_fn(features, labels, mode, params):
+      del features, labels, params
+      return tpu_estimator.TPUEstimatorSpec(
+          mode=mode,
+          loss=constant_op.constant(_EXPECTED_LOSS),
+          train_op=control_flow_ops.no_op(),
+          scaffold_fn=self._make_scaffold_fn(mode))
+
+    features, labels = make_dummy_features_labels()
+
+    self.is_scaffold_fn_called = {}
+    model_fn(features=features, labels=labels, mode=mode, params={})
+    self.assertTrue(self.is_scaffold_fn_called[mode])
+
+
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From a4bb2859cfa8f92fc0318b70116eb7cd02eaaae8 Mon Sep 17 00:00:00 2001
From: Blake Hechtman <blakehechtman@google.com>
Date: Mon, 17 Dec 2018 23:48:11 -0800
Subject: [PATCH 731/873] [XLA] Convert reduce windows that are actually
 reduces into reduce and reshape.

PiperOrigin-RevId: 225948274
---
 .../xla/service/algebraic_simplifier.cc       | 48 +++++++++++++++++++
 .../xla/service/algebraic_simplifier_test.cc  | 48 +++++++++++++++++--
 2 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
index 1287dcf546..9e453203ce 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc
@@ -43,6 +43,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_opcode.h"
 #include "tensorflow/compiler/xla/service/hlo_query.h"
 #include "tensorflow/compiler/xla/service/pattern_matcher.h"
+#include "tensorflow/compiler/xla/shape.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
@@ -2596,6 +2597,53 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow(
                                   function));
   }
 
+  // A reduce window can be expressed as a reduce and a reshape if all
+  // dimensions either have a window size of one or the entire dimension. If
+  // there is no stride, dilation, or padding, this is as easy as checking the
+  // size of the output shape and window dimension.
+  //
+  // The reshape is a bitcast since it adds one-sized dimensions. Often these
+  // ones are immediately removed as well with another reshape. The
+  // implementation of reduce tends to be slightly more efficient at reducing
+  // entire dimensions compared to reduce window.
+  auto effective_reduce_dims = [&] {
+    if (window_util::HasStride(window) || window_util::HasDilation(window) ||
+        window_util::HasPadding(window)) {
+      return absl::InlinedVector<int64, 8>{};
+    }
+    absl::InlinedVector<int64, 8> reduce_dims;
+    for (int64 i = 0; i < window.dimensions_size(); ++i) {
+      if (window.dimensions(i).size() == 1) {
+        continue;
+      } else if (reduce_window->shape().dimensions(i) == 1) {
+        reduce_dims.push_back(i);
+      } else {
+        return absl::InlinedVector<int64, 8>{};
+      }
+    }
+    return reduce_dims;
+  }();
+
+  // If a reduce window can be expressed as a reduce, do so and reshape the
+  // output.
+  if (!effective_reduce_dims.empty()) {
+    Shape reduce_shape = ShapeUtil::FilterDimensions(
+        [&](int64 dim) {
+          return !absl::c_linear_search(effective_reduce_dims, dim);
+        },
+        reduce_window->shape());
+    HloInstruction* reduce =
+        computation_->AddInstruction(HloInstruction::CreateReduce(
+            /*shape=*/reduce_shape,
+            /*operand=*/operand,
+            /*init_value=*/reduce_window->mutable_operand(1),
+            /*dimensions_to_reduce=*/effective_reduce_dims,
+            /*reduce_computation=*/function));
+    return ReplaceWithNewInstruction(
+        reduce_window,
+        HloInstruction::CreateReshape(reduce_window->shape(), reduce));
+  }
+
   // This optimization folds a pad op into reduce_window.
   HloInstruction* pad;
   const HloInstruction* convert = nullptr;
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
index e6015370aa..a9d617cbf6 100644
--- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
+++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc
@@ -1283,6 +1283,51 @@ TEST_F(AlgebraicSimplifierTest, ZeroSizedConvolution) {
               GmockMatch(m::Broadcast(m::Constant())));
 }
 
+TEST_F(AlgebraicSimplifierTest, ReduceWindowIsReduceAndReshape) {
+  auto m = CreateNewVerifiedModule();
+  auto builder = HloComputation::Builder(TestName());
+  HloInstruction* param =
+      builder.AddInstruction(HloInstruction::CreateParameter(
+          0, ShapeUtil::MakeShape(F32, {1, 2, 3, 4}), "param"));
+  Window window;
+  for (int64 i = 0; i < 4; ++i) {
+    WindowDimension* dim = window.add_dimensions();
+    // Makes 1x2x3x1 window.
+    dim->set_size((i % 3) + 1);
+    dim->set_stride(1);
+    dim->set_padding_low(0);
+    dim->set_padding_high(0);
+    dim->set_window_dilation(1);
+    dim->set_base_dilation(1);
+  }
+  // Create add computation.
+  HloComputation* add_computation = nullptr;
+  {
+    HloComputation::Builder builder(TestName() + ".add");
+    const Shape scalar_shape = ShapeUtil::MakeShape(F32, {});
+    HloInstruction* p0 = builder.AddInstruction(
+        HloInstruction::CreateParameter(0, scalar_shape, "p0"));
+    HloInstruction* p1 = builder.AddInstruction(
+        HloInstruction::CreateParameter(1, scalar_shape, "p1"));
+    builder.AddInstruction(
+        HloInstruction::CreateBinary(scalar_shape, HloOpcode::kAdd, p0, p1));
+    add_computation = m->AddEmbeddedComputation(builder.Build());
+  }
+  builder.AddInstruction(HloInstruction::CreateReduceWindow(
+      ShapeUtil::MakeShape(F32, {1, 1, 1, 4}), param,
+      builder.AddInstruction(
+          HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(0.0f))),
+      window, add_computation));
+  m->AddEntryComputation(builder.Build());
+  HloPassFix<AlgebraicSimplifier> simplifier(default_options_);
+  EXPECT_THAT(m->entry_computation()->root_instruction(),
+              GmockMatch(m::ReduceWindow(m::Parameter(0), m::Constant())));
+  ASSERT_TRUE(simplifier.Run(m.get()).ValueOrDie());
+  EXPECT_THAT(
+      m->entry_computation()->root_instruction(),
+      GmockMatch(m::Reshape(m::Reduce(m::Parameter(0), m::Constant()))));
+}
+
 TEST_F(AlgebraicSimplifierTest, ZeroSizedReduceWindow) {
   auto m = CreateNewVerifiedModule();
   auto builder = HloComputation::Builder(TestName());
@@ -4087,9 +4132,6 @@ PadReduceWindowEffectiveBroadcastCases() {
       {/*input_spatials=*/{2, 2}, /*symmetric_pad_amount=*/{6, 6},
        /*reduce_window_spatials=*/{7, 7}, /*prepend_a=*/true,
        /*should_become_broadcast=*/false},  //
-      {/*input_spatials=*/{1, 1}, /*symmetric_pad_amount=*/{2, 2},
-       /*reduce_window_spatials=*/{5, 5}, /*prepend_a=*/true,
-       /*should_become_broadcast=*/true},  //
       {/*input_spatials=*/{1, 1}, /*symmetric_pad_amount=*/{2, 2},
        /*reduce_window_spatials=*/{1, 1}, /*prepend_a=*/true,
        /*should_become_broadcast=*/false},  //
-- 
GitLab


From 263bf87f4f0f3fc35295555296badf928d37b7dd Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 18 Dec 2018 00:45:06 -0800
Subject: [PATCH 732/873] tf.saved_model.save: Fix saving non-hashable data
 structures

For object_map we don't want to hash the contents of anything, we want object-identity. Even if non-hashable things don't end up in object_map, we still need to be able to check for them.

PiperOrigin-RevId: 225953468
---
 tensorflow/python/saved_model/save.py         |  2 +-
 tensorflow/python/saved_model/save_test.py    | 21 +++++++++++++++++++
 .../python/training/checkpointable/util.py    | 16 +++++++-------
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index 6c2d5e6f2b..9005965d23 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -410,7 +410,7 @@ def _map_resources(accessible_objects):
   """
   # TODO(allenl): Handle MirroredVariables and other types of variables which
   # may need special casing.
-  object_map = {}
+  object_map = util.ObjectIdentityDictionary()
   resource_map = {}
   asset_info = _AssetInfo(
       asset_defs=[],
diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py
index 5381c2f031..005bc99afa 100644
--- a/tensorflow/python/saved_model/save_test.py
+++ b/tensorflow/python/saved_model/save_test.py
@@ -260,6 +260,27 @@ class SaveTest(test.TestCase):
     self.assertAllClose({"output_0": 7.},
                         _import_and_infer(save_dir, {"x": 3.}))
 
+  def test_datastructures(self):
+
+    class HasDatastructures(util.Checkpoint):
+
+      def __init__(self):
+        self.a = [1.]
+        self.a.append(variables.Variable(2.))
+        self.b = {"a": variables.Variable(3.)}
+
+      @def_function.function(input_signature=[tensor_spec.TensorSpec(
+          shape=None, dtype=dtypes.float32)])
+      def add(self, x):
+        return x + math_ops.add_n(self.a) + self.b["a"]
+
+    to_save = HasDatastructures()
+    to_save.add(constant_op.constant(1.))
+    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
+    save.save(to_save, save_dir)
+    self.assertAllClose({"output_0": 10.},
+                        _import_and_infer(save_dir, {"x": 4.}))
+
   def test_default_attr_stripping(self):
 
     class Complex(util.Checkpoint):
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
index fde91948e5..5d5a67714c 100644
--- a/tensorflow/python/training/checkpointable/util.py
+++ b/tensorflow/python/training/checkpointable/util.py
@@ -394,7 +394,7 @@ class _WeakObjectIdentityWrapper(_ObjectIdentityWrapper):
     return self._wrapped()
 
 
-class _ObjectIdentityDictionary(collections.MutableMapping):
+class ObjectIdentityDictionary(collections.MutableMapping):
   """A mutable mapping data structure which compares using "is".
 
   This is necessary because we have checkpointable objects (_ListWrapper) which
@@ -425,7 +425,7 @@ class _ObjectIdentityDictionary(collections.MutableMapping):
       yield key.unwrapped
 
 
-class _ObjectIdentityWeakKeyDictionary(_ObjectIdentityDictionary):
+class _ObjectIdentityWeakKeyDictionary(ObjectIdentityDictionary):
   """Like weakref.WeakKeyDictionary, but compares objects with "is"."""
 
   def _wrap_key(self, key):
@@ -496,7 +496,7 @@ def _breadth_first_checkpointable_traversal(root_checkpointable):
   """Find shortest paths to all variables owned by dependencies of root."""
   bfs_sorted = []
   to_visit = collections.deque([root_checkpointable])
-  path_to_root = _ObjectIdentityDictionary()
+  path_to_root = ObjectIdentityDictionary()
   path_to_root[root_checkpointable] = ()
   while to_visit:
     current_checkpointable = to_visit.popleft()
@@ -558,7 +558,7 @@ def _slot_variable_naming_for_optimizer(optimizer_path):
 def _serialize_slot_variables(checkpointable_objects, node_ids, object_names):
   """Gather and name slot variables."""
   non_slot_objects = list(checkpointable_objects)
-  slot_variables = _ObjectIdentityDictionary()
+  slot_variables = ObjectIdentityDictionary()
   for checkpointable in non_slot_objects:
     if (isinstance(checkpointable, optimizer_v1.Optimizer)
         # TODO(b/110718070): Fix Keras imports.
@@ -726,10 +726,10 @@ def fill_object_graph_proto(checkpointable_objects,
 def _serialize_gathered_objects(
     checkpointable_objects, path_to_root, saveables_cache, object_map):
   """Create SaveableObjects and protos for gathered objects."""
-  object_names = _ObjectIdentityDictionary()
+  object_names = ObjectIdentityDictionary()
   for obj, path in path_to_root.items():
     object_names[obj] = _object_prefix_from_path(path)
-  node_ids = _ObjectIdentityDictionary()
+  node_ids = ObjectIdentityDictionary()
   for node_id, node in enumerate(checkpointable_objects):
     node_ids[node] = node_id
   slot_variables = _serialize_slot_variables(
@@ -793,10 +793,10 @@ def find_objects(root_checkpointable):
   """Find and number objects which are dependencies of `root_checkpointable`."""
   checkpointable_objects, path_to_root = (
       _breadth_first_checkpointable_traversal(root_checkpointable))
-  object_names = _ObjectIdentityDictionary()
+  object_names = ObjectIdentityDictionary()
   for obj, path in path_to_root.items():
     object_names[obj] = _object_prefix_from_path(path)
-  node_ids = _ObjectIdentityDictionary()
+  node_ids = ObjectIdentityDictionary()
   for node_id, node in enumerate(checkpointable_objects):
     node_ids[node] = node_id
   slot_variables = _serialize_slot_variables(
-- 
GitLab


From a5b6dcd084b76714d104492b49bf2a7c512a13c8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 01:03:13 -0800
Subject: [PATCH 733/873] compat: Update forward compatibility horizon to
 2018-12-18

PiperOrigin-RevId: 225955167
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 877e5c8947..0245fe1c25 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 17)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 18)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From 5a02334ec90f2c6a91418ae14f3f28428fd52249 Mon Sep 17 00:00:00 2001
From: Tom Hennigan <tomhennigan@google.com>
Date: Tue, 18 Dec 2018 01:27:52 -0800
Subject: [PATCH 734/873] Improve error message when function local variables
 are GC'd.

New message:

```
ValueError: A tf.Variable created inside your tf.function has been garbage-collected. Your code needs to keep Python references to variables created inside `tf.function`s.

A common way to raise this error is to create and return a variable only referenced inside your function:

@tf.function
def f():
  v = tf.Variable(1.0)
  return v

v = f()  # Crashes with this error message!

The reason this crashes is that @tf.function annotated function returns a **`tf.Tensor`** with the **value** of the variable when the function is called rather than the variable instance itself. As such there is no code holding a reference to the `v` created inside the function and Python garbage collects it.

The simplest way to fix this issue is to create variables outside the function and capture them:

v = tf.Variable(1.0)

@tf.function
def f():
  return v

f()  # <tf.Tensor: ... numpy=1.>
v.assign_add(1.)
f()  # <tf.Tensor: ... numpy=2.>
```

PiperOrigin-RevId: 225957777
---
 tensorflow/python/eager/def_function.py | 36 ++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 5a010e1880..52b481915e 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -347,9 +347,39 @@ class PolymorphicFunction(object):
         variable = wr()
         if variable is None:
           raise ValueError(
-              "Variable created in a tf.function garbage-collected. Code needs"
-              " to keep python references to variables created in a"
-              " tf.function.")
+              "A tf.Variable created inside your tf.function has been"
+              " garbage-collected. Your code needs to keep Python references"
+              " to variables created inside `tf.function`s.\n"
+              "\n"
+              "A common way to raise this error is to create and return a"
+              " variable only referenced inside your function:\n"
+              "\n"
+              "@tf.function\n"
+              "def f():\n"
+              "  v = tf.Variable(1.0)\n"
+              "  return v\n"
+              "\n"
+              "v = f()  # Crashes with this error message!\n"
+              "\n"
+              "The reason this crashes is that @tf.function annotated"
+              " function returns a **`tf.Tensor`** with the **value** of the"
+              " variable when the function is called rather than the"
+              " variable instance itself. As such there is no code holding a"
+              " reference to the `v` created inside the function and Python"
+              " garbage collects it.\n"
+              "\n"
+              "The simplest way to fix this issue is to create variables"
+              " outside the function and capture them:\n"
+              "\n"
+              "v = tf.Variable(1.0)\n"
+              "\n"
+              "@tf.function\n"
+              "def f():\n"
+              "  return v\n"
+              "\n"
+              "f()  # <tf.Tensor: ... numpy=1.>\n"
+              "v.assign_add(1.)\n"
+              "f()  # <tf.Tensor: ... numpy=2.>")
         condition = math_ops.logical_and(
             condition, resource_variable_ops.var_is_initialized_op(
                 variable.handle))
-- 
GitLab


From e4bdb31636dd880fa3914d77844c4ac3079ecf16 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Tue, 18 Dec 2018 01:35:35 -0800
Subject: [PATCH 735/873] Add versioning based on input/output types in toco.

To make sure we keep toco versioning as a single source of versioning information. Dequantize is the ops that support incremented version based on the int8 type, currently but the hybrid ops will soon follow.

PiperOrigin-RevId: 225958674
---
 tensorflow/lite/toco/model.h                  |  12 +-
 tensorflow/lite/toco/tflite/export.cc         |  19 +-
 tensorflow/lite/toco/tflite/export.h          |   6 +-
 tensorflow/lite/toco/tflite/export_test.cc    |  67 ++++-
 tensorflow/lite/toco/tflite/operator.cc       | 255 +++++++++++++-----
 tensorflow/lite/toco/tflite/operator.h        |   8 +-
 tensorflow/lite/toco/tflite/operator_test.cc  |   8 +-
 tensorflow/lite/toco/tflite/simple_operator.h |   4 +-
 8 files changed, 276 insertions(+), 103 deletions(-)

diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h
index d392535f5c..e71d36583e 100644
--- a/tensorflow/lite/toco/model.h
+++ b/tensorflow/lite/toco/model.h
@@ -376,7 +376,7 @@ struct Operator {
   // Output activation arrays. Same comments as for inputs apply here too.
   std::vector<string> outputs;
 
-  // If true, the array has more outputs than are listed in the 'outputs'
+  // If true, the operator has more outputs than are listed in the 'outputs'
   // member. These need to be resolved by some graph transformation.
   // This flag is only here to indicate that an operator should not be
   // discarded as unused, even if from its 'outputs' member alone it
@@ -2208,6 +2208,16 @@ class Model {
   // addresses. See Operator::inputs, Operator::outputs.
   std::unordered_map<string, std::unique_ptr<Array>> arrays;
 };
+
+// OperatorSignature contains the information required to making versioning
+// decisions.
+struct OperatorSignature {
+  // The operator.
+  const Operator* op;
+
+  // The model in which the operator resides.
+  const Model* model;
+};
 }  // namespace toco
 
 #endif  // TENSORFLOW_LITE_TOCO_MODEL_H_
diff --git a/tensorflow/lite/toco/tflite/export.cc b/tensorflow/lite/toco/tflite/export.cc
index 8b9448486d..53f5ecef87 100644
--- a/tensorflow/lite/toco/tflite/export.cc
+++ b/tensorflow/lite/toco/tflite/export.cc
@@ -106,16 +106,17 @@ void WriteModelToString(const flatbuffers::FlatBufferBuilder& builder,
 namespace details {
 
 OperatorKey::OperatorKey(
-    const ::toco::Operator& op,
+    const ::toco::OperatorSignature& op_signature,
     const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
     bool enable_select_tf_ops) {
   // Get the op name (by Toco definition).
+  const ::toco::Operator& op = *op_signature.op;
   string name = HelpfulOperatorTypeName(op);
 
   bool is_builtin = false;
   const auto& builtin_ops = GetBuiltinOpsMap();
   if (ops_by_type.count(op.type) != 0) {
-    version_ = ops_by_type.at(op.type)->GetVersion(op);
+    version_ = ops_by_type.at(op.type)->GetVersion(op_signature);
     name = ops_by_type.at(op.type)->name();
     is_builtin = (builtin_ops.count(name) > 0);
   }
@@ -190,7 +191,8 @@ void LoadOperatorsMap(
   // First find a list of unique operator types.
   std::set<OperatorKey> keys;
   for (const auto& op : model.operators) {
-    keys.insert(OperatorKey(*op, ops_by_type, enable_select_tf_ops));
+    const toco::OperatorSignature op_signature = {op.get(), &model};
+    keys.insert(OperatorKey(op_signature, ops_by_type, enable_select_tf_ops));
   }
   // Now assign indices to them and fill in the map.
   int index = 0;
@@ -301,8 +303,9 @@ Offset<Vector<Offset<OperatorCode>>> ExportOperatorCodes(
   std::map<int, Offset<OperatorCode>> ordered_opcodes;
 
   for (const auto& op : model.operators) {
-    const details::OperatorKey operator_key =
-        details::OperatorKey(*op, ops_by_type, params.enable_select_tf_ops);
+    const toco::OperatorSignature op_signature = {op.get(), &model};
+    const details::OperatorKey operator_key = details::OperatorKey(
+        op_signature, ops_by_type, params.enable_select_tf_ops);
     int op_index = operators_map.at(operator_key);
 
     flatbuffers::Offset<flatbuffers::String> custom_code = 0;
@@ -349,9 +352,9 @@ Offset<Vector<Offset<Operator>>> ExportOperators(
     for (const string& output : op->outputs) {
       outputs.push_back(tensors_map.at(output));
     }
-
-    const auto key =
-        details::OperatorKey(*op, ops_by_type, params.enable_select_tf_ops);
+    const toco::OperatorSignature op_signature = {op.get(), &model};
+    const auto key = details::OperatorKey(op_signature, ops_by_type,
+                                          params.enable_select_tf_ops);
     int op_index = operators_map.at(key);
 
     auto tflite_op_it = ops_by_type.find(op->type);
diff --git a/tensorflow/lite/toco/tflite/export.h b/tensorflow/lite/toco/tflite/export.h
index adf6757a30..08d9c95677 100644
--- a/tensorflow/lite/toco/tflite/export.h
+++ b/tensorflow/lite/toco/tflite/export.h
@@ -76,7 +76,7 @@ inline void Export(const Model& model, string* output_file_contents) {
 
 namespace details {
 
-// A maps from tensor name to its final position in the TF Lite buffer.
+// A map from tensor name to its final position in the TF Lite buffer.
 using TensorsMap = std::unordered_map<string, int>;
 
 // A key to identify an operator.
@@ -88,7 +88,7 @@ class OperatorKey {
 
   // Construct OperatorKey by Toco op.
   OperatorKey(
-      const ::toco::Operator& op,
+      const ::toco::OperatorSignature& op_signature,
       const std::map<OperatorType, std::unique_ptr<BaseOperator>>& ops_by_type,
       bool enable_select_tf_ops);
 
@@ -158,7 +158,7 @@ class OperatorKey {
   std::string flex_tensorflow_op_;
 };
 
-// A maps from operator type to its final position in the TF Lite buffer.
+// A map from OperatorKey to its final position in the TF Lite buffer.
 using OperatorsMap = std::unordered_map<OperatorKey, int, OperatorKey::Hash>;
 
 void LoadTensorsMap(const Model& model, TensorsMap* tensors_map);
diff --git a/tensorflow/lite/toco/tflite/export_test.cc b/tensorflow/lite/toco/tflite/export_test.cc
index b371296784..821ed4dbf3 100644
--- a/tensorflow/lite/toco/tflite/export_test.cc
+++ b/tensorflow/lite/toco/tflite/export_test.cc
@@ -301,8 +301,9 @@ class FakeConvolutionOperator
                         OperatorType::kConv) {}
 
   // Returning the op version according to the op parameters.
-  int GetVersion(const Operator& op) const override {
-    const TocoOperator& conv_op = static_cast<const TocoOperator&>(op);
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const TocoOperator& conv_op =
+        static_cast<const TocoOperator&>(*op_signature.op);
     if (conv_op.dilation_width_factor != 1 ||
         conv_op.dilation_height_factor != 1) {
       // Version 2 if dilation is used.
@@ -448,22 +449,58 @@ TEST_F(VersionedOpExportTest, Export) {
 }
 
 TEST(OperatorKeyTest, TestBuiltinOp) {
+  Model model;
   auto op = absl::make_unique<ConvOperator>();
 
+  // Test a normal float operation.
+  op->inputs = {"input", "filter"};
+  op->outputs = {"output"};
+  Array& input_array = model.GetOrCreateArray(op->inputs[0]);
+  Array& filter_array = model.GetOrCreateArray(op->inputs[1]);
+  Array& output_array = model.GetOrCreateArray(op->outputs[0]);
+  input_array.data_type = ArrayDataType::kFloat;
+  filter_array.data_type = ArrayDataType::kFloat;
+  output_array.data_type = ArrayDataType::kFloat;
+
   const auto ops_by_type = BuildOperatorByTypeMap();
-  const auto key = details::OperatorKey(*op, ops_by_type, false);
+  const toco::OperatorSignature op_signature = {op.get(), &model};
+  const auto key = details::OperatorKey(op_signature, ops_by_type, false);
 
   EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_CONV_2D);
   EXPECT_EQ(key.custom_code(), "");
   EXPECT_EQ(key.version(), 1);
 }
 
+TEST(OperatorKeyTest, TestBuiltinOpWithVersionedInputTypes) {
+  Model model;
+  auto op = absl::make_unique<DequantizeOperator>();
+
+  op->inputs = {"input"};
+  op->outputs = {"output"};
+  Array& input_array = model.GetOrCreateArray(op->inputs[0]);
+  Array& output_array = model.GetOrCreateArray(op->outputs[0]);
+  input_array.data_type = ArrayDataType::kInt8;
+  output_array.data_type = ArrayDataType::kFloat;
+
+  const auto ops_by_type = BuildOperatorByTypeMap();
+
+  // Test a signed int8 dequantize operation.
+  const toco::OperatorSignature op_signature = {op.get(), &model};
+  const auto key = details::OperatorKey(op_signature, ops_by_type, false);
+
+  EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_DEQUANTIZE);
+  EXPECT_EQ(key.custom_code(), "");
+  EXPECT_EQ(key.version(), 2);
+}
+
 TEST(OperatorKeyTest, TestCustomOp) {
+  Model model;
   auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
   op->tensorflow_op = "MyCrazyCustomOp";
 
   const auto ops_by_type = BuildOperatorByTypeMap();
-  const auto key = details::OperatorKey(*op, ops_by_type, false);
+  const toco::OperatorSignature op_signature = {op.get(), &model};
+  const auto key = details::OperatorKey(op_signature, ops_by_type, false);
 
   EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_CUSTOM);
   EXPECT_EQ(key.custom_code(), "MyCrazyCustomOp");
@@ -471,12 +508,14 @@ TEST(OperatorKeyTest, TestCustomOp) {
 }
 
 TEST(OperatorKeyTest, TestFlexOp) {
+  Model model;
   auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
   op->tensorflow_op = "BatchMatMul";
 
   const auto ops_by_type = BuildOperatorByTypeMap();
   {
-    const auto key = details::OperatorKey(*op, ops_by_type, false);
+    const toco::OperatorSignature op_signature = {op.get(), &model};
+    const auto key = details::OperatorKey(op_signature, ops_by_type, false);
     // It shouldn't be converted to Flex op if `allow_flex_op` is false.
     EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_CUSTOM);
     EXPECT_EQ(key.custom_code(), "BatchMatMul");
@@ -488,7 +527,8 @@ TEST(OperatorKeyTest, TestFlexOp) {
   {
     // Verify that the custom op name is prefixed by "Flex" and `is_flex_op`
     // is true.
-    const auto key = details::OperatorKey(*op, ops_by_type, true);
+    const toco::OperatorSignature op_signature = {op.get(), &model};
+    const auto key = details::OperatorKey(op_signature, ops_by_type, true);
     EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_CUSTOM);
     EXPECT_EQ(key.custom_code(), "FlexBatchMatMul");
     EXPECT_EQ(key.version(), 1);
@@ -498,11 +538,13 @@ TEST(OperatorKeyTest, TestFlexOp) {
 }
 
 TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
+  Model model;
   auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
   op->tensorflow_op = "Merge";
 
   const auto ops_by_type = BuildOperatorByTypeMap();
-  const auto key = details::OperatorKey(*op, ops_by_type, true);
+  const toco::OperatorSignature op_signature = {op.get(), &model};
+  const auto key = details::OperatorKey(op_signature, ops_by_type, true);
 
   EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_CUSTOM);
   EXPECT_EQ(key.custom_code(), "FlexMerge");
@@ -514,11 +556,13 @@ TEST(OperatorKeyTest, TestFlexWithControlFlowOp) {
 }
 
 TEST(OperatorKeyTest, TestFlexWithUnsupportedOp) {
+  Model model;
   auto op = absl::make_unique<TensorFlowUnsupportedOperator>();
   op->tensorflow_op = "HashTableV2";
 
   const auto ops_by_type = BuildOperatorByTypeMap();
-  const auto key = details::OperatorKey(*op, ops_by_type, true);
+  const toco::OperatorSignature op_signature = {op.get(), &model};
+  const auto key = details::OperatorKey(op_signature, ops_by_type, true);
 
   EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_CUSTOM);
   EXPECT_EQ(key.custom_code(), "HashTableV2");
@@ -532,6 +576,7 @@ TEST(OperatorKeyTest, TestFlexWithUnsupportedOp) {
 
 TEST(OperatorKeyTest, TestFlexWithPartiallySupportedOps) {
   // Test Toco-supported/TFLite-unsupported operators.
+  Model model;
   // TODO(ycling): The test will be broken if TensorFlowAssert is implemented in
   // TFLite. Find a more robust way to test the fallback logic.
   auto op = absl::make_unique<TensorFlowAssertOperator>();
@@ -541,7 +586,8 @@ TEST(OperatorKeyTest, TestFlexWithPartiallySupportedOps) {
   {
     // If NodeDef isn't retained in the Toco op, a regular custom op
     // will be exported.
-    const auto key = details::OperatorKey(*op, ops_by_type, true);
+    const toco::OperatorSignature op_signature = {op.get(), &model};
+    const auto key = details::OperatorKey(op_signature, ops_by_type, true);
     EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_CUSTOM);
     EXPECT_EQ(key.custom_code(), "Assert");
     EXPECT_EQ(key.version(), 1);
@@ -556,7 +602,8 @@ TEST(OperatorKeyTest, TestFlexWithPartiallySupportedOps) {
 
   {
     // If NodeDef is retained in the Toco op, a Flex op will be exported.
-    const auto key = details::OperatorKey(*op, ops_by_type, true);
+    const toco::OperatorSignature op_signature = {op.get(), &model};
+    const auto key = details::OperatorKey(op_signature, ops_by_type, true);
     EXPECT_EQ(key.type(), ::tflite::BuiltinOperator_CUSTOM);
     EXPECT_EQ(key.custom_code(), "FlexAssert");
     EXPECT_EQ(key.version(), 1);
diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc
index 205af23da5..f77ebf0d47 100644
--- a/tensorflow/lite/toco/tflite/operator.cc
+++ b/tensorflow/lite/toco/tflite/operator.cc
@@ -14,19 +14,20 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/toco/tflite/operator.h"
 
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_def.pb.h"
+#include "tensorflow/core/util/ptr_util.h"
 // TODO(ycling): Consider refactoring to extract the LSTM definition out of
 // graph_transformation module.
 #include "tensorflow/lite/toco/graph_transformations/lstm_utils.h"
+#include "tensorflow/lite/toco/model.h"
 #include "tensorflow/lite/toco/tflite/builtin_operator.h"
 #include "tensorflow/lite/toco/tflite/custom_operator.h"
 #include "tensorflow/lite/toco/tflite/simple_operator.h"
 #include "tensorflow/lite/toco/tflite/types.h"
 #include "tensorflow/lite/toco/tflite/whitelisted_flex_ops.h"
-#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/node_def.pb.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_def.pb.h"
-#include "tensorflow/core/util/ptr_util.h"
 
 namespace toco {
 
@@ -60,7 +61,9 @@ class AveragePool
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Convolution
@@ -92,7 +95,9 @@ class Convolution
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class DepthwiseConvolution
@@ -126,8 +131,9 @@ class DepthwiseConvolution
     op->dilation_height_factor = options.dilation_h_factor();
   }
 
-  int GetVersion(const Operator& op) const override {
-    const auto& conv_op = static_cast<const DepthwiseConvOperator&>(op);
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const auto& conv_op =
+        static_cast<const DepthwiseConvOperator&>(*op_signature.op);
     if (conv_op.dilation_width_factor != 1 ||
         conv_op.dilation_height_factor != 1) {
       return 2;
@@ -155,7 +161,9 @@ class Add : public BuiltinOperator<AddOperator, ::tflite::AddOptions,
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class SpaceToBatchND
@@ -174,7 +182,9 @@ class SpaceToBatchND
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Sub : public BuiltinOperator<SubOperator, ::tflite::SubOptions,
@@ -196,7 +206,9 @@ class Sub : public BuiltinOperator<SubOperator, ::tflite::SubOptions,
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Div : public BuiltinOperator<DivOperator, ::tflite::DivOptions,
@@ -218,7 +230,9 @@ class Div : public BuiltinOperator<DivOperator, ::tflite::DivOptions,
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class BatchToSpaceND
@@ -237,7 +251,9 @@ class BatchToSpaceND
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Cast : public BuiltinOperator<CastOperator, ::tflite::CastOptions,
@@ -258,7 +274,9 @@ class Cast : public BuiltinOperator<CastOperator, ::tflite::CastOptions,
     op->dst_data_type = DataType::Deserialize(options.out_data_type());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Concatenation
@@ -278,7 +296,9 @@ class Concatenation
     op->axis = options.axis();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class DepthToSpace : public CustomOperator<DepthToSpaceOperator> {
@@ -292,7 +312,9 @@ class DepthToSpace : public CustomOperator<DepthToSpaceOperator> {
     op->block_size = m["block_size"].AsInt64();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class FakeQuant
@@ -315,9 +337,8 @@ class FakeQuant
     op->num_bits = options.num_bits();
     op->narrow_range = options.narrow_range();
   }
-
-  int GetVersion(const Operator& op) const override {
-    const auto& fq_op = static_cast<const FakeQuantOperator&>(op);
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const auto& fq_op = static_cast<const FakeQuantOperator&>(*op_signature.op);
     return fq_op.narrow_range ? 2 : 1;
   }
 };
@@ -369,8 +390,9 @@ class FullyConnected
     }
   }
 
-  int GetVersion(const Operator& op) const override {
-    const auto& fc_op = static_cast<const FullyConnectedOperator&>(op);
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const auto& fc_op =
+        static_cast<const FullyConnectedOperator&>(*op_signature.op);
     return fc_op.weights_format == FullyConnectedWeightsFormat::kDefault ? 1
                                                                          : 2;
   }
@@ -392,7 +414,9 @@ class Gather : public BuiltinOperator<GatherOperator, ::tflite::GatherOptions,
     op->axis = {options.axis()};
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Svdf : public BuiltinOperator<SvdfOperator, ::tflite::SVDFOptions,
@@ -414,7 +438,9 @@ class Svdf : public BuiltinOperator<SvdfOperator, ::tflite::SVDFOptions,
     op->rank = options.rank();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class L2Normalization
@@ -436,7 +462,9 @@ class L2Normalization
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class L2Pool : public BuiltinOperator<L2PoolOperator, ::tflite::Pool2DOptions,
@@ -465,7 +493,9 @@ class L2Pool : public BuiltinOperator<L2PoolOperator, ::tflite::Pool2DOptions,
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class LocalResponseNormalization
@@ -490,7 +520,9 @@ class LocalResponseNormalization
     op->beta = options.beta();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class MaxPool : public BuiltinOperator<MaxPoolOperator, ::tflite::Pool2DOptions,
@@ -519,7 +551,9 @@ class MaxPool : public BuiltinOperator<MaxPoolOperator, ::tflite::Pool2DOptions,
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Mul : public BuiltinOperator<MulOperator, ::tflite::MulOptions,
@@ -541,7 +575,9 @@ class Mul : public BuiltinOperator<MulOperator, ::tflite::MulOptions,
         ActivationFunction::Deserialize(options.fused_activation_function());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Pad : public BuiltinOperator<PadOperator, ::tflite::PadOptions,
@@ -558,7 +594,9 @@ class Pad : public BuiltinOperator<PadOperator, ::tflite::PadOptions,
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Tile
@@ -574,7 +612,9 @@ class Tile
 
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class PadV2 : public BuiltinOperator<PadV2Operator, ::tflite::PadV2Options,
@@ -591,7 +631,9 @@ class PadV2 : public BuiltinOperator<PadV2Operator, ::tflite::PadV2Options,
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Reshape
@@ -614,7 +656,9 @@ class Reshape
                      options.new_shape()->end());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Softmax
@@ -633,7 +677,9 @@ class Softmax
     op->beta = options.beta();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class SpaceToDepth
@@ -653,7 +699,9 @@ class SpaceToDepth
     op->block_size = options.block_size();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Transpose
@@ -670,7 +718,9 @@ class Transpose
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Lstm : public BuiltinOperator<LstmCellOperator, ::tflite::LSTMOptions,
@@ -713,8 +763,9 @@ class Lstm : public BuiltinOperator<LstmCellOperator, ::tflite::LSTMOptions,
     }
   }
 
-  int GetVersion(const Operator& op) const override {
-    const auto& lstm_op = static_cast<const LstmCellOperator&>(op);
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const auto& lstm_op =
+        static_cast<const LstmCellOperator&>(*op_signature.op);
     switch (lstm_op.kernel_type) {
       case LstmCellOperator::KERNEL_FULL:
         return 1;
@@ -770,7 +821,9 @@ class UnidirectionalSequenceLstm
            ::tflite::ActivationFunctionType_TANH);
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 
   std::vector<bool> GetMutatingInputVariables(
       const Operator& op) const override {
@@ -796,7 +849,9 @@ class Mean : public BuiltinOperator<MeanOperator, ::tflite::ReducerOptions,
     op->keep_dims = options.keep_dims();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Sum
@@ -815,7 +870,9 @@ class Sum
     op->keep_dims = options.keep_dims();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ReduceMax
@@ -834,7 +891,9 @@ class ReduceMax
     op->keep_dims = options.keep_dims();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ReduceMin
@@ -853,7 +912,9 @@ class ReduceMin
     op->keep_dims = options.keep_dims();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ReduceProd
@@ -872,7 +933,9 @@ class ReduceProd
     op->keep_dims = options.keep_dims();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ReduceAny
@@ -891,7 +954,9 @@ class ReduceAny
     op->keep_dims = options.keep_dims();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ResizeBilinear
@@ -911,7 +976,9 @@ class ResizeBilinear
     op->align_corners = options.align_corners();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ResizeNearestNeighbor
@@ -932,7 +999,9 @@ class ResizeNearestNeighbor
     op->align_corners = options.align_corners();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Squeeze
@@ -955,7 +1024,9 @@ class Squeeze
                             options.squeeze_dims()->end());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Split
@@ -975,7 +1046,9 @@ class Split
     op->num_split = options.num_splits();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class SplitV
@@ -995,7 +1068,9 @@ class SplitV
     op->num_split = options.num_splits();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class StridedSlice
@@ -1021,7 +1096,9 @@ class StridedSlice
     op->shrink_axis_mask = options.shrink_axis_mask();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class TopK_V2 : public BuiltinOperator<TopKV2Operator, ::tflite::TopKV2Options,
@@ -1037,7 +1114,9 @@ class TopK_V2 : public BuiltinOperator<TopKV2Operator, ::tflite::TopKV2Options,
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ArgMax : public BuiltinOperator<ArgMaxOperator, ::tflite::ArgMaxOptions,
@@ -1056,7 +1135,9 @@ class ArgMax : public BuiltinOperator<ArgMaxOperator, ::tflite::ArgMaxOptions,
     op->output_data_type = DataType::Deserialize(options.output_type());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ArgMin : public BuiltinOperator<ArgMinOperator, ::tflite::ArgMinOptions,
@@ -1075,7 +1156,9 @@ class ArgMin : public BuiltinOperator<ArgMinOperator, ::tflite::ArgMinOptions,
     op->output_data_type = DataType::Deserialize(options.output_type());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class TransposeConv
@@ -1100,7 +1183,9 @@ class TransposeConv
     op->stride_height = options.stride_h();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class SparseToDense
@@ -1121,7 +1206,9 @@ class SparseToDense
     op->validate_indices = options.validate_indices();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class ExpandDims
@@ -1139,7 +1226,9 @@ class ExpandDims
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Pack : public BuiltinOperator<PackOperator, ::tflite::PackOptions,
@@ -1159,7 +1248,9 @@ class Pack : public BuiltinOperator<PackOperator, ::tflite::PackOptions,
     op->axis = options.axis();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Shape
@@ -1179,7 +1270,9 @@ class Shape
     op->output_data_type = DataType::Deserialize(options.out_type());
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class OneHot : public BuiltinOperator<OneHotOperator, ::tflite::OneHotOptions,
@@ -1196,7 +1289,9 @@ class OneHot : public BuiltinOperator<OneHotOperator, ::tflite::OneHotOptions,
     op->axis = options.axis();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class CTCBeamSearchDecoder
@@ -1217,7 +1312,9 @@ class CTCBeamSearchDecoder
     op->merge_repeated = m["merge_repeated"].AsBool();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class Unpack : public BuiltinOperator<UnpackOperator, ::tflite::UnpackOptions,
@@ -1235,7 +1332,9 @@ class Unpack : public BuiltinOperator<UnpackOperator, ::tflite::UnpackOptions,
     op->axis = options.axis();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class LeakyRelu
@@ -1253,7 +1352,9 @@ class LeakyRelu
     op->alpha = options.alpha();
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class SquaredDifference
@@ -1272,7 +1373,9 @@ class SquaredDifference
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 class MirrorPad
@@ -1295,7 +1398,9 @@ class MirrorPad
                    : MirrorPadMode::kSymmetric;
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 std::unique_ptr<flexbuffers::Builder> WriteFlexOpOptions(
@@ -1472,8 +1577,8 @@ class TensorFlowUnsupported : public BaseOperator {
     node_def.SerializeToString(&op->tensorflow_node_def);
   }
 
-  int GetVersion(const Operator& op) const override {
-    // TODO(ycling): Deisng and implement a way to plumb the version of
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    // TODO(ycling): Design and implement a way to plumb the version of
     // custom ops.
     return 1;
   }
@@ -1497,11 +1602,13 @@ class Dequantize
   void ReadOptions(const TfLiteOptions& options,
                    TocoOperator* op) const override {}
 
-  int GetVersion(const Operator& op) const override {
-    // TODO(suharshs): Dequantize now supports INT8 in addition to
-    // QUANTIZED_UINT8. When TOCO can create models with INT8, we need
-    // to find a way to see the type here and return version 2. Right now
-    // version 2 will only be added by post training quantization tools.
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    // Version 2 supports signed int8 input types.
+    if (input_array.data_type == ArrayDataType::kInt8) {
+      return 2;
+    }
     return 1;
   }
 };
@@ -1534,6 +1641,8 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
   ops.push_back(MakeUnique<DepthwiseConvolution>(
       ::tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
       OperatorType::kDepthwiseConv));
+  ops.push_back(MakeUnique<Dequantize>(::tflite::BuiltinOperator_DEQUANTIZE,
+                                       OperatorType::kDequantize));
   ops.push_back(
       MakeUnique<FullyConnected>(::tflite::BuiltinOperator_FULLY_CONNECTED,
                                  OperatorType::kFullyConnected));
@@ -1645,8 +1754,6 @@ std::vector<std::unique_ptr<BaseOperator>> BuildOperatorList(
   // when custom ops are exported but SimpleOperator bypasses thoses. To
   // prevent user confusion we are settling on using SimpleOperator only for
   // builtins.
-  ops.push_back(MakeUnique<SimpleOperator<DequantizeOperator>>(
-      "DEQUANTIZE", OperatorType::kDequantize));
   ops.push_back(
       MakeUnique<SimpleOperator<FloorOperator>>("FLOOR", OperatorType::kFloor));
   ops.push_back(
diff --git a/tensorflow/lite/toco/tflite/operator.h b/tensorflow/lite/toco/tflite/operator.h
index 4ac531579c..899db1a359 100644
--- a/tensorflow/lite/toco/tflite/operator.h
+++ b/tensorflow/lite/toco/tflite/operator.h
@@ -87,15 +87,15 @@ class BaseOperator {
       const BuiltinOptions* builtin_options,
       const CustomOptions* custom_options) const = 0;
 
-  // Get the op version by op parameters.
-  // The function need to be overridden to return the op version based on the
+  // Get the op version using the OperatorSignature.
+  // The function needs to be overridden to return the op version based on the
   // parameters. Note:
   // * The first version for each op should be 1 (to be consistent with the
   //   default value in Flatbuffer. `return 1;` is okay for newly implemented
   //   ops.
-  // * When multiple versions are defined for an op, this function need to be
+  // * When multiple versions are defined for an op, this function needs to be
   //   overridden. (See example in `operator_test.cc`)
-  virtual int GetVersion(const Operator& op) const = 0;
+  virtual int GetVersion(const OperatorSignature& op_signature) const = 0;
 
   // Given a Toco `Operator`, return a list of booleans indicating the op
   // mutates which input variables.
diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc
index 14ec89cd73..f2f7221eb1 100644
--- a/tensorflow/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/lite/toco/tflite/operator_test.cc
@@ -111,8 +111,6 @@ class OperatorTest : public ::testing::Test {
 };
 
 TEST_F(OperatorTest, SimpleOperators) {
-  CheckSimpleOperator<DequantizeOperator>("DEQUANTIZE",
-                                          OperatorType::kDequantize);
   CheckSimpleOperator<FloorOperator>("FLOOR", OperatorType::kFloor);
   CheckSimpleOperator<ReluOperator>("RELU", OperatorType::kRelu);
   CheckSimpleOperator<Relu1Operator>("RELU_N1_TO_1", OperatorType::kRelu1);
@@ -469,6 +467,12 @@ TEST_F(OperatorTest, BuiltinArgMin) {
   EXPECT_EQ(op.output_data_type, output_toco_op->output_data_type);
 }
 
+TEST_F(OperatorTest, BuiltinDequantize) {
+  DequantizeOperator op;
+  auto output_toco_op = SerializeAndDeserialize(
+      GetOperator("DEQUANTIZE", OperatorType::kDequantize), op);
+}
+
 TEST_F(OperatorTest, BuiltinTransposeConv) {
   TransposeConvOperator op;
   op.stride_width = 123;
diff --git a/tensorflow/lite/toco/tflite/simple_operator.h b/tensorflow/lite/toco/tflite/simple_operator.h
index e3e4c8551e..290074831b 100644
--- a/tensorflow/lite/toco/tflite/simple_operator.h
+++ b/tensorflow/lite/toco/tflite/simple_operator.h
@@ -42,7 +42,9 @@ class SimpleOperator : public BaseOperator {
     return std::unique_ptr<Operator>(new T);
   }
 
-  int GetVersion(const Operator& op) const override { return 1; }
+  int GetVersion(const OperatorSignature& op_signature) const override {
+    return 1;
+  }
 };
 
 }  // namespace tflite
-- 
GitLab


From 15fa7c49e27963df5304d7f827e6c4459079cc18 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 01:55:09 -0800
Subject: [PATCH 736/873] Extract function spec information that is needed for
 canonicalization.

This will allow serialization of all the information we need for canonicalization.

PiperOrigin-RevId: 225960841
---
 tensorflow/python/eager/def_function.py |   5 +-
 tensorflow/python/eager/function.py     | 291 +++++++++++++-----------
 2 files changed, 161 insertions(+), 135 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index 52b481915e..fc14558cc7 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -269,8 +269,11 @@ class PolymorphicFunction(object):
       raise ValueError(
           "_canonicalize_function_inputs must be called only after _initialize "
           "has run.")
+    # pylint: disable=protected-access
     if self._input_signature is None or args or kwds:
-      return self._stateful_fn._canonicalize_function_inputs(*args, **kwds)  # pylint: disable=protected-access
+      return self._stateful_fn._function_spec.canonicalize_function_inputs(
+          *args, **kwds)
+    # pylint: enable=protected-access
     # If an input signature is defined, we may need to fetch a concrete function
     # without any inputs specified. In this case args and kwds should be ignored
     # but running _canonicalize_function_inputs would raise an exception.
diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 885403dd10..7ba9f9290b 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -769,6 +769,146 @@ def _deterministic_dict_values(dictionary):
   return tuple(dictionary[key] for key in sorted(dictionary))
 
 
+class FunctionSpec(object):
+  """Specification of how to bind arguments to a function."""
+
+  def __init__(self, python_function, input_signature):
+    if isinstance(python_function, functools.partial):
+      python_function_to_inspect = python_function.func
+      self._args_to_prepend = python_function.args or tuple()
+      self._kwargs_to_include = python_function.keywords or {}
+    else:
+      python_function_to_inspect = python_function
+      self._args_to_prepend = tuple()
+      self._kwargs_to_include = {}
+
+    fullargspec = tf_inspect.getfullargspec(python_function_to_inspect)
+    self._default_values = fullargspec.defaults
+
+    if tf_inspect.ismethod(python_function_to_inspect):
+      # Remove `self`: default arguments shouldn't be matched to it.
+      args = fullargspec.args[1:]
+    else:
+      args = fullargspec.args
+
+    # A cache mapping from argument name to index, for canonicalizing
+    # arguments that are called in a keyword-like fashion.
+    self._args_to_indices = {arg: i for i, arg in enumerate(args)}
+    self.arg_names = args
+    self.vararg_name = fullargspec.varargs
+
+    # A cache mapping from arg index to default value, for canonicalization.
+    offset = len(args) - len(fullargspec.defaults or [])
+    self._arg_indices_to_default_values = {
+        offset + index: default
+        for index, default in enumerate(fullargspec.defaults or [])
+    }
+    self._default_values_start_index = offset
+    if input_signature is None:
+      self.input_signature = None
+    else:
+      if fullargspec.varkw is not None or fullargspec.kwonlyargs:
+        raise ValueError("Cannot define a TensorFlow function from a Python "
+                         "function with keyword arguments when "
+                         "input_signature is provided.")
+
+      if not isinstance(input_signature, (tuple, list)):
+        raise TypeError("input_signature must be either a tuple or a "
+                        "list, received " + str(type(input_signature)))
+
+      self.input_signature = tuple(input_signature)
+      self.flat_input_signature = tuple(nest.flatten(input_signature))
+
+  def canonicalize_function_inputs(self, *args, **kwargs):
+    """Canonicalizes `args` and `kwargs`.
+
+    Canonicalize the inputs to the Python function using a `FunctionSpec`
+    instance. In particular, we parse the varags and kwargs that the
+    original function was called with into a tuple corresponding to the
+    Python function's positional (named) arguments and a dictionary
+    corresponding to its kwargs.
+
+    Args:
+      *args: The varargs this object was called with.
+      **kwargs: The keyword args this function was called with.
+
+    Returns:
+      A canonicalized ordering of the inputs.
+
+    Raises:
+      ValueError: If a keyword in `kwargs` cannot be matched with a positional
+        argument when an input signature is specified, or when the inputs
+        do not conform to the input signature.
+    """
+    args = self._args_to_prepend + args
+    kwargs = dict(kwargs, **self._kwargs_to_include)
+    if not kwargs:
+      if self._default_values:
+        inputs = args + self._default_values[
+            len(args) - self._default_values_start_index:]
+      else:
+        inputs = args
+    else:
+      # Maps from index of arg to its corresponding value, according to `args`
+      # and `kwargs`; seeded with the default values for the named args that
+      # aren't in `args`.
+      arg_indices_to_values = {
+          index: default for index, default in six.iteritems(
+              self._arg_indices_to_default_values) if index >= len(args)
+      }
+      consumed_args = []
+      for arg, value in six.iteritems(kwargs):
+        index = self._args_to_indices.get(arg, None)
+        if index is not None:
+          arg_indices_to_values[index] = value
+          consumed_args.append(arg)
+        elif self.input_signature is not None:
+          raise ValueError("Cannot define a TensorFlow function from a Python "
+                           "function with keyword arguments when "
+                           "input_signature is provided.")
+      for arg in consumed_args:
+        # After this loop, `kwargs` will only contain true keyword arguments, as
+        # opposed to named arguments called in a keyword-like fashion.
+        kwargs.pop(arg)
+      inputs = args + _deterministic_dict_values(arg_indices_to_values)
+    flat_inputs = nest.flatten(inputs)
+
+    # Check for NumPy arrays in arguments and convert them to Tensors.
+    # TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps
+    # finding a way to store them directly in the cache key (currently not
+    # possible since ndarrays are not hashable).
+    need_packing = False
+    for index, value in enumerate(flat_inputs):
+      if type(value) == np.ndarray:
+        flat_inputs[index] = constant_op.constant(value)
+        need_packing = True
+    if need_packing:
+      inputs = nest.pack_sequence_as(
+          structure=inputs, flat_sequence=flat_inputs)
+    if self.input_signature is None:
+      return inputs, kwargs
+    else:
+      assert not kwargs
+      signature_relevant_inputs = inputs[:len(self.input_signature)]
+      try:
+        nest.assert_same_structure(self.input_signature,
+                                   signature_relevant_inputs)
+      except (ValueError, TypeError):
+        raise ValueError("Structure of Python function inputs does not match "
+                         "input_signature.")
+      signature_inputs_flat = nest.flatten(signature_relevant_inputs)
+      if any(
+          not pywrap_tensorflow.IsTensor(arg) for arg in signature_inputs_flat):
+        raise ValueError("When input_signature is provided, all inputs to "
+                         "the Python function must be Tensors.")
+      if any(not spec.is_compatible_with(other) for spec, other in zip(
+          self.flat_input_signature, signature_inputs_flat)):
+        raise ValueError("Python inputs incompatible with input_signature: "
+                         "inputs (%s), input_signature (%s)" %
+                         (str(inputs), str(self.input_signature)))
+      return inputs, {}
+
+
 class PolymorphicFunction(object):
   """Wrapper class for the graph functions defined for a Python function.
 
@@ -805,15 +945,11 @@ class PolymorphicFunction(object):
       ValueError: if `input_signature` is not None and the `python_function`'s
         argspec has keyword arguments.
     """
-
     if isinstance(python_function, functools.partial):
       self._python_function = python_function.func
-      self._args_to_prepend = python_function.args or tuple()
-      self._kwargs_to_include = python_function.keywords or {}
     else:
       self._python_function = python_function
-      self._args_to_prepend = tuple()
-      self._kwargs_to_include = {}
+    self._function_spec = FunctionSpec(python_function, input_signature)
     self._name = name
     self._autograph = autograph
     self._function_cache = collections.OrderedDict()
@@ -827,41 +963,6 @@ class PolymorphicFunction(object):
     # different functions for each instance.
     self._descriptor_cache = weakref.WeakKeyDictionary()
 
-    fullargspec = tf_inspect.getfullargspec(self._python_function)
-    if tf_inspect.ismethod(self._python_function):
-      # Remove `self`: default arguments shouldn't be matched to it.
-      args = fullargspec.args[1:]
-    else:
-      args = fullargspec.args
-
-    # A cache mapping from argument name to index, for canonicalizing
-    # arguments that are called in a keyword-like fashion.
-    self._args_to_indices = {arg: i for i, arg in enumerate(args)}
-    self._arg_names = args
-    self._vararg_name = fullargspec.varargs
-    # A cache mapping from arg index to default value, for canonicalization.
-    offset = len(args) - len(fullargspec.defaults or [])
-    self._arg_indices_to_default_values = {
-        offset + index: default
-        for index, default in enumerate(fullargspec.defaults or [])
-    }
-    self._default_values = fullargspec.defaults
-    self._default_values_start_index = offset
-    if input_signature is None:
-      self._input_signature = None
-    else:
-      if fullargspec.varkw is not None or fullargspec.kwonlyargs:
-        raise ValueError("Cannot define a TensorFlow function from a Python "
-                         "function with keyword arguments when "
-                         "input_signature is provided.")
-
-      if not isinstance(input_signature, (tuple, list)):
-        raise TypeError("input_signature must be either a tuple or a "
-                        "list, received " + str(type(input_signature)))
-
-      self._input_signature = tuple(input_signature)
-      self._flat_input_signature = tuple(nest.flatten(input_signature))
-
   def __call__(self, *args, **kwargs):
     """Calls a graph function specialized to the inputs."""
     graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
@@ -870,7 +971,17 @@ class PolymorphicFunction(object):
   @property
   def python_function(self):
     """Returns the wrapped Python function."""
-    return self._python_function
+    return self._python_function  # pylint: disable=protected-access
+
+  @property
+  def _input_signature(self):
+    """Returns the wrapped Python function."""
+    return self._function_spec.input_signature  # pylint: disable=protected-access
+
+  @property
+  def _flat_input_signature(self):
+    """Returns the wrapped Python function."""
+    return self._function_spec.flat_input_signature  # pylint: disable=protected-access
 
   def _get_concrete_function_internal_garbage_collected(self, *args, **kwargs):
     """Returns a concrete function which cleans up its graph function."""
@@ -1050,96 +1161,6 @@ class PolymorphicFunction(object):
     return CacheKey(input_signature, parent_graph, device_functions,
                     colocation_stack, uses_xla)
 
-  def _canonicalize_function_inputs(self, *args, **kwargs):
-    """Canonicalizes `args` and `kwargs`.
-
-    Canonicalize the inputs to the Python function using its fullargspec. In
-    particular, we parse the varags and kwargs that this
-    `PolymorphicFunction` was called with into a tuple corresponding to the
-    Python function's positional (named) arguments and a dictionary
-    corresponding to its kwargs.
-
-    Args:
-      *args: The varargs this object was called with.
-      **kwargs: The keyword args this function was called with.
-
-    Returns:
-      A canonicalized ordering of the inputs.
-
-    Raises:
-      ValueError: If a keyword in `kwargs` cannot be matched with a positional
-        argument when an input signature is specified, or when the inputs
-        do not conform to the input signature.
-    """
-    args = self._args_to_prepend + args
-    kwargs = dict(kwargs, **self._kwargs_to_include)
-    if not kwargs:
-      if self._default_values:
-        inputs = args + self._default_values[len(args) -
-                                             self._default_values_start_index:]
-      else:
-        inputs = args
-    else:
-      # Maps from index of arg to its corresponding value, according to `args`
-      # and `kwargs`; seeded with the default values for the named args that
-      # aren't in `args`.
-      arg_indices_to_values = {
-          index: default for index, default in six.iteritems(
-              self._arg_indices_to_default_values) if index >= len(args)
-      }
-      consumed_args = []
-      for arg, value in six.iteritems(kwargs):
-        index = self._args_to_indices.get(arg, None)
-        if index is not None:
-          arg_indices_to_values[index] = value
-          consumed_args.append(arg)
-        elif self._input_signature is not None:
-          raise ValueError("Cannot define a TensorFlow function from a Python "
-                           "function with keyword arguments when "
-                           "input_signature is provided.")
-      for arg in consumed_args:
-        # After this loop, `kwargs` will only contain true keyword arguments, as
-        # opposed to named arguments called in a keyword-like fashion.
-        kwargs.pop(arg)
-      inputs = args + _deterministic_dict_values(arg_indices_to_values)
-    flat_inputs = nest.flatten(inputs)
-
-    # Check for NumPy arrays in arguments and convert them to Tensors.
-    # TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps
-    # finding a way to store them directly in the cache key (currently not
-    # possible since ndarrays are not hashable).
-    need_packing = False
-    for index, value in enumerate(flat_inputs):
-      if type(value) == np.ndarray:
-        flat_inputs[index] = constant_op.constant(value)
-        need_packing = True
-    if need_packing:
-      inputs = nest.pack_sequence_as(structure=inputs,
-                                     flat_sequence=flat_inputs)
-    if self._input_signature is None:
-      return inputs, kwargs
-    else:
-      assert not kwargs
-      signature_relevant_inputs = inputs[:len(self._input_signature)]
-      try:
-        nest.assert_same_structure(self._input_signature,
-                                   signature_relevant_inputs)
-      except (ValueError, TypeError):
-        raise ValueError("Structure of Python function inputs does not match "
-                         "input_signature.")
-      signature_inputs_flat = nest.flatten(signature_relevant_inputs)
-      if any(not pywrap_tensorflow.IsTensor(arg)
-             for arg in signature_inputs_flat):
-        raise ValueError("When input_signature is provided, all inputs to "
-                         "the Python function must be Tensors.")
-      if any(not spec.is_compatible_with(other)
-             for spec, other in zip(self._flat_input_signature,
-                                    signature_inputs_flat)):
-        raise ValueError("Python inputs incompatible with input_signature: "
-                         "inputs (%s), input_signature (%s)" %
-                         (str(inputs), str(self._input_signature)))
-      return inputs, {}
-
   def _maybe_define_function(self, args, kwargs):
     """Gets a function for these inputs, defining it if necessary.
 
@@ -1159,7 +1180,8 @@ class PolymorphicFunction(object):
       TypeError: If the function inputs include non-hashable objects
     """
     if self._input_signature is None or args is not None or kwargs is not None:
-      args, kwargs = self._canonicalize_function_inputs(*args, **kwargs)
+      args, kwargs = self._function_spec.canonicalize_function_inputs(
+          *args, **kwargs)
     cache_key = self._cache_key(args, kwargs)
     with self._lock:
       try:
@@ -1177,8 +1199,9 @@ class PolymorphicFunction(object):
         else:
           arglen = len(self._input_signature)
         arg_names = (
-            self._arg_names[:arglen]
-            + [self._vararg_name] * (arglen - len(self._arg_names)))
+            self._function_spec.arg_names[:arglen]
+            + [self._function_spec.vararg_name] *
+            (arglen - len(self._function_spec.arg_names)))
         graph_function = Function(
             func_graph_module.func_graph_from_py_func(
                 self._name,
-- 
GitLab


From 33bb4fe143bf5f2d83343ca427afa314f84eb963 Mon Sep 17 00:00:00 2001
From: Gaurav Jain <gjn@google.com>
Date: Tue, 18 Dec 2018 02:26:23 -0800
Subject: [PATCH 737/873] Apply run_deprecated_v1 to entire test class

Test classes having operations in the setUp function as well
as individual tests annotated with run_deprecated_v1 need to
have the whole class annotated with run_deprecated_v1 to ensure
the setUp function as well as the test function is run in graph
mode.

PiperOrigin-RevId: 225964901
---
 .../python/debug/cli/cli_shared_test.py       | 14 +-----
 .../python/debug/lib/debug_gradients_test.py  |  2 +-
 .../python/debug/wrappers/disk_usage_test.py  |  2 +-
 .../python/debug/wrappers/framework_test.py   |  2 +-
 .../framework/error_interpolation_test.py     | 12 ++---
 tensorflow/python/framework/test_util.py      | 11 ++++-
 .../python/kernel_tests/base64_ops_test.py    |  2 +-
 .../boosted_trees/quantile_ops_test.py        |  5 +--
 tensorflow/python/kernel_tests/losses_test.py | 45 ++++++-------------
 .../training/basic_session_run_hooks_test.py  |  4 +-
 .../training/tensorboard_logging_test.py      |  4 +-
 11 files changed, 37 insertions(+), 66 deletions(-)

diff --git a/tensorflow/python/debug/cli/cli_shared_test.py b/tensorflow/python/debug/cli/cli_shared_test.py
index 66a12efda5..535e8a262b 100644
--- a/tensorflow/python/debug/cli/cli_shared_test.py
+++ b/tensorflow/python/debug/cli/cli_shared_test.py
@@ -105,7 +105,7 @@ class TimeToReadableStrTest(test_util.TensorFlowTestCase):
       cli_shared.time_to_readable_str(100, force_time_unit="ks")
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -119,7 +119,6 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
   def tearDown(self):
     ops.reset_default_graph()
 
-  @test_util.run_deprecated_v1
   def testSingleFetchNoFeeds(self):
     run_start_intro = cli_shared.get_run_start_intro(12, self.const_a, None, {})
 
@@ -183,7 +182,6 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
     run_start_intro = cli_shared.get_run_start_intro(1, self.sparse_d, None, {})
     self.assertEqual(str(self.sparse_d), run_start_intro.lines[4].strip())
 
-  @test_util.run_deprecated_v1
   def testTwoFetchesListNoFeeds(self):
     fetches = [self.const_a, self.const_b]
     run_start_intro = cli_shared.get_run_start_intro(1, fetches, None, {})
@@ -200,7 +198,6 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
     description = cli_shared.get_run_short_description(1, fetches, None)
     self.assertEqual("run #1: 2 fetches; 0 feeds", description)
 
-  @test_util.run_deprecated_v1
   def testNestedListAsFetches(self):
     fetches = [self.const_c, [self.const_a, self.const_b]]
     run_start_intro = cli_shared.get_run_start_intro(1, fetches, None, {})
@@ -214,7 +211,6 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
     description = cli_shared.get_run_short_description(1, fetches, None)
     self.assertEqual("run #1: 3 fetches; 0 feeds", description)
 
-  @test_util.run_deprecated_v1
   def testNestedDictAsFetches(self):
     fetches = {"c": self.const_c, "ab": {"a": self.const_a, "b": self.const_b}}
     run_start_intro = cli_shared.get_run_start_intro(1, fetches, None, {})
@@ -232,7 +228,6 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
     description = cli_shared.get_run_short_description(1, fetches, None)
     self.assertEqual("run #1: 3 fetches; 0 feeds", description)
 
-  @test_util.run_deprecated_v1
   def testTwoFetchesAsTupleNoFeeds(self):
     fetches = (self.const_a, self.const_b)
     run_start_intro = cli_shared.get_run_start_intro(1, fetches, None, {})
@@ -249,7 +244,6 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
     description = cli_shared.get_run_short_description(1, fetches, None)
     self.assertEqual("run #1: 2 fetches; 0 feeds", description)
 
-  @test_util.run_deprecated_v1
   def testTwoFetchesAsNamedTupleNoFeeds(self):
     fetches_namedtuple = namedtuple("fetches", "x y")
     fetches = fetches_namedtuple(self.const_b, self.const_c)
@@ -267,7 +261,6 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
     description = cli_shared.get_run_short_description(1, fetches, None)
     self.assertEqual("run #1: 2 fetches; 0 feeds", description)
 
-  @test_util.run_deprecated_v1
   def testWithFeedDict(self):
     feed_dict = {
         self.const_a: 10.0,
@@ -291,7 +284,6 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
                                                        feed_dict)
     self.assertEqual("run #1: 1 fetch (c:0); 2 feeds", description)
 
-  @test_util.run_deprecated_v1
   def testTensorFilters(self):
     feed_dict = {self.const_a: 10.0}
     tensor_filters = {
@@ -322,20 +314,18 @@ class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
     command_set.add(annot[2].content)
     self.assertEqual({"run -f filter_a", "run -f filter_b"}, command_set)
 
-  @test_util.run_deprecated_v1
   def testGetRunShortDescriptionWorksForTensorFeedKey(self):
     short_description = cli_shared.get_run_short_description(
         1, self.const_a, {self.const_a: 42.0})
     self.assertEqual("run #1: 1 fetch (a:0); 1 feed (a:0)", short_description)
 
-  @test_util.run_deprecated_v1
   def testGetRunShortDescriptionWorksForUnicodeFeedKey(self):
     short_description = cli_shared.get_run_short_description(
         1, self.const_a, {u"foo": 42.0})
     self.assertEqual("run #1: 1 fetch (a:0); 1 feed (foo)", short_description)
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class GetErrorIntroTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/debug/lib/debug_gradients_test.py b/tensorflow/python/debug/lib/debug_gradients_test.py
index 885691c3ef..e592e46095 100644
--- a/tensorflow/python/debug/lib/debug_gradients_test.py
+++ b/tensorflow/python/debug/lib/debug_gradients_test.py
@@ -36,7 +36,7 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import gradient_descent
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class IdentifyGradientTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/debug/wrappers/disk_usage_test.py b/tensorflow/python/debug/wrappers/disk_usage_test.py
index 88b1cd540d..71c56b3310 100644
--- a/tensorflow/python/debug/wrappers/disk_usage_test.py
+++ b/tensorflow/python/debug/wrappers/disk_usage_test.py
@@ -32,7 +32,7 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import monitored_session
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class DumpingDebugWrapperDiskUsageLimitTest(test_util.TensorFlowTestCase):
 
   @classmethod
diff --git a/tensorflow/python/debug/wrappers/framework_test.py b/tensorflow/python/debug/wrappers/framework_test.py
index a50fa7cf4b..aa070d4428 100644
--- a/tensorflow/python/debug/wrappers/framework_test.py
+++ b/tensorflow/python/debug/wrappers/framework_test.py
@@ -141,7 +141,7 @@ class TestDebugWrapperSessionBadAction(framework.BaseDebugWrapperSession):
     return framework.OnRunEndResponse()
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class DebugWrapperSessionTest(test_util.TensorFlowTestCase):
 
   def _no_rewrite_session_config(self):
diff --git a/tensorflow/python/framework/error_interpolation_test.py b/tensorflow/python/framework/error_interpolation_test.py
index d835ada086..5ddbac72ff 100644
--- a/tensorflow/python/framework/error_interpolation_test.py
+++ b/tensorflow/python/framework/error_interpolation_test.py
@@ -115,7 +115,7 @@ class ComputeColocationSummaryFromOpTest(test.TestCase):
     self.assertIn("No node-device colocations", summary)
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class InterpolateFilenamesAndLineNumbersTest(test.TestCase):
 
   def setUp(self):
@@ -197,7 +197,7 @@ class InterpolateFilenamesAndLineNumbersTest(test.TestCase):
     self.assertRegexpMatches(interpolated_string, "constant_op.py:[0-9]+.*")
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class InputNodesTest(test.TestCase):
 
   def setUp(self):
@@ -235,7 +235,7 @@ class InputNodesTest(test.TestCase):
     self.assertRegexpMatches(interpolated_string, expected_regex)
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class InterpolateDeviceSummaryTest(test.TestCase):
 
   def _fancy_device_function(self, unused_op):
@@ -279,7 +279,7 @@ class InterpolateDeviceSummaryTest(test.TestCase):
     self.assertRegexpMatches(result, expected_re)
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class InterpolateColocationSummaryTest(test.TestCase):
 
   def setUp(self):
@@ -304,13 +304,11 @@ class InterpolateColocationSummaryTest(test.TestCase):
 
     self.graph = node_three.graph
 
-  @test_util.run_v1_only("b/120545219")
   def testNodeThreeHasColocationInterpolation(self):
     message = "{{colocation_node Three_with_one}}"
     result = error_interpolation.interpolate(message, self.graph)
     self.assertIn("colocate_with(One)", result)
 
-  @test_util.run_v1_only("b/120545219")
   def testNodeFourHasColocationInterpolationForNodeThreeOnly(self):
     message = "{{colocation_node Four_with_three}}"
     result = error_interpolation.interpolate(message, self.graph)
@@ -319,14 +317,12 @@ class InterpolateColocationSummaryTest(test.TestCase):
         "One", result,
         "Node One should not appear in Four_with_three's summary:\n%s" % result)
 
-  @test_util.run_v1_only("b/120545219")
   def testNodeFiveHasColocationInterpolationForNodeOneAndTwo(self):
     message = "{{colocation_node Five_with_one_with_two}}"
     result = error_interpolation.interpolate(message, self.graph)
     self.assertIn("colocate_with(One)", result)
     self.assertIn("colocate_with(Two)", result)
 
-  @test_util.run_v1_only("b/120545219")
   def testColocationInterpolationForNodeLackingColocation(self):
     message = "{{colocation_node One}}"
     result = error_interpolation.interpolate(message, self.graph)
diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py
index 73713cb10c..c663af2ee4 100644
--- a/tensorflow/python/framework/test_util.py
+++ b/tensorflow/python/framework/test_util.py
@@ -1052,7 +1052,16 @@ def run_deprecated_v1(func=None):
 
   def decorator(f):
     if tf_inspect.isclass(f):
-      raise ValueError("`run_deprecated_v1` only supports test methods.")
+      setup = f.__dict__.get("setUp")
+      if setup is not None:
+        setattr(f, "setUp", decorator(setup))
+
+      for name, value in f.__dict__.copy().items():
+        if (callable(value) and
+            name.startswith(unittest.TestLoader.testMethodPrefix)):
+          setattr(f, name, decorator(value))
+
+      return f
 
     def decorated(self, *args, **kwargs):
       if tf2.enabled():
diff --git a/tensorflow/python/kernel_tests/base64_ops_test.py b/tensorflow/python/kernel_tests/base64_ops_test.py
index 381f190b8d..d5a5dc8c01 100644
--- a/tensorflow/python/kernel_tests/base64_ops_test.py
+++ b/tensorflow/python/kernel_tests/base64_ops_test.py
@@ -31,7 +31,7 @@ from tensorflow.python.ops import string_ops
 from tensorflow.python.platform import test
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class Base64OpsTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py b/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
index 2b9863fb89..37a60fa0e3 100644
--- a/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
+++ b/tensorflow/python/kernel_tests/boosted_trees/quantile_ops_test.py
@@ -35,6 +35,7 @@ from tensorflow.python.platform import googletest
 from tensorflow.python.training import saver
 
 
+@test_util.run_deprecated_v1
 class QuantileOpsTest(test_util.TensorFlowTestCase):
 
   def create_resource(self, name, eps, max_elements, num_streams=1):
@@ -82,7 +83,6 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
     self.max_elements = 1 << 16
     self.num_quantiles = constant_op.constant(3, dtype=dtypes.int64)
 
-  @test_util.run_v1_only("b/120545219")
   def testBasicQuantileBucketsSingleResource(self):
     with self.cached_session() as sess:
       quantile_accumulator_handle = self.create_resource("floats", self.eps,
@@ -107,7 +107,6 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self._feature_0_quantiles, quantiles[0].eval())
       self.assertAllClose(self._feature_1_quantiles, quantiles[1].eval())
 
-  @test_util.run_v1_only("b/120545219")
   def testBasicQuantileBucketsMultipleResources(self):
     with self.cached_session() as sess:
       quantile_accumulator_handle_0 = self.create_resource("float_0", self.eps,
@@ -142,7 +141,6 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self._feature_0_quantiles, quantiles[0].eval())
       self.assertAllClose(self._feature_1_quantiles, quantiles[1].eval())
 
-  @test_util.run_v1_only("b/120545219")
   def testSaveRestoreAfterFlush(self):
     save_dir = os.path.join(self.get_temp_dir(), "save_restore")
     save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), "hash")
@@ -175,7 +173,6 @@ class QuantileOpsTest(test_util.TensorFlowTestCase):
       self.assertAllClose(self._feature_0_boundaries, buckets[0].eval())
       self.assertAllClose(self._feature_1_boundaries, buckets[1].eval())
 
-  @test_util.run_v1_only("b/120545219")
   def testSaveRestoreBeforeFlush(self):
     save_dir = os.path.join(self.get_temp_dir(), "save_restore")
     save_path = os.path.join(tempfile.mkdtemp(prefix=save_dir), "hash")
diff --git a/tensorflow/python/kernel_tests/losses_test.py b/tensorflow/python/kernel_tests/losses_test.py
index 4584a27e62..89788936db 100644
--- a/tensorflow/python/kernel_tests/losses_test.py
+++ b/tensorflow/python/kernel_tests/losses_test.py
@@ -38,6 +38,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.training import momentum as momentum_lib
 
 
+@test_util.run_deprecated_v1
 class AbsoluteDifferenceLossTest(test.TestCase):
 
   def setUp(self):
@@ -51,26 +52,22 @@ class AbsoluteDifferenceLossTest(test.TestCase):
         losses.absolute_difference(
             self._predictions, self._predictions, weights=None)
 
-  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeight(self):
     loss = losses.absolute_difference(self._predictions, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(0.0, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLoss(self):
     loss = losses.absolute_difference(self._labels, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(5.5, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.absolute_difference(self._labels, self._predictions, weights)
     with self.cached_session():
       self.assertAlmostEqual(5.5 * weights, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.absolute_difference(self._labels, self._predictions,
@@ -148,7 +145,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
       self.assertEquals(loss.op.name, 'softmax_cross_entropy_loss/value')
       self.assertAlmostEqual(loss.eval(), 10.0, 3)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testNonZeroLossWithPythonScalarWeight(self):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
@@ -158,7 +155,7 @@ class SoftmaxCrossEntropyLossTest(test.TestCase):
       loss = losses.softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(weights * 10.0, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testNonZeroLossWithScalarTensorWeight(self):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
@@ -311,7 +308,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
       self.assertEquals(loss.op.name, 'sparse_softmax_cross_entropy_loss/value')
       self.assertAlmostEqual(loss.eval(), 10.0, 3)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testNonZeroLossWithPythonScalarWeight(self):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
@@ -321,7 +318,7 @@ class SparseSoftmaxCrossEntropyLossTest(test.TestCase):
       loss = losses.sparse_softmax_cross_entropy(labels, logits, weights)
       self.assertAlmostEqual(weights * 10.0, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testNonZeroLossWithScalarTensorWeight(self):
     logits = constant_op.constant([[10.0, 0.0, 0.0], [0.0, 10.0, 0.0],
                                    [0.0, 0.0, 10.0]])
@@ -654,6 +651,7 @@ class SigmoidCrossEntropyLossTest(test.TestCase):
                              3)
 
 
+@test_util.run_deprecated_v1
 class LogLossTest(test.TestCase):
 
   def setUp(self):
@@ -677,13 +675,11 @@ class LogLossTest(test.TestCase):
       with self.assertRaises(ValueError):
         losses.log_loss(self._labels, self._labels, weights=None)
 
-  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeight(self):
     loss = losses.log_loss(self._labels, self._labels)
     with self.cached_session():
       self.assertAlmostEqual(0.0, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeightWithPlaceholder(self):
     tf_predictions = array_ops.placeholder(
         dtypes.float32, shape=self._np_labels.shape)
@@ -692,14 +688,12 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(
           0.0, loss.eval(feed_dict={tf_predictions: self._np_labels}), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLoss(self):
     loss = losses.log_loss(self._labels, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(-np.sum(self._expected_losses) / 6.0,
                              self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.log_loss(self._labels, self._predictions, weights)
@@ -707,7 +701,6 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.log_loss(self._labels, self._predictions,
@@ -716,7 +709,6 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeightAndPlaceholder(self):
     tf_predictions = array_ops.placeholder(
         dtypes.float32, shape=self._np_predictions.shape)
@@ -728,7 +720,6 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(weights * -np.sum(self._expected_losses) / 6.0,
                              loss, 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeightAndPlaceholderWithRankOnly(self):
     tf_predictions = array_ops.placeholder(dtypes.float32, shape=[None, None])
     weights = 2.3
@@ -788,7 +779,6 @@ class LogLossTest(test.TestCase):
       self.assertAlmostEqual(-np.sum(expected_losses) / 5.0,
                              self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithMeasurementSpecificWeightsWithPlaceholder(self):
     weights = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3))
     expected_losses = np.multiply(self._expected_losses, weights)
@@ -816,7 +806,6 @@ class LogLossTest(test.TestCase):
     with self.cached_session():
       self.assertAlmostEqual(-np.sum(expected_losses), self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithSampleSpecificWeightsMostZeroWithPlaceholder(self):
     weights = np.array([0, 0, 0, 0, 0, 2]).reshape((2, 3))
     expected_losses = np.multiply(self._expected_losses, weights)
@@ -934,6 +923,7 @@ class HuberLossTest(test.TestCase):
       self.assertAllClose(expected, self.evaluate(loss), atol=1e-5)
 
 
+@test_util.run_deprecated_v1
 class MeanSquaredErrorTest(test.TestCase):
 
   def setUp(self):
@@ -955,26 +945,26 @@ class MeanSquaredErrorTest(test.TestCase):
           losses.mean_squared_error(predictions=constant_op.constant(0),
                                     labels=constant_op.constant(0)).eval())
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testAllCorrectNoLossWeight(self):
     loss = losses.mean_squared_error(self._predictions, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(0.0, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testNonZeroLoss(self):
     loss = losses.mean_squared_error(self._labels, self._predictions)
     with self.cached_session():
       self.assertAlmostEqual(49.5, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testNonZeroLossWithPythonScalarWeight(self):
     weights = 2.3
     loss = losses.mean_squared_error(self._labels, self._predictions, weights)
     with self.cached_session():
       self.assertAlmostEqual(49.5 * weights, self.evaluate(loss), 3)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_deprecated_v1
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.mean_squared_error(self._labels, self._predictions,
@@ -1013,6 +1003,7 @@ class MeanSquaredErrorTest(test.TestCase):
       self.assertAlmostEqual(0.0, self.evaluate(loss), 3)
 
 
+@test_util.run_deprecated_v1
 class MeanPairwiseSquaredErrorTest(test.TestCase):
 
   def setUp(self):
@@ -1068,12 +1059,10 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
       self.assertAlmostEqual(
           expected_loss, dynamic_inputs_op.eval(feed_dict=feed_dict), places=3)
 
-  @test_util.run_v1_only("b/120545219")
   def testAllCorrectNoLossWeight(self):
     self._test_valid_weights(
         self._labels, self._labels, expected_loss=0.0)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLoss(self):
     self._test_valid_weights(
         self._labels, self._predictions,
@@ -1104,7 +1093,6 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
           np_grad = self.evaluate(grad)
           self.assertFalse(np.isnan(np_grad).any())
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithPythonScalarWeight(self):
     weight = 2.3
     self._test_valid_weights(
@@ -1112,7 +1100,6 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
         expected_loss=weight * np.sum(self._expected_losses),
         weights=weight)
 
-  @test_util.run_v1_only("b/120545219")
   def testNonZeroLossWithScalarTensorWeight(self):
     weights = 2.3
     loss = losses.mean_pairwise_squared_error(
@@ -1123,12 +1110,10 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
       self.assertAlmostEqual(weights * np.sum(self._expected_losses),
                              self.evaluate(loss), 3)
 
-  @test_util.run_deprecated_v1
   def testNonZeroLossWithScalarZeroWeight(self):
     self._test_valid_weights(
         self._labels, self._predictions, expected_loss=0.0, weights=0.0)
 
-  @test_util.run_deprecated_v1
   def test3d(self):
     labels = np.array([
         [[1, 9, 2], [12, 11, 10], [9, 8, 7]],
@@ -1140,7 +1125,6 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
     ])
     self._test_valid_weights(labels, predictions, expected_loss=137.5)
 
-  @test_util.run_deprecated_v1
   def test3dWeightedScalar(self):
     labels = np.array([
         [[1, 9, 2], [12, 11, 10], [9, 8, 7]],
@@ -1179,7 +1163,6 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
             weights_placeholder: weights,
         })
 
-  @test_util.run_v1_only("b/120545219")
   def testInvalid3dWeighted2x0(self):
     labels = np.array([
         [[1, 9, 2], [12, 11, 10], [9, 8, 7]],
@@ -1192,7 +1175,6 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
     self._test_invalid_weights(
         labels, predictions, weights=np.asarray((1.2, 3.4)))
 
-  @test_util.run_deprecated_v1
   def test3dWeighted2x3x3(self):
     labels = np.array([
         [[1, 9, 2], [12, 11, 10], [9, 8, 7]],
@@ -1209,7 +1191,6 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
         expected_loss=9 * 137.5,
         weights=np.ones((2, 3, 3)))
 
-  @test_util.run_deprecated_v1
   def testLossWithAllZeroBatchSpecificWeights(self):
     self._test_valid_weights(
         self._labels, self._predictions, expected_loss=0.0,
@@ -1251,6 +1232,7 @@ class MeanPairwiseSquaredErrorTest(test.TestCase):
         self.assertAlmostEqual(loss0 + loss1, loss0_1, 5)
 
 
+@test_util.run_deprecated_v1
 class CosineDistanceLossTest(test.TestCase):
 
   def setUp(self):
@@ -1329,7 +1311,6 @@ class CosineDistanceLossTest(test.TestCase):
     with self.cached_session():
       self.assertEqual(3.0 / 4.0, self.evaluate(loss))
 
-  @test_util.run_deprecated_v1
   def testMeasurementSpecificWeightsWithPlaceholderWithShape(self):
     tf_predictions = array_ops.placeholder(
         dtypes.float32, shape=self._labels.shape)
diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py
index 1af27626ba..55ef162eb1 100644
--- a/tensorflow/python/training/basic_session_run_hooks_test.py
+++ b/tensorflow/python/training/basic_session_run_hooks_test.py
@@ -1122,7 +1122,7 @@ class StepCounterHookTest(test.TestCase):
         self.assertGreater(summary_value.simple_value, 0)
 
 
-@test_util.run_v1_only('b/120545219')
+@test_util.run_deprecated_v1
 class SummarySaverHookTest(test.TestCase):
 
   def setUp(self):
@@ -1404,7 +1404,7 @@ class FinalOpsHookTest(test.TestCase):
                              hook.final_ops_values.tolist())
 
 
-@test_util.run_v1_only('b/120545219')
+@test_util.run_deprecated_v1
 class ResourceSummarySaverHookTest(test.TestCase):
 
   def setUp(self):
diff --git a/tensorflow/python/training/tensorboard_logging_test.py b/tensorflow/python/training/tensorboard_logging_test.py
index 5088ab07e5..ffc7eb5b96 100644
--- a/tensorflow/python/training/tensorboard_logging_test.py
+++ b/tensorflow/python/training/tensorboard_logging_test.py
@@ -33,7 +33,7 @@ from tensorflow.python.summary.writer import writer
 from tensorflow.python.training import tensorboard_logging
 
 
-@test_util.run_v1_only("b/120545219")
+@test_util.run_deprecated_v1
 class EventLoggingTest(test.TestCase):
 
   def setUp(self):
@@ -87,7 +87,6 @@ class EventLoggingTest(test.TestCase):
                                   (event_pb2.LogMessage.ERROR, "format")])
     self.assertEqual(2, self.logged_message_count)
 
-  @test_util.run_v1_only("b/120545219")
   def testVerbosity(self):
     tensorboard_logging.set_summary_writer(self._sw)
     tensorboard_logging.set_verbosity(tensorboard_logging.ERROR)
@@ -115,7 +114,6 @@ class EventLoggingTest(test.TestCase):
     tensorboard_logging.warn("this should work")
     self.assertEqual(1, self.logged_message_count)
 
-  @test_util.run_v1_only("b/120545219")
   def testSummaryWriterFailsAfterClear(self):
     tensorboard_logging._clear_summary_writer()
     with self.assertRaises(RuntimeError):
-- 
GitLab


From 3a99dabf840e1ca84ec42ba736ad178f672f549c Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Tue, 18 Dec 2018 02:38:21 -0800
Subject: [PATCH 738/873] while: Verify tuple element post tuple size is not
 accessed.

GetTupleElement in line 332 CHECKS if element larger than tuple is requested, verify that that won't happen earlier.

PiperOrigin-RevId: 225966102
---
 tensorflow/compiler/tf2xla/kernels/while_op.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/kernels/while_op.cc b/tensorflow/compiler/tf2xla/kernels/while_op.cc
index 89b577bfc0..ff5255028b 100644
--- a/tensorflow/compiler/tf2xla/kernels/while_op.cc
+++ b/tensorflow/compiler/tf2xla/kernels/while_op.cc
@@ -291,6 +291,15 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
 
   xla::XlaOp while_result = xla::While(cond_wrapper, *body.computation, init);
 
+  auto while_shape_or = builder->GetShape(while_result);
+  OP_REQUIRES_OK(ctx, while_shape_or.status());
+  auto count = xla::ShapeUtil::TupleElementCount(while_shape_or.ValueOrDie());
+  int max_index = body.outputs.size() + body.resource_updates.size() - 1;
+  OP_REQUIRES(
+      ctx, max_index < count,
+      errors::Internal("Max tuple element requested (", max_index,
+                       ") needs to be less than tuple size (", count, ")"));
+
   // Sets non-variable outputs.
   for (int i = 0; i < ctx->num_outputs(); ++i) {
     if (ctx->input_type(i) != DT_RESOURCE) {
-- 
GitLab


From 868a11a8de788134caeadc895625583e8b44b660 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 02:46:54 -0800
Subject: [PATCH 739/873] Update astor version to fix
 https://github.com/berkerpeksag/astor/issues/86 which we are hitting in
 Python 3.7

PiperOrigin-RevId: 225966877
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 6378d9ea95..566f705ce2 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -278,12 +278,12 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "astor_archive",
         build_file = clean_dep("//third_party:astor.BUILD"),
-        sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d",
-        strip_prefix = "astor-0.6.2",
+        sha256 = "95c30d87a6c2cf89aa628b87398466840f0ad8652f88eb173125a6df8533fb8d",
+        strip_prefix = "astor-0.7.1",
         system_build_file = clean_dep("//third_party/systemlibs:astor.BUILD"),
         urls = [
-            "https://mirror.bazel.build/pypi.python.org/packages/d8/be/c4276b3199ec3feee2a88bc64810fbea8f26d961e0a4cd9c68387a9f35de/astor-0.6.2.tar.gz",
-            "https://pypi.python.org/packages/d8/be/c4276b3199ec3feee2a88bc64810fbea8f26d961e0a4cd9c68387a9f35de/astor-0.6.2.tar.gz",
+            "https://mirror.bazel.build/pypi.python.org/packages/99/80/f9482277c919d28bebd85813c0a70117214149a96b08981b72b63240b84c/astor-0.7.1.tar.gz",
+            "https://files.pythonhosted.org/packages/99/80/f9482277c919d28bebd85813c0a70117214149a96b08981b72b63240b84c/astor-0.7.1.tar.gz",
         ],
     )
 
-- 
GitLab


From 23c920fd443d160913976b3b75254c9023aa8791 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 06:00:22 -0800
Subject: [PATCH 740/873] Object SavedModel: Initial support for serializing
 functions that depend on other serialized nodes.

PiperOrigin-RevId: 225984279
---
 .../saved_model/function_deserialization.py   |  4 +-
 .../saved_model/function_serialization.py     | 32 ++++++++++-----
 tensorflow/python/saved_model/load.py         | 40 +++++++++++++++++--
 tensorflow/python/saved_model/load_test.py    | 23 +++++++++--
 tensorflow/python/saved_model/save.py         | 11 ++++-
 .../saved_model/saved_object_graph.proto      |  8 +++-
 6 files changed, 98 insertions(+), 20 deletions(-)

diff --git a/tensorflow/python/saved_model/function_deserialization.py b/tensorflow/python/saved_model/function_deserialization.py
index b121af62bd..99a82326aa 100644
--- a/tensorflow/python/saved_model/function_deserialization.py
+++ b/tensorflow/python/saved_model/function_deserialization.py
@@ -23,10 +23,12 @@ from tensorflow.python.util import nest
 
 
 def _inputs_compatible(args, function):
+  """Check if args are compatible with a concrete function."""
   # TODO(vbardiovsky): The compatibility check should be about the signature,
   # not the flattened version of it.
   flattened_inputs = nest.flatten(args)
-  if len(flattened_inputs) != len(function.inputs):
+  expected_input_count = len(function.inputs) - len(function.captured_inputs)
+  if len(flattened_inputs) != expected_input_count:
     return False
   for a, b in zip(flattened_inputs, function.inputs):
     if a.dtype != b.dtype or not b.shape.is_compatible_with(a.shape):
diff --git a/tensorflow/python/saved_model/function_serialization.py b/tensorflow/python/saved_model/function_serialization.py
index 69f34f0fd6..6f8c1ee88c 100644
--- a/tensorflow/python/saved_model/function_serialization.py
+++ b/tensorflow/python/saved_model/function_serialization.py
@@ -20,18 +20,29 @@ from __future__ import print_function
 
 from tensorflow.python.eager import def_function
 from tensorflow.python.eager import function as defun_lib
+from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.saved_model import saved_object_graph_pb2
 
 
-def _serialize_polymorphic_function(polymorphic_function):
-  monomorphic_functions = []
+def _serialize_polymorphic_function(polymorphic_function, node_ids):
+  """Build a SavedPolymorphicProto."""
+  proto = saved_object_graph_pb2.SavedPolymorphicFunction()
   for concrete_function in list_all_concrete_functions(polymorphic_function):
-    monomorphic_functions.append(
-        saved_object_graph_pb2.SavedMonomorphicFunction(
-            concrete_function=concrete_function.name))
-  saved_polymorphic_function = saved_object_graph_pb2.SavedPolymorphicFunction(
-      monomorphic_function=monomorphic_functions)
-  return saved_polymorphic_function
+    bound_inputs = []
+    try:
+      for capture in concrete_function.captured_inputs:
+        bound_inputs.append(node_ids[capture])
+    except KeyError:
+      # TODO(andresp): Would it better to throw an exception?
+      logging.warning(
+          "Concrete function %s not added to object based saved model as it "
+          "captures tensor %s which is unsupported or not reachable from root.",
+          concrete_function.name, capture)
+      continue
+    function_proto = proto.monomorphic_function.add()
+    function_proto.concrete_function = concrete_function.name
+    function_proto.bound_inputs.extend(bound_inputs)
+  return proto
 
 
 def list_all_concrete_functions(polymorphic_function):
@@ -63,7 +74,8 @@ def list_all_polymorphic_functions(checkpointable_object):
 
 
 def add_polymorphic_functions_to_object_graph_proto(checkpointable_objects,
-                                                    saved_object_graph):
+                                                    saved_object_graph,
+                                                    node_ids):
   """Finds PolymorphicFunctions attached to objects and saves them."""
   existing_objects = list(zip(checkpointable_objects, saved_object_graph.nodes))
   for obj, obj_proto in existing_objects:
@@ -72,7 +84,7 @@ def add_polymorphic_functions_to_object_graph_proto(checkpointable_objects,
       function_node_id = len(saved_object_graph.nodes)
       function_node = saved_object_graph.nodes.add()
       function_node.function.CopyFrom(
-          _serialize_polymorphic_function(polymorphic_function))
+          _serialize_polymorphic_function(polymorphic_function, node_ids))
       reference = obj_proto.children.add()
       reference.node_id = function_node_id
       reference.local_name = name
diff --git a/tensorflow/python/saved_model/load.py b/tensorflow/python/saved_model/load.py
index 3ebc08caef..0f3dd36d4c 100644
--- a/tensorflow/python/saved_model/load.py
+++ b/tensorflow/python/saved_model/load.py
@@ -24,6 +24,7 @@ from tensorflow.python.eager import function
 from tensorflow.python.framework import function_def_to_graph as function_def_lib
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.saved_model import constants
 from tensorflow.python.saved_model import function_deserialization
@@ -45,17 +46,48 @@ class _Loader(object):
     self._export_dir = export_dir
     self._load_func_graphs(meta_graph.graph_def.library)
     self._load_all()
+    self._bind_function_captures()
     self._restore_checkpoint()
 
   def _load_func_graphs(self, function_library):
     # TODO(allenl): Do we need to do name mapping here? Not quite sure what
     # happens when loaded names collide with existing names.
-    # TODO(andresp): Look into gradient functions and the need to restore
-    # functions in the right order.
+    # TODO(andresp): Look into restoring nested and gradient functions in the
+    # right order.
     self._functions = {}
     for fdef in function_library.function:
-      self._functions[fdef.signature.name] = function.Function(
-          function_def_lib.function_def_to_graph(fdef))
+      graph = function_def_lib.function_def_to_graph(fdef)
+      self._functions[fdef.signature.name] = function.Function(graph)
+
+  def _bind_function_captures(self):
+    """Setup captured tensors in restored concrete functions."""
+    seen_functions = set()
+    for object_proto in self._proto.nodes:
+      if object_proto.WhichOneof("kind") == "function":
+        for monomorphic_function in object_proto.function.monomorphic_function:
+          name = monomorphic_function.concrete_function
+          bound_inputs = [
+              self._get_tensor_from_node(node_id)
+              for node_id in monomorphic_function.bound_inputs]
+          if name in seen_functions:
+            if self._functions[name]._captured_inputs != bound_inputs:  # pylint: disable=protected-access
+              raise NotImplementedError(
+                  "Function %s is used more than once with different "
+                  "captured inputs." % name)
+          else:
+            seen_functions.add(name)
+            # TODO(andresp): This is only injecting the captured inputs into the
+            # concrete function, note that we did not modify the FuncGraph
+            # itself.
+            self._functions[name]._captured_inputs = bound_inputs  # pylint: disable=protected-access
+
+  def _get_tensor_from_node(self, node_id):
+    obj = self._nodes[node_id]
+    if resource_variable_ops.is_resource_variable(obj):
+      return obj.handle
+    elif isinstance(obj, tracking.TrackableAsset):
+      return obj.asset_path.handle
+    raise ValueError("Can't convert node %s to tensor" % (type(obj)))
 
   def _load_all(self):
     self._nodes = [self._recreate(proto) for proto in self._proto.nodes]
diff --git a/tensorflow/python/saved_model/load_test.py b/tensorflow/python/saved_model/load_test.py
index ba88668f8c..7a129e3b15 100644
--- a/tensorflow/python/saved_model/load_test.py
+++ b/tensorflow/python/saved_model/load_test.py
@@ -53,16 +53,17 @@ class LoadTest(test.TestCase):
 
   def test_variables(self):
     root = tracking.Checkpointable()
-    root.f = def_function.function(
-        lambda x: 2. * x,
-        input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
     root.v1 = variables.Variable(1.)
     root.v2 = variables.Variable(2.)
+    root.f = def_function.function(
+        lambda x: root.v2 * x,
+        input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
     save_dir = os.path.join(self.get_temp_dir(), "saved_model")
     save.save(root, save_dir)
     imported = load.load(save_dir)
     self.assertEquals(imported.v1.numpy(), 1.0)
     self.assertEquals(imported.v2.numpy(), 2.0)
+    self.assertEqual(4., imported.f(constant_op.constant(2.)).numpy())
 
   def _make_asset(self, contents):
     filename = tempfile.mktemp(prefix=self.get_temp_dir())
@@ -95,6 +96,21 @@ class LoadTest(test.TestCase):
     with open(imported.asset2.asset_path.numpy(), "r") as f:
       self.assertEquals("contents 2", f.read())
 
+  def test_capture_assets(self):
+    root = tracking.Checkpointable()
+    root.vocab = tracking.TrackableAsset(self._make_asset("contents"))
+    root.f = def_function.function(
+        lambda: root.vocab.asset_path,
+        input_signature=[])
+    save_dir = os.path.join(self.get_temp_dir(), "save_dir")
+    save.save(root, save_dir)
+    imported = load.load(save_dir)
+    origin_output = root.f().numpy()
+    imported_output = imported.f().numpy()
+    self.assertNotEqual(origin_output, imported_output)
+    with open(imported_output, "r") as f:
+      self.assertEquals("contents", f.read())
+
   def test_assets_dedup(self):
     vocab = self._make_asset("contents")
     root = tracking.Checkpointable()
@@ -130,5 +146,6 @@ class LoadTest(test.TestCase):
     self.assertEqual(4., imported.f(constant_op.constant(2.)).numpy())
     self.assertEqual(14, imported.f(constant_op.constant(7)).numpy())
 
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index 9005965d23..73a847e975 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -521,11 +521,20 @@ def _write_object_graph(root, export_dir, asset_file_def_index):
   util.fill_object_graph_proto(checkpointable_objects, node_ids, slot_variables,
                                proto)
 
+  node_ids = util.ObjectIdentityDictionary()
+  for i in range(len(checkpointable_objects)):
+    obj = checkpointable_objects[i]
+    node_ids[obj] = i
+    if resource_variable_ops.is_resource_variable(obj):
+      node_ids[obj.handle] = i
+    elif isinstance(obj, tracking.TrackableAsset):
+      node_ids[obj.asset_path.handle] = i
+
   for obj, obj_proto in zip(checkpointable_objects, proto.nodes):
     _write_object_proto(obj, obj_proto, asset_file_def_index)
 
   function_serialization.add_polymorphic_functions_to_object_graph_proto(
-      checkpointable_objects, proto)
+      checkpointable_objects, proto, node_ids)
 
   extra_asset_dir = os.path.join(
       compat.as_bytes(export_dir),
diff --git a/tensorflow/python/saved_model/saved_object_graph.proto b/tensorflow/python/saved_model/saved_object_graph.proto
index b95990ad34..1341cca7ff 100644
--- a/tensorflow/python/saved_model/saved_object_graph.proto
+++ b/tensorflow/python/saved_model/saved_object_graph.proto
@@ -73,7 +73,7 @@ message SavedAsset {
   //
   // Only the field `AssetFileDef.filename` is used. Other fields, such as
   // `AssetFileDef.tensor_info`, MUST be ignored.
-  uint32 asset_file_def_index = 1;
+  int32 asset_file_def_index = 1;
 }
 
 // A function with multiple signatures, possibly with non-Tensor arguments.
@@ -84,6 +84,12 @@ message SavedPolymorphicFunction {
 message SavedMonomorphicFunction {
   // A reference to a TensorFlow function in the MetaGraph's FunctionDefLibrary
   string concrete_function = 1;
+
+  // Bound inputs to the function. The SavedObjects identified by the node ids
+  // given here are appended as extra inputs to the caller-supplied inputs.
+  // The only types of SavedObjects valid here are SavedVariable, SavedResource
+  // and SavedAsset.
+  repeated int32 bound_inputs = 2;
 }
 
 // Represents a Variable that is initialized by loading the contents from the
-- 
GitLab


From b1801cd34cf6a77efc37dd4e25a62a9f69f8732c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 08:07:03 -0800
Subject: [PATCH 741/873] Fix include path flags for TensorRT.

PiperOrigin-RevId: 225997942
---
 third_party/tensorrt/BUILD.tpl                            | 6 ------
 third_party/toolchains/preconfig/generate/generate.sh     | 4 ++++
 .../toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD      | 8 ++------
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/third_party/tensorrt/BUILD.tpl b/third_party/tensorrt/BUILD.tpl
index 57682e8735..a2c30b8b94 100644
--- a/third_party/tensorrt/BUILD.tpl
+++ b/third_party/tensorrt/BUILD.tpl
@@ -12,9 +12,6 @@ package(default_visibility = ["//visibility:public"])
 cc_library(
     name = "tensorrt_headers",
     hdrs = [%{tensorrt_headers}],
-    includes = [
-        "include",
-    ],
     visibility = ["//visibility:public"],
 )
 
@@ -22,9 +19,6 @@ cc_library(
     name = "nv_infer",
     srcs = [%{nv_infer}],
     data = [%{nv_infer}],
-    includes = [
-        "include",
-    ],
     copts= cuda_default_copts(),
     deps = [
         "@local_config_cuda//cuda:cuda",
diff --git a/third_party/toolchains/preconfig/generate/generate.sh b/third_party/toolchains/preconfig/generate/generate.sh
index 79407d59ac..76fb1bd3c0 100755
--- a/third_party/toolchains/preconfig/generate/generate.sh
+++ b/third_party/toolchains/preconfig/generate/generate.sh
@@ -52,6 +52,10 @@ bazel build --define=mount_project="${PWD}" "${PKG}/generate:${TARGET}"
 cd "${TEMPDIR}"
 tar xvf "${ROOT}/bazel-bin/${PKG}/generate/${TARGET}_outputs.tar"
 
+# Other than @local_config_tensorrt, the remote config repo is a subpackage of
+# @org_tensorflow and we need to add '-iquote <package_path>' manually.
+buildozer "set strip_include_prefix [package_name()]" //local_config_tensorrt:%cc_library
+
 # Delete all empty files: configurations leave empty files around when they are
 # unnecessary.
 find . -empty -delete
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
index 399d7c1463..8bf9115a86 100755
--- a/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
+++ b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
@@ -12,9 +12,7 @@ package(default_visibility = ["//visibility:public"])
 cc_library(
     name = "tensorrt_headers",
     hdrs = [":tensorrt_include"],
-    includes = [
-        "include",
-    ],
+    strip_include_prefix = [package_name()],
     visibility = ["//visibility:public"],
 )
 
@@ -23,10 +21,8 @@ cc_library(
     srcs = ["tensorrt/lib/libnvinfer.so.5"],
     copts = cuda_default_copts(),
     data = ["tensorrt/lib/libnvinfer.so.5"],
-    includes = [
-        "include",
-    ],
     linkstatic = 1,
+    strip_include_prefix = [package_name()],
     visibility = ["//visibility:public"],
     deps = [
         ":tensorrt_headers",
-- 
GitLab


From 3856a33dc65e7aff1df7ee4e940479ef37e9934b Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 18 Dec 2018 08:15:59 -0800
Subject: [PATCH 742/873] Internal change

PiperOrigin-RevId: 225999107
---
 .../java/org/tensorflow/lite/Interpreter.java | 35 ++++++++++++++++-
 .../lite/NativeInterpreterWrapper.java        | 23 +++++++----
 .../main/java/org/tensorflow/lite/Tensor.java | 15 ++++++++
 .../native/nativeinterpreterwrapper_jni.cc    |  8 ++++
 .../lite/java/src/main/native/tensor_jni.cc   | 16 ++++++++
 .../lite/java/src/main/native/tensor_jni.h    |  9 +++++
 .../org/tensorflow/lite/InterpreterTest.java  | 38 +++++++++++++++++--
 .../java/org/tensorflow/lite/TensorTest.java  |  1 +
 tensorflow/lite/java/src/test/native/BUILD    |  1 +
 .../src/test/native/interpreter_test_jni.cc   | 13 ++++---
 10 files changed, 140 insertions(+), 19 deletions(-)

diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
index 2203d5fbdb..1b2d0d5aa8 100644
--- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
+++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java
@@ -116,9 +116,26 @@ public final class Interpreter implements AutoCloseable {
       return this;
     }
 
+    /**
+     * Advanced: Set if buffer handle output is allowed.
+     *
+     * <p>When a {@link Delegate} supports hardware acceleration, the interpreter will make the data
+     * of output tensors available in the CPU-allocated tensor buffers by default. If the client can
+     * consume the buffer handle directly (e.g. reading output from OpenGL texture), it can set this
+     * flag to false, avoiding the copy of data to the CPU buffer. The delegate documentation should
+     * indicate whether this is supported and how it can be used.
+     *
+     * <p>WARNING: This is an experimental interface that is subject to change.
+     */
+    public Options setAllowBufferHandleOutput(boolean allow) {
+      this.allowBufferHandleOutput = allow;
+      return this;
+    }
+
     int numThreads = -1;
-    boolean useNNAPI = false;
-    boolean allowFp16PrecisionForFp32 = false;
+    Boolean useNNAPI;
+    Boolean allowFp16PrecisionForFp32;
+    Boolean allowBufferHandleOutput;
     final List<Delegate> delegates = new ArrayList<>();
   }
 
@@ -349,6 +366,20 @@ public final class Interpreter implements AutoCloseable {
     wrapper.setNumThreads(numThreads);
   }
 
+  /**
+   * Advanced: Modifies the graph with the provided {@link Delegate}.
+   *
+   * <p>Note: The typical path for providing delegates is via {@link Options#addDelegate}, at
+   * creation time. This path should only be used when a delegate might require coordinated
+   * interaction between Interpeter creation and delegate application.
+   *
+   * <p>WARNING: This is an experimental API and subject to change.
+   */
+  public void modifyGraphWithDelegate(Delegate delegate) {
+    checkNotClosed();
+    wrapper.modifyGraphWithDelegate(delegate);
+  }
+
   /** Release resources associated with the {@code Interpreter}. */
   @Override
   public void close() {
diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
index 1952db0267..580dbef084 100644
--- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
+++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java
@@ -69,11 +69,15 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     this.interpreterHandle = createInterpreter(modelHandle, errorHandle, options.numThreads);
     this.inputTensors = new Tensor[getInputCount(interpreterHandle)];
     this.outputTensors = new Tensor[getOutputCount(interpreterHandle)];
-    if (options.useNNAPI) {
-      setUseNNAPI(options.useNNAPI);
+    if (options.useNNAPI != null) {
+      setUseNNAPI(options.useNNAPI.booleanValue());
     }
-    if (options.allowFp16PrecisionForFp32) {
-      setAllowFp16PrecisionForFp32(options.allowFp16PrecisionForFp32);
+    if (options.allowFp16PrecisionForFp32 != null) {
+      allowFp16PrecisionForFp32(
+          interpreterHandle, options.allowFp16PrecisionForFp32.booleanValue());
+    }
+    if (options.allowBufferHandleOutput != null) {
+      allowBufferHandleOutput(interpreterHandle, options.allowBufferHandleOutput.booleanValue());
     }
     for (Delegate delegate : options.delegates) {
       applyDelegate(interpreterHandle, errorHandle, delegate.getNativeHandle());
@@ -180,14 +184,15 @@ final class NativeInterpreterWrapper implements AutoCloseable {
     useNNAPI(interpreterHandle, useNNAPI);
   }
 
-  void setAllowFp16PrecisionForFp32(boolean allow) {
-    allowFp16PrecisionForFp32(interpreterHandle, allow);
-  }
-
   void setNumThreads(int numThreads) {
     numThreads(interpreterHandle, numThreads);
   }
 
+  void modifyGraphWithDelegate(Delegate delegate) {
+    applyDelegate(interpreterHandle, errorHandle, delegate.getNativeHandle());
+    delegates.add(delegate);
+  }
+
   /** Gets index of an input given its name. */
   int getInputIndex(String name) {
     if (inputsIndexes == null) {
@@ -356,6 +361,8 @@ final class NativeInterpreterWrapper implements AutoCloseable {
 
   private static native void allowFp16PrecisionForFp32(long interpreterHandle, boolean allow);
 
+  private static native void allowBufferHandleOutput(long interpreterHandle, boolean allow);
+
   private static native long createErrorReporter(int size);
 
   private static native long createModel(String modelPathOrBuffer, long errorHandle);
diff --git a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
index 7aa24b4198..b56fcd772b 100644
--- a/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
+++ b/tensorflow/lite/java/src/main/java/org/tensorflow/lite/Tensor.java
@@ -80,6 +80,15 @@ public final class Tensor {
     return shapeCopy;
   }
 
+  /**
+   * Returns the (global) index of the tensor within the owning {@link Interpreter}.
+   *
+   * @hide
+   */
+  public int index() {
+    return index(nativeHandle);
+  }
+
   /**
    * Copies the contents of the provided {@code src} object to the Tensor.
    *
@@ -278,10 +287,16 @@ public final class Tensor {
 
   private static native int numBytes(long handle);
 
+  private static native int setBufferHandle(long handle, long delegateHandle, int bufferHandle);
+
+  private static native int bufferHandle(long handle);
+
   private static native void readMultiDimensionalArray(long handle, Object dst);
 
   private static native void writeMultiDimensionalArray(long handle, Object src);
 
+  private static native int index(long handle);
+
   static {
     TensorFlowLite.init();
   }
diff --git a/tensorflow/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
index 1e98f94250..d3759c97fb 100644
--- a/tensorflow/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
+++ b/tensorflow/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc
@@ -245,6 +245,14 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_allowFp16PrecisionForFp32(
   interpreter->SetAllowFp16PrecisionForFp32(static_cast<bool>(allow));
 }
 
+JNIEXPORT void JNICALL
+Java_org_tensorflow_lite_NativeInterpreterWrapper_allowBufferHandleOutput(
+    JNIEnv* env, jclass clazz, jlong handle, jboolean allow) {
+  tflite::Interpreter* interpreter = convertLongToInterpreter(env, handle);
+  if (interpreter == nullptr) return;
+  interpreter->SetAllowBufferHandleOutput(allow);
+}
+
 JNIEXPORT void JNICALL
 Java_org_tensorflow_lite_NativeInterpreterWrapper_numThreads(JNIEnv* env,
                                                              jclass clazz,
diff --git a/tensorflow/lite/java/src/main/native/tensor_jni.cc b/tensorflow/lite/java/src/main/native/tensor_jni.cc
index 82d2679de9..cc81eb8d51 100644
--- a/tensorflow/lite/java/src/main/native/tensor_jni.cc
+++ b/tensorflow/lite/java/src/main/native/tensor_jni.cc
@@ -35,6 +35,7 @@ class TensorHandle {
       : interpreter_(interpreter), tensor_index_(tensor_index) {}
 
   TfLiteTensor* tensor() const { return interpreter_->tensor(tensor_index_); }
+  int index() const { return tensor_index_; }
 
  private:
   tflite::Interpreter* const interpreter_;
@@ -50,6 +51,15 @@ TfLiteTensor* GetTensorFromHandle(JNIEnv* env, jlong handle) {
   return reinterpret_cast<TensorHandle*>(handle)->tensor();
 }
 
+int GetTensorIndexFromHandle(JNIEnv* env, jlong handle) {
+  if (handle == 0) {
+    throwException(env, kIllegalArgumentException,
+                   "Internal error: Invalid handle to TfLiteTensor.");
+    return -1;
+  }
+  return reinterpret_cast<TensorHandle*>(handle)->index();
+}
+
 size_t ElementByteSize(TfLiteType data_type) {
   // The code in this file makes the assumption that the
   // TensorFlow TF_DataTypes and the Java primitive types
@@ -399,3 +409,9 @@ JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_numBytes(JNIEnv* env,
   if (tensor == nullptr) return 0;
   return static_cast<jint>(tensor->bytes);
 }
+
+JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_index(JNIEnv* env,
+                                                             jclass clazz,
+                                                             jlong handle) {
+  return GetTensorIndexFromHandle(env, handle);
+}
diff --git a/tensorflow/lite/java/src/main/native/tensor_jni.h b/tensorflow/lite/java/src/main/native/tensor_jni.h
index ec0442e93f..52150bf3ab 100644
--- a/tensorflow/lite/java/src/main/native/tensor_jni.h
+++ b/tensorflow/lite/java/src/main/native/tensor_jni.h
@@ -106,6 +106,15 @@ Java_org_tensorflow_lite_Tensor_writeMultiDimensionalArray(JNIEnv* env,
                                                            jlong handle,
                                                            jobject src);
 
+/*
+ *  Class:     org_tensorflow_lite_Tensor
+ *  Method:    index
+ *  Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_tensorflow_lite_Tensor_index(JNIEnv* env,
+                                                             jclass clazz,
+                                                             jlong handle);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif  // __cplusplus
diff --git a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
index e635515de8..f89062ba45 100644
--- a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
+++ b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java
@@ -169,11 +169,13 @@ public final class InterpreterTest {
   public void testRunForMultipleInputsOutputs() {
     Interpreter interpreter = new Interpreter(MULTIPLE_INPUTS_MODEL_FILE);
     assertThat(interpreter.getInputTensorCount()).isEqualTo(4);
+    assertThat(interpreter.getInputTensor(0).index()).isGreaterThan(-1);
     assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
     assertThat(interpreter.getInputTensor(1).dataType()).isEqualTo(DataType.FLOAT32);
     assertThat(interpreter.getInputTensor(2).dataType()).isEqualTo(DataType.FLOAT32);
     assertThat(interpreter.getInputTensor(3).dataType()).isEqualTo(DataType.FLOAT32);
     assertThat(interpreter.getOutputTensorCount()).isEqualTo(2);
+    assertThat(interpreter.getOutputTensor(0).index()).isGreaterThan(-1);
     assertThat(interpreter.getOutputTensor(0).dataType()).isEqualTo(DataType.FLOAT32);
     assertThat(interpreter.getOutputTensor(1).dataType()).isEqualTo(DataType.FLOAT32);
 
@@ -361,10 +363,38 @@ public final class InterpreterTest {
     float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD};
     float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD};
     float[][][][] fourD = {threeD, threeD};
-    float[] output = new float[1];
-    interpreter.run(fourD, output);
-    float[] expected = {7.0f};
-    assertThat(output).usingTolerance(0.1f).containsExactly(expected).inOrder();
+    float[][][][] parsedOutputs = new float[2][8][8][3];
+    interpreter.run(fourD, parsedOutputs);
+    float[] outputOneD = parsedOutputs[0][0][0];
+    float[] expected = {7.0f, 7.0f, 7.0f};
+    assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder();
+
+    interpreter.close();
+  }
+
+  @Test
+  public void testModifyGraphWithDelegate() throws Exception {
+    System.loadLibrary("tensorflowlite_test_jni");
+    Delegate delegate =
+        new Delegate() {
+          @Override
+          public long getNativeHandle() {
+            return getNativeHandleForDelegate();
+          }
+        };
+    Interpreter interpreter = new Interpreter(MODEL_FILE);
+    interpreter.modifyGraphWithDelegate(delegate);
+
+    // The native delegate stubs out the graph with a single op that produces the scalar value 7.
+    float[] oneD = {1.23f, 6.54f, 7.81f};
+    float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD};
+    float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD};
+    float[][][][] fourD = {threeD, threeD};
+    float[][][][] parsedOutputs = new float[2][8][8][3];
+    interpreter.run(fourD, parsedOutputs);
+    float[] outputOneD = parsedOutputs[0][0][0];
+    float[] expected = {7.0f, 7.0f, 7.0f};
+    assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder();
 
     interpreter.close();
   }
diff --git a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
index 35ff4328b8..be6a706b8d 100644
--- a/tensorflow/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
+++ b/tensorflow/lite/java/src/test/java/org/tensorflow/lite/TensorTest.java
@@ -50,6 +50,7 @@ public final class TensorTest {
     outputs.put(0, new float[2][8][8][3]);
     wrapper.run(inputs, outputs);
     tensor = wrapper.getOutputTensor(0);
+    assertThat(tensor.index()).isGreaterThan(-1);
   }
 
   @After
diff --git a/tensorflow/lite/java/src/test/native/BUILD b/tensorflow/lite/java/src/test/native/BUILD
index 481aea7ecd..994f2389b4 100644
--- a/tensorflow/lite/java/src/test/native/BUILD
+++ b/tensorflow/lite/java/src/test/native/BUILD
@@ -16,6 +16,7 @@ cc_library(
     deps = [
         "//tensorflow/lite/c:c_api_internal",
         "//tensorflow/lite/java/jni",
+        "//tensorflow/lite/kernels:kernel_util",
     ],
 )
 
diff --git a/tensorflow/lite/java/src/test/native/interpreter_test_jni.cc b/tensorflow/lite/java/src/test/native/interpreter_test_jni.cc
index 1a0072a7c6..000e718ba7 100644
--- a/tensorflow/lite/java/src/test/native/interpreter_test_jni.cc
+++ b/tensorflow/lite/java/src/test/native/interpreter_test_jni.cc
@@ -14,7 +14,9 @@ limitations under the License.
 ==============================================================================*/
 
 #include <jni.h>
+#include <algorithm>
 #include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -23,22 +25,23 @@ extern "C" {
 JNIEXPORT jlong JNICALL
 Java_org_tensorflow_lite_InterpreterTest_getNativeHandleForDelegate(
     JNIEnv* env, jclass clazz) {
-  // A simple op which outputs a vector of length 1 with the value [7].
+  // A simple op which outputs a tensor with values of 7.
   static TfLiteRegistration registration = {
       .init = nullptr,
       .free = nullptr,
       .prepare =
           [](TfLiteContext* context, TfLiteNode* node) {
+            TfLiteTensor* input = &context->tensors[node->inputs->data[0]];
             TfLiteTensor* output = &context->tensors[node->outputs->data[0]];
-            TfLiteIntArray* scalar_size = TfLiteIntArrayCreate(1);
-            scalar_size->data[0] = 1;
+            TfLiteIntArray* output_dims = TfLiteIntArrayCopy(input->dims);
             output->type = kTfLiteFloat32;
-            return context->ResizeTensor(context, output, scalar_size);
+            return context->ResizeTensor(context, output, output_dims);
           },
       .invoke =
           [](TfLiteContext* context, TfLiteNode* node) {
             TfLiteTensor* output = &context->tensors[node->outputs->data[0]];
-            output->data.f[0] = 7.0f;
+            std::fill(output->data.f,
+                      output->data.f + tflite::NumElements(output), 7.0f);
             return kTfLiteOk;
           },
       .profiling_string = nullptr,
-- 
GitLab


From ec01fa80e679e1b81296710e0877bcee10f50bc8 Mon Sep 17 00:00:00 2001
From: Jian Li <jianlijianli@google.com>
Date: Tue, 18 Dec 2018 08:47:51 -0800
Subject: [PATCH 743/873] Improve the way to merge input_shape and
 interpreter_->inputs().

PiperOrigin-RevId: 226002955
---
 tensorflow/lite/kernels/test_util.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/lite/kernels/test_util.cc b/tensorflow/lite/kernels/test_util.cc
index 549ea78f5b..19d7e37409 100644
--- a/tensorflow/lite/kernels/test_util.cc
+++ b/tensorflow/lite/kernels/test_util.cc
@@ -119,10 +119,10 @@ void SingleOpModel::BuildInterpreter(std::vector<std::vector<int>> input_shapes,
 
   CHECK(interpreter_ != nullptr);
 
-  int i = 0;
-  for (const auto& shape : input_shapes) {
-    int input_idx = interpreter_->inputs()[i++];
+  for (int i = 0; i < input_shapes.size(); ++i) {
+    const int input_idx = interpreter_->inputs()[i];
     if (input_idx == kOptionalTensor) continue;
+    const auto& shape = input_shapes[i];
     if (shape.empty()) continue;
     CHECK(interpreter_->ResizeInputTensor(input_idx, shape) == kTfLiteOk);
   }
-- 
GitLab


From 7ed1c1e26303e78cbddc11283fd193e4e49c4e3f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 09:04:19 -0800
Subject: [PATCH 744/873] Serialize all reachable polymorphic functions that
 have an input signature.

PiperOrigin-RevId: 226005404
---
 .../saved_model/function_serialization.py     |  3 ++
 tensorflow/python/saved_model/load_test.py    | 42 +++++++++++--------
 tensorflow/python/saved_model/save.py         |  5 ++-
 3 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/saved_model/function_serialization.py b/tensorflow/python/saved_model/function_serialization.py
index 6f8c1ee88c..6bc6542491 100644
--- a/tensorflow/python/saved_model/function_serialization.py
+++ b/tensorflow/python/saved_model/function_serialization.py
@@ -47,6 +47,9 @@ def _serialize_polymorphic_function(polymorphic_function, node_ids):
 
 def list_all_concrete_functions(polymorphic_function):
   """Given a polymorphic function, returns all of its concrete functions."""
+  input_signature = polymorphic_function._input_signature  # pylint: disable=protected-access
+  if input_signature is not None:
+    polymorphic_function.get_concrete_function()
   concrete_functions = []
   for signature in polymorphic_function._cached_input_signatures:  # pylint: disable=protected-access
     if any(isinstance(arg, defun_lib.UnknownArgument) for arg in signature):
diff --git a/tensorflow/python/saved_model/load_test.py b/tensorflow/python/saved_model/load_test.py
index 7a129e3b15..9ed84e5c8f 100644
--- a/tensorflow/python/saved_model/load_test.py
+++ b/tensorflow/python/saved_model/load_test.py
@@ -35,6 +35,11 @@ from tensorflow.python.training.checkpointable import tracking
 
 class LoadTest(test.TestCase):
 
+  def cycle(self, obj):
+    path = tempfile.mkdtemp(prefix=self.get_temp_dir())
+    save.save(obj, path, signatures={})
+    return load.load(path)
+
   def test_structure_import(self):
     root = tracking.Checkpointable()
     root.f = def_function.function(
@@ -44,9 +49,7 @@ class LoadTest(test.TestCase):
     root.dep_two = tracking.Checkpointable()
     root.dep_two.dep = tracking.Checkpointable()
     root.dep_three = root.dep_two.dep
-    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
-    save.save(root, save_dir)
-    imported = load.load(save_dir)
+    imported = self.cycle(root)
     self.assertIs(imported.dep_three, imported.dep_two.dep)
     self.assertIsNot(imported.dep_one, imported.dep_two)
     self.assertEqual(4., imported.f(constant_op.constant(2.)).numpy())
@@ -58,9 +61,7 @@ class LoadTest(test.TestCase):
     root.f = def_function.function(
         lambda x: root.v2 * x,
         input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
-    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
-    save.save(root, save_dir)
-    imported = load.load(save_dir)
+    imported = self.cycle(root)
     self.assertEquals(imported.v1.numpy(), 1.0)
     self.assertEquals(imported.v2.numpy(), 2.0)
     self.assertEqual(4., imported.f(constant_op.constant(2.)).numpy())
@@ -102,9 +103,7 @@ class LoadTest(test.TestCase):
     root.f = def_function.function(
         lambda: root.vocab.asset_path,
         input_signature=[])
-    save_dir = os.path.join(self.get_temp_dir(), "save_dir")
-    save.save(root, save_dir)
-    imported = load.load(save_dir)
+    imported = self.cycle(root)
     origin_output = root.f().numpy()
     imported_output = imported.f().numpy()
     self.assertNotEqual(origin_output, imported_output)
@@ -121,31 +120,40 @@ class LoadTest(test.TestCase):
     root.asset1 = tracking.TrackableAsset(vocab)
     root.asset2 = tracking.TrackableAsset(vocab)
 
-    export_dir = os.path.join(self.get_temp_dir(), "save_dir")
-    save.save(root, export_dir)
-    imported = load.load(export_dir)
+    imported = self.cycle(root)
 
     self.assertEqual(imported.asset1.asset_path.numpy(),
                      imported.asset2.asset_path.numpy())
 
-  def test_only_implicit_signatures(self):
+  def test_implicit_input_signature(self):
+    @def_function.function
     def func(x):
       return 2 * x
 
     root = tracking.Checkpointable()
-    root.f = def_function.function(func)
+    root.f = func
 
     # Add two traces.
     root.f(constant_op.constant(1.))
     root.f(constant_op.constant(1))
 
-    save_dir = os.path.join(self.get_temp_dir(), "saved_model")
-    save.save(root, save_dir, signatures=dict())
-    imported = load.load(save_dir)
+    imported = self.cycle(root)
 
     self.assertEqual(4., imported.f(constant_op.constant(2.)).numpy())
     self.assertEqual(14, imported.f(constant_op.constant(7)).numpy())
 
+  def test_explicit_input_signature(self):
+    @def_function.function(
+        input_signature=[tensor_spec.TensorSpec(None, dtypes.float32)])
+    def func(x):
+      return 2 * x
+
+    root = tracking.Checkpointable()
+    root.f = func
+
+    imported = self.cycle(root)
+    self.assertEqual(4., imported.f(constant_op.constant(2.0)).numpy())
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index 73a847e975..38a459da2e 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -482,8 +482,9 @@ def _fill_meta_graph_def(meta_graph_def, obj, signature_functions,
   # the exported graph (thus the `to_graph` argument).
   saver = object_saver.freeze(object_map=object_map, to_graph=exported_graph)
 
-  # We must resolve the concrete function to add to MetaGraph while in eager
-  # mode.
+  # We must instantiate and list all concrete functions of polymorphic functions
+  # while in eager mode so they end up added to the graph and can later be used
+  # by the object based saved model.
   concrete_functions = []
   for accessible_object in accessible_objects:
     for function in function_serialization.list_all_polymorphic_functions(
-- 
GitLab


From cdc14f67360a11452490252bd4742ee862b0dc60 Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Tue, 18 Dec 2018 09:38:52 -0800
Subject: [PATCH 745/873] Update tf.float to tf.cast(..,dtype=tf.float32) in
 losses_impl

PiperOrigin-RevId: 226010550
---
 tensorflow/python/ops/losses/losses_impl.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index 20397612bc..dc3ef4b72d 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import confusion_matrix
@@ -664,8 +665,8 @@ def mean_squared_error(
     raise ValueError("predictions must not be None.")
   with ops.name_scope(scope, "mean_squared_error",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     losses = math_ops.squared_difference(predictions, labels)
     return compute_weighted_loss(
-- 
GitLab


From 1330e04a5a4e0a981bc5d48b35087af82bbd0fb6 Mon Sep 17 00:00:00 2001
From: Doe Hyun Yoon <dyoon@google.com>
Date: Tue, 18 Dec 2018 10:35:00 -0800
Subject: [PATCH 746/873] Add cost model for Cast ops. Using float to int16
 cost from Eigen sclar_cast_op cost.

PiperOrigin-RevId: 226020086
---
 .../core/grappler/costs/op_level_cost_estimator.cc     |  6 ++++++
 .../grappler/costs/op_level_cost_estimator_test.cc     | 10 ++++++++++
 2 files changed, 16 insertions(+)

diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index 0e55209238..55eb391d2b 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -288,6 +288,12 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
       {"Atan", EIGEN_COST(scalar_atan_op<float>)},
       {"Atan2", EIGEN_COST(scalar_quotient_op<float>) +
                     EIGEN_COST(scalar_atan_op<float>)},
+      // For now, we use Eigen cost model for float to int16 cast as an example
+      // case; Eigen cost model is zero when src and dst types are identical,
+      // and it uses AddCost (1) when different. We may implement a separate
+      // cost functions for cast ops, using the actual input and output types.
+      {"Cast", Eigen::internal::functor_traits<
+                   Eigen::internal::scalar_cast_op<float, int16>>::Cost},
       {"Ceil", EIGEN_COST(scalar_ceil_op<float>)},
       {"Cos", EIGEN_COST(scalar_cos_op<float>)},
       {"Dequantize", EIGEN_COST(scalar_product_op<float>)},
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
index c9ce63a8ef..9a59877ac5 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
@@ -712,6 +712,16 @@ TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) {
   EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
 }
 
+TEST_F(OpLevelCostEstimatorTest, CastExecutionTime) {
+  auto cost = PredictCosts(DescribeUnaryOp("Cast", 1000));
+  EXPECT_EQ(Costs::Duration(800), cost.memory_time);
+  EXPECT_EQ(Costs::Duration(100), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(900), cost.execution_time);
+  EXPECT_EQ(1, cost.num_ops_total);
+  EXPECT_FALSE(cost.inaccurate);
+  EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
+}
+
 TEST_F(OpLevelCostEstimatorTest, UnknownOrPartialShape) {
   {
     auto cost = PredictCosts(DescribeMatMul(2, 4, 7, 7));
-- 
GitLab


From e31888a13b5c41974ae0c27ba2bbde18902d6a04 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 18 Dec 2018 10:43:57 -0800
Subject: [PATCH 747/873] Allow fetching operations using
 tf.compat.v1.wrap_function.prune()

Some related tweaking to avoid creating two sets of input placeholders and to re-structure outputs. Like session.run, fetched Operations turn into None.

PiperOrigin-RevId: 226021852
---
 tensorflow/python/eager/lift_to_graph.py      | 15 +++++--
 tensorflow/python/eager/wrap_function.py      | 38 ++++++++++++++---
 tensorflow/python/eager/wrap_function_test.py | 41 ++++++++++++++++++-
 3 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/eager/lift_to_graph.py b/tensorflow/python/eager/lift_to_graph.py
index c231264047..2e9d24f61e 100644
--- a/tensorflow/python/eager/lift_to_graph.py
+++ b/tensorflow/python/eager/lift_to_graph.py
@@ -29,13 +29,19 @@ def _graph_inputs(op):
   return [x.op for x in op.inputs] + list(op.control_inputs)
 
 
+def _as_operation(op_or_tensor):
+  if isinstance(op_or_tensor, ops.Tensor):
+    return op_or_tensor.op
+  return op_or_tensor
+
+
 def lift_to_graph(init_tensor, graph, sources=None):
   """Copies the tensor and all its inputs recursively to the outer graph."""
   # Check that the initializer does not depend on any placeholders.
   if sources is None:
     sources = set([])
   visited_ops = set([x.op for x in sources])
-  ops_to_visit = [init_tensor.op]
+  ops_to_visit = [_as_operation(init_tensor)]
   op_outputs = collections.defaultdict(set)
   while ops_to_visit:
     op = ops_to_visit.pop()
@@ -57,7 +63,7 @@ def lift_to_graph(init_tensor, graph, sources=None):
   # outputs are part of this subgraph.
   ops_to_copy = []
   marked_ops = set([])
-  ops_to_visit = [init_tensor.op]
+  ops_to_visit = [_as_operation(init_tensor)]
   while ops_to_visit:
     op = ops_to_visit.pop()
     if op in marked_ops:
@@ -67,15 +73,18 @@ def lift_to_graph(init_tensor, graph, sources=None):
     for inp in _graph_inputs(op):
       if all(x in marked_ops for x in op_outputs[inp]) and inp not in sources:
         ops_to_visit.append(inp)
-  assert len(ops_to_copy) == len(visited_ops)
   # ops_to_copy now holds a reverse topologically sorted list of ops which
   # ends in the initializer. We copy those to the outermost graph and
   # build the initialization op there.
   with graph.as_default():
     op_map = {}
+    source_ops = set()
     for s in sources:
+      source_ops.add(s.op)
       op_map[s] = array_ops.placeholder(dtype=s.dtype, shape=s.shape)
     for op in reversed(ops_to_copy):
+      if op in source_ops:
+        continue
       copied_inputs = [op_map[x] for x in op.inputs]
       copied_control_inputs = [op_map[x] for x in op.control_inputs]
       with ops.control_dependencies(copied_control_inputs):
diff --git a/tensorflow/python/eager/wrap_function.py b/tensorflow/python/eager/wrap_function.py
index a3606c0eba..0930b6116d 100644
--- a/tensorflow/python/eager/wrap_function.py
+++ b/tensorflow/python/eager/wrap_function.py
@@ -24,6 +24,7 @@ from tensorflow.python.eager import lift_to_graph
 from tensorflow.python.framework import func_graph
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
@@ -57,9 +58,19 @@ class WrappedFunction(function.Function):
 
   def prune(self, feeds, fetches):
     flat_feeds, flat_fetches = nest.flatten(feeds), nest.flatten(fetches)
-    for f in flat_feeds + flat_fetches:
+    for f in flat_feeds:
       if not isinstance(f, ops.Tensor):
-        raise ValueError("Feeds and fetches must be tensors.")
+        raise ValueError("Feeds must be tensors.")
+    tensor_fetches = []
+    operation_fetches = []
+    for f in flat_fetches:
+      if isinstance(f, ops.Tensor):
+        tensor_fetches.append(f)
+      elif isinstance(f, ops.Operation):
+        operation_fetches.append(f)
+      else:
+        raise ValueError("Fetches must be tensors or operations.")
+    for f in flat_feeds + flat_fetches:
       if f.graph is not self._func_graph:
         raise ValueError(
             "Can only prune function whose feeds and fetches "
@@ -67,17 +78,34 @@ class WrappedFunction(function.Function):
                 self._func_graph, f, f.graph))
     with self._func_graph.as_default():
       pruned_graph = func_graph.FuncGraph("pruned")
-      sink_tensor = array_ops.identity_n(flat_fetches)[0]
+      with ops.control_dependencies(operation_fetches):
+        if tensor_fetches:
+          identity_fetches = array_ops.identity_n(tensor_fetches)
+          sink_tensor = identity_fetches[0]
+        else:
+          identity_fetches = []
+          sink_tensor = control_flow_ops.no_op()
     lift_map = lift_to_graph.lift_to_graph(
         sink_tensor, pruned_graph,
         sources=flat_feeds + self.graph.internal_captures)
-    pruned_graph.outputs.extend(lift_map[x] for x in flat_fetches)
+    for original_fetch, identity_fetch in zip(
+        tensor_fetches, identity_fetches):
+      lift_map[original_fetch] = lift_map[identity_fetch]
+    pruned_graph.outputs.extend(
+        lift_map[x] for x in flat_fetches if isinstance(x, ops.Tensor))
     for external_capture, internal_capture in self.graph.captures.items():
       pruned_graph.captures[external_capture] = lift_map[internal_capture]
     pruned_graph.inputs.extend(lift_map[x] for x in flat_feeds)
     pruned_graph.inputs.extend(pruned_graph.captures.values())
+
+    def _structured_output_mapping(fetched):
+      lifted = lift_map[fetched]
+      if isinstance(lifted, ops.Operation):
+        return None
+      return lifted
+
     pruned_graph.structured_outputs = nest.map_structure(
-        lambda node: lift_map[node], fetches)
+        _structured_output_mapping, fetches)
     pruned_fn = WrappedFunction(
         pruned_graph, variable_holder=self._variable_holder)
     pruned_fn._num_positional_args = len(flat_feeds)  # pylint: disable=protected-access
diff --git a/tensorflow/python/eager/wrap_function_test.py b/tensorflow/python/eager/wrap_function_test.py
index 0c70a1221c..65dd73aafc 100644
--- a/tensorflow/python/eager/wrap_function_test.py
+++ b/tensorflow/python/eager/wrap_function_test.py
@@ -80,7 +80,7 @@ class WrapFunctionTest(test.TestCase):
     f_wrapped = wrap_function.wrap_function(f, [])
     self.assertAllEqual(1.0, f_wrapped())
 
-  def testCaptures(self):
+  def testPruneCaptures(self):
 
     v1 = variables.Variable(2.)
 
@@ -95,6 +95,45 @@ class WrapFunctionTest(test.TestCase):
         fetches=f_wrapped.graph.get_tensor_by_name('fetch:0'))
     self.assertAllEqual(6.0, pruned())
 
+  def testPruneOperations(self):
+
+    v = variables.Variable(0)
+
+    def f():
+      v.assign_add(1, name='increment', read_value=False)
+
+    f_wrapped = wrap_function.wrap_function(f, [])
+    pruned = f_wrapped.prune(
+        feeds=(),
+        fetches=(f_wrapped.graph.get_operation_by_name('increment'),))
+    self.assertEqual((None,), pruned())
+    self.assertEqual(1, self.evaluate(v))
+
+    del f, f_wrapped
+
+    def f1():
+      v.assign_add(
+          array_ops.placeholder(shape=[], dtype=dtypes.int32, name='step'),
+          name='increment', read_value=False)
+      return constant_op.constant(1, name='other')
+
+    f_wrapped = wrap_function.wrap_function(f1, [])
+    increments = f_wrapped.prune(
+        feeds=(f_wrapped.graph.get_tensor_by_name('step:0')),
+        fetches=(f_wrapped.graph.get_operation_by_name('increment'),
+                 f_wrapped.graph.get_tensor_by_name('other:0')))
+    first_output, second_output = increments(constant_op.constant(2))
+    self.assertEqual(['Placeholder:0', 'Placeholder_1:0'],
+                     [t.name for t in increments.inputs])
+    self.assertIs(None, first_output)
+    self.assertEqual(1, second_output.numpy())
+    self.assertEqual(3, v.numpy())
+    does_not_increment = f_wrapped.prune(
+        feeds=(f_wrapped.graph.get_tensor_by_name('step:0')),
+        fetches=f_wrapped.graph.get_tensor_by_name('other:0'))
+    self.assertEqual(1, does_not_increment(constant_op.constant(3)).numpy())
+    self.assertEqual(3, v.numpy())
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
-- 
GitLab


From 58edc1b75c827e4ee835c1e9230ffa4fa61c0cb6 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Tue, 18 Dec 2018 10:58:02 -0800
Subject: [PATCH 748/873] Export connect_to_remote_host as
 tf.config.experimental_connect_to_host

PiperOrigin-RevId: 226024266
---
 tensorflow/contrib/eager/python/BUILD            | 16 ++--------------
 tensorflow/contrib/eager/python/remote_test.py   |  2 +-
 tensorflow/contrib/eager/python/tfe.py           |  2 +-
 tensorflow/python/BUILD                          |  1 +
 tensorflow/python/__init__.py                    |  1 +
 tensorflow/python/eager/BUILD                    | 12 ++++++++++++
 .../eager/python => python/eager}/remote.py      |  2 ++
 .../tools/api/generator/api_init_files.bzl       |  1 +
 .../tools/api/generator/api_init_files_v1.bzl    |  1 +
 .../tools/api/golden/v1/tensorflow.config.pbtxt  |  7 +++++++
 tensorflow/tools/api/golden/v1/tensorflow.pbtxt  |  4 ++++
 .../tools/api/golden/v2/tensorflow.config.pbtxt  |  7 +++++++
 tensorflow/tools/api/golden/v2/tensorflow.pbtxt  |  4 ++++
 13 files changed, 44 insertions(+), 16 deletions(-)
 rename tensorflow/{contrib/eager/python => python/eager}/remote.py (96%)
 create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt

diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD
index 77052a75a7..8966a9befc 100644
--- a/tensorflow/contrib/eager/python/BUILD
+++ b/tensorflow/contrib/eager/python/BUILD
@@ -15,7 +15,6 @@ py_library(
         ":metrics",
         ":network",
         ":parameter_server",
-        ":remote",
         ":saver",
         "//tensorflow/python:framework_ops",
         "//tensorflow/python:framework_test_lib",
@@ -31,6 +30,7 @@ py_library(
         "//tensorflow/python/eager:def_function",
         "//tensorflow/python/eager:execution_callbacks",
         "//tensorflow/python/eager:function",
+        "//tensorflow/python/eager:remote",
     ],
 )
 
@@ -238,24 +238,12 @@ py_test(
     ],
 )
 
-py_library(
-    name = "remote",
-    srcs = ["remote.py"],
-    srcs_version = "PY2AND3",
-    visibility = ["//tensorflow:internal"],
-    deps = [
-        "//tensorflow/core:protos_all_py",
-        "//tensorflow/python:platform",
-        "//tensorflow/python/eager:context",
-    ],
-)
-
 cuda_py_test(
     name = "remote_test",
     srcs = ["remote_test.py"],
     additional_deps = [
         ":parameter_server",
-        ":remote",
+        "//tensorflow/python/eager:remote",
         "//tensorflow/contrib/eager/python:tfe",
         "//tensorflow/python:array_ops",
         "//tensorflow/python:client",
diff --git a/tensorflow/contrib/eager/python/remote_test.py b/tensorflow/contrib/eager/python/remote_test.py
index 3926de15e7..f540d9b37b 100644
--- a/tensorflow/contrib/eager/python/remote_test.py
+++ b/tensorflow/contrib/eager/python/remote_test.py
@@ -24,12 +24,12 @@ import os
 import numpy as np
 
 from tensorflow.contrib.eager.python import parameter_server
-from tensorflow.contrib.eager.python import remote
 from tensorflow.core.protobuf import cluster_pb2
 from tensorflow.core.protobuf import tensorflow_server_pb2
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import function
+from tensorflow.python.eager import remote
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py
index 8882a863c3..31481d7685 100644
--- a/tensorflow/contrib/eager/python/tfe.py
+++ b/tensorflow/contrib/eager/python/tfe.py
@@ -99,7 +99,6 @@ from tensorflow.contrib.eager.python.network import Network
 from tensorflow.contrib.eager.python.network import Sequential
 from tensorflow.contrib.eager.python.network import save_network_checkpoint
 from tensorflow.contrib.eager.python.network import restore_network_checkpoint
-from tensorflow.contrib.eager.python.remote import connect_to_remote_host
 from tensorflow.contrib.eager.python.saver import get_optimizer_variables
 from tensorflow.contrib.eager.python.saver import restore_variables_on_create
 from tensorflow.contrib.eager.python.saver import Saver
@@ -127,6 +126,7 @@ from tensorflow.python.eager.execution_callbacks import inf_callback
 from tensorflow.python.eager.execution_callbacks import inf_nan_callback
 from tensorflow.python.eager.execution_callbacks import nan_callback
 from tensorflow.python.eager.execution_callbacks import seterr
+from tensorflow.python.eager.remote import connect_to_remote_host
 from tensorflow.python.framework.tensor_spec import TensorSpec
 from tensorflow.python.framework.ops import enable_eager_execution
 from tensorflow.python.framework.ops import enable_eager_execution_internal as enable_remote_eager_execution
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index 156b4045b5..c76385a3e2 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -150,6 +150,7 @@ py_library(
         "//tensorflow/python/distribute",
         "//tensorflow/python/distribute:estimator_training",
         "//tensorflow/python/eager:def_function",
+        "//tensorflow/python/eager:remote",
         "//tensorflow/python/feature_column:feature_column_py",
         "//tensorflow/python/keras",
         "//tensorflow/python/ops/distributions",
diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py
index b2cc63bd13..9f1e52b42b 100644
--- a/tensorflow/python/__init__.py
+++ b/tensorflow/python/__init__.py
@@ -126,6 +126,7 @@ from tensorflow.python.util.tf_export import tf_export
 
 # Eager execution
 from tensorflow.python.eager.context import executing_eagerly
+from tensorflow.python.eager.remote import connect_to_remote_host
 from tensorflow.python.eager.def_function import function
 from tensorflow.python.framework.ops import enable_eager_execution
 
diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD
index f43cf9327a..cd5c0be283 100644
--- a/tensorflow/python/eager/BUILD
+++ b/tensorflow/python/eager/BUILD
@@ -529,3 +529,15 @@ py_test(
         "//tensorflow/python:framework_ops",
     ],
 )
+
+py_library(
+    name = "remote",
+    srcs = ["remote.py"],
+    srcs_version = "PY2AND3",
+    visibility = ["//tensorflow:internal"],
+    deps = [
+        "//tensorflow/core:protos_all_py",
+        "//tensorflow/python:platform",
+        "//tensorflow/python/eager:context",
+    ],
+)
diff --git a/tensorflow/contrib/eager/python/remote.py b/tensorflow/python/eager/remote.py
similarity index 96%
rename from tensorflow/contrib/eager/python/remote.py
rename to tensorflow/python/eager/remote.py
index b74cf394f6..fdea95fa80 100644
--- a/tensorflow/contrib/eager/python/remote.py
+++ b/tensorflow/python/eager/remote.py
@@ -23,8 +23,10 @@ import os
 from tensorflow.core.protobuf.cluster_pb2 import ClusterDef
 from tensorflow.core.protobuf.tensorflow_server_pb2 import ServerDef
 from tensorflow.python.eager import context
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export("config.experimental_connect_to_host")
 def connect_to_remote_host(remote_host=None, job_name="worker"):
   """Connects to a single machine to enable remote execution on it.
 
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 5fee9c5eaf..a4b1b852ed 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -8,6 +8,7 @@ TENSORFLOW_API_INIT_FILES = [
     "autograph/experimental/__init__.py",
     "bitwise/__init__.py",
     "compat/__init__.py",
+    "config/__init__.py",
     "data/__init__.py",
     "data/experimental/__init__.py",
     "debugging/__init__.py",
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index 8d3b86bf26..503de822cc 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -9,6 +9,7 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "autograph/experimental/__init__.py",
     "bitwise/__init__.py",
     "compat/__init__.py",
+    "config/__init__.py",
     "data/__init__.py",
     "data/experimental/__init__.py",
     "debugging/__init__.py",
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt
new file mode 100644
index 0000000000..d7e4529594
--- /dev/null
+++ b/tensorflow/tools/api/golden/v1/tensorflow.config.pbtxt
@@ -0,0 +1,7 @@
+path: "tensorflow.config"
+tf_module {
+  member_method {
+    name: "experimental_connect_to_host"
+    argspec: "args=[\'remote_host\', \'job_name\'], varargs=None, keywords=None, defaults=[\'None\', \'worker\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
index 4ed4deea13..6541952ccf 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt
@@ -320,6 +320,10 @@ tf_module {
     name: "complex64"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "config"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "constant_initializer"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt
new file mode 100644
index 0000000000..d7e4529594
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.config.pbtxt
@@ -0,0 +1,7 @@
+path: "tensorflow.config"
+tf_module {
+  member_method {
+    name: "experimental_connect_to_host"
+    argspec: "args=[\'remote_host\', \'job_name\'], varargs=None, keywords=None, defaults=[\'None\', \'worker\'], "
+  }
+}
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 574b6778fa..0cd525167d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -108,6 +108,10 @@ tf_module {
     name: "complex64"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "config"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "constant_initializer"
     mtype: "<type \'type\'>"
-- 
GitLab


From cfd7c672550187cbcfc70449fff878f33611598b Mon Sep 17 00:00:00 2001
From: Doe Hyun Yoon <dyoon@google.com>
Date: Tue, 18 Dec 2018 10:58:28 -0800
Subject: [PATCH 749/873] Fix data type attr name of Size op in
 graph_properties. Data type attr name is set to T, but it's out_type.

PiperOrigin-RevId: 226024334
---
 tensorflow/core/grappler/costs/graph_properties.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index 1df26d94d1..ce1ee75cab 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -1041,7 +1041,7 @@ class SymbolicShapeRefiner {
           // Propagate size value.
           int64 sz = ic->Value(size);
           bool valid = false;
-          if (node.attr().at("T").type() == DT_INT32) {
+          if (node.attr().at("out_type").type() == DT_INT32) {
             if (sz < std::numeric_limits<int32>::max()) {
               const_tensors_to_propagate_.push_back(
                   MakeIntegerScalarTensorProto(DT_INT32, sz));
-- 
GitLab


From 8d47c6cc1468ed3f9c7506473383c196c83d9459 Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Tue, 18 Dec 2018 11:01:01 -0800
Subject: [PATCH 750/873] Export estimator.experimental.stop_if_lower_hook

PiperOrigin-RevId: 226024739
---
 .../api/golden/v1/tensorflow.estimator.experimental.pbtxt     | 4 ++++
 .../api/golden/v2/tensorflow.estimator.experimental.pbtxt     | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.pbtxt
index f0fd7ce782..741102466d 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.pbtxt
@@ -36,4 +36,8 @@ tf_module {
     name: "stop_if_higher_hook"
     argspec: "args=[\'estimator\', \'metric_name\', \'threshold\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
   }
+  member_method {
+    name: "stop_if_lower_hook"
+    argspec: "args=[\'estimator\', \'metric_name\', \'threshold\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.pbtxt
index f0fd7ce782..741102466d 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.pbtxt
@@ -36,4 +36,8 @@ tf_module {
     name: "stop_if_higher_hook"
     argspec: "args=[\'estimator\', \'metric_name\', \'threshold\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
   }
+  member_method {
+    name: "stop_if_lower_hook"
+    argspec: "args=[\'estimator\', \'metric_name\', \'threshold\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
+  }
 }
-- 
GitLab


From d9d5c09b757da427d73938bdff136d4ef45a095e Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 18 Dec 2018 11:01:56 -0800
Subject: [PATCH 751/873] Add profiling instrumentation to reference kernels

PiperOrigin-RevId: 226024900
---
 .../internal/reference/reference_ops.h        | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h
index b7b9139428..b6a8f3859f 100644
--- a/tensorflow/lite/kernels/internal/reference/reference_ops.h
+++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h
@@ -1662,6 +1662,7 @@ inline void SubWithActivation(const ArithmeticParams& params,
                               const int32* input2_data,
                               const RuntimeShape& output_shape,
                               int32* output_data) {
+  gemmlowp::ScopedProfilingLabel label("SubWithActivation");
   const int flat_size =
       MatchingFlatSize(input1_shape, input2_shape, input2_shape);
   for (int i = 0; i < flat_size; ++i) {
@@ -1693,6 +1694,7 @@ inline void Concatenation(const ConcatenationParams& params,
                           const Scalar* const* input_data,
                           const RuntimeShape& output_shape,
                           Scalar* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Concatenation");
   int axis = params.axis;
   int inputs_count = params.inputs_count;
   const int concat_dimensions = output_shape.DimensionsCount();
@@ -1740,6 +1742,7 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params,
                                      const uint8* const* input_data,
                                      const RuntimeShape& output_shape,
                                      uint8* output_data) {
+  gemmlowp::ScopedProfilingLabel label("ConcatenationWithScaling/Uint8");
   int axis = params.axis;
   const int32* input_zeropoint = params.input_zeropoint;
   const float* input_scale = params.input_scale;
@@ -1801,6 +1804,7 @@ template <typename Scalar>
 void Pack(const PackParams& params, const RuntimeShape* const* input_shapes,
           const Scalar* const* input_data, const RuntimeShape& output_shape,
           Scalar* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Pack");
   const int dimensions = output_shape.DimensionsCount();
   int axis = params.axis;
   int inputs_count = params.inputs_count;
@@ -1828,6 +1832,7 @@ template <typename Scalar>
 void Unpack(const UnpackParams& params, const RuntimeShape& input_shape,
             const Scalar* input_data, const RuntimeShape& output_shape,
             Scalar* const* output_datas) {
+  gemmlowp::ScopedProfilingLabel label("Unpack");
   const int dimensions = input_shape.DimensionsCount();
   const int outputs_count = params.num_split;
 
@@ -1855,6 +1860,7 @@ void PackWithScaling(const PackParams& params,
                      const RuntimeShape* const* input_shapes,
                      const uint8* const* input_data,
                      const RuntimeShape& output_shape, uint8* output_data) {
+  gemmlowp::ScopedProfilingLabel label("PackWithScaling");
   const int dimensions = output_shape.DimensionsCount();
   int axis = params.axis;
   const int32* input_zeropoint = params.input_zeropoint;
@@ -1904,6 +1910,7 @@ void DepthConcatenation(const ConcatenationParams& params,
                         const RuntimeShape* const* input_shapes,
                         const Scalar* const* input_data,
                         const RuntimeShape& output_shape, Scalar* output_data) {
+  gemmlowp::ScopedProfilingLabel label("DepthConcatenation");
   auto params_copy = params;
   params_copy.axis = 3;
   Concatenation(params_copy, input_shapes, input_data, output_shape,
@@ -2305,6 +2312,7 @@ template <typename Scalar>
 void Split(const SplitParams& params, const RuntimeShape& input_shape,
            const Scalar* input_data, const RuntimeShape* const* output_shapes,
            Scalar* const* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Split");
   const int concat_dimensions = input_shape.DimensionsCount();
   int axis = params.axis < 0 ? params.axis + concat_dimensions : params.axis;
   int outputs_count = params.num_split;
@@ -2791,6 +2799,7 @@ log_x_for_x_greater_than_or_equal_to_1(
 inline void LogSoftmax(const SoftmaxParams& params,
                        const RuntimeShape& input_shape, const uint8* input_data,
                        const RuntimeShape& output_shape, uint8* output_data) {
+  gemmlowp::ScopedProfilingLabel label("LogSoftmax/8bit");
   const int32 input_multiplier = params.input_multiplier;
   const int32 input_left_shift = params.input_left_shift;
   const int32 reverse_scaling_divisor = params.reverse_scaling_divisor;
@@ -3056,6 +3065,7 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape,
 inline void Dequantize(const tflite::DequantizationParams& op_params,
                        const RuntimeShape& input_shape, const uint8* input_data,
                        const RuntimeShape& output_shape, float* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Dequantize");
   int32 zero_point = op_params.zero_point;
   double scale = op_params.scale;
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
@@ -3070,6 +3080,7 @@ inline void Dequantize(const tflite::DequantizationParams& op_params,
 inline void FakeQuant(const tflite::FakeQuantParams& op_params,
                       const RuntimeShape& input_shape, const float* input_data,
                       const RuntimeShape& output_shape, float* output_data) {
+  gemmlowp::ScopedProfilingLabel label("FakeQuant");
   float rmin = op_params.minmax.min;
   float rmax = op_params.minmax.max;
   int num_bits = op_params.num_bits;
@@ -3116,6 +3127,7 @@ inline void Gather(const tflite::GatherParams& op_params,
                    const RuntimeShape& input_shape, const T* input_data,
                    const RuntimeShape& coords_shape, const CoordsT* coords_data,
                    const RuntimeShape& output_shape, T* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Gather");
   int axis = op_params.axis;
   if (axis < 0) {
     axis += input_shape.DimensionsCount();
@@ -3219,6 +3231,7 @@ inline void SpaceToBatchND(
     const RuntimeShape& unextended_input2_shape, const int32* block_shape_data,
     const RuntimeShape& unextended_input3_shape, const int32* paddings_data,
     const RuntimeShape& unextended_output_shape, T* output_data) {
+  gemmlowp::ScopedProfilingLabel label("SpaceToBatchND");
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
   const RuntimeShape input1_shape =
@@ -3276,6 +3289,7 @@ inline void BatchToSpaceND(
     const RuntimeShape& unextended_input2_shape, const int32* block_shape_data,
     const RuntimeShape& unextended_input3_shape, const int32* crops_data,
     const RuntimeShape& unextended_output_shape, T* output_data) {
+  gemmlowp::ScopedProfilingLabel label("BatchToSpaceND");
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
   const RuntimeShape input1_shape =
@@ -3549,6 +3563,7 @@ inline void Slice(const tflite::SliceParams& op_params,
 template <typename T>
 inline void Exp(const T* input_data, const size_t num_elements,
                 T* output_data) {
+  gemmlowp::ScopedProfilingLabel label("Exp");
   for (size_t idx = 0; idx < num_elements; ++idx) {
     output_data[idx] = exp(input_data[idx]);
   }
@@ -3679,6 +3694,7 @@ inline bool Mean(const T* input_data, const int* input_dims,
                  const int* output_dims, const int output_num_dims,
                  const int* axis, const int num_axis_dimensions, bool keep_dims,
                  int* temp_index, int* resolved_axis, U* temp_sum) {
+  gemmlowp::ScopedProfilingLabel label("Mean");
   // Reset output data.
   size_t num_outputs = 1;
   for (int idx = 0; idx < output_num_dims; ++idx) {
@@ -3732,7 +3748,7 @@ inline void Mean(const tflite::MeanParams& op_params,
                  const RuntimeShape& unextended_input_shape,
                  const T* input_data,
                  const RuntimeShape& unextended_output_shape, T* output_data) {
-  gemmlowp::ScopedProfilingLabel label("Mean");
+  gemmlowp::ScopedProfilingLabel label("Mean4D");
 
   // Current implementation only supports dimension equals 4 and simultaneous
   // reduction over width and height.
@@ -3784,6 +3800,8 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
                                const int num_axis_dimensions, bool keep_dims,
                                int* temp_index, int* resolved_axis, U* temp_sum,
                                bool compute_sum) {
+  gemmlowp::ScopedProfilingLabel label(compute_sum ? "Sum/Uint8"
+                                                   : "Mean/Uint8");
   // Reset output data.
   size_t num_outputs = 1;
   for (int idx = 0; idx < output_num_dims; ++idx) {
@@ -3899,6 +3917,7 @@ void MaximumMinimumBroadcast4DSlow(const RuntimeShape& unextended_input1_shape,
                                    const T* input2_data,
                                    const RuntimeShape& unextended_output_shape,
                                    T* output_data, Op op) {
+  gemmlowp::ScopedProfilingLabel label("MaximumMinimumBroadcast4DSlow");
   TFLITE_DCHECK_LE(unextended_input1_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_input2_shape.DimensionsCount(), 4);
   TFLITE_DCHECK_LE(unextended_output_shape.DimensionsCount(), 4);
@@ -3930,6 +3949,7 @@ template <typename T1, typename T2, typename T3, typename Cmp>
 void ArgMinMax(const RuntimeShape& input1_shape, const T1* input1_data,
                const T3* input2_data, const RuntimeShape& output_shape,
                T2* output_data, const Cmp& cmp) {
+  gemmlowp::ScopedProfilingLabel label("ArgMinMax");
   // For ArgMax, the number of output dimensions = (number of input dimensions -
   // 1). For the sake of simplicity, the output dimensions are equal to the
   // input dimensions here. We enforce the constraint that the axis dimension
-- 
GitLab


From 507b30a43ad8c5fee7a21c9abcb0c99fc183ec83 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 11:17:12 -0800
Subject: [PATCH 752/873] Test to validate XLA dequantize OP is inverse of
 tf.quantize.

PiperOrigin-RevId: 226027695
---
 tensorflow/compiler/tests/BUILD               |  9 ++-
 .../compiler/tests/quantized_ops_test.py      | 55 +++++++++++++++++
 tensorflow/compiler/tf2xla/kernels/BUILD      |  2 +
 .../tf2xla/kernels/xla_dequantize_op.cc       | 60 +++++++++++++++++++
 tensorflow/compiler/tf2xla/ops/xla_ops.cc     | 24 ++++++++
 tensorflow/compiler/tf2xla/python/xla.py      |  1 +
 6 files changed, 150 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/compiler/tf2xla/kernels/xla_dequantize_op.cc

diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD
index 093b61629c..fa02cf9cbe 100644
--- a/tensorflow/compiler/tests/BUILD
+++ b/tensorflow/compiler/tests/BUILD
@@ -1188,11 +1188,18 @@ tf_xla_py_test(
 
 tf_xla_py_test(
     name = "quantized_ops_test",
-    size = "small",
+    size = "medium",
     srcs = ["quantized_ops_test.py"],
+    disabled_backends = [
+        "cpu",
+        "cpu_ondemand",
+    ],
     deps = [
         ":xla_test",
+        "//tensorflow/compiler/tf2xla/python:xla",
         "//tensorflow/python:array_ops",
+        "//tensorflow/python:bitwise_ops",
+        "//tensorflow/python:constant_op",
         "//tensorflow/python:dtypes",
         "//tensorflow/python:math_ops",
         "//tensorflow/python:platform_test",
diff --git a/tensorflow/compiler/tests/quantized_ops_test.py b/tensorflow/compiler/tests/quantized_ops_test.py
index 80c338513b..cd9b728ab3 100644
--- a/tensorflow/compiler/tests/quantized_ops_test.py
+++ b/tensorflow/compiler/tests/quantized_ops_test.py
@@ -18,11 +18,16 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import math
 import numpy as np
 
 from tensorflow.compiler.tests import xla_test
+from tensorflow.compiler.tf2xla.python import xla
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import bitwise_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import googletest
 
@@ -44,5 +49,55 @@ class QuantizedOpsTest(xla_test.XLATestCase):
         self.assertAllEqual(value, expected)
 
 
+class DeuantizedOpsTest(xla_test.XLATestCase):
+
+  def pack_uint8_r2_to_uint32(self, test_input):
+    num_rows, num_columns = test_input.get_shape().as_list()
+    num_output_columns = int(math.ceil(num_columns / 4.0))
+    padding_input = array_ops.pad(
+        math_ops.cast(test_input, dtype=dtypes.uint8),
+        constant_op.constant([[
+            0,
+            0,
+        ], [0, num_output_columns * 4 - num_columns]]))
+    output = array_ops.zeros([num_rows, num_output_columns],
+                             dtype=dtypes.uint32)
+    num_elements_per_pack = 4
+    shift_bits = 8
+
+    iota_r1 = math_ops.range(num_output_columns * num_elements_per_pack)
+
+    for p in range(num_elements_per_pack):
+      selected_index = math_ops.equal(
+          math_ops.mod(iota_r1, num_elements_per_pack), p)
+      gather_index = array_ops.boolean_mask(iota_r1, selected_index)
+      gathered_input = array_ops.gather(padding_input, gather_index, axis=1)
+      total_shift_bits = shift_bits * (num_elements_per_pack - p - 1)
+      left_shift_input = bitwise_ops.left_shift(
+          math_ops.cast(gathered_input, dtype=dtypes.uint32), total_shift_bits)
+      output = bitwise_ops.bitwise_or(output, left_shift_input)
+    return output
+
+  def testDequantizeQuint8(self):
+    num_rows = 100
+    num_columns = 3547
+    random_input = np.random.normal(128.0, 10.0, [num_rows, num_columns])
+    with self.cached_session() as session:
+      with ops.device("CPU"):
+        test_input = ops.convert_to_tensor(random_input, dtype=dtypes.float32)
+        transposed_input = array_ops.transpose(test_input, [1, 0])
+        quantized_input = array_ops.quantize(transposed_input, 0.0, 255.0,
+                                             dtypes.quint8)
+        packed_input = self.pack_uint8_r2_to_uint32(quantized_input.output)
+      with self.test_scope():
+        transposed_quantized_output = xla.dequantize(packed_input, 0.0, 255.0,
+                                                     "MIN_COMBINED", True)
+        quantized_output = array_ops.slice(transposed_quantized_output, [0, 0],
+                                           [num_rows, num_columns])
+
+    value = session.run(quantized_output)
+    self.assertAllClose(value, random_input, 1.0)
+
+
 if __name__ == "__main__":
   googletest.main()
diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD
index a18a4e92d6..47209d285f 100644
--- a/tensorflow/compiler/tf2xla/kernels/BUILD
+++ b/tensorflow/compiler/tf2xla/kernels/BUILD
@@ -101,6 +101,7 @@ tf_kernel_library(
         "variable_ops.cc",
         "xla_broadcast_helper_op.cc",
         "xla_conv_op.cc",
+        "xla_dequantize_op.cc",
         "xla_dot_op.cc",
         "xla_pad_op.cc",
         "xla_reduce_op.cc",
@@ -140,6 +141,7 @@ tf_kernel_library(
         "//tensorflow/compiler/xla/client/lib:pooling",
         "//tensorflow/compiler/xla/client/lib:prng",
         "//tensorflow/compiler/xla/client/lib:qr",
+        "//tensorflow/compiler/xla/client/lib:quantize",
         "//tensorflow/compiler/xla/client/lib:sorting",
         "//tensorflow/compiler/xla/client/lib:triangular_solve",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/tf2xla/kernels/xla_dequantize_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_dequantize_op.cc
new file mode 100644
index 0000000000..a30b4861f6
--- /dev/null
+++ b/tensorflow/compiler/tf2xla/kernels/xla_dequantize_op.cc
@@ -0,0 +1,60 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/tf2xla/shape_util.h"
+#include "tensorflow/compiler/tf2xla/xla_compiler.h"
+#include "tensorflow/compiler/tf2xla/xla_op_kernel.h"
+#include "tensorflow/compiler/tf2xla/xla_op_registry.h"
+#include "tensorflow/compiler/xla/client/lib/quantize.h"
+#include "tensorflow/compiler/xla/client/xla_builder.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/framework/op_kernel.h"
+
+namespace tensorflow {
+namespace {
+
+class XlaDequantizeOp : public XlaOpKernel {
+ public:
+  explicit XlaDequantizeOp(OpKernelConstruction* context)
+      : XlaOpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("min_range", &min_range_));
+    OP_REQUIRES_OK(context, context->GetAttr("max_range", &max_range_));
+    OP_REQUIRES_OK(context, context->GetAttr("mode", &mode_));
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("transpose_output", &transpose_output_));
+  }
+
+  void Compile(XlaOpKernelContext* context) override {
+    const xla::XlaOp& input = context->Input(0);
+
+    xla::QuantizedRange range(min_range_, max_range_);
+
+    xla::XlaOp output =
+        xla::Dequantize<uint8>(input, range, mode_, transpose_output_);
+    context->SetOutput(0, output);
+  }
+
+ private:
+  float min_range_;
+  float max_range_;
+  bool transpose_output_;
+  string mode_;
+  TF_DISALLOW_COPY_AND_ASSIGN(XlaDequantizeOp);
+};
+
+REGISTER_XLA_OP(Name("XlaDequantize"), XlaDequantizeOp);
+
+}  // namespace
+}  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
index bd2c0a5ee8..ab77984684 100644
--- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc
+++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc
@@ -409,5 +409,29 @@ body: A function that takes a list of tensors and returns another
       list of tensors. Both lists have the same types as specified by T.
 )doc");
 
+REGISTER_OP("XlaDequantize")
+    .Input("input: uint32")
+    .Output("output: bfloat16")
+    .Attr("min_range: float")
+    .Attr("max_range: float")
+    .Attr("mode: string")
+    .Attr("transpose_output: bool")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Doc(R"doc(
+Takes the packed uint32 input and unpacks the input to uint8 to do
+Dequantization on deivce.
+
+input: Input tensors whose types is uint32, shape is [d0, ..., dn].
+output: Output tensors whose types is bloat16. If transpose_output is true,
+     output shape is [dn * 4, dn-1, ..., d1, d0]. If transpose_output
+     is false, output shape is [d0,..., dn * 4].
+min_range: The minimum scalar value possibly produced for the input.
+max_range: The maximum scalar value possibly produced for the input.
+mode: String to determine the dequantize mode in {"MIN_COMBINED", "MIN_FIRST", "SCALED"}.
+transpose_output: Boolean to determine if output is transposed. transpose_output
+     is faster when input is large and rank of input is higher than 1.
+)doc");
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py
index 147e562658..345193c936 100644
--- a/tensorflow/compiler/tf2xla/python/xla.py
+++ b/tensorflow/compiler/tf2xla/python/xla.py
@@ -386,3 +386,4 @@ def slice(x, start_dims, limit_dims, strides):
 sort = gen_xla_ops.xla_sort
 key_value_sort = gen_xla_ops.xla_key_value_sort
 while_loop = gen_xla_ops.xla_while
+dequantize = gen_xla_ops.xla_dequantize
-- 
GitLab


From 35c775e50349d3e415962779ed70aefe04d00552 Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Tue, 18 Dec 2018 11:25:08 -0800
Subject: [PATCH 753/873] Add c_test, a small program that is compiled in
 pure-C11 mode

The purpose of this test program is to ensure that the C API is actually
usable from pure-C translation units.

PiperOrigin-RevId: 226029023
---
 tensorflow/c/BUILD    | 14 ++++++--
 tensorflow/c/c_test.c | 79 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow/c/c_test.c

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 9f00cc5de4..9d267e9e59 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -189,14 +189,12 @@ tf_cuda_library(
             ":c_api",
             ":tf_status_helper",
             "//tensorflow/core:android_tensorflow_lib_lite",
-            "//tensorflow/core:platform_env",
             "//tensorflow/core:lib",
         ],
         "//conditions:default": [
             ":c_api",
             ":tf_status_helper",
             "//tensorflow/core:framework",
-            "//tensorflow/core:platform_env",
             "//tensorflow/core:lib",
         ],
     }) + [":c_api_internal"],
@@ -250,6 +248,18 @@ tf_cuda_library(
     ],
 )
 
+tf_cc_test(
+    name = "c_test",
+    srcs = ["c_test.c"],
+    extra_copts = ["-std=c11"],
+    deps = [
+        ":c_api",
+        ":c_api_experimental",
+        ":env",
+        ":kernels",
+    ],
+)
+
 tf_cuda_cc_test(
     name = "c_api_test",
     size = "small",
diff --git a/tensorflow/c/c_test.c b/tensorflow/c/c_test.c
new file mode 100644
index 0000000000..c0ed5ccd15
--- /dev/null
+++ b/tensorflow/c/c_test.c
@@ -0,0 +1,79 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <limits.h>
+#include <malloc.h>
+#include <memory.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "tensorflow/c/c_api.h"
+#include "tensorflow/c/c_api_experimental.h"
+#include "tensorflow/c/env.h"
+#include "tensorflow/c/kernels.h"
+
+// A compute function. This will never actually get called in this test, it's
+// just nice to know that it compiles.
+void compute(void* kernel, TF_OpKernelContext* ctx) {
+  TF_Tensor* input;
+  TF_Status* s = TF_NewStatus();
+  TF_GetInput(ctx, 0, &input, s);
+  TF_DeleteTensor(input);
+  TF_DeleteStatus(s);
+}
+
+// Exercises tensorflow's C API.
+int main(int argc, char** argv) {
+  TF_InitMain(argv[0], &argc, &argv);
+
+  struct TF_StringStream* s = TF_GetLocalTempDirectories();
+  const char* path;
+
+  if (!TF_StringStreamNext(s, &path)) {
+    fprintf(stderr, "TF_GetLocalTempDirectories returned no results\n");
+    return 1;
+  }
+
+  char file_name[100];
+  struct timeval t;
+  if (gettimeofday(&t, NULL)) {
+    perror("gettimeofday failed");
+    return 1;
+  }
+  snprintf(file_name, sizeof(file_name), "test-%d-%ld.txt", getpid(), t.tv_sec);
+
+  size_t length = 2 + strlen(path) + strlen(file_name);
+  char* full_path = malloc(length);
+  snprintf(full_path, length, "%s/%s", path, file_name);
+
+  TF_WritableFileHandle* h;
+  TF_Status* status = TF_NewStatus();
+  TF_NewWritableFile(full_path, &h, status);
+  if (TF_GetCode(status) != TF_OK) {
+    fprintf(stderr, "TF_NewWritableFile failed: %s\n", TF_Message(status));
+    return 1;
+  }
+  fprintf(stderr, "wrote %s\n", full_path);
+  free(full_path);
+  TF_StringStreamDone(s);
+
+  TF_KernelBuilder* b =
+      TF_NewKernelBuilder("SomeOp", "SomeDevice", NULL, &compute, NULL);
+  TF_RegisterKernelBuilder("someKernel", b, status);
+
+  TF_DeleteStatus(status);
+  return 0;
+}
-- 
GitLab


From d9e2eb00a97f68d2384c8460a8781b8af5612fd1 Mon Sep 17 00:00:00 2001
From: Rachel Lim <rachelim@google.com>
Date: Tue, 18 Dec 2018 11:30:52 -0800
Subject: [PATCH 754/873] [tf.data] Limit the number of elements in the shuffle
 buffer to restrict memory consumption

PiperOrigin-RevId: 226030086
---
 tensorflow/core/kernels/data/shuffle_dataset_op.cc | 10 ++++++++++
 .../kernel_tests/shuffle_and_repeat_test.py        | 14 ++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
index 7134793e26..db0cc6fa4d 100644
--- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc
+++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc
@@ -31,6 +31,8 @@ namespace {
 
 const int64 kLogIntervalMicros = 10 * 1000000;  // 10 seconds.
 
+const int64 kMaxEpochsInBuffer = 3;
+
 // See documentation in ../../ops/dataset_ops.cc for a high-level
 // description of the following op.
 
@@ -135,6 +137,14 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
           } else {
             input_impl_.reset();
           }
+          if (slices_.size() > kMaxEpochsInBuffer) {
+            // When the elements stored in `buffer_` span more than
+            // `kMaxEpochsInBuffer` epochs, we do not fill the buffer further to
+            // conserve memory. This means that the upper bound on the size of
+            // `buffer_` is `kMaxEpochsInBuffer * cardinality(input_dataset) +
+            // 1`.
+            break;
+          }
         }
         if (num_log_entries > 0) {
           LOG(INFO) << "Shuffle buffer filled.";
diff --git a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
index 110966a5a0..92ae528b94 100644
--- a/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/shuffle_and_repeat_test.py
@@ -108,6 +108,20 @@ class ShuffleAndRepeatTest(test_base.DatasetTestBase):
     get_next = self.getNext(ds)
     self.evaluate(get_next())
 
+  def testVeryLargeBufferSize(self):
+    num_epochs = 1000 * 1000
+    # Each element being shuffled and repeated has shape (100,). This will OOM
+    # or timeout if we actually load everything into the buffer.
+    ds = dataset_ops.Dataset.range(500).batch(100).apply(
+        shuffle_ops.shuffle_and_repeat(
+            buffer_size=5 * num_epochs, count=num_epochs))
+    # Verify two epochs worth of output.
+    output = self._gen_outputs(lambda: ds, 2 * 5, verify_exhausted=False)
+    for i in range(2):
+      sorted_epoch = sorted(
+          output[i * 5:(i + 1) * 5], key=lambda batch: batch[0])
+      self.assertAllEqual(sorted_epoch, np.arange(500).reshape([5, 100]))
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From 8871d29c8f21317b5a421c4b143790c142f01d17 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Tue, 18 Dec 2018 11:32:27 -0800
Subject: [PATCH 755/873] Make error messages less noisy.

PiperOrigin-RevId: 226030405
---
 .../core/grappler/optimizers/graph_optimizer_stage.h      | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h
index 99fcb31523..19dc2c8ad9 100644
--- a/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h
+++ b/tensorflow/core/grappler/optimizers/graph_optimizer_stage.h
@@ -238,10 +238,10 @@ class GraphOptimizerStagePipeline {
         // Each stage must be "error safe" (just like exception safe). In
         // case of any error it must leave optimized graph unmodified.
         if (!stage_status.ok()) {
-          LOG(WARNING) << "Failed to run optimizer " << stage->optimizer_name()
-                       << ", stage " << stage->stage_name() << " node "
-                       << node->name()
-                       << ". Error: " << stage_status.error_message();
+          VLOG(2) << "Failed to run optimizer " << stage->optimizer_name()
+                  << ", stage " << stage->stage_name() << " node "
+                  << node->name()
+                  << ". Error: " << stage_status.error_message();
         }
         if (break_predicate_(*result)) return true;
       }
-- 
GitLab


From 6c4d8aa56feb053ba62526c9d4f3837882f531e7 Mon Sep 17 00:00:00 2001
From: Doe Hyun Yoon <dyoon@google.com>
Date: Tue, 18 Dec 2018 11:37:52 -0800
Subject: [PATCH 756/873] Static shape inference sometimes need values, in
 addition to shapes, for some ops. Though there're many supplemental logic to
 infer output values, but it's still not complete.

This CL adds a fallback mechanism:
If we know all the input values, but output values are not inferred,
we evaluate the node (i.e., execute single node) to infer the output values.

Currently, it's for only white listed ops (mostly simple arithmetic ops, and
StridedSlice) and small tensors (# elements < 17).

Also, to be conservative, it's used only if aggressive_shape_inference option is
enabled in InferStatically(). It's off by default, and currently no one calls
InferStatically() with that option; it'll be enabled in analytical_cost_estimator later.

PiperOrigin-RevId: 226031429
---
 tensorflow/core/grappler/costs/BUILD          |   1 +
 .../core/grappler/costs/graph_properties.cc   | 270 +++++++++++++++++-
 .../core/grappler/costs/graph_properties.h    |  11 +-
 .../grappler/costs/graph_properties_test.cc   |  86 +++++-
 4 files changed, 362 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD
index f8af1232f7..15dc7074b9 100644
--- a/tensorflow/core/grappler/costs/BUILD
+++ b/tensorflow/core/grappler/costs/BUILD
@@ -54,6 +54,7 @@ cc_library(
         "//tensorflow/core/grappler:grappler_item",
         "//tensorflow/core/grappler:utils",
         "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core/grappler/optimizers:evaluation_utils",
     ] + tf_protos_grappler(),
 )
 
diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc
index ce1ee75cab..d699979896 100644
--- a/tensorflow/core/grappler/costs/graph_properties.cc
+++ b/tensorflow/core/grappler/costs/graph_properties.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/core/framework/node_def_util.h"
 #include "tensorflow/core/framework/tensor.pb.h"
 #include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/framework/types.h"
 #include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/framework/versions.pb.h"
 #include "tensorflow/core/graph/graph_constructor.h"
@@ -33,13 +34,18 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/utils.h"
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/core/grappler/op_types.h"
+#include "tensorflow/core/grappler/optimizers/evaluation_utils.h"
 #include "tensorflow/core/grappler/utils.h"
 #include "tensorflow/core/grappler/utils/functions.h"
 #include "tensorflow/core/grappler/utils/topological_sort.h"
+#include "tensorflow/core/lib/gtl/cleanup.h"
+#include "tensorflow/core/lib/gtl/flatset.h"
 #include "tensorflow/core/lib/strings/str_util.h"
 
 namespace tensorflow {
 namespace grappler {
+using TensorVector = gtl::InlinedVector<TensorValue, 4>;
+
 namespace {
 
 using shape_inference::DimensionHandle;
@@ -446,6 +452,54 @@ class TopoQueue {
   std::set<NodeAndId, OrderByIdAscending> queue_;
 };
 
+bool IsWhiteListedOpTypeForEvaluateNode(const string& op_type) {
+  static const gtl::FlatSet<string>* const kOpTpeWhitelist =
+      CHECK_NOTNULL((new gtl::FlatSet<string>{
+          // Unary arithmetic ops
+          "Floor",
+          "Round",
+          "Sqrt",
+          "Square",
+          "Sign",
+          // Binary arithmetic ops
+          "Add",
+          "Div",
+          "FloorDiv",
+          "FloorMod",
+          "Greater",
+          "GreaterEqual",
+          "Less",
+          "LessEqual",
+          "LogicalAnd",
+          "LogicalNot",
+          "LogicalOr",
+          "Maximum",
+          "Minimum",
+          "Mod",
+          "Mul",
+          "NotEqual",
+          "QuantizedAdd",
+          "QuantizedMul",
+          "SquareDifference",
+          "Sub",
+          "TruncateDiv",
+          "TruncateMod",
+          "RealDiv",
+          // N-ary arithemtic ops
+          "AddN",
+          // Others
+          "StridedSlice",
+          "OnesLike",
+          "ZerosLike",
+          "Concat",
+          "ConcatV2",
+          "Split",
+          "Range",
+          "Fill",
+      }));
+  return kOpTpeWhitelist->find(op_type) != kOpTpeWhitelist->end();
+}
+
 // Processes symbolic shapes.
 // Each symbolic shape or dimension is represented by a handle. Unlike the TF
 // shape refiner which creates new handles every time it processes an unknown
@@ -455,10 +509,12 @@ class SymbolicShapeRefiner {
  public:
   explicit SymbolicShapeRefiner(
       const GraphView& graph,
-      const std::unordered_map<string, std::unordered_set<int>>& fed_ports)
+      const std::unordered_map<string, std::unordered_set<int>>& fed_ports,
+      const bool aggressive_shape_inference)
       : graph_(graph),
         function_library_(OpRegistry::Global(), graph.graph()->library()),
-        fed_ports_(fed_ports) {
+        fed_ports_(fed_ports),
+        aggressive_shape_inference_(aggressive_shape_inference) {
     graph_def_version_ = graph.graph()->versions().producer();
     node_to_context_.reserve(graph.graph()->node_size());
   }
@@ -1011,6 +1067,193 @@ class SymbolicShapeRefiner {
     return dim;
   }
 
+  // Returns true if all the output tensors have known values.
+  bool AllOutputValuesKnown(NodeContext* c) {
+    InferenceContext* ic = c->inference_context.get();
+    if (c->output_tensors_as_shapes.size() < ic->num_outputs() &&
+        c->output_tensor_protos.size() < ic->num_outputs()) {
+      return false;
+    } else {
+      for (int i = 0; i < ic->num_outputs(); i++) {
+        if (c->output_tensor_protos.size() <= i ||
+            c->output_tensor_protos[i] == nullptr) {
+          return false;
+        }
+        if (c->output_tensors_as_shapes.size() <= i ||
+            !ic->FullyDefined(c->output_tensors_as_shapes[i])) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  // Returns true if we can infer output tensors' values -- we know values of
+  // all the input tensors.
+  bool AllInputValuesKnown(NodeContext* c) {
+    InferenceContext* ic = c->inference_context.get();
+
+    // Check inputs are fully defined and values are known.
+    for (int i = 0; i < ic->num_inputs(); i++) {
+      const Tensor* tensor = ic->input_tensor(i);
+      // Note that we don't check c->input_tensor_protos[i], as UpdateNode()
+      // already converted it to ic->input_tensor(i);
+      const ShapeHandle& input_tensors_as_shape =
+          ic->input_tensors_as_shapes()[i];
+      // Either input_tensor is valid or input_tensors_as_shape, which has
+      // value of input tensors as shape format, should be fully defined.
+      if (tensor == nullptr && !ic->FullyDefined(input_tensors_as_shape)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Returns true if we want to update output values with running EvaluateNode()
+  // for this op, based on op type, data type, and size.
+  bool ShouldUpdateOutputValues(NodeContext* c, int64 max_size) {
+    InferenceContext* ic = c->inference_context.get();
+
+    // Due to the cost of running EvaluateNode(), we limit only to white listed
+    // op types.
+    if (!IsWhiteListedOpTypeForEvaluateNode(c->op_data->op_def.name())) {
+      return false;
+    }
+
+    // Check input dtypes are integer.
+    for (const auto& input_type : c->input_types) {
+      if (input_type != DT_INT32 && input_type != DT_INT64) {
+        return false;
+      }
+    }
+
+    // Check output dtypes are integer.
+    for (const auto& output_type : c->output_types) {
+      if (output_type != DT_INT32 && output_type != DT_INT64) {
+        return false;
+      }
+    }
+
+    // Check if the number of elements of each of input tensor is no larger than
+    // the given max size.
+    for (int i = 0; i < ic->num_inputs(); i++) {
+      const Tensor* tensor = ic->input_tensor(i);
+      const ShapeHandle& input_shape_handle = ic->input(i);
+      if (tensor != nullptr) {
+        if (tensor->NumElements() > max_size) {
+          return false;
+        }
+      } else if (ic->Value(ic->NumElements(input_shape_handle)) > max_size) {
+        return false;
+      }
+    }
+
+    // Check if we know the shape of each output tensor, and the number of
+    // elements is larger than the given max size.
+    for (int i = 0; i < ic->num_outputs(); i++) {
+      const ShapeHandle& shape_handle = ic->output(i);
+      if (!ic->FullyDefined(shape_handle) &&
+          ic->Value(ic->NumElements(shape_handle)) > max_size) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Create input tensors from the NodeConext.
+  void CreateInputTensors(NodeContext* c,
+                          std::vector<Tensor>* input_tensor_vector,
+                          TensorVector* inputs) {
+    InferenceContext* ic = c->inference_context.get();
+    for (int i = 0; i < ic->num_inputs(); i++) {
+      if (ic->input_tensor(i)) {
+        input_tensor_vector->at(i) = *ic->input_tensor(i);
+        inputs->emplace_back(&input_tensor_vector->at(i));
+        // Note that we don't check c->input_tensor_protos[i], as UpdateNode()
+        // already converted it to ic->input_tensor(i);
+      } else {
+        // Create Tensor from input_tensors_as_shapes, and then emplace it
+        // back to inputs.
+        // Note that input_tensors_as_shapes is scalar or vector.
+        const ShapeHandle& shape_handle = ic->input_tensors_as_shapes()[i];
+        const DataType& data_type = c->input_types[i];
+        int32 rank = ic->Rank(shape_handle);
+        if (rank < 1) {
+          input_tensor_vector->emplace_back(Tensor(data_type, {}));
+        } else {
+          input_tensor_vector->emplace_back(Tensor(data_type, {rank}));
+        }
+        auto* tensor = &input_tensor_vector->back();
+        if (data_type == DT_INT32) {
+          auto flat = tensor->flat<int32>();
+          for (int j = 0; j < rank; j++) {
+            int32 dim = ic->Value(ic->Dim(shape_handle, j));
+            flat(j) = dim;
+          }
+        } else {
+          auto flat = tensor->flat<int64>();
+          for (int j = 0; j < rank; j++) {
+            int64 dim = ic->Value(ic->Dim(shape_handle, j));
+            flat(j) = dim;
+          }
+        }
+        inputs->emplace_back(tensor);
+      }
+    }
+  }
+
+  // Run a node to infer output values, and add it to the NodeContext.
+  Status UpdateOutputValues(const NodeDef& node, NodeContext* c) {
+    InferenceContext* ic = c->inference_context.get();
+
+    // Input to EvaluateNode()
+    TensorVector inputs;
+    // Container for temporaily created tensor object.
+    std::vector<Tensor> input_tensor_vector(ic->num_inputs());
+    CreateInputTensors(c, &input_tensor_vector, &inputs);
+
+    // Output for EvaluateNode() and output tensor clean up object.
+    TensorVector outputs;
+    auto outputs_cleanup = gtl::MakeCleanup([&outputs] {
+      for (const auto& output : outputs) {
+        if (output.tensor) {
+          delete output.tensor;
+        }
+      }
+    });
+
+    TF_RETURN_IF_ERROR(EvaluateNode(node, inputs, /*cpu_device=*/nullptr,
+                                    &resource_mgr_, &outputs));
+    c->output_tensors_as_shapes.resize(outputs.size());
+    c->output_tensor_protos.resize(outputs.size(), nullptr);
+    for (int k = 0; k < outputs.size(); k++) {
+      const auto& t = outputs[k];
+      // Override output shape.
+      ShapeHandle output_shape;
+      TF_RETURN_IF_ERROR(
+          ic->MakeShapeFromTensorShape(t->shape(), &output_shape));
+      if (ic->FullyDefined(ic->output(k)) &&
+          !EquivalentShapes(ic->output(k), output_shape)) {
+        LOG(WARNING) << "UpdateOutputValues() -- node: " << node.name()
+                     << ", inferred output shape "
+                     << "doesn't match for k=" << k << ": "
+                     << "ic->output(k): " << ic->DebugString(ic->output(k))
+                     << ", output_shape: " << ic->DebugString(output_shape)
+                     << " -- " << node.DebugString();
+      }
+      ic->set_output(k, output_shape);
+      // Set output_tensors_as_shape.
+      MaybeTensorValueToShape(ic, *t.tensor, &c->output_tensors_as_shapes[k]);
+
+      // Set output_tensor_protos.
+      TensorProto tensor_proto;
+      t->AsProtoTensorContent(&tensor_proto);
+      const_tensors_to_propagate_.push_back(tensor_proto);
+      c->output_tensor_protos[k] = &const_tensors_to_propagate_.back();
+    }
+    return Status::OK();
+  }
+
   Status MaybeUpdateNodeContextOutput(const NodeDef& node, const bool is_fed,
                                       NodeContext* c) {
     // Propagate tensors and shape tensors unless the node is fed.
@@ -1201,6 +1444,19 @@ class SymbolicShapeRefiner {
         }
       }
     }
+
+    if (aggressive_shape_inference_) {
+      // Update output tensor values using EvaluateNode() if we can.
+      // Due to the cost of EvaluateNode(), we run it only for certain op types
+      // (white listed) and small integer tensors.
+
+      const int max_elelment_size = 17;  // Max up to 4x4 matrix or similar.
+      if (AllOutputValuesKnown(c) || !AllInputValuesKnown(c) ||
+          !ShouldUpdateOutputValues(c, max_elelment_size)) {
+        return Status::OK();
+      }
+      UpdateOutputValues(node, c).IgnoreError();  // This is optional.
+    }
     return Status::OK();
   }
 
@@ -1327,6 +1583,10 @@ class SymbolicShapeRefiner {
   // may resize and copy the objects into a new buffer, then the existing
   // pointers become dangling pointers.
   std::list<TensorProto> const_tensors_to_propagate_;
+
+  // For more aggressive shape and value inference.
+  bool aggressive_shape_inference_;
+  ResourceMgr resource_mgr_;
 };
 
 // Keep track of shapes and dimensions in a graph.
@@ -1650,7 +1910,8 @@ Status GraphProperties::UpdateEnqueue(
   return Status::OK();
 }
 
-Status GraphProperties::InferStatically(bool assume_valid_feeds) {
+Status GraphProperties::InferStatically(bool assume_valid_feeds,
+                                        bool aggressive_shape_inference) {
   FunctionLibraryDefinition function_library(OpRegistry::Global(),
                                              item_.graph.library());
   std::unordered_map<string, std::unordered_set<int>> fed_ports;
@@ -1736,7 +1997,8 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds) {
     }
   }
 
-  SymbolicShapeRefiner refiner(graph_view, fed_ports);
+  SymbolicShapeRefiner refiner(graph_view, fed_ports,
+                               aggressive_shape_inference);
 
   TopoQueue new_shapes(topo_order);
   // Also seed the propagation of shapes in the fanout of primary inputs.
diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h
index fbae1ca5b4..0635222fe9 100644
--- a/tensorflow/core/grappler/costs/graph_properties.h
+++ b/tensorflow/core/grappler/costs/graph_properties.h
@@ -46,7 +46,16 @@ class GraphProperties {
   // However, it can help infer shapes in the fanout of fed nodes (even though
   // the correctness of these shapes can't be guaranteed), so in some cases
   // (such as simulation or scheduling) it makes sense of keep these shapes.
-  Status InferStatically(bool assume_valid_feeds);
+  // aggressive_shape_inference option executes nodes on the host to identify
+  // output values when possible and does other aggressive strategies.
+  // Similar to assuming_valid_feeds, this may cause incorrectness in graph
+  // analyses, but is useful for simulation or scheduling.
+  Status InferStatically(bool assume_valid_feeds,
+                         bool aggressive_shape_inference);
+  Status InferStatically(bool assume_valid_feeds) {
+    return InferStatically(assume_valid_feeds,
+                           /*aggressive_shape_inference=*/false);
+  }
   // Infer the shape by running the graph on the specified cluster and recording
   // the shapes of the processed tensors.
   Status InferDynamically(Cluster* cluster);
diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc
index 5aae773994..c7f53b2cde 100644
--- a/tensorflow/core/grappler/costs/graph_properties_test.cc
+++ b/tensorflow/core/grappler/costs/graph_properties_test.cc
@@ -304,7 +304,6 @@ TEST_F(GraphPropertiesTest, ReadVariableOpAfterEnter) {
                   .Input("Enter", 0, DT_RESOURCE)
                   .Finalize(item.graph.add_node()));
 
-  // LOG(INFO) << item.graph.DebugString();
   GraphProperties properties(item);
   TF_CHECK_OK(properties.InferStatically(false));
   const auto props = properties.GetOutputProperties("ReadVariableOpAfterEnter");
@@ -1625,6 +1624,91 @@ TEST_F(GraphPropertiesTest, StridedSlicesOfShapes) {
   EXPECT_EQ(shape_a.dim(1).size(), shape_o2.dim(0).size());
 }
 
+TEST_F(GraphPropertiesTest, StridedSliceOfShapeWithShrinkAxisMask) {
+  tensorflow::Scope scope = tensorflow::Scope::NewRootScope();
+  Output placeholder =
+      ops::Placeholder(scope.WithOpName("input_placeholder"), DT_FLOAT,
+                       ops::Placeholder::Shape(TensorShape({5, 480, 40, 1})));
+  auto input_shape = ops::Shape(scope.WithOpName("input_shape"), placeholder);
+
+  Output begin = ops::Const(scope.WithOpName("begin"), {0}, {1});
+  Output end = ops::Const(scope.WithOpName("end"), {3}, {1});
+  Output stride = ops::Const(scope.WithOpName("stride"), {1}, {1});
+
+  Output slice =
+      ops::StridedSlice(scope.WithOpName("slice"), input_shape, begin, end,
+                        stride, ops::StridedSlice::ShrinkAxisMask(1));
+
+  GrapplerItem item;
+  TF_CHECK_OK(scope.ToGraphDef(&item.graph));
+
+  // Without aggresive shape inference, it cannot infer output value of
+  // StridedSlice with ShrinkAxisMask.
+  {
+    GraphProperties properties(item);
+    TF_CHECK_OK(properties.InferStatically(
+        /*assume_valid_feeds=*/false,
+        /*aggressive_shape_inference=*/false));
+    EXPECT_FALSE(properties.GetOutputProperties("slice").at(0).has_value());
+  }
+
+  // InferStatically with aggresive shape inference can infer output value of
+  // StridedSlice with ShrinkAxisMask.
+  {
+    GraphProperties properties(item);
+    TF_CHECK_OK(properties.InferStatically(
+        /*assume_valid_feeds=*/false,
+        /*aggressive_shape_inference=*/true));
+    EXPECT_TRUE(properties.GetOutputProperties("slice").at(0).has_value());
+    const auto slice_value =
+        properties.GetOutputProperties("slice").at(0).value();
+    ExpectTensorValues({5}, slice_value);
+  }
+}
+
+TEST_F(GraphPropertiesTest, ValuePropagationThroughArithmeticOps) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a = ops::Const(s.WithOpName("a"), {5, 7}, {2});
+  Output b = ops::Const(s.WithOpName("b"), {8, 8}, {2});
+  Output c = ops::Const(s.WithOpName("c"), {2, 2}, {2});
+
+  Output a1 = ops::OnesLike(s.WithOpName("a1"), a);
+  Output a_plus_one = ops::Add(s.WithOpName("a_plus_one"), a, a1);
+  Output a_plus_a = ops::Add(s.WithOpName("a_plus_a"), a, a);
+  Output b_plus_2a = ops::Add(s.WithOpName("b_plus_2a"), b, a_plus_a);
+  Output c_plus_b_plus_2a =
+      ops::Add(s.WithOpName("c_plus_b_plus_2a"), c, b_plus_2a);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+  GraphProperties properties(item);
+  TF_CHECK_OK(properties.InferStatically(
+      /*assume_valid_feeds=*/false,
+      /*aggressive_shape_inference=*/true));
+
+  // Check output shapes and values.
+  const auto& a_plus_one_prop = properties.GetOutputProperties("a_plus_one")[0];
+  EXPECT_EQ("int32: [2]", PropToString(a_plus_one_prop));
+  EXPECT_TRUE(a_plus_one_prop.has_value());
+  ExpectTensorValues({6, 8}, a_plus_one_prop.value());
+
+  const auto& a_plus_a_prop = properties.GetOutputProperties("a_plus_a")[0];
+  EXPECT_EQ("int32: [2]", PropToString(a_plus_a_prop));
+  EXPECT_TRUE(a_plus_a_prop.has_value());
+  ExpectTensorValues({10, 14}, a_plus_a_prop.value());
+
+  const auto& b_plus_2a_prop = properties.GetOutputProperties("b_plus_2a")[0];
+  EXPECT_EQ("int32: [2]", PropToString(b_plus_2a_prop));
+  EXPECT_TRUE(b_plus_2a_prop.has_value());
+  ExpectTensorValues({18, 22}, b_plus_2a_prop.value());
+
+  const auto& c_plus_b_plus_2a_prop =
+      properties.GetOutputProperties("c_plus_b_plus_2a")[0];
+  EXPECT_EQ("int32: [2]", PropToString(c_plus_b_plus_2a_prop));
+  EXPECT_TRUE(c_plus_b_plus_2a_prop.has_value());
+  ExpectTensorValues({20, 24}, c_plus_b_plus_2a_prop.value());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
-- 
GitLab


From 4817dabb5aba45547c6c1f5ef79d2b0c2040d51f Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Tue, 18 Dec 2018 11:44:54 -0800
Subject: [PATCH 757/873] Introduce interactive_graphviz tool for HLO graph
 inspection.

PiperOrigin-RevId: 226032713
---
 tensorflow/compiler/xla/tools/BUILD           |  49 +-
 .../compiler/xla/tools/hlo_extractor.cc       | 159 +++++
 tensorflow/compiler/xla/tools/hlo_extractor.h |  36 +
 .../compiler/xla/tools/hlo_extractor_test.cc  | 142 ++++
 .../xla/tools/interactive_graphviz.cc         | 652 ++++++++++++++++++
 5 files changed, 1037 insertions(+), 1 deletion(-)
 create mode 100644 tensorflow/compiler/xla/tools/hlo_extractor.cc
 create mode 100644 tensorflow/compiler/xla/tools/hlo_extractor.h
 create mode 100644 tensorflow/compiler/xla/tools/hlo_extractor_test.cc
 create mode 100644 tensorflow/compiler/xla/tools/interactive_graphviz.cc

diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD
index 8926bbed2b..99b32c19a5 100644
--- a/tensorflow/compiler/xla/tools/BUILD
+++ b/tensorflow/compiler/xla/tools/BUILD
@@ -14,7 +14,7 @@ filegroup(
     visibility = ["//tensorflow/compiler/xla:internal"],
 )
 
-load("//tensorflow:tensorflow.bzl", "tf_cc_binary")
+load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test")
 
 tf_cc_binary(
     name = "hex_floats_to_packed_literal",
@@ -234,3 +234,50 @@ tf_cc_binary(
         "//tensorflow/core:lib",
     ],
 )
+
+tf_cc_test(
+    name = "hlo_extractor_test",
+    srcs = ["hlo_extractor_test.cc"],
+    deps = [
+        ":hlo_extractor",
+        "//tensorflow/compiler/xla/service:hlo_matchers",
+        "//tensorflow/compiler/xla/tests:hlo_test_base",
+        "//tensorflow/core:test",
+    ],
+)
+
+cc_library(
+    name = "hlo_extractor",
+    srcs = ["hlo_extractor.cc"],
+    hdrs = ["hlo_extractor.h"],
+    deps = [
+        "//tensorflow/compiler/xla:status",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_verifier",
+        "@com_google_absl//absl/container:flat_hash_map",
+        "@com_google_absl//absl/container:flat_hash_set",
+        "@com_google_absl//absl/memory",
+    ],
+)
+
+tf_cc_binary(
+    name = "interactive_graphviz",
+    srcs = ["interactive_graphviz.cc"],
+    deps = [
+        ":hlo_extractor",
+        "//tensorflow/compiler/xla/client:client_library",
+        "//tensorflow/compiler/xla/client:local_client",
+        "//tensorflow/compiler/xla/service:compiler",
+        "//tensorflow/compiler/xla/service:cpu_plugin",
+        "//tensorflow/compiler/xla/service:gpu_plugin",
+        "//tensorflow/compiler/xla/service:hlo_graph_dumper",
+        "//tensorflow/compiler/xla/service:hlo_proto",
+        "//tensorflow/compiler/xla/service:hlo_runner",
+        "//tensorflow/compiler/xla/service:local_service",
+        "//tensorflow/compiler/xla/service:platform_util",
+        "//tensorflow/core:framework_internal",
+        "//tensorflow/core:lib",
+        "@com_google_absl//absl/algorithm:container",
+        "@com_google_absl//absl/strings",
+    ],
+)
diff --git a/tensorflow/compiler/xla/tools/hlo_extractor.cc b/tensorflow/compiler/xla/tools/hlo_extractor.cc
new file mode 100644
index 0000000000..f3ce5f99b0
--- /dev/null
+++ b/tensorflow/compiler/xla/tools/hlo_extractor.cc
@@ -0,0 +1,159 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/tools/hlo_extractor.h"
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/memory/memory.h"
+#include "tensorflow/compiler/xla/service/hlo_clone_context.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_verifier.h"
+#include "tensorflow/compiler/xla/status.h"
+
+namespace xla {
+namespace {
+
+// Visitor that build a new HLO module with an entry computation and a root that
+// is provided to the visit function. Only HLOs that are reachable from the new
+// root instruction are included in the new module.
+//
+// The constructor allows specifying a set of boundary HLOs to prune the HLO
+// graph. HLOs at the boundary are replaced with parameters. Can be nullptr
+// which means no boundary, i.e. no HLOs are replaced with parameters.
+class ExtractionVisitor : public ConstDfsHloVisitorWithDefault {
+ public:
+  explicit ExtractionVisitor(
+      const HloModule& old_module,
+      absl::flat_hash_set<const HloInstruction*>* boundary)
+      : old_module_(old_module),
+        module_(absl::make_unique<HloModule>("extracted", config_)),
+        clone_context_(module_.get()),
+        builder_("entry_computation"),
+        boundary_(boundary) {}
+
+  Status HandleParameter(const HloInstruction* parameter) override {
+    // Entry parameters need renumbering.
+    auto new_parameter = HloInstruction::CreateParameter(
+        parameter_number_++, parameter->shape(), parameter->name());
+    clone_context_.MapInstruction(parameter, new_parameter.get());
+    builder_.AddInstruction(std::move(new_parameter));
+    return Status::OK();
+  }
+
+  Status DefaultAction(const HloInstruction* hlo) override {
+    // Replace instructions at the boundary with parameters, but leave constants
+    // untouched.
+    if (boundary_ != nullptr && boundary_->count(hlo) > 0) {
+      auto new_parameter = HloInstruction::CreateParameter(
+          parameter_number_, hlo->shape(), hlo->name());
+      parameter_number_++;
+      clone_context_.MapInstruction(hlo, new_parameter.get());
+      builder_.AddInstruction(std::move(new_parameter));
+      return Status::OK();
+    }
+    std::vector<HloInstruction*> new_operands;
+    for (auto operand : hlo->operands()) {
+      new_operands.push_back(clone_context_.GetInstruction(operand));
+    }
+    auto instruction =
+        hlo->CloneWithNewOperands(hlo->shape(), new_operands, &clone_context_);
+    builder_.AddInstruction(std::move(instruction));
+    return Status::OK();
+  }
+
+  Status FinishVisit(const HloInstruction* /*root*/) override {
+    module_->AddEntryComputation(builder_.Build());
+    // Rename HLOs so that their name matches the original. By default,
+    // HLOs get new unique names when adding a new entry computation to
+    // a module.
+    for (auto computation : old_module_.MakeComputationPostOrder()) {
+      for (auto old_instruction : computation->MakeInstructionPostOrder()) {
+        if (auto new_instruction =
+                clone_context_.FindInstruction(old_instruction)) {
+          new_instruction->SetAndSanitizeName(old_instruction->name());
+        }
+      }
+    }
+    return Status::OK();
+  }
+
+  HloModule* module() { return module_.get(); }
+
+  std::unique_ptr<HloModule> ConsumeModule() { return std::move(module_); }
+
+ private:
+  const HloModule& old_module_;
+  HloModuleConfig config_;
+  std::unique_ptr<HloModule> module_;
+  HloCloneContext clone_context_;
+  HloComputation::Builder builder_;
+  absl::flat_hash_set<const HloInstruction*>* boundary_;
+  int64 parameter_number_ = 0;
+};
+
+void ComputeBoundary(const HloInstruction* root, int64 limit,
+                     absl::flat_hash_set<const HloInstruction*>* boundary) {
+  std::deque<const HloInstruction*> worklist;
+  absl::flat_hash_map<const HloInstruction*, int64> visited;
+  worklist.push_back(root);
+  visited.emplace(root, 0);
+  while (!worklist.empty()) {
+    auto hlo = worklist.front();
+    worklist.pop_front();
+    int64 hops = visited[hlo];
+    if (hops > limit) {
+      boundary->insert(hlo);
+      continue;
+    }
+    for (const HloInstruction* operand : hlo->operands()) {
+      if (visited.count(operand)) {
+        continue;
+      }
+      worklist.push_back(operand);
+      visited.emplace(operand, hops + 1);
+    }
+  }
+}
+
+}  // namespace
+
+std::unique_ptr<HloModule> ExtractModule(HloInstruction* instruction,
+                                         int64 height) {
+  absl::flat_hash_set<const HloInstruction*> boundary;
+  if (height != -1) {
+    ComputeBoundary(instruction, height, &boundary);
+  }
+  ExtractionVisitor visitor(*instruction->GetModule(), &boundary);
+  CHECK(instruction->Accept(&visitor).ok());
+
+  // The first pass may leave unused parameter instructions. Do another
+  // extraction pass to remove unused parameters. This is done because
+  // HloComputation does not allow removing parameters after the computation has
+  // been built.
+  ExtractionVisitor cleanup_visitor(*visitor.module(), /*boundary=*/nullptr);
+  TF_CHECK_OK(visitor.module()->entry_computation()->root_instruction()->Accept(
+      &cleanup_visitor));
+
+  HloVerifier verifier(/*layout_sensitive=*/false,
+                       /*allow_mixed_precision=*/true);
+  TF_CHECK_OK(verifier.Run(cleanup_visitor.module()).status());
+  return cleanup_visitor.ConsumeModule();
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tools/hlo_extractor.h b/tensorflow/compiler/xla/tools/hlo_extractor.h
new file mode 100644
index 0000000000..bc13dc7e43
--- /dev/null
+++ b/tensorflow/compiler/xla/tools/hlo_extractor.h
@@ -0,0 +1,36 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_TOOLS_HLO_EXTRACTOR_H_
+#define TENSORFLOW_COMPILER_XLA_TOOLS_HLO_EXTRACTOR_H_
+
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+
+namespace xla {
+
+// Creates a new HLO module rooted with an entry computation rooted at the given
+// instruction.
+//
+//  By default (height == -1), the new computation includes all transitive
+//  operands of `root`.  If you specify a different height, the new computation
+//  will include all instructions <= `height` hops away from `root`.
+//  Instructions at the boundary are replaced by parameters.
+std::unique_ptr<HloModule> ExtractModule(HloInstruction* instruction,
+                                         int64 height = -1);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_TOOLS_HLO_EXTRACTOR_H_
diff --git a/tensorflow/compiler/xla/tools/hlo_extractor_test.cc b/tensorflow/compiler/xla/tools/hlo_extractor_test.cc
new file mode 100644
index 0000000000..c187222a11
--- /dev/null
+++ b/tensorflow/compiler/xla/tools/hlo_extractor_test.cc
@@ -0,0 +1,142 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/tools/hlo_extractor.h"
+
+#include "tensorflow/compiler/xla/service/hlo_matchers.h"
+#include "tensorflow/compiler/xla/tests/hlo_test_base.h"
+
+namespace xla {
+namespace {
+
+namespace op = testing::opcode_matchers;
+
+using HloExtractorTest = HloTestBase;
+
+TEST_F(HloExtractorTest, ExtractTopLevel) {
+  const string& hlo_string = R"(
+HloModule test
+
+ENTRY %entry {
+  param.0 = f32[4]{0} parameter(0)
+  negate = f32[4]{0} negate(f32[4]{0} param.0)
+  ROOT exp = f32[4]{0} exponential(f32[4]{0} negate)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<HloModule> hlo_module,
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest()));
+
+  {
+    auto extracted_module =
+        ExtractModule(FindInstruction(hlo_module.get(), "exp"));
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Exp(op::Negate(op::Parameter(0))));
+  }
+
+  {
+    auto extracted_module =
+        ExtractModule(FindInstruction(hlo_module.get(), "exp"), /*height=*/0);
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Exp(op::Parameter(0)));
+  }
+
+  {
+    auto extracted_module = ExtractModule(
+        FindInstruction(hlo_module.get(), "negate"), /*height=*/0);
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Negate(op::Parameter(0)));
+  }
+}
+
+TEST_F(HloExtractorTest, ExtractDag) {
+  const string& hlo_string = R"(
+HloModule test
+
+ENTRY %entry {
+  param.0 = f32[4]{0} parameter(0)
+  tanh = f32[4]{0} tanh(f32[4]{0} param.0)
+  negate = f32[4]{0} negate(f32[4]{0} tanh)
+  exp = f32[4]{0} exponential(f32[4]{0} negate)
+  ROOT add = f32[4]{0} add(f32[4]{0} negate, f32[4]{0} exp)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<HloModule> hlo_module,
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest()));
+
+  {
+    auto extracted_module =
+        ExtractModule(FindInstruction(hlo_module.get(), "exp"));
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Exp(op::Negate(op::Tanh(op::Parameter(0)))));
+  }
+
+  {
+    auto extracted_module =
+        ExtractModule(FindInstruction(hlo_module.get(), "add"), /*height=*/0);
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Add(op::Parameter(0), op::Parameter(1)));
+  }
+  {
+    auto extracted_module =
+        ExtractModule(FindInstruction(hlo_module.get(), "add"), /*height=*/1);
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Add(op::Negate(op::Parameter(0)),
+                        op::Exp(op::Negate(op::Parameter(0)))));
+  }
+  {
+    auto extracted_module =
+        ExtractModule(FindInstruction(hlo_module.get(), "add"), /*height=*/2);
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Add(op::Negate(op::Tanh(op::Parameter(0))),
+                        op::Exp(op::Negate(op::Tanh(op::Parameter(0))))));
+  }
+}
+
+TEST_F(HloExtractorTest, ExtractWithConstant) {
+  const string& hlo_string = R"(
+HloModule test
+
+ENTRY %entry {
+  p = f32[4]{0} parameter(0)
+  tanh = f32[4]{0} tanh(p)
+  c = f32[4]{0} constant({1, 2, 3, 4})
+  ROOT add = f32[4]{0} add(tanh, c)
+}
+)";
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<HloModule> hlo_module,
+      HloRunner::CreateModuleFromString(hlo_string, GetDebugOptionsForTest()));
+
+  {
+    auto extracted_module =
+        ExtractModule(FindInstruction(hlo_module.get(), "add"), /*height=*/0);
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Add(op::Parameter(0), op::Parameter(1)));
+  }
+  {
+    auto extracted_module =
+        ExtractModule(FindInstruction(hlo_module.get(), "add"), /*height=*/1);
+    EXPECT_THAT(extracted_module->entry_computation()->root_instruction(),
+                op::Add(op::Tanh(op::Parameter(0)), op::Constant()));
+  }
+}
+
+}  // namespace
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/tools/interactive_graphviz.cc b/tensorflow/compiler/xla/tools/interactive_graphviz.cc
new file mode 100644
index 0000000000..6c90cde5a7
--- /dev/null
+++ b/tensorflow/compiler/xla/tools/interactive_graphviz.cc
@@ -0,0 +1,652 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// A tool for interactively exploring graphviz dumps of HLO graphs.
+//
+// Input can be a binary HloSnapshot proto, a binary HloProto proto, or a
+// textual HLO string.
+//
+// Generated visualization is opened in a new default browser window using
+// /usr/bin/sensible-browser.
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include "absl/algorithm/container.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/string_view_utils.h"
+#include "absl/strings/util.h"
+#include "tensorflow/compiler/xla/client/client_library.h"
+#include "tensorflow/compiler/xla/client/local_client.h"
+#include "tensorflow/compiler/xla/service/compiler.h"
+#include "tensorflow/compiler/xla/service/hlo.pb.h"
+#include "tensorflow/compiler/xla/service/hlo_runner.h"
+#include "tensorflow/compiler/xla/service/local_service.h"
+#include "tensorflow/compiler/xla/service/platform_util.h"
+#include "tensorflow/compiler/xla/tools/hlo_extractor.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/subprocess.h"
+#include "tensorflow/core/util/command_line_flags.h"
+#if defined(PLATFORM_GOOGLE)
+#include "util/readline/readline.h"
+#endif
+
+namespace xla {
+namespace tools {
+namespace {
+
+bool ReadLine(const char *prompt, string *line) {
+#if defined(PLATFORM_GOOGLE)
+  return util::ReadLine(prompt, line);
+#else
+  std::cout << prompt;
+  return std::getline(std::cin, *line);
+#endif
+}
+
+// Command-line opts to this tool.  See main() for descriptions of these
+// fields.
+struct Options {
+  string hlo_snapshot;
+  string hlo_proto;
+  string hlo_text;
+  string platform;
+  string browser;
+};
+
+const char* const kUsage = R"(
+This tool lets you load an XLA dump and then interactively explore its graphical
+representation.
+
+Most models are too large to visualize in their entirety using graphviz, but
+it's still useful to be able to look at the nodes "near" a particular node of
+interest.
+
+If you pass --platform, this tool will compile the HloModule for the given
+platform.  This means that if you acquired your proto from a binary running at a
+particular CL, the HLO graph it ran isn't necessarily the same as the one shown
+here, unless this program was built at the same CL (and our compiler is
+deterministic :).
+
+Be patient when starting this program if you give it a large input; it has to
+compile the whole thing.
+
+Usage:
+
+  interactive_graphviz -- \
+    --{hlo_snapshot,hlo_proto,hlo_text}=path/to/binary_proto
+    --platform={CUDA,CPU,...}
+)";
+
+// Unless an explicit width is specified, we will render a neighborhood of
+// kDefaultWidth nodes around the requested instruction.
+constexpr int64 kDefaultWidth = 2;
+
+// When printing all paths between two nodes, we print out only this many nodes
+// by default, truncating the graph if there are more nodes than this in the
+// all-paths set.
+constexpr int64 kDefaultMaxNumNodesInAllPaths = 100;
+
+using absl::EqualsIgnoreCase;
+
+// A global control for whether backend configuration display is enabled.
+bool show_backend_config = true;
+
+HloInstruction* FindInstruction(const HloModule& module, string node_name) {
+  if (absl::StartsWith(node_name, "%")) {
+    node_name.erase(node_name.begin());
+  }
+  for (const auto& computation : module.computations()) {
+    auto instrs = computation->instructions();
+    auto it = absl::c_find_if(instrs, [&](const HloInstruction* instr) {
+      // Try with and without "%" at the beginning of the node name.
+      return EqualsIgnoreCase(instr->name(), node_name) ||
+             EqualsIgnoreCase(instr->name(), absl::StrCat("%", node_name));
+    });
+    if (it != instrs.end()) {
+      return *it;
+    }
+  }
+  return nullptr;
+}
+
+HloComputation* FindComputation(const HloModule& module,
+                                const string& comp_name) {
+  for (auto* computation : module.computations()) {
+    if (EqualsIgnoreCase(computation->name(), comp_name)) {
+      return computation;
+    }
+  }
+  return nullptr;
+}
+
+// Print a help message describing the various available commands.
+void DoHelpCommand() {
+  std::cout << R"(Commands:
+  <instruction> [<width>]
+    Renders a neighborhood of <width> nodes around <instruction>.  If <width>
+    is not provided, the default value is )"
+            << kDefaultWidth << R"(.
+  allpaths <instruction> <instruction> [<n>]
+    Renders a subset of all paths from one instruction to the other.  Either
+    order of nodes is accepted.  Shows the <n> nodes in the all-paths set on the
+    shortest paths; default is )"
+            << kDefaultMaxNumNodesInAllPaths << R"(.
+  <computation>
+    Renders all nodes in <computation>.
+  backend_config [on|off]
+    Controls whether backend operation configuration information is printed.
+  list [name|op_name|op_type] <pattern>
+    Lists all instructions whose name, metadata op_name, or metadata op_type
+    contains <pattern> as a substring.
+  list computations
+    Lists all computations in the module.
+  info <instruction>
+  info <computation>
+    Prints information about <instruction> or <computation>.
+  extract <instruction> <height>
+    Creates a new HLO module with <instruction> as entry computation root. If
+    <height> is specified, the new computation contains nodes up to <height>
+    nodes above the root.
+  help
+    Prints this usage information.)"
+            << std::endl;
+}
+
+// Turn metadata-printing on or off.
+void DoBackendConfigCommand(const std::vector<string>& tokens) {
+  if (tokens.size() == 2 && tokens[1] == "on") {
+    show_backend_config = true;
+  } else if (tokens.size() == 2 && tokens[1] == "off") {
+    show_backend_config = false;
+  } else if (tokens.size() != 1) {
+    std::cerr << "(Illegal backend_config value.  Use either 'on' or 'off'.)"
+              << std::endl;
+  }
+  std::cout << "Backend configuration display "
+            << (show_backend_config ? "ON" : "OFF") << std::endl;
+}
+
+// List all computations in the module.
+void DoListComputationsCommand(const HloModule& module,
+                               const std::vector<string>& tokens) {
+  if (tokens.size() > 2) {
+    std::cout << R"(Illegal syntax; "list computations" takes no arguments.)";
+    return;
+  }
+  if (module.entry_computation() != nullptr) {
+    std::cout << "Entry computation:" << std::endl;
+    std::cout << "  " << module.entry_computation()->name() << std::endl
+              << std::endl;
+  }
+  std::cout << "Subcomputations:" << std::endl;
+  std::vector<string> names;
+  for (const auto& computation : module.computations()) {
+    if (computation == module.entry_computation()) {
+      continue;
+    }
+    std::cout << "  " << computation->name() << std::endl;
+  }
+}
+
+// List all instructions matching a pattern.
+void DoListCommand(const HloModule& module, const std::vector<string>& tokens) {
+  string pattern = "";
+  string type = "name";
+  if (tokens.size() == 2) {
+    pattern = tokens[1];
+  } else if (tokens.size() == 3) {
+    type = tokens[1];
+    pattern = tokens[2];
+  } else {
+    std::cout << "Illegal list query syntax. Use "
+              << R"("list [name|op_name|op_type] pattern".)" << std::endl;
+    return;
+  }
+
+  std::cout << "Query results:" << std::endl;
+  for (const auto& computation : module.computations()) {
+    for (const auto& instr : computation->instructions()) {
+      if ((type == "name" && instr->name().find(pattern) != string::npos) ||
+          (type == "op_name" &&
+           instr->metadata().op_name().find(pattern) != string::npos) ||
+          (type == "op_type" &&
+           instr->metadata().op_type().find(pattern) != string::npos)) {
+        std::cout << "  " << instr->name();
+        std::cout << ", op_name '" << instr->metadata().op_name() << "'";
+        std::cout << ", op_type '" << instr->metadata().op_type() << "'";
+        std::cout << std::endl;
+      }
+    }
+  }
+}
+
+// Print info about an instruction or computation.
+void DoInfoCommand(const HloModule& module, const std::vector<string>& tokens) {
+  if (tokens.size() != 2) {
+    std::cerr << "Illegal info query syntax. Use "
+              << R"("info name".)";
+    return;
+  }
+  string node_name = tokens[1];
+
+  const HloInstruction* instr = FindInstruction(module, node_name);
+  const HloComputation* comp = FindComputation(module, node_name);
+  if (!instr && !comp) {
+    std::cerr << "Couldn't find HloInstruction or HloComputation named "
+              << node_name << std::endl;
+    return;
+  }
+
+  if (comp != nullptr) {
+    std::cout << "HloComputation " << comp->name() << std::endl;
+    if (comp->IsFusionComputation()) {
+      std::cout << "  Fusion instruction: " << comp->FusionInstruction()->name()
+                << std::endl;
+    }
+    std::cout << "  Parameters:" << std::endl;
+    for (const auto& param : comp->parameter_instructions()) {
+      std::cout << "    " << param->name() << " ("
+                << ShapeUtil::HumanStringWithLayout(param->shape()) << ")"
+                << std::endl;
+    }
+    HloInstruction* root = comp->root_instruction();
+    std::cout << "  Root instruction: " << root->name() << " ("
+              << ShapeUtil::HumanStringWithLayout(root->shape()) << ")"
+              << std::endl;
+
+    auto embedded_computations = comp->MakeEmbeddedComputationsList();
+    std::cout << "  " << embedded_computations.size() << " embedded computation"
+              << (embedded_computations.size() != 1 ? "s" : "")
+              << (!embedded_computations.empty() ? ":" : ".") << std::endl;
+    for (const HloComputation* c : embedded_computations) {
+      std::cout << "    " << c->name() << std::endl;
+    }
+
+    // Find which computations reference comp as an embedded computation.
+    std::vector<const HloComputation*> users;
+    for (const HloComputation* c : module.computations()) {
+      if (absl::c_linear_search(c->MakeEmbeddedComputationsList(), comp)) {
+        users.push_back(c);
+      }
+    }
+    std::cout << "  Used by " << users.size() << " computation"
+              << (users.size() != 1 ? "s" : "") << (!users.empty() ? ":" : ".");
+    for (const HloComputation* c : users) {
+      std::cout << "    " << c->name() << std::endl;
+    }
+  } else {
+    std::cout << "HloInstruction " << instr->name() << std::endl;
+    std::cout << "  Parent computation: " << instr->parent()->name()
+              << std::endl;
+    std::cout << "  Opcode: " << HloOpcodeString(instr->opcode()) << std::endl;
+    std::cout << "  Shape: " << ShapeUtil::HumanStringWithLayout(instr->shape())
+              << std::endl;
+    std::cout << "  Metadata:" << std::endl;
+    if (!instr->metadata().op_name().empty()) {
+      std::cout << "    Name: " << instr->metadata().op_name() << std::endl;
+    }
+    if (!instr->metadata().op_type().empty()) {
+      std::cout << "    Type: " << instr->metadata().op_type() << std::endl;
+    }
+    if (!instr->raw_backend_config_string().empty()) {
+      std::cout << "  Backend configuration: "
+                << instr->raw_backend_config_string() << std::endl;
+    }
+    if (instr->opcode() == HloOpcode::kFusion) {
+      std::cout << "  Fusion kind: " << xla::ToString(instr->fusion_kind())
+                << std::endl;
+      std::cout << "  Fusion computation: "
+                << instr->fused_instructions_computation()->name() << std::endl;
+      std::cout << "  Fused computation root: "
+                << instr->fused_expression_root()->name() << std::endl;
+    }
+    std::cout << "  Operands:" << std::endl;
+    for (HloInstruction* operand : instr->operands()) {
+      std::cout << "    " << operand->name() << " ("
+                << ShapeUtil::HumanStringWithLayout(operand->shape()) << ")"
+                << std::endl;
+    }
+    std::cout << "  Users:" << std::endl;
+    for (HloInstruction* user : instr->users()) {
+      std::cout << "    " << user->name() << std::endl;
+    }
+    if (instr->parent()->root_instruction() == instr) {
+      std::cout << "  Root instruction of " << instr->parent()->name()
+                << std::endl;
+    }
+  }
+}
+
+void DoExtractCommand(const HloModule& module,
+                      absl::Span<const string> tokens) {
+  if (tokens.size() > 3) {
+    std::cerr << R"(Illegal input.  Enter e.g. "extract %fusion.1 2")"
+              << std::endl;
+    return;
+  }
+
+  // Find the node with the given name.
+  string node_name = tokens[1];
+  HloInstruction* instr = FindInstruction(module, node_name);
+  if (!instr) {
+    std::cerr << "Couldn't find HloInstruction named " << node_name << "."
+              << std::endl;
+    return;
+  }
+
+  int64 height = -1;
+  if (tokens.size() == 3) {
+    if (!absl::SimpleAtoi(tokens[2], &height)) {
+      std::cerr << "Can't parse '" << tokens[2] << "' as an integer."
+                << std::endl;
+      return;
+    }
+  }
+
+  auto extracted_module = ExtractModule(instr, height);
+  std::cout << extracted_module->ToString(
+                   HloPrintOptions::ShortParsable().set_print_backend_config(
+                       show_backend_config))
+            << std::endl;
+}
+
+// Checks if there is a use-def path from `from` to `to`.
+bool ExistsPathFromTo(const HloInstruction* from, const HloInstruction* to) {
+  std::unordered_set<const HloInstruction*> visited;
+  std::vector<const HloInstruction*> to_visit = {from};
+  while (!to_visit.empty()) {
+    auto* n = to_visit.back();
+    if (n == to) {
+      return true;
+    }
+    to_visit.pop_back();
+    visited.insert(n);
+    for (auto* user : n->users()) {
+      if (!visited.count(user)) {
+        to_visit.push_back(user);
+      }
+    }
+  }
+  return false;
+}
+
+void DisplayGraphHandle(const Options &opts, const string& handle) {
+  std::cout << handle << std::endl;
+
+  // If it is a url, try to open it up in the user's browser too.
+  if (strings::StartsWithIgnoreCase(handle, "http://") ||
+      strings::StartsWithIgnoreCase(handle, "https://") ||
+      strings::StartsWithIgnoreCase(handle, "file://")) {
+    const char* browser_bin = opts.browser.empty() ? "/usr/bin/sensible-browser"
+                                                   : opts.browser.c_str();
+    tensorflow::SubProcess p;
+    p.SetProgram(browser_bin, {browser_bin, handle});
+    p.Start();
+  } else if (handle.empty()) {
+    std::cerr << "Unable to render graph, perhaps due to graphviz server "
+                 "timeout.  Run with --logtostderr to see."
+              << std::endl;
+  } else {
+    std::cerr << "\nExpected a URL, but got strange graph result (dumped "
+                 "above).  If this isn't what you expected, maybe file a bug?"
+              << std::endl;
+  }
+}
+
+void DoAllPathsCommand(const Options& opts, const HloModule& module,
+                       const std::vector<string>& tokens) {
+  if (tokens.size() > 4) {
+    std::cerr << R"(Illegal input.  Enter e.g. "allpaths %add.4 %subtract.2" or
+"allpaths add.4 subtract.2 42.)"
+              << std::endl;
+    return;
+  }
+
+  int64 max_nodes = kDefaultMaxNumNodesInAllPaths;
+  if (tokens.size() == 4 && !absl::SimpleAtoi(tokens[3], &max_nodes)) {
+    std::cerr << "Can't parse '" << tokens[3] << "' as an integer."
+              << std::endl;
+    return;
+  }
+
+  const HloInstruction* n1 = FindInstruction(module, tokens[1]);
+  if (!n1) {
+    std::cerr << "Couldn't find HloInstruction named " << tokens[1];
+    return;
+  }
+  const HloInstruction* n2 = FindInstruction(module, tokens[2]);
+  if (!n2) {
+    std::cerr << "Couldn't find HloInstruction named " << tokens[2];
+    return;
+  }
+
+  // Is there a path from n1 to n2, or vice versa?
+  const HloInstruction* from;
+  const HloInstruction* to;
+  if (ExistsPathFromTo(n1, n2)) {
+    from = n1;
+    to = n2;
+  } else if (ExistsPathFromTo(n2, n1)) {
+    from = n2;
+    to = n1;
+  } else {
+    std::cerr << "No path from/to " << tokens[1] << " to/from " << tokens[2];
+    return;
+  }
+  DisplayGraphHandle(opts, hlo_graph_dumper::DumpAllPathsFromTo(
+      *from, *to, max_nodes, /*show_backend_config=*/show_backend_config));
+}
+
+// Plot a given instruction neighborhood or computation with graphviz.
+void DoPlotCommand(const Options& opts, const HloModule& module,
+                   const std::vector<string>& tokens) {
+  if (tokens.size() > 2) {
+    std::cerr << R"(Illegal input.  Enter e.g. "%fusion.1 42" or "%fusion.1".)"
+              << std::endl;
+    return;
+  }
+
+  string node_name = tokens[0];
+
+  // Find the node with the given name.
+  const HloInstruction* instr = FindInstruction(module, node_name);
+  const HloComputation* comp = FindComputation(module, node_name);
+  if (!instr && !comp) {
+    std::cerr << "Couldn't find HloInstruction or HloComputation named "
+              << node_name << "." << std::endl;
+    return;
+  }
+
+  uint64 graph_width = kDefaultWidth;
+  if (tokens.size() == 2) {
+    if (comp) {
+      std::cerr << "Can only use graph-size parameter with instructions, but "
+                << node_name << " is a computation." << std::endl;
+      return;
+    }
+    if (!absl::SimpleAtoi(tokens[1], &graph_width)) {
+      std::cerr << "Can't parse '" << tokens[1] << "' as an integer."
+                << std::endl;
+      return;
+    }
+  }
+
+  // Generate the graph and print the resulting string, which should be a
+  // graphviz url.
+  if (comp) {
+    DisplayGraphHandle(opts, hlo_graph_dumper::DumpGraph(
+        *comp, "", comp->parent()->config().debug_options(), nullptr,
+        /*show_backend_config=*/show_backend_config));
+  } else {
+    DisplayGraphHandle(opts, hlo_graph_dumper::DumpNeighborhoodAround(
+        *instr, graph_width, /*show_backend_config=*/show_backend_config));
+  }
+}
+
+// Run the main event loop, reading user commands and processing them.
+void InteractiveDumpGraphs(const Options& opts, const HloModule& module) {
+  // This is an interactive tool, but some may use `extract` in non-tty
+  // environment anyway. Give them a clean hlo dump.
+  if (isatty(fileno(stdin))) {
+    std::cout << "\n\nLoaded module " << module.name() << "." << std::endl;
+    DoHelpCommand();
+  }
+  for (string line; ReadLine("\ncommand: ", &line);) {
+    if (line.empty()) {
+      std::cout << R"(Enter e.g. "fusion.1 3" or "add.8".)" << std::endl
+                << R"(Enter "help" for help; ^D, "quit", or "exit" to exit.)"
+                << std::endl;
+      continue;
+    }
+    std::vector<string> tokens = strings::Split(line, ' ');
+    if (tokens[0] == "quit" || tokens[0] == "exit") {
+      break;
+    } else if (tokens[0] == "help") {
+      DoHelpCommand();
+    } else if (tokens[0] == "backend_config") {
+      DoBackendConfigCommand(tokens);
+    } else if (tokens[0] == "list") {
+      if (tokens.size() > 1 && tokens[1] == "computations") {
+        DoListComputationsCommand(module, tokens);
+      } else {
+        DoListCommand(module, tokens);
+      }
+    } else if (tokens[0] == "info") {
+      DoInfoCommand(module, tokens);
+    } else if (tokens[0] == "extract") {
+      DoExtractCommand(module, tokens);
+    } else if (tokens[0] == "allpaths") {
+      DoAllPathsCommand(opts, module, tokens);
+    } else {
+      DoPlotCommand(opts, module, tokens);
+    }
+  }
+}
+
+void CheckFlags(const Options &opts) {
+  std::vector<string> nonempty_proto_flags;
+  if (!opts.hlo_proto.empty()) {
+    nonempty_proto_flags.push_back("--hlo_proto");
+  }
+  if (!opts.hlo_snapshot.empty()) {
+    nonempty_proto_flags.push_back("--hlo_snapshot");
+  }
+  if (!opts.hlo_text.empty()) {
+    nonempty_proto_flags.push_back("--hlo_text");
+  }
+  switch (nonempty_proto_flags.size()) {
+    case 1:
+      // We're good to go.
+      break;
+    case 0:
+      LOG(FATAL) << "Need one of the following options: "
+                 << absl::StrJoin(nonempty_proto_flags, ", ");
+    default:
+      LOG(FATAL) << "Can only specify one of "
+                 << absl::StrJoin(nonempty_proto_flags, ", ");
+  }
+}
+
+void RealMain(const Options& opts) {
+  if (!isatty(fileno(stdin))) {
+    LOG(ERROR) << "\n\n*****************************************\n"
+               << "This is an interactive tool, but stdin is not a tty.\n"
+               << "*****************************************\n\n";
+  }
+
+  CheckFlags(opts);
+
+  std::unique_ptr<HloModule> module;
+  if (!opts.hlo_snapshot.empty()) {
+    HloSnapshot snapshot;
+    TF_CHECK_OK(tensorflow::ReadBinaryProto(tensorflow::Env::Default(),
+                                            opts.hlo_snapshot, &snapshot))
+        << "Can't open, read, or parse HloSnapshot proto at "
+        << opts.hlo_snapshot;
+    auto config =
+        HloModule::CreateModuleConfigFromProto(snapshot.hlo().hlo_module(),
+                                               xla::GetDebugOptionsFromFlags())
+            .ValueOrDie();
+    module = HloModule::CreateFromProto(snapshot.hlo().hlo_module(), config)
+                 .ValueOrDie();
+  } else if (!opts.hlo_proto.empty()) {
+    module = HloRunner::ReadModuleFromBinaryProtoFile(
+                 opts.hlo_proto, xla::GetDebugOptionsFromFlags())
+                 .ValueOrDie();
+  } else if (!opts.hlo_text.empty()) {
+    module = HloRunner::ReadModuleFromHloTextFile(
+                 opts.hlo_text, xla::GetDebugOptionsFromFlags())
+                 .ValueOrDie();
+  }
+
+  // If a platform was specified, compile the module for that platform.
+  if (!opts.platform.empty()) {
+    se::Platform* platform =
+        PlatformUtil::GetPlatform(opts.platform).ValueOrDie();
+    LOG(INFO) << "Compiling module for " << platform->Name();
+
+    se::StreamExecutor* executor =
+        platform->ExecutorForDevice(/*ordinal=*/0).ValueOrDie();
+    auto compiler = Compiler::GetForPlatform(platform).ValueOrDie();
+    module = compiler
+                 ->RunHloPasses(std::move(module), executor,
+                                /*device_allocator=*/nullptr)
+                 .ValueOrDie();
+    auto executable = compiler
+                          ->RunBackend(std::move(module), executor,
+                                       /*device_allocator=*/nullptr)
+                          .ValueOrDie();
+    InteractiveDumpGraphs(opts, executable->module());
+  } else {
+    InteractiveDumpGraphs(opts, *module);
+  }
+}
+
+}  // namespace
+}  // namespace tools
+}  // namespace xla
+
+int main(int argc, char** argv) {
+  xla::tools::Options opts;
+  opts.browser = "/usr/bin/sensible-browser";
+  bool need_help = false;
+  const std::vector<tensorflow::Flag> flag_list = {
+      tensorflow::Flag("hlo_snapshot", &opts.hlo_snapshot,
+                       "HloSnapshot proto to interactively dump to graphviz"),
+      tensorflow::Flag("hlo_proto", &opts.hlo_proto,
+                       "XLA hlo proto to interactively dump to graphviz"),
+      tensorflow::Flag("hlo_text", &opts.hlo_text,
+                       "XLA hlo proto to interactively dump to graphviz"),
+      tensorflow::Flag("platform", &opts.platform,
+                       "Platform to compile for: CPU, CUDA, etc"),
+      tensorflow::Flag("browser", &opts.browser,
+                       "Path to web browser used to display produced graphs."),
+      tensorflow::Flag("help", &need_help,
+                       "Prints this help message"),
+  };
+  xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
+  bool parse_ok = tensorflow::Flags::Parse(&argc, argv, flag_list);
+  tensorflow::port::InitMain(argv[0], &argc, &argv);
+  if (argc != 1 || !parse_ok || need_help) {
+    LOG(QFATAL) << usage;
+  }
+  xla::tools::RealMain(opts);
+  return 0;
+}
-- 
GitLab


From 787c2a684e39d9c21525c07da3e56bc68621e9b7 Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Tue, 18 Dec 2018 11:56:15 -0800
Subject: [PATCH 758/873] [XLA] Fix bug in TriangularSolve where both batch
 dimensions and more than one block were present.

Fixes https://github.com/google/jax/issues/128 (or will do so when a new XLA release is integrated into JAX.)

PiperOrigin-RevId: 226034659
---
 tensorflow/compiler/xla/client/lib/BUILD      |  1 +
 .../xla/client/lib/triangular_solve.cc        | 15 ++++++++--
 .../xla/client/lib/triangular_solve_test.cc   | 28 +++++++++++++++++++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD
index 970f00759f..826b13fe37 100644
--- a/tensorflow/compiler/xla/client/lib/BUILD
+++ b/tensorflow/compiler/xla/client/lib/BUILD
@@ -412,6 +412,7 @@ xla_test(
     srcs = ["triangular_solve_test.cc"],
     tags = ["noasan"],  # sometimes times out, http://b/78650012
     deps = [
+        ":matrix",
         ":triangular_solve",
         "//tensorflow/compiler/xla:array2d",
         "//tensorflow/compiler/xla:literal",
diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve.cc b/tensorflow/compiler/xla/client/lib/triangular_solve.cc
index ac58090dfe..159e0c82dc 100644
--- a/tensorflow/compiler/xla/client/lib/triangular_solve.cc
+++ b/tensorflow/compiler/xla/client/lib/triangular_solve.cc
@@ -62,15 +62,26 @@ XlaOp DiagonalBlocks(XlaOp a, int64 block_size) {
                               /*broadcast_sizes=*/{2}),
                     /*permutation=*/{1, 0});
 
+      PaddingConfig padding_config =
+          MakeEdgePaddingConfig({{0, 0}, {ndims - 2, 0}});
+      start_indices =
+          Pad(start_indices, ConstantR0<int32>(builder, 0), padding_config);
+
       // Gather the diagonal blocks
+      std::vector<int64> slice_sizes(ndims);
       GatherDimensionNumbers dim_numbers;
+      for (int i = 0; i < ndims - 2; ++i) {
+        dim_numbers.add_offset_dims(i);
+        dim_numbers.add_start_index_map(i);
+        slice_sizes[i] = ShapeUtil::GetDimension(shape, i);
+      }
+      slice_sizes[ndims - 2] = slice_sizes[ndims - 1] = block_size;
       dim_numbers.add_offset_dims(ndims - 1);
       dim_numbers.add_offset_dims(ndims);
       dim_numbers.add_start_index_map(ndims - 2);
       dim_numbers.add_start_index_map(ndims - 1);
       dim_numbers.set_index_vector_dim(1);
-      diag_blocks = Gather(a, start_indices, dim_numbers,
-                           /*slice_sizes=*/{block_size, block_size});
+      diag_blocks = Gather(a, start_indices, dim_numbers, slice_sizes);
     }
 
     // The last block might be smaller than the block size,
diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc b/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
index d0188e8ea0..3fea627e6a 100644
--- a/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
+++ b/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/client/lib/matrix.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -339,5 +340,32 @@ XLA_TEST_F(TriangularSolveTest, SimpleLeftUpperTransposeNoconjugate) {
       &builder, expected, {a_data.get(), b_data.get()}, ErrorSpec(1e-2, 1e-2));
 }
 
+XLA_TEST_F(TriangularSolveTest, BatchedLeftUpper) {
+  XlaBuilder builder(TestName());
+
+  Array3D<float> bvals(7, 5, 5);
+  bvals.FillIota(1.);
+
+  // Set avals to the upper triangle of bvals.
+  Array3D<float> avals = bvals;
+  avals.Each([](absl::Span<const int64> indices, float* value) {
+    if (indices[1] > indices[2]) {
+      *value = 0;
+    }
+  });
+
+  XlaOp a, b;
+  auto a_data = CreateR3Parameter<float>(avals, 0, "a", &builder, &a);
+  auto b_data = CreateR3Parameter<float>(bvals, 1, "b", &builder, &b);
+  BatchDot(ConstantR3FromArray3D(&builder, avals),
+           TriangularSolve(a, b,
+                           /*left_side=*/true, /*lower=*/false,
+                           /*transpose_a=*/false, /*conjugate_a=*/false,
+                           /*block_size=*/2));
+
+  ComputeAndCompareR3<float>(&builder, bvals, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 716fea7be71d03ba486dde6c1adba245d18e805f Mon Sep 17 00:00:00 2001
From: Sergio Guadarrama <sguada@google.com>
Date: Tue, 18 Dec 2018 12:40:20 -0800
Subject: [PATCH 759/873] Update all tf.to_float to
 tf.cast(..,dtype=tf.float32) in losses_impl

PiperOrigin-RevId: 226041616
---
 tensorflow/python/ops/losses/losses_impl.py | 32 ++++++++++-----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index dc3ef4b72d..feef05496a 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -139,7 +139,7 @@ def _num_present(losses, weights, per_batch=False):
        and not math_ops.equal(weights, 0.0))):
     return _num_elements(losses)
   with ops.name_scope(None, "num_present", (losses, weights)) as scope:
-    weights = math_ops.to_float(weights)
+    weights = math_ops.cast(weights, dtype=dtypes.float32)
     present = array_ops.where(
         math_ops.equal(weights, 0.0),
         array_ops.zeros_like(weights),
@@ -207,8 +207,8 @@ def compute_weighted_loss(
         weights_broadcast_ops.assert_broadcastable(weights, losses),)):
       losses = ops.convert_to_tensor(losses)
       input_dtype = losses.dtype
-      losses = math_ops.to_float(losses)
-      weights = math_ops.to_float(weights)
+      losses = math_ops.cast(losses, dtype=dtypes.float32)
+      weights = math_ops.cast(weights, dtype=dtypes.float32)
       weighted_losses = math_ops.multiply(losses, weights)
       if reduction == Reduction.NONE:
         loss = weighted_losses
@@ -275,8 +275,8 @@ def absolute_difference(
     raise ValueError("predictions must not be None.")
   with ops.name_scope(scope, "absolute_difference",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     losses = math_ops.abs(math_ops.subtract(predictions, labels))
     return compute_weighted_loss(
@@ -329,8 +329,8 @@ def cosine_distance(
     raise ValueError("predictions must not be None.")
   with ops.name_scope(scope, "cosine_distance_loss",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
 
     radial_diffs = math_ops.multiply(predictions, labels)
@@ -377,8 +377,8 @@ def hinge_loss(labels, logits, weights=1.0, scope=None,
   if logits is None:
     raise ValueError("logits must not be None.")
   with ops.name_scope(scope, "hinge_loss", (logits, labels, weights)) as scope:
-    logits = math_ops.to_float(logits)
-    labels = math_ops.to_float(labels)
+    logits = math_ops.cast(logits, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     logits.get_shape().assert_is_compatible_with(labels.get_shape())
     # We first need to convert binary labels to -1/1 labels (as floats).
     all_ones = array_ops.ones_like(labels)
@@ -446,8 +446,8 @@ def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
     raise ValueError("predictions must not be None.")
   with ops.name_scope(scope, "huber_loss",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     error = math_ops.subtract(predictions, labels)
     abs_error = math_ops.abs(error)
@@ -512,8 +512,8 @@ def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
     raise ValueError("predictions must not be None.")
   with ops.name_scope(scope, "log_loss",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     losses = -math_ops.multiply(
         labels,
@@ -580,11 +580,11 @@ def mean_pairwise_squared_error(
     raise ValueError("predictions must not be None.")
   with ops.name_scope(scope, "mean_pairwise_squared_error",
                       (predictions, labels, weights)) as scope:
-    weights = math_ops.to_float(weights)
-    labels = math_ops.to_float(labels)
+    weights = math_ops.cast(weights, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     with ops.control_dependencies((
         weights_broadcast_ops.assert_broadcastable(weights, labels),)):
-      predictions = math_ops.to_float(predictions)
+      predictions = math_ops.cast(predictions, dtype=dtypes.float32)
       predictions.get_shape().assert_is_compatible_with(labels.get_shape())
 
       diffs = math_ops.subtract(predictions, labels)
-- 
GitLab


From 9fb0eea0f56c8f61f377a9b4b2d27f9a6e406cb0 Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Tue, 18 Dec 2018 12:49:35 -0800
Subject: [PATCH 760/873] Stop running np.median if no callbacks are run.

PiperOrigin-RevId: 226043102
---
 tensorflow/python/keras/callbacks.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index 1cb3267527..bfebc788f2 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -194,6 +194,8 @@ class CallbackList(object):
 
   def _call_batch_hook(self, mode, hook, batch, logs=None):
     """Helper function for all batch_{begin | end} methods."""
+    if not self.callbacks:
+      return
     hook_name = 'on_{mode}_batch_{hook}'.format(mode=mode, hook=hook)
     if hook == 'begin':
       self._t_enter_batch = time.time()
-- 
GitLab


From 69fface76df085e8625d7dffbd5a176e15541e5f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 12:51:17 -0800
Subject: [PATCH 761/873] Rollback outside compile change for TPU inference
 embedding.

PiperOrigin-RevId: 226043387
---
 .../contrib/tpu/python/tpu/feature_column.py   | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/feature_column.py b/tensorflow/contrib/tpu/python/tpu/feature_column.py
index 8edf131bc2..d5d00d628d 100644
--- a/tensorflow/contrib/tpu/python/tpu/feature_column.py
+++ b/tensorflow/contrib/tpu/python/tpu/feature_column.py
@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import contextlib
 import math
 
 from tensorflow.contrib.tpu.python.tpu import tpu
@@ -278,10 +279,11 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
 
   def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
     if tpu.under_tpu_inference_context():
-      def host_computation():
+      # TODO(shizhiw, b/112012627, b/112336539): Replace _outside_all_rewrites()
+      # with outside compilation.
+      with _outside_all_rewrites():
         return fc._EmbeddingColumn._get_dense_tensor(
             self, inputs, weight_collections, trainable)
-      return tpu.outside_compilation(host_computation)
 
     if _is_running_on_cpu():
       return fc._EmbeddingColumn._get_dense_tensor(
@@ -298,6 +300,13 @@ class _TPUEmbeddingColumn(_TPUBaseEmbeddingColumn, fc._EmbeddingColumn):
     return tensor
 
 
+@contextlib.contextmanager
+def _outside_all_rewrites():
+  """'Break out' of a tpu.rewrite() (or shard(), etc.)."""
+  with ops.control_dependencies(None):
+    yield
+
+
 class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
                                 fc._SharedEmbeddingColumn):
   """Core Shared Embedding Column."""
@@ -376,10 +385,11 @@ class _TPUSharedEmbeddingColumn(_TPUBaseEmbeddingColumn,
 
   def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
     if tpu.under_tpu_inference_context():
-      def host_computation():
+      # TODO(shizhiw, b/112012627, b/112336539): Replace _outside_all_rewrites()
+      # with outside compilation.
+      with _outside_all_rewrites():
         return fc._SharedEmbeddingColumn._get_dense_tensor(
             self, inputs, weight_collections, trainable)
-      return tpu.outside_compilation(host_computation)
 
     if _is_running_on_cpu():
       return fc._SharedEmbeddingColumn._get_dense_tensor(
-- 
GitLab


From 28e306d6c1e7146e0c9eb73842a116607b4391fd Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 18 Dec 2018 12:52:41 -0800
Subject: [PATCH 762/873] Internal change.

PiperOrigin-RevId: 226043644
---
 tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh | 1 -
 tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 4c4e8ba1ca..7ec386a60e 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -67,7 +67,6 @@ EXTRA_BUILD_FLAGS=""
 #                        ensure performance
 # --test_core_only       Use tensorflow/python/... as test target
 # --test_contrib_only    Use tensorflow/contrib/... as test target
-#for ARG in "$@"; do
 while [[ $# -gt 0 ]]; do
   case "$1" in
     --tf_nightly) TF_NIGHTLY=1 ;;
diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 070235fcb2..1c2a1263a9 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -68,7 +68,7 @@ EXTRA_BUILD_FLAGS=""
 # --test_core_only       Use tensorflow/python/... as test target
 # --test_contrib_only    Use tensorflow/contrib/... as test target
 while [[ $# -gt 0 ]]; do
-  case "$ARG" in
+  case "$1" in
     --tf_nightly) TF_NIGHTLY=1 ;;
     --skip_test) SKIP_TEST=1 ;;
     --enable_remote_cache) set_remote_cache_options ;;
-- 
GitLab


From 0c99bf2930f95f28b72f0073c68f28721f60b09d Mon Sep 17 00:00:00 2001
From: Akshay Modi <nareshmodi@google.com>
Date: Tue, 18 Dec 2018 13:00:32 -0800
Subject: [PATCH 763/873] Minor speedup of keras functional models

Before:
entry {
  name: "MicroBenchmarks.benchmark_keras_model_functional_predict_run_model_eagerly"
  iters: 1
  wall_time: 579941.034317
  extras {
    key: "examples_per_sec"
    value {
      double_value: 1.72431323329
    }
  }
}

After:
entry {
  name: "MicroBenchmarks.benchmark_keras_model_functional_predict_run_model_eagerly"
  iters: 1
  wall_time: 483793.973923
  extras {
    key: "examples_per_sec"
    value {
      double_value: 2.06699556816
    }
  }
}
PiperOrigin-RevId: 226044758
---
 tensorflow/python/keras/engine/network.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py
index 7435da61cc..1c30c6b3fb 100644
--- a/tensorflow/python/keras/engine/network.py
+++ b/tensorflow/python/keras/engine/network.py
@@ -223,6 +223,9 @@ class Network(base_layer.Layer):
     self._nodes_by_depth = nodes_by_depth
     self._layers = layers
     self._layers_by_depth = layers_by_depth
+    self._layer_call_argspecs = {}
+    for layer in self._layers:
+      self._layer_call_argspecs[layer] = tf_inspect.getfullargspec(layer.call)
 
     self._track_layers(layers)
 
@@ -982,13 +985,14 @@ class Network(base_layer.Layer):
             else:
               kwargs = {}
             # Ensure `training` arg propagation if applicable.
-            if 'training' in tf_inspect.getfullargspec(layer.call).args:
+            argspec = self._layer_call_argspecs[layer].args
+            if 'training' in argspec:
               kwargs.setdefault('training', training)
 
             if len(computed_data) == 1:
               computed_tensor, computed_mask = computed_data[0]
               # Ensure mask propagation if applicable.
-              if 'mask' in tf_inspect.getfullargspec(layer.call).args:
+              if 'mask' in argspec:
                 kwargs.setdefault('mask', computed_mask)
 
               # Compute outputs and masks.
@@ -1014,7 +1018,7 @@ class Network(base_layer.Layer):
               computed_tensors = [x[0] for x in computed_data]
               computed_masks = [x[1] for x in computed_data]
               # Ensure mask propagation if applicable.
-              if 'mask' in tf_inspect.getfullargspec(layer.call).args:
+              if 'mask' in argspec:
                 kwargs.setdefault('mask', computed_masks)
 
               # Compute outputs and masks.
-- 
GitLab


From 796ca2bb9249db651d72f801efb8736a99d9d3e7 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Tue, 18 Dec 2018 13:09:22 -0800
Subject: [PATCH 764/873] Fix the warning in creating the build file about load
 not being at the top.

PiperOrigin-RevId: 226046335
---
 tensorflow/contrib/distribute/python/BUILD | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 4c9c35da5a..9e27eb2b69 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -1,5 +1,8 @@
 # Implementation of a prototype TF distributed computation library.
 
+load("//tensorflow:tensorflow.bzl", "py_test")
+load("//tensorflow:tensorflow.bzl", "cuda_py_test")
+
 package(
     default_visibility = [
         "//tensorflow:internal",
@@ -10,9 +13,6 @@ licenses(["notice"])  # Apache 2.0
 
 exports_files(["LICENSE"])
 
-load("//tensorflow:tensorflow.bzl", "py_test")
-load("//tensorflow:tensorflow.bzl", "cuda_py_test")
-
 # TODO(priyag): Figure out testonly issues that are preventing us from
 # including our tests in pip for now.
 
-- 
GitLab


From 9088cf61c564bba09c6c025c9383f38142326ef3 Mon Sep 17 00:00:00 2001
From: Michael Case <mikecase@google.com>
Date: Tue, 18 Dec 2018 13:36:29 -0800
Subject: [PATCH 765/873] Separate out Keras API generation from TensorFlow.

When moving Keras to a separate repo, we will have to separate the API
generation from the rest of TensorFlow. The generated API files will have
to part of the Keras package, as core TensorFlow will have no way of knowing
which version of the tf.Keras API will be installed.

This CL does a few things...
- Move ALL @tf_export("keras.*") declarations in to keras/ directory.
  See python/keras/ops.py where I had to move lots of these decorators
  out of TensorFlow code.
- Change ALL @tf_export("keras.*") to @keras_export("keras.*"). Doing
  this will allow us to generate the public Keras python __init__.py files
  separately from the rest of the TensorFlow API.
- Add a new api_gen targets to keras/api/BUILD to generate this API.

PiperOrigin-RevId: 226050838
---
 tensorflow/BUILD                              |   6 +-
 tensorflow/api_template.__init__.py           |   4 +-
 tensorflow/api_template_v1.__init__.py        |   4 +-
 tensorflow/compat_template_v1.__init__.py     |   4 +-
 tensorflow/python/BUILD                       |   1 +
 .../feature_column/feature_column_v2.py       |   5 +-
 tensorflow/python/framework/ops.py            |   2 +-
 tensorflow/python/keras/BUILD                 |   1 +
 tensorflow/python/keras/__init__.py           |   5 +-
 tensorflow/python/keras/activations.py        |  30 +-
 tensorflow/python/keras/api/BUILD             |  60 ++++
 .../python/keras/applications/densenet.py     |  18 +-
 .../keras/applications/imagenet_utils.py      |   6 +-
 .../keras/applications/inception_resnet_v2.py |  10 +-
 .../python/keras/applications/inception_v3.py |  10 +-
 .../python/keras/applications/mobilenet.py    |  10 +-
 .../python/keras/applications/mobilenet_v2.py |  10 +-
 .../python/keras/applications/nasnet.py       |  14 +-
 .../python/keras/applications/resnet50.py     |  10 +-
 tensorflow/python/keras/applications/vgg16.py |  10 +-
 tensorflow/python/keras/applications/vgg19.py |  10 +-
 .../python/keras/applications/xception.py     |  10 +-
 tensorflow/python/keras/backend.py            | 278 +++++++++---------
 tensorflow/python/keras/callbacks.py          |  28 +-
 tensorflow/python/keras/constraints.py        |  18 +-
 .../python/keras/datasets/boston_housing.py   |   4 +-
 tensorflow/python/keras/datasets/cifar10.py   |   4 +-
 tensorflow/python/keras/datasets/cifar100.py  |   4 +-
 .../python/keras/datasets/fashion_mnist.py    |   4 +-
 tensorflow/python/keras/datasets/imdb.py      |   6 +-
 tensorflow/python/keras/datasets/mnist.py     |   4 +-
 tensorflow/python/keras/datasets/reuters.py   |   6 +-
 tensorflow/python/keras/engine/base_layer.py  |   4 +-
 tensorflow/python/keras/engine/input_layer.py |   6 +-
 tensorflow/python/keras/engine/input_spec.py  |   5 +-
 tensorflow/python/keras/engine/saving.py      |  12 +-
 tensorflow/python/keras/engine/sequential.py  |   4 +-
 tensorflow/python/keras/engine/training.py    |   4 +-
 tensorflow/python/keras/estimator/__init__.py |   6 +-
 tensorflow/python/keras/initializers.py       |  20 +-
 .../keras/layers/advanced_activations.py      |  14 +-
 .../python/keras/layers/convolutional.py      |  44 +--
 .../keras/layers/convolutional_recurrent.py   |   4 +-
 tensorflow/python/keras/layers/core.py        |  28 +-
 .../python/keras/layers/cudnn_recurrent.py    |   6 +-
 tensorflow/python/keras/layers/embeddings.py  |   4 +-
 tensorflow/python/keras/layers/local.py       |   6 +-
 tensorflow/python/keras/layers/merge.py       |  34 +--
 tensorflow/python/keras/layers/noise.py       |   8 +-
 .../python/keras/layers/normalization.py      |   6 +-
 tensorflow/python/keras/layers/pooling.py     |  32 +-
 tensorflow/python/keras/layers/recurrent.py   |  24 +-
 tensorflow/python/keras/layers/wrappers.py    |   8 +-
 tensorflow/python/keras/losses.py             | 110 +++----
 tensorflow/python/keras/metrics.py            |  44 +--
 tensorflow/python/keras/models.py             |   4 +-
 tensorflow/python/keras/ops.py                |  56 ++++
 .../python/keras/optimizer_v2/adadelta.py     |   4 +-
 .../python/keras/optimizer_v2/adagrad.py      |   4 +-
 tensorflow/python/keras/optimizer_v2/adam.py  |   4 +-
 .../python/keras/optimizer_v2/adamax.py       |   4 +-
 tensorflow/python/keras/optimizer_v2/ftrl.py  |   4 +-
 .../keras/optimizer_v2/gradient_descent.py    |   4 +-
 .../python/keras/optimizer_v2/optimizer_v2.py |   4 +-
 .../python/keras/optimizer_v2/rmsprop.py      |   4 +-
 tensorflow/python/keras/optimizers.py         |  24 +-
 .../python/keras/preprocessing/image.py       |  37 +--
 .../python/keras/preprocessing/sequence.py    |  10 +-
 tensorflow/python/keras/preprocessing/text.py |  10 +-
 tensorflow/python/keras/regularizers.py       |  18 +-
 tensorflow/python/keras/saving/saved_model.py |   6 +-
 tensorflow/python/keras/utils/data_utils.py   |  12 +-
 .../python/keras/utils/generic_utils.py       |  14 +-
 tensorflow/python/keras/utils/io_utils.py     |   4 +-
 tensorflow/python/keras/utils/layer_utils.py  |   6 +-
 .../python/keras/utils/multi_gpu_utils.py     |   4 +-
 tensorflow/python/keras/utils/np_utils.py     |   6 +-
 tensorflow/python/keras/utils/vis_utils.py    |   4 +-
 .../python/keras/wrappers/scikit_learn.py     |   6 +-
 tensorflow/python/ops/init_ops.py             |  36 +--
 tensorflow/python/ops/losses/losses_impl.py   |   2 +-
 tensorflow/python/tools/api/generator/BUILD   |   4 +-
 .../tools/api/generator/api_init_files.bzl    |  46 +--
 .../tools/api/generator/api_init_files_v1.bzl |  84 +++---
 tensorflow/python/util/tf_export.py           |  18 +-
 85 files changed, 809 insertions(+), 663 deletions(-)
 create mode 100644 tensorflow/python/keras/api/BUILD
 create mode 100644 tensorflow/python/keras/ops.py

diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 93d45206cb..f07e7365d3 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -637,7 +637,11 @@ py_library(
     srcs = select({
         "api_version_2": [":tf_python_api_gen_v2"],
         "//conditions:default": [":tf_python_api_gen_v1"],
-    }) + [":root_init_gen"],
+    }) + [":root_init_gen"] + [
+        "//tensorflow/python/keras/api:keras_python_api_gen",
+        "//tensorflow/python/keras/api:keras_python_api_gen_compat_v1",
+        "//tensorflow/python/keras/api:keras_python_api_gen_compat_v2",
+    ],
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = ["//tensorflow/python:no_contrib"],
diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index ce5df0f9d4..983aa361e4 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -36,7 +36,9 @@ if not hasattr(_current_module, 'estimator'):
       parent_package_str=__name__,
       child_package_str=(
           'tensorflow_estimator.python.estimator.api.estimator'))
-
+_component_api_helper.package_hook(
+    parent_package_str=__name__,
+    child_package_str=('tensorflow.python.keras.api._v2.keras'))
 # Make sure directory containing top level submodules is in
 # the __path__ so that "from tensorflow.foo import bar" works.
 # We're using bitwise, but there's nothing special about that.
diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index f88e046873..e199639762 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -38,7 +38,9 @@ if not hasattr(_current_module, 'estimator'):
       parent_package_str=__name__,
       child_package_str=(
           'tensorflow_estimator.python.estimator.api.estimator'))
-
+_component_api_helper.package_hook(
+    parent_package_str=__name__,
+    child_package_str=('tensorflow.python.keras.api._v1.keras'))
 from tensorflow.python.util.lazy_loader import LazyLoader  # pylint: disable=g-import-not-at-top
 _CONTRIB_WARNING = """
 WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
diff --git a/tensorflow/compat_template_v1.__init__.py b/tensorflow/compat_template_v1.__init__.py
index d58acde09f..b966c22b23 100644
--- a/tensorflow/compat_template_v1.__init__.py
+++ b/tensorflow/compat_template_v1.__init__.py
@@ -29,6 +29,8 @@ from tensorflow.python.tools import component_api_helper as _component_api_helpe
 _component_api_helper.package_hook(
     parent_package_str=__name__,
     child_package_str=('tensorflow_estimator.python.estimator.api.estimator'))
-
+_component_api_helper.package_hook(
+    parent_package_str=__name__,
+    child_package_str=('tensorflow.python.keras.api._v1.keras'))
 from tensorflow.python.platform import flags  # pylint: disable=g-import-not-at-top
 app.flags = flags  # pylint: disable=undefined-variable
diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD
index c76385a3e2..c12e9ee3f3 100644
--- a/tensorflow/python/BUILD
+++ b/tensorflow/python/BUILD
@@ -81,6 +81,7 @@ py_library(
     visibility = [
         "//tensorflow:__pkg__",
         "//tensorflow/python/estimator:__subpackages__",
+        "//tensorflow/python/keras:__subpackages__",
         "//tensorflow/python/tools:__pkg__",
         "//tensorflow/python/tools/api/generator:__pkg__",
         "//tensorflow/tools/api/tests:__pkg__",
diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py
index 914044d6d6..d4e3fc5d2d 100644
--- a/tensorflow/python/feature_column/feature_column_v2.py
+++ b/tensorflow/python/feature_column/feature_column_v2.py
@@ -165,6 +165,7 @@ from tensorflow.python.training import checkpoint_utils
 from tensorflow.python.training.checkpointable import tracking
 from tensorflow.python.util import deprecation
 from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import keras_export
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -303,7 +304,7 @@ class _StateManagerImpl(StateManager):
     raise ValueError('Variable does not exist.')
 
 
-@tf_export('keras.layers.DenseFeatures', v1=[])
+@keras_export('keras.layers.DenseFeatures', v1=[])
 class DenseFeatures(Layer):
   """A layer that produces a dense `Tensor` based on given `feature_columns`.
 
@@ -518,7 +519,7 @@ class _LinearModelLayer(Layer):
       return predictions
 
 
-@tf_export('keras.layers.LinearModel', v1=[])
+@keras_export('keras.layers.LinearModel', v1=[])
 class LinearModel(training.Model):
   """Produces a linear prediction `Tensor` based on given `feature_columns`.
 
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index 27c56ef990..908d28fb95 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -5988,7 +5988,7 @@ name_scope_cache = {}
 # Named like a function for backwards compatibility with the
 # @tf_contextlib.contextmanager version, which was switched to a class to avoid
 # some object creation overhead.
-@tf_export("name_scope", "keras.backend.name_scope")
+@tf_export("name_scope")
 class name_scope(object):  # pylint: disable=invalid-name
   """A context manager for use when defining a Python op.
 
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 3b3986dc2f..8f9d70b15c 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -42,6 +42,7 @@ py_library(
         "datasets/reuters.py",
         "estimator/__init__.py",
         "keras_parameterized.py",
+        "ops.py",
         "preprocessing/__init__.py",
         "preprocessing/image.py",
         "preprocessing/sequence.py",
diff --git a/tensorflow/python/keras/__init__.py b/tensorflow/python/keras/__init__.py
index 2a6de2de88..e59744f64d 100644
--- a/tensorflow/python/keras/__init__.py
+++ b/tensorflow/python/keras/__init__.py
@@ -33,6 +33,7 @@ from tensorflow.python.keras import layers
 from tensorflow.python.keras import losses
 from tensorflow.python.keras import metrics
 from tensorflow.python.keras import models
+from tensorflow.python.keras import ops
 from tensorflow.python.keras import optimizers
 from tensorflow.python.keras import preprocessing
 from tensorflow.python.keras import regularizers
@@ -44,11 +45,11 @@ from tensorflow.python.keras.models import Sequential
 from tensorflow.python.keras.saving.saved_model import export
 from tensorflow.python.keras.saving.saved_model import load_from_saved_model
 
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 __version__ = '2.2.4-tf'
 
-tf_export('keras.__version__').export_constant(__name__, '__version__')
+keras_export('keras.__version__').export_constant(__name__, '__version__')
 
 del absolute_import
 del division
diff --git a/tensorflow/python/keras/activations.py b/tensorflow/python/keras/activations.py
index d69791ce8d..8f10aca020 100644
--- a/tensorflow/python/keras/activations.py
+++ b/tensorflow/python/keras/activations.py
@@ -24,10 +24,10 @@ from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.activations.softmax')
+@keras_export('keras.activations.softmax')
 def softmax(x, axis=-1):
   """Softmax activation function.
 
@@ -53,7 +53,7 @@ def softmax(x, axis=-1):
                      'Received input: %s' % (x,))
 
 
-@tf_export('keras.activations.elu')
+@keras_export('keras.activations.elu')
 def elu(x, alpha=1.0):
   """Exponential linear unit.
 
@@ -72,7 +72,7 @@ def elu(x, alpha=1.0):
   return K.elu(x, alpha)
 
 
-@tf_export('keras.activations.selu')
+@keras_export('keras.activations.selu')
 def selu(x):
   """Scaled Exponential Linear Unit (SELU).
 
@@ -101,7 +101,7 @@ def selu(x):
   return scale * K.elu(x, alpha)
 
 
-@tf_export('keras.activations.softplus')
+@keras_export('keras.activations.softplus')
 def softplus(x):
   """Softplus activation function.
 
@@ -114,7 +114,7 @@ def softplus(x):
   return nn.softplus(x)
 
 
-@tf_export('keras.activations.softsign')
+@keras_export('keras.activations.softsign')
 def softsign(x):
   """Softsign activation function.
 
@@ -127,7 +127,7 @@ def softsign(x):
   return nn.softsign(x)
 
 
-@tf_export('keras.activations.relu')
+@keras_export('keras.activations.relu')
 def relu(x, alpha=0., max_value=None, threshold=0):
   """Rectified Linear Unit.
 
@@ -150,22 +150,22 @@ def relu(x, alpha=0., max_value=None, threshold=0):
   return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold)
 
 
-@tf_export('keras.activations.tanh')
+@keras_export('keras.activations.tanh')
 def tanh(x):
   return nn.tanh(x)
 
 
-@tf_export('keras.activations.sigmoid')
+@keras_export('keras.activations.sigmoid')
 def sigmoid(x):
   return nn.sigmoid(x)
 
 
-@tf_export('keras.activations.exponential')
+@keras_export('keras.activations.exponential')
 def exponential(x):
   return math_ops.exp(x)
 
 
-@tf_export('keras.activations.hard_sigmoid')
+@keras_export('keras.activations.hard_sigmoid')
 def hard_sigmoid(x):
   """Hard sigmoid activation function.
 
@@ -183,17 +183,17 @@ def hard_sigmoid(x):
   return K.hard_sigmoid(x)
 
 
-@tf_export('keras.activations.linear')
+@keras_export('keras.activations.linear')
 def linear(x):
   return x
 
 
-@tf_export('keras.activations.serialize')
+@keras_export('keras.activations.serialize')
 def serialize(activation):
   return activation.__name__
 
 
-@tf_export('keras.activations.deserialize')
+@keras_export('keras.activations.deserialize')
 def deserialize(name, custom_objects=None):
   return deserialize_keras_object(
       name,
@@ -202,7 +202,7 @@ def deserialize(name, custom_objects=None):
       printable_module_name='activation function')
 
 
-@tf_export('keras.activations.get')
+@keras_export('keras.activations.get')
 def get(identifier):
   if identifier is None:
     return linear
diff --git a/tensorflow/python/keras/api/BUILD b/tensorflow/python/keras/api/BUILD
new file mode 100644
index 0000000000..259cadcb32
--- /dev/null
+++ b/tensorflow/python/keras/api/BUILD
@@ -0,0 +1,60 @@
+# Description:
+# Package for TensorFlow.
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])  # Apache 2.0 License
+
+load("//tensorflow/python/tools/api/generator:api_gen.bzl", "gen_api_init_files")
+load("//tensorflow/python/tools/api/generator:api_init_files.bzl", "KERAS_API_INIT_FILES")
+load("//tensorflow/python/tools/api/generator:api_init_files_v1.bzl", "KERAS_API_INIT_FILES_V1")
+
+gen_api_init_files(
+    name = "keras_python_api_gen",
+    api_name = "keras",
+    api_version = 1,
+    output_files = KERAS_API_INIT_FILES_V1,
+    output_package = "tensorflow.python.keras.api",
+    package_deps = [
+        "//tensorflow/python/keras",
+        "//tensorflow/python:no_contrib",
+    ],
+    packages = [
+        "tensorflow.python",
+        "tensorflow.python.keras",
+    ],
+)
+
+gen_api_init_files(
+    name = "keras_python_api_gen_compat_v1",
+    api_name = "keras",
+    api_version = 1,
+    output_dir = "_v1/",
+    output_files = KERAS_API_INIT_FILES_V1,
+    output_package = "tensorflow.python.keras.api._v1",
+    package_deps = [
+        "//tensorflow/python/keras",
+        "//tensorflow/python:no_contrib",
+    ],
+    packages = [
+        "tensorflow.python",
+        "tensorflow.python.keras",
+    ],
+)
+
+gen_api_init_files(
+    name = "keras_python_api_gen_compat_v2",
+    api_name = "keras",
+    api_version = 2,
+    output_dir = "_v2/",
+    output_files = KERAS_API_INIT_FILES,
+    output_package = "tensorflow.python.keras.api._v2",
+    package_deps = [
+        "//tensorflow/python/keras",
+        "//tensorflow/python:no_contrib",
+    ],
+    packages = [
+        "tensorflow.python",
+        "tensorflow.python.keras",
+    ],
+)
diff --git a/tensorflow/python/keras/applications/densenet.py b/tensorflow/python/keras/applications/densenet.py
index 172848bbdb..9404968c81 100644
--- a/tensorflow/python/keras/applications/densenet.py
+++ b/tensorflow/python/keras/applications/densenet.py
@@ -22,37 +22,37 @@ from __future__ import print_function
 from keras_applications import densenet
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.densenet.DenseNet121',
-           'keras.applications.DenseNet121')
+@keras_export('keras.applications.densenet.DenseNet121',
+              'keras.applications.DenseNet121')
 @keras_modules_injection
 def DenseNet121(*args, **kwargs):
   return densenet.DenseNet121(*args, **kwargs)
 
 
-@tf_export('keras.applications.densenet.DenseNet169',
-           'keras.applications.DenseNet169')
+@keras_export('keras.applications.densenet.DenseNet169',
+              'keras.applications.DenseNet169')
 @keras_modules_injection
 def DenseNet169(*args, **kwargs):
   return densenet.DenseNet169(*args, **kwargs)
 
 
-@tf_export('keras.applications.densenet.DenseNet201',
-           'keras.applications.DenseNet201')
+@keras_export('keras.applications.densenet.DenseNet201',
+              'keras.applications.DenseNet201')
 @keras_modules_injection
 def DenseNet201(*args, **kwargs):
   return densenet.DenseNet201(*args, **kwargs)
 
 
-@tf_export('keras.applications.densenet.decode_predictions')
+@keras_export('keras.applications.densenet.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return densenet.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.densenet.preprocess_input')
+@keras_export('keras.applications.densenet.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return densenet.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/imagenet_utils.py b/tensorflow/python/keras/applications/imagenet_utils.py
index c25b5c2bdd..d60afe43bc 100644
--- a/tensorflow/python/keras/applications/imagenet_utils.py
+++ b/tensorflow/python/keras/applications/imagenet_utils.py
@@ -21,16 +21,16 @@ from __future__ import print_function
 from keras_applications import imagenet_utils
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.imagenet_utils.preprocess_input')
+@keras_export('keras.applications.imagenet_utils.preprocess_input')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return imagenet_utils.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.imagenet_utils.preprocess_input')
+@keras_export('keras.applications.imagenet_utils.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return imagenet_utils.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/applications/inception_resnet_v2.py
index 0b9ef371fa..0203cf6ad9 100644
--- a/tensorflow/python/keras/applications/inception_resnet_v2.py
+++ b/tensorflow/python/keras/applications/inception_resnet_v2.py
@@ -22,23 +22,23 @@ from __future__ import print_function
 from keras_applications import inception_resnet_v2
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.inception_resnet_v2.InceptionResNetV2',
-           'keras.applications.InceptionResNetV2')
+@keras_export('keras.applications.inception_resnet_v2.InceptionResNetV2',
+              'keras.applications.InceptionResNetV2')
 @keras_modules_injection
 def InceptionResNetV2(*args, **kwargs):
   return inception_resnet_v2.InceptionResNetV2(*args, **kwargs)
 
 
-@tf_export('keras.applications.inception_resnet_v2.decode_predictions')
+@keras_export('keras.applications.inception_resnet_v2.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return inception_resnet_v2.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.inception_resnet_v2.preprocess_input')
+@keras_export('keras.applications.inception_resnet_v2.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return inception_resnet_v2.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/inception_v3.py b/tensorflow/python/keras/applications/inception_v3.py
index ab76826e17..08bf3f3f2b 100644
--- a/tensorflow/python/keras/applications/inception_v3.py
+++ b/tensorflow/python/keras/applications/inception_v3.py
@@ -22,23 +22,23 @@ from __future__ import print_function
 from keras_applications import inception_v3
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.inception_v3.InceptionV3',
-           'keras.applications.InceptionV3')
+@keras_export('keras.applications.inception_v3.InceptionV3',
+              'keras.applications.InceptionV3')
 @keras_modules_injection
 def InceptionV3(*args, **kwargs):
   return inception_v3.InceptionV3(*args, **kwargs)
 
 
-@tf_export('keras.applications.inception_v3.decode_predictions')
+@keras_export('keras.applications.inception_v3.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return inception_v3.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.inception_v3.preprocess_input')
+@keras_export('keras.applications.inception_v3.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return inception_v3.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/mobilenet.py b/tensorflow/python/keras/applications/mobilenet.py
index 1f71a5ae99..d40e4a7614 100644
--- a/tensorflow/python/keras/applications/mobilenet.py
+++ b/tensorflow/python/keras/applications/mobilenet.py
@@ -22,23 +22,23 @@ from __future__ import print_function
 from keras_applications import mobilenet
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.mobilenet.MobileNet',
-           'keras.applications.MobileNet')
+@keras_export('keras.applications.mobilenet.MobileNet',
+              'keras.applications.MobileNet')
 @keras_modules_injection
 def MobileNet(*args, **kwargs):
   return mobilenet.MobileNet(*args, **kwargs)
 
 
-@tf_export('keras.applications.mobilenet.decode_predictions')
+@keras_export('keras.applications.mobilenet.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return mobilenet.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.mobilenet.preprocess_input')
+@keras_export('keras.applications.mobilenet.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return mobilenet.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/mobilenet_v2.py b/tensorflow/python/keras/applications/mobilenet_v2.py
index 52ac5959ad..696844067e 100644
--- a/tensorflow/python/keras/applications/mobilenet_v2.py
+++ b/tensorflow/python/keras/applications/mobilenet_v2.py
@@ -22,23 +22,23 @@ from __future__ import print_function
 from keras_applications import mobilenet_v2
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.mobilenet_v2.MobileNetV2',
-           'keras.applications.MobileNetV2')
+@keras_export('keras.applications.mobilenet_v2.MobileNetV2',
+              'keras.applications.MobileNetV2')
 @keras_modules_injection
 def MobileNetV2(*args, **kwargs):
   return mobilenet_v2.MobileNetV2(*args, **kwargs)
 
 
-@tf_export('keras.applications.mobilenet_v2.decode_predictions')
+@keras_export('keras.applications.mobilenet_v2.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return mobilenet_v2.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.mobilenet_v2.preprocess_input')
+@keras_export('keras.applications.mobilenet_v2.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return mobilenet_v2.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/nasnet.py b/tensorflow/python/keras/applications/nasnet.py
index 44fc329d57..90c4fb23bb 100644
--- a/tensorflow/python/keras/applications/nasnet.py
+++ b/tensorflow/python/keras/applications/nasnet.py
@@ -22,30 +22,30 @@ from __future__ import print_function
 from keras_applications import nasnet
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.nasnet.NASNetMobile',
-           'keras.applications.NASNetMobile')
+@keras_export('keras.applications.nasnet.NASNetMobile',
+              'keras.applications.NASNetMobile')
 @keras_modules_injection
 def NASNetMobile(*args, **kwargs):
   return nasnet.NASNetMobile(*args, **kwargs)
 
 
-@tf_export('keras.applications.nasnet.NASNetLarge',
-           'keras.applications.NASNetLarge')
+@keras_export('keras.applications.nasnet.NASNetLarge',
+              'keras.applications.NASNetLarge')
 @keras_modules_injection
 def NASNetLarge(*args, **kwargs):
   return nasnet.NASNetLarge(*args, **kwargs)
 
 
-@tf_export('keras.applications.nasnet.decode_predictions')
+@keras_export('keras.applications.nasnet.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return nasnet.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.nasnet.preprocess_input')
+@keras_export('keras.applications.nasnet.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return nasnet.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/resnet50.py b/tensorflow/python/keras/applications/resnet50.py
index 80d3f9044f..38f4d8a67a 100644
--- a/tensorflow/python/keras/applications/resnet50.py
+++ b/tensorflow/python/keras/applications/resnet50.py
@@ -22,23 +22,23 @@ from __future__ import print_function
 from keras_applications import resnet50
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.resnet50.ResNet50',
-           'keras.applications.ResNet50')
+@keras_export('keras.applications.resnet50.ResNet50',
+              'keras.applications.ResNet50')
 @keras_modules_injection
 def ResNet50(*args, **kwargs):
   return resnet50.ResNet50(*args, **kwargs)
 
 
-@tf_export('keras.applications.resnet50.decode_predictions')
+@keras_export('keras.applications.resnet50.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return resnet50.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.resnet50.preprocess_input')
+@keras_export('keras.applications.resnet50.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return resnet50.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/vgg16.py b/tensorflow/python/keras/applications/vgg16.py
index 8557d26931..e2a34258ca 100644
--- a/tensorflow/python/keras/applications/vgg16.py
+++ b/tensorflow/python/keras/applications/vgg16.py
@@ -22,23 +22,23 @@ from __future__ import print_function
 from keras_applications import vgg16
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.vgg16.VGG16',
-           'keras.applications.VGG16')
+@keras_export('keras.applications.vgg16.VGG16',
+              'keras.applications.VGG16')
 @keras_modules_injection
 def VGG16(*args, **kwargs):
   return vgg16.VGG16(*args, **kwargs)
 
 
-@tf_export('keras.applications.vgg16.decode_predictions')
+@keras_export('keras.applications.vgg16.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return vgg16.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.vgg16.preprocess_input')
+@keras_export('keras.applications.vgg16.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return vgg16.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/vgg19.py b/tensorflow/python/keras/applications/vgg19.py
index 8fc04413a0..ed362edfa8 100644
--- a/tensorflow/python/keras/applications/vgg19.py
+++ b/tensorflow/python/keras/applications/vgg19.py
@@ -22,23 +22,23 @@ from __future__ import print_function
 from keras_applications import vgg19
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.vgg19.VGG19',
-           'keras.applications.VGG19')
+@keras_export('keras.applications.vgg19.VGG19',
+              'keras.applications.VGG19')
 @keras_modules_injection
 def VGG19(*args, **kwargs):
   return vgg19.VGG19(*args, **kwargs)
 
 
-@tf_export('keras.applications.vgg19.decode_predictions')
+@keras_export('keras.applications.vgg19.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return vgg19.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.vgg19.preprocess_input')
+@keras_export('keras.applications.vgg19.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return vgg19.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/applications/xception.py b/tensorflow/python/keras/applications/xception.py
index 960e6dec69..4476213f6d 100644
--- a/tensorflow/python/keras/applications/xception.py
+++ b/tensorflow/python/keras/applications/xception.py
@@ -22,23 +22,23 @@ from __future__ import print_function
 from keras_applications import xception
 
 from tensorflow.python.keras.applications import keras_modules_injection
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.applications.xception.Xception',
-           'keras.applications.Xception')
+@keras_export('keras.applications.xception.Xception',
+              'keras.applications.Xception')
 @keras_modules_injection
 def Xception(*args, **kwargs):
   return xception.Xception(*args, **kwargs)
 
 
-@tf_export('keras.applications.xception.decode_predictions')
+@keras_export('keras.applications.xception.decode_predictions')
 @keras_modules_injection
 def decode_predictions(*args, **kwargs):
   return xception.decode_predictions(*args, **kwargs)
 
 
-@tf_export('keras.applications.xception.preprocess_input')
+@keras_export('keras.applications.xception.preprocess_input')
 @keras_modules_injection
 def preprocess_input(*args, **kwargs):
   return xception.preprocess_input(*args, **kwargs)
diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py
index 23623e6e2c..42d94e77a0 100644
--- a/tensorflow/python/keras/backend.py
+++ b/tensorflow/python/keras/backend.py
@@ -63,7 +63,7 @@ from tensorflow.python.ops import variables as variables_module
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util import tf_inspect
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 py_all = all
 py_sum = sum
@@ -120,7 +120,7 @@ _GRAPH_VARIABLES = weakref.WeakKeyDictionary()
 _GRAPH_TF_OPTIMIZERS = weakref.WeakKeyDictionary()
 
 
-@tf_export('keras.backend.backend')
+@keras_export('keras.backend.backend')
 def backend():
   """Publicly accessible method for determining the current backend.
 
@@ -132,7 +132,7 @@ def backend():
   return 'tensorflow'
 
 
-@tf_export('keras.backend.epsilon')
+@keras_export('keras.backend.epsilon')
 def epsilon():
   """Returns the value of the fuzz factor used in numeric expressions.
 
@@ -148,7 +148,7 @@ def epsilon():
   return _EPSILON
 
 
-@tf_export('keras.backend.set_epsilon')
+@keras_export('keras.backend.set_epsilon')
 def set_epsilon(value):
   """Sets the value of the fuzz factor used in numeric expressions.
 
@@ -169,7 +169,7 @@ def set_epsilon(value):
   _EPSILON = value
 
 
-@tf_export('keras.backend.floatx')
+@keras_export('keras.backend.floatx')
 def floatx():
   """Returns the default float type, as a string.
 
@@ -187,7 +187,7 @@ def floatx():
   return _FLOATX
 
 
-@tf_export('keras.backend.set_floatx')
+@keras_export('keras.backend.set_floatx')
 def set_floatx(value):
   """Sets the default float type.
 
@@ -213,7 +213,7 @@ def set_floatx(value):
   _FLOATX = str(value)
 
 
-@tf_export('keras.backend.cast_to_floatx')
+@keras_export('keras.backend.cast_to_floatx')
 def cast_to_floatx(x):
   """Cast a Numpy array to the default Keras float type.
 
@@ -241,7 +241,7 @@ def cast_to_floatx(x):
   return np.asarray(x, dtype=_FLOATX)
 
 
-@tf_export('keras.backend.image_data_format')
+@keras_export('keras.backend.image_data_format')
 def image_data_format():
   """Returns the default image data format convention.
 
@@ -257,7 +257,7 @@ def image_data_format():
   return _IMAGE_DATA_FORMAT
 
 
-@tf_export('keras.backend.set_image_data_format')
+@keras_export('keras.backend.set_image_data_format')
 def set_image_data_format(data_format):
   """Sets the value of the image data format convention.
 
@@ -289,7 +289,7 @@ def set_image_data_format(data_format):
 PER_GRAPH_LAYER_NAME_UIDS = weakref.WeakKeyDictionary()
 
 
-@tf_export('keras.backend.get_uid')
+@keras_export('keras.backend.get_uid')
 def get_uid(prefix=''):
   """Associates a string prefix with an integer counter in a TensorFlow graph.
 
@@ -316,7 +316,7 @@ def get_uid(prefix=''):
   return layer_name_uids[prefix]
 
 
-@tf_export('keras.backend.reset_uids')
+@keras_export('keras.backend.reset_uids')
 def reset_uids():
   """Resets graph identifiers.
   """
@@ -326,7 +326,7 @@ def reset_uids():
     del per_graph_layer_name_uids[key]
 
 
-@tf_export('keras.backend.clear_session')
+@keras_export('keras.backend.clear_session')
 def clear_session():
   """Destroys the current TF graph and creates a new one.
 
@@ -349,7 +349,7 @@ def clear_session():
     _GRAPH_TF_OPTIMIZERS.pop(graph, None)
 
 
-@tf_export('keras.backend.manual_variable_initialization')
+@keras_export('keras.backend.manual_variable_initialization')
 def manual_variable_initialization(value):
   """Sets the manual variable initialization flag.
 
@@ -366,7 +366,7 @@ def manual_variable_initialization(value):
   _MANUAL_VAR_INIT = value
 
 
-@tf_export('keras.backend.learning_phase')
+@keras_export('keras.backend.learning_phase')
 def learning_phase():
   """Returns the learning phase flag.
 
@@ -395,7 +395,7 @@ def symbolic_learning_phase():
     return _GRAPH_LEARNING_PHASES[graph]
 
 
-@tf_export('keras.backend.set_learning_phase')
+@keras_export('keras.backend.set_learning_phase')
 def set_learning_phase(value):
   """Sets the learning phase to a fixed value.
 
@@ -459,7 +459,7 @@ def _get_session():
   return session
 
 
-@tf_export(v1=['keras.backend.get_session'])
+@keras_export(v1=['keras.backend.get_session'])
 def get_session():
   """Returns the TF session to be used by the backend.
 
@@ -493,7 +493,7 @@ def get_graph():
     return ops.get_default_graph()
 
 
-@tf_export('keras.backend.set_session')
+@keras_export('keras.backend.set_session')
 def set_session(session):
   """Sets the global TensorFlow session.
 
@@ -610,7 +610,7 @@ def _to_tensor(x, dtype):
   return ops.convert_to_tensor(x, dtype=dtype)
 
 
-@tf_export('keras.backend.is_sparse')
+@keras_export('keras.backend.is_sparse')
 def is_sparse(tensor):
   """Returns whether a tensor is a sparse tensor.
 
@@ -634,7 +634,7 @@ def is_sparse(tensor):
   return isinstance(tensor, sparse_tensor.SparseTensor)
 
 
-@tf_export('keras.backend.to_dense')
+@keras_export('keras.backend.to_dense')
 def to_dense(tensor):
   """Converts a sparse tensor into a dense tensor and returns it.
 
@@ -664,7 +664,7 @@ def to_dense(tensor):
 name_scope = ops.name_scope
 
 
-@tf_export('keras.backend.variable')
+@keras_export('keras.backend.variable')
 def variable(value, dtype=None, name=None, constraint=None):
   """Instantiates a variable and returns it.
 
@@ -765,7 +765,7 @@ def _initialize_variables(session):
       session.run(variables_module.variables_initializer(uninitialized_vars))
 
 
-@tf_export('keras.backend.constant')
+@keras_export('keras.backend.constant')
 def constant(value, dtype=None, shape=None, name=None):
   """Creates a constant tensor.
 
@@ -836,7 +836,7 @@ def is_keras_tensor(x):
   return hasattr(x, '_keras_history')
 
 
-@tf_export('keras.backend.placeholder')
+@keras_export('keras.backend.placeholder')
 def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None):
   """Instantiates a placeholder tensor and returns it.
 
@@ -892,7 +892,7 @@ def is_placeholder(x):
     return False
 
 
-@tf_export('keras.backend.shape')
+@keras_export('keras.backend.shape')
 def shape(x):
   """Returns the symbolic shape of a tensor or variable.
 
@@ -925,7 +925,7 @@ def shape(x):
   return array_ops.shape(x)
 
 
-@tf_export('keras.backend.int_shape')
+@keras_export('keras.backend.int_shape')
 def int_shape(x):
   """Returns the shape of tensor or variable as a tuple of int or None entries.
 
@@ -956,7 +956,7 @@ def int_shape(x):
     return None
 
 
-@tf_export('keras.backend.ndim')
+@keras_export('keras.backend.ndim')
 def ndim(x):
   """Returns the number of axes in a tensor, as an integer.
 
@@ -984,7 +984,7 @@ def ndim(x):
   return None
 
 
-@tf_export('keras.backend.dtype')
+@keras_export('keras.backend.dtype')
 def dtype(x):
   """Returns the dtype of a Keras tensor or variable, as a string.
 
@@ -1015,7 +1015,7 @@ def dtype(x):
   return x.dtype.base_dtype.name
 
 
-@tf_export('keras.backend.eval')
+@keras_export('keras.backend.eval')
 def eval(x):
   """Evaluates the value of a variable.
 
@@ -1037,7 +1037,7 @@ def eval(x):
   return get_value(to_dense(x))
 
 
-@tf_export('keras.backend.zeros')
+@keras_export('keras.backend.zeros')
 def zeros(shape, dtype=None, name=None):
   """Instantiates an all-zeros variable and returns it.
 
@@ -1072,7 +1072,7 @@ def zeros(shape, dtype=None, name=None):
     return v
 
 
-@tf_export('keras.backend.ones')
+@keras_export('keras.backend.ones')
 def ones(shape, dtype=None, name=None):
   """Instantiates an all-ones variable and returns it.
 
@@ -1107,7 +1107,7 @@ def ones(shape, dtype=None, name=None):
     return v
 
 
-@tf_export('keras.backend.eye')
+@keras_export('keras.backend.eye')
 def eye(size, dtype=None, name=None):
   """Instantiate an identity matrix and returns it.
 
@@ -1136,7 +1136,7 @@ def eye(size, dtype=None, name=None):
   return variable(linalg_ops.eye(size, dtype=tf_dtype), dtype, name)
 
 
-@tf_export('keras.backend.zeros_like')
+@keras_export('keras.backend.zeros_like')
 def zeros_like(x, dtype=None, name=None):
   """Instantiates an all-zeros variable of the same shape as another tensor.
 
@@ -1162,7 +1162,7 @@ def zeros_like(x, dtype=None, name=None):
   return array_ops.zeros_like(x, dtype=dtype, name=name)
 
 
-@tf_export('keras.backend.ones_like')
+@keras_export('keras.backend.ones_like')
 def ones_like(x, dtype=None, name=None):
   """Instantiates an all-ones variable of the same shape as another tensor.
 
@@ -1201,7 +1201,7 @@ def identity(x, name=None):
   return array_ops.identity(x, name=name)
 
 
-@tf_export('keras.backend.random_uniform_variable')
+@keras_export('keras.backend.random_uniform_variable')
 def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None):
   """Instantiates a variable with values drawn from a uniform distribution.
 
@@ -1238,7 +1238,7 @@ def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None):
   return variable(value, dtype=dtype, name=name)
 
 
-@tf_export('keras.backend.random_normal_variable')
+@keras_export('keras.backend.random_normal_variable')
 def random_normal_variable(shape, mean, scale, dtype=None, name=None,
                            seed=None):
   """Instantiates a variable with values drawn from a normal distribution.
@@ -1276,7 +1276,7 @@ def random_normal_variable(shape, mean, scale, dtype=None, name=None,
   return variable(value, dtype=dtype, name=name)
 
 
-@tf_export('keras.backend.count_params')
+@keras_export('keras.backend.count_params')
 def count_params(x):
   """Returns the static number of elements in a variable or tensor.
 
@@ -1299,7 +1299,7 @@ def count_params(x):
   return np.prod(x.shape.as_list())
 
 
-@tf_export('keras.backend.cast')
+@keras_export('keras.backend.cast')
 def cast(x, dtype):
   """Casts a tensor to a different dtype and returns it.
 
@@ -1335,12 +1335,12 @@ def cast(x, dtype):
 # UPDATES OPS
 
 
-@tf_export('keras.backend.update')
+@keras_export('keras.backend.update')
 def update(x, new_x):
   return state_ops.assign(x, new_x)
 
 
-@tf_export('keras.backend.update_add')
+@keras_export('keras.backend.update_add')
 def update_add(x, increment):
   """Update the value of `x` by adding `increment`.
 
@@ -1354,7 +1354,7 @@ def update_add(x, increment):
   return state_ops.assign_add(x, increment)
 
 
-@tf_export('keras.backend.update_sub')
+@keras_export('keras.backend.update_sub')
 def update_sub(x, decrement):
   """Update the value of `x` by subtracting `decrement`.
 
@@ -1368,7 +1368,7 @@ def update_sub(x, decrement):
   return state_ops.assign_sub(x, decrement)
 
 
-@tf_export('keras.backend.moving_average_update')
+@keras_export('keras.backend.moving_average_update')
 def moving_average_update(x, value, momentum):
   """Compute the moving average of a variable.
 
@@ -1392,7 +1392,7 @@ def moving_average_update(x, value, momentum):
 # LINEAR ALGEBRA
 
 
-@tf_export('keras.backend.dot')
+@keras_export('keras.backend.dot')
 def dot(x, y):
   """Multiplies 2 tensors (and/or variables) and returns a *tensor*.
 
@@ -1464,7 +1464,7 @@ def dot(x, y):
   return out
 
 
-@tf_export('keras.backend.batch_dot')
+@keras_export('keras.backend.batch_dot')
 def batch_dot(x, y, axes=None):
   """Batchwise dot product.
 
@@ -1556,7 +1556,7 @@ def batch_dot(x, y, axes=None):
   return out
 
 
-@tf_export('keras.backend.transpose')
+@keras_export('keras.backend.transpose')
 def transpose(x):
   """Transposes a tensor and returns it.
 
@@ -1592,7 +1592,7 @@ def transpose(x):
   return array_ops.transpose(x)
 
 
-@tf_export('keras.backend.gather')
+@keras_export('keras.backend.gather')
 def gather(reference, indices):
   """Retrieves the elements of indices `indices` in the tensor `reference`.
 
@@ -1609,7 +1609,7 @@ def gather(reference, indices):
 # ELEMENT-WISE OPERATIONS
 
 
-@tf_export('keras.backend.max')
+@keras_export('keras.backend.max')
 def max(x, axis=None, keepdims=False):
   """Maximum value in a tensor.
 
@@ -1627,7 +1627,7 @@ def max(x, axis=None, keepdims=False):
   return math_ops.reduce_max(x, axis, keepdims)
 
 
-@tf_export('keras.backend.min')
+@keras_export('keras.backend.min')
 def min(x, axis=None, keepdims=False):
   """Minimum value in a tensor.
 
@@ -1645,7 +1645,7 @@ def min(x, axis=None, keepdims=False):
   return math_ops.reduce_min(x, axis, keepdims)
 
 
-@tf_export('keras.backend.sum')
+@keras_export('keras.backend.sum')
 def sum(x, axis=None, keepdims=False):
   """Sum of the values in a tensor, alongside the specified axis.
 
@@ -1663,7 +1663,7 @@ def sum(x, axis=None, keepdims=False):
   return math_ops.reduce_sum(x, axis, keepdims)
 
 
-@tf_export('keras.backend.prod')
+@keras_export('keras.backend.prod')
 def prod(x, axis=None, keepdims=False):
   """Multiplies the values in a tensor, alongside the specified axis.
 
@@ -1707,7 +1707,7 @@ def cumprod(x, axis=0):
   return math_ops.cumprod(x, axis=axis)
 
 
-@tf_export('keras.backend.var')
+@keras_export('keras.backend.var')
 def var(x, axis=None, keepdims=False):
   """Variance of a tensor, alongside the specified axis.
 
@@ -1727,7 +1727,7 @@ def var(x, axis=None, keepdims=False):
   return math_ops.reduce_variance(x, axis=axis, keepdims=keepdims)
 
 
-@tf_export('keras.backend.std')
+@keras_export('keras.backend.std')
 def std(x, axis=None, keepdims=False):
   """Standard deviation of a tensor, alongside the specified axis.
 
@@ -1747,7 +1747,7 @@ def std(x, axis=None, keepdims=False):
   return math_ops.reduce_std(x, axis=axis, keepdims=keepdims)
 
 
-@tf_export('keras.backend.mean')
+@keras_export('keras.backend.mean')
 def mean(x, axis=None, keepdims=False):
   """Mean of a tensor, alongside the specified axis.
 
@@ -1767,7 +1767,7 @@ def mean(x, axis=None, keepdims=False):
   return math_ops.reduce_mean(x, axis, keepdims)
 
 
-@tf_export('keras.backend.any')
+@keras_export('keras.backend.any')
 def any(x, axis=None, keepdims=False):
   """Bitwise reduction (logical OR).
 
@@ -1783,7 +1783,7 @@ def any(x, axis=None, keepdims=False):
   return math_ops.reduce_any(x, axis, keepdims)
 
 
-@tf_export('keras.backend.all')
+@keras_export('keras.backend.all')
 def all(x, axis=None, keepdims=False):
   """Bitwise reduction (logical AND).
 
@@ -1799,7 +1799,7 @@ def all(x, axis=None, keepdims=False):
   return math_ops.reduce_all(x, axis, keepdims)
 
 
-@tf_export('keras.backend.argmax')
+@keras_export('keras.backend.argmax')
 def argmax(x, axis=-1):
   """Returns the index of the maximum value along an axis.
 
@@ -1813,7 +1813,7 @@ def argmax(x, axis=-1):
   return math_ops.argmax(x, axis)
 
 
-@tf_export('keras.backend.argmin')
+@keras_export('keras.backend.argmin')
 def argmin(x, axis=-1):
   """Returns the index of the minimum value along an axis.
 
@@ -1827,7 +1827,7 @@ def argmin(x, axis=-1):
   return math_ops.argmin(x, axis)
 
 
-@tf_export('keras.backend.square')
+@keras_export('keras.backend.square')
 def square(x):
   """Element-wise square.
 
@@ -1840,7 +1840,7 @@ def square(x):
   return math_ops.square(x)
 
 
-@tf_export('keras.backend.abs')
+@keras_export('keras.backend.abs')
 def abs(x):
   """Element-wise absolute value.
 
@@ -1853,7 +1853,7 @@ def abs(x):
   return math_ops.abs(x)
 
 
-@tf_export('keras.backend.sqrt')
+@keras_export('keras.backend.sqrt')
 def sqrt(x):
   """Element-wise square root.
 
@@ -1869,7 +1869,7 @@ def sqrt(x):
   return math_ops.sqrt(x)
 
 
-@tf_export('keras.backend.exp')
+@keras_export('keras.backend.exp')
 def exp(x):
   """Element-wise exponential.
 
@@ -1882,7 +1882,7 @@ def exp(x):
   return math_ops.exp(x)
 
 
-@tf_export('keras.backend.log')
+@keras_export('keras.backend.log')
 def log(x):
   """Element-wise log.
 
@@ -1916,7 +1916,7 @@ def logsumexp(x, axis=None, keepdims=False):
   return math_ops.reduce_logsumexp(x, axis, keepdims)
 
 
-@tf_export('keras.backend.round')
+@keras_export('keras.backend.round')
 def round(x):
   """Element-wise rounding to the closest integer.
 
@@ -1931,7 +1931,7 @@ def round(x):
   return math_ops.round(x)
 
 
-@tf_export('keras.backend.sign')
+@keras_export('keras.backend.sign')
 def sign(x):
   """Element-wise sign.
 
@@ -1944,7 +1944,7 @@ def sign(x):
   return math_ops.sign(x)
 
 
-@tf_export('keras.backend.pow')
+@keras_export('keras.backend.pow')
 def pow(x, a):
   """Element-wise exponentiation.
 
@@ -1958,7 +1958,7 @@ def pow(x, a):
   return math_ops.pow(x, a)
 
 
-@tf_export('keras.backend.clip')
+@keras_export('keras.backend.clip')
 def clip(x, min_value, max_value):
   """Element-wise value clipping.
 
@@ -1979,7 +1979,7 @@ def clip(x, min_value, max_value):
   return clip_ops.clip_by_value(x, min_value, max_value)
 
 
-@tf_export('keras.backend.equal')
+@keras_export('keras.backend.equal')
 def equal(x, y):
   """Element-wise equality between two tensors.
 
@@ -1993,7 +1993,7 @@ def equal(x, y):
   return math_ops.equal(x, y)
 
 
-@tf_export('keras.backend.not_equal')
+@keras_export('keras.backend.not_equal')
 def not_equal(x, y):
   """Element-wise inequality between two tensors.
 
@@ -2007,7 +2007,7 @@ def not_equal(x, y):
   return math_ops.not_equal(x, y)
 
 
-@tf_export('keras.backend.greater')
+@keras_export('keras.backend.greater')
 def greater(x, y):
   """Element-wise truth value of (x > y).
 
@@ -2021,7 +2021,7 @@ def greater(x, y):
   return math_ops.greater(x, y)
 
 
-@tf_export('keras.backend.greater_equal')
+@keras_export('keras.backend.greater_equal')
 def greater_equal(x, y):
   """Element-wise truth value of (x >= y).
 
@@ -2035,7 +2035,7 @@ def greater_equal(x, y):
   return math_ops.greater_equal(x, y)
 
 
-@tf_export('keras.backend.less')
+@keras_export('keras.backend.less')
 def less(x, y):
   """Element-wise truth value of (x < y).
 
@@ -2049,7 +2049,7 @@ def less(x, y):
   return math_ops.less(x, y)
 
 
-@tf_export('keras.backend.less_equal')
+@keras_export('keras.backend.less_equal')
 def less_equal(x, y):
   """Element-wise truth value of (x <= y).
 
@@ -2063,7 +2063,7 @@ def less_equal(x, y):
   return math_ops.less_equal(x, y)
 
 
-@tf_export('keras.backend.maximum')
+@keras_export('keras.backend.maximum')
 def maximum(x, y):
   """Element-wise maximum of two tensors.
 
@@ -2077,7 +2077,7 @@ def maximum(x, y):
   return math_ops.maximum(x, y)
 
 
-@tf_export('keras.backend.minimum')
+@keras_export('keras.backend.minimum')
 def minimum(x, y):
   """Element-wise minimum of two tensors.
 
@@ -2091,7 +2091,7 @@ def minimum(x, y):
   return math_ops.minimum(x, y)
 
 
-@tf_export('keras.backend.sin')
+@keras_export('keras.backend.sin')
 def sin(x):
   """Computes sin of x element-wise.
 
@@ -2104,7 +2104,7 @@ def sin(x):
   return math_ops.sin(x)
 
 
-@tf_export('keras.backend.cos')
+@keras_export('keras.backend.cos')
 def cos(x):
   """Computes cos of x element-wise.
 
@@ -2219,7 +2219,7 @@ def _fused_normalize_batch_in_training(x,
       x, gamma, beta, epsilon=epsilon, data_format=tf_data_format)
 
 
-@tf_export('keras.backend.normalize_batch_in_training')
+@keras_export('keras.backend.normalize_batch_in_training')
 def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3):
   """Computes mean and std for batch then apply batch_normalization on batch.
 
@@ -2249,7 +2249,7 @@ def normalize_batch_in_training(x, gamma, beta, reduction_axes, epsilon=1e-3):
           x, gamma, beta, reduction_axes, epsilon=epsilon)
 
 
-@tf_export('keras.backend.batch_normalization')
+@keras_export('keras.backend.batch_normalization')
 def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3):
   """Applies batch normalization on x given mean, var, beta and gamma.
 
@@ -2311,7 +2311,7 @@ def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3):
 # SHAPE OPERATIONS
 
 
-@tf_export('keras.backend.concatenate')
+@keras_export('keras.backend.concatenate')
 def concatenate(tensors, axis=-1):
   """Concatenates a list of tensors alongside the specified axis.
 
@@ -2335,7 +2335,7 @@ def concatenate(tensors, axis=-1):
     return array_ops.concat([to_dense(x) for x in tensors], axis)
 
 
-@tf_export('keras.backend.reshape')
+@keras_export('keras.backend.reshape')
 def reshape(x, shape):
   """Reshapes a tensor to the specified shape.
 
@@ -2349,7 +2349,7 @@ def reshape(x, shape):
   return array_ops.reshape(x, shape)
 
 
-@tf_export('keras.backend.permute_dimensions')
+@keras_export('keras.backend.permute_dimensions')
 def permute_dimensions(x, pattern):
   """Permutes axes in a tensor.
 
@@ -2364,7 +2364,7 @@ def permute_dimensions(x, pattern):
   return array_ops.transpose(x, perm=pattern)
 
 
-@tf_export('keras.backend.resize_images')
+@keras_export('keras.backend.resize_images')
 def resize_images(x, height_factor, width_factor, data_format,
                   interpolation='nearest'):
   """Resizes the images contained in a 4D tensor.
@@ -2425,7 +2425,7 @@ def resize_images(x, height_factor, width_factor, data_format,
   return x
 
 
-@tf_export('keras.backend.resize_volumes')
+@keras_export('keras.backend.resize_volumes')
 def resize_volumes(x, depth_factor, height_factor, width_factor, data_format):
   """Resizes the volume contained in a 5D tensor.
 
@@ -2457,7 +2457,7 @@ def resize_volumes(x, depth_factor, height_factor, width_factor, data_format):
     raise ValueError('Invalid data_format: ' + str(data_format))
 
 
-@tf_export('keras.backend.repeat_elements')
+@keras_export('keras.backend.repeat_elements')
 def repeat_elements(x, rep, axis):
   """Repeats the elements of a tensor along an axis, like `np.repeat`.
 
@@ -2510,7 +2510,7 @@ def repeat_elements(x, rep, axis):
   return x_rep
 
 
-@tf_export('keras.backend.repeat')
+@keras_export('keras.backend.repeat')
 def repeat(x, n):
   """Repeats a 2D tensor.
 
@@ -2530,7 +2530,7 @@ def repeat(x, n):
   return array_ops.tile(x, pattern)
 
 
-@tf_export('keras.backend.arange')
+@keras_export('keras.backend.arange')
 def arange(start, stop=None, step=1, dtype='int32'):
   """Creates a 1D tensor containing a sequence of integers.
 
@@ -2560,7 +2560,7 @@ def arange(start, stop=None, step=1, dtype='int32'):
   return result
 
 
-@tf_export('keras.backend.tile')
+@keras_export('keras.backend.tile')
 def tile(x, n):
   """Creates a tensor by tiling `x` by `n`.
 
@@ -2577,7 +2577,7 @@ def tile(x, n):
   return array_ops.tile(x, n)
 
 
-@tf_export('keras.backend.flatten')
+@keras_export('keras.backend.flatten')
 def flatten(x):
   """Flatten a tensor.
 
@@ -2590,7 +2590,7 @@ def flatten(x):
   return array_ops.reshape(x, [-1])
 
 
-@tf_export('keras.backend.batch_flatten')
+@keras_export('keras.backend.batch_flatten')
 def batch_flatten(x):
   """Turn a nD tensor into a 2D tensor with same 0th dimension.
 
@@ -2606,7 +2606,7 @@ def batch_flatten(x):
   return x
 
 
-@tf_export('keras.backend.expand_dims')
+@keras_export('keras.backend.expand_dims')
 def expand_dims(x, axis=-1):
   """Adds a 1-sized dimension at index "axis".
 
@@ -2620,7 +2620,7 @@ def expand_dims(x, axis=-1):
   return array_ops.expand_dims(x, axis)
 
 
-@tf_export('keras.backend.squeeze')
+@keras_export('keras.backend.squeeze')
 def squeeze(x, axis):
   """Removes a 1-dimension from the tensor at index "axis".
 
@@ -2634,7 +2634,7 @@ def squeeze(x, axis):
   return array_ops.squeeze(x, [axis])
 
 
-@tf_export('keras.backend.temporal_padding')
+@keras_export('keras.backend.temporal_padding')
 def temporal_padding(x, padding=(1, 1)):
   """Pads the middle dimension of a 3D tensor.
 
@@ -2651,7 +2651,7 @@ def temporal_padding(x, padding=(1, 1)):
   return array_ops.pad(x, pattern)
 
 
-@tf_export('keras.backend.spatial_2d_padding')
+@keras_export('keras.backend.spatial_2d_padding')
 def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None):
   """Pads the 2nd and 3rd dimensions of a 4D tensor.
 
@@ -2682,7 +2682,7 @@ def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None):
   return array_ops.pad(x, pattern)
 
 
-@tf_export('keras.backend.spatial_3d_padding')
+@keras_export('keras.backend.spatial_3d_padding')
 def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None):
   """Pads 5D tensor with zeros along the depth, height, width dimensions.
 
@@ -2726,7 +2726,7 @@ def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None):
   return array_ops.pad(x, pattern)
 
 
-@tf_export('keras.backend.stack')
+@keras_export('keras.backend.stack')
 def stack(x, axis=0):
   """Stacks a list of rank `R` tensors into a rank `R+1` tensor.
 
@@ -2740,7 +2740,7 @@ def stack(x, axis=0):
   return array_ops.stack(x, axis=axis)
 
 
-@tf_export('keras.backend.one_hot')
+@keras_export('keras.backend.one_hot')
 def one_hot(indices, num_classes):
   """Computes the one-hot representation of an integer tensor.
 
@@ -2759,7 +2759,7 @@ def one_hot(indices, num_classes):
   return array_ops.one_hot(indices, depth=num_classes, axis=-1)
 
 
-@tf_export('keras.backend.reverse')
+@keras_export('keras.backend.reverse')
 def reverse(x, axes):
   """Reverse a tensor along the specified axes.
 
@@ -2779,7 +2779,7 @@ def reverse(x, axes):
 # VALUE MANIPULATION
 
 
-@tf_export('keras.backend.get_value')
+@keras_export('keras.backend.get_value')
 def get_value(x):
   """Returns the value of a variable.
 
@@ -2799,7 +2799,7 @@ def get_value(x):
   return x.eval(session=get_session())
 
 
-@tf_export('keras.backend.batch_get_value')
+@keras_export('keras.backend.batch_get_value')
 def batch_get_value(tensors):
   """Returns the value of more than one tensor variable.
 
@@ -2822,7 +2822,7 @@ def batch_get_value(tensors):
     return []
 
 
-@tf_export('keras.backend.set_value')
+@keras_export('keras.backend.set_value')
 def set_value(x, value):
   """Sets the value of a variable, from a Numpy array.
 
@@ -2848,7 +2848,7 @@ def set_value(x, value):
       get_session().run(assign_op, feed_dict={assign_placeholder: value})
 
 
-@tf_export('keras.backend.batch_set_value')
+@keras_export('keras.backend.batch_set_value')
 def batch_set_value(tuples):
   """Sets the values of many tensor variables at once.
 
@@ -2881,7 +2881,7 @@ def batch_set_value(tuples):
         get_session().run(assign_ops, feed_dict=feed_dict)
 
 
-@tf_export('keras.backend.print_tensor')
+@keras_export('keras.backend.print_tensor')
 def print_tensor(x, message=''):
   """Prints `message` and the tensor value when evaluated.
 
@@ -3169,7 +3169,7 @@ class EagerExecutionFunction(object):
                                  [x.numpy() for x in outputs])
 
 
-@tf_export('keras.backend.function')
+@keras_export('keras.backend.function')
 def function(inputs, outputs, updates=None, name=None, **kwargs):
   """Instantiates a Keras function.
 
@@ -3202,7 +3202,7 @@ def function(inputs, outputs, updates=None, name=None, **kwargs):
   return GraphExecutionFunction(inputs, outputs, updates=updates, **kwargs)
 
 
-@tf_export('keras.backend.gradients')
+@keras_export('keras.backend.gradients')
 def gradients(loss, variables):
   """Returns the gradients of `loss` w.r.t. `variables`.
 
@@ -3217,7 +3217,7 @@ def gradients(loss, variables):
       loss, variables, colocate_gradients_with_ops=True)
 
 
-@tf_export('keras.backend.stop_gradient')
+@keras_export('keras.backend.stop_gradient')
 def stop_gradient(variables):
   """Returns `variables` but with zero gradient w.r.t. every other variable.
 
@@ -3238,7 +3238,7 @@ def stop_gradient(variables):
 # CONTROL FLOW
 
 
-@tf_export('keras.backend.rnn')
+@keras_export('keras.backend.rnn')
 def rnn(step_function,
         inputs,
         initial_states,
@@ -3596,7 +3596,7 @@ def rnn(step_function,
   return last_output, outputs, new_states
 
 
-@tf_export('keras.backend.switch')
+@keras_export('keras.backend.switch')
 def switch(condition, then_expression, else_expression):
   """Switches between two operations depending on a scalar value.
 
@@ -3660,7 +3660,7 @@ def switch(condition, then_expression, else_expression):
   return x
 
 
-@tf_export('keras.backend.in_train_phase')
+@keras_export('keras.backend.in_train_phase')
 def in_train_phase(x, alt, training=None):
   """Selects `x` in train phase, and `alt` otherwise.
 
@@ -3699,7 +3699,7 @@ def in_train_phase(x, alt, training=None):
   return x
 
 
-@tf_export('keras.backend.in_test_phase')
+@keras_export('keras.backend.in_test_phase')
 def in_test_phase(x, alt, training=None):
   """Selects `x` in test phase, and `alt` otherwise.
 
@@ -3723,7 +3723,7 @@ def in_test_phase(x, alt, training=None):
 # NN OPERATIONS
 
 
-@tf_export('keras.backend.relu')
+@keras_export('keras.backend.relu')
 def relu(x, alpha=0., max_value=None, threshold=0):
   """Rectified linear unit.
 
@@ -3776,7 +3776,7 @@ def relu(x, alpha=0., max_value=None, threshold=0):
   return x
 
 
-@tf_export('keras.backend.elu')
+@keras_export('keras.backend.elu')
 def elu(x, alpha=1.):
   """Exponential linear unit.
 
@@ -3794,7 +3794,7 @@ def elu(x, alpha=1.):
     return array_ops.where(x > 0, res, alpha * res)
 
 
-@tf_export('keras.backend.softmax')
+@keras_export('keras.backend.softmax')
 def softmax(x, axis=-1):
   """Softmax of a tensor.
 
@@ -3809,7 +3809,7 @@ def softmax(x, axis=-1):
   return nn.softmax(x, axis=axis)
 
 
-@tf_export('keras.backend.softplus')
+@keras_export('keras.backend.softplus')
 def softplus(x):
   """Softplus of a tensor.
 
@@ -3822,7 +3822,7 @@ def softplus(x):
   return nn.softplus(x)
 
 
-@tf_export('keras.backend.softsign')
+@keras_export('keras.backend.softsign')
 def softsign(x):
   """Softsign of a tensor.
 
@@ -3835,7 +3835,7 @@ def softsign(x):
   return nn.softsign(x)
 
 
-@tf_export('keras.backend.categorical_crossentropy')
+@keras_export('keras.backend.categorical_crossentropy')
 def categorical_crossentropy(target, output, from_logits=False, axis=-1):
   """Categorical crossentropy between an output tensor and a target tensor.
 
@@ -3875,7 +3875,7 @@ def categorical_crossentropy(target, output, from_logits=False, axis=-1):
   return nn.softmax_cross_entropy_with_logits_v2(labels=target, logits=output)
 
 
-@tf_export('keras.backend.sparse_categorical_crossentropy')
+@keras_export('keras.backend.sparse_categorical_crossentropy')
 def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1):
   """Categorical crossentropy with integer targets.
 
@@ -3927,7 +3927,7 @@ def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1):
     return res
 
 
-@tf_export('keras.backend.binary_crossentropy')
+@keras_export('keras.backend.binary_crossentropy')
 def binary_crossentropy(target, output, from_logits=False):
   """Binary crossentropy between an output tensor and a target tensor.
 
@@ -3956,7 +3956,7 @@ def binary_crossentropy(target, output, from_logits=False):
   return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
 
 
-@tf_export('keras.backend.sigmoid')
+@keras_export('keras.backend.sigmoid')
 def sigmoid(x):
   """Element-wise sigmoid.
 
@@ -3969,7 +3969,7 @@ def sigmoid(x):
   return nn.sigmoid(x)
 
 
-@tf_export('keras.backend.hard_sigmoid')
+@keras_export('keras.backend.hard_sigmoid')
 def hard_sigmoid(x):
   """Segment-wise linear approximation of sigmoid.
 
@@ -3990,7 +3990,7 @@ def hard_sigmoid(x):
   return x
 
 
-@tf_export('keras.backend.tanh')
+@keras_export('keras.backend.tanh')
 def tanh(x):
   """Element-wise tanh.
 
@@ -4003,7 +4003,7 @@ def tanh(x):
   return nn.tanh(x)
 
 
-@tf_export('keras.backend.dropout')
+@keras_export('keras.backend.dropout')
 def dropout(x, level, noise_shape=None, seed=None):
   """Sets entries in `x` to zero at random, while scaling the entire tensor.
 
@@ -4026,7 +4026,7 @@ def dropout(x, level, noise_shape=None, seed=None):
   return nn.dropout(x * 1., retain_prob, noise_shape, seed=seed)
 
 
-@tf_export('keras.backend.l2_normalize')
+@keras_export('keras.backend.l2_normalize')
 def l2_normalize(x, axis=None):
   """Normalizes a tensor wrt the L2 norm alongside the specified axis.
 
@@ -4040,7 +4040,7 @@ def l2_normalize(x, axis=None):
   return nn.l2_normalize(x, axis=axis)
 
 
-@tf_export('keras.backend.in_top_k')
+@keras_export('keras.backend.in_top_k')
 def in_top_k(predictions, targets, k):
   """Returns whether the `targets` are in the top `k` `predictions`.
 
@@ -4142,7 +4142,7 @@ def _preprocess_padding(padding):
   return padding
 
 
-@tf_export('keras.backend.conv1d')
+@keras_export('keras.backend.conv1d')
 def conv1d(x,
            kernel,
            strides=1,
@@ -4192,7 +4192,7 @@ def conv1d(x,
   return x
 
 
-@tf_export('keras.backend.conv2d')
+@keras_export('keras.backend.conv2d')
 def conv2d(x,
            kernel,
            strides=(1, 1),
@@ -4237,7 +4237,7 @@ def conv2d(x,
   return x
 
 
-@tf_export('keras.backend.conv2d_transpose')
+@keras_export('keras.backend.conv2d_transpose')
 def conv2d_transpose(x,
                      kernel,
                      output_shape,
@@ -4379,7 +4379,7 @@ def separable_conv1d(x,
   return x
 
 
-@tf_export('keras.backend.separable_conv2d')
+@keras_export('keras.backend.separable_conv2d')
 def separable_conv2d(x,
                      depthwise_kernel,
                      pointwise_kernel,
@@ -4483,7 +4483,7 @@ def depthwise_conv2d(x,
   return x
 
 
-@tf_export('keras.backend.conv3d')
+@keras_export('keras.backend.conv3d')
 def conv3d(x,
            kernel,
            strides=(1, 1, 1),
@@ -4589,7 +4589,7 @@ def conv3d_transpose(x,
   return x
 
 
-@tf_export('keras.backend.pool2d')
+@keras_export('keras.backend.pool2d')
 def pool2d(x,
            pool_size,
            strides=(1, 1),
@@ -4646,7 +4646,7 @@ def pool2d(x,
   return x
 
 
-@tf_export('keras.backend.pool3d')
+@keras_export('keras.backend.pool3d')
 def pool3d(x,
            pool_size,
            strides=(1, 1, 1),
@@ -4850,7 +4850,7 @@ def local_conv2d(inputs,
                     data_format)
 
 
-@tf_export('keras.backend.bias_add')
+@keras_export('keras.backend.bias_add')
 def bias_add(x, bias, data_format=None):
   """Adds a bias vector to a tensor.
 
@@ -4924,7 +4924,7 @@ def bias_add(x, bias, data_format=None):
 # RANDOMNESS
 
 
-@tf_export('keras.backend.random_normal')
+@keras_export('keras.backend.random_normal')
 def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
   """Returns a tensor with normal distribution of values.
 
@@ -4947,7 +4947,7 @@ def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
       shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
 
 
-@tf_export('keras.backend.random_uniform')
+@keras_export('keras.backend.random_uniform')
 def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
   """Returns a tensor with uniform distribution of values.
 
@@ -4971,7 +4971,7 @@ def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
       shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed)
 
 
-@tf_export('keras.backend.random_binomial')
+@keras_export('keras.backend.random_binomial')
 def random_binomial(shape, p=0.0, dtype=None, seed=None):
   """Returns a tensor with random binomial distribution of values.
 
@@ -4993,7 +4993,7 @@ def random_binomial(shape, p=0.0, dtype=None, seed=None):
       array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype))
 
 
-@tf_export('keras.backend.truncated_normal')
+@keras_export('keras.backend.truncated_normal')
 def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
   """Returns a tensor with truncated random normal distribution of values.
 
@@ -5027,7 +5027,7 @@ def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
 # in TensorFlow's CTC implementation
 
 
-@tf_export('keras.backend.ctc_label_dense_to_sparse')
+@keras_export('keras.backend.ctc_label_dense_to_sparse')
 def ctc_label_dense_to_sparse(labels, label_lengths):
   """Converts CTC labels from dense to sparse.
 
@@ -5072,7 +5072,7 @@ def ctc_label_dense_to_sparse(labels, label_lengths):
       math_ops.to_int64(indices), vals_sparse, math_ops.to_int64(label_shape))
 
 
-@tf_export('keras.backend.ctc_batch_cost')
+@keras_export('keras.backend.ctc_batch_cost')
 def ctc_batch_cost(y_true, y_pred, input_length, label_length):
   """Runs CTC loss algorithm on each batch element.
 
@@ -5102,7 +5102,7 @@ def ctc_batch_cost(y_true, y_pred, input_length, label_length):
           inputs=y_pred, labels=sparse_labels, sequence_length=input_length), 1)
 
 
-@tf_export('keras.backend.ctc_decode')
+@keras_export('keras.backend.ctc_decode')
 def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
   """Decodes the output of a softmax.
 
@@ -5154,7 +5154,7 @@ def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
 # HIGH ORDER FUNCTIONS
 
 
-@tf_export('keras.backend.map_fn')
+@keras_export('keras.backend.map_fn')
 def map_fn(fn, elems, name=None, dtype=None):
   """Map the function fn over the elements elems and return the outputs.
 
@@ -5170,7 +5170,7 @@ def map_fn(fn, elems, name=None, dtype=None):
   return functional_ops.map_fn(fn, elems, name=name, dtype=dtype)
 
 
-@tf_export('keras.backend.foldl')
+@keras_export('keras.backend.foldl')
 def foldl(fn, elems, initializer=None, name=None):
   """Reduce elems using fn to combine them from left to right.
 
@@ -5187,7 +5187,7 @@ def foldl(fn, elems, initializer=None, name=None):
   return functional_ops.foldl(fn, elems, initializer=initializer, name=name)
 
 
-@tf_export('keras.backend.foldr')
+@keras_export('keras.backend.foldr')
 def foldr(fn, elems, initializer=None, name=None):
   """Reduce elems using fn to combine them from right to left.
 
diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index bfebc788f2..a8fb87f5cc 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -43,7 +43,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary as tf_summary
 from tensorflow.python.training import saver
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 try:
   import requests
@@ -393,7 +393,7 @@ class CallbackList(object):
     return iter(self.callbacks)
 
 
-@tf_export('keras.callbacks.Callback')
+@keras_export('keras.callbacks.Callback')
 class Callback(object):
   """Abstract base class used to build new callbacks.
 
@@ -596,7 +596,7 @@ class Callback(object):
     """
 
 
-@tf_export('keras.callbacks.BaseLogger')
+@keras_export('keras.callbacks.BaseLogger')
 class BaseLogger(Callback):
   """Callback that accumulates epoch averages of metrics.
 
@@ -645,7 +645,7 @@ class BaseLogger(Callback):
             logs[k] = self.totals[k] / self.seen
 
 
-@tf_export('keras.callbacks.TerminateOnNaN')
+@keras_export('keras.callbacks.TerminateOnNaN')
 class TerminateOnNaN(Callback):
   """Callback that terminates training when a NaN loss is encountered.
   """
@@ -659,7 +659,7 @@ class TerminateOnNaN(Callback):
         self.model.stop_training = True
 
 
-@tf_export('keras.callbacks.ProgbarLogger')
+@keras_export('keras.callbacks.ProgbarLogger')
 class ProgbarLogger(Callback):
   """Callback that prints metrics to stdout.
 
@@ -739,7 +739,7 @@ class ProgbarLogger(Callback):
       self.progbar.update(self.seen, self.log_values)
 
 
-@tf_export('keras.callbacks.History')
+@keras_export('keras.callbacks.History')
 class History(Callback):
   """Callback that records events into a `History` object.
 
@@ -759,7 +759,7 @@ class History(Callback):
       self.history.setdefault(k, []).append(v)
 
 
-@tf_export('keras.callbacks.ModelCheckpoint')
+@keras_export('keras.callbacks.ModelCheckpoint')
 class ModelCheckpoint(Callback):
   """Save the model after every epoch.
 
@@ -863,7 +863,7 @@ class ModelCheckpoint(Callback):
           self.model.save(filepath, overwrite=True)
 
 
-@tf_export('keras.callbacks.EarlyStopping')
+@keras_export('keras.callbacks.EarlyStopping')
 class EarlyStopping(Callback):
   """Stop training when a monitored quantity has stopped improving.
 
@@ -974,7 +974,7 @@ class EarlyStopping(Callback):
     return monitor_value
 
 
-@tf_export('keras.callbacks.RemoteMonitor')
+@keras_export('keras.callbacks.RemoteMonitor')
 class RemoteMonitor(Callback):
   """Callback used to stream events to a server.
 
@@ -1030,7 +1030,7 @@ class RemoteMonitor(Callback):
                       'root server at ' + str(self.root))
 
 
-@tf_export('keras.callbacks.LearningRateScheduler')
+@keras_export('keras.callbacks.LearningRateScheduler')
 class LearningRateScheduler(Callback):
   """Learning rate scheduler.
 
@@ -1067,7 +1067,7 @@ class LearningRateScheduler(Callback):
     logs['lr'] = K.get_value(self.model.optimizer.lr)
 
 
-@tf_export('keras.callbacks.TensorBoard')
+@keras_export('keras.callbacks.TensorBoard')
 class TensorBoard(Callback):
   # pylint: disable=line-too-long
   """Tensorboard basic visualizations.
@@ -1450,7 +1450,7 @@ class TensorBoard(Callback):
     self.writer.close()
 
 
-@tf_export('keras.callbacks.ReduceLROnPlateau')
+@keras_export('keras.callbacks.ReduceLROnPlateau')
 class ReduceLROnPlateau(Callback):
   """Reduce learning rate when a metric has stopped improving.
 
@@ -1575,7 +1575,7 @@ class ReduceLROnPlateau(Callback):
     return self.cooldown_counter > 0
 
 
-@tf_export('keras.callbacks.CSVLogger')
+@keras_export('keras.callbacks.CSVLogger')
 class CSVLogger(Callback):
   """Callback that streams epoch results to a csv file.
 
@@ -1668,7 +1668,7 @@ class CSVLogger(Callback):
     self.writer = None
 
 
-@tf_export('keras.callbacks.LambdaCallback')
+@keras_export('keras.callbacks.LambdaCallback')
 class LambdaCallback(Callback):
   r"""Callback for creating simple, custom callbacks on-the-fly.
 
diff --git a/tensorflow/python/keras/constraints.py b/tensorflow/python/keras/constraints.py
index bf3a3a728a..334d072d5a 100644
--- a/tensorflow/python/keras/constraints.py
+++ b/tensorflow/python/keras/constraints.py
@@ -25,10 +25,10 @@ from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
 from tensorflow.python.keras.utils.generic_utils import serialize_keras_object
 from tensorflow.python.ops import math_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.constraints.Constraint')
+@keras_export('keras.constraints.Constraint')
 class Constraint(object):
 
   def __call__(self, w):
@@ -38,7 +38,7 @@ class Constraint(object):
     return {}
 
 
-@tf_export('keras.constraints.MaxNorm', 'keras.constraints.max_norm')
+@keras_export('keras.constraints.MaxNorm', 'keras.constraints.max_norm')
 class MaxNorm(Constraint):
   """MaxNorm weight constraint.
 
@@ -75,7 +75,7 @@ class MaxNorm(Constraint):
     return {'max_value': self.max_value, 'axis': self.axis}
 
 
-@tf_export('keras.constraints.NonNeg', 'keras.constraints.non_neg')
+@keras_export('keras.constraints.NonNeg', 'keras.constraints.non_neg')
 class NonNeg(Constraint):
   """Constrains the weights to be non-negative.
   """
@@ -84,7 +84,7 @@ class NonNeg(Constraint):
     return w * math_ops.cast(math_ops.greater_equal(w, 0.), K.floatx())
 
 
-@tf_export('keras.constraints.UnitNorm', 'keras.constraints.unit_norm')
+@keras_export('keras.constraints.UnitNorm', 'keras.constraints.unit_norm')
 class UnitNorm(Constraint):
   """Constrains the weights incident to each hidden unit to have unit norm.
 
@@ -115,7 +115,7 @@ class UnitNorm(Constraint):
     return {'axis': self.axis}
 
 
-@tf_export('keras.constraints.MinMaxNorm', 'keras.constraints.min_max_norm')
+@keras_export('keras.constraints.MinMaxNorm', 'keras.constraints.min_max_norm')
 class MinMaxNorm(Constraint):
   """MinMaxNorm weight constraint.
 
@@ -181,12 +181,12 @@ nonneg = non_neg
 unitnorm = unit_norm
 
 
-@tf_export('keras.constraints.serialize')
+@keras_export('keras.constraints.serialize')
 def serialize(constraint):
   return serialize_keras_object(constraint)
 
 
-@tf_export('keras.constraints.deserialize')
+@keras_export('keras.constraints.deserialize')
 def deserialize(config, custom_objects=None):
   return deserialize_keras_object(
       config,
@@ -195,7 +195,7 @@ def deserialize(config, custom_objects=None):
       printable_module_name='constraint')
 
 
-@tf_export('keras.constraints.get')
+@keras_export('keras.constraints.get')
 def get(identifier):
   if identifier is None:
     return None
diff --git a/tensorflow/python/keras/datasets/boston_housing.py b/tensorflow/python/keras/datasets/boston_housing.py
index eeb7cbc44a..cf1a1097bb 100644
--- a/tensorflow/python/keras/datasets/boston_housing.py
+++ b/tensorflow/python/keras/datasets/boston_housing.py
@@ -21,10 +21,10 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.keras.utils.data_utils import get_file
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.datasets.boston_housing.load_data')
+@keras_export('keras.datasets.boston_housing.load_data')
 def load_data(path='boston_housing.npz', test_split=0.2, seed=113):
   """Loads the Boston Housing dataset.
 
diff --git a/tensorflow/python/keras/datasets/cifar10.py b/tensorflow/python/keras/datasets/cifar10.py
index d627160875..36e1b83c10 100644
--- a/tensorflow/python/keras/datasets/cifar10.py
+++ b/tensorflow/python/keras/datasets/cifar10.py
@@ -25,10 +25,10 @@ import numpy as np
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.datasets.cifar import load_batch
 from tensorflow.python.keras.utils.data_utils import get_file
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.datasets.cifar10.load_data')
+@keras_export('keras.datasets.cifar10.load_data')
 def load_data():
   """Loads CIFAR10 dataset.
 
diff --git a/tensorflow/python/keras/datasets/cifar100.py b/tensorflow/python/keras/datasets/cifar100.py
index e9a6d634a5..ee58d46228 100644
--- a/tensorflow/python/keras/datasets/cifar100.py
+++ b/tensorflow/python/keras/datasets/cifar100.py
@@ -25,10 +25,10 @@ import numpy as np
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.datasets.cifar import load_batch
 from tensorflow.python.keras.utils.data_utils import get_file
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.datasets.cifar100.load_data')
+@keras_export('keras.datasets.cifar100.load_data')
 def load_data(label_mode='fine'):
   """Loads CIFAR100 dataset.
 
diff --git a/tensorflow/python/keras/datasets/fashion_mnist.py b/tensorflow/python/keras/datasets/fashion_mnist.py
index 3f4c6c7413..5e73635a3c 100644
--- a/tensorflow/python/keras/datasets/fashion_mnist.py
+++ b/tensorflow/python/keras/datasets/fashion_mnist.py
@@ -24,10 +24,10 @@ import os
 import numpy as np
 
 from tensorflow.python.keras.utils.data_utils import get_file
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.datasets.fashion_mnist.load_data')
+@keras_export('keras.datasets.fashion_mnist.load_data')
 def load_data():
   """Loads the Fashion-MNIST dataset.
 
diff --git a/tensorflow/python/keras/datasets/imdb.py b/tensorflow/python/keras/datasets/imdb.py
index b73b024162..022a9b7fc1 100644
--- a/tensorflow/python/keras/datasets/imdb.py
+++ b/tensorflow/python/keras/datasets/imdb.py
@@ -25,10 +25,10 @@ import numpy as np
 from tensorflow.python.keras.preprocessing.sequence import _remove_long_seq
 from tensorflow.python.keras.utils.data_utils import get_file
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.datasets.imdb.load_data')
+@keras_export('keras.datasets.imdb.load_data')
 def load_data(path='imdb.npz',
               num_words=None,
               skip_top=0,
@@ -131,7 +131,7 @@ def load_data(path='imdb.npz',
   return (x_train, y_train), (x_test, y_test)
 
 
-@tf_export('keras.datasets.imdb.get_word_index')
+@keras_export('keras.datasets.imdb.get_word_index')
 def get_word_index(path='imdb_word_index.json'):
   """Retrieves the dictionary mapping word indices back to words.
 
diff --git a/tensorflow/python/keras/datasets/mnist.py b/tensorflow/python/keras/datasets/mnist.py
index a96b581960..bad41a5164 100644
--- a/tensorflow/python/keras/datasets/mnist.py
+++ b/tensorflow/python/keras/datasets/mnist.py
@@ -21,10 +21,10 @@ from __future__ import print_function
 import numpy as np
 
 from tensorflow.python.keras.utils.data_utils import get_file
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.datasets.mnist.load_data')
+@keras_export('keras.datasets.mnist.load_data')
 def load_data(path='mnist.npz'):
   """Loads the MNIST dataset.
 
diff --git a/tensorflow/python/keras/datasets/reuters.py b/tensorflow/python/keras/datasets/reuters.py
index cb796bb06c..0daa1c2306 100644
--- a/tensorflow/python/keras/datasets/reuters.py
+++ b/tensorflow/python/keras/datasets/reuters.py
@@ -25,10 +25,10 @@ import numpy as np
 from tensorflow.python.keras.preprocessing.sequence import _remove_long_seq
 from tensorflow.python.keras.utils.data_utils import get_file
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.datasets.reuters.load_data')
+@keras_export('keras.datasets.reuters.load_data')
 def load_data(path='reuters.npz',
               num_words=None,
               skip_top=0,
@@ -115,7 +115,7 @@ def load_data(path='reuters.npz',
   return (x_train, y_train), (x_test, y_test)
 
 
-@tf_export('keras.datasets.reuters.get_word_index')
+@keras_export('keras.datasets.reuters.get_word_index')
 def get_word_index(path='reuters_word_index.json'):
   """Retrieves the dictionary mapping word indices back to words.
 
diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py
index 80ae99475e..8aa0bac8cb 100644
--- a/tensorflow/python/keras/engine/base_layer.py
+++ b/tensorflow/python/keras/engine/base_layer.py
@@ -51,11 +51,11 @@ from tensorflow.python.util import function_utils
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 from tensorflow.tools.docs import doc_controls
 
 
-@tf_export('keras.layers.Layer')
+@keras_export('keras.layers.Layer')
 class Layer(checkpointable.CheckpointableBase):
   """Base layer class.
 
diff --git a/tensorflow/python/keras/engine/input_layer.py b/tensorflow/python/keras/engine/input_layer.py
index a8303dc663..bc2cf2fb6e 100644
--- a/tensorflow/python/keras/engine/input_layer.py
+++ b/tensorflow/python/keras/engine/input_layer.py
@@ -23,10 +23,10 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.keras import backend
 from tensorflow.python.keras.engine import base_layer
 from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.layers.InputLayer')
+@keras_export('keras.layers.InputLayer')
 class InputLayer(base_layer.Layer):
   """Layer to be used as an entry point into a Network (a graph of layers).
 
@@ -140,7 +140,7 @@ class InputLayer(base_layer.Layer):
     return config
 
 
-@tf_export('keras.layers.Input', 'keras.Input')
+@keras_export('keras.layers.Input', 'keras.Input')
 def Input(  # pylint: disable=invalid-name
     shape=None,
     batch_size=None,
diff --git a/tensorflow/python/keras/engine/input_spec.py b/tensorflow/python/keras/engine/input_spec.py
index 7277c16fe5..b0d1157364 100644
--- a/tensorflow/python/keras/engine/input_spec.py
+++ b/tensorflow/python/keras/engine/input_spec.py
@@ -21,11 +21,12 @@ from __future__ import print_function
 from six.moves import zip  # pylint: disable=redefined-builtin
 
 from tensorflow.python.util import nest
+from tensorflow.python.util.tf_export import keras_export
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export('keras.layers.InputSpec',
-           v1=['keras.layers.InputSpec', 'layers.InputSpec'])
+@keras_export('keras.layers.InputSpec', v1=['keras.layers.InputSpec'])
+@tf_export(v1=['layers.InputSpec'])
 class InputSpec(object):
   """Specifies the ndim, dtype and shape of every input to a layer.
 
diff --git a/tensorflow/python/keras/engine/saving.py b/tensorflow/python/keras/engine/saving.py
index 54d9e32fb2..91eba0acab 100644
--- a/tensorflow/python/keras/engine/saving.py
+++ b/tensorflow/python/keras/engine/saving.py
@@ -31,7 +31,7 @@ from tensorflow.python.keras.utils import conv_utils
 from tensorflow.python.keras.utils.io_utils import ask_to_proceed_with_overwrite
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import serialization
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 # pylint: disable=g-import-not-at-top
 try:
@@ -47,7 +47,7 @@ except ImportError:
 # pylint: enable=g-import-not-at-top
 
 
-@tf_export('keras.models.save_model')
+@keras_export('keras.models.save_model')
 def save_model(model, filepath, overwrite=True, include_optimizer=True):
   """Saves a model to a HDF5 file.
 
@@ -161,7 +161,7 @@ def save_model(model, filepath, overwrite=True, include_optimizer=True):
       f.close()
 
 
-@tf_export('keras.models.load_model')
+@keras_export('keras.models.load_model')
 def load_model(filepath, custom_objects=None, compile=True):  # pylint: disable=redefined-builtin
   """Loads a model saved via `save_model`.
 
@@ -300,7 +300,7 @@ def load_model(filepath, custom_objects=None, compile=True):  # pylint: disable=
   return model
 
 
-@tf_export('keras.models.model_from_config')
+@keras_export('keras.models.model_from_config')
 def model_from_config(config, custom_objects=None):
   """Instantiates a Keras model from its config.
 
@@ -324,7 +324,7 @@ def model_from_config(config, custom_objects=None):
   return deserialize(config, custom_objects=custom_objects)
 
 
-@tf_export('keras.models.model_from_yaml')
+@keras_export('keras.models.model_from_yaml')
 def model_from_yaml(yaml_string, custom_objects=None):
   """Parses a yaml model configuration file and returns a model instance.
 
@@ -347,7 +347,7 @@ def model_from_yaml(yaml_string, custom_objects=None):
   return deserialize(config, custom_objects=custom_objects)
 
 
-@tf_export('keras.models.model_from_json')
+@keras_export('keras.models.model_from_json')
 def model_from_json(json_string, custom_objects=None):
   """Parses a JSON model configuration file and returns a model instance.
 
diff --git a/tensorflow/python/keras/engine/sequential.py b/tensorflow/python/keras/engine/sequential.py
index 8c388cbafc..5773d6e44b 100644
--- a/tensorflow/python/keras/engine/sequential.py
+++ b/tensorflow/python/keras/engine/sequential.py
@@ -34,10 +34,10 @@ from tensorflow.python.keras.utils import layer_utils
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import tf_inspect
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.models.Sequential', 'keras.Sequential')
+@keras_export('keras.models.Sequential', 'keras.Sequential')
 class Sequential(Model):
   """Linear stack of layers.
 
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index bc85cc6fd1..d591495458 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -50,10 +50,10 @@ from tensorflow.python.training import optimizer as tf_optimizer_module
 from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util import nest
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.models.Model', 'keras.Model')
+@keras_export('keras.models.Model', 'keras.Model')
 class Model(Network):
   """`Model` groups layers into an object with training and inference features.
 
diff --git a/tensorflow/python/keras/estimator/__init__.py b/tensorflow/python/keras/estimator/__init__.py
index dcd0600897..f0e04c066d 100644
--- a/tensorflow/python/keras/estimator/__init__.py
+++ b/tensorflow/python/keras/estimator/__init__.py
@@ -18,7 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 # Keras has undeclared dependency on tensorflow/estimator:estimator_py.
 # As long as you depend //third_party/py/tensorflow:tensorflow target
@@ -26,7 +26,7 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 # LINT.IfChange
-@tf_export('keras.estimator.model_to_estimator')
+@keras_export('keras.estimator.model_to_estimator')
 def model_to_estimator(
     keras_model=None,
     keras_model_path=None,
@@ -72,6 +72,6 @@ def model_to_estimator(
       model_dir=model_dir,
       config=config)
 
-# LINT.ThenChange(//third_party/tensorflow_estimator/python/estimator/keras.py)
+# LINT.ThenChange(//tensorflow_estimator/python/estimator/keras.py)
 
 
diff --git a/tensorflow/python/keras/initializers.py b/tensorflow/python/keras/initializers.py
index cac78c44ca..33415fd139 100644
--- a/tensorflow/python/keras/initializers.py
+++ b/tensorflow/python/keras/initializers.py
@@ -43,11 +43,11 @@ from tensorflow.python.ops.init_ops import TruncatedNormal as TFTruncatedNormal
 from tensorflow.python.ops.init_ops import VarianceScaling  # pylint: disable=unused-import
 from tensorflow.python.ops.init_ops import Zeros
 
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.initializers.TruncatedNormal',
-           'keras.initializers.truncated_normal')
+@keras_export('keras.initializers.TruncatedNormal',
+              'keras.initializers.truncated_normal')
 class TruncatedNormal(TFTruncatedNormal):
   """Initializer that generates a truncated normal distribution.
 
@@ -71,8 +71,8 @@ class TruncatedNormal(TFTruncatedNormal):
         mean=mean, stddev=stddev, seed=seed, dtype=dtype)
 
 
-@tf_export('keras.initializers.RandomUniform', 'keras.initializers.uniform',
-           'keras.initializers.random_uniform')
+@keras_export('keras.initializers.RandomUniform', 'keras.initializers.uniform',
+              'keras.initializers.random_uniform')
 class RandomUniform(TFRandomUniform):
   """Initializer that generates tensors with a uniform distribution.
 
@@ -92,8 +92,8 @@ class RandomUniform(TFRandomUniform):
         minval=minval, maxval=maxval, seed=seed, dtype=dtype)
 
 
-@tf_export('keras.initializers.RandomNormal', 'keras.initializers.normal',
-           'keras.initializers.random_normal')
+@keras_export('keras.initializers.RandomNormal', 'keras.initializers.normal',
+              'keras.initializers.random_normal')
 class RandomNormal(TFRandomNormal):
   """Initializer that generates tensors with a normal distribution.
 
@@ -133,12 +133,12 @@ glorot_uniform = GlorotUniform
 # Utility functions
 
 
-@tf_export('keras.initializers.serialize')
+@keras_export('keras.initializers.serialize')
 def serialize(initializer):
   return serialize_keras_object(initializer)
 
 
-@tf_export('keras.initializers.deserialize')
+@keras_export('keras.initializers.deserialize')
 def deserialize(config, custom_objects=None):
   return deserialize_keras_object(
       config,
@@ -147,7 +147,7 @@ def deserialize(config, custom_objects=None):
       printable_module_name='initializer')
 
 
-@tf_export('keras.initializers.get')
+@keras_export('keras.initializers.get')
 def get(identifier):
   if identifier is None:
     return None
diff --git a/tensorflow/python/keras/layers/advanced_activations.py b/tensorflow/python/keras/layers/advanced_activations.py
index 35ac7830b2..be1039a2ac 100644
--- a/tensorflow/python/keras/layers/advanced_activations.py
+++ b/tensorflow/python/keras/layers/advanced_activations.py
@@ -26,10 +26,10 @@ from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.keras.engine.input_spec import InputSpec
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import math_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.layers.LeakyReLU')
+@keras_export('keras.layers.LeakyReLU')
 class LeakyReLU(Layer):
   """Leaky version of a Rectified Linear Unit.
 
@@ -68,7 +68,7 @@ class LeakyReLU(Layer):
     return input_shape
 
 
-@tf_export('keras.layers.PReLU')
+@keras_export('keras.layers.PReLU')
 class PReLU(Layer):
   """Parametric Rectified Linear Unit.
 
@@ -166,7 +166,7 @@ class PReLU(Layer):
     return input_shape
 
 
-@tf_export('keras.layers.ELU')
+@keras_export('keras.layers.ELU')
 class ELU(Layer):
   """Exponential Linear Unit.
 
@@ -205,7 +205,7 @@ class ELU(Layer):
     return input_shape
 
 
-@tf_export('keras.layers.ThresholdedReLU')
+@keras_export('keras.layers.ThresholdedReLU')
 class ThresholdedReLU(Layer):
   """Thresholded Rectified Linear Unit.
 
@@ -245,7 +245,7 @@ class ThresholdedReLU(Layer):
     return input_shape
 
 
-@tf_export('keras.layers.Softmax')
+@keras_export('keras.layers.Softmax')
 class Softmax(Layer):
   """Softmax activation function.
 
@@ -279,7 +279,7 @@ class Softmax(Layer):
     return input_shape
 
 
-@tf_export('keras.layers.ReLU')
+@keras_export('keras.layers.ReLU')
 class ReLU(Layer):
   """Rectified Linear Unit activation function.
 
diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py
index 6564d6e8fd..7251a67191 100644
--- a/tensorflow/python/keras/layers/convolutional.py
+++ b/tensorflow/python/keras/layers/convolutional.py
@@ -42,7 +42,7 @@ from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import nn_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 class Conv(Layer):
@@ -282,7 +282,7 @@ class Conv(Layer):
     return causal_padding
 
 
-@tf_export('keras.layers.Conv1D', 'keras.layers.Convolution1D')
+@keras_export('keras.layers.Conv1D', 'keras.layers.Convolution1D')
 class Conv1D(Conv):
   """1D convolution layer (e.g. temporal convolution).
 
@@ -384,7 +384,7 @@ class Conv1D(Conv):
     return super(Conv1D, self).call(inputs)
 
 
-@tf_export('keras.layers.Conv2D', 'keras.layers.Convolution2D')
+@keras_export('keras.layers.Conv2D', 'keras.layers.Convolution2D')
 class Conv2D(Conv):
   """2D convolution layer (e.g. spatial convolution over images).
 
@@ -495,7 +495,7 @@ class Conv2D(Conv):
         **kwargs)
 
 
-@tf_export('keras.layers.Conv3D', 'keras.layers.Convolution3D')
+@keras_export('keras.layers.Conv3D', 'keras.layers.Convolution3D')
 class Conv3D(Conv):
   """3D convolution layer (e.g. spatial convolution over volumes).
 
@@ -613,8 +613,8 @@ class Conv3D(Conv):
         **kwargs)
 
 
-@tf_export('keras.layers.Conv2DTranspose',
-           'keras.layers.Convolution2DTranspose')
+@keras_export('keras.layers.Conv2DTranspose',
+              'keras.layers.Convolution2DTranspose')
 class Conv2DTranspose(Conv2D):
   """Transposed convolution layer (sometimes called Deconvolution).
 
@@ -885,8 +885,8 @@ class Conv2DTranspose(Conv2D):
     return config
 
 
-@tf_export('keras.layers.Conv3DTranspose',
-           'keras.layers.Convolution3DTranspose')
+@keras_export('keras.layers.Conv3DTranspose',
+              'keras.layers.Convolution3DTranspose')
 class Conv3DTranspose(Conv3D):
   """Transposed convolution layer (sometimes called Deconvolution).
 
@@ -1401,8 +1401,8 @@ class SeparableConv(Conv):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.SeparableConv1D',
-           'keras.layers.SeparableConvolution1D')
+@keras_export('keras.layers.SeparableConv1D',
+              'keras.layers.SeparableConvolution1D')
 class SeparableConv1D(SeparableConv):
   """Depthwise separable 1D convolution.
 
@@ -1549,8 +1549,8 @@ class SeparableConv1D(SeparableConv):
     return outputs
 
 
-@tf_export('keras.layers.SeparableConv2D',
-           'keras.layers.SeparableConvolution2D')
+@keras_export('keras.layers.SeparableConv2D',
+              'keras.layers.SeparableConvolution2D')
 class SeparableConv2D(SeparableConv):
   """Depthwise separable 2D convolution.
 
@@ -1701,7 +1701,7 @@ class SeparableConv2D(SeparableConv):
     return outputs
 
 
-@tf_export('keras.layers.DepthwiseConv2D')
+@keras_export('keras.layers.DepthwiseConv2D')
 class DepthwiseConv2D(Conv2D):
   """Depthwise separable 2D convolution.
 
@@ -1895,7 +1895,7 @@ class DepthwiseConv2D(Conv2D):
     return config
 
 
-@tf_export('keras.layers.UpSampling1D')
+@keras_export('keras.layers.UpSampling1D')
 class UpSampling1D(Layer):
   """Upsampling layer for 1D inputs.
 
@@ -1931,7 +1931,7 @@ class UpSampling1D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.UpSampling2D')
+@keras_export('keras.layers.UpSampling2D')
 class UpSampling2D(Layer):
   """Upsampling layer for 2D inputs.
 
@@ -2010,7 +2010,7 @@ class UpSampling2D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.UpSampling3D')
+@keras_export('keras.layers.UpSampling3D')
 class UpSampling3D(Layer):
   """Upsampling layer for 3D inputs.
 
@@ -2083,7 +2083,7 @@ class UpSampling3D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.ZeroPadding1D')
+@keras_export('keras.layers.ZeroPadding1D')
 class ZeroPadding1D(Layer):
   """Zero-padding layer for 1D input (e.g. temporal sequence).
 
@@ -2124,7 +2124,7 @@ class ZeroPadding1D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.ZeroPadding2D')
+@keras_export('keras.layers.ZeroPadding2D')
 class ZeroPadding2D(Layer):
   """Zero-padding layer for 2D input (e.g. picture).
 
@@ -2226,7 +2226,7 @@ class ZeroPadding2D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.ZeroPadding3D')
+@keras_export('keras.layers.ZeroPadding3D')
 class ZeroPadding3D(Layer):
   """Zero-padding layer for 3D data (spatial or spatio-temporal).
 
@@ -2344,7 +2344,7 @@ class ZeroPadding3D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Cropping1D')
+@keras_export('keras.layers.Cropping1D')
 class Cropping1D(Layer):
   """Cropping layer for 1D input (e.g. temporal sequence).
 
@@ -2389,7 +2389,7 @@ class Cropping1D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Cropping2D')
+@keras_export('keras.layers.Cropping2D')
 class Cropping2D(Layer):
   """Cropping layer for 2D input (e.g. picture).
 
@@ -2521,7 +2521,7 @@ class Cropping2D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Cropping3D')
+@keras_export('keras.layers.Cropping3D')
 class Cropping3D(Layer):
   """Cropping layer for 3D data (e.g.
 
diff --git a/tensorflow/python/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/layers/convolutional_recurrent.py
index cf3861da21..c0479e71a2 100644
--- a/tensorflow/python/keras/layers/convolutional_recurrent.py
+++ b/tensorflow/python/keras/layers/convolutional_recurrent.py
@@ -34,7 +34,7 @@ from tensorflow.python.keras.layers.recurrent import RNN
 from tensorflow.python.keras.utils import conv_utils
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 class ConvRNN2D(RNN):
@@ -770,7 +770,7 @@ class ConvLSTM2DCell(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.ConvLSTM2D')
+@keras_export('keras.layers.ConvLSTM2D')
 class ConvLSTM2D(ConvRNN2D):
   """Convolutional LSTM.
 
diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py
index 39bcb82c72..dfbab80be3 100644
--- a/tensorflow/python/keras/layers/core.py
+++ b/tensorflow/python/keras/layers/core.py
@@ -45,10 +45,10 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import standard_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.layers.Masking')
+@keras_export('keras.layers.Masking')
 class Masking(Layer):
   """Masks a sequence by using a mask value to skip timesteps.
 
@@ -99,7 +99,7 @@ class Masking(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Dropout')
+@keras_export('keras.layers.Dropout')
 class Dropout(Layer):
   """Applies Dropout to the input.
 
@@ -159,7 +159,7 @@ class Dropout(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.SpatialDropout1D')
+@keras_export('keras.layers.SpatialDropout1D')
 class SpatialDropout1D(Dropout):
   """Spatial 1D version of Dropout.
 
@@ -196,7 +196,7 @@ class SpatialDropout1D(Dropout):
     return noise_shape
 
 
-@tf_export('keras.layers.SpatialDropout2D')
+@keras_export('keras.layers.SpatialDropout2D')
 class SpatialDropout2D(Dropout):
   """Spatial 2D version of Dropout.
 
@@ -250,7 +250,7 @@ class SpatialDropout2D(Dropout):
       return (input_shape[0], 1, 1, input_shape[3])
 
 
-@tf_export('keras.layers.SpatialDropout3D')
+@keras_export('keras.layers.SpatialDropout3D')
 class SpatialDropout3D(Dropout):
   """Spatial 3D version of Dropout.
 
@@ -303,7 +303,7 @@ class SpatialDropout3D(Dropout):
       return (input_shape[0], 1, 1, 1, input_shape[4])
 
 
-@tf_export('keras.layers.Activation')
+@keras_export('keras.layers.Activation')
 class Activation(Layer):
   """Applies an activation function to an output.
 
@@ -337,7 +337,7 @@ class Activation(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Reshape')
+@keras_export('keras.layers.Reshape')
 class Reshape(Layer):
   """Reshapes an output to a certain shape.
 
@@ -444,7 +444,7 @@ class Reshape(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Permute')
+@keras_export('keras.layers.Permute')
 class Permute(Layer):
   """Permutes the dimensions of the input according to a given pattern.
 
@@ -502,7 +502,7 @@ class Permute(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Flatten')
+@keras_export('keras.layers.Flatten')
 class Flatten(Layer):
   """Flattens the input. Does not affect the batch size.
 
@@ -572,7 +572,7 @@ class Flatten(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.RepeatVector')
+@keras_export('keras.layers.RepeatVector')
 class RepeatVector(Layer):
   """Repeats the input n times.
 
@@ -616,7 +616,7 @@ class RepeatVector(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Lambda')
+@keras_export('keras.layers.Lambda')
 class Lambda(Layer):
   """Wraps arbitrary expression as a `Layer` object.
 
@@ -845,7 +845,7 @@ class Lambda(Layer):
     return cls(**config)
 
 
-@tf_export('keras.layers.Dense')
+@keras_export('keras.layers.Dense')
 class Dense(Layer):
   """Just your regular densely-connected NN layer.
 
@@ -1006,7 +1006,7 @@ class Dense(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.ActivityRegularization')
+@keras_export('keras.layers.ActivityRegularization')
 class ActivityRegularization(Layer):
   """Layer that applies an update to the cost function based input activity.
 
diff --git a/tensorflow/python/keras/layers/cudnn_recurrent.py b/tensorflow/python/keras/layers/cudnn_recurrent.py
index e9925eeba6..a74308f69c 100644
--- a/tensorflow/python/keras/layers/cudnn_recurrent.py
+++ b/tensorflow/python/keras/layers/cudnn_recurrent.py
@@ -31,7 +31,7 @@ from tensorflow.python.keras.layers.recurrent import RNN
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_cudnn_rnn_ops
 from tensorflow.python.ops import state_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 class _CuDNNRNN(RNN):
@@ -158,7 +158,7 @@ class _CuDNNRNN(RNN):
         RNN, self).get_losses_for(inputs=inputs)
 
 
-@tf_export(v1=['keras.layers.CuDNNGRU'])
+@keras_export(v1=['keras.layers.CuDNNGRU'])
 class CuDNNGRU(_CuDNNRNN):
   """Fast GRU implementation backed by cuDNN.
 
@@ -335,7 +335,7 @@ class CuDNNGRU(_CuDNNRNN):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export(v1=['keras.layers.CuDNNLSTM'])
+@keras_export(v1=['keras.layers.CuDNNLSTM'])
 class CuDNNLSTM(_CuDNNRNN):
   """Fast LSTM implementation backed by cuDNN.
 
diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py
index e8a8575705..df5e82c245 100644
--- a/tensorflow/python/keras/layers/embeddings.py
+++ b/tensorflow/python/keras/layers/embeddings.py
@@ -28,10 +28,10 @@ from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.layers.Embedding')
+@keras_export('keras.layers.Embedding')
 class Embedding(Layer):
   """Turns positive integers (indexes) into dense vectors of fixed size.
 
diff --git a/tensorflow/python/keras/layers/local.py b/tensorflow/python/keras/layers/local.py
index d2c4aaa125..2c66608f86 100644
--- a/tensorflow/python/keras/layers/local.py
+++ b/tensorflow/python/keras/layers/local.py
@@ -27,10 +27,10 @@ from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.keras.engine.input_spec import InputSpec
 from tensorflow.python.keras.utils import conv_utils
 from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.layers.LocallyConnected1D')
+@keras_export('keras.layers.LocallyConnected1D')
 class LocallyConnected1D(Layer):
   """Locally-connected layer for 1D inputs.
 
@@ -293,7 +293,7 @@ class LocallyConnected1D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.LocallyConnected2D')
+@keras_export('keras.layers.LocallyConnected2D')
 class LocallyConnected2D(Layer):
   """Locally-connected layer for 2D inputs.
 
diff --git a/tensorflow/python/keras/layers/merge.py b/tensorflow/python/keras/layers/merge.py
index c73b21d965..b497bf48cd 100644
--- a/tensorflow/python/keras/layers/merge.py
+++ b/tensorflow/python/keras/layers/merge.py
@@ -26,7 +26,7 @@ from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 class _Merge(Layer):
@@ -218,7 +218,7 @@ class _Merge(Layer):
     return K.all(K.concatenate(masks, axis=0), axis=0, keepdims=False)
 
 
-@tf_export('keras.layers.Add')
+@keras_export('keras.layers.Add')
 class Add(_Merge):
   """Layer that adds a list of inputs.
 
@@ -250,7 +250,7 @@ class Add(_Merge):
     return output
 
 
-@tf_export('keras.layers.Subtract')
+@keras_export('keras.layers.Subtract')
 class Subtract(_Merge):
   """Layer that subtracts two inputs.
 
@@ -289,7 +289,7 @@ class Subtract(_Merge):
     return inputs[0] - inputs[1]
 
 
-@tf_export('keras.layers.Multiply')
+@keras_export('keras.layers.Multiply')
 class Multiply(_Merge):
   """Layer that multiplies (element-wise) a list of inputs.
 
@@ -305,7 +305,7 @@ class Multiply(_Merge):
     return output
 
 
-@tf_export('keras.layers.Average')
+@keras_export('keras.layers.Average')
 class Average(_Merge):
   """Layer that averages a list of inputs.
 
@@ -321,7 +321,7 @@ class Average(_Merge):
     return output / len(inputs)
 
 
-@tf_export('keras.layers.Maximum')
+@keras_export('keras.layers.Maximum')
 class Maximum(_Merge):
   """Layer that computes the maximum (element-wise) a list of inputs.
 
@@ -337,7 +337,7 @@ class Maximum(_Merge):
     return output
 
 
-@tf_export('keras.layers.Minimum')
+@keras_export('keras.layers.Minimum')
 class Minimum(_Merge):
   """Layer that computes the minimum (element-wise) a list of inputs.
 
@@ -353,7 +353,7 @@ class Minimum(_Merge):
     return output
 
 
-@tf_export('keras.layers.Concatenate')
+@keras_export('keras.layers.Concatenate')
 class Concatenate(_Merge):
   """Layer that concatenates a list of inputs.
 
@@ -444,7 +444,7 @@ class Concatenate(_Merge):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.Dot')
+@keras_export('keras.layers.Dot')
 class Dot(_Merge):
   """Layer that computes a dot product between samples in two tensors.
 
@@ -559,7 +559,7 @@ class Dot(_Merge):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.add')
+@keras_export('keras.layers.add')
 def add(inputs, **kwargs):
   """Functional interface to the `Add` layer.
 
@@ -588,7 +588,7 @@ def add(inputs, **kwargs):
   return Add(**kwargs)(inputs)
 
 
-@tf_export('keras.layers.subtract')
+@keras_export('keras.layers.subtract')
 def subtract(inputs, **kwargs):
   """Functional interface to the `Subtract` layer.
 
@@ -617,7 +617,7 @@ def subtract(inputs, **kwargs):
   return Subtract(**kwargs)(inputs)
 
 
-@tf_export('keras.layers.multiply')
+@keras_export('keras.layers.multiply')
 def multiply(inputs, **kwargs):
   """Functional interface to the `Multiply` layer.
 
@@ -631,7 +631,7 @@ def multiply(inputs, **kwargs):
   return Multiply(**kwargs)(inputs)
 
 
-@tf_export('keras.layers.average')
+@keras_export('keras.layers.average')
 def average(inputs, **kwargs):
   """Functional interface to the `Average` layer.
 
@@ -645,7 +645,7 @@ def average(inputs, **kwargs):
   return Average(**kwargs)(inputs)
 
 
-@tf_export('keras.layers.maximum')
+@keras_export('keras.layers.maximum')
 def maximum(inputs, **kwargs):
   """Functional interface to the `Maximum` layer.
 
@@ -659,7 +659,7 @@ def maximum(inputs, **kwargs):
   return Maximum(**kwargs)(inputs)
 
 
-@tf_export('keras.layers.minimum')
+@keras_export('keras.layers.minimum')
 def minimum(inputs, **kwargs):
   """Functional interface to the `Minimum` layer.
 
@@ -673,7 +673,7 @@ def minimum(inputs, **kwargs):
   return Minimum(**kwargs)(inputs)
 
 
-@tf_export('keras.layers.concatenate')
+@keras_export('keras.layers.concatenate')
 def concatenate(inputs, axis=-1, **kwargs):
   """Functional interface to the `Concatenate` layer.
 
@@ -688,7 +688,7 @@ def concatenate(inputs, axis=-1, **kwargs):
   return Concatenate(axis=axis, **kwargs)(inputs)
 
 
-@tf_export('keras.layers.dot')
+@keras_export('keras.layers.dot')
 def dot(inputs, axes, normalize=False, **kwargs):
   """Functional interface to the `Dot` layer.
 
diff --git a/tensorflow/python/keras/layers/noise.py b/tensorflow/python/keras/layers/noise.py
index cb7cee3ebc..958ab7c0f6 100644
--- a/tensorflow/python/keras/layers/noise.py
+++ b/tensorflow/python/keras/layers/noise.py
@@ -25,10 +25,10 @@ from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.layers.GaussianNoise')
+@keras_export('keras.layers.GaussianNoise')
 class GaussianNoise(Layer):
   """Apply additive zero-centered Gaussian noise.
 
@@ -74,7 +74,7 @@ class GaussianNoise(Layer):
     return input_shape
 
 
-@tf_export('keras.layers.GaussianDropout')
+@keras_export('keras.layers.GaussianDropout')
 class GaussianDropout(Layer):
   """Apply multiplicative 1-centered Gaussian noise.
 
@@ -121,7 +121,7 @@ class GaussianDropout(Layer):
     return input_shape
 
 
-@tf_export('keras.layers.AlphaDropout')
+@keras_export('keras.layers.AlphaDropout')
 class AlphaDropout(Layer):
   """Applies Alpha Dropout to the input.
 
diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py
index ee37e8a242..e1b8c0bca7 100644
--- a/tensorflow/python/keras/layers/normalization.py
+++ b/tensorflow/python/keras/layers/normalization.py
@@ -40,10 +40,10 @@ from tensorflow.python.ops import nn
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.layers.BatchNormalization', v1=[])
+@keras_export('keras.layers.BatchNormalization', v1=[])
 class BatchNormalizationV2(Layer):
   """Batch normalization layer (Ioffe and Szegedy, 2014).
 
@@ -740,7 +740,7 @@ def _replace_in_v2_docstring(old, new):
   return string.replace(old, new)
 
 
-@tf_export(v1=['keras.layers.BatchNormalization'])  # pylint: disable=missing-docstring
+@keras_export(v1=['keras.layers.BatchNormalization'])  # pylint: disable=missing-docstring
 class BatchNormalizationV1(BatchNormalizationV2):
 
   __doc__ = _replace_in_v2_docstring(
diff --git a/tensorflow/python/keras/layers/pooling.py b/tensorflow/python/keras/layers/pooling.py
index a0744cddad..6d76f96216 100644
--- a/tensorflow/python/keras/layers/pooling.py
+++ b/tensorflow/python/keras/layers/pooling.py
@@ -28,7 +28,7 @@ from tensorflow.python.keras.utils import conv_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 class Pooling1D(Layer):
@@ -108,7 +108,7 @@ class Pooling1D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.MaxPool1D', 'keras.layers.MaxPooling1D')
+@keras_export('keras.layers.MaxPool1D', 'keras.layers.MaxPooling1D')
 class MaxPooling1D(Pooling1D):
   """Max pooling operation for temporal data.
 
@@ -155,7 +155,7 @@ class MaxPooling1D(Pooling1D):
         **kwargs)
 
 
-@tf_export('keras.layers.AveragePooling1D', 'keras.layers.AvgPool1D')
+@keras_export('keras.layers.AveragePooling1D', 'keras.layers.AvgPool1D')
 class AveragePooling1D(Pooling1D):
   """Average pooling for temporal data.
 
@@ -286,7 +286,7 @@ class Pooling2D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.MaxPool2D', 'keras.layers.MaxPooling2D')
+@keras_export('keras.layers.MaxPool2D', 'keras.layers.MaxPooling2D')
 class MaxPooling2D(Pooling2D):
   """Max pooling operation for spatial data.
 
@@ -340,7 +340,7 @@ class MaxPooling2D(Pooling2D):
         padding=padding, data_format=data_format, **kwargs)
 
 
-@tf_export('keras.layers.AveragePooling2D', 'keras.layers.AvgPool2D')
+@keras_export('keras.layers.AveragePooling2D', 'keras.layers.AvgPool2D')
 class AveragePooling2D(Pooling2D):
   """Average pooling operation for spatial data.
 
@@ -490,7 +490,7 @@ class Pooling3D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.MaxPool3D', 'keras.layers.MaxPooling3D')
+@keras_export('keras.layers.MaxPool3D', 'keras.layers.MaxPooling3D')
 class MaxPooling3D(Pooling3D):
   """Max pooling operation for 3D data (spatial or spatio-temporal).
 
@@ -540,7 +540,7 @@ class MaxPooling3D(Pooling3D):
         padding=padding, data_format=data_format, **kwargs)
 
 
-@tf_export('keras.layers.AveragePooling3D', 'keras.layers.AvgPool3D')
+@keras_export('keras.layers.AveragePooling3D', 'keras.layers.AvgPool3D')
 class AveragePooling3D(Pooling3D):
   """Average pooling operation for 3D data (spatial or spatio-temporal).
 
@@ -615,8 +615,8 @@ class GlobalPooling1D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.GlobalAveragePooling1D',
-           'keras.layers.GlobalAvgPool1D')
+@keras_export('keras.layers.GlobalAveragePooling1D',
+              'keras.layers.GlobalAvgPool1D')
 class GlobalAveragePooling1D(GlobalPooling1D):
   """Global average pooling operation for temporal data.
 
@@ -664,7 +664,7 @@ class GlobalAveragePooling1D(GlobalPooling1D):
     return None
 
 
-@tf_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D')
+@keras_export('keras.layers.GlobalMaxPool1D', 'keras.layers.GlobalMaxPooling1D')
 class GlobalMaxPooling1D(GlobalPooling1D):
   """Global max pooling operation for temporal data.
 
@@ -720,8 +720,8 @@ class GlobalPooling2D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.GlobalAveragePooling2D',
-           'keras.layers.GlobalAvgPool2D')
+@keras_export('keras.layers.GlobalAveragePooling2D',
+              'keras.layers.GlobalAvgPool2D')
 class GlobalAveragePooling2D(GlobalPooling2D):
   """Global average pooling operation for spatial data.
 
@@ -757,7 +757,7 @@ class GlobalAveragePooling2D(GlobalPooling2D):
       return backend.mean(inputs, axis=[2, 3])
 
 
-@tf_export('keras.layers.GlobalMaxPool2D', 'keras.layers.GlobalMaxPooling2D')
+@keras_export('keras.layers.GlobalMaxPool2D', 'keras.layers.GlobalMaxPooling2D')
 class GlobalMaxPooling2D(GlobalPooling2D):
   """Global max pooling operation for spatial data.
 
@@ -818,8 +818,8 @@ class GlobalPooling3D(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.GlobalAveragePooling3D',
-           'keras.layers.GlobalAvgPool3D')
+@keras_export('keras.layers.GlobalAveragePooling3D',
+              'keras.layers.GlobalAvgPool3D')
 class GlobalAveragePooling3D(GlobalPooling3D):
   """Global Average pooling operation for 3D data.
 
@@ -855,7 +855,7 @@ class GlobalAveragePooling3D(GlobalPooling3D):
       return backend.mean(inputs, axis=[2, 3, 4])
 
 
-@tf_export('keras.layers.GlobalMaxPool3D', 'keras.layers.GlobalMaxPooling3D')
+@keras_export('keras.layers.GlobalMaxPool3D', 'keras.layers.GlobalMaxPooling3D')
 class GlobalMaxPooling3D(GlobalPooling3D):
   """Global Max pooling operation for 3D data.
 
diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py
index 3051416c6e..95257a085e 100644
--- a/tensorflow/python/keras/layers/recurrent.py
+++ b/tensorflow/python/keras/layers/recurrent.py
@@ -44,7 +44,7 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import nest
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 # The following string constants are used by Defun approach for unified backend
@@ -55,7 +55,7 @@ _CPU_DEVICE_NAME = 'CPU'
 _GPU_DEVICE_NAME = 'GPU'
 
 
-@tf_export('keras.layers.StackedRNNCells')
+@keras_export('keras.layers.StackedRNNCells')
 class StackedRNNCells(Layer):
   """Wrapper allowing a stack of RNN cells to behave as a single cell.
 
@@ -259,7 +259,7 @@ class StackedRNNCells(Layer):
     return updates + self._updates
 
 
-@tf_export('keras.layers.RNN')
+@keras_export('keras.layers.RNN')
 class RNN(Layer):
   """Base class for recurrent layers.
 
@@ -1000,7 +1000,7 @@ class RNN(Layer):
     return updates + self._updates
 
 
-@tf_export('keras.layers.SimpleRNNCell')
+@keras_export('keras.layers.SimpleRNNCell')
 class SimpleRNNCell(Layer):
   """Cell class for SimpleRNN.
 
@@ -1170,7 +1170,7 @@ class SimpleRNNCell(Layer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.layers.SimpleRNN')
+@keras_export('keras.layers.SimpleRNN')
 class SimpleRNN(RNN):
   """Fully-connected RNN where the output is to be fed back to input.
 
@@ -1382,7 +1382,7 @@ class SimpleRNN(RNN):
     return cls(**config)
 
 
-@tf_export('keras.layers.GRUCell')
+@keras_export('keras.layers.GRUCell')
 class GRUCell(Layer):
   """Cell class for the GRU layer.
 
@@ -1663,7 +1663,7 @@ class GRUCell(Layer):
     return _generate_zero_filled_state_for_cell(self, inputs, batch_size, dtype)
 
 
-@tf_export(v1=['keras.layers.GRU'])
+@keras_export(v1=['keras.layers.GRU'])
 class GRU(RNN):
   """Gated Recurrent Unit - Cho et al. 2014.
 
@@ -1922,7 +1922,7 @@ class GRU(RNN):
     return cls(**config)
 
 
-@tf_export('keras.layers.GRU', v1=[])
+@keras_export('keras.layers.GRU', v1=[])
 class UnifiedGRU(GRU):
   """Gated Recurrent Unit - Cho et al. 2014.
 
@@ -2307,7 +2307,7 @@ def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, time_major):
       'cudnn', dtype=dtypes.string, name='runtime')
 
 
-@tf_export('keras.layers.LSTMCell')
+@keras_export('keras.layers.LSTMCell')
 class LSTMCell(Layer):
   """Cell class for the LSTM layer.
 
@@ -2587,7 +2587,7 @@ class LSTMCell(Layer):
         self, inputs, batch_size, dtype))
 
 
-@tf_export('keras.experimental.PeepholeLSTMCell')
+@keras_export('keras.experimental.PeepholeLSTMCell')
 class PeepholeLSTMCell(LSTMCell):
   """Equivalent to LSTMCell class but adds peephole connections.
 
@@ -2667,7 +2667,7 @@ class PeepholeLSTMCell(LSTMCell):
     return c, o
 
 
-@tf_export(v1=['keras.layers.LSTM'])
+@keras_export(v1=['keras.layers.LSTM'])
 class LSTM(RNN):
   """Long Short-Term Memory layer - Hochreiter 1997.
 
@@ -2926,7 +2926,7 @@ class LSTM(RNN):
     return cls(**config)
 
 
-@tf_export('keras.layers.LSTM', v1=[])
+@keras_export('keras.layers.LSTM', v1=[])
 class UnifiedLSTM(LSTM):
   """Long Short-Term Memory layer - Hochreiter 1997.
 
diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py
index c78807611b..c9424c9f63 100644
--- a/tensorflow/python/keras/layers/wrappers.py
+++ b/tensorflow/python/keras/layers/wrappers.py
@@ -31,10 +31,10 @@ from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
 from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import nest
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.layers.Wrapper')
+@keras_export('keras.layers.Wrapper')
 class Wrapper(Layer):
   """Abstract wrapper base class.
 
@@ -114,7 +114,7 @@ class Wrapper(Layer):
     return cls(layer, **config)
 
 
-@tf_export('keras.layers.TimeDistributed')
+@keras_export('keras.layers.TimeDistributed')
 class TimeDistributed(Wrapper):
   """This wrapper allows to apply a layer to every temporal slice of an input.
 
@@ -353,7 +353,7 @@ class TimeDistributed(Wrapper):
     return output_mask
 
 
-@tf_export('keras.layers.Bidirectional')
+@keras_export('keras.layers.Bidirectional')
 class Bidirectional(Wrapper):
   """Bidirectional wrapper for RNNs.
 
diff --git a/tensorflow/python/keras/losses.py b/tensorflow/python/keras/losses.py
index 4c584d0ff0..bca0cb0e4d 100644
--- a/tensorflow/python/keras/losses.py
+++ b/tensorflow/python/keras/losses.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops.losses import losses_impl
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 class Loss(object):
@@ -117,7 +117,7 @@ class Loss(object):
     NotImplementedError('Must be implemented in subclasses.')
 
 
-@tf_export('keras.losses.MeanSquaredError')
+@keras_export('keras.losses.MeanSquaredError')
 class MeanSquaredError(Loss):
   """Computes the mean of squares of errors between labels and predictions.
 
@@ -155,7 +155,7 @@ class MeanSquaredError(Loss):
     return mean_squared_error(y_true, y_pred)
 
 
-@tf_export('keras.losses.MeanAbsoluteError')
+@keras_export('keras.losses.MeanAbsoluteError')
 class MeanAbsoluteError(Loss):
   """Computes the mean of absolute difference between labels and predictions.
 
@@ -193,7 +193,7 @@ class MeanAbsoluteError(Loss):
     return mean_absolute_error(y_true, y_pred)
 
 
-@tf_export('keras.losses.MeanAbsolutePercentageError')
+@keras_export('keras.losses.MeanAbsolutePercentageError')
 class MeanAbsolutePercentageError(Loss):
   """Computes the mean absolute percentage error between `y_true` and `y_pred`.
 
@@ -231,7 +231,7 @@ class MeanAbsolutePercentageError(Loss):
     return mean_absolute_percentage_error(y_true, y_pred)
 
 
-@tf_export('keras.losses.MeanSquaredLogarithmicError')
+@keras_export('keras.losses.MeanSquaredLogarithmicError')
 class MeanSquaredLogarithmicError(Loss):
   """Computes the mean squared logarithmic error between `y_true` and `y_pred`.
 
@@ -269,7 +269,7 @@ class MeanSquaredLogarithmicError(Loss):
     return mean_squared_logarithmic_error(y_true, y_pred)
 
 
-@tf_export('keras.losses.BinaryCrossentropy')
+@keras_export('keras.losses.BinaryCrossentropy')
 class BinaryCrossentropy(Loss):
   """Computes the binary cross entropy loss between the labels and predictions.
 
@@ -325,7 +325,7 @@ class BinaryCrossentropy(Loss):
     return binary_crossentropy(y_true, y_pred, from_logits=self.from_logits)
 
 
-@tf_export('keras.losses.CategoricalCrossentropy')
+@keras_export('keras.losses.CategoricalCrossentropy')
 class CategoricalCrossentropy(Loss):
   """Computes categorical cross entropy loss between the `y_true` and `y_pred`.
 
@@ -395,69 +395,69 @@ class CategoricalCrossentropy(Loss):
           y_true, y_pred, from_logits=self.from_logits)
 
 
-@tf_export('keras.metrics.mean_squared_error',
-           'keras.metrics.mse',
-           'keras.metrics.MSE',
-           'keras.losses.mean_squared_error',
-           'keras.losses.mse',
-           'keras.losses.MSE')
+@keras_export('keras.metrics.mean_squared_error',
+              'keras.metrics.mse',
+              'keras.metrics.MSE',
+              'keras.losses.mean_squared_error',
+              'keras.losses.mse',
+              'keras.losses.MSE')
 def mean_squared_error(y_true, y_pred):
   return K.mean(math_ops.square(y_pred - y_true), axis=-1)
 
 
-@tf_export('keras.metrics.mean_absolute_error',
-           'keras.metrics.mae',
-           'keras.metrics.MAE',
-           'keras.losses.mean_absolute_error',
-           'keras.losses.mae',
-           'keras.losses.MAE')
+@keras_export('keras.metrics.mean_absolute_error',
+              'keras.metrics.mae',
+              'keras.metrics.MAE',
+              'keras.losses.mean_absolute_error',
+              'keras.losses.mae',
+              'keras.losses.MAE')
 def mean_absolute_error(y_true, y_pred):
   return K.mean(math_ops.abs(y_pred - y_true), axis=-1)
 
 
-@tf_export('keras.metrics.mean_absolute_percentage_error',
-           'keras.metrics.mape',
-           'keras.metrics.MAPE',
-           'keras.losses.mean_absolute_percentage_error',
-           'keras.losses.mape',
-           'keras.losses.MAPE')
+@keras_export('keras.metrics.mean_absolute_percentage_error',
+              'keras.metrics.mape',
+              'keras.metrics.MAPE',
+              'keras.losses.mean_absolute_percentage_error',
+              'keras.losses.mape',
+              'keras.losses.MAPE')
 def mean_absolute_percentage_error(y_true, y_pred):
   diff = math_ops.abs(
       (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None))
   return 100. * K.mean(diff, axis=-1)
 
 
-@tf_export('keras.metrics.mean_squared_logarithmic_error',
-           'keras.metrics.msle',
-           'keras.metrics.MSLE',
-           'keras.losses.mean_squared_logarithmic_error',
-           'keras.losses.msle',
-           'keras.losses.MSLE')
+@keras_export('keras.metrics.mean_squared_logarithmic_error',
+              'keras.metrics.msle',
+              'keras.metrics.MSLE',
+              'keras.losses.mean_squared_logarithmic_error',
+              'keras.losses.msle',
+              'keras.losses.MSLE')
 def mean_squared_logarithmic_error(y_true, y_pred):
   first_log = math_ops.log(K.clip(y_pred, K.epsilon(), None) + 1.)
   second_log = math_ops.log(K.clip(y_true, K.epsilon(), None) + 1.)
   return K.mean(math_ops.square(first_log - second_log), axis=-1)
 
 
-@tf_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge')
+@keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge')
 def squared_hinge(y_true, y_pred):
   return K.mean(
       math_ops.square(math_ops.maximum(1. - y_true * y_pred, 0.)), axis=-1)
 
 
-@tf_export('keras.metrics.hinge', 'keras.losses.hinge')
+@keras_export('keras.metrics.hinge', 'keras.losses.hinge')
 def hinge(y_true, y_pred):
   return K.mean(math_ops.maximum(1. - y_true * y_pred, 0.), axis=-1)
 
 
-@tf_export('keras.losses.categorical_hinge')
+@keras_export('keras.losses.categorical_hinge')
 def categorical_hinge(y_true, y_pred):
   pos = math_ops.reduce_sum(y_true * y_pred, axis=-1)
   neg = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1)
   return math_ops.maximum(0., neg - pos + 1.)
 
 
-@tf_export('keras.losses.logcosh')
+@keras_export('keras.losses.logcosh')
 def logcosh(y_true, y_pred):
   """Logarithm of the hyperbolic cosine of the prediction error.
 
@@ -480,47 +480,47 @@ def logcosh(y_true, y_pred):
   return K.mean(_logcosh(y_pred - y_true), axis=-1)
 
 
-@tf_export('keras.metrics.categorical_crossentropy',
-           'keras.losses.categorical_crossentropy')
+@keras_export('keras.metrics.categorical_crossentropy',
+              'keras.losses.categorical_crossentropy')
 def categorical_crossentropy(y_true, y_pred, from_logits=False):
   return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
 
 
-@tf_export('keras.metrics.sparse_categorical_crossentropy',
-           'keras.losses.sparse_categorical_crossentropy')
+@keras_export('keras.metrics.sparse_categorical_crossentropy',
+              'keras.losses.sparse_categorical_crossentropy')
 def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False):
   return K.sparse_categorical_crossentropy(
       y_true, y_pred, from_logits=from_logits)
 
 
-@tf_export('keras.metrics.binary_crossentropy',
-           'keras.losses.binary_crossentropy')
+@keras_export('keras.metrics.binary_crossentropy',
+              'keras.losses.binary_crossentropy')
 def binary_crossentropy(y_true, y_pred, from_logits=False):
   return K.mean(
       K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
 
 
-@tf_export('keras.metrics.kullback_leibler_divergence',
-           'keras.metrics.kld',
-           'keras.metrics.KLD',
-           'keras.losses.kullback_leibler_divergence',
-           'keras.losses.kld',
-           'keras.losses.KLD')
+@keras_export('keras.metrics.kullback_leibler_divergence',
+              'keras.metrics.kld',
+              'keras.metrics.KLD',
+              'keras.losses.kullback_leibler_divergence',
+              'keras.losses.kld',
+              'keras.losses.KLD')
 def kullback_leibler_divergence(y_true, y_pred):
   y_true = K.clip(y_true, K.epsilon(), 1)
   y_pred = K.clip(y_pred, K.epsilon(), 1)
   return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1)
 
 
-@tf_export('keras.metrics.poisson', 'keras.losses.poisson')
+@keras_export('keras.metrics.poisson', 'keras.losses.poisson')
 def poisson(y_true, y_pred):
   return K.mean(y_pred - y_true * math_ops.log(y_pred + K.epsilon()), axis=-1)
 
 
-@tf_export('keras.metrics.cosine_proximity',
-           'keras.metrics.cosine',
-           'keras.losses.cosine_proximity',
-           'keras.losses.cosine')
+@keras_export('keras.metrics.cosine_proximity',
+              'keras.metrics.cosine',
+              'keras.losses.cosine_proximity',
+              'keras.losses.cosine')
 def cosine_proximity(y_true, y_pred):
   y_true = nn.l2_normalize(y_true, axis=-1)
   y_pred = nn.l2_normalize(y_pred, axis=-1)
@@ -571,12 +571,12 @@ kld = KLD = kullback_leibler_divergence
 cosine = cosine_proximity
 
 
-@tf_export('keras.losses.serialize')
+@keras_export('keras.losses.serialize')
 def serialize(loss):
   return serialize_keras_object(loss)
 
 
-@tf_export('keras.losses.deserialize')
+@keras_export('keras.losses.deserialize')
 def deserialize(name, custom_objects=None):
   return deserialize_keras_object(
       name,
@@ -585,7 +585,7 @@ def deserialize(name, custom_objects=None):
       printable_module_name='loss function')
 
 
-@tf_export('keras.losses.get')
+@keras_export('keras.losses.get')
 def get(identifier):
   if identifier is None:
     return None
diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py
index c8ccb7f624..8ccb514ba9 100644
--- a/tensorflow/python/keras/metrics.py
+++ b/tensorflow/python/keras/metrics.py
@@ -62,7 +62,7 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.ops import weights_broadcast_ops
 from tensorflow.python.util import tf_decorator
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 from tensorflow.tools.docs import doc_controls
 
 
@@ -517,7 +517,7 @@ class Metric(Layer):
   ### End: For use by subclasses ###
 
 
-@tf_export('keras.metrics.Mean')
+@keras_export('keras.metrics.Mean')
 class Mean(Metric):
   """Computes the (weighted) mean of the given values.
 
@@ -657,7 +657,7 @@ class MeanMetricWrapper(Mean):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.metrics.Accuracy')
+@keras_export('keras.metrics.Accuracy')
 class Accuracy(MeanMetricWrapper):
   """Calculates how often predictions matches labels.
 
@@ -699,7 +699,7 @@ class Accuracy(MeanMetricWrapper):
     return super(Accuracy, cls).from_config(config)
 
 
-@tf_export('keras.metrics.BinaryAccuracy')
+@keras_export('keras.metrics.BinaryAccuracy')
 class BinaryAccuracy(MeanMetricWrapper):
   """Calculates how often predictions matches labels.
 
@@ -750,7 +750,7 @@ class BinaryAccuracy(MeanMetricWrapper):
     return super(BinaryAccuracy, cls).from_config(config)
 
 
-@tf_export('keras.metrics.CategoricalAccuracy')
+@keras_export('keras.metrics.CategoricalAccuracy')
 class CategoricalAccuracy(MeanMetricWrapper):
   """Calculates how often predictions matches labels.
 
@@ -806,7 +806,7 @@ class CategoricalAccuracy(MeanMetricWrapper):
     return super(CategoricalAccuracy, cls).from_config(config)
 
 
-@tf_export('keras.metrics.SparseCategoricalAccuracy')
+@keras_export('keras.metrics.SparseCategoricalAccuracy')
 class SparseCategoricalAccuracy(MeanMetricWrapper):
   """Calculates how often predictions matches integer labels.
 
@@ -912,7 +912,7 @@ class _ConfusionMatrixConditionCount(Metric):
       K.set_value(v, np.zeros((num_thresholds,)))
 
 
-@tf_export('keras.metrics.FalsePositives')
+@keras_export('keras.metrics.FalsePositives')
 class FalsePositives(_ConfusionMatrixConditionCount):
   """Calculates the number of false positives.
 
@@ -962,7 +962,7 @@ class FalsePositives(_ConfusionMatrixConditionCount):
         dtype=dtype)
 
 
-@tf_export('keras.metrics.FalseNegatives')
+@keras_export('keras.metrics.FalseNegatives')
 class FalseNegatives(_ConfusionMatrixConditionCount):
   """Calculates the number of false negatives.
 
@@ -1012,7 +1012,7 @@ class FalseNegatives(_ConfusionMatrixConditionCount):
         dtype=dtype)
 
 
-@tf_export('keras.metrics.TrueNegatives')
+@keras_export('keras.metrics.TrueNegatives')
 class TrueNegatives(_ConfusionMatrixConditionCount):
   """Calculates the number of true negatives.
 
@@ -1062,7 +1062,7 @@ class TrueNegatives(_ConfusionMatrixConditionCount):
         dtype=dtype)
 
 
-@tf_export('keras.metrics.TruePositives')
+@keras_export('keras.metrics.TruePositives')
 class TruePositives(_ConfusionMatrixConditionCount):
   """Calculates the number of true positives.
 
@@ -1112,7 +1112,7 @@ class TruePositives(_ConfusionMatrixConditionCount):
         dtype=dtype)
 
 
-@tf_export('keras.metrics.Precision')
+@keras_export('keras.metrics.Precision')
 class Precision(Metric):
   """Computes the precision of the predictions with respect to the labels.
 
@@ -1196,7 +1196,7 @@ class Precision(Metric):
       K.set_value(v, np.zeros((num_thresholds,)))
 
 
-@tf_export('keras.metrics.Recall')
+@keras_export('keras.metrics.Recall')
 class Recall(Metric):
   """Computes the recall of the predictions with respect to the labels.
 
@@ -1344,7 +1344,7 @@ class SensitivitySpecificityBase(Metric):
       K.set_value(v, np.zeros((num_thresholds,)))
 
 
-@tf_export('keras.metrics.SensitivityAtSpecificity')
+@keras_export('keras.metrics.SensitivityAtSpecificity')
 class SensitivityAtSpecificity(SensitivitySpecificityBase):
   """Computes the sensitivity at a given specificity.
 
@@ -1413,7 +1413,7 @@ class SensitivityAtSpecificity(SensitivitySpecificityBase):
                                self.tp[min_index] + self.fn[min_index])
 
 
-@tf_export('keras.metrics.SpecificityAtSensitivity')
+@keras_export('keras.metrics.SpecificityAtSensitivity')
 class SpecificityAtSensitivity(SensitivitySpecificityBase):
   """Computes the specificity at a given sensitivity.
 
@@ -1526,14 +1526,14 @@ def accuracy(y_true, y_pred):
   return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
 
 
-@tf_export('keras.metrics.binary_accuracy')
+@keras_export('keras.metrics.binary_accuracy')
 def binary_accuracy(y_true, y_pred, threshold=0.5):
   threshold = math_ops.cast(threshold, y_pred.dtype)
   y_pred = math_ops.cast(y_pred > threshold, y_pred.dtype)
   return K.mean(math_ops.equal(y_true, y_pred), axis=-1)
 
 
-@tf_export('keras.metrics.categorical_accuracy')
+@keras_export('keras.metrics.categorical_accuracy')
 def categorical_accuracy(y_true, y_pred):
   return math_ops.cast(
       math_ops.equal(
@@ -1541,7 +1541,7 @@ def categorical_accuracy(y_true, y_pred):
       K.floatx())
 
 
-@tf_export('keras.metrics.sparse_categorical_accuracy')
+@keras_export('keras.metrics.sparse_categorical_accuracy')
 def sparse_categorical_accuracy(y_true, y_pred):
   # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
   if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
@@ -1556,13 +1556,13 @@ def sparse_categorical_accuracy(y_true, y_pred):
   return math_ops.cast(math_ops.equal(y_true, y_pred), K.floatx())
 
 
-@tf_export('keras.metrics.top_k_categorical_accuracy')
+@keras_export('keras.metrics.top_k_categorical_accuracy')
 def top_k_categorical_accuracy(y_true, y_pred, k=5):
   return K.mean(
       nn.in_top_k(y_pred, math_ops.argmax(y_true, axis=-1), k), axis=-1)
 
 
-@tf_export('keras.metrics.sparse_top_k_categorical_accuracy')
+@keras_export('keras.metrics.sparse_top_k_categorical_accuracy')
 def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
   # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,)
   if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))):
@@ -1579,12 +1579,12 @@ msle = MSLE = mean_squared_logarithmic_error
 cosine = cosine_proximity
 
 
-@tf_export('keras.metrics.serialize')
+@keras_export('keras.metrics.serialize')
 def serialize(metric):
   return serialize_keras_object(metric)
 
 
-@tf_export('keras.metrics.deserialize')
+@keras_export('keras.metrics.deserialize')
 def deserialize(config, custom_objects=None):
   return deserialize_keras_object(
       config,
@@ -1593,7 +1593,7 @@ def deserialize(config, custom_objects=None):
       printable_module_name='metric function')
 
 
-@tf_export('keras.metrics.get')
+@keras_export('keras.metrics.get')
 def get(identifier):
   if isinstance(identifier, dict):
     return deserialize(identifier)
diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py
index 00ec5978be..3082988a81 100644
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@@ -31,7 +31,7 @@ from tensorflow.python.keras.engine.input_layer import InputLayer
 from tensorflow.python.keras.engine.network import Network
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 # API entries importable from `keras.models`:
 Model = training.Model  # pylint: disable=invalid-name
@@ -262,7 +262,7 @@ def _clone_sequential_model(model, input_tensors=None, share_weights=False):
     return Sequential(layers=[input_layer] + layers, name=model.name)
 
 
-@tf_export('keras.models.clone_model')
+@keras_export('keras.models.clone_model')
 def clone_model(model, input_tensors=None):
   """Clone any `Model` instance.
 
diff --git a/tensorflow/python/keras/ops.py b/tensorflow/python/keras/ops.py
new file mode 100644
index 0000000000..dca076eea1
--- /dev/null
+++ b/tensorflow/python/keras/ops.py
@@ -0,0 +1,56 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Module for exporting TensorFlow ops under tf.keras.*."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops.losses import losses_impl
+from tensorflow.python.util.tf_export import keras_export
+
+
+keras_export("keras.initializers.Initializer")(
+    init_ops.Initializer)
+keras_export("keras.initializers.Zeros", "keras.initializers.zeros")(
+    init_ops.Zeros)
+keras_export("keras.initializers.Ones", "keras.initializers.ones")(
+    init_ops.Ones)
+keras_export("keras.initializers.Constant", "keras.initializers.constant")(
+    init_ops.Constant)
+keras_export("keras.initializers.VarianceScaling")(
+    init_ops.VarianceScaling)
+keras_export("keras.initializers.Orthogonal", "keras.initializers.orthogonal")(
+    init_ops.Orthogonal)
+keras_export("keras.initializers.Identity", "keras.initializers.identity")(
+    init_ops.Identity)
+keras_export("keras.initializers.glorot_uniform")(
+    init_ops.GlorotUniform)
+keras_export("keras.initializers.glorot_normal")(
+    init_ops.GlorotNormal)
+keras_export("keras.initializers.lecun_normal")(
+    init_ops.lecun_normal)
+keras_export("keras.initializers.lecun_uniform")(
+    init_ops.lecun_uniform)
+keras_export("keras.initializers.he_normal")(
+    init_ops.he_normal)
+keras_export("keras.initializers.he_uniform")(
+    init_ops.he_uniform)
+
+keras_export("keras.backend.name_scope")(ops.name_scope)
+
+keras_export("keras.losses.Reduction", v1=[])(
+    losses_impl.ReductionV2)
diff --git a/tensorflow/python/keras/optimizer_v2/adadelta.py b/tensorflow/python/keras/optimizer_v2/adadelta.py
index 88ddc94324..8f485b2440 100644
--- a/tensorflow/python/keras/optimizer_v2/adadelta.py
+++ b/tensorflow/python/keras/optimizer_v2/adadelta.py
@@ -22,10 +22,10 @@ import numpy as np
 
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.training import training_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.optimizers.Adadelta', v1=[])
+@keras_export('keras.optimizers.Adadelta', v1=[])
 class Adadelta(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the Adadelta algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/adagrad.py b/tensorflow/python/keras/optimizer_v2/adagrad.py
index ac55d2075a..af359b5f59 100644
--- a/tensorflow/python/keras/optimizer_v2/adagrad.py
+++ b/tensorflow/python/keras/optimizer_v2/adagrad.py
@@ -27,10 +27,10 @@ from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.optimizers.Adagrad', v1=[])
+@keras_export('keras.optimizers.Adagrad', v1=[])
 class Adagrad(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the Adagrad algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/adam.py b/tensorflow/python/keras/optimizer_v2/adam.py
index 873dadb31a..292323be60 100644
--- a/tensorflow/python/keras/optimizer_v2/adam.py
+++ b/tensorflow/python/keras/optimizer_v2/adam.py
@@ -24,10 +24,10 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.training import training_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.optimizers.Adam', v1=[])
+@keras_export('keras.optimizers.Adam', v1=[])
 class Adam(optimizer_v2.OptimizerV2):
   """Optimizer that implements the Adam algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/adamax.py b/tensorflow/python/keras/optimizer_v2/adamax.py
index 9c826eb42a..8ee5c2a9f8 100644
--- a/tensorflow/python/keras/optimizer_v2/adamax.py
+++ b/tensorflow/python/keras/optimizer_v2/adamax.py
@@ -25,10 +25,10 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.training import training_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.optimizers.Adamax', v1=[])
+@keras_export('keras.optimizers.Adamax', v1=[])
 class Adamax(adam.Adam):
   """Optimizer that implements the Adamax algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/ftrl.py b/tensorflow/python/keras/optimizer_v2/ftrl.py
index 7828b1791e..5783fb12b3 100644
--- a/tensorflow/python/keras/optimizer_v2/ftrl.py
+++ b/tensorflow/python/keras/optimizer_v2/ftrl.py
@@ -21,10 +21,10 @@ from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.training import training_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.optimizers.Ftrl', v1=[])
+@keras_export('keras.optimizers.Ftrl', v1=[])
 class Ftrl(optimizer_v2.OptimizerV2):
   """Optimizer that implements the FTRL algorithm.
 
diff --git a/tensorflow/python/keras/optimizer_v2/gradient_descent.py b/tensorflow/python/keras/optimizer_v2/gradient_descent.py
index 06db2f3b4c..2e64e08095 100644
--- a/tensorflow/python/keras/optimizer_v2/gradient_descent.py
+++ b/tensorflow/python/keras/optimizer_v2/gradient_descent.py
@@ -21,10 +21,10 @@ from tensorflow.python.framework import ops
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.training import training_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export("keras.optimizers.SGD", v1=[])
+@keras_export("keras.optimizers.SGD", v1=[])
 class SGD(optimizer_v2.OptimizerV2):
   """Stochastic gradient descent and momentum optimizer.
 
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index 0e909d0d79..ed780cb50d 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -43,7 +43,7 @@ from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import nest
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 def _deduplicate_indexed_slices(values, indices):
@@ -67,7 +67,7 @@ def _deduplicate_indexed_slices(values, indices):
 
 
 @six.add_metaclass(abc.ABCMeta)
-@tf_export("keras.optimizers.Optimizer", v1=[])
+@keras_export("keras.optimizers.Optimizer", v1=[])
 class OptimizerV2(checkpointable.CheckpointableBase):
   """Updated base class for optimizers.
 
diff --git a/tensorflow/python/keras/optimizer_v2/rmsprop.py b/tensorflow/python/keras/optimizer_v2/rmsprop.py
index dbb5a37fd8..b52ac45246 100644
--- a/tensorflow/python/keras/optimizer_v2/rmsprop.py
+++ b/tensorflow/python/keras/optimizer_v2/rmsprop.py
@@ -20,10 +20,10 @@ from __future__ import print_function
 from tensorflow.python.framework import ops
 from tensorflow.python.keras.optimizer_v2 import optimizer_v2
 from tensorflow.python.training import training_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export("keras.optimizers.RMSprop", v1=[])
+@keras_export("keras.optimizers.RMSprop", v1=[])
 class RMSprop(optimizer_v2.OptimizerV2):
   r"""Optimizer that implements the RMSprop algorithm.
 
diff --git a/tensorflow/python/keras/optimizers.py b/tensorflow/python/keras/optimizers.py
index dda603fa2e..a8544199fd 100644
--- a/tensorflow/python/keras/optimizers.py
+++ b/tensorflow/python/keras/optimizers.py
@@ -42,10 +42,10 @@ from tensorflow.python.ops import state_ops
 from tensorflow.python.training import optimizer as tf_optimizer_module
 from tensorflow.python.training import training_util
 from tensorflow.python.training.checkpointable import base as checkpointable
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export(v1=['keras.optimizers.Optimizer'])
+@keras_export(v1=['keras.optimizers.Optimizer'])
 class Optimizer(object):
   """Abstract optimizer base class.
 
@@ -159,7 +159,7 @@ class Optimizer(object):
     return cls(**config)
 
 
-@tf_export(v1=['keras.optimizers.SGD'])
+@keras_export(v1=['keras.optimizers.SGD'])
 class SGD(Optimizer):
   """Stochastic gradient descent optimizer.
 
@@ -224,7 +224,7 @@ class SGD(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export(v1=['keras.optimizers.RMSprop'])
+@keras_export(v1=['keras.optimizers.RMSprop'])
 class RMSprop(Optimizer):
   """RMSProp optimizer.
 
@@ -291,7 +291,7 @@ class RMSprop(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export(v1=['keras.optimizers.Adagrad'])
+@keras_export(v1=['keras.optimizers.Adagrad'])
 class Adagrad(Optimizer):
   """Adagrad optimizer.
 
@@ -358,7 +358,7 @@ class Adagrad(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export(v1=['keras.optimizers.Adadelta'])
+@keras_export(v1=['keras.optimizers.Adadelta'])
 class Adadelta(Optimizer):
   """Adadelta optimizer.
 
@@ -442,7 +442,7 @@ class Adadelta(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export(v1=['keras.optimizers.Adam'])
+@keras_export(v1=['keras.optimizers.Adam'])
 class Adam(Optimizer):
   """Adam optimizer.
 
@@ -539,7 +539,7 @@ class Adam(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export(v1=['keras.optimizers.Adamax'])
+@keras_export(v1=['keras.optimizers.Adamax'])
 class Adamax(Optimizer):
   """Adamax optimizer from Adam paper's Section 7.
 
@@ -622,7 +622,7 @@ class Adamax(Optimizer):
     return dict(list(base_config.items()) + list(config.items()))
 
 
-@tf_export('keras.optimizers.Nadam')
+@keras_export('keras.optimizers.Nadam')
 class Nadam(Optimizer):
   """Nesterov Adam optimizer.
 
@@ -787,12 +787,12 @@ adamax = Adamax
 nadam = Nadam
 
 
-@tf_export('keras.optimizers.serialize')
+@keras_export('keras.optimizers.serialize')
 def serialize(optimizer):
   return serialize_keras_object(optimizer)
 
 
-@tf_export('keras.optimizers.deserialize')
+@keras_export('keras.optimizers.deserialize')
 def deserialize(config, custom_objects=None):
   """Inverse of the `serialize` function.
 
@@ -838,7 +838,7 @@ def deserialize(config, custom_objects=None):
       printable_module_name='optimizer')
 
 
-@tf_export('keras.optimizers.get')
+@keras_export('keras.optimizers.get')
 def get(identifier):
   """Retrieves a Keras Optimizer instance.
 
diff --git a/tensorflow/python/keras/preprocessing/image.py b/tensorflow/python/keras/preprocessing/image.py
index e33993950d..f2fefffb0a 100644
--- a/tensorflow/python/keras/preprocessing/image.py
+++ b/tensorflow/python/keras/preprocessing/image.py
@@ -30,7 +30,7 @@ except ImportError:
 from tensorflow.python.keras import backend
 from tensorflow.python.keras import utils
 from tensorflow.python.util import tf_inspect
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 random_rotation = image.random_rotation
 random_shift = image.random_shift
@@ -44,7 +44,7 @@ apply_affine_transform = image.apply_affine_transform
 load_img = image.load_img
 
 
-@tf_export('keras.preprocessing.image.array_to_img')
+@keras_export('keras.preprocessing.image.array_to_img')
 def array_to_img(x, data_format=None, scale=True, dtype=None):
   """Converts a 3D Numpy array to a PIL Image instance.
 
@@ -74,7 +74,7 @@ def array_to_img(x, data_format=None, scale=True, dtype=None):
   return image.array_to_img(x, data_format=data_format, scale=scale, **kwargs)
 
 
-@tf_export('keras.preprocessing.image.img_to_array')
+@keras_export('keras.preprocessing.image.img_to_array')
 def img_to_array(img, data_format=None, dtype=None):
   """Converts a PIL Image instance to a Numpy array.
 
@@ -101,7 +101,7 @@ def img_to_array(img, data_format=None, dtype=None):
   return image.img_to_array(img, data_format=data_format, **kwargs)
 
 
-@tf_export('keras.preprocessing.image.save_img')
+@keras_export('keras.preprocessing.image.save_img')
 def save_img(path,
              x,
              data_format=None,
@@ -131,12 +131,12 @@ def save_img(path,
                  scale=scale, **kwargs)
 
 
-@tf_export('keras.preprocessing.image.Iterator')
+@keras_export('keras.preprocessing.image.Iterator')
 class Iterator(image.Iterator, utils.Sequence):
   pass
 
 
-@tf_export('keras.preprocessing.image.DirectoryIterator')
+@keras_export('keras.preprocessing.image.DirectoryIterator')
 class DirectoryIterator(image.DirectoryIterator, Iterator):
   """Iterator capable of reading images from a directory on disk.
 
@@ -227,7 +227,7 @@ class DirectoryIterator(image.DirectoryIterator, Iterator):
         **kwargs)
 
 
-@tf_export('keras.preprocessing.image.NumpyArrayIterator')
+@keras_export('keras.preprocessing.image.NumpyArrayIterator')
 class NumpyArrayIterator(image.NumpyArrayIterator, Iterator):
   """Iterator yielding data from a Numpy array.
 
@@ -291,7 +291,7 @@ class NumpyArrayIterator(image.NumpyArrayIterator, Iterator):
         **kwargs)
 
 
-@tf_export('keras.preprocessing.image.ImageDataGenerator')
+@keras_export('keras.preprocessing.image.ImageDataGenerator')
 class ImageDataGenerator(image.ImageDataGenerator):
   """Generate batches of tensor image data with real-time data augmentation.
 
@@ -518,16 +518,17 @@ class ImageDataGenerator(image.ImageDataGenerator):
         validation_split=validation_split,
         **kwargs)
 
-tf_export('keras.preprocessing.image.random_rotation')(random_rotation)
-tf_export('keras.preprocessing.image.random_shift')(random_shift)
-tf_export('keras.preprocessing.image.random_shear')(random_shear)
-tf_export('keras.preprocessing.image.random_zoom')(random_zoom)
-tf_export('keras.preprocessing.image.apply_channel_shift')(apply_channel_shift)
-tf_export(
+keras_export('keras.preprocessing.image.random_rotation')(random_rotation)
+keras_export('keras.preprocessing.image.random_shift')(random_shift)
+keras_export('keras.preprocessing.image.random_shear')(random_shear)
+keras_export('keras.preprocessing.image.random_zoom')(random_zoom)
+keras_export(
+    'keras.preprocessing.image.apply_channel_shift')(apply_channel_shift)
+keras_export(
     'keras.preprocessing.image.random_channel_shift')(random_channel_shift)
-tf_export(
+keras_export(
     'keras.preprocessing.image.apply_brightness_shift')(apply_brightness_shift)
-tf_export('keras.preprocessing.image.random_brightness')(random_brightness)
-tf_export(
+keras_export('keras.preprocessing.image.random_brightness')(random_brightness)
+keras_export(
     'keras.preprocessing.image.apply_affine_transform')(apply_affine_transform)
-tf_export('keras.preprocessing.image.load_img')(load_img)
+keras_export('keras.preprocessing.image.load_img')(load_img)
diff --git a/tensorflow/python/keras/preprocessing/sequence.py b/tensorflow/python/keras/preprocessing/sequence.py
index f014668909..1d73a1e4da 100644
--- a/tensorflow/python/keras/preprocessing/sequence.py
+++ b/tensorflow/python/keras/preprocessing/sequence.py
@@ -22,7 +22,7 @@ from __future__ import print_function
 from keras_preprocessing import sequence
 
 from tensorflow.python.keras import utils
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 pad_sequences = sequence.pad_sequences
 make_sampling_table = sequence.make_sampling_table
@@ -31,7 +31,7 @@ skipgrams = sequence.skipgrams
 _remove_long_seq = sequence._remove_long_seq  # pylint: disable=protected-access
 
 
-@tf_export('keras.preprocessing.sequence.TimeseriesGenerator')
+@keras_export('keras.preprocessing.sequence.TimeseriesGenerator')
 class TimeseriesGenerator(sequence.TimeseriesGenerator, utils.Sequence):
   """Utility class for generating batches of temporal data.
   This class takes in a sequence of data-points gathered at
@@ -89,7 +89,7 @@ class TimeseriesGenerator(sequence.TimeseriesGenerator, utils.Sequence):
   pass
 
 
-tf_export('keras.preprocessing.sequence.pad_sequences')(pad_sequences)
-tf_export(
+keras_export('keras.preprocessing.sequence.pad_sequences')(pad_sequences)
+keras_export(
     'keras.preprocessing.sequence.make_sampling_table')(make_sampling_table)
-tf_export('keras.preprocessing.sequence.skipgrams')(skipgrams)
+keras_export('keras.preprocessing.sequence.skipgrams')(skipgrams)
diff --git a/tensorflow/python/keras/preprocessing/text.py b/tensorflow/python/keras/preprocessing/text.py
index 57e5d00e04..f10a768c31 100644
--- a/tensorflow/python/keras/preprocessing/text.py
+++ b/tensorflow/python/keras/preprocessing/text.py
@@ -21,15 +21,15 @@ from __future__ import print_function
 
 from keras_preprocessing import text
 
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 text_to_word_sequence = text.text_to_word_sequence
 one_hot = text.one_hot
 hashing_trick = text.hashing_trick
 Tokenizer = text.Tokenizer
 
-tf_export(
+keras_export(
     'keras.preprocessing.text.text_to_word_sequence')(text_to_word_sequence)
-tf_export('keras.preprocessing.text.one_hot')(one_hot)
-tf_export('keras.preprocessing.text.hashing_trick')(hashing_trick)
-tf_export('keras.preprocessing.text.Tokenizer')(Tokenizer)
+keras_export('keras.preprocessing.text.one_hot')(one_hot)
+keras_export('keras.preprocessing.text.hashing_trick')(hashing_trick)
+keras_export('keras.preprocessing.text.Tokenizer')(Tokenizer)
diff --git a/tensorflow/python/keras/regularizers.py b/tensorflow/python/keras/regularizers.py
index 28b6ad4c65..b828fa933b 100644
--- a/tensorflow/python/keras/regularizers.py
+++ b/tensorflow/python/keras/regularizers.py
@@ -24,10 +24,10 @@ from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
 from tensorflow.python.keras.utils.generic_utils import serialize_keras_object
 from tensorflow.python.ops import math_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.regularizers.Regularizer')
+@keras_export('keras.regularizers.Regularizer')
 class Regularizer(object):
   """Regularizer base class.
   """
@@ -40,7 +40,7 @@ class Regularizer(object):
     return cls(**config)
 
 
-@tf_export('keras.regularizers.L1L2')
+@keras_export('keras.regularizers.L1L2')
 class L1L2(Regularizer):
   """Regularizer for L1 and L2 regularization.
 
@@ -68,27 +68,27 @@ class L1L2(Regularizer):
 # Aliases.
 
 
-@tf_export('keras.regularizers.l1')
+@keras_export('keras.regularizers.l1')
 def l1(l=0.01):
   return L1L2(l1=l)
 
 
-@tf_export('keras.regularizers.l2')
+@keras_export('keras.regularizers.l2')
 def l2(l=0.01):
   return L1L2(l2=l)
 
 
-@tf_export('keras.regularizers.l1_l2')
+@keras_export('keras.regularizers.l1_l2')
 def l1_l2(l1=0.01, l2=0.01):  # pylint: disable=redefined-outer-name
   return L1L2(l1=l1, l2=l2)
 
 
-@tf_export('keras.regularizers.serialize')
+@keras_export('keras.regularizers.serialize')
 def serialize(regularizer):
   return serialize_keras_object(regularizer)
 
 
-@tf_export('keras.regularizers.deserialize')
+@keras_export('keras.regularizers.deserialize')
 def deserialize(config, custom_objects=None):
   return deserialize_keras_object(
       config,
@@ -97,7 +97,7 @@ def deserialize(config, custom_objects=None):
       printable_module_name='regularizer')
 
 
-@tf_export('keras.regularizers.get')
+@keras_export('keras.regularizers.get')
 def get(identifier):
   if identifier is None:
     return None
diff --git a/tensorflow/python/keras/saving/saved_model.py b/tensorflow/python/keras/saving/saved_model.py
index 2b83f321c2..d22c4ee5d3 100644
--- a/tensorflow/python/keras/saving/saved_model.py
+++ b/tensorflow/python/keras/saving/saved_model.py
@@ -44,10 +44,10 @@ from tensorflow.python.training import saver as saver_lib
 from tensorflow.python.training.checkpointable import util as checkpointable_utils
 from tensorflow.python.util import compat
 from tensorflow.python.util import nest
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.experimental.export')
+@keras_export('keras.experimental.export')
 def export(
     model, saved_model_path, custom_objects=None, as_text=None,
     input_signature=None, serving_only=False):
@@ -367,7 +367,7 @@ def _assert_same_non_optimizer_objects(model, model_graph, clone, clone_graph):
   return True
 
 
-@tf_export('keras.experimental.load_from_saved_model')
+@keras_export('keras.experimental.load_from_saved_model')
 def load_from_saved_model(saved_model_path):
   """Loads a keras.Model from a SavedModel created by keras export().
 
diff --git a/tensorflow/python/keras/utils/data_utils.py b/tensorflow/python/keras/utils/data_utils.py
index d133e3fa8a..9b4a50dd7e 100644
--- a/tensorflow/python/keras/utils/data_utils.py
+++ b/tensorflow/python/keras/utils/data_utils.py
@@ -40,7 +40,7 @@ from six.moves.urllib.request import urlopen
 
 from tensorflow.python.keras.utils.generic_utils import Progbar
 from tensorflow.python.util import tf_inspect
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 try:
@@ -144,7 +144,7 @@ def _extract_archive(file_path, path='.', archive_format='auto'):
   return False
 
 
-@tf_export('keras.utils.get_file')
+@keras_export('keras.utils.get_file')
 def get_file(fname,
              origin,
              untar=False,
@@ -324,7 +324,7 @@ def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535):
     return False
 
 
-@tf_export('keras.utils.Sequence')
+@keras_export('keras.utils.Sequence')
 class Sequence(object):
   """Base object for fitting to a sequence of data, such as a dataset.
 
@@ -445,7 +445,7 @@ def get_index(uid, i):
   return _SHARED_SEQUENCES[uid][i]
 
 
-@tf_export('keras.utils.SequenceEnqueuer')
+@keras_export('keras.utils.SequenceEnqueuer')
 class SequenceEnqueuer(object):
   """Base class to enqueue inputs.
 
@@ -570,7 +570,7 @@ class SequenceEnqueuer(object):
     raise NotImplementedError
 
 
-@tf_export('keras.utils.OrderedEnqueuer')
+@keras_export('keras.utils.OrderedEnqueuer')
 class OrderedEnqueuer(SequenceEnqueuer):
   """Builds a Enqueuer from a Sequence.
 
@@ -680,7 +680,7 @@ def next_sample(uid):
   return six.next(_SHARED_SEQUENCES[uid])
 
 
-@tf_export('keras.utils.GeneratorEnqueuer')
+@keras_export('keras.utils.GeneratorEnqueuer')
 class GeneratorEnqueuer(SequenceEnqueuer):
   """Builds a queue out of a data generator.
 
diff --git a/tensorflow/python/keras/utils/generic_utils.py b/tensorflow/python/keras/utils/generic_utils.py
index c331ce430b..454854618c 100644
--- a/tensorflow/python/keras/utils/generic_utils.py
+++ b/tensorflow/python/keras/utils/generic_utils.py
@@ -32,12 +32,12 @@ import six
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util import tf_inspect
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 _GLOBAL_CUSTOM_OBJECTS = {}
 
 
-@tf_export('keras.utils.CustomObjectScope')
+@keras_export('keras.utils.CustomObjectScope')
 class CustomObjectScope(object):
   """Provides a scope that changes to `_GLOBAL_CUSTOM_OBJECTS` cannot escape.
 
@@ -73,7 +73,7 @@ class CustomObjectScope(object):
     _GLOBAL_CUSTOM_OBJECTS.update(self.backup)
 
 
-@tf_export('keras.utils.custom_object_scope')
+@keras_export('keras.utils.custom_object_scope')
 def custom_object_scope(*args):
   """Provides a scope that changes to `_GLOBAL_CUSTOM_OBJECTS` cannot escape.
 
@@ -104,7 +104,7 @@ def custom_object_scope(*args):
   return CustomObjectScope(*args)
 
 
-@tf_export('keras.utils.get_custom_objects')
+@keras_export('keras.utils.get_custom_objects')
 def get_custom_objects():
   """Retrieves a live reference to the global dictionary of custom objects.
 
@@ -130,7 +130,7 @@ def serialize_keras_class_and_config(cls_name, cls_config):
   return {'class_name': cls_name, 'config': cls_config}
 
 
-@tf_export('keras.utils.serialize_keras_object')
+@keras_export('keras.utils.serialize_keras_object')
 def serialize_keras_object(instance):
   _, instance = tf_decorator.unwrap(instance)
   if instance is None:
@@ -167,7 +167,7 @@ def class_and_config_for_serialized_keras_object(
   return (cls, config['config'])
 
 
-@tf_export('keras.utils.deserialize_keras_object')
+@keras_export('keras.utils.deserialize_keras_object')
 def deserialize_keras_object(identifier,
                              module_objects=None,
                              custom_objects=None,
@@ -306,7 +306,7 @@ def has_arg(fn, name, accept_all=False):
   return name in arg_spec.args
 
 
-@tf_export('keras.utils.Progbar')
+@keras_export('keras.utils.Progbar')
 class Progbar(object):
   """Displays a progress bar.
 
diff --git a/tensorflow/python/keras/utils/io_utils.py b/tensorflow/python/keras/utils/io_utils.py
index 62674a9c77..5bb9a93cca 100644
--- a/tensorflow/python/keras/utils/io_utils.py
+++ b/tensorflow/python/keras/utils/io_utils.py
@@ -22,7 +22,7 @@ from collections import defaultdict
 
 import numpy as np
 import six
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 try:
@@ -31,7 +31,7 @@ except ImportError:
   h5py = None
 
 
-@tf_export('keras.utils.HDF5Matrix')
+@keras_export('keras.utils.HDF5Matrix')
 class HDF5Matrix(object):
   """Representation of HDF5 dataset to be used instead of a Numpy array.
 
diff --git a/tensorflow/python/keras/utils/layer_utils.py b/tensorflow/python/keras/utils/layer_utils.py
index 60677be735..d7eed2e86d 100644
--- a/tensorflow/python/keras/utils/layer_utils.py
+++ b/tensorflow/python/keras/utils/layer_utils.py
@@ -23,10 +23,10 @@ import numpy as np
 
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.utils.conv_utils import convert_kernel
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.utils.get_source_inputs')
+@keras_export('keras.utils.get_source_inputs')
 def get_source_inputs(tensor, layer=None, node_index=None):
   """Returns the list of input tensors necessary to compute `tensor`.
 
@@ -298,7 +298,7 @@ def gather_non_trainable_weights(trainable, sub_layers, extra_variables):
   return weights + non_trainable_extra_variables
 
 
-@tf_export('keras.utils.convert_all_kernels_in_model')
+@keras_export('keras.utils.convert_all_kernels_in_model')
 def convert_all_kernels_in_model(model):
   """Converts all convolution kernels in a model from Theano to TensorFlow.
 
diff --git a/tensorflow/python/keras/utils/multi_gpu_utils.py b/tensorflow/python/keras/utils/multi_gpu_utils.py
index 04b2ea8fe3..9c97e554b0 100644
--- a/tensorflow/python/keras/utils/multi_gpu_utils.py
+++ b/tensorflow/python/keras/utils/multi_gpu_utils.py
@@ -21,7 +21,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.engine.training import Model
 from tensorflow.python.ops import array_ops
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 def _get_available_devices():
@@ -33,7 +33,7 @@ def _normalize_device_name(name):
   return name
 
 
-@tf_export('keras.utils.multi_gpu_model')
+@keras_export('keras.utils.multi_gpu_model')
 def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
   """Replicates a model on different GPUs.
 
diff --git a/tensorflow/python/keras/utils/np_utils.py b/tensorflow/python/keras/utils/np_utils.py
index 3763999bff..5227a472a3 100644
--- a/tensorflow/python/keras/utils/np_utils.py
+++ b/tensorflow/python/keras/utils/np_utils.py
@@ -18,10 +18,10 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
-@tf_export('keras.utils.to_categorical')
+@keras_export('keras.utils.to_categorical')
 def to_categorical(y, num_classes=None, dtype='float32'):
   """Converts a class vector (integers) to binary class matrix.
 
@@ -52,7 +52,7 @@ def to_categorical(y, num_classes=None, dtype='float32'):
   return categorical
 
 
-@tf_export('keras.utils.normalize')
+@keras_export('keras.utils.normalize')
 def normalize(x, axis=-1, order=2):
   """Normalizes a Numpy array.
 
diff --git a/tensorflow/python/keras/utils/vis_utils.py b/tensorflow/python/keras/utils/vis_utils.py
index 7a454ac831..82bc2755bd 100644
--- a/tensorflow/python/keras/utils/vis_utils.py
+++ b/tensorflow/python/keras/utils/vis_utils.py
@@ -20,7 +20,7 @@ from __future__ import division
 from __future__ import print_function
 
 import os
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 try:
@@ -127,7 +127,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'):
   return dot
 
 
-@tf_export('keras.utils.plot_model')
+@keras_export('keras.utils.plot_model')
 def plot_model(model,
                to_file='model.png',
                show_shapes=False,
diff --git a/tensorflow/python/keras/wrappers/scikit_learn.py b/tensorflow/python/keras/wrappers/scikit_learn.py
index 4462d94ecd..566f9db5d4 100644
--- a/tensorflow/python/keras/wrappers/scikit_learn.py
+++ b/tensorflow/python/keras/wrappers/scikit_learn.py
@@ -26,7 +26,7 @@ import numpy as np
 from tensorflow.python.keras.models import Sequential
 from tensorflow.python.keras.utils.generic_utils import has_arg
 from tensorflow.python.keras.utils.np_utils import to_categorical
-from tensorflow.python.util.tf_export import tf_export
+from tensorflow.python.util.tf_export import keras_export
 
 
 class BaseWrapper(object):
@@ -188,7 +188,7 @@ class BaseWrapper(object):
     return res
 
 
-@tf_export('keras.wrappers.scikit_learn.KerasClassifier')
+@keras_export('keras.wrappers.scikit_learn.KerasClassifier')
 class KerasClassifier(BaseWrapper):
   """Implementation of the scikit-learn classifier API for Keras.
   """
@@ -311,7 +311,7 @@ class KerasClassifier(BaseWrapper):
                      'the `model.compile()` method.')
 
 
-@tf_export('keras.wrappers.scikit_learn.KerasRegressor')
+@keras_export('keras.wrappers.scikit_learn.KerasRegressor')
 class KerasRegressor(BaseWrapper):
   """Implementation of the scikit-learn regressor API for Keras.
   """
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
index c0a4bcd51d..395d53b328 100644
--- a/tensorflow/python/ops/init_ops.py
+++ b/tensorflow/python/ops/init_ops.py
@@ -49,7 +49,6 @@ from tensorflow.python.util.deprecation import  deprecated_arg_values
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("keras.initializers.Initializer")
 class Initializer(object):
   """Initializer base class: all initializers inherit from this class.
   """
@@ -96,8 +95,7 @@ class Initializer(object):
     return cls(**config)
 
 
-@tf_export("keras.initializers.Zeros", "initializers.zeros",
-           "zeros_initializer", "keras.initializers.zeros")
+@tf_export("initializers.zeros", "zeros_initializer")
 class Zeros(Initializer):
   """Initializer that generates tensors initialized to 0."""
 
@@ -113,8 +111,7 @@ class Zeros(Initializer):
     return {"dtype": self.dtype.name}
 
 
-@tf_export("keras.initializers.Ones", "initializers.ones", "ones_initializer",
-           "keras.initializers.ones")
+@tf_export("initializers.ones", "ones_initializer")
 class Ones(Initializer):
   """Initializer that generates tensors initialized to 1."""
 
@@ -130,8 +127,7 @@ class Ones(Initializer):
     return {"dtype": self.dtype.name}
 
 
-@tf_export("keras.initializers.Constant", "initializers.constant",
-           "constant_initializer", "keras.initializers.constant")
+@tf_export("initializers.constant", "constant_initializer")
 class Constant(Initializer):
   """Initializer that generates tensors with constant values.
 
@@ -422,10 +418,9 @@ class UniformUnitScaling(Initializer):
 
 
 @tf_export(
-    "keras.initializers.VarianceScaling",
     "initializers.variance_scaling",
     v1=[
-        "keras.initializers.VarianceScaling", "initializers.variance_scaling",
+        "initializers.variance_scaling",
         "variance_scaling_initializer"
     ])
 @deprecation.deprecated_endpoints("variance_scaling_initializer")
@@ -522,12 +517,10 @@ class VarianceScaling(Initializer):
 
 
 @tf_export(
-    "keras.initializers.Orthogonal",
     "initializers.orthogonal",
-    "keras.initializers.orthogonal",
     v1=[
-        "keras.initializers.Orthogonal", "initializers.orthogonal",
-        "orthogonal_initializer", "keras.initializers.orthogonal"
+        "initializers.orthogonal",
+        "orthogonal_initializer",
     ])
 @deprecation.deprecated_endpoints("orthogonal_initializer")
 class Orthogonal(Initializer):
@@ -1144,8 +1137,7 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal):
     return self._dict_to_tensor(p, ksize, ksize, ksize)
 
 
-@tf_export("keras.initializers.Identity", "initializers.identity",
-           "keras.initializers.identity")
+@tf_export("initializers.identity")
 class Identity(Initializer):
   """Initializer that generates the identity matrix.
 
@@ -1178,8 +1170,7 @@ class Identity(Initializer):
     return {"gain": self.gain, "dtype": self.dtype.name}
 
 
-@tf_export("glorot_uniform_initializer", "keras.initializers.glorot_uniform",
-           "initializers.glorot_uniform")
+@tf_export("glorot_uniform_initializer", "initializers.glorot_uniform")
 class GlorotUniform(VarianceScaling):
   """The Glorot uniform initializer, also called Xavier uniform initializer.
 
@@ -1213,10 +1204,9 @@ class GlorotUniform(VarianceScaling):
 
 
 @tf_export(
-    "keras.initializers.glorot_normal",
     "initializers.glorot_normal",
     v1=[
-        "glorot_normal_initializer", "keras.initializers.glorot_normal",
+        "glorot_normal_initializer",
         "initializers.glorot_normal"
     ])
 @deprecation.deprecated_endpoints("glorot_normal_initializer")
@@ -1273,7 +1263,7 @@ convolutional_orthogonal_3d = ConvolutionOrthogonal3D
 # pylint: enable=invalid-name
 
 
-@tf_export("keras.initializers.lecun_normal", "initializers.lecun_normal")
+@tf_export("initializers.lecun_normal")
 def lecun_normal(seed=None):
   """LeCun normal initializer.
 
@@ -1298,7 +1288,7 @@ def lecun_normal(seed=None):
       scale=1., mode="fan_in", distribution="truncated_normal", seed=seed)
 
 
-@tf_export("keras.initializers.lecun_uniform", "initializers.lecun_uniform")
+@tf_export("initializers.lecun_uniform")
 def lecun_uniform(seed=None):
   """LeCun uniform initializer.
 
@@ -1323,7 +1313,7 @@ def lecun_uniform(seed=None):
       scale=1., mode="fan_in", distribution="uniform", seed=seed)
 
 
-@tf_export("keras.initializers.he_normal", "initializers.he_normal")
+@tf_export("initializers.he_normal")
 def he_normal(seed=None):
   """He normal initializer.
 
@@ -1345,7 +1335,7 @@ def he_normal(seed=None):
       scale=2., mode="fan_in", distribution="truncated_normal", seed=seed)
 
 
-@tf_export("keras.initializers.he_uniform", "initializers.he_uniform")
+@tf_export("initializers.he_uniform")
 def he_uniform(seed=None):
   """He uniform variance scaling initializer.
 
diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py
index feef05496a..7f88ccd879 100644
--- a/tensorflow/python/ops/losses/losses_impl.py
+++ b/tensorflow/python/ops/losses/losses_impl.py
@@ -34,7 +34,7 @@ from tensorflow.python.util.deprecation import deprecated_argument_lookup
 from tensorflow.python.util.tf_export import tf_export
 
 
-@tf_export("losses.Reduction", "keras.losses.Reduction", v1=[])
+@tf_export("losses.Reduction", v1=[])
 class ReductionV2(object):
   """Types of loss reduction.
 
diff --git a/tensorflow/python/tools/api/generator/BUILD b/tensorflow/python/tools/api/generator/BUILD
index 9fd069c5be..109c71b41d 100644
--- a/tensorflow/python/tools/api/generator/BUILD
+++ b/tensorflow/python/tools/api/generator/BUILD
@@ -6,6 +6,8 @@ licenses(["notice"])  # Apache 2.0
 load("//tensorflow:tensorflow.bzl", "py_test")
 load("//tensorflow/python/tools/api/generator:api_init_files.bzl", "TENSORFLOW_API_INIT_FILES")
 load("//tensorflow/python/tools/api/generator:api_init_files_v1.bzl", "TENSORFLOW_API_INIT_FILES_V1")
+load("//tensorflow/python/tools/api/generator:api_init_files.bzl", "KERAS_API_INIT_FILES")
+load("//tensorflow/python/tools/api/generator:api_init_files_v1.bzl", "KERAS_API_INIT_FILES_V1")
 
 exports_files(
     [
@@ -55,7 +57,7 @@ py_test(
     args = [
         "--package=tensorflow.python",
         "--api_name=tensorflow",
-    ] + TENSORFLOW_API_INIT_FILES + TENSORFLOW_API_INIT_FILES_V1,
+    ] + KERAS_API_INIT_FILES + KERAS_API_INIT_FILES_V1 + TENSORFLOW_API_INIT_FILES + TENSORFLOW_API_INIT_FILES_V1,
     main = "doc_srcs_test.py",
     srcs_version = "PY2AND3",
     deps = [
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index a4b1b852ed..6776d1bea6 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -23,6 +23,31 @@ TENSORFLOW_API_INIT_FILES = [
     "io/__init__.py",
     "queue/__init__.py",
     "initializers/__init__.py",
+    "linalg/__init__.py",
+    "lite/__init__.py",
+    "lite/constants/__init__.py",
+    "losses/__init__.py",
+    "math/__init__.py",
+    "nn/__init__.py",
+    "nn/rnn_cell/__init__.py",
+    "quantization/__init__.py",
+    "ragged/__init__.py",
+    "random/__init__.py",
+    "saved_model/__init__.py",
+    "sets/__init__.py",
+    "signal/__init__.py",
+    "sparse/__init__.py",
+    "strings/__init__.py",
+    "summary/__init__.py",
+    "sysconfig/__init__.py",
+    "test/__init__.py",
+    "train/__init__.py",
+    "version/__init__.py",
+    # END GENERATED FILES
+]
+
+KERAS_API_INIT_FILES = [
+    "__init__.py",
     "keras/__init__.py",
     "keras/activations/__init__.py",
     "keras/applications/__init__.py",
@@ -63,25 +88,4 @@ TENSORFLOW_API_INIT_FILES = [
     "keras/utils/__init__.py",
     "keras/wrappers/__init__.py",
     "keras/wrappers/scikit_learn/__init__.py",
-    "linalg/__init__.py",
-    "lite/__init__.py",
-    "lite/constants/__init__.py",
-    "losses/__init__.py",
-    "math/__init__.py",
-    "nn/__init__.py",
-    "nn/rnn_cell/__init__.py",
-    "quantization/__init__.py",
-    "ragged/__init__.py",
-    "random/__init__.py",
-    "saved_model/__init__.py",
-    "sets/__init__.py",
-    "signal/__init__.py",
-    "sparse/__init__.py",
-    "strings/__init__.py",
-    "summary/__init__.py",
-    "sysconfig/__init__.py",
-    "test/__init__.py",
-    "train/__init__.py",
-    "version/__init__.py",
-    # END GENERATED FILES
 ]
diff --git a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
index 503de822cc..2d5898f31c 100644
--- a/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files_v1.bzl
@@ -26,46 +26,6 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "io/__init__.py",
     "queue/__init__.py",
     "initializers/__init__.py",
-    "keras/__init__.py",
-    "keras/activations/__init__.py",
-    "keras/applications/__init__.py",
-    "keras/applications/densenet/__init__.py",
-    "keras/applications/inception_resnet_v2/__init__.py",
-    "keras/applications/inception_v3/__init__.py",
-    "keras/applications/mobilenet/__init__.py",
-    "keras/applications/mobilenet_v2/__init__.py",
-    "keras/applications/nasnet/__init__.py",
-    "keras/applications/resnet50/__init__.py",
-    "keras/applications/vgg16/__init__.py",
-    "keras/applications/vgg19/__init__.py",
-    "keras/applications/xception/__init__.py",
-    "keras/backend/__init__.py",
-    "keras/callbacks/__init__.py",
-    "keras/constraints/__init__.py",
-    "keras/datasets/__init__.py",
-    "keras/datasets/boston_housing/__init__.py",
-    "keras/datasets/cifar10/__init__.py",
-    "keras/datasets/cifar100/__init__.py",
-    "keras/datasets/fashion_mnist/__init__.py",
-    "keras/datasets/imdb/__init__.py",
-    "keras/datasets/mnist/__init__.py",
-    "keras/datasets/reuters/__init__.py",
-    "keras/estimator/__init__.py",
-    "keras/experimental/__init__.py",
-    "keras/initializers/__init__.py",
-    "keras/layers/__init__.py",
-    "keras/losses/__init__.py",
-    "keras/metrics/__init__.py",
-    "keras/models/__init__.py",
-    "keras/optimizers/__init__.py",
-    "keras/preprocessing/__init__.py",
-    "keras/preprocessing/image/__init__.py",
-    "keras/preprocessing/sequence/__init__.py",
-    "keras/preprocessing/text/__init__.py",
-    "keras/regularizers/__init__.py",
-    "keras/utils/__init__.py",
-    "keras/wrappers/__init__.py",
-    "keras/wrappers/scikit_learn/__init__.py",
     "layers/__init__.py",
     "layers/experimental/__init__.py",
     "linalg/__init__.py",
@@ -108,3 +68,47 @@ TENSORFLOW_API_INIT_FILES_V1 = [
     "version/__init__.py",
     # END GENERATED FILES
 ]
+
+KERAS_API_INIT_FILES_V1 = [
+    "__init__.py",
+    "keras/__init__.py",
+    "keras/activations/__init__.py",
+    "keras/applications/__init__.py",
+    "keras/applications/densenet/__init__.py",
+    "keras/applications/inception_resnet_v2/__init__.py",
+    "keras/applications/inception_v3/__init__.py",
+    "keras/applications/mobilenet/__init__.py",
+    "keras/applications/mobilenet_v2/__init__.py",
+    "keras/applications/nasnet/__init__.py",
+    "keras/applications/resnet50/__init__.py",
+    "keras/applications/vgg16/__init__.py",
+    "keras/applications/vgg19/__init__.py",
+    "keras/applications/xception/__init__.py",
+    "keras/backend/__init__.py",
+    "keras/callbacks/__init__.py",
+    "keras/constraints/__init__.py",
+    "keras/datasets/__init__.py",
+    "keras/datasets/boston_housing/__init__.py",
+    "keras/datasets/cifar10/__init__.py",
+    "keras/datasets/cifar100/__init__.py",
+    "keras/datasets/fashion_mnist/__init__.py",
+    "keras/datasets/imdb/__init__.py",
+    "keras/datasets/mnist/__init__.py",
+    "keras/datasets/reuters/__init__.py",
+    "keras/estimator/__init__.py",
+    "keras/experimental/__init__.py",
+    "keras/initializers/__init__.py",
+    "keras/layers/__init__.py",
+    "keras/losses/__init__.py",
+    "keras/metrics/__init__.py",
+    "keras/models/__init__.py",
+    "keras/optimizers/__init__.py",
+    "keras/preprocessing/__init__.py",
+    "keras/preprocessing/image/__init__.py",
+    "keras/preprocessing/sequence/__init__.py",
+    "keras/preprocessing/text/__init__.py",
+    "keras/regularizers/__init__.py",
+    "keras/utils/__init__.py",
+    "keras/wrappers/__init__.py",
+    "keras/wrappers/scikit_learn/__init__.py",
+]
diff --git a/tensorflow/python/util/tf_export.py b/tensorflow/python/util/tf_export.py
index 74afc3746f..90c9c4b5b3 100644
--- a/tensorflow/python/util/tf_export.py
+++ b/tensorflow/python/util/tf_export.py
@@ -48,6 +48,7 @@ import sys
 from tensorflow.python.util import tf_decorator
 
 ESTIMATOR_API_NAME = 'estimator'
+KERAS_API_NAME = 'keras'
 TENSORFLOW_API_NAME = 'tensorflow'
 
 # List of subpackage names used by TensorFlow components. Have to check that
@@ -64,7 +65,10 @@ API_ATTRS = {
         '_tf_api_constants'),
     ESTIMATOR_API_NAME: _Attributes(
         '_estimator_api_names',
-        '_estimator_api_constants')
+        '_estimator_api_constants'),
+    KERAS_API_NAME: _Attributes(
+        '_keras_api_names',
+        '_keras_api_constants')
 }
 
 API_ATTRS_V1 = {
@@ -73,7 +77,10 @@ API_ATTRS_V1 = {
         '_tf_api_constants_v1'),
     ESTIMATOR_API_NAME: _Attributes(
         '_estimator_api_names_v1',
-        '_estimator_api_constants_v1')
+        '_estimator_api_constants_v1'),
+    KERAS_API_NAME: _Attributes(
+        '_keras_api_names_v1',
+        '_keras_api_constants_v1')
 }
 
 
@@ -160,6 +167,7 @@ def get_v1_names(symbol):
   names_v1 = []
   tensorflow_api_attr_v1 = API_ATTRS_V1[TENSORFLOW_API_NAME].names
   estimator_api_attr_v1 = API_ATTRS_V1[ESTIMATOR_API_NAME].names
+  keras_api_attr_v1 = API_ATTRS_V1[KERAS_API_NAME].names
 
   if not hasattr(symbol, tensorflow_api_attr_v1):
     return names_v1
@@ -167,6 +175,8 @@ def get_v1_names(symbol):
     names_v1.extend(getattr(symbol, tensorflow_api_attr_v1))
   if estimator_api_attr_v1 in symbol.__dict__:
     names_v1.extend(getattr(symbol, estimator_api_attr_v1))
+  if keras_api_attr_v1 in symbol.__dict__:
+    names_v1.extend(getattr(symbol, keras_api_attr_v1))
   return names_v1
 
 
@@ -183,6 +193,7 @@ def get_v2_names(symbol):
   names_v2 = []
   tensorflow_api_attr = API_ATTRS[TENSORFLOW_API_NAME].names
   estimator_api_attr = API_ATTRS[ESTIMATOR_API_NAME].names
+  keras_api_attr = API_ATTRS[KERAS_API_NAME].names
 
   if not hasattr(symbol, tensorflow_api_attr):
     return names_v2
@@ -190,6 +201,8 @@ def get_v2_names(symbol):
     names_v2.extend(getattr(symbol, tensorflow_api_attr))
   if estimator_api_attr in symbol.__dict__:
     names_v2.extend(getattr(symbol, estimator_api_attr))
+  if keras_api_attr in symbol.__dict__:
+    names_v2.extend(getattr(symbol, keras_api_attr))
   return names_v2
 
 
@@ -364,3 +377,4 @@ class api_export(object):  # pylint: disable=invalid-name
 
 tf_export = functools.partial(api_export, api_name=TENSORFLOW_API_NAME)
 estimator_export = functools.partial(api_export, api_name=ESTIMATOR_API_NAME)
+keras_export = functools.partial(api_export, api_name=KERAS_API_NAME)
-- 
GitLab


From 700f5ba6e8e04d1bd59b626c5b83a09cd3983b73 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 13:40:17 -0800
Subject: [PATCH 766/873] Add support for batch_major inputs for the bidi-LSTM

PiperOrigin-RevId: 226051559
---
 tensorflow/lite/c/builtin_op_data.h           |   7 +-
 .../lite/core/api/flatbuffer_conversions.cc   |   1 +
 .../kernels/bidirectional_sequence_lstm.cc    |  19 +-
 .../bidirectional_sequence_lstm_test.cc       | 726 +++++++++++++++++-
 tensorflow/lite/kernels/lstm_eval.cc          | 118 +--
 tensorflow/lite/kernels/register.cc           |   3 +-
 tensorflow/lite/schema/schema.fbs             |   7 +
 tensorflow/lite/schema/schema_generated.h     |  23 +-
 8 files changed, 828 insertions(+), 76 deletions(-)

diff --git a/tensorflow/lite/c/builtin_op_data.h b/tensorflow/lite/c/builtin_op_data.h
index 6a5a027a9d..58e7221bc6 100644
--- a/tensorflow/lite/c/builtin_op_data.h
+++ b/tensorflow/lite/c/builtin_op_data.h
@@ -207,13 +207,18 @@ typedef struct {
 } TfLiteUnidirectionalSequenceLSTMParams;
 
 typedef struct {
-  // Parameters for the LSTM kernel.
+  // Parameters supported by version 1:
+  // Parameters inherited for the LSTM kernel.
   TfLiteFusedActivation activation;
   float cell_clip;
   float proj_clip;
 
   // If true, store the outputs of both directions in the first output.
   bool merge_outputs;
+
+  // Parameters supported by version 2:
+  // If set to true then the first dimension is time, otherwise batch.
+  bool time_major;
 } TfLiteBidirectionalSequenceLSTMParams;
 
 typedef struct {
diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc
index c00a0a3a54..e73c4ce023 100644
--- a/tensorflow/lite/core/api/flatbuffer_conversions.cc
+++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc
@@ -417,6 +417,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type,
         params->cell_clip = bidi_lstm_params->cell_clip();
         params->proj_clip = bidi_lstm_params->proj_clip();
         params->merge_outputs = bidi_lstm_params->merge_outputs();
+        params->time_major = bidi_lstm_params->time_major();
       }
       *builtin_data = reinterpret_cast<void*>(params);
       break;
diff --git a/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
index 1cd927a305..1620374f46 100644
--- a/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
@@ -395,8 +395,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
   TF_LITE_ENSURE_EQ(context, input->dims->size, 3);
-  const int max_time = input->dims->data[0];
-  const int n_batch = input->dims->data[1];
+  const bool time_major = params->time_major;
+  const int max_time = time_major ? input->dims->data[0] : input->dims->data[1];
+  const int n_batch = time_major ? input->dims->data[1] : input->dims->data[0];
   const int n_input = input->dims->data[2];
 
   const TfLiteTensor* fw_input_to_output_weights =
@@ -496,8 +497,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   // Resize the output tensors.
   TfLiteIntArray* fw_output_size = TfLiteIntArrayCreate(3);
-  fw_output_size->data[0] = max_time;
-  fw_output_size->data[1] = n_batch;
+  fw_output_size->data[0] = time_major ? max_time : n_batch;
+  fw_output_size->data[1] = time_major ? n_batch : max_time;
   fw_output_size->data[2] =
       params->merge_outputs ? n_bw_output + n_fw_output : n_fw_output;
   TF_LITE_ENSURE_OK(context,
@@ -555,8 +556,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   if (!params->merge_outputs) {
     TfLiteTensor* bw_output = GetOutput(context, node, kBwOutputTensor);
     TfLiteIntArray* bw_output_size = TfLiteIntArrayCreate(3);
-    bw_output_size->data[0] = max_time;
-    bw_output_size->data[1] = n_batch;
+    bw_output_size->data[0] = time_major ? max_time : n_batch;
+    bw_output_size->data[1] = time_major ? n_batch : max_time;
     bw_output_size->data[2] = n_bw_output;
     TF_LITE_ENSURE_OK(
         context, context->ResizeTensor(context, bw_output, bw_output_size));
@@ -876,7 +877,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       params->merge_outputs ? fw_recurrent_to_output_weights->dims->data[1] : 0;
   const auto actual_bw_output = params->merge_outputs ? fw_output : bw_output;
 
-  // TODO(mirkov): add batch_major support (http://b/117326122).
+  const bool time_major = params->time_major;
   switch (fw_input_to_output_weights->type) {
     case kTfLiteFloat32: {
       TfLiteStatus fw_pass_status = lstm_eval::EvalFloat(
@@ -890,7 +891,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_aux_input_to_output_weights, fw_input_gate_bias,
           fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
           fw_projection_weights, fw_projection_bias, &lstm_params,
-          /*forward_sequence=*/true, /*time_major=*/true, /*output_offset=*/0,
+          /*forward_sequence=*/true, time_major, /*output_offset=*/0,
           fw_scratch_buffer, fw_activation_state, fw_cell_state, fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
 
@@ -905,7 +906,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           bw_aux_input_to_output_weights, bw_input_gate_bias,
           bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
           bw_projection_weights, bw_projection_bias, &lstm_params,
-          /*forward_sequence=*/false, /*time_major=*/true, bw_output_offset,
+          /*forward_sequence=*/false, time_major, bw_output_offset,
           bw_scratch_buffer, bw_activation_state, bw_cell_state,
           actual_bw_output);
       TF_LITE_ENSURE_OK(context, bw_pass_status);
diff --git a/tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc b/tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc
index 4d6f91ec74..f5df6d15af 100644
--- a/tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc
+++ b/tensorflow/lite/kernels/bidirectional_sequence_lstm_test.cc
@@ -39,7 +39,7 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
                            bool use_peephole, bool use_projection_weights,
                            bool use_projection_bias, bool merge_outputs,
                            float cell_clip, float proj_clip,
-                           bool quantize_weights,
+                           bool quantize_weights, bool time_major,
                            const std::vector<std::vector<int>>& input_shapes)
       : n_batch_(n_batch),
         n_input_(n_input),
@@ -199,7 +199,7 @@ class BidirectionalLSTMOpModel : public SingleOpModel {
                  BuiltinOptions_BidirectionalSequenceLSTMOptions,
                  CreateBidirectionalSequenceLSTMOptions(
                      builder_, ActivationFunctionType_TANH, cell_clip,
-                     proj_clip, merge_outputs)
+                     proj_clip, merge_outputs, time_major)
                      .Union());
     BuildInterpreter(input_shapes);
   }
@@ -407,7 +407,7 @@ TEST_P(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0, quantize_weights,
+      /*proj_clip=*/0.0, quantize_weights, /*time_major=*/true,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -465,7 +465,7 @@ TEST_P(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) {
 
           // TODO(b/121134029): Update tests so tensor shapes after state tensor
           // are used. They are currently ignored by test_util.
-          {n_batch, sequence_length, 0},  // aux_input tensor
+          {sequence_length, n_batch, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
           {n_cell, 0},                    // aux_fw_input_to_cell tensor
@@ -571,7 +571,7 @@ TEST_P(LSTMOpTest, BlackBoxTestMergedOutput) {
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/true, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0, quantize_weights,
+      /*proj_clip=*/0.0, quantize_weights, /*time_major=*/true,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -629,7 +629,7 @@ TEST_P(LSTMOpTest, BlackBoxTestMergedOutput) {
 
           // TODO(b/121134029): Update tests so tensor shapes after state tensor
           // are used. They are currently ignored by test_util.
-          {n_batch, sequence_length, 0},  // aux_input tensor
+          {sequence_length, n_batch, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
           {n_cell, 0},                    // aux_fw_input_to_cell tensor
@@ -734,7 +734,7 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/false, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0, /*quantize_weights=*/false,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false, /*time_major=*/true,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -792,7 +792,7 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) {
 
           // TODO(b/121134029): Update tests so tensor shapes after state tensor
           // are used. They are currently ignored by test_util.
-          {n_batch, sequence_length, 0},  // aux_input tensor
+          {sequence_length, n_batch, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
           {n_cell, 0},                    // aux_fw_input_to_cell tensor
@@ -896,7 +896,7 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
       /*use_peephole=*/true, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0, /*quantize_weights=*/false,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false, /*time_major=*/true,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -952,7 +952,7 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) {
 
           // TODO(b/121134029): Update tests so tensor shapes after state tensor
           // are used. They are currently ignored by test_util.
-          {n_batch, sequence_length, 0},  // aux_input tensor
+          {sequence_length, n_batch, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
           {n_cell, 0},                    // aux_fw_input_to_cell tensor
@@ -1048,7 +1048,7 @@ TEST(LSTMOpTest,
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true,
       /*use_peephole=*/true, /*use_projection_weights=*/false,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0, /*quantize_weights=*/false,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false, /*time_major=*/true,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -1104,7 +1104,7 @@ TEST(LSTMOpTest,
 
           // TODO(b/121134029): Update tests so tensor shapes after state tensor
           // are used. They are currently ignored by test_util.
-          {n_batch, sequence_length, 0},  // aux_input tensor
+          {sequence_length, n_batch, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
           {n_cell, 0},                    // aux_fw_input_to_cell tensor
@@ -1200,7 +1200,7 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
       n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
       /*use_peephole=*/true, /*use_projection_weights=*/true,
       /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
-      /*proj_clip=*/0.0, /*quantize_weights=*/false,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false, /*time_major=*/true,
       {
           {sequence_length, n_batch, n_input},  // input tensor
 
@@ -1256,7 +1256,7 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
 
           // TODO(b/121134029): Update tests so tensor shapes after state tensor
           // are used. They are currently ignored by test_util.
-          {n_batch, sequence_length, 0},  // aux_input tensor
+          {sequence_length, n_batch, 0},  // aux_input tensor
           {n_cell, 0},                    // aux_fw_input_to_input tensor
           {n_cell, 0},                    // aux_fw_input_to_forget tensor
           {n_cell, 0},                    // aux_fw_input_to_cell tensor
@@ -1892,6 +1892,704 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) {
   EXPECT_THAT(combined, ElementsAreArray(ArrayFloatNear(expected)));
 }
 
+// Same as above but with batch_major input/output.
+TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClippingBatchMajor) {
+  const int n_batch = 2;
+  const int n_input = 5;
+  const int n_cell = 20;
+  const int n_output = 16;
+  const int sequence_length = 4;
+
+  BidirectionalLSTMOpModel lstm(
+      n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false,
+      /*use_peephole=*/true, /*use_projection_weights=*/true,
+      /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0,
+      /*proj_clip=*/0.0, /*quantize_weights=*/false, /*time_major=*/false,
+      {
+          {n_batch, sequence_length, n_input},  // input tensor
+
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {n_cell},  // cell_to_input_weight tensor
+          {n_cell},  // cell_to_forget_weight tensor
+          {n_cell},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {n_output, n_cell},  // projection_weight tensor
+          {0},                 // projection_bias tensor
+
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {n_cell},  // cell_to_input_weight tensor
+          {n_cell},  // cell_to_forget_weight tensor
+          {n_cell},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {n_output, n_cell},  // projection_weight tensor
+          {0},                 // projection_bias tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {n_batch, sequence_length, 0},  // aux_input tensor
+          {n_cell, 0},                    // aux_fw_input_to_input tensor
+          {n_cell, 0},                    // aux_fw_input_to_forget tensor
+          {n_cell, 0},                    // aux_fw_input_to_cell tensor
+          {n_cell, 0},                    // aux_fw_input_to_output tensor
+          {n_cell, 0},                    // aux_bw_input_to_input tensor
+          {n_cell, 0},                    // aux_bw_input_to_forget tensor
+          {n_cell, 0},                    // aux_bw_input_to_cell tensor
+          {n_cell, 0},                    // aux_bw_input_to_output tensor
+      });
+
+  lstm.SetInputToInputWeights(
+      {0.021393683,  0.06124551,    0.046905167,  -0.014657677,  -0.03149463,
+       0.09171803,   0.14647801,    0.10797193,   -0.0057968358, 0.0019193048,
+       -0.2726754,   0.10154029,    -0.018539885, 0.080349885,   -0.10262385,
+       -0.022599787, -0.09121155,   -0.008675967, -0.045206103,  -0.0821282,
+       -0.008045952, 0.015478081,   0.055217247,  0.038719587,   0.044153627,
+       -0.06453243,  0.05031825,    -0.046935108, -0.008164439,  0.014574226,
+       -0.1671009,   -0.15519552,   -0.16819797,  -0.13971269,   -0.11953059,
+       0.25005487,   -0.22790983,   0.009855087,  -0.028140958,  -0.11200698,
+       0.11295408,   -0.0035217577, 0.054485075,  0.05184695,    0.064711206,
+       0.10989193,   0.11674786,    0.03490607,   0.07727357,    0.11390585,
+       -0.1863375,   -0.1034451,    -0.13945189,  -0.049401227,  -0.18767063,
+       0.042483903,  0.14233552,    0.13832581,   0.18350165,    0.14545603,
+       -0.028545704, 0.024939531,   0.050929718,  0.0076203286,  -0.0029723682,
+       -0.042484224, -0.11827596,   -0.09171104,  -0.10808628,   -0.16327988,
+       -0.2273378,   -0.0993647,    -0.017155107, 0.0023917493,  0.049272764,
+       0.0038534778, 0.054764505,   0.089753784,  0.06947234,    0.08014476,
+       -0.04544234,  -0.0497073,    -0.07135631,  -0.048929106,  -0.004042012,
+       -0.009284026, 0.018042054,   0.0036860977, -0.07427302,   -0.11434604,
+       -0.018995456, 0.031487543,   0.012834908,  0.019977754,   0.044256654,
+       -0.39292613,  -0.18519334,   -0.11651281,  -0.06809892,   0.011373677});
+
+  lstm.SetInputToForgetWeights(
+      {-0.0018401089, -0.004852237,  0.03698424,   0.014181704,   0.028273236,
+       -0.016726194,  -0.05249759,   -0.10204261,  0.00861066,    -0.040979505,
+       -0.009899187,  0.01923892,    -0.028177269, -0.08535103,   -0.14585495,
+       0.10662567,    -0.01909731,   -0.017883534, -0.0047269356, -0.045103323,
+       0.0030784295,  0.076784775,   0.07463696,   0.094531395,   0.0814421,
+       -0.12257899,   -0.033945758,  -0.031303465, 0.045630626,   0.06843887,
+       -0.13492945,   -0.012480007,  -0.0811829,   -0.07224499,   -0.09628791,
+       0.045100946,   0.0012300825,  0.013964662,  0.099372394,   0.02543059,
+       0.06958324,    0.034257296,   0.0482646,    0.06267997,    0.052625068,
+       0.12784666,    0.07077897,    0.025725935,  0.04165009,    0.07241905,
+       0.018668644,   -0.037377294,  -0.06277783,  -0.08833636,   -0.040120605,
+       -0.011405586,  -0.007808335,  -0.010301386, -0.005102167,  0.027717464,
+       0.05483423,    0.11449111,    0.11289652,   0.10939839,    0.13396506,
+       -0.08402166,   -0.01901462,   -0.044678304, -0.07720565,   0.014350063,
+       -0.11757958,   -0.0652038,    -0.08185733,  -0.076754324,  -0.092614375,
+       0.10405491,    0.052960336,   0.035755895,  0.035839386,   -0.012540553,
+       0.036881298,   0.02913376,    0.03420159,   0.05448447,    -0.054523353,
+       0.02582715,    0.02327355,    -0.011857179, -0.0011980024, -0.034641717,
+       -0.026125094,  -0.17582615,   -0.15923657,  -0.27486774,   -0.0006143371,
+       0.0001771948,  -8.470171e-05, 0.02651807,   0.045790765,   0.06956496});
+
+  lstm.SetInputToCellWeights(
+      {-0.04580283,   -0.09549462,   -0.032418985,  -0.06454633,
+       -0.043528453,  0.043018587,   -0.049152344,  -0.12418144,
+       -0.078985475,  -0.07596889,   0.019484362,   -0.11434962,
+       -0.0074034138, -0.06314844,   -0.092981495,  0.0062155537,
+       -0.025034338,  -0.0028890965, 0.048929527,   0.06235075,
+       0.10665918,    -0.032036792,  -0.08505916,   -0.10843358,
+       -0.13002433,   -0.036816437,  -0.02130134,   -0.016518239,
+       0.0047691227,  -0.0025825808, 0.066017866,   0.029991534,
+       -0.10652836,   -0.1037554,    -0.13056071,   -0.03266643,
+       -0.033702414,  -0.006473424,  -0.04611692,   0.014419339,
+       -0.025174323,  0.0396852,     0.081777506,   0.06157468,
+       0.10210095,    -0.009658194,  0.046511717,   0.03603906,
+       0.0069369148,  0.015960095,   -0.06507666,   0.09551598,
+       0.053568836,   0.06408714,    0.12835667,    -0.008714329,
+       -0.20211966,   -0.12093674,   0.029450472,   0.2849013,
+       -0.029227901,  0.1164364,     -0.08560263,   0.09941786,
+       -0.036999565,  -0.028842626,  -0.0033637602, -0.017012902,
+       -0.09720865,   -0.11193351,   -0.029155117,  -0.017936034,
+       -0.009768936,  -0.04223324,   -0.036159635,  0.06505112,
+       -0.021742892,  -0.023377212,  -0.07221364,   -0.06430552,
+       0.05453865,    0.091149814,   0.06387331,    0.007518393,
+       0.055960953,   0.069779344,   0.046411168,   0.10509911,
+       0.07463894,    0.0075130584,  0.012850982,   0.04555431,
+       0.056955688,   0.06555285,    0.050801456,   -0.009862683,
+       0.00826772,    -0.026555609,  -0.0073611983, -0.0014897042});
+
+  lstm.SetInputToOutputWeights(
+      {-0.0998932,   -0.07201956,  -0.052803773,  -0.15629593,  -0.15001918,
+       -0.07650751,  0.02359855,   -0.075155355,  -0.08037709,  -0.15093534,
+       0.029517552,  -0.04751393,  0.010350531,   -0.02664851,  -0.016839722,
+       -0.023121163, 0.0077019283, 0.012851257,   -0.05040649,  -0.0129761,
+       -0.021737747, -0.038305793, -0.06870586,   -0.01481247,  -0.001285394,
+       0.10124236,   0.083122835,  0.053313006,   -0.062235646, -0.075637154,
+       -0.027833903, 0.029774971,  0.1130802,     0.09218906,   0.09506135,
+       -0.086665764, -0.037162706, -0.038880914,  -0.035832845, -0.014481564,
+       -0.09825003,  -0.12048569,  -0.097665586,  -0.05287633,  -0.0964047,
+       -0.11366429,  0.035777505,  0.13568819,    0.052451383,  0.050649304,
+       0.05798951,   -0.021852335, -0.099848844,  0.014740475,  -0.078897946,
+       0.04974699,   0.014160473,  0.06973932,    0.04964942,   0.033364646,
+       0.08190124,   0.025535367,  0.050893165,   0.048514254,  0.06945813,
+       -0.078907564, -0.06707616,  -0.11844508,   -0.09986688,  -0.07509403,
+       0.06263226,   0.14925587,   0.20188436,    0.12098451,   0.14639415,
+       0.0015017595, -0.014267382, -0.03417257,   0.012711468,  0.0028300495,
+       -0.024758482, -0.05098548,  -0.0821182,    0.014225672,  0.021544158,
+       0.08949725,   0.07505268,   -0.0020780868, 0.04908258,   0.06476295,
+       -0.022907063, 0.027562456,  0.040185735,   0.019567577,  -0.015598739,
+       -0.049097303, -0.017121866, -0.083368234,  -0.02332002,  -0.0840956});
+
+  lstm.SetInputGateBias(
+      {0.02234832,  0.14757581,   0.18176508,  0.10380666,  0.053110216,
+       -0.06928846, -0.13942584,  -0.11816189, 0.19483899,  0.03652339,
+       -0.10250295, 0.036714908,  -0.18426876, 0.036065217, 0.21810818,
+       0.02383196,  -0.043370757, 0.08690144,  -0.04444982, 0.00030581196});
+
+  lstm.SetForgetGateBias({0.035185695, -0.042891346, -0.03032477, 0.23027696,
+                          0.11098921,  0.15378423,   0.09263801,  0.09790885,
+                          0.09508917,  0.061199076,  0.07665568,  -0.015443159,
+                          -0.03499149, 0.046190713,  0.08895977,  0.10899629,
+                          0.40694186,  0.06030037,   0.012413437, -0.06108739});
+
+  lstm.SetCellBias({-0.024379363, 0.0055531194, 0.23377132,   0.033463873,
+                    -0.1483596,   -0.10639995,  -0.091433935, 0.058573797,
+                    -0.06809782,  -0.07889636,  -0.043246906, -0.09829136,
+                    -0.4279842,   0.034901652,  0.18797937,   0.0075234566,
+                    0.016178843,  0.1749513,    0.13975595,   0.92058027});
+
+  lstm.SetOutputGateBias(
+      {0.046159424,  -0.0012809046, 0.03563469,   0.12648113, 0.027195795,
+       0.35373217,   -0.018957434,  0.008907322,  -0.0762701, 0.12018895,
+       0.04216877,   0.0022856654,  0.040952638,  0.3147856,  0.08225149,
+       -0.057416286, -0.14995944,   -0.008040261, 0.13208859, 0.029760877});
+
+  lstm.SetRecurrentToInputWeights(
+      {-0.001374326,   -0.078856036,   0.10672688,    0.029162422,
+       -0.11585556,    0.02557986,     -0.13446963,   -0.035785314,
+       -0.01244275,    0.025961924,    -0.02337298,   -0.044228926,
+       -0.055839065,   -0.046598054,   -0.010546039,  -0.06900766,
+       0.027239809,    0.022582639,    -0.013296484,  -0.05459212,
+       0.08981,        -0.045407712,   0.08682226,    -0.06867011,
+       -0.14390695,    -0.02916037,    0.000996957,   0.091420636,
+       0.14283475,     -0.07390571,    -0.06402044,   0.062524505,
+       -0.093129106,   0.04860203,     -0.08364217,   -0.08119002,
+       0.009352075,    0.22920375,     0.0016303885,  0.11583097,
+       -0.13732095,    0.012405723,    -0.07551853,   0.06343048,
+       0.12162708,     -0.031923793,   -0.014335606,  0.01790974,
+       -0.10650317,    -0.0724401,     0.08554849,    -0.05727212,
+       0.06556731,     -0.042729504,   -0.043227166,  0.011683251,
+       -0.013082158,   -0.029302018,   -0.010899579,  -0.062036745,
+       -0.022509435,   -0.00964907,    -0.01567329,   0.04260106,
+       -0.07787477,    -0.11576462,    0.017356863,   0.048673786,
+       -0.017577527,   -0.05527947,    -0.082487635,  -0.040137455,
+       -0.10820036,    -0.04666372,    0.022746278,   -0.07851417,
+       0.01068115,     0.032956902,    0.022433773,   0.0026891115,
+       0.08944216,     -0.0685835,     0.010513544,   0.07228705,
+       0.02032331,     -0.059686817,   -0.0005566496, -0.086984694,
+       0.040414046,    -0.1380399,     0.094208956,   -0.05722982,
+       0.012092817,    -0.04989123,    -0.086576,     -0.003399834,
+       -0.04696032,    -0.045747425,   0.10091314,    0.048676282,
+       -0.029037097,   0.031399418,    -0.0040285117, 0.047237843,
+       0.09504992,     0.041799378,    -0.049185462,  -0.031518843,
+       -0.10516937,    0.026374253,    0.10058866,    -0.0033195973,
+       -0.041975245,   0.0073591834,   0.0033782164,  -0.004325073,
+       -0.10167381,    0.042500053,    -0.01447153,   0.06464186,
+       -0.017142897,   0.03312627,     0.009205989,   0.024138335,
+       -0.011337001,   0.035530265,    -0.010912711,  0.0706555,
+       -0.005894094,   0.051841937,    -0.1401738,    -0.02351249,
+       0.0365468,      0.07590991,     0.08838724,    0.021681072,
+       -0.10086113,    0.019608743,    -0.06195883,   0.077335775,
+       0.023646897,    -0.095322326,   0.02233014,    0.09756986,
+       -0.048691444,   -0.009579111,   0.07595467,    0.11480546,
+       -0.09801813,    0.019894179,    0.08502348,    0.004032281,
+       0.037211012,    0.068537936,    -0.048005626,  -0.091520436,
+       -0.028379958,   -0.01556313,    0.06554592,    -0.045599163,
+       -0.01672207,    -0.020169014,   -0.011877351,  -0.20212261,
+       0.010889619,    0.0047078193,   0.038385306,   0.08540671,
+       -0.017140968,   -0.0035865551,  0.016678626,   0.005633034,
+       0.015963363,    0.00871737,     0.060130805,   0.028611384,
+       0.10109069,     -0.015060172,   -0.07894427,   0.06401885,
+       0.011584063,    -0.024466386,   0.0047652307,  -0.09041358,
+       0.030737216,    -0.0046374933,  0.14215417,    -0.11823516,
+       0.019899689,    0.006106124,    -0.027092824,  0.0786356,
+       0.05052217,     -0.058925,      -0.011402121,  -0.024987547,
+       -0.0013661642,  -0.06832946,    -0.015667673,  -0.1083353,
+       -0.00096863037, -0.06988685,    -0.053350925,  -0.027275559,
+       -0.033664223,   -0.07978348,    -0.025200296,  -0.017207067,
+       -0.058403496,   -0.055697463,   0.005798788,   0.12965427,
+       -0.062582195,   0.0013350133,   -0.10482091,   0.0379771,
+       0.072521195,    -0.0029455067,  -0.13797039,   -0.03628521,
+       0.013806405,    -0.017858358,   -0.01008298,   -0.07700066,
+       -0.017081132,   0.019358726,    0.0027079724,  0.004635139,
+       0.062634714,    -0.02338735,    -0.039547626,  -0.02050681,
+       0.03385117,     -0.083611414,   0.002862572,   -0.09421313,
+       0.058618143,    -0.08598433,    0.00972939,    0.023867095,
+       -0.053934585,   -0.023203006,   0.07452513,    -0.048767887,
+       -0.07314807,    -0.056307215,   -0.10433547,   -0.06440842,
+       0.04328182,     0.04389765,     -0.020006588,  -0.09076438,
+       -0.11652589,    -0.021705797,   0.03345259,    -0.010329105,
+       -0.025767034,   0.013057034,    -0.07316461,   -0.10145612,
+       0.06358255,     0.18531723,     0.07759293,    0.12006465,
+       0.1305557,      0.058638252,    -0.03393652,   0.09622831,
+       -0.16253184,    -2.4580743e-06, 0.079869635,   -0.070196845,
+       -0.005644518,   0.06857898,     -0.12598175,   -0.035084512,
+       0.03156317,     -0.12794146,    -0.031963028,  0.04692781,
+       0.030070418,    0.0071660685,   -0.095516115,  -0.004643372,
+       0.040170413,    -0.062104587,   -0.0037324072, 0.0554317,
+       0.08184801,     -0.019164372,   0.06791302,    0.034257166,
+       -0.10307039,    0.021943003,    0.046745934,   0.0790918,
+       -0.0265588,     -0.007824208,   0.042546265,   -0.00977924,
+       -0.0002440307,  -0.017384544,   -0.017990116,  0.12252321,
+       -0.014512694,   -0.08251313,    0.08861942,    0.13589665,
+       0.026351685,    0.012641483,    0.07466548,    0.044301085,
+       -0.045414884,   -0.051112458,   0.03444247,    -0.08502782,
+       -0.04106223,    -0.028126027,   0.028473156,   0.10467447});
+
+  lstm.SetRecurrentToForgetWeights(
+      {-0.057784554,  -0.026057621,  -0.068447545,   -0.022581743,
+       0.14811787,    0.10826372,    0.09471067,     0.03987225,
+       -0.0039523416, 0.00030638507, 0.053185795,    0.10572994,
+       0.08414449,    -0.022036452,  -0.00066928595, -0.09203576,
+       0.032950465,   -0.10985798,   -0.023809856,   0.0021431844,
+       -0.02196096,   -0.00326074,   0.00058621005,  -0.074678116,
+       -0.06193199,   0.055729095,   0.03736828,     0.020123724,
+       0.061878487,   -0.04729229,   0.034919553,    -0.07585433,
+       -0.04421272,   -0.044019096,  0.085488975,    0.04058006,
+       -0.06890133,   -0.030951202,  -0.024628663,   -0.07672815,
+       0.034293607,   0.08556707,    -0.05293577,    -0.033561368,
+       -0.04899627,   0.0241671,     0.015736353,    -0.095442444,
+       -0.029564252,  0.016493602,   -0.035026584,   0.022337519,
+       -0.026871363,  0.004780428,   0.0077918363,   -0.03601621,
+       0.016435321,   -0.03263031,   -0.09543275,    -0.047392778,
+       0.013454138,   0.028934088,   0.01685226,     -0.086110644,
+       -0.046250615,  -0.01847454,   0.047608484,    0.07339695,
+       0.034546845,   -0.04881143,   0.009128804,    -0.08802852,
+       0.03761666,    0.008096139,   -0.014454086,   0.014361001,
+       -0.023502491,  -0.0011840804, -0.07607001,    0.001856849,
+       -0.06509276,   -0.006021153,  -0.08570962,    -0.1451793,
+       0.060212336,   0.055259194,   0.06974018,     0.049454916,
+       -0.027794661,  -0.08077226,   -0.016179763,   0.1169753,
+       0.17213494,    -0.0056326236, -0.053934924,   -0.0124349,
+       -0.11520337,   0.05409887,    0.088759385,    0.0019655675,
+       0.0042065294,  0.03881498,    0.019844765,    0.041858196,
+       -0.05695512,   0.047233116,   0.038937137,    -0.06542224,
+       0.014429736,   -0.09719407,   0.13908425,     -0.05379757,
+       0.012321099,   0.082840554,   -0.029899208,   0.044217527,
+       0.059855383,   0.07711018,    -0.045319796,   0.0948846,
+       -0.011724666,  -0.0033288454, -0.033542685,   -0.04764985,
+       -0.13873616,   0.040668588,   0.034832682,    -0.015319203,
+       -0.018715994,  0.046002675,   0.0599172,      -0.043107376,
+       0.0294216,     -0.002314414,  -0.022424703,   0.0030315618,
+       0.0014641669,  0.0029166266,  -0.11878115,    0.013738511,
+       0.12375372,    -0.0006038222, 0.029104086,    0.087442465,
+       0.052958444,   0.07558703,    0.04817258,     0.044462286,
+       -0.015213451,  -0.08783778,   -0.0561384,     -0.003008196,
+       0.047060397,   -0.002058388,  0.03429439,     -0.018839769,
+       0.024734668,   0.024614193,   -0.042046934,   0.09597743,
+       -0.0043254104, 0.04320769,    0.0064070094,   -0.0019131786,
+       -0.02558259,   -0.022822596,  -0.023273505,   -0.02464396,
+       -0.10991725,   -0.006240552,  0.0074488563,   0.024044557,
+       0.04383914,    -0.046476185,  0.028658995,    0.060410924,
+       0.050786525,   0.009452605,   -0.0073054377,  -0.024810238,
+       0.0052906186,  0.0066939713,  -0.0020913032,  0.014515517,
+       0.015898481,   0.021362653,   -0.030262267,   0.016587038,
+       -0.011442813,  0.041154444,   -0.007631438,   -0.03423484,
+       -0.010977775,  0.036152758,   0.0066366293,   0.11915515,
+       0.02318443,    -0.041350313,  0.021485701,    -0.10906167,
+       -0.028218046,  -0.00954771,   0.020531068,    -0.11995105,
+       -0.03672871,   0.024019798,   0.014255957,    -0.05221243,
+       -0.00661567,   -0.04630967,   0.033188973,    0.10107534,
+       -0.014027541,  0.030796422,   -0.10270911,    -0.035999842,
+       0.15443139,    0.07684145,    0.036571592,    -0.035900835,
+       -0.0034699554, 0.06209149,    0.015920248,    -0.031122351,
+       -0.03858649,   0.01849943,    0.13872518,     0.01503974,
+       0.069941424,   -0.06948533,   -0.0088794185,  0.061282158,
+       -0.047401894,  0.03100163,    -0.041533746,   -0.10430945,
+       0.044574402,   -0.01425562,   -0.024290353,   0.034563623,
+       0.05866852,    0.023947537,   -0.09445152,    0.035450947,
+       0.02247216,    -0.0042998926, 0.061146557,    -0.10250651,
+       0.020881841,   -0.06747029,   0.10062043,     -0.0023941975,
+       0.03532124,    -0.016341697,  0.09685456,     -0.016764693,
+       0.051808182,   0.05875331,    -0.04536488,    0.001626336,
+       -0.028892258,  -0.01048663,   -0.009793449,   -0.017093895,
+       0.010987891,   0.02357273,    -0.00010856845, 0.0099760275,
+       -0.001845119,  -0.03551521,   0.0018358806,   0.05763657,
+       -0.01769146,   0.040995963,   0.02235177,     -0.060430344,
+       0.11475477,    -0.023854522,  0.10071741,     0.0686208,
+       -0.014250481,  0.034261297,   0.047418304,    0.08562733,
+       -0.030519066,  0.0060542435,  0.014653856,    -0.038836084,
+       0.04096551,    0.032249358,   -0.08355519,    -0.026823482,
+       0.056386515,   -0.010401743,  -0.028396193,   0.08507674,
+       0.014410365,   0.020995233,   0.17040324,     0.11511526,
+       0.02459721,    0.0066619175,  0.025853224,    -0.023133837,
+       -0.081302024,  0.017264642,   -0.009585969,   0.09491168,
+       -0.051313367,  0.054532815,   -0.014298593,   0.10657464,
+       0.007076659,   0.10964551,    0.0409152,      0.008275321,
+       -0.07283536,   0.07937492,    0.04192024,     -0.1075027});
+
+  lstm.SetRecurrentToCellWeights(
+      {-0.037322544,   0.018592842,   0.0056175636,  -0.06253426,
+       0.055647098,    -0.05713207,   -0.05626563,   0.005559383,
+       0.03375411,     -0.025757805,  -0.088049285,  0.06017052,
+       -0.06570978,    0.007384076,   0.035123326,   -0.07920549,
+       0.053676967,    0.044480428,   -0.07663568,   0.0071805613,
+       0.08089997,     0.05143358,    0.038261272,   0.03339287,
+       -0.027673481,   0.044746667,   0.028349208,   0.020090483,
+       -0.019443132,   -0.030755889,  -0.0040000007, 0.04465846,
+       -0.021585021,   0.0031670958,  0.0053199246,  -0.056117613,
+       -0.10893326,    0.076739706,   -0.08509834,   -0.027997585,
+       0.037871376,    0.01449768,    -0.09002357,   -0.06111149,
+       -0.046195522,   0.0422062,     -0.005683705,  -0.1253618,
+       -0.012925729,   -0.04890792,   0.06985068,    0.037654128,
+       0.03398274,     -0.004781977,  0.007032333,   -0.031787455,
+       0.010868644,    -0.031489216,  0.09525667,    0.013939797,
+       0.0058680447,   0.0167067,     0.02668468,    -0.04797466,
+       -0.048885044,   -0.12722108,   0.035304096,   0.06554885,
+       0.00972396,     -0.039238118,  -0.05159735,   -0.11329045,
+       0.1613692,      -0.03750952,   0.06529313,    -0.071974665,
+       -0.11769596,    0.015524369,   -0.0013754242, -0.12446318,
+       0.02786344,     -0.014179351,  0.005264273,   0.14376344,
+       0.015983658,    0.03406988,    -0.06939408,   0.040699873,
+       0.02111075,     0.09669095,    0.041345075,   -0.08316494,
+       -0.07684199,    -0.045768797,  0.032298047,   -0.041805092,
+       0.0119405,      0.0061010392,  0.12652606,    0.0064572375,
+       -0.024950314,   0.11574242,    0.04508852,    -0.04335324,
+       0.06760663,     -0.027437469,  0.07216407,    0.06977076,
+       -0.05438599,    0.034033038,   -0.028602652,  0.05346137,
+       0.043184172,    -0.037189785,  0.10420091,    0.00882477,
+       -0.054019816,   -0.074273005,  -0.030617684,  -0.0028467078,
+       0.024302477,    -0.0038869337, 0.005332455,   0.0013399826,
+       0.04361412,     -0.007001822,  0.09631092,    -0.06702025,
+       -0.042049985,   -0.035070654,  -0.04103342,   -0.10273396,
+       0.0544271,      0.037184782,   -0.13150354,   -0.0058036847,
+       -0.008264958,   0.042035464,   0.05891794,    0.029673764,
+       0.0063542654,   0.044788733,   0.054816857,   0.062257513,
+       -0.00093483756, 0.048938446,   -0.004952862,  -0.007730018,
+       -0.04043371,    -0.017094059,  0.07229206,    -0.023670016,
+       -0.052195564,   -0.025616996,  -0.01520939,   0.045104615,
+       -0.007376126,   0.003533447,   0.006570588,   0.056037236,
+       0.12436656,     0.051817212,   0.028532185,   -0.08686856,
+       0.11868599,     0.07663395,    -0.07323171,   0.03463402,
+       -0.050708205,   -0.04458982,   -0.11590894,   0.021273347,
+       0.1251325,      -0.15313013,   -0.12224372,   0.17228661,
+       0.023029093,    0.086124025,   0.006445803,   -0.03496501,
+       0.028332196,    0.04449512,    -0.042436164,  -0.026587414,
+       -0.006041347,   -0.09292539,   -0.05678812,   0.03897832,
+       0.09465633,     0.008115513,   -0.02171956,   0.08304309,
+       0.071401566,    0.019622514,   0.032163795,   -0.004167056,
+       0.02295182,     0.030739572,   0.056506045,   0.004612461,
+       0.06524936,     0.059999723,   0.046395954,   -0.0045512207,
+       -0.1335546,     -0.030136576,  0.11584653,    -0.014678886,
+       0.0020118146,   -0.09688814,   -0.0790206,    0.039770417,
+       -0.0329582,     0.07922767,    0.029322514,   0.026405897,
+       0.04207835,     -0.07073373,   0.063781224,   0.0859677,
+       -0.10925287,    -0.07011058,   0.048005477,   0.03438226,
+       -0.09606514,    -0.006669445,  -0.043381985,  0.04240257,
+       -0.06955775,    -0.06769346,   0.043903265,   -0.026784198,
+       -0.017840602,   0.024307009,   -0.040079936,  -0.019946516,
+       0.045318738,    -0.12233574,   0.026170589,   0.0074471775,
+       0.15978073,     0.10185836,    0.10298046,    -0.015476589,
+       -0.039390966,   -0.072174534,  0.0739445,     -0.1211869,
+       -0.0347889,     -0.07943156,   0.014809798,   -0.12412325,
+       -0.0030663363,  0.039695457,   0.0647603,     -0.08291318,
+       -0.018529687,   -0.004423833,  0.0037507233,  0.084633216,
+       -0.01514876,    -0.056505352,  -0.012800942,  -0.06994386,
+       0.012962922,    -0.031234352,  0.07029052,    0.016418684,
+       0.03618972,     0.055686004,   -0.08663945,   -0.017404709,
+       -0.054761406,   0.029065743,   0.052404847,   0.020238016,
+       0.0048197987,   -0.0214882,    0.07078733,    0.013016777,
+       0.06262858,     0.009184685,   0.020785125,   -0.043904778,
+       -0.0270329,     -0.03299152,   -0.060088247,  -0.015162964,
+       -0.001828936,   0.12642565,    -0.056757294,  0.013586685,
+       0.09232601,     -0.035886683,  0.06000002,    0.05229691,
+       -0.052580316,   -0.082029596,  -0.010794592,  0.012947712,
+       -0.036429964,   -0.085508935,  -0.13127148,   -0.017744139,
+       0.031502828,    0.036232427,   -0.031581745,  0.023051167,
+       -0.05325106,    -0.03421577,   0.028793324,   -0.034633752,
+       -0.009881397,   -0.043551125,  -0.018609839,  0.0019097115,
+       -0.008799762,   0.056595087,   0.0022273948,  0.055752404});
+
+  lstm.SetRecurrentToOutputWeights({
+      0.025825322,   -0.05813119,  0.09495884,   -0.045984812,   -0.01255415,
+      -0.0026479573, -0.08196161,  -0.054914974, -0.0046604523,  -0.029587349,
+      -0.044576716,  -0.07480124,  -0.082868785, 0.023254942,    0.027502948,
+      -0.0039728214, -0.08683098,  -0.08116779,  -0.014675607,   -0.037924774,
+      -0.023314456,  -0.007401714, -0.09255757,  0.029460307,    -0.08829125,
+      -0.005139627,  -0.08989442,  -0.0555066,   0.13596267,     -0.025062224,
+      -0.048351806,  -0.03850004,  0.07266485,   -0.022414139,   0.05940088,
+      0.075114764,   0.09597592,   -0.010211725, -0.0049794707,  -0.011523867,
+      -0.025980417,  0.072999895,  0.11091378,   -0.081685916,   0.014416728,
+      0.043229222,   0.034178585,  -0.07530371,  0.035837382,    -0.085607,
+      -0.007721233,  -0.03287832,  -0.043848954, -0.06404588,    -0.06632928,
+      -0.073643476,  0.008214239,  -0.045984086, 0.039764922,    0.03474462,
+      0.060612556,   -0.080590084, 0.049127717,  0.04151091,     -0.030063879,
+      0.008801774,   -0.023021035, -0.019558564, 0.05158114,     -0.010947698,
+      -0.011825728,  0.0075720972, 0.0699727,    -0.0039981045,  0.069350146,
+      0.08799282,    0.016156472,  0.035502106,  0.11695009,     0.006217345,
+      0.13392477,    -0.037875112, 0.025745004,  0.08940699,     -0.00924166,
+      0.0046702605,  -0.036598757, -0.08811812,  0.10522024,     -0.032441203,
+      0.008176899,   -0.04454919,  0.07058152,   0.0067963637,   0.039206743,
+      0.03259838,    0.03725492,   -0.09515802,  0.013326398,    -0.052055415,
+      -0.025676316,  0.03198509,   -0.015951829, -0.058556724,   0.036879618,
+      0.043357447,   0.028362012,  -0.05908629,  0.0059240665,   -0.04995891,
+      -0.019187413,  0.0276265,    -0.01628143,  0.0025863599,   0.08800015,
+      0.035250366,   -0.022165963, -0.07328642,  -0.009415526,   -0.07455109,
+      0.11690406,    0.0363299,    0.07411125,   0.042103454,    -0.009660886,
+      0.019076364,   0.018299393,  -0.046004917, 0.08891175,     0.0431396,
+      -0.026327137,  -0.051502608, 0.08979574,   -0.051670972,   0.04940282,
+      -0.07491107,   -0.021240504, 0.022596184,  -0.034280192,   0.060163025,
+      -0.058211457,  -0.051837247, -0.01349775,  -0.04639988,    -0.035936575,
+      -0.011681591,  0.064818054,  0.0073146066, -0.021745546,   -0.043124277,
+      -0.06471268,   -0.07053354,  -0.029321948, -0.05330136,    0.016933719,
+      -0.053782392,  0.13747959,   -0.1361751,   -0.11569455,    0.0033329215,
+      0.05693899,    -0.053219706, 0.063698,     0.07977434,     -0.07924483,
+      0.06936997,    0.0034815092, -0.007305279, -0.037325785,   -0.07251102,
+      -0.033633437,  -0.08677009,  0.091591336,  -0.14165086,    0.021752775,
+      0.019683983,   0.0011612234, -0.058154266, 0.049996935,    0.0288841,
+      -0.0024567875, -0.14345716,  0.010955264,  -0.10234828,    0.1183656,
+      -0.0010731248, -0.023590032, -0.072285876, -0.0724771,     -0.026382286,
+      -0.0014920527, 0.042667855,  0.0018776858, 0.02986552,     0.009814309,
+      0.0733756,     0.12289186,   0.018043943,  -0.0458958,     0.049412545,
+      0.033632483,   0.05495232,   0.036686596,  -0.013781798,   -0.010036754,
+      0.02576849,    -0.08307328,  0.010112348,  0.042521734,    -0.05869831,
+      -0.071689695,  0.03876447,   -0.13275425,  -0.0352966,     -0.023077697,
+      0.10285965,    0.084736146,  0.15568255,   -0.00040734606, 0.027835453,
+      -0.10292561,   -0.032401145, 0.10053256,   -0.026142767,   -0.08271222,
+      -0.0030240538, -0.016368777, 0.1070414,    0.042672627,    0.013456989,
+      -0.0437609,    -0.022309763, 0.11576483,   0.04108048,     0.061026827,
+      -0.0190714,    -0.0869359,   0.037901703,  0.0610107,      0.07202949,
+      0.01675338,    0.086139716,  -0.08795751,  -0.014898893,   -0.023771819,
+      -0.01965048,   0.007955471,  -0.043740474, 0.03346837,     -0.10549954,
+      0.090567775,   0.042013682,  -0.03176985,  0.12569028,     -0.02421228,
+      -0.029526481,  0.023851605,  0.031539805,  0.05292009,     -0.02344001,
+      -0.07811758,   -0.08834428,  0.10094801,   0.16594367,     -0.06861939,
+      -0.021256343,  -0.041093912, -0.06669611,  0.035498552,    0.021757556,
+      -0.09302526,   -0.015403468, -0.06614931,  -0.051798206,   -0.013874718,
+      0.03630673,    0.010412845,  -0.08077351,  0.046185967,    0.0035662893,
+      0.03541868,    -0.094149634, -0.034814864, 0.003128424,    -0.020674974,
+      -0.03944324,   -0.008110165, -0.11113267,  0.08484226,     0.043586485,
+      0.040582247,   0.0968012,    -0.065249965, -0.028036479,   0.0050708856,
+      0.0017462453,  0.0326779,    0.041296225,  0.09164146,     -0.047743853,
+      -0.015952192,  -0.034451712, 0.084197424,  -0.05347844,    -0.11768019,
+      0.085926116,   -0.08251791,  -0.045081906, 0.0948852,      0.068401024,
+      0.024856757,   0.06978981,   -0.057309967, -0.012775832,   -0.0032452994,
+      0.01977615,    -0.041040014, -0.024264973, 0.063464895,    0.05431621,
+  });
+
+  lstm.SetCellToInputWeights(
+      {0.040369894, 0.030746894,  0.24704495,  0.018586371,  -0.037586458,
+       -0.15312155, -0.11812848,  -0.11465643, 0.20259799,   0.11418174,
+       -0.10116027, -0.011334949, 0.12411352,  -0.076769054, -0.052169047,
+       0.21198851,  -0.38871562,  -0.09061183, -0.09683246,  -0.21929175});
+
+  lstm.SetCellToForgetWeights(
+      {-0.01998659,  -0.15568835,  -0.24248174,   -0.012770197, 0.041331276,
+       -0.072311886, -0.052123554, -0.0066330447, -0.043891653, 0.036225766,
+       -0.047248036, 0.021479502,  0.033189066,   0.11952997,   -0.020432774,
+       0.64658105,   -0.06650122,  -0.03467612,   0.095340036,  0.23647355});
+
+  lstm.SetCellToOutputWeights(
+      {0.08286371,  -0.08261836, -0.51210177, 0.002913762, 0.17764764,
+       -0.5495371,  -0.08460716, -0.24552552, 0.030037103, 0.04123544,
+       -0.11940523, 0.007358328, 0.1890978,   0.4833202,   -0.34441817,
+       0.36312827,  -0.26375428, 0.1457655,   -0.19724406, 0.15548733});
+
+  lstm.SetProjectionWeights(
+      {-0.009802181,  0.09401916,    0.0717386,     -0.13895074,  0.09641832,
+       0.060420845,   0.08539281,    0.054285463,   0.061395317,  0.034448683,
+       -0.042991187,  0.019801661,   -0.16840284,   -0.015726732, -0.23041931,
+       -0.024478018,  -0.10959692,   -0.013875541,  0.18600968,   -0.061274476,
+       0.0138165,     -0.08160894,   -0.07661644,   0.032372914,  0.16169067,
+       0.22465782,    -0.03993472,   -0.004017731,  0.08633481,   -0.28869787,
+       0.08682067,    0.17240396,    0.014975425,   0.056431185,  0.031037588,
+       0.16702051,    0.0077946745,  0.15140012,    0.29405436,   0.120285,
+       -0.188994,     -0.027265169,  0.043389652,   -0.022061434, 0.014777949,
+       -0.20203483,   0.094781205,   0.19100232,    0.13987629,   -0.036132768,
+       -0.06426278,   -0.05108664,   0.13221376,    0.009441198,  -0.16715929,
+       0.15859416,    -0.040437475,  0.050779544,   -0.022187516, 0.012166504,
+       0.027685808,   -0.07675938,   -0.0055694645, -0.09444123,  0.0046453946,
+       0.050794356,   0.10770313,    -0.20790008,   -0.07149004,  -0.11425117,
+       0.008225835,   -0.035802525,  0.14374903,    0.15262283,   0.048710253,
+       0.1847461,     -0.007487823,  0.11000021,    -0.09542012,  0.22619456,
+       -0.029149994,  0.08527916,    0.009043713,   0.0042746216, 0.016261552,
+       0.022461696,   0.12689082,    -0.043589946,  -0.12035478,  -0.08361797,
+       -0.050666027,  -0.1248618,    -0.1275799,    -0.071875185, 0.07377272,
+       0.09944291,    -0.18897448,   -0.1593054,    -0.06526116,  -0.040107165,
+       -0.004618631,  -0.067624845,  -0.007576253,  0.10727444,   0.041546922,
+       -0.20424393,   0.06907816,    0.050412357,   0.00724631,   0.039827548,
+       0.12449835,    0.10747581,    0.13708383,    0.09134148,   -0.12617786,
+       -0.06428341,   0.09956831,    0.1208086,     -0.14676677,  -0.0727722,
+       0.1126304,     0.010139365,   0.015571211,   -0.038128063, 0.022913318,
+       -0.042050496,  0.16842307,    -0.060597885,  0.10531834,   -0.06411776,
+       -0.07451711,   -0.03410368,   -0.13393489,   0.06534304,   0.003620307,
+       0.04490757,    0.05970546,    0.05197996,    0.02839995,   0.10434969,
+       -0.013699693,  -0.028353551,  -0.07260381,   0.047201227,  -0.024575593,
+       -0.036445823,  0.07155557,    0.009672501,   -0.02328883,  0.009533515,
+       -0.03606021,   -0.07421458,   -0.028082801,  -0.2678904,   -0.13221288,
+       0.18419984,    -0.13012612,   -0.014588381,  -0.035059117, -0.04824723,
+       0.07830115,    -0.056184657,  0.03277091,    0.025466874,  0.14494097,
+       -0.12522776,   -0.098633975,  -0.10766018,   -0.08317623,  0.08594209,
+       0.07749552,    0.039474737,   0.1776665,     -0.07409566,  -0.0477268,
+       0.29323658,    0.10801441,    0.1154011,     0.013952499,  0.10739139,
+       0.10708251,    -0.051456142,  0.0074137426,  -0.10430189,  0.10034707,
+       0.045594677,   0.0635285,     -0.0715442,    -0.089667566, -0.10811871,
+       0.00026344223, 0.08298446,    -0.009525053,  0.006585689,  -0.24567553,
+       -0.09450807,   0.09648481,    0.026996298,   -0.06419476,  -0.04752702,
+       -0.11063944,   -0.23441927,   -0.17608605,   -0.052156363, 0.067035615,
+       0.19271925,    -0.0032889997, -0.043264326,  0.09663576,   -0.057112187,
+       -0.10100678,   0.0628376,     0.04447668,    0.017961001,  -0.10094388,
+       -0.10190601,   0.18335468,    0.10494553,    -0.052095775, -0.0026118709,
+       0.10539724,    -0.04383912,   -0.042349473,  0.08438151,   -0.1947263,
+       0.02251204,    0.11216432,    -0.10307853,   0.17351969,   -0.039091777,
+       0.08066188,    -0.00561982,   0.12633002,    0.11335965,   -0.0088127935,
+       -0.019777594,  0.06864014,    -0.059751723,  0.016233567,  -0.06894641,
+       -0.28651384,   -0.004228674,  0.019708522,   -0.16305895,  -0.07468996,
+       -0.0855457,    0.099339016,   -0.07580735,   -0.13775392,  0.08434318,
+       0.08330512,    -0.12131499,   0.031935584,   0.09180414,   -0.08876437,
+       -0.08049874,   0.008753825,   0.03498998,    0.030215185,  0.03907079,
+       0.089751154,   0.029194152,   -0.03337423,   -0.019092513, 0.04331237,
+       0.04299654,    -0.036394123,  -0.12915532,   0.09793732,   0.07512415,
+       -0.11319543,   -0.032502122,  0.15661901,    0.07671967,   -0.005491124,
+       -0.19379048,   -0.218606,     0.21448623,    0.017840758,  0.1416943,
+       -0.07051762,   0.19488361,    0.02664691,    -0.18104725,  -0.09334311,
+       0.15026465,    -0.15493552,   -0.057762887,  -0.11604192,  -0.262013,
+       -0.01391798,   0.012185008,   0.11156489,    -0.07483202,  0.06693364,
+       -0.26151478,   0.046425626,   0.036540434,   -0.16435726,  0.17338543,
+       -0.21401681,   -0.11385144,   -0.08283257,   -0.069031075, 0.030635102,
+       0.010969227,   0.11109743,    0.010919218,   0.027526086,  0.13519906,
+       0.01891392,    -0.046839405,  -0.040167913,  0.017953383,  -0.09700955,
+       0.0061885654,  -0.07000971,   0.026893595,   -0.038844477, 0.14543656});
+
+  static float lstm_input[][20] = {
+      {// Batch0: 4 (input_sequence_size) * 5 (n_input)
+       0.787926, 0.151646, 0.071352, 0.118426, 0.458058, 0.596268, 0.998386,
+       0.568695, 0.864524, 0.571277, 0.073204, 0.296072, 0.743333, 0.069199,
+       0.045348, 0.867394, 0.291279, 0.013714, 0.482521, 0.626339},
+
+      {// Batch1: 4 (input_sequence_size) * 5 (n_input)
+       0.295743, 0.544053, 0.690064, 0.858138, 0.497181, 0.642421, 0.524260,
+       0.134799, 0.003639, 0.162482, 0.640394, 0.930399, 0.050782, 0.432485,
+       0.988078, 0.082922, 0.563329, 0.865614, 0.333232, 0.259916}};
+
+  static float lstm_fw_golden_output[][64] = {
+      {// Batch0: 4 (input_sequence_size) * 16 (n_output)
+       -0.00396806, 0.029352,     -0.00279226, 0.0159977,   -0.00835576,
+       -0.0211779,  0.0283512,    -0.0114597,  0.00907307,  -0.0244004,
+       -0.0152191,  -0.0259063,   0.00914318,  0.00415118,  0.017147,
+       0.0134203,   -0.0166936,   0.0381209,   0.000889694, 0.0143363,
+       -0.0328911,  -0.0234288,   0.0333051,   -0.012229,   0.0110322,
+       -0.0457725,  -0.000832209, -0.0202817,  0.0327257,   0.0121308,
+       0.0155969,   0.0312091,    -0.0213783,  0.0350169,   0.000324794,
+       0.0276012,   -0.0263374,   -0.0371449,  0.0446149,   -0.0205474,
+       0.0103729,   -0.0576349,   -0.0150052,  -0.0292043,  0.0376827,
+       0.0136115,   0.0243435,    0.0354492,   -0.0189322,  0.0464512,
+       -0.00251373, 0.0225745,    -0.0308346,  -0.0317124,  0.0460407,
+       -0.0189395,  0.0149363,    -0.0530162,  -0.0150767,  -0.0340193,
+       0.0286833,   0.00824207,   0.0264887,   0.0305169},
+      {// Batch1: 4 (input_sequence_size) * 16 (n_output)
+       -0.013869,    0.0287268,   -0.00334693, 0.00733398,  -0.0287926,
+       -0.0186926,   0.0193662,   -0.0115437,  0.00422612,  -0.0345232,
+       0.00223253,   -0.00957321, 0.0210624,   0.013331,    0.0150954,
+       0.02168,      -0.0141913,  0.0322082,   0.00227024,  0.0260507,
+       -0.0188721,   -0.0296489,  0.0399134,   -0.0160509,  0.0116039,
+       -0.0447318,   -0.0150515,  -0.0277406,  0.0316596,   0.0118233,
+       0.0214762,    0.0293641,   -0.0204549,  0.0450315,   -0.00117378,
+       0.0167673,    -0.0375007,  -0.0238314,  0.038784,    -0.0174034,
+       0.0131743,    -0.0506589,  -0.0048447,  -0.0240239,  0.0325789,
+       0.00790065,   0.0220157,   0.0333314,   -0.0264787,  0.0387855,
+       -0.000764675, 0.0217599,   -0.037537,   -0.0335206,  0.0431679,
+       -0.0211424,   0.010203,    -0.062785,   -0.00832363, -0.025181,
+       0.0412031,    0.0118723,   0.0239643,   0.0394009}};
+
+  static float lstm_combined_golden_output[][64] = {
+      {-0.022014, 0.073544,  -0.002235, 0.040068,  -0.037136, -0.052788,
+       0.075325,  -0.029378, 0.024298,  -0.07733,  -0.030674, -0.060229,
+       0.040599,  0.011608,  0.042005,  0.045977,  -0.039225, 0.076294,
+       0.000735,  0.032852,  -0.069869, -0.053312, 0.073527,  -0.028136,
+       0.021585,  -0.102679, -0.004327, -0.043304, 0.072861,  0.027077,
+       0.034558,  0.068292,  -0.036292, 0.069832,  -0.003032, 0.053829,
+       -0.043821, -0.072713, 0.085029,  -0.040374, 0.020014,  -0.104521,
+       -0.034504, -0.059759, 0.062569,  0.025652,  0.049306,  0.061189,
+       -0.025146, 0.079643,  -0.005188, 0.033080,  -0.048079, -0.048082,
+       0.069369,  -0.028900, 0.024572,  -0.077547, -0.022517, -0.054477,
+       0.038857,  0.013336,  0.043234,  0.044788},
+      {-0.039186, 0.070792,  -0.005913, 0.02642,   -0.068274, -0.05022,
+       0.061444,  -0.031241, 0.014996,  -0.094544, -0.004146, -0.03464,
+       0.058981,  0.026097,  0.039781,  0.058408,  -0.031887, 0.069252,
+       0.00576,   0.054062,  -0.042801, -0.059974, 0.085272,  -0.034453,
+       0.026097,  -0.0959,   -0.031164, -0.058699, 0.06839,   0.020512,
+       0.044727,  0.063609,  -0.039863, 0.084819,  -0.003909, 0.028666,
+       -0.075677, -0.045125, 0.070379,  -0.033895, 0.022111,  -0.097184,
+       -0.004921, -0.040851, 0.062316,  0.017435,  0.041437,  0.064568,
+       -0.039656, 0.060726,  -0.003402, 0.036854,  -0.056503, -0.058554,
+       0.068588,  -0.034879, 0.01352,   -0.09962,  -0.01434,  -0.039505,
+       0.065133,  0.024321,  0.038473,  0.062438}};
+
+  const int input_sequence_size = lstm.sequence_length() * lstm.num_inputs();
+  EXPECT_EQ(input_sequence_size, 20);
+  float* batch0_start = lstm_input[0];
+  float* batch0_end = batch0_start + input_sequence_size;
+  lstm.SetInput(0, batch0_start, batch0_end);
+
+  float* batch1_start = lstm_input[1];
+  float* batch1_end = batch1_start + input_sequence_size;
+  lstm.SetInput(input_sequence_size, batch1_start, batch1_end);
+
+  lstm.Invoke();
+
+  const int output_sequence_size =
+      lstm.sequence_length() * lstm.num_fw_outputs();
+  EXPECT_EQ(output_sequence_size, 64);
+  std::vector<float> expected;
+  const float* golden_start_batch0 = lstm_fw_golden_output[0];
+  const float* golden_end_batch0 = golden_start_batch0 + output_sequence_size;
+  expected.insert(expected.end(), golden_start_batch0, golden_end_batch0);
+
+  const float* golden_start_batch1 = lstm_fw_golden_output[1];
+  const float* golden_end_batch1 = golden_start_batch1 + output_sequence_size;
+  expected.insert(expected.end(), golden_start_batch1, golden_end_batch1);
+  EXPECT_THAT(lstm.GetFwOutput(), ElementsAreArray(ArrayFloatNear(expected)));
+
+  // Check if the sum of forward backward matches the golden.
+  expected.clear();
+  golden_start_batch0 = lstm_combined_golden_output[0];
+  golden_end_batch0 = golden_start_batch0 + output_sequence_size;
+  expected.insert(expected.end(), golden_start_batch0, golden_end_batch0);
+
+  golden_start_batch1 = lstm_combined_golden_output[1];
+  golden_end_batch1 = golden_start_batch1 + output_sequence_size;
+  expected.insert(expected.end(), golden_start_batch1, golden_end_batch1);
+
+  std::vector<float> combined;
+  for (int i = 0; i < lstm.GetFwOutput().size(); ++i) {
+    combined.push_back(lstm.GetFwOutput()[i] + lstm.GetBwOutput()[i]);
+  }
+  EXPECT_THAT(combined, ElementsAreArray(ArrayFloatNear(expected)));
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/lstm_eval.cc b/tensorflow/lite/kernels/lstm_eval.cc
index 0c6a462d29..50b2bca7b5 100644
--- a/tensorflow/lite/kernels/lstm_eval.cc
+++ b/tensorflow/lite/kernels/lstm_eval.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include "tensorflow/lite/kernels/lstm_eval.h"
 
-#include <stdint.h>
+#include <cstdint>
 
 #include "tensorflow/lite/kernels/internal/kernel_utils.h"
 #include "tensorflow/lite/kernels/internal/tensor_utils.h"
@@ -794,7 +794,7 @@ TfLiteStatus EvalFloat(
       // If this is the forward_sequence, step forward, otherwise step
       // backwards.
       const int t_rel = forward_sequence ? t : max_time - t - 1;
-      const float* input_ptr = input->data.f + t_rel * input_step;
+      const float* input_ptr_batch = input->data.f + t_rel * input_step;
       if (aux_input) {
         aux_input_ptr = aux_input->data.f + t_rel * input_step;
       }
@@ -802,7 +802,7 @@ TfLiteStatus EvalFloat(
           output->data.f + t_rel * output_step + output_offset;
 
       LstmStepWithAuxInput(
-          input_ptr, input_to_input_weights_ptr,
+          input_ptr_batch, input_to_input_weights_ptr,
           input_to_forget_weights->data.f, input_to_cell_weights->data.f,
           input_to_output_weights->data.f, aux_input_ptr,
           aux_input_to_input_weights_ptr, aux_input_to_forget_weights_ptr,
@@ -826,12 +826,24 @@ TfLiteStatus EvalFloat(
         // If this is the forward_sequence, step forward, otherwise step
         // backwards.
         const int t_rel = forward_sequence ? t : max_time - t - 1;
-        const float* input_ptr = input->data.f + t_rel * input_step;
+        const int time_offset = b * max_time + t_rel;
+        const float* input_ptr = input->data.f + time_offset * input_step;
         if (aux_input) {
-          aux_input_ptr = aux_input->data.f + t_rel * input_step;
+          aux_input_ptr = aux_input->data.f + time_offset * input_step;
         }
-        float* output_ptr_time =
-            output->data.f + t_rel * output_step + output_offset;
+        float* output_ptr =
+            output->data.f + time_offset * output_step + output_offset;
+
+        // Offset the {activation,cell}_state pointers to the right batch.
+        float* activation_state_ptr =
+            activation_state->data.f + b * output_batch_leading_dim;
+        float* cell_state_ptr = cell_state->data.f + b * n_cell;
+        // Offset the scratch pointers to the right batch.
+        float* input_gate_scratch_ptr =
+            input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
+        float* forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
+        float* cell_scratch_ptr = cell_scratch + b * n_cell;
+        float* output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
 
         LstmStepWithAuxInput(
             input_ptr, input_to_input_weights_ptr,
@@ -847,9 +859,9 @@ TfLiteStatus EvalFloat(
             output_gate_bias->data.f, projection_weights_ptr,
             projection_bias_ptr, params, /*n_batch=*/1, n_cell, n_input,
             aux_input_size, n_output, output_batch_leading_dim,
-            activation_state->data.f, cell_state->data.f, input_gate_scratch,
-            forget_gate_scratch, cell_scratch, output_gate_scratch,
-            output_ptr_time);
+            activation_state_ptr, cell_state_ptr, input_gate_scratch_ptr,
+            forget_gate_scratch_ptr, cell_scratch_ptr, output_gate_scratch_ptr,
+            output_ptr);
       }
     }
   }
@@ -991,9 +1003,6 @@ TfLiteStatus EvalHybrid(
   const float* cell_bias_ptr = cell_bias->data.f;
   const float* output_gate_bias_ptr = output_gate_bias->data.f;
 
-  float* output_state_ptr = output_state->data.f;
-  float* cell_state_ptr = cell_state->data.f;
-
   // Temporary storage for quantized values and scaling factors.
   int8_t* quantized_input_ptr =
       reinterpret_cast<int8_t*>(input_quantized->data.uint8);
@@ -1051,38 +1060,40 @@ TfLiteStatus EvalHybrid(
       // If this is the forward_sequence, step forward, otherwise step
       // backwards.
       const int t_rel = forward_sequence ? t : max_time - t - 1;
-      const float* input_ptr = input->data.f + t_rel * input_step;
+      const float* input_ptr_batch = input->data.f + t_rel * input_step;
       if (aux_input) {
         aux_input_ptr = aux_input->data.f + t_rel * input_step;
       }
-      float* output_ptr = output->data.f + t_rel * output_step + output_offset;
+      float* output_ptr_batch =
+          output->data.f + t_rel * output_step + output_offset;
 
       LstmStepWithAuxInput(
-          input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
-          input_to_forget_weights_ptr, input_to_forget_weights_scale,
-          input_to_cell_weights_ptr, input_to_cell_weights_scale,
-          input_to_output_weights_ptr, input_to_output_weights_scale,
-          aux_input_ptr, aux_input_to_input_weights_ptr,
-          aux_input_to_input_weights_scale, aux_input_to_forget_weights_ptr,
-          aux_input_to_forget_weights_scale, aux_input_to_cell_weights_ptr,
-          aux_input_to_cell_weights_scale, aux_input_to_output_weights_ptr,
-          aux_input_to_output_weights_scale, recurrent_to_input_weights_ptr,
-          recurrent_to_input_weights_scale, recurrent_to_forget_weights_ptr,
-          recurrent_to_forget_weights_scale, recurrent_to_cell_weights_ptr,
-          recurrent_to_cell_weights_scale, recurrent_to_output_weights_ptr,
-          recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
-          cell_to_input_weights_scale, cell_to_forget_weights_ptr,
-          cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-          cell_to_output_weights_scale, input_gate_bias_ptr,
-          forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr,
-          projection_weights_ptr, projection_weights_scale, projection_bias_ptr,
-          params, n_batch, n_cell, n_input, aux_input_size, n_output,
-          output_batch_leading_dim, input_gate_scratch, forget_gate_scratch,
-          cell_scratch, output_gate_scratch, scaling_factors_ptr,
-          prod_scaling_factors_ptr, recovered_cell_weights_ptr,
-          quantized_input_ptr, quantized_aux_input_ptr,
-          quantized_output_state_ptr, quantized_cell_state_ptr,
-          output_state_ptr, cell_state_ptr, output_ptr);
+          input_ptr_batch, input_to_input_weights_ptr,
+          input_to_input_weights_scale, input_to_forget_weights_ptr,
+          input_to_forget_weights_scale, input_to_cell_weights_ptr,
+          input_to_cell_weights_scale, input_to_output_weights_ptr,
+          input_to_output_weights_scale, aux_input_ptr,
+          aux_input_to_input_weights_ptr, aux_input_to_input_weights_scale,
+          aux_input_to_forget_weights_ptr, aux_input_to_forget_weights_scale,
+          aux_input_to_cell_weights_ptr, aux_input_to_cell_weights_scale,
+          aux_input_to_output_weights_ptr, aux_input_to_output_weights_scale,
+          recurrent_to_input_weights_ptr, recurrent_to_input_weights_scale,
+          recurrent_to_forget_weights_ptr, recurrent_to_forget_weights_scale,
+          recurrent_to_cell_weights_ptr, recurrent_to_cell_weights_scale,
+          recurrent_to_output_weights_ptr, recurrent_to_output_weights_scale,
+          cell_to_input_weights_ptr, cell_to_input_weights_scale,
+          cell_to_forget_weights_ptr, cell_to_forget_weights_scale,
+          cell_to_output_weights_ptr, cell_to_output_weights_scale,
+          input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr,
+          output_gate_bias_ptr, projection_weights_ptr,
+          projection_weights_scale, projection_bias_ptr, params, n_batch,
+          n_cell, n_input, aux_input_size, n_output, output_batch_leading_dim,
+          input_gate_scratch, forget_gate_scratch, cell_scratch,
+          output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
+          recovered_cell_weights_ptr, quantized_input_ptr,
+          quantized_aux_input_ptr, quantized_output_state_ptr,
+          quantized_cell_state_ptr, output_state->data.f, cell_state->data.f,
+          output_ptr_batch);
     }
   } else {
     for (int b = 0; b < n_batch; b++) {
@@ -1092,12 +1103,24 @@ TfLiteStatus EvalHybrid(
         // If this is the forward_sequence, step forward, otherwise step
         // backwards.
         const int t_rel = forward_sequence ? t : max_time - t - 1;
-        const float* input_ptr = input->data.f + t_rel * input_step;
+        const int time_offset = b * max_time + t_rel;
+        const float* input_ptr = input->data.f + time_offset * input_step;
         if (aux_input) {
-          aux_input_ptr = aux_input->data.f + t_rel * input_step;
+          aux_input_ptr = aux_input->data.f + time_offset * input_step;
         }
         float* output_ptr =
-            output->data.f + t_rel * output_step + output_offset;
+            output->data.f + time_offset * output_step + output_offset;
+
+        // Offset the {output,cell}_state pointers to the right batch.
+        float* output_state_ptr =
+            output_state->data.f + b * output_batch_leading_dim;
+        float* cell_state_ptr = cell_state->data.f + b * n_cell;
+        // Offset the scratch pointers to the right batch.
+        float* input_gate_scratch_ptr =
+            input_gate_scratch ? input_gate_scratch + b * n_cell : nullptr;
+        float* forget_gate_scratch_ptr = forget_gate_scratch + b * n_cell;
+        float* cell_scratch_ptr = cell_scratch + b * n_cell;
+        float* output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
 
         LstmStepWithAuxInput(
             input_ptr, input_to_input_weights_ptr, input_to_input_weights_scale,
@@ -1118,10 +1141,11 @@ TfLiteStatus EvalHybrid(
             cell_to_output_weights_scale, input_gate_bias_ptr,
             forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr,
             projection_weights_ptr, projection_weights_scale,
-            projection_bias_ptr, params, /*n_batch=*/1, n_cell, n_input,
-            aux_input_size, n_output, output_batch_leading_dim,
-            input_gate_scratch, forget_gate_scratch, cell_scratch,
-            output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr,
+            projection_bias_ptr, params,
+            /*n_batch=*/1, n_cell, n_input, aux_input_size, n_output,
+            output_batch_leading_dim, input_gate_scratch_ptr,
+            forget_gate_scratch_ptr, cell_scratch_ptr, output_gate_scratch_ptr,
+            scaling_factors_ptr, prod_scaling_factors_ptr,
             recovered_cell_weights_ptr, quantized_input_ptr,
             quantized_aux_input_ptr, quantized_output_state_ptr,
             quantized_cell_state_ptr, output_state_ptr, cell_state_ptr,
diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc
index c0e6f6994f..d5219bfa83 100644
--- a/tensorflow/lite/kernels/register.cc
+++ b/tensorflow/lite/kernels/register.cc
@@ -197,7 +197,8 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_LSTM, Register_LSTM(), /* min_version */ 1,
              /* max_version */ 2);
   AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
-             Register_BIDIRECTIONAL_SEQUENCE_LSTM());
+             Register_BIDIRECTIONAL_SEQUENCE_LSTM(), /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
              Register_UNIDIRECTIONAL_SEQUENCE_LSTM());
   AddBuiltin(BuiltinOperator_PAD, Register_PAD());
diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs
index 980f13b19b..91d8049301 100644
--- a/tensorflow/lite/schema/schema.fbs
+++ b/tensorflow/lite/schema/schema.fbs
@@ -443,12 +443,19 @@ table UnidirectionalSequenceLSTMOptions {
 }
 
 table BidirectionalSequenceLSTMOptions {
+  // Parameters supported by version 1:
   fused_activation_function:ActivationFunctionType;
   cell_clip: float; // Optional, 0.0 means no clipping
   proj_clip: float; // Optional, 0.0 means no clipping
 
   // If true, store the outputs of both directions into the first output.
   merge_outputs: bool;
+
+  // Parameters supported by version 2:
+  // If true then first dimension is sequence, otherwise batch.
+  // Version 1 implementations assumed time_major to be true, so this default
+  // value should never change.
+  time_major: bool = true;
 }
 
 table ResizeBilinearOptions {
diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h
index 637cbafabd..0883cce497 100755
--- a/tensorflow/lite/schema/schema_generated.h
+++ b/tensorflow/lite/schema/schema_generated.h
@@ -3963,11 +3963,13 @@ struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable {
   float cell_clip;
   float proj_clip;
   bool merge_outputs;
+  bool time_major;
   BidirectionalSequenceLSTMOptionsT()
       : fused_activation_function(ActivationFunctionType_NONE),
         cell_clip(0.0f),
         proj_clip(0.0f),
-        merge_outputs(false) {
+        merge_outputs(false),
+        time_major(true) {
   }
 };
 
@@ -3977,7 +3979,8 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
     VT_FUSED_ACTIVATION_FUNCTION = 4,
     VT_CELL_CLIP = 6,
     VT_PROJ_CLIP = 8,
-    VT_MERGE_OUTPUTS = 10
+    VT_MERGE_OUTPUTS = 10,
+    VT_TIME_MAJOR = 12
   };
   ActivationFunctionType fused_activation_function() const {
     return static_cast<ActivationFunctionType>(GetField<int8_t>(VT_FUSED_ACTIVATION_FUNCTION, 0));
@@ -3991,12 +3994,16 @@ struct BidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbu
   bool merge_outputs() const {
     return GetField<uint8_t>(VT_MERGE_OUTPUTS, 0) != 0;
   }
+  bool time_major() const {
+    return GetField<uint8_t>(VT_TIME_MAJOR, 1) != 0;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<int8_t>(verifier, VT_FUSED_ACTIVATION_FUNCTION) &&
            VerifyField<float>(verifier, VT_CELL_CLIP) &&
            VerifyField<float>(verifier, VT_PROJ_CLIP) &&
            VerifyField<uint8_t>(verifier, VT_MERGE_OUTPUTS) &&
+           VerifyField<uint8_t>(verifier, VT_TIME_MAJOR) &&
            verifier.EndTable();
   }
   BidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
@@ -4019,6 +4026,9 @@ struct BidirectionalSequenceLSTMOptionsBuilder {
   void add_merge_outputs(bool merge_outputs) {
     fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_MERGE_OUTPUTS, static_cast<uint8_t>(merge_outputs), 0);
   }
+  void add_time_major(bool time_major) {
+    fbb_.AddElement<uint8_t>(BidirectionalSequenceLSTMOptions::VT_TIME_MAJOR, static_cast<uint8_t>(time_major), 1);
+  }
   explicit BidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
         : fbb_(_fbb) {
     start_ = fbb_.StartTable();
@@ -4036,10 +4046,12 @@ inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectional
     ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE,
     float cell_clip = 0.0f,
     float proj_clip = 0.0f,
-    bool merge_outputs = false) {
+    bool merge_outputs = false,
+    bool time_major = true) {
   BidirectionalSequenceLSTMOptionsBuilder builder_(_fbb);
   builder_.add_proj_clip(proj_clip);
   builder_.add_cell_clip(cell_clip);
+  builder_.add_time_major(time_major);
   builder_.add_merge_outputs(merge_outputs);
   builder_.add_fused_activation_function(fused_activation_function);
   return builder_.Finish();
@@ -8816,6 +8828,7 @@ inline void BidirectionalSequenceLSTMOptions::UnPackTo(BidirectionalSequenceLSTM
   { auto _e = cell_clip(); _o->cell_clip = _e; };
   { auto _e = proj_clip(); _o->proj_clip = _e; };
   { auto _e = merge_outputs(); _o->merge_outputs = _e; };
+  { auto _e = time_major(); _o->time_major = _e; };
 }
 
 inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> BidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
@@ -8830,12 +8843,14 @@ inline flatbuffers::Offset<BidirectionalSequenceLSTMOptions> CreateBidirectional
   auto _cell_clip = _o->cell_clip;
   auto _proj_clip = _o->proj_clip;
   auto _merge_outputs = _o->merge_outputs;
+  auto _time_major = _o->time_major;
   return tflite::CreateBidirectionalSequenceLSTMOptions(
       _fbb,
       _fused_activation_function,
       _cell_clip,
       _proj_clip,
-      _merge_outputs);
+      _merge_outputs,
+      _time_major);
 }
 
 inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-- 
GitLab


From ae57bc54d1f47053ae6a54b252160e1e38b93a54 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 18 Dec 2018 13:41:53 -0800
Subject: [PATCH 767/873] [TF:XLA] Bump open source llvm revision to r349508

PiperOrigin-RevId: 226051818
---
 tensorflow/workspace.bzl                  | 8 ++++----
 third_party/llvm/llvm.autogenerated.BUILD | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 566f705ce2..700aa065b1 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -498,11 +498,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "llvm",
         build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
-        sha256 = "55769c91b9f5b5255d58a1ecd88e690a4e192dc8cbdf8f984596649abe3b5433",
-        strip_prefix = "llvm-2ba3294845dedcbb27dc49287bfbcdb49aa1e6b7",
+        sha256 = "65b48c80eba736ab834a9790b78a72cd0e3919b6dace44a96259d3e6936624ec",
+        strip_prefix = "llvm-cfa2cf74cd9ba0e759974ce11bfd7b9e051dd8ff",
         urls = [
-            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/2ba3294845dedcbb27dc49287bfbcdb49aa1e6b7.tar.gz",
-            "https://github.com/llvm-mirror/llvm/archive/2ba3294845dedcbb27dc49287bfbcdb49aa1e6b7.tar.gz",
+            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/cfa2cf74cd9ba0e759974ce11bfd7b9e051dd8ff.tar.gz",
+            "https://github.com/llvm-mirror/llvm/archive/cfa2cf74cd9ba0e759974ce11bfd7b9e051dd8ff.tar.gz",
         ],
     )
 
diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD
index 6599b9e91b..f0ee086a7e 100644
--- a/third_party/llvm/llvm.autogenerated.BUILD
+++ b/third_party/llvm/llvm.autogenerated.BUILD
@@ -646,6 +646,7 @@ cc_library(
         ":amdgpu_asm_printer",
         ":amdgpu_info",
         ":amdgpu_utils",
+        ":binary_format",
         ":config",
         ":core",
         ":mc",
@@ -793,6 +794,7 @@ cc_library(
         ":amdgpu_utils",
         ":analysis",
         ":asm_printer",
+        ":binary_format",
         ":code_gen",
         ":config",
         ":core",
-- 
GitLab


From 6cab8a04ce99c116e521ad91b358574415c9af6a Mon Sep 17 00:00:00 2001
From: Suyog Gupta <suyoggupta@google.com>
Date: Tue, 18 Dec 2018 13:42:44 -0800
Subject: [PATCH 768/873] Use top_k for threshold computation instead of
 histogram/cdf Deprecate the nbins hyperparamter

PiperOrigin-RevId: 226051944
---
 tensorflow/contrib/model_pruning/README.md    |   1 -
 .../contrib/model_pruning/python/pruning.py   |  28 ++--
 .../model_pruning/python/pruning_test.py      |   4 +-
 .../model_pruning/python/pruning_utils.py     | 126 ------------------
 .../python/pruning_utils_test.py              |  55 --------
 5 files changed, 15 insertions(+), 199 deletions(-)

diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md
index 45a60d7948..710a262f33 100644
--- a/tensorflow/contrib/model_pruning/README.md
+++ b/tensorflow/contrib/model_pruning/README.md
@@ -53,7 +53,6 @@ The pruning library allows for specification of the following hyper parameters:
 | weight_sparsity_map | list of strings | [""] | list of weight variable name (or layer name):target sparsity pairs. Eg. [conv1:0.9,conv2/kernel:0.8]. For layers/weights not in this list, sparsity as specified by the target_sparsity hyperparameter is used. |
 | threshold_decay | float | 0.0 | The decay factor to use for exponential decay of the thresholds |
 | pruning_frequency | integer | 10 | How often should the masks be updated? (in # of global_steps) |
-| nbins | integer | 256 | Number of bins to use for histogram computation. Note: When running on TPUs, a large (>1024) value for `nbins` may adversely affect the training time. |
 | block_height|integer | 1 | Number of rows in a block for block sparse matrices|
 | block_width |integer | 1 | Number of cols in a block for block sparse matrices|
 | block_pooling_function| string | AVG | The function to use to pool weight values in a block: average (AVG) or max (MAX)|
diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py
index 43ea66ac5a..9966f7cf79 100644
--- a/tensorflow/contrib/model_pruning/python/pruning.py
+++ b/tensorflow/contrib/model_pruning/python/pruning.py
@@ -397,28 +397,26 @@ class Pruning(object):
       raise ValueError('Sparsity variable undefined')
 
     sparsity = self._get_sparsity(weights.op.name)
-
     with ops.name_scope(weights.op.name + '_pruning_ops'):
       abs_weights = math_ops.abs(weights)
-      max_value = math_ops.reduce_max(abs_weights)
-      cdf_fn = pruning_utils.compute_cdf_from_histogram
-      if self._spec.use_tpu:
-        cdf_fn = pruning_utils.compute_cdf
-
-      norm_cdf = cdf_fn(abs_weights, [0.0, max_value], nbins=self._spec.nbins)
-      current_threshold = math_ops.multiply(
-          math_ops.div(
-              math_ops.reduce_sum(
-                  math_ops.cast(
-                      math_ops.less(norm_cdf, sparsity), dtypes.float32)),
-              float(self._spec.nbins)), max_value)
-
+      k = math_ops.cast(
+          math_ops.round(
+              math_ops.cast(array_ops.size(abs_weights), dtypes.float32) *
+              (1 - sparsity)), dtypes.int32)
+      # Sort the entire array
+      values, _ = nn_ops.top_k(
+          array_ops.reshape(abs_weights, [-1]), k=array_ops.size(abs_weights))
+      # Grab the (k-1) th value
+      current_threshold = array_ops.gather(values, k - 1)
       smoothed_threshold = math_ops.add_n([
           math_ops.multiply(current_threshold, 1 - self._spec.threshold_decay),
           math_ops.multiply(threshold, self._spec.threshold_decay)
       ])
+
       new_mask = math_ops.cast(
-          math_ops.greater(abs_weights, smoothed_threshold), dtypes.float32)
+          math_ops.greater_equal(abs_weights, smoothed_threshold),
+          dtypes.float32)
+
     return smoothed_threshold, new_mask
 
   def _maybe_update_block_mask(self, weights, threshold):
diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py
index 1b6da5ce2b..835614d882 100644
--- a/tensorflow/contrib/model_pruning/python/pruning_test.py
+++ b/tensorflow/contrib/model_pruning/python/pruning_test.py
@@ -102,7 +102,7 @@ class PruningTest(test.TestCase):
       weights = variables.VariableV1(
           math_ops.linspace(1.0, 100.0, 100), name="weights")
       masked_weights = pruning.apply_mask(weights)
-      sparsity = variables.VariableV1(0.5, name="sparsity")
+      sparsity = variables.VariableV1(0.95, name="sparsity")
       p = pruning.Pruning(sparsity=sparsity)
       p._spec.threshold_decay = 0.0
       mask_update_op = p.mask_update_op()
@@ -111,7 +111,7 @@ class PruningTest(test.TestCase):
       self.assertAllEqual(np.count_nonzero(masked_weights_val), 100)
       session.run(mask_update_op)
       masked_weights_val = masked_weights.eval()
-      self.assertAllEqual(np.count_nonzero(masked_weights_val), 50)
+      self.assertAllEqual(np.count_nonzero(masked_weights_val), 5)
 
   def _blockMasking(self, hparams, weights, expected_mask):
 
diff --git a/tensorflow/contrib/model_pruning/python/pruning_utils.py b/tensorflow/contrib/model_pruning/python/pruning_utils.py
index 14fc51229a..8f2ba03646 100644
--- a/tensorflow/contrib/model_pruning/python/pruning_utils.py
+++ b/tensorflow/contrib/model_pruning/python/pruning_utils.py
@@ -25,16 +25,12 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 
-_NBINS = 256
-
 
 def weight_mask_variable(var, scope):
   """Create a mask for the weights.
@@ -165,128 +161,6 @@ def expand_tensor(tensor, block_dims):
   return expanded_tensor
 
 
-def _histogram(values, value_range, nbins=100, dtype=dtypes.int32, name=None):
-  """Return histogram of values.
-
-  Given the tensor `values`, this operation returns a rank 1 histogram counting
-  the number of entries in `values` that fell into every bin.  The bins are
-  equal width and determined by the arguments `value_range` and `nbins`.
-
-  Args:
-    values:  Numeric `Tensor`.
-    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
-      values <= value_range[0] will be mapped to hist[0],
-      values >= value_range[1] will be mapped to hist[-1].
-    nbins:  Scalar `int32 Tensor`.  Number of histogram bins.
-    dtype:  dtype for returned histogram.
-    name:  A name for this operation (defaults to 'histogram').
-
-  Returns:
-    A 1-D `Tensor` holding histogram of values.
-
-  """
-  with ops.name_scope(name, 'histogram', [values, value_range, nbins]) as scope:
-    values = ops.convert_to_tensor(values, name='values')
-    values = array_ops.reshape(values, [-1])
-    nbins_float = np.float32(nbins)
-
-    # Map tensor values that fall within value_range to [0, 1].
-    scaled_values = math_ops.truediv(
-        values - value_range[0],
-        value_range[1] - value_range[0],
-        name='scaled_values')
-
-    # map tensor values within the open interval value_range to {0,.., nbins-1},
-    # values outside the open interval will be zero or less, or nbins or more.
-    indices = math_ops.floor(nbins_float * scaled_values, name='indices')
-
-    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
-    indices = math_ops.cast(
-        clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)
-
-    return math_ops.unsorted_segment_sum(
-        array_ops.ones_like(indices, dtype=dtype), indices, nbins, name=scope)
-
-
-def compute_cdf_from_histogram(values, value_range, **kwargs):
-  """Returns the normalized cumulative distribution of the given values tensor.
-
-  Computes the histogram and uses tf.cumsum to arrive at cdf
-
-  Args:
-    values:  Numeric `Tensor`.
-    value_range:  Shape [2] `Tensor` of same `dtype` as `values`.
-    **kwargs: keyword arguments: nbins, name
-
-  Returns:
-    A 1-D `Tensor` holding normalized cdf of values.
-
-  """
-  nbins = kwargs.get('nbins', _NBINS)
-  name = kwargs.get('name', None)
-  with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
-    histogram = _histogram(
-        values, value_range, dtype=dtypes.float32, nbins=nbins)
-    cdf = math_ops.cumsum(histogram)
-    return math_ops.div(cdf, math_ops.reduce_max(cdf))
-
-
-def compute_cdf(values, value_range, **kwargs):
-  """Returns the normalized cumulative distribution of the given values tensor.
-
-  Uses tf.while_loop to directly compute the cdf of the values.
-
-  Args:
-    values:  Numeric `Tensor`.
-    value_range:  Shape [2] `Tensor` of same `dtype` as `values`
-    **kwargs: keyword arguments: nbins, name
-
-  Returns:
-    A 1-D `Tensor` holding normalized cdf of values.
-
-  """
-  nbins = kwargs.get('nbins', _NBINS)
-  name = kwargs.get('name', None)
-  with ops.name_scope(name, 'cdf', [values, value_range, nbins]):
-    values = ops.convert_to_tensor(values, name='values')
-    nbins_float = np.float32(nbins)
-
-    # Map tensor values that fall within value_range to [0, 1].
-    scaled_values = math_ops.truediv(
-        values - value_range[0],
-        value_range[1] - value_range[0],
-        name='scaled_values')
-
-    # map tensor values within the open interval value_range to {0,.., nbins-1},
-    # values outside the open interval will be zero or less, or nbins or more.
-    indices = math_ops.floor(nbins_float * scaled_values, name='indices')
-
-    # Clip edge cases (e.g. value = value_range[1]) or "outliers."
-    indices = math_ops.cast(
-        clip_ops.clip_by_value(indices, 0, nbins_float - 1), dtypes.int32)
-
-    cdf = array_ops.zeros(nbins)
-    i = constant_op.constant(0)
-
-    def loop_cond(loop_count, _):
-      return math_ops.less(loop_count, nbins)
-
-    def loop_body(loop_count, cdf):
-      temp = math_ops.reduce_sum(
-          math_ops.cast(
-              math_ops.less_equal(indices, loop_count), dtypes.float32))
-      cdf = math_ops.add(
-          cdf,
-          array_ops.one_hot(
-              loop_count, depth=nbins, on_value=temp, off_value=0.0))
-      return [loop_count + 1, cdf]
-
-    _, cdf = control_flow_ops.while_loop(
-        loop_cond, loop_body, [i, cdf], maximum_iterations=nbins)
-
-    return math_ops.div(cdf, math_ops.reduce_max(cdf))
-
-
 def factorized_pool(input_tensor,
                     window_shape,
                     pooling_type,
diff --git a/tensorflow/contrib/model_pruning/python/pruning_utils_test.py b/tensorflow/contrib/model_pruning/python/pruning_utils_test.py
index d6f2bfcb6c..b85bc41315 100644
--- a/tensorflow/contrib/model_pruning/python/pruning_utils_test.py
+++ b/tensorflow/contrib/model_pruning/python/pruning_utils_test.py
@@ -19,13 +19,9 @@ from __future__ import division
 from __future__ import print_function
 
 from absl.testing import parameterized
-import numpy as np
 
 from tensorflow.contrib.model_pruning.python import pruning_utils
-from tensorflow.python.framework import constant_op
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
@@ -33,57 +29,6 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
-class PruningUtilsTest(test.TestCase):
-
-  def _compare_cdf(self, values):
-    abs_values = math_ops.abs(values)
-    max_value = math_ops.reduce_max(abs_values)
-    with self.cached_session():
-      variables.global_variables_initializer().run()
-      cdf_from_histogram = pruning_utils.compute_cdf_from_histogram(
-          abs_values, [0.0, max_value], nbins=pruning_utils._NBINS)
-      cdf = pruning_utils.compute_cdf(abs_values, [0.0, max_value])
-      self.assertAllEqual(cdf.eval(), cdf_from_histogram.eval())
-
-  def testHistogram(self):
-    width = 10
-    height = 10
-    nbins = 100
-    expected_histogram = np.full(nbins, 1.0)
-    init = init_ops.constant_initializer(np.linspace(0.0, 1.0, width * height))
-    weights = variable_scope.get_variable(
-        "weights", [width, height], initializer=init)
-    histogram = pruning_utils._histogram(
-        weights, [0, 1.0], nbins, dtype=np.float32)
-    with self.cached_session():
-      variables.global_variables_initializer().run()
-      computed_histogram = histogram.eval()
-    self.assertAllEqual(expected_histogram, computed_histogram)
-
-  def testCDF(self):
-    nbins = 5
-    weights = constant_op.constant([-1, 0, 1, 1.5, 2, 3, 4, 5, 10, 100])
-    abs_weights = math_ops.abs(weights)
-    norm_cdf = pruning_utils.compute_cdf_from_histogram(
-        abs_weights, [0.0, 5.0], nbins=nbins)
-    expected_cdf = np.array([0.1, 0.4, 0.5, 0.6, 1.0], dtype=np.float32)
-    with self.cached_session() as sess:
-      variables.global_variables_initializer().run()
-      norm_cdf_val = sess.run(norm_cdf)
-      self.assertAllEqual(len(norm_cdf_val), nbins)
-      self.assertAllEqual(expected_cdf, norm_cdf_val)
-
-  def testCDFEquivalence2D(self):
-    width = 100
-    height = 100
-    weights = variable_scope.get_variable("weights", shape=[width, height])
-    self._compare_cdf(weights)
-
-  def testCDFEquivalence4D(self):
-    weights = variable_scope.get_variable("weights", shape=[5, 5, 128, 128])
-    self._compare_cdf(weights)
-
-
 @parameterized.named_parameters(
     ("Input_32x32_block_1x1", [32, 32], [1, 1]),
     # block size 6x6
-- 
GitLab


From 354662fb929da898911db96520b3db6b27b4c914 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 18 Dec 2018 13:57:01 -0800
Subject: [PATCH 769/873] Fix for importing functions with a control dependency
 on an op with no outputs

PiperOrigin-RevId: 226054242
---
 tensorflow/python/framework/function_def_to_graph.py     | 4 ++--
 .../python/framework/function_def_to_graph_test.py       | 9 +++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/framework/function_def_to_graph.py b/tensorflow/python/framework/function_def_to_graph.py
index 4d1aabde06..10ad7ad541 100644
--- a/tensorflow/python/framework/function_def_to_graph.py
+++ b/tensorflow/python/framework/function_def_to_graph.py
@@ -168,8 +168,8 @@ def function_def_to_graph_def(fdef, input_shapes=None):
         flat_name = "{}:{}".format(node_def.name, flattened_index)
         nested_to_flat_tensor_name[nested_name] = flat_name
         flattened_index += 1
-      control_name = "^" + node_def.name
-      nested_to_flat_tensor_name[control_name] = control_name
+    control_name = "^" + node_def.name
+    nested_to_flat_tensor_name[control_name] = control_name
 
   # Update inputs of all nodes in graph.
   for node_def in graph_def.node:
diff --git a/tensorflow/python/framework/function_def_to_graph_test.py b/tensorflow/python/framework/function_def_to_graph_test.py
index ddf1a6e74d..d1dc46d6f8 100644
--- a/tensorflow/python/framework/function_def_to_graph_test.py
+++ b/tensorflow/python/framework/function_def_to_graph_test.py
@@ -29,6 +29,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.framework import test_ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -225,12 +226,15 @@ class FunctionDefToGraphDefTest(test.TestCase):
 
   def testControlDependencies(self):
 
+    v = variables.Variable(1)
+
     @function.defun
     def fn(inp):
+      assign = v.assign(3, name="assign", read_value=False)
       x = constant_op.constant(2.0, name="x")
       # TODO(b/79881896): Test external control dependency once that's
       # supported.
-      with ops.control_dependencies([x, inp]):
+      with ops.control_dependencies([x, inp, assign]):
         constant_op.constant(3.0, name="y")
       return 4.0
 
@@ -239,9 +243,10 @@ class FunctionDefToGraphDefTest(test.TestCase):
     func_graph = function_def_to_graph.function_def_to_graph(fdef)
 
     op = func_graph.get_operation_by_name("y")
-    self.assertEqual(len(op.control_inputs), 2)
+    self.assertEqual(len(op.control_inputs), 3)
     self.assertEqual(op.control_inputs[0].name, "x")
     self.assertEqual(op.control_inputs[1].name, "inp")
+    self.assertEqual(op.control_inputs[2].name, "assign")
 
 
 if __name__ == "__main__":
-- 
GitLab


From 8e7850f45a61cc45770834e79dbcddc133a78605 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 13:59:36 -0800
Subject: [PATCH 770/873] Update ops-related pbtxt files.

PiperOrigin-RevId: 226054661
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 271 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 271 ++++++++++++++++++
 2 files changed, 542 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 602d4a009d..9b7776bbf3 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -16219,6 +16219,152 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "CudnnRNNBackpropV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "params"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sequence_lengths"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_h_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_c_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "host_reserved"
+    type: DT_INT8
+  }
+  output_arg {
+    name: "input_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "input_h_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "input_c_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "params_backprop"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "rnn_mode"
+    type: "string"
+    default_value {
+      s: "lstm"
+    }
+    allowed_values {
+      list {
+        s: "rnn_relu"
+        s: "rnn_tanh"
+        s: "lstm"
+        s: "gru"
+      }
+    }
+  }
+  attr {
+    name: "input_mode"
+    type: "string"
+    default_value {
+      s: "linear_input"
+    }
+    allowed_values {
+      list {
+        s: "linear_input"
+        s: "skip_input"
+        s: "auto_select"
+      }
+    }
+  }
+  attr {
+    name: "direction"
+    type: "string"
+    default_value {
+      s: "unidirectional"
+    }
+    allowed_values {
+      list {
+        s: "unidirectional"
+        s: "bidirectional"
+      }
+    }
+  }
+  attr {
+    name: "dropout"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "CudnnRNNCanonicalToParams"
   input_arg {
@@ -16661,6 +16807,131 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "CudnnRNNV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "params"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sequence_lengths"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_h"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_c"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "host_reserved"
+    type: DT_INT8
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "rnn_mode"
+    type: "string"
+    default_value {
+      s: "lstm"
+    }
+    allowed_values {
+      list {
+        s: "rnn_relu"
+        s: "rnn_tanh"
+        s: "lstm"
+        s: "gru"
+      }
+    }
+  }
+  attr {
+    name: "input_mode"
+    type: "string"
+    default_value {
+      s: "linear_input"
+    }
+    allowed_values {
+      list {
+        s: "linear_input"
+        s: "skip_input"
+        s: "auto_select"
+      }
+    }
+  }
+  attr {
+    name: "direction"
+    type: "string"
+    default_value {
+      s: "unidirectional"
+    }
+    allowed_values {
+      list {
+        s: "unidirectional"
+        s: "bidirectional"
+      }
+    }
+  }
+  attr {
+    name: "dropout"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "Cumprod"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 779d4297c7..1157380b8f 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -7202,6 +7202,152 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "CudnnRNNBackpropV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "params"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sequence_lengths"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_h_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_c_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "host_reserved"
+    type: DT_INT8
+  }
+  output_arg {
+    name: "input_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "input_h_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "input_c_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "params_backprop"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "rnn_mode"
+    type: "string"
+    default_value {
+      s: "lstm"
+    }
+    allowed_values {
+      list {
+        s: "rnn_relu"
+        s: "rnn_tanh"
+        s: "lstm"
+        s: "gru"
+      }
+    }
+  }
+  attr {
+    name: "input_mode"
+    type: "string"
+    default_value {
+      s: "linear_input"
+    }
+    allowed_values {
+      list {
+        s: "linear_input"
+        s: "skip_input"
+        s: "auto_select"
+      }
+    }
+  }
+  attr {
+    name: "direction"
+    type: "string"
+    default_value {
+      s: "unidirectional"
+    }
+    allowed_values {
+      list {
+        s: "unidirectional"
+        s: "bidirectional"
+      }
+    }
+  }
+  attr {
+    name: "dropout"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "CudnnRNNCanonicalToParams"
   input_arg {
@@ -7644,6 +7790,131 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "CudnnRNNV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "params"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sequence_lengths"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_h"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_c"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "host_reserved"
+    type: DT_INT8
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "rnn_mode"
+    type: "string"
+    default_value {
+      s: "lstm"
+    }
+    allowed_values {
+      list {
+        s: "rnn_relu"
+        s: "rnn_tanh"
+        s: "lstm"
+        s: "gru"
+      }
+    }
+  }
+  attr {
+    name: "input_mode"
+    type: "string"
+    default_value {
+      s: "linear_input"
+    }
+    allowed_values {
+      list {
+        s: "linear_input"
+        s: "skip_input"
+        s: "auto_select"
+      }
+    }
+  }
+  attr {
+    name: "direction"
+    type: "string"
+    default_value {
+      s: "unidirectional"
+    }
+    allowed_values {
+      list {
+        s: "unidirectional"
+        s: "bidirectional"
+      }
+    }
+  }
+  attr {
+    name: "dropout"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "Cumprod"
   input_arg {
-- 
GitLab


From 13fce1e3bea288bf08c695a078ac42853e69182a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 14:07:47 -0800
Subject: [PATCH 771/873] Migrate from `tf.distribute.Strategy.read_var()` to
 `...extended.read_var()`.

PiperOrigin-RevId: 226056306
---
 .../collective_all_reduce_strategy_test.py     |  4 ++--
 .../python/keras_optimizer_v2_test.py          | 18 +++++++++---------
 .../python/mirrored_strategy_multigpu_test.py  |  8 ++++----
 .../python/parameter_server_strategy_test.py   |  4 ++--
 .../distribute/python/strategy_test_lib.py     |  8 ++++----
 tensorflow/python/distribute/distribute_lib.py |  5 -----
 tensorflow/python/distribute/values.py         |  9 +++++----
 tensorflow/python/ops/metrics_impl.py          |  2 +-
 ...sorflow.distribute.-mirrored-strategy.pbtxt |  4 ----
 .../v1/tensorflow.distribute.-strategy.pbtxt   |  4 ----
 ...sorflow.distribute.-mirrored-strategy.pbtxt |  4 ----
 .../v2/tensorflow.distribute.-strategy.pbtxt   |  4 ----
 12 files changed, 27 insertions(+), 47 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
index 6d7cd14ed5..74c69982b9 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
@@ -128,7 +128,7 @@ class CollectiveAllReduceStrategyTestBase(
         before_list = []
         after_list = []
         for g, v in g_v:
-          fetched = d.read_var(v)
+          fetched = d.extended.read_var(v)
           before_list.append(fetched)
           with ops.control_dependencies([fetched]):
             # TODO(yuefengz): support non-Mirrored variable as destinations.
@@ -136,7 +136,7 @@ class CollectiveAllReduceStrategyTestBase(
                 reduce_util.ReduceOp.SUM, g, destinations=v)
             with ops.control_dependencies(
                 d.update(v, update, g, grouped=False)):
-              after_list.append(d.read_var(v))
+              after_list.append(d.extended.read_var(v))
         return before_list, after_list
 
       before_out, after_out = step()
diff --git a/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py b/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
index 8c596549c4..c7f6ba9bed 100644
--- a/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
+++ b/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
@@ -71,12 +71,12 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase):
       self.assertAllClose(
           var_val,
           self.evaluate(
-              [distribution.read_var(var),
+              [distribution.extended.read_var(var),
                var.get(devices[0]),
                var.get(devices[1])]))
       self.assertAllClose([0, 0, 0],
                           self.evaluate([
-                              distribution.read_var(counter),
+                              distribution.extended.read_var(counter),
                               counter.get(devices[0]),
                               counter.get(devices[1])
                           ]))
@@ -89,7 +89,7 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase):
       self.assertAllClose(
           m_val,
           self.evaluate(
-              [distribution.read_var(m),
+              [distribution.extended.read_var(m),
                m.get(devices[0]),
                m.get(devices[1])]))
       # v(1) = beta2 * v(0) + (1-beta2) * grad^2 = 0.2 * 0 + 0.8 * 2.25
@@ -97,7 +97,7 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase):
       self.assertAllClose(
           v_val,
           self.evaluate(
-              [distribution.read_var(v),
+              [distribution.extended.read_var(v),
                v.get(devices[0]),
                v.get(devices[1])]))
       # var(1) = var(0) - lr * m(1) * sqrt(1 - beta2) / sqrt(v(1)) / (1 - beta1)
@@ -106,12 +106,12 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase):
       self.assertAllClose(
           var_val,
           self.evaluate(
-              [distribution.read_var(var),
+              [distribution.extended.read_var(var),
                var.get(devices[0]),
                var.get(devices[1])]))
       self.assertAllClose([1, 1, 1],
                           self.evaluate([
-                              distribution.read_var(counter),
+                              distribution.extended.read_var(counter),
                               counter.get(devices[0]),
                               counter.get(devices[1])
                           ]))
@@ -122,7 +122,7 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase):
       self.assertAllClose(
           m_val,
           self.evaluate(
-              [distribution.read_var(m),
+              [distribution.extended.read_var(m),
                m.get(devices[0]),
                m.get(devices[1])]))
       # v(2) = beta2 * v(1) + (1-beta2) * grad^2 = 0.2 * 1.8 + 0.8 * 2.25
@@ -130,12 +130,12 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase):
       self.assertAllClose(
           v_val,
           self.evaluate(
-              [distribution.read_var(v),
+              [distribution.extended.read_var(v),
                v.get(devices[0]),
                v.get(devices[1])]))
       self.assertAllClose([2, 2, 2],
                           self.evaluate([
-                              distribution.read_var(counter),
+                              distribution.extended.read_var(counter),
                               counter.get(devices[0]),
                               counter.get(devices[1])
                           ]))
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index 337a86b342..fd6841266c 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -1123,7 +1123,7 @@ class ReplicaLocalVariableAssignTest(test.TestCase):
       # When we read the value using `read_var` we should see the SUM of each of
       # values on each of the replicas.
       self.assertEqual(2.0, self.evaluate(
-          distribution.read_var(replica_local_var)))
+          distribution.extended.read_var(replica_local_var)))
       # Assigning 6.0 in cross replica context will assign a value of
       # 6.0/num_replicas to each replica.
       tlv_ops = replica_local_var.assign(6.0)
@@ -1132,7 +1132,7 @@ class ReplicaLocalVariableAssignTest(test.TestCase):
       # The value on all the replicas are added before being returned by
       # `read_var`.
       self.assertEqual(6.0, self.evaluate(
-          distribution.read_var(replica_local_var)))
+          distribution.extended.read_var(replica_local_var)))
 
   def testAssignReplicaLocalVarMeanAggregation(self, distribution):
     def model_fn():
@@ -1151,13 +1151,13 @@ class ReplicaLocalVariableAssignTest(test.TestCase):
       # When we read the value using `read_var` we should see the MEAN of values
       # on all replicas which is the value assigned in replica context.
       self.assertEqual(1.0, self.evaluate(
-          distribution.read_var(replica_local_var)))
+          distribution.extended.read_var(replica_local_var)))
       tlv_ops = replica_local_var.assign(6.0)
       self.evaluate(tlv_ops)
       # On reading the replica local var we should get the MEAN of all values
       # which is equal to the value assigned.
       self.assertEqual(6.0, self.evaluate(
-          distribution.read_var(replica_local_var)))
+          distribution.extended.read_var(replica_local_var)))
 
 
 class MockModel(object):
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
index 83d7473666..6d6ec88722 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
@@ -477,7 +477,7 @@ class ParameterServerStrategyTestBase(
         before_list = []
         after_list = []
         for g, v in g_v:
-          fetched = d.read_var(v)
+          fetched = d.extended.read_var(v)
           before_list.append(fetched)
           with ops.control_dependencies([fetched]):
             # TODO(yuefengz): support non-Mirrored variable as destinations.
@@ -485,7 +485,7 @@ class ParameterServerStrategyTestBase(
                 reduce_util.ReduceOp.SUM, g, destinations=v)
             with ops.control_dependencies(
                 d.update(v, update, g, grouped=False)):
-              after_list.append(d.read_var(v))
+              after_list.append(d.extended.read_var(v))
         return before_list, after_list
 
       before_out, after_out = step()
diff --git a/tensorflow/contrib/distribute/python/strategy_test_lib.py b/tensorflow/contrib/distribute/python/strategy_test_lib.py
index d441b5af5f..cc46c84005 100644
--- a/tensorflow/contrib/distribute/python/strategy_test_lib.py
+++ b/tensorflow/contrib/distribute/python/strategy_test_lib.py
@@ -112,7 +112,7 @@ class DistributionTestBase(test.TestCase):
         before_list = []
         after_list = []
         for g, v in g_v:
-          fetched = d.read_var(v)
+          fetched = d.extended.read_var(v)
           before_list.append(fetched)
           # control_dependencies irrelevant but harmless in eager execution
           with ops.control_dependencies([fetched]):
@@ -120,7 +120,7 @@ class DistributionTestBase(test.TestCase):
                 reduce_util.ReduceOp.SUM, g, destinations=v)
             with ops.control_dependencies(d.update(
                 v, update, g, grouped=False)):
-              after_list.append(d.read_var(v))
+              after_list.append(d.extended.read_var(v))
         return before_list, after_list
 
       for i in range(10):
@@ -168,14 +168,14 @@ class DistributionTestBase(test.TestCase):
         before_list = []
         after_list = []
         for g, v in g_v:
-          fetched = d.read_var(v)
+          fetched = d.extended.read_var(v)
           before_list.append(fetched)
           with ops.control_dependencies([fetched]):
             g = d.extended.reduce_to(
                 reduce_util.ReduceOp.SUM, g, destinations=v)
             with ops.control_dependencies(d.update(
                 v, update, g, grouped=False)):
-              after_list.append(d.read_var(v))
+              after_list.append(d.extended.read_var(v))
         return before_list, after_list
 
       before_out, after_out = step()
diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py
index 60bb75ded0..13ddcaab68 100644
--- a/tensorflow/python/distribute/distribute_lib.py
+++ b/tensorflow/python/distribute/distribute_lib.py
@@ -324,11 +324,6 @@ class DistributionStrategy(object):
     """
     return self._extended._scope(self)  # pylint: disable=protected-access
 
-  @doc_controls.do_not_generate_docs  # DEPRECATED, moving to `extended`
-  def read_var(self, v):
-    """DEPRECATED: use extended.read_var() instead."""
-    return self._extended.read_var(v)
-
   @doc_controls.do_not_generate_docs  # DEPRECATED, moving to `extended`
   def colocate_vars_with(self, colocate_with_variable):
     """DEPRECATED: use extended.colocate_vars_with() instead."""
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index e4cc8bb3ea..538f214602 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -319,8 +319,8 @@ class DistributedVariable(DistributedDelegate):
     return self._primary_var._in_graph_mode   # pylint: disable=protected-access
 
   def read_value(self):
-    return distribution_strategy_context.get_distribution_strategy().read_var(
-        self)
+    strategy = distribution_strategy_context.get_distribution_strategy()
+    return strategy.extended.read_var(self)
 
   def _should_act_as_resource_variable(self):
     """Pass resource_variable_ops.is_resource_variable check."""
@@ -873,8 +873,9 @@ class _ReplicaLocalSaveable(saver.BaseSaverBuilder.SaveableObject):
     # We use a callable so that we don't have to evaluate this expression
     # in the case where we are trying to restore instead of save.
     def tensor():
-      return distribution_strategy_context.get_distribution_strategy().read_var(
-          replica_local_variable)
+      strategy = distribution_strategy_context.get_distribution_strategy()
+      return strategy.extended.read_var(replica_local_variable)
+
     spec = saver.BaseSaverBuilder.SaveSpec(
         tensor=tensor,
         slice_spec="",
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index ec39b1790e..df9ca86ec7 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -621,7 +621,7 @@ def _confusion_matrix_at_thresholds(labels,
 
 
 def _aggregate_variable(v, collections):
-  f = lambda distribution, value: distribution.read_var(value)
+  f = lambda distribution, value: distribution.extended.read_var(value)
   return _aggregate_across_replicas(collections, f, v)
 
 
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt
index 81224f00a4..b06c73d126 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-mirrored-strategy.pbtxt
@@ -103,10 +103,6 @@ tf_class {
     name: "non_slot_devices"
     argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "read_var"
-    argspec: "args=[\'self\', \'v\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "reduce"
     argspec: "args=[\'self\', \'reduce_op\', \'value\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt
index 63b6584caf..9a1df55142 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.distribute.-strategy.pbtxt
@@ -102,10 +102,6 @@ tf_class {
     name: "non_slot_devices"
     argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "read_var"
-    argspec: "args=[\'self\', \'v\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "reduce"
     argspec: "args=[\'self\', \'reduce_op\', \'value\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
index 81224f00a4..b06c73d126 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-mirrored-strategy.pbtxt
@@ -103,10 +103,6 @@ tf_class {
     name: "non_slot_devices"
     argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "read_var"
-    argspec: "args=[\'self\', \'v\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "reduce"
     argspec: "args=[\'self\', \'reduce_op\', \'value\'], varargs=None, keywords=None, defaults=None"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
index 63b6584caf..9a1df55142 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.distribute.-strategy.pbtxt
@@ -102,10 +102,6 @@ tf_class {
     name: "non_slot_devices"
     argspec: "args=[\'self\', \'var_list\'], varargs=None, keywords=None, defaults=None"
   }
-  member_method {
-    name: "read_var"
-    argspec: "args=[\'self\', \'v\'], varargs=None, keywords=None, defaults=None"
-  }
   member_method {
     name: "reduce"
     argspec: "args=[\'self\', \'reduce_op\', \'value\'], varargs=None, keywords=None, defaults=None"
-- 
GitLab


From 9b54632711aef580a90eb5f4039377b7e59ee3a5 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Tue, 18 Dec 2018 14:08:52 -0800
Subject: [PATCH 772/873] Add additional documentation about FlatBufferModel
 lifetime semantics

The FlatBufferModel must be kept alive as long as any associated
Interpreters remain in use. Moreover, the caller must also keep alive
any raw buffers wrapped by a FlatBufferModel. Emphasize this in the
documentation.

PiperOrigin-RevId: 226056499
---
 tensorflow/lite/model.cc | 30 +++++++++++-----------
 tensorflow/lite/model.h  | 54 +++++++++++++++++++++++-----------------
 2 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/tensorflow/lite/model.cc b/tensorflow/lite/model.cc
index bfadf2d6a0..663ee38280 100644
--- a/tensorflow/lite/model.cc
+++ b/tensorflow/lite/model.cc
@@ -85,7 +85,7 @@ std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromFile(
   std::unique_ptr<FlatBufferModel> model;
   auto allocation = GetAllocationFromFile(filename, /*mmap_file=*/true,
                                           error_reporter, /*use_nnapi=*/true);
-  model.reset(new FlatBufferModel(allocation.release(), error_reporter));
+  model.reset(new FlatBufferModel(std::move(allocation), error_reporter));
   if (!model->initialized()) model.reset();
   return model;
 }
@@ -112,20 +112,21 @@ std::unique_ptr<FlatBufferModel> FlatBufferModel::VerifyAndBuildFromFile(
                               allocation->bytes(), error_reporter)) {
     return model;
   }
-  model.reset(new FlatBufferModel(allocation.release(), error_reporter));
+  model.reset(new FlatBufferModel(std::move(allocation), error_reporter));
   if (!model->initialized()) model.reset();
   return model;
 }
 #endif
 
 std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromBuffer(
-    const char* buffer, size_t buffer_size, ErrorReporter* error_reporter) {
+    const char* caller_owned_buffer, size_t buffer_size,
+    ErrorReporter* error_reporter) {
   error_reporter = ValidateErrorReporter(error_reporter);
 
   std::unique_ptr<FlatBufferModel> model;
-  Allocation* allocation =
-      new MemoryAllocation(buffer, buffer_size, error_reporter);
-  model.reset(new FlatBufferModel(allocation, error_reporter));
+  std::unique_ptr<Allocation> allocation(
+      new MemoryAllocation(caller_owned_buffer, buffer_size, error_reporter));
+  model.reset(new FlatBufferModel(std::move(allocation), error_reporter));
   if (!model->initialized()) model.reset();
   return model;
 }
@@ -151,11 +152,12 @@ std::unique_ptr<FlatBufferModel> FlatBufferModel::VerifyAndBuildFromBuffer(
 }
 
 std::unique_ptr<FlatBufferModel> FlatBufferModel::BuildFromModel(
-    const tflite::Model* model_spec, ErrorReporter* error_reporter) {
+    const tflite::Model* caller_owned_model_spec,
+    ErrorReporter* error_reporter) {
   error_reporter = ValidateErrorReporter(error_reporter);
 
   std::unique_ptr<FlatBufferModel> model;
-  model.reset(new FlatBufferModel(model_spec, error_reporter));
+  model.reset(new FlatBufferModel(caller_owned_model_spec, error_reporter));
   if (!model->initialized()) model.reset();
   return model;
 }
@@ -173,20 +175,18 @@ bool FlatBufferModel::CheckModelIdentifier() const {
 
 FlatBufferModel::FlatBufferModel(const Model* model,
                                  ErrorReporter* error_reporter)
-    : error_reporter_(ValidateErrorReporter(error_reporter)) {
-  model_ = model;
-}
+    : model_(model), error_reporter_(ValidateErrorReporter(error_reporter)) {}
 
-FlatBufferModel::FlatBufferModel(Allocation* allocation,
+FlatBufferModel::FlatBufferModel(std::unique_ptr<Allocation> allocation,
                                  ErrorReporter* error_reporter)
-    : error_reporter_(ValidateErrorReporter(error_reporter)) {
-  allocation_ = allocation;
+    : error_reporter_(ValidateErrorReporter(error_reporter)),
+      allocation_(std::move(allocation)) {
   if (!allocation_->valid() || !CheckModelIdentifier()) return;
 
   model_ = ::tflite::GetModel(allocation_->base());
 }
 
-FlatBufferModel::~FlatBufferModel() { delete allocation_; }
+FlatBufferModel::~FlatBufferModel() {}
 
 InterpreterBuilder::InterpreterBuilder(const FlatBufferModel& model,
                                        const OpResolver& op_resolver)
diff --git a/tensorflow/lite/model.h b/tensorflow/lite/model.h
index bd0f4baef6..069cefabf9 100644
--- a/tensorflow/lite/model.h
+++ b/tensorflow/lite/model.h
@@ -56,6 +56,9 @@ class TfLiteVerifier {
 
 // An RAII object that represents a read-only tflite model, copied from disk,
 // or mmapped. This uses flatbuffers as the serialization format.
+//
+// NOTE: The current API requires that a FlatBufferModel instance be kept alive
+// by the client as long as it is in use by any dependent Interpreter instances.
 class FlatBufferModel {
  public:
   // Builds a model based on a file.
@@ -79,15 +82,16 @@ class FlatBufferModel {
       const char* filename, TfLiteVerifier* extra_verifier = nullptr,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
-  // Builds a model based on a pre-loaded flatbuffer. The caller retains
-  // ownership of the buffer and should keep it alive until the returned object
-  // is destroyed. Caller retains ownership of `error_reporter` and must ensure
-  // its lifetime is longer than the FlatBufferModel instance.
+  // Builds a model based on a pre-loaded flatbuffer.
+  // Caller retains ownership of the buffer and should keep it alive until
+  // the returned object is destroyed. Caller also retains ownership of
+  // `error_reporter` and must ensure its lifetime is longer than the
+  // FlatBufferModel instance.
   // Returns a nullptr in case of failure.
   // NOTE: this does NOT validate the buffer so it should NOT be called on
   // invalid/untrusted input. Use VerifyAndBuildFromBuffer in that case
   static std::unique_ptr<FlatBufferModel> BuildFromBuffer(
-      const char* buffer, size_t buffer_size,
+      const char* caller_owned_buffer, size_t buffer_size,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
   // Verifies whether the content of the buffer is legit, then builds a model
@@ -105,13 +109,13 @@ class FlatBufferModel {
       TfLiteVerifier* extra_verifier = nullptr,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
-  // Builds a model directly from a flatbuffer pointer. The caller retains
-  // ownership of the buffer and should keep it alive until the returned object
-  // is destroyed. Caller retains ownership of `error_reporter` and must ensure
-  // its lifetime is longer than the FlatBufferModel instance.
+  // Builds a model directly from a flatbuffer pointer
+  // Caller retains ownership of the buffer and should keep it alive until the
+  // returned object is destroyed. Caller retains ownership of `error_reporter`
+  // and must ensure its lifetime is longer than the FlatBufferModel instance.
   // Returns a nullptr in case of failure.
   static std::unique_ptr<FlatBufferModel> BuildFromModel(
-      const tflite::Model* model_spec,
+      const tflite::Model* caller_owned_model_spec,
       ErrorReporter* error_reporter = DefaultErrorReporter());
 
   // Releases memory or unmaps mmaped memory.
@@ -125,7 +129,7 @@ class FlatBufferModel {
   const tflite::Model* operator->() const { return model_; }
   const tflite::Model* GetModel() const { return model_; }
   ErrorReporter* error_reporter() const { return error_reporter_; }
-  const Allocation* allocation() const { return allocation_; }
+  const Allocation* allocation() const { return allocation_.get(); }
 
   // Returns true if the model identifier is correct (otherwise false and
   // reports an error).
@@ -137,7 +141,7 @@ class FlatBufferModel {
   // `error_reporter`remains with the caller and must have lifetime at least
   // as much as FlatBufferModel. This is to allow multiple models to use the
   // same ErrorReporter instance.
-  FlatBufferModel(Allocation* allocation,
+  FlatBufferModel(std::unique_ptr<Allocation> allocation,
                   ErrorReporter* error_reporter = DefaultErrorReporter());
 
   // Loads a model from Model flatbuffer. The `model` has to remain alive and
@@ -150,24 +154,28 @@ class FlatBufferModel {
   // The error reporter to use for model errors and subsequent errors when
   // the interpreter is created
   ErrorReporter* error_reporter_;
-  // The allocator used for holding memory of the model.
-  Allocation* allocation_ = nullptr;
+  // The allocator used for holding memory of the model. Note that this will
+  // be null if the client provides a tflite::Model directly.
+  std::unique_ptr<Allocation> allocation_;
 };
 
 // Build an interpreter capable of interpreting `model`.
 //
-// model: a scoped model whose lifetime must be at least as long as
-//   the interpreter. In principle multiple interpreters can be made from
-//   a single model.
-// op_resolver: An instance that implements the Resolver interface which maps
-//   custom op names and builtin op codes to op registrations.
-// reportError: a functor that is called to report errors that handles
-//   printf var arg semantics. The lifetime of the reportError object must
+// model: A model whose lifetime must be at least as long as any
+//   interpreter(s) created by the builder. In principle multiple interpreters
+//   can be made from a single model.
+// op_resolver: An instance that implements the OpResolver interface, which maps
+//   custom op names and builtin op codes to op registrations. The lifetime
+//   of the provided `op_resolver` object must be at least as long as the
+//   InterpreterBuilder; unlike `model` and `error_reporter`, the `op_resolver`
+//   does not need to exist for the duration of any created Interpreter objects.
+// error_reporter: a functor that is called to report errors that handles
+//   printf var arg semantics. The lifetime of the `error_reporter` object must
 //   be greater than or equal to the Interpreter created by operator().
 //
 // Returns a kTfLiteOk when successful and sets interpreter to a valid
-// Interpreter. Note: the user must ensure the model lifetime is at least as
-// long as interpreter's lifetime.
+// Interpreter. Note: The user must ensure the model lifetime (and error
+// reporter, if provided) is at least as long as interpreter's lifetime.
 class InterpreterBuilder {
  public:
   InterpreterBuilder(const FlatBufferModel& model,
-- 
GitLab


From 3cee532afd32c2db28e9359765d88ad5f1567f5f Mon Sep 17 00:00:00 2001
From: James Ring <sjr@google.com>
Date: Tue, 18 Dec 2018 14:10:24 -0800
Subject: [PATCH 773/873] Add WritableFile::Tell, make tf.GFile.tell call it.

Currently, tf.GFile.tell fails if the file was opened for writing. This is
undesirable behavior.

This change adds WritableFile::Tell and has tf.GFile.tell delegate to that in
the case of a writable file.

The Tell method returns the write position in the given writable file. This
method is intended to function like the POSIX ftell method, but any
WritableFile implementation may choose to return errors::Unimplemented (the
default behavior).

Implementations are provided for posix, HDFS, gzip, GCS and some others.

PiperOrigin-RevId: 226056765
---
 tensorflow/core/lib/io/recordio_test.cc               |  4 ++++
 tensorflow/core/lib/io/table_test.cc                  |  4 ++++
 tensorflow/core/lib/io/zlib_outputbuffer.cc           |  2 ++
 tensorflow/core/lib/io/zlib_outputbuffer.h            |  4 ++++
 tensorflow/core/platform/cloud/gcs_file_system.cc     |  8 ++++++++
 .../core/platform/cloud/gcs_file_system_test.cc       |  3 +++
 tensorflow/core/platform/cloud/retrying_file_system.h |  5 +++++
 .../core/platform/cloud/retrying_file_system_test.cc  |  3 +++
 tensorflow/core/platform/env_test.cc                  |  8 ++++++++
 tensorflow/core/platform/file_system.h                | 10 ++++++++++
 tensorflow/core/platform/hadoop/BUILD                 |  2 +-
 tensorflow/core/platform/hadoop/hadoop_file_system.cc | 10 ++++++++++
 .../core/platform/hadoop/hadoop_file_system_test.cc   |  3 +++
 tensorflow/core/platform/posix/posix_file_system.cc   | 11 +++++++++++
 tensorflow/python/lib/io/file_io.i                    | 11 +++++++++++
 tensorflow/python/lib/io/file_io.py                   | 10 ++++++++--
 16 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/tensorflow/core/lib/io/recordio_test.cc b/tensorflow/core/lib/io/recordio_test.cc
index 946d7188d3..e6a2e4a066 100644
--- a/tensorflow/core/lib/io/recordio_test.cc
+++ b/tensorflow/core/lib/io/recordio_test.cc
@@ -62,6 +62,10 @@ class StringDest : public WritableFile {
     contents_->append(slice.data(), slice.size());
     return Status::OK();
   }
+  Status Tell(int64* pos) override {
+    *pos = contents_->size();
+    return Status::OK();
+  }
 
  private:
   string* contents_;
diff --git a/tensorflow/core/lib/io/table_test.cc b/tensorflow/core/lib/io/table_test.cc
index 9cebbf40c6..addba92005 100644
--- a/tensorflow/core/lib/io/table_test.cc
+++ b/tensorflow/core/lib/io/table_test.cc
@@ -97,6 +97,10 @@ class StringSink : public WritableFile {
   Status Close() override { return Status::OK(); }
   Status Flush() override { return Status::OK(); }
   Status Sync() override { return Status::OK(); }
+  Status Tell(int64* pos) override {
+    *pos = contents_.size();
+    return Status::OK();
+  }
 
   Status Append(StringPiece data) override {
     contents_.append(data.data(), data.size());
diff --git a/tensorflow/core/lib/io/zlib_outputbuffer.cc b/tensorflow/core/lib/io/zlib_outputbuffer.cc
index cba139e6ad..726d2b1364 100644
--- a/tensorflow/core/lib/io/zlib_outputbuffer.cc
+++ b/tensorflow/core/lib/io/zlib_outputbuffer.cc
@@ -225,5 +225,7 @@ Status ZlibOutputBuffer::Deflate(int flush) {
   return errors::DataLoss(error_string);
 }
 
+Status ZlibOutputBuffer::Tell(int64* position) { return file_->Tell(position); }
+
 }  // namespace io
 }  // namespace tensorflow
diff --git a/tensorflow/core/lib/io/zlib_outputbuffer.h b/tensorflow/core/lib/io/zlib_outputbuffer.h
index ccad2fda44..f4c4d9cbc3 100644
--- a/tensorflow/core/lib/io/zlib_outputbuffer.h
+++ b/tensorflow/core/lib/io/zlib_outputbuffer.h
@@ -80,6 +80,10 @@ class ZlibOutputBuffer : public WritableFile {
   // Deflates any cached input, writes all output to file and syncs it.
   Status Sync() override;
 
+  // Returns the write position in the underlying file. The position does not
+  // reflect buffered, un-flushed data.
+  Status Tell(int64* position) override;
+
  private:
   WritableFile* file_;  // Not owned
   Status init_status_;
diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc
index 26eff8f834..fe2e0f5b1f 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system.cc
@@ -406,6 +406,14 @@ class GcsWritableFile : public WritableFile {
     return status;
   }
 
+  Status Tell(int64* position) override {
+    *position = outfile_.tellp();
+    if (*position == -1) {
+      return errors::Internal("tellp on the internal temporary file failed");
+    }
+    return Status::OK();
+  }
+
  private:
   /// Copies the current version of the file to GCS.
   ///
diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
index f0f5f592fa..bf057d876f 100644
--- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc
@@ -639,6 +639,9 @@ TEST(GcsFileSystemTest, NewWritableFile) {
   std::unique_ptr<WritableFile> wfile;
   TF_EXPECT_OK(fs.NewWritableFile("gs://bucket/path/writeable", &wfile));
   TF_EXPECT_OK(wfile->Append("content1,"));
+  int64 pos;
+  TF_EXPECT_OK(wfile->Tell(&pos));
+  EXPECT_EQ(9, pos);
   TF_EXPECT_OK(wfile->Append("content2"));
   TF_EXPECT_OK(wfile->Flush());
   // Re-reading the file should trigger another HTTP request to GCS.
diff --git a/tensorflow/core/platform/cloud/retrying_file_system.h b/tensorflow/core/platform/cloud/retrying_file_system.h
index 5ce6670dc7..f72a666690 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system.h
+++ b/tensorflow/core/platform/cloud/retrying_file_system.h
@@ -191,6 +191,11 @@ class RetryingWritableFile : public WritableFile {
     return RetryingUtils::CallWithRetries(
         [this]() { return base_file_->Sync(); }, retry_config_);
   }
+  Status Tell(int64* position) override {
+    return RetryingUtils::CallWithRetries(
+        [this, &position]() { return base_file_->Tell(position); },
+        retry_config_);
+  }
 
  private:
   std::unique_ptr<WritableFile> base_file_;
diff --git a/tensorflow/core/platform/cloud/retrying_file_system_test.cc b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
index 868eea096c..2bc9d830aa 100644
--- a/tensorflow/core/platform/cloud/retrying_file_system_test.cc
+++ b/tensorflow/core/platform/cloud/retrying_file_system_test.cc
@@ -78,6 +78,9 @@ class MockWritableFile : public WritableFile {
   Status Close() override { return calls_.ConsumeNextCall("Close"); }
   Status Flush() override { return calls_.ConsumeNextCall("Flush"); }
   Status Sync() override { return calls_.ConsumeNextCall("Sync"); }
+  Status Tell(int64* position) override {
+    return calls_.ConsumeNextCall("Tell");
+  }
 
  private:
   mutable MockCallSequence calls_;
diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc
index 2e32abdffb..2e1d4a263f 100644
--- a/tensorflow/core/platform/env_test.cc
+++ b/tensorflow/core/platform/env_test.cc
@@ -356,6 +356,14 @@ TEST_F(DefaultEnvTest, LocalTempFilename) {
   TF_CHECK_OK(file_to_write->Close());
   TF_CHECK_OK(env->FileExists(filename));
 
+  // Open the file in append mode, check that Tell() reports the appropriate
+  // offset.
+  std::unique_ptr<WritableFile> file_to_append;
+  TF_CHECK_OK(env->NewAppendableFile(filename, &file_to_append));
+  int64 pos;
+  TF_CHECK_OK(file_to_append->Tell(&pos));
+  ASSERT_EQ(4, pos);
+
   // Read from the temporary file and check content.
   std::unique_ptr<RandomAccessFile> file_to_read;
   TF_CHECK_OK(env->NewRandomAccessFile(filename, &file_to_read));
diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h
index c84a93b1bf..48ffa66358 100644
--- a/tensorflow/core/platform/file_system.h
+++ b/tensorflow/core/platform/file_system.h
@@ -305,6 +305,16 @@ class WritableFile {
   /// be properly saved.
   virtual Status Sync() = 0;
 
+  /// \brief Retrieves the current write position in the file, or -1 on
+  /// error.
+  ///
+  /// This is an optional operation, subclasses may choose to return
+  /// errors::Unimplemented.
+  virtual Status Tell(int64* position) {
+    *position = -1;
+    return errors::Unimplemented("This filesystem does not support Tell()");
+  }
+
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(WritableFile);
 };
diff --git a/tensorflow/core/platform/hadoop/BUILD b/tensorflow/core/platform/hadoop/BUILD
index 7c38c399bd..e04835f4f3 100644
--- a/tensorflow/core/platform/hadoop/BUILD
+++ b/tensorflow/core/platform/hadoop/BUILD
@@ -26,7 +26,7 @@ cc_library(
 
 # This test is set to manual because it requires downloading the Hadoop
 # distribution to run. To run this test:
-# 1. Ensure $JAVA_HOME is set.
+# 1. Ensure $JAVA_HOME is set to the location of a JDK 8 installation.
 # 2. Download the binary Hadoop distribution from:
 #    http://hadoop.apache.org/releases.html
 # 3. Extract the Hadoop distribution and run:
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
index eb35531e9f..ad4ed5cdd8 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc
@@ -66,6 +66,7 @@ class LibHDFS {
   std::function<tSize(hdfsFS, hdfsFile, const void*, tSize)> hdfsWrite;
   std::function<int(hdfsFS, hdfsFile)> hdfsHFlush;
   std::function<int(hdfsFS, hdfsFile)> hdfsHSync;
+  std::function<tOffset(hdfsFS, hdfsFile)> hdfsTell;
   std::function<hdfsFile(hdfsFS, const char*, int, int, short, tSize)>
       hdfsOpenFile;
   std::function<int(hdfsFS, const char*)> hdfsExists;
@@ -92,6 +93,7 @@ class LibHDFS {
       BIND_HDFS_FUNC(hdfsPread);
       BIND_HDFS_FUNC(hdfsWrite);
       BIND_HDFS_FUNC(hdfsHFlush);
+      BIND_HDFS_FUNC(hdfsTell);
       BIND_HDFS_FUNC(hdfsHSync);
       BIND_HDFS_FUNC(hdfsOpenFile);
       BIND_HDFS_FUNC(hdfsExists);
@@ -315,6 +317,14 @@ class HDFSWritableFile : public WritableFile {
     return Status::OK();
   }
 
+  Status Tell(int64* position) override {
+    *position = hdfs_->hdfsTell(fs_, file_);
+    if (*position == -1) {
+      return IOError(filename_, errno);
+    }
+    return Status::OK();
+  }
+
  private:
   string filename_;
   LibHDFS* hdfs_;
diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc b/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
index b207d34749..d29667944a 100644
--- a/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
+++ b/tensorflow/core/platform/hadoop/hadoop_file_system_test.cc
@@ -95,6 +95,9 @@ TEST_F(HadoopFileSystemTest, WritableFile) {
   const string fname = TmpDir("WritableFile");
   TF_EXPECT_OK(hdfs.NewWritableFile(fname, &writer));
   TF_EXPECT_OK(writer->Append("content1,"));
+  int64 pos;
+  TF_EXPECT_OK(writer->Tell(&pos));
+  EXPECT_EQ(pos, 9);
   TF_EXPECT_OK(writer->Append("content2"));
   TF_EXPECT_OK(writer->Flush());
   TF_EXPECT_OK(writer->Sync());
diff --git a/tensorflow/core/platform/posix/posix_file_system.cc b/tensorflow/core/platform/posix/posix_file_system.cc
index fc48cab564..2f59940ef3 100644
--- a/tensorflow/core/platform/posix/posix_file_system.cc
+++ b/tensorflow/core/platform/posix/posix_file_system.cc
@@ -122,6 +122,17 @@ class PosixWritableFile : public WritableFile {
     }
     return s;
   }
+
+  Status Tell(int64* position) override {
+    Status s;
+    *position = ftell(file_);
+
+    if (*position == -1) {
+      s = IOError(filename_, errno);
+    }
+
+    return s;
+  }
 };
 
 class PosixReadOnlyMemoryRegion : public ReadOnlyMemoryRegion {
diff --git a/tensorflow/python/lib/io/file_io.i b/tensorflow/python/lib/io/file_io.i
index 0aa08ea3d1..135e9e38d7 100644
--- a/tensorflow/python/lib/io/file_io.i
+++ b/tensorflow/python/lib/io/file_io.i
@@ -220,6 +220,16 @@ void AppendToFile(const string& file_content, tensorflow::WritableFile* file,
   }
 }
 
+int64 TellFile(tensorflow::WritableFile* file, TF_Status* out_status) {
+  int64 position = -1;
+  tensorflow::Status status = file->Tell(&position);
+  if (!status.ok()) {
+    Set_TF_Status_from_Status(out_status, status);
+  }
+  return position;
+}
+
+
 string ReadFromStream(tensorflow::io::BufferedInputStream* stream,
                       size_t bytes,
                       TF_Status* out_status) {
@@ -265,6 +275,7 @@ tensorflow::WritableFile* CreateWritableFile(const string& filename,
                                              TF_Status* out_status);
 void AppendToFile(const string& file_content, tensorflow::WritableFile* file,
                   TF_Status* out_status);
+int64 TellFile(tensorflow::WritableFile* file, TF_Status* out_status);
 string ReadFromStream(tensorflow::io::BufferedInputStream* stream,
                       size_t bytes,
                       TF_Status* out_status);
diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py
index ee55d89bff..2720962084 100644
--- a/tensorflow/python/lib/io/file_io.py
+++ b/tensorflow/python/lib/io/file_io.py
@@ -196,8 +196,14 @@ class FileIO(object):
 
   def tell(self):
     """Returns the current position in the file."""
-    self._preread_check()
-    return self._read_buf.Tell()
+    if self._read_check_passed:
+      self._preread_check()
+      return self._read_buf.Tell()
+    else:
+      self._prewrite_check()
+
+      with errors.raise_exception_on_not_ok_status() as status:
+        return pywrap_tensorflow.TellFile(self._writable_file, status)
 
   def __enter__(self):
     """Make usable with "with" statement."""
-- 
GitLab


From 87785df898d2167aad1ef3db9c488b02df0bef83 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Tue, 18 Dec 2018 14:17:57 -0800
Subject: [PATCH 774/873] Hybrid conv kernel should use int8 weights instead of
 casted uint8.

Motivation for Hybrid conv to use int8:
- Currently the conv operation reinterpret_casts uint8 weights to int8 before using them in the kernel. This is an abuse of the schema's type system.
- We need support for shared weights in the QuantizeWeights tool. Currently if a shared weight is used by an operation that is consumed by a dequantize op and a hybrid op, we have no way to quantize the weights since one uses int8 and the other uses uint8. By specifying the type accurately for hybrid ops, we enable the tool to quantize shared weights. (We need to update the other hybrid kernels too.)

PiperOrigin-RevId: 226058081
---
 tensorflow/lite/kernels/conv.cc            |  38 ++++--
 tensorflow/lite/kernels/conv_test.cc       | 143 ++++++++++++++++++++-
 tensorflow/lite/kernels/test_util.h        |  30 +++--
 tensorflow/lite/toco/tflite/export_test.cc |  15 +++
 tensorflow/lite/toco/tflite/operator.cc    |  13 ++
 5 files changed, 213 insertions(+), 26 deletions(-)

diff --git a/tensorflow/lite/kernels/conv.cc b/tensorflow/lite/kernels/conv.cc
index 1fd870be93..05368aa5ed 100644
--- a/tensorflow/lite/kernels/conv.cc
+++ b/tensorflow/lite/kernels/conv.cc
@@ -133,7 +133,8 @@ void TransposeFloatTensor(TfLiteTensor* input, TfLiteTensor* output) {
 // Note: `context->AddTensors` might invalidate pointers to existing tensors.
 // Therefore the logic to add tensors are isolated into this function.
 static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context,
-                                                       TfLiteNode* node) {
+                                                       TfLiteNode* node,
+                                                       bool is_hybrid) {
   auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
   OpData* data = reinterpret_cast<OpData*>(node->user_data);
 
@@ -141,9 +142,6 @@ static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context,
   TfLiteTensor* input = &context->tensors[node->inputs->data[0]];
   TfLiteTensor* filter = &context->tensors[node->inputs->data[1]];
 
-  const bool is_hybrid =
-      (input->type == kTfLiteFloat32 && filter->type == kTfLiteUInt8);
-
   int filter_width = filter->dims->data[2];
   int filter_height = filter->dims->data[1];
 
@@ -250,7 +248,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   }
 
   const bool is_hybrid =
-      (input->type == kTfLiteFloat32 && filter->type == kTfLiteUInt8);
+      (input->type == kTfLiteFloat32 &&
+       (filter->type == kTfLiteUInt8 || filter->type == kTfLiteInt8));
 
   data->run_multithreaded_kernel = context->recommended_num_threads != 1;
   // Hybrid kernels don't support multithreading yet.
@@ -258,7 +257,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     data->run_multithreaded_kernel = false;
   }
 
-  TF_LITE_ENSURE_STATUS(AllocateTemporaryTensorsIfRequired(context, node));
+  TF_LITE_ENSURE_STATUS(
+      AllocateTemporaryTensorsIfRequired(context, node, is_hybrid));
 
   int channels_in = filter->dims->data[3];
   int channels_out = filter->dims->data[0];
@@ -334,7 +334,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         &context->tensors[node->temporaries->data[data->im2col_index]];
     im2col->type = input->type;
     if (is_hybrid) {
-      im2col->type = kTfLiteUInt8;
+      im2col->type = filter->type;
     }
     im2col->allocation_type = kTfLiteArenaRw;
     auto im2col_status = context->ResizeTensor(context, im2col, im2col_size);
@@ -372,7 +372,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
         data->input_quantized_id;
     TfLiteTensor* input_quantized =
         GetTemporary(context, node, data->input_quantized_index);
-    input_quantized->type = kTfLiteUInt8;
+    input_quantized->type = kTfLiteInt8;
     input_quantized->allocation_type = kTfLiteArenaRw;
     if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) {
       TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims);
@@ -562,8 +562,7 @@ void EvalHybrid(TfLiteContext* context, TfLiteNode* node,
 
   const TfLiteTensor* input_quantized =
       GetTemporary(context, node, data->input_quantized_index);
-  int8_t* quantized_input_ptr_batch =
-      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+  int8_t* quantized_input_ptr_batch = input_quantized->data.int8;
   float* scaling_factors_ptr =
       GetTemporary(context, node, data->scaling_factors_index)->data.f;
 
@@ -578,10 +577,21 @@ void EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   }
 
   int8_t* im2col_ptr = nullptr;
-  if (im2col != nullptr) {
-    im2col_ptr = reinterpret_cast<int8_t*>(im2col->data.uint8);
+  int8_t* filter_ptr = nullptr;
+  if (filter->type == kTfLiteUInt8) {
+    // For backward compatibility, we need to support the case where filters
+    // are quantized to int8 but stored as uint8.
+    if (im2col != nullptr) {
+      im2col_ptr = reinterpret_cast<int8_t*>(im2col->data.uint8);
+    }
+    filter_ptr = reinterpret_cast<int8_t*>(filter->data.uint8);
+  } else {
+    // Code at head uses the int8 type so we do not need to do the cast.
+    if (im2col != nullptr) {
+      im2col_ptr = im2col->data.int8;
+    }
+    filter_ptr = filter->data.int8;
   }
-  int8_t* filter_ptr = reinterpret_cast<int8_t*>(filter->data.uint8);
 
   switch (kernel_type) {
     case kReference:
@@ -640,7 +650,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // separate ops to avoid dispatch overhead here.
   switch (input->type) {  // Already know in/outtypes are same.
     case kTfLiteFloat32:
-      if (filter->type == kTfLiteUInt8) {
+      if (filter->type == kTfLiteUInt8 || filter->type == kTfLiteInt8) {
         EvalHybrid<kernel_type>(context, node, params, data, input, filter,
                                 bias, im2col, hwcn_weights, output);
       } else if (data->run_multithreaded_kernel) {
diff --git a/tensorflow/lite/kernels/conv_test.cc b/tensorflow/lite/kernels/conv_test.cc
index eebf9f9de4..478df3354f 100644
--- a/tensorflow/lite/kernels/conv_test.cc
+++ b/tensorflow/lite/kernels/conv_test.cc
@@ -758,6 +758,10 @@ class HybridConvolutionOpModel : public BaseConvolutionOpModel {
     SymmetricQuantizeAndPopulate(filter_, f);
   }
 
+  void SetSignedFilter(std::initializer_list<float> f) {
+    SignedSymmetricQuantizeAndPopulate(filter_, f);
+  }
+
   void SetBias(std::initializer_list<float> data) {
     PopulateTensor(bias_, data);
   }
@@ -765,7 +769,7 @@ class HybridConvolutionOpModel : public BaseConvolutionOpModel {
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
 };
 
-TEST_P(ConvolutionOpTest, SimpleTestHybrid) {
+TEST_P(ConvolutionOpTest, SimpleTestHybridUint8) {
   HybridConvolutionOpModel m(
       GetRegistration(), {TensorType_FLOAT32, {2, 2, 4, 1}},
       {TensorType_UINT8, {3, 2, 2, 1}}, {TensorType_FLOAT32, {}});
@@ -824,7 +828,7 @@ TEST_P(ConvolutionOpTest, SimpleTestHybrid) {
 // while keeping the filters for each channel equivalent.
 //
 // 2 * (A/2) * B = A * B, where the left side is this new test.
-TEST_P(ConvolutionOpTest, SimpleTestHybridWithChannels) {
+TEST_P(ConvolutionOpTest, SimpleTestHybridWithChannelsUint8) {
   HybridConvolutionOpModel m(
       GetRegistration(), {TensorType_FLOAT32, {2, 2, 4, 2}},
       {TensorType_UINT8, {3, 2, 2, 2}}, {TensorType_FLOAT32, {}});
@@ -856,7 +860,7 @@ TEST_P(ConvolutionOpTest, SimpleTestHybridWithChannels) {
                                  0.16)));
 }
 
-TEST_P(ConvolutionOpTest, PointwiseHybrid) {
+TEST_P(ConvolutionOpTest, PointwiseHybridUint8) {
   HybridConvolutionOpModel m(
       GetRegistration(), {TensorType_FLOAT32, {2, 2, 4, 2}},
       {TensorType_UINT8, {1, 1, 1, 2}}, {TensorType_FLOAT32, {}}, 1, 1);
@@ -898,6 +902,139 @@ TEST_P(ConvolutionOpTest, PointwiseHybrid) {
                   0.0316)));
 }
 
+TEST_P(ConvolutionOpTest, SimpleTestHybridInt8) {
+  HybridConvolutionOpModel m(
+      GetRegistration(), {TensorType_FLOAT32, {2, 2, 4, 1}},
+      {TensorType_INT8, {3, 2, 2, 1}}, {TensorType_FLOAT32, {}});
+
+  m.SetInput({
+      // First batch
+      1, 1, 1, 1,  // row = 1
+      2, 2, 2, 2,  // row = 2
+      // Second batch
+      1, 2, 3, 4,  // row = 1
+      1, 2, 3, 4,  // row = 2
+  });
+  m.SetSignedFilter({
+      1, 2, 3, 4,    // first 2x2 filter
+      -1, 1, -1, 1,  // second 2x2 filter
+      -1, -1, 1, 1,  // third 2x2 filter
+  });
+  m.SetBias({1, 2, 3});
+
+  m.Invoke();
+
+  // Example: we get 17.1577 instead of 17.
+  //
+  // Second batch:
+  // 1 2 3 4  -> 32 64 95 127 with scale factor 127/4.
+  // 1 2 3 4     32 64 95 127
+  //
+  // First filter:
+  // 1 2  -> 32 64  with scale factor of 127/4.
+  // 3 4     95 127
+  //
+  // The left half of the input gives us 16288. Multiply by (4/127)^2 for
+  // dequantization and adding 1 for the bias gives us the result. and adding
+  // the bias gives us the result.
+  //
+  // The optimized kernel converts the input into this matrix via Im2Col
+  //
+  // 1 1 2 2
+  // 1 1 2 2
+  // 1 2 1 2
+  // 3 4 3 4
+  //
+  // and multiplies it with the filter directly.
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(
+                                 {
+                                     18, 2, 5,  // first batch, left
+                                     18, 2, 5,  // first batch, right
+                                     17, 4, 3,  // second batch, left
+                                     37, 4, 3,  // second batch, right
+                                 },
+                                 0.16)));
+}
+
+// This test's output is equivalent to the SimpleTestHybrid
+// because we break each input into two channels, each with half of the value,
+// while keeping the filters for each channel equivalent.
+//
+// 2 * (A/2) * B = A * B, where the left side is this new test.
+TEST_P(ConvolutionOpTest, SimpleTestHybridWithChannelsInt8) {
+  HybridConvolutionOpModel m(
+      GetRegistration(), {TensorType_FLOAT32, {2, 2, 4, 2}},
+      {TensorType_INT8, {3, 2, 2, 2}}, {TensorType_FLOAT32, {}});
+
+  m.SetInput({
+      // First batch
+      0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  // row = 1
+      1, 1, 1, 1, 1, 1, 1, 1,                  // row = 2
+      // Second batch
+      0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2,  // row = 1
+      0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2   // row = 2
+  });
+  m.SetSignedFilter({
+      1,  1,  2,  2,  3,  3,  4, 4,  // first 2x2 filter
+      -1, -1, 1,  1,  -1, -1, 1, 1,  // second 2x2 filter
+      -1, -1, -1, -1, 1,  1,  1, 1   // third 2x2 filter
+  });
+  m.SetBias({1, 2, 3});
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(
+                                 {
+                                     18, 2, 5,  // first batch, left
+                                     18, 2, 5,  // first batch, right
+                                     17, 4, 3,  // second batch, left
+                                     37, 4, 3,  // second batch, right
+                                 },
+                                 0.16)));
+}
+
+TEST_P(ConvolutionOpTest, PointwiseHybridInt8) {
+  HybridConvolutionOpModel m(
+      GetRegistration(), {TensorType_FLOAT32, {2, 2, 4, 2}},
+      {TensorType_INT8, {1, 1, 1, 2}}, {TensorType_FLOAT32, {}}, 1, 1);
+
+  m.SetInput({
+      // First batch
+      0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,  // row = 1
+      1, 1, 1, 1, 1, 1, 1, 1,                  // row = 2
+      // Second batch
+      0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2,  // row = 1
+      0.5, 0.5, 1, 1, 1.5, 1.5, 2, 2   // row = 2
+  });
+
+  m.SetSignedFilter({
+      1, 2,  // first filter
+  });
+  m.SetBias({0});
+
+  m.Invoke();
+
+  // Example: we get 3.03156 instead of 3.
+  //
+  // Second batch:
+  // 0.5 0.5 1 1 1.5 1.5 2 2  -> 32 32 64 64 95 95 127 127 with scale factor
+  // 127/2. We care about the two 64's.
+  //
+  // Filter:
+  // 64 127 with scale factor of 127/2.
+  //
+  // (64 * 64 + 64 * 127) * (2/127)^2 gives us the expected result.
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      1.5, 1.5, 1.5, 1.5,  // first batch, row = 1
+                      3., 3., 3., 3.,      // first batch, row = 2
+                      1.5, 3., 4.5, 6.,    // second batch, row = 1
+                      1.5, 3., 4.5, 6.,    // second batch, row = 2
+                  },
+                  0.0316)));
+}
+
 // TODO(alanchiao): this passes locally, but fails on continuous build system.
 // Re-enable when root cause found.
 TEST_P(ConvolutionOpTest, DISABLED_PointwiseMultifilterHybrid) {
diff --git a/tensorflow/lite/kernels/test_util.h b/tensorflow/lite/kernels/test_util.h
index f5c67c3e9c..4a442f9fa7 100644
--- a/tensorflow/lite/kernels/test_util.h
+++ b/tensorflow/lite/kernels/test_util.h
@@ -161,19 +161,17 @@ class SingleOpModel {
   }
 
   void SymmetricQuantizeAndPopulate(int index, const std::vector<float>& data) {
-    TfLiteTensor* t = interpreter_->tensor(index);
-    const int length = data.size();
-    std::vector<int8_t> q(length);
-    float min, max, scaling_factor;
-    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
-                                          &max, &scaling_factor);
-    // Update quantization params.
-    t->params.scale = scaling_factor;
-    t->params.zero_point = 0;
+    std::vector<int8_t> q = QuantizeTensor(index, data);
     PopulateTensor(index, /*offset=*/0, reinterpret_cast<uint8_t*>(q.data()),
                    reinterpret_cast<uint8_t*>(q.data() + q.size()));
   }
 
+  void SignedSymmetricQuantizeAndPopulate(int index,
+                                          const std::vector<float>& data) {
+    std::vector<int8_t> q = QuantizeTensor(index, data);
+    PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
+  }
+
   const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }
 
   float GetScale(int id) { return tensor_data_.at(id).scale; }
@@ -358,6 +356,20 @@ class SingleOpModel {
     return id;
   }
 
+  std::vector<int8_t> QuantizeTensor(int index,
+                                     const std::vector<float>& data) {
+    TfLiteTensor* t = interpreter_->tensor(index);
+    const int length = data.size();
+    std::vector<int8_t> q(length);
+    float min, max, scaling_factor;
+    tensor_utils::SymmetricQuantizeFloats(data.data(), length, q.data(), &min,
+                                          &max, &scaling_factor);
+    // Update quantization params.
+    t->params.scale = scaling_factor;
+    t->params.zero_point = 0;
+    return q;
+  }
+
   std::map<int, TensorData> tensor_data_;
   std::vector<int32_t> inputs_;
   std::vector<int32_t> outputs_;
diff --git a/tensorflow/lite/toco/tflite/export_test.cc b/tensorflow/lite/toco/tflite/export_test.cc
index 821ed4dbf3..58cfb4987f 100644
--- a/tensorflow/lite/toco/tflite/export_test.cc
+++ b/tensorflow/lite/toco/tflite/export_test.cc
@@ -41,6 +41,14 @@ class ExportTest : public ::testing::Test {
       if (name == "Conv") {
         auto* op = new ConvOperator;
         op->padding.type = PaddingType::kSame;
+        op->inputs = {"input", "filter"};
+        op->outputs = {"output"};
+        Array& input_array = input_model_.GetOrCreateArray(op->inputs[0]);
+        Array& filter_array = input_model_.GetOrCreateArray(op->inputs[1]);
+        Array& output_array = input_model_.GetOrCreateArray(op->outputs[0]);
+        input_array.data_type = ArrayDataType::kFloat;
+        filter_array.data_type = ArrayDataType::kFloat;
+        output_array.data_type = ArrayDataType::kFloat;
         input_model_.operators.emplace_back(op);
       } else if (name == "Add") {
         input_model_.operators.emplace_back(new AddOperator);
@@ -97,6 +105,13 @@ class ExportTest : public ::testing::Test {
       auto* op = new ConvOperator;
       op->padding.type = PaddingType::kSame;
       op->inputs = {"inputs", "weights"};
+      op->outputs = {"output"};
+      Array& input_array = input_model_.GetArray(op->inputs[0]);
+      Array& filter_array = input_model_.GetArray(op->inputs[1]);
+      Array& output_array = input_model_.GetOrCreateArray(op->outputs[0]);
+      input_array.data_type = ArrayDataType::kFloat;
+      filter_array.data_type = ArrayDataType::kFloat;
+      output_array.data_type = ArrayDataType::kFloat;
       input_model_.operators.emplace_back(op);
     }
     input_model_.operators.emplace_back(new AddOperator);
diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc
index f77ebf0d47..c3cb4a1923 100644
--- a/tensorflow/lite/toco/tflite/operator.cc
+++ b/tensorflow/lite/toco/tflite/operator.cc
@@ -96,6 +96,19 @@ class Convolution
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const string& filter_name = op_signature.op->inputs[1];
+    const string& output_name = op_signature.op->outputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    const Array& filter_array = op_signature.model->GetArray(filter_name);
+    const Array& output_array = op_signature.model->GetArray(output_name);
+    // If the op is a signed int8 hybrid operation, we need to return
+    // version 2.
+    if (input_array.data_type == ArrayDataType::kFloat &&
+        filter_array.data_type == ArrayDataType::kInt8 &&
+        output_array.data_type == ArrayDataType::kFloat) {
+      return 2;
+    }
     return 1;
   }
 };
-- 
GitLab


From 28343c1c7fcdc87d4d3031dfe3d8449f33c5fbad Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 14:27:03 -0800
Subject: [PATCH 775/873] Fix strip_include_prefix value.

PiperOrigin-RevId: 226059697
---
 third_party/toolchains/preconfig/generate/generate.sh        | 2 +-
 third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/third_party/toolchains/preconfig/generate/generate.sh b/third_party/toolchains/preconfig/generate/generate.sh
index 76fb1bd3c0..523c232ee1 100755
--- a/third_party/toolchains/preconfig/generate/generate.sh
+++ b/third_party/toolchains/preconfig/generate/generate.sh
@@ -54,7 +54,7 @@ tar xvf "${ROOT}/bazel-bin/${PKG}/generate/${TARGET}_outputs.tar"
 
 # Other than @local_config_tensorrt, the remote config repo is a subpackage of
 # @org_tensorflow and we need to add '-iquote <package_path>' manually.
-buildozer "set strip_include_prefix [package_name()]" //local_config_tensorrt:%cc_library
+buildozer "set strip_include_prefix package_name()" //local_config_tensorrt:%cc_library
 
 # Delete all empty files: configurations leave empty files around when they are
 # unnecessary.
diff --git a/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
index 8bf9115a86..da16bb31b6 100755
--- a/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
+++ b/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
@@ -12,7 +12,7 @@ package(default_visibility = ["//visibility:public"])
 cc_library(
     name = "tensorrt_headers",
     hdrs = [":tensorrt_include"],
-    strip_include_prefix = [package_name()],
+    strip_include_prefix = package_name(),
     visibility = ["//visibility:public"],
 )
 
@@ -22,7 +22,7 @@ cc_library(
     copts = cuda_default_copts(),
     data = ["tensorrt/lib/libnvinfer.so.5"],
     linkstatic = 1,
-    strip_include_prefix = [package_name()],
+    strip_include_prefix = package_name(),
     visibility = ["//visibility:public"],
     deps = [
         ":tensorrt_headers",
-- 
GitLab


From 0848c480c0613257fdff9f5a2a6e2b77d8d0f942 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 18 Dec 2018 14:56:46 -0800
Subject: [PATCH 776/873] Internal change.

PiperOrigin-RevId: 226065180
---
 tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
index 1c2a1263a9..c076157b08 100644
--- a/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/gpu/pip/build_tf_windows.sh
@@ -108,7 +108,7 @@ if [[ "$TF_NIGHTLY" == 1 ]]; then
   if [ -z ${PROJECT_NAME} ]; then
     EXTRA_PIP_FLAGS="--nightly_flag"
   else
-    EXTRA_PIP_FLAGS="--project_name=${PROJECT_NAME} --nightly_flag"
+    EXTRA_PIP_FLAGS="--project_name ${PROJECT_NAME} --nightly_flag"
   fi
 fi
 
-- 
GitLab


From 8d1099b51f7ff8a5ac0bae8cad5bd91fc0b43815 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 18 Dec 2018 14:57:03 -0800
Subject: [PATCH 777/873] roll-forward (again) after fixing the windows
 failure:

    Introduce a logger overriding mechanism.

    Before: the whole program has to link against a unique
    Logger::Singleton(), and there is no default. It's hard to control when
    to use the custom logger vs the default.

    After: By default always use the default logger. The pubsub logger
    overrides via REGISTER_MODULE_INITIALIZER. Multiple implementations can
    co-exist.

    This simplifies the registration management and dependency management.

PiperOrigin-RevId: 226065246
---
 tensorflow/core/BUILD                         | 20 ++-----------
 .../core/platform/default/build_config/BUILD  |  5 ----
 .../core/platform/{default => }/logger.cc     | 25 +++++++++-------
 tensorflow/core/platform/logger.h             | 19 +++++++++++-
 tensorflow/stream_executor/BUILD              |  3 ++
 .../stream_executor/stream_executor_pimpl.cc  | 30 ++++++++++++++++++-
 6 files changed, 66 insertions(+), 36 deletions(-)
 rename tensorflow/core/platform/{default => }/logger.cc (72%)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 8bf1480d33..258c46fbcb 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -445,7 +445,8 @@ cc_library(
 )
 
 cc_library(
-    name = "logger_interface",
+    name = "logger",
+    srcs = ["platform/logger.cc"],
     hdrs = ["platform/logger.h"],
     copts = tf_copts(),
     visibility = ["//visibility:public"],
@@ -455,23 +456,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "default_logger",
-    srcs = ["platform/default/logger.cc"],
-    hdrs = ["platform/logger.h"],
-    deps = [
-        "//tensorflow/core:lib_proto_parsing",
-        "//tensorflow/core:logger_interface",
-    ],
-)
-
-cc_library(
-    name = "logger",
-    hdrs = ["platform/logger.h"],
-    visibility = ["//visibility:public"],
-    deps = ["//tensorflow/core/platform/default/build_config:logger"],
-)
-
 filegroup(
     name = "platform_env_hdrs",
     srcs = [
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index ee6936b372..da1f66dc67 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -275,8 +275,3 @@ alias(
     actual = ":mobile_srcs",
     visibility = ["//visibility:public"],
 )
-
-alias(
-    name = "logger",
-    actual = "//tensorflow/core:default_logger",
-)
diff --git a/tensorflow/core/platform/default/logger.cc b/tensorflow/core/platform/logger.cc
similarity index 72%
rename from tensorflow/core/platform/default/logger.cc
rename to tensorflow/core/platform/logger.cc
index 54b1a1a67c..f5a961e4d3 100644
--- a/tensorflow/core/platform/default/logger.cc
+++ b/tensorflow/core/platform/logger.cc
@@ -18,17 +18,20 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
+namespace {
 
-Logger* Logger::Singleton() {
-  class DefaultLogger : public Logger {
-   private:
-    void DoLogProto(google::protobuf::Any* proto) override {
-      VLOG(2) << proto->ShortDebugString();
-    }
-    void DoFlush() override {}
-  };
-  static Logger* instance = new DefaultLogger();
-  return instance;
-}
+class DefaultLogger : public Logger {
+ private:
+  void DoLogProto(google::protobuf::Any* proto) override {
+    VLOG(2) << proto->ShortDebugString();
+  }
+  void DoFlush() override {}
+};
+
+}  // namespace
+
+Logger::FactoryFunc Logger::singleton_factory_ = []() -> Logger* {
+  return new DefaultLogger();
+};
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/logger.h b/tensorflow/core/platform/logger.h
index 5d304bea63..f0bfef4f2d 100644
--- a/tensorflow/core/platform/logger.h
+++ b/tensorflow/core/platform/logger.h
@@ -26,7 +26,22 @@ namespace tensorflow {
 // log anything to a non-local place, e.g. a database.
 class Logger {
  public:
-  static Logger* Singleton();
+  // The singleton is supposed to be used in the following steps:
+  // * At program start time, REGISTER_MOUDLE_INITIALIZER calls
+  //   SetSingletonFactory.
+  // * At some point in the program execution, Singleton() is called for the
+  //   first time, initializing the logger.
+  // * Succeeding calls to Singleton() return the initiailized logger.
+  using FactoryFunc = Logger* (*)();
+
+  static void SetSingletonFactory(FactoryFunc factory) {
+    singleton_factory_ = factory;
+  }
+
+  static Logger* Singleton() {
+    static Logger* instance = singleton_factory_();
+    return instance;
+  }
 
   virtual ~Logger() = default;
 
@@ -44,6 +59,8 @@ class Logger {
  private:
   virtual void DoLogProto(google::protobuf::Any* proto) = 0;
   virtual void DoFlush() = 0;
+
+  static FactoryFunc singleton_factory_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index 00c23b8d17..c43efc799c 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -54,7 +54,9 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc_impl",
+        ":logging_proto_cc_impl",
         "//tensorflow/core:lib",
+        "//tensorflow/core:logger",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
@@ -71,6 +73,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc",
+        ":logging_proto_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/strings",
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index ee3d2b6da0..cb67a906a8 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/strings/str_cat.h"
+#include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/util/env_var.h"
 #include "tensorflow/stream_executor/blas.h"
 #include "tensorflow/stream_executor/fft.h"
@@ -33,6 +34,7 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 #include "tensorflow/stream_executor/lib/threadpool.h"
+#include "tensorflow/stream_executor/logging.pb.h"
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/rng.h"
 #include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -192,6 +194,8 @@ StreamExecutor::StreamExecutor(
     platform_kind_ = PlatformKind::kOpenCL;
   } else if (port::Lowercase(platform_->Name()) == "host") {
     platform_kind_ = PlatformKind::kHost;
+  } else {
+    platform_kind_ = PlatformKind::kInvalid;
   }
 }
 
@@ -217,7 +221,31 @@ StreamExecutor::~StreamExecutor() {
 port::Status StreamExecutor::Init(int device_ordinal,
                                   DeviceOptions device_options) {
   device_ordinal_ = device_ordinal;
-  return implementation_->Init(device_ordinal, std::move(device_options));
+  TF_RETURN_IF_ERROR(
+      implementation_->Init(device_ordinal, std::move(device_options)));
+
+  if (platform_kind_ == PlatformKind::kCuda) {
+    CudaInfo info;
+
+    int cc_major, cc_minor;
+    GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor);
+    info.mutable_compute_capability()->set_major(cc_major);
+    info.mutable_compute_capability()->set_minor(cc_minor);
+
+    if (auto *dnn = AsDnn()) {
+      port::StatusOr<dnn::VersionInfo> version_or = dnn->GetVersion();
+      if (version_or.ok()) {
+        const auto &version = version_or.ValueOrDie();
+        info.mutable_cudnn_version()->set_major(version.major_version());
+        info.mutable_cudnn_version()->set_minor(version.minor_version());
+        info.mutable_cudnn_version()->set_patch(version.patch());
+      }
+    }
+
+    tensorflow::Logger::Singleton()->LogProto(info);
+  }
+
+  return port::Status::OK();
 }
 
 port::Status StreamExecutor::Init() {
-- 
GitLab


From 2365c5c4e83c753bb02d0c35627e64ce75f5fedc Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Tue, 18 Dec 2018 15:14:32 -0800
Subject: [PATCH 778/873] Switch back to default gRPC timeouts.

PiperOrigin-RevId: 226068612
---
 tensorflow/core/distributed_runtime/rpc/grpc_channel.cc    | 2 --
 tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
index e5634d38bd..781b7d65cd 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel.cc
@@ -60,8 +60,6 @@ Status ValidateHostPortPair(const string& host_port) {
   // TODO(mrry): Implement secure channels.
   ::grpc::ChannelArguments args;
   args.SetInt(GRPC_ARG_MAX_MESSAGE_LENGTH, std::numeric_limits<int32>::max());
-  args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, std::numeric_limits<int>::max());
-  args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, std::numeric_limits<int>::max());
   // NOTE(mrry): Some versions of gRPC use a 20-second minimum backoff
   // on connection failure, which makes our tests time out.
   args.SetInt("grpc.testing.fixed_reconnect_backoff_ms", 1000);
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
index 08518606f6..ac73182190 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc
@@ -193,10 +193,6 @@ Status GrpcServer::Init(
   builder.AddListeningPort(strings::StrCat("0.0.0.0:", requested_port),
                            GetServerCredentials(server_def_), &bound_port_);
   builder.SetMaxMessageSize(std::numeric_limits<int32>::max());
-  builder.AddChannelArgument(GRPC_ARG_KEEPALIVE_TIME_MS,
-                             std::numeric_limits<int>::max());
-  builder.AddChannelArgument(GRPC_ARG_KEEPALIVE_TIMEOUT_MS,
-                             std::numeric_limits<int>::max());
 
   builder.SetOption(
       std::unique_ptr<::grpc::ServerBuilderOption>(new NoReusePortOption));
-- 
GitLab


From 8cb5ae9d6ad9e6f1b0bdc9ae0b38c22fd83fc57f Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Tue, 18 Dec 2018 15:14:58 -0800
Subject: [PATCH 779/873] Add functionality to normalization layer to work in
 the cross-replica context

PiperOrigin-RevId: 226068674
---
 .../python/keras/layers/normalization.py      | 59 ++++++++++++++-----
 1 file changed, 44 insertions(+), 15 deletions(-)

diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py
index e1b8c0bca7..c3c5b2db7d 100644
--- a/tensorflow/python/keras/layers/normalization.py
+++ b/tensorflow/python/keras/layers/normalization.py
@@ -414,10 +414,19 @@ class BatchNormalizationV2(Layer):
   def _assign_moving_average(self, variable, value, momentum):
     with ops.name_scope(None, 'AssignMovingAvg',
                         [variable, value, momentum]) as scope:
+      # TODO(b/120571621): We want to avoid colocating the variables here
+      # since TPUStrategy does not implement replica local variables.
+      # Remove this hack once we support TPULocalVariables.
+      is_tpu_strategy = False
+      if distribution_strategy_context.has_distribution_strategy():
+        distribute = distribution_strategy_context.get_distribution_strategy()
+        if distribute.__class__.__name__ == 'TPUStrategy':
+          is_tpu_strategy = True
+
       # TODO(apassos,srbs,skyewm): the colocation constraints here are disabled
       # because of a bug which leads cond_v2 to skip rewriting them creating
       # conflicts.
-      if tf2.enabled():
+      if tf2.enabled() or is_tpu_strategy:
         cm = contextlib.contextmanager(lambda: (yield))()
       else:
         cm = ops.colocate_with(variable)
@@ -655,20 +664,40 @@ class BatchNormalizationV2(Layer):
         d = _broadcast(array_ops.stop_gradient(d, name='renorm_d'))
         scale, offset = _compose_transforms(r, d, scale, offset)
 
-      def _do_update(var, value):
-        if in_eager_mode and not self.trainable:
-          return
-
-        return self._assign_moving_average(var, value, self.momentum)
-
-      mean_update = tf_utils.smart_cond(
-          training,
-          lambda: _do_update(self.moving_mean, new_mean),
-          lambda: self.moving_mean)
-      variance_update = tf_utils.smart_cond(
-          training,
-          lambda: _do_update(self.moving_variance, new_variance),
-          lambda: self.moving_variance)
+      if distribution_strategy_context.in_cross_replica_context():
+        strategy = distribution_strategy_context.get_distribution_strategy()
+        def _do_update(var, value):
+          """Compute the updates for mean and variance."""
+          if in_eager_mode and not self.trainable:
+            return
+          return strategy.extended.update(
+              var, self._assign_moving_average, (value, self.momentum),
+              group=False)
+        # We need to unwrap the moving_mean or moving_variance in the case of
+        # training being false to match the output of true_fn and false_fn
+        # in the smart cond.
+        mean_update = tf_utils.smart_cond(
+            training,
+            lambda: _do_update(self.moving_mean, new_mean),
+            lambda: strategy.unwrap(self.moving_mean))
+        variance_update = tf_utils.smart_cond(
+            training,
+            lambda: _do_update(self.moving_variance, new_variance),
+            lambda: strategy.unwrap(self.moving_variance))
+      else:
+        def _do_update(var, value):
+          """Compute the updates for mean and variance."""
+          if in_eager_mode and not self.trainable:
+            return
+          return self._assign_moving_average(var, value, self.momentum)
+        mean_update = tf_utils.smart_cond(
+            training,
+            lambda: _do_update(self.moving_mean, new_mean),
+            lambda: self.moving_mean)
+        variance_update = tf_utils.smart_cond(
+            training,
+            lambda: _do_update(self.moving_variance, new_variance),
+            lambda: self.moving_variance)
       if not context.executing_eagerly():
         self.add_update(mean_update, inputs=True)
         self.add_update(variance_update, inputs=True)
-- 
GitLab


From d27dd98f3f48e860f0cdfebb32871ba994a8e87f Mon Sep 17 00:00:00 2001
From: Jonathan Hseu <jhseu@google.com>
Date: Tue, 18 Dec 2018 15:16:23 -0800
Subject: [PATCH 780/873] Raise the TypeError at the line of code that causes
 the issue.

PiperOrigin-RevId: 226068866
---
 tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
index e44b51f3e7..87a970f052 100644
--- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
+++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py
@@ -1672,7 +1672,7 @@ class _OutfeedHostCall(object):
               'Exception while calling %s: %s. It is likely the tensors '
               '(%s[1]) do not match the '
               'function\'s arguments', name, e, name)
-          raise e
+          raise
     return ret
 
   def record(self, host_calls):
@@ -1805,7 +1805,7 @@ class _OutfeedHostCall(object):
                 'Exception while calling %s: %s. It is likely the tensors '
                 '(%s[1]) do not match the '
                 'function\'s arguments', name, e, name)
-            raise e
+            raise
         else:
           ret[name] = self._host_fns[name](*dequeue_ops)
 
-- 
GitLab


From cb53fa065d0cb65e1189bc44129a18943e554c94 Mon Sep 17 00:00:00 2001
From: Suharsh Sivakumar <suharshs@google.com>
Date: Tue, 18 Dec 2018 15:39:08 -0800
Subject: [PATCH 781/873] Add signed int8 hybrid execution for FullyConnected.

PiperOrigin-RevId: 226072298
---
 tensorflow/lite/kernels/fully_connected.cc    | 31 ++++++++++++----
 .../lite/kernels/fully_connected_test.cc      | 36 +++++++++++++++++--
 tensorflow/lite/toco/tflite/operator.cc       | 19 ++++++++--
 3 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/tensorflow/lite/kernels/fully_connected.cc b/tensorflow/lite/kernels/fully_connected.cc
index a1eecb284a..dfc9550ed6 100644
--- a/tensorflow/lite/kernels/fully_connected.cc
+++ b/tensorflow/lite/kernels/fully_connected.cc
@@ -132,13 +132,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // If we have to perform on-the-fly quantization (with quantized weights and
   // float inputs) first we need to quantize the inputs. Allocate a temporary
   // buffer to store the intermediate quantized values.
-  if (input->type == kTfLiteFloat32 && filter->type == kTfLiteUInt8) {
+  if (input->type == kTfLiteFloat32 &&
+      (filter->type == kTfLiteUInt8 || filter->type == kTfLiteInt8)) {
     TfLiteIntArrayFree(node->temporaries);
     node->temporaries = TfLiteIntArrayCreate(2);
     node->temporaries->data[0] = data->scratch_tensor_index;
 
     TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/0);
-    input_quantized->type = kTfLiteUInt8;
+    input_quantized->type = filter->type;
     input_quantized->allocation_type = kTfLiteArenaRw;
 
     // TODO(raziel): add this logic to ResizeTensor.
@@ -209,7 +210,8 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
                         TfLiteTensor* scaling_factors, TfLiteTensor* output) {
   // Check the types for this hybrid Op.
   TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32);
-  TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteUInt8);
+  TF_LITE_ENSURE(context,
+                 filter->type == kTfLiteUInt8 || filter->type == kTfLiteInt8);
   TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteFloat32);
   TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32);
 
@@ -241,7 +243,15 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   // Quantize input from float to uint8 + quantization params (scaling factor).
   float unused_min, unused_max;
   float* scaling_factors_ptr = scaling_factors->data.f;
-  int8_t* quant_data = reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+  int8_t* quant_data;
+  int8_t* filter_data;
+  if (filter->type == kTfLiteUInt8) {
+    quant_data = reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+    filter_data = reinterpret_cast<int8_t*>(filter->data.uint8);
+  } else {
+    quant_data = input_quantized->data.int8;
+    filter_data = filter->data.int8;
+  }
 
   // Quantize each batch independently.
   for (int b = 0; b < batch_size; ++b) {
@@ -255,8 +265,8 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
 
   // Compute output += weight * quantized_input
   tensor_utils::MatrixBatchVectorMultiplyAccumulate(
-      reinterpret_cast<int8_t*>(filter->data.uint8), num_units, input_size,
-      quant_data, scaling_factors_ptr, batch_size, output->data.f,
+      filter_data, num_units, input_size, quant_data, scaling_factors_ptr,
+      batch_size, output->data.f,
       /*result_stride=*/1);
 
   // Apply activation function to floats.
@@ -452,6 +462,15 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                              "Unhandled fully-connected weights format");
         return kTfLiteError;
       }
+    case kTfLiteInt8:
+      if (params->weights_format == kTfLiteFullyConnectedWeightsFormatDefault) {
+        return EvalQuantized<kernel_type>(context, node, params, data, input,
+                                          filter, bias, output);
+      } else {
+        context->ReportError(context,
+                             "Unhandled fully-connected weights format");
+        return kTfLiteError;
+      }
     default:
       context->ReportError(context, "Type %d not currently supported.",
                            filter->type);
diff --git a/tensorflow/lite/kernels/fully_connected_test.cc b/tensorflow/lite/kernels/fully_connected_test.cc
index 3351a30b12..d1d29fc7e6 100644
--- a/tensorflow/lite/kernels/fully_connected_test.cc
+++ b/tensorflow/lite/kernels/fully_connected_test.cc
@@ -296,6 +296,10 @@ class HybridFullyConnectedOpModel : public SingleOpModel {
     SymmetricQuantizeAndPopulate(weights_, data);
   }
 
+  void SetSignedWeights(std::initializer_list<float> f) {
+    SignedSymmetricQuantizeAndPopulate(weights_, f);
+  }
+
   void SetInput(const std::vector<float>& f) { PopulateTensor(input_, f); }
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
 
@@ -577,11 +581,11 @@ TEST_P(QuantizedFullyConnectedOpTest,
   }
 }
 
-TEST(HybridFullyConnectedOpTest, SimpleTestQuantized) {
+TEST(HybridFullyConnectedOpTest, SimpleTestQuantizedUint8) {
   HybridFullyConnectedOpModel m(
       /*units=*/3, /*batches=*/2,
       /*input=*/{TensorType_FLOAT32, {2, 10}},
-      /*weights=*/{TensorType_UINT8, {3, 10}, -63.5, 64});  // PIE
+      /*weights=*/{TensorType_UINT8, {3, 10}, -63.5, 64});  // Hybrid
 
   m.SetWeights({
       1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
@@ -605,6 +609,34 @@ TEST(HybridFullyConnectedOpTest, SimpleTestQuantized) {
                                  /*max_abs_error=*/1.3f)));
 }
 
+TEST(HybridFullyConnectedOpTest, SimpleTestQuantizedInt8) {
+  HybridFullyConnectedOpModel m(
+      /*units=*/3, /*batches=*/2,
+      /*input=*/{TensorType_FLOAT32, {2, 10}},
+      /*weights=*/{TensorType_INT8, {3, 10}, -63.5, 64});  // Hybrid
+
+  m.SetSignedWeights({
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 0
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+      1, 2, 3, 4, 5, 6, 7, 8, 9, 10,  // u = 1
+  });
+  m.SetBias({1, 2, 3});
+
+  m.SetInput({
+      1, 2, 3, 4, 5, 6, 7, 8,  -9, -10,  // b = 0
+      1, 2, 3, 4, 5, 6, 7, -8, 9,  -10,  // b = 1
+  });
+
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear(
+                                 {
+                                     24, 25, 26,  //
+                                     58, 59, 60,  //
+                                 },
+                                 /*max_abs_error=*/1.3f)));
+}
+
 TEST_P(FloatFullyConnectedOpTest, SimpleTest4DInput) {
   // Note that it is not required that the first dimension be the number of
   // batches. All we care is that the input can be evenly distributed in
diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc
index c3cb4a1923..abfd370b86 100644
--- a/tensorflow/lite/toco/tflite/operator.cc
+++ b/tensorflow/lite/toco/tflite/operator.cc
@@ -406,8 +406,23 @@ class FullyConnected
   int GetVersion(const OperatorSignature& op_signature) const override {
     const auto& fc_op =
         static_cast<const FullyConnectedOperator&>(*op_signature.op);
-    return fc_op.weights_format == FullyConnectedWeightsFormat::kDefault ? 1
-                                                                         : 2;
+    if (fc_op.weights_format == FullyConnectedWeightsFormat::kDefault) {
+      return 1;
+    }
+    const string& input_name = op_signature.op->inputs[0];
+    const string& weights_name = op_signature.op->inputs[1];
+    const string& output_name = op_signature.op->outputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    const Array& weights_array = op_signature.model->GetArray(weights_name);
+    const Array& output_array = op_signature.model->GetArray(output_name);
+    // If the op is a signed int8 hybrid operation, we need to return
+    // version 3.
+    if (input_array.data_type == ArrayDataType::kFloat &&
+        weights_array.data_type == ArrayDataType::kInt8 &&
+        output_array.data_type == ArrayDataType::kFloat) {
+      return 3;
+    }
+    return 2;
   }
 };
 
-- 
GitLab


From 2efcb526d78bdae605ec7c923d85f2db8a77570c Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 18 Dec 2018 16:04:33 -0800
Subject: [PATCH 782/873] Update toolchain for arm.

PiperOrigin-RevId: 226075926
---
 third_party/toolchains/cpus/arm/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/third_party/toolchains/cpus/arm/BUILD b/third_party/toolchains/cpus/arm/BUILD
index 10c7867c23..00350cb341 100644
--- a/third_party/toolchains/cpus/arm/BUILD
+++ b/third_party/toolchains/cpus/arm/BUILD
@@ -32,6 +32,7 @@ cc_toolchain(
     static_runtime_libs = [":empty"],
     strip_files = ":empty",
     supports_param_files = 1,
+    toolchain_identifier = "local_linux",
 )
 
 cc_toolchain(
@@ -46,4 +47,5 @@ cc_toolchain(
     static_runtime_libs = [":empty"],
     strip_files = "arm_linux_all_files",
     supports_param_files = 1,
+    toolchain_identifier = "arm-linux-gnueabihf",
 )
-- 
GitLab


From 44f117cecbdcea196ef7ccf93c01c6425ad3232a Mon Sep 17 00:00:00 2001
From: Ayush Dubey <ayushd@google.com>
Date: Tue, 18 Dec 2018 16:09:50 -0800
Subject: [PATCH 783/873] Add support for multi-node collectives in
 NcclManager.

NCCL 2 enables collective communication across workers.  This change introduces
a multi-worker capable NcclManager.  The main API change is to first generate a
ncclUniqueId wrapped in a communicator key, and then pass in this unique id to
every collective call.

NCCL works best (no deadlocks) if workers enqueue collectives on GPU streams in
the same order.  The NCCL manager callee can prepare multiple collectives
concurrently, but to achieve lockstep synchronization the callee needs to signal
that a collective is ready to execute across all workers in the same order.
This is exposed via SignalMultiNodeReady.

PiperOrigin-RevId: 226076894
---
 tensorflow/core/kernels/nccl_ops.cc       |  79 +++-
 tensorflow/core/nccl/nccl_manager.cc      | 499 ++++++++++++----------
 tensorflow/core/nccl/nccl_manager.h       | 178 ++++++--
 tensorflow/core/nccl/nccl_manager_test.cc | 299 ++++++++++---
 4 files changed, 710 insertions(+), 345 deletions(-)

diff --git a/tensorflow/core/kernels/nccl_ops.cc b/tensorflow/core/kernels/nccl_ops.cc
index 6fdeb22478..d3bdebfc87 100644
--- a/tensorflow/core/kernels/nccl_ops.cc
+++ b/tensorflow/core/kernels/nccl_ops.cc
@@ -91,9 +91,10 @@ class NcclAllReduceOpKernel : public NcclReduceOpBase {
       : NcclReduceOpBase(c) {}
 
   void ComputeAsync(OpKernelContext* c, DoneCallback done) override {
-    const Tensor* in_t = &c->input(0);
-    Tensor* out_t;
-    OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, in_t->shape(), &out_t), done);
+    const Tensor* input = &c->input(0);
+    Tensor* output;
+    OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, input->shape(), &output),
+                         done);
 
     auto actual_done = [c, done](Status s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
@@ -102,10 +103,17 @@ class NcclAllReduceOpKernel : public NcclReduceOpBase {
 
     auto* compute_stream = c->op_device_context()->stream();
     auto* gpu_info = c->device()->tensorflow_gpu_device_info();
+    auto participant = absl::make_unique<NcclManager::Participant>(
+        compute_stream->parent(), compute_stream, gpu_info->event_mgr,
+        gpu_info->gpu_id, input, output, /*global_rank=*/-1,
+        std::move(actual_done));
     NcclManager::instance()->AddToAllReduce(
-        num_devices(), GetCollectiveKey(c), reduction_op(),
-        compute_stream->parent(), gpu_info->gpu_id, gpu_info->event_mgr,
-        compute_stream, in_t, out_t, std::move(actual_done));
+        std::move(participant),
+        {GetCollectiveKey(c),
+         /*num_local_devices=*/num_devices(),
+         /*num_global_devices=*/num_devices(),
+         /*communicator_key=*/""},
+        reduction_op());
   }
 };
 REGISTER_KERNEL_BUILDER(Name("NcclAllReduce").Device(DEVICE_GPU),
@@ -127,10 +135,17 @@ class NcclReduceSendKernel : public NcclReduceOpBase {
 
     auto* compute_stream = c->op_device_context()->stream();
     auto* gpu_info = c->device()->tensorflow_gpu_device_info();
+    auto participant = absl::make_unique<NcclManager::Participant>(
+        compute_stream->parent(), compute_stream, gpu_info->event_mgr,
+        gpu_info->gpu_id, &c->input(0), /*output=*/nullptr, /*global_rank=*/-1,
+        std::move(actual_done));
     NcclManager::instance()->AddReduceSend(
-        num_devices(), GetCollectiveKey(c), reduction_op(),
-        compute_stream->parent(), gpu_info->gpu_id, gpu_info->event_mgr,
-        compute_stream, &c->input(0), std::move(actual_done));
+        std::move(participant),
+        {GetCollectiveKey(c),
+         /*num_local_devices=*/num_devices(),
+         /*num_global_devices=*/num_devices(),
+         /*communicator_key=*/""},
+        reduction_op());
   }
 };
 REGISTER_KERNEL_BUILDER(Name("_NcclReduceSend").Device(DEVICE_GPU),
@@ -145,9 +160,10 @@ class NcclReduceRecvKernel : public NcclReduceOpBase {
       : NcclReduceOpBase(c) {}
 
   void ComputeAsync(OpKernelContext* c, DoneCallback done) override {
-    const Tensor& in_t = c->input(0);
-    Tensor* out_t;
-    OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, in_t.shape(), &out_t), done);
+    const Tensor* input = &c->input(0);
+    Tensor* output;
+    OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, input->shape(), &output),
+                         done);
 
     auto actual_done = [c, done](Status s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
@@ -156,10 +172,17 @@ class NcclReduceRecvKernel : public NcclReduceOpBase {
 
     auto* compute_stream = c->op_device_context()->stream();
     auto* gpu_info = c->device()->tensorflow_gpu_device_info();
+    auto participant = absl::make_unique<NcclManager::Participant>(
+        compute_stream->parent(), compute_stream, gpu_info->event_mgr,
+        gpu_info->gpu_id, input, output, /*global_rank=*/-1,
+        std::move(actual_done));
     NcclManager::instance()->AddReduceRecv(
-        num_devices(), GetCollectiveKey(c), reduction_op(),
-        compute_stream->parent(), gpu_info->gpu_id, gpu_info->event_mgr,
-        compute_stream, &in_t, out_t, std::move(actual_done));
+        std::move(participant),
+        {GetCollectiveKey(c),
+         /*num_local_devices=*/num_devices(),
+         /*num_global_devices=*/num_devices(),
+         /*communicator_key=*/""},
+        reduction_op());
   }
 
  private:
@@ -184,10 +207,15 @@ class NcclBroadcastSendKernel : public NcclAsyncOpBase {
 
     auto* compute_stream = c->op_device_context()->stream();
     auto* gpu_info = c->device()->tensorflow_gpu_device_info();
-    NcclManager::instance()->AddBroadcastSend(
-        num_devices(), GetCollectiveKey(c), compute_stream->parent(),
-        gpu_info->gpu_id, gpu_info->event_mgr, compute_stream, &c->input(0),
+    auto participant = absl::make_unique<NcclManager::Participant>(
+        compute_stream->parent(), compute_stream, gpu_info->event_mgr,
+        gpu_info->gpu_id, &c->input(0), /*output=*/nullptr, /*global_rank=*/-1,
         std::move(actual_done));
+    NcclManager::instance()->AddBroadcastSend(
+        std::move(participant), {GetCollectiveKey(c),
+                                 /*num_local_devices=*/num_devices(),
+                                 /*num_global_devices=*/num_devices(),
+                                 /*communicator_key=*/""});
   }
 };
 REGISTER_KERNEL_BUILDER(Name("_NcclBroadcastSend").Device(DEVICE_GPU),
@@ -206,8 +234,8 @@ class NcclBroadcastRecvKernel : public NcclAsyncOpBase {
     TensorShape shape;
     OP_REQUIRES_OK_ASYNC(
         c, TensorShapeUtils::MakeShape(shape_t.vec<int32>(), &shape), done);
-    Tensor* out_t;
-    OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape, &out_t), done);
+    Tensor* output;
+    OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape, &output), done);
 
     auto actual_done = [c, done](Status s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
@@ -216,10 +244,15 @@ class NcclBroadcastRecvKernel : public NcclAsyncOpBase {
 
     auto* compute_stream = c->op_device_context()->stream();
     auto* gpu_info = c->device()->tensorflow_gpu_device_info();
-    NcclManager::instance()->AddBroadcastRecv(
-        num_devices(), GetCollectiveKey(c), compute_stream->parent(),
-        gpu_info->gpu_id, gpu_info->event_mgr, compute_stream, out_t,
+    auto participant = absl::make_unique<NcclManager::Participant>(
+        compute_stream->parent(), compute_stream, gpu_info->event_mgr,
+        gpu_info->gpu_id, /*input=*/nullptr, output, /*global_rank=*/-1,
         std::move(actual_done));
+    NcclManager::instance()->AddBroadcastSend(
+        std::move(participant), {GetCollectiveKey(c),
+                                 /*num_local_devices=*/num_devices(),
+                                 /*num_global_devices=*/num_devices(),
+                                 /*communicator_key=*/""});
   }
 };
 REGISTER_KERNEL_BUILDER(
diff --git a/tensorflow/core/nccl/nccl_manager.cc b/tensorflow/core/nccl/nccl_manager.cc
index df49bf1b97..545284331d 100644
--- a/tensorflow/core/nccl/nccl_manager.cc
+++ b/tensorflow/core/nccl/nccl_manager.cc
@@ -82,14 +82,17 @@ struct NcclManager::CommunicatorMember {
 
 struct NcclManager::Communicator {
  public:
-  explicit Communicator(std::vector<CommunicatorMember> members)
-      : num_devices(members.size()), members(std::move(members)) {}
+  explicit Communicator(std::vector<CommunicatorMember> members,
+                        const string& key)
+      : num_devices(members.size()), members(std::move(members)), key(key) {}
 
   const int num_devices;
-  const std::vector<CommunicatorMember> members;  // indexed by rank.
+  const std::vector<CommunicatorMember> members;
+  const string key;
 };
 
 namespace {
+
 ncclDataType_t ToNcclType(DataType t) {
   switch (t) {
     case DT_HALF:
@@ -106,64 +109,46 @@ ncclDataType_t ToNcclType(DataType t) {
       return ncclFloat;
   }
 }
-}  // namespace
 
-// A participant in a Collective.  See <Collective> below.
-struct NcclManager::Participant {
-  Participant(const Tensor* in_t, Tensor* out_t, EventMgr* event_mgr,
-              se::Stream* tensor_stream, se::StreamExecutor* executor,
-              int gpu_device_id, NcclManager::DoneCallback done_callback)
-      : in_t(in_t),
-        out_t(out_t),
-        event_mgr(event_mgr),
-        tensor_stream(tensor_stream),
-        executor(executor),
-        gpu_device_id(gpu_device_id),
-        done_callback(std::move(done_callback)) {
-    DCHECK(executor != nullptr);
-    DCHECK(event_mgr != nullptr);
-    DCHECK(tensor_stream != nullptr);
+void StringToNcclUniqueId(const string& str_id, ncclUniqueId* nccl_id) {
+  if (str_id.size() == NCCL_UNIQUE_ID_BYTES) {
+    memcpy(nccl_id->internal, str_id.data(), NCCL_UNIQUE_ID_BYTES);
   }
-  // Owned by the caller, who must keep it live until <done_callback> is called.
-  // Is NULL for participants that only receive data.
-  const Tensor* in_t;
-
-  // Owned by the caller, who must keep it live until <done_callback> is called.
-  // Is NULL for participants that only send data.
-  Tensor* out_t;
-
-  // Owned by the caller, who must keep it live until <done_callback> is called.
-  EventMgr* const event_mgr;
-
-  // Owned by the caller, who must keep it live until <done_callback> is called.
-  se::Stream* const tensor_stream;
-
-  // Matches the executor in CommunicatorMember::stream. Expected to be live for
-  // process lifetime.
-  se::StreamExecutor* const executor = nullptr;
-
-  const int gpu_device_id;
-
-  NcclManager::DoneCallback done_callback;
+}
 
-  bool root = false;
-};
+}  // namespace
 
-// A Collective tracks a single communicator operation (e.g., a single
-// AllReduce call).
+// A `Collective` encapsulates state for a collective instance at one node.
+// Typically, an instance in TensorFlow context would be defined by a collective
+// group and the (step, frame iteration) for that execution.
+//
+// For each collective instance there will be one `Collective` object per node.
+// For example,  a NCCL collective that runs on a single node with 4 GPUs would
+// have a single `Collective` per step.  However, a collective that executes on
+// 3 nodes with 4 GPUs each would have a `Collective` per node, each of which is
+// tracking the 4 GPUs local to that node.
 struct NcclManager::Collective {
   Collective(DataType data_type_in, CollectiveType type_in,
-             ncclRedOp_t reduction_op_in, int num_devices)
+             ncclRedOp_t reduction_op_in, int num_local_devices_in,
+             int num_global_devices_in, const string& communicator_key_in)
       : data_type(data_type_in),
         type(type_in),
         reduction_op(reduction_op_in),
-        remaining_participants(num_devices) {
-    participants.reserve(num_devices);
+        num_local_devices(num_local_devices_in),
+        num_global_devices(num_global_devices_in),
+        single_node(num_local_devices_in == num_global_devices_in),
+        communicator_key(communicator_key_in),
+        remaining_participants(num_local_devices_in) {
+    participants.reserve(num_local_devices_in);
   }
 
   const DataType data_type;
   const CollectiveType type;
   const ncclRedOp_t reduction_op;  // applies when <type> is a reduction.
+  const int num_local_devices;     // devices local to this node
+  const int num_global_devices;    // devices across all nodes
+  const bool single_node;          // true if all devices are at one node
+  const string communicator_key;
 
   Communicator* communicator = nullptr;
 
@@ -178,12 +163,20 @@ struct NcclManager::Collective {
   int root_rank = -1;
 
   // How many participants have been registered so far. The Collective is
-  // eligible for running with <available_participants> == participants.size().
+  // eligible for running with <available_participants> == num_local_devices.
+  //
+  // If this is a multi-node collective, we additionally have to synchronize
+  // across nodes.  The caller would need to signal multi node readiness by
+  // calling NcclManager::SignalMultiNodeReady, which sets `multi_node_ready` to
+  // true.
   //
   // Guarded by the mutex of the containing Communicator.
   int available_participants = 0;
+  bool multi_node_ready = false;
 
   mutable std::atomic_int_fast32_t remaining_participants;
+
+  Status status;
 };
 
 NcclManager::NcclManager() {}
@@ -193,6 +186,12 @@ NcclManager* NcclManager::instance() {
   return instance;
 }
 
+string NcclManager::GenerateCommunicatorKey() {
+  ncclUniqueId nccl_id;
+  ncclGetUniqueId(&nccl_id);
+  return string(nccl_id.internal, NCCL_UNIQUE_ID_BYTES);
+}
+
 Status NcclManager::GetCommunicator(NcclManager::Collective* collective,
                                     NcclManager::Communicator** communicator) {
   // Sort by executor to make ordering of executors deterministic.
@@ -201,39 +200,60 @@ Status NcclManager::GetCommunicator(NcclManager::Collective* collective,
                const std::unique_ptr<Participant>& b) {
               return a->executor < b->executor;
             });
-  const int num_devices = collective->participants.size();
 
   mutex_lock l(mu_);
 
-  // Scan to find an existing communicator that provides nccl communication
-  // between the executors used by the participants in the collective. For
-  // example, if a collective is for GPUs 0, 1, and 2 then this will scan
-  // to find the communicator for GPUs 0, 1, and 2.
-  //
-  // Note that each executor identifies a context on one device, so this is the
-  // same as getting the communicator connecting the devices in the collective.
-  // A device can be in different communicators as well - for example, a
-  // communicator for GPUs 0 and 1 is separate from one for GPUs 0, 1, and 2.
-  //
-  // Since it's expected that a small number of distinct communicators will
-  // be needed, communicators_ is not garbage collected currently.
-  //
-  // Launching of kernels must be serialized so that, given collectives A and B,
-  // and an order of them (e.g., A before B), then for each comm_stream
-  // involved, the kernel for A is launched before the kernel for B. This is
-  // guaranteed currently be a global mutex controlling additions of the kernels
-  // to per-stream launch queues.  The launch queues are processed by
-  // LoopKernelLaunches.
-  for (auto& comm : communicators_) {
-    if (comm->num_devices == num_devices) {
-      int i;
-      for (i = 0; i < num_devices; ++i) {
-        if (comm->members[i].nccl_stream->executor !=
-            collective->participants[i]->executor) {
-          break;
+  if (collective->single_node) {
+    // For single-node collectives, we identify a communicator uniquely by the
+    // set of devices participating in the collective.  For example, if a
+    // collective is for GPUs 0, 1, and 2 then this will scan to find the
+    // communicator for GPUs 0, 1, and 2.
+    //
+    // Note that each executor identifies a context on one device, so this is
+    // the same as getting the communicator connecting the devices in the
+    // collective. A device can be in different communicators as well - for
+    // example, a communicator for GPUs 0 and 1 is separate from one for GPUs 0,
+    // 1, and 2.
+    //
+    // Since it's expected that a small number of distinct communicators will
+    // be needed, communicators_ is not garbage collected currently.
+    //
+    // Launching of kernels must be serialized so that, given collectives A and
+    // B, and an order of them (e.g., A before B), then for each comm_stream
+    // involved, the kernel for A is launched before the kernel for B. This is
+    // guaranteed currently be a global mutex controlling additions of the
+    // kernels to per-stream launch queues.  The launch queues are processed by
+    // LoopKernelLaunches.
+    for (auto& comm : communicators_) {
+      if (comm->num_devices == collective->num_global_devices) {
+        int i;
+        for (i = 0; i < collective->num_local_devices; ++i) {
+          if (comm->members[i].nccl_stream->executor !=
+              collective->participants[i]->executor) {
+            break;
+          }
+        }
+        if (i == collective->num_local_devices) {
+          *communicator = comm.get();
+          return Status::OK();
         }
       }
-      if (i == num_devices) {
+    }
+  } else {
+#if NCCL_MAJOR < 2
+    return errors::Internal(
+        "Cannot use multi-node NCCL collectives with NCCL 1.x");
+#endif
+    if (collective->communicator_key.size() != NCCL_UNIQUE_ID_BYTES) {
+      return errors::Internal("Expected communicator_key of size ",
+                              NCCL_UNIQUE_ID_BYTES, " but found size ",
+                              collective->communicator_key.size());
+    }
+    // This is an instance of multi-node collective.  We have previously
+    // created a NCCL unique id and shared with all workers.  Now we find the
+    // `Communicator` corresponding to this id.
+    for (auto& comm : communicators_) {
+      if (comm->key == collective->communicator_key) {
         *communicator = comm.get();
         return Status::OK();
       }
@@ -246,9 +266,9 @@ Status NcclManager::GetCommunicator(NcclManager::Collective* collective,
   // Create and initialize a new communicator.
   // Note that this is done under the lock; performance is not expected to
   // matter as this happens a very small number of times.
-  std::vector<CommunicatorMember> members(num_devices);
-  std::vector<int> devices(num_devices);
-  for (int i = 0; i < num_devices; ++i) {
+  std::vector<CommunicatorMember> members(collective->num_local_devices);
+  std::vector<int> devices(collective->num_local_devices);
+  for (int i = 0; i < collective->num_local_devices; ++i) {
     auto* executor = collective->participants[i]->executor;
 
     // Find a communication stream to use for the device.
@@ -278,164 +298,209 @@ Status NcclManager::GetCommunicator(NcclManager::Collective* collective,
     devices[i] = collective->participants[i]->gpu_device_id;
   }
 
-  int device_count = num_devices;
+  std::vector<ncclComm_t> nccl_comms(collective->num_local_devices);
 #if NCCL_MAJOR >= 2
-  // NCCL2 prevents InitAll for more communicators than devices (but doesn't
-  // check that device ids are unique). Work around it by initializing each
-  // rank individually.
-  CUDA_RETURN_IF_ERROR(cudaGetDeviceCount(&device_count));
-#endif
-  std::vector<ncclComm_t> nccl_comms(num_devices);
-  if (num_devices <= device_count) {
-    NCCL_RETURN_IF_ERROR(
-        ncclCommInitAll(nccl_comms.data(), num_devices, devices.data()));
+  // For NCCL 2, we always initialize using ncclCommInitRank guarded by NCCL
+  // group primitives.
+  ncclUniqueId nccl_id;
+  if (collective->single_node) {
+    NCCL_RETURN_IF_ERROR(ncclGetUniqueId(&nccl_id));
   } else {
-    int savedDevice = 0;
-    CUDA_RETURN_IF_ERROR(cudaGetDevice(&savedDevice));
-    ncclUniqueId commId;
-    NCCL_RETURN_IF_ERROR(ncclGetUniqueId(&commId));
-#if NCCL_MAJOR >= 2
-    NCCL_RETURN_IF_ERROR(ncclGroupStart());
-#endif
-    for (int rank = 0; rank < num_devices; ++rank) {
-      CUDA_RETURN_IF_ERROR(cudaSetDevice(devices[rank]));
-      NCCL_RETURN_IF_ERROR(ncclCommInitRank(nccl_comms.data() + rank,
-                                            num_devices, commId, rank));
-    }
-#if NCCL_MAJOR >= 2
-    NCCL_RETURN_IF_ERROR(ncclGroupEnd());
-#endif
-    CUDA_RETURN_IF_ERROR(cudaSetDevice(savedDevice));
+    StringToNcclUniqueId(collective->communicator_key, &nccl_id);
   }
-  for (int rank = 0; rank < num_devices; ++rank) {
-    members[rank].nccl_comm = nccl_comms[rank];
+  int saved_device = 0;
+  CUDA_RETURN_IF_ERROR(cudaGetDevice(&saved_device));
+  NCCL_RETURN_IF_ERROR(ncclGroupStart());
+  for (int i = 0; i < collective->num_local_devices; ++i) {
+    const int rank =
+        collective->single_node ? i : collective->participants[i]->global_rank;
+    CUDA_RETURN_IF_ERROR(cudaSetDevice(devices[i]));
+    NCCL_RETURN_IF_ERROR(ncclCommInitRank(
+        nccl_comms.data() + i, collective->num_global_devices, nccl_id, rank));
   }
-  communicators_.emplace_back(new Communicator(std::move(members)));
+  NCCL_RETURN_IF_ERROR(ncclGroupEnd());
+  CUDA_RETURN_IF_ERROR(cudaSetDevice(saved_device));
+#else
+  // Since NCCL 1 is single node only, we use ncclCommInitAll.  We could have
+  // used ncclCommInitRank with NCCL 1 as well, but then we would have to
+  // issue each init call from a different thread
+  // (https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/nccl1.html).
+  NCCL_RETURN_IF_ERROR(ncclCommInitAll(
+      nccl_comms.data(), collective->num_local_devices, devices.data()));
+#endif
+
+  for (int i = 0; i < collective->num_local_devices; ++i) {
+    members[i].nccl_comm = nccl_comms[i];
+  }
+  communicators_.emplace_back(
+      new Communicator(std::move(members), collective->communicator_key));
   *communicator = communicators_.back().get();
   return Status::OK();
 }
 
-void NcclManager::AddToAllReduce(int num_devices, const string& key,
-                                 ncclRedOp_t reduction_op,
-                                 se::StreamExecutor* executor,
-                                 int gpu_device_id, EventMgr* event_mgr,
-                                 se::Stream* tensor_stream, const Tensor* in_t,
-                                 Tensor* out_t,
-                                 const DoneCallback& done_callback) {
-  std::unique_ptr<Participant> participant(
-      new Participant(in_t, out_t, event_mgr, tensor_stream, executor,
-                      gpu_device_id, done_callback));
-  AddParticipant(num_devices, key, std::move(participant), in_t->dtype(),
-                 kAllReduce, reduction_op);
+void NcclManager::AddToAllReduce(std::unique_ptr<Participant> participant,
+                                 const Context& context,
+                                 ncclRedOp_t reduction_op) {
+  AddParticipant(std::move(participant), context, kAllReduce, reduction_op);
 }
 
-void NcclManager::AddBroadcastSend(int num_devices, const string& key,
-                                   se::StreamExecutor* executor,
-                                   int gpu_device_id, EventMgr* event_mgr,
-                                   se::Stream* tensor_stream,
-                                   const Tensor* in_t,
-                                   DoneCallback done_callback) {
-  std::unique_ptr<Participant> participant(
-      new Participant(in_t, nullptr /* out_t */, event_mgr, tensor_stream,
-                      executor, gpu_device_id, std::move(done_callback)));
+void NcclManager::AddBroadcastSend(std::unique_ptr<Participant> participant,
+                                   const Context& context) {
   participant->root = true;
-  AddParticipant(num_devices, key, std::move(participant), in_t->dtype(),
-                 kBroadcast, ncclSum /* unused */);
+  AddParticipant(std::move(participant), context, kBroadcast,
+                 ncclSum /* unused */);
 }
 
-void NcclManager::AddBroadcastRecv(int num_devices, const string& key,
-                                   se::StreamExecutor* executor,
-                                   int gpu_device_id, EventMgr* event_mgr,
-                                   se::Stream* tensor_stream, Tensor* out_t,
-                                   DoneCallback done_callback) {
-  std::unique_ptr<Participant> participant(
-      new Participant(nullptr /* in_t */, out_t, event_mgr, tensor_stream,
-                      executor, gpu_device_id, std::move(done_callback)));
-  AddParticipant(num_devices, key, std::move(participant), out_t->dtype(),
-                 kBroadcast, ncclSum /* unused */);
+void NcclManager::AddBroadcastRecv(std::unique_ptr<Participant> participant,
+                                   const Context& context) {
+  AddParticipant(std::move(participant), context, kBroadcast,
+                 ncclSum /* unused */);
 }
 
-void NcclManager::AddReduceSend(int num_devices, const string& key,
-                                ncclRedOp_t reduction_op,
-                                se::StreamExecutor* executor, int gpu_device_id,
-                                EventMgr* event_mgr, se::Stream* tensor_stream,
-                                const Tensor* in_t,
-                                DoneCallback done_callback) {
-  std::unique_ptr<Participant> participant(
-      new Participant(in_t, nullptr /* out_t */, event_mgr, tensor_stream,
-                      executor, gpu_device_id, std::move(done_callback)));
-  AddParticipant(num_devices, key, std::move(participant), in_t->dtype(),
-                 kReduce, reduction_op);
+void NcclManager::AddReduceSend(std::unique_ptr<Participant> participant,
+                                const Context& context,
+                                ncclRedOp_t reduction_op) {
+  AddParticipant(std::move(participant), context, kReduce, reduction_op);
 }
 
-void NcclManager::AddReduceRecv(int num_devices, const string& key,
-                                ncclRedOp_t reduction_op,
-                                se::StreamExecutor* executor, int gpu_device_id,
-                                EventMgr* event_mgr, se::Stream* tensor_stream,
-                                const Tensor* in_t, Tensor* out_t,
-                                DoneCallback done_callback) {
-  std::unique_ptr<Participant> participant(
-      new Participant(in_t, out_t, event_mgr, tensor_stream, executor,
-                      gpu_device_id, std::move(done_callback)));
-  participant->root = true;
-  AddParticipant(num_devices, key, std::move(participant), in_t->dtype(),
-                 kReduce, reduction_op);
+void NcclManager::AddReduceRecv(std::unique_ptr<Participant> participant,
+                                const Context& context,
+                                ncclRedOp_t reduction_op) {
+  AddParticipant(std::move(participant), context, kReduce, reduction_op);
 }
 
-void NcclManager::AddParticipant(int num_devices, const string& key,
-                                 std::unique_ptr<Participant> participant,
-                                 DataType data_type,
+void NcclManager::SignalMultiNodeReady(const string& collective_key) {
+  Collective* to_run = nullptr;
+  {
+    mutex_lock l(mu_);
+    auto collective_it = collectives_.find(collective_key);
+    if (collective_it != collectives_.end()) {
+      Collective* collective = collective_it->second.get();
+      collective->multi_node_ready = true;
+      to_run = CheckReady(collective_key, collective);
+    }
+  }
+
+  if (to_run != nullptr) RunCollective(to_run);
+}
+
+void NcclManager::AddParticipant(std::unique_ptr<Participant> participant,
+                                 const Context& context,
                                  CollectiveType collective_type,
                                  ncclRedOp_t reduction_op) {
   Collective* to_run = nullptr;
+  const DataType data_type = participant->input->dtype();
   {
     mutex_lock l(mu_);
-    auto& collective_ptr = collectives_[key];
-    if (collective_ptr == nullptr) {
-      collective_ptr.reset(new Collective(data_type, collective_type,
-                                          reduction_op, num_devices));
+    auto collective_it = collectives_.find(context.collective_key);
+    Collective* collective = nullptr;
+    if (collective_it == collectives_.end()) {
+      auto collective_unique_ptr = absl::make_unique<Collective>(
+          data_type, collective_type, reduction_op, context.num_local_devices,
+          context.num_global_devices, context.communicator_key);
+      collective = collective_unique_ptr.get();
+      collectives_.emplace(context.collective_key,
+                           std::move(collective_unique_ptr));
+    } else {
+      collective = collective_it->second.get();
+    }
+
+    // Check `collective` is correct and consistent.
+    if (collective->status.ok() && collective->single_node &&
+        !collective->communicator_key.empty()) {
+      collective->status =
+          errors::Internal("Collective ", reduction_op,
+                           " is single node but has communicator_key of size ",
+                           collective->communicator_key.size());
+    }
+    if (collective->status.ok() && collective->communicator_key.size() !=
+                                       context.communicator_key.size()) {
+      collective->status =
+          errors::Internal("Collective ", reduction_op,
+                           " mismatch in member communicator_key with size ",
+                           collective->communicator_key.size(),
+                           " and arg communicator_key with size ",
+                           context.communicator_key.size());
+    }
+    if (collective->status.ok() && collective->type != collective_type) {
+      collective->status = errors::Internal(
+          "Collective ", reduction_op, " previously initialized with type ",
+          collective->type, " but now got type ", collective_type);
     }
-    Collective* collective = collective_ptr.get();
-    DCHECK_EQ(collective->type, collective_type);
-    DCHECK_LT(collective->participants.size(), num_devices);
+    if (collective->status.ok() &&
+        collective->num_global_devices != context.num_global_devices) {
+      collective->status =
+          errors::Internal("Collective ", reduction_op,
+                           " previously initialized with num_global_devices ",
+                           collective->num_global_devices, " but now got ",
+                           context.num_global_devices);
+    }
+    if (collective->status.ok() &&
+        collective->num_local_devices != context.num_local_devices) {
+      collective->status =
+          errors::Internal("Collective ", reduction_op,
+                           "previously initialized with num_local_devices ",
+                           collective->num_local_devices, " but now got ",
+                           context.num_local_devices);
+    }
+    if (collective->status.ok() &&
+        collective->participants.size() >= collective->num_local_devices) {
+      collective->status = errors::Internal(
+          "Collective ", reduction_op, " expected ",
+          collective->num_local_devices, " participants but now has ",
+          collective->participants.size(),
+          " with one more participant being added");
+    }
+
     collective->participants.emplace_back(std::move(participant));
     ++collective->available_participants;
 
-    if (collective->available_participants == num_devices) {
-      to_run = collective;
-
-      // Ownership is going to be transferred to RunCollective.
-      collective_ptr.release();
-      collectives_.erase(key);
-    }
+    to_run = CheckReady(context.collective_key, collective);
   }
 
-  if (to_run != nullptr) {
-    RunCollective(key, to_run);
+  if (to_run != nullptr) RunCollective(to_run);
+}
+
+NcclManager::Collective* NcclManager::CheckReady(const string& collective_key,
+                                                 Collective* collective) {
+  Collective* to_run = nullptr;
+  if (collective->available_participants == collective->num_local_devices) {
+    if (collective->num_global_devices == collective->num_local_devices ||
+        collective->multi_node_ready) {
+      // Ownership transferred to callee.
+      to_run = collective;
+      auto collectives_it = collectives_.find(collective_key);
+      collectives_it->second.release();
+      collectives_.erase(collectives_it);
+    }
   }
+  return to_run;
 }
 
-void NcclManager::RunCollective(const string& key, Collective* collective) {
+void NcclManager::RunCollective(Collective* collective) {
   static mutex collective_mu(LINKER_INITIALIZED);
 
-  Communicator* communicator = nullptr;
-  const int size = static_cast<int>(collective->participants.size());
-  Status s = GetCommunicator(collective, &communicator);
+  Status s = collective->status;
+  if (s.ok()) {
+    s = GetCommunicator(collective, &collective->communicator);
+  }
   if (!s.ok()) {
-    for (int i = 0; i < size; ++i) {
+    for (int i = 0; i < collective->num_local_devices; ++i) {
       collective->participants[i]->done_callback(s);
     }
     delete collective;
     return;
   }
 
-  collective->communicator = communicator;
-  for (int rank = 0; rank < size; ++rank) {
-    Participant* p = collective->participants[rank].get();
-    NcclStream* nccl_stream = communicator->members[rank].nccl_stream;
+  for (int local_rank = 0; local_rank < collective->num_local_devices;
+       ++local_rank) {
+    Participant* p = collective->participants[local_rank].get();
+    NcclStream* nccl_stream =
+        collective->communicator->members[local_rank].nccl_stream;
     CHECK(nccl_stream != nullptr);
+    const int rank = collective->single_node ? local_rank : p->global_rank;
 
-    if (p->in_t != nullptr) {
+    if (p->input != nullptr) {
       // Wait to ensure that the kernel that produces the data in the input
       // tensor has finished running before the nccl kernel runs on the
       // communication stream.
@@ -457,11 +522,13 @@ void NcclManager::RunCollective(const string& key, Collective* collective) {
     // Note that it would be possible to run multiple collectives at once, if
     // they have non-intersecting sets of devices.
     mutex_lock l(collective_mu);
-    for (int rank = 0; rank < size; ++rank) {
-      NcclStream* nccl_stream = communicator->members[rank].nccl_stream;
+    for (int local_rank = 0; local_rank < collective->num_local_devices;
+         ++local_rank) {
+      NcclStream* nccl_stream =
+          collective->communicator->members[local_rank].nccl_stream;
       mutex_lock l(nccl_stream->mu);
       nccl_stream->pending_launches_.push_front(
-          std::make_pair(collective, rank));
+          std::make_pair(collective, local_rank));
       nccl_stream->cv.notify_all();
     }
   }
@@ -489,37 +556,41 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) {
       nccl_stream->pending_launches_.pop_back();
     }
     Collective* collective = next_launch.first;
-    int rank = next_launch.second;
+    int local_rank = next_launch.second;
 
     // Launch the nccl kernel.
     ncclDataType_t data_type = ToNcclType(collective->data_type);
-    Participant* p = collective->participants[rank].get();
+    Participant* p = collective->participants[local_rank].get();
 
-    auto nccl_comm = collective->communicator->members[rank].nccl_comm;
+    auto nccl_comm = collective->communicator->members[local_rank].nccl_comm;
     ncclResult_t nccl_result = ncclSuccess;
     switch (collective->type) {
       case kAllReduce: {
-        const void* sendbuff = p->in_t->tensor_data().data();
-        void* recvbuff = const_cast<char*>(p->out_t->tensor_data().data());
-
-        nccl_result =
-            ncclAllReduce(sendbuff, recvbuff, p->in_t->NumElements(), data_type,
-                          collective->reduction_op, nccl_comm, *cu_stream);
+        const void* sendbuff = p->input->tensor_data().data();
+        void* recvbuff = const_cast<char*>(p->output->tensor_data().data());
+
+        VLOG(2) << "call NcclAllReduce participant " << local_rank
+                << " sendbuff " << sendbuff << " recvbuff " << recvbuff
+                << " nccl_comm " << nccl_comm << " comm_stream " << comm_stream
+                << " cuda_stream " << cu_stream;
+        nccl_result = ncclAllReduce(sendbuff, recvbuff, p->input->NumElements(),
+                                    data_type, collective->reduction_op,
+                                    nccl_comm, *cu_stream);
         break;
       }
       case kBroadcast: {
-        const Tensor* buf_t = p->in_t ? p->in_t : p->out_t;
+        const Tensor* buf_t = p->input ? p->input : p->output;
         void* buf = const_cast<char*>(buf_t->tensor_data().data());
         nccl_result = ncclBcast(buf, buf_t->NumElements(), data_type,
                                 collective->root_rank, nccl_comm, *cu_stream);
         break;
       }
       case kReduce: {
-        const void* sendbuff = p->in_t->tensor_data().data();
-        void* recvbuff = p->out_t
-                             ? const_cast<char*>(p->out_t->tensor_data().data())
-                             : nullptr;
-        nccl_result = ncclReduce(sendbuff, recvbuff, p->in_t->NumElements(),
+        const void* sendbuff = p->input->tensor_data().data();
+        void* recvbuff =
+            p->output ? const_cast<char*>(p->output->tensor_data().data())
+                      : nullptr;
+        nccl_result = ncclReduce(sendbuff, recvbuff, p->input->NumElements(),
                                  data_type, collective->reduction_op,
                                  collective->root_rank, nccl_comm, *cu_stream);
         break;
@@ -527,13 +598,13 @@ void NcclManager::LoopKernelLaunches(NcclStream* nccl_stream) {
     }
 
     // Run the done_callback when the nccl kernel finishes running.
-    auto done_callback = [collective, rank, nccl_result]() {
+    auto done_callback = [collective, local_rank, nccl_result]() {
       if (nccl_result == ncclSuccess) {
-        collective->participants[rank]->done_callback(Status::OK());
+        collective->participants[local_rank]->done_callback(Status::OK());
       } else {
         // Propagate the error, but note that if other members of the collective
         // did launch their kernels, then they are hanging.
-        collective->participants[rank]->done_callback(errors::Unknown(
+        collective->participants[local_rank]->done_callback(errors::Unknown(
             "Error invoking NCCL: ", ncclGetErrorString(nccl_result)));
       }
 
diff --git a/tensorflow/core/nccl/nccl_manager.h b/tensorflow/core/nccl/nccl_manager.h
index 5da4fe5554..6ac5deb91a 100644
--- a/tensorflow/core/nccl/nccl_manager.h
+++ b/tensorflow/core/nccl/nccl_manager.h
@@ -35,7 +35,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-// The communicator is used to make the asynchronous communicator calls and to
+// NCCL manager is used to make the asynchronous communicator calls and to
 // manage the per-device streams used for communication.
 //
 // See nccl_ops.cc for example usage, including description of memory
@@ -48,48 +48,122 @@ class NcclManager {
 
   static NcclManager* instance();
 
-  // Add one participant to an all-reduce, sending in data from <in_t> and
-  // receiving the result of the all-reduce in <out_t>.  The device for this
-  // participant is managed by <executor>, and its events are polled by
-  // <event_mgr>.
-  //
-  // This is an asynchronous call. When <done_callback> is called, <out_t> has
-  // been set to the all-reduce result (note: the stream may not yet have been
-  // synced).
-  //
-  // <tensor_stream> is the stream that should be waited on to ensure <in_t>'s
-  // data is available on the GPU for the communication stream to access. It
-  // is also the stream that will use the produced data; <done_callback> is
-  // not called until the next kernel launched on <stream> would see the data.
-  void AddToAllReduce(int num_devices, const string& key,
-                      ncclRedOp_t reduction_op, se::StreamExecutor* executor,
-                      int gpu_device_id, EventMgr* event_mgr,
-                      se::Stream* tensor_stream, const Tensor* in_t,
-                      Tensor* out_t, const DoneCallback& done_callback);
-
-  // AddBroadcastSend and AddBroadcastRecv combine to sent data from one sender
+  // Calls `ncclGetUniqueId` and returns the id as a string.  The returned value
+  // may be shared with other participants on different nodes and passed in to
+  // multi-node collective invocations.
+  string GenerateCommunicatorKey();
+
+  // A participant in a Collective.
+  struct Participant {
+    Participant(se::StreamExecutor* executor, se::Stream* tensor_stream,
+                EventMgr* event_mgr, int gpu_device_id, const Tensor* input,
+                Tensor* output, int global_rank, DoneCallback done_callback)
+        : executor(executor),
+          tensor_stream(tensor_stream),
+          event_mgr(event_mgr),
+          gpu_device_id(gpu_device_id),
+          input(input),
+          output(output),
+          global_rank(global_rank),
+          done_callback(std::move(done_callback)),
+          root(false) {
+      DCHECK(executor != nullptr);
+      DCHECK(event_mgr != nullptr);
+      DCHECK(tensor_stream != nullptr);
+    }
+
+    // StreamExecutor for the device. Expected to be live for process lifetime.
+    se::StreamExecutor* const executor = nullptr;
+
+    // `tensor_stream` is the stream that should be waited on to ensure
+    // `input`'s data is available on the GPU for the communication stream to
+    // access. It is also the stream that will use the produced data;
+    // `done_callback` is not called until the next kernel launched on `stream`
+    // would see the data. Owned by the caller, who must keep it live until
+    // `done_callback` is called.
+    se::Stream* const tensor_stream;
+
+    // EventMgr which polls on executor.
+    // Owned by the caller, who must keep it live until `done_callback` is
+    // called.
+    EventMgr* const event_mgr;
+
+    const int gpu_device_id;
+
+    // Owned by the caller, who must keep it live until `done_callback` is
+    // called. Is NULL for participants that only receive data.
+    const Tensor* input;
+
+    // Owned by the caller, who must keep it live until `done_callback` is
+    // called. Is NULL for participants that only send data.
+    Tensor* output;
+
+    // Rank across all devices and all nodes.
+    // `global_rank` is not required for single-node collectives.
+    const int global_rank;
+
+    // The callback which is called at the completion of the NCCL operation.
+    // When called, `output` has been set to the result of the operation. (note:
+    // the stream may not yet have been synced)
+    DoneCallback done_callback;
+
+    // True if this is the root of the collective, e.g. source of broadcast.
+    bool root;
+  };
+
+  // Data that provides context for the collective operation, including the
+  // operation key, number of participants, and communicator key.
+  struct Context {
+    Context(const string& collective_key, int num_local_devices,
+            int num_global_devices, const string& communicator_key)
+        : collective_key(collective_key),
+          num_local_devices(num_local_devices),
+          num_global_devices(num_global_devices),
+          communicator_key(communicator_key) {}
+
+    // Unique key for this collective instance
+    const string& collective_key;
+
+    // Devices local to this node
+    int num_local_devices;
+
+    // Devices across all nodes
+    int num_global_devices;
+
+    // In order to use NCCL across nodes, the callee first has to generate a
+    // `communicator_key` via `GenerateCommunicatorKey()` function and share
+    // this with all the other nodes.  Each node should pass in this
+    // `communicator_key` to the `NcclManager` functions.
+    // `communicator_key` is not required for single-node collectives and can be
+    // empty.
+    const string& communicator_key;
+  };
+
+  // Adds one participant to an all-reduce.
+  void AddToAllReduce(std::unique_ptr<Participant> participant,
+                      const Context& context, ncclRedOp_t reduction_op);
+
+  // AddBroadcastSend and AddBroadcastRecv combine to send data from one sender
   // to all receivers.
-  void AddBroadcastSend(int num_devices, const string& key,
-                        se::StreamExecutor* executor, int gpu_device_id,
-                        EventMgr* event_mgr, se::Stream* tensor_stream,
-                        const Tensor* in_t, DoneCallback done_callback);
-  void AddBroadcastRecv(int num_devices, const string& key,
-                        se::StreamExecutor* executor, int gpu_device_id,
-                        EventMgr* event_mgr, se::Stream* tensor_stream,
-                        Tensor* out_t, DoneCallback done_callback);
-
-  // AddReduceSend and AddReduceRecv combine to sent data from all senders
+  void AddBroadcastSend(std::unique_ptr<Participant> participant,
+                        const Context& context);
+  void AddBroadcastRecv(std::unique_ptr<Participant> participant,
+                        const Context& context);
+
+  // AddReduceSend and AddReduceRecv combine to send data from all senders
   // to one receiver.
-  void AddReduceSend(int num_devices, const string& key,
-                     ncclRedOp_t reduction_op, se::StreamExecutor* executor,
-                     int gpu_device_id, EventMgr* event_mgr,
-                     se::Stream* tensor_stream, const Tensor* in_t,
-                     DoneCallback done_callback);
-  void AddReduceRecv(int num_devices, const string& key,
-                     ncclRedOp_t reduction_op, se::StreamExecutor* executor,
-                     int gpu_device_id, EventMgr* event_mgr,
-                     se::Stream* tensor_stream, const Tensor* in_t,
-                     Tensor* out_t, DoneCallback done_callback);
+  void AddReduceSend(std::unique_ptr<Participant> participant,
+                     const Context& context, ncclRedOp_t reduction_op);
+  void AddReduceRecv(std::unique_ptr<Participant> participant,
+                     const Context& context, ncclRedOp_t reduction_op);
+
+  // Signals that the `Collective` corresponding to `key` is ready to launch
+  // across all nodes participating in this multi-node collective operation.
+  //
+  // This should only be called for multi-node collectives; single-node
+  // collectives are implicitly ready when all participants have called Add*
+  // function.
+  void SignalMultiNodeReady(const string& collective_key);
 
  private:
   enum CollectiveType {
@@ -101,7 +175,6 @@ class NcclManager {
   struct Communicator;
   struct CommunicatorMember;
   struct NcclStream;
-  struct Participant;
 
   // Gets the `Communicator` object that will be used to enqueue NCCL kernels
   // for `collective`, and returns it via `communicator`.
@@ -111,13 +184,26 @@ class NcclManager {
   // the corresponding NCCL/CUDA error string.
   Status GetCommunicator(Collective* collective, Communicator** communicator);
 
-  void AddParticipant(int num_devices, const string& key,
-                      std::unique_ptr<Participant> participant,
-                      DataType data_type, CollectiveType collective_type,
+  // Adds a participant device to the local `Collective` instance correponding
+  // to `collective_key`.  Launches the `Collective` if it is ready, which it
+  // checks by calling `CheckReady()`.  Also performs consistency and sanity
+  // checks before launching.
+  void AddParticipant(std::unique_ptr<Participant> participant,
+                      const Context& context, CollectiveType collective_type,
                       ncclRedOp_t reduction_op);
 
+  // If `collective` is ready to run, removes it from the `collectives_` map and
+  // returns the pointer.  Otherwise returns `nullptr`.
+  // Assumes `collective_key` corresponds to `collective`.
+  //
+  // A collective is ready to run when all local participants have called Add*
+  // function, and the collective is signalled globally ready via
+  // `SetMultiNodeReady`.
+  Collective* CheckReady(const string& collective_key, Collective* collective)
+      EXCLUSIVE_LOCKS_REQUIRED(mu_);
+
   // Run <collective>.  This calls takes ownership of <collective>.
-  void RunCollective(const string& key, Collective* collective);
+  void RunCollective(Collective* collective);
   void LoopKernelLaunches(NcclStream* stream);
 
   mutex mu_;
diff --git a/tensorflow/core/nccl/nccl_manager_test.cc b/tensorflow/core/nccl/nccl_manager_test.cc
index f9ed4d0b9a..58bb84ac57 100644
--- a/tensorflow/core/nccl/nccl_manager_test.cc
+++ b/tensorflow/core/nccl/nccl_manager_test.cc
@@ -53,7 +53,6 @@ class NcclManagerTest : public ::testing::Test {
  public:
   // A single all-reduce to apply.
   struct TestCase {
-    string key;
     std::vector<Tensor> ins;
     std::vector<Tensor> outs;
     Tensor expected;
@@ -64,18 +63,19 @@ class NcclManagerTest : public ::testing::Test {
   };
 
   static void SetUpTestCase() {
-    setenv("NCCL_DEBUG", "WARN", 1 /* replace */);
+    setenv("NCCL_DEBUG", "INFO", 1 /* replace */);
     setenv("NCCL_LAUNCH_MODE", "PARALLEL", 1 /* replace */);
     devices_ = new std::vector<std::unique_ptr<BaseGPUDevice>>(GetGPUDevices());
-    LOG(ERROR) << "Running test with " << devices_->size() << " gpus";
+    LOG(INFO) << "Running test with " << devices_->size() << " gpus";
   }
 
   static int32 NumGPUs() { return static_cast<int32>(devices_->size()); }
 
   static void TearDownTestCase() { delete devices_; }
 
-  TestCase* MakeTestCase(int num_ranks, ncclRedOp_t reduction_op,
-                         TensorShape shape, float value_offset) {
+  TestCase* MakeTestCase(int num_nodes, int num_ranks_per_node,
+                         ncclRedOp_t reduction_op, TensorShape shape,
+                         float value_offset) {
     TestCase* test_case = new TestCase();
     test_case->expected = Tensor(data_type_, shape);
     if (reduction_op == ncclProd) {
@@ -93,55 +93,61 @@ class NcclManagerTest : public ::testing::Test {
     }
 
     float value_scale = 0.01;  // Small scale to avoid fp16 overflow.
-    for (int rank = 0; rank < num_ranks; ++rank) {
-      auto* device = GetDevice(rank);
-      auto* stream = device->tensorflow_gpu_device_info()->stream;
+    for (int node = 0; node < num_nodes; ++node) {
+      for (int local_rank = 0; local_rank < num_ranks_per_node; ++local_rank) {
+        auto* device = GetDevice(local_rank);
+        auto* stream = device->tensorflow_gpu_device_info()->stream;
 
-      Tensor in_cpu(data_type_, shape);
-      test::FillFn<Scalar>(&in_cpu, [&](int index) {
-        return static_cast<Scalar>((index + 1) * value_scale + value_offset);
-      });
-      for (int j = 0; j < shape.num_elements(); ++j) {
-        auto in_val = in_cpu.flat<Scalar>()(j);
-        auto out_expr = test_case->expected.template flat<Scalar>();
-        if (reduction_op == ncclProd) {
-          out_expr(j) = out_expr(j) * in_val;
-        } else if (reduction_op == ncclSum) {
-          out_expr(j) = out_expr(j) + in_val;
-        } else if (reduction_op == ncclMax) {
-          if (in_val > out_expr(j)) {
-            out_expr(j) = in_val;
-          }
-        } else if (reduction_op == ncclMin) {
-          if (in_val < out_expr(j)) {
-            out_expr(j) = in_val;
+        Tensor in_cpu(data_type_, shape);
+        test::FillFn<Scalar>(&in_cpu, [&](int index) {
+          return static_cast<Scalar>((index + 1) * value_scale + value_offset);
+        });
+        for (int j = 0; j < shape.num_elements(); ++j) {
+          auto in_val = in_cpu.flat<Scalar>()(j);
+          auto out_expr = test_case->expected.template flat<Scalar>();
+          if (reduction_op == ncclProd) {
+            out_expr(j) = out_expr(j) * in_val;
+          } else if (reduction_op == ncclSum) {
+            out_expr(j) = out_expr(j) + in_val;
+          } else if (reduction_op == ncclMax) {
+            if (in_val > out_expr(j)) {
+              out_expr(j) = in_val;
+            }
+          } else if (reduction_op == ncclMin) {
+            if (in_val < out_expr(j)) {
+              out_expr(j) = in_val;
+            }
           }
         }
-      }
 
-      value_scale *= 10;
-      test_case->ins.emplace_back(GpuAllocator(device), data_type_, shape);
-      test_case->outs.emplace_back(GpuAllocator(device), data_type_, shape);
+        value_scale *= 10;
+        test_case->ins.emplace_back(GpuAllocator(device), data_type_, shape);
+        test_case->outs.emplace_back(GpuAllocator(device), data_type_, shape);
 
-      const Tensor& in_gpu = test_case->ins.back();
-      auto in_gpu_mem = AsDeviceMemory(in_gpu.flat<Scalar>().data());
-      stream->ThenMemcpy(&in_gpu_mem, in_cpu.flat<Scalar>().data(),
-                         in_cpu.TotalBytes());
+        const Tensor& in_gpu = test_case->ins.back();
+        auto in_gpu_mem = AsDeviceMemory(in_gpu.flat<Scalar>().data());
+        stream->ThenMemcpy(&in_gpu_mem, in_cpu.flat<Scalar>().data(),
+                           in_cpu.TotalBytes());
+      }
     }
+
     return test_case;
   }
 
-  void VerifyResults(const string& case_label, TestCase* test_case) {
-    // Wait for the done callback to be called.
-    {
-      test_case->mu.lock();
-      while (test_case->num_completed != test_case->outs.size()) {
-        test_case->mu.unlock();
-        Env::Default()->SleepForMicroseconds(10);
-        test_case->mu.lock();
-      }
+  // Waits for the done callback to be called for each participant.
+  void WaitForTestCompletion(TestCase* test_case) {
+    test_case->mu.lock();
+    while (test_case->num_completed != test_case->outs.size()) {
       test_case->mu.unlock();
+      Env::Default()->SleepForMicroseconds(10);
+      test_case->mu.lock();
     }
+    test_case->mu.unlock();
+  }
+
+  void VerifyResults(TestCase* test_case) {
+    WaitForTestCompletion(test_case);
+    TF_ASSERT_OK(test_case->final_status);
     // Copy memory to host and verify.
     for (int rank = 0; rank < test_case->outs.size(); ++rank) {
       auto* device = GetDevice(rank);
@@ -156,6 +162,12 @@ class NcclManagerTest : public ::testing::Test {
     }
   }
 
+  void VerifyError(TestCase* test_case) {
+    WaitForTestCompletion(test_case);
+    LOG(INFO) << test_case->final_status;
+    EXPECT_EQ(test_case->final_status.code(), error::INTERNAL);
+  }
+
   NcclManager::DoneCallback CreateDoneCallback(TestCase* test_case) {
     return [this, test_case](Status s) {
       mutex_lock l(test_case->mu);
@@ -206,20 +218,26 @@ TYPED_TEST(NcclManagerTest, BasicSumReduction) {
   for (int op = 0; op < 4; ++op) {
     ncclRedOp_t reduction_op = static_cast<ncclRedOp_t>(op);
     std::unique_ptr<typename TestFixture::TestCase> test_case(
-        this->MakeTestCase(num_ranks, reduction_op, TensorShape({2, 3}), 0.0f));
+        this->MakeTestCase(/*num_nodes=*/1, num_ranks, reduction_op,
+                           TensorShape({2, 3}), 0.0f));
     for (int rank = 0; rank < num_ranks; ++rank) {
       auto* device = this->GetDevice(rank);
       VLOG(2) << "rank " << rank << " device " << device->name();
       auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
       auto* stream = device->tensorflow_gpu_device_info()->stream;
+      auto participant = absl::make_unique<NcclManager::Participant>(
+          device->executor(), stream, event_mgr, device->gpu_id(),
+          &test_case->ins[rank], &test_case->outs[rank], /*global_rank=*/-1,
+          this->CreateDoneCallback(test_case.get()));
       NcclManager::instance()->AddToAllReduce(
-          num_ranks, "allreduce", reduction_op, device->executor(),
-          device->gpu_id(), event_mgr, stream, &test_case->ins[rank],
-          &test_case->outs[rank], this->CreateDoneCallback(test_case.get()));
+          std::move(participant),
+          {"allreduce", /*num_local_devices=*/num_ranks,
+           /*num_global_devices=*/num_ranks, /*communicator_key=*/""},
+          reduction_op);
     }
 
-    LOG(ERROR) << "Verifying results";
-    this->VerifyResults("test_case", test_case.get());
+    LOG(INFO) << "Verifying results";
+    this->VerifyResults(test_case.get());
   }
 }
 
@@ -230,9 +248,9 @@ TYPED_TEST(NcclManagerTest, BasicSumReduction) {
 // time_limit_micros.
 TYPED_TEST(NcclManagerTest, MultipleCallers) {
   const int num_ranks = 4;
-  const int num_collectives_per_iteration = 10;  // 1000;
+  const int num_collectives_per_iteration = 10;
   const int num_threads = num_ranks * 2;
-  const int time_limit_micros = 100;  // 60 * 30 * 1000 * 1000;
+  const int time_limit_micros = 1 * 1000 * 1000;  // 1 second
 
   int64 start = Env::Default()->NowMicros();
   srand(Env::Default()->NowMicros());
@@ -242,8 +260,8 @@ TYPED_TEST(NcclManagerTest, MultipleCallers) {
     std::vector<std::unique_ptr<typename TestFixture::TestCase>> test_cases;
     for (int i = 0; i < num_collectives_per_iteration; ++i) {
       test_cases.emplace_back(this->MakeTestCase(
-          num_ranks, ncclSum, TensorShape({100, i % 5 + 1, i % 3 + 1}),
-          1.1f * i));
+          /*num_nodes=*/1, num_ranks, ncclSum,
+          TensorShape({100, i % 5 + 1, i % 3 + 1}), 1.1f * i));
       for (int j = 0; j < num_ranks; ++j) {
         case_and_rank.emplace_back(i, j);
       }
@@ -276,31 +294,188 @@ TYPED_TEST(NcclManagerTest, MultipleCallers) {
         auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
         auto* stream = device->tensorflow_gpu_device_info()->stream;
         typename TestFixture::TestCase* test_case = test_cases[test_num].get();
-        NcclManager::instance()->AddToAllReduce(
-            num_ranks, strings::StrCat("allreduce", test_num), ncclSum,
-            device->executor(), device->gpu_id(), event_mgr, stream,
-            &test_case->ins[rank], &test_case->outs[rank],
+        auto participant = absl::make_unique<NcclManager::Participant>(
+            device->executor(), stream, event_mgr, device->gpu_id(),
+            &test_case->ins[rank], &test_case->outs[rank], /*global_rank=*/-1,
             this->CreateDoneCallback(test_case));
+        NcclManager::instance()->AddToAllReduce(
+            std::move(participant),
+            {strings::StrCat("allreduce", test_num),
+             /*num_local_devices=*/num_ranks,
+             /*num_global_devices=*/num_ranks,
+             /*communicator_key=*/""},
+            ncclSum);
       };
       pool->Schedule(fn);
     }
     pool.reset();  // wait for all work to be scheduled.
 
-    LOG(ERROR) << "Verifying results for " << num_collectives_per_iteration
-               << " collectives";
+    VLOG(2) << "Verifying results for " << num_collectives_per_iteration
+            << " collectives";
     for (int i = 0; i < test_cases.size(); ++i) {
-      this->VerifyResults(strings::StrCat("collective", i),
-                          test_cases[i].get());
+      this->VerifyResults(test_cases[i].get());
     }
 
     int64 delta = Env::Default()->NowMicros() - start;
     if (delta > time_limit_micros) {
-      LOG(ERROR) << "Ran for " << delta << " quitting";
+      LOG(INFO) << "Ran for " << delta << " microsecs, now quitting";
       break;
     }
   }
 }
 
+// Multi-node NCCL tests.
+
+TEST(NcclManagerTest, CommunicatorKey) {
+  const string communicator_key =
+      NcclManager::instance()->GenerateCommunicatorKey();
+  EXPECT_EQ(communicator_key.size(), NCCL_UNIQUE_ID_BYTES);
+}
+
+// This test creates `num_nodes` NcclManagers to simulate a multi-node
+// environment.  It works on a single node and reuse GPUs.  It enqueues NCCL ops
+// on separate stream per rank.
+TYPED_TEST(NcclManagerTest, MultiNode) {
+  const int num_nodes = 2;
+  const int num_ranks_per_node = 4;
+  const int num_global_ranks = num_nodes * num_ranks_per_node;
+  std::vector<NcclManager> nccl_managers(num_nodes);
+  const string collective_key = "allreduce";
+  // The NcclManagers in this test synchronize in real-time, so we need to run
+  // each node's code in a separate thread.
+  // Specifically, the call to ncclGroupEnd() after calling ncclCommInitRank
+  // waits for all communicators before returning.
+  thread::ThreadPool pool(Env::Default(), "test_multi_node_nccl", num_nodes);
+
+  // First, initialize the communicator_key used for this collective.
+  const string communicator_key = nccl_managers[0].GenerateCommunicatorKey();
+
+  for (int op = 0; op < 4; ++op) {
+    ncclRedOp_t reduction_op = static_cast<ncclRedOp_t>(op);
+    std::unique_ptr<typename TestFixture::TestCase> test_case(
+        this->MakeTestCase(num_nodes, num_ranks_per_node, reduction_op,
+                           TensorShape({2, 3}), 0.0f));
+    for (int node = 0; node < num_nodes; ++node) {
+      auto node_fn = [this, node, &nccl_managers, &communicator_key,
+                      &collective_key, reduction_op, &test_case] {
+        for (int local_rank = 0; local_rank < num_ranks_per_node;
+             ++local_rank) {
+          auto* device = this->GetDevice(local_rank);
+          auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
+          auto* stream = device->tensorflow_gpu_device_info()->stream;
+          const int global_rank = node * num_ranks_per_node + local_rank;
+          auto participant = absl::make_unique<NcclManager::Participant>(
+              device->executor(), stream, event_mgr, device->gpu_id(),
+              &test_case->ins[global_rank], &test_case->outs[global_rank],
+              global_rank, this->CreateDoneCallback(test_case.get()));
+          nccl_managers[node].AddToAllReduce(
+              std::move(participant),
+              {collective_key, num_ranks_per_node, num_global_ranks,
+               communicator_key},
+              reduction_op);
+          VLOG(1) << "AddToAllReduce node " << node << " global_rank "
+                  << global_rank;
+        }
+
+        // Signal collective ready to launch at this node.
+        nccl_managers[node].SignalMultiNodeReady(collective_key);
+      };
+      pool.Schedule(node_fn);
+    }
+
+    VLOG(2) << "Verifying results";
+    this->VerifyResults(test_case.get());
+  }
+}
+
+// Checks that we return error status if a collective_key is used for different
+// types of collectives, e.g. a reduction and a broadcast.
+TYPED_TEST(NcclManagerTest, ConsistentCollectiveType) {
+  const int num_ranks = 2;
+
+  std::unique_ptr<typename TestFixture::TestCase> test_case(this->MakeTestCase(
+      1 /* num_nodes */, num_ranks, ncclSum, TensorShape({2, 3}), 0.0f));
+  for (int rank = 0; rank < num_ranks; ++rank) {
+    auto* device = this->GetDevice(rank);
+    auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
+    auto* stream = device->tensorflow_gpu_device_info()->stream;
+    auto participant = absl::make_unique<NcclManager::Participant>(
+        device->executor(), stream, event_mgr, device->gpu_id(),
+        &test_case->ins[rank], &test_case->outs[rank], /*global_rank=*/-1,
+        this->CreateDoneCallback(test_case.get()));
+    if (rank == 0) {
+      NcclManager::instance()->AddToAllReduce(std::move(participant),
+                                              {"bad_coll_type",
+                                               /*num_local_devices=*/num_ranks,
+                                               /*num_global_devices=*/num_ranks,
+                                               /*communicator_key=*/""},
+                                              ncclSum);
+    } else {
+      NcclManager::instance()->AddBroadcastSend(
+          std::move(participant), {"bad_coll_type",
+                                   /*num_local_devices=*/num_ranks,
+                                   /*num_global_devices=*/num_ranks,
+                                   /*communicator_key=*/""});
+    }
+  }
+
+  this->VerifyError(test_case.get());
+}
+
+// Checks that we return error status if different communicator_key is passed to
+// same collective.
+TYPED_TEST(NcclManagerTest, ConsistentCommunicatorKey) {
+  const int num_ranks = 2;
+
+  std::unique_ptr<typename TestFixture::TestCase> test_case(this->MakeTestCase(
+      1 /* num_nodes */, num_ranks, ncclSum, TensorShape({2, 3}), 0.0f));
+  for (int rank = 0; rank < num_ranks; ++rank) {
+    auto* device = this->GetDevice(rank);
+    auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
+    auto* stream = device->tensorflow_gpu_device_info()->stream;
+    auto participant = absl::make_unique<NcclManager::Participant>(
+        device->executor(), stream, event_mgr, device->gpu_id(),
+        &test_case->ins[rank], &test_case->outs[rank], /*global_rank=*/-1,
+        this->CreateDoneCallback(test_case.get()));
+    NcclManager::instance()->AddToAllReduce(
+        std::move(participant),
+        {"bad_coll_type",
+         /*num_local_devices=*/num_ranks,
+         /*num_global_devices=*/num_ranks,
+         rank == 0 ? "" : NcclManager::instance()->GenerateCommunicatorKey()},
+        ncclSum);
+  }
+
+  this->VerifyError(test_case.get());
+}
+
+// Checks that we return error status if the number of devices is inconsistent
+// across multiple participants of a collective.
+TYPED_TEST(NcclManagerTest, ConsistentNumberOfDevices) {
+  const int num_ranks = 2;
+
+  std::unique_ptr<typename TestFixture::TestCase> test_case(this->MakeTestCase(
+      1 /* num_nodes */, num_ranks, ncclSum, TensorShape({2, 3}), 0.0f));
+  for (int rank = 0; rank < num_ranks; ++rank) {
+    auto* device = this->GetDevice(rank);
+    auto* event_mgr = device->tensorflow_gpu_device_info()->event_mgr;
+    auto* stream = device->tensorflow_gpu_device_info()->stream;
+    int num_devices = rank == 0 ? num_ranks : num_ranks + 1;
+    auto participant = absl::make_unique<NcclManager::Participant>(
+        device->executor(), stream, event_mgr, device->gpu_id(),
+        &test_case->ins[rank], &test_case->outs[rank], /*global_rank=*/-1,
+        this->CreateDoneCallback(test_case.get()));
+    NcclManager::instance()->AddToAllReduce(std::move(participant),
+                                            {"bad_coll_type",
+                                             /*num_local_devices=*/num_devices,
+                                             /*num_global_devices=*/num_devices,
+                                             /*communicator_key=*/""},
+                                            ncclSum);
+  }
+
+  this->VerifyError(test_case.get());
+}  // namespace tensorflow
+
 }  // namespace tensorflow
 
 #endif  // GOOGLE_CUDA
-- 
GitLab


From a2bf042d36b7b1abe3dc2b81e074aa9f0abea03d Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Tue, 18 Dec 2018 16:10:45 -0800
Subject: [PATCH 784/873] Use functional_saver to write SaverDefs in
 tf.saved_model.save

Replaces the restore() code with tf.train.Saver's bulk restore logic, which was its default. I only noticed because apparently the other path fails on some saveables, and the restore code gets more thoroughly tested via to_proto.

PiperOrigin-RevId: 226077043
---
 tensorflow/python/saved_model/save_test.py    |  2 +-
 .../python/training/checkpointable/base.py    |  5 +-
 .../python/training/checkpointable/util.py    |  5 +-
 .../training/checkpointable/util_test.py      | 10 ++-
 .../training/saving/functional_saver.py       | 77 +++++++++++++------
 .../training/saving/functional_saver_test.py  | 29 +++++++
 6 files changed, 93 insertions(+), 35 deletions(-)

diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py
index 005bc99afa..cffc1ec202 100644
--- a/tensorflow/python/saved_model/save_test.py
+++ b/tensorflow/python/saved_model/save_test.py
@@ -299,7 +299,7 @@ class SaveTest(test.TestCase):
     graph = ops.Graph()
     with graph.as_default(), self.session(graph) as session:
       loader.load(session, [tag_constants.SERVING], save_dir)
-      func, = graph._functions.values()
+      func, = [f for name, f in graph._functions.items() if "call" in name]
       complex_node, = [
           node for node in func.definition.node_def if node.op == "Complex"]
       self.assertNotIn("T", complex_node.attr)
diff --git a/tensorflow/python/training/checkpointable/base.py b/tensorflow/python/training/checkpointable/base.py
index 3cd1c6f9c8..c752f9ca7e 100644
--- a/tensorflow/python/training/checkpointable/base.py
+++ b/tensorflow/python/training/checkpointable/base.py
@@ -144,7 +144,10 @@ class PythonStringStateSaveable(PythonStateSaveable):
       restore_callback: A function taking a Python string, used to restore
         state. Optional; defaults to doing nothing.
     """
-    self._state_callback = state_callback
+    def _state_callback_wrapper():
+      with ops.init_scope():
+        return state_callback()
+    self._state_callback = _state_callback_wrapper
     self._restore_callback = restore_callback
     with ops.device("/cpu:0"):
       self._save_string = constant_op.constant("", dtype=dtypes.string)
diff --git a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py
index 5d5a67714c..7f70d973d0 100644
--- a/tensorflow/python/training/checkpointable/util.py
+++ b/tensorflow/python/training/checkpointable/util.py
@@ -1412,10 +1412,7 @@ class CheckpointableSaver(object):
           base.NoRestoreSaveable(
               tensor=object_graph_tensor,
               name=base.OBJECT_GRAPH_PROTO_KEY))
-      # TODO(allenl): Swap in a function-based saver here once it can serialize
-      # to a SaverDef.
-      return v1_saver_lib.Saver(
-          var_list=named_saveable_objects, max_to_keep=None)
+      return functional_saver.Saver(named_saveable_objects)
 
   def _save_cached_when_graph_building(
       self,
diff --git a/tensorflow/python/training/checkpointable/util_test.py b/tensorflow/python/training/checkpointable/util_test.py
index 61de46898a..a5f4fec672 100644
--- a/tensorflow/python/training/checkpointable/util_test.py
+++ b/tensorflow/python/training/checkpointable/util_test.py
@@ -554,7 +554,7 @@ class CheckpointingTests(parameterized.TestCase, test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testFreezing(self):
-    with self.cached_session(use_gpu=True) as session:
+    with test_util.use_gpu():
       # Save an object-based checkpoint using a frozen saver
       directory = self.get_temp_dir()
       prefix = os.path.join(directory, "ckpt")
@@ -565,10 +565,12 @@ class CheckpointingTests(parameterized.TestCase, test.TestCase):
       # existing in the checkpoint on restore.
       self.evaluate(checkpoint.save_counter.assign(12))
       saver = checkpointable_utils.frozen_saver(checkpoint)
-      save_path = saver.save(session, prefix)
+      with ops.device("cpu:0"):
+        prefix_tensor = constant_op.constant(prefix)
+      save_path = self.evaluate(saver.save(prefix_tensor))
       self.evaluate(v.assign(10))
       # Use the frozen saver to restore the same object graph
-      saver.restore(session, save_path)
+      self.evaluate(saver.restore(prefix_tensor))
       self.assertEqual(3, self.evaluate(v))
 
       # Restore using another frozen saver on an identical object graph
@@ -576,7 +578,7 @@ class CheckpointingTests(parameterized.TestCase, test.TestCase):
       v = resource_variable_ops.ResourceVariable(0, dtype=dtypes.int64)
       checkpoint = checkpointable_utils.Checkpoint(v=v)
       saver = checkpointable_utils.frozen_saver(checkpoint)
-      saver.restore(session, save_path)
+      self.evaluate(saver.restore(prefix_tensor))
       self.assertEqual(3, self.evaluate(v))
 
       # Restore as an object-based checkpoint
diff --git a/tensorflow/python/training/saving/functional_saver.py b/tensorflow/python/training/saving/functional_saver.py
index 7eed333662..74991b240c 100644
--- a/tensorflow/python/training/saving/functional_saver.py
+++ b/tensorflow/python/training/saving/functional_saver.py
@@ -18,12 +18,15 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.core.protobuf import saver_pb2
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import io_ops
 from tensorflow.python.training.saving import saveable_object
-from tensorflow.python.training.saving import saveable_object_util
+from tensorflow.python.util import nest
 
 
 class Saver(object):
@@ -47,7 +50,27 @@ class Saver(object):
             "Saver expected a list of SaveableObjects, got %s." % (saveable,))
     self._saveable_objects = saveable_objects
 
-  # TODO(b/120569892): Use tf.function here
+  def to_proto(self):
+    """Serializes to a SaverDef referencing the current graph."""
+    filename_tensor = array_ops.placeholder(
+        shape=[], dtype=dtypes.string, name="saver_filename")
+    # TODO(allenl): Add save and restore function names to the proto directly.
+    save_tensor = self.save(filename_tensor)
+    restore_op = self.restore(filename_tensor).op
+    return saver_pb2.SaverDef(
+        filename_tensor_name=filename_tensor.name,
+        save_tensor_name=save_tensor.name,
+        restore_op_name=restore_op.name,
+        version=saver_pb2.SaverDef.V2)
+
+  @def_function.function(
+      input_signature=(tensor_spec.TensorSpec(shape=(), dtype=dtypes.string),),
+      # Autograph is off because of reference cycles which must be collected
+      # when a function is created and destroyed (as in
+      # tf.saved_model.save). It's also not necessary, so having it off may be
+      # slightly faster.
+      autograph=False,
+  )
   def save(self, file_prefix):
     """Save the saveable objects to a checkpoint with `file_prefix`.
 
@@ -66,11 +89,13 @@ class Saver(object):
         tensor_names.append(spec.name)
         tensors.append(spec.tensor)
         tensor_slices.append(spec.slice_spec)
-    with ops.control_dependencies(
-        [io_ops.save_v2(file_prefix, tensor_names, tensor_slices, tensors)]):
-      return array_ops.identity(file_prefix)
+    io_ops.save_v2(file_prefix, tensor_names, tensor_slices, tensors)
+    return file_prefix
 
-  # TODO(b/120569892): Use tf.function here
+  @def_function.function(
+      input_signature=(tensor_spec.TensorSpec(shape=(), dtype=dtypes.string),),
+      autograph=False,
+  )
   def restore(self, file_prefix):
     """Restore the saveable objects from a checkpoint with `file_prefix`.
 
@@ -79,23 +104,25 @@ class Saver(object):
         files to read from.
 
     Returns:
-      An operation which restores the `Saver`'s `SaveableObject`s when run, or
-      None if executing eagerly.
+      A scalar string Tensor containing `file_prefix` with control dependencies
+      on the restore ops.
     """
-    restore_ops = []
+    restore_specs = []
+    tensor_structure = []
     for saveable in self._saveable_objects:
-      if saveable.device:
-        device = saveable_object_util.set_cpu0(saveable.device)
-      else:
-        device = None
-      with ops.device(device):
-        tensors = []
-        for spec in saveable.specs:
-          tensors.append(
-              io_ops.restore_v2(
-                  file_prefix,
-                  [spec.name],
-                  [spec.slice_spec],
-                  [spec.dtype])[0])
-        restore_ops.append(saveable.restore(tensors, restored_shapes=None))
-    return control_flow_ops.group(restore_ops)
+      saveable_tensor_structure = []
+      tensor_structure.append(saveable_tensor_structure)
+      for spec in saveable.specs:
+        saveable_tensor_structure.append(spec.name)
+        restore_specs.append((spec.name, spec.slice_spec, spec.dtype))
+    tensor_names, tensor_slices, tensor_dtypes = zip(*restore_specs)
+    with ops.device("cpu:0"):
+      restored_tensors = io_ops.restore_v2(
+          file_prefix, tensor_names, tensor_slices, tensor_dtypes)
+    structured_restored_tensors = nest.pack_sequence_as(
+        tensor_structure, restored_tensors)
+    for saveable, restored_tensors in zip(self._saveable_objects,
+                                          structured_restored_tensors):
+      saveable.restore(restored_tensors,
+                       restored_shapes=None)
+    return file_prefix
diff --git a/tensorflow/python/training/saving/functional_saver_test.py b/tensorflow/python/training/saving/functional_saver_test.py
index 40002255aa..a394f0b5c6 100644
--- a/tensorflow/python/training/saving/functional_saver_test.py
+++ b/tensorflow/python/training/saving/functional_saver_test.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import os
 
 from tensorflow.python.eager import test
+from tensorflow.python.eager import wrap_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.training.saving import functional_saver
@@ -45,6 +46,34 @@ class SaverTest(test.TestCase):
     second_saver.restore(save_path)
     self.assertEqual(2., self.evaluate(v2))
 
+  def test_to_proto(self):
+    v1 = resource_variable_ops.ResourceVariable(2.)
+    saver = functional_saver.Saver(
+        saveable_object_util.saveable_objects_for_op(v1, "x"))
+    prefix = os.path.join(self.get_temp_dir(), "ckpt")
+
+    proto_accumulator = []
+    wrapped = wrap_function.wrap_function(
+        lambda: proto_accumulator.append(saver.to_proto()), signature=())
+    self.assertEqual(1, len(proto_accumulator))
+    proto = proto_accumulator[0]
+    save = wrapped.prune(
+        feeds=wrapped.graph.get_tensor_by_name(proto.filename_tensor_name),
+        fetches=wrapped.graph.get_tensor_by_name(proto.save_tensor_name))
+    restore = wrapped.prune(
+        feeds=wrapped.graph.get_tensor_by_name(proto.filename_tensor_name),
+        fetches=wrapped.graph.get_operation_by_name(proto.restore_op_name))
+    save_path = save(constant_op.constant(prefix))
+    v1.assign(1.)
+    restore(constant_op.constant(save_path))
+    self.assertEqual(2., self.evaluate(v1))
+
+    v2 = resource_variable_ops.ResourceVariable(3.)
+    second_saver = functional_saver.Saver(
+        saveable_object_util.saveable_objects_for_op(v2, "x"))
+    second_saver.restore(save_path)
+    self.assertEqual(2., self.evaluate(v2))
+
 
 if __name__ == "__main__":
   test.main()
-- 
GitLab


From b90b36956c466ca4fa4c47bf776a23365195eec6 Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Tue, 18 Dec 2018 16:23:25 -0800
Subject: [PATCH 785/873] Display test execution mode in stack trace.

Previously when using `run_all_keras_modes` in tests, the execution mode was not included
in the stack trace, making it difficult to debug issues. Now the execution mode is
clearly printed out in the stack trace (it's the name of the last function call
before the actual test execution).

PiperOrigin-RevId: 226079310
---
 .../python/keras/keras_parameterized.py       | 30 +++++++++++++------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/tensorflow/python/keras/keras_parameterized.py b/tensorflow/python/keras/keras_parameterized.py
index d76bbadeb3..f470033b6e 100644
--- a/tensorflow/python/keras/keras_parameterized.py
+++ b/tensorflow/python/keras/keras_parameterized.py
@@ -233,17 +233,11 @@ def run_all_keras_modes(
     def decorated(self, run_mode, *args, **kwargs):
       """A run of a single test case w/ specified run mode."""
       if run_mode == 'v1_graph':
-        with context.graph_mode(), testing_utils.run_eagerly_scope(False):
-          with self.test_session(use_gpu=True, config=config):
-            f(self, *args, **kwargs)
+        _v1_graph_test(f, self, config, *args, **kwargs)
       elif run_mode == 'v2_function':
-        with context.eager_mode():
-          with testing_utils.run_eagerly_scope(False):
-            f(self, *args, **kwargs)
+        _v2_graph_functions_test(f, self, *args, **kwargs)
       elif run_mode == 'v2_eager':
-        with context.eager_mode():
-          with testing_utils.run_eagerly_scope(True):
-            f(self, *args, **kwargs)
+        _v2_eager_test(f, self, *args, **kwargs)
       else:
         return ValueError('Unknown run mode %s' % run_mode)
 
@@ -252,6 +246,24 @@ def run_all_keras_modes(
   return _test_or_class_decorator(test_or_class, single_method_decorator)
 
 
+def _v1_graph_test(f, test_or_class, config, *args, **kwargs):
+  with context.graph_mode(), testing_utils.run_eagerly_scope(False):
+    with test_or_class.test_session(use_gpu=True, config=config):
+      f(test_or_class, *args, **kwargs)
+
+
+def _v2_graph_functions_test(f, test_or_class, *args, **kwargs):
+  with context.eager_mode():
+    with testing_utils.run_eagerly_scope(False):
+      f(test_or_class, *args, **kwargs)
+
+
+def _v2_eager_test(f, test_or_class, *args, **kwargs):
+  with context.eager_mode():
+    with testing_utils.run_eagerly_scope(True):
+      f(test_or_class, *args, **kwargs)
+
+
 def _test_or_class_decorator(test_or_class, single_method_decorator):
   """Decorate a test or class with a decorator intended for one method.
 
-- 
GitLab


From 09feb6963d691ce12c41fd20ff7638daa89aea11 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 18 Dec 2018 17:14:49 -0800
Subject: [PATCH 786/873] Add k8 to toolchains.

PiperOrigin-RevId: 226087365
---
 third_party/gpus/crosstool/BUILD.tpl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl
index 1260b265ab..5e6b47f44f 100644
--- a/third_party/gpus/crosstool/BUILD.tpl
+++ b/third_party/gpus/crosstool/BUILD.tpl
@@ -23,6 +23,7 @@ cc_toolchain_suite(
         "darwin|compiler": ":cc-compiler-darwin",
         "x64_windows|msvc-cl": ":cc-compiler-windows",
         "x64_windows": ":cc-compiler-windows",
+        "k8": ":cc-compiler-local",
     },
 )
 
-- 
GitLab


From 105fbe528c4e92c1542848a69056a70f35b1ec53 Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Tue, 18 Dec 2018 17:17:11 -0800
Subject: [PATCH 787/873] Internal change.

PiperOrigin-RevId: 226087680
---
 tensorflow/python/kernel_tests/distributions/BUILD | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tensorflow/python/kernel_tests/distributions/BUILD b/tensorflow/python/kernel_tests/distributions/BUILD
index 14532965d8..a83622ac9a 100644
--- a/tensorflow/python/kernel_tests/distributions/BUILD
+++ b/tensorflow/python/kernel_tests/distributions/BUILD
@@ -217,7 +217,13 @@ cuda_py_test(
         "//tensorflow/python:nn_ops",
         "//tensorflow/python:platform_test",
     ],
-    tags = ["nomsan"],  # disable to avoid false positives from scipy.
+    tags = [
+        # TODO(b/121223043): Re-enable this test on mac after fixing "mean not
+        # defined" errors.
+        "no_mac",
+        # disable to avoid false positives from scipy.
+        "nomsan",
+    ],
 )
 
 cuda_py_test(
-- 
GitLab


From 51bac90e00b6af9c4c803870b2b60b3a41d35ddc Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 18 Dec 2018 17:43:10 -0800
Subject: [PATCH 788/873] [XLA:CPU] Make the Eigen matmul routines flexible
 around array alignment

This code will be used in a later CL where we will use implement batchdot in
XLA:CPU by calling individual dot operations in a loop.
PiperOrigin-RevId: 226091175
---
 tensorflow/compiler/xla/service/cpu/BUILD     |  2 +
 .../xla/service/cpu/runtime_matmul.cc         | 46 ++++++++++++-------
 .../cpu/runtime_single_threaded_matmul.cc     | 46 ++++++++++++-------
 3 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 7e4447a9dd..f49b5110be 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -767,6 +767,8 @@ cc_library(
         ":target_machine_features",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla/service:computation_layout",
+        "//tensorflow/compiler/xla/service:hlo",
+        "//tensorflow/compiler/xla/service:hlo_casting_utils",
         "//tensorflow/compiler/xla/service:layout_assignment",
         "//tensorflow/core:lib",
         "@com_google_absl//absl/container:flat_hash_map",
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc
index 56f018abdd..fe7e87a197 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc
@@ -32,7 +32,11 @@ using tensorflow::int64;
 
 namespace {
 
-template <typename T>
+bool Is16BytesAligned(void* ptr) {
+  return reinterpret_cast<uintptr_t>(ptr) % 16 == 0;
+}
+
+template <typename T, Eigen::AlignmentType Alignment>
 void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
             int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
   const xla::ExecutableRunOptions* run_options =
@@ -50,11 +54,11 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
     std::swap(rhs_rows, rhs_cols);
   }
 
-  const Eigen::TensorMap<Eigen::Tensor<const T, 2>, Eigen::Aligned> A(
-      lhs, lhs_rows, lhs_cols);
-  const Eigen::TensorMap<Eigen::Tensor<const T, 2>, Eigen::Aligned> B(
-      rhs, rhs_rows, rhs_cols);
-  Eigen::TensorMap<Eigen::Tensor<T, 2>, Eigen::Aligned> C(out, m, n);
+  const Eigen::TensorMap<Eigen::Tensor<const T, 2>, Alignment> A(lhs, lhs_rows,
+                                                                 lhs_cols);
+  const Eigen::TensorMap<Eigen::Tensor<const T, 2>, Alignment> B(rhs, rhs_rows,
+                                                                 rhs_cols);
+  Eigen::TensorMap<Eigen::Tensor<T, 2>, Alignment> C(out, m, n);
 
   typedef typename Eigen::Tensor<T, 2>::DimensionPair DimPair;
   int lhs_contract_dim = transpose_lhs ? 0 : 1;
@@ -69,14 +73,24 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
 }
 
 template <typename T>
-void MatMulImpl(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
-                int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
+void MatMulDispatch(const void* run_options_ptr, T* out, T* lhs, T* rhs,
+                    int64 m, int64 n, int64 k, int32 transpose_lhs,
+                    int32 transpose_rhs) {
+  bool all_buffers_16b_aligned =
+      Is16BytesAligned(out) && Is16BytesAligned(lhs) && Is16BytesAligned(rhs);
+
+  if (!all_buffers_16b_aligned) {
+    MatMul<T, Eigen::Unaligned>(run_options_ptr, out, lhs, rhs, m, n, k,
+                                transpose_lhs, transpose_rhs);
+    return;
+  }
+
   if (m == 1 || n == 1) {
     // Despite being single threaded, this version of matrix * vector is faster.
     xla::EigenMatVec<T>(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs);
   } else {
-    MatMul<T>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
-              transpose_rhs);
+    MatMul<T, Eigen::Aligned16>(run_options_ptr, out, lhs, rhs, m, n, k,
+                                transpose_lhs, transpose_rhs);
   }
 }
 
@@ -86,20 +100,20 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF16(
     const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
     Eigen::half* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs,
     int32 transpose_rhs) {
-  MatMulImpl<Eigen::half>(run_options_ptr, out, lhs, rhs, m, n, k,
-                          transpose_lhs, transpose_rhs);
+  MatMulDispatch<Eigen::half>(run_options_ptr, out, lhs, rhs, m, n, k,
+                              transpose_lhs, transpose_rhs);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF32(
     const void* run_options_ptr, float* out, float* lhs, float* rhs, int64 m,
     int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
-  MatMulImpl<float>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
-                    transpose_rhs);
+  MatMulDispatch<float>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
+                        transpose_rhs);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenMatMulF64(
     const void* run_options_ptr, double* out, double* lhs, double* rhs, int64 m,
     int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
-  MatMulImpl<double>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
-                     transpose_rhs);
+  MatMulDispatch<double>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
+                         transpose_rhs);
 }
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc
index 16692e7f2e..1ed743afc3 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc
@@ -25,7 +25,11 @@ using tensorflow::int64;
 
 namespace {
 
-template <typename T>
+bool Is16BytesAligned(void* ptr) {
+  return reinterpret_cast<uintptr_t>(ptr) % 16 == 0;
+}
+
+template <typename T, Eigen::AlignmentType Alignment>
 void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
             int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) {
   int64 lhs_rows = m;
@@ -40,11 +44,11 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
     std::swap(rhs_rows, rhs_cols);
   }
 
-  const Eigen::TensorMap<Eigen::Tensor<const T, 2>, Eigen::Aligned> A(
-      lhs, lhs_rows, lhs_cols);
-  const Eigen::TensorMap<Eigen::Tensor<const T, 2>, Eigen::Aligned> B(
-      rhs, rhs_rows, rhs_cols);
-  Eigen::TensorMap<Eigen::Tensor<T, 2>, Eigen::Aligned> C(out, m, n);
+  const Eigen::TensorMap<Eigen::Tensor<const T, 2>, Alignment> A(lhs, lhs_rows,
+                                                                 lhs_cols);
+  const Eigen::TensorMap<Eigen::Tensor<const T, 2>, Alignment> B(rhs, rhs_rows,
+                                                                 rhs_cols);
+  Eigen::TensorMap<Eigen::Tensor<T, 2>, Alignment> C(out, m, n);
 
   typedef typename Eigen::Tensor<T, 2>::DimensionPair DimPair;
   int lhs_contract_dim = transpose_lhs ? 0 : 1;
@@ -59,14 +63,22 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m,
 }
 
 template <typename T>
-void SingleThreadedMatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs,
-                          int64 m, int64 n, int64 k, int32 transpose_lhs,
-                          int32 transpose_rhs) {
+void SingleThreadedMatMulDispatch(const void* run_options_ptr, T* out, T* lhs,
+                                  T* rhs, int64 m, int64 n, int64 k,
+                                  int32 transpose_lhs, int32 transpose_rhs) {
+  bool all_buffers_16b_aligned =
+      Is16BytesAligned(out) && Is16BytesAligned(lhs) && Is16BytesAligned(rhs);
+
+  if (!all_buffers_16b_aligned) {
+    MatMul<T, Eigen::Unaligned>(run_options_ptr, out, lhs, rhs, m, n, k,
+                                transpose_lhs, transpose_rhs);
+  }
+
   if (m == 1 || n == 1) {
     xla::EigenMatVec<T>(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs);
   } else {
-    MatMul<T>(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs,
-              transpose_rhs);
+    MatMul<T, Eigen::Aligned16>(run_options_ptr, out, lhs, rhs, m, n, k,
+                                transpose_lhs, transpose_rhs);
   }
 }
 
@@ -77,8 +89,8 @@ __xla_cpu_runtime_EigenSingleThreadedMatMulF16(
     const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
     Eigen::half* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs,
     int32 transpose_rhs) {
-  SingleThreadedMatMul<Eigen::half>(run_options_ptr, out, lhs, rhs, m, n, k,
-                                    transpose_lhs, transpose_rhs);
+  SingleThreadedMatMulDispatch<Eigen::half>(run_options_ptr, out, lhs, rhs, m,
+                                            n, k, transpose_lhs, transpose_rhs);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
@@ -87,8 +99,8 @@ __xla_cpu_runtime_EigenSingleThreadedMatMulF32(const void* run_options_ptr,
                                                float* rhs, int64 m, int64 n,
                                                int64 k, int32 transpose_lhs,
                                                int32 transpose_rhs) {
-  SingleThreadedMatMul<float>(run_options_ptr, out, lhs, rhs, m, n, k,
-                              transpose_lhs, transpose_rhs);
+  SingleThreadedMatMulDispatch<float>(run_options_ptr, out, lhs, rhs, m, n, k,
+                                      transpose_lhs, transpose_rhs);
 }
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
@@ -97,6 +109,6 @@ __xla_cpu_runtime_EigenSingleThreadedMatMulF64(const void* run_options_ptr,
                                                double* rhs, int64 m, int64 n,
                                                int64 k, int32 transpose_lhs,
                                                int32 transpose_rhs) {
-  SingleThreadedMatMul<double>(run_options_ptr, out, lhs, rhs, m, n, k,
-                               transpose_lhs, transpose_rhs);
+  SingleThreadedMatMulDispatch<double>(run_options_ptr, out, lhs, rhs, m, n, k,
+                                       transpose_lhs, transpose_rhs);
 }
-- 
GitLab


From 5195204b47d1cf9516ff5eea8232fbff8d320521 Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 18 Dec 2018 17:44:10 -0800
Subject: [PATCH 789/873] Add capability to load plugins installed via
 tensorflow-plugins pip directory.

PiperOrigin-RevId: 226091344
---
 tensorflow/api_template.__init__.py    | 40 ++++++++++++++++++++++++++
 tensorflow/api_template_v1.__init__.py | 37 ++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index 983aa361e4..2c0a745269 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -18,7 +18,10 @@ from __future__ import absolute_import as _absolute_import
 from __future__ import division as _division
 from __future__ import print_function as _print_function
 
+import distutils as _distutils
+import inspect as _inspect
 import os as _os
+import site as _site
 import sys as _sys
 
 # API IMPORTS PLACEHOLDER
@@ -52,6 +55,41 @@ elif _tf_api_dir not in __path__:
 from tensorflow.python.compat import compat as _compat  # pylint: disable=g-import-not-at-top
 _compat.enable_v2_behavior()
 
+
+# Load all plugin libraries from site-packages/tensorflow-plugins if we are
+# running under pip.
+# TODO(gunan): Enable setting an environment variable to define arbitrary plugin
+# directories.
+# TODO(gunan): Find a better location for this code snippet.
+from tensorflow.python.framework import load_library as _ll
+from tensorflow.python.lib.io import file_io as _fi
+
+# Get sitepackages directories for the python installation.
+_site_packages_dirs = []
+_site_packages_dirs += [_site.USER_SITE]
+_site_packages_dirs += [_p for _p in _sys.path if 'site-packages' in _p]
+if 'getsitepackages' in dir(_site):
+  _site_packages_dirs += _site.getsitepackages()
+
+if 'sysconfig' in dir(_distutils):
+  _site_packages_dirs += [_distutils.sysconfig.get_python_lib()]
+
+_site_packages_dirs = list(set(_site_packages_dirs))
+
+# Find the location of this exact file.
+_current_file_location = _inspect.getfile(_inspect.currentframe())
+
+def _running_from_pip_package():
+  return any(
+      _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs)
+
+if _running_from_pip_package():
+  for s in _site_packages_dirs:
+    # TODO(gunan): Add sanity checks to loaded modules here.
+    plugin_dir = _os.path.join(s, 'tensorflow-plugins')
+    if _fi.file_exists(plugin_dir):
+      _ll.load_library(plugin_dir)
+
 # These symbols appear because we import the python package which
 # in turn imports from tensorflow.core and tensorflow.python. They
 # must come from this module. So python adds these symbols for the
@@ -72,4 +110,6 @@ try:
   del compiler
 except NameError:
   pass
+
+
 # pylint: enable=undefined-variable
diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index e199639762..514aba1b59 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -18,7 +18,10 @@ from __future__ import absolute_import as _absolute_import
 from __future__ import division as _division
 from __future__ import print_function as _print_function
 
+import distutils as _distutils
+import inspect as _inspect
 import os as _os
+import site as _site
 import sys as _sys
 
 # pylint: disable=g-bad-import-order
@@ -69,6 +72,40 @@ if not hasattr(_current_module, '__path__'):
 elif _tf_api_dir not in __path__:
   __path__.append(_tf_api_dir)
 
+# Load all plugin libraries from site-packages/tensorflow-plugins if we are
+# running under pip.
+# TODO(gunan): Enable setting an environment variable to define arbitrary plugin
+# directories.
+# TODO(gunan): Find a better location for this code snippet.
+from tensorflow.python.framework import load_library as _ll
+from tensorflow.python.lib.io import file_io as _fi
+
+# Get sitepackages directories for the python installation.
+_site_packages_dirs = []
+_site_packages_dirs += [_site.USER_SITE]
+_site_packages_dirs += [_p for _p in _sys.path if 'site-packages' in _p]
+if 'getsitepackages' in dir(_site):
+  _site_packages_dirs += _site.getsitepackages()
+
+if 'sysconfig' in dir(_distutils):
+  _site_packages_dirs += [_distutils.sysconfig.get_python_lib()]
+
+_site_packages_dirs = list(set(_site_packages_dirs))
+
+# Find the location of this exact file.
+_current_file_location = _inspect.getfile(_inspect.currentframe())
+
+def _running_from_pip_package():
+  return any(
+      _current_file_location.startswith(dir_) for dir_ in _site_packages_dirs)
+
+if _running_from_pip_package():
+  for s in _site_packages_dirs:
+    # TODO(gunan): Add sanity checks to loaded modules here.
+    plugin_dir = _os.path.join(s, 'tensorflow-plugins')
+    if _fi.file_exists(plugin_dir):
+      _ll.load_library(plugin_dir)
+
 # These symbols appear because we import the python package which
 # in turn imports from tensorflow.core and tensorflow.python. They
 # must come from this module. So python adds these symbols for the
-- 
GitLab


From 4558e3098f5a5ee8712ad8f7dc96bc20a0dadc2c Mon Sep 17 00:00:00 2001
From: Davide Libenzi <dlibenzi@google.com>
Date: Tue, 18 Dec 2018 17:49:19 -0800
Subject: [PATCH 790/873] Allow the user to specify a layout for an infeed
 operation, to allow it to override the TF logic of calculating an infeed
 layout.

PiperOrigin-RevId: 226091990
---
 tensorflow/compiler/tf2xla/BUILD         |  1 +
 tensorflow/compiler/tf2xla/shape_util.cc | 29 ++++++++++++++++++++++++
 tensorflow/compiler/tf2xla/shape_util.h  | 11 +++++++++
 tensorflow/contrib/tpu/ops/infeed_ops.cc |  9 ++++++++
 4 files changed, 50 insertions(+)

diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD
index 5a0d9b9af9..d8123e956f 100644
--- a/tensorflow/compiler/tf2xla/BUILD
+++ b/tensorflow/compiler/tf2xla/BUILD
@@ -244,6 +244,7 @@ cc_library(
     deps = [
         "//tensorflow/compiler/xla:literal",
         "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:statusor",
         "//tensorflow/compiler/xla:xla_data_proto",
         "//tensorflow/core:core_cpu_internal",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/tf2xla/shape_util.cc b/tensorflow/compiler/tf2xla/shape_util.cc
index b589512dcd..ec604af138 100644
--- a/tensorflow/compiler/tf2xla/shape_util.cc
+++ b/tensorflow/compiler/tf2xla/shape_util.cc
@@ -18,10 +18,33 @@ limitations under the License.
 #include <numeric>
 
 #include "tensorflow/compiler/tf2xla/type_util.h"
+#include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/core/lib/core/status.h"
 
 namespace tensorflow {
+namespace {
+
+Status PopulateInfeedLayoutVector(const xla::Shape& shape,
+                                  std::vector<int>* layouts) {
+  if (xla::ShapeUtil::IsTuple(shape)) {
+    int64 tuple_elements = xla::ShapeUtil::TupleElementCount(shape);
+    for (int64 i = 0; i < tuple_elements; ++i) {
+      const xla::Shape& subshape =
+          xla::ShapeUtil::GetTupleElementShape(shape, i);
+      TF_RETURN_IF_ERROR(PopulateInfeedLayoutVector(subshape, layouts));
+    }
+  } else if (xla::LayoutUtil::HasLayout(shape)) {
+    for (auto dim : xla::LayoutUtil::MinorToMajor(shape)) {
+      layouts->push_back(dim);
+    }
+  } else {
+    layouts->insert(layouts->end(), xla::ShapeUtil::Rank(shape), -1);
+  }
+  return Status::OK();
+}
+
+}  // namespace
 
 // Convert an XLA Shape into the equivalent TensorFlow shape.
 Status XLAShapeToTensorShape(const xla::Shape& shape,
@@ -61,4 +84,10 @@ xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type,
   return xla::ShapeUtil::MakeShapeWithLayout(type, dimensions, layout);
 }
 
+xla::StatusOr<std::vector<int>> GetInfeedLayoutVector(const xla::Shape& shape) {
+  std::vector<int> layouts;
+  TF_RETURN_IF_ERROR(PopulateInfeedLayoutVector(shape, &layouts));
+  return layouts;
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/compiler/tf2xla/shape_util.h b/tensorflow/compiler/tf2xla/shape_util.h
index 0b231ea8e7..cf52bf46e7 100644
--- a/tensorflow/compiler/tf2xla/shape_util.h
+++ b/tensorflow/compiler/tf2xla/shape_util.h
@@ -18,7 +18,10 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_TF2XLA_SHAPE_UTIL_H_
 #define TENSORFLOW_COMPILER_TF2XLA_SHAPE_UTIL_H_
 
+#include <vector>
+
 #include "tensorflow/compiler/xla/shape.h"
+#include "tensorflow/compiler/xla/statusor.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 #include "tensorflow/core/framework/tensor_shape.h"
 #include "tensorflow/core/framework/types.pb.h"
@@ -41,6 +44,14 @@ Status TensorShapeToXLAShape(DataType dtype, const TensorShape& tensor_shape,
 xla::Shape TensorShapeToXLAShape(xla::PrimitiveType type,
                                  const TensorShape& tensor_shape);
 
+// Given an XLA shape with layouts, builds a layout vector in the form able to
+// be fed to an InfeedEnqueue/InfeedEnqueueTuple ops.
+// THe returned vector is a linearized sequence of the minor-to-major values of
+// the layouts held within the input shape.
+// In case the input shape is a tuple, the minor-to-major values will be in the
+// order of the tuple elements within the tuple shape.
+xla::StatusOr<std::vector<int>> GetInfeedLayoutVector(const xla::Shape& shape);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_TF2XLA_SHAPE_UTIL_H_
diff --git a/tensorflow/contrib/tpu/ops/infeed_ops.cc b/tensorflow/contrib/tpu/ops/infeed_ops.cc
index efc546f9a6..2ed16c2a22 100644
--- a/tensorflow/contrib/tpu/ops/infeed_ops.cc
+++ b/tensorflow/contrib/tpu/ops/infeed_ops.cc
@@ -40,6 +40,7 @@ REGISTER_OP("InfeedEnqueue")
     .Input("input: dtype")
     .Attr("dtype: type")
     .Attr("shape: shape = {}")
+    .Attr("layout: list(int) = []")
     .Attr("device_ordinal: int = -1")
     .SetShapeFn(shape_inference::NoOutputs)
     .SetIsStateful()
@@ -49,6 +50,9 @@ An op which feeds a single Tensor value into the computation.
 input: A tensor that will be provided using the infeed mechanism.
 dtype: The type of elements in the tensor.
 shape: The shape of the tensor.
+layout: A vector holding the requested layout in minor-to-major sequence.
+If a layout attribute is passed, but its values are all -1, the layout will
+be computed by the infeed operation.
 device_ordinal: The TPU device to use. This should be -1 when the Op
 is running on a TPU device, and >= 0 when the Op is running on the CPU
 device.
@@ -58,6 +62,7 @@ REGISTER_OP("InfeedEnqueueTuple")
     .Input("inputs: dtypes")
     .Attr("dtypes: list(type)")
     .Attr("shapes: list(shape)")
+    .Attr("layouts: list(int) = []")
     .Attr("device_ordinal: int = -1")
     .SetShapeFn(shape_inference::NoOutputs)
     .SetIsStateful()
@@ -67,6 +72,10 @@ An op which feeds multiple Tensor values into the computation as an XLA tuple.
 inputs: A list of tensors that will be provided using the infeed mechanism.
 dtypes: The element types of each element in `inputs`.
 shapes: The shapes of each tensor in `inputs`.
+layouts: A vector holding the requested layout in minor-to-major sequence for
+all the tuple shapes, in the order the shapes appear in the "shapes" input.
+The layout elements for a sub-shape can be set to -1, in which case the
+corresponding layout will be computed by the infeed operation.
 device_ordinal: The TPU device to use. This should be -1 when the Op
 is running on a TPU device, and >= 0 when the Op is running on the CPU
 device.
-- 
GitLab


From 1b1ae22b302f74e5d9e9f7d751233c811902c443 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 17:54:22 -0800
Subject: [PATCH 791/873] [Grappler]: Don't build swap-in/swap-out pairs in
 memory optimizer unless the node is on GPU.

PiperOrigin-RevId: 226092666
---
 .../grappler/optimizers/memory_optimizer.cc   |  7 +++
 .../python/grappler/memory_optimizer_test.py  | 63 ++++++++++---------
 2 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
index 227c2bb8b0..042e9fa32b 100644
--- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc
@@ -702,6 +702,13 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap,
                      const std::unordered_map<string, const NodeDef*>& name_map,
                      GraphDef* graph,
                      std::pair<NodeDef*, NodeDef*>* swap_pair) {
+  string task, device;
+  if (!DeviceNameUtils::SplitDeviceName(node->device(), &task, &device) ||
+      !str_util::StrContains(device, DEVICE_GPU)) {
+    return errors::InvalidArgument("Can't swap input ", input_to_swap,
+                                   " of node ", node->name(),
+                                   " since it is not on GPU");
+  }
   const OpDef* op_def;
   TF_RETURN_IF_ERROR(OpRegistry::Global()->LookUpOpDef(node->op(), &op_def));
   DataType input_type;
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py
index e2864ebb4d..a5d70d994d 100644
--- a/tensorflow/python/grappler/memory_optimizer_test.py
+++ b/tensorflow/python/grappler/memory_optimizer_test.py
@@ -65,41 +65,42 @@ class MemoryOptimizerSwapTest(test.TestCase):
   @test_util.run_v1_only('b/120545219')
   def testSimpleSwap(self):
     """Check that the swap annotations are followed."""
-    a = variables.VariableV1(10, name='a')
-    b = variables.VariableV1(20, name='b')
-    c = math_ops.add_n([a, b], name='c')
-    d = math_ops.add_n([b, c], name='d')
-    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
-    train_op.append(d)
+    with ops.device('/gpu:0'):
+      a = variables.VariableV1(10, name='a')
+      b = variables.VariableV1(20, name='b')
+      c = math_ops.add_n([a, b], name='c')
+      d = math_ops.add_n([b, c], name='d')
+      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+      train_op.append(d)
 
-    d.op._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=0))
+      d.op._set_attr('_swap_to_host', attr_value_pb2.AttrValue(i=0))
 
-    mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
-    graph_size = len(mg.graph_def.node)
+      mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
+      graph_size = len(mg.graph_def.node)
 
-    config = config_pb2.ConfigProto()
-    config.graph_options.rewrite_options.CopyFrom(
-        rewriter_config_pb2.RewriterConfig(
-            disable_model_pruning=True,
-            meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE,
-            constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
-            memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL,
-            min_graph_nodes=-1))
-    graph = tf_optimizer.OptimizeGraph(config, mg)
+      config = config_pb2.ConfigProto()
+      config.graph_options.rewrite_options.CopyFrom(
+          rewriter_config_pb2.RewriterConfig(
+              disable_model_pruning=True,
+              meta_optimizer_iterations=rewriter_config_pb2.RewriterConfig.ONE,
+              constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
+              memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL,
+              min_graph_nodes=-1))
+      graph = tf_optimizer.OptimizeGraph(config, mg)
 
-    self.assertEqual(len(graph.node), graph_size + 2)
-    self.assertTrue(
-        set([node.name for node in graph.node]) > set(
-            ['a', 'b', 'c', 'd', 'swap_in_d_0', 'swap_out_d_0']))
-    for node in graph.node:
-      if node.name == 'swap_in_d_0':
-        self.assertEqual('swap_out_d_0', node.input[0])
-        self.assertEqual('^b/read', node.input[1])
-      elif node.name == 'swap_out_d_0':
-        self.assertEqual('b/read', node.input[0])
-      elif node.name == 'd':
-        self.assertEqual('swap_in_d_0', node.input[0])
-        self.assertEqual('c', node.input[1])
+      self.assertEqual(len(graph.node), graph_size + 2)
+      self.assertTrue(
+          set([node.name for node in graph.node]) > set(
+              ['a', 'b', 'c', 'd', 'swap_in_d_0', 'swap_out_d_0']))
+      for node in graph.node:
+        if node.name == 'swap_in_d_0':
+          self.assertEqual('swap_out_d_0', node.input[0])
+          self.assertEqual('^b/read', node.input[1])
+        elif node.name == 'swap_out_d_0':
+          self.assertEqual('b/read', node.input[0])
+        elif node.name == 'd':
+          self.assertEqual('swap_in_d_0', node.input[0])
+          self.assertEqual('c', node.input[1])
 
 
 class MemoryOptimizerRecomputeTest(test.TestCase):
-- 
GitLab


From 433fc205600e725365430c07a04eaa292a6cc1a1 Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Tue, 18 Dec 2018 18:08:26 -0800
Subject: [PATCH 792/873] Internal change.

PiperOrigin-RevId: 226094885
---
 tensorflow/contrib/constrained_optimization/BUILD | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tensorflow/contrib/constrained_optimization/BUILD b/tensorflow/contrib/constrained_optimization/BUILD
index 619153df67..eee4329acb 100644
--- a/tensorflow/contrib/constrained_optimization/BUILD
+++ b/tensorflow/contrib/constrained_optimization/BUILD
@@ -42,6 +42,11 @@ py_test(
     name = "candidates_test",
     srcs = ["python/candidates_test.py"],
     srcs_version = "PY2AND3",
+    tags = [
+        # TODO(b/121223093): Re-enable this test after fixing "Distribution
+        # should match known solution" errors.
+        "no_mac",
+    ],
     deps = [
         ":constrained_optimization",
         "//tensorflow/python:client_testlib",
-- 
GitLab


From 6654ce7cf81332d1fda0fcdcb2e9f6036f98aa39 Mon Sep 17 00:00:00 2001
From: Yanan Cao <ycao@google.com>
Date: Tue, 18 Dec 2018 18:27:58 -0800
Subject: [PATCH 793/873] Replace assert_called_once, which is not available
 for < python 3.6

PiperOrigin-RevId: 226096904
---
 tensorflow/contrib/compiler/xla_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/contrib/compiler/xla_test.py b/tensorflow/contrib/compiler/xla_test.py
index a85b2dd155..c4384dcde7 100644
--- a/tensorflow/contrib/compiler/xla_test.py
+++ b/tensorflow/contrib/compiler/xla_test.py
@@ -315,7 +315,7 @@ class XlaDecoratorTest(test.TestCase, parameterized.TestCase):
       estimator_spec = model_fn(
           features=features, labels=labels, mode=_TRAIN, params=params or {})
 
-      mock_xla_compile.assert_called_once()
+      self.assertEqual(mock_xla_compile.call_count, 1)
       self.assertEqual(estimator_spec.mode, _TRAIN)
 
       with self.test_session() as sess:
-- 
GitLab


From f93128a24f7d0f95cbd3b328b237d4087358fa90 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 18:34:18 -0800
Subject: [PATCH 794/873] Internal change

PiperOrigin-RevId: 226097563
---
 tensorflow/python/keras/engine/training_test.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py
index 887e3b84b5..345673a84d 100644
--- a/tensorflow/python/keras/engine/training_test.py
+++ b/tensorflow/python/keras/engine/training_test.py
@@ -895,9 +895,6 @@ class LossWeightingTest(keras_parameterized.TestCase):
     class_weight = dict([(i, 1.) for i in range(num_classes)])
     class_weight[weighted_class] = weight
 
-    sample_weight = np.ones((y_train.shape[0]))
-    sample_weight[int_y_train == weighted_class] = 2.
-
     model.fit(
         x_train,
         y_train,
@@ -905,7 +902,7 @@ class LossWeightingTest(keras_parameterized.TestCase):
         epochs=epochs // 3,
         verbose=0,
         class_weight=class_weight,
-        validation_data=(x_train, y_train, sample_weight))
+        validation_data=(x_train, y_train))
     model.fit(
         x_train,
         y_train,
-- 
GitLab


From c23bfbcae43da38ef8225db66138e7145afaae4a Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Tue, 18 Dec 2018 18:47:42 -0800
Subject: [PATCH 795/873] Remove unnecessary use of macros; NFC

PiperOrigin-RevId: 226098859
---
 .../xla/service/cpu/dot_op_emitter.cc         | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
index a33035ad10..37cefcb2e8 100644
--- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
+++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc
@@ -1405,16 +1405,20 @@ Status DotOpEmitter::EmitScalarDot() {
   llvm::Value* rhs_value =
       rhs_array_.EmitReadArrayElement(/*index=*/element_index, b_);
   if (ShapeUtil::ElementIsComplex(lhs_array_.GetShape())) {
-#define REAL(x) b_->CreateExtractValue(x, {0})
-#define IMAG(x) b_->CreateExtractValue(x, {1})
-    llvm::Value* real =
-        b_->CreateFSub(b_->CreateFMul(REAL(lhs_value), REAL(rhs_value)),
-                       b_->CreateFMul(IMAG(lhs_value), IMAG(rhs_value)));
-    llvm::Value* imag =
-        b_->CreateFAdd(b_->CreateFMul(REAL(lhs_value), IMAG(rhs_value)),
-                       b_->CreateFMul(IMAG(lhs_value), REAL(rhs_value)));
-#undef IMAG
-#undef REAL
+    auto get_real = [&](llvm::Value* x) {
+      return b_->CreateExtractValue(x, {0});
+    };
+
+    auto get_imag = [&](llvm::Value* x) {
+      return b_->CreateExtractValue(x, {1});
+    };
+
+    llvm::Value* real = b_->CreateFSub(
+        b_->CreateFMul(get_real(lhs_value), get_real(rhs_value)),
+        b_->CreateFMul(get_imag(lhs_value), get_imag(rhs_value)));
+    llvm::Value* imag = b_->CreateFAdd(
+        b_->CreateFMul(get_real(lhs_value), get_imag(rhs_value)),
+        b_->CreateFMul(get_imag(lhs_value), get_real(rhs_value)));
     result = llvm::ConstantAggregateZero::get(lhs_array_.GetElementLlvmType());
     result = b_->CreateInsertValue(result, real, {0});
     result = b_->CreateInsertValue(result, imag, {1});
-- 
GitLab


From eb1d13e031069994b9deb55b79f704e2c840dcde Mon Sep 17 00:00:00 2001
From: Jeremy Lau <lauj@google.com>
Date: Tue, 18 Dec 2018 18:48:16 -0800
Subject: [PATCH 796/873] Internal change.

PiperOrigin-RevId: 226098910
---
 tensorflow/c/BUILD | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 9d267e9e59..3e1f220db2 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -252,6 +252,12 @@ tf_cc_test(
     name = "c_test",
     srcs = ["c_test.c"],
     extra_copts = ["-std=c11"],
+    tags = [
+        # TODO(b/121223209): Re-enable after fixing asan memory leaks and MacOS
+        # build errors.
+        "noasan",
+        "no_mac",
+    ],
     deps = [
         ":c_api",
         ":c_api_experimental",
-- 
GitLab


From aeee280ad2054235ff3038ad8204037c7354db37 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 19:32:26 -0800
Subject: [PATCH 797/873] Fix build after previous change broke when
 CUDNN_VERSION < 7201.

PiperOrigin-RevId: 226103009
---
 tensorflow/stream_executor/cuda/cuda_dnn.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 833d68ed21..249bad0c10 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -1267,6 +1267,7 @@ class CudnnRnnSequenceTensorDescriptor
   static port::StatusOr<CudnnRnnSequenceTensorDescriptor> Create(
       CUDAExecutor* parent, int max_seq_length, int batch_size, int data_size,
       const absl::Span<const int>& seq_lengths, cudnnDataType_t data_type) {
+#if CUDNN_VERSION >= 7201
     CHECK_GT(max_seq_length, 0);
     int dims[] = {batch_size, data_size, 1};
     int strides[] = {dims[1] * dims[2], dims[2], 1};
@@ -1276,7 +1277,6 @@ class CudnnRnnSequenceTensorDescriptor
         /*nbDims=*/sizeof(dims) / sizeof(dims[0]), /*dimA=*/dims,
         /*strideA=*/strides));
     const int* seq_lengths_array = seq_lengths.data();
-#if CUDNN_VERSION >= 7201
     RNNDataDescriptor data_desc = CreateRNNDataDescriptor();
     float padding_fill = 0.0f;
     RETURN_IF_CUDNN_ERROR(cudnnSetRNNDataDescriptor(
-- 
GitLab


From b00f714bb4750fc9d3fdf4e2b80fd341ce528a8b Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 18 Dec 2018 19:40:05 -0800
Subject: [PATCH 798/873] Automated rollback of commit
 8e7850f45a61cc45770834e79dbcddc133a78605

PiperOrigin-RevId: 226103744
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 271 ------------------
 tensorflow/core/ops/ops.pbtxt                 | 271 ------------------
 2 files changed, 542 deletions(-)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 9b7776bbf3..602d4a009d 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -16219,152 +16219,6 @@ op {
   }
   is_stateful: true
 }
-op {
-  name: "CudnnRNNBackpropV3"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_h"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_c"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "params"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "sequence_lengths"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_h"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_c"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_backprop"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_h_backprop"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_c_backprop"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "reserve_space"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "host_reserved"
-    type: DT_INT8
-  }
-  output_arg {
-    name: "input_backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "input_h_backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "input_c_backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "params_backprop"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "rnn_mode"
-    type: "string"
-    default_value {
-      s: "lstm"
-    }
-    allowed_values {
-      list {
-        s: "rnn_relu"
-        s: "rnn_tanh"
-        s: "lstm"
-        s: "gru"
-      }
-    }
-  }
-  attr {
-    name: "input_mode"
-    type: "string"
-    default_value {
-      s: "linear_input"
-    }
-    allowed_values {
-      list {
-        s: "linear_input"
-        s: "skip_input"
-        s: "auto_select"
-      }
-    }
-  }
-  attr {
-    name: "direction"
-    type: "string"
-    default_value {
-      s: "unidirectional"
-    }
-    allowed_values {
-      list {
-        s: "unidirectional"
-        s: "bidirectional"
-      }
-    }
-  }
-  attr {
-    name: "dropout"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  is_stateful: true
-}
 op {
   name: "CudnnRNNCanonicalToParams"
   input_arg {
@@ -16807,131 +16661,6 @@ op {
   }
   is_stateful: true
 }
-op {
-  name: "CudnnRNNV3"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_h"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_c"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "params"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "sequence_lengths"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output_h"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output_c"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "reserve_space"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "host_reserved"
-    type: DT_INT8
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "rnn_mode"
-    type: "string"
-    default_value {
-      s: "lstm"
-    }
-    allowed_values {
-      list {
-        s: "rnn_relu"
-        s: "rnn_tanh"
-        s: "lstm"
-        s: "gru"
-      }
-    }
-  }
-  attr {
-    name: "input_mode"
-    type: "string"
-    default_value {
-      s: "linear_input"
-    }
-    allowed_values {
-      list {
-        s: "linear_input"
-        s: "skip_input"
-        s: "auto_select"
-      }
-    }
-  }
-  attr {
-    name: "direction"
-    type: "string"
-    default_value {
-      s: "unidirectional"
-    }
-    allowed_values {
-      list {
-        s: "unidirectional"
-        s: "bidirectional"
-      }
-    }
-  }
-  attr {
-    name: "dropout"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "is_training"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  is_stateful: true
-}
 op {
   name: "Cumprod"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 1157380b8f..779d4297c7 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -7202,152 +7202,6 @@ op {
   }
   is_stateful: true
 }
-op {
-  name: "CudnnRNNBackpropV3"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_h"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_c"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "params"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "sequence_lengths"
-    type: DT_INT32
-  }
-  input_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_h"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_c"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_backprop"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_h_backprop"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "output_c_backprop"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "reserve_space"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "host_reserved"
-    type: DT_INT8
-  }
-  output_arg {
-    name: "input_backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "input_h_backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "input_c_backprop"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "params_backprop"
-    type_attr: "T"
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "rnn_mode"
-    type: "string"
-    default_value {
-      s: "lstm"
-    }
-    allowed_values {
-      list {
-        s: "rnn_relu"
-        s: "rnn_tanh"
-        s: "lstm"
-        s: "gru"
-      }
-    }
-  }
-  attr {
-    name: "input_mode"
-    type: "string"
-    default_value {
-      s: "linear_input"
-    }
-    allowed_values {
-      list {
-        s: "linear_input"
-        s: "skip_input"
-        s: "auto_select"
-      }
-    }
-  }
-  attr {
-    name: "direction"
-    type: "string"
-    default_value {
-      s: "unidirectional"
-    }
-    allowed_values {
-      list {
-        s: "unidirectional"
-        s: "bidirectional"
-      }
-    }
-  }
-  attr {
-    name: "dropout"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  is_stateful: true
-}
 op {
   name: "CudnnRNNCanonicalToParams"
   input_arg {
@@ -7790,131 +7644,6 @@ op {
   }
   is_stateful: true
 }
-op {
-  name: "CudnnRNNV3"
-  input_arg {
-    name: "input"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_h"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "input_c"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "params"
-    type_attr: "T"
-  }
-  input_arg {
-    name: "sequence_lengths"
-    type: DT_INT32
-  }
-  output_arg {
-    name: "output"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output_h"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "output_c"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "reserve_space"
-    type_attr: "T"
-  }
-  output_arg {
-    name: "host_reserved"
-    type: DT_INT8
-  }
-  attr {
-    name: "T"
-    type: "type"
-    allowed_values {
-      list {
-        type: DT_HALF
-        type: DT_FLOAT
-        type: DT_DOUBLE
-      }
-    }
-  }
-  attr {
-    name: "rnn_mode"
-    type: "string"
-    default_value {
-      s: "lstm"
-    }
-    allowed_values {
-      list {
-        s: "rnn_relu"
-        s: "rnn_tanh"
-        s: "lstm"
-        s: "gru"
-      }
-    }
-  }
-  attr {
-    name: "input_mode"
-    type: "string"
-    default_value {
-      s: "linear_input"
-    }
-    allowed_values {
-      list {
-        s: "linear_input"
-        s: "skip_input"
-        s: "auto_select"
-      }
-    }
-  }
-  attr {
-    name: "direction"
-    type: "string"
-    default_value {
-      s: "unidirectional"
-    }
-    allowed_values {
-      list {
-        s: "unidirectional"
-        s: "bidirectional"
-      }
-    }
-  }
-  attr {
-    name: "dropout"
-    type: "float"
-    default_value {
-      f: 0
-    }
-  }
-  attr {
-    name: "seed"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "seed2"
-    type: "int"
-    default_value {
-      i: 0
-    }
-  }
-  attr {
-    name: "is_training"
-    type: "bool"
-    default_value {
-      b: true
-    }
-  }
-  is_stateful: true
-}
 op {
   name: "Cumprod"
   input_arg {
-- 
GitLab


From 0b2eafafb0ddf3e8968728fed310d4fb8ffde10d Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Tue, 18 Dec 2018 19:41:57 -0800
Subject: [PATCH 799/873] Disable keras correctness test for default
 distribution strategy.

PiperOrigin-RevId: 226103886
---
 tensorflow/contrib/distribute/python/keras_test.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index c53e76f922..9590d0cf7c 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -26,6 +26,7 @@ from tensorflow.contrib.distribute.python import mirrored_strategy
 from tensorflow.contrib.distribute.python import tpu_strategy
 from tensorflow.python import keras
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import values
 from tensorflow.python.eager import test
 from tensorflow.python.estimator import keras as keras_lib
@@ -1305,6 +1306,11 @@ class TestDistributionStrategyCorrectness(test.TestCase,
   @combinations.generate(strategy_and_input_combinations())
   def test_correctness(self, distribution, use_numpy, use_validation_data):
 
+    # TODO(b/121224478): This test is flaky with default strategy. Remove this
+    # once the issue is fixed.
+    if isinstance(distribution, distribute_lib._DefaultDistributionStrategy):  # pylint: disable=protected-access
+      self.skipTest('Disable the test for default strategy.')
+
     with self.cached_session():
       default_tolerance = 1e-5
       tol_table = {}
-- 
GitLab


From f73a4f9951d065c3586994b0850b4d2ce843a252 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 20:19:06 -0800
Subject: [PATCH 800/873] Update ops-related pbtxt files.

PiperOrigin-RevId: 226107101
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 271 ++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 271 ++++++++++++++++++
 2 files changed, 542 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 602d4a009d..9b7776bbf3 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -16219,6 +16219,152 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "CudnnRNNBackpropV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "params"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sequence_lengths"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_h_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_c_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "host_reserved"
+    type: DT_INT8
+  }
+  output_arg {
+    name: "input_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "input_h_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "input_c_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "params_backprop"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "rnn_mode"
+    type: "string"
+    default_value {
+      s: "lstm"
+    }
+    allowed_values {
+      list {
+        s: "rnn_relu"
+        s: "rnn_tanh"
+        s: "lstm"
+        s: "gru"
+      }
+    }
+  }
+  attr {
+    name: "input_mode"
+    type: "string"
+    default_value {
+      s: "linear_input"
+    }
+    allowed_values {
+      list {
+        s: "linear_input"
+        s: "skip_input"
+        s: "auto_select"
+      }
+    }
+  }
+  attr {
+    name: "direction"
+    type: "string"
+    default_value {
+      s: "unidirectional"
+    }
+    allowed_values {
+      list {
+        s: "unidirectional"
+        s: "bidirectional"
+      }
+    }
+  }
+  attr {
+    name: "dropout"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "CudnnRNNCanonicalToParams"
   input_arg {
@@ -16661,6 +16807,131 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "CudnnRNNV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "params"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sequence_lengths"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_h"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_c"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "host_reserved"
+    type: DT_INT8
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "rnn_mode"
+    type: "string"
+    default_value {
+      s: "lstm"
+    }
+    allowed_values {
+      list {
+        s: "rnn_relu"
+        s: "rnn_tanh"
+        s: "lstm"
+        s: "gru"
+      }
+    }
+  }
+  attr {
+    name: "input_mode"
+    type: "string"
+    default_value {
+      s: "linear_input"
+    }
+    allowed_values {
+      list {
+        s: "linear_input"
+        s: "skip_input"
+        s: "auto_select"
+      }
+    }
+  }
+  attr {
+    name: "direction"
+    type: "string"
+    default_value {
+      s: "unidirectional"
+    }
+    allowed_values {
+      list {
+        s: "unidirectional"
+        s: "bidirectional"
+      }
+    }
+  }
+  attr {
+    name: "dropout"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "Cumprod"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 779d4297c7..1157380b8f 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -7202,6 +7202,152 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "CudnnRNNBackpropV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "params"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sequence_lengths"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_h_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "output_c_backprop"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "reserve_space"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "host_reserved"
+    type: DT_INT8
+  }
+  output_arg {
+    name: "input_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "input_h_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "input_c_backprop"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "params_backprop"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "rnn_mode"
+    type: "string"
+    default_value {
+      s: "lstm"
+    }
+    allowed_values {
+      list {
+        s: "rnn_relu"
+        s: "rnn_tanh"
+        s: "lstm"
+        s: "gru"
+      }
+    }
+  }
+  attr {
+    name: "input_mode"
+    type: "string"
+    default_value {
+      s: "linear_input"
+    }
+    allowed_values {
+      list {
+        s: "linear_input"
+        s: "skip_input"
+        s: "auto_select"
+      }
+    }
+  }
+  attr {
+    name: "direction"
+    type: "string"
+    default_value {
+      s: "unidirectional"
+    }
+    allowed_values {
+      list {
+        s: "unidirectional"
+        s: "bidirectional"
+      }
+    }
+  }
+  attr {
+    name: "dropout"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "CudnnRNNCanonicalToParams"
   input_arg {
@@ -7644,6 +7790,131 @@ op {
   }
   is_stateful: true
 }
+op {
+  name: "CudnnRNNV3"
+  input_arg {
+    name: "input"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_h"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "input_c"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "params"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "sequence_lengths"
+    type: DT_INT32
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_h"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "output_c"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "reserve_space"
+    type_attr: "T"
+  }
+  output_arg {
+    name: "host_reserved"
+    type: DT_INT8
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "rnn_mode"
+    type: "string"
+    default_value {
+      s: "lstm"
+    }
+    allowed_values {
+      list {
+        s: "rnn_relu"
+        s: "rnn_tanh"
+        s: "lstm"
+        s: "gru"
+      }
+    }
+  }
+  attr {
+    name: "input_mode"
+    type: "string"
+    default_value {
+      s: "linear_input"
+    }
+    allowed_values {
+      list {
+        s: "linear_input"
+        s: "skip_input"
+        s: "auto_select"
+      }
+    }
+  }
+  attr {
+    name: "direction"
+    type: "string"
+    default_value {
+      s: "unidirectional"
+    }
+    allowed_values {
+      list {
+        s: "unidirectional"
+        s: "bidirectional"
+      }
+    }
+  }
+  attr {
+    name: "dropout"
+    type: "float"
+    default_value {
+      f: 0
+    }
+  }
+  attr {
+    name: "seed"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "seed2"
+    type: "int"
+    default_value {
+      i: 0
+    }
+  }
+  attr {
+    name: "is_training"
+    type: "bool"
+    default_value {
+      b: true
+    }
+  }
+  is_stateful: true
+}
 op {
   name: "Cumprod"
   input_arg {
-- 
GitLab


From d75dde919bb3a6ad86167a16aca66d7ef0ffe69a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 18 Dec 2018 20:33:52 -0800
Subject: [PATCH 801/873] Change implementation of DistributedValues to be a
 device map and tuple of values instead of a dict mapping devices to values.
 Values in a DistributedValues object are now indexed by replica id. Lays the
 groundwork for supporting multiple logical devices (model parallelism).

PiperOrigin-RevId: 226108301
---
 .../python/collective_all_reduce_strategy.py  |  34 +-
 .../collective_all_reduce_strategy_test.py    |  11 +-
 .../python/cross_device_ops_test.py           |  52 +-
 .../python/cross_device_utils_test.py         |   3 +-
 .../contrib/distribute/python/keras_test.py   |  10 +-
 .../distribute/python/mirrored_strategy.py    |  12 +-
 .../python/mirrored_strategy_multigpu_test.py |  18 +-
 .../distribute/python/one_device_strategy.py  |  15 +-
 .../python/parameter_server_strategy.py       |  72 +-
 .../python/parameter_server_strategy_test.py  |  11 +-
 .../distribute/python/strategy_test_lib.py    |   7 +-
 .../contrib/distribute/python/tpu_strategy.py |  91 +-
 .../contrib/distribute/python/values_test.py  | 239 ++---
 .../python/distribute/cross_device_ops.py     | 157 ++--
 .../python/distribute/cross_device_utils.py   |   2 +-
 .../python/distribute/distribute_lib.py       |   6 +-
 .../python/distribute/mirrored_strategy.py    | 386 +++++----
 tensorflow/python/distribute/values.py        | 819 ++++++++++++------
 18 files changed, 1152 insertions(+), 793 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
index 346513dc58..e6bbf0c308 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
@@ -130,6 +130,8 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
 
     self._collective_keys = cross_device_utils.CollectiveKeys()
     self._initialize_local(local_devices)
+    self._input_workers = values.InputWorkers(
+        self._device_map, [(self._worker_device, self.worker_devices)])
     self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
         num_workers=self._num_workers,
         num_gpus_per_worker=num_gpus_per_worker,
@@ -151,13 +153,18 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
 
   def _create_variable(self, next_creator, *args, **kwargs):
     colocate_with = kwargs.pop("colocate_with", None)
-    devices = self._get_devices_from(colocate_with)
-    group_size = len(devices) * self._num_workers
-    group_key = self._collective_keys.get_group_key(self._devices)
+    if colocate_with is None:
+      device_map = self._device_map
+      logical_device = 0  # TODO(josh11b): Get logical device from scope here.
+    else:
+      device_map = colocate_with.device_map
+      logical_device = colocate_with.logical_device
+    group_size = device_map.num_replicas_in_graph * self._num_workers
+    group_key = self._collective_keys.get_group_key(self.worker_devices)
 
     def _real_mirrored_creator(devices, *args, **kwargs):
       """Creates one MirroredVariable on the current worker."""
-      index = {}
+      value_list = []
       unique_var_name = ops.get_default_graph().unique_name(
           kwargs["name"], mark_as_used=False).rstrip("/")
       collective_instance_key = self._collective_keys.get_instance_key(
@@ -174,7 +181,7 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
         with ops.device(d):
           if i > 0:
             # Give replicas meaningful distinct names:
-            var0name = index[devices[0]].name.split(":")[0]
+            var0name = value_list[0].name.split(":")[0]
             # We append a / to variable names created on replicas with id > 0 to
             # ensure that we ignore the name scope and instead use the given
             # name as the absolute name of the variable.
@@ -210,22 +217,23 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
             assert unique_var_name == actual_var_name, "%r vs %r" % (
                 unique_var_name, actual_var_name)
           assert not isinstance(v, values.DistributedVariable)
-          index[d] = v
-      return index
+          value_list.append(v)
+      return value_list
 
     # pylint: disable=protected-access
     return mirrored_strategy._create_mirrored_variable(
-        devices, _real_mirrored_creator, *args, **kwargs)
+        device_map, logical_device, _real_mirrored_creator, *args, **kwargs)
 
   def _distribute_dataset(self, dataset_fn):
     """Distributes the dataset to each local GPU."""
     # TODO(yuefengz): shard the dataset.
+    worker_index = 0
     return values.PerReplicaDataset(
-        self._call_dataset_fn(dataset_fn), self._devices, True)
+        self._call_dataset_fn(dataset_fn), self._input_workers, worker_index,
+        prefetch_on_device=True)
 
   def _make_dataset_iterator(self, dataset):
-    worker_device_pairs = [(self._worker_device, self._devices)]
-    return values.DatasetIterator(dataset, worker_device_pairs,
+    return values.DatasetIterator(dataset, self._input_workers,
                                   self._num_replicas_in_sync)
 
   def _make_input_fn_iterator(
@@ -244,7 +252,7 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
         num_replicas_in_sync=self._num_replicas_in_sync)
 
     return values.InputFunctionIterator(
-        input_fn, [(self._worker_device, self._devices)], [input_context])
+        input_fn, self._input_workers, [input_context])
 
   def _configure(self,
                  session_config=None,
@@ -332,7 +340,7 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
 
   @property
   def _num_replicas_in_sync(self):
-    return len(self._devices) * self._num_workers
+    return len(self.worker_devices) * self._num_workers
 
   # TODO(priyag): Delete this once all strategies use global batch size.
   @property
diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
index 74c69982b9..0fb672dded 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy_test.py
@@ -252,21 +252,22 @@ class CollectiveAllReduceStrategyTestBase(
 
       for expected_value in expected_values:
         next_element = iterator.get_next()
-        computed_value = sess.run(
-            [values.select_device(d, next_element) for d in devices])
+        computed_value = sess.run([values.select_replica(r, next_element)
+                                   for r in range(len(devices))])
         self.assertEqual(expected_value, computed_value)
 
       with self.assertRaises(errors.OutOfRangeError):
         next_element = iterator.get_next()
-        sess.run([values.select_device(d, next_element) for d in devices])
+        sess.run([values.select_replica(r, next_element)
+                  for r in range(len(devices))])
 
       # After re-initializing the iterator, should be able to iterate again.
       sess.run(iterator.initialize())
 
       for expected_value in expected_values:
         next_element = iterator.get_next()
-        computed_value = sess.run(
-            [values.select_device(d, next_element) for d in devices])
+        computed_value = sess.run([values.select_replica(r, next_element)
+                                   for r in range(len(devices))])
         self.assertEqual(expected_value, computed_value)
 
 
diff --git a/tensorflow/contrib/distribute/python/cross_device_ops_test.py b/tensorflow/contrib/distribute/python/cross_device_ops_test.py
index d6e9521c1c..54cce29883 100644
--- a/tensorflow/contrib/distribute/python/cross_device_ops_test.py
+++ b/tensorflow/contrib/distribute/python/cross_device_ops_test.py
@@ -40,8 +40,16 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 
 
+def _get_devices(devices):
+  if isinstance(devices, (tuple, list)):
+    return tuple(device_util.resolve(d) for d in devices)
+  elif isinstance(devices, value_lib.DistributedValues):
+    return devices.devices
+  return (device_util.resolve(devices),)
+
+
 def _make_per_replica(values, devices, regroup=False):
-  devices = cross_device_ops_lib.get_devices_from(devices)
+  devices = _get_devices(devices)
   assert len(values) == len(devices)
 
   # We simulate the result of regroup called on PerReplica which strips the
@@ -51,12 +59,12 @@ def _make_per_replica(values, devices, regroup=False):
       placed_v = array_ops.identity(values[0])
     return placed_v
 
-  index = {}
+  index = []
   for d, v in zip(devices, values):
     with ops.device(d):
       placed_v = array_ops.identity(v)
-    index[d] = placed_v
-  return value_lib.PerReplica(index)
+    index.append(placed_v)
+  return value_lib.PerReplica(value_lib.ReplicaDeviceMap(devices), index)
 
 
 # pylint: disable=g-doc-args,g-doc-return-or-yield
@@ -66,9 +74,9 @@ def _fake_mirrored(value, devices):
   All components of the returned Mirrored have the same objects, which is not
   true in reality.
   """
-  devices = cross_device_ops_lib.get_devices_from(devices)
-  return value_lib.Mirrored(
-      {d: v for d, v in zip(devices, [value] * len(devices))})
+  devices = _get_devices(devices)
+  return value_lib.Mirrored(value_lib.ReplicaDeviceMap(devices),
+                            [value] * len(devices))
 
 
 def _make_indexed_slices(values, indices, dense_shape, device):
@@ -81,9 +89,9 @@ def _make_indexed_slices(values, indices, dense_shape, device):
 
 
 def _make_mirrored_indexed_slices(devices, values, indices, dense_shape):
-  return value_lib.Mirrored({
-      d: _make_indexed_slices(values, indices, dense_shape, d) for d in devices
-  })
+  values = [_make_indexed_slices(values, indices, dense_shape, d)
+            for d in devices]
+  return value_lib.Mirrored(value_lib.ReplicaDeviceMap(devices), values)
 
 
 _cpu_device = "/device:CPU:0"
@@ -107,16 +115,16 @@ class CrossDeviceOpsTestBase(test.TestCase, parameterized.TestCase):
     else:
       self.assertEqual(type(left), type(right))
       self.assertEqual(set(left.devices), set(right.devices))
-      if isinstance(list(left._index.values())[0], ops.IndexedSlices):
-        for (d, v) in left._index.items():
-          self._assert_indexed_slices_equal(v, right._index[d])
+      if isinstance(left.values[0], ops.IndexedSlices):
+        for d in left.devices:
+          self._assert_indexed_slices_equal(left.get(d), right.get(d))
       elif context.executing_eagerly():
-        self.assertEqual([v.numpy() for v in left._index.values()],
-                         list(right._index.values()))
+        self.assertEqual([v.numpy() for v in left.values],
+                         list(right.values))
       else:
         with self.cached_session() as sess:
           self.assertEqual(
-              sess.run(list(left._index.values())), list(right._index.values()))
+              sess.run(list(left.values)), list(right.values))
 
   def _testReductionAndBroadcast(self, cross_device_ops, distribution):
     devices = distribution.extended.worker_devices
@@ -280,7 +288,8 @@ class SingleWorkerCrossDeviceOpsTest(CrossDeviceOpsTestBase):
     devices = ["/cpu:0", "/gpu:0"]
     t0 = _make_indexed_slices([[1., 2.]], [1], [5, 2], devices[0])
     t1 = _make_indexed_slices([[3., 4.], [5., 6.]], [1, 3], [5, 2], devices[1])
-    per_replica = value_lib.PerReplica({devices[0]: t0, devices[1]: t1})
+    per_replica = value_lib.PerReplica(
+        value_lib.ReplicaDeviceMap(devices), (t0, t1))
     result = cross_device_ops_lib._simple_reduce(
         per_replica, devices[0], math_ops.add_n, reduce_util.ReduceOp.SUM)
 
@@ -314,7 +323,8 @@ class SingleWorkerCrossDeviceOpsTest(CrossDeviceOpsTestBase):
     t0 = _make_indexed_slices([[1., 2.]], [1], dense_shape, devices[0])
     t1 = _make_indexed_slices(
         [[3., 4.], [5., 6.]], [1, 3], dense_shape, devices[1])
-    per_replica = value_lib.PerReplica({devices[0]: t0, devices[1]: t1})
+    per_replica = value_lib.PerReplica(
+        value_lib.ReplicaDeviceMap(devices), (t0, t1))
 
     if batch_reduce:
       result = cross_device_ops_instance.batch_reduce(
@@ -474,8 +484,8 @@ class MultiWorkerCollectiveAllReduceTest(
       run_options.experimental.collective_graph_key = 6
 
       left_values = np.array(
-          sess.run(list(left._index.values()), options=run_options)).flatten()
-      right_values = np.array(list(right._index.values())).flatten()
+          sess.run(list(left.values), options=run_options)).flatten()
+      right_values = np.array(list(right.values)).flatten()
       self.assertEqual(len(left_values), len(right_values))
       for l, r in zip(left_values, right_values):
         self.assertEqual(l, r)
@@ -496,7 +506,7 @@ class MultiWorkerCollectiveAllReduceTest(
       # Collective ops doesn't support scalar tensors, so we have to construct
       # 1-d tensors.
       values = [constant_op.constant([float(d)]) for d in range(len(devices))]
-      per_replica = _make_per_replica(values, devices, regroup=True)
+      per_replica = _make_per_replica(values, devices)
       mean = np.array([(len(devices) - 1.) / 2.])
 
       values_2 = [constant_op.constant([d + 1.0]) for d in range(len(devices))]
diff --git a/tensorflow/contrib/distribute/python/cross_device_utils_test.py b/tensorflow/contrib/distribute/python/cross_device_utils_test.py
index 2303a31677..275aac2eec 100644
--- a/tensorflow/contrib/distribute/python/cross_device_utils_test.py
+++ b/tensorflow/contrib/distribute/python/cross_device_utils_test.py
@@ -103,7 +103,8 @@ class IndexedSlicesUtilsTest(test.TestCase, parameterized.TestCase):
         constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
     t1 = math_ops._as_indexed_slices(
         constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
-    per_replica = value_lib.PerReplica({"/gpu:0": t0, "/cpu:0": t1})
+    device_map = value_lib.ReplicaDeviceMap(("/gpu:0", "/cpu:0"))
+    per_replica = value_lib.PerReplica(device_map, (t0, t1))
     self.assertTrue(cross_device_utils.contains_indexed_slices(per_replica))
 
   @combinations.generate(combinations.combine(
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 9590d0cf7c..b91c27e184 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1058,8 +1058,9 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
     with self.cached_session():
       a = constant_op.constant([1, 2], shape=(1, 2))
       b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
-      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
-      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
+      device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
+      x = values.DistributedValues(device_map, (a, b))
+      y = values.DistributedValues(device_map, (a, a))
       with distribution.scope():
         # Removed device and input tensor shape details from the error message
         # since the order of the device and the corresponding input tensor shape
@@ -1081,8 +1082,9 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
     with self.cached_session():
       a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
       b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
-      x = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': b})
-      y = values.DistributedValues({'/device:CPU:0': a, '/device:GPU:0': a})
+      device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
+      x = values.DistributedValues(device_map, (a, b))
+      y = values.DistributedValues(device_map, (a, a))
       with distribution.scope():
         # Removed device and input tensor dtype details from the error message
         # since the order of the device and the corresponding input tensor dtype
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py
index 24399db652..71e50b83b0 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py
@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import functools
 
-from tensorflow.python.distribute import device_util
 from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import mirrored_strategy
 from tensorflow.python.distribute import values
@@ -136,21 +135,16 @@ class MirroredExtended(CoreMirroredExtended):
     Returns:
       An `InputIterator` which returns inputs for each step of the computation.
     """
-    if self._local_mode:
-      worker = device_util.canonicalize("/device:CPU:0")
-      worker_device_pairs = [(worker, self._devices)]
-    else:
-      worker_device_pairs = self._worker_devices
-    return values.DatasetIterator(dataset, worker_device_pairs)
+    return values.DatasetIterator(dataset, self._input_workers)
 
   def _distribute_dataset(self, dataset_fn):
     if self._local_mode:
       return values.PerReplicaDataset(
-          self._call_dataset_fn(dataset_fn), self._devices)
+          self._call_dataset_fn(dataset_fn), self._input_workers, 0)
     else:
       return values.MultiWorkerDataset(
           functools.partial(self._call_dataset_fn, dataset_fn),
-          self._worker_devices,
+          self._input_workers,
           auto_shard=self._auto_shard_dataset)
 
   # TODO(priyag): Delete this once all strategies use global batch size.
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index fd6841266c..f4becf1d62 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -558,10 +558,8 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
       return v
 
     with distribution.scope():
-      names = values.DistributedValues({
-          "/device:CPU:0": "foo",
-          "/device:GPU:0": "bar"
-      })
+      device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
+      names = values.DistributedValues(device_map, ("foo", "bar"))
       with self.assertRaises(RuntimeError):
         _ = distribution.extended.call_for_each_replica(model_fn, args=(names,))
 
@@ -1210,9 +1208,9 @@ class MirroredStrategyDefunTest(test.TestCase):
 
       result = distribution.extended.call_for_each_replica(
           model_fn, args=[mock_model] + inputs)
-      for device in devices:
-        device_result = values.select_device(device, result)
-        device_expected_result = values.select_device(device, expected_result)
+      for r in range(len(devices)):
+        device_result = values.select_replica(r, result)
+        device_expected_result = values.select_replica(r, expected_result)
         self.assertAllClose(device_expected_result,
                             self.evaluate(device_result))
 
@@ -1293,9 +1291,9 @@ class MirroredStrategyDefunTest(test.TestCase):
     def fn1(mock_model, factor):
       return mock_model(factor)
 
-    factors = values.PerReplica({"CPU:0": 5.0, "GPU:0": 3.0})
-    expected_result = values.PerReplica({"CPU:0": 5.0 * 1.25,
-                                         "GPU:0": 3.0 * 1.25})
+    device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
+    factors = values.PerReplica(device_map, (5.0, 3.0))
+    expected_result = values.PerReplica(device_map, (5.0 * 1.25, 3.0 * 1.25))
     self._call_and_check(distribution, fn1, [factors], expected_result, [fn1])
 
   def testTrain(self, distribution):
diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py
index fdbfba4e04..4b60f3c786 100644
--- a/tensorflow/contrib/distribute/python/one_device_strategy.py
+++ b/tensorflow/contrib/distribute/python/one_device_strategy.py
@@ -51,6 +51,10 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended):
     super(OneDeviceExtended, self).__init__(container_strategy)
     self._device = device
     self._default_device = device
+    worker = device_util.canonicalize("/device:CPU:0")
+    worker_device_pairs = [(worker, [self._device])]
+    device_map = values.SingleDeviceMap(device)
+    self._input_workers = values.InputWorkers(device_map, worker_device_pairs)
 
   def _create_variable(self, next_creator, *args, **kwargs):
     colocate_with = kwargs.pop("colocate_with", None)
@@ -69,23 +73,18 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended):
 
   def _make_dataset_iterator(self, dataset):
     """Make iterator from dataset without splitting the batch."""
-    worker = device_util.canonicalize("/device:CPU:0")
-    worker_device_pairs = [(worker, [self._device])]
-    return values.DatasetIterator(dataset, worker_device_pairs)
+    return values.DatasetIterator(dataset, self._input_workers)
 
   def _distribute_dataset(self, dataset_fn):
     return values.PerReplicaDataset(
-        self._call_dataset_fn(dataset_fn), [self._device])
+        self._call_dataset_fn(dataset_fn), self._input_workers, 0)
 
   def _make_input_fn_iterator(
       self,
       input_fn,
       replication_mode=distribute_lib.InputReplicationMode.PER_WORKER):
-    worker = device_util.canonicalize("/device:CPU:0")
-    worker_device_pairs = [(worker, [self._device])]
     return values.InputFunctionIterator(
-        input_fn, worker_device_pairs,
-        [distribute_lib.InputContext()])
+        input_fn, self._input_workers, [distribute_lib.InputContext()])
 
   def _broadcast_to(self, tensor, destinations):
     del destinations
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy.py b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
index ca51b07be6..5029d59641 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
@@ -139,22 +139,22 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
                        "`task_type` and `task_id`")
     cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
 
-    self._worker_device = "/job:%s/task:%d" % (self._task_type, self._task_id)
+    worker_device = "/job:%s/task:%d" % (self._task_type, self._task_id)
 
     # Define compute devices which is a list of device strings and one for each
     # replica. When there are GPUs, replicate operations on these GPUs.
     # Otherwise, place operations on CPU.
     if num_gpus_per_worker > 0:
-      self._compute_devices = tuple(
-          "%s/device:GPU:%d" % (self._worker_device, i)
+      compute_devices = tuple(
+          "%s/device:GPU:%d" % (worker_device, i)
           for i in range(num_gpus_per_worker)
       )
     else:
-      self._compute_devices = (self._worker_device,)
+      compute_devices = (worker_device,)
 
-    self._compute_devices = tuple(
-        map(device_util.resolve, self._compute_devices))
-    self._canonical_compute_device_set = set(self._compute_devices)
+    self._device_map = values.ReplicaDeviceMap(compute_devices)
+    self._input_workers = values.InputWorkers(
+        self._device_map, [(worker_device, compute_devices)])
 
     # In distributed mode, place variables on ps jobs in a round-robin fashion.
     # Note that devices returned from `replica_device_setter` are not
@@ -169,7 +169,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
       raise ValueError("The cluster spec needs to have `ps` jobs.")
     self._variable_device = device_setter.replica_device_setter(
         ps_tasks=num_ps_replicas,
-        worker_device=self._worker_device,
+        worker_device=worker_device,
         merge_devices=True,
         cluster=cluster_spec)
 
@@ -181,7 +181,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
 
     # Add a default device so that ops without specified devices will not end up
     # on other workers.
-    self._default_device = self._worker_device
+    self._default_device = worker_device
 
     self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                                 task_id)
@@ -192,31 +192,31 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
     logging.info(
         "Multi-worker ParameterServerStrategy with "
         "cluster_spec = %r, task_type = %r, task_id = %r, "
-        "num_ps_replicas = %r, is_chief = %r, compute_devices = %r, "
+        "num_ps_replicas = %r, is_chief = %r, device_map = %r, "
         "variable_device = %r", cluster_spec.as_dict(), task_type, task_id,
-        num_ps_replicas, self._is_chief, self._compute_devices,
+        num_ps_replicas, self._is_chief, self._device_map,
         self._variable_device)
 
   def _initialize_local(self, num_gpus_per_worker):
     """Initialize internal devices for local training."""
-    self._worker_device = device_util.canonicalize("/device:CPU:0")
+    worker_device = device_util.canonicalize("/device:CPU:0")
     # Define compute devices which is a list of device strings and one for each
     # replica. When there are GPUs, replicate operations on these GPUs.
     # Otherwise, place operations on CPU.
     if num_gpus_per_worker > 0:
-      self._compute_devices = tuple(
+      compute_devices = tuple(
           map("/device:GPU:{}".format, range(num_gpus_per_worker)))
     else:
-      self._compute_devices = (_LOCAL_CPU,)
+      compute_devices = (_LOCAL_CPU,)
 
-    self._compute_devices = tuple(
-        map(device_util.resolve, self._compute_devices))
-    self._canonical_compute_device_set = set(self._compute_devices)
+    self._device_map = values.ReplicaDeviceMap(compute_devices)
+    self._input_workers = values.InputWorkers(
+        self._device_map, [(worker_device, compute_devices)])
 
     # If there is only one GPU, put everything on that GPU. Otherwise, place
     # variables on CPU.
     if num_gpus_per_worker == 1:
-      assert len(self._compute_devices) == 1
+      assert len(compute_devices) == 1
       self._variable_device = _LOCAL_GPU_0
       self._parameter_devices = (_LOCAL_GPU_0,)
     else:
@@ -230,16 +230,16 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
 
     logging.info(
         "ParameterServerStrategy with compute_devices = %r, "
-        "variable_device = %r", self._compute_devices, self._variable_device)
+        "variable_device = %r", compute_devices, self._variable_device)
 
   def _distribute_dataset(self, dataset_fn):
     """Distributes the dataset to each local GPU."""
     return values.PerReplicaDataset(
-        self._call_dataset_fn(dataset_fn), self._compute_devices, True)
+        self._call_dataset_fn(dataset_fn), self._input_workers, 0,
+        prefetch_on_device=True)
 
   def _make_dataset_iterator(self, dataset):
-    worker_device_pairs = [(self._worker_device, self._compute_devices)]
-    return values.DatasetIterator(dataset, worker_device_pairs,
+    return values.DatasetIterator(dataset, self._input_workers,
                                   self._num_replicas_in_sync)
 
   def _make_input_fn_iterator(
@@ -259,9 +259,8 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
         num_input_pipelines=num_input_pipelines,
         input_pipeline_id=input_pipeline_id,
         num_replicas_in_sync=self._num_replicas_in_sync)
-    worker_device_pairs = [(self._worker_device, self._compute_devices)]
     return values.InputFunctionIterator(
-        input_fn, worker_device_pairs, [input_context])
+        input_fn, self._input_workers, [input_context])
 
   def _broadcast_to(self, tensor, destinations):
     # This is both a fast path for Python constants, and a way to delay
@@ -272,7 +271,9 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
     if isinstance(tensor, (float, int)):
       return tensor
     if not cross_device_ops_lib.check_destinations(destinations):
-      destinations = self._compute_devices
+      # TODO(josh11b): Use current logical device instead of 0 here.
+      destinations = values.LogicalDeviceSpec(
+          device_map=self._device_map, logical_device=0)
     return self._cross_device_ops.broadcast(tensor, destinations)
 
   def _allow_variable_partition(self):
@@ -338,7 +339,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
   def _call_for_each_replica(self, fn, args, kwargs):
     # pylint: disable=protected-access
     return mirrored_strategy._call_for_each_replica(
-        self._container_strategy(), fn, args, kwargs)
+        self._container_strategy(), self._device_map, fn, args, kwargs)
 
   def _verify_destinations_not_different_worker(self, destinations):
     if not self._cluster_spec:
@@ -350,14 +351,14 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
       if d_spec.job == self._task_type and d_spec.task != self._task_id:
         raise ValueError(
             "Cannot reduce to another worker: %r, current worker is %r" %
-            (d, self._worker_device))
+            (d, self._input_workers.worker_devices[0]))
 
   def _reduce_to(self, reduce_op, value, destinations):
     self._verify_destinations_not_different_worker(destinations)
     if not isinstance(value, values.DistributedValues):
       # pylint: disable=protected-access
       return cross_device_ops_lib.reduce_non_distributed_value(
-          self, reduce_op, value, destinations)
+          reduce_op, self._device_map, value, destinations)
     return self._cross_device_ops.reduce(
         reduce_op, value, destinations=destinations)
 
@@ -373,7 +374,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
     def _select_fn(x):  # pylint: disable=g-missing-docstring
       if isinstance(x, values.Mirrored):
         if len(x.devices) == 1:
-          return list(x._index.values())[0]  # pylint: disable=protected-access
+          return x.primary
         else:
           raise ValueError(
               "You cannot update variable with a Mirrored object with multiple "
@@ -415,10 +416,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
 
   def _unwrap(self, val):
     if isinstance(val, values.DistributedValues):
-      # Return in a deterministic order.
-      if set(val.devices) == self._canonical_compute_device_set:
-        return tuple(val.get(device=d) for d in self._compute_devices)
-      return tuple(val.get(device=d) for d in sorted(val.devices))
+      return val.values
     return (val,)
 
   def value_container(self, val):
@@ -493,11 +491,15 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
 
   @property
   def _num_replicas_in_sync(self):
-    return len(self._compute_devices)
+    return self._device_map.num_replicas_in_graph
 
   @property
   def worker_devices(self):
-    return self._compute_devices
+    return self._device_map.all_devices
+
+  @property
+  def worker_devices_by_replica(self):
+    return self._device_map.devices_by_replica
 
   @property
   def parameter_devices(self):
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
index 6d6ec88722..805c643e67 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
@@ -532,21 +532,22 @@ class ParameterServerStrategyTestBase(
 
       for expected_value in expected_values:
         next_element = iterator.get_next()
-        computed_value = sess.run(
-            [values.select_device(d, next_element) for d in devices])
+        computed_value = sess.run([values.select_replica(r, next_element)
+                                   for r in range(len(devices))])
         self.assertEqual(expected_value, computed_value)
 
       with self.assertRaises(errors.OutOfRangeError):
         next_element = iterator.get_next()
-        sess.run([values.select_device(d, next_element) for d in devices])
+        sess.run([values.select_replica(r, next_element)
+                  for r in range(len(devices))])
 
       # After re-initializing the iterator, should be able to iterate again.
       sess.run(iterator.initialize())
 
       for expected_value in expected_values:
         next_element = iterator.get_next()
-        computed_value = sess.run(
-            [values.select_device(d, next_element) for d in devices])
+        computed_value = sess.run([values.select_replica(r, next_element)
+                                   for r in range(len(devices))])
         self.assertEqual(expected_value, computed_value)
 
 
diff --git a/tensorflow/contrib/distribute/python/strategy_test_lib.py b/tensorflow/contrib/distribute/python/strategy_test_lib.py
index cc46c84005..6e5280e356 100644
--- a/tensorflow/contrib/distribute/python/strategy_test_lib.py
+++ b/tensorflow/contrib/distribute/python/strategy_test_lib.py
@@ -254,12 +254,13 @@ class DistributionTestBase(test.TestCase):
     for expected_value in expected_values:
       next_element = iterator.get_next()
       computed_value = evaluate(
-          [values.select_device(d, next_element) for d in devices])
+          [values.select_replica(r, next_element) for r in range(len(devices))])
       self.assertEqual(expected_value, computed_value)
 
     with self.assertRaises(errors.OutOfRangeError):
       next_element = iterator.get_next()
-      evaluate([values.select_device(d, next_element) for d in devices])
+      evaluate(
+          [values.select_replica(r, next_element) for r in range(len(devices))])
 
     # After re-initializing the iterator, should be able to iterate again.
     evaluate(iterator.initialize())
@@ -267,7 +268,7 @@ class DistributionTestBase(test.TestCase):
     for expected_value in expected_values:
       next_element = iterator.get_next()
       computed_value = evaluate(
-          [values.select_device(d, next_element) for d in devices])
+          [values.select_replica(r, next_element) for r in range(len(devices))])
       self.assertEqual(expected_value, computed_value)
 
   def _test_global_step_update(self, strategy):
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index 4d2e1540eb..bdcad14704 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -67,8 +67,8 @@ def get_tpu_system_metadata(tpu_cluster_resolver):
 
 
 # TODO(jhseu): Deduplicate with MirroredStrategy?
-def _create_tpu_mirrored_variable(devices, real_mirrored_creator, *args,
-                                  **kwargs):  # pylint: disable=g-missing-docstring
+def _create_tpu_mirrored_variable(  # pylint: disable=missing-docstring
+    device_map, logical_device, real_mirrored_creator, *args, **kwargs):
   # Figure out what collections this variable should be added to.
   # We'll add the TPUMirroredVariable to those collections instead.
   collections = kwargs.pop("collections", None)
@@ -98,8 +98,10 @@ def _create_tpu_mirrored_variable(devices, real_mirrored_creator, *args,
   # was never recorded on the tape instead of having to do this manually
   # here.
   with tape.stop_recording():
-    index = real_mirrored_creator(devices, *args, **kwargs)
-    result = values.TPUMirroredVariable(index, index[devices[0]], aggregation)
+    devices = device_map.logical_to_actual_devices(logical_device)
+    value_list = real_mirrored_creator(devices, *args, **kwargs)
+    result = values.TPUMirroredVariable(
+        device_map, value_list, aggregation, logical_device=logical_device)
 
   if not context.executing_eagerly():
     g = ops.get_default_graph()
@@ -111,7 +113,7 @@ def _create_tpu_mirrored_variable(devices, real_mirrored_creator, *args,
     if kwargs.get("trainable", True):
       collections.append(ops.GraphKeys.TRAINABLE_VARIABLES)
       l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES)
-      for v in index.values():
+      for v in value_list:
         l.remove(v)
     g.add_to_collections(collections, result)
   return result
@@ -176,13 +178,24 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
 
     # TODO(jhseu): Switch to DeviceAssignment to support pods and model
     # parallelism.
-    device_map = {d.name: i for i, d in enumerate(self._tpu_metadata.devices)
-                  if "device:TPU:" in d.name}
-    self._device_index = values.PerReplica(device_map)
+    self._device_index = {
+        d.name: i for i, d in enumerate(self._tpu_metadata.devices)
+        if "device:TPU:" in d.name
+    }
     self._host_device = self.get_host_cpu_device(0)
-    self._tpu_devices = tuple(sorted(device_map.keys()))
+    self._tpu_devices = tuple(sorted(self._device_index.keys()))
     # Only create variables for the number of replicas we're running.
     self._tpu_devices = self._tpu_devices[:self._num_replicas_in_sync]
+    self._device_map = values.ReplicaDeviceMap(self._tpu_devices)
+
+    # For input:
+    input_device_map = values.ReplicaDeviceMap(tuple(
+        self.get_host_cpu_device(hid) for hid in range(self.num_hosts)))
+    worker_devices = [
+        (self.get_host(hid), [self.get_host_cpu_device(hid)])
+        for hid in range(self.num_hosts)
+    ]
+    self._input_workers = values.InputWorkers(input_device_map, worker_devices)
 
     # TODO(sourabhbajaj): Remove this once performance of running one step
     # at a time is comparable to multiple steps.
@@ -279,20 +292,13 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
   def _make_dataset_iterator(self, dataset):
     """Make iterators for each of the TPU hosts."""
 
-    worker_devices = [
-        (self.get_host(hid), [self.get_host_cpu_device(hid)])
-        for hid in range(self.num_hosts)
-    ]
-    return values.DatasetIterator(dataset, worker_devices,
+    return values.DatasetIterator(dataset, self._input_workers,
                                   self._num_replicas_in_sync)
 
   def _distribute_dataset(self, dataset_fn):
-    worker_devices = [
-        (self.get_host(hid), [self.get_host_cpu_device(hid)])
-        for hid in range(self.num_hosts)
-    ]
     return values.MultiWorkerDataset(
-        functools.partial(self._call_dataset_fn, dataset_fn), worker_devices)
+        functools.partial(self._call_dataset_fn, dataset_fn),
+        self._input_workers)
 
   # TODO(priyag): Deal with OutOfRange errors once b/111349762 is fixed.
   # TODO(sourabhbajaj): Remove the initial_loop_values parameter when we have
@@ -435,22 +441,23 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
     else:
       return []
 
-  def _get_devices_from(self, colocate_with=None):
-    # TODO(jhseu): Change this when we support model parallelism.
-    return self._tpu_devices
-
   def _create_variable(self, next_creator, *args, **kwargs):
     """Create a TPUMirroredVariable. See `DistributionStrategy.scope`."""
     colocate_with = kwargs.pop("colocate_with", None)
-    devices = self._get_devices_from(colocate_with)
+    if colocate_with is None:
+      device_map = self._device_map
+      logical_device = 0  # TODO(josh11b): Get logical device from scope here.
+    else:
+      device_map = colocate_with.device_map
+      logical_device = colocate_with.logical_device
 
     def _real_mirrored_creator(devices, *args, **kwargs):  # pylint: disable=g-missing-docstring
-      index = {}
+      value_list = []
       for i, d in enumerate(devices):
         with ops.device(d):
           if i > 0:
             # Give replicas meaningful distinct names:
-            var0name = index[devices[0]].name.split(":")[0]
+            var0name = value_list[0].name.split(":")[0]
             # We append a / to variable names created on replicas with id > 0 to
             # ensure that we ignore the name scope and instead use the given
             # name as the absolute name of the variable.
@@ -458,20 +465,20 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
             # Initialize replicas with the same value:
             if context.executing_eagerly():
               kwargs["initial_value"] = array_ops.identity(
-                  index[devices[0]].value())
+                  value_list[0].value())
             else:
               def initial_value_fn(device=d):
                 with ops.device(device):
-                  return array_ops.identity(index[devices[0]].initial_value)
+                  return array_ops.identity(value_list[0].initial_value)
               kwargs["initial_value"] = initial_value_fn
           with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
             v = next_creator(*args, **kwargs)
           assert not isinstance(v, values.TPUMirroredVariable)
-          index[d] = v
-      return index
+          value_list.append(v)
+      return value_list
 
-    return _create_tpu_mirrored_variable(devices, _real_mirrored_creator, *args,
-                                         **kwargs)
+    return _create_tpu_mirrored_variable(
+        device_map, logical_device, _real_mirrored_creator, *args, **kwargs)
 
   def _reduce_to(self, reduce_op, value, destinations):
     if values._enclosing_tpu_context() is not None:  # pylint: disable=protected-access
@@ -489,7 +496,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
       # replicas in which case `value` would be a single value or value could
       # be 0.
       return cross_device_ops_lib.reduce_non_distributed_value(
-          self, reduce_op, value, destinations)
+          reduce_op, self._device_map, value, destinations)
 
     # Validate that the destination is same as the host device
     # Note we don't do this when in replicate context as the reduction is
@@ -512,19 +519,19 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
       if group:
         return fn(var, *args, **kwargs)
       else:
-        return [fn(var, *args, **kwargs)]
+        return (fn(var, *args, **kwargs),)
 
     # Otherwise, we revert to MirroredStrategy behavior and update each variable
     # directly.
-    updates = {}
-    for d, v in var._index.items():  # pylint: disable=protected-access
-      name = "update_%d" % self._device_index.get(d)
+    updates = []
+    for i, (d, v) in enumerate(zip(var.devices, var.values)):
+      name = "update_%d" % i
       with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name):
         # If args and kwargs are not mirrored, the value is returned as is.
-        updates[d] = fn(v,
-                        *values.select_device_mirrored(d, args),
-                        **values.select_device_mirrored(d, kwargs))
-    return values.update_regroup(self, updates, group)
+        updates.append(fn(v,
+                          *values.select_device_mirrored(d, args),
+                          **values.select_device_mirrored(d, kwargs)))
+    return values.update_regroup(self, self._device_map, updates, group)
 
   def read_var(self, var):
     assert isinstance(var, values.TPUMirroredVariable)
@@ -543,7 +550,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
       # pylint: disable=protected-access
       if values._enclosing_tpu_context() is not None:
         return (val,)
-      return tuple(val._get(device=d) for d in sorted(val._index.keys()))
+      return val.values
     return (val,)
 
   def value_container(self, value):
diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index a91fe7c945..f6cb3d6313 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -52,7 +52,8 @@ class DistributedValuesTest(test.TestCase):
     with ops.device("/device:CPU:0"):
       one = constant_op.constant(1)
       two = constant_op.constant(2)
-      v = values.DistributedValues({"/device:CPU:0": one, "/device:GPU:0": two})
+      device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
+      v = values.DistributedValues(device_map, (one, two))
       self.assertEqual(two, v.get("/device:GPU:0"))
       self.assertEqual(one, v.get())
       with self.assertRaises(ValueError):
@@ -64,24 +65,26 @@ class DistributedValuesTest(test.TestCase):
         ops.device("/device:CPU:0"):
       one = constant_op.constant(1)
       two = constant_op.constant(2)
-      v = values.DistributedValues({"/device:CPU:0": one, "/device:GPU:0": two})
+      device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
+      v = values.DistributedValues(device_map, (one, two))
       self.assertEqual(two, v.get("/device:GPU:0"))
       self.assertEqual(one, v.get())
       with self.assertRaises(ValueError):
         self.assertIsNone(v.get("/device:GPU:2"))
 
   def testCanonicalization(self):
-    canonical_cpu = ["/job:localhost/replica:0/task:0/device:CPU:0"]
-    v = values.DistributedValues({"": 42})
-    self.assertEqual(canonical_cpu, list(v._index.keys()))
-    v = values.DistributedValues({"/device:CPU:0": 42})
-    self.assertEqual(canonical_cpu, list(v._index.keys()))
-    v = values.DistributedValues({"/cpu:0": 42})
-    self.assertEqual(canonical_cpu, list(v._index.keys()))
-    v = values.DistributedValues({"/CPU:0": 42})
-    self.assertEqual(canonical_cpu, list(v._index.keys()))
+    canonical_cpu = ("/job:localhost/replica:0/task:0/device:CPU:0",)
+    v = values.DistributedValues(values.SingleDeviceMap(""), (42,))
+    self.assertEqual(canonical_cpu, v.devices)
+    v = values.DistributedValues(values.SingleDeviceMap("/device:CPU:0"), (42,))
+    self.assertEqual(canonical_cpu, v.devices)
+    v = values.DistributedValues(values.SingleDeviceMap("/cpu:0"), (42,))
+    self.assertEqual(canonical_cpu, v.devices)
+    v = values.DistributedValues(values.SingleDeviceMap("/CPU:0"), (42,))
+    self.assertEqual(canonical_cpu, v.devices)
     with self.assertRaises(AssertionError):
-      v = values.DistributedValues({"/device:cpu:0": 42})
+      v = values.DistributedValues(
+          values.SingleDeviceMap("/device:cpu:0"), (42,))
 
   def testIsTensorLike(self):
     with context.graph_mode(), \
@@ -89,7 +92,8 @@ class DistributedValuesTest(test.TestCase):
          ops.device("/device:CPU:0"):
       one = constant_op.constant(1)
       two = constant_op.constant(2)
-      v = values.DistributedValues({"/device:CPU:0": one, "/device:GPU:0": two})
+      device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
+      v = values.DistributedValues(device_map, (one, two))
       self.assertEqual(two, v.get("/device:GPU:0"))
       self.assertEqual(one, v.get())
       self.assertTrue(v.is_tensor_like)
@@ -101,7 +105,8 @@ class DistributedValuesTest(test.TestCase):
          ops.device("/device:CPU:0"):
       one = constant_op.constant(1)
       two = 2.0
-      v = values.DistributedValues({"/device:CPU:0": one, "/device:GPU:0": two})
+      device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
+      v = values.DistributedValues(device_map, (one, two))
       self.assertEqual(two, v.get("/device:GPU:0"))
       self.assertEqual(one, v.get())
       self.assertFalse(v.is_tensor_like)
@@ -119,8 +124,8 @@ class DistributedDelegateTest(test.TestCase):
         def __init__(self, x):
           self.x = x
 
-      v = values.DistributedDelegate(
-          {"/device:CPU:0": Foo(7), "/device:GPU:0": Foo(8)})
+      device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
+      v = values.DistributedDelegate(device_map, (Foo(7), Foo(8)))
       self.assertEqual(7, v.x)
       with self.assertRaises(AttributeError):
         _ = v.y
@@ -128,7 +133,8 @@ class DistributedDelegateTest(test.TestCase):
   @test_util.run_in_graph_and_eager_modes
   def testOperatorOverride(self):
     with ops.device("/device:CPU:0"):
-      v = values.DistributedDelegate({"/device:CPU:0": 7, "/device:GPU:0": 8})
+      device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
+      v = values.DistributedDelegate(device_map, (7, 8))
       # v should act like int(7).
       self.assertEqual(8, v + 1)
       self.assertEqual(10, 3 + v)
@@ -179,16 +185,15 @@ def _nested_value(d):
 
 def _make_mirrored():
   v = []
-  index = {}
   devices = ["/device:GPU:0", "/device:CPU:0"]
   for d, n, init in zip(devices, ["v", "v/replica"], [1., 2.]):
     with ops.device(d):
       v.append(variable_scope.get_variable(
           name=n, initializer=init, use_resource=True))
-      index[d] = v[-1]
-  mirrored = values.MirroredVariable(index, v[0],
+  device_map = values.ReplicaDeviceMap(devices)
+  mirrored = values.MirroredVariable(device_map, v,
                                      variable_scope.VariableAggregation.SUM)
-  return v, devices, mirrored
+  return v, device_map, mirrored
 
 
 class RegroupAndSelectDeviceTest(test.TestCase):
@@ -205,8 +210,9 @@ class RegroupAndSelectDeviceTest(test.TestCase):
       self.assertEqual(expected[i], result.get(_device_str(i)))
 
   def testNested(self):
-    result = values.regroup({_device_str(0): _nested_value("1"),
-                             _device_str(1): _nested_value("2")})
+    device_map = values.ReplicaDeviceMap((_device_str(0), _device_str(1)))
+    result = values.regroup(device_map,
+                            (_nested_value("1"), _nested_value("2")))
     self.assertIsInstance(result, tuple)
     self.assertEqual(3, len(result))
     self._is_per_replica(result[0], ["a1", "a2"])
@@ -222,11 +228,11 @@ class RegroupAndSelectDeviceTest(test.TestCase):
     self._is_per_replica(result[1][1]["c"], ["d1", "d2"])
     self._is_per_replica(result[1][1]["e"], ["f1", "f2"])
 
-    # Also test that we can undo the merge using select_device()
+    # Also test that we can undo the merge using select_replica()
     self.assertEqual(_nested_value("1"),
-                     values.select_device(_device_str(0), result))
+                     values.select_replica(0, result))
     self.assertEqual(_nested_value("2"),
-                     values.select_device(_device_str(1), result))
+                     values.select_replica(1, result))
     # select_device_mirrored() should fail due to non-mirrored values
     with self.assertRaises(TypeError):
       values.select_device_mirrored(_device_str(0), result)
@@ -236,8 +242,9 @@ class RegroupAndSelectDeviceTest(test.TestCase):
   def testWrapClass(self):
     # Normally a mirrored value would be the same across devices, but
     # for a test it is convenient to be able to tell the values apart.
-    result = values.regroup({_device_str(0): _nested_value("1"),
-                             _device_str(1): _nested_value("2")},
+    device_map = values.ReplicaDeviceMap((_device_str(0), _device_str(1)))
+    result = values.regroup(device_map,
+                            (_nested_value("1"), _nested_value("2")),
                             values.Mirrored)
     self.assertIsInstance(result, tuple)
     self.assertEqual(3, len(result))
@@ -254,11 +261,11 @@ class RegroupAndSelectDeviceTest(test.TestCase):
     self._is_per_replica(result[1][1]["c"], ["d1", "d2"], values.Mirrored)
     self._is_per_replica(result[1][1]["e"], ["f1", "f2"], values.Mirrored)
 
-    # Also test that we can undo the merge using select_device()
+    # Also test that we can undo the merge using select_replica()
     self.assertEqual(_nested_value("1"),
-                     values.select_device(_device_str(0), result))
+                     values.select_replica(0, result))
     self.assertEqual(_nested_value("2"),
-                     values.select_device(_device_str(1), result))
+                     values.select_replica(1, result))
     # Values are marked as mirrored, so select_device_mirrored() is allowed.
     self.assertEqual(_nested_value("1"),
                      values.select_device_mirrored(_device_str(0), result))
@@ -268,63 +275,66 @@ class RegroupAndSelectDeviceTest(test.TestCase):
   def testMirroredContainer(self):
     if context.num_gpus() < 1 and context.executing_eagerly():
       self.skipTest("A GPU is not available for this test in eager mode.")
-    v, devices, mirrored = _make_mirrored()
-    result = values.regroup(dict(zip(devices, v)))
+    v, device_map, mirrored = _make_mirrored()
+    result = values.regroup(device_map, v)
     self.assertIs(mirrored, result)
 
   def testSameId(self):
     foo = object()
-    result = values.regroup({_device_str(0): ("a", foo),
-                             _device_str(1): ("b", foo)})
+    device_map = values.ReplicaDeviceMap((_device_str(0), _device_str(1)))
+    result = values.regroup(device_map, (("a", foo), ("b", foo)))
     self.assertIsInstance(result, tuple)
     self.assertEqual(2, len(result))
     self._is_per_replica(result[0], ["a", "b"])
     self.assertIs(foo, result[1])
 
-    # Test select_device(), should undo the merge done by regroup().
-    result_0 = values.select_device(_device_str(0), result)
+    # Test select_replica(), should undo the merge done by regroup().
+    result_0 = values.select_replica(0, result)
     self.assertIsInstance(result_0, tuple)
     self.assertEqual(2, len(result_0))
     self.assertEqual("a", result_0[0])
     self.assertIs(foo, result_0[1])
-    result_1 = values.select_device(_device_str(1), result)
+    result_1 = values.select_replica(1, result)
     self.assertIsInstance(result_1, tuple)
     self.assertEqual(2, len(result_1))
     self.assertEqual("b", result_1[0])
     self.assertIs(foo, result_1[1])
 
   def testOneDevice(self):
-    result = values.regroup({_device_str(0): _nested_value("1")})
-    # On one device regroup() and select_device() are basically identity.
+    device_map = values.ReplicaDeviceMap((_device_str(0),))
+    result = values.regroup(device_map, (_nested_value("1"),))
+    # On one device regroup() and select_replica() are basically identity.
     self.assertEqual(_nested_value("1"), result)
     self.assertEqual(_nested_value("1"),
-                     values.select_device(_device_str(0), result))
+                     values.select_replica(0, result))
 
     # The one exception has to do with MirroredVariables.
     d = "/device:CPU:0"
     with ops.device(d):
       v = variable_scope.get_variable(
           name="v", initializer=1., use_resource=True)
-      index = {d: v}
-    mirrored = values.MirroredVariable(index, v,
+      device_map = values.ReplicaDeviceMap((d,))
+    mirrored = values.MirroredVariable(device_map, (v,),
                                        variable_scope.VariableAggregation.SUM)
-    result = values.regroup(index)
+    result = values.regroup(device_map, (v,))
     self.assertIs(mirrored, result)
 
   def testNamedTupleEstimatorSpec(self):
     with context.graph_mode(), ops.Graph().as_default():
-      created_estimator_specs = {}
-      to_regroup = {}
+      devices = []
+      created_estimator_specs = []
 
       for device_id in range(3):
         spec = model_fn_lib.EstimatorSpec(
             mode=model_fn_lib.ModeKeys.TRAIN,
             loss=constant_op.constant(device_id / 2),
             train_op=array_ops.identity(constant_op.constant(device_id)))
-        created_estimator_specs[device_id] = spec
-        to_regroup[_device_str(device_id)] = spec
+        devices.append(_device_str(device_id))
+        created_estimator_specs.append(spec)
 
-      merged_estimator_spec = values.regroup(to_regroup)
+      device_map = values.ReplicaDeviceMap(devices)
+      merged_estimator_spec = values.regroup(
+          device_map, created_estimator_specs)
 
       self.assertTrue(
           isinstance(merged_estimator_spec, model_fn_lib.EstimatorSpec))
@@ -338,10 +348,10 @@ class RegroupAndSelectDeviceTest(test.TestCase):
         # Scaffold is populated by `EstimatorSpec.__new__`.
         self.assertEqual(created_estimator_specs[device_id].scaffold,
                          merged_estimator_spec.scaffold.get(d))
-        # Also test that we can undo the merge using select_device()
+        # Also test that we can undo the merge using select_replica()
         self.assertEqual(created_estimator_specs[device_id],
-                         values.select_device(_device_str(device_id),
-                                              merged_estimator_spec))
+                         values.select_replica(device_id,
+                                               merged_estimator_spec))
 
 
 class PerReplicaDatasetTest(test.TestCase):
@@ -350,7 +360,9 @@ class PerReplicaDatasetTest(test.TestCase):
   config.allow_soft_placement = True
 
   def _test_iterator(self, devices, dataset, expected_values):
-    per_replica_dataset = values.PerReplicaDataset(dataset, devices)
+    device_map = values.ReplicaDeviceMap(devices)
+    input_workers = values.InputWorkers(device_map)
+    per_replica_dataset = values.PerReplicaDataset(dataset, input_workers, 0)
     if context.executing_eagerly():
       iterator = per_replica_dataset.make_one_shot_iterator()
     else:
@@ -358,15 +370,13 @@ class PerReplicaDatasetTest(test.TestCase):
       self.evaluate([iterator.initializer])
 
     for expected_value in expected_values:
-      next_element = iterator.get_next()
-      computed_value = self.evaluate(
-          [values.select_device(d, next_element) for d in devices])
+      next_element = iterator.get_next_as_list()
+      computed_value = self.evaluate(next_element)
       self.assertEqual(expected_value, computed_value)
 
     with self.assertRaises(errors.OutOfRangeError):
-      next_element = iterator.get_next()
-      self.evaluate([
-          values.select_device(d, next_element) for d in devices])
+      next_element = iterator.get_next_as_list()
+      self.evaluate(next_element)
 
   @test_util.run_in_graph_and_eager_modes
   def testOneDevice(self):
@@ -422,11 +432,13 @@ class PerReplicaDatasetTest(test.TestCase):
       dataset = dataset_ops.Dataset.from_tensor_slices(
           random_ops.random_uniform((10,)))
 
-      per_replica_dataset = values.PerReplicaDataset(dataset, devices)
+      device_map = values.ReplicaDeviceMap(devices)
+      input_workers = values.InputWorkers(device_map)
+      per_replica_dataset = values.PerReplicaDataset(dataset, input_workers, 0)
       iterator = per_replica_dataset.make_initializable_iterator()
 
       self.evaluate(iterator.initializer)
-      next_element = iterator.get_next()
+      next_element = iterator.get_next_as_list()
       for _ in range(10):
         self.evaluate(next_element)
 
@@ -444,35 +456,39 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
 
   def _test_iterator(self, sess, iterator, devices, expected_values):
     next_element = iterator.get_next()
-    for device in devices:
-      v = values.select_device(device, next_element)
+    for r, device in enumerate(devices):
+      v = values.select_replica(r, next_element)
       # The `v` here can be a tuple.
       for element in nest.flatten(v):
         self.assertTrue(element.device in device)
 
     for expected_value in expected_values:
-      actual = sess.run(
-          [values.select_device(d, next_element) for d in devices])
+      t = [values.select_replica(r, next_element) for r in range(len(devices))]
+      actual = sess.run(t)
       self.assertEqual(expected_value, actual)
 
     with self.assertRaises(errors.OutOfRangeError):
-      sess.run([values.select_device(d, next_element) for d in devices])
+      sess.run([values.select_replica(r, next_element)
+                for r in range(len(devices))])
 
   def _test_dataset(self, dataset_fn, worker_devices, devices,
                     expected_values, auto_shard=True):
+    device_map = values.ReplicaDeviceMap(devices)
+    input_workers = values.InputWorkers(device_map, worker_devices)
     multi_worker_dataset = values.MultiWorkerDataset(
-        dataset_fn, worker_devices, auto_shard=auto_shard)
+        dataset_fn, input_workers, auto_shard=auto_shard)
     multi_worker_iterator = multi_worker_dataset.make_initializable_iterator()
     with self.cached_session() as sess:
       sess.run(multi_worker_iterator.initializer)
       self._test_iterator(sess, multi_worker_iterator, devices, expected_values)
 
   def _cpu_devices(self):
-    worker_devices = [
+    worker_devices = (
         ("/job:worker/replica:0/task:0",
          ["/job:worker/replica:0/task:0/device:CPU:0"]),
         ("/job:worker/replica:0/task:1",
-         ["/job:worker/replica:0/task:1/device:CPU:0"])]
+         ["/job:worker/replica:0/task:1/device:CPU:0"])
+    )
     devices = [
         "/job:worker/replica:0/task:0/device:CPU:0",
         "/job:worker/replica:0/task:1/device:CPU:0"
@@ -480,16 +496,16 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
     return worker_devices, devices
 
   def _cpu_and_one_gpu_devices(self):
-    worker_devices = [
-        ("/job:worker/replica:0/task:0", [
+    worker_devices = (
+        ("/job:worker/replica:0/task:0", (
             "/job:worker/replica:0/task:0/device:GPU:0",
             "/job:worker/replica:0/task:0/device:CPU:0"
-        ]),
-        ("/job:worker/replica:0/task:1", [
+        )),
+        ("/job:worker/replica:0/task:1", (
             "/job:worker/replica:0/task:1/device:GPU:0",
             "/job:worker/replica:0/task:1/device:CPU:0"
-        ])
-    ]
+        ))
+    )
     devices = [
         "/job:worker/replica:0/task:0/device:GPU:0",
         "/job:worker/replica:0/task:0/device:CPU:0",
@@ -542,8 +558,10 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
     worker_devices, devices = self._cpu_devices()
     with context.graph_mode(), self.cached_session() as sess:
       dataset_fn = lambda: dataset_ops.Dataset.range(8)
+      device_map = values.ReplicaDeviceMap(devices)
+      input_workers = values.InputWorkers(device_map, worker_devices)
       multi_worker_dataset = values.MultiWorkerDataset(
-          dataset_fn, worker_devices, auto_shard=True)
+          dataset_fn, input_workers, auto_shard=True)
       multi_worker_iterator = multi_worker_dataset.make_initializable_iterator()
 
       sess.run(multi_worker_iterator.initializer)
@@ -557,19 +575,19 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
 
   def testValueErrorForIterator(self):
     # Incompatiable arguments.
+    d1 = "/device:GPU:0"
+    d2 = "/device:GPU:1"
+    device_map = values.ReplicaDeviceMap([d1, d2])
+    input_workers = values.InputWorkers(
+        device_map, (("w1", (d1,)), ("w2", (d2,))))
     with self.assertRaises(ValueError):
-      values.MultiWorkerDataIterator({"w1": None}, {"w1": "d1", "w2": "d2"})
+      values.MultiWorkerDataIterator([("w1", None)], input_workers)
 
-    # Test duplicated devices under same worker.
-    worker_devices, _ = self._cpu_devices()
-    worker_devices[0][1].append("/job:worker/replica:0/task:0/device:CPU:0")
-    with context.graph_mode():
-      dataset_fn = lambda: dataset_ops.Dataset.range(8)
-      multi_worker_dataset = values.MultiWorkerDataset(
-          dataset_fn, worker_devices, auto_shard=True)
-      multi_worker_iterator = multi_worker_dataset.make_initializable_iterator()
-      with self.assertRaises(ValueError):
-        multi_worker_iterator.get_next()
+  def testDuplicateDevices(self):
+    _, devices = self._cpu_devices()
+    devices.append("/job:worker/replica:0/task:0/device:CPU:0")
+    with self.assertRaises(ValueError):
+      _ = values.ReplicaDeviceMap(devices)
 
 
 class InputIteratorTestBase(test.TestCase):
@@ -577,16 +595,18 @@ class InputIteratorTestBase(test.TestCase):
   def _test_iterator(self, input_type, dataset_fn, worker_device_pairs,
                      expected_values, sess=None, split_batch_by=None):
     devices = nest.flatten([ds for _, ds in worker_device_pairs])
+    device_map = values.ReplicaDeviceMap(devices)
+    input_workers = values.InputWorkers(device_map, worker_device_pairs)
 
     if input_type == "input_fn":
       input_contexts = [
           distribute_lib.InputContext() for _ in worker_device_pairs]
       input_fn = lambda _: dataset_fn()
-      iterator = values.InputFunctionIterator(input_fn, worker_device_pairs,
-                                              input_contexts)
+      iterator = values.InputFunctionIterator(
+          input_fn, input_workers, input_contexts)
     else:
-      iterator = values.DatasetIterator(dataset_fn(), worker_device_pairs,
-                                        split_batch_by)
+      iterator = values.DatasetIterator(
+          dataset_fn(), input_workers, split_batch_by)
 
     evaluate = lambda x: sess.run(x) if sess else self.evaluate(x)
 
@@ -595,12 +615,13 @@ class InputIteratorTestBase(test.TestCase):
     for expected_value in expected_values:
       next_element = iterator.get_next()
       computed_value = evaluate(
-          [values.select_device(d, next_element) for d in devices])
+          [values.select_replica(r, next_element) for r in range(len(devices))])
       self.assertAllEqual(expected_value, computed_value)
 
     with self.assertRaises(errors.OutOfRangeError):
       next_element = iterator.get_next()
-      evaluate([values.select_device(d, next_element) for d in devices])
+      evaluate([values.select_replica(r, next_element)
+                for r in range(len(devices))])
 
     # After re-initializing the iterator, should be able to iterate again.
     evaluate(control_flow_ops.group(iterator.initialize()))
@@ -608,7 +629,7 @@ class InputIteratorTestBase(test.TestCase):
     for expected_value in expected_values:
       next_element = iterator.get_next()
       computed_value = evaluate(
-          [values.select_device(d, next_element) for d in devices])
+          [values.select_replica(r, next_element) for r in range(len(devices))])
       self.assertAllEqual(expected_value, computed_value)
 
 
@@ -797,8 +818,8 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
   def testVariableOnAnotherDevice(self):
     v = variable_scope.get_variable(
         name="v", initializer=[1.], use_resource=True)
-    index = {"/job:foo/device:CPU:0": v}
-    mirrored = values.MirroredVariable(index, v,
+    device_map = values.ReplicaDeviceMap(("/job:foo/device:CPU:0",))
+    mirrored = values.MirroredVariable(device_map, (v,),
                                        variable_scope.VariableAggregation.MEAN)
 
     self.assertEqual(v.name, mirrored.name)
@@ -826,7 +847,8 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
       self.skipTest("A GPU is not available for this test in eager mode.")
 
     with self.cached_session(config=self.config) as sess:
-      v, devices, mirrored = _make_mirrored()
+      v, device_map, mirrored = _make_mirrored()
+      devices = device_map.all_devices
 
       # Overwrite the initial values.
       self._assign_mirrored(devices, v, [3., 4.])
@@ -844,7 +866,8 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
   def _save_mirrored(self):
     """Save variables with mirroring, returns save_path."""
     with self.session(graph=ops.Graph()) as sess:
-      v, devices, mirrored = _make_mirrored()
+      v, device_map, mirrored = _make_mirrored()
+      devices = device_map.all_devices
 
       # Overwrite the initial values.
       self._assign_mirrored(devices, v, [3., 4.])
@@ -889,7 +912,8 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
   def _restore_mirrored(self, save_path):
     """Restore to variables with mirroring in a fresh graph."""
     with self.session(graph=ops.Graph()) as sess:
-      v, devices, mirrored = _make_mirrored()
+      v, device_map, mirrored = _make_mirrored()
+      devices = device_map.all_devices
 
       # Overwrite the initial values.
       self._assign_mirrored(devices, v, [7., 8.])
@@ -933,25 +957,24 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
       with ops.device("/device:GPU:0"):
         v = variable_scope.get_variable(
             name="v", initializer=1., use_resource=True)
-      mirrored = values.MirroredVariable({
-          "/device:GPU:0": v
-      }, v, variable_scope.VariableAggregation.MEAN)
+      mirrored = values.MirroredVariable(
+          values.ReplicaDeviceMap(("/device:GPU:0",)), (v,),
+          variable_scope.VariableAggregation.MEAN)
       sess.run(variables_lib.global_variables_initializer())
       sess.run({"complicated": mirrored})
 
 
-_devices = ["/device:GPU:0", "/device:CPU:0"]
+_devices = ("/device:GPU:0", "/device:CPU:0")
 
 
 def _make_replica_local(method):
+  device_map = values.ReplicaDeviceMap(_devices)
   v = []
-  index = {}
   for d, n, init in zip(_devices, ["v", "v/replica"], [1., 2.]):
     with ops.device(d):
       v.append(variable_scope.get_variable(
           name=n, initializer=init, use_resource=True))
-      index[d] = v[-1]
-  replica_local = values.ReplicaLocalVariable(index, v[0], method)
+  replica_local = values.ReplicaLocalVariable(device_map, v, method)
   return v, replica_local
 
 
@@ -977,9 +1000,9 @@ class ReplicaLocalVariablePropertiesTest(test.TestCase):
   def testVariableOnAnotherDevice(self):
     v = variable_scope.get_variable(
         name="v", initializer=[1.], use_resource=True)
-    index = {"/job:foo/device:CPU:0": v}
+    device_map = values.ReplicaDeviceMap(("/job:foo/device:CPU:0",))
     replica_local = values.ReplicaLocalVariable(
-        index, v, variable_scope.VariableAggregation.MEAN)
+        device_map, (v,), variable_scope.VariableAggregation.MEAN)
 
     self.assertEqual(v.name, replica_local.name)
     self.assertEqual(v.dtype, replica_local.dtype)
diff --git a/tensorflow/python/distribute/cross_device_ops.py b/tensorflow/python/distribute/cross_device_ops.py
index 23349a965e..9575301d97 100644
--- a/tensorflow/python/distribute/cross_device_ops.py
+++ b/tensorflow/python/distribute/cross_device_ops.py
@@ -38,7 +38,7 @@ def check_destinations(destinations):
   """Checks whether `destinations` is not empty.
 
   Args:
-    destinations: a DistributedValues, Variable, string or a list of strings.
+    destinations: a `DistributedValues`, variable, or string object.
 
   Returns:
     Boolean which is True if `destinations` is not empty.
@@ -50,20 +50,23 @@ def check_destinations(destinations):
 
 
 def validate_destinations(destinations):
-  if not isinstance(
-      destinations,
-      (value_lib.DistributedValues, resource_variable_ops.ResourceVariable,
-       value_lib.AggregatingVariable, six.string_types, list, tuple,
-       value_lib.TPUMirroredVariable)):
+  if not isinstance(destinations,
+                    (value_lib.DistributedValues,
+                     resource_variable_ops.ResourceVariable,
+                     value_lib.AggregatingVariable,
+                     six.string_types,
+                     value_lib.TPUMirroredVariable,
+                     # LogicalDeviceSpec is only used internally, e.g. as a
+                     # broadcast destination, never supplied by a user.
+                     value_lib.LogicalDeviceSpec)):
     raise ValueError("destinations must be one of a `DistributedValues` object,"
-                     " a tf.Variable object, a device string, a list or tuple "
-                     "of device strings")
+                     " a tf.Variable object, or a device string.")
 
   if not check_destinations(destinations):
     raise ValueError("destinations can not be empty")
 
 
-def reduce_non_distributed_value(extended, reduce_op, value, destinations):
+def reduce_non_distributed_value(reduce_op, device_map, value, destinations):
   """Reduce a non-DistributedValue `value` to `destinations`."""
   if isinstance(value, value_lib.DistributedValues):
     raise ValueError("You are passing a `DistributedValue` to "
@@ -83,21 +86,10 @@ def reduce_non_distributed_value(extended, reduce_op, value, destinations):
   # We do not support a reduce op of SUM if the value is the same across
   # all replicas. We call this as part of assign functions for MirroredVariables
   # and summing up identical values across replicas is not clearly defined.
-  if (len(extended.worker_devices) != 1 or
-      not check_destinations(destinations)):
+  if device_map.num_replicas_in_graph != 1:
     raise ValueError("A non-DistributedValues value %s cannot be reduced with "
                      "the given reduce op %s." % (value, reduce_op))
-  # TODO(anjalisridhar): Moves these methods to a device utility file?
-  devices = get_devices_from(destinations)
-  if len(devices) == 1:
-    with ops.device(devices[0]):
-      return array_ops.identity(value)
-  else:
-    value_updates = {}
-    for d in devices:
-      with ops.device(d):
-        value_updates[d] = array_ops.identity(value)
-    return value_lib.Mirrored(value_updates)
+  return simple_broadcast(value, destinations)
 
 
 def _make_tensor_into_per_replica(input_tensor):
@@ -115,7 +107,8 @@ def _make_tensor_into_per_replica(input_tensor):
     raise ValueError("Cannot convert `input_tensor` to a `PerReplica` object "
                      "because it doesn't have device set.")
 
-  return value_lib.PerReplica({device: input_tensor})
+  device_map = value_lib.SingleDeviceMap(device)
+  return value_lib.PerReplica(device_map, (input_tensor,))
 
 
 def _normalize_value_destination_pairs(value_destination_pairs):
@@ -153,16 +146,24 @@ def _validate_value_destination_pairs(value_destination_pairs):
 # CrossDeviceOps.
 def get_devices_from(destinations):
   if isinstance(destinations, value_lib.DistributedValues):
-    return list(destinations.devices)
-  elif isinstance(destinations, (resource_variable_ops.ResourceVariable,
-                                 value_lib.AggregatingVariable)):
-    return [destinations.device]
+    return destinations.devices
+  elif isinstance(destinations, value_lib.LogicalDeviceSpec):
+    return destinations.device_map.logical_to_actual_devices(
+        destinations.logical_device)
   elif isinstance(destinations, six.string_types):
-    return [device_util.resolve(destinations)]
-  elif isinstance(destinations, (list, tuple)):
-    return [device_util.resolve(destination) for destination in destinations]
+    return (device_util.resolve(destinations),)
+  return (destinations.device,)
+
+
+def get_device_map_from(destinations):
+  if isinstance(destinations, (value_lib.DistributedValues,
+                               value_lib.LogicalDeviceSpec)):
+    return destinations.device_map, destinations.logical_device
+  if isinstance(destinations, six.string_types):
+    device = device_util.resolve(destinations)
   else:
-    return [destinations.device]
+    device = destinations.device
+  return value_lib.SingleDeviceMap(device), 0
 
 
 def _devices_match(left, right):
@@ -178,25 +179,29 @@ def _all_devices_match(value_destination_pairs):
   return True
 
 
-def _simple_broadcast(value, destinations):
-  index = {}
-  devices = get_devices_from(destinations)
-  for d in devices:
-    index[d] = cross_device_utils.copy_tensor_or_indexed_slices_to_device(
-        value, d)
-  return value_lib.Mirrored(index)
+def simple_broadcast(value, destinations, always_mirrored=False):
+  """Broadcast `value` to `destinations` using simple copies."""
+  device_map, logical_device = get_device_map_from(destinations)
+  devices = device_map.logical_to_actual_devices(logical_device)
+  if len(devices) == 1 and not always_mirrored:
+    return cross_device_utils.copy_tensor_or_indexed_slices_to_device(
+        value, devices[0])
+  else:
+    value_updates = []
+    for d in devices:
+      value_updates.append(
+          cross_device_utils.copy_tensor_or_indexed_slices_to_device(
+              value, d))
+    return value_lib.Mirrored(device_map, value_updates, logical_device)
 
 
 def _simple_reduce(per_replica_value, reduce_to_device, accumulation_fn,
                    reduce_op):
   # pylint: disable=g-missing-docstring
-  all_values = []
-  count = 0
-  for v in per_replica_value._index.values():  # pylint: disable=protected-access
-    count += 1
-    all_values.append(v)
+  all_values = per_replica_value.values
   if not all_values:
     raise ValueError("`per_replica_value` must be non-empty")
+  count = len(all_values)
 
   with ops.device(reduce_to_device):
     with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
@@ -232,7 +237,8 @@ class CrossDeviceOps(object):
       a Mirrored object.
 
     Raises:
-      ValueError: if per_replica_value is not a PerReplica object.
+      ValueError: if per_replica_value can't be converted to a PerReplica
+        object.
     """
     if not isinstance(per_replica_value, value_lib.PerReplica):
       per_replica_value = _make_tensor_into_per_replica(per_replica_value)
@@ -292,7 +298,7 @@ class CrossDeviceOps(object):
         "_batch_reduce method must be implemented in descendants.")
 
   def _broadcast(self, tensor, destinations):
-    return _simple_broadcast(tensor, destinations)
+    return simple_broadcast(tensor, destinations, always_mirrored=True)
 
 
 class ReductionToOneDeviceCrossDeviceOps(CrossDeviceOps):
@@ -314,14 +320,12 @@ class ReductionToOneDeviceCrossDeviceOps(CrossDeviceOps):
     super(ReductionToOneDeviceCrossDeviceOps, self).__init__()
 
   def _reduce(self, reduce_op, per_replica_value, destinations):
-    if check_destinations(destinations):
-      devices = get_devices_from(destinations)
-    else:
-      devices = get_devices_from(per_replica_value)
+    assert check_destinations(destinations)
+    devices = get_devices_from(destinations)
     reduce_to_device = self.reduce_to_device or devices[0]
     reduced = _simple_reduce(per_replica_value, reduce_to_device,
                              self.accumulation_fn, reduce_op)
-    return self.broadcast(reduced, devices)
+    return self.broadcast(reduced, destinations)
 
   def _batch_reduce(self, reduce_op, value_destination_pairs):
     return [
@@ -352,7 +356,7 @@ def _group_value_by_device(per_replica_values):
   grouped = [[] for _ in range(len(destinations))]
   for per_replica_value in per_replica_values:
     # pylint: disable=protected-access
-    for i, v in enumerate(per_replica_value._index.values()):
+    for i, v in enumerate(per_replica_value.values):
       assert per_replica_value.devices == destinations
       grouped[i].append((v, None))
   return grouped
@@ -371,7 +375,7 @@ def _ungroup_and_make_mirrored(grouped_reduced,
     grouped_reduced: a list of lists, each sublist has components for each
       device, paired with a None. It is the result from
       cross_device_utils.aggregate_gradients_using*.
-    destinations: a list of device strings for returned Mirrored objects.
+    destinations: a value to colocate the result with.
     reduce_op: Indicates how values will be aggregated. Accepted values
       are `tf.distribute.ReduceOp.SUM`, `tf.distribute.ReduceOp.MEAN`.
     num_between_graph_workers: number of workers in the between-graph
@@ -380,15 +384,16 @@ def _ungroup_and_make_mirrored(grouped_reduced,
   Returns:
     a list of Mirrored objects.
   """
-  index = [{} for _ in range(len(grouped_reduced[0]))]
-  for d, per_replica_reduced in enumerate(grouped_reduced):
+  device_map, logical_device = get_device_map_from(destinations)
+  num_replicas = device_map.num_replicas_in_graph * num_between_graph_workers
+  index = [[] for _ in range(len(grouped_reduced[0]))]
+  for per_replica_reduced in grouped_reduced:
     for i, (v, _) in enumerate(per_replica_reduced):
       if reduce_op == reduce_util.ReduceOp.MEAN:
-        index[i][destinations[d]] = v / (
-            len(destinations) * num_between_graph_workers)
+        index[i].append(v / num_replicas)
       else:
-        index[i][destinations[d]] = v
-  return [value_lib.Mirrored(v) for v in index]
+        index[i].append(v)
+  return [value_lib.Mirrored(device_map, v, logical_device) for v in index]
 
 
 class ConcatAndSplitPacker(object):
@@ -614,7 +619,7 @@ class AllReduceCrossDeviceOps(CrossDeviceOps):
       reduce_to_device = devices[0]
       reduced = _simple_reduce(per_replica_value, reduce_to_device,
                                math_ops.add_n, reduce_op)
-      return self.broadcast(reduced, devices)
+      return self.broadcast(reduced, destinations)
 
   def _batch_reduce(self, reduce_op, value_destination_pairs):
     all_devices_match = _all_devices_match(value_destination_pairs)
@@ -666,8 +671,7 @@ class AllReduceCrossDeviceOps(CrossDeviceOps):
               destinations, device_grad_packs))
 
     reduced = _unpack_tensors(reduced, tensor_packer)
-    return _ungroup_and_make_mirrored(reduced, per_replica_values[0].devices,
-                                      reduce_op)
+    return _ungroup_and_make_mirrored(reduced, per_replica_values[0], reduce_op)
 
 
 # For compatibility with code using the old name of `AllReduceCrossDeviceOps`.
@@ -760,7 +764,6 @@ class MultiWorkerAllReduce(AllReduceCrossDeviceOps):
         (len(per_replica_values), self._all_reduce_spec, self._num_packs,
          self._agg_small_grads_max_bytes, self._agg_small_grads_max_group), 10)
 
-    destinations = sorted(per_replica_values[0].devices)
     device_grads = _group_value_by_device(per_replica_values)
 
     # The all reduce library requires fully defined shapes.
@@ -797,7 +800,7 @@ class MultiWorkerAllReduce(AllReduceCrossDeviceOps):
             aggregated_grads[i] += range_agg_grads[i]
     assert not remaining_grads
 
-    return _ungroup_and_make_mirrored(aggregated_grads, destinations,
+    return _ungroup_and_make_mirrored(aggregated_grads, per_replica_values[0],
                                       reduce_op)
 
 
@@ -841,20 +844,22 @@ class CollectiveAllReduce(CrossDeviceOps):
           "Eager execution is not supported for Collective All-Reduce")
 
     all_reduced = self._batch_all_reduce(reduce_op, [per_replica_value])[0]
-    if _devices_match(per_replica_value, destinations):
+    device_map, logical_device = get_device_map_from(destinations)
+    if (all_reduced.device_map is device_map and
+        all_reduced.logical_device == logical_device):
       return all_reduced
-    else:
-      index = {}
-      for d in get_devices_from(destinations):
-        # pylint: disable=protected-access
-        if d in all_reduced._index:
-          index[d] = all_reduced._index[d]
-        else:
-          with ops.control_dependencies(list(
-              all_reduced._index.values())), ops.device(d):
-            index[d] = array_ops.identity(list(all_reduced._index.values())[0])
+    devices = device_map.logical_to_actual_devices(logical_device)
+    index = []
+    for d in devices:
+      if d in all_reduced.devices:
+        index.append(all_reduced.get(d))
+      else:
+        # TODO(josh11b): Once we add support for model parallelism, get the
+        # copy from the corresponding replica instead of the primary.
+        with ops.control_dependencies(all_reduced.values), ops.device(d):
+          index.append(array_ops.identity(all_reduced.primary))
 
-      return value_lib.Mirrored(index)
+    return value_lib.Mirrored(device_map, index, logical_device)
 
   def _batch_reduce(self, reduce_op, value_destination_pairs):
     if cross_device_utils.contains_indexed_slices(value_destination_pairs):
@@ -919,7 +924,7 @@ class CollectiveAllReduce(CrossDeviceOps):
     new_device_grads = [list(x) for x in zip(*reduced_gv_list)]
     return _ungroup_and_make_mirrored(
         new_device_grads,
-        per_replica_values[0].devices,
+        per_replica_values[0],
         reduce_op,
         num_between_graph_workers=self._num_workers)
 
diff --git a/tensorflow/python/distribute/cross_device_utils.py b/tensorflow/python/distribute/cross_device_utils.py
index 0faadd7e0c..5b4b3a6f97 100644
--- a/tensorflow/python/distribute/cross_device_utils.py
+++ b/tensorflow/python/distribute/cross_device_utils.py
@@ -666,6 +666,6 @@ def contains_indexed_slices(value):
   elif isinstance(value, (list, tuple)) and value:
     return any(contains_indexed_slices(v) for v in value)
   elif isinstance(value, value_lib.DistributedValues):
-    return contains_indexed_slices(list(value._index.values()))  # pylint: disable=protected-access
+    return contains_indexed_slices(value.values)
   else:
     return False
diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py
index 13ddcaab68..edb80d7fb4 100644
--- a/tensorflow/python/distribute/distribute_lib.py
+++ b/tensorflow/python/distribute/distribute_lib.py
@@ -1038,9 +1038,9 @@ class DistributionStrategyExtended(object):
     ```
 
     Args:
-      colocate_with_variable: A created in `self.scope()`. Variables created
-        while in the returned context manager will be on the same set of
-        devices as `colocate_with_variable`.
+      colocate_with_variable: A variable created in this strategy's `scope()`.
+        Variables created while in the returned context manager will be on the
+        same set of devices as `colocate_with_variable`.
 
     Returns:
       A context manager.
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index fb3cf84449..b4f9761b98 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -81,12 +81,13 @@ class _RequestedStop(Exception):  # pylint: disable=g-bad-exception-name
 
 # TODO(yuefengz): maybe create a common class for those who need to call this
 # _call_for_each_replica.
-def _call_for_each_replica(distribution, fn, args, kwargs):
+def _call_for_each_replica(distribution, device_map, fn, args, kwargs):
   """Run `fn` in separate threads, once per replica/worker device.
 
   Args:
     distribution: the DistributionStrategy object.
-    fn: function to run (will be run once per device, each in its own thread).
+    device_map: the DeviceMap with the devices to run `fn` on.
+    fn: function to run (will be run once per replica, each in its own thread).
     args: positional arguments for `fn`
     kwargs: keyword arguments for `fn`.
 
@@ -108,15 +109,15 @@ def _call_for_each_replica(distribution, fn, args, kwargs):
 
   shared_variable_store = {}
 
-  # TODO(isaprykin): Create these threads once instead of during every run()
-  # call.
+  # TODO(isaprykin): Create these threads once instead of during every call.
   threads = []
-  for index, d in enumerate(distribution.extended.worker_devices):
+  for index in range(device_map.num_replicas_in_graph):
     variable_creator_fn = shared_variable_creator.make_fn(
         shared_variable_store, index)
-    t = MirroredExtended._MirroredReplicaThread(  # pylint: disable=protected-access
-        distribution, coord, d, variable_creator_fn, fn,
-        *values.select_device(d, args), **values.select_device(d, kwargs))
+    t = _MirroredReplicaThread(
+        distribution, coord, index, device_map, variable_creator_fn, fn,
+        values.select_replica(index, args),
+        values.select_replica(index, kwargs))
     threads.append(t)
 
   for t in threads:
@@ -164,9 +165,10 @@ def _call_for_each_replica(distribution, fn, args, kwargs):
             raise RuntimeError("Some replicas made a different number of "
                                "replica_context().merge_call() calls.")
           # get_replica_context().merge_call() case
-          merge_args = values.regroup({t.device: t.merge_args for t in threads})
+          merge_args = values.regroup(
+              device_map, tuple(t.merge_args for t in threads))
           merge_kwargs = values.regroup(
-              {t.device: t.merge_kwargs for t in threads})
+              device_map, tuple(t.merge_kwargs for t in threads))
           # We capture the name_scope of the MRT when we call merge_fn
           # to ensure that if we have opened a name scope in the MRT,
           # it will be respected when executing the merge function. We only
@@ -181,17 +183,18 @@ def _call_for_each_replica(distribution, fn, args, kwargs):
               ops.control_dependencies(mtt_captured_control_deps):
             merge_result = threads[0].merge_fn(distribution, *merge_args,
                                                **merge_kwargs)
-          for t in threads:
-            t.merge_result = values.select_device(t.device, merge_result)
+          for r, t in enumerate(threads):
+            t.merge_result = values.select_replica(r, merge_result)
   finally:
     for t in threads:
       t.should_run.set()
     coord.join(threads)
 
-  return values.regroup({t.device: t.main_result for t in threads})
+  return values.regroup(device_map, tuple(t.main_result for t in threads))
 
 
-def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs):  # pylint: disable=g-missing-docstring
+def _create_mirrored_variable(device_map, logical_device, real_mirrored_creator,
+                              *args, **kwargs):  # pylint: disable=g-missing-docstring
   # Figure out what collections this variable should be added to.
   # We'll add the MirroredVariable to those collections instead.
   collections = kwargs.pop("collections", None)
@@ -238,13 +241,15 @@ def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs):
   # was never recorded on the tape instead of having to do this manually
   # here.
   with tape.stop_recording():
-    index = real_mirrored_creator(devices, *args, **kwargs)
+    devices = device_map.logical_to_actual_devices(logical_device)
+    value_list = real_mirrored_creator(devices, *args, **kwargs)
 
     if is_replica_local:
-      result = values.ReplicaLocalVariable(
-          index, index[devices[0]], aggregation)
+      result = values.ReplicaLocalVariable(device_map, value_list, aggregation,
+                                           logical_device=logical_device)
     else:
-      result = values.MirroredVariable(index, index[devices[0]], aggregation)
+      result = values.MirroredVariable(device_map, value_list, aggregation,
+                                       logical_device=logical_device)
 
   # Add the wrapped variable to the requested collections.
   # The handling of eager mode and the global step matches
@@ -259,7 +264,7 @@ def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs):
     if kwargs.get("trainable", True):
       collections.append(ops.GraphKeys.TRAINABLE_VARIABLES)
       l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES)
-      for v in index.values():
+      for v in value_list:
         if v in l:
           l.remove(v)
     g.add_to_collections(collections, result)
@@ -347,6 +352,10 @@ def _group_device_list(devices):
   return device_dict
 
 
+def _is_gpu_device(device):
+  return tf_device.DeviceSpec().parse_from_string(device).device_type == "GPU"
+
+
 def _infer_num_gpus_per_worker(devices):
   """Infers the number of GPUs on each worker.
 
@@ -365,26 +374,25 @@ def _infer_num_gpus_per_worker(devices):
     consecutive and starting from 0.
   """
   if _is_device_list_local(devices):
-    return len([d for d in devices if "GPU" in d.upper()])
+    return sum(1 for d in devices if _is_gpu_device(d))
   else:
     device_dict = _group_device_list(devices)
     num_gpus = None
     for _, devices_in_task in device_dict.items():
       for device_in_task in devices_in_task:
         if num_gpus is None:
-          num_gpus = len([d for d in device_in_task if "GPU" in d.upper()])
+          num_gpus = sum(1 for d in device_in_task if _is_gpu_device(d))
 
         # Verify other workers have the same number of GPUs.
-        elif (
-            num_gpus != len([d for d in device_in_task if "GPU" in d.upper()])):
+        elif num_gpus != sum(1 for d in device_in_task if _is_gpu_device(d)):
           raise ValueError("All workers should have the same number of GPUs.")
 
         for d in device_in_task:
           d_spec = tf_device.DeviceSpec().parse_from_string(d)
-          if (d_spec.device_type.upper() == "GPU" and
+          if (d_spec.device_type == "GPU" and
               d_spec.device_index >= num_gpus):
-            raise ValueError("Device_index on a worker should be consecutive "
-                             "and start from 0.")
+            raise ValueError("GPU `device_index` on a worker should be "
+                             "consecutive and start from 0.")
     return num_gpus
 
 
@@ -441,14 +449,12 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     """Initializes the object for local training."""
     self._local_mode = True
     assert devices, "Must specify at least one device."
+    devices = tuple(device_util.resolve(d) for d in devices)
     assert len(set(devices)) == len(devices), (
-        "No duplicates allowed in `devices` argument.")
+        "No duplicates allowed in `devices` argument: %s" % devices)
     # TODO(josh11b): Require at least 2 devices?
-    self._devices = tuple(device_util.resolve(d) for d in devices)
-    self._canonical_device_set = set(self._devices)
-    self._device_index = values.PerReplica(
-        {d: i for i, d in enumerate(devices)})
-
+    self._device_map = values.ReplicaDeviceMap(devices)
+    self._input_workers = values.InputWorkers(self._device_map)
     self._inferred_cross_device_ops = cross_device_ops_lib.choose_the_best(
         devices)
 
@@ -457,22 +463,19 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     self._local_mode = False
 
     assert devices, "Must specify at least one device."
+    devices = tuple(device_util.resolve(d) for d in devices)
     assert len(set(devices)) == len(devices), (
-        "No duplicates allowed in `devices` argument.")
+        "No duplicates allowed in `devices` argument: %s" % devices)
     # TODO(josh11b): Require at least 2 devices?
-    self._devices = tuple(device_util.resolve(d) for d in devices)
-    self._canonical_device_set = set(self._devices)
-    self._device_index = values.PerReplica(
-        {d: i for i, d in enumerate(devices)})
 
     device_dict = _group_device_list(devices)
-    self._workers = []
-    self._worker_devices = []
-    for job in ["chief", "worker"]:
+    workers = []
+    worker_devices = []
+    for job in ("chief", "worker"):
       for task in range(len(device_dict.get(job, []))):
         worker = "/job:%s/task:%d" % (job, task)
-        self._workers.append(worker)
-        self._worker_devices.append((worker, device_dict[job][task]))
+        workers.append(worker)
+        worker_devices.append((worker, device_dict[job][task]))
 
     # Setting `_default_device` will add a device scope in the
     # distribution.scope. We set the default device to the first worker. When
@@ -481,23 +484,30 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     #     ...
     # their ops will end up on the cpu device of its first worker, e.g.
     # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
-    self._default_device = self._workers[0]
+    self._default_device = workers[0]
 
+    self._device_map = values.ReplicaDeviceMap(devices)
+    self._input_workers = values.InputWorkers(self._device_map, worker_devices)
     self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
-        self._workers, _infer_num_gpus_per_worker(self._devices))
+        workers, _infer_num_gpus_per_worker(devices))
 
   def _create_variable(self, next_creator, *args, **kwargs):
     """Create a mirrored variable. See `DistributionStrategy.scope`."""
     colocate_with = kwargs.pop("colocate_with", None)
-    devices = self._get_devices_from(colocate_with)
+    if colocate_with is None:
+      device_map = self._device_map
+      logical_device = 0  # TODO(josh11b): Get logical device from scope here.
+    else:
+      device_map = colocate_with.device_map
+      logical_device = colocate_with.logical_device
 
     def _real_mirrored_creator(devices, *args, **kwargs):  # pylint: disable=g-missing-docstring
-      index = {}
+      value_list = []
       for i, d in enumerate(devices):
         with ops.init_scope(), ops.device(d):
           if i > 0:
             # Give replicas meaningful distinct names:
-            var0name = index[devices[0]].name.split(":")[0]
+            var0name = value_list[0].name.split(":")[0]
             # We append a / to variable names created on replicas with id > 0 to
             # ensure that we ignore the name scope and instead use the given
             # name as the absolute name of the variable.
@@ -505,11 +515,11 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
             # Initialize replicas with the same value:
             def initial_value_fn(device=d):
               if context.executing_eagerly():
-                init_value = index[devices[0]].value()
+                init_value = value_list[0].value()
                 return array_ops.identity(init_value)
               else:
                 with ops.device(device):
-                  init_value = index[devices[0]].initial_value
+                  init_value = value_list[0].initial_value
                   return array_ops.identity(init_value)
             kwargs["initial_value"] = initial_value_fn
           with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
@@ -518,52 +528,40 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
             with tape.stop_recording():
               v = next_creator(*args, **kwargs)
           assert not isinstance(v, values.DistributedVariable)
-          index[d] = v
-      return index
+          value_list.append(v)
+      return value_list
 
-    return _create_mirrored_variable(devices, _real_mirrored_creator, *args,
-                                     **kwargs)
+    return _create_mirrored_variable(device_map, logical_device,
+                                     _real_mirrored_creator, *args, **kwargs)
 
   def _distribute_dataset(self, dataset_fn):
     if self._local_mode:
+      worker_index = 0
       return values.PerReplicaDataset(
-          self._call_dataset_fn(dataset_fn), self._devices)
+          self._call_dataset_fn(dataset_fn), self._input_workers, worker_index)
     else:
       return values.MultiWorkerDataset(
           functools.partial(self._call_dataset_fn, dataset_fn),
-          self._worker_devices,
+          self._input_workers,
           auto_shard=False)
 
   def _make_dataset_iterator(self, dataset):
-    if self._local_mode:
-      worker = device_util.canonicalize("/device:CPU:0")
-      worker_device_pairs = [(worker, self._devices)]
-    else:
-      worker_device_pairs = self._worker_devices
-
-    return values.DatasetIterator(dataset, worker_device_pairs,
-                                  self._num_replicas_in_sync)
+    return values.DatasetIterator(
+        dataset, self._input_workers, self._num_replicas_in_sync)
 
   def _make_input_fn_iterator(
       self,
       input_fn,
       replication_mode=distribute_lib.InputReplicationMode.PER_WORKER):
     input_contexts = []
-    if self._local_mode:
-      num_workers = 1
-      worker = device_util.canonicalize("/device:CPU:0")
-      worker_device_pairs = [(worker, self._devices)]
-    else:
-      num_workers = len(self._worker_devices)
-      worker_device_pairs = self._worker_devices
-
+    num_workers = self._input_workers.num_workers
     for i in range(num_workers):
       input_contexts.append(distribute_lib.InputContext(
           num_input_pipelines=num_workers,
           input_pipeline_id=i,
           num_replicas_in_sync=self._num_replicas_in_sync))
     return values.InputFunctionIterator(
-        input_fn, worker_device_pairs, input_contexts)
+        input_fn, self._input_workers, input_contexts)
 
   # TODO(priyag): Deal with OutOfRange errors once b/111349762 is fixed.
   def _experimental_run_steps_on_iterator(self, fn, iterator, iterations,
@@ -616,8 +614,8 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
       # For outputs that have already been reduced, wrap them in a Mirrored
       # container, else in a PerReplica container.
       if reduce_op is None:
-        last_step_tensor_outputs_dict[name] = values.regroup(
-            {d: t for d, t in zip(self._devices, output)}, values.PerReplica)
+        last_step_tensor_outputs_dict[name] = values.regroup(self._device_map,
+                                                             output)
       else:
         assert len(output) == 1
         last_step_tensor_outputs_dict[name] = output[0]
@@ -634,11 +632,15 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     if isinstance(tensor, (float, int)):
       return tensor
     # TODO(josh11b): In eager mode, use one thread per device, or async mode.
-    return self._get_cross_device_ops().broadcast(
-        tensor, destinations or self._devices)
+    if not destinations:
+      # TODO(josh11b): Use current logical device instead of 0 here.
+      destinations = values.LogicalDeviceSpec(
+          device_map=self._device_map, logical_device=0)
+    return self._get_cross_device_ops().broadcast(tensor, destinations)
 
   def _call_for_each_replica(self, fn, args, kwargs):
-    return _call_for_each_replica(self._container_strategy(), fn, args, kwargs)
+    return _call_for_each_replica(self._container_strategy(), self._device_map,
+                                  fn, args, kwargs)
 
   def _configure(self,
                  session_config=None,
@@ -653,7 +655,8 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     if cluster_spec:
       # TODO(yuefengz): remove the following code once cluster_resolver is
       # added.
-      num_gpus_per_worker = _infer_num_gpus_per_worker(self._devices)
+      num_gpus_per_worker = _infer_num_gpus_per_worker(
+          self._device_map.all_devices)
       multi_worker_devices = _cluster_spec_to_device_list(
           cluster_spec, num_gpus_per_worker)
       self._initialize_multi_worker(multi_worker_devices)
@@ -677,37 +680,37 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
       # replicas in which case `value` would be a single value or value could
       # be 0.
       return cross_device_ops_lib.reduce_non_distributed_value(
-          self, reduce_op, value, destinations)
+          reduce_op, self._device_map, value, destinations)
     return self._get_cross_device_ops().reduce(
         reduce_op, value, destinations=destinations)
 
   def _batch_reduce_to(self, reduce_op, value_destination_pairs):
-    return self._get_cross_device_ops().batch_reduce(reduce_op,
-                                                     value_destination_pairs)
+    return self._get_cross_device_ops().batch_reduce(
+        reduce_op, value_destination_pairs)
 
   def _update(self, var, fn, args, kwargs, group):
     # TODO(josh11b): In eager mode, use one thread per device.
     assert isinstance(var, values.DistributedVariable)
-    updates = {}
-    for d, v in var._index.items():  # pylint: disable=protected-access
-      name = "update_%d" % self._device_index.get(d)
+    updates = []
+    for i, (d, v) in enumerate(zip(var.devices, var.values)):
+      name = "update_%d" % i
       with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name):
         # If args and kwargs are not mirrored, the value is returned as is.
-        updates[d] = fn(v,
-                        *values.select_device_mirrored(d, args),
-                        **values.select_device_mirrored(d, kwargs))
-    return values.update_regroup(self, updates, group)
+        updates.append(fn(v,
+                          *values.select_device_mirrored(d, args),
+                          **values.select_device_mirrored(d, kwargs)))
+    return values.update_regroup(self, self._device_map, updates, group)
 
   def _update_non_slot(self, colocate_with, fn, args, kwargs, group):
     assert isinstance(colocate_with, tuple)
     # TODO(josh11b): In eager mode, use one thread per device.
-    updates = {}
-    for d in colocate_with:
-      name = "update_%d" % self._device_index.get(d)
+    updates = []
+    for i, d in enumerate(colocate_with):
+      name = "update_%d" % i
       with ops.device(d), distribute_lib.UpdateContext(d), ops.name_scope(name):
-        updates[d] = fn(*values.select_device_mirrored(d, args),
-                        **values.select_device_mirrored(d, kwargs))
-    return values.update_regroup(self, updates, group)
+        updates.append(fn(*values.select_device_mirrored(d, args),
+                          **values.select_device_mirrored(d, kwargs)))
+    return values.update_regroup(self, self._device_map, updates, group)
 
   def read_var(self, replica_local_var):
     """Read the aggregate value of a replica-local variable."""
@@ -718,10 +721,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
 
   def _unwrap(self, val):
     if isinstance(val, values.DistributedValues):
-      # Return in a deterministic order.
-      if set(val.devices) == self._canonical_device_set:
-        return tuple(val.get(device=d) for d in self._devices)
-      return tuple(val.get(device=d) for d in sorted(val.devices))
+      return val.values
     return (val,)
 
   def value_container(self, val):
@@ -729,15 +729,19 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
 
   @property
   def _num_replicas_in_sync(self):
-    return len(self._devices)
+    return self._device_map.num_replicas_in_graph
 
   @property
   def worker_devices(self):
-    return self._devices
+    return self._device_map.all_devices
+
+  @property
+  def worker_devices_by_replica(self):
+    return self._device_map.devices_by_replica
 
   @property
   def parameter_devices(self):
-    return self._devices
+    return self._device_map.all_devices
 
   @property
   def experimental_between_graph(self):
@@ -757,102 +761,99 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
 
   def non_slot_devices(self, var_list):
     del var_list
-    return tuple(self._devices)
-
-  def _get_devices_from(self, colocate_with=None):
-    if colocate_with is None:
-      return self._devices
-    else:
-      return cross_device_ops_lib.get_devices_from(colocate_with)
+    # TODO(josh11b): Should this be the last logical device instead?
+    return self._device_map.logical_to_actual_devices(0)
 
   # TODO(priyag): Delete this once all strategies use global batch size.
   @property
   def _global_batch_size(self):
     return True
 
-  class _MirroredReplicaThread(threading.Thread):
-    """A thread that runs() a function on a device."""
-
-    def __init__(self, dist, coord, device, variable_creator_fn, fn, *args,
-                 **kwargs):
-      super(MirroredExtended._MirroredReplicaThread, self).__init__()  # pylint: disable=protected-access
-      self.coord = coord
-      self.distribution = dist
-      self.device = device
-      self.replica_id = dist.extended.worker_devices.index(device)
-      self.variable_creator_fn = variable_creator_fn
-      # State needed to run and return the results of `fn`.
-      self.main_fn = fn
-      self.main_args = args
-      self.main_kwargs = kwargs
-      self.main_result = None
-      self.done = False
-      # State needed to run the next merge_call() (if any) requested via
-      # ReplicaContext.
-      self.merge_fn = None
-      self.merge_args = None
-      self.merge_kwargs = None
-      self.merge_result = None
-      self.captured_name_scope = None
-      # We use a thread.Event for the main thread to signal when this
-      # thread should start running (`should_run`), and another for
-      # this thread to transfer control back to the main thread
-      # (`has_paused`, either when it gets to a
-      # `get_replica_context().merge_call` or when `fn` returns). In
-      # either case the event starts cleared, is signaled by calling
-      # set(). The receiving thread waits for the signal by calling
-      # wait() and then immediately clearing the event using clear().
-      self.should_run = threading.Event()
-      self.has_paused = threading.Event()
-      # These fields have to do with inheriting various contexts from the
-      # parent thread:
-      ctx = context.context()
-      self.in_eager = ctx.executing_eagerly()
-      # pylint: disable=protected-access
-      if not ctx._context_handle:
-        ctx._initialize_handle_and_devices()
-      self.context_device_policy = (
-          pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy(
-              ctx._context_handle))
-      self.graph = ops.get_default_graph()
-      with ops.init_scope():
-        self._init_in_eager = context.executing_eagerly()
-        self._init_graph = ops.get_default_graph()
-
-      self._variable_creator_stack = self.graph._variable_creator_stack[:]
-      self._captured_var_scope = variable_scope.get_variable_scope()
-      # Adding a "/" at end lets us re-enter this scope later.
-      self._name_scope = self.graph.get_name_scope()
-      if self._name_scope:
-        self._name_scope += "/"
-      if self.replica_id > 0:
-        if not self._name_scope:
-          self._name_scope = ""
-        self._name_scope += "replica_%d/" % self.replica_id
-
-    def run(self):
-      # pylint: disable=protected-access
-      self.should_run.wait()
-      self.should_run.clear()
-      try:
-        if self.coord.should_stop():
-          return
-        with self.coord.stop_on_exception(), \
-            _enter_graph(self._init_graph, self._init_in_eager), \
-            _enter_graph(self.graph, self.in_eager,
-                         self._variable_creator_stack), \
-            context.context().device_policy(self.context_device_policy), \
-            MirroredReplicaContext(self.distribution, constant_op.constant(
-                self.replica_id, dtypes.int32)), \
-            ops.device(self.device), \
-            ops.name_scope(self._name_scope), \
-            variable_scope.variable_scope(
-                self._captured_var_scope, reuse=self.replica_id > 0), \
-            variable_scope.variable_creator_scope(self.variable_creator_fn):
-          self.main_result = self.main_fn(*self.main_args, **self.main_kwargs)
-          self.done = True
-      finally:
-        self.has_paused.set()
+
+class _MirroredReplicaThread(threading.Thread):
+  """A thread that runs() a function on a device."""
+
+  def __init__(self, dist, coord, replica_id, device_map, variable_creator_fn,
+               fn, args, kwargs):
+    super(_MirroredReplicaThread, self).__init__()
+    self.coord = coord
+    self.distribution = dist
+    self.device_map = device_map
+    self.replica_id = replica_id
+    self.variable_creator_fn = variable_creator_fn
+    # State needed to run and return the results of `fn`.
+    self.main_fn = fn
+    self.main_args = args
+    self.main_kwargs = kwargs
+    self.main_result = None
+    self.done = False
+    # State needed to run the next merge_call() (if any) requested via
+    # ReplicaContext.
+    self.merge_fn = None
+    self.merge_args = None
+    self.merge_kwargs = None
+    self.merge_result = None
+    self.captured_name_scope = None
+    # We use a thread.Event for the main thread to signal when this
+    # thread should start running (`should_run`), and another for
+    # this thread to transfer control back to the main thread
+    # (`has_paused`, either when it gets to a
+    # `get_replica_context().merge_call` or when `fn` returns). In
+    # either case the event starts cleared, is signaled by calling
+    # set(). The receiving thread waits for the signal by calling
+    # wait() and then immediately clearing the event using clear().
+    self.should_run = threading.Event()
+    self.has_paused = threading.Event()
+    # These fields have to do with inheriting various contexts from the
+    # parent thread:
+    ctx = context.context()
+    self.in_eager = ctx.executing_eagerly()
+    # pylint: disable=protected-access
+    if not ctx._context_handle:
+      ctx._initialize_handle_and_devices()
+    self.context_device_policy = (
+        pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy(
+            ctx._context_handle))
+    self.graph = ops.get_default_graph()
+    with ops.init_scope():
+      self._init_in_eager = context.executing_eagerly()
+      self._init_graph = ops.get_default_graph()
+
+    self._variable_creator_stack = self.graph._variable_creator_stack[:]
+    self._captured_var_scope = variable_scope.get_variable_scope()
+    # Adding a "/" at end lets us re-enter this scope later.
+    self._name_scope = self.graph.get_name_scope()
+    if self._name_scope:
+      self._name_scope += "/"
+    if self.replica_id > 0:
+      if not self._name_scope:
+        self._name_scope = ""
+      self._name_scope += "replica_%d/" % self.replica_id
+
+  def run(self):
+    self.should_run.wait()
+    self.should_run.clear()
+    try:
+      if self.coord.should_stop():
+        return
+      # TODO(josh11b): Use current logical device instead of 0 here.
+      with self.coord.stop_on_exception(), \
+          _enter_graph(self._init_graph, self._init_in_eager), \
+          _enter_graph(self.graph, self.in_eager,
+                       self._variable_creator_stack), \
+          context.context().device_policy(self.context_device_policy), \
+          MirroredReplicaContext(self.distribution, constant_op.constant(
+              self.replica_id, dtypes.int32)), \
+          ops.device(self.device_map.logical_to_actual_devices(0)[
+              self.replica_id]), \
+          ops.name_scope(self._name_scope), \
+          variable_scope.variable_scope(
+              self._captured_var_scope, reuse=self.replica_id > 0), \
+          variable_scope.variable_creator_scope(self.variable_creator_fn):
+        self.main_result = self.main_fn(*self.main_args, **self.main_kwargs)
+        self.done = True
+    finally:
+      self.has_paused.set()
 
 
 class MirroredReplicaContext(distribute_lib.ReplicaContext):
@@ -888,4 +889,5 @@ class MirroredReplicaContext(distribute_lib.ReplicaContext):
   def devices(self):
     distribute_lib.require_replica_context(self)
     replica_id = tensor_util.constant_value(self._replica_id_in_sync_group)
-    return [self._distribution_strategy.extended.worker_devices[replica_id]]
+    extended = self._distribution_strategy.extended
+    return extended.worker_devices_by_replica[replica_id]
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index 538f214602..a2c834f893 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -12,10 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Various classes representing distributed values.
-
-See go/tf-distribution-strategy.
-"""
+"""Various classes representing distributed values."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -50,54 +47,262 @@ from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.util import nest
 
 
-# pylint: disable=line-too-long
-# TODO(josh11b): Should device values be strings or DeviceSpec objects?
-# Not sure DeviceSpec objects are usable as a dict key.
+def _devices_match(d1, d2):
+  return device_util.canonicalize(d1) == device_util.canonicalize(d2)
+
+
+class DeviceMap(object):
+  """A mapping of replicas & logical device ids to devices."""
+
+  @property
+  def all_devices(self):
+    """Returns a tuple of strings with all devices in this DeviceMap."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  @property
+  def devices_by_replica(self):
+    """Returns a tuple `t` where `t[replica]` is the devices for `replica`."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  @property
+  def num_logical_devices(self):
+    """Count of the number of devices each replica may be defined across."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  @property
+  def num_replicas_in_graph(self):
+    """Number of replicas defined in this graph."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  def logical_device_from_values(self, values):
+    """Returns the logical device index `values` is on."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  def logical_to_actual_devices(self, logical_device_id):
+    """Returns sequence of `num_replicas_in_graph` devices."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  def select_for_current_replica(self, values, replica_context):
+    """Select the element of `values` for the current replica."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  def replica_for_device(self, device):
+    """Return the replica id containing `device`."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  def select_for_device(self, values, device):
+    """Select the element of `values` to access from `device`."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+  def is_device_in_replica(self, device, replica_id):
+    """Returns whether `device` is a member of replica `replica_id`."""
+    raise NotImplementedError("Required for DeviceMap implementations.")
+
+
+class SingleDeviceMap(DeviceMap):
+  """A device map for 1 non-computation device.
+
+  Use `SingleDeviceMap` when the device does not correspond to some replica of
+  the computation. For computation devices, use `ReplicaDeviceMap` below (even
+  if there is only a single device in the map).
+  """
+
+  def __init__(self, device):
+    """Initialize a `SingleDeviceMap`.
+
+    Args:
+      device: A string device.
+    """
+    assert isinstance(device, six.string_types)
+    self._device = device_util.canonicalize(device)
+    self._devices = (self._device,)
+
+  @property
+  def all_devices(self):
+    return self._devices
+
+  @property
+  def devices_by_replica(self):
+    raise ValueError("SingleDeviceMap not indexed by replicas")
+
+  @property
+  def num_logical_devices(self):
+    return 1
+
+  @property
+  def num_replicas_in_graph(self):
+    return 1
+
+  def logical_device_from_values(self, values):
+    del values
+    return 0
+
+  def logical_to_actual_devices(self, logical_device_id):
+    assert logical_device_id == 0
+    return self._devices
+
+  def select_for_current_replica(self, values, replica_context):
+    assert len(values) == 1
+    del replica_context
+    return values[0]
+
+  def replica_for_device(self, device):
+    raise ValueError("SingleDeviceMap not indexed by replicas")
+
+  def select_for_device(self, values, device):
+    assert len(values) == 1
+    if self._device != device:
+      raise ValueError("Device %s not found in %s (current device %s)" %
+                       (device, self._devices, device_util.current()))
+    return values[0]
+
+  def is_device_in_replica(self, device, replica_id):
+    raise ValueError("SingleDeviceMap not indexed by replicas")
+
+  def __repr__(self):
+    return "%s(%r)" % (self.__class__.__name__, self._device)
+
+
+class ReplicaDeviceMap(DeviceMap):
+  """A device map for 1 device per replica."""
+
+  def __init__(self, devices):
+    """Initialize a `ReplicaDeviceMap`.
+
+    Args:
+      devices: `devices[i]` is the string device for replica `i`.
+    """
+    self._devices = tuple(device_util.canonicalize(d) for d in devices)
+    if len(set(self._devices)) != len(self._devices):
+      raise ValueError("Duplicate devices in %s, after canonicalization: %s" %
+                       (devices, self._devices))
+    self._device_to_replica = {d: r for r, d in enumerate(self._devices)}
+
+  @property
+  def all_devices(self):
+    return self._devices
+
+  @property
+  def devices_by_replica(self):
+    return ((d,) for d in self._devices)
+
+  @property
+  def num_logical_devices(self):
+    return 1
+
+  @property
+  def num_replicas_in_graph(self):
+    return len(self._devices)
+
+  def logical_device_from_values(self, values):
+    del values
+    return 0
+
+  def logical_to_actual_devices(self, logical_device_id):
+    assert logical_device_id == 0
+    return self._devices
+
+  def select_for_current_replica(self, values, replica_context):
+    assert len(values) == len(self._devices)
+    replica_id = replica_context.replica_id_in_sync_group
+    if not isinstance(replica_id, int):
+      replica_id = tensor_util.constant_value(replica_id)
+    return values[replica_id]
+
+  def replica_for_device(self, device):
+    return self._device_to_replica.get(device)
+
+  def select_for_device(self, values, device):
+    assert len(values) == len(self._devices)
+    replica_id = self._device_to_replica.get(device)
+    if replica_id is None:
+      raise ValueError("Device %s not found in %s (current device %s)" %
+                       (device, self._devices, device_util.current()))
+    return values[replica_id]
+
+  def is_device_in_replica(self, device, replica_id):
+    return _devices_match(device, self._devices[replica_id])
+
+  def __str__(self):
+    return "[%s]" % (", ".join(self._devices))
+
+  def __repr__(self):
+    return "%s([%s])" % (self.__class__.__name__,
+                         ", ".join(repr(d) for d in self._devices))
+
+
+LogicalDeviceSpec = collections.namedtuple(
+    "LogicalDeviceSpec", ("device_map", "logical_device"))
+
+
 class DistributedValues(object):
   """Holds a map from device to values. Either PerReplica or Mirrored."""
 
-  def __init__(self, index):
-    self._index = {device_util.canonicalize(key): value
-                   for key, value in six.iteritems(index)}
+  def __init__(self, device_map, values, logical_device=None):
+    assert isinstance(device_map, DeviceMap)
+    self._device_map = device_map
+    self._values = tuple(values)
+    if logical_device is None:
+      logical_device = device_map.logical_device_from_values(self._values)
+    self._logical_device = logical_device
 
+  # TODO(josh11b): Split this into two functions, one with device, one without.
   def get(self, device=None):
     """Returns the value for the current device or raises a ValueError."""
     if device is None:
       replica_context = distribution_strategy_context.get_replica_context()
       if replica_context:
-        # TODO(josh11b): support model parallelism better here
-        device = replica_context.devices[0]
+        return self._device_map.select_for_current_replica(
+            self._values, replica_context)
       else:
         device = distribute_lib.get_update_device()
         if device is None:
           return self._get_cross_replica()
     device = device_util.canonicalize(device)
-    try:
-      return self._index[device]
-    except KeyError as e:
-      six.raise_from(
-          ValueError("Device %s not found in %s (current device %s)" %
-                     (device, self._index.keys(), device_util.current())), e)
+    return self._device_map.select_for_device(self._values, device)
+
+  @property
+  def primary(self):
+    """Returns a representative component."""
+    return self._values[0]
 
   @property
   def devices(self):
-    return list(self._index.keys())
+    return self._device_map.logical_to_actual_devices(self._logical_device)
+
+  @property
+  def logical_device(self):
+    return self._logical_device
+
+  @property
+  def device_map(self):
+    return self._device_map
+
+  # TODO(josh11b): Replace unwrap with this?
+  @property
+  def values(self):
+    return self._values
 
   @property
   def is_tensor_like(self):
-    for v in self._index.values():
+    for v in self._values:
       if not tensor_util.is_tensor(v):
         return False
     return True
 
   def __str__(self):
-    return "%s:%s" % (self.__class__.__name__, self._index)
+    devices = self.devices
+    assert len(self._values) == len(devices)
+    debug_str = ",\n".join("  %d %s: %s" % (i, devices[i], self._values[i])
+                           for i in range(len(devices)))
+    return "%s:{\n%s\n}" % (self.__class__.__name__, debug_str)
 
   def __repr__(self):
-    return "%s(%r)" % (self.__class__.__name__, self._index)
-
-  # TODO(josh11b): Possibly make an accessor for _index for use by
-  # DistributionStrategy implementations.
+    devices = self.devices
+    assert len(self._values) == len(devices)
+    debug_repr = ",\n".join("  %d %s: %r" % (i, devices[i], self._values[i])
+                            for i in range(len(devices)))
+    return "%s:{\n%s\n}" % (self.__class__.__name__, debug_repr)
 
 
 # NOTE(josh11b,apassos): It would be great if we could inspect the values this was
@@ -190,9 +395,10 @@ class Mirrored(DistributedDelegate):
 
   def _get_cross_replica(self):
     device = device_util.canonicalize(device_util.current())
-    if device in self._index:
-      return self._index[device]
-    return list(self._index.values())[0]
+    replica_id = self._device_map.replica_for_device(device)
+    if replica_id is None:
+      return self.primary
+    return self._values[replica_id]
 
   def _as_graph_element(self):
     obj = self.get()
@@ -216,13 +422,13 @@ class DistributedVariable(DistributedDelegate):
   # TODO(josh11b): Support changing the set of variables if e.g. if new
   # devices are joining or a device is to leave.
 
-  def __init__(self, index):
-    # Child class must set self._primary_var before calling
-    # super(...).__init__(index).
-    self._common_name = self._primary_var.name.split(":")[0]
+  def __init__(self, device_map, values, logical_device=None):
+    super(DistributedVariable, self).__init__(
+        device_map, values, logical_device=logical_device)
+    self._common_name = self.primary.name.split(":")[0]
     # Use a weakref to make it easy to map from the contained values
     # to the container without introducing a reference cycle.
-    for v in six.itervalues(index):
+    for v in values:
       v._distributed_container = weakref.ref(self)  # pylint: disable=protected-access
     # tf.keras keeps track of variables initialized using this attribute. When
     # tf.keras gets the default session, it initializes all uninitialized vars.
@@ -235,7 +441,6 @@ class DistributedVariable(DistributedDelegate):
     # when restoring from a checkpoint, we may set the _initializer_op
     # property on the entire `DistributedVariable`.
     self._initializer_op = None
-    super(DistributedVariable, self).__init__(index)
 
   def is_initialized(self, name=None):
     """Identifies if all the component variables are initialized.
@@ -247,18 +452,14 @@ class DistributedVariable(DistributedDelegate):
       The op that evaluates to True or False depending on if all the
       component variables are initialized.
     """
-    # We have to cast the self._index.values() to a `list` because when we
-    # use `model_to_estimator` to run tf.keras models, self._index.values() is
-    # of type `dict_values` and not `list`.
-    values_list = list(self._index.values())
-    result = values_list[0].is_initialized()
+    result = self.primary.is_initialized()
     # We iterate through the list of values except the last one to allow us to
     # name the final `logical_and` op the same name that is passed by the user
     # to the `is_initialized` op. For distributed variables, the
     # `is_initialized` op is a `logical_and` op.
-    for v in values_list[1:-1]:
+    for v in self._values[1:-1]:
       result = math_ops.logical_and(result, v.is_initialized())
-    result = math_ops.logical_and(result, values_list[-1].is_initialized(),
+    result = math_ops.logical_and(result, self._values[-1].is_initialized(),
                                   name=name)
     return result
 
@@ -269,13 +470,34 @@ class DistributedVariable(DistributedDelegate):
     else:
       # return grouped ops of all the var initializations of component values of
       # the mirrored variable
-      init_op = control_flow_ops.group(
-          [v.initializer for v in self._index.values()])
+      init_op = control_flow_ops.group(tuple(
+          v.initializer for v in self._values))
     return init_op
 
+  def _get_closest(self):
+    """Return member in the same replica if possible, else the primary."""
+    replica_context = distribution_strategy_context.get_replica_context()
+    if replica_context:
+      return self._device_map.select_for_current_replica(
+          self._values, replica_context)
+    device = distribute_lib.get_update_device()
+    if device is None:
+      device = device_util.canonicalize(device_util.current())
+    replica_id = self._device_map.replica_for_device(device)
+    if replica_id is None:
+      return self.primary
+    return self._values[replica_id]
+
+  def initialized_value(self):
+    return self._get_closest().initialized_value()
+
+  @property
+  def initial_value(self):
+    return self._get_closest().initial_value
+
   @property
   def graph(self):
-    return self._primary_var.graph
+    return self.primary.graph
 
   @property
   def _shared_name(self):
@@ -283,25 +505,25 @@ class DistributedVariable(DistributedDelegate):
 
   @property
   def _unique_id(self):
-    return self._primary_var._unique_id   # pylint: disable=protected-access
+    return self.primary._unique_id   # pylint: disable=protected-access
 
   @property
   def name(self):
-    return self._primary_var.name
+    return self.primary.name
 
   @property
   def dtype(self):
-    return self._primary_var.dtype
+    return self.primary.dtype
 
   @property
   def shape(self):
-    return self._primary_var.shape
+    return self.primary.shape
 
   def get_shape(self):
-    return self._primary_var.get_shape()
+    return self.primary.get_shape()
 
   def to_proto(self, export_scope=None):
-    return self._primary_var.to_proto(export_scope=export_scope)
+    return self.primary.to_proto(export_scope=export_scope)
 
   @property
   def op(self):
@@ -309,14 +531,14 @@ class DistributedVariable(DistributedDelegate):
     # to work (even if the current device isn't in self.devices), but
     # other uses of var.op in a cross-replica context to fail.
     if distribution_strategy_context.get_cross_replica_context():
-      return DistributedVarOp(self._primary_var.op.name,
-                              self._primary_var.op.graph,
-                              self._primary_var.op.type)
+      return DistributedVarOp(self.primary.op.name,
+                              self.primary.op.graph,
+                              self.primary.op.type)
     return self.get().op
 
   @property
   def _in_graph_mode(self):
-    return self._primary_var._in_graph_mode   # pylint: disable=protected-access
+    return self.primary._in_graph_mode   # pylint: disable=protected-access
 
   def read_value(self):
     strategy = distribution_strategy_context.get_distribution_strategy()
@@ -348,19 +570,19 @@ class _MirroredSaveable(saver.BaseSaverBuilder.ResourceVariableSaveable):
   def restore(self, restored_tensors, restored_shapes):
     """Restore the same value into all variables."""
     tensor, = restored_tensors
-    return control_flow_ops.group([
-        _assign_on_device(d, v, tensor)
-        for d, v in six.iteritems(self._mirrored_variable._index)])  # pylint: disable=protected-access
+    return control_flow_ops.group(tuple(
+        _assign_on_device(v.device, v, tensor)
+        for v in self._mirrored_variable.values))
 
 
 class MirroredVariable(DistributedVariable, Mirrored,
                        checkpointable.CheckpointableBase):
   """Holds a map from device to variables whose values are kept in sync."""
 
-  def __init__(self, index, primary_var, aggregation):
-    self._primary_var = primary_var
+  def __init__(self, device_map, values, aggregation, logical_device=None):
+    super(MirroredVariable, self).__init__(
+        device_map, values, logical_device=logical_device)
     self._aggregation = aggregation
-    super(MirroredVariable, self).__init__(index)
 
   # The arguments to update() are automatically unwrapped so the update()
   # function would normally see regular variables, not MirroredVariables.
@@ -379,7 +601,7 @@ class MirroredVariable(DistributedVariable, Mirrored,
         return f(v, *args, **kwargs)
 
       # We are calling assign on the mirrored variable in cross replica context,
-      # use update to update the variable.
+      # use `strategy.update()` to update the variable.
       strategy = distribution_strategy_context.get_distribution_strategy()
       return strategy.update(self, f, *args, **kwargs)
     else:
@@ -419,14 +641,15 @@ class MirroredVariable(DistributedVariable, Mirrored,
 
   def _get_cross_replica(self):
     device = device_util.canonicalize(device_util.current())
-    if device in self._index:
-      return array_ops.identity(self._index[device])
-    return array_ops.identity(self._primary_var)
+    replica_id = self._device_map.replica_for_device(device)
+    if replica_id is None:
+      return array_ops.identity(self.primary)
+    return array_ops.identity(self._values[replica_id])
 
   def _as_graph_element(self):
     # pylint: disable=protected-access
     if distribution_strategy_context.get_cross_replica_context():
-      return self._primary_var._as_graph_element()
+      return self.primary._as_graph_element()
     return self.get()._as_graph_element()
 
   def _gather_saveables_for_checkpoint(self):
@@ -439,7 +662,7 @@ class MirroredVariable(DistributedVariable, Mirrored,
       A dictionary mapping attribute names to `SaveableObject` factories.
     """
     def _saveable_factory(name=self._common_name):
-      return _MirroredSaveable(self, self._primary_var, name)
+      return _MirroredSaveable(self, self.primary, name)
     return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory}
 
 
@@ -475,18 +698,22 @@ def _enclosing_tpu_context():
 class TPUMirroredVariable(checkpointable.CheckpointableBase):
   """Holds a map from device to TPU variables whose values are kept in sync."""
 
-  def __init__(self, index, primary_var, aggregation):
+  def __init__(self, device_map, values, aggregation, logical_device=None):
+    assert isinstance(device_map, DeviceMap)
+    self._device_map = device_map
+    self._values = tuple(values)
+    if logical_device is None:
+      logical_device = device_map.logical_device_from_values(self._values)
+    self._logical_device = logical_device
+
     # Use a weakref to make it easy to map from the contained values
     # to the container without introducing a reference cycle.
-    for v in six.itervalues(index):
+    for v in self._values:
       v._mirrored_container = weakref.ref(self)  # pylint: disable=protected-access
-    self._index = {device_util.canonicalize(key): value
-                   for key, value in six.iteritems(index)}
-    self._primary_var = primary_var
-    self._common_name = self._primary_var.name.split(":")[0]
+    self._common_name = self.primary.name.split(":")[0]
     self._aggregation = aggregation
     # Needed for GradientTape
-    self._trainable = self._primary_var.trainable
+    self._trainable = self.primary.trainable
     # Typically like `DistributedVariable`, a `TPUMirroredVariable`'s
     # initializer is composed of the initializers of the components variables.
     # However, in some cases, such as when restoring from a checkpoint, we may
@@ -498,19 +725,36 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
     if device is None:
       replica_context = distribution_strategy_context.get_replica_context()
       if replica_context:
-        # TODO(josh11b): support model parallelism better here
-        device = replica_context.devices[0]
+        return self._device_map.select_for_current_replica(
+            self._values, replica_context)
       else:
         device = distribute_lib.get_update_device()
         if device is None:
           return self._get_cross_replica()
     device = device_util.canonicalize(device)
-    try:
-      return self._index[device]
-    except KeyError as e:
-      six.raise_from(
-          ValueError("Device %s not found in %s (current device %s)" %
-                     (device, self._index.keys(), device_util.current())), e)
+    return self._device_map.select_for_device(self._values, device)
+
+  @property
+  def primary(self):
+    """Returns a representative component."""
+    return self._values[0]
+
+  @property
+  def devices(self):
+    return self._device_map.logical_to_actual_devices(self._logical_device)
+
+  @property
+  def logical_device(self):
+    return self._logical_device
+
+  @property
+  def device_map(self):
+    return self._device_map
+
+  # TODO(josh11b): Replace unwrap with this?
+  @property
+  def values(self):
+    return self._values
 
   # pylint: disable=multiple-statements
   def __add__(self, o): return self.read_value() + o
@@ -571,10 +815,16 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
       return NotImplemented
 
   def __str__(self):
-    return "%s:%s" % (self.__class__.__name__, self._index)
+    devices = self.devices
+    debug_str = ",\n".join("  %d %s: %s" % (i, devices[i], self._values[i])
+                           for i in range(len(devices)))
+    return "%s:{\n%s\n}" % (self.__class__.__name__, debug_str)
 
   def __repr__(self):
-    return "%s(%r)" % (self.__class__.__name__, self._index)
+    devices = self.devices
+    debug_repr = ",\n".join("  %d %s: %r" % (i, devices[i], self._values[i])
+                            for i in range(len(devices)))
+    return "%s:{\n%s\n}" % (self.__class__.__name__, debug_repr)
 
   @property
   def handle(self):
@@ -582,18 +832,12 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
     tpu_context = _enclosing_tpu_context()
     if tpu_context is not None:
       return tpu_context.get_replicated_var_handle(
-          self._common_name, nest.flatten(self._index))
+          self._common_name, self._values)
 
     device = distribute_lib.get_update_device()
     if device is None:
-      return self._primary_var.handle
-    device = device_util.canonicalize(device)
-    try:
-      return self._index[device].handle
-    except KeyError as e:
-      six.raise_from(
-          ValueError("Device %s not found in %s (current device %s)" %
-                     (device, self._index.keys(), device_util.current())), e)
+      return self.primary.handle
+    return self._get(device=device).handle
 
   @property
   def device(self):
@@ -729,13 +973,13 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
     if self._initializer_op:
       init_op = self._initializer_op
     else:
-      init_op = control_flow_ops.group(
-          [v.initializer for v in self._index.values()])
+      init_op = control_flow_ops.group(tuple(
+          v.initializer for v in self._values))
     return init_op
 
   @property
   def graph(self):
-    return self._primary_var.graph
+    return self.primary.graph
 
   @property
   def _shared_name(self):
@@ -743,36 +987,37 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
 
   @property
   def _unique_id(self):
-    return self._primary_var._unique_id  # pylint: disable=protected-access
+    return self.primary._unique_id  # pylint: disable=protected-access
 
   @property
   def name(self):
-    return self._primary_var.name
+    return self.primary.name
 
   @property
   def dtype(self):
-    return self._primary_var.dtype
+    return self.primary.dtype
 
   @property
   def shape(self):
-    return self._primary_var.shape
+    return self.primary.shape
 
   def get_shape(self):
-    return self._primary_var.get_shape()
+    return self.primary.get_shape()
 
   def to_proto(self, export_scope=None):
-    return self._primary_var.to_proto(export_scope=export_scope)
+    return self.primary.to_proto(export_scope=export_scope)
 
   def _get_cross_replica(self):
     device = device_util.canonicalize(device_util.current())
-    if device in self._index:
-      return self._index[device]
-    return self._primary_var
+    replica = self._device_map.replica_for_device(device)
+    if replica is None:
+      return self.primary
+    return self._values[replica]
 
   def _as_graph_element(self):
     # pylint: disable=protected-access
     if distribution_strategy_context.get_cross_replica_context():
-      return self._primary_var._as_graph_element()
+      return self.primary._as_graph_element()
     return self._read_variable_op()
 
   def _gather_saveables_for_checkpoint(self):
@@ -785,7 +1030,7 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
       A dictionary mapping attribute names to `SaveableObject` factories.
     """
     def _saveable_factory(name=self._common_name):
-      return _MirroredSaveable(self, self._primary_var, name)
+      return _MirroredSaveable(self, self.primary, name)
     return {checkpointable.VARIABLE_VALUE_KEY: _saveable_factory}
 
   def _should_act_as_resource_variable(self):
@@ -795,23 +1040,23 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
   # Needed to pass ResourceVariable checks.
   @property
   def op(self):
-    return self._primary_var.op
+    return self.primary.op
 
   # pylint: disable=protected-access
   @property
   def _save_slice_info(self):
-    return self._primary_var._save_slice_info
+    return self.primary._save_slice_info
 
   def _get_save_slice_info(self):
-    return self._primary_var._get_save_slice_info()
+    return self.primary._get_save_slice_info()
 
   def _set_save_slice_info(self, save_slice_info):
-    return self._primary_var._set_save_slice_info(save_slice_info)
+    return self.primary._set_save_slice_info(save_slice_info)
   # pylint: enable=protected-access
 
   @property
   def _in_graph_mode(self):
-    return self._primary_var._in_graph_mode   # pylint: disable=protected-access
+    return self.primary._in_graph_mode   # pylint: disable=protected-access
 
   def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False):
     """Converts a variable to a tensor."""
@@ -838,18 +1083,14 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
     """
     # TODO(jhseu): Do we need TPU context implementation?
 
-    # We have to cast the self._index.values() to a `list` because when we
-    # use `model_to_estimator` to run tf.keras models, self._index.values() is
-    # of type `dict_values` and not `list`.
-    values_list = nest.flatten(self._index)
-    result = values_list[0].is_initialized()
+    result = self.primary.is_initialized()
     # We iterate through the list of values except the last one to allow us to
     # name the final `logical_and` op the same name that is passed by the user
     # to the `is_initialized` op. For distributed variables, the
     # `is_initialized` op is a `logical_and` op.
-    for v in values_list[1:-1]:
+    for v in self._values[1:-1]:
       result = math_ops.logical_and(result, v.is_initialized())
-    result = math_ops.logical_and(result, values_list[-1].is_initialized(),
+    result = math_ops.logical_and(result, self._values[-1].is_initialized(),
                                   name=name)
     return result
 
@@ -899,10 +1140,10 @@ class ReplicaLocalVariable(DistributedVariable, PerReplica,
                            checkpointable.CheckpointableBase):
   """Holds a map from device to variables whose values are reduced on save."""
 
-  def __init__(self, index, primary_var, aggregation):
-    self._primary_var = primary_var
+  def __init__(self, device_map, values, aggregation, logical_device=None):
     self._aggregation = aggregation
-    super(ReplicaLocalVariable, self).__init__(index)
+    super(ReplicaLocalVariable, self).__init__(
+        device_map, values, logical_device=logical_device)
 
   def assign_sub(self, *args, **kwargs):
     _assert_replica_context()
@@ -920,9 +1161,8 @@ class ReplicaLocalVariable(DistributedVariable, PerReplica,
       tensor = args[0]
       if self._aggregation == vs.VariableAggregation.SUM:
         tensor *= 1. / len(self.devices)
-      return control_flow_ops.group(
-          [_assign_on_device(d, v, tensor)
-           for d, v in six.iteritems(self._index)])
+      return control_flow_ops.group(tuple(
+          _assign_on_device(v.device, v, tensor) for v in self._values))
     else:
       _assert_replica_context()
       return self.get().assign(*args, **kwargs)
@@ -933,12 +1173,11 @@ class ReplicaLocalVariable(DistributedVariable, PerReplica,
 
   def _get_cross_replica(self):
     if self._aggregation == vs.VariableAggregation.ONLY_FIRST_REPLICA:
-      return self._primary_var
-    all_components = tuple(self._index.values())
+      return self.primary
     # TODO(josh11b): Use a strategy-specific method.
-    total = math_ops.add_n(all_components)
+    total = math_ops.add_n(self._values)
     if self._aggregation == vs.VariableAggregation.MEAN:
-      return total * (1./ len(all_components))
+      return total * (1./ len(self._values))
     return total
 
   def _as_graph_element(self):
@@ -972,30 +1211,27 @@ ops.register_tensor_conversion_function(ReplicaLocalVariable,
                                         _tensor_conversion_replica_local)
 
 
-def _devices_match(d1, d2):
-  return device_util.canonicalize(d1) == device_util.canonicalize(d2)
-
-
-def regroup(per_replica, wrap_class=PerReplica):
-  """Makes device->nest map into a nest of PerReplica/Mirrored values."""
-  items = list(per_replica.items())
-  assert items
-  v0 = items[0][1]  # First value
+def regroup(device_map, values, wrap_class=PerReplica):
+  """Makes a nest per-replica into a nest of PerReplica/Mirrored values."""
+  assert isinstance(device_map, DeviceMap)
+  assert len(values) == device_map.num_replicas_in_graph
+  v0 = values[0]
 
   if isinstance(v0, list):
-    for _, v in items[1:]:
+    for v in values[1:]:
       assert isinstance(v, list)
       assert len(v) == len(v0), ("len(v) == %d, len(v0) == %d, v: %s, v0: %s" %
                                  (len(v), len(v0), v, v0))
-    return [regroup({k: v[i] for k, v in items}, wrap_class)
+    return [regroup(device_map, tuple(v[i] for v in values), wrap_class)
             for i in range(len(v0))]
 
   if isinstance(v0, tuple):
-    for _, v in items[1:]:
+    for v in values[1:]:
       assert isinstance(v, tuple)
       assert len(v) == len(v0)
-    regrouped_tuple = tuple(regroup({k: v[i] for k, v in items}, wrap_class)
-                            for i in range(len(v0)))
+    regrouped_tuple = tuple(
+        regroup(device_map, tuple(v[i] for v in values), wrap_class)
+        for i in range(len(v0)))
     if hasattr(v0, "_fields"):
       # This tuple is in fact a namedtuple! Create a new namedtuple instance
       # and initialize it with the regrouped values:
@@ -1006,15 +1242,16 @@ def regroup(per_replica, wrap_class=PerReplica):
 
   if isinstance(v0, dict):
     v0keys = set(v0.keys())
-    for _, v in items[1:]:
-      assert isinstance(v, dict)
-      assert set(v.keys()) == v0keys
-    return {key: regroup({k: v[key] for k, v in items}, wrap_class)
+    for v in values[1:]:
+      assert isinstance(v, dict), ("v[0]: %r  v[i]: %r" % (v0, v))
+      assert set(v.keys()) == v0keys, ("v[0].keys: %s  v[i].keys: %s" %
+                                       (v0keys, set(v.keys())))
+    return {key: regroup(device_map, tuple(v[key] for v in values), wrap_class)
             for key in v0keys}
 
   # If exactly the same object across all devices, return it unwrapped.
   same_id = True
-  for _, v in items[1:]:
+  for v in values[1:]:
     if v is not v0:
       same_id = False
       break
@@ -1043,25 +1280,26 @@ def regroup(per_replica, wrap_class=PerReplica):
   if hasattr(v0, "_distributed_container"):
     # pylint: disable=protected-access
     assert not isinstance(v0, MirroredVariable), (
-        "ids = %s, items = %s" % ([id(v[1]) for v in items], items))
-    assert _devices_match(v0.device, items[0][0]), (
-        "v0.device = %s, items = %s" % (v0.device, items))
+        "ids = %s, values = %s" % ([id(v) for v in values], values))
+    assert device_map.is_device_in_replica(v0.device, 0), (
+        "v0.device = %s, device_map = %s" % (v0.device, device_map))
     distributed_container = v0._distributed_container()
     assert distributed_container is not None
-    for d, v in items[1:]:
-      assert _devices_match(v.device, d), (
-          "v.device = %s, d = %s, items = %s" % (v.device, d, items))
+    for r, v in enumerate(values[1:]):
+      assert device_map.is_device_in_replica(v.device, r + 1), (
+          "v.device = %s, r = %d, device_map = %s" %
+          (v.device, r + 1, device_map))
       assert distributed_container is v._distributed_container()
     return distributed_container
   # pylint: enable=protected-access
 
-  return wrap_class(per_replica)
+  return wrap_class(device_map, values)
 
 
-def select_device(device, structured):
-  """Specialize a nest of regular & per-replica values for one device."""
+def select_replica(replica_id, structured):
+  """Specialize a nest of regular & per-replica values for one replica."""
   def _get(x):
-    return x.get(device) if isinstance(x, DistributedValues) else x
+    return x.values[replica_id] if isinstance(x, DistributedValues) else x
 
   return nest.map_structure(_get, structured)
 
@@ -1081,9 +1319,11 @@ def select_device_mirrored(device, structured):
   return nest.map_structure(_get_mirrored, structured)
 
 
-def update_regroup(extended, updates, group):
+def update_regroup(extended, device_map, updates, group):
   """Regroup for an update, with dependencies to ensure all updates execute."""
-  regrouped = regroup(updates, Mirrored)
+  # TODO(josh11b): Replace "Mirrored" here with a function that does the following
+  # so we can avoid all these nest operations.
+  regrouped = regroup(device_map, updates, Mirrored)
   if not group:
     return nest.map_structure(extended._unwrap, regrouped)  # pylint: disable=protected-access
   grouped_flat = []
@@ -1093,47 +1333,113 @@ def update_regroup(extended, updates, group):
       if u.is_tensor_like:
         # Make sure we run all updates. Without this, something like
         # session.run(extended.update(...)) may only update one replica.
-        index = {}
+        values = []
         for d in u.devices:
           with ops.device(d), ops.control_dependencies([g]):
-            index[d] = array_ops.identity(u.get(d))
-        g = Mirrored(index)
+            values.append(array_ops.identity(u.get(d)))
+        g = Mirrored(u.device_map, values)
     else:
       g = u
     grouped_flat.append(g)
   return nest.pack_sequence_as(regrouped, grouped_flat)
 
 
+class InputWorkers(object):
+  """A 1-to-many mapping from input worker devices to compute devices."""
+
+  def __init__(self, device_map, worker_device_pairs=None, logical_device=0):
+    """Initialize an `InputWorkers` object.
+
+    Args:
+      device_map: A `DeviceMap` with the computation devices fed by the
+        input workers.
+      worker_device_pairs: A sequence of pairs:
+        `(input device, a tuple of compute devices fed by that input device)`.
+      logical_device: The logical device of `device_map` to feed.
+    """
+    self._device_map = device_map
+    self._logical_device = logical_device
+    if worker_device_pairs is None:
+      worker_device_pairs = ((
+          device_util.canonicalize("/device:CPU:0"),
+          device_map.logical_to_actual_devices(logical_device)),)
+    self._input_worker_devices = tuple(d for d, _ in worker_device_pairs)
+    self._fed_devices = tuple(tuple(device_util.canonicalize(d) for d in f)
+                              for _, f in worker_device_pairs)
+    flattened = tuple(d for l in self._fed_devices for d in l)
+    assert (flattened ==
+            device_map.logical_to_actual_devices(logical_device)), (
+                "flattened: %s logical device %d: %s" %
+                (flattened, logical_device,
+                 device_map.logical_to_actual_devices(logical_device)))
+
+  @property
+  def device_map(self):
+    return self._device_map
+
+  @property
+  def logical_device(self):
+    return self._logical_device
+
+  @property
+  def num_workers(self):
+    return len(self._input_worker_devices)
+
+  @property
+  def worker_devices(self):
+    return self._input_worker_devices
+
+  def compute_devices_for_worker(self, worker_index):
+    return self._fed_devices[worker_index]
+
+  def __repr__(self):
+    devices = self.worker_devices
+    debug_repr = ",\n".join("  %d %s: %s" %
+                            (i, devices[i], self._fed_devices[i])
+                            for i in range(len(devices)))
+    return "%s:{\n%s\n  device_map: %s}" % (
+        self.__class__.__name__, debug_repr, self._device_map)
+
+
 class PerReplicaDataIterator(object):
   """An iterator (like `tf.data.Iterator`) into a `PerReplicaDataset`."""
 
-  def __init__(self, iterator, devices, prefetch_on_device=None):
+  def __init__(self, iterator, input_workers, worker_index, prefetch_on_device):
+    assert isinstance(input_workers, InputWorkers)
     self._iterator = iterator
-    self._devices = devices
+    self._input_workers = input_workers
+    self._worker_index = worker_index
     self._prefetch_on_device = prefetch_on_device
 
   @property
   def initializer(self):
     return self._iterator.initializer
 
-  def get_next(self, name=None):
+  def get_next_as_list(self, name=None):
     """Scatter the input across devices."""
     if self._prefetch_on_device:
       data_list = self._iterator.get_next()
-      index = dict(zip(self._devices, data_list))
     else:
       batch = self._iterator.get_next(name=name)
-      index = {}
+      data_list = []
       def get_ith(i):
         return lambda x: x[i]
 
-      for i, d in enumerate(self._devices):
-        index[d] = nest.map_structure(get_ith(i), batch)
+      devices = self._input_workers.compute_devices_for_worker(
+          self._worker_index)
+      for i, d in enumerate(devices):
+        v = nest.map_structure(get_ith(i), batch)
         if context.executing_eagerly():
           with ops.device(d):
-            index[d] = nest.map_structure(array_ops.identity, index[d])
+            v = nest.map_structure(array_ops.identity, v)
+        data_list.append(v)
+
+    return data_list
 
-    return regroup(index)
+  def get_next(self, name=None):
+    assert self._input_workers.num_workers == 1
+    data_list = self.get_next_as_list(name)
+    return regroup(self._input_workers.device_map, data_list)
 
   @property
   def output_classes(self):
@@ -1151,8 +1457,14 @@ class PerReplicaDataIterator(object):
 class PerReplicaDataset(object):
   """Like `tf.data.Dataset` split devices, producing `PerReplica` data."""
 
-  def __init__(self, dataset, devices, prefetch_on_device=None):
-    self._devices = devices
+  def __init__(self, dataset, input_workers, worker_index,
+               prefetch_on_device=None):
+    assert isinstance(input_workers, InputWorkers)
+    assert worker_index is not None
+    assert worker_index is not True
+    assert worker_index is not False
+    self._input_workers = input_workers
+    self._worker_index = worker_index
 
     # Default to using prefetching in graph mode, unless specified.
     # TODO(rohanj): Enable prefetching in eager mode.
@@ -1167,7 +1479,8 @@ class PerReplicaDataset(object):
       # TODO(priyag): If dropping remainder is not appropriate, find another
       # approach to distributing the dataset when not possible to divide evenly.
       # Possibly not an issue when we start using PartitionedDataset.
-      self._dataset = dataset.batch(len(devices), drop_remainder=True)
+      num_replicas = len(input_workers.compute_devices_for_worker(worker_index))
+      self._dataset = dataset.batch(num_replicas, drop_remainder=True)
 
   def make_one_shot_iterator(self):
     """Get a one time use iterator for the distributed PerReplicaDataset."""
@@ -1180,7 +1493,8 @@ class PerReplicaDataset(object):
     # PerReplicaDataIterator to handle that case.
     dataset_iterator = dataset_ops.make_one_shot_iterator(self._dataset)
     return PerReplicaDataIterator(
-        dataset_iterator, self._devices, prefetch_on_device=False)
+        dataset_iterator, self._input_workers, self._worker_index,
+        prefetch_on_device=False)
 
   def make_initializable_iterator(self):
     """Get an initializable iterator for the distributed PerReplicaDataset."""
@@ -1190,43 +1504,46 @@ class PerReplicaDataset(object):
       raise ValueError("Cannot create initializable iterator in Eager mode. "
                        "Please use `make_one_shot_iterator` instead.")
     if self._prefetch_on_device:
+      replica_devices = self._input_workers.compute_devices_for_worker(
+          self._worker_index)
       dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator(
-          self._dataset, self._devices)
+          self._dataset, replica_devices)
     else:
       dataset_iterator = dataset_ops.make_initializable_iterator(self._dataset)
     return PerReplicaDataIterator(
-        dataset_iterator,
-        self._devices,
+        dataset_iterator, self._input_workers, self._worker_index,
         prefetch_on_device=self._prefetch_on_device)
 
 
 class MultiWorkerDataIterator(object):
   """An iterator (like `tf.data.Iterator`) into a `MultiWorkerDataset`."""
 
-  def __init__(self, iterators, worker_device_pairs):
-    """Initialize the MultiWorkerDataIterator object.
+  def __init__(self, iterators, input_workers):
+    """Initialize the `MultiWorkerDataIterator` object.
 
     Args:
       iterators: a list of worker, iterator pairs.
-      worker_device_pairs: a list of (worker's devices, a list of
-        devices that belong to this worker) pairs.
+      input_workers: an `InputWorkers` object.
 
     Raises:
-      ValueError: if iterators and worker_device_pairs are not compatible.
+      ValueError: if iterators and input_workers are not compatible.
     """
-    if [d for d, _ in iterators] != [d for d, _ in worker_device_pairs]:
-      raise ValueError("iterators and worker_device_pairs are not compatible.")
-    self._workers = [d for d, _ in iterators]
-    self._iterators = [i for _, i in iterators]
-    self._worker_devices = [l for _, l in worker_device_pairs]
+    assert isinstance(input_workers, InputWorkers)
+    workers = tuple(d for d, _ in iterators)
+    if workers != input_workers.worker_devices:
+      raise ValueError("iterators and input_workers are not compatible. "
+                       "iterator workers: %r input_workers devices: %r" %
+                       (workers, input_workers.worker_devices))
+    self._iterators = tuple(i for _, i in iterators)
+    self._input_workers = input_workers
 
   @property
   def initializer(self):
     return control_flow_ops.group(
-        [iterator.initializer for iterator in self._iterators])
+        tuple(iterator.initializer for iterator in self._iterators))
 
   def get_iterator(self, worker):
-    for i, w in enumerate(self._workers):
+    for i, w in enumerate(self._input_workers.worker_devices):
       if worker == w:
         return self._iterators[i]
     return None
@@ -1241,26 +1558,20 @@ class MultiWorkerDataIterator(object):
 
   def get_next(self, name=None):
     """Scatter the input across hosts and devices."""
-    index = {}
-    worker_info = zip(self._workers, self._iterators, self._worker_devices)
-    for worker, iterator, worker_devices in worker_info:
+    replicas = []
+    for worker, iterator in zip(self._input_workers.worker_devices,
+                                self._iterators):
       if name is not None:
         d = tf_device.DeviceSpec.from_string(worker)
         new_name = "%s_%s_%d" % (name, d.job, d.task)
       else:
         new_name = None
       with ops.device(worker):
-        data_per_worker = iterator.get_next(name=new_name)
+        data_per_worker = iterator.get_next_as_list(name=new_name)
+        # Append to replicas to get a flat list of values indexed by replica.
+        replicas.extend(data_per_worker)
 
-      # Ungroup these per-replica value so as to get a flat map from devices to
-      # values.
-      for d in worker_devices:
-        v = select_device(d, data_per_worker)
-        if d in index:
-          raise ValueError("Duplicated devices in worker_device_pairs: %r" % v)
-        index[d] = v
-
-    return regroup(index)
+    return regroup(self._input_workers.device_map, replicas)
 
 
 class MultiWorkerDataset(object):
@@ -1270,41 +1581,40 @@ class MultiWorkerDataset(object):
   in eager mode.
   """
 
-  def __init__(self, dataset_fn, worker_device_pairs, prefetch_on_device=None,
+  def __init__(self, dataset_fn, input_workers, prefetch_on_device=None,
                auto_shard=False):
     """Initialize the MultiWorkerDataset object.
 
     Args:
       dataset_fn: a function or a list of functions that returns a
         `tf.data.Dataset`.
-      worker_device_pairs: a list of (worker, list of devices on that worker)
-        pairs; it must have same length with `dataset_fn` if `dataset_fn` is a
-        list.
+      input_workers: an `InputWorkers` object.
       prefetch_on_device: whether to prefetch to devices.
       auto_shard: whether to auto-shard the dataset.
     """
-    if isinstance(dataset_fn, list):
-      if len(dataset_fn) != len(worker_device_pairs):
-        raise ValueError("If `dataset_fn` is a list, it must have same length "
-                         "as `worker_device_pairs`")
+    assert isinstance(input_workers, InputWorkers)
+    if isinstance(dataset_fn, (list, tuple)):
+      if len(dataset_fn) != input_workers.num_workers:
+        raise ValueError("If `dataset_fn` is a list, it must have one entry "
+                         "per worker")
       if auto_shard:
         raise ValueError(
             "If `dataset_fn` is a list, `auto_shard` is not supported.")
-    self._worker_device_pairs = worker_device_pairs
+    self._input_workers = input_workers
     self._datasets = []
     # TODO(yuefengz, priyag): support different set of jobs for input
     # processing.
-    for i, (worker, worker_devices) in enumerate(worker_device_pairs):
+    for i, worker in enumerate(input_workers.worker_devices):
       with ops.device(worker):
-        if isinstance(dataset_fn, list):
+        if isinstance(dataset_fn, (list, tuple)):
           worker_input = dataset_fn[i]()
         else:
           worker_input = dataset_fn()
           if auto_shard:
             worker_input = input_ops.auto_shard_dataset(
-                worker_input, len(worker_device_pairs), i)
-        dataset = PerReplicaDataset(
-            worker_input, worker_devices, prefetch_on_device=prefetch_on_device)
+                worker_input, input_workers.num_workers, i)
+        dataset = PerReplicaDataset(worker_input, input_workers, i,
+                                    prefetch_on_device=prefetch_on_device)
         self._datasets.append((worker, dataset))
 
   def make_one_shot_iterator(self):
@@ -1312,7 +1622,7 @@ class MultiWorkerDataset(object):
     for worker, dataset in self._datasets:
       with ops.device(worker):
         iterators.append((worker, dataset_ops.make_one_shot_iterator(dataset)))
-    return MultiWorkerDataIterator(iterators, self._worker_device_pairs)
+    return MultiWorkerDataIterator(iterators, self._input_workers)
 
   def make_initializable_iterator(self):
     iterators = []
@@ -1320,7 +1630,7 @@ class MultiWorkerDataset(object):
       with ops.device(worker):
         iterators.append(
             (worker, dataset_ops.make_initializable_iterator(dataset)))
-    return MultiWorkerDataIterator(iterators, self._worker_device_pairs)
+    return MultiWorkerDataIterator(iterators, self._input_workers)
 
 
 class InputIterator(object):
@@ -1350,12 +1660,13 @@ class InputIterator(object):
 class InputIteratorImpl(InputIterator):
   """Common implementation for all input iterators."""
 
-  def __init__(self, worker_device_pairs, iterators):
-    if not worker_device_pairs:
+  def __init__(self, input_workers, iterators):
+    assert isinstance(input_workers, InputWorkers)
+    if not input_workers.worker_devices:
       raise ValueError("Should have at least one worker for input iterator.")
 
     self._iterators = iterators
-    self._worker_device_pairs = worker_device_pairs
+    self._input_workers = input_workers
     self._is_eager = context.executing_eagerly()
 
   def get_next(self, name=None):
@@ -1363,25 +1674,18 @@ class InputIteratorImpl(InputIterator):
     assert self._is_eager == context.executing_eagerly(), (
         "Iterator should be created and used in same execution mode.")
 
-    index = {}
-    for i, (worker, worker_devices) in enumerate(self._worker_device_pairs):
+    replicas = []
+    for i, worker in enumerate(self._input_workers.worker_devices):
       if name is not None:
         d = tf_device.DeviceSpec.from_string(worker)
         new_name = "%s_%s_%d" % (name, d.job, d.task)
       else:
         new_name = None
       with ops.device(worker):
-        data_per_worker = self._iterators[i].get_next(new_name)
-
-      # Ungroup these per-replica value so as to get a flat map from devices to
-      # values.
-      for d in worker_devices:
-        v = select_device(d, data_per_worker)
-        if d in index:
-          raise ValueError("Duplicated devices in worker_device_pairs: %r" % v)
-        index[d] = v
+        # Make `replicas` a flat list of values across all replicas.
+        replicas.extend(self._iterators[i].get_next_as_list(new_name))
 
-    return regroup(index)
+    return regroup(self._input_workers.device_map, replicas)
 
   def initialize(self):
     """Initialze underlying iterators.
@@ -1414,7 +1718,7 @@ class InputIteratorImpl(InputIterator):
 
   # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs.
   def get_iterator(self, worker):
-    for i, (w, _) in enumerate(self._worker_device_pairs):
+    for i, w in enumerate(self._input_workers.worker_devices):
       if worker == w:
         return self._iterators[i]
     return None
@@ -1423,7 +1727,7 @@ class InputIteratorImpl(InputIterator):
 class InputFunctionIterator(InputIteratorImpl):
   """Iterator created from input function."""
 
-  def __init__(self, input_fn, worker_device_pairs, input_contexts):
+  def __init__(self, input_fn, input_workers, input_contexts):
     """Make an iterator for input provided via an input function.
 
     Currently implements PER_WORKER mode, in which the `input_fn` is called
@@ -1435,36 +1739,36 @@ class InputFunctionIterator(InputIteratorImpl):
 
     Args:
       input_fn: Input function that returns a `tf.data.Dataset` object.
-      worker_device_pairs: A list of (worker, list of devices on that worker)
-        pairs.
+      input_workers: an `InputWorkers` object.
       input_contexts: A list of `InputContext` instances to be passed to call(s)
         to `input_fn`. Length and order should match worker order in
         `worker_device_pairs`.
     """
-    if len(worker_device_pairs) != len(input_contexts):
+    assert isinstance(input_workers, InputWorkers)
+    if input_workers.num_workers != len(input_contexts):
       raise ValueError(
-          "Number of worker_device_pairs (%d) is not same as number of"
-          "input_contexts (%d)" % (
-              len(worker_device_pairs), len(input_contexts)))
+          "Number of input workers (%d) is not same as number of "
+          "input_contexts (%d)" %
+          (input_workers.num_workers, len(input_contexts)))
 
     iterators = []
-    for (worker, devices), ctx in zip(worker_device_pairs, input_contexts):
-      # TODO(priyag): We should probably explicitly specify CPU device on worker.
+    for i, ctx in enumerate(input_contexts):
+      worker = input_workers.worker_devices[i]
       with ops.device(worker):
         result = input_fn(ctx)
         if not isinstance(result, dataset_ops.DatasetV2):
           raise ValueError("input_fn must return a tf.data.Dataset.")
+        devices = input_workers.compute_devices_for_worker(i)
         iterator = _SingleWorkerDatasetIterator(result, worker, devices)
         iterators.append(iterator)
 
-    super(InputFunctionIterator, self).__init__(
-        worker_device_pairs, iterators)
+    super(InputFunctionIterator, self).__init__(input_workers, iterators)
 
 
 class DatasetIterator(InputIteratorImpl):
   """Iterator created from input dataset."""
 
-  def __init__(self, dataset, worker_device_pairs, split_batch_by=None):
+  def __init__(self, dataset, input_workers, split_batch_by=None):
     """Make an iterator for the dataset on given devices.
 
     If `split_batch_by` is not None, we "split" each batch of the
@@ -1486,21 +1790,22 @@ class DatasetIterator(InputIteratorImpl):
 
     Args:
       dataset: `tf.data.Dataset` that will be used as the input source.
-      worker_device_pairs: A list of (worker, list of devices on that worker)
-        pairs.
+      input_workers: an `InputWorkers` object.
       split_batch_by: Optional integer. If present, we "split" each batch of the
         dataset by `split_batch_by` value.
     """
+    assert isinstance(input_workers, InputWorkers)
     if split_batch_by:
       dataset = _split_dataset_batch(dataset, split_batch_by)
 
     iterators = []
-    for worker, worker_devices in worker_device_pairs:
+    for i, worker in enumerate(input_workers.worker_devices):
       with ops.device(worker):
+        worker_devices = input_workers.compute_devices_for_worker(i)
         iterator = _SingleWorkerDatasetIterator(dataset, worker, worker_devices)
         iterators.append(iterator)
 
-    super(DatasetIterator, self).__init__(worker_device_pairs, iterators)
+    super(DatasetIterator, self).__init__(input_workers, iterators)
 
 
 class _SingleWorkerDatasetIterator(object):
@@ -1537,23 +1842,23 @@ class _SingleWorkerDatasetIterator(object):
             self._dataset, self._devices)
     self._iterator = iterator
 
-  def get_next(self, name=None):
+  def get_next_as_list(self, name=None):
     """Get next element from the underlying iterator."""
     with ops.device(self._worker):
       if self._is_eager:
         # Batched dataset case.
         batch = self._iterator.get_next(name=name)
-        index = {}
+        data_list = []
         for i, d in enumerate(self._devices):
-          index[d] = nest.map_structure(operator.itemgetter(i), batch)
+          v = nest.map_structure(operator.itemgetter(i), batch)
           with ops.device(d):
-            index[d] = nest.map_structure(array_ops.identity, index[d])
+            v = nest.map_structure(array_ops.identity, v)
+          data_list.append(v)
       else:
         # MultiDeviceIterator case.
         data_list = self._iterator.get_next()
-        index = dict(zip(self._devices, data_list))
 
-      return regroup(index)
+      return data_list
 
   def initialize(self):
     """Initialze underlying iterator.
-- 
GitLab


From 0e1f4418b84170533b3d388ac29042fe486403ee Mon Sep 17 00:00:00 2001
From: Gunhan Gulsoy <gunan@google.com>
Date: Tue, 18 Dec 2018 20:56:29 -0800
Subject: [PATCH 802/873] Automated rollback of commit
 8d1099b51f7ff8a5ac0bae8cad5bd91fc0b43815

PiperOrigin-RevId: 226109993
---
 tensorflow/core/BUILD                         | 20 +++++++++++--
 .../core/platform/default/build_config/BUILD  |  5 ++++
 .../core/platform/{ => default}/logger.cc     | 25 +++++++---------
 tensorflow/core/platform/logger.h             | 19 +-----------
 tensorflow/stream_executor/BUILD              |  3 --
 .../stream_executor/stream_executor_pimpl.cc  | 30 +------------------
 6 files changed, 36 insertions(+), 66 deletions(-)
 rename tensorflow/core/platform/{ => default}/logger.cc (72%)

diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 258c46fbcb..8bf1480d33 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -445,8 +445,7 @@ cc_library(
 )
 
 cc_library(
-    name = "logger",
-    srcs = ["platform/logger.cc"],
+    name = "logger_interface",
     hdrs = ["platform/logger.h"],
     copts = tf_copts(),
     visibility = ["//visibility:public"],
@@ -456,6 +455,23 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "default_logger",
+    srcs = ["platform/default/logger.cc"],
+    hdrs = ["platform/logger.h"],
+    deps = [
+        "//tensorflow/core:lib_proto_parsing",
+        "//tensorflow/core:logger_interface",
+    ],
+)
+
+cc_library(
+    name = "logger",
+    hdrs = ["platform/logger.h"],
+    visibility = ["//visibility:public"],
+    deps = ["//tensorflow/core/platform/default/build_config:logger"],
+)
+
 filegroup(
     name = "platform_env_hdrs",
     srcs = [
diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD
index da1f66dc67..ee6936b372 100644
--- a/tensorflow/core/platform/default/build_config/BUILD
+++ b/tensorflow/core/platform/default/build_config/BUILD
@@ -275,3 +275,8 @@ alias(
     actual = ":mobile_srcs",
     visibility = ["//visibility:public"],
 )
+
+alias(
+    name = "logger",
+    actual = "//tensorflow/core:default_logger",
+)
diff --git a/tensorflow/core/platform/logger.cc b/tensorflow/core/platform/default/logger.cc
similarity index 72%
rename from tensorflow/core/platform/logger.cc
rename to tensorflow/core/platform/default/logger.cc
index f5a961e4d3..54b1a1a67c 100644
--- a/tensorflow/core/platform/logger.cc
+++ b/tensorflow/core/platform/default/logger.cc
@@ -18,20 +18,17 @@ limitations under the License.
 #include "tensorflow/core/platform/logging.h"
 
 namespace tensorflow {
-namespace {
 
-class DefaultLogger : public Logger {
- private:
-  void DoLogProto(google::protobuf::Any* proto) override {
-    VLOG(2) << proto->ShortDebugString();
-  }
-  void DoFlush() override {}
-};
-
-}  // namespace
-
-Logger::FactoryFunc Logger::singleton_factory_ = []() -> Logger* {
-  return new DefaultLogger();
-};
+Logger* Logger::Singleton() {
+  class DefaultLogger : public Logger {
+   private:
+    void DoLogProto(google::protobuf::Any* proto) override {
+      VLOG(2) << proto->ShortDebugString();
+    }
+    void DoFlush() override {}
+  };
+  static Logger* instance = new DefaultLogger();
+  return instance;
+}
 
 }  // namespace tensorflow
diff --git a/tensorflow/core/platform/logger.h b/tensorflow/core/platform/logger.h
index f0bfef4f2d..5d304bea63 100644
--- a/tensorflow/core/platform/logger.h
+++ b/tensorflow/core/platform/logger.h
@@ -26,22 +26,7 @@ namespace tensorflow {
 // log anything to a non-local place, e.g. a database.
 class Logger {
  public:
-  // The singleton is supposed to be used in the following steps:
-  // * At program start time, REGISTER_MOUDLE_INITIALIZER calls
-  //   SetSingletonFactory.
-  // * At some point in the program execution, Singleton() is called for the
-  //   first time, initializing the logger.
-  // * Succeeding calls to Singleton() return the initiailized logger.
-  using FactoryFunc = Logger* (*)();
-
-  static void SetSingletonFactory(FactoryFunc factory) {
-    singleton_factory_ = factory;
-  }
-
-  static Logger* Singleton() {
-    static Logger* instance = singleton_factory_();
-    return instance;
-  }
+  static Logger* Singleton();
 
   virtual ~Logger() = default;
 
@@ -59,8 +44,6 @@ class Logger {
  private:
   virtual void DoLogProto(google::protobuf::Any* proto) = 0;
   virtual void DoFlush() = 0;
-
-  static FactoryFunc singleton_factory_;
 };
 
 }  // namespace tensorflow
diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD
index c43efc799c..00c23b8d17 100644
--- a/tensorflow/stream_executor/BUILD
+++ b/tensorflow/stream_executor/BUILD
@@ -54,9 +54,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc_impl",
-        ":logging_proto_cc_impl",
         "//tensorflow/core:lib",
-        "//tensorflow/core:logger",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/container:flat_hash_map",
         "@com_google_absl//absl/strings",
@@ -73,7 +71,6 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":dnn_proto_cc",
-        ":logging_proto_cc",
         "//tensorflow/core:lib",
         "//tensorflow/core:ptr_util",
         "@com_google_absl//absl/strings",
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.cc b/tensorflow/stream_executor/stream_executor_pimpl.cc
index cb67a906a8..ee3d2b6da0 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.cc
+++ b/tensorflow/stream_executor/stream_executor_pimpl.cc
@@ -23,7 +23,6 @@ limitations under the License.
 #include <utility>
 
 #include "absl/strings/str_cat.h"
-#include "tensorflow/core/platform/logger.h"
 #include "tensorflow/core/util/env_var.h"
 #include "tensorflow/stream_executor/blas.h"
 #include "tensorflow/stream_executor/fft.h"
@@ -34,7 +33,6 @@ limitations under the License.
 #include "tensorflow/stream_executor/lib/str_util.h"
 #include "tensorflow/stream_executor/lib/stringprintf.h"
 #include "tensorflow/stream_executor/lib/threadpool.h"
-#include "tensorflow/stream_executor/logging.pb.h"
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/rng.h"
 #include "tensorflow/stream_executor/stream_executor_internal.h"
@@ -194,8 +192,6 @@ StreamExecutor::StreamExecutor(
     platform_kind_ = PlatformKind::kOpenCL;
   } else if (port::Lowercase(platform_->Name()) == "host") {
     platform_kind_ = PlatformKind::kHost;
-  } else {
-    platform_kind_ = PlatformKind::kInvalid;
   }
 }
 
@@ -221,31 +217,7 @@ StreamExecutor::~StreamExecutor() {
 port::Status StreamExecutor::Init(int device_ordinal,
                                   DeviceOptions device_options) {
   device_ordinal_ = device_ordinal;
-  TF_RETURN_IF_ERROR(
-      implementation_->Init(device_ordinal, std::move(device_options)));
-
-  if (platform_kind_ == PlatformKind::kCuda) {
-    CudaInfo info;
-
-    int cc_major, cc_minor;
-    GetDeviceDescription().cuda_compute_capability(&cc_major, &cc_minor);
-    info.mutable_compute_capability()->set_major(cc_major);
-    info.mutable_compute_capability()->set_minor(cc_minor);
-
-    if (auto *dnn = AsDnn()) {
-      port::StatusOr<dnn::VersionInfo> version_or = dnn->GetVersion();
-      if (version_or.ok()) {
-        const auto &version = version_or.ValueOrDie();
-        info.mutable_cudnn_version()->set_major(version.major_version());
-        info.mutable_cudnn_version()->set_minor(version.minor_version());
-        info.mutable_cudnn_version()->set_patch(version.patch());
-      }
-    }
-
-    tensorflow::Logger::Singleton()->LogProto(info);
-  }
-
-  return port::Status::OK();
+  return implementation_->Init(device_ordinal, std::move(device_options));
 }
 
 port::Status StreamExecutor::Init() {
-- 
GitLab


From af92068a66f0f089be46d40ca00b4c3ce2e7c5ab Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Tue, 18 Dec 2018 21:34:17 -0800
Subject: [PATCH 803/873] Migrate from creating two models in Keras with
 Distribution strategy to a single model created with the Distribution
 Strategy scope.

PiperOrigin-RevId: 226113167
---
 tensorflow/contrib/distribute/python/BUILD    |   22 +-
 .../python/keras_backward_compat_test.py      | 1420 +++++++++++++++++
 .../python/keras_optimizer_v2_test.py         |   11 +-
 .../contrib/distribute/python/keras_test.py   |  374 ++---
 .../contrib/tpu/python/tpu/keras_support.py   |    4 +
 tensorflow/python/distribute/values.py        |    3 +
 tensorflow/python/keras/engine/training.py    |   77 +-
 .../python/keras/engine/training_arrays.py    |   19 +-
 .../keras/engine/training_distributed.py      |  241 ++-
 tensorflow/python/keras/models.py             |   10 +-
 10 files changed, 1883 insertions(+), 298 deletions(-)
 create mode 100644 tensorflow/contrib/distribute/python/keras_backward_compat_test.py

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 9e27eb2b69..3a738efe3c 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -584,7 +584,10 @@ cuda_py_test(
 py_library(
     name = "keras_test_lib",
     testonly = 1,
-    srcs = ["keras_test.py"],
+    srcs = [
+        "keras_backward_compat_test.py",
+        "keras_test.py",
+    ],
     deps = [
         ":combinations",
         "//tensorflow/contrib/distribute/python:mirrored_strategy",
@@ -615,6 +618,23 @@ cuda_py_test(
     ],
 )
 
+# TODO(b/121200287): Remove this in 2.0
+cuda_py_test(
+    name = "keras_backward_compat_test",
+    srcs = ["keras_backward_compat_test.py"],
+    additional_deps = [
+        ":keras_test_lib",
+    ],
+    shard_count = 16,
+    tags = [
+        "multi_and_single_gpu",
+        "no_oss",  # TODO(b/117919883): Fix python error.
+        "no_pip",
+        "no_windows_gpu",
+        "notsan",
+    ],
+)
+
 py_library(
     name = "metrics_v1_test_lib",
     testonly = 1,
diff --git a/tensorflow/contrib/distribute/python/keras_backward_compat_test.py b/tensorflow/contrib/distribute/python/keras_backward_compat_test.py
new file mode 100644
index 0000000000..a63354eb37
--- /dev/null
+++ b/tensorflow/contrib/distribute/python/keras_backward_compat_test.py
@@ -0,0 +1,1420 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tf.keras models using DistributionStrategy."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.contrib.distribute.python import combinations
+from tensorflow.contrib.distribute.python import mirrored_strategy
+from tensorflow.contrib.distribute.python import tpu_strategy
+from tensorflow.python import keras
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.distribute import distribute_lib
+from tensorflow.python.distribute import values
+from tensorflow.python.eager import test
+from tensorflow.python.estimator import keras as keras_lib
+from tensorflow.python.estimator import run_config as run_config_lib
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import random_seed
+from tensorflow.python.framework import test_util
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.keras.engine import distributed_training_utils
+from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras
+from tensorflow.python.ops.parsing_ops import gen_parsing_ops
+from tensorflow.python.platform import gfile
+from tensorflow.python.summary.writer import writer_cache
+from tensorflow.python.training import gradient_descent
+from tensorflow.python.training import rmsprop
+
+_RANDOM_SEED = 1337
+_TRAIN_SIZE = 200
+_INPUT_SIZE = (10,)
+_NUM_CLASS = 2
+
+
+# TODO(anjalisridhar): Add a decorator that will allow us to run these tests as
+# part of the tf.keras unit tests suite.
+def simple_sequential_model():
+  model = keras.models.Sequential()
+  model.add(keras.layers.Dense(16, activation='relu', input_shape=_INPUT_SIZE))
+  model.add(keras.layers.Dropout(0.1))
+  model.add(keras.layers.Dense(_NUM_CLASS, activation='softmax'))
+  return model
+
+
+def simple_functional_model():
+  a = keras.layers.Input(shape=_INPUT_SIZE)
+  b = keras.layers.Dense(16, activation='relu')(a)
+  b = keras.layers.Dropout(0.1)(b)
+  b = keras.layers.Dense(_NUM_CLASS, activation='softmax')(b)
+  model = keras.models.Model(inputs=[a], outputs=[b])
+  return model
+
+
+def multi_inputs_multi_outputs_model():
+  input_a = keras.layers.Input(shape=(16,), name='input_a')
+  input_b = keras.layers.Input(shape=(16,), name='input_b')
+  input_m = keras.layers.Input(shape=(8,), dtype='string', name='input_m')
+  dense = keras.layers.Dense(8, name='dense_1')
+
+  interm_a = dense(input_a)
+  # Read m
+  interm_m = keras.layers.Lambda(gen_parsing_ops.string_to_number)(input_m)
+  interm_s = keras.layers.Lambda(lambda k: k[0] * k[1])([interm_m, interm_a])
+  interm_b = dense(input_b)
+  merged = keras.layers.concatenate([interm_s, interm_b], name='merge')
+  output_c = keras.layers.Dense(3, activation='softmax', name='dense_2')(merged)
+  output_d = keras.layers.Dense(2, activation='softmax', name='dense_3')(merged)
+  model = keras.models.Model(
+      inputs=[input_a, input_b, input_m], outputs=[output_c, output_d])
+  model.compile(
+      loss='categorical_crossentropy',
+      optimizer=gradient_descent.GradientDescentOptimizer(0.001),
+      metrics={
+          'dense_2': 'categorical_accuracy',
+          'dense_3': 'categorical_accuracy'
+      })
+  return model
+
+
+def get_ds_train_input_fn():
+  np.random.seed(_RANDOM_SEED)
+  (x_train, y_train), _ = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=_INPUT_SIZE,
+      num_classes=_NUM_CLASS)
+  y_train = keras.utils.to_categorical(y_train)
+
+  dataset = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
+  dataset = dataset.batch(32)
+  return dataset
+
+
+def get_ds_test_input_fn():
+  np.random.seed(_RANDOM_SEED)
+  _, (x_test, y_test) = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=_INPUT_SIZE,
+      num_classes=_NUM_CLASS)
+  y_test = keras.utils.to_categorical(y_test)
+
+  dataset = dataset_ops.Dataset.from_tensor_slices((x_test, y_test))
+  dataset = dataset.batch(32)
+  return dataset
+
+
+def get_multi_inputs_multi_outputs_data():
+  (a_train, c_train), (a_test, c_test) = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=(16,),
+      num_classes=3,
+      random_seed=_RANDOM_SEED)
+  (b_train, d_train), (b_test, d_test) = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=(16,),
+      num_classes=2,
+      random_seed=_RANDOM_SEED)
+  (m_train, _), (m_test, _) = testing_utils.get_test_data(
+      train_samples=_TRAIN_SIZE,
+      test_samples=50,
+      input_shape=(8,),
+      num_classes=2,
+      random_seed=_RANDOM_SEED)
+
+  c_train = keras.utils.to_categorical(c_train)
+  c_test = keras.utils.to_categorical(c_test)
+  d_train = keras.utils.to_categorical(d_train)
+  d_test = keras.utils.to_categorical(d_test)
+
+  train_data = {
+      'input_a': a_train,
+      'input_b': b_train,
+      'input_m': m_train,
+      'output_c': c_train,
+      'output_d': d_train
+  }
+  test_data = {
+      'input_a': a_test,
+      'input_b': b_test,
+      'input_m': m_test,
+      'output_c': c_test,
+      'output_d': d_test
+  }
+
+  return (train_data, test_data)
+
+
+def batch_wrapper(dataset, batch_size, distribution, repeat=None):
+  if repeat:
+    dataset = dataset.repeat(repeat)
+  # TPUs currently require fully defined input shapes, drop_remainder ensures
+  # the input will have fully defined shapes.
+  if isinstance(distribution, tpu_strategy.TPUStrategy):
+    return dataset.batch(batch_size, drop_remainder=True)
+  else:
+    return dataset.batch(batch_size)
+
+
+def get_model():
+  x = keras.layers.Input(shape=(3,), name='input')
+  y = keras.layers.Dense(4, name='dense')(x)
+  model = keras.Model(x, y)
+  return model
+
+
+def get_dataset(distribution):
+  inputs = np.zeros((10, 3), dtype=np.float32)
+  targets = np.zeros((10, 4), dtype=np.float32)
+  dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+  dataset = dataset.repeat(100)
+  dataset = batch_wrapper(dataset, 10, distribution)
+  return dataset
+
+
+def get_predict_dataset(distribution):
+  inputs = np.zeros((10, 3), dtype=np.float32)
+  dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
+  dataset = dataset.repeat(100)
+  dataset = batch_wrapper(dataset, 10, distribution)
+  return dataset
+
+
+def multi_input_output_model():
+  a = keras.layers.Input(shape=(3,), name='input_a')
+  b = keras.layers.Input(shape=(5,), name='input_b')
+  # TODO(anjalisridhar): Change the output dimension of the second Dense layer
+  # once the iterator output validation issue has been fixed.
+  dense_1 = keras.layers.Dense(7, name='dense_1')
+  dense_2 = keras.layers.Dense(7, name='dense_2')
+  c = dense_1(a)
+  d = dense_2(b)
+  e = keras.layers.Dropout(0.5, name='dropout')(c)
+  model = keras.models.Model([a, b], [d, e])
+  return model
+
+
+def get_correctness_test_inputs(use_numpy, use_validation_data,
+                                with_distribution,
+                                x_train, y_train, x_predict):
+  """Generates the inputs for correctness check when enable Keras with DS."""
+  training_epochs = 2
+  global_batch_size = 64
+  batch_size = global_batch_size
+  # TODO(b/118776054): Use global batch size for Keras/DS support.
+  use_per_core_batch_size = (
+      with_distribution and
+      not distributed_training_utils.global_batch_size_supported(
+          with_distribution))
+  if use_per_core_batch_size:
+    batch_size //= with_distribution.num_replicas_in_sync
+
+  if use_numpy:
+    training_inputs = {
+        'batch_size': batch_size,
+        'x': x_train,
+        'y': y_train,
+        'epochs': training_epochs,
+        'shuffle': False,
+    }
+
+    if use_validation_data:
+      eval_inputs = None
+      training_inputs['validation_data'] = (x_train, y_train)
+    else:
+      eval_inputs = {
+          'batch_size': batch_size,
+          'x': x_train,
+          'y': y_train,
+      }
+    predict_inputs = {
+        'x': np.array(x_predict, dtype=np.float32),
+    }
+  else:
+    # For dataset inputs, we do not pass batch_size to
+    # keras.fit/evaluate/predict. The batch size is part of the dataset.
+    train_dataset = dataset_ops.Dataset.from_tensor_slices(
+        (x_train, y_train))
+    x = batch_wrapper(
+        train_dataset, batch_size, with_distribution, repeat=training_epochs)
+
+    training_inputs = {
+        'batch_size': None,
+        'x': x,
+        'y': None,
+        'epochs': training_epochs,
+        'shuffle': False,
+        'steps_per_epoch': len(x_train) // global_batch_size,
+    }
+    if use_validation_data:
+      eval_inputs = None  # Remove the eval_inputs
+      eval_dataset = dataset_ops.Dataset.from_tensor_slices(
+          (x_train, y_train))
+      x = batch_wrapper(eval_dataset, batch_size, with_distribution)
+      training_inputs['validation_data'] = x
+      training_inputs['validation_steps'] = 5
+    else:
+      eval_inputs = {
+          'batch_size': None,
+          'x': x,
+          'y': None,
+          'steps': 20,
+      }
+
+    predict_batch_size = len(x_predict)
+    if use_per_core_batch_size:
+      predict_batch_size //= with_distribution.num_replicas_in_sync
+    predict_dataset = dataset_ops.Dataset.from_tensor_slices(x_predict)
+    predict_dataset = batch_wrapper(predict_dataset,
+                                    predict_batch_size, with_distribution)
+    predict_inputs = {
+        'steps': 1,
+        'x': predict_dataset,
+    }
+
+  return training_inputs, eval_inputs, predict_inputs
+
+
+strategies_minus_tpu = [
+    combinations.default_strategy,
+    combinations.one_device_strategy,
+    combinations.mirrored_strategy_with_gpu_and_cpu,
+    combinations.mirrored_strategy_with_two_gpus,
+    combinations.core_mirrored_strategy_with_gpu_and_cpu,
+    combinations.core_mirrored_strategy_with_two_gpus]
+
+tpu_strategies = [
+    combinations.tpu_strategy,  # steps_per_run=2
+    combinations.tpu_strategy_one_step]
+
+
+def strategy_minus_tpu_combinations():
+  return combinations.combine(
+      distribution=strategies_minus_tpu,
+      mode=['graph', 'eager'])
+
+
+def tpu_strategy_combinations():
+  return combinations.combine(
+      distribution=tpu_strategies,
+      mode=['graph'])
+
+
+def all_strategy_combinations():
+  return strategy_minus_tpu_combinations() + tpu_strategy_combinations()
+
+
+# TODO(priyag): Add v2 optimizers here.
+def strategy_and_optimizer_combinations():
+  return combinations.times(
+      all_strategy_combinations(),
+      combinations.combine(
+          optimizer=[combinations.adagrad_optimizer_v1_fn,
+                     combinations.adam_optimizer_v1_fn,
+                     combinations.gradient_descent_optimizer_v1_fn,
+                     combinations.rmsprop_optimizer_v1_fn]))
+
+
+def strategy_and_input_combinations():
+  return (
+      combinations.times(
+          combinations.combine(distribution=strategies_minus_tpu),
+          combinations.combine(mode=['graph'],
+                               use_numpy=[True, False],
+                               use_validation_data=[True, False])
+          + combinations.combine(mode=['eager'],
+                                 use_numpy=[False],
+                                 use_validation_data=[False])) +
+      combinations.times(
+          combinations.combine(distribution=tpu_strategies),
+          combinations.combine(mode=['graph'],
+                               use_numpy=[True, False],
+                               use_validation_data=[True, False])))
+
+
+def strategy_for_numpy_input_combinations():
+  return combinations.combine(
+      distribution=strategies_minus_tpu + tpu_strategies,
+      mode=['graph'])
+
+
+class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
+                                        parameterized.TestCase):
+
+  def setUp(self):
+    self._base_dir = os.path.join(self.get_temp_dir(),
+                                  'keras_mirrored_strategy_test')
+    gfile.MakeDirs(self._base_dir)
+    self._config = run_config_lib.RunConfig(
+        tf_random_seed=_RANDOM_SEED, model_dir=self._base_dir)
+
+  def tearDown(self):
+    writer_cache.FileWriterCache.clear()
+    if os.path.isdir(self._base_dir):
+      gfile.DeleteRecursively(self._base_dir)
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.mirrored_strategy_with_two_gpus,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_two_gpus],
+      mode=['graph']))
+  def test_train_functional_with_distribution_strategy(self, distribution):
+    keras_model = simple_functional_model()
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        metrics=[keras.metrics.CategoricalAccuracy()],
+        optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01))
+    config = run_config_lib.RunConfig(tf_random_seed=_RANDOM_SEED,
+                                      model_dir=self._base_dir,
+                                      train_distribute=distribution,
+                                      eval_distribute=distribution)
+    with self.cached_session():
+      est_keras = keras_lib.model_to_estimator(
+          keras_model=keras_model, config=config)
+      before_eval_results = est_keras.evaluate(
+          input_fn=get_ds_test_input_fn, steps=1)
+      est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16)
+      after_eval_results = est_keras.evaluate(input_fn=get_ds_test_input_fn,
+                                              steps=1)
+      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+    writer_cache.FileWriterCache.clear()
+    gfile.DeleteRecursively(self._config.model_dir)
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.mirrored_strategy_with_two_gpus,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_two_gpus],
+      mode=['graph']))
+  def test_train_sequential_with_distribution_strategy(self, distribution):
+    keras_model = simple_sequential_model()
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        metrics=[keras.metrics.CategoricalAccuracy()],
+        optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01))
+    config = run_config_lib.RunConfig(tf_random_seed=_RANDOM_SEED,
+                                      model_dir=self._base_dir,
+                                      train_distribute=distribution)
+    with self.cached_session():
+      est_keras = keras_lib.model_to_estimator(
+          keras_model=keras_model, config=config)
+      before_eval_results = est_keras.evaluate(
+          input_fn=get_ds_test_input_fn, steps=1)
+      est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16)
+      after_eval_results = est_keras.evaluate(input_fn=get_ds_test_input_fn,
+                                              steps=1)
+      self.assertLess(after_eval_results['loss'], before_eval_results['loss'])
+
+    writer_cache.FileWriterCache.clear()
+    gfile.DeleteRecursively(self._config.model_dir)
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph']))
+  def test_multi_inputs_multi_outputs_with_input_fn_as_dict(self, distribution):
+    train_data, test_data = get_multi_inputs_multi_outputs_data()
+
+    def train_input_fn():
+      input_dict = {
+          'input_a': train_data['input_a'],
+          'input_b': train_data['input_b'],
+          'input_m': train_data['input_m'].astype(np.str)
+      }
+      output_dict = {
+          'dense_2': train_data['output_c'],
+          'dense_3': train_data['output_d']
+      }
+      return dataset_ops.Dataset.from_tensor_slices((input_dict,
+                                                     output_dict)).batch(16)
+
+    def eval_input_fn():
+      input_dict = {
+          'input_a': test_data['input_a'],
+          'input_b': test_data['input_b'],
+          'input_m': test_data['input_m'].astype(np.str)
+      }
+      output_dict = {
+          'dense_2': test_data['output_c'],
+          'dense_3': test_data['output_d']
+      }
+      return dataset_ops.Dataset.from_tensor_slices((input_dict,
+                                                     output_dict)).batch(16)
+
+    self.do_test_multi_inputs_multi_outputs_with_input_fn(
+        distribution, train_input_fn, eval_input_fn)
+
+  def do_test_multi_inputs_multi_outputs_with_input_fn(
+      self, distribution, train_input_fn, eval_input_fn):
+    config = run_config_lib.RunConfig(
+        tf_random_seed=_RANDOM_SEED,
+        model_dir=self._base_dir,
+        train_distribute=distribution)
+    with self.cached_session():
+      model = multi_inputs_multi_outputs_model()
+      est_keras = keras_lib.model_to_estimator(keras_model=model, config=config)
+      baseline_eval_results = est_keras.evaluate(
+          input_fn=eval_input_fn, steps=1)
+      est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
+      eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
+      self.assertLess(eval_results['loss'], baseline_eval_results['loss'])
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph']))
+  def test_keras_optimizer_with_distribution_strategy(self, distribution):
+    keras_model = simple_sequential_model()
+    keras_model.compile(
+        loss='categorical_crossentropy',
+        optimizer=keras.optimizers.rmsprop(lr=0.01))
+
+    config = run_config_lib.RunConfig(tf_random_seed=_RANDOM_SEED,
+                                      model_dir=self._base_dir,
+                                      train_distribute=distribution)
+    with self.cached_session():
+      est_keras = keras_lib.model_to_estimator(keras_model=keras_model,
+                                               config=config)
+      with self.assertRaisesRegexp(ValueError,
+                                   'Only TensorFlow native optimizers are '
+                                   'supported with DistributionStrategy.'):
+        est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16)
+
+    writer_cache.FileWriterCache.clear()
+    gfile.DeleteRecursively(self._config.model_dir)
+
+
+class TestDistributionStrategyWithNumpyArrays(test.TestCase,
+                                              parameterized.TestCase):
+
+  @combinations.generate(strategy_for_numpy_input_combinations())
+  def test_creating_var_with_numpy_arrays(self, distribution):
+    with self.cached_session():
+      x = np.asarray(np.random.random((64, 3)), dtype=np.float32)
+      var_x = distributed_training_utils.get_var_for_numpy(distribution, x)
+      val = self.evaluate(var_x.value())
+      # Verify that the numpy value is copied to the variable.
+      self.assertAllEqual(x, val)
+
+  @combinations.generate(strategy_for_numpy_input_combinations())
+  def test_calculating_input_params_no_steps_no_batch_size(self, distribution):
+    # Calculate the per_replica_batch_size scaling factor for strategies
+    # that use per_core_batch_size
+    replica_scale_factor = 1.0
+    if not distributed_training_utils.global_batch_size_supported(distribution):
+      replica_scale_factor = distribution.num_replicas_in_sync
+
+    with self.cached_session():
+      # Input samples of different sizes
+      input_20_samples = np.zeros((20, 3), dtype=np.float32)
+      input_63_samples = np.zeros((63, 3), dtype=np.float32)
+      input_64_samples = np.zeros((64, 3), dtype=np.float32)
+
+      # Default global batch size 32 for input with 64 samples run in 2 steps
+      steps, batch_size = distributed_training_utils.get_input_params(
+          distribution, input_64_samples, steps=None, batch_size=None)
+      self.assertEqual(batch_size, 32 // replica_scale_factor)
+      self.assertEqual(steps, 2)
+
+      # Computed global batch size 20 is lower than 32 if we pass less samples.
+      steps, batch_size = distributed_training_utils.get_input_params(
+          distribution, input_20_samples, steps=None, batch_size=None)
+      self.assertEqual(batch_size, 20 // replica_scale_factor)
+      self.assertEqual(steps, 1)
+
+      #  Default global batch size 32 cannot be used with 63 samples.
+      with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
+        distributed_training_utils.get_input_params(
+            distribution, input_63_samples, steps=None, batch_size=None)
+
+  @combinations.generate(strategy_for_numpy_input_combinations())
+  def test_calculating_input_params_with_steps_no_batch_size(self,
+                                                             distribution):
+    # Calculate the per_replica_batch_size scaling factor for strategies
+    # that use per_core_batch_size
+    replica_scale_factor = 1.0
+    if not distributed_training_utils.global_batch_size_supported(distribution):
+      replica_scale_factor = distribution.num_replicas_in_sync
+
+    with self.cached_session():
+      # Input samples of different sizes
+      input_63_samples = np.zeros((63, 3), dtype=np.float32)
+      input_64_samples = np.zeros((64, 3), dtype=np.float32)
+
+      # Computed global batch size is correct for number of specified 1 step
+      steps, batch_size = distributed_training_utils.get_input_params(
+          distribution, input_64_samples, steps=1, batch_size=None)
+      self.assertEqual(batch_size, 64 // replica_scale_factor)
+      self.assertEqual(steps, 1)
+
+      # Computed global batch size is correct for number of specified 2 steps
+      steps, batch_size = distributed_training_utils.get_input_params(
+          distribution, input_64_samples, steps=2, batch_size=None)
+      self.assertEqual(batch_size, 32 // replica_scale_factor)
+      self.assertEqual(steps, 2)
+
+      # All samples can not be consumed in specified number of steps
+      with self.assertRaisesRegexp(ValueError, 'not divisible by steps'):
+        distributed_training_utils.get_input_params(
+            distribution, input_63_samples, steps=2, batch_size=None)
+
+      # This cases is different for different strategies due to the
+      # difference in supported batch size being global or per-replica.
+      if replica_scale_factor == 1:
+        # Computed global batch size is correct even if not sharadable
+        steps, batch_size = distributed_training_utils.get_input_params(
+            distribution, input_63_samples, steps=3, batch_size=None)
+        self.assertEqual(batch_size, 21)
+        self.assertEqual(steps, 3)
+      else:
+        # Computed global batch size can not be sharded across replicas
+        with self.assertRaisesRegexp(ValueError, 'could not be sharded evenly '
+                                     'across the sync replicas'):
+          distributed_training_utils.get_input_params(
+              distribution, input_63_samples, steps=1, batch_size=None)
+
+  @combinations.generate(strategy_for_numpy_input_combinations())
+  def test_calculating_input_params_no_steps_with_batch_size(self,
+                                                             distribution):
+    # Calculate the per_replica_batch_size scaling factor for strategies
+    # that use per_core_batch_size
+    replica_scale_factor = 1.0
+    if not distributed_training_utils.global_batch_size_supported(distribution):
+      replica_scale_factor = distribution.num_replicas_in_sync
+
+    with self.cached_session():
+      input_64_samples = np.zeros((64, 3), dtype=np.float32)
+
+      # Computed steps is correct for specified batch size
+      steps, batch_size = distributed_training_utils.get_input_params(
+          distribution, input_64_samples, steps=None, batch_size=16)
+      self.assertEqual(batch_size, 16)
+      self.assertEqual(steps, 4 // replica_scale_factor)
+
+      # Computed steps is correct for specified batch size
+      steps, batch_size = distributed_training_utils.get_input_params(
+          distribution, input_64_samples, steps=None, batch_size=32)
+      self.assertEqual(batch_size, 32)
+      self.assertEqual(steps, 2 // replica_scale_factor)
+
+      # Number of samples is not divisible by the global batch size
+      with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
+        distributed_training_utils.get_input_params(
+            distribution, input_64_samples, steps=None, batch_size=20)
+
+      # Number of samples is not divisible by the global batch size
+      with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
+        distributed_training_utils.get_input_params(
+            distribution, input_64_samples, steps=None, batch_size=3)
+
+  @combinations.generate(strategy_for_numpy_input_combinations())
+  def test_calculating_input_params_with_steps_with_batch_size(self,
+                                                               distribution):
+    with self.cached_session():
+      input_64_samples = np.zeros((64, 3), dtype=np.float32)
+
+      # No change to steps and batch size if both specified and feasible
+      steps, batch_size = distributed_training_utils.get_input_params(
+          distribution, input_64_samples, steps=5, batch_size=3)
+      self.assertEqual(batch_size, 3)
+      self.assertEqual(steps, 5)
+
+      # Number of samples is less than global batch size * steps
+      with self.assertRaisesRegexp(ValueError, 'less than samples required'):
+        distributed_training_utils.get_input_params(
+            distribution, input_64_samples, steps=10, batch_size=13)
+
+  @combinations.generate(strategy_for_numpy_input_combinations())
+  def test_calling_model_with_numpy_arrays(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae']
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+
+      inputs = np.zeros((64, 3), dtype=np.float32)
+      targets = np.zeros((64, 4), dtype=np.float32)
+
+      # Call fit with validation data
+      model.fit(inputs, targets, epochs=1, batch_size=2, verbose=0,
+                validation_data=(inputs, targets))
+
+      # TODO(anjalisridhar): We need tests for when the batch size and steps are
+      # smaller and results in a 0 batch_size and steps value.
+      model.evaluate(inputs, targets)
+      # with steps
+      model.evaluate(inputs, targets, steps=2)
+      # with batch_size
+      model.evaluate(inputs, targets, batch_size=8)
+
+      model.predict(inputs)
+      # with steps
+      model.predict(inputs, steps=2)
+      # with batch_size
+      model.predict(inputs, batch_size=8)
+
+  @combinations.generate(strategy_for_numpy_input_combinations())
+  def test_calling_model_with_nested_numpy_arrays(self, distribution):
+    with self.cached_session():
+      model = multi_input_output_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32)
+      input_b_np = np.asarray(np.random.random((64, 5)), dtype=np.float32)
+      inputs = [input_a_np, input_b_np]
+
+      output_d_np = np.asarray(np.random.random((64, 7)), dtype=np.float32)
+      output_e_np = np.asarray(np.random.random((64, 7)), dtype=np.float32)
+      targets = [output_d_np, output_e_np]
+
+      # Call fit with validation data
+      model.fit(inputs, targets, epochs=1, batch_size=8, verbose=0)
+
+      # TODO(anjalisridhar): We need tests for when the batch size and steps are
+      # smaller and results in a 0 batch_size and steps value.
+      model.evaluate(inputs, targets)
+      # with steps
+      model.evaluate(inputs, targets, steps=2)
+      # with batch_size
+      model.evaluate(inputs, targets, batch_size=8)
+
+      model.predict(inputs)
+      # with steps
+      model.predict(inputs, steps=2)
+      # with batch_size
+      model.predict(inputs, batch_size=8)
+
+  @combinations.generate(combinations.combine(
+      distribution=strategies_minus_tpu, mode=['graph']))
+  def test_numpy_with_sample_weights(self, distribution):
+    model = get_model()
+    optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+    loss = 'mse'
+    model.compile(optimizer, loss, distribute=distribution)
+
+    inputs = np.zeros((20, 3), np.float32)
+    targets = np.zeros((20, 4), np.float32)
+    sample_weights = np.ones((20), np.float32)
+
+    model.fit(inputs, targets, sample_weight=sample_weights, epochs=1,
+              steps_per_epoch=2, verbose=1)
+
+  @combinations.generate(strategy_for_numpy_input_combinations())
+  def test_flatten_predict_outputs(self, distribution):
+    with self.cached_session():
+      model = multi_input_output_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      # We take 6 input samples with each input having a dimension of 3 or 5.
+      input_a_np = np.asarray(np.random.random((6, 3)), dtype=np.float32)
+      input_b_np = np.asarray(np.random.random((6, 5)), dtype=np.float32)
+      inputs = [input_a_np, input_b_np]
+
+      outs = model.predict(inputs, steps=1)
+      # `predict` a list that is equal in length to the number of model outputs.
+      # In this test our model has two outputs and each element of `outs`
+      # corresponds to all the samples of one of the model outputs.
+      self.assertLen(outs, 2)
+      # Each of the output samples have a dimension of 7. We should process all
+      # the available input samples(6).
+      self.assertAllEqual([6, 7], outs[0].shape)
+      self.assertAllEqual([6, 7], outs[1].shape)
+
+
+class TestDistributionStrategyWithDatasets(test.TestCase,
+                                           parameterized.TestCase):
+
+  @combinations.generate(all_strategy_combinations())
+  def test_calling_model_on_same_dataset(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+
+      dataset = get_dataset(distribution)
+
+      # Call fit with validation data
+      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                validation_data=dataset, validation_steps=2)
+      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                validation_data=dataset, validation_steps=2)
+      model.predict(get_predict_dataset(distribution), steps=2)
+
+  @combinations.generate(all_strategy_combinations())
+  def test_model_interleaved_eval_same_as_direct_eval(self, distribution):
+    with self.cached_session():
+      user_controlled_model = get_model()
+      user_controlled_model.compile(
+          gradient_descent.GradientDescentOptimizer(0.001),
+          loss='mse',
+          metrics=['mae', keras.metrics.CategoricalAccuracy()],
+          distribute=distribution)
+
+      interleaved_model = get_model()
+      interleaved_model.set_weights(user_controlled_model.get_weights())
+      interleaved_model.compile(
+          gradient_descent.GradientDescentOptimizer(0.001),
+          loss='mse',
+          metrics=['mae', keras.metrics.CategoricalAccuracy()],
+          distribute=distribution)
+
+      dataset = get_dataset(distribution)
+
+      # Call fit with validation interleaved
+      interleaved_output = interleaved_model.fit(
+          dataset, epochs=2, steps_per_epoch=2, verbose=1,
+          validation_data=dataset, validation_steps=2, shuffle=False)
+
+      # Manually control the validation running after each epoch.
+      user_controlled_output = []
+      for _ in range(2):
+        user_controlled_model.fit(
+            dataset, epochs=1, steps_per_epoch=2, verbose=1, shuffle=False)
+        user_controlled_output.append(
+            user_controlled_model.evaluate(dataset, steps=2))
+
+      self.assertEqual(interleaved_output.history['val_loss'],
+                       [x[0] for x in user_controlled_output])
+      self.assertEqual(interleaved_output.history['val_mean_absolute_error'],
+                       [x[1] for x in user_controlled_output])
+      self.assertEqual(interleaved_output.history['val_categorical_accuracy'],
+                       [x[2] for x in user_controlled_output])
+
+  # TODO(priyag): Enable this test for TPU. Currently tuples/dict don't work
+  # as clone_model's input_tensors argument only seems to accept list and not
+  # tuples or dict.
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph', 'eager']))
+  def test_fit_with_tuple_and_dict_dataset_inputs(self, distribution):
+    with self.cached_session():
+      model = multi_input_output_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+
+      input_a_np = np.random.random((10, 3))
+      input_b_np = np.random.random((10, 5))
+      output_d_np = np.random.random((10, 7))
+      output_e_np = np.random.random((10, 7))
+
+      # Test with tuples
+      dataset_tuple = dataset_ops.Dataset.from_tensor_slices((
+          (input_a_np, input_b_np), (output_d_np, output_e_np)))
+      dataset_tuple = dataset_tuple.repeat(100)
+      dataset_tuple = dataset_tuple.batch(10)
+
+      model.fit(dataset_tuple, epochs=1, steps_per_epoch=2, verbose=1)
+
+      # Test with dict
+      dataset_dict = dataset_ops.Dataset.from_tensor_slices((
+          {'input_a': input_a_np, 'input_b': input_b_np},
+          (output_d_np, output_e_np)))
+      dataset_dict = dataset_dict.repeat(100)
+      dataset_dict = dataset_dict.batch(10)
+
+      model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1)
+
+  @combinations.generate(all_strategy_combinations())
+  def test_fit_eval_and_predict_methods_on_dataset(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+
+      dataset = get_dataset(distribution)
+
+      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
+      model.evaluate(dataset, steps=2, verbose=1)
+      model.predict(get_predict_dataset(distribution), steps=2)
+
+  @combinations.generate(strategy_and_optimizer_combinations())
+  def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer):
+    with self.cached_session():
+      model = get_model()
+
+      loss = 'mse'
+      model.compile(optimizer(), loss, distribute=distribution)
+
+      dataset = get_dataset(distribution)
+
+      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
+      model.evaluate(dataset, steps=2, verbose=1)
+      model.predict(get_predict_dataset(distribution), steps=2)
+
+  @combinations.generate(strategy_minus_tpu_combinations())
+  def test_dataset_with_sample_weights(self, distribution):
+    model = get_model()
+    optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+    loss = 'mse'
+    model.compile(optimizer, loss, distribute=distribution)
+
+    inputs = np.zeros((10, 3), np.float32)
+    targets = np.zeros((10, 4), np.float32)
+    sample_weights = np.ones((10), np.float32)
+    dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets,
+                                                      sample_weights))
+    dataset = dataset.repeat()
+    dataset = dataset.batch(10)
+
+    model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
+    model.evaluate(dataset, steps=2, verbose=1)
+    model.predict(dataset, steps=2)
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph', 'eager']))
+  # TODO(b/120943676, b/120957836): Re-enable once the validation code is
+  # restored.
+  def DISABLED_test_dataset_wrong_input_shape(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      # Wrong input shape
+      inputs = np.zeros((10, 5), dtype=np.float32)
+      targets = np.zeros((10, 4), dtype=np.float32)
+      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+      dataset = dataset.repeat(100)
+      dataset = dataset.batch(10)
+
+      with self.assertRaisesRegexp(ValueError,
+                                   'expected input to have shape'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)
+
+  @combinations.generate(combinations.combine(
+      distribution=[combinations.mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph', 'eager']))
+  # TODO(b/120943676, b/120957836): Re-enable once the validation code is
+  # restored.
+  def DISABLED_test_dataset_no_batch_input_validation(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      # User forgets to batch the dataset
+      inputs = np.zeros((10, 3), dtype=np.float32)
+      targets = np.zeros((10, 4), dtype=np.float32)
+      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+      dataset = dataset.repeat(100)
+
+      with self.assertRaisesRegexp(ValueError, 'expected input to have shape'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)
+
+  @combinations.generate(combinations.combine(
+      distribution=[combinations.tpu_strategy_one_step],
+      mode=['graph']))
+  def test_dataset_input_shape_fully_defined(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      dataset = get_dataset(distribution)
+      # Input shapes are not fully known. Batch dimension is unknown as we are
+      # not using the drop_remainder argument.
+      dataset = dataset.repeat(100).batch(10)
+
+      with self.assertRaisesRegexp(ValueError, 'requires fully defined shapes'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.mirrored_strategy_with_two_gpus,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_two_gpus],
+      mode=['graph', 'eager']))
+  def test_learning_phase_value(self, distribution):
+    # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare
+    # meaningful values. Currently we don't pass the learning phase if the
+    # Lambda layer uses the learning phase.
+    with self.cached_session():
+      x = keras.layers.Input(shape=(1,), name='input')
+      y = keras.layers.Dense(1, kernel_initializer='ones')(x)
+      z = keras.layers.Dropout(0.9999)(y)
+      model = keras.Model(x, z)
+      initial_weights = model.get_weights()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.005)
+      loss = 'mse'
+      metrics = ['acc']
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+
+      batch_size = 8
+      if isinstance(distribution, mirrored_strategy.CoreMirroredStrategy):
+        # CoreMirroredStrategy uses global batch size.
+        batch_size = 8 * distribution.num_replicas_in_sync
+
+      inputs = np.ones((10, 1), dtype=np.float32)
+      targets = np.ones((10, 1), dtype=np.float32)
+      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+      dataset = dataset.repeat().batch(batch_size)
+      hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1)
+      self.assertAlmostEqual(hist.history['acc'][0], 0, 0)
+
+      model.set_weights(initial_weights)
+      # TODO(psv/anjalisridhar): Enable these lines after we fix b/117431185.
+      # evaluate_output = model.evaluate(dataset, steps=20)
+      # self.assertAlmostEqual(evaluate_output[1], 1, 0)
+
+      inputs = np.ones((10, 1), dtype=np.float32)
+      predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
+
+      predict_dataset = predict_dataset.repeat().batch(batch_size)
+      output = model.predict(predict_dataset, steps=10)
+      # `predict` runs for 10 steps
+      ref_output = np.ones((160, 1), dtype=np.float32)
+      self.assertArrayNear(output, ref_output, 1e-1)
+
+  @combinations.generate(strategy_minus_tpu_combinations())
+  def testOptimizerWithCallbacks(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent_keras.SGD(0.01)
+      loss = 'mse'
+      model.compile(optimizer, loss, distribute=distribution)
+
+      dataset = get_dataset(distribution)
+
+      def schedule(_):
+        return 0.001
+
+      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
+      grouped_models = distribution.unwrap(model._distributed_model)
+      with distribution.scope():
+        for m in grouped_models:
+          self.assertAllClose(0.001, keras.backend.get_value(
+              m.optimizer.lr), atol=1e-05, rtol=1e-05)
+
+
+class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph', 'eager']))
+  def test_validating_dataset_input_tensors_with_shape_mismatch(self,
+                                                                distribution):
+    with self.cached_session():
+      a = constant_op.constant([1, 2], shape=(1, 2))
+      b = constant_op.constant([[1, 2], [1, 2]], shape=(2, 2))
+      device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
+      x = values.DistributedValues(device_map, (a, b))
+      y = values.DistributedValues(device_map, (a, a))
+      with distribution.scope():
+        # Removed device and input tensor shape details from the error message
+        # since the order of the device and the corresponding input tensor shape
+        # is not deterministic over different runs.
+        with self.assertRaisesRegexp(ValueError,
+                                     'Input tensor shapes do not match for '
+                                     'distributed tensor inputs '
+                                     'DistributedValues:.+'):
+          distributed_training_utils.validate_distributed_dataset_inputs(
+              distribution, x, y)
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph', 'eager']))
+  def test_validating_dataset_input_tensors_with_dtype_mismatch(self,
+                                                                distribution):
+    with self.cached_session():
+      a = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.int32)
+      b = constant_op.constant([1, 2], shape=(1, 2), dtype=dtypes.float64)
+      device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
+      x = values.DistributedValues(device_map, (a, b))
+      y = values.DistributedValues(device_map, (a, a))
+      with distribution.scope():
+        # Removed device and input tensor dtype details from the error message
+        # since the order of the device and the corresponding input tensor dtype
+        # is not deterministic over different runs.
+        with self.assertRaisesRegexp(ValueError,
+                                     'Input tensor dtypes do not match for '
+                                     'distributed tensor inputs '
+                                     'DistributedValues:.+'):
+          distributed_training_utils.validate_distributed_dataset_inputs(
+              distribution, x, y)
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph', 'eager']))
+  def test_unsupported_features(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae']
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+
+      dataset = get_dataset(distribution)
+
+      # Test with validation split
+      with self.assertRaisesRegexp(
+          ValueError, '`validation_split` argument is not '
+                      'supported when input `x` is a dataset or a '
+                      'dataset iterator.+'):
+        model.fit(dataset,
+                  epochs=1, steps_per_epoch=2, verbose=0,
+                  validation_split=0.5, validation_steps=2)
+
+      # Test with sample weight.
+      sample_weight = np.random.random((10,))
+      with self.assertRaisesRegexp(
+          ValueError, '`sample_weight` argument is not supported when input '
+                      '`x` is a dataset or a dataset iterator.'):
+        model.fit(
+            dataset,
+            epochs=1,
+            steps_per_epoch=2,
+            verbose=0,
+            sample_weight=sample_weight)
+
+      # Test with not specifying the `steps` argument.
+      with self.assertRaisesRegexp(
+          ValueError, 'you should specify the `steps_per_epoch` argument'):
+        model.fit(dataset, epochs=1, verbose=0)
+      with self.assertRaisesRegexp(ValueError,
+                                   'you should specify the `steps` argument'):
+        model.evaluate(dataset, verbose=0)
+
+      with self.assertRaisesRegexp(ValueError,
+                                   'you should specify the `steps` argument'):
+        model.predict(dataset, verbose=0)
+
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph', 'eager']))
+  def test_calling_with_unsupported_predefined_callbacks(self, distribution):
+    with self.cached_session():
+      model = get_model()
+
+      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+      loss = 'mse'
+      metrics = ['mae']
+      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+
+      dataset = get_dataset(distribution)
+
+      def schedule(_):
+        return 0.001
+      with self.assertRaisesRegexp(ValueError,
+                                   'You must specify a Keras Optimizer V2 when '
+                                   'using'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                  callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
+
+      with self.assertRaisesRegexp(ValueError,
+                                   'You must specify a Keras Optimizer V2 when '
+                                   'using'):
+        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
+                  callbacks=[keras.callbacks.ReduceLROnPlateau()])
+
+
+class TestDistributionStrategyWithLossMasking(test.TestCase,
+                                              parameterized.TestCase):
+
+  # TODO(priyag): Enable all strategies for this test. Currently it does not
+  # work for TPU due to some invalid datatype.
+  @combinations.generate(combinations.combine(
+      distribution=[
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
+      mode=['graph', 'eager']))
+  def test_masking(self, distribution):
+    with self.cached_session():
+      np.random.seed(1337)
+      x = np.array([[[1], [1]], [[0], [0]]])
+      model = keras.models.Sequential()
+      model.add(keras.layers.Masking(mask_value=0, input_shape=(2, 1)))
+      model.add(
+          keras.layers.TimeDistributed(
+              keras.layers.Dense(1, kernel_initializer='one')))
+      model.compile(loss='mse',
+                    optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                    distribute=distribution)
+      y = np.array([[[1], [1]], [[1], [1]]])
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
+      dataset = dataset.repeat(100)
+      dataset = dataset.batch(10)
+      hist = model.fit(x=dataset, epochs=1, steps_per_epoch=2)
+      self.assertEqual(hist.history['loss'][0], 0)
+
+
+class TestDistributionStrategyWithNormalizationLayer(
+    test.TestCase, parameterized.TestCase):
+
+  @combinations.generate(all_strategy_combinations())
+  def test_batchnorm_correctness(self, distribution):
+    with self.cached_session():
+      model = keras.models.Sequential()
+      norm = keras.layers.BatchNormalization(input_shape=(10,), momentum=0.8)
+      model.add(norm)
+      model.compile(loss='mse',
+                    optimizer=gradient_descent.GradientDescentOptimizer(0.01),
+                    distribute=distribution)
+
+      # centered on 5.0, variance 10.0
+      x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
+      x = x.astype('float32')
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, x))
+      dataset = dataset.repeat(100)
+      dataset = batch_wrapper(dataset, 32, distribution)
+
+      predict_dataset = dataset_ops.Dataset.from_tensor_slices(x)
+      predict_dataset = predict_dataset.repeat(100)
+      predict_dataset = batch_wrapper(predict_dataset, 32, distribution)
+
+      model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10)
+      out = model.predict(predict_dataset, steps=2)
+      out -= keras.backend.eval(norm.beta)
+      out /= keras.backend.eval(norm.gamma)
+      np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
+      np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
+
+
+class TestDistributionStrategyCorrectness(test.TestCase,
+                                          parameterized.TestCase):
+
+  @combinations.generate(all_strategy_combinations())
+  def test_metric_correctness(self, distribution):
+    with self.cached_session():
+      keras.backend.set_image_data_format('channels_last')
+      num_samples = 10000
+
+      x_train = np.random.randint(0, 2, num_samples)
+      x_train = np.reshape(x_train, (num_samples, 1))
+      y_train = x_train
+      x_train = x_train.astype('float32')
+      y_train = y_train.astype('float32')
+
+      # Create identity model.
+      model = keras.Sequential()
+      model.add(
+          keras.layers.Dense(1, input_shape=(1,), kernel_initializer='ones'))
+      model.compile(
+          loss=keras.losses.mean_squared_error,
+          optimizer=gradient_descent.GradientDescentOptimizer(0.5),
+          metrics=[keras.metrics.BinaryAccuracy()],
+          distribute=distribution)
+
+      batch_size = 64
+      if not distributed_training_utils.global_batch_size_supported(
+          distribution):
+        batch_size //= distribution.num_replicas_in_sync
+      train_dataset = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
+      train_dataset = batch_wrapper(train_dataset, batch_size, distribution)
+
+      history = model.fit(x=train_dataset, epochs=2, steps_per_epoch=10)
+      self.assertEqual(history.history['binary_accuracy'], [1.0, 1.0])
+
+  @combinations.generate(all_strategy_combinations())
+  def test_eval_metrics_correctness(self, distribution):
+    with self.cached_session():
+      model = keras.Sequential()
+      model.add(
+          keras.layers.Dense(
+              3, activation='relu', input_dim=4, kernel_initializer='ones'))
+      model.add(
+          keras.layers.Dense(
+              1, activation='sigmoid', kernel_initializer='ones'))
+      model.compile(
+          loss='mae',
+          metrics=['accuracy', keras.metrics.BinaryAccuracy()],
+          optimizer=gradient_descent.GradientDescentOptimizer(0.001),
+          distribute=distribution)
+
+      # verify correctness of stateful and stateless metrics.
+      x = np.ones((100, 4)).astype('float32')
+      y = np.ones((100, 1)).astype('float32')
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat()
+      dataset = batch_wrapper(dataset, 4, distribution)
+      outs = model.evaluate(dataset, steps=10)
+      self.assertEqual(outs[1], 1.)
+      self.assertEqual(outs[2], 1.)
+
+      y = np.zeros((100, 1)).astype('float32')
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat()
+      dataset = batch_wrapper(dataset, 4, distribution)
+      outs = model.evaluate(dataset, steps=10)
+      self.assertEqual(outs[1], 0.)
+      self.assertEqual(outs[2], 0.)
+
+  @combinations.generate(strategy_and_input_combinations())
+  def test_correctness(self, distribution, use_numpy, use_validation_data):
+    # TODO(b/121224478): This test is flaky with default strategy. Remove this
+    # once the issue is fixed.
+    if isinstance(distribution, distribute_lib._DefaultDistributionStrategy):  # pylint: disable=protected-access
+      self.skipTest('Disable the test for default strategy.')
+
+    with self.cached_session():
+      default_tolerance = 1e-5
+      tol_table = {}
+
+      if isinstance(distribution, (mirrored_strategy.MirroredStrategy,
+                                   mirrored_strategy.CoreMirroredStrategy)):
+        # TODO(b/119257215): Weights are not exactly the same, so use larger
+        # tolerance for now. Predict should be related to weights.
+        tol_table = {
+            'weights_1': 1e-4,
+            'weights_2': 1e-4,
+            'predict_result_1': 1e-4,
+        }
+
+      keras.backend.set_image_data_format('channels_last')
+      np.random.seed(_RANDOM_SEED)
+      random_seed.set_random_seed(_RANDOM_SEED)
+
+      # Train, eval, and predict datasets are created with the same input numpy
+      # arrays.
+      # TODO(xiejw): Change this back to 10000, once we support final partial
+      # batch.
+      num_samples = 9984
+      x_train = np.random.rand(num_samples, 1)
+      y_train = 3 * x_train
+      x_train = x_train.astype('float32')
+      y_train = y_train.astype('float32')
+      x_predict = [[1.], [2.], [3.], [4.]]
+
+      # The model is built once and the initial weights are saved.
+      # This is used to initialize the model for both the distribution and
+      # non-distribution run. In addition, we add few non-linear layers to make
+      # it non-trivial.
+      def _create_model():
+        model = keras.Sequential()
+        model.add(keras.layers.Dense(10, activation='relu', input_shape=(1,)))
+        model.add(keras.layers.Dense(10, activation='relu'))
+        model.add(keras.layers.Dense(10, activation='relu'))
+        model.add(keras.layers.Dense(1))
+        return model
+
+      model = _create_model()
+      initial_weights = model.get_weights()
+      del model  # avoid accident usage.
+
+      def fit_eval_and_predict(with_distribution=None):
+        model = _create_model()
+        # We have initialized the model to the same weight for the distribution
+        # and non-distribution run.
+        model.set_weights(initial_weights)
+        model.compile(
+            loss=keras.losses.mean_squared_error,
+            optimizer=gradient_descent_keras.SGD(0.5),
+            metrics=['mse'],
+            distribute=with_distribution)
+
+        training_inputs, eval_inputs, predict_inputs = (
+            get_correctness_test_inputs(use_numpy, use_validation_data,
+                                        with_distribution,
+                                        x_train, y_train, x_predict))
+
+        result = {}
+        result['training_history_1'] = model.fit(**training_inputs).history
+
+        if eval_inputs is not None:
+          result['eval_result_1'] = model.evaluate(**eval_inputs)
+
+        result['weights_1'] = model.get_weights()
+        result['predict_result_1'] = model.predict(**predict_inputs)
+
+        # Train and eval again to mimic user's flow.
+
+        result['training_history_2'] = model.fit(**training_inputs).history
+
+        if eval_inputs is not None:
+          result['eval_result_2'] = model.evaluate(**eval_inputs)
+
+        result['weights_2'] = model.get_weights()
+
+        return result
+
+      results_with_ds = fit_eval_and_predict(with_distribution=distribution)
+      results_without_ds = fit_eval_and_predict(with_distribution=None)
+
+      # Verify that the weights, training history, eval results, predict outputs
+      # are the same within some limits of tolerance.
+      for key in results_with_ds:
+        if (key.startswith('training_history') and
+            isinstance(distribution, tpu_strategy.TPUStrategy) and
+            distribution.extended.steps_per_run > 1):
+          # TODO(b/119894254): Enable this test for all cases once the
+          # underlying bug is fixed.
+          continue
+
+        tolerance = tol_table.get(key, default_tolerance)
+
+        self.assertAllClose(
+            results_with_ds[key],
+            results_without_ds[key],
+            atol=tolerance,
+            rtol=tolerance,
+            msg='Fail to assert {}.'.format(key))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py b/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
index c7f6ba9bed..cce93b3c10 100644
--- a/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
+++ b/tensorflow/contrib/distribute/python/keras_optimizer_v2_test.py
@@ -148,11 +148,12 @@ class MirroredStrategyOptimizerV2Test(test.TestCase, parameterized.TestCase):
   def testOptimizerWithKerasModelAndNumpyArrays(self, distribution):
 
     with self.cached_session():
-      model = get_model()
-      optimizer = gradient_descent.SGD(0.001)
-      loss = 'mse'
-      metrics = ['mae']
-      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = gradient_descent.SGD(0.001)
+        loss = 'mse'
+        metrics = ['mae']
+        model.compile(optimizer, loss, metrics=metrics)
 
       inputs = np.zeros((64, 3), dtype=np.float32)
       targets = np.zeros((64, 4), dtype=np.float32)
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index b91c27e184..154d3fa8a3 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -49,6 +49,9 @@ _TRAIN_SIZE = 200
 _INPUT_SIZE = (10,)
 _NUM_CLASS = 2
 
+# Note: Please make sure the tests in this file are also covered in
+# keras_backward_compat_test for features that are supported with both APIs.
+
 
 # TODO(anjalisridhar): Add a decorator that will allow us to run these tests as
 # part of the tf.keras unit tests suite.
@@ -654,12 +657,12 @@ class TestDistributionStrategyWithNumpyArrays(test.TestCase,
   @combinations.generate(strategy_for_numpy_input_combinations())
   def test_calling_model_with_numpy_arrays(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae']
-      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+        loss = 'mse'
+        metrics = ['mae']
+        model.compile(optimizer, loss, metrics=metrics)
 
       inputs = np.zeros((64, 3), dtype=np.float32)
       targets = np.zeros((64, 4), dtype=np.float32)
@@ -685,11 +688,12 @@ class TestDistributionStrategyWithNumpyArrays(test.TestCase,
   @combinations.generate(strategy_for_numpy_input_combinations())
   def test_calling_model_with_nested_numpy_arrays(self, distribution):
     with self.cached_session():
-      model = multi_input_output_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
-      loss = 'mse'
-      model.compile(optimizer, loss, distribute=distribution)
+      with distribution.scope():
+        model = multi_input_output_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(
+            learning_rate=0.001)
+        loss = 'mse'
+        model.compile(optimizer, loss)
 
       input_a_np = np.asarray(np.random.random((64, 3)), dtype=np.float32)
       input_b_np = np.asarray(np.random.random((64, 5)), dtype=np.float32)
@@ -719,26 +723,29 @@ class TestDistributionStrategyWithNumpyArrays(test.TestCase,
   @combinations.generate(combinations.combine(
       distribution=strategies_minus_tpu, mode=['graph']))
   def test_numpy_with_sample_weights(self, distribution):
-    model = get_model()
-    optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
-    loss = 'mse'
-    model.compile(optimizer, loss, distribute=distribution)
+    with self.cached_session():
+      with distribution.scope():
+        model = get_model()
+        optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+        loss = 'mse'
+        model.compile(optimizer, loss)
 
-    inputs = np.zeros((20, 3), np.float32)
-    targets = np.zeros((20, 4), np.float32)
-    sample_weights = np.ones((20), np.float32)
+      inputs = np.zeros((20, 3), np.float32)
+      targets = np.zeros((20, 4), np.float32)
+      sample_weights = np.ones((20), np.float32)
 
-    model.fit(inputs, targets, sample_weight=sample_weights, epochs=1,
-              steps_per_epoch=2, verbose=1)
+      model.fit(inputs, targets, sample_weight=sample_weights, epochs=1,
+                steps_per_epoch=2, verbose=1)
 
   @combinations.generate(strategy_for_numpy_input_combinations())
   def test_flatten_predict_outputs(self, distribution):
     with self.cached_session():
-      model = multi_input_output_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
-      loss = 'mse'
-      model.compile(optimizer, loss, distribute=distribution)
+      with distribution.scope():
+        model = multi_input_output_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(
+            learning_rate=0.001)
+        loss = 'mse'
+        model.compile(optimizer, loss)
 
       # We take 6 input samples with each input having a dimension of 3 or 5.
       input_a_np = np.asarray(np.random.random((6, 3)), dtype=np.float32)
@@ -762,12 +769,12 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
   @combinations.generate(all_strategy_combinations())
   def test_calling_model_on_same_dataset(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
-      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+        loss = 'mse'
+        metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+        model.compile(optimizer, loss, metrics=metrics)
 
       dataset = get_dataset(distribution)
 
@@ -781,20 +788,19 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
   @combinations.generate(all_strategy_combinations())
   def test_model_interleaved_eval_same_as_direct_eval(self, distribution):
     with self.cached_session():
-      user_controlled_model = get_model()
-      user_controlled_model.compile(
-          gradient_descent.GradientDescentOptimizer(0.001),
-          loss='mse',
-          metrics=['mae', keras.metrics.CategoricalAccuracy()],
-          distribute=distribution)
-
-      interleaved_model = get_model()
-      interleaved_model.set_weights(user_controlled_model.get_weights())
-      interleaved_model.compile(
-          gradient_descent.GradientDescentOptimizer(0.001),
-          loss='mse',
-          metrics=['mae', keras.metrics.CategoricalAccuracy()],
-          distribute=distribution)
+      with distribution.scope():
+        user_controlled_model = get_model()
+        user_controlled_model.compile(
+            gradient_descent.GradientDescentOptimizer(0.001),
+            loss='mse',
+            metrics=['mae', keras.metrics.CategoricalAccuracy()])
+
+        interleaved_model = get_model()
+        interleaved_model.set_weights(user_controlled_model.get_weights())
+        interleaved_model.compile(
+            gradient_descent.GradientDescentOptimizer(0.001),
+            loss='mse',
+            metrics=['mae', keras.metrics.CategoricalAccuracy()])
 
       dataset = get_dataset(distribution)
 
@@ -829,12 +835,13 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
       mode=['graph', 'eager']))
   def test_fit_with_tuple_and_dict_dataset_inputs(self, distribution):
     with self.cached_session():
-      model = multi_input_output_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
-      loss = 'mse'
-      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
-      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+      with distribution.scope():
+        model = multi_input_output_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(
+            learning_rate=0.001)
+        loss = 'mse'
+        metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+        model.compile(optimizer, loss, metrics=metrics)
 
       input_a_np = np.random.random((10, 3))
       input_b_np = np.random.random((10, 5))
@@ -861,12 +868,12 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
   @combinations.generate(all_strategy_combinations())
   def test_fit_eval_and_predict_methods_on_dataset(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae', keras.metrics.CategoricalAccuracy()]
-      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+        loss = 'mse'
+        metrics = ['mae', keras.metrics.CategoricalAccuracy()]
+        model.compile(optimizer, loss, metrics=metrics)
 
       dataset = get_dataset(distribution)
 
@@ -877,10 +884,10 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
   @combinations.generate(strategy_and_optimizer_combinations())
   def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer):
     with self.cached_session():
-      model = get_model()
-
-      loss = 'mse'
-      model.compile(optimizer(), loss, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        loss = 'mse'
+        model.compile(optimizer(), loss)
 
       dataset = get_dataset(distribution)
 
@@ -890,22 +897,24 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
 
   @combinations.generate(strategy_minus_tpu_combinations())
   def test_dataset_with_sample_weights(self, distribution):
-    model = get_model()
-    optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
-    loss = 'mse'
-    model.compile(optimizer, loss, distribute=distribution)
-
-    inputs = np.zeros((10, 3), np.float32)
-    targets = np.zeros((10, 4), np.float32)
-    sample_weights = np.ones((10), np.float32)
-    dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets,
-                                                      sample_weights))
-    dataset = dataset.repeat()
-    dataset = dataset.batch(10)
-
-    model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
-    model.evaluate(dataset, steps=2, verbose=1)
-    model.predict(dataset, steps=2)
+    with self.cached_session():
+      with distribution.scope():
+        model = get_model()
+        optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+        loss = 'mse'
+        model.compile(optimizer, loss)
+
+      inputs = np.zeros((10, 3), np.float32)
+      targets = np.zeros((10, 4), np.float32)
+      sample_weights = np.ones((10), np.float32)
+      dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets,
+                                                        sample_weights))
+      dataset = dataset.repeat()
+      dataset = dataset.batch(10)
+
+      model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
+      model.evaluate(dataset, steps=2, verbose=1)
+      model.predict(dataset, steps=2)
 
   @combinations.generate(combinations.combine(
       distribution=[
@@ -916,11 +925,11 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
   # restored.
   def DISABLED_test_dataset_wrong_input_shape(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
-      loss = 'mse'
-      model.compile(optimizer, loss, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+        loss = 'mse'
+        model.compile(optimizer, loss)
 
       # Wrong input shape
       inputs = np.zeros((10, 5), dtype=np.float32)
@@ -940,11 +949,11 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
   # restored.
   def DISABLED_test_dataset_no_batch_input_validation(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
-      loss = 'mse'
-      model.compile(optimizer, loss, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+        loss = 'mse'
+        model.compile(optimizer, loss)
 
       # User forgets to batch the dataset
       inputs = np.zeros((10, 3), dtype=np.float32)
@@ -960,11 +969,11 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
       mode=['graph']))
   def test_dataset_input_shape_fully_defined(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
-      loss = 'mse'
-      model.compile(optimizer, loss, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
+        loss = 'mse'
+        model.compile(optimizer, loss)
 
       dataset = get_dataset(distribution)
       # Input shapes are not fully known. Batch dimension is unknown as we are
@@ -986,16 +995,17 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
     # meaningful values. Currently we don't pass the learning phase if the
     # Lambda layer uses the learning phase.
     with self.cached_session():
-      x = keras.layers.Input(shape=(1,), name='input')
-      y = keras.layers.Dense(1, kernel_initializer='ones')(x)
-      z = keras.layers.Dropout(0.9999)(y)
-      model = keras.Model(x, z)
-      initial_weights = model.get_weights()
+      with distribution.scope():
+        x = keras.layers.Input(shape=(1,), name='input')
+        y = keras.layers.Dense(1, kernel_initializer='ones')(x)
+        z = keras.layers.Dropout(0.9999)(y)
+        model = keras.Model(x, z)
+        initial_weights = model.get_weights()
 
-      optimizer = gradient_descent.GradientDescentOptimizer(0.005)
-      loss = 'mse'
-      metrics = ['acc']
-      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+        optimizer = gradient_descent.GradientDescentOptimizer(0.005)
+        loss = 'mse'
+        metrics = ['acc']
+        model.compile(optimizer, loss, metrics=metrics)
 
       batch_size = 8
       if isinstance(distribution, mirrored_strategy.CoreMirroredStrategy):
@@ -1009,7 +1019,8 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
       hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1)
       self.assertAlmostEqual(hist.history['acc'][0], 0, 0)
 
-      model.set_weights(initial_weights)
+      with distribution.scope():
+        model.set_weights(initial_weights)
       # TODO(psv/anjalisridhar): Enable these lines after we fix b/117431185.
       # evaluate_output = model.evaluate(dataset, steps=20)
       # self.assertAlmostEqual(evaluate_output[1], 1, 0)
@@ -1026,11 +1037,14 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
   @combinations.generate(strategy_minus_tpu_combinations())
   def testOptimizerWithCallbacks(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent_keras.SGD(0.01)
-      loss = 'mse'
-      model.compile(optimizer, loss, distribute=distribution)
+      # TODO(b/120946189): Investigate why default strategy + eager fails.
+      if '_Default' in distribution.__class__.__name__:
+        self.skipTest('Disable the test for default strategy.')
+      with distribution.scope():
+        model = get_model()
+        optimizer = gradient_descent_keras.SGD(0.01)
+        loss = 'mse'
+        model.compile(optimizer, loss)
 
       dataset = get_dataset(distribution)
 
@@ -1039,11 +1053,7 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
 
       model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                 callbacks=[keras.callbacks.LearningRateScheduler(schedule)])
-      grouped_models = distribution.unwrap(model._grouped_model)
-      with distribution.scope():
-        for m in grouped_models:
-          self.assertAllClose(0.001, keras.backend.get_value(
-              m.optimizer.lr), atol=1e-05, rtol=1e-05)
+      self.assertAllClose(0.001, keras.backend.get_value(model.optimizer.lr))
 
 
 class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
@@ -1061,14 +1071,14 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
       device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
       x = values.DistributedValues(device_map, (a, b))
       y = values.DistributedValues(device_map, (a, a))
-      with distribution.scope():
-        # Removed device and input tensor shape details from the error message
-        # since the order of the device and the corresponding input tensor shape
-        # is not deterministic over different runs.
-        with self.assertRaisesRegexp(ValueError,
-                                     'Input tensor shapes do not match for '
-                                     'distributed tensor inputs '
-                                     'DistributedValues:.+'):
+      # Removed device and input tensor shape details from the error message
+      # since the order of the device and the corresponding input tensor shape
+      # is not deterministic over different runs.
+      with self.assertRaisesRegexp(ValueError,
+                                   'Input tensor shapes do not match for '
+                                   'distributed tensor inputs '
+                                   'DistributedValues:.+'):
+        with distribution.scope():
           distributed_training_utils.validate_distributed_dataset_inputs(
               distribution, x, y)
 
@@ -1085,14 +1095,14 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
       device_map = values.ReplicaDeviceMap(('/device:CPU:0', '/device:GPU:0'))
       x = values.DistributedValues(device_map, (a, b))
       y = values.DistributedValues(device_map, (a, a))
-      with distribution.scope():
-        # Removed device and input tensor dtype details from the error message
-        # since the order of the device and the corresponding input tensor dtype
-        # is not deterministic over different runs.
-        with self.assertRaisesRegexp(ValueError,
-                                     'Input tensor dtypes do not match for '
-                                     'distributed tensor inputs '
-                                     'DistributedValues:.+'):
+      # Removed device and input tensor dtype details from the error message
+      # since the order of the device and the corresponding input tensor dtype
+      # is not deterministic over different runs.
+      with self.assertRaisesRegexp(ValueError,
+                                   'Input tensor dtypes do not match for '
+                                   'distributed tensor inputs '
+                                   'DistributedValues:.+'):
+        with distribution.scope():
           distributed_training_utils.validate_distributed_dataset_inputs(
               distribution, x, y)
 
@@ -1103,12 +1113,12 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
       mode=['graph', 'eager']))
   def test_unsupported_features(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae']
-      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+        loss = 'mse'
+        metrics = ['mae']
+        model.compile(optimizer, loss, metrics=metrics)
 
       dataset = get_dataset(distribution)
 
@@ -1152,12 +1162,12 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
       mode=['graph', 'eager']))
   def test_calling_with_unsupported_predefined_callbacks(self, distribution):
     with self.cached_session():
-      model = get_model()
-
-      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
-      loss = 'mse'
-      metrics = ['mae']
-      model.compile(optimizer, loss, metrics=metrics, distribute=distribution)
+      with distribution.scope():
+        model = get_model()
+        optimizer = gradient_descent.GradientDescentOptimizer(0.001)
+        loss = 'mse'
+        metrics = ['mae']
+        model.compile(optimizer, loss, metrics=metrics)
 
       dataset = get_dataset(distribution)
 
@@ -1190,14 +1200,14 @@ class TestDistributionStrategyWithLossMasking(test.TestCase,
     with self.cached_session():
       np.random.seed(1337)
       x = np.array([[[1], [1]], [[0], [0]]])
-      model = keras.models.Sequential()
-      model.add(keras.layers.Masking(mask_value=0, input_shape=(2, 1)))
-      model.add(
-          keras.layers.TimeDistributed(
-              keras.layers.Dense(1, kernel_initializer='one')))
-      model.compile(loss='mse',
-                    optimizer=gradient_descent.GradientDescentOptimizer(0.01),
-                    distribute=distribution)
+      with distribution.scope():
+        model = keras.models.Sequential()
+        model.add(keras.layers.Masking(mask_value=0, input_shape=(2, 1)))
+        model.add(
+            keras.layers.TimeDistributed(
+                keras.layers.Dense(1, kernel_initializer='one')))
+        model.compile(loss='mse',
+                      optimizer=gradient_descent.GradientDescentOptimizer(0.01))
       y = np.array([[[1], [1]], [[1], [1]]])
       dataset = dataset_ops.Dataset.from_tensor_slices((x, y))
       dataset = dataset.repeat(100)
@@ -1212,12 +1222,12 @@ class TestDistributionStrategyWithNormalizationLayer(
   @combinations.generate(all_strategy_combinations())
   def test_batchnorm_correctness(self, distribution):
     with self.cached_session():
-      model = keras.models.Sequential()
-      norm = keras.layers.BatchNormalization(input_shape=(10,), momentum=0.8)
-      model.add(norm)
-      model.compile(loss='mse',
-                    optimizer=gradient_descent.GradientDescentOptimizer(0.01),
-                    distribute=distribution)
+      with distribution.scope():
+        model = keras.models.Sequential()
+        norm = keras.layers.BatchNormalization(input_shape=(10,), momentum=0.8)
+        model.add(norm)
+        model.compile(loss='mse',
+                      optimizer=gradient_descent.GradientDescentOptimizer(0.01))
 
       # centered on 5.0, variance 10.0
       x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
@@ -1254,14 +1264,14 @@ class TestDistributionStrategyCorrectness(test.TestCase,
       y_train = y_train.astype('float32')
 
       # Create identity model.
-      model = keras.Sequential()
-      model.add(
-          keras.layers.Dense(1, input_shape=(1,), kernel_initializer='ones'))
-      model.compile(
-          loss=keras.losses.mean_squared_error,
-          optimizer=gradient_descent.GradientDescentOptimizer(0.5),
-          metrics=[keras.metrics.BinaryAccuracy()],
-          distribute=distribution)
+      with distribution.scope():
+        model = keras.Sequential()
+        model.add(
+            keras.layers.Dense(1, input_shape=(1,), kernel_initializer='ones'))
+        model.compile(
+            loss=keras.losses.mean_squared_error,
+            optimizer=gradient_descent.GradientDescentOptimizer(0.5),
+            metrics=[keras.metrics.BinaryAccuracy()])
 
       batch_size = 64
       if not distributed_training_utils.global_batch_size_supported(
@@ -1276,18 +1286,18 @@ class TestDistributionStrategyCorrectness(test.TestCase,
   @combinations.generate(all_strategy_combinations())
   def test_eval_metrics_correctness(self, distribution):
     with self.cached_session():
-      model = keras.Sequential()
-      model.add(
-          keras.layers.Dense(
-              3, activation='relu', input_dim=4, kernel_initializer='ones'))
-      model.add(
-          keras.layers.Dense(
-              1, activation='sigmoid', kernel_initializer='ones'))
-      model.compile(
-          loss='mae',
-          metrics=['accuracy', keras.metrics.BinaryAccuracy()],
-          optimizer=gradient_descent.GradientDescentOptimizer(0.001),
-          distribute=distribution)
+      with distribution.scope():
+        model = keras.Sequential()
+        model.add(
+            keras.layers.Dense(
+                3, activation='relu', input_dim=4, kernel_initializer='ones'))
+        model.add(
+            keras.layers.Dense(
+                1, activation='sigmoid', kernel_initializer='ones'))
+        model.compile(
+            loss='mae',
+            metrics=['accuracy', keras.metrics.BinaryAccuracy()],
+            optimizer=gradient_descent.GradientDescentOptimizer(0.001))
 
       # verify correctness of stateful and stateless metrics.
       x = np.ones((100, 4)).astype('float32')
@@ -1307,7 +1317,6 @@ class TestDistributionStrategyCorrectness(test.TestCase,
 
   @combinations.generate(strategy_and_input_combinations())
   def test_correctness(self, distribution, use_numpy, use_validation_data):
-
     # TODO(b/121224478): This test is flaky with default strategy. Remove this
     # once the issue is fixed.
     if isinstance(distribution, distribute_lib._DefaultDistributionStrategy):  # pylint: disable=protected-access
@@ -1358,16 +1367,23 @@ class TestDistributionStrategyCorrectness(test.TestCase,
       initial_weights = model.get_weights()
       del model  # avoid accident usage.
 
-      def fit_eval_and_predict(with_distribution=None):
-        model = _create_model()
+      def _build_and_compile_model():
         # We have initialized the model to the same weight for the distribution
         # and non-distribution run.
+        model = _create_model()
         model.set_weights(initial_weights)
         model.compile(
             loss=keras.losses.mean_squared_error,
             optimizer=gradient_descent_keras.SGD(0.5),
-            metrics=['mse'],
-            distribute=with_distribution)
+            metrics=['mse'])
+        return model
+
+      def fit_eval_and_predict(with_distribution=None):
+        if with_distribution:
+          with with_distribution.scope():
+            model = _build_and_compile_model()
+        else:
+          model = _build_and_compile_model()
 
         training_inputs, eval_inputs, predict_inputs = (
             get_correctness_test_inputs(use_numpy, use_validation_data,
diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py
index cf9672f8d8..37fe9af8c4 100644
--- a/tensorflow/contrib/tpu/python/tpu/keras_support.py
+++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py
@@ -1373,6 +1373,10 @@ class KerasTPUModel(models.Model):
     # not hashable.
     self._numpy_to_infeed_manager_list = []
 
+    # Add distribution specific arguments since we don't call the Model init.
+    self._distribution_strategy = None
+    self._compile_distribution = None
+
     self.predict_function = None
     self.test_function = None
     self.train_function = None
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index a2c834f893..55fc9c9e1f 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -843,6 +843,9 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
   def device(self):
     return self._get().device
 
+  def eval(self, session=None):
+    return self.primary.eval(session)
+
   # The arguments to update() are automatically unwrapped so the update()
   # function would normally see regular variables, not MirroredVariables.
   # However, the update function can still operate on wrapped MirroredVariables
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index d591495458..e1706fb310 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -22,6 +22,7 @@ import collections
 import weakref
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.eager import context
@@ -46,6 +47,7 @@ from tensorflow.python.keras.utils.generic_utils import slice_arrays
 from tensorflow.python.keras.utils.losses_utils import squeeze_or_expand_dimensions
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training import distribution_strategy_context
 from tensorflow.python.training import optimizer as tf_optimizer_module
 from tensorflow.python.training.checkpointable import base as checkpointable
 from tensorflow.python.training.mode_keys import ModeKeys
@@ -127,9 +129,24 @@ class Model(Network):
     # initializing _distribution_strategy here since it is possible to call
     # predict on a model without compiling it.
     self._distribution_strategy = None
+    # This flag is used to track if the user is using the deprecated path of
+    # passing distribution strategy to compile rather than creating the model
+    # under distribution strategy scope.
+    self._compile_distribution = False
 
     self.run_eagerly = None
 
+  def get_weights(self):
+    """Retrieves the weights of the model.
+
+    Returns:
+        A flat list of Numpy arrays.
+    """
+    if self._distribution_strategy:
+      with self._distribution_strategy.scope():
+        return super(Model, self).get_weights()
+    return super(Model, self).get_weights()
+
   @checkpointable.no_automatic_dependency_tracking
   def compile(self,
               optimizer,
@@ -182,9 +199,10 @@ class Model(Network):
             can specify them via the `target_tensors` argument. It can be
             a single tensor (for a single-output model), a list of tensors,
             or a dict mapping output names to target tensors.
-        distribute: The DistributionStrategy instance that we want to use to
-            distribute the training of the model.
-        **kwargs: These arguments are passed to `tf.Session.run`.
+        distribute: NOT SUPPORTED IN TF 2.0, please create and compile the
+            model under distribution strategy scope instead of passing it to
+            compile.
+        **kwargs: Any additional arguments.
 
     Raises:
         ValueError: In case of invalid arguments for
@@ -194,9 +212,28 @@ class Model(Network):
     self._run_eagerly = run_eagerly
     optimizer = optimizers.get(optimizer)
 
+    if distribute is not None:
+      if tf2.enabled():
+        raise ValueError(
+            'Distribute argument in compile is not available in TF 2.0 please '
+            'create the model under the distribution strategy scope.')
+      logging.warning('Distribute argument in compile is deprecated please '
+                      'create the model under the distribution strategy scope.')
+      self._distribution_strategy = distribute
+      self._compile_distribution = True
+    else:
+      if distribution_strategy_context.has_distribution_strategy():
+        # When the user builds the model in the DS scope and cross replica
+        # context we want distribution strategy to be set but when building the
+        # replica copies of the models internally we should not be compiling
+        # with distribution strategy and use the default compilation path.
+        if distribution_strategy_context.in_cross_replica_context():
+          self._distribution_strategy = (
+              distribution_strategy_context.get_distribution_strategy())
+
     # Validate that arguments passed by the user to `compile` are supported by
     # DistributionStrategy.
-    if distribute:
+    if self._distribution_strategy:
       if not isinstance(optimizer,
                         (tf_optimizer_module.Optimizer, optimizers.TFOptimizer,
                          optimizer_v2.OptimizerV2)):
@@ -240,9 +277,7 @@ class Model(Network):
     self.target_tensors = target_tensors
 
     # Set DistributionStrategy specific parameters.
-    self._distribution_strategy = distribute
-    # Reset the value of grouped_model
-    self._grouped_model = None
+    self._distributed_model = None
     if self._distribution_strategy is not None:
       distributed_training_utils.configure_and_create_session(
           self._distribution_strategy)
@@ -459,12 +494,8 @@ class Model(Network):
                 loss_fn, losses.Loss) else loss_fn.__name__
             mean_wrapped_loss = metrics_module.MeanMetricWrapper(
                 loss_fn, name=loss_name)
-            result_tensor = training_utils.call_metric_function(
-                mean_wrapped_loss,
-                y_true,
-                y_pred,
-                weights=sample_weight,
-                mask=mask)
+            result_tensor = self._call_metric_fn(mean_wrapped_loss, y_true,
+                                                 y_pred, sample_weight, mask)
             self._compile_stateful_metrics_tensors[self.output_names[i] +
                                                    '_loss'] = result_tensor
             self._compile_stateful_metric_functions.append(mean_wrapped_loss)
@@ -1784,6 +1815,22 @@ class Model(Network):
     self._per_output_metrics = updated_per_output_metrics
     self._per_output_weighted_metrics = updated_per_output_weighted_metrics
 
+  def _call_metric_fn(self, fn, y_true, y_pred, weights, mask):
+    """Helper function to call metric function with distribution strategy."""
+    # TODO(b/120571621): We want to avoid metric reductions here since
+    # since TPUStrategy does not implement replica local variables.
+    # Remove this hack once we support TPUReplicaLocalVariables.
+    is_tpu = distributed_training_utils.is_tpu_strategy(
+        self._distribution_strategy)
+    if ((not is_tpu) and self._distribution_strategy and
+        distribution_strategy_context.in_cross_replica_context()):
+      with self._distribution_strategy.scope():
+        return self._distribution_strategy.extended.call_for_each_replica(
+            training_utils.call_metric_function,
+            (fn, y_true, y_pred, weights, mask))
+    return training_utils.call_metric_function(
+        fn, y_true, y_pred, weights=weights, mask=mask)
+
   def _handle_per_output_metrics(self,
                                  metrics_dict,
                                  y_true,
@@ -1810,8 +1857,8 @@ class Model(Network):
       with K.name_scope(metric_name):
 
         def _call_stateful_fn(fn):
-          return training_utils.call_metric_function(
-              fn, y_true, y_pred, weights=weights, mask=mask)
+          """Create stateful metrics correctly."""
+          return self._call_metric_fn(fn, y_true, y_pred, weights, mask)
 
         def _call_stateless_fn(fn):
           weighted_metric_fn = training_utils.weighted_masked_objective(fn)
diff --git a/tensorflow/python/keras/engine/training_arrays.py b/tensorflow/python/keras/engine/training_arrays.py
index 03033c3334..af67444b01 100644
--- a/tensorflow/python/keras/engine/training_arrays.py
+++ b/tensorflow/python/keras/engine/training_arrays.py
@@ -165,9 +165,11 @@ def model_iteration(model,
       validation_steps: Number of steps to run validation for (only if doing
         validation from data tensors). Ignored with the default value of `None`.
       mode: One of 'train'/'test'/'predict'.
-      validation_in_fit: if true, then this method is invoked from within
-        training iteration (for validation). In this case, do not copy weights
-        when using a tf.distribute.Strategy.
+      validation_in_fit: DEPRECATED: if true, then this method is invoked from
+        within training iteration (for validation). In this case, do not copy
+        weights when using a tf.distribute.Strategy. The input is deprecated as
+        it is not required if the user creates a distributed model under the
+        distribution strategy scope rather than passing it to compile.
       **kwargs: Additional arguments for backwards compatibility.
 
   Returns:
@@ -234,9 +236,9 @@ def model_iteration(model,
     aggregator = training_utils.MetricsAggregator(use_steps,
                                                   num_samples_or_steps)
 
-  if model._distribution_strategy and not validation_in_fit:
+  if model._compile_distribution and not validation_in_fit:
     training_distributed._copy_weights_to_distributed_model(
-        model, model._grouped_model)
+        model, model._distributed_model)
 
   callbacks.model.stop_training = False
   callbacks._call_begin_hook(mode)
@@ -375,11 +377,10 @@ def model_iteration(model,
   callbacks._call_end_hook(mode)
 
   if model._distribution_strategy:
-    # TODO(priyag, psv): Copy back metrics to the original model as well?
-    if not validation_in_fit:
+    if model._compile_distribution and not validation_in_fit:
+      # TODO(priyag, psv): Copy back metrics to the original model as well?
       training_distributed._copy_weights_to_original_model(
-          model, model._grouped_model, mode)
-
+          model, model._distributed_model, mode)
     scope.__exit__(None, None, None)
 
   if mode == 'train':
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index ffb0266911..2affc4b0d6 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -41,7 +41,6 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
 
-# TODO(sourabhbajaj): Check if we can merge the test and prediction graphs
 class _Mode(enum.Enum):
   TRAIN = 'train'
   TEST = 'test'
@@ -84,8 +83,6 @@ def experimental_fit_loop(model,
   """
   current_strategy = model._distribution_strategy
 
-  K.get_session().run(current_strategy.initialize())
-
   def _per_device_fit_function(model):
     model._make_fit_function()
     return (model._fit_function.inputs, model._fit_function.outputs,
@@ -97,21 +94,18 @@ def experimental_fit_loop(model,
 
   def step_fn(ctx, inputs):
     """Clones the model and calls make_fit_function."""
-    # TODO(priyag, sourabhbajaj): The model gets cloned every time
-    # fit/test/predict is called. We should look into caching this keyed on
-    # input shapes.
     inputs, targets = inputs
-    clone_model_on_replicas(
-        model,
-        current_strategy,
-        make_callback_model=True,
-        inputs=inputs,
-        targets=targets,
-        mode=_Mode.TRAIN)
+    if model._compile_distribution:
+      clone_model_on_replicas(model, current_strategy,
+                              make_callback_model=True, inputs=inputs,
+                              targets=targets, mode=_Mode.TRAIN)
+    else:
+      _build_distributed_network(model, current_strategy, inputs,
+                                 targets, mode=_Mode.TRAIN)
 
     (grouped_inputs, grouped_outputs, grouped_updates,
      grouped_session_args) = current_strategy.extended.call_for_each_replica(
-         _per_device_fit_function, args=(model._grouped_model_train,))
+         _per_device_fit_function, args=(model._distributed_model_train,))
     (all_inputs, all_outputs, all_updates,
      all_session_args) = distributed_training_utils.unwrap_values(
          current_strategy, grouped_inputs, grouped_outputs,
@@ -162,9 +156,9 @@ def experimental_fit_loop(model,
 
   do_validation = bool(validation_steps)
 
-  # Copy the weights from the original model to each of the replicated models.
-  with current_strategy.scope():
-    _copy_weights_to_distributed_model(model, model._grouped_model_train)
+  if model._compile_distribution:
+    with current_strategy.scope():
+      _copy_weights_to_distributed_model(model, model._distributed_model_train)
 
   callbacks = cbks.configure_callbacks(
       callbacks,
@@ -184,7 +178,7 @@ def experimental_fit_loop(model,
   callbacks.on_train_begin()
   for epoch in range(initial_epoch, epochs):
     with current_strategy.scope():
-      _reset_metrics(model, model._grouped_model_train)
+      _reset_metrics(model, model._distributed_model_train)
     callbacks.on_epoch_begin(epoch)
     epoch_logs = {}
     step_index = 0
@@ -214,18 +208,18 @@ def experimental_fit_loop(model,
     if do_validation:
       logging.info('Running validation at fit epoch: %s', epoch)
 
-      # Since we create a new clone from the original model we need to copy
-      # the weights back to the original model before we can run validation.
-      with current_strategy.scope():
-        _copy_weights_to_original_model(model, model._grouped_model_train,
-                                        'train')
+      if model._compile_distribution:
+        # Since we create a new clone from the original model we need to copy
+        # the weights back to the original model before we can run validation.
+        with current_strategy.scope():
+          _copy_weights_to_original_model(
+              model, model._distributed_model_train, 'train')
 
       val_outs = experimental_test_loop(  # pylint: disable=undefined-variable
           model,
           val_iterator,
           steps=validation_steps,
-          verbose=verbose,
-          initialize_finalize_strategy=False)
+          verbose=verbose)
       if not isinstance(val_outs, list):
         val_outs = [val_outs]
       # Same labels assumed.
@@ -237,19 +231,19 @@ def experimental_fit_loop(model,
       break
   callbacks.on_train_end()
 
-  # Copy the weights back from the replicated model to the original model.
-  with current_strategy.scope():
-    _copy_weights_to_original_model(model, model._grouped_model_train, 'train')
+  if model._compile_distribution:
+    # Copy the weights back from the replicated model to the original model.
+    with current_strategy.scope():
+      _copy_weights_to_original_model(model, model._distributed_model_train,
+                                      'train')
 
-  K.get_session().run(current_strategy.finalize())
   return model.history
 
 
 def experimental_test_loop(model,
                            iterator,
                            verbose=0,
-                           steps=None,
-                           initialize_finalize_strategy=True):
+                           steps=None):
   """Test loop for evaluating with TPU DistributionStrategy.
 
   Arguments:
@@ -259,8 +253,6 @@ def experimental_test_loop(model,
       steps: Total number of steps (batches of samples)
           before declaring predictions finished.
           Ignored with the default value of `None`.
-      initialize_finalize_strategy: Should the strategy initialize and finalize
-          functions be called.
 
   Returns:
       Scalar loss (if the model has a single output and no metrics)
@@ -269,8 +261,6 @@ def experimental_test_loop(model,
       the display labels for the outputs.
   """
   current_strategy = model._distribution_strategy
-  if initialize_finalize_strategy:
-    K.get_session().run(current_strategy.initialize())
 
   def _per_device_eval_function(model):
     model._make_eval_function()
@@ -283,21 +273,18 @@ def experimental_test_loop(model,
 
   def step_fn(ctx, inputs):
     """Clones the model and calls make_eval_function."""
-    # TODO(priyag, sourabhbajaj): The model gets cloned every time
-    # fit/test/predict is called. We should look into caching this keyed on
-    # input shapes.
     inputs, targets = inputs
-    clone_model_on_replicas(
-        model,
-        current_strategy,
-        make_callback_model=False,
-        inputs=inputs,
-        targets=targets,
-        mode=_Mode.TEST)
+    if model._compile_distribution:
+      clone_model_on_replicas(model, current_strategy,
+                              make_callback_model=False, inputs=inputs,
+                              targets=targets, mode=_Mode.TEST)
+    else:
+      _build_distributed_network(model, current_strategy, inputs,
+                                 targets, mode=_Mode.TEST)
 
     (grouped_inputs, grouped_outputs, grouped_updates,
      grouped_session_args) = current_strategy.extended.call_for_each_replica(
-         _per_device_eval_function, args=(model._grouped_model_test,))
+         _per_device_eval_function, args=(model._distributed_model_test,))
 
     (all_inputs, all_outputs, all_updates,
      all_session_args) = distributed_training_utils.unwrap_values(
@@ -341,10 +328,12 @@ def experimental_test_loop(model,
   if verbose == 1:
     progbar = Progbar(target=steps)
 
-  # Copy the weights from the original model to each of the replicated models.
+  if model._compile_distribution:
+    with current_strategy.scope():
+      _copy_weights_to_distributed_model(model, model._distributed_model_test)
   with current_strategy.scope():
-    _copy_weights_to_distributed_model(model, model._grouped_model_test)
-    _reset_metrics(model, model._grouped_model_test)
+    _reset_metrics(model, model._distributed_model_test)
+
   assert steps is not None
   outs = [0.] * len(model.metrics_names)
   for step in range(steps):
@@ -363,9 +352,6 @@ def experimental_test_loop(model,
   if len(outs) >= 0:
     outs[0] /= (steps)
 
-  if initialize_finalize_strategy:
-    K.get_session().run(current_strategy.finalize())
-
   if len(outs) == 1:
     return outs[0]
   return outs
@@ -388,7 +374,6 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
       (if the model has multiple outputs).
   """
   current_strategy = model._distribution_strategy
-  K.get_session().run(current_strategy.initialize())
 
   # TODO(priyag, sourabhbajaj): This should likely not be hardcoded here.
   K.set_learning_phase(0)
@@ -402,20 +387,17 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
 
   def step_fn(ctx, inputs):
     """Clones the model and calls make_predict_function."""
-
-    # TODO(priyag, sourabhbajaj): The model gets cloned every time
-    # fit/test/predict is called. We should look into caching this keyed on
-    # input shapes.
-    clone_model_on_replicas(
-        model,
-        current_strategy,
-        make_callback_model=False,
-        inputs=inputs,
-        mode=_Mode.PREDICT)
+    if model._compile_distribution:
+      clone_model_on_replicas(model, current_strategy,
+                              make_callback_model=False, inputs=inputs,
+                              mode=_Mode.PREDICT)
+    else:
+      _build_distributed_network(model, current_strategy, inputs,
+                                 mode=_Mode.PREDICT)
 
     (grouped_inputs, grouped_outputs, grouped_updates,
      grouped_session_args) = current_strategy.extended.call_for_each_replica(
-         _per_device_predict_function, args=(model._grouped_model_predict,))
+         _per_device_predict_function, args=(model._distributed_model_predict,))
 
     (all_inputs, all_outputs, all_updates,
      all_session_args) = distributed_training_utils.unwrap_values(
@@ -455,10 +437,13 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
   if verbose == 1:
     progbar = Progbar(target=steps)
 
-  # Copy the weights from the original model to each of the replicated models.
+  if model._compile_distribution:
+    with current_strategy.scope():
+      _copy_weights_to_distributed_model(
+          model, model._distributed_model_predict)
   with current_strategy.scope():
-    _copy_weights_to_distributed_model(model, model._grouped_model_predict)
-    _reset_metrics(model, model._grouped_model_predict)
+    _reset_metrics(model, model._distributed_model_predict)
+
   assert steps is not None
   # Since we do not know how many samples we will see, we cannot pre-allocate
   # the returned Numpy arrays. Instead, we store one array per batch seen
@@ -472,8 +457,6 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
     if verbose >= 1:
       progbar.update(step + 1)
 
-  K.get_session().run(current_strategy.finalize())
-
   if len(unconcatenated_outs) == 1:
     return np.concatenate(unconcatenated_outs[0], axis=0)
   return [
@@ -492,6 +475,90 @@ def _custom_compile_for_predict(model):
   model.predict_function = None
 
 
+def _build_network_on_replica(model, inputs=None, targets=None, mode=None):
+  """Build an updated model on replicas.
+
+  We create a new Keras model while sharing the variables from the old graph.
+  Building a new sub-graph is required since the original keras model creates
+  placeholders for the input and the output that are not accessible till we
+  call iterator.get_next() inside the step_fn for `fit`/`evaluate`/`predict`.
+
+  The sharing of weights and layers between the old and the new model gaurantee
+  that we're using Strategy variables and any updates on either model are
+  reflected correctly in callbacks and loop iterations.
+
+  We need to make sure we share the optimizers between the old and the new model
+  as well so that optimizer state is not lost if the user is running fit
+  multiple times.
+
+  Args:
+    model: Model to be replicated across Replicas
+    inputs: Input variables to be passed to the model
+    targets: Target tensor to be passed to model.compile
+    mode: Which of fit/eval/predict is building the distributed network
+
+  Returns:
+    A new model with shared layers with the old model.
+  """
+  # Need to do imports here since we run into a circular dependency error.
+  from tensorflow.python.keras import models  # pylint: disable=g-import-not-at-top
+  from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top
+
+  # We rely on the internal methods to avoid having share_weights weights in the
+  # public API.
+  if isinstance(model, sequential.Sequential):
+    updated_model = models._clone_sequential_model(model, input_tensors=inputs,
+                                                   share_weights=True)
+  else:
+    updated_model = models._clone_functional_model(model, input_tensors=inputs,
+                                                   share_weights=True)
+
+  # Recast all low precision outputs back to float32 since we only casted
+  # the inputs to bfloat16 and not targets. This is done so that we can preserve
+  # precision when calculating the loss value.
+  def _upcast_low_precision_outputs(output):
+    if output.dtype == dtypes.bfloat16:
+      return math_ops.cast(output, dtypes.float32)
+    else:
+      return output
+  updated_model.outputs = [_upcast_low_precision_outputs(o)
+                           for o in updated_model.outputs]
+
+  if isinstance(targets, tuple):
+    targets = nest.flatten(targets)
+
+  if mode == _Mode.PREDICT:
+    _custom_compile_for_predict(updated_model)
+  else:
+    updated_model.compile(
+        model.optimizer,
+        model.loss,
+        metrics=metrics_module.clone_metrics(model._compile_metrics),
+        loss_weights=model.loss_weights,
+        sample_weight_mode=model.sample_weight_mode,
+        weighted_metrics=metrics_module.clone_metrics(
+            model._compile_weighted_metrics),
+        target_tensors=targets)
+  return updated_model
+
+
+def _build_distributed_network(model, strategy, inputs=None, targets=None,
+                               mode=None):
+  """Create a cloned model on each replica."""
+  with K.get_graph().as_default(), strategy.scope():
+    distributed_model = strategy.extended.call_for_each_replica(
+        _build_network_on_replica,
+        args=(model, inputs, targets, mode))
+    if mode is _Mode.TRAIN:
+      model._distributed_model_train = distributed_model
+    elif mode is _Mode.TEST:
+      model._distributed_model_test = distributed_model
+    elif mode is _Mode.PREDICT:
+      model._distributed_model_predict = distributed_model
+    else:
+      model._distributed_model = distributed_model
+
+
 def _clone_and_build_model(model, inputs=None, targets=None, mode=None):
   """Clone and build the given keras_model."""
   # We need to set the import here since we run into a circular dependency
@@ -538,18 +605,18 @@ def clone_model_on_replicas(model, strategy, make_callback_model=False,
                             inputs=None, targets=None, mode=None):
   """Create a cloned model on each replica."""
   with K.get_graph().as_default(), strategy.scope():
-    grouped_model = strategy.extended.call_for_each_replica(
+    distributed_model = strategy.extended.call_for_each_replica(
         _clone_and_build_model, args=(model, inputs, targets, mode))
     if mode is _Mode.TRAIN:
-      model._grouped_model_train = grouped_model
+      model._distributed_model_train = distributed_model
     elif mode is _Mode.TEST:
-      model._grouped_model_test = grouped_model
+      model._distributed_model_test = distributed_model
     elif mode is _Mode.PREDICT:
-      model._grouped_model_predict = grouped_model
+      model._distributed_model_predict = distributed_model
     else:
-      model._grouped_model = grouped_model
+      model._distributed_model = distributed_model
   if make_callback_model:
-    model._make_callback_model(grouped_model)
+    model._make_callback_model(distributed_model)
 
 
 def _get_input_from_iterator(iterator, model):
@@ -581,9 +648,12 @@ def _make_execution_function(model, mode):
     return _make_eager_execution_function(model, mode)
 
   strategy = model._distribution_strategy
-  if not model._grouped_model:
-    clone_model_on_replicas(
-        model, strategy, make_callback_model=(mode == 'train'))
+  if not model._distributed_model:
+    if model._compile_distribution:
+      clone_model_on_replicas(
+          model, strategy, make_callback_model=(mode == 'train'))
+    else:
+      _build_distributed_network(model, strategy)
 
   def _per_device_function(model):
     f = model._make_execution_function(mode)
@@ -594,7 +664,7 @@ def _make_execution_function(model, mode):
     # `_per_device_fit_function`.
     (grouped_inputs, grouped_outputs, grouped_updates,
      grouped_session_args) = strategy.extended.call_for_each_replica(
-         _per_device_function, args=(model._grouped_model,))
+         _per_device_function, args=(model._distributed_model,))
 
     if mode == 'train':
       # Initialize the variables in the replicated model. This is necessary for
@@ -627,9 +697,12 @@ def _make_execution_function(model, mode):
 def _make_eager_execution_function(model, mode):
   """Makes function to run one step of distributed model eager execution."""
   strategy = model._distribution_strategy
-  if not model._grouped_model:
-    clone_model_on_replicas(
-        model, strategy, make_callback_model=(mode == 'train'))
+  if not model._distributed_model:
+    if model._compile_distribution:
+      clone_model_on_replicas(
+          model, strategy, make_callback_model=(mode == 'train'))
+    else:
+      _build_distributed_network(model, strategy)
 
   def _per_device_function(model):
     f = model._make_execution_function(mode)
@@ -641,7 +714,7 @@ def _make_eager_execution_function(model, mode):
     # Create train ops on each of the devices when we call
     # `_per_device_fit_function`.
     (grouped_inputs, grouped_outputs) = strategy.call_for_each_replica(
-        _per_device_function, args=(model._grouped_model,))
+        _per_device_function, args=(model._distributed_model,))
 
     # Unwrap all the per device values returned from `call_for_each_replica`.
     # Unwrapping per device values gives you a list of values that can be
@@ -726,5 +799,5 @@ def _reset_metrics(model, distributed_model=None):
   if model._distribution_strategy:
     distributed_model = (
         distributed_model or
-        model._distribution_strategy.unwrap(model._grouped_model)[0])
+        model._distribution_strategy.unwrap(model._distributed_model)[0])
     distributed_model.reset_metrics()
diff --git a/tensorflow/python/keras/models.py b/tensorflow/python/keras/models.py
index 3082988a81..45af953c61 100644
--- a/tensorflow/python/keras/models.py
+++ b/tensorflow/python/keras/models.py
@@ -43,7 +43,7 @@ model_from_yaml = saving.model_from_yaml
 model_from_json = saving.model_from_json
 
 
-def clone_layer(layer):
+def _clone_layer(layer):
   return layer.__class__.from_config(layer.get_config())
 
 
@@ -138,7 +138,7 @@ def _clone_functional_model(model, input_tensors=None, share_weights=False):
       if layer not in layer_map:
         if not share_weights:
           # Clone layer.
-          new_layer = clone_layer(layer)
+          new_layer = _clone_layer(layer)
           layer_map[layer] = new_layer
           layer = new_layer
       else:
@@ -227,11 +227,11 @@ def _clone_sequential_model(model, input_tensors=None, share_weights=False):
       layers = []
       for layer in model._layers:
         if isinstance(layer, InputLayer):
-          layers.append(clone_layer(layer))
+          layers.append(_clone_layer(layer))
         else:
           layers.append(layer)
     else:
-      layers = [clone_layer(layer) for layer in model._layers]
+      layers = [_clone_layer(layer) for layer in model._layers]
     return Sequential(layers=layers, name=model.name)
   else:
     # If input tensors are provided, the original model's InputLayer is
@@ -239,7 +239,7 @@ def _clone_sequential_model(model, input_tensors=None, share_weights=False):
     layers = [
         layer for layer in model._layers if not isinstance(layer, InputLayer)]
     if not share_weights:
-      layers = [clone_layer(layer) for layer in layers]
+      layers = [_clone_layer(layer) for layer in layers]
     if len(generic_utils.to_list(input_tensors)) != 1:
       raise ValueError('To clone a `Sequential` model, we expect '
                        ' at most one tensor '
-- 
GitLab


From c2dfcd9ceae567e562a0805afd9bc523b4c1a665 Mon Sep 17 00:00:00 2001
From: Anna R <annarev@google.com>
Date: Tue, 18 Dec 2018 22:00:42 -0800
Subject: [PATCH 804/873] Internal change.

PiperOrigin-RevId: 226115035
---
 third_party/toolchains/cpus/arm/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/third_party/toolchains/cpus/arm/BUILD b/third_party/toolchains/cpus/arm/BUILD
index 00350cb341..a565e1e55f 100644
--- a/third_party/toolchains/cpus/arm/BUILD
+++ b/third_party/toolchains/cpus/arm/BUILD
@@ -5,6 +5,7 @@ cc_toolchain_suite(
     toolchains = {
         "armeabi|compiler": ":cc-compiler-armeabi",
         "local|compiler": ":cc-compiler-local",
+        "armeabi": ":cc-compiler-armeabi",
     },
 )
 
-- 
GitLab


From 9a63f5b8432f0057d5099e2aea0c8f57467c65db Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Tue, 18 Dec 2018 22:03:38 -0800
Subject: [PATCH 805/873] Create dataset kernels as we go i.e. in the __init__
 method of the Dataset class.

This removes the _as_variant_tensor() method from the DatasetV2 class (the version going to be used in TF 2.0) and replaces it with a _variant_tensor property that returns the variant_tensor representing the dataset. Also the __init__() method of DatasetV2 now takes a variant_tensor input.

For the DatasetV1 class (current API), we run the _as_variant_tensor() method in the __init__() method, so classes subclassing DatasetV1 should make their super() calls in the end.

Another implication is for Estimator code. The estimator input_fn's are supposed to be self contained and can't have ops from other graphs (like default graphs) in them. Earlier on because we didn't add anything to the graph while creating the Dataset object, this wasn't an issue but now this is a problem and the dataset creation code now needs to move into the input_fns.

A few other changes were required to make this happen
1. The make_one_shot_iterator code captures inputs by value and since now inputs to a dataset could be other datasets which are not capturable, we use the whitelisting mechanism in functions to recreate these ops.
2. The distribution strategies multi-worker code relied on dataset kernel re-creation on different devices while we created the iterator. In the new world, with the kernels already created, we now have to "clone" the dataset on different devices.
3. Auto sharding in distribution strategies is broken with this CL. For now, this CL disables it, but we can subsequently fix it using some of the cloning logic done for 2).
4. AsGraphDefInternal for functions that capture inputs that are datasets now need to be handled differently as DT_VARIANT tensors representing datasets are not serializable.

PiperOrigin-RevId: 226115500
---
 .../bigtable/python/ops/bigtable_api.py       |  75 ++--
 tensorflow/contrib/data/python/ops/readers.py |   6 +-
 tensorflow/contrib/data/python/ops/sliding.py |   8 +-
 .../contrib/distribute/python/values_test.py  |  38 +-
 .../hadoop/python/ops/hadoop_dataset_ops.py   |   6 +-
 .../contrib/tpu/python/tpu/datasets_test.py   |  17 +-
 .../group_by_reducer_dataset_op.cc            |  18 +-
 .../group_by_window_dataset_op.cc             |  21 +-
 .../experimental/map_and_batch_dataset_op.cc  |   8 +-
 .../numa_map_and_batch_dataset_op.cc          |   8 +-
 .../parallel_interleave_dataset_op.cc         |   8 +-
 .../data/experimental/scan_dataset_op.cc      |   8 +-
 .../core/kernels/data/filter_dataset_op.cc    |   8 +-
 .../core/kernels/data/flat_map_dataset_op.cc  |   8 +-
 .../kernels/data/interleave_dataset_op.cc     |   8 +-
 .../core/kernels/data/map_dataset_op.cc       |   8 +-
 .../data/parallel_interleave_dataset_op.cc    |   8 +-
 .../kernels/data/parallel_map_dataset_op.cc   |   8 +-
 .../kernel_tests/csv_dataset_test.py          |  24 +-
 .../kernel_tests/map_and_batch_test.py        |   4 +-
 .../optimization/optimize_dataset_test.py     |  13 +-
 .../kernel_tests/sql_dataset_test.py          |  14 +-
 .../kernel_tests/stats_dataset_ops_test.py    |   9 +-
 .../python/data/experimental/ops/batching.py  |  37 +-
 .../data/experimental/ops/cardinality.py      |   2 +-
 .../python/data/experimental/ops/error_ops.py |  11 +-
 .../experimental/ops/get_single_element.py    |   3 +-
 .../python/data/experimental/ops/grouping.py  |  52 +--
 .../data/experimental/ops/interleave_ops.py   |   8 +-
 .../data/experimental/ops/matching_files.py   |   6 +-
 .../data/experimental/ops/optimization.py     |  24 +-
 .../data/experimental/ops/parsing_ops.py      |  22 +-
 .../data/experimental/ops/prefetching_ops.py  |  44 +-
 .../data/experimental/ops/random_ops.py       |  10 +-
 .../python/data/experimental/ops/readers.py   |  16 +-
 .../python/data/experimental/ops/scan_ops.py  |  15 +-
 .../data/experimental/ops/shuffle_ops.py      |  12 +-
 .../python/data/experimental/ops/sleep.py     |   8 +-
 .../python/data/experimental/ops/stats_ops.py |   8 +-
 .../data/experimental/ops/threadpool.py       |   8 +-
 .../python/data/experimental/ops/unique.py    |   8 +-
 .../python/data/experimental/ops/writers.py   |   2 +-
 .../python/data/kernel_tests/batch_test.py    |   6 +-
 .../python/data/kernel_tests/cache_test.py    |   4 +-
 .../python/data/kernel_tests/dataset_test.py  |  11 +-
 .../python/data/kernel_tests/prefetch_test.py |   7 +-
 .../python/data/kernel_tests/range_test.py    |   6 +-
 .../python/data/kernel_tests/window_test.py   |  11 +-
 tensorflow/python/data/ops/BUILD              |   1 +
 tensorflow/python/data/ops/dataset_ops.py     | 412 ++++++++++--------
 tensorflow/python/data/ops/iterator_ops.py    |   4 +-
 .../data/ops/multi_device_iterator_ops.py     |  12 +-
 tensorflow/python/data/ops/readers.py         |  58 +--
 tensorflow/python/data/util/BUILD             |  21 +
 tensorflow/python/data/util/traverse.py       |  56 +++
 tensorflow/python/data/util/traverse_test.py  | 109 +++++
 tensorflow/python/distribute/BUILD            |   1 +
 tensorflow/python/distribute/input_ops.py     | 159 +++----
 .../python/distribute/input_ops_test.py       | 102 ++++-
 tensorflow/python/distribute/values.py        |  15 +-
 .../golden/v2/tensorflow.data.-dataset.pbtxt  |   2 +-
 61 files changed, 946 insertions(+), 680 deletions(-)
 create mode 100644 tensorflow/python/data/util/traverse.py
 create mode 100644 tensorflow/python/data/util/traverse_test.py

diff --git a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
index b6cdc7aab0..fa64055dfd 100644
--- a/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
+++ b/tensorflow/contrib/bigtable/python/ops/bigtable_api.py
@@ -489,7 +489,7 @@ class BigtableTable(object):
                        "len(dataset.output_types))")
     return gen_bigtable_ops.dataset_to_bigtable(
         self._resource,
-        dataset._as_variant_tensor(),  # pylint: disable=protected-access
+        dataset._variant_tensor,  # pylint: disable=protected-access
         column_families,
         columns,
         timestamp)
@@ -582,13 +582,14 @@ class _BigtableKeyDataset(dataset_ops.DatasetSource):
   """_BigtableKeyDataset is an abstract class representing the keys of a table.
   """
 
-  def __init__(self, table):
+  def __init__(self, table, variant_tensor):
     """Constructs a _BigtableKeyDataset.
 
     Args:
       table: a Bigtable class.
+      variant_tensor: DT_VARIANT representation of the dataset.
     """
-    super(_BigtableKeyDataset, self).__init__()
+    super(_BigtableKeyDataset, self).__init__(variant_tensor)
     self._table = table
 
   @property
@@ -601,13 +602,11 @@ class _BigtablePrefixKeyDataset(_BigtableKeyDataset):
   """
 
   def __init__(self, table, prefix):
-    super(_BigtablePrefixKeyDataset, self).__init__(table)
     self._prefix = prefix
-
-  def _as_variant_tensor(self):
-    return gen_bigtable_ops.bigtable_prefix_key_dataset(
-        table=self._table._resource,  # pylint: disable=protected-access
+    variant_tensor = gen_bigtable_ops.bigtable_prefix_key_dataset(
+        table=table._resource,  # pylint: disable=protected-access
         prefix=self._prefix)
+    super(_BigtablePrefixKeyDataset, self).__init__(table, variant_tensor)
 
 
 class _BigtableRangeKeyDataset(_BigtableKeyDataset):
@@ -615,15 +614,13 @@ class _BigtableRangeKeyDataset(_BigtableKeyDataset):
   """
 
   def __init__(self, table, start, end):
-    super(_BigtableRangeKeyDataset, self).__init__(table)
     self._start = start
     self._end = end
-
-  def _as_variant_tensor(self):
-    return gen_bigtable_ops.bigtable_range_key_dataset(
-        table=self._table._resource,  # pylint: disable=protected-access
+    variant_tensor = gen_bigtable_ops.bigtable_range_key_dataset(
+        table=table._resource,  # pylint: disable=protected-access
         start_key=self._start,
         end_key=self._end)
+    super(_BigtableRangeKeyDataset, self).__init__(table, variant_tensor)
 
 
 class _BigtableSampleKeysDataset(_BigtableKeyDataset):
@@ -633,11 +630,9 @@ class _BigtableSampleKeysDataset(_BigtableKeyDataset):
   # TODO(saeta): Expose the data size offsets into the keys.
 
   def __init__(self, table):
-    super(_BigtableSampleKeysDataset, self).__init__(table)
-
-  def _as_variant_tensor(self):
-    return gen_bigtable_ops.bigtable_sample_keys_dataset(
-        table=self._table._resource)  # pylint: disable=protected-access
+    variant_tensor = gen_bigtable_ops.bigtable_sample_keys_dataset(
+        table=table._resource)  # pylint: disable=protected-access
+    super(_BigtableSampleKeysDataset, self).__init__(table, variant_tensor)
 
 
 class _BigtableLookupDataset(dataset_ops.DatasetSource):
@@ -651,20 +646,18 @@ class _BigtableLookupDataset(dataset_ops.DatasetSource):
     self._normalized = normalized
     self._column_families = [i[0] for i in normalized]
     self._columns = [i[1] for i in normalized]
+    variant_tensor = gen_bigtable_ops.bigtable_lookup_dataset(
+        keys_dataset=self._dataset._variant_tensor,  # pylint: disable=protected-access
+        table=self._table._resource,  # pylint: disable=protected-access
+        column_families=self._column_families,
+        columns=self._columns)
+    super(_BigtableLookupDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
     return structure.NestedStructure(tuple(
         [structure.TensorStructure(dtypes.string, [])] * self._num_outputs))
 
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    return gen_bigtable_ops.bigtable_lookup_dataset(
-        keys_dataset=self._dataset._as_variant_tensor(),
-        table=self._table._resource,
-        column_families=self._column_families,
-        columns=self._columns)
-
 
 class _BigtableScanDataset(dataset_ops.DatasetSource):
   """_BigtableScanDataset represents a dataset that retrieves keys and values.
@@ -679,14 +672,7 @@ class _BigtableScanDataset(dataset_ops.DatasetSource):
     self._columns = [i[1] for i in normalized]
     self._probability = probability
     self._num_outputs = len(normalized) + 1  # 1 for row key
-
-  @property
-  def _element_structure(self):
-    return structure.NestedStructure(tuple(
-        [structure.TensorStructure(dtypes.string, [])] * self._num_outputs))
-
-  def _as_variant_tensor(self):
-    return gen_bigtable_ops.bigtable_scan_dataset(
+    variant_tensor = gen_bigtable_ops.bigtable_scan_dataset(
         table=self._table._resource,  # pylint: disable=protected-access
         prefix=self._prefix,
         start_key=self._start,
@@ -694,6 +680,13 @@ class _BigtableScanDataset(dataset_ops.DatasetSource):
         column_families=self._column_families,
         columns=self._columns,
         probability=self._probability)
+    super(_BigtableScanDataset, self).__init__(variant_tensor)
+
+  @property
+  def _element_structure(self):
+    return structure.NestedStructure(
+        tuple(
+            [structure.TensorStructure(dtypes.string, [])] * self._num_outputs))
 
 
 class _BigtableSampleKeyPairsDataset(dataset_ops.DatasetSource):
@@ -705,17 +698,15 @@ class _BigtableSampleKeyPairsDataset(dataset_ops.DatasetSource):
     self._prefix = prefix
     self._start = start
     self._end = end
+    variant_tensor = gen_bigtable_ops.bigtable_sample_key_pairs_dataset(
+        table=self._table._resource,  # pylint: disable=protected-access
+        prefix=self._prefix,
+        start_key=self._start,
+        end_key=self._end)
+    super(_BigtableSampleKeyPairsDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
     return structure.NestedStructure(
         (structure.TensorStructure(dtypes.string, []),
          structure.TensorStructure(dtypes.string, [])))
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    return gen_bigtable_ops.bigtable_sample_key_pairs_dataset(
-        table=self._table._resource,
-        prefix=self._prefix,
-        start_key=self._start,
-        end_key=self._end)
diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py
index c0152156a1..c6bf5215c9 100644
--- a/tensorflow/contrib/data/python/ops/readers.py
+++ b/tensorflow/contrib/data/python/ops/readers.py
@@ -389,13 +389,11 @@ class LMDBDataset(dataset_ops.DatasetSource):
     Args:
       filenames: A `tf.string` tensor containing one or more filenames.
     """
-    super(LMDBDataset, self).__init__()
     self._filenames = ops.convert_to_tensor(
         filenames, dtype=dtypes.string, name="filenames")
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_lmdb_dataset(
+    variant_tensor = gen_experimental_dataset_ops.experimental_lmdb_dataset(
         self._filenames, **dataset_ops.flat_structure(self))
+    super(LMDBDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py
index 5c6ee6bfdc..6708e01d08 100644
--- a/tensorflow/contrib/data/python/ops/sliding.py
+++ b/tensorflow/contrib/data/python/ops/sliding.py
@@ -30,7 +30,6 @@ class _SlideDataset(dataset_ops.UnaryDataset):
 
   def __init__(self, input_dataset, window_size, window_shift, window_stride):
     """See `sliding_window_batch` for details."""
-    super(_SlideDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._window_size = ops.convert_to_tensor(
         window_size, dtype=dtypes.int64, name="window_stride")
@@ -43,14 +42,13 @@ class _SlideDataset(dataset_ops.UnaryDataset):
         input_dataset.output_types, input_dataset.output_shapes,
         input_dataset.output_classes)
     self._structure = input_structure._batch(None)  # pylint: disable=protected-access
-
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_sliding_window_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = ged_ops.experimental_sliding_window_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         window_size=self._window_size,
         window_shift=self._window_shift,
         window_stride=self._window_stride,
         **dataset_ops.flat_structure(self))
+    super(_SlideDataset, self).__init__(input_dataset, variant_tensor)
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index f6cb3d6313..0e8e86f6b9 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -472,11 +472,11 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
                 for r in range(len(devices))])
 
   def _test_dataset(self, dataset_fn, worker_devices, devices,
-                    expected_values, auto_shard=True):
+                    expected_values):
     device_map = values.ReplicaDeviceMap(devices)
     input_workers = values.InputWorkers(device_map, worker_devices)
     multi_worker_dataset = values.MultiWorkerDataset(
-        dataset_fn, input_workers, auto_shard=auto_shard)
+        dataset_fn, input_workers)
     multi_worker_iterator = multi_worker_dataset.make_initializable_iterator()
     with self.cached_session() as sess:
       sess.run(multi_worker_iterator.initializer)
@@ -518,16 +518,9 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
     worker_devices, devices = self._cpu_devices()
     with context.graph_mode():
       dataset_fn = lambda: dataset_ops.Dataset.range(8)
-      self._test_dataset(dataset_fn, worker_devices, devices,
-                         [[0, 1], [2, 3], [4, 5], [6, 7]])
-
-  def testDataDistributionNoAutoShard(self):
-    worker_devices, devices = self._cpu_devices()
-    with context.graph_mode():
-      dataset_fn = lambda: dataset_ops.Dataset.range(4)
-      self._test_dataset(dataset_fn, worker_devices, devices,
-                         [[0, 0], [1, 1], [2, 2], [3, 3]],
-                         auto_shard=False)
+      self._test_dataset(
+          dataset_fn, worker_devices, devices,
+          [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]])
 
   def testDataDistributionTwoDevicePerWorker(self):
     if context.num_gpus() < 1:
@@ -535,8 +528,9 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
     worker_devices, devices = self._cpu_and_one_gpu_devices()
     with context.graph_mode():
       dataset_fn = lambda: dataset_ops.Dataset.range(8)
-      self._test_dataset(dataset_fn, worker_devices, devices,
-                         [[0, 2, 1, 3], [4, 6, 5, 7]])
+      self._test_dataset(
+          dataset_fn, worker_devices, devices,
+          [[0, 1, 0, 1], [2, 3, 2, 3], [4, 5, 4, 5], [6, 7, 6, 7]])
 
   def testTupleDataset(self):
     worker_devices, devices = self._cpu_devices()
@@ -548,9 +542,7 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
         dataset2 = dataset_ops.Dataset.range(8).map(lambda x: x**2)
         return dataset_ops.Dataset.zip((dataset1, dataset2))
 
-      expected_values = [
-          [(i, i**2), (i + 1, (i + 1)**2)] for i in range(0, 8, 2)
-      ]
+      expected_values = [[(i, i**2), (i, i**2)] for i in range(8)]
       self._test_dataset(dataset_fn, worker_devices, devices,
                          expected_values)
 
@@ -561,17 +553,19 @@ class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase):
       device_map = values.ReplicaDeviceMap(devices)
       input_workers = values.InputWorkers(device_map, worker_devices)
       multi_worker_dataset = values.MultiWorkerDataset(
-          dataset_fn, input_workers, auto_shard=True)
+          dataset_fn, input_workers)
       multi_worker_iterator = multi_worker_dataset.make_initializable_iterator()
 
       sess.run(multi_worker_iterator.initializer)
-      self._test_iterator(sess, multi_worker_iterator, devices,
-                          [[0, 1], [2, 3], [4, 5], [6, 7]])
+      self._test_iterator(
+          sess, multi_worker_iterator, devices,
+          [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]])
 
       # After re-initializing the iterator, should be able to iterate again.
       sess.run(multi_worker_iterator.initializer)
-      self._test_iterator(sess, multi_worker_iterator, devices,
-                          [[0, 1], [2, 3], [4, 5], [6, 7]])
+      self._test_iterator(
+          sess, multi_worker_iterator, devices,
+          [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]])
 
   def testValueErrorForIterator(self):
     # Incompatiable arguments.
diff --git a/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py b/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
index 77813519c1..71eac729a8 100644
--- a/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
+++ b/tensorflow/contrib/hadoop/python/ops/hadoop_dataset_ops.py
@@ -55,13 +55,11 @@ class SequenceFileDataset(dataset_ops.DatasetSource):
     Args:
       filenames: A `tf.string` tensor containing one or more filenames.
     """
-    super(SequenceFileDataset, self).__init__()
     self._filenames = ops.convert_to_tensor(
         filenames, dtype=dtypes.string, name="filenames")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.sequence_file_dataset(
+    variant_tensor = gen_dataset_ops.sequence_file_dataset(
         self._filenames, self._element_structure._flat_types)  # pylint: disable=protected-access
+    super(SequenceFileDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py
index 52d87b8004..8a94f527bb 100644
--- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py
+++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py
@@ -27,6 +27,7 @@ from tensorflow.python.client import session
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.lib.io import python_io
 from tensorflow.python.platform import test
@@ -55,6 +56,7 @@ class DatasetsTest(test.TestCase):
     session_config = config_pb2.ConfigProto(cluster_def=self._cluster_def)
 
     self._sess = session.Session(self._worker.target, config=session_config)
+    self._worker_device = '/job:' + worker_job.name
 
   def testTextLineDataset(self):
     all_contents = []
@@ -70,7 +72,8 @@ class DatasetsTest(test.TestCase):
     dataset = datasets.StreamingFilesDataset(
         os.path.join(self.get_temp_dir(), 'text_line.*.txt'), filetype='text')
 
-    iterator = dataset_ops.make_initializable_iterator(dataset)
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
     self._sess.run(iterator.initializer)
     get_next = iterator.get_next()
 
@@ -94,7 +97,8 @@ class DatasetsTest(test.TestCase):
     dataset = datasets.StreamingFilesDataset(
         os.path.join(self.get_temp_dir(), 'tf_record*'), filetype='tfrecord')
 
-    iterator = dataset_ops.make_initializable_iterator(dataset)
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
     self._sess.run(iterator.initializer)
     get_next = iterator.get_next()
 
@@ -121,7 +125,8 @@ class DatasetsTest(test.TestCase):
 
     dataset = datasets.StreamingFilesDataset(filenames, filetype='tfrecord')
 
-    iterator = dataset_ops.make_initializable_iterator(dataset)
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
     self._sess.run(iterator.initializer)
     get_next = iterator.get_next()
 
@@ -154,7 +159,8 @@ class DatasetsTest(test.TestCase):
         os.path.join(self.get_temp_dir(), 'fixed_length*'),
         filetype=FixedLengthFile)
 
-    iterator = dataset_ops.make_initializable_iterator(dataset)
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
     self._sess.run(iterator.initializer)
     get_next = iterator.get_next()
 
@@ -177,7 +183,8 @@ class DatasetsTest(test.TestCase):
     dataset = datasets.StreamingFilesDataset(
         dataset_ops.Dataset.range(10), filetype=gen_dataset)
 
-    iterator = dataset_ops.make_initializable_iterator(dataset)
+    with ops.device(self._worker_device):
+      iterator = dataset_ops.make_initializable_iterator(dataset)
     self._sess.run(iterator.initializer)
     get_next = iterator.get_next()
 
diff --git a/tensorflow/core/kernels/data/experimental/group_by_reducer_dataset_op.cc b/tensorflow/core/kernels/data/experimental/group_by_reducer_dataset_op.cc
index 1c298cfdd6..5f0c01be4b 100644
--- a/tensorflow/core/kernels/data/experimental/group_by_reducer_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/group_by_reducer_dataset_op.cc
@@ -119,25 +119,25 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
       std::vector<Node*> key_func_other_arguments_node;
       DataTypeVector key_func_other_arguments_types;
       TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
-          b, captured_key_func_, &key_func_other_arguments_node,
+          ctx, b, captured_key_func_, &key_func_other_arguments_node,
           &key_func_other_arguments_types));
 
       std::vector<Node*> init_func_other_arguments_node;
       DataTypeVector init_func_other_arguments_types;
       TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
-          b, captured_init_func_, &init_func_other_arguments_node,
+          ctx, b, captured_init_func_, &init_func_other_arguments_node,
           &init_func_other_arguments_types));
 
       std::vector<Node*> reduce_func_other_arguments_node;
       DataTypeVector reduce_func_other_arguments_types;
       TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
-          b, captured_reduce_func_, &reduce_func_other_arguments_node,
+          ctx, b, captured_reduce_func_, &reduce_func_other_arguments_node,
           &reduce_func_other_arguments_types));
 
       std::vector<Node*> finalize_func_other_arguments_node;
       DataTypeVector finalize_func_other_arguments_types;
       TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
-          b, captured_finalize_func_, &finalize_func_other_arguments_node,
+          ctx, b, captured_finalize_func_, &finalize_func_other_arguments_node,
           &finalize_func_other_arguments_types));
 
       AttrValue key_func;
@@ -406,7 +406,7 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
     }
 
     Status OtherArgumentsNodeAndType(
-        DatasetGraphDefBuilder* b,
+        SerializationContext* ctx, DatasetGraphDefBuilder* b,
         const std::unique_ptr<CapturedFunction>& captured_func,
         std::vector<Node*>* other_arguments_node,
         DataTypeVector* other_arguments_types) const {
@@ -414,7 +414,13 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel {
       other_arguments_types->reserve(captured_func->captured_inputs().size());
       for (const Tensor& t : captured_func->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments_node->emplace_back(node);
         other_arguments_types->emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/experimental/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/experimental/group_by_window_dataset_op.cc
index 98603d5a73..11491e00db 100644
--- a/tensorflow/core/kernels/data/experimental/group_by_window_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/group_by_window_dataset_op.cc
@@ -117,20 +117,21 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
       std::vector<Node*> key_func_other_arguments_node;
       DataTypeVector key_func_other_arguments_types;
       TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
-          b, captured_key_func_, &key_func_other_arguments_node,
+          ctx, b, captured_key_func_, &key_func_other_arguments_node,
           &key_func_other_arguments_types));
 
       std::vector<Node*> reduce_func_other_arguments_node;
       DataTypeVector reduce_func_other_arguments_types;
       TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
-          b, captured_reduce_func_, &reduce_func_other_arguments_node,
+          ctx, b, captured_reduce_func_, &reduce_func_other_arguments_node,
           &reduce_func_other_arguments_types));
 
       std::vector<Node*> window_size_func_other_arguments_node;
       DataTypeVector window_size_func_other_arguments_types;
-      TF_RETURN_IF_ERROR(OtherArgumentsNodeAndType(
-          b, captured_window_size_func_, &window_size_func_other_arguments_node,
-          &window_size_func_other_arguments_types));
+      TF_RETURN_IF_ERROR(
+          OtherArgumentsNodeAndType(ctx, b, captured_window_size_func_,
+                                    &window_size_func_other_arguments_node,
+                                    &window_size_func_other_arguments_types));
 
       AttrValue key_func;
       b->BuildAttrValue(key_func_, &key_func);
@@ -490,7 +491,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
     };
 
     Status OtherArgumentsNodeAndType(
-        DatasetGraphDefBuilder* b,
+        SerializationContext* ctx, DatasetGraphDefBuilder* b,
         const std::unique_ptr<CapturedFunction>& captured_func,
         std::vector<Node*>* other_arguments_node,
         DataTypeVector* other_arguments_types) const {
@@ -498,7 +499,13 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel {
       other_arguments_types->reserve(captured_func->captured_inputs().size());
       for (const Tensor& t : captured_func->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments_node->emplace_back(node);
         other_arguments_types->emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
index 3ff3135593..ef75c84456 100644
--- a/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/map_and_batch_dataset_op.cc
@@ -210,7 +210,13 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
index 921f8ad584..2b1aec358c 100644
--- a/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/numa_map_and_batch_dataset_op.cc
@@ -169,7 +169,13 @@ class NumaMapAndBatchDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc
index 0230f90aba..1c19119d88 100644
--- a/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/parallel_interleave_dataset_op.cc
@@ -154,7 +154,13 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc b/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc
index 0d9a629a27..76ab33fe98 100644
--- a/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc
+++ b/tensorflow/core/kernels/data/experimental/scan_dataset_op.cc
@@ -119,7 +119,13 @@ class ScanDatasetOp : public UnaryDatasetOpKernel {
       other_arguments_types.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc
index b8b657d343..30b2fc5db8 100644
--- a/tensorflow/core/kernels/data/filter_dataset_op.cc
+++ b/tensorflow/core/kernels/data/filter_dataset_op.cc
@@ -137,7 +137,13 @@ class FilterDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/flat_map_dataset_op.cc b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
index 3846334622..efa76ab34b 100644
--- a/tensorflow/core/kernels/data/flat_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/flat_map_dataset_op.cc
@@ -95,7 +95,13 @@ class FlatMapDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/interleave_dataset_op.cc b/tensorflow/core/kernels/data/interleave_dataset_op.cc
index 54e3645612..1a5e6edb5b 100644
--- a/tensorflow/core/kernels/data/interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/interleave_dataset_op.cc
@@ -121,7 +121,13 @@ class InterleaveDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/map_dataset_op.cc b/tensorflow/core/kernels/data/map_dataset_op.cc
index fc6e93a81c..02c0199a0c 100644
--- a/tensorflow/core/kernels/data/map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_dataset_op.cc
@@ -149,7 +149,13 @@ class MapDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
index f844a00576..fda7ae0cbb 100644
--- a/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_interleave_dataset_op.cc
@@ -160,7 +160,13 @@ class ParallelInterleaveDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
index 5c09b2d5dc..c0002c86d8 100644
--- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
+++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc
@@ -141,7 +141,13 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel {
       other_arguments.reserve(captured_func_->captured_inputs().size());
       for (const Tensor& t : captured_func_->captured_inputs()) {
         Node* node;
-        TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        DatasetBase* input;
+        Status s = GetDatasetFromVariantTensor(t, &input);
+        if (s.ok()) {
+          TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node));
+        } else {
+          TF_RETURN_IF_ERROR(b->AddTensor(t, &node));
+        }
         other_arguments.emplace_back(node);
         other_arguments_types.emplace_back(t.dtype());
       }
diff --git a/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
index b2f1b43ecf..e523f36639 100644
--- a/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/csv_dataset_test.py
@@ -89,14 +89,12 @@ class CsvDatasetTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.OutOfRangeError):
         self.evaluate(nxt())
     else:
-      # Verify that OpError is produced as expected
-      with self.assertRaisesOpError(expected_err_re):
-        nxt = self.getNext(dataset)
-        while True:
-          try:
-            self.evaluate(nxt())
-          except errors.OutOfRangeError:
-            break
+      nxt = self.getNext(dataset)
+      while True:
+        try:
+          self.evaluate(nxt())
+        except errors.OutOfRangeError:
+          break
 
   def _test_dataset(
       self,
@@ -110,8 +108,14 @@ class CsvDatasetTest(test_base.DatasetTestBase):
     # Convert str type because py3 tf strings are bytestrings
     filenames = self._setup_files(inputs, linebreak, compression_type)
     kwargs['compression_type'] = compression_type
-    dataset = readers.CsvDataset(filenames, **kwargs)
-    self._verify_output_or_err(dataset, expected_output, expected_err_re)
+    if expected_err_re is not None:
+      # Verify that OpError is produced as expected
+      with self.assertRaisesOpError(expected_err_re):
+        dataset = readers.CsvDataset(filenames, **kwargs)
+        self._verify_output_or_err(dataset, expected_output, expected_err_re)
+    else:
+      dataset = readers.CsvDataset(filenames, **kwargs)
+      self._verify_output_or_err(dataset, expected_output, expected_err_re)
 
   def testCsvDataset_requiredFields(self):
     record_defaults = [[]] * 4
diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index ceadebc541..c90c5ed306 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -120,8 +120,8 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
     self.assertDatasetProduces(dataset_fn(8, 0), expected_output=[])
 
     # Empty batch should be an initialization time error.
-    self.assertDatasetProduces(
-        dataset_fn(0, 14), expected_error=(errors.InvalidArgumentError, ""))
+    with self.assertRaises(errors.InvalidArgumentError):
+      self.assertDatasetProduces(dataset_fn(0, 14), expected_output=[])
 
   @parameterized.named_parameters(
       ("Even", False, False),
diff --git a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
index dd432b8c15..c111567c1c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/optimization/optimize_dataset_test.py
@@ -211,16 +211,15 @@ class OptimizeDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
         "v", initializer=0, use_resource=False)
     assign_op = variable.assign_add(1)
 
-    unoptimized_dataset = dataset_fn(variable)
-
-    options = dataset_ops.Options()
-    options.experimental_optimization.noop_elimination = True
-    options.experimental_optimization.map_and_batch_fusion = True
-    optimized_dataset = unoptimized_dataset.with_options(options)
-
     # Check that warning is logged.
     warnings.simplefilter("always")
     with warnings.catch_warnings(record=True) as w:
+      unoptimized_dataset = dataset_fn(variable)
+
+      options = dataset_ops.Options()
+      options.experimental_optimization.noop_elimination = True
+      options.experimental_optimization.map_and_batch_fusion = True
+      optimized_dataset = unoptimized_dataset.with_options(options)
       optimized_it = optimized_dataset.make_initializable_iterator()
 
     self.assertGreaterEqual(len(w), 1)
diff --git a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
index fd96c0b521..e97c80627c 100644
--- a/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/sql_dataset_test.py
@@ -110,13 +110,13 @@ class SqlDatasetTest(sql_dataset_test_base.SqlDatasetTestBase):
 
   # Test that an error is raised when `driver_name` is invalid.
   def testReadResultSetWithInvalidDriverName(self):
-    dataset = self._createSqlDataset(
-        driver_name="sqlfake",
-        query="SELECT first_name, last_name, motto FROM students "
-        "ORDER BY first_name DESC",
-        output_types=(dtypes.string, dtypes.string, dtypes.string))
-    self.assertDatasetProduces(
-        dataset, expected_error=(errors.InvalidArgumentError, ""))
+    with self.assertRaises(errors.InvalidArgumentError):
+      dataset = self._createSqlDataset(
+          driver_name="sqlfake",
+          query="SELECT first_name, last_name, motto FROM students "
+          "ORDER BY first_name DESC",
+          output_types=(dtypes.string, dtypes.string, dtypes.string))
+      self.assertDatasetProduces(dataset, expected_output=[])
 
   # Test that an error is raised when a column name in `query` is nonexistent
   def testReadResultSetWithInvalidColumnName(self):
diff --git a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
index 59d0ebdb37..8b330559f5 100644
--- a/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/stats_dataset_ops_test.py
@@ -197,10 +197,13 @@ class StatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
   def testInterleaveAutoTuneBufferUtilization(self, dataset_transformation):
 
     def dataset_fn():
-      dataset = dataset_ops.Dataset.range(10).map(
-          lambda x: array_ops.tile([x], ops.convert_to_tensor([x])))
+
+      def interleave_fn(_):
+        return dataset_ops.Dataset.range(
+            10).map(lambda x: array_ops.tile([x], ops.convert_to_tensor([x])))
+
       dataset = dataset_ops.Dataset.range(1).interleave(
-          lambda _: dataset,
+          interleave_fn,
           cycle_length=1,
           num_parallel_calls=optimization.AUTOTUNE)
       options = dataset_ops.Options()
diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py
index 29df98f4ea..f0cf7f0a99 100644
--- a/tensorflow/python/data/experimental/ops/batching.py
+++ b/tensorflow/python/data/experimental/ops/batching.py
@@ -352,7 +352,6 @@ class _UnbatchDataset(dataset_ops.UnaryDataset):
 
   def __init__(self, input_dataset):
     """See `unbatch()` for more details."""
-    super(_UnbatchDataset, self).__init__(input_dataset)
     flat_shapes = nest.flatten(input_dataset.output_shapes)
     if any(s.ndims == 0 for s in flat_shapes):
       raise ValueError("Cannot unbatch an input with scalar components.")
@@ -370,10 +369,10 @@ class _UnbatchDataset(dataset_ops.UnaryDataset):
         nest.map_structure(lambda s: s[1:], input_dataset.output_shapes),
         input_dataset.output_classes)
 
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_unbatch_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = ged_ops.experimental_unbatch_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         **dataset_ops.flat_structure(self))
+    super(_UnbatchDataset, self).__init__(input_dataset, variant_tensor)
 
   @property
   def _element_structure(self):
@@ -440,7 +439,6 @@ class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset):
 
   def __init__(self, input_dataset, batch_size, row_shape):
     """See `Dataset.dense_to_sparse_batch()` for more details."""
-    super(_DenseToSparseBatchDataset, self).__init__(input_dataset)
     if not isinstance(input_dataset.output_types, dtypes.DType):
       raise TypeError("DenseToSparseDataset requires an input whose elements "
                       "have a single component, whereas the input has %r." %
@@ -452,12 +450,13 @@ class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset):
         input_dataset.output_types,
         tensor_shape.vector(None).concatenate(self._row_shape))
 
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_dense_to_sparse_batch_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = ged_ops.experimental_dense_to_sparse_batch_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._batch_size,
         row_shape=convert.partial_shape_to_tensor(self._row_shape),
         **dataset_ops.flat_structure(self))
+    super(_DenseToSparseBatchDataset, self).__init__(input_dataset,
+                                                     variant_tensor)
 
   @property
   def _element_structure(self):
@@ -499,7 +498,6 @@ class _RestructuredDataset(dataset_ops.UnaryDataset):
       ValueError: If either `output_types` or `output_shapes` is not compatible
         with the structure of `dataset`.
     """
-    super(_RestructuredDataset, self).__init__(dataset)
     self._input_dataset = dataset
 
     if not allow_unsafe_cast:
@@ -539,9 +537,8 @@ class _RestructuredDataset(dataset_ops.UnaryDataset):
 
     self._structure = structure.convert_legacy_structure(
         output_types, output_shapes, output_classes)
-
-  def _as_variant_tensor(self):
-    return self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+    variant_tensor = self._input_dataset._variant_tensor  # pylint: disable=protected-access
+    super(_RestructuredDataset, self).__init__(dataset, variant_tensor)
 
   @property
   def _element_structure(self):
@@ -554,8 +551,8 @@ class _MapAndBatchDataset(dataset_ops.UnaryDataset):
   def __init__(self, input_dataset, map_func, batch_size, num_parallel_calls,
                drop_remainder):
     """See `Dataset.map()` for details."""
-    super(_MapAndBatchDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
+
     self._map_func = dataset_ops.StructuredFunctionWrapper(
         map_func, "tf.data.experimental.map_and_batch()", dataset=input_dataset)
     self._batch_size_t = ops.convert_to_tensor(
@@ -573,14 +570,8 @@ class _MapAndBatchDataset(dataset_ops.UnaryDataset):
           tensor_util.constant_value(self._batch_size_t))
     else:
       self._structure = self._map_func.output_structure._batch(None)  # pylint: disable=protected-access
-
-  def _functions(self):
-    return [self._map_func]
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    return ged_ops.experimental_map_and_batch_dataset(
-        self._input_dataset._as_variant_tensor(),
+    variant_tensor = ged_ops.experimental_map_and_batch_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._map_func.function.captured_inputs,
         f=self._map_func.function,
         batch_size=self._batch_size_t,
@@ -588,6 +579,10 @@ class _MapAndBatchDataset(dataset_ops.UnaryDataset):
         drop_remainder=self._drop_remainder_t,
         preserve_cardinality=True,
         **dataset_ops.flat_structure(self))
+    super(_MapAndBatchDataset, self).__init__(input_dataset, variant_tensor)
+
+  def _functions(self):
+    return [self._map_func]
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/python/data/experimental/ops/cardinality.py b/tensorflow/python/data/experimental/ops/cardinality.py
index 9cf0a8801e..0d596f68dd 100644
--- a/tensorflow/python/data/experimental/ops/cardinality.py
+++ b/tensorflow/python/data/experimental/ops/cardinality.py
@@ -47,4 +47,4 @@ def cardinality(dataset):
     the cardinality is infinite or unknown, the operation returns the named
     constant `INFINITE_CARDINALITY` and `UNKNOWN_CARDINALITY` respectively.
   """
-  return ged_ops.experimental_dataset_cardinality(dataset._as_variant_tensor())  # pylint: disable=protected-access
+  return ged_ops.experimental_dataset_cardinality(dataset._variant_tensor)  # pylint: disable=protected-access
diff --git a/tensorflow/python/data/experimental/ops/error_ops.py b/tensorflow/python/data/experimental/ops/error_ops.py
index 879b13ce09..eab29c7d88 100644
--- a/tensorflow/python/data/experimental/ops/error_ops.py
+++ b/tensorflow/python/data/experimental/ops/error_ops.py
@@ -57,10 +57,9 @@ class _IgnoreErrorsDataset(dataset_ops.UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset):
     """See `Dataset.ignore_errors()` for details."""
-    super(_IgnoreErrorsDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_ignore_errors_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
+    variant_tensor = (
+        gen_experimental_dataset_ops.experimental_ignore_errors_dataset(
+            self._input_dataset._variant_tensor,  # pylint: disable=protected-access
+            **dataset_ops.flat_structure(self)))
+    super(_IgnoreErrorsDataset, self).__init__(input_dataset, variant_tensor)
diff --git a/tensorflow/python/data/experimental/ops/get_single_element.py b/tensorflow/python/data/experimental/ops/get_single_element.py
index d649a07012..46c215d685 100644
--- a/tensorflow/python/data/experimental/ops/get_single_element.py
+++ b/tensorflow/python/data/experimental/ops/get_single_element.py
@@ -64,5 +64,4 @@ def get_single_element(dataset):
   # pylint: disable=protected-access
   return dataset._element_structure._from_compatible_tensor_list(
       gen_dataset_ops.dataset_to_single_element(
-          dataset._as_variant_tensor(),
-          **dataset_ops.flat_structure(dataset)))
+          dataset._variant_tensor, **dataset_ops.flat_structure(dataset)))
diff --git a/tensorflow/python/data/experimental/ops/grouping.py b/tensorflow/python/data/experimental/ops/grouping.py
index ef6b232429..2435f0cfdb 100644
--- a/tensorflow/python/data/experimental/ops/grouping.py
+++ b/tensorflow/python/data/experimental/ops/grouping.py
@@ -242,14 +242,23 @@ class _GroupByReducerDataset(dataset_ops.UnaryDataset):
 
   def __init__(self, input_dataset, key_func, reducer):
     """See `group_by_reducer()` for details."""
-    super(_GroupByReducerDataset, self).__init__(input_dataset)
-
     self._input_dataset = input_dataset
-
     self._make_key_func(key_func, input_dataset)
     self._make_init_func(reducer.init_func)
     self._make_reduce_func(reducer.reduce_func, input_dataset)
     self._make_finalize_func(reducer.finalize_func)
+    variant_tensor = ged_ops.experimental_group_by_reducer_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
+        self._key_func.function.captured_inputs,
+        self._init_func.function.captured_inputs,
+        self._reduce_func.function.captured_inputs,
+        self._finalize_func.function.captured_inputs,
+        key_func=self._key_func.function,
+        init_func=self._init_func.function,
+        reduce_func=self._reduce_func.function,
+        finalize_func=self._finalize_func.function,
+        **dataset_ops.flat_structure(self))
+    super(_GroupByReducerDataset, self).__init__(input_dataset, variant_tensor)
 
   def _make_key_func(self, key_func, input_dataset):
     """Make wrapping defun for key_func."""
@@ -347,19 +356,6 @@ class _GroupByReducerDataset(dataset_ops.UnaryDataset):
         self._key_func, self._init_func, self._reduce_func, self._finalize_func
     ]
 
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_group_by_reducer_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._key_func.function.captured_inputs,
-        self._init_func.function.captured_inputs,
-        self._reduce_func.function.captured_inputs,
-        self._finalize_func.function.captured_inputs,
-        key_func=self._key_func.function,
-        init_func=self._init_func.function,
-        reduce_func=self._reduce_func.function,
-        finalize_func=self._finalize_func.function,
-        **dataset_ops.flat_structure(self))
-
   def _transformation_name(self):
     return "tf.data.experimental.group_by_reducer()"
 
@@ -369,13 +365,20 @@ class _GroupByWindowDataset(dataset_ops.UnaryDataset):
 
   def __init__(self, input_dataset, key_func, reduce_func, window_size_func):
     """See `group_by_window()` for details."""
-    super(_GroupByWindowDataset, self).__init__(input_dataset)
-
     self._input_dataset = input_dataset
-
     self._make_key_func(key_func, input_dataset)
     self._make_reduce_func(reduce_func, input_dataset)
     self._make_window_size_func(window_size_func)
+    variant_tensor = ged_ops.experimental_group_by_window_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
+        self._key_func.function.captured_inputs,
+        self._reduce_func.function.captured_inputs,
+        self._window_size_func.function.captured_inputs,
+        key_func=self._key_func.function,
+        reduce_func=self._reduce_func.function,
+        window_size_func=self._window_size_func.function,
+        **dataset_ops.flat_structure(self))
+    super(_GroupByWindowDataset, self).__init__(input_dataset, variant_tensor)
 
   def _make_window_size_func(self, window_size_func):
     """Make wrapping defun for window_size_func."""
@@ -426,17 +429,6 @@ class _GroupByWindowDataset(dataset_ops.UnaryDataset):
   def _functions(self):
     return [self._key_func, self._reduce_func, self._window_size_func]
 
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_group_by_window_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._key_func.function.captured_inputs,
-        self._reduce_func.function.captured_inputs,
-        self._window_size_func.function.captured_inputs,
-        key_func=self._key_func.function,
-        reduce_func=self._reduce_func.function,
-        window_size_func=self._window_size_func.function,
-        **dataset_ops.flat_structure(self))
-
   def _transformation_name(self):
     return "tf.data.experimental.group_by_window()"
 
diff --git a/tensorflow/python/data/experimental/ops/interleave_ops.py b/tensorflow/python/data/experimental/ops/interleave_ops.py
index 5a719f8ed8..f4b7123df1 100644
--- a/tensorflow/python/data/experimental/ops/interleave_ops.py
+++ b/tensorflow/python/data/experimental/ops/interleave_ops.py
@@ -113,15 +113,15 @@ class _DirectedInterleaveDataset(dataset_ops.Dataset):
     self._structure = structure.convert_legacy_structure(
         data_inputs[0].output_types, output_shapes,
         data_inputs[0].output_classes)
+    super(_DirectedInterleaveDataset, self).__init__()
 
   def _as_variant_tensor(self):
     # pylint: disable=protected-access
     return (
         gen_experimental_dataset_ops.experimental_directed_interleave_dataset(
-            self._selector_input._as_variant_tensor(), [
-                data_input._as_variant_tensor()
-                for data_input in self._data_inputs
-            ], **dataset_ops.flat_structure(self)))
+            self._selector_input._variant_tensor,
+            [data_input._variant_tensor for data_input in self._data_inputs],
+            **dataset_ops.flat_structure(self)))
     # pylint: enable=protected-access
 
   def _inputs(self):
diff --git a/tensorflow/python/data/experimental/ops/matching_files.py b/tensorflow/python/data/experimental/ops/matching_files.py
index 63b99cb1e4..29beda9fc3 100644
--- a/tensorflow/python/data/experimental/ops/matching_files.py
+++ b/tensorflow/python/data/experimental/ops/matching_files.py
@@ -29,12 +29,10 @@ class MatchingFilesDataset(dataset_ops.DatasetSource):
   """A `Dataset` that list the files according to the input patterns."""
 
   def __init__(self, patterns):
-    super(MatchingFilesDataset, self).__init__()
     self._patterns = ops.convert_to_tensor(
         patterns, dtype=dtypes.string, name="patterns")
-
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_matching_files_dataset(self._patterns)
+    variant_tensor = ged_ops.experimental_matching_files_dataset(self._patterns)
+    super(MatchingFilesDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/python/data/experimental/ops/optimization.py b/tensorflow/python/data/experimental/ops/optimization.py
index c6c7de9265..22a36646ea 100644
--- a/tensorflow/python/data/experimental/ops/optimization.py
+++ b/tensorflow/python/data/experimental/ops/optimization.py
@@ -105,18 +105,17 @@ class _AssertNextDataset(dataset_ops.UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset, transformations):
     """See `assert_next()` for details."""
-    super(_AssertNextDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if transformations is None:
       raise ValueError("At least one transformation should be specified")
     self._transformations = ops.convert_to_tensor(
         transformations, dtype=dtypes.string, name="transformations")
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_assert_next_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._transformations,
-        **dataset_ops.flat_structure(self))
+    variant_tensor = (
+        gen_experimental_dataset_ops.experimental_assert_next_dataset(
+            self._input_dataset._variant_tensor,  # pylint: disable=protected-access
+            self._transformations,
+            **dataset_ops.flat_structure(self)))
+    super(_AssertNextDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class _NonSerializableDataset(dataset_ops.UnaryUnchangedStructureDataset):
@@ -124,10 +123,9 @@ class _NonSerializableDataset(dataset_ops.UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset):
     """See `non_serializable()` for details."""
-    super(_NonSerializableDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_non_serializable_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        **dataset_ops.flat_structure(self))
+    variant_tensor = (
+        gen_experimental_dataset_ops.experimental_non_serializable_dataset(
+            self._input_dataset._variant_tensor,  # pylint: disable=protected-access
+            **dataset_ops.flat_structure(self)))
+    super(_NonSerializableDataset, self).__init__(input_dataset, variant_tensor)
diff --git a/tensorflow/python/data/experimental/ops/parsing_ops.py b/tensorflow/python/data/experimental/ops/parsing_ops.py
index deb20d6188..a5ca96e89b 100644
--- a/tensorflow/python/data/experimental/ops/parsing_ops.py
+++ b/tensorflow/python/data/experimental/ops/parsing_ops.py
@@ -31,7 +31,6 @@ class _ParseExampleDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that parses `example` dataset into a `dict` dataset."""
 
   def __init__(self, input_dataset, features, num_parallel_calls):
-    super(_ParseExampleDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if not input_dataset._element_structure.is_compatible_with(  # pylint: disable=protected-access
         structure.TensorStructure(dtypes.string, [None])):
@@ -81,16 +80,17 @@ class _ParseExampleDataset(dataset_ops.UnaryDataset):
     self._structure = structure.convert_legacy_structure(
         output_types, output_shapes, output_classes)
 
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_parse_example_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
-        self._num_parallel_calls,
-        self._dense_defaults,
-        self._sparse_keys,
-        self._dense_keys,
-        self._sparse_types,
-        self._dense_shapes,
-        **dataset_ops.flat_structure(self))
+    variant_tensor = (
+        gen_experimental_dataset_ops.experimental_parse_example_dataset(
+            self._input_dataset._variant_tensor,  # pylint: disable=protected-access
+            self._num_parallel_calls,
+            self._dense_defaults,
+            self._sparse_keys,
+            self._dense_keys,
+            self._sparse_types,
+            self._dense_shapes,
+            **dataset_ops.flat_structure(self)))
+    super(_ParseExampleDataset, self).__init__(input_dataset, variant_tensor)
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/python/data/experimental/ops/prefetching_ops.py b/tensorflow/python/data/experimental/ops/prefetching_ops.py
index e3a8622393..ef9db2f2d0 100644
--- a/tensorflow/python/data/experimental/ops/prefetching_ops.py
+++ b/tensorflow/python/data/experimental/ops/prefetching_ops.py
@@ -93,7 +93,6 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
       target_device: The name of the device to which elements would be copied.
       source_device: Device where input_dataset would be placed.
     """
-    super(_CopyToDeviceDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._target_device = target_device
     spec = framework_device.DeviceSpec().from_string(self._target_device)
@@ -101,6 +100,9 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
     self._source_device_string = source_device
     self._source_device = ops.convert_to_tensor(source_device)
 
+    wrap_ds_variant = gen_dataset_ops.wrap_dataset_variant(
+        self._input_dataset._variant_tensor)  # pylint: disable=protected-access
+
     @function.defun()
     def _init_func():
       """Creates an iterator for the input dataset.
@@ -108,8 +110,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
       Returns:
         A `string` tensor that encapsulates the iterator created.
       """
-      # pylint: disable=protected-access
-      ds_variant = self._input_dataset._as_variant_tensor()
+      ds_variant = gen_dataset_ops.unwrap_dataset_variant(wrap_ds_variant)
       resource = gen_dataset_ops.anonymous_iterator(
           **dataset_ops.flat_structure(self._input_dataset))
       with ops.control_dependencies(
@@ -195,6 +196,17 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
     self._finalize_func.add_to_graph(g)
     # pylint: enable=protected-scope
 
+    with ops.device(self._target_device):
+      variant_tensor = gen_dataset_ops.generator_dataset(
+          self._init_captured_args,
+          self._next_captured_args,
+          self._finalize_captured_args,
+          init_func=self._init_func,
+          next_func=self._next_func,
+          finalize_func=self._finalize_func,
+          **dataset_ops.flat_structure(self._input_dataset))
+    super(_CopyToDeviceDataset, self).__init__(input_dataset, variant_tensor)
+
   # The one_shot_iterator implementation needs a 0 arg _make_dataset function
   # that thereby captures all the inputs required to create the dataset. Since
   # there are strings that are inputs to the GeneratorDataset which can't be
@@ -208,24 +220,12 @@ class _CopyToDeviceDataset(dataset_ops.UnaryUnchangedStructureDataset):
     else:
       return super(_CopyToDeviceDataset, self).make_one_shot_iterator()
 
-  def _as_variant_tensor(self):
-    with ops.device(self._target_device):
-      return gen_dataset_ops.generator_dataset(
-          self._init_captured_args,
-          self._next_captured_args,
-          self._finalize_captured_args,
-          init_func=self._init_func,
-          next_func=self._next_func,
-          finalize_func=self._finalize_func,
-          **dataset_ops.flat_structure(self._input_dataset))
-
 
 class _MapOnGpuDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that maps a function over elements in its using a GPU."""
 
   def __init__(self, input_dataset, map_func, use_inter_op_parallelism=True):
     """See `Dataset.map()` for details."""
-    super(_MapOnGpuDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._use_inter_op_parallelism = use_inter_op_parallelism
 
@@ -234,18 +234,16 @@ class _MapOnGpuDataset(dataset_ops.UnaryDataset):
         self._transformation_name(),
         dataset=input_dataset,
         defun_kwargs={"experimental_ints_on_device": True})
-
-  def _functions(self):
-    return [self._map_func]
-
-  def _as_variant_tensor(self):
-    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
-    return ged_ops.experimental_map_dataset(
-        input_t,
+    variant_tensor = ged_ops.experimental_map_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._map_func.function.captured_inputs,
         f=self._map_func.function,
         use_inter_op_parallelism=self._use_inter_op_parallelism,
         **dataset_ops.flat_structure(self))
+    super(_MapOnGpuDataset, self).__init__(input_dataset, variant_tensor)
+
+  def _functions(self):
+    return [self._map_func]
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/python/data/experimental/ops/random_ops.py b/tensorflow/python/data/experimental/ops/random_ops.py
index cbdf367db6..f96e4a84b4 100644
--- a/tensorflow/python/data/experimental/ops/random_ops.py
+++ b/tensorflow/python/data/experimental/ops/random_ops.py
@@ -33,14 +33,10 @@ class RandomDatasetV2(dataset_ops.DatasetSource):
 
   def __init__(self, seed=None):
     """A `Dataset` of pseudorandom values."""
-    super(RandomDatasetV2, self).__init__()
     self._seed, self._seed2 = random_seed.get_seed(seed)
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_random_dataset(
-        seed=self._seed,
-        seed2=self._seed2,
-        **dataset_ops.flat_structure(self))
+    variant_tensor = gen_experimental_dataset_ops.experimental_random_dataset(
+        seed=self._seed, seed2=self._seed2, **dataset_ops.flat_structure(self))
+    super(RandomDatasetV2, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/python/data/experimental/ops/readers.py b/tensorflow/python/data/experimental/ops/readers.py
index c2d82aeb59..177886e64b 100644
--- a/tensorflow/python/data/experimental/ops/readers.py
+++ b/tensorflow/python/data/experimental/ops/readers.py
@@ -622,7 +622,6 @@ class CsvDatasetV2(dataset_ops.DatasetSource):
         the input data. If specified, only this subset of columns will be
         parsed. Defaults to parsing all columns.
     """
-    super(CsvDatasetV2, self).__init__()
     self._filenames = ops.convert_to_tensor(
         filenames, dtype=dtypes.string, name="filenames")
     self._compression_type = convert.optional_param_to_tensor(
@@ -655,10 +654,7 @@ class CsvDatasetV2(dataset_ops.DatasetSource):
     self._structure = structure.NestedStructure(
         tuple(structure.TensorStructure(d.dtype, [])
               for d in self._record_defaults))
-
-  def _as_variant_tensor(self):
-    # Constructs graph node for the dataset op.
-    return gen_experimental_dataset_ops.experimental_csv_dataset(
+    variant_tensor = gen_experimental_dataset_ops.experimental_csv_dataset(
         filenames=self._filenames,
         record_defaults=self._record_defaults,
         buffer_size=self._buffer_size,
@@ -668,8 +664,8 @@ class CsvDatasetV2(dataset_ops.DatasetSource):
         use_quote_delim=self._use_quote_delim,
         na_value=self._na_value,
         select_cols=self._select_cols,
-        compression_type=self._compression_type,
-    )
+        compression_type=self._compression_type)
+    super(CsvDatasetV2, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
@@ -944,7 +940,6 @@ class SqlDatasetV2(dataset_ops.DatasetSource):
       output_types: A tuple of `tf.DType` objects representing the types of the
         columns returned by `query`.
     """
-    super(SqlDatasetV2, self).__init__()
     self._driver_name = ops.convert_to_tensor(
         driver_name, dtype=dtypes.string, name="driver_name")
     self._data_source_name = ops.convert_to_tensor(
@@ -954,11 +949,10 @@ class SqlDatasetV2(dataset_ops.DatasetSource):
     self._structure = structure.NestedStructure(
         nest.map_structure(
             lambda dtype: structure.TensorStructure(dtype, []), output_types))
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_sql_dataset(
+    variant_tensor = gen_experimental_dataset_ops.experimental_sql_dataset(
         self._driver_name, self._data_source_name, self._query,
         nest.flatten(self.output_types), nest.flatten(self.output_shapes))
+    super(SqlDatasetV2, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/python/data/experimental/ops/scan_ops.py b/tensorflow/python/data/experimental/ops/scan_ops.py
index 5c77ad7343..7662626c3a 100644
--- a/tensorflow/python/data/experimental/ops/scan_ops.py
+++ b/tensorflow/python/data/experimental/ops/scan_ops.py
@@ -33,7 +33,6 @@ class _ScanDataset(dataset_ops.UnaryDataset):
 
   def __init__(self, input_dataset, initial_state, scan_func):
     """See `scan()` for details."""
-    super(_ScanDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
 
     with ops.name_scope("initial_state"):
@@ -126,20 +125,18 @@ class _ScanDataset(dataset_ops.UnaryDataset):
 
     self._scan_func = wrapped_func
     self._scan_func.function.add_to_graph(ops.get_default_graph())
-
-  def _functions(self):
-    return [self._scan_func]
-
-  def _as_variant_tensor(self):
     # pylint: disable=protected-access
-    input_t = self._input_dataset._as_variant_tensor()
-    return gen_experimental_dataset_ops.experimental_scan_dataset(
-        input_t,
+    variant_tensor = gen_experimental_dataset_ops.experimental_scan_dataset(
+        self._input_dataset._variant_tensor,
         self._state_structure._to_tensor_list(self._initial_state),
         self._scan_func.function.captured_inputs,
         f=self._scan_func.function,
         preserve_cardinality=True,
         **dataset_ops.flat_structure(self))
+    super(_ScanDataset, self).__init__(input_dataset, variant_tensor)
+
+  def _functions(self):
+    return [self._scan_func]
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/python/data/experimental/ops/shuffle_ops.py b/tensorflow/python/data/experimental/ops/shuffle_ops.py
index d12328a714..86a615d524 100644
--- a/tensorflow/python/data/experimental/ops/shuffle_ops.py
+++ b/tensorflow/python/data/experimental/ops/shuffle_ops.py
@@ -30,7 +30,6 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryUnchangedStructureDataset):
   """A `Dataset` that fuses `shuffle` and `repeat`."""
 
   def __init__(self, input_dataset, buffer_size, count=None, seed=None):
-    super(_ShuffleAndRepeatDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
@@ -40,18 +39,15 @@ class _ShuffleAndRepeatDataset(dataset_ops.UnaryUnchangedStructureDataset):
       self._count = ops.convert_to_tensor(
           count, dtype=dtypes.int64, name="count")
     self._seed, self._seed2 = random_seed.get_seed(seed)
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    input_resource = self._input_dataset._as_variant_tensor()
-    return gen_dataset_ops.shuffle_and_repeat_dataset(
-        input_resource,
+    variant_tensor = gen_dataset_ops.shuffle_and_repeat_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         buffer_size=self._buffer_size,
         count=self._count,
         seed=self._seed,
         seed2=self._seed2,
         **dataset_ops.flat_structure(self))
-    # pylint: enable=protected-access
+    super(_ShuffleAndRepeatDataset, self).__init__(input_dataset,
+                                                   variant_tensor)
 
 
 @tf_export("data.experimental.shuffle_and_repeat")
diff --git a/tensorflow/python/data/experimental/ops/sleep.py b/tensorflow/python/data/experimental/ops/sleep.py
index 2da832395b..b66edc7a19 100644
--- a/tensorflow/python/data/experimental/ops/sleep.py
+++ b/tensorflow/python/data/experimental/ops/sleep.py
@@ -25,15 +25,13 @@ class _SleepDataset(dataset_ops.UnaryUnchangedStructureDataset):
   """A `Dataset` that sleeps before producing each upstream element."""
 
   def __init__(self, input_dataset, sleep_microseconds):
-    super(_SleepDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._sleep_microseconds = sleep_microseconds
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_sleep_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_experimental_dataset_ops.experimental_sleep_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._sleep_microseconds,
         **dataset_ops.flat_structure(self))
+    super(_SleepDataset, self).__init__(input_dataset, variant_tensor)
 
 
 def sleep(sleep_microseconds):
diff --git a/tensorflow/python/data/experimental/ops/stats_ops.py b/tensorflow/python/data/experimental/ops/stats_ops.py
index 15a9d24546..13dcb92fa0 100644
--- a/tensorflow/python/data/experimental/ops/stats_ops.py
+++ b/tensorflow/python/data/experimental/ops/stats_ops.py
@@ -102,13 +102,11 @@ class _StatsDataset(dataset_ops.UnaryUnchangedStructureDataset):
   """A `Dataset` that acts as an identity, and also records statistics."""
 
   def __init__(self, input_dataset, op_function, tag):
-    super(_StatsDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._op_function = op_function
     self._tag = ops.convert_to_tensor(tag, dtype=dtypes.string)
-
-  def _as_variant_tensor(self):
-    return self._op_function(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = self._op_function(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._tag,
         **dataset_ops.flat_structure(self))
+    super(_StatsDataset, self).__init__(input_dataset, variant_tensor)
diff --git a/tensorflow/python/data/experimental/ops/threadpool.py b/tensorflow/python/data/experimental/ops/threadpool.py
index 69e8829d68..bc2c726822 100644
--- a/tensorflow/python/data/experimental/ops/threadpool.py
+++ b/tensorflow/python/data/experimental/ops/threadpool.py
@@ -64,15 +64,13 @@ class _ThreadPoolDataset(dataset_ops.UnaryUnchangedStructureDataset):
   """A `Dataset` that acts as an identity, and sets a custom threadpool."""
 
   def __init__(self, input_dataset, thread_pool):
-    super(_ThreadPoolDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._thread_pool = thread_pool
-
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_thread_pool_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = ged_ops.experimental_thread_pool_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._thread_pool._resource,  # pylint: disable=protected-access
         **dataset_ops.flat_structure(self))
+    super(_ThreadPoolDataset, self).__init__(input_dataset, variant_tensor)
 
 
 # TODO(b/73383364): Properly export in the `tf.data.experimental` API when
diff --git a/tensorflow/python/data/experimental/ops/unique.py b/tensorflow/python/data/experimental/ops/unique.py
index 55ed98d854..dd26cfa4ee 100644
--- a/tensorflow/python/data/experimental/ops/unique.py
+++ b/tensorflow/python/data/experimental/ops/unique.py
@@ -53,15 +53,13 @@ class _UniqueDataset(dataset_ops.UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset):
     """See `unique()` for details."""
-    super(_UniqueDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if input_dataset.output_types not in (dtypes.int32, dtypes.int64,
                                           dtypes.string):
       raise TypeError(
           "`tf.data.experimental.unique()` only supports inputs with a single "
           "`tf.int32`, `tf.int64`, or `tf.string` component.")
-
-  def _as_variant_tensor(self):
-    return gen_experimental_dataset_ops.experimental_unique_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_experimental_dataset_ops.experimental_unique_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         **dataset_ops.flat_structure(self))
+    super(_UniqueDataset, self).__init__(input_dataset, variant_tensor)
diff --git a/tensorflow/python/data/experimental/ops/writers.py b/tensorflow/python/data/experimental/ops/writers.py
index aef6da5140..49eae14652 100644
--- a/tensorflow/python/data/experimental/ops/writers.py
+++ b/tensorflow/python/data/experimental/ops/writers.py
@@ -57,4 +57,4 @@ class TFRecordWriter(object):
           "produces shape {0} and types {1}".format(dataset.output_shapes,
                                                     dataset.output_types))
     return gen_experimental_dataset_ops.experimental_dataset_to_tf_record(
-        dataset._as_variant_tensor(), self._filename, self._compression_type)  # pylint: disable=protected-access
+        dataset._variant_tensor, self._filename, self._compression_type)  # pylint: disable=protected-access
diff --git a/tensorflow/python/data/kernel_tests/batch_test.py b/tensorflow/python/data/kernel_tests/batch_test.py
index 5b035e5917..2551250346 100644
--- a/tensorflow/python/data/kernel_tests/batch_test.py
+++ b/tensorflow/python/data/kernel_tests/batch_test.py
@@ -91,9 +91,9 @@ class BatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       result = self.evaluate(get_next())
 
   def testBatchDatasetInvalidBatchSize(self):
-    dataset = (dataset_ops.Dataset.range(10).batch(0))
-    self.assertDatasetProduces(
-        dataset, expected_error=(errors.InvalidArgumentError, ''))
+    with self.assertRaises(errors.InvalidArgumentError):
+      dataset = (dataset_ops.Dataset.range(10).batch(0))
+      self.evaluate(dataset._variant_tensor)
 
   def testBatchSparse(self):
 
diff --git a/tensorflow/python/data/kernel_tests/cache_test.py b/tensorflow/python/data/kernel_tests/cache_test.py
index b561cd58ba..4806101d8c 100644
--- a/tensorflow/python/data/kernel_tests/cache_test.py
+++ b/tensorflow/python/data/kernel_tests/cache_test.py
@@ -139,8 +139,8 @@ class FileCacheTest(test_base.DatasetTestBase):
       self.evaluate(get_next1())
 
     # Re-initialize
-    get_next1 = self.getNext(cache_dataset1)
-    get_next2 = self.getNext(cache_dataset2)
+    get_next1 = self.getNext(cache_dataset1, requires_initialization=True)
+    get_next2 = self.getNext(cache_dataset2, requires_initialization=True)
 
     # Reading concurrently should succeed.
     elements_itr1 = []
diff --git a/tensorflow/python/data/kernel_tests/dataset_test.py b/tensorflow/python/data/kernel_tests/dataset_test.py
index 3926be9550..8193dffc7d 100644
--- a/tensorflow/python/data/kernel_tests/dataset_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_test.py
@@ -272,12 +272,8 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testSkipEagerSameGraphErrorOneShot(self):
     dataset = dataset_ops.Dataset.range(10)
     with ops.Graph().as_default():
-      dataset = dataset.batch(2)
-      with test.mock.patch.object(logging, "warning") as mock_log:
-        _ = dataset.make_one_shot_iterator()
-        self.assertRegexpMatches(
-            str(mock_log.call_args), "Please ensure that all datasets in the "
-            "pipeline are created in the same graph as the iterator.")
+      with self.assertRaisesRegexp(ValueError, "must be from the same graph"):
+        dataset = dataset.batch(2)
 
   @test_util.run_deprecated_v1
   def testSkipEagerSameGraphErrorOneShotSimple(self):
@@ -293,9 +289,8 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testSkipEagerSameGraphErrorInitializable(self):
     dataset = dataset_ops.Dataset.range(10)
     with ops.Graph().as_default():
-      dataset = dataset.batch(2)
       with self.assertRaisesRegexp(ValueError, "must be from the same graph"):
-        _ = dataset.make_initializable_iterator()
+        dataset = dataset.batch(2)
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/data/kernel_tests/prefetch_test.py b/tensorflow/python/data/kernel_tests/prefetch_test.py
index a143ba0ac6..8d076f6e68 100644
--- a/tensorflow/python/data/kernel_tests/prefetch_test.py
+++ b/tensorflow/python/data/kernel_tests/prefetch_test.py
@@ -36,9 +36,10 @@ class PrefetchTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @parameterized.parameters((-2), (-42))
   def testInvalidBufferSize(self, buffer_size):
-    dataset = dataset_ops.Dataset.range(10).prefetch(buffer_size=buffer_size)
-    self.assertDatasetProduces(
-        dataset, expected_error=(errors.InvalidArgumentError, "buffer_size"))
+    with self.assertRaises(errors.InvalidArgumentError):
+      dataset = dataset_ops.Dataset.range(10).prefetch(buffer_size=buffer_size)
+      self.evaluate(dataset._variant_tensor)
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/data/kernel_tests/range_test.py b/tensorflow/python/data/kernel_tests/range_test.py
index 3f5d25e7f3..b7ac60c3ff 100644
--- a/tensorflow/python/data/kernel_tests/range_test.py
+++ b/tensorflow/python/data/kernel_tests/range_test.py
@@ -43,9 +43,9 @@ class RangeTest(test_base.DatasetTestBase):
 
   def testZeroStep(self):
     start, stop, step = 2, 10, 0
-    dataset = dataset_ops.Dataset.range(start, stop, step)
-    self.assertDatasetProduces(
-        dataset, expected_error=(errors.InvalidArgumentError, ""))
+    with self.assertRaises(errors.InvalidArgumentError):
+      dataset = dataset_ops.Dataset.range(start, stop, step)
+      self.evaluate(dataset._variant_tensor)
 
   def testNegativeStep(self):
     start, stop, step = 2, 10, -1
diff --git a/tensorflow/python/data/kernel_tests/window_test.py b/tensorflow/python/data/kernel_tests/window_test.py
index d083142ab6..a7b4d86fcf 100644
--- a/tensorflow/python/data/kernel_tests/window_test.py
+++ b/tensorflow/python/data/kernel_tests/window_test.py
@@ -116,12 +116,11 @@ class WindowTest(test_base.DatasetTestBase, parameterized.TestCase):
       ("3", 14, 3, 3, 0),
   )
   def testWindowDatasetInvalid(self, count, size, shift, stride):
-    dataset = dataset_ops.Dataset.range(10).map(lambda x: x).repeat(
-        count).window(
-            size=size, shift=shift,
-            stride=stride).flat_map(lambda x: x.batch(batch_size=size))
-    self.assertDatasetProduces(
-        dataset, expected_error=(errors.InvalidArgumentError, ""))
+    with self.assertRaises(errors.InvalidArgumentError):
+      ds = dataset_ops.Dataset.range(10).map(lambda x: x).repeat(count).window(
+          size=size, shift=shift,
+          stride=stride).flat_map(lambda x: x.batch(batch_size=size))
+      self.evaluate(ds._variant_tensor)
 
   def testWindowSparse(self):
 
diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD
index fbff7df9c3..112aa926ae 100644
--- a/tensorflow/python/data/ops/BUILD
+++ b/tensorflow/python/data/ops/BUILD
@@ -35,6 +35,7 @@ py_library(
         "//tensorflow/python/data/util:random_seed",
         "//tensorflow/python/data/util:sparse",
         "//tensorflow/python/data/util:structure",
+        "//tensorflow/python/data/util:traverse",
         "//third_party/py/numpy",
     ],
 )
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index 2c1f69de60..7fa9ea59e8 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -38,6 +38,7 @@ from tensorflow.python.data.util import options as options_lib
 from tensorflow.python.data.util import random_seed
 from tensorflow.python.data.util import sparse
 from tensorflow.python.data.util import structure as structure_lib
+from tensorflow.python.data.util import traverse
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -75,9 +76,27 @@ class DatasetV2(object):
   plan" of transformations that act on those elements.
   """
 
-  def __init__(self):
+  def __init__(self, variant_tensor):
+    """Creates a DatasetV2 object.
+
+    This is a difference between DatasetV1 and DatasetV2. DatasetV1 does not
+    take anything in its constructor whereas in the DatasetV2, we expect
+    subclasses to create a variant_tensor and pass it in to the super() call.
+
+    Args:
+      variant_tensor: A DT_VARIANT tensor that represents the dataset.
+    """
+    self._dataset_variant_tensor = variant_tensor
     self._graph_attr = ops.get_default_graph()
 
+  @property
+  def _variant_tensor(self):
+    return self._dataset_variant_tensor
+
+  @_variant_tensor.setter
+  def _variant_tensor(self, _):
+    raise ValueError("The _variant_tensor property is read-only")
+
   def _as_serialized_graph(self):
     """Produces serialized graph representation of the dataset.
 
@@ -85,16 +104,7 @@ class DatasetV2(object):
       A scalar `tf.Tensor` of `tf.string` type, representing this dataset as a
       serialized graph.
     """
-    return gen_dataset_ops.dataset_to_graph(self._as_variant_tensor())
-
-  @abc.abstractmethod
-  def _as_variant_tensor(self):
-    """Creates a scalar `tf.Tensor` of `tf.variant` representing this dataset.
-
-    Returns:
-      A scalar `tf.Tensor` of `tf.variant` type, which represents this dataset.
-    """
-    raise NotImplementedError("Dataset._as_variant_tensor")
+    return gen_dataset_ops.dataset_to_graph(self._variant_tensor)
 
   @abc.abstractmethod
   def _inputs(self):
@@ -1279,7 +1289,7 @@ class DatasetV2(object):
     # pylint: disable=protected-access
     return state_structure._from_compatible_tensor_list(
         gen_dataset_ops.reduce_dataset(
-            self._as_variant_tensor(),
+            self._variant_tensor,
             state_structure._to_tensor_list(initial_state),
             reduce_func.captured_inputs,
             f=reduce_func,
@@ -1314,8 +1324,31 @@ class DatasetV1(DatasetV2):
   plan" of transformations that act on those elements.
   """
 
-  def __init__(self):  # pylint: disable=useless-super-delegation
-    super(DatasetV1, self).__init__()
+  def __init__(self):
+    try:
+      variant_tensor = self._as_variant_tensor()
+    except AttributeError as e:
+      if "_as_variant_tensor" in str(e):
+        raise AttributeError("Please use _variant_tensor instead of "
+                             "_as_variant_tensor() to obtain the variant "
+                             "associated with a dataset")
+      raise AttributeError("A likely cause of this error is that the super "
+                           "call for this dataset is not the last line of the "
+                           "__init__ method. The base class causes the "
+                           "_as_variant_tensor call in its constructor and "
+                           "if that uses attributes defined in the __init__ "
+                           "method, those attrs need to be defined before the "
+                           "super call.")
+    super(DatasetV1, self).__init__(variant_tensor)
+
+  @abc.abstractmethod
+  def _as_variant_tensor(self):
+    """Creates a scalar `tf.Tensor` of `tf.variant` representing this dataset.
+
+    Returns:
+      A scalar `tf.Tensor` of `tf.variant` type, which represents this dataset.
+    """
+    raise NotImplementedError("Dataset._as_variant_tensor")
 
   @deprecation.deprecated(
       None, "Use `for ... in dataset:` to iterate over a dataset. If using "
@@ -1335,11 +1368,19 @@ class DatasetV1(DatasetV2):
       return iterator_ops.EagerIterator(self)
 
     _ensure_same_dataset_graph(self)
+    # Now that we create datasets at python object creation time, the capture
+    # by value _make_dataset() function would try to capture these variant
+    # tensor dataset inputs, which are marked as stateful ops and would throw
+    # an error if we try and capture them. We therefore traverse the graph
+    # to find all these ops and whitelist them so that the capturing
+    # logic instead of throwing an error recreates these ops which is what was
+    # happening before.
+    all_ds_ops = traverse.obtain_all_variant_tensor_ops(self)
     graph_level_seed, op_level_seed = core_random_seed.get_seed(None)
 
     # NOTE(mrry): We capture by value here to ensure that `_make_dataset()` is
     # a 0-argument function.
-    @function.Defun(capture_by_value=True)
+    @function.Defun(capture_by_value=True, whitelisted_stateful_ops=all_ds_ops)
     def _make_dataset():
       """Factory function for a dataset."""
       # NOTE(mrry): `Defun` does not capture the graph-level seed from the
@@ -1351,7 +1392,7 @@ class DatasetV1(DatasetV2):
             (graph_level_seed + 87654321 * op_level_seed) % (2 ** 63 - 1))
 
       dataset = self._apply_options()
-      return dataset._as_variant_tensor()  # pylint: disable=protected-access
+      return dataset._variant_tensor  # pylint: disable=protected-access
 
     try:
       _make_dataset.add_to_graph(ops.get_default_graph())
@@ -1416,7 +1457,7 @@ class DatasetV1(DatasetV2):
           container="", shared_name=shared_name, **flat_structure(self))
     with ops.colocate_with(iterator_resource):
       initializer = gen_dataset_ops.make_iterator(
-          dataset._as_variant_tensor(),  # pylint: disable=protected-access
+          dataset._variant_tensor,  # pylint: disable=protected-access
           iterator_resource)
     return iterator_ops.Iterator(iterator_resource, initializer,
                                  dataset.output_types, dataset.output_shapes,
@@ -1621,11 +1662,11 @@ class DatasetV1Adapter(DatasetV1):
   """Wraps a V2 `Dataset` object in the `tf.compat.v1.data.Dataset` API."""
 
   def __init__(self, dataset):
-    super(DatasetV1Adapter, self).__init__()
     self._dataset = dataset
+    super(DatasetV1Adapter, self).__init__()
 
   def _as_variant_tensor(self):
-    return self._dataset._as_variant_tensor()  # pylint: disable=protected-access
+    return self._dataset._variant_tensor  # pylint: disable=protected-access
 
   def _has_captured_ref(self):
     return self._dataset._has_captured_ref()  # pylint: disable=protected-access
@@ -1657,14 +1698,14 @@ def _ensure_same_dataset_graph(dataset):
     if current_graph != ds_graph:
       logging.warning("The graph (" + str(current_graph) + ") of the iterator "
                       "is different from the graph (" + str(ds_graph) + ") "
-                      "the dataset: " + str(ds) + " was created in. "
-                      "If you are using the Estimator API, make sure that no "
-                      "part of the dataset returned by the `input_fn` function "
-                      "is defined outside the `input_fn` function."
-                      "Please ensure that all datasets in the pipeline are "
-                      "created in the same graph as the iterator. NOTE: This "
-                      "warning will become an error in future versions of "
-                      "TensorFlow.")
+                      "the dataset: " + str(ds._variant_tensor) + " was "  # pylint: disable=protected-access
+                      "created in. If you are using the Estimator API, "
+                      "make sure that no part of the dataset returned by the "
+                      "`input_fn` function is defined outside the `input_fn` "
+                      "function. Please ensure that all datasets in the "
+                      "pipeline are created in the same graph as the iterator. "
+                      "NOTE: This warning will become an error in future "
+                      "versions of TensorFlow.")
     for input_ds in ds._inputs():  # pylint: disable=protected-access
       if input_ds not in visited:
         bfs_q.put(input_ds)
@@ -1820,9 +1861,9 @@ class DatasetSource(DatasetV2):
 class UnaryDataset(DatasetV2):
   """Abstract class representing a dataset with one input."""
 
-  def __init__(self, input_dataset):
-    super(UnaryDataset, self).__init__()
+  def __init__(self, input_dataset, variant_tensor):
     self._input_dataset = input_dataset
+    super(UnaryDataset, self).__init__(variant_tensor)
 
   def _inputs(self):
     return [self._input_dataset]
@@ -1831,6 +1872,11 @@ class UnaryDataset(DatasetV2):
 class UnaryUnchangedStructureDataset(UnaryDataset):
   """Represents a unary dataset with the same input and output structure."""
 
+  def __init__(self, input_dataset, variant_tensor):
+    self._input_dataset = input_dataset
+    super(UnaryUnchangedStructureDataset, self).__init__(
+        input_dataset, variant_tensor)
+
   @property
   def _element_structure(self):
     return self._input_dataset._element_structure  # pylint: disable=protected-access
@@ -1841,7 +1887,6 @@ class TensorDataset(DatasetSource):
 
   def __init__(self, tensors):
     """See `Dataset.from_tensors()` for details."""
-    super(TensorDataset, self).__init__()
     with ops.name_scope("tensors"):
       tensors = nest.pack_sequence_as(tensors, [
           sparse_tensor_lib.SparseTensor.from_value(t)
@@ -1852,9 +1897,9 @@ class TensorDataset(DatasetSource):
     self._structure = structure_lib.Structure.from_value(tensors)
     self._tensors = self._structure._to_tensor_list(tensors)  # pylint: disable=protected-access
 
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.tensor_dataset(
+    variant_tensor = gen_dataset_ops.tensor_dataset(
         self._tensors, output_shapes=self._structure._flat_shapes)  # pylint: disable=protected-access
+    super(TensorDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
@@ -1866,7 +1911,6 @@ class TensorSliceDataset(DatasetSource):
 
   def __init__(self, tensors):
     """See `Dataset.from_tensor_slices()` for details."""
-    super(TensorSliceDataset, self).__init__()
     with ops.name_scope("tensors"):
       tensors = nest.pack_sequence_as(tensors, [
           sparse_tensor_lib.SparseTensor.from_value(t)
@@ -1887,9 +1931,9 @@ class TensorSliceDataset(DatasetSource):
       batch_dim.assert_is_compatible_with(tensor_shape.Dimension(
           tensor_shape.dimension_value(t.get_shape()[0])))
 
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.tensor_slice_dataset(
+    variant_tensor = gen_dataset_ops.tensor_slice_dataset(
         self._tensors, output_shapes=self._structure._flat_shapes)  # pylint: disable=protected-access
+    super(TensorSliceDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
@@ -1901,7 +1945,6 @@ class SparseTensorSliceDataset(DatasetSource):
 
   def __init__(self, sparse_tensor):
     """See `Dataset.from_sparse_tensor_slices()` for details."""
-    super(SparseTensorSliceDataset, self).__init__()
     if not isinstance(sparse_tensor, sparse_tensor_lib.SparseTensor):
       raise TypeError("`sparse_tensor` must be a `tf.SparseTensor` object.")
     self._sparse_tensor = sparse_tensor
@@ -1914,10 +1957,10 @@ class SparseTensorSliceDataset(DatasetSource):
          structure_lib.TensorStructure(self._sparse_tensor.dtype, [None]),
          structure_lib.TensorStructure(dtypes.int64, [rank])))
 
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.sparse_tensor_slice_dataset(
+    variant_tensor = gen_dataset_ops.sparse_tensor_slice_dataset(
         self._sparse_tensor.indices, self._sparse_tensor.values,
         self._sparse_tensor.dense_shape)
+    super(SparseTensorSliceDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
@@ -1928,12 +1971,8 @@ class _VariantDataset(DatasetV2):
   """A Dataset wrapper around a `tf.variant`-typed function argument."""
 
   def __init__(self, dataset_variant, structure):
-    super(_VariantDataset, self).__init__()
-    self._dataset_variant = dataset_variant
     self._structure = structure
-
-  def _as_variant_tensor(self):
-    return self._dataset_variant
+    super(_VariantDataset, self).__init__(dataset_variant)
 
   def _inputs(self):
     return []
@@ -1965,7 +2004,7 @@ class DatasetStructure(structure_lib.Structure):
                 other._element_structure))
 
   def _to_tensor_list(self, value):
-    return [value._as_variant_tensor()]  # pylint: disable=protected-access
+    return [value._variant_tensor]  # pylint: disable=protected-access
 
   def _to_batched_tensor_list(self, value):
     raise NotImplementedError("Unbatching for `tf.data.Dataset` objects.")
@@ -2153,7 +2192,7 @@ def flat_structure(dataset):
   Most Dataset op constructors expect `output_shapes` and `output_types`
   arguments that represent the flattened structure of an element. This helper
   function generates these attrs as a keyword argument dictionary, allowing
-  `Dataset._as_variant_tensor()` implementations to pass
+  `Dataset._variant_tensor` implementations to pass
   `**flat_structure(self)` to the op constructor.
 
   Args:
@@ -2189,7 +2228,6 @@ class _GeneratorDataset(DatasetSource):
         `init_func` immediately before a C++ iterator over this dataset is
         destroyed. The return value is ignored.
     """
-    super(_GeneratorDataset, self).__init__()
     self._init_args = init_args
 
     self._init_structure = structure_lib.Structure.from_value(init_args)
@@ -2208,9 +2246,7 @@ class _GeneratorDataset(DatasetSource):
         finalize_func,
         self._transformation_name(),
         input_structure=self._init_func.output_structure)
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.generator_dataset(
+    variant_tensor = gen_dataset_ops.generator_dataset(
         self._init_structure._to_tensor_list(self._init_args)  # pylint: disable=protected-access
         + self._init_func.function.captured_inputs,
         self._next_func.function.captured_inputs,
@@ -2219,6 +2255,7 @@ class _GeneratorDataset(DatasetSource):
         next_func=self._next_func.function,
         finalize_func=self._finalize_func.function,
         **flat_structure(self))
+    super(_GeneratorDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
@@ -2233,7 +2270,6 @@ class ZipDataset(DatasetV2):
 
   def __init__(self, datasets):
     """See `Dataset.zip()` for details."""
-    super(ZipDataset, self).__init__()
     for ds in nest.flatten(datasets):
       if not isinstance(ds, DatasetV2):
         if isinstance(ds, list):
@@ -2250,12 +2286,12 @@ class ZipDataset(DatasetV2):
             self._datasets,
             [ds._element_structure for ds in nest.flatten(self._datasets)]))  # pylint: disable=protected-access
 
-  def _as_variant_tensor(self):
     # pylint: disable=protected-access
-    return gen_dataset_ops.zip_dataset(
-        [ds._as_variant_tensor() for ds in nest.flatten(self._datasets)],
+    variant_tensor = gen_dataset_ops.zip_dataset(
+        [ds._variant_tensor for ds in nest.flatten(self._datasets)],
         **flat_structure(self))
     # pylint: enable=protected-access
+    super(ZipDataset, self).__init__(variant_tensor)
 
   def _inputs(self):
     return nest.flatten(self._datasets)
@@ -2270,7 +2306,6 @@ class ConcatenateDataset(DatasetV2):
 
   def __init__(self, input_dataset, dataset_to_concatenate):
     """See `Dataset.concatenate()` for details."""
-    super(ConcatenateDataset, self).__init__()
     self._input_dataset = input_dataset
     self._dataset_to_concatenate = dataset_to_concatenate
 
@@ -2298,17 +2333,15 @@ class ConcatenateDataset(DatasetV2):
         output_types, output_shapes, output_classes)
 
     self._input_datasets = [input_dataset, dataset_to_concatenate]
-
-  def _as_variant_tensor(self):
     # pylint: disable=protected-access
-    return gen_dataset_ops.concatenate_dataset(
-        self._input_dataset._as_variant_tensor(),
-        self._dataset_to_concatenate._as_variant_tensor(),
+    variant_tensor = gen_dataset_ops.concatenate_dataset(
+        input_dataset._variant_tensor, dataset_to_concatenate._variant_tensor,
         **flat_structure(self))
     # pylint: enable=protected-access
+    super(ConcatenateDataset, self).__init__(variant_tensor)
 
   def _inputs(self):
-    return [self._input_dataset, self._dataset_to_concatenate]
+    return self._input_datasets
 
   @property
   def _element_structure(self):
@@ -2320,19 +2353,17 @@ class RepeatDataset(UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset, count):
     """See `Dataset.repeat()` for details."""
-    super(RepeatDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if count is None:
       self._count = constant_op.constant(-1, dtype=dtypes.int64, name="count")
     else:
       self._count = ops.convert_to_tensor(
           count, dtype=dtypes.int64, name="count")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.repeat_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.repeat_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         count=self._count,
         **flat_structure(self))
+    super(RepeatDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class RangeDataset(DatasetSource):
@@ -2340,8 +2371,13 @@ class RangeDataset(DatasetSource):
 
   def __init__(self, *args):
     """See `Dataset.range()` for details."""
-    super(RangeDataset, self).__init__()
     self._parse_args(*args)
+    variant_tensor = gen_dataset_ops.range_dataset(
+        start=self._start,
+        stop=self._stop,
+        step=self._step,
+        **flat_structure(self))
+    super(RangeDataset, self).__init__(variant_tensor)
 
   def _parse_args(self, *args):
     """Parse arguments according to the same rules as the `range()` builtin."""
@@ -2363,13 +2399,6 @@ class RangeDataset(DatasetSource):
   def _build_tensor(self, int64_value, name):
     return ops.convert_to_tensor(int64_value, dtype=dtypes.int64, name=name)
 
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.range_dataset(
-        start=self._start,
-        stop=self._stop,
-        step=self._step,
-        **flat_structure(self))
-
   @property
   def _element_structure(self):
     return structure_lib.TensorStructure(dtypes.int64, [])
@@ -2380,16 +2409,14 @@ class CacheDataset(UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset, filename):
     """See `Dataset.cache()` for details."""
-    super(CacheDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._filename = ops.convert_to_tensor(
         filename, dtype=dtypes.string, name="filename")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.cache_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.cache_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         filename=self._filename,
         **flat_structure(self))
+    super(CacheDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class ShuffleDataset(UnaryUnchangedStructureDataset):
@@ -2420,7 +2447,6 @@ class ShuffleDataset(UnaryUnchangedStructureDataset):
     Raises:
       ValueError: if invalid arguments are provided.
     """
-    super(ShuffleDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
@@ -2430,15 +2456,14 @@ class ShuffleDataset(UnaryUnchangedStructureDataset):
       self._reshuffle_each_iteration = True
     else:
       self._reshuffle_each_iteration = reshuffle_each_iteration
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.shuffle_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.shuffle_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         buffer_size=self._buffer_size,
         seed=self._seed,
         seed2=self._seed2,
         reshuffle_each_iteration=self._reshuffle_each_iteration,
         **flat_structure(self))
+    super(ShuffleDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class TakeDataset(UnaryUnchangedStructureDataset):
@@ -2446,15 +2471,13 @@ class TakeDataset(UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset, count):
     """See `Dataset.take()` for details."""
-    super(TakeDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._count = ops.convert_to_tensor(count, dtype=dtypes.int64, name="count")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.take_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.take_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         count=self._count,
         **flat_structure(self))
+    super(TakeDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class SkipDataset(UnaryUnchangedStructureDataset):
@@ -2462,15 +2485,13 @@ class SkipDataset(UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset, count):
     """See `Dataset.skip()` for details."""
-    super(SkipDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._count = ops.convert_to_tensor(count, dtype=dtypes.int64, name="count")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.skip_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.skip_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         count=self._count,
         **flat_structure(self))
+    super(SkipDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class BatchDataset(UnaryDataset):
@@ -2478,7 +2499,6 @@ class BatchDataset(UnaryDataset):
 
   def __init__(self, input_dataset, batch_size, drop_remainder):
     """See `Dataset.batch()` for details."""
-    super(BatchDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._batch_size = ops.convert_to_tensor(
         batch_size, dtype=dtypes.int64, name="batch_size")
@@ -2494,13 +2514,12 @@ class BatchDataset(UnaryDataset):
           tensor_util.constant_value(self._batch_size))
     else:
       self._structure = input_dataset._element_structure._batch(None)
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.batch_dataset_v2(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.batch_dataset_v2(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         batch_size=self._batch_size,
         drop_remainder=self._drop_remainder,
         **flat_structure(self))
+    super(BatchDataset, self).__init__(input_dataset, variant_tensor)
 
   @property
   def _element_structure(self):
@@ -2622,7 +2641,7 @@ class PaddedBatchDataset(UnaryDataset):
   def __init__(self, input_dataset, batch_size, padded_shapes, padding_values,
                drop_remainder):
     """See `Dataset.batch()` for details."""
-    super(PaddedBatchDataset, self).__init__(input_dataset)
+    self._input_dataset = input_dataset
     if sparse.any_sparse(input_dataset.output_classes):
       # TODO(b/63669786): support batching of sparse tensors
       raise TypeError(
@@ -2665,12 +2684,11 @@ class PaddedBatchDataset(UnaryDataset):
         self._input_dataset.output_types, output_shapes,
         self._input_dataset.output_classes)
 
-  def _as_variant_tensor(self):
     # pylint: disable=protected-access
     # TODO(jsimsa): Switch to using v2 only any time after 6/30/2018.
     if smart_cond.smart_constant_value(self._drop_remainder) is False:
-      return gen_dataset_ops.padded_batch_dataset(
-          self._input_dataset._as_variant_tensor(),
+      variant_tensor = gen_dataset_ops.padded_batch_dataset(
+          input_dataset._variant_tensor,  # pylint: disable=protected-access
           batch_size=self._batch_size,
           padded_shapes=[
               ops.convert_to_tensor(s, dtype=dtypes.int64)
@@ -2679,8 +2697,8 @@ class PaddedBatchDataset(UnaryDataset):
           padding_values=nest.flatten(self._padding_values),
           output_shapes=self._structure._flat_shapes)
     else:
-      return gen_dataset_ops.padded_batch_dataset_v2(
-          self._input_dataset._as_variant_tensor(),
+      variant_tensor = gen_dataset_ops.padded_batch_dataset_v2(
+          input_dataset._variant_tensor,  # pylint: disable=protected-access
           batch_size=self._batch_size,
           padded_shapes=[
               ops.convert_to_tensor(s, dtype=dtypes.int64)
@@ -2689,6 +2707,7 @@ class PaddedBatchDataset(UnaryDataset):
           padding_values=nest.flatten(self._padding_values),
           drop_remainder=self._drop_remainder,
           output_shapes=self._structure._flat_shapes)
+    super(PaddedBatchDataset, self).__init__(input_dataset, variant_tensor)
 
   @property
   def _element_structure(self):
@@ -2727,22 +2746,19 @@ class MapDataset(UnaryDataset):
                use_inter_op_parallelism=True,
                preserve_cardinality=False):
     """See `Dataset.map()` for details."""
-    super(MapDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._use_inter_op_parallelism = use_inter_op_parallelism
     self._preserve_cardinality = preserve_cardinality
     self._map_func = StructuredFunctionWrapper(
         map_func, self._transformation_name(), dataset=input_dataset)
-
-  def _as_variant_tensor(self):
-    input_t = self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
-    return gen_dataset_ops.map_dataset(
-        input_t,
+    variant_tensor = gen_dataset_ops.map_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._map_func.function.captured_inputs,
         f=self._map_func.function,
         use_inter_op_parallelism=self._use_inter_op_parallelism,
         preserve_cardinality=self._preserve_cardinality,
         **flat_structure(self))
+    super(MapDataset, self).__init__(input_dataset, variant_tensor)
 
   def _functions(self):
     return [self._map_func]
@@ -2755,7 +2771,7 @@ class MapDataset(UnaryDataset):
     return "Dataset.map()"
 
 
-class ParallelMapDataset(MapDataset):
+class ParallelMapDataset(UnaryDataset):
   """A `Dataset` that maps a function over elements in its input in parallel."""
 
   def __init__(self,
@@ -2765,23 +2781,32 @@ class ParallelMapDataset(MapDataset):
                use_inter_op_parallelism=True,
                preserve_cardinality=False):
     """See `Dataset.map()` for details."""
-    super(ParallelMapDataset, self).__init__(
-        input_dataset, map_func, use_inter_op_parallelism, preserve_cardinality)
-
+    self._input_dataset = input_dataset
+    self._use_inter_op_parallelism = use_inter_op_parallelism
+    self._map_func = StructuredFunctionWrapper(
+        map_func, self._transformation_name(), dataset=input_dataset)
     self._num_parallel_calls = ops.convert_to_tensor(
         num_parallel_calls, dtype=dtypes.int32, name="num_parallel_calls")
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    input_t = self._input_dataset._as_variant_tensor()
-    return gen_dataset_ops.parallel_map_dataset(
-        input_t,
+    self._preserve_cardinality = preserve_cardinality
+    variant_tensor = gen_dataset_ops.parallel_map_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._map_func.function.captured_inputs,
         f=self._map_func.function,
         num_parallel_calls=self._num_parallel_calls,
         use_inter_op_parallelism=self._use_inter_op_parallelism,
         preserve_cardinality=self._preserve_cardinality,
         **flat_structure(self))
+    super(ParallelMapDataset, self).__init__(input_dataset, variant_tensor)
+
+  def _functions(self):
+    return [self._map_func]
+
+  @property
+  def _element_structure(self):
+    return self._map_func.output_structure
+
+  def _transformation_name(self):
+    return "Dataset.map()"
 
 
 class FlatMapDataset(UnaryDataset):
@@ -2789,24 +2814,21 @@ class FlatMapDataset(UnaryDataset):
 
   def __init__(self, input_dataset, map_func):
     """See `Dataset.flat_map()` for details."""
-    super(FlatMapDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
-
     self._map_func = StructuredFunctionWrapper(
         map_func, self._transformation_name(), dataset=input_dataset)
     if not isinstance(self._map_func.output_structure, DatasetStructure):
       raise TypeError("`map_func` must return a `Dataset` object.")
     self._structure = self._map_func.output_structure._element_structure  # pylint: disable=protected-access
-
-  def _functions(self):
-    return [self._map_func]
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.flat_map_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.flat_map_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._map_func.function.captured_inputs,
         f=self._map_func.function,
         **flat_structure(self))
+    super(FlatMapDataset, self).__init__(input_dataset, variant_tensor)
+
+  def _functions(self):
+    return [self._map_func]
 
   @property
   def _element_structure(self):
@@ -2816,58 +2838,79 @@ class FlatMapDataset(UnaryDataset):
     return "Dataset.flat_map()"
 
 
-class InterleaveDataset(FlatMapDataset):
+class InterleaveDataset(UnaryDataset):
   """A `Dataset` that maps a function over its input and interleaves the result.
   """
 
   def __init__(self, input_dataset, map_func, cycle_length, block_length):
     """See `Dataset.interleave()` for details."""
-    super(InterleaveDataset, self).__init__(input_dataset, map_func)
+    self._input_dataset = input_dataset
+    self._map_func = StructuredFunctionWrapper(
+        map_func, self._transformation_name(), dataset=input_dataset)
+    if not isinstance(self._map_func.output_structure, DatasetStructure):
+      raise TypeError("`map_func` must return a `Dataset` object.")
+    self._structure = self._map_func.output_structure._element_structure  # pylint: disable=protected-access
     self._cycle_length = ops.convert_to_tensor(
         cycle_length, dtype=dtypes.int64, name="cycle_length")
     self._block_length = ops.convert_to_tensor(
         block_length, dtype=dtypes.int64, name="block_length")
 
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    return gen_dataset_ops.interleave_dataset(
-        self._input_dataset._as_variant_tensor(),
-        self._map_func.function.captured_inputs,
+    variant_tensor = gen_dataset_ops.interleave_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
+        self._map_func.function.captured_inputs,  # pylint: disable=protected-access
         self._cycle_length,
         self._block_length,
         f=self._map_func.function,
         **flat_structure(self))
+    super(InterleaveDataset, self).__init__(input_dataset, variant_tensor)
+
+  def _functions(self):
+    return [self._map_func]
+
+  @property
+  def _element_structure(self):
+    return self._structure
 
   def _transformation_name(self):
     return "Dataset.interleave()"
 
 
-class ParallelInterleaveDataset(FlatMapDataset):
+class ParallelInterleaveDataset(UnaryDataset):
   """A `Dataset` that maps a function over its input and interleaves the result.
-
   """
 
   def __init__(self, input_dataset, map_func, cycle_length, block_length,
                num_parallel_calls):
     """See `Dataset.interleave()` for details."""
-    super(ParallelInterleaveDataset, self).__init__(input_dataset, map_func)
+    self._input_dataset = input_dataset
+    self._map_func = StructuredFunctionWrapper(
+        map_func, self._transformation_name(), dataset=input_dataset)
+    if not isinstance(self._map_func.output_structure, DatasetStructure):
+      raise TypeError("`map_func` must return a `Dataset` object.")
+    self._structure = self._map_func.output_structure._element_structure  # pylint: disable=protected-access
     self._cycle_length = ops.convert_to_tensor(
         cycle_length, dtype=dtypes.int64, name="cycle_length")
     self._block_length = ops.convert_to_tensor(
         block_length, dtype=dtypes.int64, name="block_length")
     self._num_parallel_calls = ops.convert_to_tensor(
         num_parallel_calls, dtype=dtypes.int64, name="num_parallel_calls")
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    return gen_dataset_ops.parallel_interleave_dataset_v2(
-        self._input_dataset._as_variant_tensor(),
-        self._map_func.function.captured_inputs,
+    variant_tensor = gen_dataset_ops.parallel_interleave_dataset_v2(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
+        self._map_func.function.captured_inputs,  # pylint: disable=protected-access
         self._cycle_length,
         self._block_length,
         self._num_parallel_calls,
         f=self._map_func.function,
         **flat_structure(self))
+    super(ParallelInterleaveDataset, self).__init__(input_dataset,
+                                                    variant_tensor)
+
+  def _functions(self):
+    return [self._map_func]
+
+  @property
+  def _element_structure(self):
+    return self._structure
 
   def _transformation_name(self):
     return "Dataset.interleave()"
@@ -2878,7 +2921,6 @@ class FilterDataset(UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset, predicate):
     """See `Dataset.filter()` for details."""
-    super(FilterDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     wrapped_func = StructuredFunctionWrapper(
         predicate, self._transformation_name(), dataset=input_dataset)
@@ -2886,16 +2928,15 @@ class FilterDataset(UnaryUnchangedStructureDataset):
         structure_lib.TensorStructure(dtypes.bool, [])):
       raise ValueError("`predicate` must return a scalar boolean tensor.")
     self._predicate = wrapped_func
-
-  def _functions(self):
-    return [self._predicate]
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.filter_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.filter_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         other_arguments=self._predicate.function.captured_inputs,
         predicate=self._predicate.function,
         **flat_structure(self))
+    super(FilterDataset, self).__init__(input_dataset, variant_tensor)
+
+  def _functions(self):
+    return [self._predicate]
 
   def _transformation_name(self):
     return "Dataset.filter()"
@@ -2906,18 +2947,16 @@ class PrefetchDataset(UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset, buffer_size):
     """See `Dataset.prefetch()` for details."""
-    super(PrefetchDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if buffer_size is None:
       buffer_size = -1  # This is the sentinel for auto-tuning.
     self._buffer_size = ops.convert_to_tensor(
         buffer_size, dtype=dtypes.int64, name="buffer_size")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.prefetch_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.prefetch_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         buffer_size=self._buffer_size,
         **flat_structure(self))
+    super(PrefetchDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class WindowDataset(UnaryDataset):
@@ -2925,7 +2964,6 @@ class WindowDataset(UnaryDataset):
 
   def __init__(self, input_dataset, size, shift, stride, drop_remainder):
     """See `window_dataset()` for more details."""
-    super(WindowDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._size = ops.convert_to_tensor(size, dtype=dtypes.int64, name="size")
     self._shift = ops.convert_to_tensor(shift, dtype=dtypes.int64, name="shift")
@@ -2944,15 +2982,14 @@ class WindowDataset(UnaryDataset):
                 nest.flatten(input_dataset.output_types))
         ])
     self._structure = structure_lib.NestedStructure(nest_of_structures)
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.window_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.window_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._size,
         self._shift,
         self._stride,
         self._drop_remainder,
         **flat_structure(self))
+    super(WindowDataset, self).__init__(input_dataset, variant_tensor)
 
   @property
   def _element_structure(self):
@@ -2963,16 +3000,14 @@ class _OptionsDataset(UnaryUnchangedStructureDataset):
   """An identity `Dataset` that stores options."""
 
   def __init__(self, input_dataset, options):
-    super(_OptionsDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._options = input_dataset.options()
     if self._options:
       self._options = self._options.merge(options)
     else:
       self._options = options
-
-  def _as_variant_tensor(self):
-    return self._input_dataset._as_variant_tensor()  # pylint: disable=protected-access
+    variant_tensor = input_dataset._variant_tensor  # pylint: disable=protected-access
+    super(_OptionsDataset, self).__init__(input_dataset, variant_tensor)
 
   def options(self):
     return self._options
@@ -2983,13 +3018,11 @@ class _ModelDataset(UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset):
     """See `optimize()` for details."""
-    super(_ModelDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.model_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.model_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         **flat_structure(self))
+    super(_ModelDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class _OptimizeDataset(UnaryUnchangedStructureDataset):
@@ -2997,68 +3030,63 @@ class _OptimizeDataset(UnaryUnchangedStructureDataset):
 
   def __init__(self, input_dataset, optimizations):
     """See `optimize()` for details."""
-    super(_OptimizeDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     if optimizations is None:
       optimizations = []
     self._optimizations = ops.convert_to_tensor(
         optimizations, dtype=dtypes.string, name="optimizations")
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.optimize_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = gen_dataset_ops.optimize_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._optimizations,
         **flat_structure(self))
+    super(_OptimizeDataset, self).__init__(input_dataset, variant_tensor)
 
 
 class _SetStatsAggregatorDataset(UnaryUnchangedStructureDataset):
   """A `Dataset` that acts as an identity, and sets a stats aggregator."""
 
   def __init__(self, input_dataset, aggregator, prefix, counter_prefix):
-    super(_SetStatsAggregatorDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._stats_aggregator = aggregator
     self._prefix = prefix
     self._counter_prefix = counter_prefix
-
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_set_stats_aggregator_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = ged_ops.experimental_set_stats_aggregator_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._stats_aggregator._resource,  # pylint: disable=protected-access
         self._prefix,
         self._counter_prefix,
         **flat_structure(self))
+    super(_SetStatsAggregatorDataset, self).__init__(input_dataset,
+                                                     variant_tensor)
 
 
 class _MaxIntraOpParallelismDataset(UnaryUnchangedStructureDataset):
   """A `Dataset` that acts as an identity, overriding intra-op parallelism."""
 
   def __init__(self, input_dataset, max_intra_op_parallelism):
-    super(_MaxIntraOpParallelismDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._max_intra_op_parallelism = ops.convert_to_tensor(
         max_intra_op_parallelism,
         dtype=dtypes.int64,
         name="max_intra_op_parallelism")
-
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_max_intra_op_parallelism_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = ged_ops.experimental_max_intra_op_parallelism_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._max_intra_op_parallelism,
         **flat_structure(self))
+    super(_MaxIntraOpParallelismDataset, self).__init__(input_dataset,
+                                                        variant_tensor)
 
 
 class _PrivateThreadPoolDataset(UnaryUnchangedStructureDataset):
   """A `Dataset` that acts as an identity, setting a private threadpool."""
 
   def __init__(self, input_dataset, num_threads):
-    super(_PrivateThreadPoolDataset, self).__init__(input_dataset)
     self._input_dataset = input_dataset
     self._num_threads = ops.convert_to_tensor(
         num_threads, dtype=dtypes.int64, name="num_threads")
-
-  def _as_variant_tensor(self):
-    return ged_ops.experimental_private_thread_pool_dataset(
-        self._input_dataset._as_variant_tensor(),  # pylint: disable=protected-access
+    variant_tensor = ged_ops.experimental_private_thread_pool_dataset(
+        input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._num_threads,
         **flat_structure(self))
+    super(_PrivateThreadPoolDataset, self).__init__(input_dataset,
+                                                    variant_tensor)
diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py
index d0e91b01f9..bfa256f8d7 100644
--- a/tensorflow/python/data/ops/iterator_ops.py
+++ b/tensorflow/python/data/ops/iterator_ops.py
@@ -357,7 +357,7 @@ class Iterator(checkpointable.CheckpointableBase):
                           (self.output_shapes, dataset.output_shapes))
     with ops.colocate_with(self._iterator_resource):
       return gen_dataset_ops.make_iterator(
-          dataset._as_variant_tensor(), self._iterator_resource, name=name)  # pylint: disable=protected-access
+          dataset._variant_tensor, self._iterator_resource, name=name)  # pylint: disable=protected-access
 
   def get_next(self, name=None):
     """Returns a nested structure of `tf.Tensor`s representing the next element.
@@ -524,7 +524,7 @@ class EagerIterator(checkpointable.CheckpointableBase):
     with ops.device("/cpu:0"):
       # pylint: disable=protected-access
       dataset = dataset._apply_options()
-      ds_variant = dataset._as_variant_tensor()
+      ds_variant = dataset._variant_tensor
       self._structure = structure_lib.convert_legacy_structure(
           dataset.output_types, dataset.output_shapes, dataset.output_classes)
       self._flat_output_types = self._structure._flat_types
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index 876b77b853..8192d53891 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -30,12 +30,11 @@ from tensorflow.python.ops import functional_ops
 from tensorflow.python.ops import gen_dataset_ops
 
 
-class _PerDeviceGenerator(dataset_ops.Dataset):
+class _PerDeviceGenerator(dataset_ops.DatasetV2):
   """A `dummy` generator dataset."""
 
   def __init__(self, shard_num, multi_device_iterator_resource, incarnation_id,
                source_device, target_device, element_structure):
-    super(_PerDeviceGenerator, self).__init__()
     self._target_device = target_device
     self._structure = element_structure
 
@@ -108,9 +107,8 @@ class _PerDeviceGenerator(dataset_ops.Dataset):
     )
     self._finalize_captured_args = self._finalize_func.captured_inputs
 
-  def _as_variant_tensor(self):
     with ops.device(self._target_device):
-      return gen_dataset_ops.generator_dataset(
+      variant_tensor = gen_dataset_ops.generator_dataset(
           self._init_captured_args,
           self._next_captured_args,
           self._finalize_captured_args,
@@ -118,6 +116,7 @@ class _PerDeviceGenerator(dataset_ops.Dataset):
           next_func=self._next_func,
           finalize_func=self._finalize_func,
           **dataset_ops.flat_structure(self))
+    super(_PerDeviceGenerator, self).__init__(variant_tensor)
 
   def _inputs(self):
     # TODO(b/116506223): Determine which datasets should be used as inputs here.
@@ -177,7 +176,7 @@ class MultiDeviceIterator(object):
       # The incarnation ID is used to ensure consistency between the per-device
       # iterators and the multi-device iterator.
       self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
-          self._dataset._as_variant_tensor(),  # pylint: disable=protected-access
+          self._dataset._variant_tensor,  # pylint: disable=protected-access
           self._multi_device_iterator_resource,
           max_buffer_size=max_buffer_size)
 
@@ -200,7 +199,8 @@ class MultiDeviceIterator(object):
       options.experimental_optimization.apply_default_optimizations = False
       ds = ds.with_options(options)
       with ops.device(device):
-        self._device_iterators.append(ds.make_initializable_iterator())
+        self._device_iterators.append(
+            dataset_ops.make_initializable_iterator(ds))
 
     device_iterator_initializers = [
         iterator.initializer for iterator in self._device_iterators
diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py
index 0d6023dea2..5e61bcf6be 100644
--- a/tensorflow/python/data/ops/readers.py
+++ b/tensorflow/python/data/ops/readers.py
@@ -49,7 +49,6 @@ class TextLineDatasetV2(dataset_ops.DatasetSource):
         to buffer. A value of 0 results in the default buffering values chosen
         based on the compression type.
     """
-    super(TextLineDatasetV2, self).__init__()
     self._filenames = ops.convert_to_tensor(
         filenames, dtype=dtypes.string, name="filenames")
     self._compression_type = convert.optional_param_to_tensor(
@@ -59,10 +58,9 @@ class TextLineDatasetV2(dataset_ops.DatasetSource):
         argument_dtype=dtypes.string)
     self._buffer_size = convert.optional_param_to_tensor(
         "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.text_line_dataset(
+    variant_tensor = gen_dataset_ops.text_line_dataset(
         self._filenames, self._compression_type, self._buffer_size)
+    super(TextLineDatasetV2, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
@@ -100,7 +98,6 @@ class _TFRecordDataset(dataset_ops.DatasetSource):
       buffer_size: (Optional.) A `tf.int64` scalar representing the number of
         bytes in the read buffer. 0 means no buffering.
     """
-    super(_TFRecordDataset, self).__init__()
     # Force the type to string even if filenames is an empty list.
     self._filenames = ops.convert_to_tensor(
         filenames, dtypes.string, name="filenames")
@@ -113,24 +110,32 @@ class _TFRecordDataset(dataset_ops.DatasetSource):
         "buffer_size",
         buffer_size,
         argument_default=_DEFAULT_READER_BUFFER_SIZE_BYTES)
-
-  def _as_variant_tensor(self):
-    return gen_dataset_ops.tf_record_dataset(
+    variant_tensor = gen_dataset_ops.tf_record_dataset(
         self._filenames, self._compression_type, self._buffer_size)
+    super(_TFRecordDataset, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
     return structure.TensorStructure(dtypes.string, [])
 
 
-class ParallelInterleaveDataset(dataset_ops.InterleaveDataset):
+class ParallelInterleaveDataset(dataset_ops.UnaryDataset):
   """A `Dataset` that maps a function over its input and flattens the result."""
 
   def __init__(self, input_dataset, map_func, cycle_length, block_length,
                sloppy, buffer_output_elements, prefetch_input_elements):
     """See `tf.data.experimental.parallel_interleave()` for details."""
-    super(ParallelInterleaveDataset, self).__init__(input_dataset, map_func,
-                                                    cycle_length, block_length)
+    self._input_dataset = input_dataset
+    self._map_func = dataset_ops.StructuredFunctionWrapper(
+        map_func, self._transformation_name(), dataset=input_dataset)
+    if not isinstance(self._map_func.output_structure,
+                      dataset_ops.DatasetStructure):
+      raise TypeError("`map_func` must return a `Dataset` object.")
+    self._structure = self._map_func.output_structure._element_structure  # pylint: disable=protected-access
+    self._cycle_length = ops.convert_to_tensor(
+        cycle_length, dtype=dtypes.int64, name="cycle_length")
+    self._block_length = ops.convert_to_tensor(
+        block_length, dtype=dtypes.int64, name="block_length")
     self._sloppy = ops.convert_to_tensor(
         sloppy, dtype=dtypes.bool, name="sloppy")
     self._buffer_output_elements = convert.optional_param_to_tensor(
@@ -141,11 +146,8 @@ class ParallelInterleaveDataset(dataset_ops.InterleaveDataset):
         "prefetch_input_elements",
         prefetch_input_elements,
         argument_default=2 * cycle_length)
-
-  def _as_variant_tensor(self):
-    # pylint: disable=protected-access
-    return ged_ops.experimental_parallel_interleave_dataset(
-        self._input_dataset._as_variant_tensor(),
+    variant_tensor = ged_ops.experimental_parallel_interleave_dataset(
+        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
         self._map_func.function.captured_inputs,
         self._cycle_length,
         self._block_length,
@@ -154,7 +156,15 @@ class ParallelInterleaveDataset(dataset_ops.InterleaveDataset):
         self._prefetch_input_elements,
         f=self._map_func.function,
         **dataset_ops.flat_structure(self))
-    # pylint: enable=protected-access
+    super(ParallelInterleaveDataset, self).__init__(input_dataset,
+                                                    variant_tensor)
+
+  def _functions(self):
+    return [self._map_func]
+
+  @property
+  def _element_structure(self):
+    return self._structure
 
   def _transformation_name(self):
     return "tf.data.experimental.parallel_interleave()"
@@ -186,7 +196,6 @@ class TFRecordDatasetV2(dataset_ops.DatasetV2):
       TypeError: If any argument does not have the expected type.
       ValueError: If any argument does not have the expected shape.
     """
-    super(TFRecordDatasetV2, self).__init__()
     if isinstance(filenames, dataset_ops.DatasetV2):
       if filenames.output_types != dtypes.string:
         raise TypeError(
@@ -215,6 +224,8 @@ class TFRecordDatasetV2(dataset_ops.DatasetV2):
           filenames, read_one_file, cycle_length=num_parallel_reads,
           block_length=1, sloppy=False, buffer_output_elements=None,
           prefetch_input_elements=None)
+    variant_tensor = self._impl._variant_tensor  # pylint: disable=protected-access
+    super(TFRecordDatasetV2, self).__init__(variant_tensor)
 
   def _clone(self,
              filenames=None,
@@ -226,9 +237,6 @@ class TFRecordDatasetV2(dataset_ops.DatasetV2):
                              buffer_size or self._buffer_size,
                              num_parallel_reads or self._num_parallel_reads)
 
-  def _as_variant_tensor(self):
-    return self._impl._as_variant_tensor()  # pylint: disable=protected-access
-
   def _inputs(self):
     return self._impl._inputs()  # pylint: disable=protected-access
 
@@ -295,7 +303,6 @@ class FixedLengthRecordDatasetV2(dataset_ops.DatasetSource):
       compression_type: (Optional.) A `tf.string` scalar evaluating to one of
         `""` (no compression), `"ZLIB"`, or `"GZIP"`.
     """
-    super(FixedLengthRecordDatasetV2, self).__init__()
     self._filenames = ops.convert_to_tensor(
         filenames, dtype=dtypes.string, name="filenames")
     self._record_bytes = ops.convert_to_tensor(
@@ -312,17 +319,16 @@ class FixedLengthRecordDatasetV2(dataset_ops.DatasetSource):
         compression_type,
         argument_default="",
         argument_dtype=dtypes.string)
-
-  def _as_variant_tensor(self):
     if (self._compression_type is not None or
         compat.forward_compatible(2018, 11, 30)):
-      return gen_dataset_ops.fixed_length_record_dataset_v2(
+      variant_tensor = gen_dataset_ops.fixed_length_record_dataset_v2(
           self._filenames, self._header_bytes, self._record_bytes,
           self._footer_bytes, self._buffer_size, self._compression_type)
     else:
-      return gen_dataset_ops.fixed_length_record_dataset(
+      variant_tensor = gen_dataset_ops.fixed_length_record_dataset(
           self._filenames, self._header_bytes, self._record_bytes,
           self._footer_bytes, self._buffer_size)
+    super(FixedLengthRecordDatasetV2, self).__init__(variant_tensor)
 
   @property
   def _element_structure(self):
diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD
index 04e80299e0..c98b1f1729 100644
--- a/tensorflow/python/data/util/BUILD
+++ b/tensorflow/python/data/util/BUILD
@@ -163,3 +163,24 @@ py_test(
         "//tensorflow/python:util",
     ],
 )
+
+py_library(
+    name = "traverse",
+    srcs = ["traverse.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+    ],
+)
+
+py_test(
+    name = "traverse_test",
+    size = "small",
+    srcs = ["traverse_test.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":traverse",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python/data/ops:dataset_ops",
+    ],
+)
diff --git a/tensorflow/python/data/util/traverse.py b/tensorflow/python/data/util/traverse.py
new file mode 100644
index 0000000000..12e576fb41
--- /dev/null
+++ b/tensorflow/python/data/util/traverse.py
@@ -0,0 +1,56 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Helpers to traverse the Dataset dependency structure."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from six.moves import queue as Queue  # pylint: disable=redefined-builtin
+
+from tensorflow.python.framework import dtypes
+
+
+def obtain_all_variant_tensor_ops(dataset):
+  """Given an input dataset, finds all dataset ops used for construction.
+
+  A series of transformations would have created this dataset with each
+  transformation including zero or more Dataset ops, each producing a dataset
+  variant tensor. This method outputs all of them.
+
+  Args:
+    dataset: Dataset to find variant tensors for.
+
+  Returns:
+    A list of variant_tensor producing dataset ops used to construct this
+    dataset.
+  """
+  all_variant_tensor_ops = []
+  bfs_q = Queue.Queue()
+  bfs_q.put(dataset._variant_tensor.op)  # pylint: disable=protected-access
+  visited = []
+  while not bfs_q.empty():
+    op = bfs_q.get()
+    visited.append(op)
+    # We look for all ops that produce variant tensors as output. This is a bit
+    # of overkill but the other dataset _inputs() traversal strategies can't
+    # cover the case of function inputs that capture dataset variants.
+    # TODO(b/120873778): Make this more efficient.
+    if op.outputs[0].dtype == dtypes.variant:
+      all_variant_tensor_ops.append(op)
+    for i in op.inputs:
+      input_op = i.op
+      if input_op not in visited:
+        bfs_q.put(input_op)
+  return all_variant_tensor_ops
diff --git a/tensorflow/python/data/util/traverse_test.py b/tensorflow/python/data/util/traverse_test.py
new file mode 100644
index 0000000000..53de1be897
--- /dev/null
+++ b/tensorflow/python/data/util/traverse_test.py
@@ -0,0 +1,109 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for utilities for traversing the dataset construction graph."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.util import traverse
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.platform import test
+
+
+class _TestDataset(dataset_ops.UnaryUnchangedStructureDataset):
+
+  def __init__(self, input_dataset):
+    self._input_dataset = input_dataset
+    temp_variant_tensor = gen_dataset_ops.prefetch_dataset(
+        input_dataset._variant_tensor,
+        buffer_size=1,
+        **dataset_ops.flat_structure(self))
+    variant_tensor = gen_dataset_ops.model_dataset(
+        temp_variant_tensor, **dataset_ops.flat_structure(self))
+    super(_TestDataset, self).__init__(input_dataset, variant_tensor)
+
+
+class TraverseTest(test.TestCase):
+
+  @test_util.run_deprecated_v1
+  def testOnlySource(self):
+    ds = dataset_ops.Dataset.range(10)
+    variant_tensor_ops = traverse.obtain_all_variant_tensor_ops(ds)
+    self.assertAllEqual(["RangeDataset"], [x.name for x in variant_tensor_ops])
+
+  @test_util.run_deprecated_v1
+  def testSimplePipeline(self):
+    ds = dataset_ops.Dataset.range(10).map(math_ops.square)
+    variant_tensor_ops = traverse.obtain_all_variant_tensor_ops(ds)
+    self.assertSetEqual(
+        set(["MapDataset", "RangeDataset"]),
+        set([x.name for x in variant_tensor_ops]))
+
+  @test_util.run_deprecated_v1
+  def testConcat(self):
+    ds1 = dataset_ops.Dataset.range(10)
+    ds2 = dataset_ops.Dataset.range(10)
+    ds = ds1.concatenate(ds2)
+    variant_tensor_ops = traverse.obtain_all_variant_tensor_ops(ds)
+    self.assertSetEqual(
+        set(["ConcatenateDataset", "RangeDataset", "RangeDataset_1"]),
+        set([x.name for x in variant_tensor_ops]))
+
+  @test_util.run_deprecated_v1
+  def testZip(self):
+    ds1 = dataset_ops.Dataset.range(10)
+    ds2 = dataset_ops.Dataset.range(10)
+    ds = dataset_ops.Dataset.zip((ds1, ds2))
+    variant_tensor_ops = traverse.obtain_all_variant_tensor_ops(ds)
+    self.assertSetEqual(
+        set(["ZipDataset", "RangeDataset", "RangeDataset_1"]),
+        set([x.name for x in variant_tensor_ops]))
+
+  @test_util.run_deprecated_v1
+  def testMultipleVariantTensors(self):
+    ds = dataset_ops.Dataset.range(10)
+    ds = _TestDataset(ds)
+    variant_tensor_ops = traverse.obtain_all_variant_tensor_ops(ds)
+    self.assertSetEqual(
+        set(["RangeDataset", "ModelDataset", "PrefetchDataset"]),
+        set([x.name for x in variant_tensor_ops]))
+
+  @test_util.run_deprecated_v1
+  def testFlatMap(self):
+    ds1 = dataset_ops.Dataset.range(10).repeat(10)
+
+    def map_fn(ds):
+
+      def _map(x):
+        return ds.batch(x)
+
+      return _map
+
+    ds2 = dataset_ops.Dataset.range(20).prefetch(1)
+    ds2 = ds2.flat_map(map_fn(ds1))
+    variant_tensor_ops = traverse.obtain_all_variant_tensor_ops(ds2)
+    self.assertSetEqual(
+        set([
+            "FlatMapDataset", "PrefetchDataset", "RepeatDataset",
+            "RangeDataset", "RangeDataset_1"
+        ]), set([x.name for x in variant_tensor_ops]))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD
index 887c61cb8f..02957b2fef 100644
--- a/tensorflow/python/distribute/BUILD
+++ b/tensorflow/python/distribute/BUILD
@@ -270,6 +270,7 @@ cuda_py_test(
         ":input_ops",
         "//tensorflow/python/data/ops:dataset_ops",
         "//tensorflow/python/data/ops:readers",
+        "//tensorflow/python/data/util:traverse",
         "//tensorflow/python:errors",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_ops",
diff --git a/tensorflow/python/distribute/input_ops.py b/tensorflow/python/distribute/input_ops.py
index 2ded209701..d9e833b6bc 100644
--- a/tensorflow/python/distribute/input_ops.py
+++ b/tensorflow/python/distribute/input_ops.py
@@ -18,15 +18,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.data.experimental.ops import filter_for_shard_ops
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.data.ops import readers
-from tensorflow.python.data.util import nest
+from tensorflow.python.data.util import traverse
+from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging
 
+
 # TODO(priyag): Any other reader datasets to consider here?
 _READER_DATASET_OPS = [
     "TextLineDataset", "TFRecordDataset", "FixedLengthRecordDataset",
@@ -53,100 +51,57 @@ def auto_shard_dataset(dataset, num_shards, index):
     determine a good way to shard the input dataset.
   """
 
-  # TODO(priyag): Clone datasets instead of updating in place, similar to the
-  # clone method for TFRecordDataset.
-  def _auto_shard_impl(dataset, found_reader_op):
-    """Recursive implementation of auto sharding."""
-
-    if not found_reader_op:
-      # TODO(priyag): Make this check more robust by enforcing some common
-      # property on reader datasets.
-      if (isinstance(dataset, readers.TextLineDataset) or
-          isinstance(dataset, readers.FixedLengthRecordDataset)):
-        filenames_tensor = dataset._filenames
-        num_files = array_ops.size(filenames_tensor)
-        sharded_filenames_tensor = array_ops.gather(
-            filenames_tensor, math_ops.range(index, num_files, num_shards))
-        dataset._filenames = sharded_filenames_tensor
-        return dataset
-      elif isinstance(dataset, readers.TFRecordDataset):
-        # `TFRecordDataset` needs to be handled separately than other readers
-        # because it converts filenames to a dataset first. Also, we clone it
-        # instead of updating in place because it has special logic in the
-        # constructor. Eventually we will change all cases to clone datasets
-        # instead of updating in-place.
-        return dataset._clone(
-            filenames=dataset._filenames.apply(
-                filter_for_shard_ops.filter_for_shard(num_shards, index)))
-      elif isinstance(dataset, dataset_ops.RangeDataset):
-        return dataset.apply(
-            filter_for_shard_ops.filter_for_shard(num_shards, index))
-      elif hasattr(dataset, "_map_func"):
-        # TODO(priyag): Make this check more robust by enforcing some common
-        # property on all map/flatmap/interleave datasets.
-        map_func_def = dataset._map_func.function.definition
-        for node in map_func_def.node_def:
-          if node.op in _READER_DATASET_OPS:
-            found_reader_op = True
-            break
-          elif node.op == "FlatMapDataset":
-            # TODO(priyag): Should this check for other map datasets? Should it
-            # be recursive? It is too specific to implementation of
-            # TFRecordDataset right now.
-            nested_func_name = node.attr["f"].func.name
-            nested_func = ops.get_default_graph()._functions[nested_func_name]
-            for nested_node in nested_func.definition.node_def:
-              if nested_node.op in _READER_DATASET_OPS:
-                found_reader_op = True
-                break
-            if found_reader_op:
-              break
-        if found_reader_op:
-          dataset._input_dataset = _auto_shard_impl(
-              dataset._input_dataset, found_reader_op)
-          return dataset
-
-    if isinstance(dataset, dataset_ops.DatasetV1Adapter):
-      dataset._dataset = _auto_shard_impl(
-          dataset._dataset, found_reader_op)
-      return dataset
-
-    # TODO(priyag): Make _input_dataset(s) a common property of all datasets to
-    # make this check more robust.
-    if hasattr(dataset, "_input_dataset"):
-      dataset._input_dataset = _auto_shard_impl(
-          dataset._input_dataset, found_reader_op)
-      if hasattr(dataset, "_dataset_to_concatenate"):
-        # Special case for `ConcatentateDataset`. We want to shard all input
-        # datasets.
-        dataset._dataset_to_concatenate = _auto_shard_impl(
-            dataset._dataset_to_concatenate, found_reader_op)
-      return dataset
-
-    if hasattr(dataset, "_datasets"):
-      # Special case for `ZipDataset`.
-      dataset._datasets = nest.pack_sequence_as(dataset._datasets, [
-          _auto_shard_impl(ds, found_reader_op)
-          for ds in nest.flatten(dataset._datasets)
-      ])
-      return dataset
-
-    if not found_reader_op:
-      tf_logging.warn(
-          "Could not find a standard reader in the input pipeline"
-          "(one of TextLineDataset, TFRecordDataset, FixedLengthRecordDataset)."
-          "So auto-sharding is not done. Please verify correctness of "
-          "auto-sharding for your input.")
-      # TODO(yuefengz): maybe still shard it?
-      return dataset
-
-    # TODO(priyag): What do we want to do if the number of filenames is
-    # uneven in the number of shards? By default, this will just return as
-    # many items it can before throwing OutOfRangeError.
-    # TODO(priyag): This will shard the filenames before any shuffling of the
-    # filename dataset. It might be desirable to shard after shuffling
-    # filenames? If so, how do we achieve that?
-    return dataset.apply(
-        filter_for_shard_ops.filter_for_shard(num_shards, index))
-
-  return _auto_shard_impl(dataset=dataset, found_reader_op=False)
+  # TODO(rohanj): b/120673685 to track re-enabling auto sharding.
+  tf_logging.warn("Autosharding is currently disabled. Please shard your input "
+                  "manually.")
+  del num_shards, index
+  return dataset
+
+
+def _clone_dataset(dataset):
+  """Returns a cloned version of `dataset`."""
+  variant_tensor_ops = traverse.obtain_all_variant_tensor_ops(dataset)
+  remap_dict = _clone_helper(dataset._variant_tensor.op, variant_tensor_ops)
+  new_variant_tensor = remap_dict[dataset._variant_tensor.op].outputs[0]
+  return dataset_ops._VariantDataset(new_variant_tensor,
+                                     dataset._element_structure)
+
+
+def _get_op_def(op):
+  return op.op_def or op_def_registry.get_registered_ops()[op.type]
+
+
+def _clone_helper(op_to_clone, variant_tensor_ops):
+  """Helper method that recursively clones `op_to_clone`.
+
+  Args:
+    op_to_clone: The op we want to clone.
+    variant_tensor_ops: A list of ops that we have to clone along the way.
+
+  Returns:
+    A dictionary mapping old_ops to new_ops created. Includes op_to_clone
+    as a key.
+  """
+  remap_dict = {}
+  for input_tensor in op_to_clone.inputs:
+    input_tensor_op = input_tensor.op
+    if input_tensor_op in variant_tensor_ops:
+      recursive_map = _clone_helper(input_tensor_op, variant_tensor_ops)
+      remap_dict.update(recursive_map)
+  inputs_list = []
+  for input_tensor in op_to_clone.inputs:
+    input_tensor_op = input_tensor.op
+    if input_tensor_op in remap_dict:
+      remapped_input = remap_dict[input_tensor_op].outputs[0]
+      inputs_list.append(remapped_input)
+    else:
+      inputs_list.append(input_tensor_op.outputs[input_tensor.value_index])
+  g = ops.get_default_graph()
+  new_op = g.create_op(
+      op_to_clone.type,
+      inputs_list, [o.dtype for o in op_to_clone.outputs],
+      name=op_to_clone.name,
+      attrs=op_to_clone.node_def.attr,
+      op_def=_get_op_def(op_to_clone))
+  remap_dict[op_to_clone] = new_op
+  return remap_dict
diff --git a/tensorflow/python/distribute/input_ops_test.py b/tensorflow/python/distribute/input_ops_test.py
index dcf946ba47..7db75163ed 100644
--- a/tensorflow/python/distribute/input_ops_test.py
+++ b/tensorflow/python/distribute/input_ops_test.py
@@ -26,6 +26,8 @@ from tensorflow.python.distribute import input_ops
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.lib.io import python_io
+from tensorflow.python.ops import gen_dataset_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
@@ -90,7 +92,7 @@ class AutoShardDatasetTest(test.TestCase):
   def _verifySimpleShardingOutput(self, dataset, record_fn):
     iterator = dataset.make_one_shot_iterator()
     next_element = iterator.get_next()
-    with self.cached_session() as sess:
+    with self.cached_session():
       for f in range(self._shard_index, self._num_files, self._num_shards):
         for r in range(self._num_records):
           self.assertAllEqual(record_fn(r, f), self.evaluate(next_element))
@@ -98,7 +100,7 @@ class AutoShardDatasetTest(test.TestCase):
         self.evaluate(next_element)
 
   @test_util.run_deprecated_v1
-  def testTFRecordDataset(self):
+  def DISABLED_testTFRecordDataset(self):
     dataset = readers.TFRecordDataset(self._createTFRecordFiles())
     dataset = input_ops.auto_shard_dataset(
         dataset, self._num_shards, self._shard_index)
@@ -106,7 +108,7 @@ class AutoShardDatasetTest(test.TestCase):
     self._verifySimpleShardingOutput(dataset, self._record)
 
   @test_util.run_deprecated_v1
-  def testFlatMap(self):
+  def DISABLED_testFlatMap(self):
     dataset = dataset_ops.Dataset.from_tensor_slices(
         self._createTFRecordFiles())
     dataset = dataset.flat_map(readers.TFRecordDataset)
@@ -116,7 +118,7 @@ class AutoShardDatasetTest(test.TestCase):
     self._verifySimpleShardingOutput(dataset, self._record)
 
   @test_util.run_deprecated_v1
-  def testInterleave(self):
+  def DISABLED_testInterleave(self):
     dataset = dataset_ops.Dataset.from_tensor_slices(
         self._createTFRecordFiles())
     dataset = dataset.interleave(
@@ -129,7 +131,7 @@ class AutoShardDatasetTest(test.TestCase):
     self._verifySimpleShardingOutput(dataset, self._record)
 
   @test_util.run_deprecated_v1
-  def testListfiles(self):
+  def DISABLED_testListfiles(self):
     filenames = self._createTFRecordFiles()
     file_pattern = filenames[0].rsplit(os.sep, 1)[0] + "/tf_record.*.txt"
     dataset = dataset_ops.Dataset.list_files(file_pattern, shuffle=False)
@@ -139,7 +141,7 @@ class AutoShardDatasetTest(test.TestCase):
 
     iterator = dataset.make_one_shot_iterator()
     next_element = iterator.get_next()
-    with self.cached_session() as sess:
+    with self.cached_session():
       actual, expected = [], []
       for f in range(self._shard_index, self._num_files, self._num_shards):
         for r in range(self._num_records):
@@ -150,7 +152,7 @@ class AutoShardDatasetTest(test.TestCase):
       self.assertAllEqual(expected, actual)
 
   @test_util.run_deprecated_v1
-  def testComplexPipeline(self):
+  def DISABLED_testComplexPipeline(self):
     # Setup a complex input pipeline.
     batch_size = 2
     num_epochs = 5
@@ -172,7 +174,7 @@ class AutoShardDatasetTest(test.TestCase):
     # Verify output.
     iterator = dataset.make_one_shot_iterator()
     next_element = iterator.get_next()
-    with self.cached_session() as sess:
+    with self.cached_session():
       actual = []
       num_iterations = (self._num_files * self._num_records * num_epochs) // (
           self._num_shards * batch_size)
@@ -190,7 +192,7 @@ class AutoShardDatasetTest(test.TestCase):
       self.assertAllEqual(sorted(expected), sorted(actual))
 
   @test_util.run_deprecated_v1
-  def testZip(self):
+  def DISABLED_testZip(self):
     dataset1 = readers.TFRecordDataset(self._createTFRecordFiles())
     dataset2 = readers.TextLineDataset(self._createTextFiles())
     dataset = dataset_ops.Dataset.zip((dataset1, dataset2))
@@ -201,7 +203,7 @@ class AutoShardDatasetTest(test.TestCase):
     self._verifySimpleShardingOutput(dataset, record_fn)
 
   @test_util.run_deprecated_v1
-  def testConcat(self):
+  def DISABLED_testConcat(self):
     dataset1 = readers.TFRecordDataset(self._createTFRecordFiles())
     dataset2 = readers.TextLineDataset(self._createTextFiles())
     dataset = dataset1.concatenate(dataset2)
@@ -222,7 +224,7 @@ class AutoShardDatasetTest(test.TestCase):
         self.evaluate(next_element)
 
   @test_util.run_deprecated_v1
-  def testTextLineReader(self):
+  def DISABLED_testTextLineReader(self):
     dataset = readers.TextLineDataset(self._createTextFiles())
     dataset = input_ops.auto_shard_dataset(
         dataset, self._num_shards, self._shard_index)
@@ -230,7 +232,7 @@ class AutoShardDatasetTest(test.TestCase):
     self._verifySimpleShardingOutput(dataset, self._text_line)
 
   @test_util.run_deprecated_v1
-  def testTextLineReaderWithFlatMap(self):
+  def DISABLED_testTextLineReaderWithFlatMap(self):
     dataset = dataset_ops.Dataset.from_tensor_slices(self._createTextFiles())
     dataset = dataset.flat_map(readers.TextLineDataset)
     dataset = input_ops.auto_shard_dataset(
@@ -239,7 +241,7 @@ class AutoShardDatasetTest(test.TestCase):
     self._verifySimpleShardingOutput(dataset, self._text_line)
 
   @test_util.run_deprecated_v1
-  def testFixedLengthReader(self):
+  def DISABLED_testFixedLengthReader(self):
     dataset = readers.FixedLengthRecordDataset(
         self._createFixedLengthRecordFiles(), self._record_bytes)
     dataset = input_ops.auto_shard_dataset(
@@ -248,7 +250,7 @@ class AutoShardDatasetTest(test.TestCase):
     self._verifySimpleShardingOutput(dataset, self._fixed_length_record)
 
   @test_util.run_deprecated_v1
-  def testFixedLengthReaderWithFlatMap(self):
+  def DISABLED_testFixedLengthReaderWithFlatMap(self):
     dataset = dataset_ops.Dataset.from_tensor_slices(
         self._createFixedLengthRecordFiles())
     dataset = dataset.flat_map(
@@ -258,5 +260,77 @@ class AutoShardDatasetTest(test.TestCase):
 
     self._verifySimpleShardingOutput(dataset, self._fixed_length_record)
 
+
+# A dataset that creates two variant tensors.
+class _TestDataset(dataset_ops.UnaryUnchangedStructureDataset):
+
+  def __init__(self, input_dataset):
+    self._input_dataset = input_dataset
+    temp_variant_tensor = gen_dataset_ops.prefetch_dataset(
+        input_dataset._variant_tensor,
+        buffer_size=1,
+        **dataset_ops.flat_structure(self))
+    variant_tensor = gen_dataset_ops.model_dataset(
+        temp_variant_tensor, **dataset_ops.flat_structure(self))
+    super(_TestDataset, self).__init__(input_dataset, variant_tensor)
+
+
+class CloneDatasetTest(test.TestCase):
+
+  def _assert_datasets_equal(self, ds1, ds2):
+    # First lets assert the structure is the same.
+    self.assertTrue(
+        ds1._element_structure.is_compatible_with(ds2._element_structure))
+    self.assertTrue(
+        ds2._element_structure.is_compatible_with(ds1._element_structure))
+
+    # Now create iterators on both and assert they produce the same values.
+    it1 = dataset_ops.make_initializable_iterator(ds1)
+    it2 = dataset_ops.make_initializable_iterator(ds2)
+
+    get_next1 = it1.get_next()
+    get_next2 = it2.get_next()
+
+    with self.cached_session():
+      self.evaluate([it1.initializer, it2.initializer])
+      val1, val2 = self.evaluate([get_next1, get_next2])
+      self.assertEqual(val1, val2)
+
+  @test_util.run_deprecated_v1
+  def testOnlySource(self):
+    ds = dataset_ops.Dataset.range(10)
+    cloned_ds = input_ops._clone_dataset(ds)
+    self._assert_datasets_equal(ds, cloned_ds)
+
+  @test_util.run_deprecated_v1
+  def testSimplePipeline(self):
+    ds = dataset_ops.Dataset.range(10).map(math_ops.square)
+    cloned_ds = input_ops._clone_dataset(ds)
+    self._assert_datasets_equal(ds, cloned_ds)
+
+  @test_util.run_deprecated_v1
+  def testConcat(self):
+    ds1 = dataset_ops.Dataset.range(10)
+    ds2 = dataset_ops.Dataset.range(10)
+    ds = ds1.concatenate(ds2)
+    cloned_ds = input_ops._clone_dataset(ds)
+    self._assert_datasets_equal(ds, cloned_ds)
+
+  @test_util.run_deprecated_v1
+  def testZip(self):
+    ds1 = dataset_ops.Dataset.range(10)
+    ds2 = dataset_ops.Dataset.range(10)
+    ds = dataset_ops.Dataset.zip((ds1, ds2))
+    cloned_ds = input_ops._clone_dataset(ds)
+    self._assert_datasets_equal(ds, cloned_ds)
+
+  @test_util.run_deprecated_v1
+  def testMultipleVariantTensors(self):
+    ds = dataset_ops.Dataset.range(10)
+    ds = _TestDataset(ds)
+    cloned_ds = input_ops._clone_dataset(ds)
+    self._assert_datasets_equal(ds, cloned_ds)
+
+
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index 55fc9c9e1f..e0c575b01c 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -1600,9 +1600,9 @@ class MultiWorkerDataset(object):
       if len(dataset_fn) != input_workers.num_workers:
         raise ValueError("If `dataset_fn` is a list, it must have one entry "
                          "per worker")
-      if auto_shard:
-        raise ValueError(
-            "If `dataset_fn` is a list, `auto_shard` is not supported.")
+    # TODO(rohanj): b/120673685 to track re-enabling auto sharding.
+    if auto_shard:
+      raise ValueError("Currently autosharding is not supported.")
     self._input_workers = input_workers
     self._datasets = []
     # TODO(yuefengz, priyag): support different set of jobs for input
@@ -1613,9 +1613,6 @@ class MultiWorkerDataset(object):
           worker_input = dataset_fn[i]()
         else:
           worker_input = dataset_fn()
-          if auto_shard:
-            worker_input = input_ops.auto_shard_dataset(
-                worker_input, input_workers.num_workers, i)
         dataset = PerReplicaDataset(worker_input, input_workers, i,
                                     prefetch_on_device=prefetch_on_device)
         self._datasets.append((worker, dataset))
@@ -1805,7 +1802,11 @@ class DatasetIterator(InputIteratorImpl):
     for i, worker in enumerate(input_workers.worker_devices):
       with ops.device(worker):
         worker_devices = input_workers.compute_devices_for_worker(i)
-        iterator = _SingleWorkerDatasetIterator(dataset, worker, worker_devices)
+        cloned_dataset = dataset
+        if not context.executing_eagerly():
+          cloned_dataset = input_ops._clone_dataset(dataset)  # pylint: disable=protected-access
+        iterator = _SingleWorkerDatasetIterator(cloned_dataset, worker,
+                                                worker_devices)
         iterators.append(iterator)
 
     super(DatasetIterator, self).__init__(input_workers, iterators)
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
index 3ecac329aa..951b2df05a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt
@@ -16,7 +16,7 @@ tf_class {
   }
   member_method {
     name: "__init__"
-    argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'variant_tensor\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "apply"
-- 
GitLab


From 627fd023a0899d43f1129d9852b4ece72567c8b9 Mon Sep 17 00:00:00 2001
From: Priya Gupta <priyag@google.com>
Date: Tue, 18 Dec 2018 22:22:30 -0800
Subject: [PATCH 806/873] Keras + Distribution Strategy: Use a slightly lower
 tolerance for correctness test for default distribution strategy as the
 weights have more variance whenever the training is run on GPUs.

PiperOrigin-RevId: 226117010
---
 tensorflow/contrib/distribute/python/keras_test.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 154d3fa8a3..31a389aaca 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1317,17 +1317,14 @@ class TestDistributionStrategyCorrectness(test.TestCase,
 
   @combinations.generate(strategy_and_input_combinations())
   def test_correctness(self, distribution, use_numpy, use_validation_data):
-    # TODO(b/121224478): This test is flaky with default strategy. Remove this
-    # once the issue is fixed.
-    if isinstance(distribution, distribute_lib._DefaultDistributionStrategy):  # pylint: disable=protected-access
-      self.skipTest('Disable the test for default strategy.')
-
     with self.cached_session():
       default_tolerance = 1e-5
       tol_table = {}
 
-      if isinstance(distribution, (mirrored_strategy.MirroredStrategy,
-                                   mirrored_strategy.CoreMirroredStrategy)):
+      if isinstance(distribution, (
+          mirrored_strategy.MirroredStrategy,
+          mirrored_strategy.CoreMirroredStrategy,
+          distribute_lib._DefaultDistributionStrategy)):  # pylint: disable=protected-access
         # TODO(b/119257215): Weights are not exactly the same, so use larger
         # tolerance for now. Predict should be related to weights.
         tol_table = {
-- 
GitLab


From 4e57040e940b4bdc2fc91e2f0b0f3dbbf1a59f6d Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Tue, 18 Dec 2018 23:21:26 -0800
Subject: [PATCH 807/873] Remove the wrapping of single inputs in experimental
 run steps

PiperOrigin-RevId: 226121383
---
 .../contrib/distribute/python/metrics_v1_test.py   |  4 ++--
 .../distribute/python/minimize_loss_test.py        | 14 +++++++-------
 .../distribute/python/one_device_strategy.py       |  5 +----
 tensorflow/contrib/distribute/python/step_fn.py    |  2 +-
 .../contrib/distribute/python/tpu_strategy.py      |  5 +----
 tensorflow/python/distribute/mirrored_strategy.py  |  5 +----
 6 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/metrics_v1_test.py b/tensorflow/contrib/distribute/python/metrics_v1_test.py
index 8ac659abe9..32a0d19943 100644
--- a/tensorflow/contrib/distribute/python/metrics_v1_test.py
+++ b/tensorflow/contrib/distribute/python/metrics_v1_test.py
@@ -100,7 +100,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
       if isinstance(distribution, tpu_strategy.TPUStrategy):
         def step_fn(ctx, inputs):
           value, update = distribution.call_for_each_replica(
-              metric_fn, args=inputs)
+              metric_fn, args=(inputs,))
           ctx.set_non_tensor_output(name="value", output=value)
           return distribution.group(update)
 
@@ -115,7 +115,7 @@ class MetricsV1Test(test.TestCase, parameterized.TestCase):
             distribution.extended.steps_per_run)
       else:
         value, update = distribution.call_for_each_replica(
-            metric_fn, iterator.get_next())
+            metric_fn, args=(iterator.get_next(),))
         update = distribution.group(update)
         # TODO(josh11b): Once we switch to using a global batch size for input,
         # replace "distribution.num_replicas_in_sync" with "1".
diff --git a/tensorflow/contrib/distribute/python/minimize_loss_test.py b/tensorflow/contrib/distribute/python/minimize_loss_test.py
index f09483cb56..824c4b0937 100644
--- a/tensorflow/contrib/distribute/python/minimize_loss_test.py
+++ b/tensorflow/contrib/distribute/python/minimize_loss_test.py
@@ -67,7 +67,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       def step_fn(ctx, inputs):
         del ctx  # Unused
         return distribution.group(
-            distribution.call_for_each_replica(model_fn, args=inputs))
+            distribution.call_for_each_replica(model_fn, args=(inputs,)))
 
       iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
@@ -161,7 +161,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       def step_fn(ctx, inputs):
         del ctx  # Unused
         return distribution.group(
-            distribution.call_for_each_replica(model_fn, args=inputs))
+            distribution.call_for_each_replica(model_fn, args=(inputs,)))
 
       iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
@@ -230,7 +230,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       def step_fn(ctx, inputs):
         del ctx  # Unused
         fetches = distribution.unwrap(
-            distribution.call_for_each_replica(model_fn, args=inputs))
+            distribution.call_for_each_replica(model_fn, args=(inputs,)))
         if update_ops_in_cross_replica_mode:
           fetches += tuple(ops.get_collection(ops.GraphKeys.UPDATE_OPS))
         return control_flow_ops.group(fetches)
@@ -302,8 +302,8 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
     with distribution.scope():
       all_vars = []
 
-      def model_fn(x, y):
-
+      def model_fn(inputs):
+        x, y = inputs
         def loss_fn():
           # Use fixed initialization to make the steps deterministic.
           w = variable_scope.get_variable("w", initializer=[[2.]])
@@ -327,7 +327,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       def step_fn(ctx, inputs):
         del ctx  # Unused
         return distribution.group(
-            distribution.call_for_each_replica(model_fn, args=inputs))
+            distribution.call_for_each_replica(model_fn, args=(inputs,)))
 
       iterator = self._get_iterator(distribution.distribute_dataset(dataset_fn))
 
@@ -413,7 +413,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
 
       def step_fn(output_context, inputs):
         (train_op, loss) = distribution.call_for_each_replica(
-            model_fn, args=(output_context,) + inputs)
+            model_fn, args=(output_context, inputs))
         output_context.set_last_step_output(
             name="cross_replica_loss_reduced",
             output=loss,
diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py
index 4b60f3c786..c9ea706b64 100644
--- a/tensorflow/contrib/distribute/python/one_device_strategy.py
+++ b/tensorflow/contrib/distribute/python/one_device_strategy.py
@@ -101,10 +101,7 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended):
     def body(i, *args):
       """A wrapper around `fn` to create the while loop body."""
       del args
-      fn_inputs = iterator.get_next()
-      if not isinstance(fn_inputs, tuple):
-        fn_inputs = (fn_inputs,)
-      fn_result = fn(ctx, fn_inputs)
+      fn_result = fn(ctx, iterator.get_next())
       flat_last_step_outputs = nest.flatten(ctx.last_step_outputs)
       with ops.control_dependencies([fn_result]):
         return [i + 1] + flat_last_step_outputs
diff --git a/tensorflow/contrib/distribute/python/step_fn.py b/tensorflow/contrib/distribute/python/step_fn.py
index c928b6d9f1..faeb96bcb7 100644
--- a/tensorflow/contrib/distribute/python/step_fn.py
+++ b/tensorflow/contrib/distribute/python/step_fn.py
@@ -100,7 +100,7 @@ class StandardSingleLossStep(StandardInputStep):
         gradients_fn = optimizer_lib.get_filtered_grad_fn(gradients_fn)
 
         grads_and_vars = self.distribution.call_for_each_replica(
-            gradients_fn, args=(ctx,) + inputs)
+            gradients_fn, args=(ctx, inputs))
         # If threads use layers, then we need to run the first step
         # sequentially, so that layers.build() is not executed in parallel.
         # Otherwise, multiple sets of mirrored variables are going to be
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index bdcad14704..c2f62c3ca2 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -331,10 +331,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
 
     def run_fn():
       """Single step on the TPU device."""
-      fn_inputs = dequeue_fn()
-      if not isinstance(fn_inputs, tuple):
-        fn_inputs = (fn_inputs,)
-      fn_result = fn(ctx, fn_inputs)
+      fn_result = fn(ctx, dequeue_fn())
       flat_last_step_outputs = nest.flatten(ctx.last_step_outputs)
       if flat_last_step_outputs:
         with ops.control_dependencies([fn_result]):
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index b4f9761b98..60b5232e16 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -574,10 +574,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
     def body(i, *args):
       """A wrapper around `fn` to create the while loop body."""
       del args
-      fn_inputs = iterator.get_next()
-      if not isinstance(fn_inputs, tuple):
-        fn_inputs = (fn_inputs,)
-      fn_result = fn(ctx, fn_inputs)
+      fn_result = fn(ctx, iterator.get_next())
       for (name, output) in ctx.last_step_outputs.items():
         # Convert all outputs to tensors, potentially from `DistributedValues`.
         ctx.last_step_outputs[name] = self._unwrap(output)
-- 
GitLab


From 1fc046c3a8eb62690cd78a6da1b62463e9133f6d Mon Sep 17 00:00:00 2001
From: Eugene Brevdo <ebrevdo@google.com>
Date: Wed, 19 Dec 2018 00:09:25 -0800
Subject: [PATCH 808/873] [TF port] Disable tests for GetCurrentCpu() on iOS/OS
 X because MacOS has a __cpuid bug.

PiperOrigin-RevId: 226125575
---
 tensorflow/core/platform/port_test.cc  |  7 ++++---
 tensorflow/core/platform/posix/port.cc | 18 +++++++++++-------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/tensorflow/core/platform/port_test.cc b/tensorflow/core/platform/port_test.cc
index 33c66a6f25..0567130e8b 100644
--- a/tensorflow/core/platform/port_test.cc
+++ b/tensorflow/core/platform/port_test.cc
@@ -35,10 +35,11 @@ TEST(Port, AlignedMalloc) {
 
 TEST(Port, GetCurrentCPU) {
   const int cpu = GetCurrentCPU();
-  // TODO(b/120919972): Re-enable this EXPECT_GE after fixing MacOS Kokoro
-  // failures.
-  // EXPECT_GE(cpu, 0);
+#if !defined(__APPLE__)
+  // GetCurrentCPU does not currently work on MacOS.
+  EXPECT_GE(cpu, 0);
   EXPECT_LT(cpu, NumTotalCPUs());
+#endif
 }
 
 TEST(ConditionVariable, WaitForMilliseconds_Timeout) {
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 0fac8b1a88..ea6066ac7b 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -29,7 +29,7 @@ limitations under the License.
 #include <sys/syscall.h>
 #endif
 
-#if !defined(__APPLE__) && (__x86_64__ || __i386__)
+#if (__x86_64__ || __i386__)
 #include <cpuid.h>
 #endif
 
@@ -78,20 +78,24 @@ int NumSchedulableCPUs() {
 
 int NumTotalCPUs() {
   int count = absl::base_internal::NumCPUs();
-  return (count == 0) ? kUnknownCPU : count;
+  return (count <= 0) ? kUnknownCPU : count;
 }
 
 int GetCurrentCPU() {
 #if defined(__linux__) && !defined(__ANDROID__)
   return sched_getcpu();
-#elif defined(__cpuid_count)
   // Attempt to use cpuid on all other platforms.  If that fails, perform a
   // syscall.
-  uint32_t eax, ebx, ecx, edx;
-  __cpuid_count(/*leaf=*/1, /*subleaf=*/0, eax, ebx, ecx, edx);
-  if ((edx & (1 << 9)) != 0) {
+#elif defined(__cpuid) && !defined(__APPLE__)
+  // TODO(b/120919972): __cpuid returns invalid APIC ids on OS X.
+  uint32_t eax = 0;
+  uint32_t ebx = 0;
+  uint32_t ecx = 0;
+  uint32_t edx = 0;
+  __cpuid(/*level=*/1, eax, ebx, ecx, edx);
+  if ((edx & /*bit_APIC=*/(1 << 9)) != 0) {
     // EBX bits 24-31 are APIC ID
-    return static_cast<unsigned int>(ebx >> 24);
+    return (ebx & 0xFF) >> 24;
   }
 #elif defined(__NR_getcpu)
   unsigned int cpu;
-- 
GitLab


From 2eb720ce512edef25a2ee13647255c245506cbfe Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Tue, 18 Dec 2018 16:36:59 -0800
Subject: [PATCH 809/873] Fix some typos and add built-tags output support

This change makes some minor changes:

- Fix some typos
- Add a "test your changes" example to the README
- Add a --nocache flag to ignore the Docker build cache
- Add a --write_tags_to flag to save a clean list of
  built tags to a file. Can be used to build tags, then
  run tests in parallel by combining xargs with --only_tags_matching.
---
 tensorflow/tools/dockerfiles/README.md    | 19 ++++++++++--------
 tensorflow/tools/dockerfiles/assembler.py | 24 ++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md
index b42dd9fc0c..6bb94bfa34 100644
--- a/tensorflow/tools/dockerfiles/README.md
+++ b/tensorflow/tools/dockerfiles/README.md
@@ -29,12 +29,12 @@ in the Dockerfile itself.
 After building the image with the tag `tf` (for example), use `docker run` to
 run the images.
 
-Note for new Docker users: the `-v` and `-u` flags share directories between
-the Docker container and your machine, and very important. Without
-`-v`, your work will be wiped once the container quits, and without `-u`, files
-created by the container will have the wrong file permissions on your host
-machine. If you are confused, check out the [Docker run
-documentation](https://docs.docker.com/engine/reference/run/).
+Note for new Docker users: the `-v` and `-u` flags share directories and
+permissions between the Docker container and your machine. Without `-v`, your
+work will be wiped once the container quits, and without `-u`, files created by
+the container will have the wrong file permissions on your host machine.
+Check out the [Docker run
+documentation](https://docs.docker.com/engine/reference/run/) for more info.
 
 ```bash
 # Volume mount (-v) is optional but highly recommended, especially for Jupyter.
@@ -83,7 +83,7 @@ $ alias asm_images="docker run --rm -v $(pwd):/tf -v /var/run/docker.sock:/var/r
 # If you're REBUILDING OR ADDING DOCKERFILES, remove docker.sock and add -u:
 $ alias asm_dockerfiles="docker run --rm -u $(id -u):$(id -g) -v $(pwd):/tf tf-tools python3 assembler.py "
 
-# Check flags
+# Check assembler flags
 $ asm_dockerfiles --help
 
 # Assemble all of the Dockerfiles
@@ -93,5 +93,8 @@ $ asm_dockerfiles --release dockerfiles --construct_dockerfiles
 $ asm_images --release nightly --build_images
 
 # Build version release for version 99.0, except "gpu" tags:
-$ asm_images --release versioned --arg _TAG_PREFIX=99.0 --build_images --exclude_tags_matching '*.gpu.*'
+$ asm_images --release versioned --arg _TAG_PREFIX=99.0 --build_images --exclude_tags_matching '.*gpu.*'
+
+# Test your changes to the devel images:
+$ asm_images --release nightly --build_images --run_tests_path=$(realpath tests) --only_tags_matching="^devel-gpu-py3$"
 ```
diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py
index 67a0320241..a73a701971 100644
--- a/tensorflow/tools/dockerfiles/assembler.py
+++ b/tensorflow/tools/dockerfiles/assembler.py
@@ -49,7 +49,7 @@ flags.DEFINE_string('hub_username', None,
 flags.DEFINE_string(
     'hub_password', None,
     ('Dockerhub password, only used with --upload_to_hub. Use from an env param'
-     'so your password isn\'t in your history.'))
+     ' so your password isn\'t in your history.'))
 
 flags.DEFINE_integer('hub_timeout', 3600,
                      'Abort Hub upload if it takes longer than this.')
@@ -89,6 +89,11 @@ flags.DEFINE_string(
      'Flag value must be a full path to the "tests" directory, which is usually'
      ' $(realpath ./tests). A failed tests counts the same as a failed build.'))
 
+flags.DEFINE_string(
+    'write_tags_to', None,
+    'Write the list of tagged images to a file. Useful for parallelizing tests.'
+)
+
 flags.DEFINE_boolean(
     'stop_on_failure', False,
     ('Stop processing tags if any one build fails. If False or not specified, '
@@ -142,6 +147,10 @@ flags.DEFINE_multi_string(
      'args will print a warning).'),
     short_name='a')
 
+flags.DEFINE_boolean(
+    'nocache', False,
+    'Disable the Docker build cache; identical to "docker build --no-cache"')
+
 flags.DEFINE_string(
     'spec_file',
     './spec.yml',
@@ -513,6 +522,7 @@ def main(argv):
   # Each tag has a name ('tag') and a definition consisting of the contents
   # of its Dockerfile, its build arg list, etc.
   failed_tags = []
+  succeeded_tags = []
   for tag, tag_defs in all_tags.items():
     for tag_def in tag_defs:
       eprint('> Working on {}'.format(tag))
@@ -569,6 +579,7 @@ def main(argv):
           image, logs = dock.images.build(
               timeout=FLAGS.hub_timeout,
               path='.',
+              nocache=FLAGS.nocache,
               dockerfile=dockerfile,
               buildargs=tag_def['cli_args'],
               tag=repo_tag)
@@ -656,12 +667,23 @@ def main(argv):
               args=(FLAGS.hub_repository, dock, image, tag))
           p.start()
 
+      if not tag_failed:
+        succeeded_tags.append(tag)
+
   if failed_tags:
     eprint(
         '> Some tags failed to build or failed testing, check scrollback for '
         'errors: {}'.format(','.join(failed_tags)))
     exit(1)
 
+  if FLAGS.write_tags_to:
+    eprint('> Writing built{} tags to {}.'.format(
+        ' and tested' if FLAGS.run_tests_path else '',
+        FLAGS.write_tags_to))
+    with open(FLAGS.write_tags_to, 'w') as f:
+      for tag in succeeded_tags:
+        f.write('{}:{}\n'.format(FLAGS.repository, tag))
+
 
 if __name__ == '__main__':
   app.run(main)
-- 
GitLab


From 55cc35ca35e1dfae9e39c02b8b1387cb9fd07fec Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Wed, 19 Dec 2018 10:11:10 -0800
Subject: [PATCH 810/873] Change file output to stdout

---
 tensorflow/tools/dockerfiles/README.md    |  3 +++
 tensorflow/tools/dockerfiles/assembler.py | 14 +++++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md
index 6bb94bfa34..4e8b6b1b04 100644
--- a/tensorflow/tools/dockerfiles/README.md
+++ b/tensorflow/tools/dockerfiles/README.md
@@ -92,6 +92,9 @@ $ asm_dockerfiles --release dockerfiles --construct_dockerfiles
 # Build all of the "nightly" images on your local machine:
 $ asm_images --release nightly --build_images
 
+# Save the list of built images to a file:
+$ asm_images --release nightly --build_images > tf-built.txt
+
 # Build version release for version 99.0, except "gpu" tags:
 $ asm_images --release versioned --arg _TAG_PREFIX=99.0 --build_images --exclude_tags_matching '.*gpu.*'
 
diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py
index a73a701971..d8f2e48411 100644
--- a/tensorflow/tools/dockerfiles/assembler.py
+++ b/tensorflow/tools/dockerfiles/assembler.py
@@ -18,6 +18,9 @@
 - Builds images (and optionally runs image tests)
 - Pushes images to Docker Hub (provided with credentials)
 
+Logs are written to stderr; the list of successfully built images is
+written to stdout.
+
 Read README.md (in this directory) for instructions!
 """
 
@@ -676,13 +679,10 @@ def main(argv):
         'errors: {}'.format(','.join(failed_tags)))
     exit(1)
 
-  if FLAGS.write_tags_to:
-    eprint('> Writing built{} tags to {}.'.format(
-        ' and tested' if FLAGS.run_tests_path else '',
-        FLAGS.write_tags_to))
-    with open(FLAGS.write_tags_to, 'w') as f:
-      for tag in succeeded_tags:
-        f.write('{}:{}\n'.format(FLAGS.repository, tag))
+  eprint('> Writing built{} tags to standard out.'.format(
+      ' and tested' if FLAGS.run_tests_path else ''))
+  for tag in succeeded_tags:
+    print('{}:{}'.format(FLAGS.repository, tag))
 
 
 if __name__ == '__main__':
-- 
GitLab


From 933bb8adbf9c3dd52ba487b44e2916009636e0b7 Mon Sep 17 00:00:00 2001
From: Austin Anderson <angerson@google.com>
Date: Wed, 19 Dec 2018 10:13:58 -0800
Subject: [PATCH 811/873] Remove old flag

---
 tensorflow/tools/dockerfiles/assembler.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py
index d8f2e48411..09537b7314 100644
--- a/tensorflow/tools/dockerfiles/assembler.py
+++ b/tensorflow/tools/dockerfiles/assembler.py
@@ -92,11 +92,6 @@ flags.DEFINE_string(
      'Flag value must be a full path to the "tests" directory, which is usually'
      ' $(realpath ./tests). A failed tests counts the same as a failed build.'))
 
-flags.DEFINE_string(
-    'write_tags_to', None,
-    'Write the list of tagged images to a file. Useful for parallelizing tests.'
-)
-
 flags.DEFINE_boolean(
     'stop_on_failure', False,
     ('Stop processing tags if any one build fails. If False or not specified, '
-- 
GitLab


From 36418256825b0318786fab77a17aca76595b9eee Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 00:41:21 -0800
Subject: [PATCH 812/873] \nInternal refactor\n

PiperOrigin-RevId: 226128745
---
 tensorflow/lite/python/create_custom_op.py    |   2 +-
 tensorflow/opensource_only.files              | 418 +++++++++---------
 tensorflow/python/tools/freeze_graph.py       |   6 +-
 .../python/tools/import_pb_to_tensorboard.py  |   2 +-
 .../python/tools/optimize_for_inference.py    |   2 +-
 tensorflow/python/tools/strip_unused_lib.py   |   2 +-
 6 files changed, 216 insertions(+), 216 deletions(-)

diff --git a/tensorflow/lite/python/create_custom_op.py b/tensorflow/lite/python/create_custom_op.py
index 344cd28d16..e793f7fe2b 100644
--- a/tensorflow/lite/python/create_custom_op.py
+++ b/tensorflow/lite/python/create_custom_op.py
@@ -62,7 +62,7 @@ def _read_graph_def(filename):
     raise ValueError("Input graph file '" + filename + "' does not exist!")
 
   graph_def = graph_pb2.GraphDef()
-  with gfile.FastGFile(filename, "rb") as f:
+  with gfile.GFile(filename, "rb") as f:
     graph_def.ParseFromString(f.read())
   return graph_def
 
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 88800c2951..8b97add97d 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -1,246 +1,246 @@
-tensorflow/contrib/tpu/profiler/pip_package/BUILD
-tensorflow/contrib/tpu/profiler/pip_package/setup.py
-tensorflow/contrib/tpu/profiler/pip_package/README
-tensorflow/contrib/tpu/profiler/pip_package/build_pip_package.sh
-tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
-tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/__init__.py
-tensorflow/contrib/mpi/BUILD
-tensorflow/tools/ci_build/remote/BUILD
-tensorflow/tools/pip_package/README
-tensorflow/tools/pip_package/MANIFEST.in
-tensorflow/tools/pip_package/simple_console.py
-tensorflow/tools/pip_package/build_pip_package.sh
-tensorflow/tools/pip_package/check_load_py_test.py
-tensorflow/tools/pip_package/pip_smoke_test.py
-tensorflow/tools/pip_package/simple_console_for_windows.py
-tensorflow/tools/pip_package/setup.py
-tensorflow/tools/pip_package/BUILD
-tensorflow/tools/lib_package/concat_licenses.sh
-tensorflow/tools/lib_package/libtensorflow_test.c
-tensorflow/tools/lib_package/LibTensorFlowTest.java
-tensorflow/tools/lib_package/BUILD
-tensorflow/tools/lib_package/libtensorflow_test.sh
-tensorflow/tools/lib_package/README.md
-tensorflow/tools/lib_package/libtensorflow_java_test.sh
-tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
-tensorflow/tools/def_file_filter/BUILD
-tensorflow/tools/def_file_filter/BUILD.tpl
-tensorflow/tools/def_file_filter/def_file_filter.py.tpl
-tensorflow/third_party/mkl/MKL_LICENSE
-tensorflow/third_party/mkl/LICENSE
-tensorflow/third_party/mkl/BUILD
-tensorflow/third_party/mkl/mkl.BUILD
-tensorflow/third_party/mkl/build_defs.bzl
-tensorflow/third_party/backports_weakref.BUILD
-tensorflow/third_party/toolchains/clang6/BUILD
-tensorflow/third_party/toolchains/clang6/README.md
+tensorflow/api_template.__init__.py
+tensorflow/stream_executor/BUILD
+tensorflow/third_party/py/BUILD
+tensorflow/third_party/py/remote.BUILD.tpl
+tensorflow/third_party/py/python_configure.bzl
+tensorflow/third_party/py/BUILD.tpl
+tensorflow/third_party/py/numpy/BUILD
+tensorflow/third_party/snappy.BUILD
+tensorflow/third_party/protobuf/BUILD
+tensorflow/third_party/termcolor.BUILD
+tensorflow/third_party/systemlibs/snappy.BUILD
+tensorflow/third_party/systemlibs/protobuf.BUILD
+tensorflow/third_party/systemlibs/termcolor.BUILD
+tensorflow/third_party/systemlibs/absl_py.absl.flags.BUILD
+tensorflow/third_party/systemlibs/curl.BUILD
+tensorflow/third_party/systemlibs/astor.BUILD
+tensorflow/third_party/systemlibs/jsoncpp.BUILD
+tensorflow/third_party/systemlibs/png.BUILD
+tensorflow/third_party/systemlibs/boringssl.BUILD
+tensorflow/third_party/systemlibs/swig.BUILD
+tensorflow/third_party/systemlibs/nsync.BUILD
+tensorflow/third_party/systemlibs/BUILD
+tensorflow/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
+tensorflow/third_party/systemlibs/pcre.BUILD
+tensorflow/third_party/systemlibs/syslibs_configure.bzl
+tensorflow/third_party/systemlibs/absl_py.BUILD
+tensorflow/third_party/systemlibs/lmdb.BUILD
+tensorflow/third_party/systemlibs/six.BUILD
+tensorflow/third_party/systemlibs/google_cloud_cpp.BUILD
+tensorflow/third_party/systemlibs/gast.BUILD
+tensorflow/third_party/systemlibs/absl_py.absl.testing.BUILD
+tensorflow/third_party/systemlibs/BUILD.tpl
+tensorflow/third_party/systemlibs/re2.BUILD
+tensorflow/third_party/systemlibs/grpc.BUILD
+tensorflow/third_party/systemlibs/build_defs.bzl.tpl
+tensorflow/third_party/systemlibs/sqlite.BUILD
+tensorflow/third_party/systemlibs/gif.BUILD
+tensorflow/third_party/systemlibs/protobuf.bzl
+tensorflow/third_party/systemlibs/cython.BUILD
+tensorflow/third_party/systemlibs/googleapis.BUILD
+tensorflow/third_party/systemlibs/double_conversion.BUILD
+tensorflow/third_party/systemlibs/zlib.BUILD
+tensorflow/third_party/eigen3/LICENSE
+tensorflow/third_party/eigen3/BUILD
+tensorflow/third_party/eigen3/Eigen/QR
+tensorflow/third_party/eigen3/Eigen/Core
+tensorflow/third_party/eigen3/Eigen/LU
+tensorflow/third_party/eigen3/Eigen/Eigenvalues
+tensorflow/third_party/eigen3/Eigen/SVD
+tensorflow/third_party/eigen3/Eigen/Cholesky
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/ThreadPool
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
+tensorflow/third_party/eigen3/unsupported/Eigen/MatrixFunctions
+tensorflow/third_party/eigen3/unsupported/Eigen/SpecialFunctions
+tensorflow/third_party/curl.BUILD
+tensorflow/third_party/tflite_smartreply.BUILD
+tensorflow/third_party/astor.BUILD
+tensorflow/third_party/boringssl/BUILD
+tensorflow/third_party/repo.bzl
+tensorflow/third_party/grpc/BUILD
+tensorflow/third_party/llvm/BUILD
+tensorflow/third_party/llvm/llvm.autogenerated.BUILD
+tensorflow/third_party/llvm/expand_cmake_vars.py
+tensorflow/third_party/llvm/llvm.bzl
+tensorflow/third_party/jsoncpp.BUILD
+tensorflow/third_party/png.BUILD
+tensorflow/third_party/icu/udata.patch
+tensorflow/third_party/swig.BUILD
+tensorflow/third_party/fft2d/LICENSE
+tensorflow/third_party/fft2d/fft.h
+tensorflow/third_party/fft2d/BUILD
+tensorflow/third_party/fft2d/fft2d.BUILD
+tensorflow/third_party/BUILD
+tensorflow/third_party/android/android.bzl.tpl
+tensorflow/third_party/android/BUILD
+tensorflow/third_party/android/android_configure.bzl
+tensorflow/third_party/android/android_configure.BUILD.tpl
+tensorflow/third_party/pcre.BUILD
+tensorflow/third_party/tflite_mobilenet_quant.BUILD
+tensorflow/third_party/python_runtime/BUILD
+tensorflow/third_party/mpi/.gitignore
+tensorflow/third_party/mpi/BUILD
+tensorflow/third_party/lmdb.BUILD
+tensorflow/third_party/six.BUILD
+tensorflow/third_party/tflite_mobilenet.BUILD
+tensorflow/third_party/sycl/crosstool/BUILD
+tensorflow/third_party/eigen.BUILD
+tensorflow/third_party/toolchains/cpus/py/BUILD
+tensorflow/third_party/toolchains/cpus/py3/BUILD
+tensorflow/third_party/toolchains/cpus/arm/CROSSTOOL.tpl
+tensorflow/third_party/toolchains/cpus/arm/BUILD
+tensorflow/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
+tensorflow/third_party/toolchains/BUILD
+tensorflow/third_party/toolchains/clang6/clang.BUILD
 tensorflow/third_party/toolchains/clang6/repo.bzl
+tensorflow/third_party/toolchains/clang6/README.md
 tensorflow/third_party/toolchains/clang6/CROSSTOOL.tpl
-tensorflow/third_party/toolchains/clang6/clang.BUILD
+tensorflow/third_party/toolchains/clang6/BUILD
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc-cuda10.0/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/build_defs.bzl
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/BUILD
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/build_defs.bzl
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/nccl2/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/py3/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc-cuda9.0/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/build_defs.bzl
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/BUILD
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc-cuda10.0/BUILD
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/build_defs.bzl
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/nccl2/BUILD
-tensorflow/third_party/toolchains/preconfig/generate/workspace.bzl
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/build_defs.bzl
 tensorflow/third_party/toolchains/preconfig/generate/containers.bzl
+tensorflow/third_party/toolchains/preconfig/generate/BUILD
 tensorflow/third_party/toolchains/preconfig/generate/generate.bzl
 tensorflow/third_party/toolchains/preconfig/generate/archives.bzl
-tensorflow/third_party/toolchains/preconfig/generate/BUILD
+tensorflow/third_party/toolchains/preconfig/generate/workspace.bzl
+tensorflow/third_party/toolchains/preconfig/win_1803/BUILD
+tensorflow/third_party/toolchains/preconfig/win_1803/py36/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/dummy_toolchain.bzl
-tensorflow/third_party/toolchains/preconfig/win_1803/py36/BUILD
-tensorflow/third_party/toolchains/preconfig/win_1803/BUILD
-tensorflow/third_party/toolchains/gpus/cuda/build_defs.bzl
+tensorflow/third_party/toolchains/gpus/py/BUILD
 tensorflow/third_party/toolchains/gpus/cuda/BUILD
+tensorflow/third_party/toolchains/gpus/cuda/build_defs.bzl
 tensorflow/third_party/toolchains/gpus/cuda/cuda/cuda_config.h
 tensorflow/third_party/toolchains/gpus/crosstool/BUILD
 tensorflow/third_party/toolchains/gpus/crosstool/CROSSTOOL
-tensorflow/third_party/toolchains/gpus/py/BUILD
-tensorflow/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
-tensorflow/third_party/toolchains/cpus/arm/CROSSTOOL.tpl
-tensorflow/third_party/toolchains/cpus/arm/BUILD
-tensorflow/third_party/toolchains/cpus/py3/BUILD
-tensorflow/third_party/toolchains/cpus/py/BUILD
-tensorflow/third_party/toolchains/BUILD
-tensorflow/third_party/nccl/remote.BUILD.tpl
-tensorflow/third_party/nccl/archive.BUILD
-tensorflow/third_party/nccl/LICENSE
-tensorflow/third_party/nccl/system.BUILD.tpl
-tensorflow/third_party/nccl/nccl_configure.bzl
-tensorflow/third_party/nccl/build_defs.bzl.tpl
-tensorflow/third_party/nccl/BUILD
+tensorflow/third_party/tflite_ovic_testdata.BUILD
+tensorflow/third_party/libxsmm.BUILD
+tensorflow/third_party/ngraph/LICENSE
+tensorflow/third_party/ngraph/BUILD
+tensorflow/third_party/ngraph/ngraph_tf.BUILD
+tensorflow/third_party/ngraph/build_defs.bzl
+tensorflow/third_party/ngraph/tbb.BUILD
+tensorflow/third_party/ngraph/ngraph.BUILD
+tensorflow/third_party/ngraph/NGRAPH_LICENSE
+tensorflow/third_party/ngraph/nlohmann_json.BUILD
+tensorflow/third_party/git/git_configure.bzl
+tensorflow/third_party/git/BUILD
+tensorflow/third_party/git/BUILD.tpl
+tensorflow/third_party/mkl/LICENSE
+tensorflow/third_party/mkl/BUILD
+tensorflow/third_party/mkl/build_defs.bzl
+tensorflow/third_party/mkl/mkl.BUILD
+tensorflow/third_party/mkl/MKL_LICENSE
+tensorflow/third_party/gast.BUILD
+tensorflow/third_party/nanopb.BUILD
+tensorflow/third_party/farmhash.BUILD
 tensorflow/third_party/gpus/BUILD
-tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
-tensorflow/third_party/gpus/crosstool/CROSSTOOL.tpl
-tensorflow/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
-tensorflow/third_party/gpus/crosstool/LICENSE
-tensorflow/third_party/gpus/crosstool/remote.BUILD.tpl
-tensorflow/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl
-tensorflow/third_party/gpus/crosstool/BUILD.tpl
-tensorflow/third_party/gpus/crosstool/BUILD
+tensorflow/third_party/gpus/cuda_configure.bzl
+tensorflow/third_party/gpus/rocm_configure.bzl
 tensorflow/third_party/gpus/cuda/LICENSE
-tensorflow/third_party/gpus/cuda/BUILD.tpl
 tensorflow/third_party/gpus/cuda/BUILD.windows.tpl
-tensorflow/third_party/gpus/cuda/cuda_config.h.tpl
-tensorflow/third_party/gpus/cuda/remote.BUILD.tpl
 tensorflow/third_party/gpus/cuda/BUILD
+tensorflow/third_party/gpus/cuda/remote.BUILD.tpl
+tensorflow/third_party/gpus/cuda/cuda_config.h.tpl
+tensorflow/third_party/gpus/cuda/BUILD.tpl
 tensorflow/third_party/gpus/cuda/build_defs.bzl.tpl
-tensorflow/third_party/gpus/rocm/rocm_config.h.tpl
 tensorflow/third_party/gpus/rocm/BUILD
 tensorflow/third_party/gpus/rocm/BUILD.tpl
 tensorflow/third_party/gpus/rocm/build_defs.bzl.tpl
-tensorflow/third_party/gpus/cuda_configure.bzl
-tensorflow/third_party/gpus/rocm_configure.bzl
-tensorflow/third_party/snappy.BUILD
-tensorflow/third_party/cython.BUILD
-tensorflow/third_party/farmhash.BUILD
-tensorflow/third_party/eigen3/Eigen/Cholesky
-tensorflow/third_party/eigen3/Eigen/QR
-tensorflow/third_party/eigen3/Eigen/LU
-tensorflow/third_party/eigen3/Eigen/Core
-tensorflow/third_party/eigen3/Eigen/SVD
-tensorflow/third_party/eigen3/Eigen/Eigenvalues
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/ThreadPool
-tensorflow/third_party/eigen3/unsupported/Eigen/SpecialFunctions
-tensorflow/third_party/eigen3/unsupported/Eigen/MatrixFunctions
-tensorflow/third_party/eigen3/LICENSE
-tensorflow/third_party/eigen3/BUILD
-tensorflow/third_party/systemlibs/build_defs.bzl.tpl
-tensorflow/third_party/systemlibs/absl_py.BUILD
-tensorflow/third_party/systemlibs/curl.BUILD
-tensorflow/third_party/systemlibs/termcolor.BUILD
-tensorflow/third_party/systemlibs/absl_py.absl.flags.BUILD
-tensorflow/third_party/systemlibs/grpc.BUILD
-tensorflow/third_party/systemlibs/swig.BUILD
-tensorflow/third_party/systemlibs/protobuf.bzl
-tensorflow/third_party/systemlibs/protobuf.BUILD
-tensorflow/third_party/systemlibs/BUILD
-tensorflow/third_party/systemlibs/google_cloud_cpp.BUILD
-tensorflow/third_party/systemlibs/astor.BUILD
-tensorflow/third_party/systemlibs/six.BUILD
-tensorflow/third_party/systemlibs/absl_py.absl.testing.BUILD
-tensorflow/third_party/systemlibs/boringssl.BUILD
-tensorflow/third_party/systemlibs/nsync.BUILD
-tensorflow/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
-tensorflow/third_party/systemlibs/gif.BUILD
-tensorflow/third_party/systemlibs/pcre.BUILD
-tensorflow/third_party/systemlibs/BUILD.tpl
-tensorflow/third_party/systemlibs/snappy.BUILD
-tensorflow/third_party/systemlibs/gast.BUILD
-tensorflow/third_party/systemlibs/cython.BUILD
-tensorflow/third_party/systemlibs/double_conversion.BUILD
-tensorflow/third_party/systemlibs/zlib.BUILD
-tensorflow/third_party/systemlibs/jsoncpp.BUILD
-tensorflow/third_party/systemlibs/re2.BUILD
-tensorflow/third_party/systemlibs/lmdb.BUILD
-tensorflow/third_party/systemlibs/googleapis.BUILD
-tensorflow/third_party/systemlibs/png.BUILD
-tensorflow/third_party/systemlibs/syslibs_configure.bzl
-tensorflow/third_party/systemlibs/sqlite.BUILD
-tensorflow/third_party/python_runtime/BUILD
-tensorflow/third_party/sycl/crosstool/BUILD
-tensorflow/third_party/ngraph/LICENSE
-tensorflow/third_party/ngraph/tbb.BUILD
-tensorflow/third_party/ngraph/BUILD
-tensorflow/third_party/ngraph/ngraph.BUILD
-tensorflow/third_party/ngraph/build_defs.bzl
-tensorflow/third_party/ngraph/NGRAPH_LICENSE
-tensorflow/third_party/ngraph/ngraph_tf.BUILD
-tensorflow/third_party/ngraph/nlohmann_json.BUILD
-tensorflow/third_party/clang_toolchain/download_clang.bzl
-tensorflow/third_party/clang_toolchain/BUILD
-tensorflow/third_party/clang_toolchain/cc_configure_clang.bzl
-tensorflow/third_party/gast.BUILD
-tensorflow/third_party/llvm/BUILD
-tensorflow/third_party/llvm/expand_cmake_vars.py
-tensorflow/third_party/llvm/llvm.autogenerated.BUILD
-tensorflow/third_party/llvm/llvm.bzl
-tensorflow/third_party/icu/udata.patch
-tensorflow/third_party/fft2d/BUILD
-tensorflow/third_party/fft2d/fft.h
-tensorflow/third_party/fft2d/LICENSE
-tensorflow/third_party/fft2d/fft2d.BUILD
-tensorflow/third_party/boringssl/BUILD
-tensorflow/third_party/mpi/.gitignore
-tensorflow/third_party/mpi/BUILD
+tensorflow/third_party/gpus/rocm/rocm_config.h.tpl
+tensorflow/third_party/gpus/crosstool/LICENSE
+tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
+tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
+tensorflow/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
+tensorflow/third_party/gpus/crosstool/CROSSTOOL.tpl
+tensorflow/third_party/gpus/crosstool/BUILD
+tensorflow/third_party/gpus/crosstool/remote.BUILD.tpl
+tensorflow/third_party/gpus/crosstool/BUILD.tpl
+tensorflow/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl
+tensorflow/third_party/sqlite.BUILD
+tensorflow/third_party/arm_neon_2_x86_sse.BUILD
 tensorflow/third_party/tensorrt/LICENSE
-tensorflow/third_party/tensorrt/BUILD
-tensorflow/third_party/tensorrt/build_defs.bzl.tpl
-tensorflow/third_party/tensorrt/BUILD.tpl
 tensorflow/third_party/tensorrt/tensorrt_configure.bzl
+tensorflow/third_party/tensorrt/BUILD
 tensorflow/third_party/tensorrt/remote.BUILD.tpl
+tensorflow/third_party/tensorrt/BUILD.tpl
+tensorflow/third_party/tensorrt/build_defs.bzl.tpl
+tensorflow/third_party/gif.BUILD
+tensorflow/third_party/png_fix_rpi.patch
+tensorflow/third_party/pprof.BUILD
+tensorflow/third_party/backports_weakref.BUILD
+tensorflow/third_party/mpi_collectives/BUILD
+tensorflow/third_party/cub.BUILD
+tensorflow/third_party/cython.BUILD
+tensorflow/third_party/googleapis.BUILD
 tensorflow/third_party/kafka/config.patch
 tensorflow/third_party/kafka/BUILD
-tensorflow/third_party/android/BUILD
-tensorflow/third_party/android/android.bzl.tpl
-tensorflow/third_party/android/android_configure.bzl
-tensorflow/third_party/android/android_configure.BUILD.tpl
-tensorflow/third_party/tflite_smartreply.BUILD
+tensorflow/third_party/nccl/system.BUILD.tpl
+tensorflow/third_party/nccl/archive.BUILD
+tensorflow/third_party/nccl/LICENSE
+tensorflow/third_party/nccl/BUILD
+tensorflow/third_party/nccl/nccl_configure.bzl
+tensorflow/third_party/nccl/remote.BUILD.tpl
+tensorflow/third_party/nccl/build_defs.bzl.tpl
+tensorflow/third_party/common.bzl
+tensorflow/third_party/linenoise.BUILD
 tensorflow/third_party/mkl_dnn/LICENSE
 tensorflow/third_party/mkl_dnn/mkldnn.BUILD
-tensorflow/third_party/pcre.BUILD
-tensorflow/third_party/linenoise.BUILD
-tensorflow/third_party/sqlite.BUILD
-tensorflow/third_party/common.bzl
-tensorflow/third_party/com_google_absl.BUILD
-tensorflow/third_party/pprof.BUILD
-tensorflow/third_party/BUILD
-tensorflow/third_party/tflite_mobilenet_quant.BUILD
-tensorflow/third_party/lmdb.BUILD
-tensorflow/third_party/git/BUILD.tpl
-tensorflow/third_party/git/BUILD
-tensorflow/third_party/git/git_configure.bzl
-tensorflow/third_party/protobuf/BUILD
-tensorflow/third_party/tflite_mobilenet.BUILD
-tensorflow/third_party/py/BUILD
-tensorflow/third_party/py/BUILD.tpl
-tensorflow/third_party/py/remote.BUILD.tpl
-tensorflow/third_party/py/numpy/BUILD
-tensorflow/third_party/py/python_configure.bzl
-tensorflow/third_party/termcolor.BUILD
-tensorflow/third_party/png_fix_rpi.patch
-tensorflow/third_party/swig.BUILD
-tensorflow/third_party/astor.BUILD
-tensorflow/third_party/grpc/BUILD
-tensorflow/third_party/curl.BUILD
-tensorflow/third_party/arm_neon_2_x86_sse.BUILD
-tensorflow/third_party/png.BUILD
-tensorflow/third_party/googleapis.BUILD
-tensorflow/third_party/mpi_collectives/BUILD
-tensorflow/third_party/nanopb.BUILD
-tensorflow/third_party/gif.BUILD
-tensorflow/third_party/double_conversion.BUILD
-tensorflow/third_party/six.BUILD
-tensorflow/third_party/tflite_mobilenet_float.BUILD
-tensorflow/third_party/repo.bzl
 tensorflow/third_party/codegen.BUILD
-tensorflow/third_party/cub.BUILD
-tensorflow/third_party/jsoncpp.BUILD
-tensorflow/third_party/tflite_ovic_testdata.BUILD
-tensorflow/third_party/libxsmm.BUILD
+tensorflow/third_party/double_conversion.BUILD
 tensorflow/third_party/zlib.BUILD
-tensorflow/third_party/eigen.BUILD
-tensorflow/stream_executor/BUILD
+tensorflow/third_party/clang_toolchain/BUILD
+tensorflow/third_party/clang_toolchain/cc_configure_clang.bzl
+tensorflow/third_party/clang_toolchain/download_clang.bzl
+tensorflow/third_party/tflite_mobilenet_float.BUILD
+tensorflow/third_party/com_google_absl.BUILD
+tensorflow/tools/ci_build/remote/BUILD
+tensorflow/tools/pip_package/build_pip_package.sh
+tensorflow/tools/pip_package/setup.py
+tensorflow/tools/pip_package/BUILD
+tensorflow/tools/pip_package/simple_console.py
+tensorflow/tools/pip_package/README
+tensorflow/tools/pip_package/MANIFEST.in
+tensorflow/tools/pip_package/simple_console_for_windows.py
+tensorflow/tools/pip_package/check_load_py_test.py
+tensorflow/tools/pip_package/pip_smoke_test.py
+tensorflow/tools/lib_package/libtensorflow_java_test.sh
+tensorflow/tools/lib_package/README.md
+tensorflow/tools/lib_package/BUILD
+tensorflow/tools/lib_package/libtensorflow_test.sh
+tensorflow/tools/lib_package/libtensorflow_test.c
+tensorflow/tools/lib_package/LibTensorFlowTest.java
+tensorflow/tools/lib_package/concat_licenses.sh
+tensorflow/tools/def_file_filter/def_file_filter.py.tpl
+tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
+tensorflow/tools/def_file_filter/BUILD
+tensorflow/tools/def_file_filter/BUILD.tpl
 tensorflow/api_template_v1.__init__.py
 tensorflow/compat_template_v1.__init__.py
-tensorflow/api_template.__init__.py
+tensorflow/contrib/mpi/BUILD
+tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
+tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/__init__.py
+tensorflow/contrib/tpu/profiler/pip_package/build_pip_package.sh
+tensorflow/contrib/tpu/profiler/pip_package/setup.py
+tensorflow/contrib/tpu/profiler/pip_package/BUILD
+tensorflow/contrib/tpu/profiler/pip_package/README
 tensorflow/__init__.py
\ No newline at end of file
diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py
index 893309f35a..06a6e7dc08 100644
--- a/tensorflow/python/tools/freeze_graph.py
+++ b/tensorflow/python/tools/freeze_graph.py
@@ -246,7 +246,7 @@ def _parse_input_graph_proto(input_graph, input_binary):
     return -1
   input_graph_def = graph_pb2.GraphDef()
   mode = "rb" if input_binary else "r"
-  with gfile.FastGFile(input_graph, mode) as f:
+  with gfile.GFile(input_graph, mode) as f:
     if input_binary:
       input_graph_def.ParseFromString(f.read())
     else:
@@ -261,7 +261,7 @@ def _parse_input_meta_graph_proto(input_graph, input_binary):
     return -1
   input_meta_graph_def = MetaGraphDef()
   mode = "rb" if input_binary else "r"
-  with gfile.FastGFile(input_graph, mode) as f:
+  with gfile.GFile(input_graph, mode) as f:
     if input_binary:
       input_meta_graph_def.ParseFromString(f.read())
     else:
@@ -276,7 +276,7 @@ def _parse_input_saver_proto(input_saver, input_binary):
     print("Input saver file '" + input_saver + "' does not exist!")
     return -1
   mode = "rb" if input_binary else "r"
-  with gfile.FastGFile(input_saver, mode) as f:
+  with gfile.GFile(input_saver, mode) as f:
     saver_def = saver_pb2.SaverDef()
     if input_binary:
       saver_def.ParseFromString(f.read())
diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py
index 6d2fec3ad6..edfdb77b90 100644
--- a/tensorflow/python/tools/import_pb_to_tensorboard.py
+++ b/tensorflow/python/tools/import_pb_to_tensorboard.py
@@ -53,7 +53,7 @@ def import_to_tensorboard(model_dir, log_dir):
     View your imported `.pb` model as a graph.
   """
   with session.Session(graph=ops.Graph()) as sess:
-    with gfile.FastGFile(model_dir, "rb") as f:
+    with gfile.GFile(model_dir, "rb") as f:
       graph_def = graph_pb2.GraphDef()
       graph_def.ParseFromString(f.read())
       importer.import_graph_def(graph_def)
diff --git a/tensorflow/python/tools/optimize_for_inference.py b/tensorflow/python/tools/optimize_for_inference.py
index fbf8c2d709..693e34348b 100644
--- a/tensorflow/python/tools/optimize_for_inference.py
+++ b/tensorflow/python/tools/optimize_for_inference.py
@@ -92,7 +92,7 @@ def main(unused_args):
       FLAGS.toco_compatible)
 
   if FLAGS.frozen_graph:
-    f = gfile.FastGFile(FLAGS.output, "w")
+    f = gfile.GFile(FLAGS.output, "w")
     f.write(output_graph_def.SerializeToString())
   else:
     graph_io.write_graph(output_graph_def,
diff --git a/tensorflow/python/tools/strip_unused_lib.py b/tensorflow/python/tools/strip_unused_lib.py
index b1d1956076..decd7e2fc8 100644
--- a/tensorflow/python/tools/strip_unused_lib.py
+++ b/tensorflow/python/tools/strip_unused_lib.py
@@ -102,7 +102,7 @@ def strip_unused_from_files(input_graph, input_binary, output_graph,
 
   input_graph_def = graph_pb2.GraphDef()
   mode = "rb" if input_binary else "r"
-  with gfile.FastGFile(input_graph, mode) as f:
+  with gfile.GFile(input_graph, mode) as f:
     if input_binary:
       input_graph_def.ParseFromString(f.read())
     else:
-- 
GitLab


From 0a2ead8abb5c80a76747e5063761cbd9f21217fd Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 01:06:29 -0800
Subject: [PATCH 813/873] compat: Update forward compatibility horizon to
 2018-12-19

PiperOrigin-RevId: 226131919
---
 tensorflow/opensource_only.files   | 418 ++++++++++++++---------------
 tensorflow/python/compat/compat.py |   2 +-
 2 files changed, 210 insertions(+), 210 deletions(-)

diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index 8b97add97d..88800c2951 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -1,246 +1,246 @@
-tensorflow/api_template.__init__.py
-tensorflow/stream_executor/BUILD
-tensorflow/third_party/py/BUILD
-tensorflow/third_party/py/remote.BUILD.tpl
-tensorflow/third_party/py/python_configure.bzl
-tensorflow/third_party/py/BUILD.tpl
-tensorflow/third_party/py/numpy/BUILD
-tensorflow/third_party/snappy.BUILD
-tensorflow/third_party/protobuf/BUILD
-tensorflow/third_party/termcolor.BUILD
-tensorflow/third_party/systemlibs/snappy.BUILD
-tensorflow/third_party/systemlibs/protobuf.BUILD
-tensorflow/third_party/systemlibs/termcolor.BUILD
-tensorflow/third_party/systemlibs/absl_py.absl.flags.BUILD
-tensorflow/third_party/systemlibs/curl.BUILD
-tensorflow/third_party/systemlibs/astor.BUILD
-tensorflow/third_party/systemlibs/jsoncpp.BUILD
-tensorflow/third_party/systemlibs/png.BUILD
-tensorflow/third_party/systemlibs/boringssl.BUILD
-tensorflow/third_party/systemlibs/swig.BUILD
-tensorflow/third_party/systemlibs/nsync.BUILD
-tensorflow/third_party/systemlibs/BUILD
-tensorflow/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
-tensorflow/third_party/systemlibs/pcre.BUILD
-tensorflow/third_party/systemlibs/syslibs_configure.bzl
-tensorflow/third_party/systemlibs/absl_py.BUILD
-tensorflow/third_party/systemlibs/lmdb.BUILD
-tensorflow/third_party/systemlibs/six.BUILD
-tensorflow/third_party/systemlibs/google_cloud_cpp.BUILD
-tensorflow/third_party/systemlibs/gast.BUILD
-tensorflow/third_party/systemlibs/absl_py.absl.testing.BUILD
-tensorflow/third_party/systemlibs/BUILD.tpl
-tensorflow/third_party/systemlibs/re2.BUILD
-tensorflow/third_party/systemlibs/grpc.BUILD
-tensorflow/third_party/systemlibs/build_defs.bzl.tpl
-tensorflow/third_party/systemlibs/sqlite.BUILD
-tensorflow/third_party/systemlibs/gif.BUILD
-tensorflow/third_party/systemlibs/protobuf.bzl
-tensorflow/third_party/systemlibs/cython.BUILD
-tensorflow/third_party/systemlibs/googleapis.BUILD
-tensorflow/third_party/systemlibs/double_conversion.BUILD
-tensorflow/third_party/systemlibs/zlib.BUILD
-tensorflow/third_party/eigen3/LICENSE
-tensorflow/third_party/eigen3/BUILD
-tensorflow/third_party/eigen3/Eigen/QR
-tensorflow/third_party/eigen3/Eigen/Core
-tensorflow/third_party/eigen3/Eigen/LU
-tensorflow/third_party/eigen3/Eigen/Eigenvalues
-tensorflow/third_party/eigen3/Eigen/SVD
-tensorflow/third_party/eigen3/Eigen/Cholesky
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/ThreadPool
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
-tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
-tensorflow/third_party/eigen3/unsupported/Eigen/MatrixFunctions
-tensorflow/third_party/eigen3/unsupported/Eigen/SpecialFunctions
-tensorflow/third_party/curl.BUILD
-tensorflow/third_party/tflite_smartreply.BUILD
-tensorflow/third_party/astor.BUILD
-tensorflow/third_party/boringssl/BUILD
-tensorflow/third_party/repo.bzl
-tensorflow/third_party/grpc/BUILD
-tensorflow/third_party/llvm/BUILD
-tensorflow/third_party/llvm/llvm.autogenerated.BUILD
-tensorflow/third_party/llvm/expand_cmake_vars.py
-tensorflow/third_party/llvm/llvm.bzl
-tensorflow/third_party/jsoncpp.BUILD
-tensorflow/third_party/png.BUILD
-tensorflow/third_party/icu/udata.patch
-tensorflow/third_party/swig.BUILD
-tensorflow/third_party/fft2d/LICENSE
-tensorflow/third_party/fft2d/fft.h
-tensorflow/third_party/fft2d/BUILD
-tensorflow/third_party/fft2d/fft2d.BUILD
-tensorflow/third_party/BUILD
-tensorflow/third_party/android/android.bzl.tpl
-tensorflow/third_party/android/BUILD
-tensorflow/third_party/android/android_configure.bzl
-tensorflow/third_party/android/android_configure.BUILD.tpl
-tensorflow/third_party/pcre.BUILD
-tensorflow/third_party/tflite_mobilenet_quant.BUILD
-tensorflow/third_party/python_runtime/BUILD
-tensorflow/third_party/mpi/.gitignore
-tensorflow/third_party/mpi/BUILD
-tensorflow/third_party/lmdb.BUILD
-tensorflow/third_party/six.BUILD
-tensorflow/third_party/tflite_mobilenet.BUILD
-tensorflow/third_party/sycl/crosstool/BUILD
-tensorflow/third_party/eigen.BUILD
-tensorflow/third_party/toolchains/cpus/py/BUILD
-tensorflow/third_party/toolchains/cpus/py3/BUILD
-tensorflow/third_party/toolchains/cpus/arm/CROSSTOOL.tpl
-tensorflow/third_party/toolchains/cpus/arm/BUILD
-tensorflow/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
-tensorflow/third_party/toolchains/BUILD
-tensorflow/third_party/toolchains/clang6/clang.BUILD
-tensorflow/third_party/toolchains/clang6/repo.bzl
+tensorflow/contrib/tpu/profiler/pip_package/BUILD
+tensorflow/contrib/tpu/profiler/pip_package/setup.py
+tensorflow/contrib/tpu/profiler/pip_package/README
+tensorflow/contrib/tpu/profiler/pip_package/build_pip_package.sh
+tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
+tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/__init__.py
+tensorflow/contrib/mpi/BUILD
+tensorflow/tools/ci_build/remote/BUILD
+tensorflow/tools/pip_package/README
+tensorflow/tools/pip_package/MANIFEST.in
+tensorflow/tools/pip_package/simple_console.py
+tensorflow/tools/pip_package/build_pip_package.sh
+tensorflow/tools/pip_package/check_load_py_test.py
+tensorflow/tools/pip_package/pip_smoke_test.py
+tensorflow/tools/pip_package/simple_console_for_windows.py
+tensorflow/tools/pip_package/setup.py
+tensorflow/tools/pip_package/BUILD
+tensorflow/tools/lib_package/concat_licenses.sh
+tensorflow/tools/lib_package/libtensorflow_test.c
+tensorflow/tools/lib_package/LibTensorFlowTest.java
+tensorflow/tools/lib_package/BUILD
+tensorflow/tools/lib_package/libtensorflow_test.sh
+tensorflow/tools/lib_package/README.md
+tensorflow/tools/lib_package/libtensorflow_java_test.sh
+tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
+tensorflow/tools/def_file_filter/BUILD
+tensorflow/tools/def_file_filter/BUILD.tpl
+tensorflow/tools/def_file_filter/def_file_filter.py.tpl
+tensorflow/third_party/mkl/MKL_LICENSE
+tensorflow/third_party/mkl/LICENSE
+tensorflow/third_party/mkl/BUILD
+tensorflow/third_party/mkl/mkl.BUILD
+tensorflow/third_party/mkl/build_defs.bzl
+tensorflow/third_party/backports_weakref.BUILD
+tensorflow/third_party/toolchains/clang6/BUILD
 tensorflow/third_party/toolchains/clang6/README.md
+tensorflow/third_party/toolchains/clang6/repo.bzl
 tensorflow/third_party/toolchains/clang6/CROSSTOOL.tpl
-tensorflow/third_party/toolchains/clang6/BUILD
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc-cuda10.0/BUILD
+tensorflow/third_party/toolchains/clang6/clang.BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/tensorrt5/build_defs.bzl
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/BUILD
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/build_defs.bzl
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/nccl2/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/py3/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc-cuda9.0/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/BUILD
-tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/build_defs.bzl
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda10.0-cudnn7/cuda/BUILD
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc-cuda10.0/BUILD
 tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/build_defs.bzl
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/cuda9.0-cudnn7/cuda/BUILD
+tensorflow/third_party/toolchains/preconfig/ubuntu14.04/nccl2/BUILD
+tensorflow/third_party/toolchains/preconfig/generate/workspace.bzl
 tensorflow/third_party/toolchains/preconfig/generate/containers.bzl
-tensorflow/third_party/toolchains/preconfig/generate/BUILD
 tensorflow/third_party/toolchains/preconfig/generate/generate.bzl
 tensorflow/third_party/toolchains/preconfig/generate/archives.bzl
-tensorflow/third_party/toolchains/preconfig/generate/workspace.bzl
-tensorflow/third_party/toolchains/preconfig/win_1803/BUILD
-tensorflow/third_party/toolchains/preconfig/win_1803/py36/BUILD
+tensorflow/third_party/toolchains/preconfig/generate/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/BUILD
 tensorflow/third_party/toolchains/preconfig/win_1803/bazel_018/dummy_toolchain.bzl
-tensorflow/third_party/toolchains/gpus/py/BUILD
-tensorflow/third_party/toolchains/gpus/cuda/BUILD
+tensorflow/third_party/toolchains/preconfig/win_1803/py36/BUILD
+tensorflow/third_party/toolchains/preconfig/win_1803/BUILD
 tensorflow/third_party/toolchains/gpus/cuda/build_defs.bzl
+tensorflow/third_party/toolchains/gpus/cuda/BUILD
 tensorflow/third_party/toolchains/gpus/cuda/cuda/cuda_config.h
 tensorflow/third_party/toolchains/gpus/crosstool/BUILD
 tensorflow/third_party/toolchains/gpus/crosstool/CROSSTOOL
-tensorflow/third_party/tflite_ovic_testdata.BUILD
-tensorflow/third_party/libxsmm.BUILD
-tensorflow/third_party/ngraph/LICENSE
-tensorflow/third_party/ngraph/BUILD
-tensorflow/third_party/ngraph/ngraph_tf.BUILD
-tensorflow/third_party/ngraph/build_defs.bzl
-tensorflow/third_party/ngraph/tbb.BUILD
-tensorflow/third_party/ngraph/ngraph.BUILD
-tensorflow/third_party/ngraph/NGRAPH_LICENSE
-tensorflow/third_party/ngraph/nlohmann_json.BUILD
-tensorflow/third_party/git/git_configure.bzl
-tensorflow/third_party/git/BUILD
-tensorflow/third_party/git/BUILD.tpl
-tensorflow/third_party/mkl/LICENSE
-tensorflow/third_party/mkl/BUILD
-tensorflow/third_party/mkl/build_defs.bzl
-tensorflow/third_party/mkl/mkl.BUILD
-tensorflow/third_party/mkl/MKL_LICENSE
-tensorflow/third_party/gast.BUILD
-tensorflow/third_party/nanopb.BUILD
-tensorflow/third_party/farmhash.BUILD
+tensorflow/third_party/toolchains/gpus/py/BUILD
+tensorflow/third_party/toolchains/cpus/arm/arm_compiler_configure.bzl
+tensorflow/third_party/toolchains/cpus/arm/CROSSTOOL.tpl
+tensorflow/third_party/toolchains/cpus/arm/BUILD
+tensorflow/third_party/toolchains/cpus/py3/BUILD
+tensorflow/third_party/toolchains/cpus/py/BUILD
+tensorflow/third_party/toolchains/BUILD
+tensorflow/third_party/nccl/remote.BUILD.tpl
+tensorflow/third_party/nccl/archive.BUILD
+tensorflow/third_party/nccl/LICENSE
+tensorflow/third_party/nccl/system.BUILD.tpl
+tensorflow/third_party/nccl/nccl_configure.bzl
+tensorflow/third_party/nccl/build_defs.bzl.tpl
+tensorflow/third_party/nccl/BUILD
 tensorflow/third_party/gpus/BUILD
-tensorflow/third_party/gpus/cuda_configure.bzl
-tensorflow/third_party/gpus/rocm_configure.bzl
+tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
+tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
+tensorflow/third_party/gpus/crosstool/CROSSTOOL.tpl
+tensorflow/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
+tensorflow/third_party/gpus/crosstool/LICENSE
+tensorflow/third_party/gpus/crosstool/remote.BUILD.tpl
+tensorflow/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl
+tensorflow/third_party/gpus/crosstool/BUILD.tpl
+tensorflow/third_party/gpus/crosstool/BUILD
 tensorflow/third_party/gpus/cuda/LICENSE
+tensorflow/third_party/gpus/cuda/BUILD.tpl
 tensorflow/third_party/gpus/cuda/BUILD.windows.tpl
-tensorflow/third_party/gpus/cuda/BUILD
-tensorflow/third_party/gpus/cuda/remote.BUILD.tpl
 tensorflow/third_party/gpus/cuda/cuda_config.h.tpl
-tensorflow/third_party/gpus/cuda/BUILD.tpl
+tensorflow/third_party/gpus/cuda/remote.BUILD.tpl
+tensorflow/third_party/gpus/cuda/BUILD
 tensorflow/third_party/gpus/cuda/build_defs.bzl.tpl
+tensorflow/third_party/gpus/rocm/rocm_config.h.tpl
 tensorflow/third_party/gpus/rocm/BUILD
 tensorflow/third_party/gpus/rocm/BUILD.tpl
 tensorflow/third_party/gpus/rocm/build_defs.bzl.tpl
-tensorflow/third_party/gpus/rocm/rocm_config.h.tpl
-tensorflow/third_party/gpus/crosstool/LICENSE
-tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl
-tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl
-tensorflow/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl
-tensorflow/third_party/gpus/crosstool/CROSSTOOL.tpl
-tensorflow/third_party/gpus/crosstool/BUILD
-tensorflow/third_party/gpus/crosstool/remote.BUILD.tpl
-tensorflow/third_party/gpus/crosstool/BUILD.tpl
-tensorflow/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl
-tensorflow/third_party/sqlite.BUILD
-tensorflow/third_party/arm_neon_2_x86_sse.BUILD
+tensorflow/third_party/gpus/cuda_configure.bzl
+tensorflow/third_party/gpus/rocm_configure.bzl
+tensorflow/third_party/snappy.BUILD
+tensorflow/third_party/cython.BUILD
+tensorflow/third_party/farmhash.BUILD
+tensorflow/third_party/eigen3/Eigen/Cholesky
+tensorflow/third_party/eigen3/Eigen/QR
+tensorflow/third_party/eigen3/Eigen/LU
+tensorflow/third_party/eigen3/Eigen/Core
+tensorflow/third_party/eigen3/Eigen/SVD
+tensorflow/third_party/eigen3/Eigen/Eigenvalues
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/FixedPoint
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/Tensor
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
+tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/ThreadPool
+tensorflow/third_party/eigen3/unsupported/Eigen/SpecialFunctions
+tensorflow/third_party/eigen3/unsupported/Eigen/MatrixFunctions
+tensorflow/third_party/eigen3/LICENSE
+tensorflow/third_party/eigen3/BUILD
+tensorflow/third_party/systemlibs/build_defs.bzl.tpl
+tensorflow/third_party/systemlibs/absl_py.BUILD
+tensorflow/third_party/systemlibs/curl.BUILD
+tensorflow/third_party/systemlibs/termcolor.BUILD
+tensorflow/third_party/systemlibs/absl_py.absl.flags.BUILD
+tensorflow/third_party/systemlibs/grpc.BUILD
+tensorflow/third_party/systemlibs/swig.BUILD
+tensorflow/third_party/systemlibs/protobuf.bzl
+tensorflow/third_party/systemlibs/protobuf.BUILD
+tensorflow/third_party/systemlibs/BUILD
+tensorflow/third_party/systemlibs/google_cloud_cpp.BUILD
+tensorflow/third_party/systemlibs/astor.BUILD
+tensorflow/third_party/systemlibs/six.BUILD
+tensorflow/third_party/systemlibs/absl_py.absl.testing.BUILD
+tensorflow/third_party/systemlibs/boringssl.BUILD
+tensorflow/third_party/systemlibs/nsync.BUILD
+tensorflow/third_party/systemlibs/google_cloud_cpp.google.cloud.bigtable.BUILD
+tensorflow/third_party/systemlibs/gif.BUILD
+tensorflow/third_party/systemlibs/pcre.BUILD
+tensorflow/third_party/systemlibs/BUILD.tpl
+tensorflow/third_party/systemlibs/snappy.BUILD
+tensorflow/third_party/systemlibs/gast.BUILD
+tensorflow/third_party/systemlibs/cython.BUILD
+tensorflow/third_party/systemlibs/double_conversion.BUILD
+tensorflow/third_party/systemlibs/zlib.BUILD
+tensorflow/third_party/systemlibs/jsoncpp.BUILD
+tensorflow/third_party/systemlibs/re2.BUILD
+tensorflow/third_party/systemlibs/lmdb.BUILD
+tensorflow/third_party/systemlibs/googleapis.BUILD
+tensorflow/third_party/systemlibs/png.BUILD
+tensorflow/third_party/systemlibs/syslibs_configure.bzl
+tensorflow/third_party/systemlibs/sqlite.BUILD
+tensorflow/third_party/python_runtime/BUILD
+tensorflow/third_party/sycl/crosstool/BUILD
+tensorflow/third_party/ngraph/LICENSE
+tensorflow/third_party/ngraph/tbb.BUILD
+tensorflow/third_party/ngraph/BUILD
+tensorflow/third_party/ngraph/ngraph.BUILD
+tensorflow/third_party/ngraph/build_defs.bzl
+tensorflow/third_party/ngraph/NGRAPH_LICENSE
+tensorflow/third_party/ngraph/ngraph_tf.BUILD
+tensorflow/third_party/ngraph/nlohmann_json.BUILD
+tensorflow/third_party/clang_toolchain/download_clang.bzl
+tensorflow/third_party/clang_toolchain/BUILD
+tensorflow/third_party/clang_toolchain/cc_configure_clang.bzl
+tensorflow/third_party/gast.BUILD
+tensorflow/third_party/llvm/BUILD
+tensorflow/third_party/llvm/expand_cmake_vars.py
+tensorflow/third_party/llvm/llvm.autogenerated.BUILD
+tensorflow/third_party/llvm/llvm.bzl
+tensorflow/third_party/icu/udata.patch
+tensorflow/third_party/fft2d/BUILD
+tensorflow/third_party/fft2d/fft.h
+tensorflow/third_party/fft2d/LICENSE
+tensorflow/third_party/fft2d/fft2d.BUILD
+tensorflow/third_party/boringssl/BUILD
+tensorflow/third_party/mpi/.gitignore
+tensorflow/third_party/mpi/BUILD
 tensorflow/third_party/tensorrt/LICENSE
-tensorflow/third_party/tensorrt/tensorrt_configure.bzl
 tensorflow/third_party/tensorrt/BUILD
-tensorflow/third_party/tensorrt/remote.BUILD.tpl
-tensorflow/third_party/tensorrt/BUILD.tpl
 tensorflow/third_party/tensorrt/build_defs.bzl.tpl
-tensorflow/third_party/gif.BUILD
-tensorflow/third_party/png_fix_rpi.patch
-tensorflow/third_party/pprof.BUILD
-tensorflow/third_party/backports_weakref.BUILD
-tensorflow/third_party/mpi_collectives/BUILD
-tensorflow/third_party/cub.BUILD
-tensorflow/third_party/cython.BUILD
-tensorflow/third_party/googleapis.BUILD
+tensorflow/third_party/tensorrt/BUILD.tpl
+tensorflow/third_party/tensorrt/tensorrt_configure.bzl
+tensorflow/third_party/tensorrt/remote.BUILD.tpl
 tensorflow/third_party/kafka/config.patch
 tensorflow/third_party/kafka/BUILD
-tensorflow/third_party/nccl/system.BUILD.tpl
-tensorflow/third_party/nccl/archive.BUILD
-tensorflow/third_party/nccl/LICENSE
-tensorflow/third_party/nccl/BUILD
-tensorflow/third_party/nccl/nccl_configure.bzl
-tensorflow/third_party/nccl/remote.BUILD.tpl
-tensorflow/third_party/nccl/build_defs.bzl.tpl
-tensorflow/third_party/common.bzl
-tensorflow/third_party/linenoise.BUILD
+tensorflow/third_party/android/BUILD
+tensorflow/third_party/android/android.bzl.tpl
+tensorflow/third_party/android/android_configure.bzl
+tensorflow/third_party/android/android_configure.BUILD.tpl
+tensorflow/third_party/tflite_smartreply.BUILD
 tensorflow/third_party/mkl_dnn/LICENSE
 tensorflow/third_party/mkl_dnn/mkldnn.BUILD
-tensorflow/third_party/codegen.BUILD
+tensorflow/third_party/pcre.BUILD
+tensorflow/third_party/linenoise.BUILD
+tensorflow/third_party/sqlite.BUILD
+tensorflow/third_party/common.bzl
+tensorflow/third_party/com_google_absl.BUILD
+tensorflow/third_party/pprof.BUILD
+tensorflow/third_party/BUILD
+tensorflow/third_party/tflite_mobilenet_quant.BUILD
+tensorflow/third_party/lmdb.BUILD
+tensorflow/third_party/git/BUILD.tpl
+tensorflow/third_party/git/BUILD
+tensorflow/third_party/git/git_configure.bzl
+tensorflow/third_party/protobuf/BUILD
+tensorflow/third_party/tflite_mobilenet.BUILD
+tensorflow/third_party/py/BUILD
+tensorflow/third_party/py/BUILD.tpl
+tensorflow/third_party/py/remote.BUILD.tpl
+tensorflow/third_party/py/numpy/BUILD
+tensorflow/third_party/py/python_configure.bzl
+tensorflow/third_party/termcolor.BUILD
+tensorflow/third_party/png_fix_rpi.patch
+tensorflow/third_party/swig.BUILD
+tensorflow/third_party/astor.BUILD
+tensorflow/third_party/grpc/BUILD
+tensorflow/third_party/curl.BUILD
+tensorflow/third_party/arm_neon_2_x86_sse.BUILD
+tensorflow/third_party/png.BUILD
+tensorflow/third_party/googleapis.BUILD
+tensorflow/third_party/mpi_collectives/BUILD
+tensorflow/third_party/nanopb.BUILD
+tensorflow/third_party/gif.BUILD
 tensorflow/third_party/double_conversion.BUILD
-tensorflow/third_party/zlib.BUILD
-tensorflow/third_party/clang_toolchain/BUILD
-tensorflow/third_party/clang_toolchain/cc_configure_clang.bzl
-tensorflow/third_party/clang_toolchain/download_clang.bzl
+tensorflow/third_party/six.BUILD
 tensorflow/third_party/tflite_mobilenet_float.BUILD
-tensorflow/third_party/com_google_absl.BUILD
-tensorflow/tools/ci_build/remote/BUILD
-tensorflow/tools/pip_package/build_pip_package.sh
-tensorflow/tools/pip_package/setup.py
-tensorflow/tools/pip_package/BUILD
-tensorflow/tools/pip_package/simple_console.py
-tensorflow/tools/pip_package/README
-tensorflow/tools/pip_package/MANIFEST.in
-tensorflow/tools/pip_package/simple_console_for_windows.py
-tensorflow/tools/pip_package/check_load_py_test.py
-tensorflow/tools/pip_package/pip_smoke_test.py
-tensorflow/tools/lib_package/libtensorflow_java_test.sh
-tensorflow/tools/lib_package/README.md
-tensorflow/tools/lib_package/BUILD
-tensorflow/tools/lib_package/libtensorflow_test.sh
-tensorflow/tools/lib_package/libtensorflow_test.c
-tensorflow/tools/lib_package/LibTensorFlowTest.java
-tensorflow/tools/lib_package/concat_licenses.sh
-tensorflow/tools/def_file_filter/def_file_filter.py.tpl
-tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
-tensorflow/tools/def_file_filter/BUILD
-tensorflow/tools/def_file_filter/BUILD.tpl
+tensorflow/third_party/repo.bzl
+tensorflow/third_party/codegen.BUILD
+tensorflow/third_party/cub.BUILD
+tensorflow/third_party/jsoncpp.BUILD
+tensorflow/third_party/tflite_ovic_testdata.BUILD
+tensorflow/third_party/libxsmm.BUILD
+tensorflow/third_party/zlib.BUILD
+tensorflow/third_party/eigen.BUILD
+tensorflow/stream_executor/BUILD
 tensorflow/api_template_v1.__init__.py
 tensorflow/compat_template_v1.__init__.py
-tensorflow/contrib/mpi/BUILD
-tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py
-tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/__init__.py
-tensorflow/contrib/tpu/profiler/pip_package/build_pip_package.sh
-tensorflow/contrib/tpu/profiler/pip_package/setup.py
-tensorflow/contrib/tpu/profiler/pip_package/BUILD
-tensorflow/contrib/tpu/profiler/pip_package/README
+tensorflow/api_template.__init__.py
 tensorflow/__init__.py
\ No newline at end of file
diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 0245fe1c25..db99b2e452 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 18)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 19)
 
 
 @tf_export("compat.forward_compatible")
-- 
GitLab


From ebf048060a7e510df6b80c1451c713a76ffa8edf Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 01:49:06 -0800
Subject: [PATCH 814/873] Prepare FunctionSpec for serialization.

PiperOrigin-RevId: 226137312
---
 tensorflow/python/eager/function.py | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 7ba9f9290b..f7b83fcb2f 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -772,20 +772,33 @@ def _deterministic_dict_values(dictionary):
 class FunctionSpec(object):
   """Specification of how to bind arguments to a function."""
 
-  def __init__(self, python_function, input_signature):
+  @staticmethod
+  def from_function_and_signature(python_function, input_signature):
+    """Create a FunctionSpec instance given a python function and signature."""
     if isinstance(python_function, functools.partial):
       python_function_to_inspect = python_function.func
-      self._args_to_prepend = python_function.args or tuple()
-      self._kwargs_to_include = python_function.keywords or {}
+      args_to_prepend = python_function.args or tuple()
+      kwargs_to_include = python_function.keywords or {}
     else:
       python_function_to_inspect = python_function
-      self._args_to_prepend = tuple()
-      self._kwargs_to_include = {}
+      args_to_prepend = tuple()
+      kwargs_to_include = {}
 
     fullargspec = tf_inspect.getfullargspec(python_function_to_inspect)
+    is_method = tf_inspect.ismethod(python_function_to_inspect)
+
+    return FunctionSpec(fullargspec, is_method, args_to_prepend,
+                        kwargs_to_include, input_signature)
+
+  def __init__(self, fullargspec, is_method, args_to_prepend, kwargs_to_include,
+               input_signature):
+    self._fullargspec = fullargspec
+    self._is_method = is_method
+    self._args_to_prepend = args_to_prepend
+    self._kwargs_to_include = kwargs_to_include
     self._default_values = fullargspec.defaults
 
-    if tf_inspect.ismethod(python_function_to_inspect):
+    if self._is_method:
       # Remove `self`: default arguments shouldn't be matched to it.
       args = fullargspec.args[1:]
     else:
@@ -949,7 +962,8 @@ class PolymorphicFunction(object):
       self._python_function = python_function.func
     else:
       self._python_function = python_function
-    self._function_spec = FunctionSpec(python_function, input_signature)
+    self._function_spec = FunctionSpec.from_function_and_signature(
+        python_function, input_signature)
     self._name = name
     self._autograph = autograph
     self._function_cache = collections.OrderedDict()
-- 
GitLab


From 6dbe2cd22fa8d6b48ef9bb913c00de74886be07f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 04:09:13 -0800
Subject: [PATCH 815/873] Add proto encoder/decoder for general nested
 structures.

PiperOrigin-RevId: 226152256
---
 tensorflow/python/saved_model/BUILD           |  45 +-
 .../saved_model/nested_structure_coder.py     | 442 ++++++++++++++++++
 .../nested_structure_coder_test.py            | 183 ++++++++
 tensorflow/python/saved_model/struct.proto    |  86 ++++
 tensorflow/tools/pip_package/BUILD            |   1 +
 5 files changed, 749 insertions(+), 8 deletions(-)
 create mode 100644 tensorflow/python/saved_model/nested_structure_coder.py
 create mode 100644 tensorflow/python/saved_model/nested_structure_coder_test.py
 create mode 100644 tensorflow/python/saved_model/struct.proto

diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD
index 71d9e34592..8066b092b0 100644
--- a/tensorflow/python/saved_model/BUILD
+++ b/tensorflow/python/saved_model/BUILD
@@ -270,14 +270,6 @@ py_test(
     ],
 )
 
-tf_proto_library(
-    name = "saved_object_graph",
-    srcs = ["saved_object_graph.proto"],
-    cc_api_version = 2,
-    protodeps = tf_additional_all_protos(),
-    visibility = ["//tensorflow:internal"],
-)
-
 py_library(
     name = "save",
     srcs = [
@@ -385,3 +377,40 @@ py_library(
     srcs_version = "PY2AND3",
     deps = ["//tensorflow/python/eager:def_function"],
 )
+
+tf_proto_library(
+    name = "struct",
+    srcs = ["struct.proto"],
+    cc_api_version = 2,
+    protodeps = tf_additional_all_protos(),
+    visibility = ["//tensorflow:internal"],
+)
+
+tf_proto_library(
+    name = "saved_object_graph",
+    srcs = ["saved_object_graph.proto"],
+    cc_api_version = 2,
+    protodeps = tf_additional_all_protos(),
+    visibility = ["//tensorflow:internal"],
+)
+
+py_library(
+    name = "nested_structure_coder",
+    srcs = ["nested_structure_coder.py"],
+    deps = [
+        ":struct_py",
+        "//tensorflow/python:framework",
+        "@six_archive//:six",
+    ],
+)
+
+py_test(
+    name = "nested_structure_coder_test",
+    srcs = ["nested_structure_coder_test.py"],
+    deps = [
+        ":nested_structure_coder",
+        ":struct_py",
+        "//tensorflow/python:framework",
+        "//tensorflow/python/eager:test",
+    ],
+)
diff --git a/tensorflow/python/saved_model/nested_structure_coder.py b/tensorflow/python/saved_model/nested_structure_coder.py
new file mode 100644
index 0000000000..a4b9092a86
--- /dev/null
+++ b/tensorflow/python/saved_model/nested_structure_coder.py
@@ -0,0 +1,442 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Module that encodes (decodes) nested structures into (from) protos.
+
+The intended use is to serialize everything needed to restore a
+PolymorphicFunction that was saved into a SavedModel. This may include concrete
+function inputs and outputs, signatures, function specs, etc.
+
+Example use:
+coder = nested_structure_coder.StructureCoder()
+# Encode into proto.
+signature_proto = coder.encode_structure(polymorphic_function.input_signature)
+# Decode into a Python object.
+restored_signature = coder.decode_proto(signature_proto)
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import six
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.saved_model import struct_pb2
+
+
+class NotEncodableError(Exception):
+  """Error raised when a coder cannot encode an object."""
+
+
+class StructureCoder(object):
+  """Encoder and decoder for nested structures into protos."""
+
+  _codecs = []
+
+  @classmethod
+  def register_codec(cls, x):
+    cls._codecs.append(x)
+
+  @classmethod
+  def _get_encoders(cls):
+    return [(c.can_encode, c.do_encode) for c in cls._codecs]
+
+  @classmethod
+  def _get_decoders(cls):
+    return [(c.can_decode, c.do_decode) for c in cls._codecs]
+
+  def _map_structure(self, pyobj, coders, recursive_encode_fn):
+    for can, do in coders:
+      if can(pyobj):
+        return do(pyobj, recursive_encode_fn)
+    raise NotEncodableError(
+        "No encoder for object [%s] of type [%s]." % (str(pyobj), type(pyobj)))
+
+  def encode_structure(self, nested_structure):
+    """Encodes nested structures composed of encodable types into a proto.
+
+    Args:
+      nested_structure: Structure to encode.
+
+    Returns:
+      Encoded proto.
+
+    Raises:
+      NotEncodableError: For values for which there are no encoders.
+    """
+
+    def encode_fn(nested_structure):
+      return self._map_structure(nested_structure, self._get_encoders(),
+                                 encode_fn)
+
+    return encode_fn(nested_structure)
+
+  def can_encode(self, nested_structure):
+    """Determines whether a nested structure can be encoded into a proto.
+
+    Args:
+      nested_structure: Structure to encode.
+
+    Returns:
+      True if the nested structured can be encoded.
+    """
+    try:
+      self.encode_structure(nested_structure)
+    except NotEncodableError:
+      return False
+    return True
+
+  def decode_proto(self, proto):
+    """Decodes proto representing a nested structure.
+
+    Args:
+      proto: Proto to decode.
+
+    Returns:
+      Decoded structure.
+
+    Raises:
+      NotEncodableError: For values for which there are no encoders.
+    """
+
+    def decode_fn(proto):
+      return self._map_structure(proto, self._get_decoders(), decode_fn)
+
+    return decode_fn(proto)
+
+
+class _ListCodec(object):
+  """Codec for lists."""
+
+  def can_encode(self, pyobj):
+    return isinstance(pyobj, list)
+
+  def do_encode(self, list_value, encode_fn):
+    encoded_list = struct_pb2.StructuredValue()
+    encoded_list.list_value.CopyFrom(struct_pb2.ListValue())
+    for element in list_value:
+      encoded_list.list_value.values.add().CopyFrom(encode_fn(element))
+    return encoded_list
+
+  def can_decode(self, value):
+    return value.HasField("list_value")
+
+  def do_decode(self, value, decode_fn):
+    return [decode_fn(element) for element in value.list_value.values]
+
+
+StructureCoder.register_codec(_ListCodec())
+
+
+def _is_tuple(obj):
+  return not _is_named_tuple(obj) and isinstance(obj, tuple)
+
+
+def _is_named_tuple(instance):
+  """Returns True iff `instance` is a `namedtuple`.
+
+  Args:
+    instance: An instance of a Python object.
+
+  Returns:
+    True if `instance` is a `namedtuple`.
+  """
+  if not isinstance(instance, tuple):
+    return False
+  return (hasattr(instance, "_fields") and
+          isinstance(instance._fields, collections.Sequence) and
+          all(isinstance(f, six.string_types) for f in instance._fields))
+
+
+class _TupleCodec(object):
+  """Codec for tuples."""
+
+  def can_encode(self, pyobj):
+    return _is_tuple(pyobj)
+
+  def do_encode(self, tuple_value, encode_fn):
+    encoded_tuple = struct_pb2.StructuredValue()
+    encoded_tuple.tuple_value.CopyFrom(struct_pb2.TupleValue())
+    for element in tuple_value:
+      encoded_tuple.tuple_value.values.add().CopyFrom(encode_fn(element))
+    return encoded_tuple
+
+  def can_decode(self, value):
+    return value.HasField("tuple_value")
+
+  def do_decode(self, value, decode_fn):
+    return tuple(decode_fn(element) for element in value.tuple_value.values)
+
+
+StructureCoder.register_codec(_TupleCodec())
+
+
+class _DictCodec(object):
+  """Codec for dicts."""
+
+  def can_encode(self, pyobj):
+    return isinstance(pyobj, dict)
+
+  def do_encode(self, dict_value, encode_fn):
+    encoded_dict = struct_pb2.StructuredValue()
+    encoded_dict.dict_value.CopyFrom(struct_pb2.DictValue())
+    for key, value in dict_value.items():
+      encoded_dict.dict_value.fields[key].CopyFrom(encode_fn(value))
+    return encoded_dict
+
+  def can_decode(self, value):
+    return value.HasField("dict_value")
+
+  def do_decode(self, value, decode_fn):
+    return {key: decode_fn(val) for key, val in value.dict_value.fields.items()}
+
+
+StructureCoder.register_codec(_DictCodec())
+
+
+class _NamedTupleCodec(object):
+  """Codec for namedtuples.
+
+  Encoding and decoding a namedtuple reconstructs a namedtuple with a different
+  actual Python type, but with same `typename` and `fields`.
+  """
+
+  def can_encode(self, pyobj):
+    return _is_named_tuple(pyobj)
+
+  def do_encode(self, named_tuple_value, encode_fn):
+    encoded_named_tuple = struct_pb2.StructuredValue()
+    encoded_named_tuple.named_tuple_value.CopyFrom(struct_pb2.NamedTupleValue())
+    encoded_named_tuple.named_tuple_value.name = \
+      named_tuple_value.__class__.__name__
+    for key in named_tuple_value._fields:
+      pair = encoded_named_tuple.named_tuple_value.values.add()
+      pair.key = key
+      pair.value.CopyFrom(encode_fn(named_tuple_value._asdict()[key]))
+    return encoded_named_tuple
+
+  def can_decode(self, value):
+    return value.HasField("named_tuple_value")
+
+  def do_decode(self, value, decode_fn):
+    key_value_pairs = value.named_tuple_value.values
+    items = [(pair.key, decode_fn(pair.value)) for pair in key_value_pairs]
+    named_tuple_type = collections.namedtuple(value.named_tuple_value.name,
+                                              [item[0] for item in items])
+    return named_tuple_type(**dict(items))
+
+
+StructureCoder.register_codec(_NamedTupleCodec())
+
+
+class _Float64Codec(object):
+  """Codec for floats."""
+
+  def can_encode(self, pyobj):
+    return isinstance(pyobj, float)
+
+  def do_encode(self, float64_value, encode_fn):
+    del encode_fn
+    value = struct_pb2.StructuredValue()
+    value.float64_value = float64_value
+    return value
+
+  def can_decode(self, value):
+    return value.HasField("float64_value")
+
+  def do_decode(self, value, decode_fn):
+    del decode_fn
+    return value.float64_value
+
+
+StructureCoder.register_codec(_Float64Codec())
+
+
+class _Int64Codec(object):
+  """Codec for Python integers (limited to 64 bit values)."""
+
+  def can_encode(self, pyobj):
+    return not isinstance(pyobj, bool) and isinstance(pyobj, int)
+
+  def do_encode(self, int_value, encode_fn):
+    del encode_fn
+    value = struct_pb2.StructuredValue()
+    value.int64_value = int_value
+    return value
+
+  def can_decode(self, value):
+    return value.HasField("int64_value")
+
+  def do_decode(self, value, decode_fn):
+    del decode_fn
+    return int(value.int64_value)
+
+
+StructureCoder.register_codec(_Int64Codec())
+
+
+class _StringCodec(object):
+  """Codec for strings.
+
+  See StructuredValue.string_value in proto/struct.proto for more detailed
+  explanation.
+  """
+
+  def can_encode(self, pyobj):
+    return isinstance(pyobj, str)
+
+  def do_encode(self, string_value, encode_fn):
+    del encode_fn
+    value = struct_pb2.StructuredValue()
+    value.string_value = string_value
+    return value
+
+  def can_decode(self, value):
+    return value.HasField("string_value")
+
+  def do_decode(self, value, decode_fn):
+    del decode_fn
+    return value.string_value
+
+
+StructureCoder.register_codec(_StringCodec())
+
+
+class _NoneCodec(object):
+  """Codec for None."""
+
+  def can_encode(self, pyobj):
+    return pyobj is None
+
+  def do_encode(self, none_value, encode_fn):
+    del encode_fn, none_value
+    value = struct_pb2.StructuredValue()
+    value.none_value.CopyFrom(struct_pb2.NoneValue())
+    return value
+
+  def can_decode(self, value):
+    return value.HasField("none_value")
+
+  def do_decode(self, value, decode_fn):
+    del decode_fn, value
+    return None
+
+
+StructureCoder.register_codec(_NoneCodec())
+
+
+class _BoolCodec(object):
+  """Codec for booleans."""
+
+  def can_encode(self, pyobj):
+    return isinstance(pyobj, bool)
+
+  def do_encode(self, bool_value, encode_fn):
+    del encode_fn
+    value = struct_pb2.StructuredValue()
+    value.bool_value = bool_value
+    return value
+
+  def can_decode(self, value):
+    return value.HasField("bool_value")
+
+  def do_decode(self, value, decode_fn):
+    del decode_fn
+    return value.bool_value
+
+
+StructureCoder.register_codec(_BoolCodec())
+
+
+class _TensorShapeCodec(object):
+  """Codec for `TensorShape`."""
+
+  def can_encode(self, pyobj):
+    return isinstance(pyobj, tensor_shape.TensorShape)
+
+  def do_encode(self, tensor_shape_value, encode_fn):
+    del encode_fn
+    encoded_tensor_shape = struct_pb2.StructuredValue()
+    encoded_tensor_shape.tensor_shape_value.CopyFrom(
+        tensor_shape_value.as_proto())
+    return encoded_tensor_shape
+
+  def can_decode(self, value):
+    return value.HasField("tensor_shape_value")
+
+  def do_decode(self, value, decode_fn):
+    del decode_fn
+    return tensor_shape.TensorShape(value.tensor_shape_value)
+
+
+StructureCoder.register_codec(_TensorShapeCodec())
+
+
+class _TensorTypeCodec(object):
+  """Codec for `TensorType`."""
+
+  def can_encode(self, pyobj):
+    return isinstance(pyobj, dtypes.DType)
+
+  def do_encode(self, tensor_dtype_value, encode_fn):
+    del encode_fn
+    encoded_tensor_type = struct_pb2.StructuredValue()
+    encoded_tensor_type.tensor_dtype_value = tensor_dtype_value.as_datatype_enum
+    return encoded_tensor_type
+
+  def can_decode(self, value):
+    return value.HasField("tensor_dtype_value")
+
+  def do_decode(self, value, decode_fn):
+    del decode_fn
+    return dtypes.DType(value.tensor_dtype_value)
+
+
+StructureCoder.register_codec(_TensorTypeCodec())
+
+
+class _TensorSpecCodec(object):
+  """Codec for `TensorSpec`."""
+
+  def can_encode(self, pyobj):
+    return isinstance(pyobj, tensor_spec.TensorSpec)
+
+  def do_encode(self, tensor_spec_value, encode_fn):
+    encoded_tensor_spec = struct_pb2.StructuredValue()
+    encoded_tensor_spec.tensor_spec_value.CopyFrom(
+        struct_pb2.TensorSpecProto(
+            shape=encode_fn(tensor_spec_value.shape).tensor_shape_value,
+            dtype=encode_fn(tensor_spec_value.dtype).tensor_dtype_value,
+            name=tensor_spec_value.name))
+    return encoded_tensor_spec
+
+  def can_decode(self, value):
+    return value.HasField("tensor_spec_value")
+
+  def do_decode(self, value, decode_fn):
+    return tensor_spec.TensorSpec(
+        shape=decode_fn(
+            struct_pb2.StructuredValue(
+                tensor_shape_value=value.tensor_spec_value.shape)),
+        dtype=decode_fn(
+            struct_pb2.StructuredValue(
+                tensor_dtype_value=value.tensor_spec_value.dtype)),
+        name=value.tensor_spec_value.name)
+
+
+StructureCoder.register_codec(_TensorSpecCodec())
diff --git a/tensorflow/python/saved_model/nested_structure_coder_test.py b/tensorflow/python/saved_model/nested_structure_coder_test.py
new file mode 100644
index 0000000000..8636301719
--- /dev/null
+++ b/tensorflow/python/saved_model/nested_structure_coder_test.py
@@ -0,0 +1,183 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for nested structure coding."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.platform import test
+from tensorflow.python.saved_model import nested_structure_coder
+from tensorflow.python.saved_model import struct_pb2
+
+
+class NestedStructureTest(test.TestCase):
+
+  def setUp(self):
+    self._coder = nested_structure_coder.StructureCoder()
+
+  def testEncodeDecodeList(self):
+    structure = [1.5, 2.5, 3.0]
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    expected.list_value.values.add().float64_value = 1.5
+    expected.list_value.values.add().float64_value = 2.5
+    expected.list_value.values.add().float64_value = 3.0
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(structure, decoded)
+
+  def testEncodeDecodeTuple(self):
+    structure = ("hello", [3, (2, 1)])
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    expected.tuple_value.values.add().string_value = "hello"
+    list_value = expected.tuple_value.values.add().list_value
+    list_value.values.add().int64_value = 3
+    tuple_value = list_value.values.add().tuple_value
+    tuple_value.values.add().int64_value = 2
+    tuple_value.values.add().int64_value = 1
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(structure, decoded)
+
+  def testEncodeDecodeDict(self):
+    structure = dict(a=3, b=[7, 2.5])
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    expected.dict_value.fields["a"].int64_value = 3
+    list_value = expected.dict_value.fields["b"].list_value
+    list_value.values.add().int64_value = 7
+    list_value.values.add().float64_value = 2.5
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertIsInstance(decoded["a"], int)
+    self.assertEqual(structure, decoded)
+
+  def testEncodeDecodeTensorShape(self):
+    structure = [tensor_shape.TensorShape([1, 2, 3]), "hello"]
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    expected_list = expected.list_value
+    expected_tensor_shape = expected_list.values.add().tensor_shape_value
+    expected_tensor_shape.dim.add().size = 1
+    expected_tensor_shape.dim.add().size = 2
+    expected_tensor_shape.dim.add().size = 3
+    expected_tensor_shape = expected_list.values.add().string_value = "hello"
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(structure, decoded)
+
+  def testEncodeDecodeNamedTuple(self):
+    named_tuple_type = collections.namedtuple("NamedTuple", ["x", "y"])
+    named_tuple = named_tuple_type(x=[1, 2], y="hello")
+    self.assertTrue(self._coder.can_encode(named_tuple))
+    encoded = self._coder.encode_structure(named_tuple)
+    expected = struct_pb2.StructuredValue()
+    expected_named_tuple = expected.named_tuple_value
+    expected_named_tuple.name = "NamedTuple"
+    key_value_pair = expected_named_tuple.values.add()
+    key_value_pair.key = "x"
+    list_value = key_value_pair.value.list_value
+    list_value.values.add().int64_value = 1
+    list_value.values.add().int64_value = 2
+    key_value_pair = expected_named_tuple.values.add()
+    key_value_pair.key = "y"
+    key_value_pair.value.string_value = "hello"
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(named_tuple._asdict(), decoded._asdict())
+    self.assertEqual(named_tuple.__class__.__name__, decoded.__class__.__name__)
+
+  def testNone(self):
+    structure = [1.0, None]
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    expected.list_value.values.add().float64_value = 1.0
+    expected.list_value.values.add().none_value.CopyFrom(struct_pb2.NoneValue())
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(structure, decoded)
+
+  def testBool(self):
+    structure = [False]
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    expected.list_value.values.add().bool_value = False
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(structure, decoded)
+
+  def testEmptyStructures(self):
+    structure = [list(), dict(), tuple()]
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    expected.list_value.values.add().list_value.CopyFrom(struct_pb2.ListValue())
+    expected.list_value.values.add().dict_value.CopyFrom(struct_pb2.DictValue())
+    expected.list_value.values.add().tuple_value.CopyFrom(
+        struct_pb2.TupleValue())
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(structure, decoded)
+
+  def testDtype(self):
+    structure = [dtypes.int64]
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    list_value = expected.list_value.values.add()
+    list_value.tensor_dtype_value = dtypes.int64.as_datatype_enum
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(structure, decoded)
+
+  def testEncodeDecodeTensorSpec(self):
+    structure = [tensor_spec.TensorSpec([1, 2, 3], dtypes.int64, "hello")]
+    self.assertTrue(self._coder.can_encode(structure))
+    encoded = self._coder.encode_structure(structure)
+    expected = struct_pb2.StructuredValue()
+    expected_list = expected.list_value
+    expected_tensor_spec = expected_list.values.add().tensor_spec_value
+    expected_tensor_spec.shape.dim.add().size = 1
+    expected_tensor_spec.shape.dim.add().size = 2
+    expected_tensor_spec.shape.dim.add().size = 3
+    expected_tensor_spec.name = "hello"
+    expected_tensor_spec.dtype = dtypes.int64.as_datatype_enum
+    self.assertEqual(expected, encoded)
+    decoded = self._coder.decode_proto(encoded)
+    self.assertEqual(structure, decoded)
+
+  def testNotEncodable(self):
+
+    class NotEncodable(object):
+      pass
+
+    self.assertFalse(self._coder.can_encode([NotEncodable()]))
+
+
+if __name__ == "__main__":
+  test.main()
diff --git a/tensorflow/python/saved_model/struct.proto b/tensorflow/python/saved_model/struct.proto
new file mode 100644
index 0000000000..fd7db84e05
--- /dev/null
+++ b/tensorflow/python/saved_model/struct.proto
@@ -0,0 +1,86 @@
+syntax = "proto3";
+
+import "tensorflow/core/framework/tensor_shape.proto";
+import "tensorflow/core/framework/types.proto";
+
+package tensorflow;
+
+// `StructuredValue` represents a dynamically typed value representing various
+// data structures that are inspired by Python data structures typically used in
+// TensorFlow functions as inputs and outputs.
+message StructuredValue {
+  // The kind of value.
+  oneof kind {
+    // Represents None.
+    NoneValue none_value = 1;
+
+    // Represents a double-precision floating-point value (a Python `float`).
+    double float64_value = 11;
+    // Represents a signed integer value, limited to 64 bits.
+    // Larger values from Python's arbitrary-precision integers are unsupported.
+    sint64 int64_value = 12;
+    // Represents a string of Unicode characters stored in a Python `str`.
+    // In Python 3, this is exactly what type `str` is.
+    // In Python 2, this is the UTF-8 encoding of the characters.
+    // For strings with ASCII characters only (as often used in TensorFlow code)
+    // there is effectively no difference between the language versions.
+    // The obsolescent `unicode` type of Python 2 is not supported here.
+    string string_value = 13;
+    // Represents a boolean value.
+    bool bool_value = 14;
+
+    // Represents a tf.TensorShape.
+    tensorflow.TensorShapeProto tensor_shape_value = 31;
+    // Represents an enum value for tf.DType.
+    tensorflow.DataType tensor_dtype_value = 32;
+    // Represents a value for tf.TensorShape.
+    TensorSpecProto tensor_spec_value = 33;
+
+    // Represents a list of `Value`.
+    ListValue list_value = 51;
+    // Represents a tuple of `Value`.
+    TupleValue tuple_value = 52;
+    // Represents a dict `Value`.
+    DictValue dict_value = 53;
+    // Represents Python's namedtuple.
+    NamedTupleValue named_tuple_value = 54;
+  }
+}
+
+// Represents None.
+message NoneValue {}
+
+// Represents a Python list.
+message ListValue {
+  repeated StructuredValue values = 1;
+}
+
+// Represents a Python tuple.
+message TupleValue {
+  repeated StructuredValue values = 1;
+}
+
+// Represents a Python dict keyed by `str`.
+// The comment on Unicode from Value.string_value applies analogously.
+message DictValue {
+  map<string, StructuredValue> fields = 1;
+}
+
+// Represents a (key, value) pair.
+message PairValue {
+  string key = 1;
+  StructuredValue value = 2;
+}
+
+// Represents Python's namedtuple.
+message NamedTupleValue {
+  string name = 1;
+  repeated PairValue values = 2;
+}
+
+// A protobuf to tf.TensorSpec.
+message TensorSpecProto {
+  string name = 1;
+  tensorflow.TensorShapeProto shape = 2;
+  tensorflow.DataType dtype = 3;
+};
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 2de00ea957..3e82b49b5b 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -93,6 +93,7 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python/kernel_tests/signal:test_util",
     "//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files",
     "//tensorflow/python/ops/ragged:ragged_test_util",
+    "//tensorflow/python/saved_model:nested_structure_coder",
     "//tensorflow/python/saved_model:saved_model",
     "//tensorflow/python/tools:tools_pip",
     "//tensorflow/python/tools/api/generator:create_python_api",
-- 
GitLab


From e1d8157b27b06ccfc6ae1a8bad117310ed232cd6 Mon Sep 17 00:00:00 2001
From: Yu-Cheng Ling <ycling@google.com>
Date: Wed, 19 Dec 2018 06:44:40 -0800
Subject: [PATCH 816/873] TFLite iOS example: Restrict iOS target to arm64.

PiperOrigin-RevId: 226164875
---
 .../ios/camera/tflite_camera_example.xcodeproj/project.pbxproj  | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj b/tensorflow/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
index 9b5c2b32a8..bbab17b400 100644
--- a/tensorflow/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
+++ b/tensorflow/lite/examples/ios/camera/tflite_camera_example.xcodeproj/project.pbxproj
@@ -234,6 +234,7 @@
 				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
 				SWIFT_VERSION = 3.0;
+				VALID_ARCHS = arm64;
 			};
 			name = Debug;
 		};
@@ -253,6 +254,7 @@
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule";
 				SWIFT_VERSION = 3.0;
+				VALID_ARCHS = arm64;
 			};
 			name = Release;
 		};
-- 
GitLab


From 23057a683adb1b5af4d202c44001bf0aa66c982c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 06:47:16 -0800
Subject: [PATCH 817/873] Update DropoutWrapper to LayerRNNCell.

PiperOrigin-RevId: 226165140
---
 tensorflow/contrib/rnn/BUILD                  |   1 +
 .../python/kernel_tests/core_rnn_cell_test.py | 145 +++++++++++++++---
 tensorflow/python/ops/rnn_cell_impl.py        |  99 +++++++++++-
 .../tools/api/generator/api_init_files.bzl    |   1 +
 .../golden/v2/tensorflow.nn.rnn_cell.pbtxt    |   4 -
 .../tools/api/golden/v2/tensorflow.pbtxt      |   4 +
 ... => tensorflow.rnn.-dropout-wrapper.pbtxt} |   8 +-
 .../tools/api/golden/v2/tensorflow.rnn.pbtxt  |   7 +
 tensorflow/tools/compatibility/renames_v2.py  |   1 +
 9 files changed, 238 insertions(+), 32 deletions(-)
 rename tensorflow/tools/api/golden/v2/{tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt => tensorflow.rnn.-dropout-wrapper.pbtxt} (94%)
 create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.rnn.pbtxt

diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD
index e124867415..44b232e0f2 100644
--- a/tensorflow/contrib/rnn/BUILD
+++ b/tensorflow/contrib/rnn/BUILD
@@ -118,6 +118,7 @@ cuda_py_tests(
         "//tensorflow/python:rnn_cell",
         "//tensorflow/python:variable_scope",
         "//tensorflow/python:variables",
+        "@absl_py//absl/testing:parameterized",
     ],
 )
 
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
index 7d57b0413a..a0d013c618 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import os
 
+from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.contrib import rnn as contrib_rnn
@@ -31,6 +32,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import test_util
+from tensorflow.python.keras import layers as keras_layers
+from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
@@ -805,12 +808,13 @@ class RNNCellTest(test.TestCase):
         self.assertAllClose(res[1], [[0.13248, 0.13248]])
 
 
-class DropoutWrapperTest(test.TestCase):
+class DropoutWrapperTest(test.TestCase, parameterized.TestCase):
 
   def _testDropoutWrapper(self,
                           batch_size=None,
                           time_steps=None,
                           parallel_iterations=None,
+                          wrapper_type=None,
                           **kwargs):
     with self.cached_session() as sess:
       with variable_scope.variable_scope(
@@ -832,7 +836,7 @@ class DropoutWrapperTest(test.TestCase):
               constant([[0.1, 0.1, 0.1]] * batch_size, dtype=dtypes.float32)
           ] * 2)
         outputs, final_state = rnn.dynamic_rnn(
-            cell=rnn_cell_impl.DropoutWrapper(
+            cell=wrapper_type(
                 rnn_cell_impl.LSTMCell(3), dtype=x.dtype, **kwargs),
             time_major=True,
             parallel_iterations=parallel_iterations,
@@ -845,16 +849,34 @@ class DropoutWrapperTest(test.TestCase):
         self.assertEqual(res[1].h.shape, (batch_size, 3))
         return res
 
-  def testWrappedCellProperty(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperProperties(self, wrapper_type):
     cell = rnn_cell_impl.BasicRNNCell(10)
-    wrapper = rnn_cell_impl.DropoutWrapper(cell)
+    wrapper = wrapper_type(cell)
     # Github issue 15810
     self.assertEqual(wrapper.wrapped_cell, cell)
-
-  def testDropoutWrapperKeepAllConstantInput(self):
+    self.assertEqual(wrapper.state_size, 10)
+    self.assertEqual(wrapper.output_size, 10)
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperZeroState(self, wrapper_type):
+    class _Cell(rnn_cell_impl.BasicRNNCell):
+
+      def zero_state(self, batch_size=None, dtype=None):
+        return "wrapped_cell_zero_state"
+    wrapper = wrapper_type(_Cell(10))
+    self.assertEqual(wrapper.zero_state(10, dtypes.float32),
+                     "wrapped_cell_zero_state")
+
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepAllConstantInput(self, wrapper_type):
     keep = array_ops.ones([])
     res = self._testDropoutWrapper(
-        input_keep_prob=keep, output_keep_prob=keep, state_keep_prob=keep)
+        input_keep_prob=keep, output_keep_prob=keep, state_keep_prob=keep,
+        wrapper_type=wrapper_type)
     true_full_output = np.array(
         [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
         dtype=np.float32)
@@ -864,10 +886,13 @@ class DropoutWrapperTest(test.TestCase):
     self.assertAllClose(true_full_output[1], res[1].h)
     self.assertAllClose(true_full_final_c, res[1].c)
 
-  def testDropoutWrapperKeepAll(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepAll(self, wrapper_type):
     keep = variable_scope.get_variable("all", initializer=1.0)
     res = self._testDropoutWrapper(
-        input_keep_prob=keep, output_keep_prob=keep, state_keep_prob=keep)
+        input_keep_prob=keep, output_keep_prob=keep, state_keep_prob=keep,
+        wrapper_type=wrapper_type)
     true_full_output = np.array(
         [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
         dtype=np.float32)
@@ -877,7 +902,9 @@ class DropoutWrapperTest(test.TestCase):
     self.assertAllClose(true_full_output[1], res[1].h)
     self.assertAllClose(true_full_final_c, res[1].c)
 
-  def testDropoutWrapperWithSeed(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperWithSeed(self, wrapper_type):
     keep_some = 0.5
     random_seed.set_random_seed(2)
     ## Use parallel_iterations = 1 in both calls to
@@ -889,7 +916,8 @@ class DropoutWrapperTest(test.TestCase):
         output_keep_prob=keep_some,
         state_keep_prob=keep_some,
         seed=10,
-        parallel_iterations=1)
+        parallel_iterations=1,
+        wrapper_type=wrapper_type)
     # Clear away the graph and the test session (which keeps variables around)
     ops.reset_default_graph()
     self._ClearCachedSession()
@@ -899,18 +927,22 @@ class DropoutWrapperTest(test.TestCase):
         output_keep_prob=keep_some,
         state_keep_prob=keep_some,
         seed=10,
-        parallel_iterations=1)
+        parallel_iterations=1,
+        wrapper_type=wrapper_type)
     self.assertAllClose(res_standard_1[0], res_standard_2[0])
     self.assertAllClose(res_standard_1[1].c, res_standard_2[1].c)
     self.assertAllClose(res_standard_1[1].h, res_standard_2[1].h)
 
-  def testDropoutWrapperKeepNoOutput(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepNoOutput(self, wrapper_type):
     keep_all = variable_scope.get_variable("all", initializer=1.0)
     keep_none = variable_scope.get_variable("none", initializer=1e-6)
     res = self._testDropoutWrapper(
         input_keep_prob=keep_all,
         output_keep_prob=keep_none,
-        state_keep_prob=keep_all)
+        state_keep_prob=keep_all,
+        wrapper_type=wrapper_type)
     true_full_output = np.array(
         [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
         dtype=np.float32)
@@ -920,7 +952,9 @@ class DropoutWrapperTest(test.TestCase):
     self.assertAllClose(true_full_output[1], res[1].h)
     self.assertAllClose(true_full_final_c, res[1].c)
 
-  def testDropoutWrapperKeepNoStateExceptLSTMCellMemory(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepNoStateExceptLSTMCellMemory(self, wrapper_type):
     keep_all = variable_scope.get_variable("all", initializer=1.0)
     keep_none = variable_scope.get_variable("none", initializer=1e-6)
     # Even though we dropout state, by default DropoutWrapper never
@@ -928,7 +962,8 @@ class DropoutWrapperTest(test.TestCase):
     res = self._testDropoutWrapper(
         input_keep_prob=keep_all,
         output_keep_prob=keep_all,
-        state_keep_prob=keep_none)
+        state_keep_prob=keep_none,
+        wrapper_type=wrapper_type)
     true_c_state = np.array([[1.713925, 1.713925, 1.713925]], dtype=np.float32)
     true_full_output = np.array(
         [[[0.751109, 0.751109, 0.751109]], [[0.895509, 0.895509, 0.895509]]],
@@ -941,7 +976,9 @@ class DropoutWrapperTest(test.TestCase):
     # c state of an LSTMStateTuple is NEVER modified.
     self.assertAllClose(true_c_state, res[1].c)
 
-  def testDropoutWrapperKeepNoInput(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperKeepNoInput(self, wrapper_type):
     keep_all = variable_scope.get_variable("all", initializer=1.0)
     keep_none = variable_scope.get_variable("none", initializer=1e-6)
     true_full_output = np.array(
@@ -953,12 +990,15 @@ class DropoutWrapperTest(test.TestCase):
     res = self._testDropoutWrapper(
         input_keep_prob=keep_none,
         output_keep_prob=keep_all,
-        state_keep_prob=keep_all)
+        state_keep_prob=keep_all,
+        wrapper_type=wrapper_type)
     self.assertGreater(np.linalg.norm(res[0] - true_full_output), 1e-4)
     self.assertGreater(np.linalg.norm(res[1].h - true_full_output[1]), 1e-4)
     self.assertGreater(np.linalg.norm(res[1].c - true_full_final_c), 1e-4)
 
-  def testDropoutWrapperRecurrentOutput(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperRecurrentOutput(self, wrapper_type):
     keep_some = 0.8
     keep_all = variable_scope.get_variable("all", initializer=1.0)
     res = self._testDropoutWrapper(
@@ -966,6 +1006,7 @@ class DropoutWrapperTest(test.TestCase):
         output_keep_prob=keep_some,
         state_keep_prob=keep_all,
         variational_recurrent=True,
+        wrapper_type=wrapper_type,
         input_size=3,
         batch_size=5,
         time_steps=7)
@@ -974,13 +1015,16 @@ class DropoutWrapperTest(test.TestCase):
     for m in output_mask[1:]:
       self.assertAllClose(output_mask[0], m)
 
-  def testDropoutWrapperRecurrentStateInputAndOutput(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperRecurrentStateInputAndOutput(self, wrapper_type):
     keep_some = 0.9
     res = self._testDropoutWrapper(
         input_keep_prob=keep_some,
         output_keep_prob=keep_some,
         state_keep_prob=keep_some,
         variational_recurrent=True,
+        wrapper_type=wrapper_type,
         input_size=3,
         batch_size=5,
         time_steps=7)
@@ -1002,7 +1046,10 @@ class DropoutWrapperTest(test.TestCase):
     for batch_entry in state_h_mask:
       self.assertAllClose(batch_entry, state_h_mask[0])
 
-  def testDropoutWrapperRecurrentStateInputAndOutputWithSeed(self):
+  @parameterized.parameters(
+      [rnn_cell_impl.DropoutWrapper, rnn_cell_impl.DropoutWrapperV2])
+  def testDropoutWrapperRecurrentStateInputAndOutputWithSeed(
+      self, wrapper_type):
     keep_some = 0.9
     random_seed.set_random_seed(2347)
     np.random.seed(23487)
@@ -1011,6 +1058,7 @@ class DropoutWrapperTest(test.TestCase):
         output_keep_prob=keep_some,
         state_keep_prob=keep_some,
         variational_recurrent=True,
+        wrapper_type=wrapper_type,
         input_size=3,
         batch_size=5,
         time_steps=7,
@@ -1024,6 +1072,7 @@ class DropoutWrapperTest(test.TestCase):
         output_keep_prob=keep_some,
         state_keep_prob=keep_some,
         variational_recurrent=True,
+        wrapper_type=wrapper_type,
         input_size=3,
         batch_size=5,
         time_steps=7,
@@ -1050,6 +1099,60 @@ class DropoutWrapperTest(test.TestCase):
     self.assertAllClose(res0[1].c, res1[1].c)
     self.assertAllClose(res0[1].h, res1[1].h)
 
+  def testDropoutWrapperKerasStyle(self):
+    """Tests if DropoutWrapperV2 cell is instantiated in keras style scope."""
+    wrapped_cell_v2 = rnn_cell_impl.DropoutWrapperV2(
+        rnn_cell_impl.BasicRNNCell(1))
+    self.assertTrue(wrapped_cell_v2._keras_style)
+
+    wrapped_cell = rnn_cell_impl.DropoutWrapper(rnn_cell_impl.BasicRNNCell(1))
+    self.assertFalse(wrapped_cell._keras_style)
+
+  def testDropoutWrapperV2VariableNames(self):
+    """Tests that variables names do not depend on wrapper in RNN layer."""
+
+    def _rnn_input(apply_wrapper):
+      """Creates a RNN layer with/without wrapper and returns built rnn cell."""
+      with base_layer.keras_style_scope():
+        base_cell = rnn_cell_impl.MultiRNNCell(
+            [rnn_cell_impl.BasicRNNCell(1) for _ in range(2)])
+      if apply_wrapper:
+        rnn_cell = rnn_cell_impl.DropoutWrapperV2(base_cell)
+      else:
+        rnn_cell = base_cell
+      rnn_layer = keras_layers.RNN(rnn_cell)
+      inputs = ops.convert_to_tensor([[[1]]], dtype=dtypes.float32)
+      _ = rnn_layer(inputs)
+      return base_cell._cells[0]
+
+    rnn_1 = _rnn_input(True)
+    ops.reset_default_graph()
+    rnn_2 = _rnn_input(False)
+
+    self.assertLen(rnn_1.weights, expected_len=2)
+    self.assertCountEqual([v.name for v in rnn_1.weights],
+                          [v.name for v in rnn_2.weights])
+
+  def testDropoutWrapperV2Caller(self):
+    """Tests that DropoutWrapperV2 is using the LayerRNNCell's caller."""
+
+    with base_layer.keras_style_scope():
+      base_cell = rnn_cell_impl.MultiRNNCell(
+          [rnn_cell_impl.BasicRNNCell(1) for _ in range(2)])
+    rnn_cell = rnn_cell_impl.DropoutWrapperV2(base_cell)
+    inputs = ops.convert_to_tensor([[1]], dtype=dtypes.float32)
+    state = ops.convert_to_tensor([[1]], dtype=dtypes.float32)
+    _ = rnn_cell(inputs, [state, state])
+    weights = base_cell._cells[0].weights
+    self.assertLen(weights, expected_len=2)
+    self.assertTrue(all(["dropout_wrapper" in v.name for v in weights]))
+
+  def testDropoutWrapperV2Build(self):
+    cell = rnn_cell_impl.LSTMCell(10)
+    wrapper = rnn_cell_impl.DropoutWrapperV2(cell)
+    wrapper.build((1,))
+    self.assertTrue(cell.built)
+
 
 def basic_rnn_cell(inputs, state, num_units, scope=None):
   if state is None:
diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py
index d808a0c56e..a4ec90d36c 100644
--- a/tensorflow/python/ops/rnn_cell_impl.py
+++ b/tensorflow/python/ops/rnn_cell_impl.py
@@ -1076,7 +1076,7 @@ def _default_dropout_state_filter_visitor(substate):
   return True
 
 
-@tf_export("nn.rnn_cell.DropoutWrapper")
+@tf_export(v1=["nn.rnn_cell.DropoutWrapper"])
 class DropoutWrapper(RNNCell):
   """Operator adding dropout to inputs and outputs of the given cell."""
 
@@ -1282,8 +1282,25 @@ class DropoutWrapper(RNNCell):
           shallow_filtered_substructure, dropout,
           *[shallow_filtered_substructure, values, recurrent_noise])
 
-  def __call__(self, inputs, state, scope=None):
-    """Run the cell with the declared dropouts."""
+  def _call(self, inputs, state, call_fn, **kwargs):
+    """Defines a helper method that runs the wrapped cell and applies dropout.
+
+    This helper is called from the DropoutWrapper's `call` or `__call__`
+    methods.
+
+    Args:
+      inputs: A tensor with wrapped cell's input.
+      state: A tensor or tuple of tensors with wrapped cell's state.
+      call_fn: Wrapped cell's method to use for step computation (cell's
+        `__call__` or 'call' method).
+      **kwargs: Additional arguments.
+
+    Returns:
+      A pair containing:
+
+      - Output: A tensor with cell's output.
+      - New state: A tensor or tuple of tensors with new wrapped cell's state.
+    """
     def _should_dropout(p):
       return (not isinstance(p, float)) or p < 1
 
@@ -1291,7 +1308,7 @@ class DropoutWrapper(RNNCell):
       inputs = self._dropout(inputs, "input",
                              self._recurrent_input_noise,
                              self._input_keep_prob)
-    output, new_state = self._cell(inputs, state, scope=scope)
+    output, new_state = call_fn(inputs, state, **kwargs)
     if _should_dropout(self._state_keep_prob):
       # Identify which subsets of the state to perform dropout on and
       # which ones to keep.
@@ -1307,6 +1324,80 @@ class DropoutWrapper(RNNCell):
                              self._output_keep_prob)
     return output, new_state
 
+  def __call__(self, inputs, state, scope=None):
+    """Runs the cell with the declared dropouts.
+
+    We assume that the wrapped RNNCell is being built within its `__call__`
+    method. We directly use the wrapped cell's `__call__` in the overridden
+    DropoutWrapper `__call__` method.
+
+    This should allow to use the wrapped cell and the non-wrapped cell
+    equivalently when using `__call__`.
+
+    Args:
+      inputs: A tensor with wrapped cell's input.
+      state: A tensor or tuple of tensors with wrapped cell's state.
+      scope: VariableScope for the subgraph created in the wrapped cells'
+        `__call__`.
+
+    Returns:
+      A pair containing:
+
+      - Output: A tensor with cell's output.
+      - New state: A tensor or tuple of tensors with new wrapped cell's state.
+    """
+    return self._call(inputs, state, call_fn=self._cell.__call__, scope=scope)
+
+
+@tf_export("rnn.DropoutWrapper", v1=[])
+class DropoutWrapperV2(LayerRNNCell, DropoutWrapper):
+  """Operator adding dropout to inputs and outputs of the given cell."""
+
+  def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,
+               state_keep_prob=1.0, variational_recurrent=False,
+               input_size=None, dtype=None, seed=None,
+               dropout_state_filter_visitor=None):
+    """Runs init in Keras style scope to use Keras-style variable management."""
+
+    with base_layer.keras_style_scope():
+      super(DropoutWrapperV2, self).__init__(
+          cell=cell,
+          input_keep_prob=input_keep_prob,
+          output_keep_prob=output_keep_prob,
+          state_keep_prob=state_keep_prob,
+          variational_recurrent=variational_recurrent,
+          input_size=input_size,
+          dtype=dtype,
+          seed=seed,
+          dropout_state_filter_visitor=dropout_state_filter_visitor)
+
+  def build(self, inputs_shape):
+    self._cell.build(inputs_shape)
+    self.built = True
+
+  def call(self, inputs, state, **kwargs):
+    """Runs the cell with the declared dropouts.
+
+    When `call` is being used, we assume that the DropoutWrapper object has
+    been built and therefore the wrapped cells has been built via its `build`
+    method and its `call` method can be used directly.
+
+    This should allow to use the wrapped cell and the non-wrapped cell
+    equivalently when using `call` and `build`.
+
+    Args:
+      inputs: A tensor with wrapped cell's input.
+      state: A tensor or tuple of tensors with wrapped cell's state.
+      **kwargs: Additional arguments passed to the wrapped cell's `call`.
+
+    Returns:
+      A pair containing:
+
+      - Output: A tensor with cell's output.
+      - New state: A tensor or tuple of tensors with new wrapped cell's state.
+    """
+    return self._call(inputs, state, call_fn=self._cell.call, **kwargs)
+
 
 @tf_export("nn.rnn_cell.ResidualWrapper")
 class ResidualWrapper(RNNCell):
diff --git a/tensorflow/python/tools/api/generator/api_init_files.bzl b/tensorflow/python/tools/api/generator/api_init_files.bzl
index 6776d1bea6..8a3d6b31fd 100644
--- a/tensorflow/python/tools/api/generator/api_init_files.bzl
+++ b/tensorflow/python/tools/api/generator/api_init_files.bzl
@@ -33,6 +33,7 @@ TENSORFLOW_API_INIT_FILES = [
     "quantization/__init__.py",
     "ragged/__init__.py",
     "random/__init__.py",
+    "rnn/__init__.py",
     "saved_model/__init__.py",
     "sets/__init__.py",
     "signal/__init__.py",
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
index b1f687f529..e2496dff63 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.pbtxt
@@ -4,10 +4,6 @@ tf_module {
     name: "DeviceWrapper"
     mtype: "<type \'type\'>"
   }
-  member {
-    name: "DropoutWrapper"
-    mtype: "<type \'type\'>"
-  }
   member {
     name: "LSTMStateTuple"
     mtype: "<type \'type\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
index 0cd525167d..87d8e2ae7c 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt
@@ -288,6 +288,10 @@ tf_module {
     name: "resource"
     mtype: "<class \'tensorflow.python.framework.dtypes.DType\'>"
   }
+  member {
+    name: "rnn"
+    mtype: "<type \'module\'>"
+  }
   member {
     name: "saved_model"
     mtype: "<type \'module\'>"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.rnn.-dropout-wrapper.pbtxt
similarity index 94%
rename from tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
rename to tensorflow/tools/api/golden/v2/tensorflow.rnn.-dropout-wrapper.pbtxt
index 7582fd52b6..7721eed65b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.rnn.-dropout-wrapper.pbtxt
@@ -1,5 +1,7 @@
-path: "tensorflow.nn.rnn_cell.DropoutWrapper"
+path: "tensorflow.rnn.DropoutWrapper"
 tf_class {
+  is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.DropoutWrapperV2\'>"
+  is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.LayerRNNCell\'>"
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.DropoutWrapper\'>"
   is_instance: "<class \'tensorflow.python.ops.rnn_cell_impl.RNNCell\'>"
   is_instance: "<class \'tensorflow.python.layers.base.Layer\'>"
@@ -136,11 +138,11 @@ tf_class {
   }
   member_method {
     name: "build"
-    argspec: "args=[\'self\', \'_\'], varargs=None, keywords=None, defaults=None"
+    argspec: "args=[\'self\', \'inputs_shape\'], varargs=None, keywords=None, defaults=None"
   }
   member_method {
     name: "call"
-    argspec: "args=[\'self\', \'inputs\'], varargs=None, keywords=kwargs, defaults=None"
+    argspec: "args=[\'self\', \'inputs\', \'state\'], varargs=None, keywords=kwargs, defaults=None"
   }
   member_method {
     name: "compute_mask"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.rnn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.rnn.pbtxt
new file mode 100644
index 0000000000..42b13533dd
--- /dev/null
+++ b/tensorflow/tools/api/golden/v2/tensorflow.rnn.pbtxt
@@ -0,0 +1,7 @@
+path: "tensorflow.rnn"
+tf_module {
+  member {
+    name: "DropoutWrapper"
+    mtype: "<type \'type\'>"
+  }
+}
diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py
index ba72d1d202..6235eb3eed 100644
--- a/tensorflow/tools/compatibility/renames_v2.py
+++ b/tensorflow/tools/compatibility/renames_v2.py
@@ -396,6 +396,7 @@ renames = {
     'tf.nn.relu_layer': 'tf.compat.v1.nn.relu_layer',
     'tf.nn.rnn_cell.BasicLSTMCell': 'tf.compat.v1.nn.rnn_cell.BasicLSTMCell',
     'tf.nn.rnn_cell.BasicRNNCell': 'tf.compat.v1.nn.rnn_cell.BasicRNNCell',
+    'tf.nn.rnn_cell.DropoutWrapper': 'tf.compat.v1.nn.rnn_cell.DropoutWrapper',
     'tf.nn.rnn_cell.GRUCell': 'tf.compat.v1.nn.rnn_cell.GRUCell',
     'tf.nn.rnn_cell.LSTMCell': 'tf.compat.v1.nn.rnn_cell.LSTMCell',
     'tf.nn.rnn_cell.MultiRNNCell': 'tf.compat.v1.nn.rnn_cell.MultiRNNCell',
-- 
GitLab


From a6cb087ae7897a95944d7e2eec087af3cd5eb043 Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Wed, 19 Dec 2018 07:29:13 -0800
Subject: [PATCH 818/873] Enable dynamic learning rate in Keras with TPU
 Strategy

PiperOrigin-RevId: 226169788
---
 tensorflow/contrib/distribute/python/keras_test.py     |  2 +-
 .../python/keras/engine/distributed_training_utils.py  | 10 +---------
 tensorflow/python/keras/engine/training.py             |  3 +--
 tensorflow/python/keras/engine/training_distributed.py | 10 +++++++++-
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index 31a389aaca..ece8d66887 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -1034,7 +1034,7 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
       ref_output = np.ones((160, 1), dtype=np.float32)
       self.assertArrayNear(output, ref_output, 1e-1)
 
-  @combinations.generate(strategy_minus_tpu_combinations())
+  @combinations.generate(all_strategy_combinations())
   def testOptimizerWithCallbacks(self, distribution):
     with self.cached_session():
       # TODO(b/120946189): Investigate why default strategy + eager fails.
diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py
index 32129afe64..4598f63c02 100644
--- a/tensorflow/python/keras/engine/distributed_training_utils.py
+++ b/tensorflow/python/keras/engine/distributed_training_utils.py
@@ -152,14 +152,12 @@ def flatten_perdevice_values(distribution_strategy, perdevice_values):
           for e in distribution_strategy.unwrap(flattened)]
 
 
-def validate_callbacks(input_callbacks, optimizer, current_strategy):
+def validate_callbacks(input_callbacks, optimizer):
   """Validate whether given callbacks are supported by DistributionStrategy.
 
   Args:
     input_callbacks: List of callbacks passed by the user to fit.
     optimizer: Optimizer instance used to train the model.
-    current_strategy: The DistributionStrategy used to distribute training
-      and validation.
 
   Raises:
     ValueError: If `LearningRateScheduler` or `ReduceLROnPlateau` is one of the
@@ -183,12 +181,6 @@ def validate_callbacks(input_callbacks, optimizer, current_strategy):
                         '`_grouped_model` attribute of your original model.')
       if isinstance(callback, (callbacks.LearningRateScheduler,
                                callbacks.ReduceLROnPlateau)):
-        strategy_name = current_strategy.__class__.__name__
-        # TODO(anjalisridhar): We might need to add a condition for multi
-        # worker strategy when we support it in Keras.
-        if is_tpu_strategy(current_strategy):
-          raise ValueError('%s callback is not supported with %s.' %
-                           (callback, strategy_name))
 
         if not isinstance(optimizer, optimizer_v2.OptimizerV2):
           raise ValueError('You must specify a Keras Optimizer V2 when using '
diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py
index e1706fb310..4a398cdb16 100644
--- a/tensorflow/python/keras/engine/training.py
+++ b/tensorflow/python/keras/engine/training.py
@@ -786,8 +786,7 @@ class Model(Network):
 
     # Validate and standardize user data.
     if self._distribution_strategy:
-      distributed_training_utils.validate_callbacks(callbacks, self.optimizer,
-                                                    self._distribution_strategy)
+      distributed_training_utils.validate_callbacks(callbacks, self.optimizer)
 
       distributed_training_utils.validate_inputs(
           x, y, self._distribution_strategy)
diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py
index 2affc4b0d6..7842228e05 100644
--- a/tensorflow/python/keras/engine/training_distributed.py
+++ b/tensorflow/python/keras/engine/training_distributed.py
@@ -82,6 +82,8 @@ def experimental_fit_loop(model,
       ValueError: in case of invalid arguments.
   """
   current_strategy = model._distribution_strategy
+  scope = current_strategy.scope()
+  scope.__enter__()
 
   def _per_device_fit_function(model):
     model._make_fit_function()
@@ -236,7 +238,7 @@ def experimental_fit_loop(model,
     with current_strategy.scope():
       _copy_weights_to_original_model(model, model._distributed_model_train,
                                       'train')
-
+  scope.__exit__(None, None, None)
   return model.history
 
 
@@ -261,6 +263,8 @@ def experimental_test_loop(model,
       the display labels for the outputs.
   """
   current_strategy = model._distribution_strategy
+  scope = current_strategy.scope()
+  scope.__enter__()
 
   def _per_device_eval_function(model):
     model._make_eval_function()
@@ -349,6 +353,7 @@ def experimental_test_loop(model,
     if verbose >= 1:
       progbar.update(step + 1)
 
+  scope.__exit__(None, None, None)
   if len(outs) >= 0:
     outs[0] /= (steps)
 
@@ -374,6 +379,8 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
       (if the model has multiple outputs).
   """
   current_strategy = model._distribution_strategy
+  scope = current_strategy.scope()
+  scope.__enter__()
 
   # TODO(priyag, sourabhbajaj): This should likely not be hardcoded here.
   K.set_learning_phase(0)
@@ -457,6 +464,7 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
     if verbose >= 1:
       progbar.update(step + 1)
 
+  scope.__exit__(None, None, None)
   if len(unconcatenated_outs) == 1:
     return np.concatenate(unconcatenated_outs[0], axis=0)
   return [
-- 
GitLab


From 0822126d7e0b9cd612dffaf5a89eb930e15e37f9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 07:37:40 -0800
Subject: [PATCH 819/873] Add FunctionSpec to def_funcion.PolymorphicFunction.
 In the future, this should be consolidated with
 function.PolymorphicFunction's FunctionSpec.

PiperOrigin-RevId: 226170883
---
 tensorflow/python/eager/def_function.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py
index fc14558cc7..ebc47d1566 100644
--- a/tensorflow/python/eager/def_function.py
+++ b/tensorflow/python/eager/def_function.py
@@ -236,6 +236,10 @@ class PolymorphicFunction(object):
     """
     self._python_function = python_function
     self._input_signature = input_signature
+    # TODO(vbardiovsky): Both _stateful_fn and _stateless_fn are populating the
+    # same FunctionSpec. Consider removing it from both and passing in instead.
+    self._function_spec = function_lib.FunctionSpec.from_function_and_signature(
+        python_function, input_signature)
     self._autograph = autograph
     self._experimental_autograph_options = experimental_autograph_options
     if self._experimental_autograph_options is not None:
@@ -265,15 +269,8 @@ class PolymorphicFunction(object):
 
   def _canonicalize_function_inputs(self, args, kwds):
     """Canonicalize the inputs to the Python function."""
-    if not self._stateful_fn:
-      raise ValueError(
-          "_canonicalize_function_inputs must be called only after _initialize "
-          "has run.")
-    # pylint: disable=protected-access
     if self._input_signature is None or args or kwds:
-      return self._stateful_fn._function_spec.canonicalize_function_inputs(
-          *args, **kwds)
-    # pylint: enable=protected-access
+      return self._function_spec.canonicalize_function_inputs(*args, **kwds)  # pylint: disable=protected-access
     # If an input signature is defined, we may need to fetch a concrete function
     # without any inputs specified. In this case args and kwds should be ignored
     # but running _canonicalize_function_inputs would raise an exception.
@@ -405,6 +402,10 @@ class PolymorphicFunction(object):
   def input_signature(self):
     return self._input_signature
 
+  @property
+  def function_spec(self):
+    return self._function_spec
+
   def get_initialization_function(self, *args, **kwargs):
     """Returns a `Function` object which initializes this function's variables.
 
-- 
GitLab


From 306919b06b9fe34af9f1dcfe0dcfe31ef27852ef Mon Sep 17 00:00:00 2001
From: Sourabh Bajaj <sourabhbajaj@google.com>
Date: Wed, 19 Dec 2018 07:59:41 -0800
Subject: [PATCH 820/873] Keras + Distribution Strategy: Use a slightly lower
 tolerance for correctness test for default distribution strategy as the
 weights have more variance whenever the training is run on GPUs.

PiperOrigin-RevId: 226173270
---
 .../distribute/python/keras_backward_compat_test.py   | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/keras_backward_compat_test.py b/tensorflow/contrib/distribute/python/keras_backward_compat_test.py
index a63354eb37..93c0280c82 100644
--- a/tensorflow/contrib/distribute/python/keras_backward_compat_test.py
+++ b/tensorflow/contrib/distribute/python/keras_backward_compat_test.py
@@ -1307,17 +1307,14 @@ class TestDistributionStrategyCorrectness(test.TestCase,
 
   @combinations.generate(strategy_and_input_combinations())
   def test_correctness(self, distribution, use_numpy, use_validation_data):
-    # TODO(b/121224478): This test is flaky with default strategy. Remove this
-    # once the issue is fixed.
-    if isinstance(distribution, distribute_lib._DefaultDistributionStrategy):  # pylint: disable=protected-access
-      self.skipTest('Disable the test for default strategy.')
-
     with self.cached_session():
       default_tolerance = 1e-5
       tol_table = {}
 
-      if isinstance(distribution, (mirrored_strategy.MirroredStrategy,
-                                   mirrored_strategy.CoreMirroredStrategy)):
+      if isinstance(distribution, (
+          mirrored_strategy.MirroredStrategy,
+          mirrored_strategy.CoreMirroredStrategy,
+          distribute_lib._DefaultDistributionStrategy)):  # pylint: disable=protected-access
         # TODO(b/119257215): Weights are not exactly the same, so use larger
         # tolerance for now. Predict should be related to weights.
         tol_table = {
-- 
GitLab


From 13bf86ca32c8b20ba5a1ae50deec978dee6e2bb2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 09:05:39 -0800
Subject: [PATCH 821/873] Remove reference cycles from nested structure coder.

PiperOrigin-RevId: 226182183
---
 .../saved_model/nested_structure_coder.py      | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/tensorflow/python/saved_model/nested_structure_coder.py b/tensorflow/python/saved_model/nested_structure_coder.py
index a4b9092a86..410ebda5c1 100644
--- a/tensorflow/python/saved_model/nested_structure_coder.py
+++ b/tensorflow/python/saved_model/nested_structure_coder.py
@@ -31,6 +31,7 @@ from __future__ import division
 from __future__ import print_function
 
 import collections
+import functools
 import six
 
 from tensorflow.python.framework import dtypes
@@ -60,10 +61,11 @@ class StructureCoder(object):
   def _get_decoders(cls):
     return [(c.can_decode, c.do_decode) for c in cls._codecs]
 
-  def _map_structure(self, pyobj, coders, recursive_encode_fn):
+  def _map_structure(self, pyobj, coders):
     for can, do in coders:
       if can(pyobj):
-        return do(pyobj, recursive_encode_fn)
+        recursion_fn = functools.partial(self._map_structure, coders=coders)
+        return do(pyobj, recursion_fn)
     raise NotEncodableError(
         "No encoder for object [%s] of type [%s]." % (str(pyobj), type(pyobj)))
 
@@ -79,12 +81,8 @@ class StructureCoder(object):
     Raises:
       NotEncodableError: For values for which there are no encoders.
     """
+    return self._map_structure(nested_structure, self._get_encoders())
 
-    def encode_fn(nested_structure):
-      return self._map_structure(nested_structure, self._get_encoders(),
-                                 encode_fn)
-
-    return encode_fn(nested_structure)
 
   def can_encode(self, nested_structure):
     """Determines whether a nested structure can be encoded into a proto.
@@ -113,11 +111,7 @@ class StructureCoder(object):
     Raises:
       NotEncodableError: For values for which there are no encoders.
     """
-
-    def decode_fn(proto):
-      return self._map_structure(proto, self._get_decoders(), decode_fn)
-
-    return decode_fn(proto)
+    return self._map_structure(proto, self._get_decoders())
 
 
 class _ListCodec(object):
-- 
GitLab


From 885d785161202645e88b79063669c69c878137a1 Mon Sep 17 00:00:00 2001
From: David Majnemer <majnemer@google.com>
Date: Wed, 19 Dec 2018 09:16:02 -0800
Subject: [PATCH 822/873] [XLA] Propogate errors in
 ClientLibraryTestBase::ComputeAnd*

Errors returned by Client::TransferToServer should propagate instead of
triggering a CHECK-failure.

PiperOrigin-RevId: 226183625
---
 .../xla/tests/client_library_test_base.cc     | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tensorflow/compiler/xla/tests/client_library_test_base.cc b/tensorflow/compiler/xla/tests/client_library_test_base.cc
index 697236dc62..a350715597 100644
--- a/tensorflow/compiler/xla/tests/client_library_test_base.cc
+++ b/tensorflow/compiler/xla/tests/client_library_test_base.cc
@@ -15,6 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/tests/client_library_test_base.h"
 
+#include <memory>
 #include <string>
 
 #include "absl/memory/memory.h"
@@ -279,9 +280,10 @@ StatusOr<Literal> ClientLibraryTestBase::ComputeAndTransfer(
   if (!arguments_.empty()) {
     CHECK(arguments.empty());
     for (const auto& argument : arguments_) {
-      owning_arguments.push_back(
-          client_->TransferToServer(MaybeConvertLiteralToBfloat16(argument))
-              .ValueOrDie());
+      TF_ASSIGN_OR_RETURN(
+          std::unique_ptr<GlobalData> owned_argument,
+          client_->TransferToServer(MaybeConvertLiteralToBfloat16(argument)));
+      owning_arguments.push_back(std::move(owned_argument));
       arguments.push_back(owning_arguments.back().get());
     }
   }
@@ -302,9 +304,10 @@ Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
   if (!arguments_.empty()) {
     CHECK(arguments.empty());
     for (const auto& argument : arguments_) {
-      owning_arguments.push_back(
-          client_->TransferToServer(MaybeConvertLiteralToBfloat16(argument))
-              .ValueOrDie());
+      TF_ASSIGN_OR_RETURN(
+          std::unique_ptr<GlobalData> owned_argument,
+          client_->TransferToServer(MaybeConvertLiteralToBfloat16(argument)));
+      owning_arguments.push_back(std::move(owned_argument));
       arguments.push_back(owning_arguments.back().get());
     }
   }
@@ -362,9 +365,10 @@ Status ClientLibraryTestBase::ComputeAndCompareLiteralWithStatus(
   if (!arguments_.empty()) {
     CHECK(arguments.empty());
     for (const auto& argument : arguments_) {
-      owning_arguments.push_back(
-          client_->TransferToServer(MaybeConvertLiteralToBfloat16(argument))
-              .ValueOrDie());
+      TF_ASSIGN_OR_RETURN(
+          std::unique_ptr<GlobalData> owned_argument,
+          client_->TransferToServer(MaybeConvertLiteralToBfloat16(argument)));
+      owning_arguments.push_back(std::move(owned_argument));
       arguments.push_back(owning_arguments.back().get());
     }
   }
-- 
GitLab


From cdd763bdf3801e3b1dad129d6a84f154f0c8ce00 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 09:42:28 -0800
Subject: [PATCH 823/873] Additions for polymorphic functions serialization.

1. Serialization of FunctionSpec.
2. Full arg to param binding.
3. Serialization of inputs canonicalized inputs used to trace this function.

PiperOrigin-RevId: 226187111
---
 tensorflow/python/eager/function.py           | 26 ++++++-
 tensorflow/python/saved_model/BUILD           |  8 ++-
 .../saved_model/function_deserialization.py   | 63 +++++++++++++----
 .../saved_model/function_serialization.py     | 22 +++++-
 tensorflow/python/saved_model/load_test.py    | 69 +++++++++++++++++++
 tensorflow/python/saved_model/save.py         |  2 +-
 .../saved_model/saved_object_graph.proto      |  7 ++
 tensorflow/tools/pip_package/BUILD            |  1 -
 8 files changed, 175 insertions(+), 23 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index f7b83fcb2f..8b8f6af93b 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -757,8 +757,13 @@ def _encode_arg_for_serialization(arg):
   """A representation for this argument, for serializing signatures."""
   if isinstance(arg, ops.Tensor):
     return tensor_spec.TensorSpec(arg.shape, arg.dtype)
-  else:
-    return UnknownArgument()
+  if isinstance(arg, int):
+    return arg
+  if isinstance(arg, float):
+    return arg
+  if isinstance(arg, bool):
+    return arg
+  return UnknownArgument()
 
 
 pywrap_tensorflow.RegisterType("Tensor", ops.Tensor)
@@ -772,6 +777,14 @@ def _deterministic_dict_values(dictionary):
 class FunctionSpec(object):
   """Specification of how to bind arguments to a function."""
 
+  def as_tuple(self):
+    return (self._fullargspec, self._is_method, self._args_to_prepend,
+            self._kwargs_to_include, self.input_signature)
+
+  @staticmethod
+  def from_tuple(spec_tuple):
+    return FunctionSpec(*spec_tuple)
+
   @staticmethod
   def from_function_and_signature(python_function, input_signature):
     """Create a FunctionSpec instance given a python function and signature."""
@@ -846,7 +859,10 @@ class FunctionSpec(object):
       **kwargs: The keyword args this function was called with.
 
     Returns:
-      A canonicalized ordering of the inputs.
+      A canonicalized ordering of the inputs representened by a tuple in the
+      form (args, kwargs). Here: `args` is a full list of bound arguments, and
+      `kwargs` contains only true keyword arguments, as opposed to named
+      arguments called in a keyword-like fashion.
 
     Raises:
       ValueError: If a keyword in `kwargs` cannot be matched with a positional
@@ -987,6 +1003,10 @@ class PolymorphicFunction(object):
     """Returns the wrapped Python function."""
     return self._python_function  # pylint: disable=protected-access
 
+  @property
+  def function_spec(self):
+    return self._function_spec
+
   @property
   def _input_signature(self):
     """Returns the wrapped Python function."""
diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD
index 8066b092b0..40d7e2f25e 100644
--- a/tensorflow/python/saved_model/BUILD
+++ b/tensorflow/python/saved_model/BUILD
@@ -363,6 +363,7 @@ py_library(
     ],
     srcs_version = "PY2AND3",
     deps = [
+        ":nested_structure_coder",
         ":saved_object_graph_py",
         "//tensorflow/python/eager:def_function",
         "//tensorflow/python/eager:function",
@@ -375,7 +376,10 @@ py_library(
         "function_deserialization.py",
     ],
     srcs_version = "PY2AND3",
-    deps = ["//tensorflow/python/eager:def_function"],
+    deps = [
+        ":nested_structure_coder",
+        "//tensorflow/python/eager:def_function",
+    ],
 )
 
 tf_proto_library(
@@ -390,7 +394,7 @@ tf_proto_library(
     name = "saved_object_graph",
     srcs = ["saved_object_graph.proto"],
     cc_api_version = 2,
-    protodeps = tf_additional_all_protos(),
+    protodeps = tf_additional_all_protos() + [":struct"],
     visibility = ["//tensorflow:internal"],
 )
 
diff --git a/tensorflow/python/saved_model/function_deserialization.py b/tensorflow/python/saved_model/function_deserialization.py
index 99a82326aa..7845aab089 100644
--- a/tensorflow/python/saved_model/function_deserialization.py
+++ b/tensorflow/python/saved_model/function_deserialization.py
@@ -19,20 +19,33 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.eager import def_function
+from tensorflow.python.eager import function as function_lib
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
+from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.saved_model import nested_structure_coder
 from tensorflow.python.util import nest
 
 
-def _inputs_compatible(args, function):
-  """Check if args are compatible with a concrete function."""
+def _is_tensor(t):
+  return isinstance(t, (ops.Tensor, resource_variable_ops.ResourceVariable))
+
+
+def _inputs_compatible(args, stored_inputs):
+  """Checks whether function arguments are compatible with parameters."""
   # TODO(vbardiovsky): The compatibility check should be about the signature,
   # not the flattened version of it.
-  flattened_inputs = nest.flatten(args)
-  expected_input_count = len(function.inputs) - len(function.captured_inputs)
-  if len(flattened_inputs) != expected_input_count:
+  if len(args) != len(stored_inputs):
     return False
-  for a, b in zip(flattened_inputs, function.inputs):
-    if a.dtype != b.dtype or not b.shape.is_compatible_with(a.shape):
-      return False
+  for a, b in zip(args, stored_inputs):
+    if _is_tensor(a):
+      if not isinstance(b, tensor_spec.TensorSpec):
+        return False
+      if a.dtype != b.dtype or not b.shape.is_compatible_with(a.shape):
+        return False
+    else:
+      if a != b:
+        return False
   return True
 
 
@@ -51,18 +64,42 @@ def recreate_polymorphic_function(
   # instead of creating a new PolymorphicFunction backed by a Python layer to
   # glue things together. Current approach is nesting functions deeper for each
   # serialization cycle.
+
+  coder = nested_structure_coder.StructureCoder()
+  function_spec_tuple = coder.decode_proto(
+      saved_polymorphic_function.function_spec_tuple)
+  function_spec = function_lib.FunctionSpec.from_tuple(function_spec_tuple)
+
   # TODO(mdan): We may enable autograph once exceptions are supported.
   @def_function.function(autograph=False)
-  def restored_function(*args):
+  def restored_function(*args, **kwargs):
     """Calls a restored function."""
     # TODO(allenl): Functions saved with input_signatures should revive with
     # input_signatures.
     for monomorphic_function in saved_polymorphic_function.monomorphic_function:
       function_obj = functions[monomorphic_function.concrete_function]
-      if _inputs_compatible(args, function_obj):
-        flattened_inputs = nest.flatten(args)
-        flattened_outputs = function_obj._call_flat(flattened_inputs)  # pylint: disable=protected-access
-        # TODO(vbardiovsky): rebuild output structure.
+      canonicalized_original_inputs = coder.decode_proto(
+          monomorphic_function.canonicalized_input)
+
+      try:
+        can_args, can_kwargs = function_spec.canonicalize_function_inputs(
+            *args, **kwargs)
+        if can_kwargs:
+          # TODO(vbardiovsky): Enable this along with the structured input and
+          # structured output.
+          raise ValueError(
+              "Received keywords arguments that could not be bound: %s" %
+              kwargs)
+      except ValueError:
+        continue
+
+      canonicalized_inputs = nest.flatten(can_args)
+
+      if _inputs_compatible(canonicalized_inputs,
+                            canonicalized_original_inputs):
+        filtered_inputs = [t for t in canonicalized_inputs if _is_tensor(t)]
+        flattened_outputs = function_obj._call_flat(filtered_inputs)  # pylint: disable=protected-access
+        # TODO(vbardiovsky): Rebuild output structure.
         single_output, = flattened_outputs
         return single_output
 
diff --git a/tensorflow/python/saved_model/function_serialization.py b/tensorflow/python/saved_model/function_serialization.py
index 6bc6542491..27e8e476cb 100644
--- a/tensorflow/python/saved_model/function_serialization.py
+++ b/tensorflow/python/saved_model/function_serialization.py
@@ -21,13 +21,19 @@ from __future__ import print_function
 from tensorflow.python.eager import def_function
 from tensorflow.python.eager import function as defun_lib
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.saved_model import nested_structure_coder
 from tensorflow.python.saved_model import saved_object_graph_pb2
 
 
 def _serialize_polymorphic_function(polymorphic_function, node_ids):
   """Build a SavedPolymorphicProto."""
+  coder = nested_structure_coder.StructureCoder()
   proto = saved_object_graph_pb2.SavedPolymorphicFunction()
-  for concrete_function in list_all_concrete_functions(polymorphic_function):
+
+  proto.function_spec_tuple.CopyFrom(
+      coder.encode_structure(polymorphic_function.function_spec.as_tuple()))  # pylint: disable=protected-access
+  for signature, concrete_function in list_all_concrete_functions(
+      polymorphic_function):
     bound_inputs = []
     try:
       for capture in concrete_function.captured_inputs:
@@ -41,12 +47,22 @@ def _serialize_polymorphic_function(polymorphic_function, node_ids):
       continue
     function_proto = proto.monomorphic_function.add()
     function_proto.concrete_function = concrete_function.name
+    function_proto.canonicalized_input.CopyFrom(
+        coder.encode_structure(signature))
     function_proto.bound_inputs.extend(bound_inputs)
   return proto
 
 
 def list_all_concrete_functions(polymorphic_function):
-  """Given a polymorphic function, returns all of its concrete functions."""
+  """Given a polymorphic function, returns all of its concrete functions.
+
+  Args:
+    polymorphic_function: Instance of `PolymorphicFunction`.
+
+  Returns:
+    A list of tuples in the form (signature, concrete_function), where concrete
+    function is an instance of `Function`.
+  """
   input_signature = polymorphic_function._input_signature  # pylint: disable=protected-access
   if input_signature is not None:
     polymorphic_function.get_concrete_function()
@@ -55,7 +71,7 @@ def list_all_concrete_functions(polymorphic_function):
     if any(isinstance(arg, defun_lib.UnknownArgument) for arg in signature):
       continue
     concrete_function = polymorphic_function.get_concrete_function(*signature)
-    concrete_functions.append(concrete_function)
+    concrete_functions.append((signature, concrete_function))
   return concrete_functions
 
 
diff --git a/tensorflow/python/saved_model/load_test.py b/tensorflow/python/saved_model/load_test.py
index 9ed84e5c8f..0f7fba0c66 100644
--- a/tensorflow/python/saved_model/load_test.py
+++ b/tensorflow/python/saved_model/load_test.py
@@ -154,6 +154,75 @@ class LoadTest(test.TestCase):
     imported = self.cycle(root)
     self.assertEqual(4., imported.f(constant_op.constant(2.0)).numpy())
 
+  def test_function_with_default_bool_input(self):
+
+    def func(x, training=False):
+      if training:
+        return 2 * x
+      else:
+        return 7
+
+    root = tracking.Checkpointable()
+    root.f = def_function.function(func)
+
+    self.assertEqual(20, root.f(constant_op.constant(10), True).numpy())
+    self.assertEqual(7, root.f(constant_op.constant(1)).numpy())
+    self.assertEqual(2, root.f(constant_op.constant(1), True).numpy())
+
+    imported = self.cycle(root)
+
+    self.assertEqual(4, imported.f(constant_op.constant(2), True).numpy())
+    self.assertEqual(7, imported.f(constant_op.constant(2)).numpy())
+
+  def test_positional_arguments(self):
+    def func(x, training=False, abc=7.1, defg=7.7):
+      del abc
+      if training:
+        return 2 * x
+      if defg == 7:
+        return 6
+      else:
+        return 7
+
+    root = tracking.Checkpointable()
+    root.f = def_function.function(func)
+
+    self.assertEqual(20, root.f(constant_op.constant(10), True).numpy())
+    self.assertEqual(7, root.f(constant_op.constant(1)).numpy())
+    self.assertEqual(2, root.f(constant_op.constant(1), True).numpy())
+    self.assertEqual(6, root.f(constant_op.constant(1), defg=7.0).numpy())
+
+    imported = self.cycle(root)
+
+    self.assertEqual(4, imported.f(constant_op.constant(2), True).numpy())
+    self.assertEqual(7, imported.f(constant_op.constant(2)).numpy())
+    self.assertEqual(6, imported.f(constant_op.constant(1), defg=7.0).numpy())
+
+  def test_member_function(self):
+    class CheckpointableWithMember(tracking.Checkpointable):
+
+      def __init__(self):
+        super(CheckpointableWithMember, self).__init__()
+        self._some_value = 20
+
+      @def_function.function
+      def f(self, x, training=False):
+        if training:
+          return 2 * x
+        else:
+          return 7 + self._some_value
+
+    root = CheckpointableWithMember()
+
+    self.assertEqual(20, root.f(constant_op.constant(10), True).numpy())
+    self.assertEqual(27, root.f(constant_op.constant(1)).numpy())
+    self.assertEqual(2, root.f(constant_op.constant(1), True).numpy())
+
+    imported = self.cycle(root)
+
+    self.assertEqual(4, imported.f(constant_op.constant(2), True).numpy())
+    self.assertEqual(27, imported.f(constant_op.constant(2)).numpy())
+
 
 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py
index 38a459da2e..e69343208c 100644
--- a/tensorflow/python/saved_model/save.py
+++ b/tensorflow/python/saved_model/save.py
@@ -494,7 +494,7 @@ def _fill_meta_graph_def(meta_graph_def, obj, signature_functions,
 
   with exported_graph.as_default():
     signatures = _generate_signatures(signature_functions, resource_map)
-    for concrete_function in concrete_functions:
+    for _, concrete_function in concrete_functions:
       concrete_function.add_to_graph()
     saver_def = saver.to_proto()
     meta_graph_def.saver_def.CopyFrom(saver_def)
diff --git a/tensorflow/python/saved_model/saved_object_graph.proto b/tensorflow/python/saved_model/saved_object_graph.proto
index 1341cca7ff..f46927d6e8 100644
--- a/tensorflow/python/saved_model/saved_object_graph.proto
+++ b/tensorflow/python/saved_model/saved_object_graph.proto
@@ -3,6 +3,7 @@ syntax = "proto3";
 import "tensorflow/core/protobuf/checkpointable_object_graph.proto";
 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/types.proto";
+import "tensorflow/python/saved_model/struct.proto";
 
 option cc_enable_arenas = true;
 
@@ -79,6 +80,9 @@ message SavedAsset {
 // A function with multiple signatures, possibly with non-Tensor arguments.
 message SavedPolymorphicFunction {
   repeated SavedMonomorphicFunction monomorphic_function = 1;
+  // Tuple representing a `FunctionSpec`.
+  // TODO(vbardiovsky): Make this a proto.
+  StructuredValue function_spec_tuple = 2;
 }
 
 message SavedMonomorphicFunction {
@@ -90,6 +94,9 @@ message SavedMonomorphicFunction {
   // The only types of SavedObjects valid here are SavedVariable, SavedResource
   // and SavedAsset.
   repeated int32 bound_inputs = 2;
+  // Input in canonicalized form that was received to create this concrete
+  // function.
+  StructuredValue canonicalized_input = 3;
 }
 
 // Represents a Variable that is initialized by loading the contents from the
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 3e82b49b5b..2de00ea957 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -93,7 +93,6 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python/kernel_tests/signal:test_util",
     "//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files",
     "//tensorflow/python/ops/ragged:ragged_test_util",
-    "//tensorflow/python/saved_model:nested_structure_coder",
     "//tensorflow/python/saved_model:saved_model",
     "//tensorflow/python/tools:tools_pip",
     "//tensorflow/python/tools/api/generator:create_python_api",
-- 
GitLab


From 264ce77f848dc5f690d48040fa5f623d5296b55e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 09:49:32 -0800
Subject: [PATCH 824/873] disable failing test

PiperOrigin-RevId: 226188042
---
 tensorflow/python/keras/BUILD | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 8f9d70b15c..47fb480aa1 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -838,6 +838,7 @@ py_test(
     shard_count = 3,
     srcs_version = "PY2AND3",
     tags = [
+        "no_oss",
         "notsan",
     ],
     deps = [
-- 
GitLab


From 6744f3c0fe5435df84e69d7165731d24dbc93e7b Mon Sep 17 00:00:00 2001
From: Andy Ly <lyandy@google.com>
Date: Wed, 19 Dec 2018 10:26:41 -0800
Subject: [PATCH 825/873] [Grappler] Add helper methods for controlling fanin
 deduping and adding of controlling fanins for Switch ops in MutableGraphView.

PiperOrigin-RevId: 226194053
---
 tensorflow/core/grappler/BUILD                |   1 +
 .../core/grappler/mutable_graph_view.cc       | 134 +++++++++--
 tensorflow/core/grappler/mutable_graph_view.h |  53 ++++-
 .../core/grappler/mutable_graph_view_test.cc  | 214 +++++++++++++++++-
 4 files changed, 381 insertions(+), 21 deletions(-)

diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD
index 6e3012000f..6de12192ba 100644
--- a/tensorflow/core/grappler/BUILD
+++ b/tensorflow/core/grappler/BUILD
@@ -196,6 +196,7 @@ tf_cc_test(
         ":utils",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/core:graph",
+        "//tensorflow/core:protos_all_cc",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
diff --git a/tensorflow/core/grappler/mutable_graph_view.cc b/tensorflow/core/grappler/mutable_graph_view.cc
index 224b720328..ca4d5255c0 100644
--- a/tensorflow/core/grappler/mutable_graph_view.cc
+++ b/tensorflow/core/grappler/mutable_graph_view.cc
@@ -19,6 +19,7 @@ limitations under the License.
 #include <utility>
 
 #include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
 #include "absl/strings/substitute.h"
 #include "tensorflow/core/framework/graph.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
@@ -178,27 +179,21 @@ void MutableGraphView::UpdateFanouts(NodeDef* from_node, NodeDef* to_node) {
   }
 }
 
-bool MutableGraphView::AddFanin(NodeDef* node, const TensorId& fanin) {
-  NodeDef* fanin_node = GetNode(fanin.node());
-  if (fanin_node == nullptr) {
-    return false;
-  }
-
+bool MutableGraphView::AddFaninInternal(NodeDef* node,
+                                        const OutputPort& fanin) {
   int num_non_controlling_fanins =
       NumFanins(*node, /*include_controlling_nodes=*/false);
   InputPort input;
   input.node = node;
-  input.port_id = fanin.index() == Graph::kControlSlot
+  input.port_id = fanin.port_id == Graph::kControlSlot
                       ? Graph::kControlSlot
                       : num_non_controlling_fanins;
 
-  OutputPort fanin_port(fanin_node, fanin.index());
-
-  if (!gtl::InsertIfNotPresent(&fanouts()[fanin_port], input)) {
+  if (!gtl::InsertIfNotPresent(&fanouts()[fanin], input)) {
     return false;
   }
-  node->add_input(TensorIdToString(fanin));
-  if (fanin.index() > Graph::kControlSlot) {
+  node->add_input(TensorIdToString({fanin.node->name(), fanin.port_id}));
+  if (fanin.port_id > Graph::kControlSlot) {
     int node_input_size = node->input_size() - 1;
     // If there are control dependencies in node, move newly inserted fanin to
     // be before such control dependencies.
@@ -210,6 +205,14 @@ bool MutableGraphView::AddFanin(NodeDef* node, const TensorId& fanin) {
   return true;
 }
 
+bool MutableGraphView::AddFaninInternal(NodeDef* node, const TensorId& fanin) {
+  NodeDef* fanin_node = GetNode(fanin.node());
+  if (fanin_node == nullptr) {
+    return false;
+  }
+  return AddFaninInternal(node, {fanin_node, fanin.index()});
+}
+
 bool MutableGraphView::AddFanin(absl::string_view node_name,
                                 const TensorId& fanin) {
   if (!IsTensorIdPortValid(fanin)) {
@@ -219,7 +222,7 @@ bool MutableGraphView::AddFanin(absl::string_view node_name,
   if (node == nullptr) {
     return false;
   }
-  return AddFanin(node, fanin);
+  return AddFaninInternal(node, fanin);
 }
 
 bool MutableGraphView::RemoveFanins(NodeDef* node,
@@ -318,7 +321,7 @@ bool MutableGraphView::UpdateFanin(absl::string_view node_name,
   if (is_from_fanin_control || is_to_fanin_control) {
     bool modified = RemoveFanins(node, {from_fanin});
     if (!HasFanin(*node, to_fanin)) {
-      modified |= AddFanin(node, to_fanin);
+      modified |= AddFaninInternal(node, to_fanin);
     }
     return modified;
   }
@@ -357,6 +360,109 @@ bool MutableGraphView::UpdateFanin(absl::string_view node_name,
   return modified;
 }
 
+bool MutableGraphView::DedupControllingFanins(NodeDef* node) {
+  absl::flat_hash_set<absl::string_view> fanins;
+  absl::flat_hash_set<string> removed_fanins;
+  int pos = 0;
+  const int last_idx = node->input_size() - 1;
+  int last_pos = last_idx;
+  while (pos <= last_pos) {
+    const string& input = node->input(pos);
+    TensorId tensor_id = ParseTensorName(input);
+    if (!gtl::InsertIfNotPresent(&fanins, tensor_id.node()) &&
+        IsControlInput(tensor_id)) {
+      node->mutable_input()->SwapElements(pos, last_pos--);
+      removed_fanins.insert(input);
+    } else {
+      ++pos;
+    }
+  }
+
+  if (last_pos < last_idx) {
+    absl::flat_hash_set<string> retained_fanins(
+        node->input().begin(), node->input().begin() + last_pos + 1);
+    for (const auto& removed : removed_fanins) {
+      if (!retained_fanins.contains(removed)) {
+        OutputPort fanin(nodes()[ParseTensorName(removed).node()],
+                         Graph::kControlSlot);
+        fanouts()[fanin].erase({node, Graph::kControlSlot});
+      }
+    }
+    node->mutable_input()->DeleteSubrange(last_pos + 1, last_idx - last_pos);
+    return true;
+  }
+
+  return false;
+}
+
+bool MutableGraphView::DedupControllingFanins(absl::string_view node_name) {
+  NodeDef* node = GetNode(node_name);
+  if (node == nullptr) {
+    return false;
+  }
+  return DedupControllingFanins(node);
+}
+
+bool MutableGraphView::DedupControllingFanins() {
+  const int num_nodes = graph()->node_size();
+  bool modified = false;
+  for (int i = 0; i < num_nodes; ++i) {
+    modified |= DedupControllingFanins(graph()->mutable_node(i));
+  }
+  return modified;
+}
+
+bool MutableGraphView::AddControllingFanin(absl::string_view node_name,
+                                           const TensorId& fanin) {
+  NodeDef* node = GetNode(node_name);
+  if (node == nullptr) {
+    return false;
+  }
+  NodeDef* fanin_node = GetNode(fanin.node());
+  if (fanin_node == nullptr) {
+    return false;
+  }
+  if (fanin.index() == Graph::kControlSlot) {
+    return AddFaninInternal(node, {fanin_node, Graph::kControlSlot});
+  }
+
+  if (!IsSwitch(*fanin_node)) {
+    return AddFaninInternal(node, {fanin_node, Graph::kControlSlot});
+  } else {
+    // We can't anchor control dependencies directly on the switch node: unlike
+    // other nodes only one of the outputs of the switch node will be generated
+    // when the switch node is executed, and we need to make sure the control
+    // dependency is only triggered when the corresponding output is triggered.
+    // We start by looking for an identity node connected to the output of the
+    // switch node, and use it to anchor the control dependency.
+    auto fanouts = GetFanouts(*fanin_node, /*include_controlled_nodes=*/false);
+    for (auto fanout : fanouts) {
+      if (IsIdentity(*fanout.node) || IsIdentityNSingleInput(*fanout.node)) {
+        if (ParseTensorName(fanout.node->input(0)) == fanin) {
+          return AddFaninInternal(node, {fanout.node, Graph::kControlSlot});
+        }
+      }
+    }
+    // We haven't found an existing node where we can anchor the control
+    // dependency: add a new identity node.
+    string ctrl_dep_name = AddPrefixToNodeName(
+        absl::StrCat(fanin.node(), "_", fanin.index()), kMutableGraphViewCtrl);
+
+    NodeDef* ctrl_dep_node = GetNode(ctrl_dep_name);
+    if (ctrl_dep_node == nullptr) {
+      NodeDef new_node;
+      new_node.set_name(ctrl_dep_name);
+      new_node.set_op("Identity");
+      new_node.set_device(fanin_node->device());
+      (*new_node.mutable_attr())["T"].set_type(
+          fanin_node->attr().at("T").type());
+      new_node.add_input(TensorIdToString(fanin));
+      ctrl_dep_node = AddNode(std::move(new_node));
+    }
+    return AddFaninInternal(node, {ctrl_dep_node, Graph::kControlSlot});
+  }
+}
+
 void MutableGraphView::DeleteNodes(const std::set<string>& nodes_to_delete) {
   for (const string& node_name_to_delete : nodes_to_delete)
     RemoveFaninsInternal(nodes().at(node_name_to_delete),
diff --git a/tensorflow/core/grappler/mutable_graph_view.h b/tensorflow/core/grappler/mutable_graph_view.h
index 8025b8ca77..f7c2a1118e 100644
--- a/tensorflow/core/grappler/mutable_graph_view.h
+++ b/tensorflow/core/grappler/mutable_graph_view.h
@@ -31,6 +31,8 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
+const char kMutableGraphViewCtrl[] = "ConstantFoldingCtrl";
+
 // A utility class to simplify the traversal of a GraphDef that, unlike
 // GraphView, supports updating the graph.  Note that you should not modify the
 // graph separately, because the view will get out of sync.
@@ -102,6 +104,38 @@ class MutableGraphView : public internal::GraphViewInternal<GraphDef, NodeDef> {
   bool UpdateFanin(absl::string_view node_name, const TensorId& from_fanin,
                    const TensorId& to_fanin);
 
+  // Removes redundant control fanins from node `node_name`.
+  //
+  // This will return true iff the node is modified.
+  // TODO(lyandy): Measure performance of deduping on every AddFanin compared to
+  // deduping once at the end.
+  bool DedupControllingFanins(absl::string_view node_name);
+
+  // Removes redundant control fanins from all nodes in the graph.
+  //
+  // This will return true iff the node is modified.
+  bool DedupControllingFanins();
+
+  // Adds a control dependency to the target node named `node_name`.
+  //
+  // Case 1: If the fanin is not a Switch node, the control dependency is simply
+  // added to the target node:
+  //
+  //   fanin -^> target node.
+  //
+  // Case 2: If the fanin is a Switch node, we cannot anchor a control
+  // dependency on it, because unlike other nodes, only one of its outputs will
+  // be generated when the node is activated. In this case, we try to find an
+  // Identity/IdentityN node in the fanout of the relevant port of the Switch
+  // and add it as a fanin to the target node. If no such Identity/IdentityN
+  // node can be found, a new Identity node will be created. In both cases, we
+  // end up with:
+  //
+  //   fanin -> Identity{N} -^> target node.
+  //
+  // This will return true iff the node is modified.
+  bool AddControllingFanin(absl::string_view node_name, const TensorId& fanin);
+
   // Deletes nodes from the graph.
   void DeleteNodes(const std::set<string>& nodes_to_delete);
 
@@ -121,11 +155,19 @@ class MutableGraphView : public internal::GraphViewInternal<GraphDef, NodeDef> {
   // behavior is undefined.
   void UpdateFanouts(NodeDef* from_node, NodeDef* to_node);
 
-  // Remove fanins of the deleted node from internal state. Control dependencies
-  // are retained iff keep_controlling_fanins is true.
+  // Removes fanins of the deleted node from internal state. Control
+  // dependencies are retained iff keep_controlling_fanins is true.
   void RemoveFaninsInternal(NodeDef* deleted_node,
                             bool keep_controlling_fanins);
 
+  // Add fanin to node. If fanin is a control dependency, existing control
+  // dependencies will be checked first before adding. Otherwise fanin will be
+  // added after existing non control dependency inputs.
+  //
+  // This will return true iff the node is modified. If a control dependency
+  // already exists, the node will not be modified.
+  bool AddFaninInternal(NodeDef* node, const OutputPort& fanin);
+
   // Add fanin to node. If the node or fanin do not exist in the graph, nothing
   // will be modified in the graph. If fanin is a control dependency, existing
   // control dependencies will be checked first before adding. Otherwise fanin
@@ -133,10 +175,13 @@ class MutableGraphView : public internal::GraphViewInternal<GraphDef, NodeDef> {
   //
   // This will return true iff the node is modified. If a control dependency
   // already exists, the node will not be modified.
-  bool AddFanin(NodeDef* node, const TensorId& fanin);
+  bool AddFaninInternal(NodeDef* node, const TensorId& fanin);
 
-  // Remove any fanin in node that matches to a fanin in fanins.
+  // Removes any fanin in node that matches to a fanin in fanins.
   bool RemoveFanins(NodeDef* node, absl::Span<const TensorId> fanins);
+
+  // Removes redundant control fanins from node.
+  bool DedupControllingFanins(NodeDef* node);
 };
 
 }  // end namespace grappler
diff --git a/tensorflow/core/grappler/mutable_graph_view_test.cc b/tensorflow/core/grappler/mutable_graph_view_test.cc
index cd7e638595..cdc212f6f9 100644
--- a/tensorflow/core/grappler/mutable_graph_view_test.cc
+++ b/tensorflow/core/grappler/mutable_graph_view_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/grappler/mutable_graph_view.h"
 #include "tensorflow/cc/ops/standard_ops.h"
 #include "tensorflow/core/framework/function_testlib.h"
+#include "tensorflow/core/framework/types.pb.h"
 #include "tensorflow/core/graph/tensor_id.h"
 #include "tensorflow/core/grappler/grappler_item.h"
 #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h"
@@ -35,7 +36,7 @@ TEST(MutableGraphViewTest, AddAndUpdateFanouts) {
        NDef("other", "NotImportant", {}, {}),
        NDef("foo_1", "NotImportant", {"bar", "other", "bar:1", "^bar"}),
        NDef("foo_2", "NotImportant", {"other:1", "bar:2", "^bar"})},
-      /* empty function library */ {});
+      /*funcs=*/{});
 
   MutableGraphView graph(&graph_def);
 
@@ -78,7 +79,7 @@ TEST(MutableGraphViewTest, AddAndUpdateFanoutsWithoutSelfLoops) {
   GraphDef graph_def =
       test::function::GDef({NDef("bar", "NotImportant", {}, {}),
                             NDef("foo", "NotImportant", {"bar", "^bar"})},
-                           /* empty function library */ {});
+                           /*funcs=*/{});
 
   MutableGraphView graph(&graph_def);
 
@@ -462,6 +463,213 @@ TEST(MutableGraphViewTest, UpdateFanin) {
                   /*modified=*/false, /*expected_node=*/nullptr);
 }
 
+GraphDef SimpleDuplicateControllingFaninsGraph() {
+  // Actual node.op() is not important in this test.
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("b", "NotImportant", {}, {}),
+       NDef("foo_1", "NotImportant", {"a", "b:1", "^b"}),
+       NDef("foo_2", "NotImportant", {"a", "^b", "^b"}),
+       NDef("foo_3", "NotImportant", {"a", "b:1", "^b", "^b"}),
+       NDef("foo_4", "NotImportant", {"a:2", "b:1", "^b", "^b", "^a", "^a"})},
+      /*funcs=*/{});
+  return graph_def;
+}
+
+void CheckDedupControllingFaninsForNode(MutableGraphView* graph,
+                                        absl::string_view node_name,
+                                        const NodeDef* expected_node) {
+  // Deduping again should result in no change.
+  EXPECT_FALSE(graph->DedupControllingFanins(node_name));
+  NodeDef* node = graph->GetNode(node_name);
+  ASSERT_NE(node, nullptr);
+  ASSERT_EQ(node->input_size(), expected_node->input_size());
+  CompareNodeInputs(*graph, expected_node, node);
+  for (int i = 0; i < node->input_size(); ++i) {
+    TensorId tensor_id = ParseTensorName(node->input(i));
+    if (tensor_id.index() > Graph::kControlSlot) {
+      CheckFanout(*graph, {tensor_id.node(), Graph::kControlSlot}, node_name);
+    }
+  }
+}
+
+void TestDedupControllingFaninsForNode(MutableGraphView* graph,
+                                       absl::string_view node_name,
+                                       const NodeDef* expected_node) {
+  EXPECT_TRUE(graph->DedupControllingFanins(node_name));
+  CheckDedupControllingFaninsForNode(graph, node_name, expected_node);
+}
+
+TEST(MutableGraphViewTest, DedupControllingFaninsForNode) {
+  GraphDef graph_def = SimpleDuplicateControllingFaninsGraph();
+
+  MutableGraphView graph(&graph_def);
+
+  NodeDef expected_node;
+  // Remove redundant control dependency '^b'.
+  expected_node = NDef("", "", {"a", "b:1"});
+  TestDedupControllingFaninsForNode(&graph, "foo_1", &expected_node);
+  // Remove extra control dependency '^b'.
+  expected_node = NDef("", "", {"a", "^b"});
+  TestDedupControllingFaninsForNode(&graph, "foo_2", &expected_node);
+  // Remove redundant and extra control dependencies '^b'.
+  expected_node = NDef("", "", {"a", "b:1"});
+  TestDedupControllingFaninsForNode(&graph, "foo_3", &expected_node);
+  // Remove multiple redundant control dependencies.
+  expected_node = NDef("", "", {"a:2", "b:1"});
+  TestDedupControllingFaninsForNode(&graph, "foo_4", &expected_node);
+  // Missing node.
+  EXPECT_FALSE(graph.DedupControllingFanins("missing"));
+}
+
+TEST(MutableGraphViewTest, DedupControllingFaninsForGraph) {
+  GraphDef graph_def = SimpleDuplicateControllingFaninsGraph();
+
+  MutableGraphView graph(&graph_def);
+  EXPECT_TRUE(graph.DedupControllingFanins());
+  // Deduping again should result in no change.
+  EXPECT_FALSE(graph.DedupControllingFanins());
+
+  NodeDef expected_node;
+  // Remove redundant control dependency '^b'.
+  expected_node = NDef("", "", {"a", "b:1"});
+  CheckDedupControllingFaninsForNode(&graph, "foo_1", &expected_node);
+  // Remove extra control dependency '^b'.
+  expected_node = NDef("", "", {"a", "^b"});
+  CheckDedupControllingFaninsForNode(&graph, "foo_2", &expected_node);
+  // Remove redundant and extra control dependencies '^b'.
+  expected_node = NDef("", "", {"a", "b:1"});
+  CheckDedupControllingFaninsForNode(&graph, "foo_3", &expected_node);
+  // Remove multiple redundant control dependencies.
+  expected_node = NDef("", "", {"a:2", "b:1"});
+  CheckDedupControllingFaninsForNode(&graph, "foo_4", &expected_node);
+}
+
+TEST(MutableGraphViewTest, AddControllingFaninMissing) {
+  // Actual node.op() is not important in this test.
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("b", "NotImportant", {}, {})},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+  // Missing fanin.
+  EXPECT_FALSE(graph.AddControllingFanin("a", {"c", Graph::kControlSlot}));
+  // Missing node.
+  EXPECT_FALSE(graph.AddControllingFanin("d", {"a", Graph::kControlSlot}));
+  // Missing node and fanin.
+  EXPECT_FALSE(graph.AddControllingFanin("c", {"d", Graph::kControlSlot}));
+
+  ASSERT_EQ(graph.graph()->node_size(), 2);
+  NodeDef* a = graph.GetNode("a");
+  ASSERT_NE(a, nullptr);
+  ASSERT_EQ(a->input_size(), 0);
+  NodeDef* b = graph.GetNode("b");
+  ASSERT_NE(b, nullptr);
+  ASSERT_EQ(b->input_size(), 0);
+}
+
+TEST(MutableGraphViewTest, AddControllingFaninExistingControl) {
+  // Actual node.op() is not important in this test.
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("b", "NotImportant", {}, {})},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+  EXPECT_TRUE(graph.AddControllingFanin("a", {"b", Graph::kControlSlot}));
+  EXPECT_FALSE(graph.AddControllingFanin("a", {"b", Graph::kControlSlot}));
+
+  ASSERT_EQ(graph.graph()->node_size(), 2);
+  NodeDef* a = graph.GetNode("a");
+  ASSERT_NE(a, nullptr);
+  ASSERT_EQ(a->input_size(), 1);
+  EXPECT_EQ(a->input(0), "^b");
+  NodeDef* b = graph.GetNode("b");
+  ASSERT_NE(b, nullptr);
+  ASSERT_EQ(b->input_size(), 0);
+}
+
+TEST(MutableGraphViewTest, AddControllingFaninNotSwitch) {
+  // Actual node.op() is not important in this test.
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("b", "NotImportant", {}, {})},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+  EXPECT_TRUE(graph.AddControllingFanin("a", {"b", 2}));
+  EXPECT_FALSE(graph.AddControllingFanin("a", {"b", 2}));
+
+  ASSERT_EQ(graph.graph()->node_size(), 2);
+  NodeDef* a = graph.GetNode("a");
+  ASSERT_NE(a, nullptr);
+  ASSERT_EQ(a->input_size(), 1);
+  EXPECT_EQ(a->input(0), "^b");
+  NodeDef* b = graph.GetNode("b");
+  ASSERT_NE(b, nullptr);
+  ASSERT_EQ(b->input_size(), 0);
+}
+
+TEST(MutableGraphViewTest, AddControllingFaninSwitchWithIdentity) {
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("switch", "Switch", {}, {}),
+       NDef("identity", "Identity", {"switch"})},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+
+  EXPECT_TRUE(graph.AddControllingFanin("a", {"switch", 0}));
+  EXPECT_FALSE(graph.AddControllingFanin("a", {"switch", 0}));
+
+  ASSERT_EQ(graph.graph()->node_size(), 3);
+  NodeDef* a = graph.GetNode("a");
+  ASSERT_NE(a, nullptr);
+  ASSERT_EQ(a->input_size(), 1);
+  EXPECT_EQ(a->input(0), "^identity");
+}
+
+TEST(MutableGraphViewTest, AddControllingFaninSwitchWithNoExistingIdentity) {
+  constexpr char kDevice[] = "/device:foo:0";
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}),
+       NDef("switch", "Switch", {}, {{"T", DT_FLOAT}}, kDevice)},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+
+  EXPECT_TRUE(graph.AddControllingFanin("a", {"switch", 0}));
+  EXPECT_FALSE(graph.AddControllingFanin("a", {"switch", 0}));
+
+  ASSERT_EQ(graph.graph()->node_size(), 3);
+  NodeDef* a = graph.GetNode("a");
+  ASSERT_NE(a, nullptr);
+  ASSERT_EQ(a->input_size(), 1);
+  EXPECT_EQ(a->input(0), "^ConstantFoldingCtrl/switch_0");
+  NodeDef* identity = graph.GetNode("ConstantFoldingCtrl/switch_0");
+  ASSERT_NE(identity, nullptr);
+  ASSERT_EQ(identity->input_size(), 1);
+  EXPECT_EQ(identity->input(0), "switch");
+  EXPECT_EQ(identity->op(), "Identity");
+  EXPECT_EQ(identity->device(), kDevice);
+  ASSERT_TRUE(identity->attr().count("T"));
+  EXPECT_EQ(identity->attr().at("T").type(), DT_FLOAT);
+}
+
+TEST(MutableGraphViewTest, AddControllingFaninSwitchWithExistingAddedIdentity) {
+  GraphDef graph_def = test::function::GDef(
+      {NDef("a", "NotImportant", {}, {}), NDef("switch", "Switch", {}, {}),
+       NDef("ConstantFoldingCtrl/switch_0", "Identity", {}, {})},
+      /*funcs=*/{});
+
+  MutableGraphView graph(&graph_def);
+
+  EXPECT_TRUE(graph.AddControllingFanin("a", {"switch", 0}));
+  EXPECT_FALSE(graph.AddControllingFanin("a", {"switch", 0}));
+
+  ASSERT_EQ(graph.graph()->node_size(), 3);
+  NodeDef* a = graph.GetNode("a");
+  ASSERT_NE(a, nullptr);
+  ASSERT_EQ(a->input_size(), 1);
+  EXPECT_EQ(a->input(0), "^ConstantFoldingCtrl/switch_0");
+}
+
 TEST(MutableGraphViewTest, DeleteNodes) {
   // Actual node.op() is not important in this test.
   GraphDef graph_def = test::function::GDef(
@@ -469,7 +677,7 @@ TEST(MutableGraphViewTest, DeleteNodes) {
        NDef("other", "NotImportant", {}, {}),
        NDef("foo_1", "NotImportant", {"bar", "other", "bar:1", "^bar"}),
        NDef("foo_2", "NotImportant", {"other:1", "bar:2", "^bar"})},
-      /* empty function library */ {});
+      /*funcs=*/{});
 
   MutableGraphView graph(&graph_def);
 
-- 
GitLab


From a7c8a800607bb63c8ef9f0ce7b62dd6efa8ea630 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 10:56:23 -0800
Subject: [PATCH 826/873] Add tolerance on resize_bilinear_op_test.cc to stop
 failing tests.

PiperOrigin-RevId: 226199175
---
 .../core/kernels/resize_bilinear_op_test.cc   | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/core/kernels/resize_bilinear_op_test.cc b/tensorflow/core/kernels/resize_bilinear_op_test.cc
index 6d57892828..f2062915b8 100644
--- a/tensorflow/core/kernels/resize_bilinear_op_test.cc
+++ b/tensorflow/core/kernels/resize_bilinear_op_test.cc
@@ -122,7 +122,7 @@ class ResizeBilinearOpTest : public OpsTestBase {
         TensorShape({batch_size, output_width, output_height, channels})));
     ResizeBilinearBaseline(input->tensor<float, 4>(),
                            expected->tensor<float, 4>());
-    test::ExpectTensorEqual<float>(*expected, *GetOutput(0));
+    test::ExpectClose(*expected, *GetOutput(0));
   }
 
   void RunManyRandomTests(int channels) {
@@ -177,7 +177,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To1x1) {
   // original input. In this case, we choose the top/left most pixel.
   Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 1, 1}));
   test::FillValues<float>(&expected, {1.0});
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpTest, TestBilinearRandom2x2To1x1) {
@@ -194,7 +194,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinearRandom2x2To1x1) {
   ResizeBilinearBaseline(input->tensor<float, 4>(),
                          expected->tensor<float, 4>());
   EXPECT_EQ(input->flat<float>()(0), output->flat<float>()(0));
-  test::ExpectTensorEqual<float>(*expected, *output);
+  test::ExpectClose(*expected, *output);
 }
 
 TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners2x2To1x1) {
@@ -209,7 +209,7 @@ TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners2x2To1x1) {
   // original input. In this case, we choose the top/left most pixel.
   Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 1, 1, 1}));
   test::FillValues<float>(&expected, {1.0});
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3) {
@@ -229,7 +229,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3) {
      3,        11.0f / 3, 4});
 
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners2x2To3x3) {
@@ -252,7 +252,7 @@ TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners2x2To3x3) {
      3,  3.5,  4});
 
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpTest, TestBilinear3x3To2x2) {
@@ -273,7 +273,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinear3x3To2x2) {
      5.5,   7});
 
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners3x3To2x2) {
@@ -294,7 +294,7 @@ TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners3x3To2x2) {
      7,  9});
 
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpTest, TestBilinear3x3To4x4) {
@@ -316,7 +316,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinear3x3To4x4) {
      7,  7.75, 8.5, 9});
 
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpTest, TestBilinear4x4To3x3) {
@@ -340,7 +340,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinear4x4To3x3) {
      35.0f/3, 39.0f/3, 43.0f/3});
 
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners4x4To3x3) {
@@ -364,7 +364,7 @@ TEST_F(ResizeBilinearOpAlignCornersTest, TestBilinearAlignCorners4x4To3x3) {
      13, 14.5, 16});
 
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3Batch2) {
@@ -384,7 +384,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To3x3Batch2) {
      1, 5.0f/3, 2, 7.0f/3, 3, 10.0f/3, 3, 11.0f/3, 4
     });
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpTest, TestBilinear2x2x2To3x3x2) {
@@ -408,7 +408,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2x2To3x3x2) {
       4,       -4
     });
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 TEST_F(ResizeBilinearOpTest, TestBilinear2x2To4x4) {
@@ -427,7 +427,7 @@ TEST_F(ResizeBilinearOpTest, TestBilinear2x2To4x4) {
      3,  3.5, 4, 4,
      3,  3.5, 4, 4});
   // clang-format on
-  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+  test::ExpectClose(expected, *GetOutput(0));
 }
 
 // similar_size case
-- 
GitLab


From 49348b894d754b42ec145f66298810a689cef984 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 10:59:33 -0800
Subject: [PATCH 827/873] Internal clean up: "distribution_strategy" ->
 "strategy".

PiperOrigin-RevId: 226199738
---
 .../contrib/distribute/python/combinations.py |  2 +-
 .../distribute/python/one_device_strategy.py  |  9 ++-
 .../contrib/distribute/python/tpu_strategy.py | 13 ++---
 .../python/distribute/distribute_lib.py       | 55 +++++++++----------
 .../python/distribute/distribute_lib_test.py  | 20 +++----
 .../distribution_strategy_context.py          | 53 +++++++++---------
 .../python/distribute/mirrored_strategy.py    |  3 +-
 7 files changed, 76 insertions(+), 79 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py
index 365ce5cdec..4a934953ad 100644
--- a/tensorflow/contrib/distribute/python/combinations.py
+++ b/tensorflow/contrib/distribute/python/combinations.py
@@ -324,7 +324,7 @@ class NamedDistribution(object):
 # pylint: disable=g-long-lambda
 default_strategy = NamedDistribution(
     "Default",
-    distribution_strategy_context._get_default_distribution_strategy,  # pylint: disable=protected-access
+    distribution_strategy_context._get_default_strategy,  # pylint: disable=protected-access
     required_gpus=None)
 one_device_strategy = NamedDistribution(
     "OneDeviceCPU", lambda: one_device_lib.OneDeviceStrategy("/cpu:0"),
diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py
index c9ea706b64..5986bc4661 100644
--- a/tensorflow/contrib/distribute/python/one_device_strategy.py
+++ b/tensorflow/contrib/distribute/python/one_device_strategy.py
@@ -204,12 +204,11 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended):
 class _OneDeviceReplicaContext(distribute_lib.ReplicaContext):
   """ReplicaContext for OneDeviceStrategy."""
 
-  def __init__(self, distribution_strategy):
+  def __init__(self, strategy):
+    zero = constant_op.constant(0, dtypes.int32)
     distribute_lib.ReplicaContext.__init__(
-        self,
-        distribution_strategy,
-        replica_id_in_sync_group=constant_op.constant(0, dtypes.int32))
+        self, strategy, replica_id_in_sync_group=zero)
 
   @property
   def devices(self):
-    return self._distribution_strategy.extended.worker_devices
+    return self._strategy.extended.worker_devices
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index c2f62c3ca2..7352203fe1 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -641,17 +641,16 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
 class _TPUReplicaContext(distribute_lib.ReplicaContext):
   """Replication Context class for TPU Strategy."""
 
-  # TODO(sourabhbajaj): Call for each tower should be updating this.
-  def __init__(self, distribution_strategy):
+  # TODO(sourabhbajaj): Call for each replica should be updating this.
+  def __init__(self, strategy):
+    # TODO(b/118385803): properly initialize replica_id, instead of always 0
+    replica_id = constant_op.constant(0, dtypes.int32)
     distribute_lib.ReplicaContext.__init__(
-        self,
-        distribution_strategy,
-        # TODO(b/118385803): properly initialize replica_id, instead of always 0
-        replica_id_in_sync_group=constant_op.constant(0, dtypes.int32))
+        self, strategy, replica_id_in_sync_group=replica_id)
 
   @property
   def devices(self):
     distribute_lib.require_replica_context(self)
-    ds = self._distribution_strategy
+    ds = self._strategy
     replica_id = tensor_util.constant_value(self._replica_id_in_sync_group)
     return (ds.extended.worker_devices[replica_id],)
diff --git a/tensorflow/python/distribute/distribute_lib.py b/tensorflow/python/distribute/distribute_lib.py
index edb80d7fb4..9c6bcea4cd 100644
--- a/tensorflow/python/distribute/distribute_lib.py
+++ b/tensorflow/python/distribute/distribute_lib.py
@@ -99,7 +99,7 @@ def _require_cross_replica_context_extended(extended):
     return
   strategy = extended._container_strategy()  # pylint: disable=protected-access
   # We have an error to report, figure out the right message.
-  if context.distribution_strategy is not strategy:
+  if context.strategy is not strategy:
     _wrong_strategy_scope(strategy, context)
   assert cross_replica is None
   raise RuntimeError("Method requires being in cross-replica context, use "
@@ -108,14 +108,14 @@ def _require_cross_replica_context_extended(extended):
 
 def _wrong_strategy_scope(strategy, context):
   # Figure out the right error message.
-  if not distribution_strategy_context.has_distribution_strategy():
+  if not distribution_strategy_context.has_strategy():
     raise RuntimeError(
         'Need to be inside "with strategy.scope()" for %s' %
         (strategy,))
   else:
     raise RuntimeError(
         "Mixing different tf.distribute.Strategy objects: %s is not %s" %
-        (context.distribution_strategy, strategy))
+        (context.strategy, strategy))
 
 
 def require_replica_context(replica_ctx):
@@ -125,25 +125,25 @@ def require_replica_context(replica_ctx):
   # We have an error to report, figure out the right message.
   if context.replica_context is None:
     raise RuntimeError("Need to be inside `call_for_each_replica()`")
-  if context.distribution_strategy is replica_ctx.distribution_strategy:
+  if context.strategy is replica_ctx.strategy:
     # Two different ReplicaContexts with the same tf.distribute.Strategy.
     raise RuntimeError("Mismatching ReplicaContext.")
   raise RuntimeError(
       "Mismatching tf.distribute.Strategy objects: %s is not %s." %
-      (context.distribution_strategy, replica_ctx.distribution_strategy))
+      (context.strategy, replica_ctx.strategy))
 
 
-def _require_distribution_strategy_scope_strategy(strategy):
+def _require_strategy_scope_strategy(strategy):
   """Verify in a `strategy.scope()` in this thread."""
   context = _get_per_thread_mode()
-  if context.distribution_strategy is strategy: return
+  if context.strategy is strategy: return
   _wrong_strategy_scope(strategy, context)
 
 
-def _require_distribution_strategy_scope_extended(extended):
+def _require_strategy_scope_extended(extended):
   """Verify in a `distribution_strategy.scope()` in this thread."""
   context = _get_per_thread_mode()
-  if context.distribution_strategy.extended is extended: return
+  if context.strategy.extended is extended: return
   # Report error.
   strategy = extended._container_strategy()  # pylint: disable=protected-access
   _wrong_strategy_scope(strategy, context)
@@ -181,7 +181,7 @@ class _CurrentDistributionContext(object):
     self._var_creator_scope.__enter__()
     if self._device_scope:
       self._device_scope.__enter__()
-    return self._context.distribution_strategy
+    return self._context.strategy
 
   def __exit__(self, exception_type, exception_value, traceback):
     if self._device_scope:
@@ -196,10 +196,10 @@ class _SameScopeAgainContext(object):
   """Trivial context manager when you are already in `scope()`."""
 
   def __init__(self, strategy):
-    self._distribution_strategy = strategy
+    self._strategy = strategy
 
   def __enter__(self):
-    return self._distribution_strategy
+    return self._strategy
 
   def __exit__(self, exception_type, exception_value, traceback):
     del exception_type, exception_value, traceback
@@ -964,12 +964,12 @@ class DistributionStrategyExtended(object):
 
   def _scope(self, strategy):
     """Implementation of DistributionStrategy.scope()."""
-    if distribution_strategy_context.has_distribution_strategy():
+    if distribution_strategy_context.has_strategy():
       _require_cross_replica_context_extended(self)
       return _SameScopeAgainContext(strategy)
 
     def creator_with_resource_vars(*args, **kwargs):
-      _require_distribution_strategy_scope_extended(self)
+      _require_strategy_scope_extended(self)
       kwargs["use_resource"] = True
       return self._create_variable(*args, **kwargs)
 
@@ -1046,12 +1046,12 @@ class DistributionStrategyExtended(object):
       A context manager.
     """
     def create_colocated_variable(next_creator, *args, **kwargs):
-      _require_distribution_strategy_scope_extended(self)
+      _require_strategy_scope_extended(self)
       kwargs["use_resource"] = True
       kwargs["colocate_with"] = colocate_with_variable
       return next_creator(*args, **kwargs)
 
-    _require_distribution_strategy_scope_extended(self)
+    _require_strategy_scope_extended(self)
     return variable_scope.variable_creator_scope(create_colocated_variable)
 
   def _call_dataset_fn(self, dataset_fn):
@@ -1465,7 +1465,7 @@ class ReplicaContext(object):
   """
 
   def __init__(self, strategy, replica_id_in_sync_group):
-    self._distribution_strategy = strategy
+    self._strategy = strategy
     self._thread_context = distribution_strategy_context._InReplicaThreadMode(  # pylint: disable=protected-access
         self)
     self._replica_id_in_sync_group = replica_id_in_sync_group
@@ -1513,17 +1513,16 @@ class ReplicaContext(object):
   def _merge_call(self, merge_fn, args, kwargs):
     """Default implementation for single replica."""
     _push_per_thread_mode(  # thread-local, so not needed with multiple threads
-        distribution_strategy_context._CrossReplicaThreadMode(  # pylint: disable=protected-access
-            self._distribution_strategy))
+        distribution_strategy_context._CrossReplicaThreadMode(self._strategy))  # pylint: disable=protected-access
     try:
-      return merge_fn(self._distribution_strategy, *args, **kwargs)
+      return merge_fn(self._strategy, *args, **kwargs)
     finally:
       _pop_per_thread_mode()
 
   @property
   def num_replicas_in_sync(self):
     """Returns number of replicas over which gradients are aggregated."""
-    return self._distribution_strategy.num_replicas_in_sync
+    return self._strategy.num_replicas_in_sync
 
   @property
   def replica_id_in_sync_group(self):
@@ -1534,13 +1533,13 @@ class ReplicaContext(object):
   @property
   @doc_controls.do_not_generate_docs  # DEPRECATED, use `strategy`
   def distribution_strategy(self):
-    """DEPRECATED: use `self.stratgey` instead."""
-    return self._distribution_strategy
+    """DEPRECATED: use `self.strategy` instead."""
+    return self._strategy
 
   @property
   def strategy(self):
     """The current `tf.distribute.Strategy` object."""
-    return self._distribution_strategy
+    return self._strategy
 
   @property
   def devices(self):
@@ -1574,11 +1573,11 @@ class _DefaultDistributionExtended(DistributionStrategyExtended):
 
   def _scope(self, strategy):
     """Context manager setting a variable creator and `self` as current."""
-    if distribution_strategy_context.has_distribution_strategy():
+    if distribution_strategy_context.has_strategy():
       raise RuntimeError("Must not nest tf.distribute.Strategy scopes.")
 
     def creator(next_creator, *args, **kwargs):
-      _require_distribution_strategy_scope_strategy(strategy)
+      _require_strategy_scope_strategy(strategy)
       return next_creator(*args, **kwargs)
 
     return _CurrentDistributionContext(
@@ -1586,7 +1585,7 @@ class _DefaultDistributionExtended(DistributionStrategyExtended):
 
   def colocate_vars_with(self, colocate_with_variable):
     """Does not require `self.scope`."""
-    _require_distribution_strategy_scope_extended(self)
+    _require_strategy_scope_extended(self)
     return ops.colocate_with(colocate_with_variable)
 
   def _distribute_dataset(self, dataset_fn):
@@ -1695,7 +1694,7 @@ _original_from_proto = resource_variable_ops._from_proto_fn
 
 
 def _from_proto_fn(v, import_scope=None):
-  if distribution_strategy_context.has_distribution_strategy():
+  if distribution_strategy_context.has_strategy():
     raise NotImplementedError(
         "Deserialization of variables is not yet supported when using a "
         "tf.distribute.Strategy.")
diff --git a/tensorflow/python/distribute/distribute_lib_test.py b/tensorflow/python/distribute/distribute_lib_test.py
index d63d1fe3c3..c147849e5d 100644
--- a/tensorflow/python/distribute/distribute_lib_test.py
+++ b/tensorflow/python/distribute/distribute_lib_test.py
@@ -64,9 +64,9 @@ def _assert_in_default_state(t):
              distribution_strategy_context.get_replica_context())
   t.assertIs(None, distribution_strategy_context.get_cross_replica_context())
   t.assertFalse(distribution_strategy_context.in_cross_replica_context())
-  t.assertIs(distribution_strategy_context._get_default_distribution_strategy(),
-             distribution_strategy_context.get_distribution_strategy())
-  t.assertFalse(distribution_strategy_context.has_distribution_strategy())
+  t.assertIs(distribution_strategy_context._get_default_strategy(),
+             distribution_strategy_context.get_strategy())
+  t.assertFalse(distribution_strategy_context.has_strategy())
 
 
 class TestStrategyTest(test.TestCase):
@@ -81,9 +81,9 @@ class TestStrategyTest(test.TestCase):
       self.assertIs(None,
                     distribution_strategy_context.get_cross_replica_context())
       self.assertFalse(distribution_strategy_context.in_cross_replica_context())
-      self.assertTrue(distribution_strategy_context.has_distribution_strategy())
+      self.assertTrue(distribution_strategy_context.has_strategy())
       self.assertIs(dist,
-                    distribution_strategy_context.get_distribution_strategy())
+                    distribution_strategy_context.get_strategy())
       self.assertEqual("foo", replica_context.merge_call(None, test_arg="foo"))
       expected_value = _get_test_variable(
           "bar", variable_scope.VariableSynchronization.AUTO,
@@ -105,9 +105,9 @@ class TestStrategyTest(test.TestCase):
       self.assertIs(dist,
                     distribution_strategy_context.get_cross_replica_context())
       self.assertTrue(distribution_strategy_context.in_cross_replica_context())
-      self.assertTrue(distribution_strategy_context.has_distribution_strategy())
+      self.assertTrue(distribution_strategy_context.has_strategy())
       self.assertIs(dist,
-                    distribution_strategy_context.get_distribution_strategy())
+                    distribution_strategy_context.get_strategy())
       expected_value = _get_test_variable(
           "baz", variable_scope.VariableSynchronization.AUTO,
           variable_scope.VariableAggregation.NONE)
@@ -139,16 +139,16 @@ class DefaultDistributionStrategyTest(test.TestCase):
 
     def merge_fn(dist, s):
       self.assertIs(
-          distribution_strategy_context._get_default_distribution_strategy(),
+          distribution_strategy_context._get_default_strategy(),
           dist)
       self.assertIs(None, distribution_strategy_context.get_replica_context())
       self.assertIs(dist,
                     distribution_strategy_context.get_cross_replica_context())
       self.assertTrue(distribution_strategy_context.in_cross_replica_context())
       self.assertIs(dist,
-                    distribution_strategy_context.get_distribution_strategy())
+                    distribution_strategy_context.get_strategy())
       self.assertFalse(
-          distribution_strategy_context.has_distribution_strategy())
+          distribution_strategy_context.has_strategy())
       return "foo_" + s
 
     replica_ctx = distribution_strategy_context.get_replica_context()
diff --git a/tensorflow/python/distribute/distribution_strategy_context.py b/tensorflow/python/distribute/distribution_strategy_context.py
index 78e096e286..e6648bf7c4 100644
--- a/tensorflow/python/distribute/distribution_strategy_context.py
+++ b/tensorflow/python/distribute/distribution_strategy_context.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Utility to get distribution strategy related contexts."""
+"""Utility to get tf.distribute.Strategy related contexts."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -31,29 +31,27 @@ distribute_lib = LazyLoader(
 
 # ------------------------------------------------------------------------------
 # Internal API for setting the current thread mode as being either in a
-# replica or cross-replica context for a particular distribution strategy.
+# replica or cross-replica context for a particular tf.distribute.Strategy.
 
 
 class _ThreadMode(object):
 
   def __init__(self, dist, cross, replica):
-    self.distribution_strategy = dist
+    self.strategy = dist
     self.cross_replica_context = cross
     self.replica_context = replica
 
 
 class _CrossReplicaThreadMode(_ThreadMode):
 
-  def __init__(self, distribution_strategy):
-    _ThreadMode.__init__(
-        self, distribution_strategy, distribution_strategy, None)
+  def __init__(self, strategy):
+    _ThreadMode.__init__(self, strategy, strategy, None)
 
 
 class _InReplicaThreadMode(_ThreadMode):
 
   def __init__(self, replica_ctx):
-    _ThreadMode.__init__(
-        self, replica_ctx.distribution_strategy, None, replica_ctx)
+    _ThreadMode.__init__(self, replica_ctx.strategy, None, replica_ctx)
 
 
 def _push_per_thread_mode(context):
@@ -71,7 +69,7 @@ class _DefaultReplicaThreadMode(_ThreadMode):
   """
 
   def __init__(self):
-    _ThreadMode.__init__(self, _get_default_distribution_strategy(), None,
+    _ThreadMode.__init__(self, _get_default_strategy(), None,
                          _get_default_replica_context())
 
 
@@ -129,7 +127,7 @@ def get_cross_replica_context():
   """Returns the current tf.distribute.Strategy if in a cross-replica context.
 
   DEPRECATED: Please use `in_cross_replica_context()` and
-  `get_distribution_strategy()` instead.
+  `get_strategy()` instead.
 
   Note that execution:
 
@@ -174,7 +172,7 @@ def in_cross_replica_context():
 
 
 @tf_export("distribute.get_strategy")
-def get_distribution_strategy():
+def get_strategy():
   """Returns the current `tf.distribute.Strategy` object.
 
   Typically only used in a cross-replica context:
@@ -186,47 +184,45 @@ def get_distribution_strategy():
   ```
 
   Returns:
-    A `tf.distribute.Strategy` object. Inside a
-    `with distribution_strategy.scope()` block, it returns
-    `distribution_strategy`, otherwise it returns the default
-    (single-replica) `tf.distribute.Strategy` object.
+    A `tf.distribute.Strategy` object. Inside a `with strategy.scope()` block,
+    it returns `strategy`, otherwise it returns the default (single-replica)
+    `tf.distribute.Strategy` object.
   """
-  return _get_per_thread_mode().distribution_strategy
+  return _get_per_thread_mode().strategy
 
 
 @tf_export("distribute.has_strategy")
-def has_distribution_strategy():
+def has_strategy():
   """Return if there is a current non-default `tf.distribute.Strategy`.
 
   Returns:
     True if inside a `with strategy.scope():`.
   """
-  return get_distribution_strategy() is not _get_default_distribution_strategy()
+  return get_strategy() is not _get_default_strategy()
 
 
 # ------------------------------------------------------------------------------
-# Defaults that are used when no distribution strategy is explicitly created.
+# Defaults that are used when no tf.distribute.Strategy is explicitly created.
 # We create them lazily in a function so that we can workaround the circular
 # dependency on distribute_lib. See lazy loader at the top of this file.
 
 _defaults = {
-    "distribution_strategy": None,
+    "strategy": None,
     "replica_context": None,
     "replica_mode": None
 }
 
 
-def _get_default_distribution_strategy():
-  if _defaults["distribution_strategy"] is None:
-    _defaults["distribution_strategy"] = (
-        distribute_lib._DefaultDistributionStrategy())  # pylint: disable=protected-access
-  return _defaults["distribution_strategy"]
+def _get_default_strategy():
+  if _defaults["strategy"] is None:
+    _defaults["strategy"] = distribute_lib._DefaultDistributionStrategy()  # pylint: disable=protected-access
+  return _defaults["strategy"]
 
 
 def _get_default_replica_context():
   if _defaults["replica_context"] is None:
     _defaults["replica_context"] = distribute_lib.ReplicaContext(
-        _get_default_distribution_strategy(), replica_id_in_sync_group=0)
+        _get_default_strategy(), replica_id_in_sync_group=0)
   return _defaults["replica_context"]
 
 
@@ -234,3 +230,8 @@ def _get_default_replica_mode():
   if _defaults["replica_mode"] is None:
     _defaults["replica_mode"] = _DefaultReplicaThreadMode()
   return _defaults["replica_mode"]
+
+
+# Aliases for compatibility with old names.
+get_distribution_strategy = get_strategy
+has_distribution_strategy = has_strategy
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index 60b5232e16..4f29d916c4 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -886,5 +886,4 @@ class MirroredReplicaContext(distribute_lib.ReplicaContext):
   def devices(self):
     distribute_lib.require_replica_context(self)
     replica_id = tensor_util.constant_value(self._replica_id_in_sync_group)
-    extended = self._distribution_strategy.extended
-    return extended.worker_devices_by_replica[replica_id]
+    return [self._strategy.extended.worker_devices_by_replica[replica_id]]
-- 
GitLab


From a0e2d093973904208bb2e53b3e29c9fbc219a3fe Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 19 Dec 2018 11:00:03 -0800
Subject: [PATCH 828/873] Fix a compiler warning in TF core triggered
 frequently when building XLA's Python bindings on Mac OS X.

PiperOrigin-RevId: 226199843
---
 tensorflow/core/framework/node_def_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/framework/node_def_util.h b/tensorflow/core/framework/node_def_util.h
index 4e4a5c38d5..619d44e65b 100644
--- a/tensorflow/core/framework/node_def_util.h
+++ b/tensorflow/core/framework/node_def_util.h
@@ -29,7 +29,7 @@ limitations under the License.
 namespace tensorflow {
 
 class Node;
-class NodeDebugInfo;
+struct NodeDebugInfo;
 
 // We forward declare protos so that kernels don't need to depend on them
 class NodeDef;
-- 
GitLab


From f66961fe7987bbb2c41c8dec56bb80990c465701 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Wed, 19 Dec 2018 11:11:31 -0800
Subject: [PATCH 829/873] Add GPU implementation for FusedConv2D:
 Conv+BiasAdd+Relu

PiperOrigin-RevId: 226202011
---
 .../core/grappler/optimizers/remapper.cc      |   1 +
 tensorflow/core/kernels/BUILD                 |   4 +-
 tensorflow/core/kernels/conv_ops_fused.cc     | 797 ++++++++++++++----
 tensorflow/core/kernels/conv_ops_test.cc      | 185 ++--
 tensorflow/core/ops/nn_ops.cc                 |   1 +
 5 files changed, 768 insertions(+), 220 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc
index 3fb3f2b0ec..f0c81f29e6 100644
--- a/tensorflow/core/grappler/optimizers/remapper.cc
+++ b/tensorflow/core/grappler/optimizers/remapper.cc
@@ -366,6 +366,7 @@ void CopyConv2DAttributes(const NodeDef* conv2d, NodeDef* fused_conv2d,
   (*attr)["padding"] = src_attr.at("padding");
   (*attr)["dilations"] = src_attr.at("dilations");
   (*attr)["data_format"] = src_attr.at("data_format");
+  (*attr)["use_cudnn_on_gpu"] = src_attr.at("use_cudnn_on_gpu");
 
   auto* fused_ops_attr = (*attr)["fused_ops"].mutable_list();
   for (const string& fused_op : fused_ops) {
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 73c11dab92..74a6d5f6bb 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -1272,7 +1272,7 @@ tf_cc_test(
     }),
 )
 
-tf_cc_test(
+tf_cuda_cc_test(
     name = "conv_ops_test",
     size = "medium",
     srcs = ["conv_ops_test.cc"],
@@ -1291,6 +1291,7 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
+        "@com_google_absl//absl/algorithm:container",
     ],
 )
 
@@ -3640,6 +3641,7 @@ tf_kernel_library(
         ":image_resizer_state",
         ":fill_functor",
         ":ops_util",
+        "@com_google_absl//absl/strings",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
diff --git a/tensorflow/core/kernels/conv_ops_fused.cc b/tensorflow/core/kernels/conv_ops_fused.cc
index 798a7325cd..fd0c565677 100644
--- a/tensorflow/core/kernels/conv_ops_fused.cc
+++ b/tensorflow/core/kernels/conv_ops_fused.cc
@@ -22,11 +22,25 @@ limitations under the License.
 //
 // Kernels for convolutions fused with image transformations (resize and mirror
 // padding) defined in `conv_ops_fused_image_transform.cc`.
+//
+// For the CPU device we implement fusion with an Eigen tensor contraction
+// output kernel. For the GPU device we rely on CuDNN primitives.
+//
+// NOTE: GPU only supports fusion of Conv2D + BiasAdd + <optional Relu>.
 
+#define USE_EIGEN_TENSOR
 #define EIGEN_USE_THREADS
 
+#if GOOGLE_CUDA
+#define EIGEN_USE_GPU
+#endif  // GOOGLE_CUDA
+
 #include <string>
 #include <vector>
+
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/substitute.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/register_types.h"
 #include "tensorflow/core/framework/tensor.h"
@@ -36,15 +50,50 @@ limitations under the License.
 #include "tensorflow/core/kernels/conv_ops.h"
 #include "tensorflow/core/kernels/ops_util.h"
 #include "tensorflow/core/util/tensor_format.h"
+#include "tensorflow/core/util/use_cudnn.h"
+
+#if GOOGLE_CUDA
+#include "cuda/include/cudnn.h"
+#include "tensorflow/core/kernels/conv_ops_gpu.h"
+#include "tensorflow/core/platform/stream_executor.h"
+#endif  // GOOGLE_CUDA
 
 namespace tensorflow {
 namespace {
 
-typedef Eigen::ThreadPoolDevice CPUDevice;
+using CPUDevice = ::Eigen::ThreadPoolDevice;
+using GPUDevice = ::Eigen::GpuDevice;
+
+// Supported Conv2D fusions. Not all of them supported on all type of devices.
+enum class FusedComputationType {
+  // NOTE(ezhulenev): CuDNN `cudnnConvolutionBiasActivationForward` supports
+  // identity activation function, it in theory should allow to fuse convolution
+  // with BiasAdd, but in practice it doesn't work, cuDNN ignores this parameter
+  // and always does Relu activation.
+  kBiasAdd,                // CPU
+  kBiasAddWithRelu,        // CPU and GPU
+  kFusedBatchNorm,         // CPU only
+  kFusedBatchNormWithRelu  // CPU only
+};
+
+// We have to pass around additional arguments for all possible fusion types.
+struct FusedComputationArgs {
+  float epsilon = 0.0;  // Used by `FusedBatchNorm` fusion only
+};
+
+template <typename Device, typename T>
+struct LaunchFusedConv2DOp {
+  void operator()(OpKernelContext* context, bool use_cudnn,
+                  bool cudnn_use_autotune, const Tensor& input,
+                  const Tensor& filter, FusedComputationType fusion,
+                  const FusedComputationArgs& fusion_args,
+                  const Conv2DParameters& params,
+                  const Conv2DDimensions& dimensions, Tensor* output);
+};
 
 // Type aliases for the unaligned tensors (tensor maps) used in output kernels.
 template <typename T>
-struct OutputTypes {
+struct Unaligned {
   // There is no guarantee that the output block passed to the output kernel
   // will be aligned.
 
@@ -99,8 +148,8 @@ struct Relu {
 // depends only on a channel value (e.g. add channel bias).
 
 // Output kernel that fuses BiasAdd operation into the output of tensor
-// contraction + any other transformation defined by Transform.
-template <typename T, typename Transform = Identity>
+// contraction + activation function defined by Activation.
+template <typename T, typename Activation = Identity>
 struct BiasAddOutputKernel {
   explicit BiasAddOutputKernel(const T* bias_data) : bias_data(bias_data) {}
 
@@ -112,13 +161,13 @@ struct BiasAddOutputKernel {
     DCHECK(params.swapped_arguments);
 
     const T* bias_base = bias_data + i;
-    typename OutputTypes<T>::ConstTensor bias(bias_base, num_rows);
+    typename Unaligned<T>::ConstTensor bias(bias_base, num_rows);
 
     for (int col = 0; col < num_cols; ++col) {
       T* output_base = &output_mapper(0, col);
-      typename OutputTypes<T>::Tensor output(output_base, num_rows);
+      typename Unaligned<T>::Tensor output(output_base, num_rows);
       const auto expr = output + bias;
-      output = Transform::template apply<decltype(expr)>(expr);
+      output = Activation::template apply<decltype(expr)>(expr);
     }
   }
 
@@ -127,8 +176,8 @@ struct BiasAddOutputKernel {
 };
 
 // Output kernel that fuses FusedBatchNorm operation into the output of tensor
-// contraction + any other transformation defined by Transform.
-template <typename T, typename Transform = Identity>
+// contraction + activation function defined by Activation.
+template <typename T, typename Activation = Identity>
 struct FusedBatchNormOutputKernel {
   FusedBatchNormOutputKernel(T epsilon, const T* scaling_factor_data,
                              const T* offset_data, const T* estimated_mean_data)
@@ -148,19 +197,19 @@ struct FusedBatchNormOutputKernel {
     const T* offset_base = offset_data + i;
     const T* mean_base = estimated_mean_data + i;
 
-    typename OutputTypes<T>::ConstTensor scaling_factor(scaling_factor_base,
-                                                        num_rows);
-    typename OutputTypes<T>::ConstTensor offset(offset_base, num_rows);
-    typename OutputTypes<T>::ConstTensor mean(mean_base, num_rows);
+    typename Unaligned<T>::ConstTensor scaling_factor(scaling_factor_base,
+                                                      num_rows);
+    typename Unaligned<T>::ConstTensor offset(offset_base, num_rows);
+    typename Unaligned<T>::ConstTensor mean(mean_base, num_rows);
 
     for (int col = 0; col < num_cols; ++col) {
       T* output_base = &output_mapper(0, col);
-      typename OutputTypes<T>::Tensor output(output_base, num_rows);
+      typename Unaligned<T>::Tensor output(output_base, num_rows);
 
       auto scaled = (output - mean) * scaling_factor;
       auto shifted = scaled + offset;
 
-      output = Transform::template apply<decltype(shifted)>(shifted);
+      output = Activation::template apply<decltype(shifted)>(shifted);
     }
   }
 
@@ -182,16 +231,18 @@ using WithFusedBatchNorm = FusedBatchNormOutputKernel<T>;
 template <typename T>
 using WithFusedBatchNormAndRelu = FusedBatchNormOutputKernel<T, Relu>;
 
+// This is CPU-only implementation that uses Eigen contraction output kernels.
+//
 // Dispatch 2D convolution to the appropriate primitive operation:
 //   (1) MatMul for the case of 1x1 convolution.
 //   (2) MatMul for the case when filter size equals to the input size.
 //   (3) General spatial 2D convolution for all other cases.
 template <typename T>
-class LaunchConv2DWithOutputKernel {
+class LaunchFusedConv2DWithOutputKernel {
  public:
-  LaunchConv2DWithOutputKernel(int row_stride, int col_stride,      //
-                               int row_dilation, int col_dilation,  //
-                               Padding padding)
+  LaunchFusedConv2DWithOutputKernel(int row_stride, int col_stride,      //
+                                    int row_dilation, int col_dilation,  //
+                                    Padding padding)
       : row_stride_(row_stride),
         col_stride_(col_stride),
         row_dilation_(row_dilation),
@@ -251,118 +302,29 @@ class LaunchConv2DWithOutputKernel {
   const Padding padding_;
 };
 
-}  // namespace
-
-// Conv2D op with fused output kernels. Supports only CPUDevice.
 template <typename T>
-class FusedConv2DOp : public OpKernel {
- public:
-  explicit FusedConv2DOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, InitConv2DParameters(context, &params_));
-
-    // 'fused_ops' and 'num_args' attributes are specified by the Grappler
-    // Remapper optimizer.
-
-    std::vector<string> fused_ops;
-    OP_REQUIRES_OK(context, context->GetAttr("fused_ops", &fused_ops));
-    OP_REQUIRES(context, !fused_ops.empty(),
-                errors::InvalidArgument(
-                    "Fused Conv2D must have at least one fused op."));
-
-    int num_args;
-    OP_REQUIRES_OK(context, context->GetAttr("num_args", &num_args));
-
-    // TODO(ezhulenev): Add support for fusion element-wise op chains defined
-    // at runtime, e.g. Relu+Sqrt+Tanh+etc...
-
-    // Match combination of fused ops to one of the supported fusions.
-    if (FusedOpsMatches(fused_ops, {"BiasAdd"})) {
-      fused_computation_ = FusedComputationType::kBiasAdd;
-    } else if (FusedOpsMatches(fused_ops, {"BiasAdd", "Relu"})) {
-      fused_computation_ = FusedComputationType::kBiasAddWithRelu;
-    } else if (FusedOpsMatches(fused_ops, {"FusedBatchNorm"})) {
-      fused_computation_ = FusedComputationType::kFusedBatchNorm;
-    } else if (FusedOpsMatches(fused_ops, {"FusedBatchNorm", "Relu"})) {
-      fused_computation_ = FusedComputationType::kFusedBatchNormWithRelu;
-    } else {
-      OP_REQUIRES(context, false,
-                  errors::Unimplemented("Fusion is not implemented: [",
-                                        str_util::Join(fused_ops, ","), "]"));
-    }
-
-    // Depending on a picked fusion type validate fusion-specific arguments.
-
-    if (fused_computation_ == FusedComputationType::kBiasAdd ||
-        fused_computation_ == FusedComputationType::kBiasAddWithRelu) {
-      OP_REQUIRES(context, num_args == 1,
-                  errors::InvalidArgument(
-                      "Fused Conv2D must have one extra argument: bias."));
-    }
-
-    if (fused_computation_ == FusedComputationType::kFusedBatchNorm ||
-        fused_computation_ == FusedComputationType::kFusedBatchNormWithRelu) {
-      OP_REQUIRES(
-          context, num_args == 4,
-          errors::InvalidArgument("Fused FusedBatchNorm must have four extra "
-                                  "arguments: scale, offset, mean, variance."));
-      OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon_));
-    }
-  }
-
-  void Compute(OpKernelContext* context) override {
-    // Input tensor is of the following dimensions:
-    // [ batch, in_rows, in_cols, in_depth ]
-    const Tensor& input = context->input(0);
-
-    // Input filter is of the following dimensions:
-    // [ filter_rows, filter_cols, in_depth, out_depth]
-    const Tensor& filter = context->input(1);
-
-    Conv2DDimensions dimensions;
-    OP_REQUIRES_OK(context,
-                   ComputeConv2DDimension(params_, input, filter, &dimensions));
-
-    TensorShape out_shape = ShapeFromFormat(
-        params_.data_format, dimensions.batch, dimensions.out_rows,
-        dimensions.out_cols, dimensions.out_depth);
-
-    // Output tensor is of the following dimensions:
-    // [ in_batch, out_rows, out_cols, out_depth ]
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
-
-    VLOG(2) << "FusedConv2DWithBias: in_depth = " << dimensions.in_depth
-            << ", patch_depth = " << dimensions.patch_depth
-            << ", input_cols = " << dimensions.input_cols
-            << ", filter_cols = " << dimensions.filter_cols
-            << ", input_rows = " << dimensions.input_rows
-            << ", filter_rows = " << dimensions.filter_rows
-            << ", stride_rows = " << dimensions.stride_rows
-            << ", stride_cols = " << dimensions.stride_cols
-            << ", dilation_rows = " << dimensions.dilation_rows
-            << ", dilation_cols = " << dimensions.dilation_cols
-            << ", out_depth = " << dimensions.out_depth;
-
-    // If there is nothing to compute, return.
-    if (out_shape.num_elements() == 0) {
-      return;
-    }
-
-    OP_REQUIRES(context, params_.data_format == FORMAT_NHWC,
-                errors::Unimplemented("Fused conv implementation only supports "
-                                      "NHWC tensor format for now."));
+struct LaunchFusedConv2DOp<CPUDevice, T> {
+  void operator()(OpKernelContext* context, bool use_cudnn,
+                  bool cudnn_use_autotune, const Tensor& input,
+                  const Tensor& filter, const FusedComputationType fusion,
+                  const FusedComputationArgs& fusion_args,
+                  const Conv2DParameters& params,
+                  const Conv2DDimensions& dimensions, Tensor* output) {
     OP_REQUIRES(context, dimensions.in_depth == filter.dim_size(2),
                 errors::Unimplemented("Fused conv implementation does not "
                                       "support grouped convolutions for now."));
+    OP_REQUIRES(context, params.data_format == FORMAT_NHWC,
+                errors::Unimplemented("Fused conv implementation only supports "
+                                      "NHWC tensor format for now."));
 
     BiasAddArgs bias_add;
     FusedBatchNormArgs fused_batch_norm;
 
-    LaunchConv2DWithOutputKernel<T> conv2d(
+    LaunchFusedConv2DWithOutputKernel<T> conv2d(
         dimensions.stride_rows, dimensions.stride_cols,
-        dimensions.dilation_rows, dimensions.dilation_cols, params_.padding);
+        dimensions.dilation_rows, dimensions.dilation_cols, params.padding);
 
-    switch (fused_computation_) {
+    switch (fusion) {
       case FusedComputationType::kBiasAdd:
         OP_REQUIRES_OK(context, InitBiasAddArgs(context, &bias_add));
         conv2d(WithBiasAdd<T>(bias_add.bias_add_data), context, input, filter,
@@ -377,8 +339,9 @@ class FusedConv2DOp : public OpKernel {
 
       case FusedComputationType::kFusedBatchNorm:
         OP_REQUIRES_OK(context,
-                       InitFusedBatchNormArgs(context, &fused_batch_norm));
-        conv2d(WithFusedBatchNorm<T>(epsilon_,
+                       InitFusedBatchNormArgs(context, fusion_args.epsilon,
+                                              &fused_batch_norm));
+        conv2d(WithFusedBatchNorm<T>(fusion_args.epsilon,
                                      fused_batch_norm.scaling_factor.data(),
                                      fused_batch_norm.offset_data,
                                      fused_batch_norm.estimated_mean_data),
@@ -387,9 +350,10 @@ class FusedConv2DOp : public OpKernel {
 
       case FusedComputationType::kFusedBatchNormWithRelu:
         OP_REQUIRES_OK(context,
-                       InitFusedBatchNormArgs(context, &fused_batch_norm));
+                       InitFusedBatchNormArgs(context, fusion_args.epsilon,
+                                              &fused_batch_norm));
         conv2d(WithFusedBatchNormAndRelu<T>(
-                   epsilon_, fused_batch_norm.scaling_factor.data(),
+                   fusion_args.epsilon, fused_batch_norm.scaling_factor.data(),
                    fused_batch_norm.offset_data,
                    fused_batch_norm.estimated_mean_data),
                context, input, filter, output);
@@ -398,11 +362,6 @@ class FusedConv2DOp : public OpKernel {
   }
 
  private:
-  bool FusedOpsMatches(const std::vector<string>& fused_ops,
-                       const std::vector<string>& expected) const {
-    return fused_ops == expected;
-  }
-
   struct BiasAddArgs {
     const T* bias_add_data = nullptr;
   };
@@ -438,7 +397,7 @@ class FusedConv2DOp : public OpKernel {
     return Status::OK();
   }
 
-  Status InitFusedBatchNormArgs(OpKernelContext* context,
+  Status InitFusedBatchNormArgs(OpKernelContext* context, float epsilon,
                                 FusedBatchNormArgs* args) const {
     const Tensor& scale = context->input(2);
     const Tensor& offset = context->input(3);
@@ -466,44 +425,590 @@ class FusedConv2DOp : public OpKernel {
 
     // Precompute scaling factor once for all output blocks (kernels).
     args->scaling_factor =
-        (estimated_variance.flat<T>() + static_cast<T>(epsilon_)).rsqrt() *
+        (estimated_variance.flat<T>() + static_cast<T>(epsilon)).rsqrt() *
         scale.flat<T>();
 
     return Status::OK();
   }
 
 #undef TF_REQUIRES
+};
 
-  // Element-wise ops applied to the result of Conv2D.
-  // TODO(ezhulenev): Add support for runtime-defined op chains.
-  enum class FusedComputationType {
-    kBiasAdd,
-    kBiasAddWithRelu,
-    kFusedBatchNorm,
-    kFusedBatchNormWithRelu
-  };
+#if GOOGLE_CUDA
+
+// Encapsulate the default shape information that is used by the convolution
+// operation, and add an activation mode for the fusion.
+class FusedConvParameters : public ConvParameters {
+ public:
+  FusedConvParameters(const ConvParameters& base,
+                      const se::dnn::ActivationMode activation_mode)
+      : ConvParameters(base), activation_mode_(activation_mode) {}
+
+  string ToString() const {
+    return absl::StrCat(ConvParameters::ToString(), ", ", activation_mode_);
+  }
+
+ private:
+  friend bool operator==(const FusedConvParameters& lhs,
+                         const FusedConvParameters& rhs);
+
+  using ParameterDataType =
+      std::tuple<ConvParameters::ParameterDataType, se::dnn::ActivationMode>;
+
+  ParameterDataType get_data_as_tuple() const {
+    return std::make_tuple(ConvParameters::get_data_as_tuple(),
+                           activation_mode_);
+  }
+
+  se::dnn::ActivationMode activation_mode_;
+};
+
+bool operator==(const FusedConvParameters& lhs,
+                const FusedConvParameters& rhs) {
+  return lhs.get_data_as_tuple() == rhs.get_data_as_tuple();
+}
+
+bool operator!=(const FusedConvParameters& lhs,
+                const FusedConvParameters& rhs) {
+  return !(lhs == rhs);
+}
+
+// A dummy type to group forward convolution autotune results together.
+struct FusedConvAutoTuneGroup {
+  static string name() { return "FusedConv"; }
+};
+
+using AutoTuneFusedConv =
+    AutoTuneSingleton<FusedConvAutoTuneGroup, FusedConvParameters,
+                      se::dnn::AlgorithmConfig>;
+
+int64 ConvolveScratchSize() {
+  static int64 convolve_scratch_size = GetDnnWorkspaceLimit(
+      // default value is in bytes despite the name of the environment variable
+      "TF_CUDNN_WORKSPACE_LIMIT_IN_MB", 1LL << 32  // 4GB
+  );
+  return convolve_scratch_size;
+}
+
+// Finds the best convolutiun algorithm for the given ConvLaunch (cuda
+// convolution on the stream) and parameters, by running all possible
+// algorithms and measuring execution time.
+// TODO(ezhulenev): Move it to conv_ops_gpu.h and share with conv_ops.cc.
+template <typename T, typename ConvLaunch>
+Status FindBestConvolveAlgorithm(const FusedConvParameters& params,
+                                 const ConvLaunch launch,
+                                 OpKernelContext* context, se::Stream* stream,
+                                 se::dnn::AlgorithmConfig* algorithm_config) {
+  // Check if we already have an algorithm selected for the given parameters.
+  if (AutoTuneFusedConv::GetInstance()->Find(params, algorithm_config)) {
+    return Status::OK();
+  }
+
+  // Find all candidate algorithms.
+  std::vector<se::dnn::AlgorithmDesc> algorithms;
+  if (!stream->parent()->GetConvolveAlgorithms(
+          params.ShouldIncludeWinogradNonfusedAlgo<T>(stream->parent()),
+          &algorithms)) {
+    return errors::Unknown(
+        "Failed to get convolution algorithm. This is probably "
+        "because cuDNN failed to initialize, so try looking to "
+        "see if a warning log message was printed above.");
+  }
+
+  se::dnn::ProfileResult best_result;
+  se::dnn::ProfileResult best_result_no_scratch;
+
+  for (auto profile_algorithm : algorithms) {
+    DnnScratchAllocator scratch_allocator(ConvolveScratchSize(), context);
+    se::dnn::ProfileResult profile_result;
+
+    bool cudnn_launch_status =
+        launch(se::dnn::AlgorithmConfig(profile_algorithm), &scratch_allocator,
+               &profile_result);
+
+    if (cudnn_launch_status && profile_result.is_valid()) {
+      if (profile_result.elapsed_time_in_ms() <
+          best_result.elapsed_time_in_ms()) {
+        best_result = profile_result;
+      }
+      if (scratch_allocator.TotalByteSize() == 0 &&
+          profile_result.elapsed_time_in_ms() <
+              best_result_no_scratch.elapsed_time_in_ms()) {
+        best_result_no_scratch = profile_result;
+      }
+    }
+  }
+
+  if (!best_result.is_valid() && !best_result_no_scratch.is_valid()) {
+    return errors::NotFound("No algorithm worked!");
+  }
+  if (best_result.is_valid()) {
+    algorithm_config->set_algorithm(best_result.algorithm());
+  }
+  if (best_result_no_scratch.is_valid()) {
+    algorithm_config->set_algorithm_no_scratch(
+        best_result_no_scratch.algorithm());
+  }
+
+  AutoTuneFusedConv::GetInstance()->Insert(params, *algorithm_config);
+  return Status::OK();
+}
+
+template <typename T>
+struct LaunchFusedConv2DOp<GPUDevice, T> {
+  void operator()(OpKernelContext* context, bool use_cudnn,
+                  bool cudnn_use_autotune, const Tensor& input_param,
+                  const Tensor& filter, FusedComputationType fusion,
+                  const FusedComputationArgs& fusion_args,
+                  const Conv2DParameters& params,
+                  const Conv2DDimensions& dimensions, Tensor* output) {
+    OP_REQUIRES(
+        context,
+        params.data_format == FORMAT_NHWC || params.data_format == FORMAT_NCHW,
+        errors::Unimplemented("Fused conv implementation only supports "
+                              "NHWC and HCHW tensor formats for now."));
+
+    auto* stream = context->op_device_context()->stream();
+    OP_REQUIRES(context, stream, errors::Internal("No GPU stream available."));
+    OP_REQUIRES(
+        context, use_cudnn,
+        errors::Unimplemented("FusedConv2D for GPU is not currently supported "
+                              "without cudnn"));
+
+    OP_REQUIRES(
+        context, fusion == FusedComputationType::kBiasAddWithRelu,
+        errors::Unimplemented("FusedConv2D implementation only supports "
+                              "fusing with `BiasAdd + Relu` for now."));
+
+    Tensor input = input_param;
+
+    const int64 in_batch = GetTensorDim(input, params.data_format, 'N');
+    int64 in_rows = GetTensorDim(input, params.data_format, 'H');
+    int64 in_cols = GetTensorDim(input, params.data_format, 'W');
+    const int64 in_depths = GetTensorDim(input, params.data_format, 'C');
+
+    const int64 patch_rows = filter.dim_size(0);
+    const int64 patch_cols = filter.dim_size(1);
+    const int64 patch_depths = filter.dim_size(2);
+
+    int64 padding_rows = 0;
+    int64 padding_cols = 0;
+    const int64 out_batch = GetTensorDim(*output, params.data_format, 'N');
+    const int64 out_rows = GetTensorDim(*output, params.data_format, 'H');
+    const int64 out_cols = GetTensorDim(*output, params.data_format, 'W');
+    const int64 out_depths = GetTensorDim(*output, params.data_format, 'C');
+
+    // Bias of the following dimensions: [ output_depth ]
+    const Tensor& bias = context->input(2);
+    OP_REQUIRES(context, bias.dims() == 1,
+                errors::InvalidArgument("bias must be 1-dimensional",
+                                        bias.shape().DebugString()));
+    OP_REQUIRES(context, bias.dim_size(0) == out_depths,
+                errors::InvalidArgument("bias depth must be equal to out depth",
+                                        bias.shape().DebugString()));
+
+    if (params.padding == SAME) {
+      // Total padding on rows and cols is
+      // Pr = (R' - 1) * S + (Kr - 1) * Dr + 1 - R
+      // Pc = (C' - 1) * S + (Kc - 1) * Dc + 1 - C
+      // where (R', C') are output dimensions, (R, C) are input dimensions, S
+      // is stride, (Dr, Dc) are dilations, (Kr, Kc) are filter dimensions.
+      // We pad Pr/2 on the left and Pr - Pr/2 on the right, Pc/2 on the top
+      // and Pc - Pc/2 on the bottom.  When Pr or Pc is odd, this means
+      // we pad more on the right and bottom than on the top and left.
+      padding_rows = std::max<int>(
+          0, (out_rows - 1) * dimensions.stride_rows +
+                 (patch_rows - 1) * dimensions.dilation_rows + 1 - in_rows);
+      padding_cols = std::max<int>(
+          0, (out_cols - 1) * dimensions.stride_cols +
+                 (patch_cols - 1) * dimensions.dilation_cols + 1 - in_cols);
+      const bool rows_odd = (padding_rows % 2 != 0);
+      const bool cols_odd = (padding_cols % 2 != 0);
+      if (rows_odd || cols_odd) {
+        Tensor transformed_input;
+        int64 new_in_rows = in_rows + rows_odd;
+        int64 new_in_cols = in_cols + cols_odd;
+        OP_REQUIRES_OK(context,
+                       context->allocate_temp(
+                           DataTypeToEnum<T>::value,
+                           ShapeFromFormat(params.data_format, in_batch,
+                                           new_in_rows, new_in_cols, in_depths),
+                           &transformed_input));
+
+        functor::PadInput<GPUDevice, T, int, 4>()(
+            context->eigen_device<GPUDevice>(),
+            To32Bit(input_param.tensor<T, 4>()), {{0, 0}},
+            {{rows_odd, cols_odd}}, To32Bit(transformed_input.tensor<T, 4>()),
+            params.data_format);
+
+        input = transformed_input;
+        in_rows = new_in_rows;
+        in_cols = new_in_cols;
+      }
+    }
+
+    if (params.data_format == FORMAT_NHWC) {
+      // Convert the input tensor from NHWC to NCHW.
+      TensorShape nchw_shape =
+          ShapeFromFormat(FORMAT_NCHW, in_batch, in_rows, in_cols, in_depths);
+      if (in_depths > 1) {
+        Tensor transformed_input;
+        OP_REQUIRES_OK(context,
+                       context->allocate_temp(DataTypeToEnum<T>::value,
+                                              nchw_shape, &transformed_input));
+        functor::NHWCToNCHW<GPUDevice, T, 4>()(
+            context->eigen_device<GPUDevice>(),
+            const_cast<const Tensor&>(input).tensor<T, 4>(),
+            transformed_input.tensor<T, 4>());
+        input = transformed_input;
+      } else {
+        // If depth <= 1, then just reshape.
+        CHECK(input.CopyFrom(input, nchw_shape));  // Crash OK
+      }
+    }
+
+    CHECK(padding_rows >= 0) << "Negative padding rows";  // Crash OK
+    CHECK(padding_cols >= 0) << "Negative padding cols";  // Crash OK
+
+    se::dnn::ActivationMode dnn_activation_mode;
+    switch (fusion) {
+      case FusedComputationType::kBiasAddWithRelu:
+        dnn_activation_mode = se::dnn::ActivationMode::kRelu;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported fusion type";  // Crash OK
+    }
+
+    se::dnn::BatchDescriptor input_desc;
+    input_desc.set_count(in_batch)
+        .set_feature_map_count(in_depths)
+        .set_height(in_rows)
+        .set_width(in_cols)
+        .set_layout(se::dnn::DataLayout::kBatchDepthYX);
+    se::dnn::FilterDescriptor filter_desc;
+    filter_desc.set_input_filter_height(patch_rows)
+        .set_input_filter_width(patch_cols)
+        .set_input_feature_map_count(patch_depths)
+        .set_output_feature_map_count(filter.dim_size(3));
+    se::dnn::BatchDescriptor bias_desc;
+    bias_desc.set_count(1)
+        .set_height(1)
+        .set_width(1)
+        .set_feature_map_count(out_depths)
+        .set_layout(se::dnn::DataLayout::kBatchDepthYX);
+    se::dnn::ConvolutionDescriptor conv_desc;
+    conv_desc.set_vertical_dilation_rate(dimensions.dilation_rows)
+        .set_horizontal_dilation_rate(dimensions.dilation_cols)
+        .set_vertical_filter_stride(dimensions.stride_rows)
+        .set_horizontal_filter_stride(dimensions.stride_cols)
+        .set_zero_padding_height(padding_rows / 2)
+        .set_zero_padding_width(padding_cols / 2)
+        .set_group_count(in_depths / patch_depths);
+    se::dnn::BatchDescriptor output_desc;
+    output_desc.set_count(out_batch)
+        .set_height(out_rows)
+        .set_width(out_cols)
+        .set_feature_map_count(out_depths)
+        .set_layout(se::dnn::DataLayout::kBatchDepthYX);
+
+    Tensor transformed_filter;
+    OP_REQUIRES_OK(context,
+                   context->allocate_temp(
+                       DataTypeToEnum<T>::value,
+                       TensorShape({filter.dim_size(3), filter.dim_size(2),
+                                    filter.dim_size(0), filter.dim_size(1)}),
+                       &transformed_filter));
+    functor::TransformFilter<GPUDevice, T, int, 4>()(
+        context->eigen_device<GPUDevice>(), FORMAT_OIHW,
+        To32Bit(filter.tensor<T, 4>()),
+        To32Bit(transformed_filter.tensor<T, 4>()));
+
+    Tensor transformed_output;
+    if (params.data_format == FORMAT_NHWC) {
+      // Only allocate temporary memory when a layout transformation is needed.
+      OP_REQUIRES_OK(context,
+                     context->allocate_temp(
+                         DataTypeToEnum<T>::value,
+                         ShapeFromFormat(FORMAT_NCHW, out_batch, out_rows,
+                                         out_cols, out_depths),
+                         &transformed_output));
+    } else {
+      transformed_output = *output;
+    }
+
+    const auto tensor_on_device = [](const Tensor& t) -> se::DeviceMemory<T> {
+      return AsDeviceMemory(t.template flat<T>().data(),
+                            t.template flat<T>().size());
+    };
+
+    se::DeviceMemory<T> input_ptr = tensor_on_device(input);
+    se::DeviceMemory<T> filter_ptr = tensor_on_device(transformed_filter);
+    se::DeviceMemory<T> bias_ptr = tensor_on_device(bias);
+    se::DeviceMemory<T> output_ptr = tensor_on_device(transformed_output);
+
+    // We do not use side inputs, so we can safely pass nullptr.
+    se::DeviceMemory<T> side_input_ptr =
+        AsDeviceMemory(static_cast<T*>(nullptr), 0);
+
+    int device_id = stream->parent()->device_ordinal();
+    DataType dtype = input.dtype();
+    FusedConvParameters conv_parameters = {
+        {
+            in_batch,                      // batch
+            in_depths,                     // in_depths
+            {{in_rows,                     // in_rows
+              in_cols}},                   // in_cols
+            FORMAT_NCHW,                   // compute_data_format
+            out_depths,                    // out_depths
+            {{patch_rows,                  // filter_rows
+              patch_cols,                  // filter_cols
+              patch_depths}},              // filter_depths
+            {{dimensions.dilation_rows,    // dilation_rows
+              dimensions.dilation_cols}},  // dilation_cols
+            {{dimensions.stride_rows,      // stride_rows
+              dimensions.stride_cols}},    // stride_cols
+            {{padding_rows,                // padding_rows
+              padding_cols}},              // padding_cols
+            dtype,                         // tensor datatype
+            device_id,                     // device_id
+        },
+        dnn_activation_mode  // activation_mode
+    };
+
+    // Launch fused convolution with given parameters and scratch allocator.
+    // Record profile result into `profile_result` if it's not nullptr.
+    const auto launch = [&](se::dnn::AlgorithmConfig algorithm_config,
+                            DnnScratchAllocator* scratch_allocator,
+                            se::dnn::ProfileResult* profile_result) -> bool {
+      return stream
+          ->ThenFusedConvolveWithAlgorithm(
+              input_desc, input_ptr,                     // input
+              /*conv_input_scale=*/1.0,                  // input_scale
+              filter_desc, filter_ptr,                   // filter
+              conv_desc,                                 // conv
+              side_input_ptr, /*side_input_scale=*/0.0,  // side_input
+              bias_desc, bias_ptr,                       // bias
+              dnn_activation_mode,                       // activation
+              output_desc, &output_ptr,                  // output
+              scratch_allocator, algorithm_config, profile_result)
+          .ok();
+    };
+
+    se::dnn::AlgorithmConfig algorithm_config;
+    if (cudnn_use_autotune) {
+      OP_REQUIRES_OK(context, FindBestConvolveAlgorithm<T>(
+                                  conv_parameters, launch, context, stream,
+                                  &algorithm_config));
+    }
+
+    DnnScratchAllocator scratch_allocator(ConvolveScratchSize(), context);
+    bool cudnn_launch_status = launch(algorithm_config, &scratch_allocator,
+                                      /*profile_result=*/nullptr);
+    OP_REQUIRES(
+        context, cudnn_launch_status,
+        errors::Internal(absl::Substitute(
+            "cuDNN launch failure: input shape($0) filter shape($1)",
+            input.shape().DebugString(), filter.shape().DebugString())));
+
+    // Convert the output tensor back from NCHW to NHWC.
+    if (params.data_format == FORMAT_NHWC) {
+      functor::NCHWToNHWC<GPUDevice, T, 4>()(
+          context->eigen_device<GPUDevice>(),
+          const_cast<const Tensor&>(transformed_output).tensor<T, 4>(),
+          output->tensor<T, 4>());
+    }
+  }
+};
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace
+
+template <typename Device, typename T>
+class FusedConv2DOp : public OpKernel {
+ public:
+  explicit FusedConv2DOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, InitConv2DParameters(context, &params_));
+
+    OP_REQUIRES_OK(context, context->GetAttr("use_cudnn_on_gpu", &use_cudnn_));
+    use_cudnn_ &= CanUseCudnn();
+    cudnn_use_autotune_ = CudnnUseAutotune();
+
+    // 'fused_ops' and 'num_args' attributes are specified by the Grappler
+    // Remapper optimizer (see grappler/optimizers/remapper.cc).
+
+    std::vector<string> fused_ops;
+    OP_REQUIRES_OK(context, context->GetAttr("fused_ops", &fused_ops));
+    OP_REQUIRES(context, !fused_ops.empty(),
+                errors::InvalidArgument(
+                    "Fused Conv2D must have at least one fused op."));
+
+    int num_args;
+    OP_REQUIRES_OK(context, context->GetAttr("num_args", &num_args));
+
+    // TODO(ezhulenev): Add support for fusion element-wise op chains defined
+    // at runtime, e.g. Relu+Sqrt+Tanh+etc.
+
+    // Match combination of fused ops to one of the supported fusions.
+    if (FusedOpsMatchAndSupportedOnDevice(fused_ops, {"BiasAdd"},
+                                          /*cpu_only=*/true)) {
+      fused_computation_ = FusedComputationType::kBiasAdd;
+    } else if (FusedOpsMatchAndSupportedOnDevice(fused_ops, {"BiasAdd", "Relu"},
+                                                 /*cpu_only=*/false)) {
+      fused_computation_ = FusedComputationType::kBiasAddWithRelu;
+    } else if (FusedOpsMatchAndSupportedOnDevice(fused_ops, {"FusedBatchNorm"},
+                                                 /*cpu_only=*/true)) {
+      fused_computation_ = FusedComputationType::kFusedBatchNorm;
+    } else if (FusedOpsMatchAndSupportedOnDevice(fused_ops,
+                                                 {"FusedBatchNorm", "Relu"},
+                                                 /*cpu_only=*/true)) {
+      fused_computation_ = FusedComputationType::kFusedBatchNormWithRelu;
+    } else {
+      OP_REQUIRES(context, false,
+                  errors::Unimplemented("Fusion is not implemented: [",
+                                        absl::StrJoin(fused_ops, ","), "]"));
+    }
+
+    // Depending on a picked fusion type validate fusion-specific arguments.
+
+    if (fused_computation_ == FusedComputationType::kBiasAdd ||
+        fused_computation_ == FusedComputationType::kBiasAddWithRelu) {
+      OP_REQUIRES(context, num_args == 1,
+                  errors::InvalidArgument(
+                      "Fused Conv2D must have one extra argument: bias."));
+    }
+
+    if (fused_computation_ == FusedComputationType::kFusedBatchNorm ||
+        fused_computation_ == FusedComputationType::kFusedBatchNormWithRelu) {
+      OP_REQUIRES(
+          context, num_args == 4,
+          errors::InvalidArgument("Fused FusedBatchNorm must have four extra "
+                                  "arguments: scale, offset, mean, variance."));
+      OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon_));
+    }
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // Input tensor is of the following dimensions:
+    // [ batch, in_rows, in_cols, in_depth ]
+    const Tensor& input = context->input(0);
+
+    // Input filter is of the following dimensions:
+    // [ filter_rows, filter_cols, in_depth, out_depth]
+    const Tensor& filter = context->input(1);
+
+    Conv2DDimensions dimensions;
+    OP_REQUIRES_OK(context,
+                   ComputeConv2DDimension(params_, input, filter, &dimensions));
+
+    TensorShape out_shape = ShapeFromFormat(
+        params_.data_format, dimensions.batch, dimensions.out_rows,
+        dimensions.out_cols, dimensions.out_depth);
+
+    // Output tensor is of the following dimensions:
+    // [ in_batch, out_rows, out_cols, out_depth ]
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(0, out_shape, &output));
+
+    VLOG(2) << "FusedConv2D: in_depth = " << dimensions.in_depth
+            << ", patch_depth = " << dimensions.patch_depth
+            << ", input_cols = " << dimensions.input_cols
+            << ", filter_cols = " << dimensions.filter_cols
+            << ", input_rows = " << dimensions.input_rows
+            << ", filter_rows = " << dimensions.filter_rows
+            << ", stride_rows = " << dimensions.stride_rows
+            << ", stride_cols = " << dimensions.stride_cols
+            << ", dilation_rows = " << dimensions.dilation_rows
+            << ", dilation_cols = " << dimensions.dilation_cols
+            << ", out_depth = " << dimensions.out_depth;
+
+    // If there is nothing to compute, return.
+    if (out_shape.num_elements() == 0) {
+      return;
+    }
+
+    FusedComputationArgs args;
+    args.epsilon = epsilon_;
+
+    LaunchFusedConv2DOp<Device, T>()(context, use_cudnn_, cudnn_use_autotune_,
+                                     input, filter, fused_computation_, args,
+                                     params_, dimensions, output);
+  }
+
+ private:
+  bool FusedOpsMatchAndSupportedOnDevice(const std::vector<string>& fused_ops,
+                                         const std::vector<string>& expected,
+                                         bool cpu_only) const {
+    if (std::is_same<Device, GPUDevice>::value && cpu_only) {
+      return false;
+    }
+    return fused_ops == expected;
+  }
 
   Conv2DParameters params_;
+  bool use_cudnn_;
+  bool cudnn_use_autotune_;
+
   FusedComputationType fused_computation_;
 
-  // FusedBatchNorm attributes.
-  float epsilon_;
+  float epsilon_;  // Used only in FusedBatchNorm fusion
 
   TF_DISALLOW_COPY_AND_ASSIGN(FusedConv2DOp);
 };
 
-#define REGISTER_FUSED_CONV2D(T)                                      \
+// Registration of the CPU implementations.
+#define REGISTER_FUSED_CPU_CONV2D(T)                                  \
   REGISTER_KERNEL_BUILDER(                                            \
       Name("_FusedConv2D").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
-      FusedConv2DOp<T>);
+      FusedConv2DOp<CPUDevice, T>);
 
 // If we're using the alternative GEMM-based implementation of Conv2D for the
 // CPU implementation, don't register this EigenTensor-based version.
 // TODO(b/119765980): Upgrade upstream Eigen to set `m_can_use_xsmm=false` for
 // contractions with non-default contraction output kernels.
 #if !defined(USE_GEMM_FOR_CONV) && !defined(EIGEN_USE_LIBXSMM)
-TF_CALL_float(REGISTER_FUSED_CONV2D);
-TF_CALL_double(REGISTER_FUSED_CONV2D);
+TF_CALL_float(REGISTER_FUSED_CPU_CONV2D);
+TF_CALL_double(REGISTER_FUSED_CPU_CONV2D);
 #endif  // !USE_GEMM_FOR_CONV
 
+#undef REGISTER_FUSED_CPU_CONV2D
+
+#if GOOGLE_CUDA
+
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+#define DECLARE_GPU_SPEC(T)                                              \
+  template <>                                                            \
+  void TransformFilter<GPUDevice, T, int, 4>::operator()(                \
+      const GPUDevice& d, FilterTensorFormat dst_filter_format,          \
+      typename TTypes<T, 4, int>::ConstTensor in,                        \
+      typename TTypes<T, 4, int>::Tensor out);                           \
+  extern template struct TransformFilter<GPUDevice, T, int, 4>;          \
+  template <>                                                            \
+  void PadInput<GPUDevice, T, int, 4>::operator()(                       \
+      const GPUDevice& d, typename TTypes<T, 4, int>::ConstTensor in,    \
+      const std::array<int, 2>& padding_left,                            \
+      const std::array<int, 2>& padding_right,                           \
+      typename TTypes<T, 4, int>::Tensor out, TensorFormat data_format); \
+  extern template struct PadInput<GPUDevice, T, int, 4>
+
+DECLARE_GPU_SPEC(float);
+DECLARE_GPU_SPEC(Eigen::half);
+DECLARE_GPU_SPEC(double);
+#undef DECLARE_GPU_SPEC
+}  // namespace functor
+
+// Registration of the GPU implementations.
+#define REGISTER_FUSED_GPU_CONV2D(T)                                  \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("_FusedConv2D").Device(DEVICE_GPU).TypeConstraint<T>("T"), \
+      FusedConv2DOp<GPUDevice, T>);
+
+TF_CALL_float(REGISTER_FUSED_GPU_CONV2D);
+TF_CALL_double(REGISTER_FUSED_GPU_CONV2D);
+
+#undef REGISTER_FUSED_GPU_CONV2D
+
+#endif  // GOOGLE_CUDA
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc
index bf98acdecf..ae4132bb0a 100644
--- a/tensorflow/core/kernels/conv_ops_test.cc
+++ b/tensorflow/core/kernels/conv_ops_test.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include "absl/algorithm/container.h"
 #include "tensorflow/cc/ops/const_op.h"
 #include "tensorflow/cc/ops/image_ops.h"
 #include "tensorflow/cc/ops/nn_ops.h"
@@ -182,7 +183,7 @@ class FusedResizePadConvOpTest : public OpsTestBase {
                                bool resize_align_corners,
                                const string& pad_mode, int stride,
                                const string& padding, DataType dtype) {
-    auto root = tensorflow::Scope::NewRootScope();
+    Scope root = tensorflow::Scope::NewRootScope();
     using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
 
     Tensor input_data(DT_FLOAT,
@@ -243,7 +244,7 @@ class FusedResizePadConvOpTest : public OpsTestBase {
                                       int filter_count, const string& pad_mode,
                                       int stride, const string& padding,
                                       DataType dtype) {
-    auto root = tensorflow::Scope::NewRootScope();
+    Scope root = tensorflow::Scope::NewRootScope();
     using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
 
     Tensor input_data(DT_FLOAT,
@@ -544,28 +545,59 @@ class FusedConv2DOpTest : public OpsTestBase {
       const Tensor& mean_data, const Tensor& variance_data, Tensor* out)>;
 
   // Runs a Tensorflow graph defined by the root scope, and fetches the result
-  // of 'fetch' node into the output Tensor.
+  // of 'fetch' node into the output Tensor. Optional `fetch_node` parameter
+  // allows to define a fetch node directly using a NodeDef for the ops that are
+  // not supported by the C++ Api.
   void RunAndFetch(const tensorflow::Scope& root, const string& fetch,
-                   Tensor* output) {
+                   Tensor* output, bool allow_gpu_device,
+                   const NodeDef* fetch_node = nullptr) {
     tensorflow::GraphDef graph;
     TF_ASSERT_OK(root.ToGraphDef(&graph));
 
-    // `FusedConv2D` is available only on CPU, and in this test we don't want to
-    // compare GPU vs CPU numbers, so place all nodes on CPU.
-    for (NodeDef& mutable_node : *graph.mutable_node()) {
-      mutable_node.set_device("/device:CPU:0");
+    if (fetch_node) {
+      *graph.add_node() = *fetch_node;
     }
 
-    // Disable Grappler constant folding for the test graphs.
+    // We really want to make sure that graph executed exactly as we passed it
+    // to the session, so we disable various optimizations.
     tensorflow::SessionOptions session_options;
+
+    // Disable common runtime constant folding.
+    session_options.config.mutable_graph_options()
+        ->mutable_optimizer_options()
+        ->set_opt_level(OptimizerOptions::L0);
+
+    // Disable Grappler optimizations for tests.
     tensorflow::RewriterConfig* cfg =
         session_options.config.mutable_graph_options()
             ->mutable_rewrite_options();
     cfg->set_constant_folding(tensorflow::RewriterConfig::OFF);
+    cfg->set_layout_optimizer(tensorflow::RewriterConfig::OFF);
+    cfg->set_remapping(tensorflow::RewriterConfig::OFF);
 
     std::unique_ptr<tensorflow::Session> session(
         tensorflow::NewSession(session_options));
 
+    std::vector<DeviceAttributes> available_devices;
+    TF_ASSERT_OK(session->ListDevices(&available_devices))
+        << "Failed to get available session devices";
+
+    // Check if session has an available GPU device.
+    const bool has_gpu_device =
+        absl::c_any_of(available_devices, [](const DeviceAttributes& device) {
+          return device.device_type() == DEVICE_GPU;
+        });
+
+    // Some of the `FusedConv2D` fusion types are implemented only for CPU, and
+    // in this test we don't want to compare GPU vs CPU numbers, so place all
+    // nodes on CPU in this case.
+    const bool place_all_on_gpu = allow_gpu_device && has_gpu_device;
+
+    const string device = place_all_on_gpu ? "/device:GPU:0" : "/device:CPU:0";
+    for (NodeDef& mutable_node : *graph.mutable_node()) {
+      mutable_node.set_device(device);
+    }
+
     TF_ASSERT_OK(session->Create(graph));
 
     std::vector<Tensor> unfused_tensors;
@@ -576,41 +608,41 @@ class FusedConv2DOpTest : public OpsTestBase {
 
   void RunConv2DWithBias(const Tensor& input_data, const Tensor& filter_data,
                          const Tensor& bias_data, Tensor* output,
-                         int stride = 1) {
-    auto root = tensorflow::Scope::NewRootScope();
+                         bool allow_gpu_device = false, int stride = 1) {
+    Scope root = tensorflow::Scope::NewRootScope();
 
-    auto conv = ops::Conv2D(
+    ops::Conv2D conv = ops::Conv2D(
         root.WithOpName("conv"),
         ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
         ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
         {1, stride, stride, 1}, "SAME");
 
-    auto with_bias = ops::BiasAdd(
+    ops::BiasAdd with_bias = ops::BiasAdd(
         root.WithOpName("with_bias"), conv,
         ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));
 
-    RunAndFetch(root, "with_bias", output);
+    RunAndFetch(root, "with_bias", output, allow_gpu_device);
   }
 
   void RunConv2DWithBiasAndRelu(const Tensor& input_data,
                                 const Tensor& filter_data,
                                 const Tensor& bias_data, Tensor* output,
-                                int stride = 1) {
-    auto root = tensorflow::Scope::NewRootScope();
+                                bool allow_gpu_device = false, int stride = 1) {
+    Scope root = tensorflow::Scope::NewRootScope();
 
-    auto conv = ops::Conv2D(
+    ops::Conv2D conv = ops::Conv2D(
         root.WithOpName("conv"),
         ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
         ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
         {1, stride, stride, 1}, "SAME");
 
-    auto with_bias = ops::BiasAdd(
+    ops::BiasAdd with_bias = ops::BiasAdd(
         root.WithOpName("with_bias"), conv,
         ops::Const(root.WithOpName("bias"), Input::Initializer(bias_data)));
 
-    auto with_relu = ops::Relu(root.WithOpName("with_relu"), with_bias);
+    ops::Relu with_relu = ops::Relu(root.WithOpName("with_relu"), with_bias);
 
-    RunAndFetch(root, "with_relu", output);
+    RunAndFetch(root, "with_relu", output, allow_gpu_device);
   }
 
   void RunConv2DWithBatchNorm(const Tensor& input_data,
@@ -619,10 +651,10 @@ class FusedConv2DOpTest : public OpsTestBase {
                               const Tensor& offset_data,
                               const Tensor& mean_data,
                               const Tensor& variance_data, Tensor* output,
-                              int stride = 1) {
-    auto root = tensorflow::Scope::NewRootScope();
+                              bool allow_gpu_device = false, int stride = 1) {
+    Scope root = tensorflow::Scope::NewRootScope();
 
-    auto conv = ops::Conv2D(
+    ops::Conv2D conv = ops::Conv2D(
         root.WithOpName("conv"),
         ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
         ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
@@ -631,7 +663,7 @@ class FusedConv2DOpTest : public OpsTestBase {
     ops::FusedBatchNorm::Attrs attr;
     attr = attr.IsTraining(false);
 
-    auto with_fused_batch_norm = ops::FusedBatchNorm(
+    ops::FusedBatchNorm with_fused_batch_norm = ops::FusedBatchNorm(
         root.WithOpName("with_fused_batch_norm"), conv,
         ops::Const(root.WithOpName("scale"), Input::Initializer(scale_data)),
         ops::Const(root.WithOpName("offset"), Input::Initializer(offset_data)),
@@ -639,19 +671,17 @@ class FusedConv2DOpTest : public OpsTestBase {
         ops::Const(root.WithOpName("var"), Input::Initializer(variance_data)),
         attr);
 
-    RunAndFetch(root, "with_fused_batch_norm", output);
+    RunAndFetch(root, "with_fused_batch_norm", output, allow_gpu_device);
   }
 
-  void RunConv2DWithBatchNormAndRelu(const Tensor& input_data,
-                                     const Tensor& filter_data,
-                                     const Tensor& scale_data,
-                                     const Tensor& offset_data,
-                                     const Tensor& mean_data,
-                                     const Tensor& variance_data,
-                                     Tensor* output, int stride = 1) {
-    auto root = tensorflow::Scope::NewRootScope();
+  void RunConv2DWithBatchNormAndRelu(
+      const Tensor& input_data, const Tensor& filter_data,
+      const Tensor& scale_data, const Tensor& offset_data,
+      const Tensor& mean_data, const Tensor& variance_data, Tensor* output,
+      bool allow_gpu_device = false, int stride = 1) {
+    Scope root = tensorflow::Scope::NewRootScope();
 
-    auto conv = ops::Conv2D(
+    ops::Conv2D conv = ops::Conv2D(
         root.WithOpName("conv"),
         ops::Const(root.WithOpName("input"), Input::Initializer(input_data)),
         ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data)),
@@ -660,7 +690,7 @@ class FusedConv2DOpTest : public OpsTestBase {
     ops::FusedBatchNorm::Attrs attr;
     attr = attr.IsTraining(false);
 
-    auto with_fused_batch_norm = ops::FusedBatchNorm(
+    ops::FusedBatchNorm with_fused_batch_norm = ops::FusedBatchNorm(
         root.WithOpName("with_fused_batch_norm"), conv,
         ops::Const(root.WithOpName("scale"), Input::Initializer(scale_data)),
         ops::Const(root.WithOpName("offset"), Input::Initializer(offset_data)),
@@ -668,39 +698,47 @@ class FusedConv2DOpTest : public OpsTestBase {
         ops::Const(root.WithOpName("var"), Input::Initializer(variance_data)),
         attr);
 
-    auto with_relu =
+    ops::Relu with_relu =
         ops::Relu(root.WithOpName("with_relu"), with_fused_batch_norm.y);
 
-    RunAndFetch(root, "with_relu", output);
+    RunAndFetch(root, "with_relu", output, allow_gpu_device);
   }
 
-  void RunFusedConv2DOp(const Tensor& image, const Tensor& filter,
-                        const std::vector<Tensor>& args,
+  void RunFusedConv2DOp(const Tensor& input_data, const Tensor& filter_data,
+                        const std::vector<Tensor>& args_data,
                         const std::vector<string>& fused_ops, Tensor* output,
-                        int stride = 1) {
+                        bool allow_gpu_device = false, int stride = 1) {
+    Scope root = tensorflow::Scope::NewRootScope();
+
     DataType dtype = DataTypeToEnum<T>::v();
-    int num_args = static_cast<int>(args.size());
+    int num_args = static_cast<int>(args_data.size());
 
-    TF_EXPECT_OK(NodeDefBuilder("fused_conv_op", "_FusedConv2D")
-                     .Input(FakeInput(dtype))
-                     .Input(FakeInput(dtype))
+    Output input =
+        ops::Const(root.WithOpName("input"), Input::Initializer(input_data));
+    Output filter =
+        ops::Const(root.WithOpName("filter"), Input::Initializer(filter_data));
+
+    std::vector<NodeDefBuilder::NodeOut> args;
+    for (int i = 0; i < num_args; ++i) {
+      Output arg = ops::Const(root.WithOpName(absl::StrCat("arg", i)),
+                              Input::Initializer(args_data[i]));
+      args.emplace_back(arg.name(), 0, dtype);
+    }
+
+    NodeDef fused_conv2d;
+    TF_EXPECT_OK(NodeDefBuilder("fused_conv", "_FusedConv2D")
+                     .Input({input.name(), 0, dtype})
+                     .Input({filter.name(), 0, dtype})
+                     .Input(args)
                      .Attr("num_args", num_args)
-                     .Input(FakeInput(num_args, dtype))
                      .Attr("T", dtype)
                      .Attr("strides", {1, stride, stride, 1})
                      .Attr("padding", "SAME")
                      .Attr("fused_ops", fused_ops)
-                     .Finalize(node_def()));
-
-    TF_EXPECT_OK(InitOp());
+                     .Finalize(&fused_conv2d));
 
-    AddInputFromArray<T>(image.shape(), image.flat<T>());
-    AddInputFromArray<T>(filter.shape(), filter.flat<T>());
-    for (const Tensor& arg : args)
-      AddInputFromArray<T>(arg.shape(), arg.flat<T>());
-    TF_ASSERT_OK(RunOpKernel());
-
-    *output = *GetOutput(0);
+    RunAndFetch(root, fused_conv2d.name(), output, allow_gpu_device,
+                &fused_conv2d);
   }
 
   void VerifyBiasAddTensorsNear(int depth, int image_width, int image_height,
@@ -732,14 +770,7 @@ class FusedConv2DOpTest : public OpsTestBase {
     ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
     ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());
 
-    // NOTE(ezhulenev): When filter size is equal to the input image size, we
-    // effectevily do element-wise product and full sum reduction, and these
-    // operations intoroduce higher than "normal" numerical errors.
-    if (image_width == filter_size && image_height == filter_size) {
-      test::ExpectTensorNear<T>(conv_2d, fused_conv_2d, 1e-3);
-    } else {
-      test::ExpectClose(conv_2d, fused_conv_2d);
-    }
+    test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6);
   }
 
   void VerifyFusedBatchNormTensorsNear(int depth, int image_width,
@@ -781,14 +812,7 @@ class FusedConv2DOpTest : public OpsTestBase {
     ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype());
     ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape());
 
-    // NOTE(ezhulenev): When filter size is equal to the input image size, we
-    // effectevily do element-wise product and full sum reduction, and these
-    // operations intoroduce higher than "normal" numerical errors.
-    if (image_width == filter_size && image_height == filter_size) {
-      test::ExpectTensorNear<T>(conv_2d, fused_conv_2d, 1e-3);
-    } else {
-      test::ExpectClose(conv_2d, fused_conv_2d);
-    }
+    test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6);
   }
 
   // Verifies that computing Conv2D+BiasAdd in a graph is identical to
@@ -825,14 +849,15 @@ class FusedConv2DOpTest : public OpsTestBase {
     const BiasAddGraphRunner run_default =
         [this](const Tensor& input_data, const Tensor& filter_data,
                const Tensor& bias_data, Tensor* out) {
-          RunConv2DWithBiasAndRelu(input_data, filter_data, bias_data, out);
+          RunConv2DWithBiasAndRelu(input_data, filter_data, bias_data, out,
+                                   /*allow_gpu_device=*/true);
         };
 
     const BiasAddGraphRunner run_fused =
         [this](const Tensor& input_data, const Tensor& filter_data,
                const Tensor& bias_data, Tensor* out) {
           RunFusedConv2DOp(input_data, filter_data, {bias_data},
-                           {"BiasAdd", "Relu"}, out);
+                           {"BiasAdd", "Relu"}, out, /*allow_gpu_device=*/true);
         };
 
     VerifyBiasAddTensorsNear(depth, image_width, image_height,
@@ -1455,4 +1480,18 @@ BM_FusedConv2DWithBatchNormAndRelu(16, 32, 32, 128, 3, 3, 1024, cpu,
 BM_FusedConv2DWithBatchNormAndRelu(32, 32, 32, 128, 3, 3, 1024, cpu,
                                    "3x3 /b 32");
 
+#if GOOGLE_CUDA
+BM_Conv2D(8, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 8");
+BM_Conv2D(16, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 16");
+BM_Conv2D(32, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 32");
+
+BM_Conv2DWithBiasAndRelu(8, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 8");
+BM_Conv2DWithBiasAndRelu(16, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 16");
+BM_Conv2DWithBiasAndRelu(32, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 32");
+
+BM_FusedConv2DWithBiasAndRelu(8, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 8");
+BM_FusedConv2DWithBiasAndRelu(16, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 16");
+BM_FusedConv2DWithBiasAndRelu(32, 32, 32, 128, 3, 3, 1024, gpu, "3x3 /b 32");
+#endif
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc
index ee528c706d..c7cd3140be 100644
--- a/tensorflow/core/ops/nn_ops.cc
+++ b/tensorflow/core/ops/nn_ops.cc
@@ -326,6 +326,7 @@ REGISTER_OP("_FusedConv2D")
     .Attr(GetPaddingAttrString())
     .Attr(GetConvnetDataFormatAttrString())
     .Attr("dilations: list(int) = [1, 1, 1, 1]")
+    .Attr("use_cudnn_on_gpu: bool = true")
     .Attr("fused_ops: list(string) = []")
     // Attributes for the FusedBatchNorm ------------------------------------ //
     .Attr("epsilon: float = 0.0001")
-- 
GitLab


From 44cb836a6779436220331b081121b333a9c26cf5 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 19 Dec 2018 11:22:47 -0800
Subject: [PATCH 830/873] Remove failing assertion

Mixing v1/v2 TensorShapes seems fine except for this one assertion. I've verified that the object-based checkpointing tests pass with enable_v2_behavior() now (while failing on this assertion before). It was never an issue when setting the environmental variable, since everything saw a consistent TensorShape class.

PiperOrigin-RevId: 226203984
---
 tensorflow/python/eager/graph_only_ops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tensorflow/python/eager/graph_only_ops.py b/tensorflow/python/eager/graph_only_ops.py
index 77a9e7db20..a7374ab14a 100644
--- a/tensorflow/python/eager/graph_only_ops.py
+++ b/tensorflow/python/eager/graph_only_ops.py
@@ -44,7 +44,6 @@ def graph_placeholder(dtype, shape, name=None):
   dtype_value = attr_value_pb2.AttrValue(type=dtype.as_datatype_enum)
   if isinstance(shape, (list, tuple)):
     shape = tensor_shape.TensorShape(shape)
-  assert isinstance(shape, tensor_shape.TensorShape)
   shape = attr_value_pb2.AttrValue(shape=shape.as_proto())
   g = ops.get_default_graph()
   with ops.name_scope(name, "placeholder", []) as name:
-- 
GitLab


From c93b568c00c5507264026a3be770f33048f9ac9b Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 11:26:20 -0800
Subject: [PATCH 831/873] Include more relevant info in this `constant_value()`
 error message.

PiperOrigin-RevId: 226204557
---
 tensorflow/python/framework/tensor_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py
index f98f301b38..51f71616a1 100644
--- a/tensorflow/python/framework/tensor_util.py
+++ b/tensorflow/python/framework/tensor_util.py
@@ -708,7 +708,7 @@ def ShapeEquals(tensor_proto, shape):
 def _ConstantValue(tensor, partial):
   # TODO(touts): Support Variables?
   if not isinstance(tensor, ops.Tensor):
-    raise TypeError("tensor is not a Tensor")
+    raise TypeError("%r is not a Tensor, has type %s" % (tensor, type(tensor)))
   if tensor.op.type == "Const":
     return MakeNdarray(tensor.op.get_attr("value"))
   elif tensor.op.type == "Shape":
-- 
GitLab


From d2e68e7287601c820e1ed72654f69e015ff1a1bb Mon Sep 17 00:00:00 2001
From: Jian Li <jianlijianli@google.com>
Date: Wed, 19 Dec 2018 11:30:28 -0800
Subject: [PATCH 832/873] Add builtin layer norm lstm.

PiperOrigin-RevId: 226205306
---
 .../kernels/bidirectional_sequence_lstm.cc    |  60 +-
 tensorflow/lite/kernels/lstm.cc               | 114 ++-
 tensorflow/lite/kernels/lstm_eval.cc          | 242 +++++--
 tensorflow/lite/kernels/lstm_eval.h           |  14 +-
 tensorflow/lite/kernels/lstm_test.cc          | 674 +++++++++++++++++-
 .../kernels/unidirectional_sequence_lstm.cc   |   8 +
 6 files changed, 1038 insertions(+), 74 deletions(-)

diff --git a/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
index 1620374f46..b0be6d0dbd 100644
--- a/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
+++ b/tensorflow/lite/kernels/bidirectional_sequence_lstm.cc
@@ -886,11 +886,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_recurrent_to_input_weights, fw_recurrent_to_forget_weights,
           fw_recurrent_to_cell_weights, fw_recurrent_to_output_weights,
           fw_cell_to_input_weights, fw_cell_to_forget_weights,
-          fw_cell_to_output_weights, aux_input, fw_aux_input_to_input_weights,
-          fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
-          fw_aux_input_to_output_weights, fw_input_gate_bias,
-          fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
-          fw_projection_weights, fw_projection_bias, &lstm_params,
+          fw_cell_to_output_weights,
+          /*input_layer_norm_coefficients=*/nullptr,
+          /*forget_layer_norm_coefficients=*/nullptr,
+          /*cell_layer_norm_coefficients=*/nullptr,
+          /*output_layer_norm_coefficients=*/nullptr, aux_input,
+          fw_aux_input_to_input_weights, fw_aux_input_to_forget_weights,
+          fw_aux_input_to_cell_weights, fw_aux_input_to_output_weights,
+          fw_input_gate_bias, fw_forget_gate_bias, fw_cell_bias,
+          fw_output_gate_bias, fw_projection_weights, fw_projection_bias,
+          &lstm_params,
           /*forward_sequence=*/true, time_major, /*output_offset=*/0,
           fw_scratch_buffer, fw_activation_state, fw_cell_state, fw_output);
       TF_LITE_ENSURE_OK(context, fw_pass_status);
@@ -901,11 +906,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           bw_recurrent_to_input_weights, bw_recurrent_to_forget_weights,
           bw_recurrent_to_cell_weights, bw_recurrent_to_output_weights,
           bw_cell_to_input_weights, bw_cell_to_forget_weights,
-          bw_cell_to_output_weights, aux_input, bw_aux_input_to_input_weights,
-          bw_aux_input_to_forget_weights, bw_aux_input_to_cell_weights,
-          bw_aux_input_to_output_weights, bw_input_gate_bias,
-          bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
-          bw_projection_weights, bw_projection_bias, &lstm_params,
+          bw_cell_to_output_weights,
+          /*input_layer_norm_coefficients=*/nullptr,
+          /*forget_layer_norm_coefficients=*/nullptr,
+          /*cell_layer_norm_coefficients=*/nullptr,
+          /*output_layer_norm_coefficients=*/nullptr, aux_input,
+          bw_aux_input_to_input_weights, bw_aux_input_to_forget_weights,
+          bw_aux_input_to_cell_weights, bw_aux_input_to_output_weights,
+          bw_input_gate_bias, bw_forget_gate_bias, bw_cell_bias,
+          bw_output_gate_bias, bw_projection_weights, bw_projection_bias,
+          &lstm_params,
           /*forward_sequence=*/false, time_major, bw_output_offset,
           bw_scratch_buffer, bw_activation_state, bw_cell_state,
           actual_bw_output);
@@ -940,11 +950,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           fw_recurrent_to_input_weights, fw_recurrent_to_forget_weights,
           fw_recurrent_to_cell_weights, fw_recurrent_to_output_weights,
           fw_cell_to_input_weights, fw_cell_to_forget_weights,
-          fw_cell_to_output_weights, aux_input, fw_aux_input_to_input_weights,
-          fw_aux_input_to_forget_weights, fw_aux_input_to_cell_weights,
-          fw_aux_input_to_output_weights, fw_input_gate_bias,
-          fw_forget_gate_bias, fw_cell_bias, fw_output_gate_bias,
-          fw_projection_weights, fw_projection_bias, &lstm_params,
+          fw_cell_to_output_weights,
+          /*input_layer_norm_coefficients=*/nullptr,
+          /*forget_layer_norm_coefficients=*/nullptr,
+          /*cell_layer_norm_coefficients=*/nullptr,
+          /*output_layer_norm_coefficients=*/nullptr, aux_input,
+          fw_aux_input_to_input_weights, fw_aux_input_to_forget_weights,
+          fw_aux_input_to_cell_weights, fw_aux_input_to_output_weights,
+          fw_input_gate_bias, fw_forget_gate_bias, fw_cell_bias,
+          fw_output_gate_bias, fw_projection_weights, fw_projection_bias,
+          &lstm_params,
           /*forward_sequence=*/true, /*time_major=*/true, /*output_offset=*/0,
           fw_scratch_buffer, scaling_factors, prod_scaling_factors,
           recovered_cell_weights, input_quantized, aux_input_quantized,
@@ -958,11 +973,16 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           bw_recurrent_to_input_weights, bw_recurrent_to_forget_weights,
           bw_recurrent_to_cell_weights, bw_recurrent_to_output_weights,
           bw_cell_to_input_weights, bw_cell_to_forget_weights,
-          bw_cell_to_output_weights, aux_input, bw_aux_input_to_input_weights,
-          bw_aux_input_to_forget_weights, bw_aux_input_to_cell_weights,
-          bw_aux_input_to_output_weights, bw_input_gate_bias,
-          bw_forget_gate_bias, bw_cell_bias, bw_output_gate_bias,
-          bw_projection_weights, bw_projection_bias, &lstm_params,
+          bw_cell_to_output_weights,
+          /*input_layer_norm_coefficients=*/nullptr,
+          /*forget_layer_norm_coefficients=*/nullptr,
+          /*cell_layer_norm_coefficients=*/nullptr,
+          /*output_layer_norm_coefficients=*/nullptr, aux_input,
+          bw_aux_input_to_input_weights, bw_aux_input_to_forget_weights,
+          bw_aux_input_to_cell_weights, bw_aux_input_to_output_weights,
+          bw_input_gate_bias, bw_forget_gate_bias, bw_cell_bias,
+          bw_output_gate_bias, bw_projection_weights, bw_projection_bias,
+          &lstm_params,
           /*forward_sequence=*/false, /*time_major=*/true, bw_output_offset,
           bw_scratch_buffer, scaling_factors, prod_scaling_factors,
           recovered_cell_weights, input_quantized, aux_input_quantized,
diff --git a/tensorflow/lite/kernels/lstm.cc b/tensorflow/lite/kernels/lstm.cc
index b57e2883b0..3689d77b01 100644
--- a/tensorflow/lite/kernels/lstm.cc
+++ b/tensorflow/lite/kernels/lstm.cc
@@ -38,17 +38,24 @@ namespace builtin {
 namespace lstm {
 
 struct OpData {
-  // Which kernel type to use. Full kernel (20 inputs) or basic kernel
-  // (5 inputs).
+  // Which kernel type to use. Full kernel (24 inputs) or basic kernel (5
+  // inputs).
+  // Please note the 20-input full kernel is deprecated and only kept
+  // here for backward compatibility.
   TfLiteLSTMKernelType kernel_type;
 
+  // If the lstm is layer norm.
+  bool is_layer_norm_lstm;
+
   // These fields are only used by full kernel.
   int activation_state_tensor_index;
   int cell_state_tensor_index;
   int scratch_tensor_index;
 };
 
-// For full inputs kernel (20-inputs).
+// For full inputs kernel (24-inputs).
+// Please note the 20-input full kernel is deprecated and only kept
+// here for backward compatibility.
 namespace full {
 
 // Input Tensors of size {n_batch, n_input}
@@ -87,6 +94,13 @@ constexpr int kProjectionBiasTensor = 17;  // Optional
 constexpr int kInputActivationStateTensor = 18;
 constexpr int kInputCellStateTensor = 19;
 
+// Layer norm coefficient tensors of size {n_cell}, representing a diagonal
+// matrix.
+constexpr int kInputLayerNormCoefficientsTensor = 20;   // Optional
+constexpr int kForgetLayerNormCoefficientsTensor = 21;  // Optional
+constexpr int kCellLayerNormCoefficientsTensor = 22;    // Optional
+constexpr int kOutputLayerNormCoefficientsTensor = 23;  // Optional
+
 // Output tensors.
 constexpr int kOutputTensor = 0;
 
@@ -101,7 +115,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 // Check that input tensor dimensions matches with each other.
 TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
                                         TfLiteNode* node, int n_input,
-                                        int n_output, int n_cell) {
+                                        int n_output, int n_cell,
+                                        bool is_layer_norm_lstm) {
   const auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
 
   // Making sure clipping parameters have valid values.
@@ -112,7 +127,8 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
 
   const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
-  if (input_to_input_weights != nullptr) {
+  const bool use_cifg = (input_to_input_weights == nullptr);
+  if (!use_cifg) {
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell);
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input);
@@ -186,7 +202,6 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   }
 
   // Making sure the peephole weights are there all or none.
-  const bool use_cifg = (input_to_input_weights == nullptr);
   const bool peephole_weights_all_or_none =
       ((cell_to_input_weights != nullptr || use_cifg) &&
        (cell_to_forget_weights != nullptr) &&
@@ -244,6 +259,40 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
       ((projection_weights != nullptr) || (projection_bias == nullptr));
   TF_LITE_ENSURE(context, projection_tensors_consistent == true);
 
+  if (is_layer_norm_lstm) {
+    const TfLiteTensor* input_layer_norm_coefficients = GetOptionalInputTensor(
+        context, node, kInputLayerNormCoefficientsTensor);
+    if (use_cifg) {
+      TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients, nullptr);
+    } else {
+      TF_LITE_ENSURE(context, input_layer_norm_coefficients != nullptr);
+      TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->size, 1);
+      TF_LITE_ENSURE_EQ(context, input_layer_norm_coefficients->dims->data[0],
+                        n_cell);
+    }
+
+    const TfLiteTensor* forget_layer_norm_coefficients =
+        GetInput(context, node, kForgetLayerNormCoefficientsTensor);
+    TF_LITE_ENSURE(context, forget_layer_norm_coefficients != nullptr);
+    TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->size, 1);
+    TF_LITE_ENSURE_EQ(context, forget_layer_norm_coefficients->dims->data[0],
+                      n_cell);
+
+    const TfLiteTensor* cell_layer_norm_coefficients =
+        GetInput(context, node, kCellLayerNormCoefficientsTensor);
+    TF_LITE_ENSURE(context, cell_layer_norm_coefficients != nullptr);
+    TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->size, 1);
+    TF_LITE_ENSURE_EQ(context, cell_layer_norm_coefficients->dims->data[0],
+                      n_cell);
+
+    const TfLiteTensor* output_layer_norm_coefficients =
+        GetInput(context, node, kOutputLayerNormCoefficientsTensor);
+    TF_LITE_ENSURE(context, output_layer_norm_coefficients != nullptr);
+    TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->size, 1);
+    TF_LITE_ENSURE_EQ(context, output_layer_norm_coefficients->dims->data[0],
+                      n_cell);
+  }
+
   return kTfLiteOk;
 }
 
@@ -254,8 +303,32 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
 
   TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
-  TF_LITE_ENSURE_EQ(context, node->inputs->size, 20);
+  // Logic for determining regular lstm and layer norm lstm:
+  // input_size, forget_gate_layer_norm_tensor (20) null? is_layer_norm?
+  // 20,         N/A,                                     No.
+  // 24,         null,                                    No.
+  // 24,         not null,                                Yes.
+  // 20-inputs lstm are deprecated and is only kept here for backward
+  // compatibility.
+  if (node->inputs->size == 24) {
+    const TfLiteTensor* forget_layer_norm_coefficients =
+        GetInput(context, node, kForgetLayerNormCoefficientsTensor);
+    if (forget_layer_norm_coefficients == nullptr) {
+      op_data->is_layer_norm_lstm = false;
+    } else {
+      op_data->is_layer_norm_lstm = true;
+    }
+  } else if (node->inputs->size == 20) {
+    // This is deprecated and is only kept here for backward compatibility.
+    op_data->is_layer_norm_lstm = false;
+  } else {
+    context->ReportError(
+        context, "The LSTM Full kernel expects 20 or 24 inputs. Got %d inputs",
+        node->inputs->size);
+    return kTfLiteError;
+  }
 
+  const bool is_layer_norm_lstm = op_data->is_layer_norm_lstm;
   op_data->activation_state_tensor_index =
       node->inputs->data[kInputActivationStateTensor];
   op_data->cell_state_tensor_index = node->inputs->data[kInputCellStateTensor];
@@ -282,8 +355,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const int n_output = recurrent_to_output_weights->dims->data[1];
 
   // Check that input tensor dimensions matches with each other.
-  TF_LITE_ENSURE_OK(context, CheckInputTensorDimensions(context, node, n_input,
-                                                        n_output, n_cell));
+  TF_LITE_ENSURE_OK(context,
+                    CheckInputTensorDimensions(context, node, n_input, n_output,
+                                               n_cell, is_layer_norm_lstm));
 
   // Get the pointer to output, activation_state and cell_state tensors.
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
@@ -430,6 +504,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
   OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  const bool is_layer_norm_lstm = op_data->is_layer_norm_lstm;
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
 
@@ -458,6 +533,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* cell_to_output_weights =
       GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor);
 
+  const TfLiteTensor* input_layer_norm_coefficients =
+      is_layer_norm_lstm ? GetOptionalInputTensor(
+                               context, node, kInputLayerNormCoefficientsTensor)
+                         : nullptr;
+  const TfLiteTensor* forget_layer_norm_coefficients =
+      is_layer_norm_lstm
+          ? GetInput(context, node, kForgetLayerNormCoefficientsTensor)
+          : nullptr;
+  const TfLiteTensor* cell_layer_norm_coefficients =
+      is_layer_norm_lstm
+          ? GetInput(context, node, kCellLayerNormCoefficientsTensor)
+          : nullptr;
+  const TfLiteTensor* output_layer_norm_coefficients =
+      is_layer_norm_lstm
+          ? GetInput(context, node, kOutputLayerNormCoefficientsTensor)
+          : nullptr;
+
   const TfLiteTensor* input_gate_bias =
       GetOptionalInputTensor(context, node, kInputGateBiasTensor);
   const TfLiteTensor* forget_gate_bias =
@@ -490,6 +582,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           recurrent_to_input_weights, recurrent_to_forget_weights,
           recurrent_to_cell_weights, recurrent_to_output_weights,
           cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+          input_layer_norm_coefficients, forget_layer_norm_coefficients,
+          cell_layer_norm_coefficients, output_layer_norm_coefficients,
           /*aux_input=*/nullptr,
           /*aux_input_to_input_weights=*/nullptr,
           /*aux_input_to_forget_weights=*/nullptr,
@@ -518,6 +612,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           recurrent_to_input_weights, recurrent_to_forget_weights,
           recurrent_to_cell_weights, recurrent_to_output_weights,
           cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+          input_layer_norm_coefficients, forget_layer_norm_coefficients,
+          cell_layer_norm_coefficients, output_layer_norm_coefficients,
           /*aux_input=*/nullptr,
           /*aux_input_to_input_weights=*/nullptr,
           /*aux_input_to_forget_weights=*/nullptr,
diff --git a/tensorflow/lite/kernels/lstm_eval.cc b/tensorflow/lite/kernels/lstm_eval.cc
index 50b2bca7b5..6ba1e19343 100644
--- a/tensorflow/lite/kernels/lstm_eval.cc
+++ b/tensorflow/lite/kernels/lstm_eval.cc
@@ -27,6 +27,10 @@ namespace lstm_eval {
 
 namespace {
 
+// Small float to avoid divergence during calculation of deviation for layer
+// norm lstm.
+const float kLayerNormEpsilon = 1e-8;
+
 // Performs an LSTM batch inference step for input specified by input_ptr_batch.
 // The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and
 // biases (*_bias_ptr), and buffers (*_scratch), along with additional
@@ -65,30 +69,47 @@ inline void LstmStepWithAuxInput(
     const float* recurrent_to_output_weights_ptr,
     const float* cell_to_input_weights_ptr,
     const float* cell_to_forget_weights_ptr,
-    const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const float* projection_weights_ptr,
-    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
-    int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
-    int output_batch_leading_dim, float* output_state_ptr,
-    float* cell_state_ptr, float* input_gate_scratch,
+    const float* cell_to_output_weights_ptr,
+    const float* input_layer_norm_coefficients_ptr,
+    const float* forget_layer_norm_coefficients_ptr,
+    const float* cell_layer_norm_coefficients_ptr,
+    const float* output_layer_norm_coefficients_ptr,
+    const float* input_gate_bias_ptr, const float* forget_gate_bias_ptr,
+    const float* cell_bias_ptr, const float* output_gate_bias_ptr,
+    const float* projection_weights_ptr, const float* projection_bias_ptr,
+    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
+    int n_aux_input, int n_output, int output_batch_leading_dim,
+    float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch,
     float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
     float* output_ptr_batch) {
   // Since we have already checked that weights are all there or none, we can
   // check the existense of only one to the get the condition.
   const bool use_cifg = (input_to_input_weights_ptr == nullptr);
   const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
-  // Initialize scratch buffers with bias.
-  if (!use_cifg) {
-    tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch,
-                                          input_gate_scratch);
+  const bool is_layer_norm_lstm =
+      (forget_layer_norm_coefficients_ptr != nullptr);
+
+  // Initialize scratch buffers with bias for regular lstm or initialize with
+  // zero for layer norm lstm.
+  if (is_layer_norm_lstm) {
+    if (!use_cifg) {
+      tensor_utils::ZeroVector(input_gate_scratch, n_cell * n_batch);
+    }
+    tensor_utils::ZeroVector(forget_gate_scratch, n_cell * n_batch);
+    tensor_utils::ZeroVector(cell_scratch, n_cell * n_batch);
+    tensor_utils::ZeroVector(output_gate_scratch, n_cell * n_batch);
+  } else {
+    if (!use_cifg) {
+      tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
+                                            n_batch, input_gate_scratch);
+    }
+    tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
+                                          forget_gate_scratch);
+    tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
+                                          cell_scratch);
+    tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
+                                          output_gate_scratch);
   }
-  tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
-                                        forget_gate_scratch);
-  tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
-                                        cell_scratch);
-  tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
-                                        output_gate_scratch);
 
   // For each batch and cell: compute input_weight * input.
   if (!use_cifg) {
@@ -152,6 +173,16 @@ inline void LstmStepWithAuxInput(
           cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch,
           input_gate_scratch);
     }
+    if (is_layer_norm_lstm) {
+      tensor_utils::MeanStddevNormalization(input_gate_scratch,
+                                            input_gate_scratch, n_cell, n_batch,
+                                            kLayerNormEpsilon);
+      tensor_utils::VectorBatchVectorCwiseProduct(
+          input_layer_norm_coefficients_ptr, n_cell, input_gate_scratch,
+          n_batch, input_gate_scratch);
+      tensor_utils::VectorBatchVectorAdd(input_gate_bias_ptr, n_cell, n_batch,
+                                         input_gate_scratch);
+    }
     tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
                                        input_gate_scratch);
   }
@@ -162,12 +193,31 @@ inline void LstmStepWithAuxInput(
         cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch,
         forget_gate_scratch);
   }
+  if (is_layer_norm_lstm) {
+    tensor_utils::MeanStddevNormalization(forget_gate_scratch,
+                                          forget_gate_scratch, n_cell, n_batch,
+                                          kLayerNormEpsilon);
+    tensor_utils::VectorBatchVectorCwiseProduct(
+        forget_layer_norm_coefficients_ptr, n_cell, forget_gate_scratch,
+        n_batch, forget_gate_scratch);
+    tensor_utils::VectorBatchVectorAdd(forget_gate_bias_ptr, n_cell, n_batch,
+                                       forget_gate_scratch);
+  }
   tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
                                      forget_gate_scratch);
 
   // For each batch and cell: update the cell.
   tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr,
                                          n_batch * n_cell, cell_state_ptr);
+  if (is_layer_norm_lstm) {
+    tensor_utils::MeanStddevNormalization(cell_scratch, cell_scratch, n_cell,
+                                          n_batch, kLayerNormEpsilon);
+    tensor_utils::VectorBatchVectorCwiseProduct(
+        cell_layer_norm_coefficients_ptr, n_cell, cell_scratch, n_batch,
+        cell_scratch);
+    tensor_utils::VectorBatchVectorAdd(cell_bias_ptr, n_cell, n_batch,
+                                       cell_scratch);
+  }
   tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
                                         params->activation, cell_scratch);
   if (use_cifg) {
@@ -190,6 +240,16 @@ inline void LstmStepWithAuxInput(
         cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch,
         output_gate_scratch);
   }
+  if (is_layer_norm_lstm) {
+    tensor_utils::MeanStddevNormalization(output_gate_scratch,
+                                          output_gate_scratch, n_cell, n_batch,
+                                          kLayerNormEpsilon);
+    tensor_utils::VectorBatchVectorCwiseProduct(
+        output_layer_norm_coefficients_ptr, n_cell, output_gate_scratch,
+        n_batch, output_gate_scratch);
+    tensor_utils::VectorBatchVectorAdd(output_gate_bias_ptr, n_cell, n_batch,
+                                       output_gate_scratch);
+  }
   tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
                                      output_gate_scratch);
   tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
@@ -344,33 +404,50 @@ inline void LstmStepWithAuxInput(
     const int8_t* cell_to_forget_weights_ptr,
     float cell_to_forget_weights_scale,
     const int8_t* cell_to_output_weights_ptr,
-    float cell_to_output_weights_scale, const float* input_gate_bias_ptr,
-    const float* forget_gate_bias_ptr, const float* cell_bias_ptr,
-    const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr,
-    float projection_weights_scale, const float* projection_bias_ptr,
-    const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input,
-    int n_aux_input, int n_output, int output_batch_leading_dim,
-    float* input_gate_scratch, float* forget_gate_scratch, float* cell_scratch,
-    float* output_gate_scratch, float* scaling_factors,
-    float* product_scaling_factors, float* recovered_cell_weights,
-    int8_t* quantized_input_ptr_batch, int8_t* quantized_aux_input_ptr_batch,
-    int8_t* quantized_output_state_ptr, int8_t* quantized_cell_state_ptr,
-    float* output_state_ptr, float* cell_state_ptr, float* output_ptr_batch) {
+    float cell_to_output_weights_scale,
+    const float* input_layer_norm_coefficients_ptr,
+    const float* forget_layer_norm_coefficients_ptr,
+    const float* cell_layer_norm_coefficients_ptr,
+    const float* output_layer_norm_coefficients_ptr,
+    const float* input_gate_bias_ptr, const float* forget_gate_bias_ptr,
+    const float* cell_bias_ptr, const float* output_gate_bias_ptr,
+    const int8_t* projection_weights_ptr, float projection_weights_scale,
+    const float* projection_bias_ptr, const TfLiteLSTMParams* params,
+    int n_batch, int n_cell, int n_input, int n_aux_input, int n_output,
+    int output_batch_leading_dim, float* input_gate_scratch,
+    float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch,
+    float* scaling_factors, float* product_scaling_factors,
+    float* recovered_cell_weights, int8_t* quantized_input_ptr_batch,
+    int8_t* quantized_aux_input_ptr_batch, int8_t* quantized_output_state_ptr,
+    int8_t* quantized_cell_state_ptr, float* output_state_ptr,
+    float* cell_state_ptr, float* output_ptr_batch) {
   // Since we have already checked that weights are all there or none, we
   // can check the existense of only one to the get the condition.
   const bool use_cifg = (input_to_input_weights_ptr == nullptr);
   const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
+  const bool is_layer_norm_lstm =
+      (forget_layer_norm_coefficients_ptr != nullptr);
+
   // Initialize scratch buffers with bias.
-  if (!use_cifg) {
-    tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch,
-                                          input_gate_scratch);
+  if (is_layer_norm_lstm) {
+    if (!use_cifg) {
+      tensor_utils::ZeroVector(input_gate_scratch, n_cell * n_batch);
+    }
+    tensor_utils::ZeroVector(forget_gate_scratch, n_cell * n_batch);
+    tensor_utils::ZeroVector(cell_scratch, n_cell * n_batch);
+    tensor_utils::ZeroVector(output_gate_scratch, n_cell * n_batch);
+  } else {
+    if (!use_cifg) {
+      tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
+                                            n_batch, input_gate_scratch);
+    }
+    tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
+                                          forget_gate_scratch);
+    tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
+                                          cell_scratch);
+    tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
+                                          output_gate_scratch);
   }
-  tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
-                                        forget_gate_scratch);
-  tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch,
-                                        cell_scratch);
-  tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
-                                        output_gate_scratch);
 
   if (!tensor_utils::IsZeroVector(input_ptr_batch, n_batch * n_input)) {
     // Save quantization and matmul computation for all zero input.
@@ -535,6 +612,16 @@ inline void LstmStepWithAuxInput(
           recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
           input_gate_scratch);
     }
+    if (is_layer_norm_lstm) {
+      tensor_utils::MeanStddevNormalization(input_gate_scratch,
+                                            input_gate_scratch, n_cell, n_batch,
+                                            kLayerNormEpsilon);
+      tensor_utils::VectorBatchVectorCwiseProduct(
+          input_layer_norm_coefficients_ptr, n_cell, input_gate_scratch,
+          n_batch, input_gate_scratch);
+      tensor_utils::VectorBatchVectorAdd(input_gate_bias_ptr, n_cell, n_batch,
+                                         input_gate_scratch);
+    }
     tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch,
                                        input_gate_scratch);
   }
@@ -548,12 +635,31 @@ inline void LstmStepWithAuxInput(
         recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
         forget_gate_scratch);
   }
+  if (is_layer_norm_lstm) {
+    tensor_utils::MeanStddevNormalization(forget_gate_scratch,
+                                          forget_gate_scratch, n_cell, n_batch,
+                                          kLayerNormEpsilon);
+    tensor_utils::VectorBatchVectorCwiseProduct(
+        forget_layer_norm_coefficients_ptr, n_cell, forget_gate_scratch,
+        n_batch, forget_gate_scratch);
+    tensor_utils::VectorBatchVectorAdd(forget_gate_bias_ptr, n_cell, n_batch,
+                                       forget_gate_scratch);
+  }
   tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch,
                                      forget_gate_scratch);
 
   // For each batch and cell: update the cell.
   tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr,
                                          n_batch * n_cell, cell_state_ptr);
+  if (is_layer_norm_lstm) {
+    tensor_utils::MeanStddevNormalization(cell_scratch, cell_scratch, n_cell,
+                                          n_batch, kLayerNormEpsilon);
+    tensor_utils::VectorBatchVectorCwiseProduct(
+        cell_layer_norm_coefficients_ptr, n_cell, cell_scratch, n_batch,
+        cell_scratch);
+    tensor_utils::VectorBatchVectorAdd(cell_bias_ptr, n_cell, n_batch,
+                                       cell_scratch);
+  }
   tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell,
                                         params->activation, cell_scratch);
   if (use_cifg) {
@@ -581,6 +687,16 @@ inline void LstmStepWithAuxInput(
         recovered_cell_weights, n_cell, cell_state_ptr, n_batch,
         output_gate_scratch);
   }
+  if (is_layer_norm_lstm) {
+    tensor_utils::MeanStddevNormalization(output_gate_scratch,
+                                          output_gate_scratch, n_cell, n_batch,
+                                          kLayerNormEpsilon);
+    tensor_utils::VectorBatchVectorCwiseProduct(
+        output_layer_norm_coefficients_ptr, n_cell, output_gate_scratch,
+        n_batch, output_gate_scratch);
+    tensor_utils::VectorBatchVectorAdd(output_gate_bias_ptr, n_cell, n_batch,
+                                       output_gate_scratch);
+  }
   tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
                                      output_gate_scratch);
   tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell,
@@ -702,7 +818,12 @@ TfLiteStatus EvalFloat(
     const TfLiteTensor* recurrent_to_output_weights,
     const TfLiteTensor* cell_to_input_weights,
     const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* cell_to_output_weights,
+    const TfLiteTensor* input_layer_norm_coefficients,
+    const TfLiteTensor* forget_layer_norm_coefficients,
+    const TfLiteTensor* cell_layer_norm_coefficients,
+    const TfLiteTensor* output_layer_norm_coefficients,
+    const TfLiteTensor* aux_input,
     const TfLiteTensor* aux_input_to_input_weights,
     const TfLiteTensor* aux_input_to_forget_weights,
     const TfLiteTensor* aux_input_to_cell_weights,
@@ -735,6 +856,7 @@ TfLiteStatus EvalFloat(
   // check the existense of only one to the get the condition.
   const bool use_cifg = (input_to_input_weights == nullptr);
   const bool use_peephole = (cell_to_output_weights != nullptr);
+  const bool is_layer_norm_lstm = (forget_layer_norm_coefficients != nullptr);
 
   // Index the scratch buffers pointers to the global scratch buffer.
   float* input_gate_scratch = nullptr;
@@ -765,6 +887,15 @@ TfLiteStatus EvalFloat(
       (use_peephole) ? cell_to_forget_weights->data.f : nullptr;
   const float* cell_to_output_weights_ptr =
       (use_peephole) ? cell_to_output_weights->data.f : nullptr;
+  const float* input_layer_norm_coefficients_ptr =
+      (is_layer_norm_lstm && !use_cifg) ? input_layer_norm_coefficients->data.f
+                                        : nullptr;
+  const float* forget_layer_norm_coefficients_ptr =
+      is_layer_norm_lstm ? forget_layer_norm_coefficients->data.f : nullptr;
+  const float* cell_layer_norm_coefficients_ptr =
+      is_layer_norm_lstm ? cell_layer_norm_coefficients->data.f : nullptr;
+  const float* output_layer_norm_coefficients_ptr =
+      is_layer_norm_lstm ? output_layer_norm_coefficients->data.f : nullptr;
   const float* projection_weights_ptr =
       (projection_weights == nullptr) ? nullptr : projection_weights->data.f;
   const float* projection_bias_ptr =
@@ -811,6 +942,8 @@ TfLiteStatus EvalFloat(
           recurrent_to_cell_weights->data.f,
           recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
           cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
+          input_layer_norm_coefficients_ptr, forget_layer_norm_coefficients_ptr,
+          cell_layer_norm_coefficients_ptr, output_layer_norm_coefficients_ptr,
           input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
           output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr,
           params, n_batch, n_cell, n_input, aux_input_size, n_output,
@@ -855,7 +988,11 @@ TfLiteStatus EvalFloat(
             recurrent_to_cell_weights->data.f,
             recurrent_to_output_weights->data.f, cell_to_input_weights_ptr,
             cell_to_forget_weights_ptr, cell_to_output_weights_ptr,
-            input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f,
+            input_layer_norm_coefficients_ptr,
+            forget_layer_norm_coefficients_ptr,
+            cell_layer_norm_coefficients_ptr,
+            output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
+            forget_gate_bias->data.f, cell_bias->data.f,
             output_gate_bias->data.f, projection_weights_ptr,
             projection_bias_ptr, params, /*n_batch=*/1, n_cell, n_input,
             aux_input_size, n_output, output_batch_leading_dim,
@@ -879,7 +1016,12 @@ TfLiteStatus EvalHybrid(
     const TfLiteTensor* recurrent_to_output_weights,
     const TfLiteTensor* cell_to_input_weights,
     const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* cell_to_output_weights,
+    const TfLiteTensor* input_layer_norm_coefficients,
+    const TfLiteTensor* forget_layer_norm_coefficients,
+    const TfLiteTensor* cell_layer_norm_coefficients,
+    const TfLiteTensor* output_layer_norm_coefficients,
+    const TfLiteTensor* aux_input,
     const TfLiteTensor* aux_input_to_input_weights,
     const TfLiteTensor* aux_input_to_forget_weights,
     const TfLiteTensor* aux_input_to_cell_weights,
@@ -914,6 +1056,7 @@ TfLiteStatus EvalHybrid(
   // check the existence of only one to get the condition.
   const bool use_cifg = (input_to_input_weights == nullptr);
   const bool use_peephole = (cell_to_output_weights != nullptr);
+  const bool is_layer_norm_lstm = (forget_layer_norm_coefficients != nullptr);
 
   float* input_gate_scratch = nullptr;
   float* cell_scratch = nullptr;
@@ -966,6 +1109,16 @@ TfLiteStatus EvalHybrid(
     cell_to_output_weights_scale = cell_to_output_weights->params.scale;
   }
 
+  const float* input_layer_norm_coefficients_ptr =
+      (is_layer_norm_lstm && !use_cifg) ? input_layer_norm_coefficients->data.f
+                                        : nullptr;
+  const float* forget_layer_norm_coefficients_ptr =
+      is_layer_norm_lstm ? forget_layer_norm_coefficients->data.f : nullptr;
+  const float* cell_layer_norm_coefficients_ptr =
+      is_layer_norm_lstm ? cell_layer_norm_coefficients->data.f : nullptr;
+  const float* output_layer_norm_coefficients_ptr =
+      is_layer_norm_lstm ? output_layer_norm_coefficients->data.f : nullptr;
+
   const int8_t* projection_weights_ptr =
       (projection_weights == nullptr)
           ? nullptr
@@ -1084,6 +1237,8 @@ TfLiteStatus EvalHybrid(
           cell_to_input_weights_ptr, cell_to_input_weights_scale,
           cell_to_forget_weights_ptr, cell_to_forget_weights_scale,
           cell_to_output_weights_ptr, cell_to_output_weights_scale,
+          input_layer_norm_coefficients_ptr, forget_layer_norm_coefficients_ptr,
+          cell_layer_norm_coefficients_ptr, output_layer_norm_coefficients_ptr,
           input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr,
           output_gate_bias_ptr, projection_weights_ptr,
           projection_weights_scale, projection_bias_ptr, params, n_batch,
@@ -1138,7 +1293,10 @@ TfLiteStatus EvalHybrid(
             recurrent_to_output_weights_scale, cell_to_input_weights_ptr,
             cell_to_input_weights_scale, cell_to_forget_weights_ptr,
             cell_to_forget_weights_scale, cell_to_output_weights_ptr,
-            cell_to_output_weights_scale, input_gate_bias_ptr,
+            cell_to_output_weights_scale, input_layer_norm_coefficients_ptr,
+            forget_layer_norm_coefficients_ptr,
+            cell_layer_norm_coefficients_ptr,
+            output_layer_norm_coefficients_ptr, input_gate_bias_ptr,
             forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr,
             projection_weights_ptr, projection_weights_scale,
             projection_bias_ptr, params,
diff --git a/tensorflow/lite/kernels/lstm_eval.h b/tensorflow/lite/kernels/lstm_eval.h
index c8a4d284f3..33e5bc0781 100644
--- a/tensorflow/lite/kernels/lstm_eval.h
+++ b/tensorflow/lite/kernels/lstm_eval.h
@@ -34,7 +34,12 @@ TfLiteStatus EvalFloat(
     const TfLiteTensor* recurrent_to_output_weights,
     const TfLiteTensor* cell_to_input_weights,
     const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* cell_to_output_weights,
+    const TfLiteTensor* input_layer_norm_coefficients,
+    const TfLiteTensor* forget_layer_norm_coefficients,
+    const TfLiteTensor* cell_layer_norm_coefficients,
+    const TfLiteTensor* output_layer_norm_coefficients,
+    const TfLiteTensor* aux_input,
     const TfLiteTensor* aux_input_to_input_weights,
     const TfLiteTensor* aux_input_to_forget_weights,
     const TfLiteTensor* aux_input_to_cell_weights,
@@ -58,7 +63,12 @@ TfLiteStatus EvalHybrid(
     const TfLiteTensor* recurrent_to_output_weights,
     const TfLiteTensor* cell_to_input_weights,
     const TfLiteTensor* cell_to_forget_weights,
-    const TfLiteTensor* cell_to_output_weights, const TfLiteTensor* aux_input,
+    const TfLiteTensor* cell_to_output_weights,
+    const TfLiteTensor* input_layer_norm_coefficients,
+    const TfLiteTensor* forget_layer_norm_coefficients,
+    const TfLiteTensor* cell_layer_norm_coefficients,
+    const TfLiteTensor* output_layer_norm_coefficients,
+    const TfLiteTensor* aux_input,
     const TfLiteTensor* aux_input_to_input_weights,
     const TfLiteTensor* aux_input_to_forget_weights,
     const TfLiteTensor* aux_input_to_cell_weights,
diff --git a/tensorflow/lite/kernels/lstm_test.cc b/tensorflow/lite/kernels/lstm_test.cc
index 03ad2e899d..fea95aacb1 100644
--- a/tensorflow/lite/kernels/lstm_test.cc
+++ b/tensorflow/lite/kernels/lstm_test.cc
@@ -38,7 +38,8 @@ class LSTMOpModel : public SingleOpModel {
               bool use_peephole, bool use_projection_weights,
               bool use_projection_bias, float cell_clip, float proj_clip,
               const std::vector<std::vector<int>>& input_shapes,
-              const TensorType& weight_type = TensorType_FLOAT32)
+              const TensorType& weight_type = TensorType_FLOAT32,
+              bool is_layer_norm = false)
       : n_batch_(n_batch),
         n_input_(n_input),
         n_cell_(n_cell),
@@ -106,6 +107,18 @@ class LSTMOpModel : public SingleOpModel {
     input_cell_state_ =
         AddInput(TensorData{TensorType_FLOAT32, {n_cell_ * n_batch_}}, true);
 
+    // Layer norm weights.
+    if (is_layer_norm) {
+      if (use_cifg) {
+        input_layer_norm_coefficients_ = AddNullInput();
+      } else {
+        input_layer_norm_coefficients_ = AddInput(TensorType_FLOAT32);
+      }
+      forget_layer_norm_coefficients_ = AddInput(TensorType_FLOAT32);
+      cell_layer_norm_coefficients_ = AddInput(TensorType_FLOAT32);
+      output_layer_norm_coefficients_ = AddInput(TensorType_FLOAT32);
+    }
+
     output_ = AddOutput(TensorType_FLOAT32);
 
     SetBuiltinOp(BuiltinOperator_LSTM, BuiltinOptions_LSTMOptions,
@@ -160,6 +173,22 @@ class LSTMOpModel : public SingleOpModel {
     PopulateTensor(cell_to_output_weights_, f);
   }
 
+  void SetInputLayerNormCoefficients(std::vector<float> f) {
+    PopulateTensor(input_layer_norm_coefficients_, f);
+  }
+
+  void SetForgetLayerNormCoefficients(std::vector<float> f) {
+    PopulateTensor(forget_layer_norm_coefficients_, f);
+  }
+
+  void SetCellLayerNormCoefficients(std::vector<float> f) {
+    PopulateTensor(cell_layer_norm_coefficients_, f);
+  }
+
+  void SetOutputLayerNormCoefficients(std::vector<float> f) {
+    PopulateTensor(output_layer_norm_coefficients_, f);
+  }
+
   void SetInputGateBias(std::vector<float> f) {
     PopulateTensor(input_gate_bias_, f);
   }
@@ -210,6 +239,11 @@ class LSTMOpModel : public SingleOpModel {
   int cell_to_forget_weights_;
   int cell_to_output_weights_;
 
+  int input_layer_norm_coefficients_;
+  int forget_layer_norm_coefficients_;
+  int cell_layer_norm_coefficients_;
+  int output_layer_norm_coefficients_;
+
   int input_gate_bias_;
   int forget_gate_bias_;
   int cell_bias_;
@@ -1392,6 +1426,644 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLstmTest, HybridLstmBlackBoxTest) {
   VerifyGoldens(lstm_input_, lstm_golden_output_, &lstm, /*tolerance=*/0.00467);
 }
 
+class LayerNormLSTMOpModel : public LSTMOpModel {
+ public:
+  LayerNormLSTMOpModel(int n_batch, int n_input, int n_cell, int n_output,
+                       bool use_cifg, bool use_peephole,
+                       bool use_projection_weights, bool use_projection_bias,
+                       float cell_clip, float proj_clip,
+                       const std::vector<std::vector<int>>& input_shapes,
+                       const TensorType& weight_type = TensorType_FLOAT32)
+      : LSTMOpModel(n_batch, n_input, n_cell, n_output, use_cifg, use_peephole,
+                    use_projection_weights, use_projection_bias, cell_clip,
+                    proj_clip, input_shapes, weight_type,
+                    /*is_layer_norm*/ true) {}
+};
+
+class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel {
+ public:
+  HybridLayerNormLSTMOpModel(int n_batch, int n_input, int n_cell, int n_output,
+                             bool use_cifg, bool use_peephole,
+                             bool use_projection_weights,
+                             bool use_projection_bias, float cell_clip,
+                             float proj_clip,
+                             const std::vector<std::vector<int>>& input_shapes)
+      : LayerNormLSTMOpModel(n_batch, n_input, n_cell, n_output, use_cifg,
+                             use_peephole, use_projection_weights,
+                             use_projection_bias, cell_clip, proj_clip,
+                             input_shapes, TensorType_UINT8) {}
+
+  void SetInputToInputWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(input_to_input_weights_, f);
+  }
+
+  void SetInputToForgetWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(input_to_forget_weights_, f);
+  }
+
+  void SetInputToCellWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(input_to_cell_weights_, f);
+  }
+
+  void SetInputToOutputWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(input_to_output_weights_, f);
+  }
+
+  void SetRecurrentToInputWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f);
+  }
+
+  void SetRecurrentToForgetWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f);
+  }
+
+  void SetRecurrentToCellWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f);
+  }
+
+  void SetRecurrentToOutputWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f);
+  }
+
+  void SetCellToInputWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(cell_to_input_weights_, f);
+  }
+
+  void SetCellToForgetWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f);
+  }
+
+  void SetCellToOutputWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(cell_to_output_weights_, f);
+  }
+
+  void SetInputLayerNormCoefficients(std::vector<float> f) {
+    PopulateTensor(input_layer_norm_coefficients_, f);
+  }
+
+  void SetForgetLayerNormCoefficients(std::vector<float> f) {
+    PopulateTensor(forget_layer_norm_coefficients_, f);
+  }
+
+  void SetCellLayerNormCoefficients(std::vector<float> f) {
+    PopulateTensor(cell_layer_norm_coefficients_, f);
+  }
+
+  void SetOutputLayerNormCoefficients(std::vector<float> f) {
+    PopulateTensor(output_layer_norm_coefficients_, f);
+  }
+
+  void SetProjectionWeights(std::vector<float> f) {
+    SymmetricQuantizeAndPopulate(projection_weights_, f);
+  }
+};
+
+class BaseLayerNormLstmTest : public ::testing::Test {
+ protected:
+  // Weights of the Layer Norm LSTM model. Some are optional.
+  std::vector<float> input_to_input_weights_;
+  std::vector<float> input_to_cell_weights_;
+  std::vector<float> input_to_forget_weights_;
+  std::vector<float> input_to_output_weights_;
+  std::vector<float> input_gate_bias_;
+  std::vector<float> cell_gate_bias_;
+  std::vector<float> forget_gate_bias_;
+  std::vector<float> output_gate_bias_;
+  std::vector<float> recurrent_to_input_weights_;
+  std::vector<float> recurrent_to_cell_weights_;
+  std::vector<float> recurrent_to_forget_weights_;
+  std::vector<float> recurrent_to_output_weights_;
+  std::vector<float> cell_to_input_weights_;
+  std::vector<float> cell_to_forget_weights_;
+  std::vector<float> cell_to_output_weights_;
+  std::vector<float> projection_weights_;
+  std::vector<float> input_layer_norm_coefficients_;
+  std::vector<float> forget_layer_norm_coefficients_;
+  std::vector<float> cell_layer_norm_coefficients_;
+  std::vector<float> output_layer_norm_coefficients_;
+
+  // Layer Norm LSTM input is stored as num_batch x num_inputs vector.
+  std::vector<std::vector<float>> layer_norm_lstm_input_;
+
+  // Compares output up to tolerance to the result of the layer_norm_lstm given
+  // the input.
+  void VerifyGoldens(const std::vector<std::vector<float>>& input,
+                     const std::vector<std::vector<float>>& output,
+                     LayerNormLSTMOpModel* layer_norm_lstm,
+                     float tolerance = 1e-5) {
+    const int num_batches = input.size();
+    EXPECT_GT(num_batches, 0);
+    const int num_inputs = layer_norm_lstm->num_inputs();
+    EXPECT_GT(num_inputs, 0);
+    const int input_sequence_size = input[0].size() / num_inputs;
+    EXPECT_GT(input_sequence_size, 0);
+    for (int i = 0; i < input_sequence_size; ++i) {
+      for (int b = 0; b < num_batches; ++b) {
+        const float* batch_start = input[b].data() + i * num_inputs;
+        const float* batch_end = batch_start + num_inputs;
+
+        layer_norm_lstm->SetInput(b * layer_norm_lstm->num_inputs(),
+                                  batch_start, batch_end);
+      }
+
+      layer_norm_lstm->Invoke();
+
+      const int num_outputs = layer_norm_lstm->num_outputs();
+      std::vector<float> expected;
+      for (int b = 0; b < num_batches; ++b) {
+        const float* golden_start_batch = output[b].data() + i * num_outputs;
+        const float* golden_end_batch = golden_start_batch + num_outputs;
+        expected.insert(expected.end(), golden_start_batch, golden_end_batch);
+      }
+      EXPECT_THAT(layer_norm_lstm->GetOutput(),
+                  ElementsAreArray(ArrayFloatNear(expected, tolerance)));
+    }
+  }
+};
+
+class NoCifgPeepholeProjectionNoClippingLayerNormLstmTest
+    : public BaseLayerNormLstmTest {
+  void SetUp() override {
+    input_to_input_weights_ = {0.5,  0.6,  0.7,  -0.8, -0.9, 0.1,  0.2,
+                               0.3,  -0.4, 0.5,  -0.8, 0.7,  -0.6, 0.5,
+                               -0.4, -0.5, -0.4, -0.3, -0.2, -0.1};
+
+    input_to_forget_weights_ = {-0.6, -0.1, 0.3,  0.2,  0.9,  -0.5, -0.2,
+                                -0.4, 0.3,  -0.8, -0.4, 0.3,  -0.5, -0.4,
+                                -0.6, 0.3,  -0.4, -0.6, -0.5, -0.5};
+
+    input_to_cell_weights_ = {-0.4, -0.3, -0.2, -0.1, -0.5, 0.5,  -0.2,
+                              -0.3, -0.2, -0.6, 0.6,  -0.1, -0.4, -0.3,
+                              -0.7, 0.7,  -0.9, -0.5, 0.8,  0.6};
+
+    input_to_output_weights_ = {-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3,
+                                -0.3, -0.8, -0.2, 0.6,  -0.2, 0.4,  -0.7,
+                                -0.3, -0.5, 0.1,  0.5,  -0.6, -0.4};
+
+    input_gate_bias_ = {0.03, 0.15, 0.22, 0.38};
+
+    forget_gate_bias_ = {0.1, -0.3, -0.2, 0.1};
+
+    cell_gate_bias_ = {-0.05, 0.72, 0.25, 0.08};
+
+    output_gate_bias_ = {0.05, -0.01, 0.2, 0.1};
+
+    recurrent_to_input_weights_ = {-0.2, -0.3, 0.4,  0.1,  -0.5, 0.9,
+                                   -0.2, -0.3, -0.7, 0.05, -0.2, -0.6};
+
+    recurrent_to_cell_weights_ = {-0.3, 0.2, 0.1, -0.3, 0.8,  -0.08,
+                                  -0.2, 0.3, 0.8, -0.6, -0.1, 0.2};
+
+    recurrent_to_forget_weights_ = {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4,
+                                    0.9,  0.3,  -0.1, 0.2,  0.5, 0.2};
+
+    recurrent_to_output_weights_ = {0.3,  -0.1, 0.1,  -0.2, -0.5, -0.7,
+                                    -0.2, -0.6, -0.1, -0.4, -0.7, -0.2};
+
+    cell_to_input_weights_ = {0.05, 0.1, 0.25, 0.15};
+
+    cell_to_forget_weights_ = {-0.02, -0.15, -0.25, -0.03};
+
+    cell_to_output_weights_ = {0.1, -0.1, -0.5, 0.05};
+
+    input_layer_norm_coefficients_ = {0.1, 0.2, 0.3, 0.5};
+    forget_layer_norm_coefficients_ = {0.2, 0.2, 0.4, 0.3};
+    cell_layer_norm_coefficients_ = {0.7, 0.2, 0.3, 0.8};
+    output_layer_norm_coefficients_ = {0.6, 0.2, 0.2, 0.5};
+
+    projection_weights_ = {-0.1, 0.2,  0.01, -0.2, 0.1,  0.5,
+                           0.3,  0.08, 0.07, 0.2,  -0.4, 0.2};
+
+    layer_norm_lstm_input_ = {
+        {// Batch0: 3 (input_sequence_size) * 5 (n_input)
+         0.7, 0.8, 0.1, 0.2, 0.3,   // seq 0
+         0.8, 0.1, 0.2, 0.4, 0.5,   // seq 1
+         0.2, 0.7, 0.7, 0.1, 0.7},  // seq 2
+
+        {// Batch1: 3 (input_sequence_size) * 5 (n_input)
+         0.3, 0.2, 0.9, 0.8, 0.1,   // seq 0
+         0.1, 0.5, 0.2, 0.4, 0.2,   // seq 1
+         0.6, 0.9, 0.2, 0.5, 0.7},  // seq 2
+    };
+  }
+};
+
+TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest,
+       LayerNormLstmBlackBoxTest) {
+  const int n_batch = 2;
+  const int n_input = 5;
+  const int n_cell = 4;
+  const int n_output = 3;
+  const float ceil_clip = 0.0;
+  const float proj_clip = 0.0;
+
+  LayerNormLSTMOpModel layer_norm_lstm(
+      n_batch, n_input, n_cell, n_output,
+      /*use_cifg=*/false, /*use_peephole=*/true,
+      /*use_projection_weights=*/true,
+      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      {
+          {n_batch, n_input},  // input tensor
+
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {n_cell},  // cell_to_input_weight tensor
+          {n_cell},  // cell_to_forget_weight tensor
+          {n_cell},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {n_output, n_cell},  // projection_weight tensor
+          {0},                 // projection_bias tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {n_cell},  // input_layer_norm_coefficient tensor
+          {n_cell},  // forget_layer_norm_coefficient tensor
+          {n_cell},  // cell_layer_norm_coefficient tensor
+          {n_cell},  // output_layer_norm_coefficient tensor
+      });
+
+  layer_norm_lstm.SetInputToInputWeights(input_to_input_weights_);
+  layer_norm_lstm.SetInputToCellWeights(input_to_cell_weights_);
+  layer_norm_lstm.SetInputToForgetWeights(input_to_forget_weights_);
+  layer_norm_lstm.SetInputToOutputWeights(input_to_output_weights_);
+
+  layer_norm_lstm.SetInputGateBias(input_gate_bias_);
+  layer_norm_lstm.SetCellBias(cell_gate_bias_);
+  layer_norm_lstm.SetForgetGateBias(forget_gate_bias_);
+  layer_norm_lstm.SetOutputGateBias(output_gate_bias_);
+
+  layer_norm_lstm.SetRecurrentToInputWeights(recurrent_to_input_weights_);
+  layer_norm_lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_);
+  layer_norm_lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_);
+  layer_norm_lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_);
+
+  layer_norm_lstm.SetCellToInputWeights(cell_to_input_weights_);
+  layer_norm_lstm.SetCellToForgetWeights(cell_to_forget_weights_);
+  layer_norm_lstm.SetCellToOutputWeights(cell_to_output_weights_);
+
+  layer_norm_lstm.SetInputLayerNormCoefficients(input_layer_norm_coefficients_);
+  layer_norm_lstm.SetForgetLayerNormCoefficients(
+      forget_layer_norm_coefficients_);
+  layer_norm_lstm.SetCellLayerNormCoefficients(cell_layer_norm_coefficients_);
+  layer_norm_lstm.SetOutputLayerNormCoefficients(
+      output_layer_norm_coefficients_);
+
+  layer_norm_lstm.SetProjectionWeights(projection_weights_);
+
+  // Verify the final output.
+  const std::vector<std::vector<float>> layer_norm_lstm_golden_output = {
+      {
+          // Batch0: 3 (input_sequence_size) * 3 (n_output)
+          0.0244077, 0.128027, -0.00170918,  // seq 0
+          0.0137642, 0.140751, 0.0395835,    // seq 1
+          -0.00459231, 0.155278, 0.0837377,  // seq 2
+      },
+      {
+          // Batch1: 3 (input_sequence_size) * 3 (n_output)
+          -0.00692428, 0.0848741, 0.063445,  // seq 0
+          -0.00403912, 0.139963, 0.072681,   // seq 1
+          0.00752706, 0.161903, 0.0561371,   // seq 2
+      }};
+
+  VerifyGoldens(layer_norm_lstm_input_, layer_norm_lstm_golden_output,
+                &layer_norm_lstm);
+}
+
+TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest,
+       HybridLayerNormLstmBlackBoxTest) {
+  const int n_batch = 2;
+  const int n_input = 5;
+  const int n_cell = 4;
+  const int n_output = 3;
+  const float ceil_clip = 0.0;
+  const float proj_clip = 0.0;
+
+  HybridLayerNormLSTMOpModel layer_norm_lstm(
+      n_batch, n_input, n_cell, n_output,
+      /*use_cifg=*/false, /*use_peephole=*/true,
+      /*use_projection_weights=*/true,
+      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      {
+          {n_batch, n_input},  // input tensor
+
+          {n_cell, n_input},  // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {n_cell, n_output},  // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {n_cell},  // cell_to_input_weight tensor
+          {n_cell},  // cell_to_forget_weight tensor
+          {n_cell},  // cell_to_output_weight tensor
+
+          {n_cell},  // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {n_output, n_cell},  // projection_weight tensor
+          {0},                 // projection_bias tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {n_cell},  // input_layer_norm_coefficient tensor
+          {n_cell},  // forget_layer_norm_coefficient tensor
+          {n_cell},  // cell_layer_norm_coefficient tensor
+          {n_cell},  // output_layer_norm_coefficient tensor
+      });
+
+  layer_norm_lstm.SetInputToInputWeights(input_to_input_weights_);
+  layer_norm_lstm.SetInputToCellWeights(input_to_cell_weights_);
+  layer_norm_lstm.SetInputToForgetWeights(input_to_forget_weights_);
+  layer_norm_lstm.SetInputToOutputWeights(input_to_output_weights_);
+
+  layer_norm_lstm.SetInputGateBias(input_gate_bias_);
+  layer_norm_lstm.SetCellBias(cell_gate_bias_);
+  layer_norm_lstm.SetForgetGateBias(forget_gate_bias_);
+  layer_norm_lstm.SetOutputGateBias(output_gate_bias_);
+
+  layer_norm_lstm.SetRecurrentToInputWeights(recurrent_to_input_weights_);
+  layer_norm_lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_);
+  layer_norm_lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_);
+  layer_norm_lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_);
+
+  layer_norm_lstm.SetCellToInputWeights(cell_to_input_weights_);
+  layer_norm_lstm.SetCellToForgetWeights(cell_to_forget_weights_);
+  layer_norm_lstm.SetCellToOutputWeights(cell_to_output_weights_);
+
+  layer_norm_lstm.SetInputLayerNormCoefficients(input_layer_norm_coefficients_);
+  layer_norm_lstm.SetForgetLayerNormCoefficients(
+      forget_layer_norm_coefficients_);
+  layer_norm_lstm.SetCellLayerNormCoefficients(cell_layer_norm_coefficients_);
+  layer_norm_lstm.SetOutputLayerNormCoefficients(
+      output_layer_norm_coefficients_);
+
+  layer_norm_lstm.SetProjectionWeights(projection_weights_);
+
+  const std::vector<std::vector<float>> layer_norm_lstm_golden_output = {
+      {
+          // Batch0: 3 (input_sequence_size) * 3 (n_output)
+          0.0244576, 0.127847, -0.00181765,  // seq 0
+          0.0137518, 0.140892, 0.0402234,    // seq 1
+          -0.0048839, 0.155096, 0.0840309,   // seq 2
+      },
+      {
+          // Batch1: 3 (input_sequence_size) * 3 (n_output)
+          -0.00728636, 0.0843957, 0.0634786,  // seq 0
+          -0.00448382, 0.139278, 0.0737372,   // seq 1
+          0.00734616, 0.161793, 0.0560238,    // seq 2
+      }};
+
+  VerifyGoldens(layer_norm_lstm_input_, layer_norm_lstm_golden_output,
+                &layer_norm_lstm);
+}
+
+class CifgPeepholeProjectionNoClippingLayerNormLstmTest
+    : public BaseLayerNormLstmTest {
+  void SetUp() override {
+    input_to_forget_weights_ = {-0.6, -0.1, 0.3,  0.2,  0.9,  -0.5, -0.2,
+                                -0.4, 0.3,  -0.8, -0.4, 0.3,  -0.5, -0.4,
+                                -0.6, 0.3,  -0.4, -0.6, -0.5, -0.5};
+    input_to_cell_weights_ = {-0.4, -0.3, -0.2, -0.1, -0.5, 0.5,  -0.2,
+                              -0.3, -0.2, -0.6, 0.6,  -0.1, -0.4, -0.3,
+                              -0.7, 0.7,  -0.9, -0.5, 0.8,  0.6};
+    input_to_output_weights_ = {-0.8, -0.4, -0.2, -0.9, -0.1, -0.7, 0.3,
+                                -0.3, -0.8, -0.2, 0.6,  -0.2, 0.4,  -0.7,
+                                -0.3, -0.5, 0.1,  0.5,  -0.6, -0.4};
+
+    forget_gate_bias_ = {0.1, -0.3, -0.2, 0.1};
+    cell_gate_bias_ = {-0.05, 0.72, 0.25, 0.08};
+    output_gate_bias_ = {0.05, -0.01, 0.2, 0.1};
+
+    recurrent_to_cell_weights_ = {-0.3, 0.2, 0.1, -0.3, 0.8,  -0.08,
+                                  -0.2, 0.3, 0.8, -0.6, -0.1, 0.2};
+    recurrent_to_forget_weights_ = {-0.5, -0.3, -0.5, -0.2, 0.6, 0.4,
+                                    0.9,  0.3,  -0.1, 0.2,  0.5, 0.2};
+    recurrent_to_output_weights_ = {0.3,  -0.1, 0.1,  -0.2, -0.5, -0.7,
+                                    -0.2, -0.6, -0.1, -0.4, -0.7, -0.2};
+
+    cell_to_forget_weights_ = {-0.02, -0.15, -0.25, -0.03};
+    cell_to_output_weights_ = {0.1, -0.1, -0.5, 0.05};
+
+    forget_layer_norm_coefficients_ = {0.2, 0.2, 0.4, 0.3};
+    cell_layer_norm_coefficients_ = {0.7, 0.2, 0.3, 0.8};
+    output_layer_norm_coefficients_ = {0.6, 0.2, 0.2, 0.5};
+    projection_weights_ = {-0.1, 0.2,  0.01, -0.2, 0.1,  0.5,
+                           0.3,  0.08, 0.07, 0.2,  -0.4, 0.2};
+
+    layer_norm_lstm_input_ = {
+        {// Batch0: 3 (input_sequence_size) * 5 (n_input)
+         0.7, 0.8, 0.1, 0.2, 0.3,   // seq 0
+         0.8, 0.1, 0.2, 0.4, 0.5,   // seq 1
+         0.2, 0.7, 0.7, 0.1, 0.7},  // seq 2
+
+        {// Batch1: 3 (input_sequence_size) * 5 (n_input)
+         0.3, 0.2, 0.9, 0.8, 0.1,   // seq 0
+         0.1, 0.5, 0.2, 0.4, 0.2,   // seq 1
+         0.6, 0.9, 0.2, 0.5, 0.7},  // seq 2
+    };
+  }
+};
+
+TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest,
+       LayerNormLstmBlackBoxTest) {
+  const int n_batch = 2;
+  const int n_input = 5;
+  const int n_cell = 4;
+  const int n_output = 3;
+  const float ceil_clip = 0.0;
+  const float proj_clip = 0.0;
+
+  LayerNormLSTMOpModel layer_norm_lstm(
+      n_batch, n_input, n_cell, n_output,
+      /*use_cifg=*/true, /*use_peephole=*/true,
+      /*use_projection_weights=*/true,
+      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      {
+          {n_batch, n_input},  // input tensor
+
+          {0, 0},             // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {0, 0},              // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},       // cell_to_input_weight tensor
+          {n_cell},  // cell_to_forget_weight tensor
+          {n_cell},  // cell_to_output_weight tensor
+
+          {0},       // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {n_output, n_cell},  // projection_weight tensor
+          {0},                 // projection_bias tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {0},       // input_layer_norm_coefficient tensor
+          {n_cell},  // forget_layer_norm_coefficient tensor
+          {n_cell},  // cell_layer_norm_coefficient tensor
+          {n_cell},  // output_layer_norm_coefficient tensor
+      });
+
+  layer_norm_lstm.SetInputToCellWeights(input_to_cell_weights_);
+  layer_norm_lstm.SetInputToForgetWeights(input_to_forget_weights_);
+  layer_norm_lstm.SetInputToOutputWeights(input_to_output_weights_);
+
+  layer_norm_lstm.SetCellBias(cell_gate_bias_);
+  layer_norm_lstm.SetForgetGateBias(forget_gate_bias_);
+  layer_norm_lstm.SetOutputGateBias(output_gate_bias_);
+
+  layer_norm_lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_);
+  layer_norm_lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_);
+  layer_norm_lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_);
+
+  layer_norm_lstm.SetCellToForgetWeights(cell_to_forget_weights_);
+  layer_norm_lstm.SetCellToOutputWeights(cell_to_output_weights_);
+
+  layer_norm_lstm.SetForgetLayerNormCoefficients(
+      forget_layer_norm_coefficients_);
+  layer_norm_lstm.SetCellLayerNormCoefficients(cell_layer_norm_coefficients_);
+  layer_norm_lstm.SetOutputLayerNormCoefficients(
+      output_layer_norm_coefficients_);
+
+  layer_norm_lstm.SetProjectionWeights(projection_weights_);
+
+  // Verify the final output.
+  const std::vector<std::vector<float>> layer_norm_lstm_golden_output = {
+      {
+          // Batch0: 3 (input_sequence_size) * 3 (n_output)
+          0.02129706, 0.140816242, 0.0112733059,     // seq 0
+          0.0132302344, 0.152308047, 0.0346313119,   // seq 1
+          -0.0123688057, 0.165790111, 0.0893077999,  // seq 2
+      },
+      {
+          // Batch1: 3 (input_sequence_size) * 3 (n_output)
+          -0.0226350538, 0.0916948169, 0.0769175813,  // seq 0
+          -0.0269966982, 0.149707705, 0.094149217,    // seq 1
+          -0.0103429332, 0.173016444, 0.0720508844,   // seq 2
+      }};
+
+  VerifyGoldens(layer_norm_lstm_input_, layer_norm_lstm_golden_output,
+                &layer_norm_lstm);
+}
+
+TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest,
+       HybridLayerNormLstmBlackBoxTest) {
+  const int n_batch = 2;
+  const int n_input = 5;
+  const int n_cell = 4;
+  const int n_output = 3;
+  const float ceil_clip = 0.0;
+  const float proj_clip = 0.0;
+
+  HybridLayerNormLSTMOpModel layer_norm_lstm(
+      n_batch, n_input, n_cell, n_output,
+      /*use_cifg=*/true, /*use_peephole=*/true,
+      /*use_projection_weights=*/true,
+      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      {
+          {n_batch, n_input},  // input tensor
+
+          {0, 0},             // input_to_input_weight tensor
+          {n_cell, n_input},  // input_to_forget_weight tensor
+          {n_cell, n_input},  // input_to_cell_weight tensor
+          {n_cell, n_input},  // input_to_output_weight tensor
+
+          {0, 0},              // recurrent_to_input_weight tensor
+          {n_cell, n_output},  // recurrent_to_forget_weight tensor
+          {n_cell, n_output},  // recurrent_to_cell_weight tensor
+          {n_cell, n_output},  // recurrent_to_output_weight tensor
+
+          {0},       // cell_to_input_weight tensor
+          {n_cell},  // cell_to_forget_weight tensor
+          {n_cell},  // cell_to_output_weight tensor
+
+          {0},       // input_gate_bias tensor
+          {n_cell},  // forget_gate_bias tensor
+          {n_cell},  // cell_bias tensor
+          {n_cell},  // output_gate_bias tensor
+
+          {n_output, n_cell},  // projection_weight tensor
+          {0},                 // projection_bias tensor
+
+          {n_batch, n_output},  // activation_state tensor
+          {n_batch, n_cell},    // cell_state tensor
+
+          {0},       // input_layer_norm_coefficient tensor
+          {n_cell},  // forget_layer_norm_coefficient tensor
+          {n_cell},  // cell_layer_norm_coefficient tensor
+          {n_cell},  // output_layer_norm_coefficient tensor
+      });
+
+  layer_norm_lstm.SetInputToCellWeights(input_to_cell_weights_);
+  layer_norm_lstm.SetInputToForgetWeights(input_to_forget_weights_);
+  layer_norm_lstm.SetInputToOutputWeights(input_to_output_weights_);
+
+  layer_norm_lstm.SetCellBias(cell_gate_bias_);
+  layer_norm_lstm.SetForgetGateBias(forget_gate_bias_);
+  layer_norm_lstm.SetOutputGateBias(output_gate_bias_);
+
+  layer_norm_lstm.SetRecurrentToCellWeights(recurrent_to_cell_weights_);
+  layer_norm_lstm.SetRecurrentToForgetWeights(recurrent_to_forget_weights_);
+  layer_norm_lstm.SetRecurrentToOutputWeights(recurrent_to_output_weights_);
+
+  layer_norm_lstm.SetCellToForgetWeights(cell_to_forget_weights_);
+  layer_norm_lstm.SetCellToOutputWeights(cell_to_output_weights_);
+
+  layer_norm_lstm.SetForgetLayerNormCoefficients(
+      forget_layer_norm_coefficients_);
+  layer_norm_lstm.SetCellLayerNormCoefficients(cell_layer_norm_coefficients_);
+  layer_norm_lstm.SetOutputLayerNormCoefficients(
+      output_layer_norm_coefficients_);
+
+  layer_norm_lstm.SetProjectionWeights(projection_weights_);
+
+  // Verify the final output.
+  const std::vector<std::vector<float>> layer_norm_lstm_golden_output = {
+      {
+          // Batch0: 3 (input_sequence_size) * 3 (n_output)
+          0.0212250091, 0.140474007, 0.0115012666,   // seq 0
+          0.0130806509, 0.152660668, 0.0347516984,   // seq 1
+          -0.0124010444, 0.166042402, 0.0898982584,  // seq 2
+      },
+      {
+          // Batch1: 3 (input_sequence_size) * 3 (n_output)
+          -0.0228835996, 0.0917588323, 0.0778886303,  // seq 0
+          -0.0275101066, 0.148769245, 0.0938384682,   // seq 1
+          -0.0103605557, 0.172605693, 0.0728750974,   // seq 2
+      }};
+
+  VerifyGoldens(layer_norm_lstm_input_, layer_norm_lstm_golden_output,
+                &layer_norm_lstm);
+}
+
 }  // namespace
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc
index 08e56b0ebd..7d41491ba3 100644
--- a/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc
+++ b/tensorflow/lite/kernels/unidirectional_sequence_lstm.cc
@@ -502,6 +502,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           recurrent_to_input_weights, recurrent_to_forget_weights,
           recurrent_to_cell_weights, recurrent_to_output_weights,
           cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+          /*input_layer_norm_coefficients=*/nullptr,
+          /*forget_layer_norm_coefficients=*/nullptr,
+          /*cell_layer_norm_coefficients=*/nullptr,
+          /*output_layer_norm_coefficients=*/nullptr,
           /*aux_input=*/nullptr,
           /*aux_input_to_input_weights=*/nullptr,
           /*aux_input_to_forget_weights=*/nullptr,
@@ -529,6 +533,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
           recurrent_to_input_weights, recurrent_to_forget_weights,
           recurrent_to_cell_weights, recurrent_to_output_weights,
           cell_to_input_weights, cell_to_forget_weights, cell_to_output_weights,
+          /*input_layer_norm_coefficients=*/nullptr,
+          /*forget_layer_norm_coefficients=*/nullptr,
+          /*cell_layer_norm_coefficients=*/nullptr,
+          /*output_layer_norm_coefficients=*/nullptr,
           /*aux_input=*/nullptr,
           /*aux_input_to_input_weights=*/nullptr,
           /*aux_input_to_forget_weights=*/nullptr,
-- 
GitLab


From c5328715d477404d1c9dfae4ad6a77f768a68298 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 19 Dec 2018 12:00:16 -0800
Subject: [PATCH 833/873] Register resource GPU kernels for NextIteration and
 Exit.

Prior to this change, accessing a GPU resource variable from a while
loop would fail, because the resource NextIteration and Exit nodes
would cause the placer to put the whole loop body on CPU, including
variable reads, etc. which must be colocated with the resource.

PiperOrigin-RevId: 226210621
---
 tensorflow/core/kernels/control_flow_ops.cc   |  2 ++
 .../python/keras/layers/unified_gru_test.py   | 14 +-------------
 .../python/keras/layers/unified_lstm_test.py  | 12 ++----------
 .../kernel_tests/control_flow_ops_py_test.py  | 19 +++++++++++++++++++
 4 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc
index 36def4a530..4bd90d36b8 100644
--- a/tensorflow/core/kernels/control_flow_ops.cc
+++ b/tensorflow/core/kernels/control_flow_ops.cc
@@ -514,6 +514,7 @@ REGISTER_SYCL_HOST_KERNEL(string);
 
 REGISTER_GPU_HOST_KERNEL(int32);
 REGISTER_GPU_HOST_KERNEL(string);
+REGISTER_GPU_HOST_KERNEL(ResourceHandle);
 
 #undef REGISTER_GPU_HOST_KERNEL
 
@@ -562,6 +563,7 @@ REGISTER_GPU_KERNEL(bool);
 
 REGISTER_GPU_HOST_KERNEL(int32);
 REGISTER_GPU_HOST_KERNEL(string);
+REGISTER_GPU_HOST_KERNEL(ResourceHandle);
 
 #undef REGISTER_GPU_HOST_KERNEL
 
diff --git a/tensorflow/python/keras/layers/unified_gru_test.py b/tensorflow/python/keras/layers/unified_gru_test.py
index 87587228b6..6e77acc20a 100644
--- a/tensorflow/python/keras/layers/unified_gru_test.py
+++ b/tensorflow/python/keras/layers/unified_gru_test.py
@@ -166,11 +166,8 @@ class GRULayerGradientTapeTest(test.TestCase):
     tape.gradient(loss, gru.variables)
 
 
-# TODO(scottzhu): Re-enable those tests in v2 mode once bugs attached are fixed.
-@test_util.run_v1_only
 class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
 
-  # b/120911602
   def test_unified_gru_feature_parity_with_canonical_gru(self):
     with context.eager_mode():
       # Run this test under eager only due to b/120160788 for model.set_weights.
@@ -213,7 +210,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
       self.assertAllClose(y_1, y_3)
       self.assertAllClose(y_2, y_4)
 
-  # b/120911602
   @parameterized.named_parameters(
       # test_name, use_bias, bias_initializer, activation
       ('normal', True, 'zeros'),
@@ -254,7 +250,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
     self.assertAllClose(y, y_ref)
     self.assertAllClose(layer.get_weights(), new_layer.get_weights())
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_unified_gru_output_on_multiple_kernel(self):
     input_shape = 10
@@ -295,7 +290,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
     self.assertAllClose(y_1, y_2)
     self.assertAllClose(y_2, y_3)
 
-  # b/120911602
   @parameterized.named_parameters(
       # test_name, time_major, go_backwards
       ('normal', False, False),
@@ -341,7 +335,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
 
     self.assertAllClose(y, y_ref)
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_with_masking_layer_GRU(self):
     layer_class = keras.layers.UnifiedGRU
@@ -355,7 +348,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
                   optimizer=gradient_descent.GradientDescentOptimizer(0.001))
     model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_masking_with_stacking_GRU(self):
     inputs = np.random.random((2, 3, 4))
@@ -370,7 +362,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
         optimizer=gradient_descent.GradientDescentOptimizer(0.01))
     model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_return_sequences_GRU(self):
     num_samples = 2
@@ -383,7 +374,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
                 'return_sequences': True},
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_dropout_GRU(self):
     num_samples = 2
@@ -397,7 +387,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
                 'recurrent_dropout': 0.1},
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_constraints_GRU(self):
     embedding_dim = 4
@@ -418,7 +407,6 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(layer.cell.recurrent_kernel.constraint, r_constraint)
     self.assertEqual(layer.cell.bias.constraint, b_constraint)
 
-  # b/120911602
   @parameterized.parameters([0, 1, 2])
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_implementation_mode_GRU(self, implementation_mode):
@@ -432,7 +420,7 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
                 'implementation': implementation_mode},
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  # b/120911602
+  @test_util.run_v1_only("b/120941292")
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_statefulness_GRU(self):
     num_samples = 2
diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py
index c51304666d..15086a53a8 100644
--- a/tensorflow/python/keras/layers/unified_lstm_test.py
+++ b/tensorflow/python/keras/layers/unified_lstm_test.py
@@ -425,11 +425,8 @@ class LSTMLayerGraphOnlyTest(test.TestCase):
     self.assertEqual(len(layer.get_losses_for(x)), 1)
 
 
-# TODO(scottzhu): Re-enable those tests in v2 mode once bugs attached are fixed.
-@test_util.run_v1_only
 class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_dropout_LSTM(self):
     num_samples = 2
@@ -445,7 +442,6 @@ class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
         },
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  # b/120911602
   def test_unified_lstm_feature_parity_with_canonical_lstm(self):
     with context.eager_mode():
       # Run this test under eager only due to b/120160788 for model.set_weights.
@@ -485,7 +481,6 @@ class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
       self.assertAllClose(y_1, y_3)
       self.assertAllClose(y_2, y_4)
 
-  # b/120911602
   @parameterized.named_parameters(('v0', 0), ('v1', 1), ('v2', 2))
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_implementation_mode_LSTM(self, implementation_mode):
@@ -530,7 +525,6 @@ class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
         optimizer=gradient_descent.GradientDescentOptimizer(0.01))
     model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_masking_with_stacking_LSTM(self):
     inputs = np.random.random((2, 3, 4))
@@ -545,7 +539,6 @@ class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
         optimizer=gradient_descent.GradientDescentOptimizer(0.01))
     model.fit(inputs, targets, epochs=1, batch_size=2, verbose=1)
 
-  # b/120911602
   @parameterized.named_parameters(
       # test_name, time_major, go_backwards
       ('normal', False, False),
@@ -616,7 +609,6 @@ class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
     model.evaluate(x_train, y_train)
     model.predict(x_train)
 
-  # b/120911602
   @parameterized.named_parameters(
       # test_name, use_bias, bias_initializer, activation
       ('normal', True, 'zeros'),
@@ -657,7 +649,6 @@ class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
     self.assertAllClose(y, y_ref)
     self.assertAllClose(layer.get_weights(), new_layer.get_weights())
 
-  # b/120911602
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_unified_lstm_output_on_multiple_kernel(self):
     input_shape = 10
@@ -711,7 +702,8 @@ class LSTMLayerV1OnlyTest(test.TestCase, parameterized.TestCase):
         },
         input_shape=(num_samples, timesteps, embedding_dim))
 
-  # b/120911602
+
+  @test_util.run_v1_only("b/120941292")
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_statefulness_LSTM(self):
     num_samples = 2
diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
index 256b9c3166..457766c622 100644
--- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
+++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py
@@ -32,6 +32,7 @@ from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import device_lib
 from tensorflow.python.client import session
 from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
 from tensorflow.python.eager import function as eager_function
 from tensorflow.python.eager import wrap_function
 from tensorflow.python.framework import constant_op
@@ -2398,6 +2399,24 @@ class ControlFlowTest(test.TestCase):
       self.assertEqual(i_val, 3)
       self.assertAllClose(x_val, 1.0)
 
+  @test_util.run_gpu_only
+  def testGpuResourceAccess(self):
+    with ops.device(test.gpu_device_name()):
+      var = resource_variable_ops.ResourceVariable(constant_op.constant(3.0))
+
+    @def_function.function
+    def foo():
+      return control_flow_ops.while_loop(
+          lambda i, _: i < 3,
+          lambda i, x: (i + 1, control_flow_ops.cond(
+              constant_op.constant(True),
+              lambda: x + var,
+              lambda: x)),
+          [0, 0.0])[1]
+
+    self.evaluate(variables.global_variables_initializer())
+    self.assertEqual(self.evaluate(foo()), 9.0)
+
   def testNestedResourceAccess(self):
     var = resource_variable_ops.ResourceVariable(constant_op.constant(3.0))
 
-- 
GitLab


From a4b375265195333e08454bac29a4179d58ab7ee0 Mon Sep 17 00:00:00 2001
From: Shivani Agrawal <shivaniagrawal@google.com>
Date: Wed, 19 Dec 2018 12:02:54 -0800
Subject: [PATCH 834/873] Fixing some left eager coverage.

PiperOrigin-RevId: 226211207
---
 .../kernel_tests/map_and_batch_test.py        | 24 +++++++------------
 .../data/kernel_tests/list_files_test.py      | 10 ++++----
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
index c90c5ed306..775dc61e48 100644
--- a/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
+++ b/tensorflow/python/data/experimental/kernel_tests/map_and_batch_test.py
@@ -169,13 +169,10 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
                        [[64], [81]]]
     self.assertDatasetProduces(dataset, expected_output=expected_output)
 
-# TODO(b/117581999): eager expected not same as actual, debug.
-
   @parameterized.named_parameters(
       ("Normal", False),
       ("NUMA", True),
   )
-  @test_util.run_deprecated_v1
   def testMapAndBatchParallelGetNext(self, numa_aware):
     dataset = dataset_ops.Dataset.range(50000).apply(
         batching.map_and_batch(lambda x: x, batch_size=100))
@@ -427,27 +424,24 @@ class MapAndBatchTest(test_base.DatasetTestBase, parameterized.TestCase):
       self.assertAllEqual([element for _ in range(10)],
                           self.evaluate(get_next()))
 
-  # TODO(b/117581999): add eager coverage.
   @parameterized.named_parameters(
       ("Identity", None, lambda x: x, None),
       ("Replicate", None, lambda x: (x, x), None),
       ("Swap", (None, None), lambda x, y: (y, x), None),
       ("Project", (None, None), lambda x, y: x, None),
   )
-  @test_util.run_deprecated_v1
-  def testSkipEagerShortCircuit(self, structure, map_fn, num_parallel_calls):
+  def testShortCircuit(self, structure, map_fn, num_parallel_calls):
     dataset = self.structuredDataset(structure).repeat().apply(
         batching.map_and_batch(map_fn, batch_size=10))
-    get_next = dataset_ops.make_one_shot_iterator(dataset).get_next()
+    get_next = self.getNext(dataset)
 
-    with self.cached_session() as sess:
-      if isinstance(structure, tuple):
-        expected = map_fn(
-            *sess.run(self.structuredElement(structure, shape=[10])))
-      else:
-        expected = map_fn(
-            sess.run(self.structuredElement(structure, shape=[10])))
-      self.assertAllEqual(expected, self.evaluate(get_next))
+    if isinstance(structure, tuple):
+      expected = map_fn(
+          *self.evaluate(self.structuredElement(structure, shape=[10])))
+    else:
+      expected = map_fn(
+          self.evaluate(self.structuredElement(structure, shape=[10])))
+    self.assertAllEqual(expected, self.evaluate(get_next()))
 
   def testShortCircuitCapturedInput(self):
     captured_t = variables.Variable(42)
diff --git a/tensorflow/python/data/kernel_tests/list_files_test.py b/tensorflow/python/data/kernel_tests/list_files_test.py
index a70c4b081d..03cec7efa5 100644
--- a/tensorflow/python/data/kernel_tests/list_files_test.py
+++ b/tensorflow/python/data/kernel_tests/list_files_test.py
@@ -106,11 +106,13 @@ class ListFilesTest(test_base.DatasetTestBase):
     self.assertEqual(all_actual_filenames[0], all_actual_filenames[1])
     self.assertEqual(all_actual_filenames[0], all_actual_filenames[2])
 
-  # TODO(b/117581999): eager mode assertion fail wrapped, debug.
-  def tesSkipEagerEmptyDirectoryInitializer(self):
-    dataset = dataset_ops.Dataset.list_files(path.join(self.tmp_dir, '*'))
+  def tesEmptyDirectoryInitializer(self):
+
+    def dataset_fn():
+      return dataset_ops.Dataset.list_files(path.join(self.tmp_dir, '*'))
+
     self.assertDatasetProduces(
-        dataset,
+        dataset_fn(),
         expected_error=(errors.InvalidArgumentError,
                         'No files matched pattern'),
         requires_initialization=True)
-- 
GitLab


From 75daa59871293b2dc5dc5d56d9b0d2cd99ca94b5 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 12:04:43 -0800
Subject: [PATCH 835/873] CPU implementation anti-aliasing scale and translate
 op which can be used to create anti-aliasing resize op.

PiperOrigin-RevId: 226211628
---
 tensorflow/cc/gradients/image_grad.cc         |  17 +
 tensorflow/cc/gradients/image_grad_test.cc    |  41 ++
 .../base_api/api_def_ScaleAndTranslate.pbtxt  |   3 +
 .../api_def_ScaleAndTranslateGrad.pbtxt       |   4 +
 .../api_def_ScaleAndTranslate.pbtxt           |   7 +
 tensorflow/core/kernels/BUILD                 |  29 +
 tensorflow/core/kernels/sampling_kernels.cc   |  38 ++
 tensorflow/core/kernels/sampling_kernels.h    | 190 ++++++
 .../core/kernels/sampling_kernels_test.cc     |  76 +++
 .../core/kernels/scale_and_translate_op.cc    | 610 ++++++++++++++++++
 .../core/kernels/scale_and_translate_op.h     |  75 +++
 .../kernels/scale_and_translate_op_test.cc    | 377 +++++++++++
 tensorflow/core/ops/image_ops.cc              |  27 +
 13 files changed, 1494 insertions(+)
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ScaleAndTranslate.pbtxt
 create mode 100644 tensorflow/core/api_def/base_api/api_def_ScaleAndTranslateGrad.pbtxt
 create mode 100644 tensorflow/core/api_def/python_api/api_def_ScaleAndTranslate.pbtxt
 create mode 100644 tensorflow/core/kernels/sampling_kernels.cc
 create mode 100644 tensorflow/core/kernels/sampling_kernels.h
 create mode 100644 tensorflow/core/kernels/sampling_kernels_test.cc
 create mode 100644 tensorflow/core/kernels/scale_and_translate_op.cc
 create mode 100644 tensorflow/core/kernels/scale_and_translate_op.h
 create mode 100644 tensorflow/core/kernels/scale_and_translate_op_test.cc

diff --git a/tensorflow/cc/gradients/image_grad.cc b/tensorflow/cc/gradients/image_grad.cc
index 882709e1e2..05c287bdc6 100644
--- a/tensorflow/cc/gradients/image_grad.cc
+++ b/tensorflow/cc/gradients/image_grad.cc
@@ -69,6 +69,23 @@ Status ResizeBicubicGradHelper(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("ResizeBicubic", ResizeBicubicGradHelper);
 
+Status ScaleAndTranslateGradHelper(const Scope& scope, const Operation& op,
+                                   const std::vector<Output>& grad_inputs,
+                                   std::vector<Output>* grad_outputs) {
+  string kernel_type;
+  TF_RETURN_IF_ERROR(
+      GetNodeAttr(op.node()->attrs(), "kernel_type", &kernel_type));
+  grad_outputs->push_back(internal::ScaleAndTranslateGrad(
+      scope, grad_inputs[0], op.input(0), op.input(2), op.input(3),
+      internal::ScaleAndTranslateGrad::KernelType(kernel_type)));
+
+  grad_outputs->push_back(NoGradient());
+  grad_outputs->push_back(NoGradient());
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("ScaleAndTranslate", ScaleAndTranslateGradHelper);
+
 }  // anonymous namespace
 }  // namespace ops
 }  // namespace tensorflow
diff --git a/tensorflow/cc/gradients/image_grad_test.cc b/tensorflow/cc/gradients/image_grad_test.cc
index 2e55c7561b..1d15022653 100644
--- a/tensorflow/cc/gradients/image_grad_test.cc
+++ b/tensorflow/cc/gradients/image_grad_test.cc
@@ -30,6 +30,7 @@ using ops::Const;
 using ops::ResizeBicubic;
 using ops::ResizeBilinear;
 using ops::ResizeNearestNeighbor;
+using ops::ScaleAndTranslate;
 
 class ImageGradTest : public ::testing::Test {
  protected:
@@ -153,5 +154,45 @@ TEST_F(ImageGradTest, TestBicubic) {
   TestResize<double, float, double>(RESIZE_BICUBIC);
 }
 
+class ScaleAndTranslateGradTest : public ::testing::Test {
+ protected:
+  ScaleAndTranslateGradTest() : scope_(Scope::NewRootScope()) {}
+
+  template <typename T>
+  Tensor MakeData(const TensorShape& data_shape) {
+    DataType data_type = DataTypeToEnum<T>::v();
+    Tensor data(data_type, data_shape);
+    auto data_flat = data.flat<T>();
+    for (int i = 0; i < data_flat.size(); ++i) {
+      data_flat(i) = T(i);
+    }
+    return data;
+  }
+
+  template <typename T>
+  void MakeOp(const Tensor& x_data, const Input& y_shape, Output* x,
+              Output* y) {
+    *x = Const<T>(scope_, x_data);
+    *y = ScaleAndTranslate(scope_, *x, y_shape, {1.8f, 2.1f}, {0.5f, 0.7f});
+    TF_ASSERT_OK(scope_.status());
+  }
+
+  template <typename X_T, typename Y_T, typename JAC_T>
+  void TestResize() {
+    TensorShape x_shape({1, 2, 3, 1});
+    Tensor x_data = MakeData<X_T>(x_shape);
+    Output x, y;
+    MakeOp<X_T>(x_data, {4, 6}, &x, &y);
+    JAC_T max_error;
+    TF_ASSERT_OK((ComputeGradientError<X_T, Y_T, JAC_T>(
+        scope_, x, x_data, y, {1, 4, 6, 1}, &max_error)));
+    EXPECT_LT(max_error, 1e-3);
+  }
+
+  Scope scope_;
+};
+
+TEST_F(ScaleAndTranslateGradTest, Works) { TestResize<float, float, float>(); }
+
 }  // namespace
 }  // namespace tensorflow
diff --git a/tensorflow/core/api_def/base_api/api_def_ScaleAndTranslate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScaleAndTranslate.pbtxt
new file mode 100644
index 0000000000..ff6794b07e
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ScaleAndTranslate.pbtxt
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "ScaleAndTranslate"
+}
diff --git a/tensorflow/core/api_def/base_api/api_def_ScaleAndTranslateGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScaleAndTranslateGrad.pbtxt
new file mode 100644
index 0000000000..3cda76226c
--- /dev/null
+++ b/tensorflow/core/api_def/base_api/api_def_ScaleAndTranslateGrad.pbtxt
@@ -0,0 +1,4 @@
+op {
+  graph_op_name: "ScaleAndTranslateGrad"
+  visibility: HIDDEN
+}
diff --git a/tensorflow/core/api_def/python_api/api_def_ScaleAndTranslate.pbtxt b/tensorflow/core/api_def/python_api/api_def_ScaleAndTranslate.pbtxt
new file mode 100644
index 0000000000..c10e7ab752
--- /dev/null
+++ b/tensorflow/core/api_def/python_api/api_def_ScaleAndTranslate.pbtxt
@@ -0,0 +1,7 @@
+op {
+  graph_op_name: "ScaleAndTranslate"
+  visibility: HIDDEN
+  endpoint {
+    name: "image.scale_and_translate"
+  }
+}
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 74a6d5f6bb..c8aa2b3265 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -655,6 +655,26 @@ cc_header_only_library(
     deps = [":image_resizer_state"],
 )
 
+cc_library(
+    name = "sampling_kernels",
+    srcs = ["sampling_kernels.cc"],
+    hdrs = ["sampling_kernels.h"],
+    visibility = ["//visibility:private"],
+    deps = ["//tensorflow/core:lib"],
+)
+
+tf_cc_test(
+    name = "sampling_kernels_test",
+    srcs = ["sampling_kernels_test.cc"],
+    deps = [
+        ":sampling_kernels",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+        "//tensorflow/core:testlib",
+        "@com_google_absl//absl/strings",
+    ],
+)
+
 # OpKernel libraries ----------------------------------------------------------
 
 ARRAY_DEPS = [
@@ -2308,6 +2328,7 @@ cc_library(
         ":resize_bilinear_op",
         ":resize_nearest_neighbor_op",
         ":sample_distorted_bounding_box_op",
+        ":scale_and_translate_op",
     ],
 )
 
@@ -2410,6 +2431,12 @@ tf_kernel_library(
     deps = IMAGE_DEPS,
 )
 
+tf_kernel_library(
+    name = "scale_and_translate_op",
+    prefix = "scale_and_translate_op",
+    deps = IMAGE_DEPS + [":sampling_kernels"],
+)
+
 tf_kernel_library(
     name = "random_crop_op",
     prefix = "random_crop_op",
@@ -2564,6 +2591,7 @@ tf_cc_tests(
         "resize_bicubic_op_test.cc",
         "resize_bilinear_op_test.cc",
         "resize_nearest_neighbor_op_test.cc",
+        "scale_and_translate_op_test.cc",
     ],
     linkopts = select({
         "//tensorflow:darwin": ["-headerpad_max_install_names"],
@@ -2573,6 +2601,7 @@ tf_cc_tests(
         ":image",
         ":ops_testutil",
         ":ops_util",
+        ":sampling_kernels",
         "//tensorflow/core:core_cpu",
         "//tensorflow/core:framework",
         "//tensorflow/core:lib",
diff --git a/tensorflow/core/kernels/sampling_kernels.cc b/tensorflow/core/kernels/sampling_kernels.cc
new file mode 100644
index 0000000000..a18379ddbb
--- /dev/null
+++ b/tensorflow/core/kernels/sampling_kernels.cc
@@ -0,0 +1,38 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/sampling_kernels.h"
+#include <string>
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+
+namespace tensorflow {
+namespace functor {
+
+SamplingKernelType SamplingKernelTypeFromString(const StringPiece str) {
+  const string lower_case = str_util::Lowercase(str);
+  if (lower_case == "lanczos1") return Lanczos1Kernel;
+  if (lower_case == "lanczos3") return Lanczos3Kernel;
+  if (lower_case == "lanczos5") return Lanczos5Kernel;
+  if (lower_case == "gaussian") return GaussianKernel;
+  if (lower_case == "box") return BoxKernel;
+  if (lower_case == "triangle") return TriangleKernel;
+  if (lower_case == "keyscubic") return KeysCubicKernel;
+  if (lower_case == "mitchellcubic") return MitchellCubicKernel;
+  return SamplingKernelTypeEnd;
+}
+
+}  // namespace functor
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/sampling_kernels.h b/tensorflow/core/kernels/sampling_kernels.h
new file mode 100644
index 0000000000..4e79d89831
--- /dev/null
+++ b/tensorflow/core/kernels/sampling_kernels.h
@@ -0,0 +1,190 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_SAMPLING_KERNELS_H_
+#define TENSORFLOW_CORE_KERNELS_SAMPLING_KERNELS_H_
+
+#include <cmath>
+#include "tensorflow/core/lib/core/stringpiece.h"
+
+namespace tensorflow {
+namespace functor {
+// Defines functions for different types of sampling kernels.
+enum SamplingKernelType {
+  // Lanczos kernel with radius 1.  Aliases but does not ring.
+  Lanczos1Kernel,
+
+  // Lanczos kernel with radius 3.  High-quality practical filter but may have
+  // some ringing especially on synthetic images.
+  Lanczos3Kernel,
+
+  // Lanczos kernel with radius 5.  Very-high-quality filter but may have
+  // stronger ringing.
+  Lanczos5Kernel,
+
+  // Gaussian kernel with radius 3, sigma = 1.5 / 3.  Less commonly used.
+  GaussianKernel,
+
+  // Rectangle function.  Equivalent to "nearest" sampling when upscaling.
+  // Has value 1 in interval (-0.5, 0.5), value 0.5 on edge, and 0 elsewhere.
+  BoxKernel,
+
+  // Hat/tent function with radius 1.  Equivalent to "bilinear" reconstruction
+  // when upsampling.
+  // Has value zero at -1.0 and 1.0.
+  TriangleKernel,
+
+  // Cubic interpolant of Keys.  Equivalent to Catmull-Rom kernel.  Reasonably
+  // good quality and faster than Lanczos3Kernel.
+  KeysCubicKernel,
+
+  // Cubic non-interpolating scheme.  For synthetic images (especially those
+  // lacking proper prefiltering), less ringing than Keys cubic kernel but less
+  // sharp.
+  MitchellCubicKernel,
+
+  // Always insert new kernel types before this.
+  SamplingKernelTypeEnd
+};
+
+// Converts a string into the corresponding kernel type.
+// Returns SamplingKernelTypeEnd if the string couldn't be converted.
+SamplingKernelType SamplingKernelTypeFromString(const StringPiece str);
+
+// A function object for a Lanczos kernel.
+struct LanczosKernelFunc {
+  // Pass 1 for Lanczos1 kernel, 3 for Lanczos3 etc.
+  explicit LanczosKernelFunc(float _radius) : radius(_radius) {}
+  float operator()(float x) const {
+    x = std::abs(x);
+    if (x > radius) return 0.0;
+    // Need to special case the limit case of sin(x) / x when x is zero.
+    if (x <= 1e-3) {
+      return 1.0;
+    }
+    return radius * std::sin(M_PI * x) * std::sin(M_PI * x / radius) /
+           (M_PI * M_PI * x * x);
+  }
+  float Radius() const { return radius; }
+  const float radius;
+};
+
+struct GaussianKernelFunc {
+  static constexpr float kRadiusMultiplier = 3.0f;
+  // https://en.wikipedia.org/wiki/Gaussian_function
+  // We use sigma = 0.5, as suggested on p. 4 of Ken Turkowski's "Filters
+  // for Common Resampling Tasks" for kernels with a support of 3 pixels:
+  // www.realitypixels.com/turk/computergraphics/ResamplingFilters.pdf
+  // This implies a radius of 1.5,
+  explicit GaussianKernelFunc(float _radius = 1.5f)
+      : radius(_radius), sigma(_radius / kRadiusMultiplier) {}
+  float operator()(float x) const {
+    x = std::abs(x);
+    if (x >= radius) return 0.0;
+    return std::exp(-x * x / (2.0 * sigma * sigma));
+  }
+  float Radius() const { return radius; }
+  const float radius;
+  const float sigma;  // Gaussian standard deviation
+};
+
+struct BoxKernelFunc {
+  float operator()(float x) const {
+    x = std::abs(x);
+    return x < 0.5f ? 1. : x == 0.5f ? 0.5f : 0.0f;
+  }
+  float Radius() const { return 1.f; }
+};
+
+struct TriangleKernelFunc {
+  // https://en.wikipedia.org/wiki/Triangle_function
+  float operator()(float x) const {
+    x = std::abs(x);
+    return x < 1.0f ? 1.0f - x : 0.0f;
+  }
+  float Radius() const { return 1.f; }
+};
+
+struct KeysCubicKernelFunc {
+  // http://ieeexplore.ieee.org/document/1163711/
+  // R. G. Keys. Cubic convolution interpolation for digital image
+  // processing. IEEE Transactions on Acoustics, Speech, and Signal
+  // Processing, 29(6):1153–1160, 1981.
+  float operator()(float x) const {
+    x = std::abs(x);
+    if (x >= 2.0f) {
+      return 0.0f;
+    } else if (x >= 1.0f) {
+      return ((-0.5f * x + 2.5f) * x - 4.0f) * x + 2.0f;
+    } else {
+      return ((1.5f * x - 2.5f) * x) * x + 1.0f;
+    }
+  }
+  float Radius() const { return 2.f; }
+};
+
+struct MitchellCubicKernelFunc {
+  // https://doi.org/10.1145/378456.378514
+  // D. P. Mitchell and A. N. Netravali. Reconstruction filters in computer
+  // graphics.  Computer Graphics (Proceedings of ACM SIGGRAPH 1988),
+  // 22(4):221–228, 1988.
+  float operator()(float x) const {
+    x = std::abs(x);
+    if (x >= 2.0f) {
+      return 0.0f;
+    } else if (x >= 1.0f) {
+      return (((-7.0f / 18.0f) * x + 2.0f) * x - 10.0f / 3.0f) * x +
+             16.0f / 9.0f;
+    } else {
+      return (((7.0f / 6.0f) * x - 2.0f) * x) * x + 8.0f / 9.0f;
+    }
+  }
+  float Radius() const { return 2.f; }
+};
+
+inline LanczosKernelFunc CreateLanczos1Kernel() {
+  return LanczosKernelFunc(1.0);
+}
+
+inline LanczosKernelFunc CreateLanczos3Kernel() {
+  return LanczosKernelFunc(3.0);
+}
+
+inline LanczosKernelFunc CreateLanczos5Kernel() {
+  return LanczosKernelFunc(5.0);
+}
+
+inline GaussianKernelFunc CreateGaussianKernel() {
+  return GaussianKernelFunc(1.5);
+}
+
+inline BoxKernelFunc CreateBoxKernel() { return BoxKernelFunc(); }
+
+inline TriangleKernelFunc CreateTriangleKernel() {
+  return TriangleKernelFunc();
+}
+
+inline KeysCubicKernelFunc CreateKeysCubicKernel() {
+  return KeysCubicKernelFunc();
+}
+
+inline MitchellCubicKernelFunc CreateMitchellCubicKernel() {
+  return MitchellCubicKernelFunc();
+}
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_KERNELS_SAMPLING_KERNELS_H_
diff --git a/tensorflow/core/kernels/sampling_kernels_test.cc b/tensorflow/core/kernels/sampling_kernels_test.cc
new file mode 100644
index 0000000000..37c2edc14a
--- /dev/null
+++ b/tensorflow/core/kernels/sampling_kernels_test.cc
@@ -0,0 +1,76 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/kernels/sampling_kernels.h"
+
+#include "tensorflow/core/platform/test.h"
+
+namespace tensorflow {
+namespace functor {
+namespace {
+
+class KernelsTest : public ::testing::Test {
+ protected:
+  template <typename KernelType>
+  void TestKernelValues(const KernelType& kernel, const std::vector<float>& x,
+                        const std::vector<float>& expected) const {
+    ASSERT_EQ(x.size(), expected.size());
+    for (int i = 0; i < x.size(); ++i) {
+      constexpr float kTolerance = 1e-3;
+      EXPECT_NEAR(kernel(x[i]), expected[i], kTolerance);
+      EXPECT_NEAR(kernel(-x[i]), expected[i], kTolerance);
+    }
+  }
+};
+
+TEST_F(KernelsTest, TestKernelValues) {
+  // Tests kernel values against a set of known golden values
+  TestKernelValues(CreateLanczos1Kernel(), {0.0f, 0.5f, 1.0f, 1.5},
+                   {1.0f, 0.4052f, 0.0f, 0.0f});
+  TestKernelValues(CreateLanczos3Kernel(), {0.0f, 0.5f, 1.0f, 1.5f, 2.5f, 3.5},
+                   {1.0f, 0.6079f, 0.0f, -0.1351f, 0.0243f, 0.0f});
+  TestKernelValues(
+      CreateLanczos5Kernel(), {0.0f, 0.5f, 1.0f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5},
+      {1.0f, 0.6262f, 0.0f, -0.1822f, 0.0810569f, -0.0334f, 0.0077f, 0.0f});
+  TestKernelValues(CreateGaussianKernel(), {0.0f, 0.5f, 1.0f, 1.5},
+                   {1.0f, 0.6065f, 0.1353f, 0.0f});
+
+  TestKernelValues(CreateBoxKernel(), {0.0f, 0.25f, 0.5f, 1.0f},
+                   {1.0f, 1.0f, 0.5f, 0.0f});
+  TestKernelValues(CreateTriangleKernel(), {0.0f, 0.5f, 1.0f},
+                   {1.0f, 0.5f, 0.0f});
+
+  TestKernelValues(CreateKeysCubicKernel(), {0.0f, 0.5f, 1.0f, 1.5f, 2.5},
+                   {1.0f, 0.5625f, 0.0f, -0.0625f, 0.0f});
+  TestKernelValues(CreateMitchellCubicKernel(), {0.0f, 0.5f, 1.0f, 1.5f, 2.5},
+                   {0.8889f, 0.5347f, 0.0556f, -0.0347f, 0.0f});
+}
+
+TEST(SamplingKernelTypeFromStringTest, Works) {
+  EXPECT_EQ(SamplingKernelTypeFromString("lanczos1"), Lanczos1Kernel);
+  EXPECT_EQ(SamplingKernelTypeFromString("lanczos3"), Lanczos3Kernel);
+  EXPECT_EQ(SamplingKernelTypeFromString("lanczos5"), Lanczos5Kernel);
+  EXPECT_EQ(SamplingKernelTypeFromString("gaussian"), GaussianKernel);
+  EXPECT_EQ(SamplingKernelTypeFromString("box"), BoxKernel);
+  EXPECT_EQ(SamplingKernelTypeFromString("triangle"), TriangleKernel);
+  EXPECT_EQ(SamplingKernelTypeFromString("mitchellcubic"), MitchellCubicKernel);
+  EXPECT_EQ(SamplingKernelTypeFromString("keyscubic"), KeysCubicKernel);
+  EXPECT_EQ(SamplingKernelTypeFromString("not a kernel"),
+            SamplingKernelTypeEnd);
+}
+
+}  // namespace
+}  // namespace functor
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/scale_and_translate_op.cc b/tensorflow/core/kernels/scale_and_translate_op.cc
new file mode 100644
index 0000000000..149c5526ae
--- /dev/null
+++ b/tensorflow/core/kernels/scale_and_translate_op.cc
@@ -0,0 +1,610 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/image_ops.cc
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/scale_and_translate_op.h"
+
+#include <memory>
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/bounds_check.h"
+#include "tensorflow/core/kernels/sampling_kernels.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+using strings::Printf;
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+namespace functor {
+namespace {
+template <typename T>
+inline const T& Clamp(const T& low, const T& high, const T& value) {
+  if (high < value) return high;
+  if (value < low) return low;
+  return value;
+}
+
+template <typename Kernel>
+Status ComputeSpansCore(OpKernelContext* context, const Kernel& kernel,
+                        const int64 output_size, const int64 input_size,
+                        const float scale, const float translate,
+                        Spans* spans) {
+  // When sampling, we need the inverse scale and translation, to map from an
+  // output to an input pixel.
+  const float inv_scale = 1.0 / scale;
+  const float inv_translate = -inv_scale * translate;
+  // When downsampling the kernel should be scaled since we want to low pass
+  // filter and interpolate, but when upsampling it should not be since we only
+  // want to interpolate.
+  const float kernel_scale = std::max(inv_scale, 1.0f);
+  spans->span_size = std::min(
+      2 * static_cast<int>(std::ceil(kernel.Radius() * kernel_scale)) + 1,
+      static_cast<int>(input_size));
+  AllocatorAttributes alloc_attr;
+  alloc_attr.set_on_host(true);
+  TF_RETURN_IF_ERROR(context->allocate_temp(
+      tensorflow::DT_INT32, tensorflow::TensorShape({output_size}),
+      &spans->starts, alloc_attr));
+  auto starts_vec = spans->starts.vec<int32>();
+  TF_RETURN_IF_ERROR(context->allocate_temp(
+      tensorflow::DT_FLOAT,
+      tensorflow::TensorShape({spans->span_size * output_size}),
+      &spans->weights, alloc_attr));
+  auto weights_vec = spans->weights.vec<float>();
+  weights_vec.setZero();
+
+  const float one_over_kernel_scale = 1.0f / kernel_scale;
+  int max_span_size = 0;
+  std::vector<float> temp_weights;
+  for (int x = 0; x < output_size; ++x) {
+    const float col_f = x + 0.5f;
+    const float sample_f = col_f * inv_scale + inv_translate;
+
+    // Don't sample when the sampling *kernel* is completely outside the
+    // source image.
+    if (sample_f < 0 - kernel.Radius() * kernel_scale ||
+        sample_f > input_size + kernel.Radius() * kernel_scale) {
+      // Add an empty span.
+      starts_vec(x) = 0;
+      continue;
+    }
+    int64 span_start =
+        std::ceil(sample_f - kernel.Radius() * kernel_scale - 0.5f);
+    int64 span_end =
+        std::floor(sample_f + kernel.Radius() * kernel_scale - 0.5f);
+    span_start = Clamp(static_cast<int64>(0), input_size - 1, span_start);
+    span_end = Clamp(static_cast<int64>(0), input_size - 1, span_end) + 1;
+    const int this_span_size = span_end - span_start;
+    if (this_span_size > spans->span_size) {
+      return errors::Internal(Printf("Span is too large: %d vs %d.",
+                                     this_span_size, spans->span_size));
+    }
+    float total_weight_sum = 0.0f;
+    temp_weights.clear();
+    for (int source = span_start; source < span_end; ++source) {
+      float kernel_pos = static_cast<float>(source) + 0.5f - sample_f;
+      float weight = kernel(std::abs(kernel_pos * one_over_kernel_scale));
+      total_weight_sum += weight;
+      temp_weights.push_back(weight);
+    }
+    max_span_size = std::max(max_span_size, this_span_size);
+    if (std::abs(total_weight_sum) >=
+        1000.0f * std::numeric_limits<float>::min()) {
+      float one_over_total_weight_sum = 1.0f / total_weight_sum;
+      int out_index = spans->span_size * x;
+      for (float weight : temp_weights) {
+        weights_vec(out_index) = weight * one_over_total_weight_sum;
+        ++out_index;
+      }
+    }
+    starts_vec(x) = span_start;
+  }
+  return Status::OK();
+}
+
+Status ComputeGradSpansCore(OpKernelContext* context, const Spans& spans,
+                            const int64 forward_output_size,
+                            const int64 forward_input_size, Spans* grad_spans) {
+  struct GradComponent {
+    int index;
+    float weight;
+  };
+  std::vector<std::vector<GradComponent>> grad_components(forward_input_size);
+  auto weights_vec = spans.weights.vec<float>();
+  auto starts_vec = spans.starts.vec<int32>();
+  for (int output_index = 0; output_index < forward_output_size;
+       ++output_index) {
+    int input_index = starts_vec(output_index);
+    for (int j = 0; j < spans.span_size; ++j, ++input_index) {
+      const float weight = weights_vec(output_index * spans.span_size + j);
+      if (weight != 0.0f && input_index < forward_input_size) {
+        grad_components[input_index].push_back(
+            GradComponent{output_index, weight});
+      }
+    }
+  }
+  int max_size = 0;
+  for (std::vector<GradComponent>& gc : grad_components) {
+    if (!gc.empty()) {
+      std::sort(gc.begin(), gc.end(),
+                [](const GradComponent& x1, const GradComponent& x2) {
+                  return x1.index < x2.index;
+                });
+      max_size = std::max(gc.back().index - gc.front().index + 1, max_size);
+    }
+  }
+  grad_spans->span_size = max_size;
+  AllocatorAttributes alloc_attr;
+  alloc_attr.set_on_host(true);
+  TF_RETURN_IF_ERROR(context->allocate_temp(
+      tensorflow::DT_INT32, tensorflow::TensorShape({forward_input_size}),
+      &grad_spans->starts, alloc_attr));
+  auto grad_starts_vec = grad_spans->starts.vec<int32>();
+  TF_RETURN_IF_ERROR(context->allocate_temp(
+      tensorflow::DT_FLOAT,
+      tensorflow::TensorShape({grad_spans->span_size * forward_input_size}),
+      &grad_spans->weights, alloc_attr));
+  auto grad_weights_vec = grad_spans->weights.vec<float>();
+  grad_weights_vec.setZero();
+  for (int input_index = 0; input_index < forward_input_size; ++input_index) {
+    const int start_span = grad_components[input_index].front().index;
+    grad_starts_vec(input_index) = start_span;
+    for (const GradComponent& gc : grad_components[input_index]) {
+      grad_weights_vec(input_index * grad_spans->span_size + gc.index -
+                       start_span) += gc.weight;
+    }
+  }
+  return Status::OK();
+}
+
+// Computes the spans for the passed kernel, for a input dimension of length
+// input_size transformed by scale and translate to an output dimension of
+// length output_size. Note that there's no requirement that;
+// output_size = input_size * scale.
+Status ComputeSpans(OpKernelContext* context,
+                    const functor::SamplingKernelType kernel_type,
+                    const int64 output_size, const int64 input_size,
+                    const float scale, const float translate, Spans* spans) {
+  switch (kernel_type) {
+    case functor::Lanczos1Kernel: {
+      return ComputeSpansCore(context, CreateLanczos1Kernel(), output_size,
+                              input_size, scale, translate, spans);
+    }
+    case functor::Lanczos3Kernel: {
+      return ComputeSpansCore(context, CreateLanczos3Kernel(), output_size,
+                              input_size, scale, translate, spans);
+    }
+    case functor::Lanczos5Kernel: {
+      return ComputeSpansCore(context, CreateLanczos5Kernel(), output_size,
+                              input_size, scale, translate, spans);
+    }
+    case functor::GaussianKernel: {
+      return ComputeSpansCore(context, CreateGaussianKernel(), output_size,
+                              input_size, scale, translate, spans);
+    }
+    case functor::BoxKernel: {
+      return ComputeSpansCore(context, CreateBoxKernel(), output_size,
+                              input_size, scale, translate, spans);
+    }
+    case functor::TriangleKernel: {
+      return ComputeSpansCore(context, CreateTriangleKernel(), output_size,
+                              input_size, scale, translate, spans);
+    }
+    case functor::KeysCubicKernel: {
+      return ComputeSpansCore(context, CreateKeysCubicKernel(), output_size,
+                              input_size, scale, translate, spans);
+    }
+    case functor::MitchellCubicKernel: {
+      return ComputeSpansCore(context, CreateMitchellCubicKernel(), output_size,
+                              input_size, scale, translate, spans);
+    }
+    default:
+      return errors::InvalidArgument(Printf("Unrecognized kernel type: %d",
+                                            static_cast<int>(kernel_type)));
+  }
+  return Status::OK();
+}
+
+// Computes the grad spans for the passed kernel.
+// forward_input_size and forward_output_size are the input and output size from
+// the forward operation.
+Status ComputeGradSpans(OpKernelContext* context,
+                        const functor::SamplingKernelType kernel_type,
+                        const int64 forward_output_size,
+                        const int64 forward_input_size, const float scale,
+                        const float translate, Spans* grad_spans) {
+  Spans spans;
+  TF_RETURN_IF_ERROR(ComputeSpans(context, kernel_type, forward_output_size,
+                                  forward_input_size, scale, translate,
+                                  &spans));
+  return ComputeGradSpansCore(context, spans, forward_output_size,
+                              forward_input_size, grad_spans);
+}
+
+void GetValues(OpKernelContext* context, int input_index, float* v_1,
+               float* v_2) {
+  // Tensor mutable_input(int index, False);
+  const Tensor& t = context->input(input_index);
+  OP_REQUIRES(context, t.dims() == 1,
+              errors::InvalidArgument("t must be 1-dimensional",
+                                      t.shape().DebugString()));
+  OP_REQUIRES(context, t.NumElements() == 2,
+              errors::InvalidArgument("t must have two elements",
+                                      t.shape().DebugString()));
+
+  auto data_vec = t.flat<float>().data();
+  *v_1 = data_vec[0];
+  *v_2 = data_vec[1];
+}
+
+template <typename Device, typename T>
+class ScaleAndTranslateOp : public OpKernel {
+ public:
+  explicit ScaleAndTranslateOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string kernel_type_str;
+    OP_REQUIRES_OK(context, context->GetAttr("kernel_type", &kernel_type_str));
+    kernel_type_ = functor::SamplingKernelTypeFromString(kernel_type_str);
+    OP_REQUIRES(context, kernel_type_ != functor::SamplingKernelTypeEnd,
+                errors::InvalidArgument("Unrecognized kernel type: " +
+                                        kernel_type_str));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    OP_REQUIRES(context, input.dims() == 4,
+                errors::InvalidArgument("input must be 4-dimensional",
+                                        input.shape().DebugString()));
+    const Tensor& output_shape_t = context->input(1);
+    OP_REQUIRES(context, output_shape_t.dims() == 1,
+                errors::InvalidArgument("output_shape_t must be 1-dimensional",
+                                        output_shape_t.shape().DebugString()));
+    OP_REQUIRES(context, output_shape_t.NumElements() == 2,
+                errors::InvalidArgument("output_shape_t must have two elements",
+                                        output_shape_t.shape().DebugString()));
+    auto output_shape_vec = output_shape_t.vec<int32>();
+    const int64 output_height = internal::SubtleMustCopy(output_shape_vec(0));
+    const int64 output_width = internal::SubtleMustCopy(output_shape_vec(1));
+
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(input.dim_size(1), std::numeric_limits<int32>::max()) &&
+            FastBoundsCheck(input.dim_size(2),
+                            std::numeric_limits<int32>::max()),
+        errors::InvalidArgument("input sizes must be between 0 and max int32"));
+
+    const int64 batch_size = input.dim_size(0);
+    const int64 input_height = input.dim_size(1);
+    const int64 input_width = input.dim_size(2);
+    const int64 channels = input.dim_size(3);
+    OP_REQUIRES(context, output_height > 0 && output_width > 0,
+                errors::InvalidArgument("output dimensions must be positive"));
+    OP_REQUIRES(
+        context, channels > 0,
+        errors::InvalidArgument("image must have at least one channel"));
+    OP_REQUIRES(
+        context, input.dim_size(1) > 0 && input.dim_size(2) > 0,
+        errors::InvalidArgument("input image must be of non-zero size"));
+
+    float row_scale, col_scale;
+    GetValues(context, 2, &row_scale, &col_scale);
+    OP_REQUIRES(context, row_scale > 0 && col_scale > 0,
+                errors::InvalidArgument("Scale must be greater than zero."));
+    float row_translation, col_translation;
+    GetValues(context, 3, &row_translation, &col_translation);
+
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                0,
+                                TensorShape({input.dim_size(0), output_height,
+                                             output_width, input.dim_size(3)}),
+                                &output));
+    if (!context->status().ok()) return;
+
+    // Return if the output is empty.
+    if (output->NumElements() == 0) return;
+
+    typename TTypes<T, 4>::ConstTensor image_data(input.tensor<T, 4>());
+    TTypes<float, 4>::Tensor output_data = output->tensor<float, 4>();
+
+    functor::Spans col_spans;
+    OP_REQUIRES_OK(
+        context, ComputeSpans(context, kernel_type_, output_width, input_width,
+                              col_scale, col_translation, &col_spans));
+    functor::Spans row_spans;
+    OP_REQUIRES_OK(context, ComputeSpans(context, kernel_type_, output_height,
+                                         input_height, row_scale,
+                                         row_translation, &row_spans));
+    Tensor intermediate_t;
+    OP_REQUIRES_OK(
+        context, context->allocate_temp(DT_FLOAT,
+                                        TensorShape({batch_size, output_height,
+                                                     input_width, channels}),
+                                        &intermediate_t));
+    TTypes<float, 4>::Tensor intermediate_data =
+        intermediate_t.tensor<float, 4>();
+
+    const functor::Spans& const_row_spans = row_spans;
+    typename TTypes<int32, 1>::ConstTensor row_starts(
+        const_row_spans.starts.tensor<int32, 1>());
+    typename TTypes<float, 1>::ConstTensor row_weights(
+        const_row_spans.weights.tensor<float, 1>());
+    const functor::Spans& const_col_spans = col_spans;
+    typename TTypes<int32, 1>::ConstTensor col_starts(
+        const_col_spans.starts.tensor<int32, 1>());
+    typename TTypes<float, 1>::ConstTensor col_weights(
+        const_col_spans.weights.tensor<float, 1>());
+
+    functor::GatherSpans<Device, T>()(
+        context->eigen_device<Device>(), row_spans.span_size, row_starts,
+        row_weights, col_spans.span_size, col_starts, col_weights, image_data,
+        intermediate_data, output_data);
+  }
+  functor::SamplingKernelType kernel_type_;
+};
+
+template <typename Device, typename T>
+class ScaleAndTranslateGradOp : public OpKernel {
+ public:
+  explicit ScaleAndTranslateGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    string kernel_type_str;
+    OP_REQUIRES_OK(context, context->GetAttr("kernel_type", &kernel_type_str));
+    kernel_type_ = functor::SamplingKernelTypeFromString(kernel_type_str);
+    OP_REQUIRES(context, kernel_type_ != functor::SamplingKernelTypeEnd,
+                errors::InvalidArgument("Unrecognized kernel type: " +
+                                        kernel_type_str));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& input = context->input(0);
+    const Tensor& original_image = context->input(1);
+
+    OP_REQUIRES(context, input.dims() == 4,
+                errors::InvalidArgument("input_grad must be 4-dimensional",
+                                        input.shape().DebugString()));
+    // Resizers always produce float images, so input gradient must
+    // always be a float.
+    OP_REQUIRES(context, input.dtype() == DT_FLOAT,
+                errors::InvalidArgument("input_grad must be of type float",
+                                        DataTypeString(input.dtype())));
+
+    OP_REQUIRES(context, original_image.dims() == 4,
+                errors::InvalidArgument("original_image must be 4-dimensional",
+                                        original_image.shape().DebugString()));
+
+    // Allocate output and initialize to zeros.
+    const int64 batch_size = input.dim_size(0);
+    const int64 channels = input.dim_size(3);
+    const int64 forward_input_height = original_image.dim_size(1);
+    const int64 forward_input_width = original_image.dim_size(2);
+
+    OP_REQUIRES(context,
+                FastBoundsCheck(forward_input_height,
+                                std::numeric_limits<int32>::max()) &&
+                    FastBoundsCheck(forward_input_width,
+                                    std::numeric_limits<int32>::max()),
+                errors::InvalidArgument(
+                    "original sizes must be between 0 and max int32"));
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context, context->allocate_output(
+                                0,
+                                TensorShape({batch_size, forward_input_height,
+                                             forward_input_width, channels}),
+                                &output));
+
+    float row_scale, col_scale;
+    GetValues(context, 2, &row_scale, &col_scale);
+    OP_REQUIRES(context, row_scale > 0 && col_scale > 0,
+                errors::InvalidArgument("Scale must be greater than zero."));
+    float row_translation, col_translation;
+    GetValues(context, 3, &row_translation, &col_translation);
+
+    if (!context->status().ok()) return;
+
+    TTypes<float, 4>::ConstTensor input_grad = input.tensor<float, 4>();
+    typename TTypes<T, 4>::Tensor output_grad(output->tensor<T, 4>());
+
+    const int64 forward_output_height = input_grad.dimension(1);
+    const int64 forward_output_width = input_grad.dimension(2);
+
+    functor::Spans col_spans;
+    OP_REQUIRES_OK(context,
+                   ComputeGradSpans(context, kernel_type_, forward_output_width,
+                                    forward_input_width, col_scale,
+                                    col_translation, &col_spans));
+    functor::Spans row_spans;
+    OP_REQUIRES_OK(
+        context, ComputeGradSpans(context, kernel_type_, forward_output_height,
+                                  forward_input_height, row_scale,
+                                  row_translation, &row_spans));
+    Tensor intermediate_t;
+    OP_REQUIRES_OK(context, context->allocate_temp(
+                                DT_FLOAT,
+                                TensorShape({batch_size, forward_input_height,
+                                             forward_output_width, channels}),
+                                &intermediate_t));
+    TTypes<float, 4>::Tensor intermediate_data =
+        intermediate_t.tensor<float, 4>();
+
+    const functor::Spans& const_row_spans = row_spans;
+    typename TTypes<int32, 1>::ConstTensor row_starts =
+        const_row_spans.starts.tensor<int32, 1>();
+    typename TTypes<float, 1>::ConstTensor row_weights(
+        const_row_spans.weights.tensor<float, 1>());
+    const functor::Spans& const_col_spans = col_spans;
+    typename TTypes<int32, 1>::ConstTensor col_starts(
+        const_col_spans.starts.tensor<int32, 1>());
+    typename TTypes<float, 1>::ConstTensor col_weights(
+        const_col_spans.weights.tensor<float, 1>());
+
+    functor::GatherSpans<Device, T>()(
+        context->eigen_device<Device>(), row_spans.span_size, row_starts,
+        row_weights, col_spans.span_size, col_starts, col_weights, input_grad,
+        intermediate_data, output_grad);
+  }
+
+  functor::SamplingKernelType kernel_type_;
+};
+
+template <typename T>
+void GatherColumns(int span_size, const int32* starts, const float* weights,
+                   const T* image, const int64 input_height,
+                   const int64 input_width, const int64 output_height,
+                   const int64 output_width, const int channels,
+                   float* output) {
+  const int64 in_row_size = input_width * channels;
+  const int64 out_row_size = output_width * channels;
+
+  for (int y = 0; y < output_height; ++y) {
+    const T* input_row_start = image + in_row_size * y;
+    float* out_pix = output + out_row_size * y;
+    for (int x = 0; x < output_width; ++x, out_pix += channels) {
+      const T* in_pix = input_row_start + starts[x] * channels;
+      const float* weights_start = weights + x * span_size;
+      const int real_span_size =
+          std::min(starts[x] + span_size, static_cast<int>(input_width)) -
+          starts[x];
+      const float* weights_end = weights_start + real_span_size;
+      for (int c = 0; c < channels; ++c) {
+        out_pix[c] = 0.0f;
+      }
+      for (const float* weight_ptr = weights_start; weight_ptr != weights_end;
+           ++weight_ptr) {
+        float w = *weight_ptr;
+        for (int c = 0; c < channels; ++c) {
+          out_pix[c] += w * static_cast<float>(in_pix[c]);
+        }
+        in_pix += channels;
+      }
+    }
+  }
+}
+
+template <typename T>
+inline void AddScaledVector(const T* in_vec, int vec_len, float weight,
+                            float* out_vec) {
+  float* out_vec_end = out_vec + vec_len;
+  for (; out_vec != out_vec_end; ++out_vec, ++in_vec) {
+    *out_vec += weight * static_cast<float>(*in_vec);
+  }
+}
+
+template <typename T>
+void GatherRows(int span_size, const int32* starts, const float* weights,
+                const T* image, const int64 input_height,
+                const int64 input_width, const int64 output_height,
+                const int64 output_width, const int channels, float* output) {
+  const int64 in_row_size = input_width * channels;
+  const int64 out_row_size = output_width * channels;
+
+  for (int y = 0; y < output_height; ++y) {
+    float* out_row_data = output + out_row_size * y;
+    std::fill(out_row_data, out_row_data + out_row_size, 0.0f);
+    int in_row = starts[y];
+    const T* in_row_data = image + in_row_size * in_row;
+    const float* weights_start = weights + y * span_size;
+    const int real_span_size =
+        std::min(starts[y] + span_size, static_cast<int>(input_height)) -
+        starts[y];
+    const float* const weights_end = weights_start + real_span_size;
+    for (const float* weight_it = weights_start; weight_it != weights_end;
+         ++weight_it) {
+      AddScaledVector(in_row_data, in_row_size, *weight_it, out_row_data);
+      in_row_data += in_row_size;
+    }
+  }
+}
+
+}  // namespace
+
+// Partial specialization of GatherSpans functor for a CPUDevice.
+template <typename T>
+struct GatherSpans<CPUDevice, T> {
+  void operator()(const CPUDevice& d, int row_span_size,
+                  typename TTypes<int32, 1>::ConstTensor row_starts,
+                  typename TTypes<float, 1>::ConstTensor row_weights,
+                  int col_span_size,
+                  typename TTypes<int32, 1>::ConstTensor col_starts,
+                  typename TTypes<float, 1>::ConstTensor col_weights,
+                  typename TTypes<T, 4>::ConstTensor images,
+                  typename TTypes<float, 4>::Tensor intermediate_buffer,
+                  typename TTypes<float, 4>::Tensor resized_images) {
+    const int batch_size = images.dimension(0);
+    const int64 input_height = images.dimension(1);
+    const int64 input_width = images.dimension(2);
+    const int channels = images.dimension(3);
+
+    const int64 output_height = resized_images.dimension(1);
+    const int64 output_width = resized_images.dimension(2);
+
+    const int64 input_pix_per_batch = input_width * input_height * channels;
+    const int64 intermediate_pix_per_batch =
+        input_width * output_height * channels;
+    const int64 output_pix_per_batch = output_width * output_height * channels;
+    float* intermediate_ptr = intermediate_buffer.data();
+
+    const T* image_ptr = images.data();
+    float* out_ptr = resized_images.data();
+    for (int b = 0; b < batch_size; ++b, image_ptr += input_pix_per_batch,
+             intermediate_ptr += intermediate_pix_per_batch,
+             out_ptr += output_pix_per_batch) {
+      GatherRows(row_span_size, row_starts.data(), row_weights.data(),
+                 image_ptr, input_height, input_width, output_height,
+                 input_width, channels, intermediate_ptr);
+      GatherColumns(col_span_size, col_starts.data(), col_weights.data(),
+                    intermediate_ptr, output_height, input_width, output_height,
+                    output_width, channels, out_ptr);
+    }
+  }
+};
+
+#define REGISTER_KERNEL(T)                                \
+  REGISTER_KERNEL_BUILDER(Name("ScaleAndTranslate")       \
+                              .Device(DEVICE_CPU)         \
+                              .TypeConstraint<T>("T")     \
+                              .HostMemory("size")         \
+                              .HostMemory("scale")        \
+                              .HostMemory("translation"), \
+                          ScaleAndTranslateOp<CPUDevice, T>);
+
+TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);
+
+#undef REGISTER_KERNEL
+
+#define REGISTER_GRAD_KERNEL(T)                           \
+  REGISTER_KERNEL_BUILDER(Name("ScaleAndTranslateGrad")   \
+                              .Device(DEVICE_CPU)         \
+                              .TypeConstraint<T>("T")     \
+                              .HostMemory("scale")        \
+                              .HostMemory("translation"), \
+                          ScaleAndTranslateGradOp<CPUDevice, T>);
+
+TF_CALL_float(REGISTER_GRAD_KERNEL);
+
+#undef REGISTER_GRAD_KERNEL
+
+}  // namespace functor
+}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/scale_and_translate_op.h b/tensorflow/core/kernels/scale_and_translate_op.h
new file mode 100644
index 0000000000..74bc87ecc7
--- /dev/null
+++ b/tensorflow/core/kernels/scale_and_translate_op.h
@@ -0,0 +1,75 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_SCALE_AND_TRANSLATE_OP_H_
+#define TENSORFLOW_CORE_KERNELS_SCALE_AND_TRANSLATE_OP_H_
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/kernels/sampling_kernels.h"
+
+namespace tensorflow {
+namespace functor {
+
+// The scale and translate op works by scaling and translating the row and
+// column dimensions separately.
+// When scaling and translating the rows the set of input pixels and kernel
+// weights used to compute a given output pixel within a row is constant across
+// rows and can thus be precomputed and reused for every row. Similarly for the
+// columns. This precomputed data structure is called a 'span'.
+
+// To compute the gradient we use the spans computed on the forward pass and
+// essentially reverse them: we record for each input pixel which output
+// pixels it contributes to. This means that the forward and backward passes
+// use the same core algorithm, only the spans are computed differently.
+
+// A pre-computed span of pixels along a single dimension.
+// The output pixel will be the weighted sum of pixels starting from start.
+struct Spans {
+  // The maximum span size of any output pixel.
+  int span_size;
+  // int32 tensor of size [output_dim].
+  Tensor starts;
+  // float tensor of size [output_dim, span_size].
+  // The output pixel at x is computed as:
+  //   dot_product(input[starts[x]:starts[x]+span_size], weights[x]).
+  Tensor weights;
+};
+
+// Gather spans in both dimensions.
+// row_span_size, row_starts and row_weights correspond to the variables in
+// the row Spans data structure, similarly for col_span_size etc.
+// intermediate_buffer is a Tensor used to store the result of the
+// resize in the column dimension and is of size:
+//    [batch_size, input_height, output_width, channels]
+template <typename Device, typename T>
+struct GatherSpans {
+  void operator()(const Device& d, int row_span_size,
+                  typename TTypes<int32, 1>::ConstTensor row_starts,
+                  typename TTypes<float, 1>::ConstTensor row_weights,
+                  int col_span_size,
+                  typename TTypes<int32, 1>::ConstTensor col_starts,
+                  typename TTypes<float, 1>::ConstTensor col_weights,
+                  typename TTypes<T, 4>::ConstTensor input_images,
+                  typename TTypes<float, 4>::Tensor intermediate_buffer,
+                  typename TTypes<float, 4>::Tensor output_images);
+};
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_KERNELS_SCALE_AND_TRANSLATE_OP_H_
diff --git a/tensorflow/core/kernels/scale_and_translate_op_test.cc b/tensorflow/core/kernels/scale_and_translate_op_test.cc
new file mode 100644
index 0000000000..23176f9f2d
--- /dev/null
+++ b/tensorflow/core/kernels/scale_and_translate_op_test.cc
@@ -0,0 +1,377 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/fake_input.h"
+#include "tensorflow/core/framework/node_def_builder.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_testutil.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "tensorflow/core/kernels/ops_util.h"
+#include "tensorflow/core/kernels/sampling_kernels.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include "tensorflow/core/lib/png/png_io.h"
+#include "tensorflow/core/lib/random/random.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/public/session.h"
+#include "tensorflow/core/util/ptr_util.h"
+
+namespace tensorflow {
+using Eigen::Vector2f;
+
+class DynamicKernel {
+ public:
+  virtual ~DynamicKernel() {}
+  virtual float Value(const float x) const = 0;
+  virtual float Radius() const = 0;
+};
+
+// Wraps a sampling kernel in a common interface.
+template <typename KernelType>
+class TypedDynamicKernel : public DynamicKernel {
+ public:
+  explicit TypedDynamicKernel(const KernelType& kernel) : kernel_(kernel) {}
+  float Value(const float x) const override { return kernel_(x); }
+  float Radius() const override { return kernel_.Radius(); }
+  const KernelType kernel_;
+};
+
+template <typename KernelType>
+std::unique_ptr<const DynamicKernel> CreateKernel(const KernelType& kernel) {
+  return MakeUnique<TypedDynamicKernel<KernelType>>(kernel);
+}
+
+std::unique_ptr<const DynamicKernel> Create(
+    functor::SamplingKernelType kernel_type) {
+  switch (kernel_type) {
+    case functor::Lanczos1Kernel:
+      return CreateKernel(functor::CreateLanczos1Kernel());
+    case functor::Lanczos3Kernel:
+      return CreateKernel(functor::CreateLanczos3Kernel());
+    case functor::Lanczos5Kernel:
+      return CreateKernel(functor::CreateLanczos5Kernel());
+    case functor::GaussianKernel:
+      return CreateKernel(functor::CreateGaussianKernel());
+    case functor::BoxKernel:
+      return CreateKernel(functor::CreateBoxKernel());
+    case functor::TriangleKernel:
+      return CreateKernel(functor::CreateTriangleKernel());
+    case functor::KeysCubicKernel:
+      return CreateKernel(functor::CreateKeysCubicKernel());
+    case functor::MitchellCubicKernel:
+      return CreateKernel(functor::CreateMitchellCubicKernel());
+    default:
+      LOG(FATAL) << "Unknown kernel type.";
+      return nullptr;
+  }
+}
+
+template <typename T>
+inline const T& Clamp(const T& low, const T& high, const T& value) {
+  return std::min(high, std::max(low, value));
+}
+
+// Samples from the image at the passed batch at pixel location sample_f with a
+// kernel scaled by scale.
+void Sample(const DynamicKernel& kernel, TTypes<float, 4>::Tensor images,
+            int batch, const Vector2f& scale, const Vector2f& sample_f,
+            float* dest) {
+  const Vector2f kernel_scale(std::max(scale.x(), 1.0f),
+                              std::max(scale.y(), 1.0f));
+
+  const int64 in_height = images.dimension(1);
+  const int64 in_width = images.dimension(2);
+  const int channels = images.dimension(3);
+  const int64 y_span_start = Clamp(
+      static_cast<int64>(0), in_height - 1,
+      static_cast<int64>(
+          std::ceil(sample_f.y() - kernel.Radius() * kernel_scale.y() - 0.5f)));
+  const int64 y_span_end =
+      Clamp(static_cast<int64>(0), in_height - 1,
+            static_cast<int64>(std::floor(
+                sample_f.y() + kernel.Radius() * kernel_scale.y() - 0.5f))) +
+      1;
+  const int64 x_span_start = Clamp(
+      static_cast<int64>(0), in_width - 1,
+      static_cast<int64>(
+          std::ceil(sample_f.x() - kernel.Radius() * kernel_scale.x() - 0.5f)));
+
+  const int64 x_span_end =
+      Clamp(static_cast<int64>(0), in_width - 1,
+            static_cast<int64>(std::floor(
+                sample_f.x() + kernel.Radius() * kernel_scale.x() - 0.5f))) +
+      1;
+
+  std::fill(dest, dest + channels, 0.0f);
+  if (y_span_end <= y_span_start || x_span_end <= x_span_start) {
+    return;
+  }
+  const Vector2f one_over_kernel_scale(1.0f / kernel_scale.x(),
+                                       1.0f / kernel_scale.y());
+  float total_weight = 0.0f;
+  for (int64 y = y_span_start; y < y_span_end; ++y) {
+    float y_kernel_pos = static_cast<float>(y) + 0.5f - sample_f.y();
+    float y_weight = kernel.Value(y_kernel_pos * one_over_kernel_scale.y());
+    for (int64 x = x_span_start; x < x_span_end; ++x) {
+      float x_kernel_pos = static_cast<float>(x) + 0.5f - sample_f.x();
+      float x_weight = kernel.Value(x_kernel_pos * one_over_kernel_scale.x());
+      float kernel_weight = y_weight * x_weight;
+      total_weight += kernel_weight;
+      for (int c = 0; c < channels; ++c) {
+        dest[c] += static_cast<float>(images(batch, y, x, c)) * kernel_weight;
+      }
+    }
+  }
+  if (std::abs(total_weight) >= 1000.0f * std::numeric_limits<float>::min()) {
+    CHECK_NE(total_weight, 0.0f) << y_span_start << "," << y_span_end << " "
+                                 << x_span_start << "," << x_span_end;
+    for (int c = 0; c < channels; ++c) {
+      dest[c] /= total_weight;
+    }
+  }
+}
+
+// This is the straight forward unoptimized implementation of ScaleAndTranslate
+// We use this to confirm that the optimized version is almost identical. The
+// only difference will be small floating point differences, since this version
+// does not to separable passes in x and y dimensions.
+void ScaleAndTranslateBaseline(const DynamicKernel& kernel,
+                               TTypes<float, 4>::Tensor images,
+                               const Vector2f& orig_scale,
+                               const Vector2f& orig_translate,
+                               TTypes<float, 4>::Tensor output) {
+  const Vector2f scale(1.0f / orig_scale[0], 1.0f / orig_scale[1]);
+  const Vector2f translate(-orig_translate[0] / orig_scale[0],
+                           -orig_translate[1] / orig_scale[1]);
+
+  const int batch = images.dimension(0);
+  const int channels = images.dimension(3);
+
+  ASSERT_EQ(batch, output.dimension(0));
+  ASSERT_EQ(channels, output.dimension(3));
+
+  const int64 out_height = output.dimension(1);
+  const int64 out_width = output.dimension(2);
+
+  for (int b = 0; b < batch; ++b) {
+    for (int64 y = 0; y < out_height; ++y) {
+      const float out_y_f = static_cast<float>(y) + 0.5;
+      const float in_y_f = out_y_f * scale.y() + translate.y();
+      for (int64 x = 0; x < out_width; ++x) {
+        const float out_x_f = static_cast<float>(x) + 0.5;
+        const float in_x_f = out_x_f * scale.x() + translate.x();
+        Sample(kernel, images, b, scale, Vector2f(in_x_f, in_y_f),
+               &output(b, y, x, 0));
+      }
+    }
+  }
+}
+
+class ScaleAndTranslateOpTest : public OpsTestBase {
+ protected:
+  void CreateOp(const string& kernel_type_str = "lanczos3") {
+    TF_EXPECT_OK(NodeDefBuilder("scale_and_translate_op", "ScaleAndTranslate")
+                     .Input(FakeInput(DT_FLOAT))
+                     .Input(FakeInput(DT_INT32))
+                     .Input(FakeInput(DT_FLOAT))
+                     .Input(FakeInput(DT_FLOAT))
+                     .Attr("kernel_type", kernel_type_str)
+                     .Finalize(node_def()));
+    TF_EXPECT_OK(InitOp());
+    kernel_type_ = functor::SamplingKernelTypeFromString(kernel_type_str);
+  }
+
+  void SetCheckerboardImageInput(int batch_size, int num_row_squares,
+                                 int num_col_squares, int square_size,
+                                 int num_channels) {
+    inputs_.clear();
+    std::vector<float> data;
+    const int64 row_size = num_col_squares * square_size * num_channels;
+    const int64 image_size = num_row_squares * square_size * row_size;
+    data.resize(batch_size * image_size);
+    random::PhiloxRandom philox(42);
+    random::SimplePhilox rnd(&philox);
+    std::vector<float> col(num_channels);
+    for (int b = 0; b < batch_size; ++b) {
+      for (int y = 0; y < num_row_squares; ++y) {
+        for (int x = 0; x < num_col_squares; ++x) {
+          for (int n = 0; n < num_channels; ++n) {
+            col[n] = rnd.RandFloat();
+          }
+          for (int r = y * square_size; r < (y + 1) * square_size; ++r) {
+            auto it = data.begin() + b * image_size + r * row_size +
+                      x * square_size * num_channels;
+            for (int n = 0; n < square_size; ++n) {
+              for (int chan = 0; chan < num_channels; ++chan, ++it) {
+                *it = col[chan] * 255.0;
+              }
+            }
+          }
+        }
+      }
+    }
+    AddInputFromArray<float>(
+        TensorShape({batch_size, num_row_squares * square_size,
+                     num_col_squares * square_size, num_channels}),
+        data);
+  }
+
+  void RunTest(int output_image_height, int output_image_width,
+               const Vector2f& scale, const Vector2f& translate) {
+    AddInputFromArray<int32>(TensorShape({2}),
+                             {output_image_height, output_image_width});
+    AddInputFromArray<float>(TensorShape({2}), {scale[1], scale[0]});
+    AddInputFromArray<float>(TensorShape({2}), {translate[1], translate[0]});
+    Status s = RunOpKernel();
+    const int batch_size = GetOutput(0)->dim_size(0);
+    const int channels = GetOutput(0)->dim_size(3);
+    Tensor expected(allocator(), DT_FLOAT,
+                    TensorShape({batch_size, output_image_height,
+                                 output_image_width, channels}));
+
+    std::unique_ptr<const DynamicKernel> kernel = Create(kernel_type_);
+    ScaleAndTranslateBaseline(*kernel, mutable_input(0)->tensor<float, 4>(),
+                              scale, translate, expected.tensor<float, 4>());
+    constexpr double kAbs = 1e-2f;
+    test::ExpectTensorNear<float>(expected, *GetOutput(0), kAbs);
+  }
+
+  functor::SamplingKernelType kernel_type_;
+};
+
+TEST_F(ScaleAndTranslateOpTest, IdentityTest) {
+  CreateOp();
+  constexpr int64 kBatchSize = 2;
+  constexpr int64 kNumRowSquares = 16;
+  constexpr int64 kNumColSquares = 13;
+  constexpr int64 kSquareSize = 12;
+  constexpr int64 kNumChannels = 3;
+  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
+                            kSquareSize, kNumChannels);
+  constexpr int kOutputImageHeight = kNumRowSquares * kSquareSize;
+  constexpr int kOutputImageWidth = kNumColSquares * kSquareSize;
+  const Vector2f kScale(1.0f, 1.0f);
+  const Vector2f kTranslate(0.0f, 0.0f);
+  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
+}
+
+TEST_F(ScaleAndTranslateOpTest, UpsampleTest) {
+  CreateOp();
+  constexpr int64 kBatchSize = 2;
+  constexpr int64 kNumRowSquares = 16;
+  constexpr int64 kNumColSquares = 13;
+  constexpr int64 kSquareSize = 12;
+  constexpr int64 kNumChannels = 3;
+  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
+                            kSquareSize, kNumChannels);
+  constexpr int kOutputImageHeight = kNumRowSquares * kSquareSize * 2;
+  constexpr int kOutputImageWidth = kNumColSquares * kSquareSize * 2;
+  const Vector2f kScale(2.0f, 2.0f);
+  const Vector2f kTranslate(0.0f, 0.0f);
+  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
+}
+
+TEST_F(ScaleAndTranslateOpTest, DownsampleTest) {
+  CreateOp();
+  constexpr int64 kBatchSize = 2;
+  constexpr int64 kNumRowSquares = 16;
+  constexpr int64 kNumColSquares = 13;
+  constexpr int64 kSquareSize = 12;
+  constexpr int64 kNumChannels = 3;
+  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
+                            kSquareSize, kNumChannels);
+  constexpr int kOutputImageHeight = kNumRowSquares * kSquareSize / 2;
+  constexpr int kOutputImageWidth = kNumColSquares * kSquareSize / 2;
+  const Vector2f kScale(0.5f, 0.5f);
+  const Vector2f kTranslate(0.0f, 0.0f);
+  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
+}
+
+TEST_F(ScaleAndTranslateOpTest, DownsampleToASinglePixelTest) {
+  CreateOp();
+  constexpr int64 kBatchSize = 2;
+  constexpr int64 kNumRowSquares = 16;
+  constexpr int64 kNumColSquares = 13;
+  constexpr int64 kSquareSize = 12;
+  constexpr int64 kNumChannels = 3;
+  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
+                            kSquareSize, kNumChannels);
+  constexpr int kOutputImageHeight = 1;
+  constexpr int kOutputImageWidth = 1;
+  const Vector2f kScale(1.0f / (kNumRowSquares * kSquareSize),
+                        1.0f / (kNumColSquares * kSquareSize));
+  const Vector2f kTranslate(0.0f, 0.0f);
+  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
+}
+
+TEST_F(ScaleAndTranslateOpTest, UsampleFromASinglePixelTest) {
+  CreateOp();
+  constexpr int64 kBatchSize = 2;
+  constexpr int64 kNumRowSquares = 1;
+  constexpr int64 kNumColSquares = 1;
+  constexpr int64 kSquareSize = 1;
+  constexpr int64 kNumChannels = 3;
+  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
+                            kSquareSize, kNumChannels);
+  constexpr int kOutputImageHeight = 10;
+  constexpr int kOutputImageWidth = 17;
+  const Vector2f kScale(17.0f, 10.0f);
+  const Vector2f kTranslate(0.0f, 0.0f);
+  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
+}
+
+TEST_F(ScaleAndTranslateOpTest, ScaleAndTranslationTest) {
+  CreateOp();
+  constexpr int64 kBatchSize = 2;
+  constexpr int64 kNumRowSquares = 11;
+  constexpr int64 kNumColSquares = 7;
+  constexpr int64 kSquareSize = 5;
+  constexpr int64 kNumChannels = 3;
+  SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
+                            kSquareSize, kNumChannels);
+  constexpr int kOutputImageHeight = 49;
+  constexpr int kOutputImageWidth = 51;
+  const Vector2f kScale(1.1f, 0.9f);
+  const Vector2f kTranslate(4.1f, -3.1f);
+  RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
+}
+
+TEST_F(ScaleAndTranslateOpTest, TestKernelTypes) {
+  const std::vector<string> kKernelTypes = {
+      "lanczos1", "lanczos3",  "lanczos5",     "box",
+      "triangle", "keyscubic", "mitchellcubic"};
+  for (const string& kernel_type : kKernelTypes) {
+    CreateOp(kernel_type);
+    constexpr int64 kBatchSize = 2;
+    constexpr int64 kNumRowSquares = 10;
+    constexpr int64 kNumColSquares = 11;
+    constexpr int64 kSquareSize = 1;
+    constexpr int64 kNumChannels = 3;
+    SetCheckerboardImageInput(kBatchSize, kNumRowSquares, kNumColSquares,
+                              kSquareSize, kNumChannels);
+    constexpr int kOutputImageHeight = 9;
+    constexpr int kOutputImageWidth = 11;
+    const Vector2f kScale(1.9f, 1.9f);
+    const Vector2f kTranslate(0.3f, 2.1f);
+    RunTest(kOutputImageHeight, kOutputImageWidth, kScale, kTranslate);
+  }
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc
index 5427275284..ee8b1e58d6 100644
--- a/tensorflow/core/ops/image_ops.cc
+++ b/tensorflow/core/ops/image_ops.cc
@@ -174,6 +174,19 @@ REGISTER_OP("ResizeBilinear")
     .Attr("align_corners: bool = false")
     .SetShapeFn(ResizeShapeFn);
 
+// --------------------------------------------------------------------------
+REGISTER_OP("ScaleAndTranslate")
+    .Input("images: T")
+    .Input("size: int32")
+    .Input("scale: float")
+    .Input("translation: float")
+    .Output("resized_images: float")
+    .Attr(
+        "T: {int8, uint8, int16, uint16, int32, int64, bfloat16, half, "
+        "float, double}")
+    .Attr("kernel_type: string = 'lanczos3'")
+    .SetShapeFn(ResizeShapeFn);
+
 // --------------------------------------------------------------------------
 REGISTER_OP("QuantizedResizeBilinear")
     .Input("images: T")
@@ -208,6 +221,20 @@ REGISTER_OP("ResizeBilinearGrad")
       return Status::OK();
     });
 
+// --------------------------------------------------------------------------
+REGISTER_OP("ScaleAndTranslateGrad")
+    .Input("grads: T")
+    .Input("original_image: T")
+    .Input("scale: float")
+    .Input("translation: float")
+    .Output("output: T")
+    .Attr("T: {float}")
+    .Attr("kernel_type: string = 'lanczos3'")
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->input(1));
+      return Status::OK();
+    });
+
 // --------------------------------------------------------------------------
 REGISTER_OP("ResizeNearestNeighbor")
     .Input("images: T")
-- 
GitLab


From f02f163f7227deee4d0613f7b0501fba117e8507 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 12:15:05 -0800
Subject: [PATCH 836/873] Add an `iterations` setter to Optimizer V2.

PiperOrigin-RevId: 226213241
---
 tensorflow/python/keras/models_test.py        |  4 +++
 .../python/keras/optimizer_v2/optimizer_v2.py | 27 +++++++++++++------
 .../keras/optimizer_v2/optimizer_v2_test.py   | 15 +++++++++++
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/tensorflow/python/keras/models_test.py b/tensorflow/python/keras/models_test.py
index b497a9fc55..0a5f9a7bea 100644
--- a/tensorflow/python/keras/models_test.py
+++ b/tensorflow/python/keras/models_test.py
@@ -631,6 +631,10 @@ class TestCloneAndBuildModel(test.TestCase):
   def test_replace_keras_optimizer_iterations_variable(self):
     self.assert_optimizer_iterations_increases('adam')
 
+  def test_replace_keras_optimizer_v2_iterations_variable(self):
+    self.assert_optimizer_iterations_increases(
+        keras.optimizer_v2.adam.Adam(0.01))
+
   def test_clone_and_build_sequential_model_without_inputs_defined(self):
     with self.cached_session():
       model = sequential_model(False, False)
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
index ed780cb50d..d0f16f0b4f 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2.py
@@ -169,6 +169,7 @@ class OptimizerV2(checkpointable.CheckpointableBase):
     self._slots = {}
     self._slot_names = []
     self._weights = []
+    self._iterations = None
 
     # For implementing Checkpointable. Stores information about how to restore
     # slot variables which have not yet been created
@@ -463,14 +464,15 @@ class OptimizerV2(checkpointable.CheckpointableBase):
   def _prepare(self):
     if self._prepared:
       return
-    with ops.device("cpu:0"):
-      self._iterations = self.add_weight(
-          "iter",
-          shape=[],
-          dtype=dtypes.int64,
-          trainable=False,
-          aggregation=tf_variables.VariableAggregation.ONLY_FIRST_REPLICA)
-      self._weights.append(self._iterations)
+    if self._iterations is None:
+      with ops.device("cpu:0"):
+        self._iterations = self.add_weight(
+            "iter",
+            shape=[],
+            dtype=dtypes.int64,
+            trainable=False,
+            aggregation=tf_variables.VariableAggregation.ONLY_FIRST_REPLICA)
+        self._weights.append(self._iterations)
     for name, value in self._hyper.items():
       if isinstance(value, ops.Tensor) or callable(value):
         pass
@@ -485,10 +487,19 @@ class OptimizerV2(checkpointable.CheckpointableBase):
 
   @property
   def iterations(self):
+    """Variable. The number of training steps this Optimizer has run."""
     if not self._prepared:
       self._prepare()
     return self._iterations
 
+  @iterations.setter
+  def iterations(self, variable):
+    if self._prepared:
+      raise RuntimeError("Cannot set `iterations` to a new Variable after"
+                         "the Optimizer weights have been created")
+    self._iterations = variable
+    self._weights.append(self._iterations)
+
   def _decayed_lr(self, var_dtype):
     """Get decayed learning rate as a Tensor with dtype=var_dtype."""
     lr_t = self._get_hyper("learning_rate", var_dtype)
diff --git a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
index 8b2865e2aa..42f9fcaea8 100644
--- a/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
+++ b/tensorflow/python/keras/optimizer_v2/optimizer_v2_test.py
@@ -52,6 +52,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.training import momentum
+from tensorflow.python.training import training_util
 
 
 class OptimizerTest(test.TestCase):
@@ -468,6 +469,20 @@ class OptimizerTest(test.TestCase):
     self.assertAllClose(
         float(backend.get_value(model.optimizer.lr)), 0.01, atol=1e-4)
 
+  def testOptimizerSetIterations(self):
+    global_step = training_util.get_or_create_global_step()
+    opt = adam.Adam(learning_rate=1.0)
+    opt.iterations = global_step
+    var = resource_variable_ops.ResourceVariable([1.0, 2.0],
+                                                 dtype=dtypes.float32)
+    loss = lambda: 3 * var
+    opt_op = opt.minimize(loss, [var])
+    self.evaluate(variables.global_variables_initializer())
+    init_step_value = self.evaluate(global_step)
+    self.evaluate(opt_op)
+    new_step_value = self.evaluate(global_step)
+    self.assertEqual(new_step_value, init_step_value + 1)
+
 
 class OptimizersCompatibilityTest(test.TestCase, parameterized.TestCase):
 
-- 
GitLab


From 5df47b8808bf29d9b9ae9d7a56448dd70a5687be Mon Sep 17 00:00:00 2001
From: James Keeling <jtkeeling@google.com>
Date: Wed, 19 Dec 2018 12:17:37 -0800
Subject: [PATCH 837/873] Fix memory leak in Go API.

The code previously wrapped a C pointer without setting the enclosing TensorHandle's finalizer. I add a new method, newTensorHandleFromC, to do this.

PiperOrigin-RevId: 226213607
---
 tensorflow/go/tensor_handle.go | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tensorflow/go/tensor_handle.go b/tensorflow/go/tensor_handle.go
index befc1c43ba..3b06773dd1 100644
--- a/tensorflow/go/tensor_handle.go
+++ b/tensorflow/go/tensor_handle.go
@@ -59,6 +59,13 @@ func (th *TensorHandle) finalizer() {
 	C.TFE_DeleteTensorHandle(th.c)
 }
 
+// newTensorHandleFromC takes ownership of c and returns the owning TensorHandle.
+func newTensorHandleFromC(c *C.TFE_TensorHandle) *TensorHandle {
+	th := &TensorHandle{c: c}
+	runtime.SetFinalizer(th, (*TensorHandle).finalizer)
+	return th
+}
+
 // DataType returns the TensorHandle's datatype.
 func (th *TensorHandle) DataType() DataType {
 	return DataType(C.TFE_TensorHandleDataType(th.c))
@@ -150,5 +157,5 @@ func (th *TensorHandle) CopyToDevice(c *Context, deviceName string) (*TensorHand
 	if err := status.Err(); err != nil {
 		return nil, err
 	}
-	return &TensorHandle{c: newTh}, nil
+	return newTensorHandleFromC(newTh), nil
 }
-- 
GitLab


From 81e98fcb01d7abd40340be087f57ff543ab5012c Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Wed, 19 Dec 2018 12:20:47 -0800
Subject: [PATCH 838/873] Refactoring the test to grow correctness (numerical)
 test to a separated file.

PiperOrigin-RevId: 226214083
---
 tensorflow/contrib/distribute/python/BUILD    |  34 ++
 .../python/keras_correctness_test.py          | 362 ++++++++++++++++++
 .../contrib/distribute/python/keras_test.py   | 282 --------------
 3 files changed, 396 insertions(+), 282 deletions(-)
 create mode 100644 tensorflow/contrib/distribute/python/keras_correctness_test.py

diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD
index 3a738efe3c..d2fb878f96 100644
--- a/tensorflow/contrib/distribute/python/BUILD
+++ b/tensorflow/contrib/distribute/python/BUILD
@@ -635,6 +635,40 @@ cuda_py_test(
     ],
 )
 
+py_library(
+    name = "keras_correctness_test_lib",
+    testonly = 1,
+    srcs = ["keras_correctness_test.py"],
+    deps = [
+        ":combinations",
+        "//tensorflow/contrib/distribute/python:mirrored_strategy",
+        "//tensorflow/contrib/distribute/python:tpu_strategy",
+        "//tensorflow/python:client_testlib",
+        "//tensorflow/python:training",
+        "//tensorflow/python/eager:test",
+        "//tensorflow/python/estimator:estimator_py",
+        "//tensorflow/python/keras",
+        "//third_party/py/numpy",
+        "@absl_py//absl/testing:parameterized",
+    ],
+)
+
+cuda_py_test(
+    name = "keras_correctness_test",
+    srcs = ["keras_correctness_test.py"],
+    additional_deps = [
+        ":keras_correctness_test_lib",
+    ],
+    shard_count = 16,
+    tags = [
+        "multi_and_single_gpu",
+        "no_oss",  # TODO(b/117919883): Fix python error.
+        "no_pip",
+        "no_windows_gpu",
+        "notsan",
+    ],
+)
+
 py_library(
     name = "metrics_v1_test_lib",
     testonly = 1,
diff --git a/tensorflow/contrib/distribute/python/keras_correctness_test.py b/tensorflow/contrib/distribute/python/keras_correctness_test.py
new file mode 100644
index 0000000000..e078731610
--- /dev/null
+++ b/tensorflow/contrib/distribute/python/keras_correctness_test.py
@@ -0,0 +1,362 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Correctness tests for tf.keras using DistributionStrategy."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.contrib.distribute.python import combinations
+from tensorflow.contrib.distribute.python import mirrored_strategy
+from tensorflow.contrib.distribute.python import tpu_strategy
+from tensorflow.python import keras
+from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.distribute import distribute_lib
+from tensorflow.python.eager import test
+from tensorflow.python.framework import random_seed
+from tensorflow.python.keras.engine import distributed_training_utils
+from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras
+from tensorflow.python.training import gradient_descent
+
+_RANDOM_SEED = 1337
+
+# Note: Please make sure the tests in this file are also covered in
+# keras_backward_compat_test for features that are supported with both APIs.
+
+
+def batch_wrapper(dataset, batch_size, distribution, repeat=None):
+  if repeat:
+    dataset = dataset.repeat(repeat)
+  # TPUs currently require fully defined input shapes, drop_remainder ensures
+  # the input will have fully defined shapes.
+  if isinstance(distribution, tpu_strategy.TPUStrategy):
+    return dataset.batch(batch_size, drop_remainder=True)
+  else:
+    return dataset.batch(batch_size)
+
+
+def get_correctness_test_inputs(use_numpy, use_validation_data,
+                                with_distribution,
+                                x_train, y_train, x_predict):
+  """Generates the inputs for correctness check when enable Keras with DS."""
+  training_epochs = 2
+  global_batch_size = 64
+  batch_size = global_batch_size
+  # TODO(b/118776054): Use global batch size for Keras/DS support.
+  use_per_core_batch_size = (
+      with_distribution and
+      not distributed_training_utils.global_batch_size_supported(
+          with_distribution))
+  if use_per_core_batch_size:
+    batch_size //= with_distribution.num_replicas_in_sync
+
+  if use_numpy:
+    training_inputs = {
+        'batch_size': batch_size,
+        'x': x_train,
+        'y': y_train,
+        'epochs': training_epochs,
+        'shuffle': False,
+    }
+
+    if use_validation_data:
+      eval_inputs = None
+      training_inputs['validation_data'] = (x_train, y_train)
+    else:
+      eval_inputs = {
+          'batch_size': batch_size,
+          'x': x_train,
+          'y': y_train,
+      }
+    predict_inputs = {
+        'x': np.array(x_predict, dtype=np.float32),
+    }
+  else:
+    # For dataset inputs, we do not pass batch_size to
+    # keras.fit/evaluate/predict. The batch size is part of the dataset.
+    train_dataset = dataset_ops.Dataset.from_tensor_slices(
+        (x_train, y_train))
+    x = batch_wrapper(
+        train_dataset, batch_size, with_distribution, repeat=training_epochs)
+
+    training_inputs = {
+        'batch_size': None,
+        'x': x,
+        'y': None,
+        'epochs': training_epochs,
+        'shuffle': False,
+        'steps_per_epoch': len(x_train) // global_batch_size,
+    }
+    if use_validation_data:
+      eval_inputs = None  # Remove the eval_inputs
+      eval_dataset = dataset_ops.Dataset.from_tensor_slices(
+          (x_train, y_train))
+      x = batch_wrapper(eval_dataset, batch_size, with_distribution)
+      training_inputs['validation_data'] = x
+      training_inputs['validation_steps'] = 5
+    else:
+      eval_inputs = {
+          'batch_size': None,
+          'x': x,
+          'y': None,
+          'steps': 20,
+      }
+
+    predict_batch_size = len(x_predict)
+    if use_per_core_batch_size:
+      predict_batch_size //= with_distribution.num_replicas_in_sync
+    predict_dataset = dataset_ops.Dataset.from_tensor_slices(x_predict)
+    predict_dataset = batch_wrapper(predict_dataset,
+                                    predict_batch_size, with_distribution)
+    predict_inputs = {
+        'steps': 1,
+        'x': predict_dataset,
+    }
+
+  return training_inputs, eval_inputs, predict_inputs
+
+
+strategies_minus_tpu = [
+    combinations.default_strategy,
+    combinations.one_device_strategy,
+    combinations.mirrored_strategy_with_gpu_and_cpu,
+    combinations.mirrored_strategy_with_two_gpus,
+    combinations.core_mirrored_strategy_with_gpu_and_cpu,
+    combinations.core_mirrored_strategy_with_two_gpus]
+
+tpu_strategies = [
+    combinations.tpu_strategy,  # steps_per_run=2
+    combinations.tpu_strategy_one_step]
+
+
+def strategy_minus_tpu_combinations():
+  return combinations.combine(
+      distribution=strategies_minus_tpu,
+      mode=['graph', 'eager'])
+
+
+def tpu_strategy_combinations():
+  return combinations.combine(
+      distribution=tpu_strategies,
+      mode=['graph'])
+
+
+def all_strategy_combinations():
+  return strategy_minus_tpu_combinations() + tpu_strategy_combinations()
+
+
+def strategy_and_input_combinations():
+  return (
+      combinations.times(
+          combinations.combine(distribution=strategies_minus_tpu),
+          combinations.combine(mode=['graph'],
+                               use_numpy=[True, False],
+                               use_validation_data=[True, False])
+          + combinations.combine(mode=['eager'],
+                                 use_numpy=[False],
+                                 use_validation_data=[False])) +
+      combinations.times(
+          combinations.combine(distribution=tpu_strategies),
+          combinations.combine(mode=['graph'],
+                               use_numpy=[True, False],
+                               use_validation_data=[True, False])))
+
+
+class TestDistributionStrategyCorrectness(test.TestCase,
+                                          parameterized.TestCase):
+
+  @combinations.generate(all_strategy_combinations())
+  def test_metric_correctness(self, distribution):
+    with self.cached_session():
+      keras.backend.set_image_data_format('channels_last')
+      num_samples = 10000
+
+      x_train = np.random.randint(0, 2, num_samples)
+      x_train = np.reshape(x_train, (num_samples, 1))
+      y_train = x_train
+      x_train = x_train.astype('float32')
+      y_train = y_train.astype('float32')
+
+      # Create identity model.
+      with distribution.scope():
+        model = keras.Sequential()
+        model.add(
+            keras.layers.Dense(1, input_shape=(1,), kernel_initializer='ones'))
+        model.compile(
+            loss=keras.losses.mean_squared_error,
+            optimizer=gradient_descent.GradientDescentOptimizer(0.5),
+            metrics=[keras.metrics.BinaryAccuracy()])
+
+      batch_size = 64
+      if not distributed_training_utils.global_batch_size_supported(
+          distribution):
+        batch_size //= distribution.num_replicas_in_sync
+      train_dataset = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
+      train_dataset = batch_wrapper(train_dataset, batch_size, distribution)
+
+      history = model.fit(x=train_dataset, epochs=2, steps_per_epoch=10)
+      self.assertEqual(history.history['binary_accuracy'], [1.0, 1.0])
+
+  @combinations.generate(all_strategy_combinations())
+  def test_eval_metrics_correctness(self, distribution):
+    with self.cached_session():
+      with distribution.scope():
+        model = keras.Sequential()
+        model.add(
+            keras.layers.Dense(
+                3, activation='relu', input_dim=4, kernel_initializer='ones'))
+        model.add(
+            keras.layers.Dense(
+                1, activation='sigmoid', kernel_initializer='ones'))
+        model.compile(
+            loss='mae',
+            metrics=['accuracy', keras.metrics.BinaryAccuracy()],
+            optimizer=gradient_descent.GradientDescentOptimizer(0.001))
+
+      # verify correctness of stateful and stateless metrics.
+      x = np.ones((100, 4)).astype('float32')
+      y = np.ones((100, 1)).astype('float32')
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat()
+      dataset = batch_wrapper(dataset, 4, distribution)
+      outs = model.evaluate(dataset, steps=10)
+      self.assertEqual(outs[1], 1.)
+      self.assertEqual(outs[2], 1.)
+
+      y = np.zeros((100, 1)).astype('float32')
+      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat()
+      dataset = batch_wrapper(dataset, 4, distribution)
+      outs = model.evaluate(dataset, steps=10)
+      self.assertEqual(outs[1], 0.)
+      self.assertEqual(outs[2], 0.)
+
+  @combinations.generate(strategy_and_input_combinations())
+  def test_correctness(self, distribution, use_numpy, use_validation_data):
+    with self.cached_session():
+      default_tolerance = 1e-5
+      tol_table = {}
+
+      if isinstance(distribution, (
+          mirrored_strategy.MirroredStrategy,
+          mirrored_strategy.CoreMirroredStrategy,
+          distribute_lib._DefaultDistributionStrategy)):  # pylint: disable=protected-access
+        # TODO(b/119257215): Weights are not exactly the same, so use larger
+        # tolerance for now. Predict should be related to weights.
+        tol_table = {
+            'weights_1': 1e-4,
+            'weights_2': 1e-4,
+            'predict_result_1': 1e-4,
+        }
+
+      keras.backend.set_image_data_format('channels_last')
+      np.random.seed(_RANDOM_SEED)
+      random_seed.set_random_seed(_RANDOM_SEED)
+
+      # Train, eval, and predict datasets are created with the same input numpy
+      # arrays.
+      # TODO(xiejw): Change this back to 10000, once we support final partial
+      # batch.
+      num_samples = 9984
+      x_train = np.random.rand(num_samples, 1)
+      y_train = 3 * x_train
+      x_train = x_train.astype('float32')
+      y_train = y_train.astype('float32')
+      x_predict = [[1.], [2.], [3.], [4.]]
+
+      # The model is built once and the initial weights are saved.
+      # This is used to initialize the model for both the distribution and
+      # non-distribution run. In addition, we add few non-linear layers to make
+      # it non-trivial.
+      def _create_model():
+        model = keras.Sequential()
+        model.add(keras.layers.Dense(10, activation='relu', input_shape=(1,)))
+        model.add(keras.layers.Dense(10, activation='relu'))
+        model.add(keras.layers.Dense(10, activation='relu'))
+        model.add(keras.layers.Dense(1))
+        return model
+
+      model = _create_model()
+      initial_weights = model.get_weights()
+      del model  # avoid accident usage.
+
+      def _build_and_compile_model():
+        # We have initialized the model to the same weight for the distribution
+        # and non-distribution run.
+        model = _create_model()
+        model.set_weights(initial_weights)
+        model.compile(
+            loss=keras.losses.mean_squared_error,
+            optimizer=gradient_descent_keras.SGD(0.5),
+            metrics=['mse'])
+        return model
+
+      def fit_eval_and_predict(with_distribution=None):
+        if with_distribution:
+          with with_distribution.scope():
+            model = _build_and_compile_model()
+        else:
+          model = _build_and_compile_model()
+
+        training_inputs, eval_inputs, predict_inputs = (
+            get_correctness_test_inputs(use_numpy, use_validation_data,
+                                        with_distribution,
+                                        x_train, y_train, x_predict))
+
+        result = {}
+        result['training_history_1'] = model.fit(**training_inputs).history
+
+        if eval_inputs is not None:
+          result['eval_result_1'] = model.evaluate(**eval_inputs)
+
+        result['weights_1'] = model.get_weights()
+        result['predict_result_1'] = model.predict(**predict_inputs)
+
+        # Train and eval again to mimic user's flow.
+
+        result['training_history_2'] = model.fit(**training_inputs).history
+
+        if eval_inputs is not None:
+          result['eval_result_2'] = model.evaluate(**eval_inputs)
+
+        result['weights_2'] = model.get_weights()
+
+        return result
+
+      results_with_ds = fit_eval_and_predict(with_distribution=distribution)
+      results_without_ds = fit_eval_and_predict(with_distribution=None)
+
+      # Verify that the weights, training history, eval results, predict outputs
+      # are the same within some limits of tolerance.
+      for key in results_with_ds:
+        if (key.startswith('training_history') and
+            isinstance(distribution, tpu_strategy.TPUStrategy) and
+            distribution.extended.steps_per_run > 1):
+          # TODO(b/119894254): Enable this test for all cases once the
+          # underlying bug is fixed.
+          continue
+
+        tolerance = tol_table.get(key, default_tolerance)
+
+        self.assertAllClose(
+            results_with_ds[key],
+            results_without_ds[key],
+            atol=tolerance,
+            rtol=tolerance,
+            msg='Fail to assert {}.'.format(key))
+
+
+if __name__ == '__main__':
+  test.main()
diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py
index ece8d66887..84e9aea228 100644
--- a/tensorflow/contrib/distribute/python/keras_test.py
+++ b/tensorflow/contrib/distribute/python/keras_test.py
@@ -26,14 +26,12 @@ from tensorflow.contrib.distribute.python import mirrored_strategy
 from tensorflow.contrib.distribute.python import tpu_strategy
 from tensorflow.python import keras
 from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import distribute_lib
 from tensorflow.python.distribute import values
 from tensorflow.python.eager import test
 from tensorflow.python.estimator import keras as keras_lib
 from tensorflow.python.estimator import run_config as run_config_lib
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.engine import distributed_training_utils
@@ -218,87 +216,6 @@ def multi_input_output_model():
   return model
 
 
-def get_correctness_test_inputs(use_numpy, use_validation_data,
-                                with_distribution,
-                                x_train, y_train, x_predict):
-  """Generates the inputs for correctness check when enable Keras with DS."""
-  training_epochs = 2
-  global_batch_size = 64
-  batch_size = global_batch_size
-  # TODO(b/118776054): Use global batch size for Keras/DS support.
-  use_per_core_batch_size = (
-      with_distribution and
-      not distributed_training_utils.global_batch_size_supported(
-          with_distribution))
-  if use_per_core_batch_size:
-    batch_size //= with_distribution.num_replicas_in_sync
-
-  if use_numpy:
-    training_inputs = {
-        'batch_size': batch_size,
-        'x': x_train,
-        'y': y_train,
-        'epochs': training_epochs,
-        'shuffle': False,
-    }
-
-    if use_validation_data:
-      eval_inputs = None
-      training_inputs['validation_data'] = (x_train, y_train)
-    else:
-      eval_inputs = {
-          'batch_size': batch_size,
-          'x': x_train,
-          'y': y_train,
-      }
-    predict_inputs = {
-        'x': np.array(x_predict, dtype=np.float32),
-    }
-  else:
-    # For dataset inputs, we do not pass batch_size to
-    # keras.fit/evaluate/predict. The batch size is part of the dataset.
-    train_dataset = dataset_ops.Dataset.from_tensor_slices(
-        (x_train, y_train))
-    x = batch_wrapper(
-        train_dataset, batch_size, with_distribution, repeat=training_epochs)
-
-    training_inputs = {
-        'batch_size': None,
-        'x': x,
-        'y': None,
-        'epochs': training_epochs,
-        'shuffle': False,
-        'steps_per_epoch': len(x_train) // global_batch_size,
-    }
-    if use_validation_data:
-      eval_inputs = None  # Remove the eval_inputs
-      eval_dataset = dataset_ops.Dataset.from_tensor_slices(
-          (x_train, y_train))
-      x = batch_wrapper(eval_dataset, batch_size, with_distribution)
-      training_inputs['validation_data'] = x
-      training_inputs['validation_steps'] = 5
-    else:
-      eval_inputs = {
-          'batch_size': None,
-          'x': x,
-          'y': None,
-          'steps': 20,
-      }
-
-    predict_batch_size = len(x_predict)
-    if use_per_core_batch_size:
-      predict_batch_size //= with_distribution.num_replicas_in_sync
-    predict_dataset = dataset_ops.Dataset.from_tensor_slices(x_predict)
-    predict_dataset = batch_wrapper(predict_dataset,
-                                    predict_batch_size, with_distribution)
-    predict_inputs = {
-        'steps': 1,
-        'x': predict_dataset,
-    }
-
-  return training_inputs, eval_inputs, predict_inputs
-
-
 strategies_minus_tpu = [
     combinations.default_strategy,
     combinations.one_device_strategy,
@@ -339,23 +256,6 @@ def strategy_and_optimizer_combinations():
                      combinations.rmsprop_optimizer_v1_fn]))
 
 
-def strategy_and_input_combinations():
-  return (
-      combinations.times(
-          combinations.combine(distribution=strategies_minus_tpu),
-          combinations.combine(mode=['graph'],
-                               use_numpy=[True, False],
-                               use_validation_data=[True, False])
-          + combinations.combine(mode=['eager'],
-                                 use_numpy=[False],
-                                 use_validation_data=[False])) +
-      combinations.times(
-          combinations.combine(distribution=tpu_strategies),
-          combinations.combine(mode=['graph'],
-                               use_numpy=[True, False],
-                               use_validation_data=[True, False])))
-
-
 def strategy_for_numpy_input_combinations():
   return combinations.combine(
       distribution=strategies_minus_tpu + tpu_strategies,
@@ -1248,187 +1148,5 @@ class TestDistributionStrategyWithNormalizationLayer(
       np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
 
 
-class TestDistributionStrategyCorrectness(test.TestCase,
-                                          parameterized.TestCase):
-
-  @combinations.generate(all_strategy_combinations())
-  def test_metric_correctness(self, distribution):
-    with self.cached_session():
-      keras.backend.set_image_data_format('channels_last')
-      num_samples = 10000
-
-      x_train = np.random.randint(0, 2, num_samples)
-      x_train = np.reshape(x_train, (num_samples, 1))
-      y_train = x_train
-      x_train = x_train.astype('float32')
-      y_train = y_train.astype('float32')
-
-      # Create identity model.
-      with distribution.scope():
-        model = keras.Sequential()
-        model.add(
-            keras.layers.Dense(1, input_shape=(1,), kernel_initializer='ones'))
-        model.compile(
-            loss=keras.losses.mean_squared_error,
-            optimizer=gradient_descent.GradientDescentOptimizer(0.5),
-            metrics=[keras.metrics.BinaryAccuracy()])
-
-      batch_size = 64
-      if not distributed_training_utils.global_batch_size_supported(
-          distribution):
-        batch_size //= distribution.num_replicas_in_sync
-      train_dataset = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
-      train_dataset = batch_wrapper(train_dataset, batch_size, distribution)
-
-      history = model.fit(x=train_dataset, epochs=2, steps_per_epoch=10)
-      self.assertEqual(history.history['binary_accuracy'], [1.0, 1.0])
-
-  @combinations.generate(all_strategy_combinations())
-  def test_eval_metrics_correctness(self, distribution):
-    with self.cached_session():
-      with distribution.scope():
-        model = keras.Sequential()
-        model.add(
-            keras.layers.Dense(
-                3, activation='relu', input_dim=4, kernel_initializer='ones'))
-        model.add(
-            keras.layers.Dense(
-                1, activation='sigmoid', kernel_initializer='ones'))
-        model.compile(
-            loss='mae',
-            metrics=['accuracy', keras.metrics.BinaryAccuracy()],
-            optimizer=gradient_descent.GradientDescentOptimizer(0.001))
-
-      # verify correctness of stateful and stateless metrics.
-      x = np.ones((100, 4)).astype('float32')
-      y = np.ones((100, 1)).astype('float32')
-      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat()
-      dataset = batch_wrapper(dataset, 4, distribution)
-      outs = model.evaluate(dataset, steps=10)
-      self.assertEqual(outs[1], 1.)
-      self.assertEqual(outs[2], 1.)
-
-      y = np.zeros((100, 1)).astype('float32')
-      dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat()
-      dataset = batch_wrapper(dataset, 4, distribution)
-      outs = model.evaluate(dataset, steps=10)
-      self.assertEqual(outs[1], 0.)
-      self.assertEqual(outs[2], 0.)
-
-  @combinations.generate(strategy_and_input_combinations())
-  def test_correctness(self, distribution, use_numpy, use_validation_data):
-    with self.cached_session():
-      default_tolerance = 1e-5
-      tol_table = {}
-
-      if isinstance(distribution, (
-          mirrored_strategy.MirroredStrategy,
-          mirrored_strategy.CoreMirroredStrategy,
-          distribute_lib._DefaultDistributionStrategy)):  # pylint: disable=protected-access
-        # TODO(b/119257215): Weights are not exactly the same, so use larger
-        # tolerance for now. Predict should be related to weights.
-        tol_table = {
-            'weights_1': 1e-4,
-            'weights_2': 1e-4,
-            'predict_result_1': 1e-4,
-        }
-
-      keras.backend.set_image_data_format('channels_last')
-      np.random.seed(_RANDOM_SEED)
-      random_seed.set_random_seed(_RANDOM_SEED)
-
-      # Train, eval, and predict datasets are created with the same input numpy
-      # arrays.
-      # TODO(xiejw): Change this back to 10000, once we support final partial
-      # batch.
-      num_samples = 9984
-      x_train = np.random.rand(num_samples, 1)
-      y_train = 3 * x_train
-      x_train = x_train.astype('float32')
-      y_train = y_train.astype('float32')
-      x_predict = [[1.], [2.], [3.], [4.]]
-
-      # The model is built once and the initial weights are saved.
-      # This is used to initialize the model for both the distribution and
-      # non-distribution run. In addition, we add few non-linear layers to make
-      # it non-trivial.
-      def _create_model():
-        model = keras.Sequential()
-        model.add(keras.layers.Dense(10, activation='relu', input_shape=(1,)))
-        model.add(keras.layers.Dense(10, activation='relu'))
-        model.add(keras.layers.Dense(10, activation='relu'))
-        model.add(keras.layers.Dense(1))
-        return model
-
-      model = _create_model()
-      initial_weights = model.get_weights()
-      del model  # avoid accident usage.
-
-      def _build_and_compile_model():
-        # We have initialized the model to the same weight for the distribution
-        # and non-distribution run.
-        model = _create_model()
-        model.set_weights(initial_weights)
-        model.compile(
-            loss=keras.losses.mean_squared_error,
-            optimizer=gradient_descent_keras.SGD(0.5),
-            metrics=['mse'])
-        return model
-
-      def fit_eval_and_predict(with_distribution=None):
-        if with_distribution:
-          with with_distribution.scope():
-            model = _build_and_compile_model()
-        else:
-          model = _build_and_compile_model()
-
-        training_inputs, eval_inputs, predict_inputs = (
-            get_correctness_test_inputs(use_numpy, use_validation_data,
-                                        with_distribution,
-                                        x_train, y_train, x_predict))
-
-        result = {}
-        result['training_history_1'] = model.fit(**training_inputs).history
-
-        if eval_inputs is not None:
-          result['eval_result_1'] = model.evaluate(**eval_inputs)
-
-        result['weights_1'] = model.get_weights()
-        result['predict_result_1'] = model.predict(**predict_inputs)
-
-        # Train and eval again to mimic user's flow.
-
-        result['training_history_2'] = model.fit(**training_inputs).history
-
-        if eval_inputs is not None:
-          result['eval_result_2'] = model.evaluate(**eval_inputs)
-
-        result['weights_2'] = model.get_weights()
-
-        return result
-
-      results_with_ds = fit_eval_and_predict(with_distribution=distribution)
-      results_without_ds = fit_eval_and_predict(with_distribution=None)
-
-      # Verify that the weights, training history, eval results, predict outputs
-      # are the same within some limits of tolerance.
-      for key in results_with_ds:
-        if (key.startswith('training_history') and
-            isinstance(distribution, tpu_strategy.TPUStrategy) and
-            distribution.extended.steps_per_run > 1):
-          # TODO(b/119894254): Enable this test for all cases once the
-          # underlying bug is fixed.
-          continue
-
-        tolerance = tol_table.get(key, default_tolerance)
-
-        self.assertAllClose(
-            results_with_ds[key],
-            results_without_ds[key],
-            atol=tolerance,
-            rtol=tolerance,
-            msg='Fail to assert {}.'.format(key))
-
-
 if __name__ == '__main__':
   test.main()
-- 
GitLab


From 2d1a3052e11a8b5b194c75e4ad441c2fedd900b2 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Wed, 19 Dec 2018 12:25:34 -0800
Subject: [PATCH 839/873] Wire TFLite thread count to the eager context

PiperOrigin-RevId: 226214825
---
 tensorflow/lite/delegates/flex/delegate.cc    | 30 ++++++-----
 tensorflow/lite/delegates/flex/delegate.h     |  4 +-
 .../lite/delegates/flex/delegate_data.cc      | 25 ++++++----
 .../lite/delegates/flex/delegate_data.h       | 20 +++++---
 .../lite/delegates/flex/delegate_data_test.cc | 14 +++---
 .../lite/delegates/flex/delegate_test.cc      | 50 +++++++++++++++++++
 tensorflow/lite/delegates/flex/kernel_test.cc | 13 ++---
 .../tools/benchmark/benchmark_tflite_model.cc |  9 +---
 8 files changed, 111 insertions(+), 54 deletions(-)

diff --git a/tensorflow/lite/delegates/flex/delegate.cc b/tensorflow/lite/delegates/flex/delegate.cc
index ca7314fbae..dcf5b795d8 100644
--- a/tensorflow/lite/delegates/flex/delegate.cc
+++ b/tensorflow/lite/delegates/flex/delegate.cc
@@ -30,6 +30,21 @@ namespace flex {
 namespace delegate {
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
+  // If the TensorFlow Lite thread count is explicitly configured, use it,
+  // otherwise rely on the default TensorFlow threading behavior.
+  tensorflow::SessionOptions session_options;
+  if (context->recommended_num_threads > 0) {
+    session_options.config.set_intra_op_parallelism_threads(
+        context->recommended_num_threads);
+  }
+
+  if (!reinterpret_cast<DelegateData*>(delegate->data_)
+           ->Prepare(session_options)
+           .ok()) {
+    context->ReportError(context, "Failed to initialize TensorFlow context.");
+    return kTfLiteError;
+  }
+
   // Get the nodes in the current execution plan. Interpreter owns this array.
   TfLiteIntArray* plan;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
@@ -118,20 +133,11 @@ AcquireFlexDelegate() {
 }
 
 std::unique_ptr<FlexDelegate> FlexDelegate::Create() {
-  std::unique_ptr<flex::DelegateData> delegate_data;
-  if (!flex::DelegateData::Create(&delegate_data).ok()) {
-    fprintf(stderr, "Unable to initialize TensorFlow context.\n");
-    return nullptr;
-  }
-
-  return std::unique_ptr<FlexDelegate>(
-      new FlexDelegate(std::move(delegate_data)));
+  return std::unique_ptr<FlexDelegate>(new FlexDelegate());
 }
 
-FlexDelegate::FlexDelegate(std::unique_ptr<flex::DelegateData> delegate_data)
-    : TfLiteDelegate(TfLiteDelegateCreate()),
-      delegate_data_(std::move(delegate_data)) {
-  data_ = delegate_data_.get();
+FlexDelegate::FlexDelegate() : TfLiteDelegate(TfLiteDelegateCreate()) {
+  data_ = &delegate_data_;
   Prepare = &flex::delegate::Prepare;
   CopyFromBufferHandle = &flex::delegate::CopyFromBufferHandle;
   flags = kTfLiteDelegateFlagsAllowDynamicTensors;
diff --git a/tensorflow/lite/delegates/flex/delegate.h b/tensorflow/lite/delegates/flex/delegate.h
index 018ff3e0b0..767cbe13c4 100644
--- a/tensorflow/lite/delegates/flex/delegate.h
+++ b/tensorflow/lite/delegates/flex/delegate.h
@@ -49,9 +49,9 @@ class FlexDelegate : public TfLiteDelegate {
   ~FlexDelegate();
 
  private:
-  explicit FlexDelegate(std::unique_ptr<flex::DelegateData> delegate_data);
+  FlexDelegate();
 
-  std::unique_ptr<flex::DelegateData> delegate_data_;
+  flex::DelegateData delegate_data_;
 };
 
 }  // namespace tflite
diff --git a/tensorflow/lite/delegates/flex/delegate_data.cc b/tensorflow/lite/delegates/flex/delegate_data.cc
index 1483a53038..87f3769746 100644
--- a/tensorflow/lite/delegates/flex/delegate_data.cc
+++ b/tensorflow/lite/delegates/flex/delegate_data.cc
@@ -20,29 +20,32 @@ limitations under the License.
 
 namespace tflite {
 namespace flex {
-tensorflow::Status DelegateData::Create(std::unique_ptr<DelegateData>* data) {
+DelegateData::DelegateData() {}
+
+DelegateData::~DelegateData() {}
+
+tensorflow::Status DelegateData::Prepare(
+    const tensorflow::SessionOptions& session_options) {
+  if (eager_context_) {
+    return tensorflow::Status();
+  }
+
   std::vector<std::unique_ptr<tensorflow::Device>> devices;
 
   TF_RETURN_IF_ERROR(tensorflow::DeviceFactory::AddDevices(
-      tensorflow::SessionOptions(), "/job:localhost/replica:0/task:0",
-      &devices));
+      session_options, "/job:localhost/replica:0/task:0", &devices));
 
   std::unique_ptr<tensorflow::DeviceMgr> device_mgr =
       absl::make_unique<tensorflow::DeviceMgr>(std::move(devices));
   // Note that Rendezvous is ref-counted so it will be automatically deleted.
   tensorflow::Rendezvous* rendezvous =
       new tensorflow::IntraProcessRendezvous(device_mgr.get());
-  data->reset(new DelegateData(new tensorflow::EagerContext(
-      tensorflow::SessionOptions(),
+  eager_context_.reset(new tensorflow::EagerContext(
+      session_options,
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT,
-      /*async=*/false, std::move(device_mgr), rendezvous)));
+      /*async=*/false, std::move(device_mgr), rendezvous));
   return tensorflow::Status();
 }
 
-DelegateData::DelegateData(tensorflow::EagerContext* eager_context)
-    : eager_context_(eager_context) {}
-
-DelegateData::~DelegateData() {}
-
 }  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/lite/delegates/flex/delegate_data.h b/tensorflow/lite/delegates/flex/delegate_data.h
index a88cc98d03..20d6b40a5d 100644
--- a/tensorflow/lite/delegates/flex/delegate_data.h
+++ b/tensorflow/lite/delegates/flex/delegate_data.h
@@ -15,21 +15,30 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
 #define TENSORFLOW_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_
 
-#include "tensorflow/lite/delegates/flex/buffer_map.h"
 #include "tensorflow/core/common_runtime/eager/context.h"
+#include "tensorflow/core/public/session_options.h"
+#include "tensorflow/lite/delegates/flex/buffer_map.h"
 
 namespace tflite {
 namespace flex {
 
 // Data kept by the Flex delegate for the lifetime of an Interpreter.
+//
+// Note: This class is *not* thread-safe; any dependent delegates should not be
+// used concurrently.
 class DelegateData {
  public:
-  // Create a new DelegateData, initialized with a newly-created EagerContext.
-  static tensorflow::Status Create(std::unique_ptr<DelegateData>* data);
-
+  DelegateData();
   ~DelegateData();
 
+  // Prepare the necessary EagerContext and data for execution.
+  // This must be called at least once before execution. After preparation
+  // succeeds, redundant calls will be ignored (even if the session_options
+  // differ).
+  tensorflow::Status Prepare(const tensorflow::SessionOptions& session_options);
+
   // The EagerContext that is required for execution of Flex Ops.
+  // Note: The context is lazily created after the first call to |Prepare()|.
   tensorflow::EagerContext* GetEagerContext() { return eager_context_.get(); }
 
   // Map from TF Lite tensor index to TensorFlow tensor for a given context.
@@ -38,8 +47,7 @@ class DelegateData {
   }
 
  private:
-  explicit DelegateData(tensorflow::EagerContext* eager_context);
-
+  // Will be null until Prepare() is called and completes successfully.
   std::unique_ptr<tensorflow::EagerContext> eager_context_;
   // TODO(b/112439500): Clean up stale BufferMap instances after adding the
   // necessary cleanup hook from a TfLiteContext to a TfLiteDelegate.
diff --git a/tensorflow/lite/delegates/flex/delegate_data_test.cc b/tensorflow/lite/delegates/flex/delegate_data_test.cc
index cd274e7cb1..22b8e436fb 100644
--- a/tensorflow/lite/delegates/flex/delegate_data_test.cc
+++ b/tensorflow/lite/delegates/flex/delegate_data_test.cc
@@ -24,18 +24,20 @@ namespace flex {
 namespace {
 
 TEST(DelegateDataTest, Basic) {
-  std::unique_ptr<DelegateData> data;
+  DelegateData data;
   // We only check for success because it is hard to make initialization fail.
   // It only happens if we manage to not link the CPU device factory into the
   // binary.
-  EXPECT_TRUE(DelegateData::Create(&data).ok());
+  tensorflow::SessionOptions session_options;
+  session_options.config.set_intra_op_parallelism_threads(2);
+  EXPECT_TRUE(data.Prepare(session_options).ok());
 
   TfLiteContext dummy_context1 = {};
   TfLiteContext dummy_context2 = {};
-  EXPECT_NE(data->GetEagerContext(), nullptr);
-  EXPECT_NE(data->GetBufferMap(&dummy_context1), nullptr);
-  EXPECT_NE(data->GetBufferMap(&dummy_context1),
-            data->GetBufferMap(&dummy_context2));
+  EXPECT_NE(data.GetEagerContext(), nullptr);
+  EXPECT_NE(data.GetBufferMap(&dummy_context1), nullptr);
+  EXPECT_NE(data.GetBufferMap(&dummy_context1),
+            data.GetBufferMap(&dummy_context2));
 }
 
 }  // namespace
diff --git a/tensorflow/lite/delegates/flex/delegate_test.cc b/tensorflow/lite/delegates/flex/delegate_test.cc
index ee37090d94..b48fe181e1 100644
--- a/tensorflow/lite/delegates/flex/delegate_test.cc
+++ b/tensorflow/lite/delegates/flex/delegate_test.cc
@@ -252,6 +252,56 @@ TEST_F(DelegateTest, MultipleInterpretersSameDelegate) {
   }
 }
 
+TEST_F(DelegateTest, SingleThreaded) {
+  AddTensors(9, {0, 3}, {8}, kTfLiteFloat32, {3});
+  AddTfOp(testing::kUnpack, {0}, {1, 2});
+  AddTfOp(testing::kUnpack, {3}, {4, 5});
+  AddTfOp(testing::kAdd, {1, 4}, {6});
+  AddTfOp(testing::kAdd, {2, 5}, {7});
+  AddTfOp(testing::kMul, {6, 7}, {8});
+
+  // Explicitly disable multi-threading before installing the delegate.
+  interpreter_->SetNumThreads(1);
+  ConfigureDelegate();
+
+  SetShape(0, {2, 2, 1});
+  SetValues(0, {1.1f, 2.2f, 3.3f, 4.4f});
+  SetShape(3, {2, 2, 1});
+  SetValues(3, {1.1f, 2.2f, 3.3f, 4.4f});
+
+  // Invocation should behave as expected.
+  ASSERT_TRUE(Invoke());
+
+  ASSERT_THAT(GetShape(8), ElementsAre(2, 1));
+  ASSERT_THAT(GetValues(8), ElementsAre(14.52f, 38.72f));
+  ASSERT_EQ(GetType(8), kTfLiteFloat32);
+}
+
+TEST_F(DelegateTest, MultiThreaded) {
+  AddTensors(9, {0, 3}, {8}, kTfLiteFloat32, {3});
+  AddTfOp(testing::kUnpack, {0}, {1, 2});
+  AddTfOp(testing::kUnpack, {3}, {4, 5});
+  AddTfOp(testing::kAdd, {1, 4}, {6});
+  AddTfOp(testing::kAdd, {2, 5}, {7});
+  AddTfOp(testing::kMul, {6, 7}, {8});
+
+  // Explicitly enable multi-threading before installing the delegate.
+  interpreter_->SetNumThreads(4);
+  ConfigureDelegate();
+
+  SetShape(0, {2, 2, 1});
+  SetValues(0, {1.1f, 2.2f, 3.3f, 4.4f});
+  SetShape(3, {2, 2, 1});
+  SetValues(3, {1.1f, 2.2f, 3.3f, 4.4f});
+
+  // Invocation should behave as expected.
+  ASSERT_TRUE(Invoke());
+
+  ASSERT_THAT(GetShape(8), ElementsAre(2, 1));
+  ASSERT_THAT(GetValues(8), ElementsAre(14.52f, 38.72f));
+  ASSERT_EQ(GetType(8), kTfLiteFloat32);
+}
+
 }  // namespace
 }  // namespace flex
 }  // namespace tflite
diff --git a/tensorflow/lite/delegates/flex/kernel_test.cc b/tensorflow/lite/delegates/flex/kernel_test.cc
index efb7300b0b..cc5c8b32a0 100644
--- a/tensorflow/lite/delegates/flex/kernel_test.cc
+++ b/tensorflow/lite/delegates/flex/kernel_test.cc
@@ -39,20 +39,13 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate,
 class KernelTest : public testing::FlexModelTest {
  public:
   KernelTest() {
-    CHECK(DelegateData::Create(&delegate_data_).ok());
+    CHECK(delegate_data_.Prepare(tensorflow::SessionOptions{}).ok());
     interpreter_.reset(new Interpreter(&error_reporter_));
   }
 
-  ~KernelTest() override {
-    // The data needs to be released before the interpreter because the
-    // interpreter references the data.
-    delegate_data_.reset();
-    interpreter_.reset();
-  }
-
   template <typename T>
   void ConfigureDelegate(T prepare_function) {
-    delegate_.data_ = delegate_data_.get();
+    delegate_.data_ = &delegate_data_;
     delegate_.flags = kTfLiteDelegateFlagsAllowDynamicTensors;
     delegate_.FreeBufferHandle = nullptr;
     delegate_.Prepare = prepare_function;
@@ -71,7 +64,7 @@ class KernelTest : public testing::FlexModelTest {
   }
 
  private:
-  std::unique_ptr<DelegateData> delegate_data_;
+  DelegateData delegate_data_;
   TfLiteDelegate delegate_;
 };
 
diff --git a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
index 32cf4e4292..0bc7565e82 100644
--- a/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
+++ b/tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -316,18 +316,13 @@ void BenchmarkTfLiteModel::Init() {
   tflite::ops::builtin::BuiltinOpResolver resolver;
 #endif
 
-  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
+  const int32_t num_threads = params_.Get<int32_t>("num_threads");
+  tflite::InterpreterBuilder(*model, resolver)(&interpreter, num_threads);
   if (!interpreter) {
     TFLITE_LOG(FATAL) << "Failed to construct interpreter";
   }
   profiling_listener_.SetInterpreter(interpreter.get());
 
-  const int32_t num_threads = params_.Get<int32_t>("num_threads");
-
-  if (num_threads != -1) {
-    interpreter->SetNumThreads(num_threads);
-  }
-
   bool use_nnapi = params_.Get<bool>("use_nnapi");
 
   interpreter->UseNNAPI(use_nnapi);
-- 
GitLab


From 80eaba14d115ddef6e47a35c60d4dda446e8d8ff Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 12:27:17 -0800
Subject: [PATCH 840/873] Update ops-related pbtxt files.

PiperOrigin-RevId: 226215094
---
 .../core/ops/compat/ops_history.v1.pbtxt      | 87 +++++++++++++++++++
 tensorflow/core/ops/ops.pbtxt                 | 87 +++++++++++++++++++
 2 files changed, 174 insertions(+)

diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
index 9b7776bbf3..d2e53f0196 100644
--- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt
+++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt
@@ -58221,6 +58221,93 @@ op {
     }
   }
 }
+op {
+  name: "ScaleAndTranslate"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "translation"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "kernel_type"
+    type: "string"
+    default_value {
+      s: "lanczos3"
+    }
+  }
+}
+op {
+  name: "ScaleAndTranslateGrad"
+  input_arg {
+    name: "grads"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "original_image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "translation"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "kernel_type"
+    type: "string"
+    default_value {
+      s: "lanczos3"
+    }
+  }
+}
 op {
   name: "ScatterAdd"
   input_arg {
diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt
index 1157380b8f..2c0980e203 100644
--- a/tensorflow/core/ops/ops.pbtxt
+++ b/tensorflow/core/ops/ops.pbtxt
@@ -28526,6 +28526,93 @@ op {
     }
   }
 }
+op {
+  name: "ScaleAndTranslate"
+  input_arg {
+    name: "images"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "size"
+    type: DT_INT32
+  }
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "translation"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "resized_images"
+    type: DT_FLOAT
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_INT8
+        type: DT_UINT8
+        type: DT_INT16
+        type: DT_UINT16
+        type: DT_INT32
+        type: DT_INT64
+        type: DT_BFLOAT16
+        type: DT_HALF
+        type: DT_FLOAT
+        type: DT_DOUBLE
+      }
+    }
+  }
+  attr {
+    name: "kernel_type"
+    type: "string"
+    default_value {
+      s: "lanczos3"
+    }
+  }
+}
+op {
+  name: "ScaleAndTranslateGrad"
+  input_arg {
+    name: "grads"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "original_image"
+    type_attr: "T"
+  }
+  input_arg {
+    name: "scale"
+    type: DT_FLOAT
+  }
+  input_arg {
+    name: "translation"
+    type: DT_FLOAT
+  }
+  output_arg {
+    name: "output"
+    type_attr: "T"
+  }
+  attr {
+    name: "T"
+    type: "type"
+    allowed_values {
+      list {
+        type: DT_FLOAT
+      }
+    }
+  }
+  attr {
+    name: "kernel_type"
+    type: "string"
+    default_value {
+      s: "lanczos3"
+    }
+  }
+}
 op {
   name: "ScatterAdd"
   input_arg {
-- 
GitLab


From 2fe0e5265cb74926df25552f7533b404ecaf2c3d Mon Sep 17 00:00:00 2001
From: Francois Chollet <fchollet@google.com>
Date: Wed, 19 Dec 2018 12:29:16 -0800
Subject: [PATCH 841/873] Make sure current model type is listed in the stack
 trace for parameterized Keras tests.

PiperOrigin-RevId: 226215395
---
 .../python/keras/keras_parameterized.py       | 26 ++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/keras/keras_parameterized.py b/tensorflow/python/keras/keras_parameterized.py
index f470033b6e..f505ced038 100644
--- a/tensorflow/python/keras/keras_parameterized.py
+++ b/tensorflow/python/keras/keras_parameterized.py
@@ -145,14 +145,34 @@ def run_with_all_model_types(
     @functools.wraps(f)
     def decorated(self, model_type, *args, **kwargs):
       """A run of a single test case w/ the specified model type."""
-      with testing_utils.model_type_scope(model_type):
-        f(self, *args, **kwargs)
-
+      if model_type == 'functional':
+        _test_functional_model_type(f, self, *args, **kwargs)
+      elif model_type == 'subclass':
+        _test_subclass_model_type(f, self, *args, **kwargs)
+      elif model_type == 'sequential':
+        _test_sequential_model_type(f, self, *args, **kwargs)
+      else:
+        raise ValueError('Unknown model type: %s' % (model_type,))
     return decorated
 
   return _test_or_class_decorator(test_or_class, single_method_decorator)
 
 
+def _test_functional_model_type(f, test_or_class, *args, **kwargs):
+  with testing_utils.model_type_scope('functional'):
+    f(test_or_class, *args, **kwargs)
+
+
+def _test_subclass_model_type(f, test_or_class, *args, **kwargs):
+  with testing_utils.model_type_scope('subclass'):
+    f(test_or_class, *args, **kwargs)
+
+
+def _test_sequential_model_type(f, test_or_class, *args, **kwargs):
+  with testing_utils.model_type_scope('sequential'):
+    f(test_or_class, *args, **kwargs)
+
+
 def run_all_keras_modes(
     test_or_class=None,
     config=None,
-- 
GitLab


From 2260d4e7b1a321e44a08c59df17858a5a8b1268a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 12:46:51 -0800
Subject: [PATCH 842/873] Go: Update generated wrapper functions for TensorFlow
 ops. PiperOrigin-RevId: 226217979

---
 tensorflow/go/op/wrappers.go | 294 +++++++++++++++++------------------
 1 file changed, 147 insertions(+), 147 deletions(-)

diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go
index 4624d12061..52742716f1 100644
--- a/tensorflow/go/op/wrappers.go
+++ b/tensorflow/go/op/wrappers.go
@@ -6049,6 +6049,153 @@ func StagePeek(scope *Scope, index tf.Output, dtypes []tf.DataType, optional ...
 	return values
 }
 
+// UnstageAttr is an optional argument to Unstage.
+type UnstageAttr func(optionalAttr)
+
+// UnstageCapacity sets the optional capacity attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func UnstageCapacity(value int64) UnstageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// UnstageMemoryLimit sets the optional memory_limit attribute to value.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func UnstageMemoryLimit(value int64) UnstageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// UnstageContainer sets the optional container attribute to value.
+// If not specified, defaults to ""
+func UnstageContainer(value string) UnstageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// UnstageSharedName sets the optional shared_name attribute to value.
+// If not specified, defaults to ""
+func UnstageSharedName(value string) UnstageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Op is similar to a lightweight Dequeue.
+//
+// The basic functionality is similar to dequeue with many fewer
+// capabilities and options.  This Op is optimized for performance.
+func Unstage(scope *Scope, dtypes []tf.DataType, optional ...UnstageAttr) (values []tf.Output) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{"dtypes": dtypes}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Unstage",
+
+		Attrs: attrs,
+	}
+	op := scope.AddOperation(opspec)
+	if scope.Err() != nil {
+		return
+	}
+	var idx int
+	var err error
+	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
+		scope.UpdateErr("Unstage", err)
+		return
+	}
+	return values
+}
+
+// StageAttr is an optional argument to Stage.
+type StageAttr func(optionalAttr)
+
+// StageCapacity sets the optional capacity attribute to value.
+//
+// value: Maximum number of elements in the Staging Area. If > 0, inserts
+// on the container will block when the capacity is reached.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StageCapacity(value int64) StageAttr {
+	return func(m optionalAttr) {
+		m["capacity"] = value
+	}
+}
+
+// StageMemoryLimit sets the optional memory_limit attribute to value.
+//
+// value: The maximum number of bytes allowed for Tensors in the Staging Area.
+// If > 0, inserts will block until sufficient space is available.
+// If not specified, defaults to 0
+//
+// REQUIRES: value >= 0
+func StageMemoryLimit(value int64) StageAttr {
+	return func(m optionalAttr) {
+		m["memory_limit"] = value
+	}
+}
+
+// StageContainer sets the optional container attribute to value.
+//
+// value: If non-empty, this queue is placed in the given container. Otherwise,
+// a default container is used.
+// If not specified, defaults to ""
+func StageContainer(value string) StageAttr {
+	return func(m optionalAttr) {
+		m["container"] = value
+	}
+}
+
+// StageSharedName sets the optional shared_name attribute to value.
+//
+// value: It is necessary to match this name to the matching Unstage Op.
+// If not specified, defaults to ""
+func StageSharedName(value string) StageAttr {
+	return func(m optionalAttr) {
+		m["shared_name"] = value
+	}
+}
+
+// Stage values similar to a lightweight Enqueue.
+//
+// The basic functionality of this Op is similar to a queue with many
+// fewer capabilities and options.  This Op is optimized for performance.
+//
+// Arguments:
+//	values: a list of tensors
+// dtypes A list of data types that inserted values should adhere to.
+//
+// Returns the created operation.
+func Stage(scope *Scope, values []tf.Output, optional ...StageAttr) (o *tf.Operation) {
+	if scope.Err() != nil {
+		return
+	}
+	attrs := map[string]interface{}{}
+	for _, a := range optional {
+		a(attrs)
+	}
+	opspec := tf.OpSpec{
+		Type: "Stage",
+		Input: []tf.Input{
+			tf.OutputList(values),
+		},
+		Attrs: attrs,
+	}
+	return scope.AddOperation(opspec)
+}
+
 // Compute the regularized incomplete beta integral \\(I_x(a, b)\\).
 //
 // The regularized incomplete beta integral is defined as:
@@ -32022,75 +32169,6 @@ func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV
 	return op.Output(0)
 }
 
-// UnstageAttr is an optional argument to Unstage.
-type UnstageAttr func(optionalAttr)
-
-// UnstageCapacity sets the optional capacity attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func UnstageCapacity(value int64) UnstageAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// UnstageMemoryLimit sets the optional memory_limit attribute to value.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func UnstageMemoryLimit(value int64) UnstageAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// UnstageContainer sets the optional container attribute to value.
-// If not specified, defaults to ""
-func UnstageContainer(value string) UnstageAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// UnstageSharedName sets the optional shared_name attribute to value.
-// If not specified, defaults to ""
-func UnstageSharedName(value string) UnstageAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Op is similar to a lightweight Dequeue.
-//
-// The basic functionality is similar to dequeue with many fewer
-// capabilities and options.  This Op is optimized for performance.
-func Unstage(scope *Scope, dtypes []tf.DataType, optional ...UnstageAttr) (values []tf.Output) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{"dtypes": dtypes}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Unstage",
-
-		Attrs: attrs,
-	}
-	op := scope.AddOperation(opspec)
-	if scope.Err() != nil {
-		return
-	}
-	var idx int
-	var err error
-	if values, idx, err = makeOutputList(op, idx, "values"); err != nil {
-		scope.UpdateErr("Unstage", err)
-		return
-	}
-	return values
-}
-
 // QueueEnqueueV2Attr is an optional argument to QueueEnqueueV2.
 type QueueEnqueueV2Attr func(optionalAttr)
 
@@ -33592,81 +33670,3 @@ func Conv2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, pa
 	op := scope.AddOperation(opspec)
 	return op.Output(0)
 }
-
-// StageAttr is an optional argument to Stage.
-type StageAttr func(optionalAttr)
-
-// StageCapacity sets the optional capacity attribute to value.
-//
-// value: Maximum number of elements in the Staging Area. If > 0, inserts
-// on the container will block when the capacity is reached.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func StageCapacity(value int64) StageAttr {
-	return func(m optionalAttr) {
-		m["capacity"] = value
-	}
-}
-
-// StageMemoryLimit sets the optional memory_limit attribute to value.
-//
-// value: The maximum number of bytes allowed for Tensors in the Staging Area.
-// If > 0, inserts will block until sufficient space is available.
-// If not specified, defaults to 0
-//
-// REQUIRES: value >= 0
-func StageMemoryLimit(value int64) StageAttr {
-	return func(m optionalAttr) {
-		m["memory_limit"] = value
-	}
-}
-
-// StageContainer sets the optional container attribute to value.
-//
-// value: If non-empty, this queue is placed in the given container. Otherwise,
-// a default container is used.
-// If not specified, defaults to ""
-func StageContainer(value string) StageAttr {
-	return func(m optionalAttr) {
-		m["container"] = value
-	}
-}
-
-// StageSharedName sets the optional shared_name attribute to value.
-//
-// value: It is necessary to match this name to the matching Unstage Op.
-// If not specified, defaults to ""
-func StageSharedName(value string) StageAttr {
-	return func(m optionalAttr) {
-		m["shared_name"] = value
-	}
-}
-
-// Stage values similar to a lightweight Enqueue.
-//
-// The basic functionality of this Op is similar to a queue with many
-// fewer capabilities and options.  This Op is optimized for performance.
-//
-// Arguments:
-//	values: a list of tensors
-// dtypes A list of data types that inserted values should adhere to.
-//
-// Returns the created operation.
-func Stage(scope *Scope, values []tf.Output, optional ...StageAttr) (o *tf.Operation) {
-	if scope.Err() != nil {
-		return
-	}
-	attrs := map[string]interface{}{}
-	for _, a := range optional {
-		a(attrs)
-	}
-	opspec := tf.OpSpec{
-		Type: "Stage",
-		Input: []tf.Input{
-			tf.OutputList(values),
-		},
-		Attrs: attrs,
-	}
-	return scope.AddOperation(opspec)
-}
-- 
GitLab


From 84b2ef21cf0b5f60d3bf7fd71153c1f8fc07ce11 Mon Sep 17 00:00:00 2001
From: Artem Belevich <tra@google.com>
Date: Wed, 19 Dec 2018 12:48:45 -0800
Subject: [PATCH 843/873] Tweak generated HTML to correctly size SVG in
 Firefox.

PiperOrigin-RevId: 226218242
---
 tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
index 07cc379ac6..dbf0d2c113 100644
--- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
+++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc
@@ -1597,8 +1597,10 @@ string WrapDotInHTML(const string& dot) {
 <head>
   <meta charset="utf-8">
   <style type="text/css">
-    html, body { height: 100%; }
-    body { margin: 0; }
+    body {
+      height: 100vh;
+      margin: 0;
+    }
   </style>
 </head>
 <body>
@@ -1612,7 +1614,7 @@ string WrapDotInHTML(const string& dot) {
   <script src="https://cdn.jsdelivr.net/npm/svg-pan-zoom@3.6.0/dist/svg-pan-zoom.min.js"
      integrity="sha384-3008WpYB2pOBvE7lwkrKf+qTmbTPGGPYxA9C1YVhvbPukns4ZFj7E98QPLkNW9dS"
      crossorigin="anonymous"></script>
-  <div id="container" style="height:95%; border:1px solid black; "></div>
+  <div id="container" style="height:95vh; border:1px solid black; "></div>
   <script>
     var data = `
 )html";
@@ -1680,7 +1682,7 @@ string WrapDotInHTML(const string& dot) {
                 var node = document.createTextNode(css_data);
                 style.appendChild(node);
                 svg.setAttribute('width', '100%');
-                svg.setAttribute('height', 'auto');
+                svg.setAttribute('height', '100%');
                 svg.setAttribute('id', 'graph');
                 svg.appendChild(style);
                 container.appendChild(svg);
-- 
GitLab


From 9914c904be57ebde6b96a73d674dda194282b05f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 13:07:32 -0800
Subject: [PATCH 844/873] Processing CONST nodes in toco. (Drawback: toco
 doesn't import everything correctly)

PiperOrigin-RevId: 226221104
---
 tensorflow/lite/toco/import_tensorflow.cc     | 33 ++++++++++++++++++-
 .../lite/toco/import_tensorflow_test.cc       | 28 ++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/toco/import_tensorflow.cc b/tensorflow/lite/toco/import_tensorflow.cc
index 0b2f810394..6a496875f9 100644
--- a/tensorflow/lite/toco/import_tensorflow.cc
+++ b/tensorflow/lite/toco/import_tensorflow.cc
@@ -1399,6 +1399,36 @@ tensorflow::Status ConvertUnsupportedOperator(
   return tensorflow::Status::OK();
 }
 
+// Same as ConvertConstOperator, but revert to ConvertUnsupportedOperator if
+// the types are not supported. Converting Const operators here avoids
+// expensive copies of the protocol buffers downstream in the flex delegate.
+tensorflow::Status ConditionallyConvertConstOperator(
+    const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
+    Model* model) {
+  // We avoid incomplete and zero shapes because the resulting arrays
+  // are not completely compatible with Eager/TensorFlow.
+  const auto& tensor = GetTensorAttr(node, "value");
+  const auto& shape = tensor.tensor_shape();
+  for (const auto& dim : shape.dim()) {
+    if (dim.size() <= 0) {
+      return ConvertUnsupportedOperator(node, tf_import_flags, model);
+    }
+  }
+
+  switch (GetDataTypeAttr(node, "dtype")) {
+    case DT_FLOAT:
+    case DT_INT32:
+    case DT_QUINT8:
+    case DT_INT64:
+    case DT_STRING:
+    case DT_BOOL:
+    case DT_COMPLEX64:
+      return ConvertConstOperator(node, tf_import_flags, model);
+    default:
+      return ConvertUnsupportedOperator(node, tf_import_flags, model);
+  }
+}
+
 tensorflow::Status ConvertStridedSliceOperator(
     const NodeDef& node, const TensorFlowImportFlags& tf_import_flags,
     Model* model) {
@@ -2290,10 +2320,11 @@ using ConverterMapType = std::unordered_map<std::string, ConverterType>;
 
 ConverterMapType GetTensorFlowNodeConverterMapForFlex() {
   return std::unordered_map<std::string, ConverterType>({
-      // We need to let TCO convert Placeholder information into
+      // We need to let TOCO convert Placeholder information into
       // array data, so that the data types are correct.
       {"LegacyFedInput", ConvertPlaceholderOperator},
       {"Placeholder", ConvertPlaceholderOperator},
+      {"Const", ConditionallyConvertConstOperator},
   });
 }
 
diff --git a/tensorflow/lite/toco/import_tensorflow_test.cc b/tensorflow/lite/toco/import_tensorflow_test.cc
index 0be358b1f7..ac020c1049 100644
--- a/tensorflow/lite/toco/import_tensorflow_test.cc
+++ b/tensorflow/lite/toco/import_tensorflow_test.cc
@@ -32,6 +32,7 @@ using tensorflow::DT_COMPLEX64;
 using tensorflow::DT_FLOAT;
 using tensorflow::DT_INT32;
 using tensorflow::DT_INT64;
+using tensorflow::DT_INVALID;
 using tensorflow::DT_QUINT8;
 using tensorflow::DT_STRING;
 using tensorflow::NodeDef;
@@ -44,6 +45,7 @@ using ConverterType = tensorflow::Status (*)(
 using ConverterMapType = std::unordered_map<std::string, ConverterType>;
 
 ConverterMapType GetTensorFlowNodeConverterMap();
+ConverterMapType GetTensorFlowNodeConverterMapForFlex();
 Status ImportTensorFlowNode(const NodeDef&, const TensorFlowImportFlags&,
                             Model*, const ConverterMapType&);
 }  // namespace internal
@@ -155,6 +157,32 @@ void BuildConstNode(std::initializer_list<int64_t> shape,
 }
 }  //  namespace
 
+TEST(FlexImportTest, ConditionalConst) {
+  Model model;
+  auto build_and_import_node =
+      [&model](const string& name, std::initializer_list<int64_t> shape,
+               tensorflow::DataType dtype, int64_t num_elements) {
+        NodeDef node;
+        BuildConstNode(shape, dtype, num_elements, &node);
+        node.set_name(name);
+
+        const auto converter = internal::GetTensorFlowNodeConverterMapForFlex();
+        return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(),
+                                              &model, converter);
+      };
+
+  EXPECT_TRUE(build_and_import_node("Known", {1, 2, 3}, DT_INT32, 6).ok());
+  EXPECT_TRUE(build_and_import_node("BadType", {1, 2, 3}, DT_INVALID, 6).ok());
+  EXPECT_TRUE(build_and_import_node("Unknown", {1, -2, 3}, DT_INT32, 6).ok());
+
+  // We expect the "Known" node to be converted into an array, while the
+  // "Unknown" and "BadType" nodes are kept as operators.
+  EXPECT_EQ(model.operators.size(), 2);
+  EXPECT_TRUE(model.HasArray("Known"));
+  EXPECT_FALSE(model.HasArray("Unknown"));
+  EXPECT_FALSE(model.HasArray("BadType"));
+}
+
 class ShapeImportTest : public ::testing::TestWithParam<tensorflow::DataType> {
 };
 
-- 
GitLab


From caf2d701d27bf5b2e9378db5dfa63e08054ff497 Mon Sep 17 00:00:00 2001
From: Mark Daoust <markdaoust@google.com>
Date: Wed, 19 Dec 2018 13:08:19 -0800
Subject: [PATCH 845/873] fix git clone command.

Fixes #24286

PiperOrigin-RevId: 226221213
---
 tensorflow/lite/experimental/micro/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/lite/experimental/micro/README.md b/tensorflow/lite/experimental/micro/README.md
index 673daed74c..e20efb2253 100644
--- a/tensorflow/lite/experimental/micro/README.md
+++ b/tensorflow/lite/experimental/micro/README.md
@@ -31,7 +31,7 @@ This initial preview release is designed to get early feedback, and is not inten
 Building requires a Linux or OS X machine.
 
  - Open a terminal
- - Download the TensorFlow source with `git clone https://github.com/tensorflow`
+ - Download the TensorFlow source with `git clone https://github.com/tensorflow/tensorflow.git`
  - Enter the source root directory by running `cd tensorflow`
  - Download the dependencies by running `tensorflow/lite/experimental/micro/tools/make/download_dependencies.sh`. This may take a few minutes
  - Build and test the library with `make -f tensorflow/lite/experimental/micro/tools/make/Makefile test`
-- 
GitLab


From 5adc74c3e4cac1de5945377dad419484d0c3b794 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Wed, 19 Dec 2018 13:08:34 -0800
Subject: [PATCH 846/873] Now that we have the Captured Function fix and create
 dataset kernels as we go, we can re-enable this dataset structure test in
 Eager mode.

PiperOrigin-RevId: 226221250
---
 tensorflow/python/data/kernel_tests/dataset_test.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/dataset_test.py b/tensorflow/python/data/kernel_tests/dataset_test.py
index 8193dffc7d..db8a999491 100644
--- a/tensorflow/python/data/kernel_tests/dataset_test.py
+++ b/tensorflow/python/data/kernel_tests/dataset_test.py
@@ -209,7 +209,6 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
     self.assertEqual(2, inputs.count(ds2))
     self.assertEqual(1, inputs.count(ds3))
 
-  # TODO(b/119882922): use-after-free bug in eager mode.
   # pylint: disable=g-long-lambda
   @parameterized.named_parameters(
       ("Tensor", lambda: constant_op.constant(37.0),
@@ -233,8 +232,7 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
        optional_ops.OptionalStructure(
            structure.TensorStructure(dtypes.float32, []))),
   )
-  def testSkipEagerDatasetStructure(self, tf_value_fn,
-                                    expected_element_structure):
+  def testDatasetStructure(self, tf_value_fn, expected_element_structure):
     dataset = dataset_ops.Dataset.from_tensors(0).map(lambda _: tf_value_fn())
     dataset_structure = structure.Structure.from_value(dataset)
     self.assertIsInstance(dataset_structure, dataset_ops.DatasetStructure)
-- 
GitLab


From 2f2344684c2a0e6af17248da94f9a2c6835d6dea Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Wed, 19 Dec 2018 13:11:07 -0800
Subject: [PATCH 847/873] Handle Python 2 and Python 3 compatibility by forcing
 to bytes.

This had previously been done but not uniformly. Change to remove
#!/usr/bin/env python in October caused this problems in latest release.

PiperOrigin-RevId: 226221579
---
 tensorflow/tools/git/gen_git_source.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index 8e7cd9b104..645d817d9f 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -159,12 +159,14 @@ def get_git_version(git_base_path, git_tag_override):
   """
   unknown_label = b"unknown"
   try:
+    # Force to bytes so this works on python 2 and python 3
     val = bytes(subprocess.check_output([
         "git", str("--git-dir=%s/.git" % git_base_path),
         str("--work-tree=" + git_base_path), "describe", "--long", "--tags"
     ]).strip())
+    version_separator = b"-"
     if git_tag_override and val:
-      split_val = val.split("-")
+      split_val = val.split(version_separator)
       if len(split_val) < 3:
         raise Exception(
             ("Expected git version in format 'TAG-COMMITS AFTER TAG-HASH' "
@@ -173,7 +175,7 @@ def get_git_version(git_base_path, git_tag_override):
       # two "-" are those inserted by the git describe command.
       abbrev_commit = split_val[-1]
       val = bytes(
-          "-".join([git_tag_override, "0", abbrev_commit]))
+          version_separator.join([git_tag_override, "0", abbrev_commit]))
     return val if val else unknown_label
   except (subprocess.CalledProcessError, OSError):
     return unknown_label
-- 
GitLab


From ca24162a806e63d6a058489863b989b09edf43e9 Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Wed, 19 Dec 2018 13:31:26 -0800
Subject: [PATCH 848/873] Silence __del__ exceptions

We have tests that they do their job, so printing exceptions is pointless

PiperOrigin-RevId: 226224643
---
 tensorflow/python/eager/function.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py
index 8b8f6af93b..58d1f6b886 100644
--- a/tensorflow/python/eager/function.py
+++ b/tensorflow/python/eager/function.py
@@ -1763,9 +1763,12 @@ class _PolymorphicFunctionGarbageCollector(object):
   def __del__(self):
     if func_graph_module is None or memory is None:
       return
-    while self._cache:
-      self._cache.popitem()
-    memory.dismantle_ordered_dict(self._cache)
+    try:
+      while self._cache:
+        self._cache.popitem()
+      memory.dismantle_ordered_dict(self._cache)
+    except:  # pylint: disable=bare-except
+      pass
 
 
 class _FunctionGarbageCollector(object):
@@ -1781,4 +1784,7 @@ class _FunctionGarbageCollector(object):
   def __del__(self):
     if func_graph_module is None or memory is None or self._func_graph is None:
       return
-    func_graph_module.dismantle_func_graph(self._func_graph)
+    try:
+      func_graph_module.dismantle_func_graph(self._func_graph)
+    except:  # pylint: disable=bare-except
+      pass
-- 
GitLab


From 0c4de546c11e90cd5b7401cec812a916b4c1298c Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Wed, 19 Dec 2018 13:31:47 -0800
Subject: [PATCH 849/873] Marking contrib/tpu:datasets_test as medium as it
 rarely times out

PiperOrigin-RevId: 226224704
---
 tensorflow/contrib/tpu/BUILD | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index ec8a273ea8..7664131813 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -337,6 +337,7 @@ py_library(
 
 tf_py_test(
     name = "datasets_test",
+    size = "medium",
     srcs = ["python/tpu/datasets_test.py"],
     additional_deps = [
         "//tensorflow/python:client_testlib",
@@ -344,6 +345,7 @@ tf_py_test(
     ],
     flaky = 1,  # TODO(b/117363808): fails 1/1000 OSS runs
     grpc_enabled = True,
+    shard_count = 4,
 )
 
 tf_py_test(
-- 
GitLab


From c1f29584c37df02bfc3eea654114e188284f5d74 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 13:44:13 -0800
Subject: [PATCH 850/873] The error_interpolation.py code already traverses the
 graph and determines the nodes causing error, their inputs and their
 stacktraces. So we don't need the old error reporting framework.

PiperOrigin-RevId: 226226764
---
 .../python/framework/error_interpolation.py   | 17 ++++++-----
 tensorflow/python/framework/errors_impl.py    | 29 +++++++++++++++----
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/tensorflow/python/framework/error_interpolation.py b/tensorflow/python/framework/error_interpolation.py
index 557f947291..b4f99017a2 100644
--- a/tensorflow/python/framework/error_interpolation.py
+++ b/tensorflow/python/framework/error_interpolation.py
@@ -29,7 +29,6 @@ import re
 
 import six
 
-from tensorflow.python.framework.ops import Tensor
 from tensorflow.python.util import tf_stack
 
 _NAME_REGEX = r"[A-Za-z0-9.][A-Za-z0-9_.\-/]*?"
@@ -270,7 +269,7 @@ def compute_field_dict(op, strip_file_prefix=""):
   return field_dict
 
 
-def _common_prefix(all_ops):
+def traceback_files_common_prefix(all_ops):
   """Determines the common prefix from the paths of the stacktrace of 'all_ops'.
 
   For example, if the paths are '/foo/bar/baz/' and '/foo/car', this would
@@ -315,11 +314,13 @@ def _sources_for_node(name, graph):
     if name.startswith("^"):
       name = name[1:]
     try:
-      op = graph.as_graph_element(name)
-    except KeyError:
-      return
-    if isinstance(op, Tensor):
-      op = op.op
+      tensor = graph.get_tensor_by_name(name)
+      op = tensor.op
+    except (KeyError, ValueError):
+      try:
+        op = graph.get_operation_by_name(name)
+      except KeyError:
+        return
     name = op.name
     if name in seen_names:
       return
@@ -398,7 +399,7 @@ def interpolate(error_message, graph):
     else:
       tagged_ops.append([op] + _sources_for_node(op.name, graph))
 
-  common_prefix = _common_prefix(tagged_ops)
+  common_prefix = traceback_files_common_prefix(tagged_ops)
   for tag, ops in zip(tags, tagged_ops):
     msg = "{{%s %s}}" % (tag.type, tag.name)
     if ops is not None:
diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py
index faa4fa7c6f..ee2aa6eb44 100644
--- a/tensorflow/python/framework/errors_impl.py
+++ b/tensorflow/python/framework/errors_impl.py
@@ -24,12 +24,31 @@ import warnings
 from tensorflow.core.lib.core import error_codes_pb2
 from tensorflow.python import pywrap_tensorflow as c_api
 from tensorflow.python.framework import c_api_util
+from tensorflow.python.framework import error_interpolation
 from tensorflow.python.util import compat
 from tensorflow.python.util import deprecation
 from tensorflow.python.util import tf_inspect
+from tensorflow.python.util import tf_stack
 from tensorflow.python.util.tf_export import tf_export
 
 
+def _compact_stack_trace(op):
+  """Returns a traceback for `op` with common file prefixes stripped."""
+  compact_traces = []
+  common_prefix = error_interpolation.traceback_files_common_prefix([[op]])
+  # pylint: disable=protected-access
+  tf_traceback = tf_stack.convert_stack(op._traceback)
+  # pylint: enable=protected-access
+  for frame in tf_traceback:
+    frame = list(frame)
+    filename = frame[tf_stack.TB_FILENAME]
+    if filename.startswith(common_prefix):
+      filename = filename[len(common_prefix):]
+      frame[tf_stack.TB_FILENAME] = filename
+    compact_traces.append(tuple(frame))
+  return compact_traces
+
+
 @tf_export("errors.OpError", v1=["errors.OpError", "OpError"])
 @deprecation.deprecated_endpoints("OpError")
 class OpError(Exception):
@@ -94,9 +113,10 @@ class OpError(Exception):
 
   def __str__(self):
     if self._op is not None:
-      output = ["%s\n\nCaused by op %r, defined at:\n" % (self.message,
+      output = ["%s\n\nOriginal stack trace for %r:\n" % (self.message,
                                                           self._op.name,)]
-      curr_traceback_list = traceback.format_list(self._op.traceback)
+      curr_traceback_list = traceback.format_list(
+          _compact_stack_trace(self._op))
       output.extend(curr_traceback_list)
       # pylint: disable=protected-access
       original_op = self._op._original_op
@@ -106,7 +126,8 @@ class OpError(Exception):
             "\n...which was originally created as op %r, defined at:\n"
             % (original_op.name,))
         prev_traceback_list = curr_traceback_list
-        curr_traceback_list = traceback.format_list(original_op.traceback)
+        curr_traceback_list = traceback.format_list(
+            _compact_stack_trace(original_op))
 
         # Attempt to elide large common subsequences of the subsequent
         # stack traces.
@@ -136,8 +157,6 @@ class OpError(Exception):
         # pylint: disable=protected-access
         original_op = original_op._original_op
         # pylint: enable=protected-access
-      output.append("\n%s (see above for traceback): %s\n" %
-                    (type(self).__name__, self.message))
       return "".join(output)
     else:
       return self.message
-- 
GitLab


From 1f7cf13a868db07aefc23c87afa597ec678cbb94 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 13:45:05 -0800
Subject: [PATCH 851/873] Typo fix in docs.

PiperOrigin-RevId: 226226879
---
 tensorflow/python/ops/ragged/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/ops/ragged/__init__.py b/tensorflow/python/ops/ragged/__init__.py
index 7806f56978..30ed9c53ab 100644
--- a/tensorflow/python/ops/ragged/__init__.py
+++ b/tensorflow/python/ops/ragged/__init__.py
@@ -21,7 +21,7 @@ different lengths.  For example, the inner (column) dimension of
 `rt=[[3, 1, 4, 1], [], [5, 9, 2], [6], []]` is ragged, since the column slices
 (`rt[0, :]`, ..., `rt[4, :]`) have different lengths.  For a more detailed
 description of ragged tensors, see the `tf.RaggedTensor` class documentation
-and the [Ragged Tensor Guide](/guides/ragged_tensor).
+and the [Ragged Tensor Guide](/guide/ragged_tensors).
 """
 
 from __future__ import absolute_import
-- 
GitLab


From 9b34185c81d6d98ff3b254f6ed620346dabcb1c1 Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Wed, 19 Dec 2018 14:03:34 -0800
Subject: [PATCH 852/873] Export
 estimator.experimental.stop_if_no_increase_hook and
 estimator.experimental.stop_if_no_decrease_hook to
 tf.estimator.experimental.*.

PiperOrigin-RevId: 226229945
---
 .../api/golden/v1/tensorflow.estimator.experimental.pbtxt | 8 ++++++++
 .../api/golden/v2/tensorflow.estimator.experimental.pbtxt | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.pbtxt
index 741102466d..b1bd5a2661 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.estimator.experimental.pbtxt
@@ -40,4 +40,12 @@ tf_module {
     name: "stop_if_lower_hook"
     argspec: "args=[\'estimator\', \'metric_name\', \'threshold\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
   }
+  member_method {
+    name: "stop_if_no_decrease_hook"
+    argspec: "args=[\'estimator\', \'metric_name\', \'max_steps_without_decrease\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
+  }
+  member_method {
+    name: "stop_if_no_increase_hook"
+    argspec: "args=[\'estimator\', \'metric_name\', \'max_steps_without_increase\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
+  }
 }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.pbtxt
index 741102466d..b1bd5a2661 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.estimator.experimental.pbtxt
@@ -40,4 +40,12 @@ tf_module {
     name: "stop_if_lower_hook"
     argspec: "args=[\'estimator\', \'metric_name\', \'threshold\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
   }
+  member_method {
+    name: "stop_if_no_decrease_hook"
+    argspec: "args=[\'estimator\', \'metric_name\', \'max_steps_without_decrease\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
+  }
+  member_method {
+    name: "stop_if_no_increase_hook"
+    argspec: "args=[\'estimator\', \'metric_name\', \'max_steps_without_increase\', \'eval_dir\', \'min_steps\', \'run_every_secs\', \'run_every_steps\'], varargs=None, keywords=None, defaults=[\'None\', \'0\', \'60\', \'None\'], "
+  }
 }
-- 
GitLab


From 08c051745904767b43e3007bc0e60d311e82fdaf Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Wed, 19 Dec 2018 14:05:12 -0800
Subject: [PATCH 853/873] Remove test that is no longer in respository.

PiperOrigin-RevId: 226230363
---
 tensorflow/tools/ci_build/builds/test_tutorials.sh | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tensorflow/tools/ci_build/builds/test_tutorials.sh b/tensorflow/tools/ci_build/builds/test_tutorials.sh
index db335f14ca..a8672220c8 100755
--- a/tensorflow/tools/ci_build/builds/test_tutorials.sh
+++ b/tensorflow/tools/ci_build/builds/test_tutorials.sh
@@ -212,16 +212,6 @@ test_word2vec() {
 }
 
 
-# -----------------------------------------------------------
-# Estimator: abalone
-test_estimator_abalone() {
-  LOG_FILE=$1
-
-  run_in_directory "${TEST_DIR}" "${LOG_FILE}" \
-    "tensorflow/examples/tutorials/estimators/abalone.py"
-}
-
-
 # -----------------------------------------------------------
 # ptb_word_lm
 test_ptb_word_lm() {
-- 
GitLab


From b185ef78847a5ba823348c55f0e5481e904dee27 Mon Sep 17 00:00:00 2001
From: Sanjoy Das <sanjoy@google.com>
Date: Wed, 19 Dec 2018 14:19:21 -0800
Subject: [PATCH 854/873] [TF:XLA] Bump open source llvm revision to r349610

PiperOrigin-RevId: 226232734
---
 tensorflow/workspace.bzl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index 700aa065b1..157cb52806 100755
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -498,11 +498,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""):
     tf_http_archive(
         name = "llvm",
         build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"),
-        sha256 = "65b48c80eba736ab834a9790b78a72cd0e3919b6dace44a96259d3e6936624ec",
-        strip_prefix = "llvm-cfa2cf74cd9ba0e759974ce11bfd7b9e051dd8ff",
+        sha256 = "65a1aeb29e5940f9f480a41e904659d944e738458afd139caa7bde14bd6aab8a",
+        strip_prefix = "llvm-331ffd31b3dd49b3f02a27556938b836b679f564",
         urls = [
-            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/cfa2cf74cd9ba0e759974ce11bfd7b9e051dd8ff.tar.gz",
-            "https://github.com/llvm-mirror/llvm/archive/cfa2cf74cd9ba0e759974ce11bfd7b9e051dd8ff.tar.gz",
+            "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/331ffd31b3dd49b3f02a27556938b836b679f564.tar.gz",
+            "https://github.com/llvm-mirror/llvm/archive/331ffd31b3dd49b3f02a27556938b836b679f564.tar.gz",
         ],
     )
 
-- 
GitLab


From 9b46c486fd50eb44a3403f16e68a661e9273ef59 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 14:26:25 -0800
Subject: [PATCH 855/873] Remove second bazel installation

PiperOrigin-RevId: 226233807
---
 tensorflow/tools/ci_build/install/install_pi_toolchain.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tensorflow/tools/ci_build/install/install_pi_toolchain.sh b/tensorflow/tools/ci_build/install/install_pi_toolchain.sh
index 03c43cc838..0238cc5895 100755
--- a/tensorflow/tools/ci_build/install/install_pi_toolchain.sh
+++ b/tensorflow/tools/ci_build/install/install_pi_toolchain.sh
@@ -25,5 +25,4 @@ apt-get install -y libpython-all-dev:armhf
 echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | sudo tee /etc/apt/sources.list.d/bazel.list
 curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add -
 apt-get update
-rm -rf /usr/local/bin/bazel
-apt-get install -y bazel python python-numpy python-dev python-pip
+apt-get install -y python python-numpy python-dev python-pip
-- 
GitLab


From 0d9b8ac2ece6904c009272e5be73561413bc9126 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 19 Dec 2018 14:34:06 -0800
Subject: [PATCH 856/873] Removes assertion which is not always true.

It's perfectly fine to have float tensors which are loop variables which are
not related to the gradient objective.

PiperOrigin-RevId: 226235207
---
 tensorflow/python/ops/while_v2.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 3e5a8fcdfa..25fd2460ae 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -265,10 +265,6 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
       for grad, output in zip(grads, body_graph.outputs)
   ]
 
-  # Ensure that all non-resource trainable outputs have incoming gradients.
-  assert all(g is not None or o.dtype == dtypes.resource or not _is_trainable(o)
-             for o, g in zip(body_graph.outputs, grads)
-            ), "All trainable loop vars must receive incoming gradients."
   # We compute the gradient for the sub-graph between trainable ys and xs
   # with non-None incoming gradients. We later pad the None's to the list of
   # outputs.
-- 
GitLab


From c62d244e1f4aebf3b9ad7451d09cc974953a4cf8 Mon Sep 17 00:00:00 2001
From: Mihai Maruseac <mihaimaruseac@google.com>
Date: Wed, 19 Dec 2018 14:41:34 -0800
Subject: [PATCH 857/873] Add fuzzer for ScatterNd.

PiperOrigin-RevId: 226236341
---
 tensorflow/core/kernels/fuzzing/BUILD         |   4 +
 .../5b0e5f8d2990c3cac80fa792ba141c43          | Bin 0 -> 10 bytes
 .../5b61fa3a30dd267828f12d9ea2b2a191          | Bin 0 -> 13 bytes
 .../8bc8b7d8beb3483c48158739791e56b0          | Bin 0 -> 9 bytes
 .../d2ef31d47578e9de8323bb0e4806f1be          | Bin 0 -> 10 bytes
 .../e2791edcf2c8d9f4af3678a75d43a3e4          | Bin 0 -> 10 bytes
 .../core/kernels/fuzzing/scatter_nd_fuzz.cc   | 133 ++++++++++++++++++
 7 files changed, 137 insertions(+)
 create mode 100644 tensorflow/core/kernels/fuzzing/corpus/scatter_nd/5b0e5f8d2990c3cac80fa792ba141c43
 create mode 100644 tensorflow/core/kernels/fuzzing/corpus/scatter_nd/5b61fa3a30dd267828f12d9ea2b2a191
 create mode 100644 tensorflow/core/kernels/fuzzing/corpus/scatter_nd/8bc8b7d8beb3483c48158739791e56b0
 create mode 100644 tensorflow/core/kernels/fuzzing/corpus/scatter_nd/d2ef31d47578e9de8323bb0e4806f1be
 create mode 100644 tensorflow/core/kernels/fuzzing/corpus/scatter_nd/e2791edcf2c8d9f4af3678a75d43a3e4
 create mode 100644 tensorflow/core/kernels/fuzzing/scatter_nd_fuzz.cc

diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD
index 7300f7a4e2..c9f025a5b0 100644
--- a/tensorflow/core/kernels/fuzzing/BUILD
+++ b/tensorflow/core/kernels/fuzzing/BUILD
@@ -72,3 +72,7 @@ tf_oss_fuzz_dict("decode_json_example")
 tf_ops_fuzz_target_lib("check_numerics")
 
 tf_ops_fuzz_target_lib("one_hot")
+
+tf_ops_fuzz_target_lib("scatter_nd")
+
+tf_oss_fuzz_corpus("scatter_nd")
diff --git a/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/5b0e5f8d2990c3cac80fa792ba141c43 b/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/5b0e5f8d2990c3cac80fa792ba141c43
new file mode 100644
index 0000000000000000000000000000000000000000..d1239633c843b1b8fd64d232604a3d61e9eb07dc
GIT binary patch
literal 10
RcmZSJU}RxrVP<6K0ssKF04M+e

literal 0
HcmV?d00001

diff --git a/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/5b61fa3a30dd267828f12d9ea2b2a191 b/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/5b61fa3a30dd267828f12d9ea2b2a191
new file mode 100644
index 0000000000000000000000000000000000000000..1bd0905cdd6efab2b8450e6cb03f1d15ffae9993
GIT binary patch
literal 13
UcmZSJU}9osWME-pWM<<600930D*ylh

literal 0
HcmV?d00001

diff --git a/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/8bc8b7d8beb3483c48158739791e56b0 b/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/8bc8b7d8beb3483c48158739791e56b0
new file mode 100644
index 0000000000000000000000000000000000000000..65a6d0083ee72a2920014fbe252970bff43ca75d
GIT binary patch
literal 9
QcmZQzWn^Y#U}53{003zK8vp<R

literal 0
HcmV?d00001

diff --git a/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/d2ef31d47578e9de8323bb0e4806f1be b/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/d2ef31d47578e9de8323bb0e4806f1be
new file mode 100644
index 0000000000000000000000000000000000000000..c6948b6a25f2c1a4fa6de401aaeb681be9a8dbd2
GIT binary patch
literal 10
RcmZQ#VPRopVq{?A0ssJ@03QGV

literal 0
HcmV?d00001

diff --git a/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/e2791edcf2c8d9f4af3678a75d43a3e4 b/tensorflow/core/kernels/fuzzing/corpus/scatter_nd/e2791edcf2c8d9f4af3678a75d43a3e4
new file mode 100644
index 0000000000000000000000000000000000000000..0e8a48e21096eb7b4f4642f754c18728e575e396
GIT binary patch
literal 10
RcmZQ%WoBe%WME<90ssJ!03QGV

literal 0
HcmV?d00001

diff --git a/tensorflow/core/kernels/fuzzing/scatter_nd_fuzz.cc b/tensorflow/core/kernels/fuzzing/scatter_nd_fuzz.cc
new file mode 100644
index 0000000000..35f876b3b1
--- /dev/null
+++ b/tensorflow/core/kernels/fuzzing/scatter_nd_fuzz.cc
@@ -0,0 +1,133 @@
+/* Copyright 2016 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <vector>
+
+#include "tensorflow/cc/ops/array_ops.h"
+#include "tensorflow/cc/ops/standard_ops.h"
+#include "tensorflow/core/kernels/fuzzing/fuzz_session.h"
+
+namespace tensorflow {
+namespace fuzzing {
+
+class FuzzScatterNd : public FuzzSession {
+  void BuildGraph(const Scope& scope) override {
+    auto indices =
+        tensorflow::ops::Placeholder(scope.WithOpName("indices"), DT_INT32);
+    auto updates =
+        tensorflow::ops::Placeholder(scope.WithOpName("updates"), DT_INT32);
+    auto shape =
+        tensorflow::ops::Placeholder(scope.WithOpName("shape"), DT_INT32);
+    (void)tensorflow::ops::ScatterNd(scope.WithOpName("output"), indices,
+                                     updates, shape);
+  }
+
+  void FuzzImpl(const uint8_t* data, size_t size) override {
+    // This op's runtime is heavily determined by the shape of the tensor
+    // arguments and almost not at all by the values of those tensors. Hence,
+    // the fuzzing data here is only used to determine the shape of the
+    // arguments and the output and the data of these tensors is just a constant
+    // value. Furthermore, the shape of the updates_tensor tensor is fully
+    // determined by the contents of the shape_tensor and the shape of the
+    // indices_tensor. Rather than using random values for the
+    // updates_tensor.shape and getting most of the fuzz runs stopped in the
+    // check, it's better to just create a proper update_tensor.
+
+    // First element of the data buffer gives the number of dimensions of the
+    // shape tensor.
+    size_t i;
+    size_t data_ix = 0;
+    size_t shape_dims = 1 + (data[data_ix++] % kMaxShapeDims);
+    Tensor shape_tensor(tensorflow::DT_INT32,
+                        TensorShape({static_cast<int64>(shape_dims)}));
+
+    // Check that we have enough elements left for the shape tensor
+    if (data_ix + shape_dims >= size) {
+      return;  // not enough elements, no fuzz
+    }
+
+    // Subsequent elements give the contents of the shape tensor.
+    // To not get out of memory, reduce all dimensions to at most kMaxDim
+    auto flat_shape = shape_tensor.flat<int32>();
+    for (i = 0; i < shape_dims; i++) {
+      flat_shape(i) = data[data_ix++] % kMaxDim;
+    }
+
+    // Next, we have to fill in the indices tensor. Take the next element from
+    // the buffer to represent the rank of this tensor.
+    if (data_ix >= size) {
+      return;
+    }
+    size_t indices_rank = 1 + (data[data_ix++] % kMaxIndicesRank);
+
+    // Now, read the dimensions of the indices_tensor
+    if (data_ix + indices_rank >= size) {
+      return;
+    }
+    std::vector<int64> indices_dims;
+    size_t num_indices = 1;
+    for (i = 0; i < indices_rank; i++) {
+      // Modulo kMaxDim to not request too much memory
+      int64 dim = data[data_ix++] % kMaxDim;
+      num_indices *= dim;
+      indices_dims.push_back(dim);
+    }
+    Tensor indices_tensor(tensorflow::DT_INT32, TensorShape(indices_dims));
+
+    // Rest of the buffer is used to fill in the indices_tensor
+    auto flat_indices = indices_tensor.flat<int32>();
+    for (i = 0; i < num_indices && data_ix < size; i++) {
+      flat_indices(i) = data[data_ix++];
+    }
+    for (; i < num_indices; i++) {
+      flat_indices(i) = 0;  // ensure that indices_tensor has all values
+    }
+
+    // Given the values in the shape_tensor and the dimensions of the
+    // indices_tensor, the shape of updates_tensor is fixed.
+    num_indices = 1;
+    std::vector<int64> updates_dims;
+    for (i = 0; i < indices_rank - 1; i++) {
+      updates_dims.push_back(indices_dims[i]);
+      num_indices *= indices_dims[i];
+    }
+    int64 last = indices_dims[indices_rank - 1];
+    for (i = last; i < shape_dims; i++) {
+      updates_dims.push_back(flat_shape(i));
+      num_indices *= flat_shape(i);
+    }
+    Tensor updates_tensor(tensorflow::DT_INT32, TensorShape(updates_dims));
+
+    // We don't care about the values in the updates_tensor, make them all be 1
+    auto flat_updates = updates_tensor.flat<int32>();
+    for (i = 0; i < num_indices; i++) {
+      flat_updates(i) = 1;
+    }
+
+    RunInputs({{"indices", indices_tensor},
+               {"updates", updates_tensor},
+               {"shape", shape_tensor}});
+  }
+
+ private:
+  const size_t kMaxShapeDims = 5;
+  const size_t kMaxIndicesRank = 3;
+  const size_t kMaxDim = 10;
+};
+
+STANDARD_TF_FUZZ_FUNCTION(FuzzScatterNd);
+
+}  // end namespace fuzzing
+}  // end namespace tensorflow
-- 
GitLab


From b6185defa48abd2af99d3a171c59640f06b65f5a Mon Sep 17 00:00:00 2001
From: Jiri Simsa <jsimsa@google.com>
Date: Wed, 19 Dec 2018 14:43:34 -0800
Subject: [PATCH 858/873] [tf.data] Refining the modeling framework to take
 buffer size into account.

PiperOrigin-RevId: 226236644
---
 tensorflow/core/framework/model.cc            |  34 +-
 tensorflow/core/framework/model_test.cc       | 291 +++++++++---------
 .../benchmarks/autotune_benchmark.py          |  61 +++-
 3 files changed, 225 insertions(+), 161 deletions(-)

diff --git a/tensorflow/core/framework/model.cc b/tensorflow/core/framework/model.cc
index b7c6d80910..96cc7e583a 100644
--- a/tensorflow/core/framework/model.cc
+++ b/tensorflow/core/framework/model.cc
@@ -29,6 +29,32 @@ std::shared_ptr<Parameter> MakeParameter(const string& name,
 
 namespace {
 
+// Given the average time between output events (`output_time`), the average
+// time between input events (`input_time`) and the buffer size, the method
+// computes the expected time an input event will have to wait.
+//
+// The wait time is approximated as the product of the probability the buffer
+// will be empty and the time it takes to produce an element into the buffer.
+//
+// The formula used for computing the probability is derived by modeling the
+// problem as an M/M/1/K queue
+// (https://en.wikipedia.org/wiki/Birth%E2%80%93death_process#M/M/1/K_queue).
+int64 ComputeWaitTime(int64 output_time, int64 input_time, int64 buffer_size) {
+  if (output_time == 0 || input_time == 0) {
+    return output_time;
+  }
+  if (input_time == output_time) {
+    const double p_buffer_empty = 1.0L / static_cast<double>(buffer_size + 1);
+    return p_buffer_empty * output_time;
+  }
+  const double alpha = 1.0L / static_cast<double>(input_time);
+  const double beta = 1.0L / static_cast<double>(output_time);
+  const double p_buffer_empty =
+      (1.0L - beta / alpha) /
+      (1.0L - std::pow((beta / alpha), static_cast<double>(buffer_size + 1)));
+  return p_buffer_empty * output_time;
+}
+
 // The first input of InterleaveMany corresponds to the input dataset whose
 // elements are used to create the (derived) input datasets whose elements are
 // interleaved as output.
@@ -119,8 +145,8 @@ class AsyncInterleaveMany : public Node {
         static_cast<double>(OutputTimeForInputs(input_times) -
                             inputs_.front()->OutputTime(input_times)) /
         static_cast<double>(inputs_.size() - 1) / parallelism;
-    return std::max(0LL,
-                    NanosPerElementLocked() + output_time - old_input_time);
+    return ComputeWaitTime(NanosPerElementLocked() + output_time,
+                           old_input_time, parallelism);
   }
 
   int64 ProcessingTimeLocked() const override SHARED_LOCKS_REQUIRED(mu_) {
@@ -202,7 +228,7 @@ class AsyncKnownRatio : public Node {
     if (ratio_ == 0.0) {
       int64 output_time =
           static_cast<double>(NanosPerElementLocked()) / parallelism;
-      return std::max(0LL, output_time - input_times->back());
+      return ComputeWaitTime(output_time, input_times->back(), parallelism);
     }
     int64 old_input_time = input_times->back();
     int64 new_input_time = static_cast<int64>(
@@ -213,7 +239,7 @@ class AsyncKnownRatio : public Node {
     int64 output_time = static_cast<int64>(
         static_cast<double>(NanosPerElementLocked()) / parallelism +
         ratio_ * OutputTimeForInputs(input_times));
-    return std::max(0LL, output_time - old_input_time);
+    return ComputeWaitTime(output_time, old_input_time, parallelism);
   }
 
   int64 ProcessingTimeLocked() const override SHARED_LOCKS_REQUIRED(mu_) {
diff --git a/tensorflow/core/framework/model_test.cc b/tensorflow/core/framework/model_test.cc
index 90bd570f90..013f1e61c8 100644
--- a/tensorflow/core/framework/model_test.cc
+++ b/tensorflow/core/framework/model_test.cc
@@ -57,28 +57,30 @@ TEST_P(AsyncInterleaveManyTest, Model) {
   });
   std::vector<int64> input_times(1, input_time);
   async_interleave_many->add_processing_time(100);
-  EXPECT_EQ(100, async_interleave_many->processing_time());
-  EXPECT_EQ(0, async_interleave_many->ProcessingTime());
-  EXPECT_EQ(0, async_interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(async_interleave_many->processing_time(), 100);
+  EXPECT_EQ(async_interleave_many->ProcessingTime(), 0);
+  EXPECT_EQ(async_interleave_many->OutputTime(&input_times), 0);
   async_interleave_many->record_element();
-  EXPECT_EQ(1, async_interleave_many->num_elements());
-  EXPECT_EQ(100, async_interleave_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, 100 - input_time),
-            async_interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(async_interleave_many->num_elements(), 1);
+  EXPECT_EQ(async_interleave_many->ProcessingTime(), 100);
+  EXPECT_LE(async_interleave_many->OutputTime(&input_times), 100);
+  EXPECT_GE(async_interleave_many->OutputTime(&input_times), 0);
   source1->add_processing_time(200);
   source2->add_processing_time(300);
-  EXPECT_EQ(100, async_interleave_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, 100 - input_time),
-            async_interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(async_interleave_many->ProcessingTime(), 100);
+  EXPECT_LE(async_interleave_many->OutputTime(&input_times), 100);
+  EXPECT_GE(async_interleave_many->OutputTime(&input_times), 0);
   source1->record_element();
   source2->record_element();
-  EXPECT_EQ(100 + 250, async_interleave_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, 100 + 250 / parallelism - input_time),
-            async_interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(async_interleave_many->ProcessingTime(), 100 + 250);
+  EXPECT_LE(async_interleave_many->OutputTime(&input_times),
+            100 + 250 / parallelism);
+  EXPECT_GE(async_interleave_many->OutputTime(&input_times), 0);
   async_interleave_many->record_element();
-  EXPECT_EQ(50 + 250, async_interleave_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, 50 + 250 / parallelism - input_time),
-            async_interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(async_interleave_many->ProcessingTime(), 50 + 250);
+  EXPECT_LE(async_interleave_many->OutputTime(&input_times),
+            50 + 250 / parallelism);
+  EXPECT_GE(async_interleave_many->OutputTime(&input_times), 0);
 }
 
 INSTANTIATE_TEST_CASE_P(Test, AsyncInterleaveManyTest,
@@ -106,47 +108,52 @@ TEST_P(AsyncKnownRatioTest, Model) {
   async_known_many->add_input(source2);
   std::vector<int64> input_times(1, input_time);
   source1->add_processing_time(100);
-  EXPECT_EQ(0, async_known_many->ProcessingTime());
-  EXPECT_EQ(0, async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(), 0);
+  EXPECT_EQ(async_known_many->OutputTime(&input_times), 0);
   source2->add_processing_time(200);
-  EXPECT_EQ(0, async_known_many->ProcessingTime());
-  EXPECT_EQ(0, async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(), 0);
+  EXPECT_EQ(async_known_many->OutputTime(&input_times), 0);
   source1->record_element();
-  EXPECT_EQ(num_inputs_per_output * 100, async_known_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, num_inputs_per_output * 100 - input_time),
-            async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(), num_inputs_per_output * 100);
+  EXPECT_LE(async_known_many->OutputTime(&input_times),
+            num_inputs_per_output * 100);
+  EXPECT_GE(async_known_many->OutputTime(&input_times), 0);
   source2->record_element();
-  EXPECT_EQ(num_inputs_per_output * (100 + 200),
-            async_known_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, num_inputs_per_output * (100 + 200) - input_time),
-            async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(),
+            num_inputs_per_output * (100 + 200));
+  EXPECT_LE(async_known_many->OutputTime(&input_times),
+            num_inputs_per_output * (100 + 200));
+  EXPECT_GE(async_known_many->OutputTime(&input_times), 0);
   source1->record_element();
-  EXPECT_EQ(num_inputs_per_output * (50 + 200),
-            async_known_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, num_inputs_per_output * (50 + 200) - input_time),
-            async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(),
+            num_inputs_per_output * (50 + 200));
+  EXPECT_LE(async_known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 200));
+  EXPECT_GE(async_known_many->OutputTime(&input_times), 0);
   source2->record_element();
-  EXPECT_EQ(num_inputs_per_output * (50 + 100),
-            async_known_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, num_inputs_per_output * (50 + 100) - input_time),
-            async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(),
+            num_inputs_per_output * (50 + 100));
+  EXPECT_LE(async_known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 100));
+  EXPECT_GE(async_known_many->OutputTime(&input_times), 0);
   async_known_many->add_processing_time(128);
-  EXPECT_EQ(num_inputs_per_output * (50 + 100),
-            async_known_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, num_inputs_per_output * (50 + 100) - input_time),
-            async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(),
+            num_inputs_per_output * (50 + 100));
+  EXPECT_LE(async_known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 100));
+  EXPECT_GE(async_known_many->OutputTime(&input_times), 0);
   async_known_many->record_element();
-  EXPECT_EQ(num_inputs_per_output * (50 + 100) + 128,
-            async_known_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, num_inputs_per_output * (50 + 100) +
-                              128 / parallelism - input_time),
-            async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(),
+            num_inputs_per_output * (50 + 100) + 128);
+  EXPECT_LE(async_known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 100) + 128 / parallelism);
+  EXPECT_GE(async_known_many->OutputTime(&input_times), 0);
   async_known_many->record_element();
-  EXPECT_EQ(num_inputs_per_output * (50 + 100) + 64,
-            async_known_many->ProcessingTime());
-  EXPECT_EQ(std::max(0LL, num_inputs_per_output * (50 + 100) +
-                              64 / parallelism - input_time),
-            async_known_many->OutputTime(&input_times));
+  EXPECT_EQ(async_known_many->ProcessingTime(),
+            num_inputs_per_output * (50 + 100) + 64);
+  EXPECT_LE(async_known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 100) + 64 / parallelism);
+  EXPECT_GE(async_known_many->OutputTime(&input_times), 0);
 }
 
 INSTANTIATE_TEST_CASE_P(Test, AsyncKnownRatioTest,
@@ -168,24 +175,24 @@ TEST(InterleaveManyTest, Model) {
   interleave_many->add_input(source2);
   std::vector<int64> input_times(1, 0);
   interleave_many->add_processing_time(100);
-  EXPECT_EQ(100, interleave_many->processing_time());
-  EXPECT_EQ(0, interleave_many->ProcessingTime());
-  EXPECT_EQ(0, interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(interleave_many->processing_time(), 100);
+  EXPECT_EQ(interleave_many->ProcessingTime(), 0);
+  EXPECT_EQ(interleave_many->OutputTime(&input_times), 0);
   interleave_many->record_element();
-  EXPECT_EQ(1, interleave_many->num_elements());
-  EXPECT_EQ(100, interleave_many->ProcessingTime());
-  EXPECT_EQ(100, interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(interleave_many->num_elements(), 1);
+  EXPECT_EQ(interleave_many->ProcessingTime(), 100);
+  EXPECT_EQ(interleave_many->OutputTime(&input_times), 100);
   source1->add_processing_time(200);
   source2->add_processing_time(300);
-  EXPECT_EQ(100, interleave_many->ProcessingTime());
-  EXPECT_EQ(100, interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(interleave_many->ProcessingTime(), 100);
+  EXPECT_EQ(interleave_many->OutputTime(&input_times), 100);
   source1->record_element();
   source2->record_element();
-  EXPECT_EQ(350, interleave_many->ProcessingTime());
-  EXPECT_EQ(350, interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(interleave_many->ProcessingTime(), 350);
+  EXPECT_EQ(interleave_many->OutputTime(&input_times), 350);
   interleave_many->record_element();
-  EXPECT_EQ(300, interleave_many->ProcessingTime());
-  EXPECT_EQ(300, interleave_many->OutputTime(&input_times));
+  EXPECT_EQ(interleave_many->ProcessingTime(), 300);
+  EXPECT_EQ(interleave_many->OutputTime(&input_times), 300);
 }
 
 class KnownRatioTest : public ::testing::TestWithParam<int64> {};
@@ -202,40 +209,40 @@ TEST_P(KnownRatioTest, Model) {
   known_many->add_input(source2);
   std::vector<int64> input_times(1, 0);
   source1->add_processing_time(100);
-  EXPECT_EQ(0, known_many->ProcessingTime());
-  EXPECT_EQ(0, known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(), 0);
+  EXPECT_EQ(known_many->OutputTime(&input_times), 0);
   source2->add_processing_time(200);
-  EXPECT_EQ(0, known_many->ProcessingTime());
-  EXPECT_EQ(0, known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(), 0);
+  EXPECT_EQ(known_many->OutputTime(&input_times), 0);
   source1->record_element();
-  EXPECT_EQ(num_inputs_per_output * 100, known_many->ProcessingTime());
-  EXPECT_EQ(num_inputs_per_output * 100, known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(), num_inputs_per_output * 100);
+  EXPECT_EQ(known_many->OutputTime(&input_times), num_inputs_per_output * 100);
   source2->record_element();
-  EXPECT_EQ(num_inputs_per_output * (100 + 200), known_many->ProcessingTime());
-  EXPECT_EQ(num_inputs_per_output * (100 + 200),
-            known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(), num_inputs_per_output * (100 + 200));
+  EXPECT_EQ(known_many->OutputTime(&input_times),
+            num_inputs_per_output * (100 + 200));
   source1->record_element();
-  EXPECT_EQ(num_inputs_per_output * (50 + 200), known_many->ProcessingTime());
-  EXPECT_EQ(num_inputs_per_output * (50 + 200),
-            known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(), num_inputs_per_output * (50 + 200));
+  EXPECT_EQ(known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 200));
   source2->record_element();
-  EXPECT_EQ(num_inputs_per_output * (50 + 100), known_many->ProcessingTime());
-  EXPECT_EQ(num_inputs_per_output * (50 + 100),
-            known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(), num_inputs_per_output * (50 + 100));
+  EXPECT_EQ(known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 100));
   known_many->add_processing_time(128);
-  EXPECT_EQ(num_inputs_per_output * (50 + 100), known_many->ProcessingTime());
-  EXPECT_EQ(num_inputs_per_output * (50 + 100),
-            known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(), num_inputs_per_output * (50 + 100));
+  EXPECT_EQ(known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 100));
   known_many->record_element();
-  EXPECT_EQ(num_inputs_per_output * (50 + 100) + 128,
-            known_many->ProcessingTime());
-  EXPECT_EQ(num_inputs_per_output * (50 + 100) + 128,
-            known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(),
+            num_inputs_per_output * (50 + 100) + 128);
+  EXPECT_EQ(known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 100) + 128);
   known_many->record_element();
-  EXPECT_EQ(num_inputs_per_output * (50 + 100) + 64,
-            known_many->ProcessingTime());
-  EXPECT_EQ(num_inputs_per_output * (50 + 100) + 64,
-            known_many->OutputTime(&input_times));
+  EXPECT_EQ(known_many->ProcessingTime(),
+            num_inputs_per_output * (50 + 100) + 64);
+  EXPECT_EQ(known_many->OutputTime(&input_times),
+            num_inputs_per_output * (50 + 100) + 64);
 }
 
 INSTANTIATE_TEST_CASE_P(Test, KnownRatioTest, ::testing::Values(0, 1, 2, 4));
@@ -244,17 +251,17 @@ TEST(SourceTest, Model) {
   std::shared_ptr<Node> source = model::MakeSourceNode({0, "source", nullptr});
   std::vector<int64> input_times(1, 0);
   source->add_processing_time(100);
-  EXPECT_EQ(100, source->processing_time());
-  EXPECT_EQ(0, source->ProcessingTime());
-  EXPECT_EQ(0, source->OutputTime(&input_times));
+  EXPECT_EQ(source->processing_time(), 100);
+  EXPECT_EQ(source->ProcessingTime(), 0);
+  EXPECT_EQ(source->OutputTime(&input_times), 0);
   source->record_element();
-  EXPECT_EQ(1, source->num_elements());
-  EXPECT_EQ(100, source->ProcessingTime());
-  EXPECT_EQ(100, source->OutputTime(&input_times));
+  EXPECT_EQ(source->num_elements(), 1);
+  EXPECT_EQ(source->ProcessingTime(), 100);
+  EXPECT_EQ(source->OutputTime(&input_times), 100);
   source->record_element();
-  EXPECT_EQ(2, source->num_elements());
-  EXPECT_EQ(50, source->ProcessingTime());
-  EXPECT_EQ(50, source->OutputTime(&input_times));
+  EXPECT_EQ(source->num_elements(), 2);
+  EXPECT_EQ(source->ProcessingTime(), 50);
+  EXPECT_EQ(source->OutputTime(&input_times), 50);
 }
 
 TEST(UnknownRatioTest, Model) {
@@ -268,24 +275,24 @@ TEST(UnknownRatioTest, Model) {
   unknown_many->add_input(source2);
   std::vector<int64> input_times(1, 0);
   unknown_many->add_processing_time(100);
-  EXPECT_EQ(100, unknown_many->processing_time());
-  EXPECT_EQ(0, unknown_many->ProcessingTime());
-  EXPECT_EQ(0, unknown_many->OutputTime(&input_times));
+  EXPECT_EQ(unknown_many->processing_time(), 100);
+  EXPECT_EQ(unknown_many->ProcessingTime(), 0);
+  EXPECT_EQ(unknown_many->OutputTime(&input_times), 0);
   unknown_many->record_element();
-  EXPECT_EQ(1, unknown_many->num_elements());
-  EXPECT_EQ(100, unknown_many->ProcessingTime());
-  EXPECT_EQ(100, unknown_many->OutputTime(&input_times));
+  EXPECT_EQ(unknown_many->num_elements(), 1);
+  EXPECT_EQ(unknown_many->ProcessingTime(), 100);
+  EXPECT_EQ(unknown_many->OutputTime(&input_times), 100);
   source1->add_processing_time(100);
   source2->add_processing_time(200);
-  EXPECT_EQ(100, unknown_many->ProcessingTime());
-  EXPECT_EQ(100, unknown_many->OutputTime(&input_times));
+  EXPECT_EQ(unknown_many->ProcessingTime(), 100);
+  EXPECT_EQ(unknown_many->OutputTime(&input_times), 100);
   source1->record_element();
   source2->record_element();
-  EXPECT_EQ(400, unknown_many->ProcessingTime());
-  EXPECT_EQ(400, unknown_many->OutputTime(&input_times));
+  EXPECT_EQ(unknown_many->ProcessingTime(), 400);
+  EXPECT_EQ(unknown_many->OutputTime(&input_times), 400);
   unknown_many->record_element();
-  EXPECT_EQ(200, unknown_many->ProcessingTime());
-  EXPECT_EQ(200, unknown_many->OutputTime(&input_times));
+  EXPECT_EQ(unknown_many->ProcessingTime(), 200);
+  EXPECT_EQ(unknown_many->OutputTime(&input_times), 200);
 }
 
 TEST(UnknownTest, Model) {
@@ -299,35 +306,35 @@ TEST(UnknownTest, Model) {
   unknown->add_input(source2);
   std::vector<int64> input_times(1, 0);
   source1->add_processing_time(100);
-  EXPECT_EQ(0, unknown->ProcessingTime());
-  EXPECT_EQ(0, unknown->OutputTime(&input_times));
+  EXPECT_EQ(unknown->ProcessingTime(), 0);
+  EXPECT_EQ(unknown->OutputTime(&input_times), 0);
   source2->add_processing_time(100);
-  EXPECT_EQ(0, unknown->ProcessingTime());
-  EXPECT_EQ(0, unknown->OutputTime(&input_times));
+  EXPECT_EQ(unknown->ProcessingTime(), 0);
+  EXPECT_EQ(unknown->OutputTime(&input_times), 0);
   source1->record_element();
-  EXPECT_EQ(100, unknown->ProcessingTime());
-  EXPECT_EQ(100, unknown->OutputTime(&input_times));
+  EXPECT_EQ(unknown->ProcessingTime(), 100);
+  EXPECT_EQ(unknown->OutputTime(&input_times), 100);
   source2->record_element();
-  EXPECT_EQ(200, unknown->ProcessingTime());
-  EXPECT_EQ(200, unknown->OutputTime(&input_times));
+  EXPECT_EQ(unknown->ProcessingTime(), 200);
+  EXPECT_EQ(unknown->OutputTime(&input_times), 200);
   source1->record_element();
-  EXPECT_EQ(150, unknown->ProcessingTime());
-  EXPECT_EQ(150, unknown->OutputTime(&input_times));
+  EXPECT_EQ(unknown->ProcessingTime(), 150);
+  EXPECT_EQ(unknown->OutputTime(&input_times), 150);
   source2->record_element();
-  EXPECT_EQ(100, unknown->ProcessingTime());
-  EXPECT_EQ(100, unknown->OutputTime(&input_times));
+  EXPECT_EQ(unknown->ProcessingTime(), 100);
+  EXPECT_EQ(unknown->OutputTime(&input_times), 100);
   // Unknown node processing time should not affect its ProcessingTime() or
   // OutputTime().
   unknown->add_processing_time(100);
-  EXPECT_EQ(100, unknown->processing_time());
-  EXPECT_EQ(100, unknown->ProcessingTime());
-  EXPECT_EQ(100, unknown->OutputTime(&input_times));
+  EXPECT_EQ(unknown->processing_time(), 100);
+  EXPECT_EQ(unknown->ProcessingTime(), 100);
+  EXPECT_EQ(unknown->OutputTime(&input_times), 100);
   // Unknown node number of elements should not affect its ProcessingTime() or
   // OutputTime().
   unknown->record_element();
-  EXPECT_EQ(1, unknown->num_elements());
-  EXPECT_EQ(100, unknown->ProcessingTime());
-  EXPECT_EQ(100, unknown->OutputTime(&input_times));
+  EXPECT_EQ(unknown->num_elements(), 1);
+  EXPECT_EQ(unknown->ProcessingTime(), 100);
+  EXPECT_EQ(unknown->OutputTime(&input_times), 100);
 }
 
 class TestNode : public model::Node {
@@ -355,35 +362,35 @@ class TestNode : public model::Node {
 TEST(SetterGetterTest, Node) {
   std::shared_ptr<TestNode> node =
       std::make_shared<TestNode>(model::Node::Args{-1, "TestNode", nullptr});
-  EXPECT_EQ(-1, node->id());
-  EXPECT_EQ("TestNode", node->name());
-  EXPECT_EQ(nullptr, node->output());
+  EXPECT_EQ(node->id(), -1);
+  EXPECT_EQ(node->name(), "TestNode");
+  EXPECT_EQ(node->output(), nullptr);
 
-  EXPECT_EQ(0, node->buffered_bytes());
+  EXPECT_EQ(node->buffered_bytes(), 0);
   node->add_buffered_bytes(42);
-  EXPECT_EQ(42, node->buffered_bytes());
+  EXPECT_EQ(node->buffered_bytes(), 42);
 
-  EXPECT_EQ(0, node->processing_time());
+  EXPECT_EQ(node->processing_time(), 0);
   node->record_start(1);
-  EXPECT_EQ(0, node->processing_time());
+  EXPECT_EQ(node->processing_time(), 0);
   node->record_stop(41);
-  EXPECT_EQ(40, node->processing_time());
+  EXPECT_EQ(node->processing_time(), 40);
   node->add_processing_time(2);
-  EXPECT_EQ(42, node->processing_time());
+  EXPECT_EQ(node->processing_time(), 42);
 
   std::shared_ptr<TestNode> input =
       std::make_shared<TestNode>(model::Node::Args{-1, "TestInput", node});
-  EXPECT_EQ(node.get(), input->output());
-  EXPECT_EQ(0, node->inputs().size());
+  EXPECT_EQ(input->output(), node.get());
+  EXPECT_EQ(node->inputs().size(), 0);
   node->add_input(input);
-  EXPECT_EQ(1, node->inputs().size());
-  EXPECT_EQ(input, node->inputs().front());
+  EXPECT_EQ(node->inputs().size(), 1);
+  EXPECT_EQ(node->inputs().front(), input);
   node->remove_input(input);
-  EXPECT_EQ(0, node->inputs().size());
+  EXPECT_EQ(node->inputs().size(), 0);
 
-  EXPECT_EQ(0, node->num_elements());
+  EXPECT_EQ(node->num_elements(), 0);
   node->record_element();
-  EXPECT_EQ(1, node->num_elements());
+  EXPECT_EQ(node->num_elements(), 1);
 }
 
 }  // namespace
diff --git a/tensorflow/python/data/experimental/benchmarks/autotune_benchmark.py b/tensorflow/python/data/experimental/benchmarks/autotune_benchmark.py
index e713494b52..391b6711e9 100644
--- a/tensorflow/python/data/experimental/benchmarks/autotune_benchmark.py
+++ b/tensorflow/python/data/experimental/benchmarks/autotune_benchmark.py
@@ -33,12 +33,20 @@ class AutotuneBenchmark(test.Benchmark):
   """Benchmarks for autotuning performance knobs."""
 
   def benchmarkMap(self):
+    a = self._benchmarkMap(autotune=False)
+    b = self._benchmarkMap(autotune=True)
+    print("speedup: %f" % (a / b))
+
+  def _benchmarkMap(self, autotune):
     k = 1024 * 1024
     dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
                                                 np.random.rand(4 * k,
                                                                1))).repeat()
     dataset = dataset.map(
         math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE)
+    options = dataset_ops.Options()
+    options.experimental_autotune = autotune
+    dataset = dataset.with_options(options)
     iterator = dataset_ops.make_one_shot_iterator(dataset)
     get_next = iterator.get_next()
 
@@ -46,7 +54,7 @@ class AutotuneBenchmark(test.Benchmark):
     with session.Session() as sess:
       for _ in range(5):
         sess.run(get_next.op)
-      for _ in range(1000):
+      for _ in range(10000):
         start = time.time()
         sess.run(get_next.op)
         end = time.time()
@@ -56,13 +64,17 @@ class AutotuneBenchmark(test.Benchmark):
           (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas),
            np.max(deltas)))
     self.report_benchmark(
-        iters=1000, wall_time=np.median(deltas), name="map_autotune")
+        iters=10000,
+        wall_time=np.median(deltas),
+        name="map" + ("_autotune" if autotune else ""))
+    return np.median(deltas)
 
   def benchmarkMapAndBatch(self):
-    self._benchmarkMapAndBatch(numa_aware=False)
-    self._benchmarkMapAndBatch(numa_aware=True)
+    a = self._benchmarkMapAndBatch(autotune=False)
+    b = self._benchmarkMapAndBatch(autotune=True)
+    print("speedup: %f" % (a / b))
 
-  def _benchmarkMapAndBatch(self, numa_aware):
+  def _benchmarkMapAndBatch(self, autotune):
     batch_size = 16
     k = 1024 * 1024
     dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
@@ -74,7 +86,7 @@ class AutotuneBenchmark(test.Benchmark):
             num_parallel_calls=optimization.AUTOTUNE,
             batch_size=batch_size))
     options = dataset_ops.Options()
-    options.experimental_numa_aware = numa_aware
+    options.experimental_autotune = autotune
     dataset = dataset.with_options(options)
     iterator = dataset_ops.make_one_shot_iterator(dataset)
     get_next = iterator.get_next()
@@ -83,7 +95,7 @@ class AutotuneBenchmark(test.Benchmark):
     with session.Session() as sess:
       for _ in range(5):
         sess.run(get_next.op)
-      for _ in range(100):
+      for _ in range(1000):
         start = time.time()
         sess.run(get_next.op)
         end = time.time()
@@ -94,11 +106,17 @@ class AutotuneBenchmark(test.Benchmark):
            np.max(deltas)))
 
     self.report_benchmark(
-        iters=100,
+        iters=1000,
         wall_time=np.median(deltas),
-        name=("numa_" if numa_aware else "") + "map_and_batch_autotune")
+        name="map_and_batch" + ("_autotune" if autotune else ""))
+    return np.median(deltas)
 
   def benchmarkInterleave(self):
+    a = self._benchmarkInterleave(autotune=False)
+    b = self._benchmarkInterleave(autotune=True)
+    print("speedup: %f" % (a / b))
+
+  def _benchmarkInterleave(self, autotune):
     k = 1024 * 1024
     dataset = dataset_ops.Dataset.from_tensors((np.random.rand(1, 4 * k),
                                                 np.random.rand(4 * k,
@@ -108,6 +126,9 @@ class AutotuneBenchmark(test.Benchmark):
         lambda _: dataset,
         cycle_length=10,
         num_parallel_calls=optimization.AUTOTUNE)
+    options = dataset_ops.Options()
+    options.experimental_autotune = autotune
+    dataset = dataset.with_options(options)
     iterator = dataset_ops.make_one_shot_iterator(dataset)
     get_next = iterator.get_next()
 
@@ -115,7 +136,7 @@ class AutotuneBenchmark(test.Benchmark):
     with session.Session() as sess:
       for _ in range(5):
         sess.run(get_next.op)
-      for _ in range(1000):
+      for _ in range(10000):
         start = time.time()
         sess.run(get_next.op)
         end = time.time()
@@ -125,11 +146,17 @@ class AutotuneBenchmark(test.Benchmark):
           (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas),
            np.max(deltas)))
     self.report_benchmark(
-        iters=1000,
+        iters=10000,
         wall_time=np.median(deltas),
-        name="interleave_autotune")
+        name="interleave" + ("_autotune" if autotune else ""))
+    return np.median(deltas)
 
   def benchmarkMapAndInterleave(self):
+    a = self._benchmarkMapAndInterleave(autotune=False)
+    b = self._benchmarkMapAndInterleave(autotune=True)
+    print("speedup: %f" % (a / b))
+
+  def _benchmarkMapAndInterleave(self, autotune):
     k = 1024 * 1024
     a = (np.random.rand(1, 8 * k), np.random.rand(8 * k, 1))
     b = (np.random.rand(1, 4 * k), np.random.rand(4 * k, 1))
@@ -161,6 +188,9 @@ class AutotuneBenchmark(test.Benchmark):
 
     dataset = dataset_ops.Dataset.zip((dataset, dataset_c))
     dataset = dataset.map(f2, num_parallel_calls=optimization.AUTOTUNE)
+    options = dataset_ops.Options()
+    options.experimental_autotune = autotune
+    dataset = dataset.with_options(options)
     iterator = dataset_ops.make_one_shot_iterator(dataset)
     get_next = iterator.get_next()
 
@@ -168,7 +198,7 @@ class AutotuneBenchmark(test.Benchmark):
     with session.Session() as sess:
       for _ in range(5):
         sess.run(get_next)
-      for _ in range(100):
+      for _ in range(1000):
         start = time.time()
         sess.run(get_next)
         end = time.time()
@@ -178,9 +208,10 @@ class AutotuneBenchmark(test.Benchmark):
           (np.median(deltas), np.mean(deltas), np.std(deltas), np.min(deltas),
            np.max(deltas)))
     self.report_benchmark(
-        iters=100,
+        iters=1000,
         wall_time=np.median(deltas),
-        name="map_and_interleave_autotune")
+        name="map_and_interleave" + ("_autotune" if autotune else ""))
+    return np.median(deltas)
 
 
 if __name__ == "__main__":
-- 
GitLab


From 2a09c39fff6908070302a77359d8a5f70826dbd8 Mon Sep 17 00:00:00 2001
From: Russell Power <power@google.com>
Date: Wed, 19 Dec 2018 15:10:02 -0800
Subject: [PATCH 859/873] Remove device capture from replay logs.

PiperOrigin-RevId: 226240619
---
 tensorflow/python/client/session_ref.cc | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/tensorflow/python/client/session_ref.cc b/tensorflow/python/client/session_ref.cc
index 4d361612b7..6639cf506e 100644
--- a/tensorflow/python/client/session_ref.cc
+++ b/tensorflow/python/client/session_ref.cc
@@ -109,21 +109,8 @@ class SessionLogger {
   }
 
   Status RecordNewSession(Session* session) {
-    LOG(INFO) << "New session discovered.  Capturing devices...";
     ReplayOp op;
     NewReplaySession* req = op.mutable_new_replay_session();
-
-    std::vector<DeviceAttributes> devices;
-    Status status = session->ListDevices(&devices);
-    if (status.ok()) {
-      LOG(INFO) << "Found: " << devices.size() << " devices.";
-      for (const DeviceAttributes& dev : devices) {
-        *req->mutable_devices()->add_local_device() = dev;
-      }
-    } else {
-      LOG(WARNING) << "Failed to list devices on session. Continuing.";
-    }
-
     req->set_session_handle(SessionToHandle(session));
     return Flush(op);
   }
-- 
GitLab


From 4c7452c8c9b632d7ad7232099637e6fe388c3dd2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 15:11:25 -0800
Subject: [PATCH 860/873] Modify TFLite NNAPI interface to use a struct instead
 of using directly function calls. Add an empty implementation for NNAPI
 outside Android.

PiperOrigin-RevId: 226240820
---
 .../lite/delegates/nnapi/nnapi_delegate.cc    |  142 +-
 tensorflow/lite/nnapi/BUILD                   |   12 +
 tensorflow/lite/nnapi/NeuralNetworksShim.cc   |  129 ++
 tensorflow/lite/nnapi/NeuralNetworksShim.h    | 1204 ++++++++---------
 tensorflow/lite/nnapi/nnapi_lib_test.cc       |   90 ++
 tensorflow/lite/nnapi_delegate.cc             |  135 +-
 6 files changed, 909 insertions(+), 803 deletions(-)
 create mode 100644 tensorflow/lite/nnapi/NeuralNetworksShim.cc
 create mode 100644 tensorflow/lite/nnapi/nnapi_lib_test.cc

diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
index 7908bbf164..a10a2ee963 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -48,55 +48,35 @@ namespace {
   } while (0)
 
 namespace {
-int32_t GetAndroidSdkVersion() {
-#ifdef __ANDROID__
-  const char* sdkProp = "ro.build.version.sdk";
-  char sdkVersion[PROP_VALUE_MAX];
-  int length = __system_property_get(sdkProp, sdkVersion);
-  if (length != 0) {
-    for (int i = 0; i < length; ++i) {
-      int digit = sdkVersion[i] - '0';
-      if (digit < 0 || digit > 9) {
-        // Non-numeric SDK version, assume it's higher then expected;
-        return std::numeric_limits<int32_t>::max();
-      }
-    }
-    return atoi(sdkVersion);
-  }
-#endif  // __ANDROID__
-  return 0;
-}
-
 constexpr int32_t kMinSdkVersionForNNAPI = 27;
 constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
-static const int32_t kAndroidSdkVersion = GetAndroidSdkVersion();
-
 }  // namespace
 
 // RAII NN API Model Destructor for use with std::unique_ptr
 struct NNFreeModel {
   void operator()(ANeuralNetworksModel* model) {
-    ANeuralNetworksModel_free(model);
+    NnApiImplementation()->ANeuralNetworksModel_free(model);
   }
 };
 // RAII NN API Compilation Destructor for use with std::unique_ptr
 struct NNFreeCompilation {
   void operator()(ANeuralNetworksCompilation* model) {
-    ANeuralNetworksCompilation_free(model);
+    NnApiImplementation()->ANeuralNetworksCompilation_free(model);
   }
 };
 
 // Manage NNAPI shared memory handle
 class NNMemory {
  public:
-  NNMemory(const char* name, size_t size) {
+  NNMemory(const NnApi* nnapi, const char* name, size_t size) {
 #ifdef __ANDROID__
+    nnapi_ = nnapi;
     byte_size_ = size;
     fd_ = ASharedMemory_create(name, size);
     data_ptr_ = reinterpret_cast<uint8_t*>(
         mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
-    ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE, fd_, 0,
-                                       &nn_memory_handle_);
+    nnapi_->ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE,
+                                               fd_, 0, &nn_memory_handle_);
 #endif
   }
 
@@ -106,7 +86,7 @@ class NNMemory {
       munmap(data_ptr_, byte_size_);
     }
     if (nn_memory_handle_) {
-      ANeuralNetworksMemory_free(nn_memory_handle_);
+      nnapi_->ANeuralNetworksMemory_free(nn_memory_handle_);
     }
     if (fd_ > 0) close(fd_);
 #endif
@@ -117,6 +97,7 @@ class NNMemory {
 
  private:
 #ifdef __ANDROID__
+  const NnApi* nnapi_;
   int fd_ = 0;
   size_t byte_size_ = 0;
 #endif
@@ -166,9 +147,10 @@ class OperandMapping {
 // operands for both tensors and parameters, and TFLite separates the two.
 class NNAPIOpBuilder {
  public:
-  NNAPIOpBuilder(TfLiteContext* context, OperandMapping* tensor_mapping,
-                 ANeuralNetworksModel* nn_model)
-      : context_(context),
+  NNAPIOpBuilder(const NnApi* nnapi, TfLiteContext* context,
+                 OperandMapping* tensor_mapping, ANeuralNetworksModel* nn_model)
+      : nnapi_(nnapi),
+        context_(context),
         operand_mapping_(tensor_mapping),
         nn_model_(nn_model) {}
 
@@ -224,7 +206,7 @@ class NNAPIOpBuilder {
         .dimensionCount = dimension_count,
         .dimensions = dims.data()};
     CHECK_NN(context_,
-             ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
+             nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
     int ann_operand = operand_mapping_->add_new_non_tensor_operand();
     augmented_outputs_.push_back(ann_operand);
     return kTfLiteOk;
@@ -241,7 +223,7 @@ class NNAPIOpBuilder {
         reinterpret_cast<uint32_t*>(tensor->dims->data), tensor->params.scale,
         tensor->params.zero_point};
     CHECK_NN(context_,
-             ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
+             nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
     augmented_outputs_.push_back(ann_index);
 
     *ann_tensor_index_out = ann_index;
@@ -298,11 +280,11 @@ class NNAPIOpBuilder {
         nn_type, static_cast<uint32_t>(tensor->dims->size),
         reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
     CHECK_NN(context_,
-             ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
+             nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
 
     if (tensor->allocation_type == kTfLiteMmapRo) {
       // TODO(b/80630405): Use NNAPIAllocation.
-      CHECK_NN(context_, ANeuralNetworksModel_setOperandValue(
+      CHECK_NN(context_, nnapi_->ANeuralNetworksModel_setOperandValue(
                              nn_model_, ann_tensor_index, tensor->data.raw,
                              tensor->bytes));
     }
@@ -314,7 +296,7 @@ class NNAPIOpBuilder {
   // Finish emitting the op (of type `type`) into the NN API.
   TfLiteStatus FinalizeAddOperation(ANeuralNetworksOperationType type) {
     // Actually add a NN API operation
-    CHECK_NN(context_, ANeuralNetworksModel_addOperation(
+    CHECK_NN(context_, nnapi_->ANeuralNetworksModel_addOperation(
                            nn_model_, type,
                            static_cast<uint32_t>(augmented_inputs_.size()),
                            augmented_inputs_.data(),
@@ -330,9 +312,9 @@ class NNAPIOpBuilder {
   TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
     ANeuralNetworksOperandType operand_type{.type = nn_type};
     CHECK_NN(context_,
-             ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
+             nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
     int ann_operand = operand_mapping_->add_new_non_tensor_operand();
-    CHECK_NN(context_, ANeuralNetworksModel_setOperandValue(
+    CHECK_NN(context_, nnapi_->ANeuralNetworksModel_setOperandValue(
                            nn_model_, ann_operand, &value, sizeof(T)));
     augmented_inputs_.push_back(ann_operand);
     return kTfLiteOk;
@@ -344,15 +326,18 @@ class NNAPIOpBuilder {
     ANeuralNetworksOperandType operand_type{
         .type = nn_type, .dimensionCount = 1, .dimensions = &num_values};
     CHECK_NN(context_,
-             ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
+             nnapi_->ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
     int ann_operand = operand_mapping_->add_new_non_tensor_operand();
     CHECK_NN(context_,
-             ANeuralNetworksModel_setOperandValue(
+             nnapi_->ANeuralNetworksModel_setOperandValue(
                  nn_model_, ann_operand, values, sizeof(T) * num_values));
     augmented_inputs_.push_back(ann_operand);
     return kTfLiteOk;
   }
 
+  // Access to NNAPI.
+  const NnApi* const nnapi_;
+
   // TfLiteContext for error handling.
   TfLiteContext* const context_;
 
@@ -388,7 +373,7 @@ ANeuralNetworksOperationType BasicMappingFn(
 // The kernel that represents the node sub set of TF Lite being run on NN API.
 class NNAPIDelegateKernel {
  public:
-  NNAPIDelegateKernel() = default;
+  NNAPIDelegateKernel() { nnapi_ = NnApiImplementation(); }
 
   typedef ANeuralNetworksOperationType (*MappingFn)(
       const NNAPIOpMappingArgs& mapping_args);
@@ -397,7 +382,7 @@ class NNAPIDelegateKernel {
   // when called. You can use this function to see if a node is supported
   // (i.e. that MappingFn is not nullptr).
   static MappingFn Map(TfLiteContext* context, int builtin_code, int version,
-                       TfLiteNode* node) {
+                       int android_sdk_version, TfLiteNode* node) {
     switch (builtin_code) {
       case kTfLiteBuiltinAdd:
         if (version == 1) {
@@ -516,7 +501,7 @@ class NNAPIDelegateKernel {
         }
         break;
       case kTfLiteBuiltinSqueeze:
-        if (version == 1 && kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) {
+        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
           return [](const NNAPIOpMappingArgs& mapping_args)
                      -> ANeuralNetworksOperationType {
             auto builtin = reinterpret_cast<TfLiteSqueezeParams*>(
@@ -632,7 +617,7 @@ class NNAPIDelegateKernel {
         }
         break;
       case kTfLiteBuiltinSub:
-        if (version == 1 && kAndroidSdkVersion >= kMinSdkVersionForNNAPI11 &&
+        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
             context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
           // NNAPI only support float sub.
           return [](const NNAPIOpMappingArgs& mapping_args)
@@ -645,7 +630,7 @@ class NNAPIDelegateKernel {
         }
         break;
       case kTfLiteBuiltinDiv:
-        if (version == 1 && kAndroidSdkVersion >= kMinSdkVersionForNNAPI11 &&
+        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
             context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
           // NNAPI only support float div.
           return [](const NNAPIOpMappingArgs& mapping_args)
@@ -658,7 +643,7 @@ class NNAPIDelegateKernel {
         }
         break;
       case kTfLiteBuiltinPad:
-        if (version == 1 && kAndroidSdkVersion >= kMinSdkVersionForNNAPI11 &&
+        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
             node->inputs->size == 2 &&
             context->tensors[node->inputs->data[0]].type == kTfLiteFloat32) {
           // NNAPI does not support specifying the padding value.
@@ -668,12 +653,12 @@ class NNAPIDelegateKernel {
         }
         break;
       case kTfLiteBuiltinSpaceToBatchNd:
-        if (version == 1 && kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) {
+        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
           return BasicMappingFn<ANEURALNETWORKS_SPACE_TO_BATCH_ND>;
         }
         break;
       case kTfLiteBuiltinStridedSlice:
-        if (version == 1 && kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) {
+        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11) {
           return [](const NNAPIOpMappingArgs& mapping_args)
                      -> ANeuralNetworksOperationType {
             auto builtin = reinterpret_cast<TfLiteStridedSliceParams*>(
@@ -691,7 +676,7 @@ class NNAPIDelegateKernel {
         // dimensions.
         // TODO(b/110888333): Support dynamically-sized tensors in delegates.
         if ((version == 1) &&
-            (kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) &&
+            (android_sdk_version >= kMinSdkVersionForNNAPI11) &&
             (node->inputs->size > 1) &&
             (context->tensors[node->inputs->data[1]].allocation_type ==
              kTfLiteMmapRo)) {
@@ -789,7 +774,7 @@ class NNAPIDelegateKernel {
         break;
       case kTfLiteBuiltinMean:
         // NNAPI does not support generating a scalar as output for MEAN.
-        if (version == 1 && kAndroidSdkVersion >= kMinSdkVersionForNNAPI11 &&
+        if (version == 1 && android_sdk_version >= kMinSdkVersionForNNAPI11 &&
             context->tensors[node->inputs->data[0]].type == kTfLiteFloat32 &&
             context->tensors[node->outputs->data[0]].dims->size > 0) {
           return [](const NNAPIOpMappingArgs& mapping_args)
@@ -833,7 +818,7 @@ class NNAPIDelegateKernel {
 
     if (!nn_model_) {
       ANeuralNetworksModel* model;
-      CHECK_NN(context, ANeuralNetworksModel_create(&model));
+      CHECK_NN(context, nnapi_->ANeuralNetworksModel_create(&model));
       nn_model_.reset(model);
 
       TF_LITE_ENSURE_STATUS(
@@ -842,9 +827,9 @@ class NNAPIDelegateKernel {
 
     if (!nn_compilation_) {
       ANeuralNetworksCompilation* compilation;
-      CHECK_NN(context, ANeuralNetworksCompilation_create(nn_model_.get(),
-                                                          &compilation));
-      CHECK_NN(context, ANeuralNetworksCompilation_finish(compilation));
+      CHECK_NN(context, nnapi_->ANeuralNetworksCompilation_create(
+                            nn_model_.get(), &compilation));
+      CHECK_NN(context, nnapi_->ANeuralNetworksCompilation_finish(compilation));
       nn_compilation_.reset(compilation);
     }
     return kTfLiteOk;
@@ -852,8 +837,8 @@ class NNAPIDelegateKernel {
 
   TfLiteStatus Invoke(TfLiteContext* context, TfLiteNode* node) {
     ANeuralNetworksExecution* execution = nullptr;
-    CHECK_NN(context, ANeuralNetworksExecution_create(nn_compilation_.get(),
-                                                      &execution));
+    CHECK_NN(context, nnapi_->ANeuralNetworksExecution_create(
+                          nn_compilation_.get(), &execution));
 
     // Set the input tensor buffers. Note: we access tflite tensors using
     // absolute indices but NN api indices inputs by relative indices.
@@ -871,7 +856,7 @@ class NNAPIDelegateKernel {
         // copy data to pre-allocated shared memory.
         memcpy(nn_input_memory_->get_data_ptr() + input_offset,
                tensor->data.raw, tensor->bytes);
-        CHECK_NN(context, ANeuralNetworksExecution_setInputFromMemory(
+        CHECK_NN(context, nnapi_->ANeuralNetworksExecution_setInputFromMemory(
                               execution, relative_input_index, nullptr,
                               nn_input_memory_->get_handle(), input_offset,
                               tensor->bytes));
@@ -885,7 +870,7 @@ class NNAPIDelegateKernel {
     size_t output_offset = 0;
     for (auto output_index : TfLiteIntArrayView(node->outputs)) {
       TfLiteTensor* tensor = &context->tensors[output_index];
-      CHECK_NN(context, ANeuralNetworksExecution_setOutputFromMemory(
+      CHECK_NN(context, nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
                             execution, relative_output_index, nullptr,
                             nn_output_memory_->get_handle(), output_offset,
                             tensor->bytes));
@@ -901,17 +886,18 @@ class NNAPIDelegateKernel {
       // Here we are using a deep copy for state_in tensors so that we are not
       // reading and writing into the same buffer during a invocation.
       // TODO(110369471): using double shared buffer to minimize the copies.
-      CHECK_NN(context, ANeuralNetworksExecution_setOutput(
+      CHECK_NN(context, nnapi_->ANeuralNetworksExecution_setOutput(
                             execution, relative_output_index, nullptr,
                             tensor->data.raw, tensor->bytes));
       relative_output_index++;
     }
     // Invoke ANN in blocking fashion.
     ANeuralNetworksEvent* event = nullptr;
-    CHECK_NN(context, ANeuralNetworksExecution_startCompute(execution, &event));
-    CHECK_NN(context, ANeuralNetworksEvent_wait(event));
-    ANeuralNetworksEvent_free(event);
-    ANeuralNetworksExecution_free(execution);
+    CHECK_NN(context,
+             nnapi_->ANeuralNetworksExecution_startCompute(execution, &event));
+    CHECK_NN(context, nnapi_->ANeuralNetworksEvent_wait(event));
+    nnapi_->ANeuralNetworksEvent_free(event);
+    nnapi_->ANeuralNetworksExecution_free(execution);
 
     // copy results from shared memory to the destination.
     output_offset = 0;
@@ -926,6 +912,8 @@ class NNAPIDelegateKernel {
   }
 
  private:
+  // Access to NNApi.
+  const NnApi* nnapi_;
   // ANN API state.
   std::unique_ptr<ANeuralNetworksModel, NNFreeModel> nn_model_;
   std::unique_ptr<ANeuralNetworksCompilation, NNFreeCompilation>
@@ -946,7 +934,7 @@ class NNAPIDelegateKernel {
     // The operand builder allows creating a single op. We create it at this
     // reduced power position rather than in the for loop to avoid reallocating
     // the vectors.
-    NNAPIOpBuilder builder(context, &operand_mapping_, nn_model_.get());
+    NNAPIOpBuilder builder(nnapi_, context, &operand_mapping_, nn_model_.get());
     // Add Tensors
     // allocate outside to avoid realloc
     for (auto node_index : nodes_) {
@@ -969,9 +957,10 @@ class NNAPIDelegateKernel {
         }
       }
       // Get op type and operands
-      int nn_op_type = Map(context, reg->builtin_code, reg->version, node)(
-          {context, &builder, node, &model_state_outputs_,
-           &model_state_tfl_inputs_});
+      int nn_op_type = Map(
+          context, reg->builtin_code, reg->version, nnapi_->android_sdk_version,
+          node)({context, &builder, node, &model_state_outputs_,
+                 &model_state_tfl_inputs_});
       // Map outputs to NN API tensor indices.
       for (auto output_index : TfLiteIntArrayView(node->outputs)) {
         TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(output_index));
@@ -1016,24 +1005,25 @@ class NNAPIDelegateKernel {
     }
 
     // Tell ANN to declare inputs/outputs
-    CHECK_NN(context, ANeuralNetworksModel_identifyInputsAndOutputs(
+    CHECK_NN(context, nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs(
                           nn_model_.get(), inputs.size(), inputs.data(),
                           outputs.size(), outputs.data()));
 
     // Set relaxed computation mode for fp32 if possible.
-    if (kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) {
+    if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI11) {
       CHECK_NN(context,
-               ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+               nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
                    nn_model_.get(), context->allow_fp32_relax_to_fp16));
     }
 
     // Finalize the model
-    CHECK_NN(context, ANeuralNetworksModel_finish(nn_model_.get()));
+    CHECK_NN(context, nnapi_->ANeuralNetworksModel_finish(nn_model_.get()));
 
     // Create shared memory pool for inputs and outputs.
-    nn_input_memory_.reset(new NNMemory("input_pool", total_input_byte_size));
+    nn_input_memory_.reset(
+        new NNMemory(nnapi_, "input_pool", total_input_byte_size));
     nn_output_memory_.reset(
-        new NNMemory("output_pool", total_output_byte_size));
+        new NNMemory(nnapi_, "output_pool", total_output_byte_size));
 
     return kTfLiteOk;
   }
@@ -1049,7 +1039,9 @@ TfLiteDelegate* NnApiDelegate() {
       .Prepare = [](TfLiteContext* context,
                     TfLiteDelegate* delegate) -> TfLiteStatus {
         // Do not check nodes_ if NN API is unavailable.
-        if (kAndroidSdkVersion < kMinSdkVersionForNNAPI || !NNAPIExists()) {
+        const NnApi* nnapi = NnApiImplementation();
+        if (nnapi->android_sdk_version < kMinSdkVersionForNNAPI ||
+            !nnapi->nnapi_exists) {
           return kTfLiteOk;
         }
 
@@ -1062,6 +1054,7 @@ TfLiteDelegate* NnApiDelegate() {
         TfLiteIntArray* plan;
         TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
 
+        int android_sdk_version = NnApiImplementation()->android_sdk_version;
         // Check for every node if it is supported
         // TODO(b/80625235): Fix this to do more careful checking of versioning.
         for (int node_index : TfLiteIntArrayView(plan)) {
@@ -1070,7 +1063,8 @@ TfLiteDelegate* NnApiDelegate() {
           TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
               context, node_index, &node, &registration));
           if (NNAPIDelegateKernel::Map(context, registration->builtin_code,
-                                       registration->version, node)) {
+                                       registration->version,
+                                       android_sdk_version, node)) {
             supported_nodes.push_back(node_index);
           }
         }
diff --git a/tensorflow/lite/nnapi/BUILD b/tensorflow/lite/nnapi/BUILD
index 467a2b7a7b..390c3730cb 100644
--- a/tensorflow/lite/nnapi/BUILD
+++ b/tensorflow/lite/nnapi/BUILD
@@ -6,8 +6,20 @@ package(default_visibility = [
 
 cc_library(
     name = "nnapi_lib",
+    srcs = [
+        "NeuralNetworksShim.cc",
+    ],
     hdrs = [
         "NeuralNetworksShim.h",
     ],
     linkopts = ["-ldl"],
 )
+
+cc_test(
+    name = "nnapi_lib_test",
+    srcs = ["nnapi_lib_test.cc"],
+    deps = [
+        "//tensorflow/lite/nnapi:nnapi_lib",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
diff --git a/tensorflow/lite/nnapi/NeuralNetworksShim.cc b/tensorflow/lite/nnapi/NeuralNetworksShim.cc
new file mode 100644
index 0000000000..6858fbdd57
--- /dev/null
+++ b/tensorflow/lite/nnapi/NeuralNetworksShim.cc
@@ -0,0 +1,129 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/lite/nnapi/NeuralNetworksShim.h"
+
+#include <cstdlib>
+
+#ifdef __ANDROID__
+#include <sys/mman.h>
+#include <sys/system_properties.h>
+#include <unistd.h>
+#endif
+
+#define NNAPI_LOG(format, ...) fprintf(stderr, format "\n", __VA_ARGS__);
+
+namespace {
+
+#ifdef __ANDROID__
+int32_t GetAndroidSdkVersion() {
+  const char* sdkProp = "ro.build.version.sdk";
+  char sdkVersion[PROP_VALUE_MAX];
+  int length = __system_property_get(sdkProp, sdkVersion);
+  if (length != 0) {
+    int32_t result = 0;
+    for (int i = 0; i < length; ++i) {
+      int digit = sdkVersion[i] - '0';
+      if (digit < 0 || digit > 9) {
+        // Non-numeric SDK version, assume it's higher than expected;
+        return 0xffff;
+      }
+      result = result * 10 + digit;
+    }
+    return result;
+  }
+  return 0;
+}
+
+void* LoadFunction(void* handle, const char* name) {
+  if (handle == nullptr) {
+    return nullptr;
+  }
+  void* fn = dlsym(handle, name);
+  if (fn == nullptr) {
+    NNAPI_LOG("nnapi error: unable to open function %s", name);
+  }
+  return fn;
+}
+
+#define LOAD_FUNCTION(handle, name) \
+  nnapi.name = reinterpret_cast<name##_fn>(LoadFunction(handle, #name));
+
+#else
+
+#define LOAD_FUNCTION(handle, name) nnapi.name = nullptr;
+
+#endif
+
+const NnApi LoadNnApi() {
+  NnApi nnapi = {};
+
+#ifdef __ANDROID__
+  // TODO: change RTLD_LOCAL? Assumes there can be multiple instances of nn
+  // api RT
+  void* libneuralnetworks =
+      dlopen("libneuralnetworks.so", RTLD_LAZY | RTLD_LOCAL);
+  if (libneuralnetworks == nullptr) {
+    NNAPI_LOG("nnapi error: unable to open library %s", "libneuralnetworks.so");
+  }
+  void* libandroid = dlopen("libandroid.so", RTLD_LAZY | RTLD_LOCAL);
+  if (libneuralnetworks == nullptr) {
+    NNAPI_LOG("nnapi error: unable to open library %s", "libandroid.so");
+  }
+  nnapi.nnapi_exists = libneuralnetworks != nullptr;
+  nnapi.android_sdk_version = GetAndroidSdkVersion();
+#else
+  nnapi.nnapi_exists = false;
+  nnapi.android_sdk_version = 0;
+#endif
+
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksMemory_createFromFd);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksMemory_free);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_create);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_free);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_finish);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_addOperand);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_setOperandValue);
+  LOAD_FUNCTION(libneuralnetworks,
+                ANeuralNetworksModel_setOperandValueFromMemory);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksModel_addOperation);
+  LOAD_FUNCTION(libneuralnetworks,
+                ANeuralNetworksModel_identifyInputsAndOutputs);
+  LOAD_FUNCTION(libneuralnetworks,
+                ANeuralNetworksModel_relaxComputationFloat32toFloat16);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksCompilation_create);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksCompilation_free);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksCompilation_setPreference);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksCompilation_finish);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksExecution_create);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksExecution_free);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksExecution_setInput);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksExecution_setInputFromMemory);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksExecution_setOutput);
+  LOAD_FUNCTION(libneuralnetworks,
+                ANeuralNetworksExecution_setOutputFromMemory);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksExecution_startCompute);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksEvent_wait);
+  LOAD_FUNCTION(libneuralnetworks, ANeuralNetworksEvent_free);
+  LOAD_FUNCTION(libandroid, ASharedMemory_create);
+
+  return nnapi;
+}
+
+}  // namespace
+
+const NnApi* NnApiImplementation() {
+  static const NnApi nnapi = LoadNnApi();
+  return &nnapi;
+}
diff --git a/tensorflow/lite/nnapi/NeuralNetworksShim.h b/tensorflow/lite/nnapi/NeuralNetworksShim.h
index c39502f4ac..de44852666 100644
--- a/tensorflow/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/lite/nnapi/NeuralNetworksShim.h
@@ -20,64 +20,6 @@ limitations under the License.
 #include <stdio.h>
 #include <stdlib.h>
 
-// helpers
-
-#define NNAPI_LOG(format, ...) fprintf(stderr, format "\n", __VA_ARGS__);
-#define LOAD_FUNCTION(name) \
-  static name##_fn fn = reinterpret_cast<name##_fn>(loadFunction(#name));
-#define EXECUTE_FUNCTION(...) \
-  if (fn != nullptr) {        \
-    fn(__VA_ARGS__);          \
-  }
-#define EXECUTE_FUNCTION_RETURN(...) return fn != nullptr ? fn(__VA_ARGS__) : 0;
-
-inline void* loadLibrary(const char* name) {
-  // TODO: change RTLD_LOCAL? Assumes there can be multiple instances of nn
-  // api RT
-  void* handle = nullptr;
-#ifdef __ANDROID__
-  handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL);
-  if (handle == nullptr) {
-    NNAPI_LOG("nnapi error: unable to open library %s", name);
-  }
-#endif
-  return handle;
-}
-
-typedef int (*ASharedMemory_create_fn)(const char* name, size_t size);
-
-// ASharedMemory_create was added in Android 8.0, so safe to use with NNAPI
-// which was added in 8.1.
-inline int ASharedMemory_create(const char* name, size_t size) {
-  static void* handle = loadLibrary("libandroid.so");
-  static ASharedMemory_create_fn fn =
-      handle != nullptr ? reinterpret_cast<ASharedMemory_create_fn>(
-                              dlsym(handle, "ASharedMemory_create"))
-                        : nullptr;
-  return fn(name, size);
-}
-
-inline void* getLibraryHandle() {
-  static void* handle = loadLibrary("libneuralnetworks.so");
-  return handle;
-}
-
-inline void* loadFunction(const char* name) {
-  void* fn = nullptr;
-  if (getLibraryHandle() != nullptr) {
-    fn = dlsym(getLibraryHandle(), name);
-  }
-  if (fn == nullptr) {
-    NNAPI_LOG("nnapi error: unable to open function %s", name);
-  }
-  return fn;
-}
-
-inline bool NNAPIExists() {
-  static bool nnapi_is_available = getLibraryHandle();
-  return nnapi_is_available;
-}
-
 // NN api types based on NNAPI header file
 // https://developer.android.com/ndk/reference/group/neural-networks
 
@@ -407,606 +349,564 @@ typedef int (*ANeuralNetworksEvent_wait_fn)(ANeuralNetworksEvent* event);
 
 typedef void (*ANeuralNetworksEvent_free_fn)(ANeuralNetworksEvent* event);
 
-/**
- * Creates a shared memory object from a file descriptor.
- *
- * The shared memory is backed by a file descriptor via mmap.
- * See {@link ANeuralNetworksMemory} for a description on how to use
- * this shared memory.
- *
- * @param size The requested size in bytes.
- *             Must not be larger than the file size.
- * @param prot The desired memory protection for the mapping.
- *             It is either PROT_NONE or the bitwise OR of one or
- *             more of the following flags: PROT_READ, PROT_WRITE.
- * @param fd The requested file descriptor.
- *           The file descriptor has to be mmap-able. The file
- *           descriptor will be duplicated.
- * @param offset The offset to the beginning of the file of the area to map.
- *               The offset has to be aligned to a page size.
- * @param memory The memory object to be created.
- *               Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
- */
-inline int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd,
-                                              size_t offset,
-                                              ANeuralNetworksMemory** memory) {
-  LOAD_FUNCTION(ANeuralNetworksMemory_createFromFd);
-  EXECUTE_FUNCTION_RETURN(size, protect, fd, offset, memory);
-}
-
-/**
- * Delete a memory object.
- *
- * Destroys the object used by the run time to keep track of the memory.
- * This will free the underlying actual memory if no other code has open
- * handles to this memory.
- *
- * @param memory The memory object to be freed.
- */
-inline void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) {
-  LOAD_FUNCTION(ANeuralNetworksMemory_free);
-  EXECUTE_FUNCTION(memory);
-}
-
-/**
- * Create an empty {@link ANeuralNetworksModel}.
- *
- * <p>This only creates the object. Computation is performed once
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
- *
- * The model should be constructed with calls to
- * {@link ANeuralNetworksModel_addOperation} and
- * {@link ANeuralNetworksModel_addOperand}
- *
- * <p>{@link ANeuralNetworksModel_finish} should be called once the model
- * has been fully constructed.</p>
- *
- * <p>{@link ANeuralNetworksModel_free} should be called once the model
- * is no longer needed.</p>
- *
- * @param model The {@link ANeuralNetworksModel} to be created.
- *              Set to NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_create(ANeuralNetworksModel** model) {
-  LOAD_FUNCTION(ANeuralNetworksModel_create);
-  EXECUTE_FUNCTION_RETURN(model);
-}
-
-/**
- * Destroy a model.
- *
- * The model need not have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be destroyed. Passing NULL is acceptable and
- *              results in no operation.
- */
-inline void ANeuralNetworksModel_free(ANeuralNetworksModel* model) {
-  LOAD_FUNCTION(ANeuralNetworksModel_free);
-  EXECUTE_FUNCTION(model);
-}
-
-/**
- * Indicate that we have finished modifying a model. Required before
- * calling {@link ANeuralNetworksCompilation_compile}.
- *
- * An application is responsible to make sure that no other thread uses
- * the model at the same time.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be finished.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) {
-  LOAD_FUNCTION(ANeuralNetworksModel_finish);
-  EXECUTE_FUNCTION_RETURN(model);
-}
-
-/**
- * Add an operand to a model.
- *
- * The order in which the operands are added is important. The first one added
- * to a model will have the index value 0, the second 1, etc. These indexes are
- * used as operand identifiers in {@link ANeuralNetworksModel_addOperation},
- * {@link ANeuralNetworksExecution_setInput},
- * {@link ANeuralNetworksExecution_setInputFromMemory},
- * {@link ANeuralNetworksExecution_setOutput},
- * {@link ANeuralNetworksExecution_setOutputFromMemory} and
- * {@link ANeuralNetworksExecution_setOperandValue}.
- *
- * To build a model that can accommodate inputs of various sizes, as you may
- * want to do for a CNN, set the size of the dimensions that will vary at run
- * time to 0. If you do so, provide the full dimensions when calling
- * {@link ANeuralNetworksExecution_setInput} or {@link
- * ANeuralNetworksExecution_setInputFromMemory}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param type The {@link ANeuralNetworksOperandType} that describes the shape
- * of the operand.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_addOperand(
-    ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) {
-  LOAD_FUNCTION(ANeuralNetworksModel_addOperand);
-  EXECUTE_FUNCTION_RETURN(model, type);
-}
-
-/**
- * Sets an operand to a constant value.
- *
- * For scalar values, the content of buffer is copied into the model.
- *
- * For tensor values, a pointer to the buffer is stored within the model.
- * The application is responsible for not changing the content of this region
- * until all executions using this model have completed. As the data may
- * be copied during processing, modifying the data after this call yields
- * undefined results.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param buffer A pointer to the data to use.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model,
-                                                int32_t index,
-                                                const void* buffer,
-                                                size_t length) {
-  LOAD_FUNCTION(ANeuralNetworksModel_setOperandValue);
-  EXECUTE_FUNCTION_RETURN(model, index, buffer, length);
-}
-
-/**
- * Sets an operand to a value stored in a memory object.
- *
- * The content of the memory is not copied. A reference to that memory is stored
- * inside the model. The application is responsible for not changing the content
- * of the memory region until all executions using this model have completed.
- * As the data may be copied during processing, modifying the data after this
- * call yields undefined results.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @param model The model to be modified.
- * @param index The index of the model operand we're setting.
- * @param buffer A pointer to the data to use.
- * @param memory The memory containing the data.
- * @param offset This specifies the location of the data within the memory.
- *               The offset is in bytes from the start of memory.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_setOperandValueFromMemory(
-    ANeuralNetworksModel* model, int32_t index,
-    const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
-  LOAD_FUNCTION(ANeuralNetworksModel_setOperandValueFromMemory);
-  EXECUTE_FUNCTION_RETURN(model, index, memory, offset, length);
-}
-
-/**
- * Add an operation to a model.
- *
- * @param model The model to be modified.
- * @param type The type of the operation.
- * @param inputCount The number of entries in the inputs array.
- * @param inputs An array of indexes identifying each operand.
- * @param outputCount The number of entries in the outputs array.
- * @param outputs An array of indexes identifying each operand.
- *
- * The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model,
-                                             ANeuralNetworksOperationType type,
-                                             uint32_t inputCount,
-                                             const uint32_t* inputs,
-                                             uint32_t outputCount,
-                                             const uint32_t* outputs) {
-  LOAD_FUNCTION(ANeuralNetworksModel_addOperation);
-  EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount,
-                          outputs);
-}
-
-/**
- * Specifies which operands will be the model's inputs and outputs.
- *
- * An operand cannot be used for both input and output. Doing so will
- * return an error.
- *
- * @param model The model to be modified.
- * @param inputCount The number of entries in the inputs array.
- * @param inputs An array of indexes identifying the input operands.
- * @param outputCount The number of entries in the outputs array.
- * @param outputs An array of indexes identifying the output operands.
- *
- * The operands specified by inputs and outputs must have been
- * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- *
- */
-inline int ANeuralNetworksModel_identifyInputsAndOutputs(
-    ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
-    uint32_t outputCount, const uint32_t* outputs) {
-  LOAD_FUNCTION(ANeuralNetworksModel_identifyInputsAndOutputs);
-  EXECUTE_FUNCTION_RETURN(model, inputCount, inputs, outputCount, outputs);
-}
-
-/**
- * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
- * calculated with range and/or precision as low as that of the IEEE 754 16-bit
- * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
- * must be calculated using at least the range and precision of the IEEE 754
- * 32-bit floating-point format.
- *
- * @param model The model to be modified.
- * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
- *              calculated with range and/or precision as low as that of the
- *              IEEE 754 16-bit floating point format. 'false' indicates
- *              {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
- *              at least the range and precision of the IEEE 754 32-bit floating
- *              point format.
- *
- * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
- * been called will return an error.
- *
- * Available since API level 28.
- *
- * See {@link ANeuralNetworksModel} for information on multithreaded usage.
- */
-inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(
-    ANeuralNetworksModel* model, bool allow) {
-  LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
-  EXECUTE_FUNCTION_RETURN(model, allow);
-}
-
-/**
- * Create a {@link ANeuralNetworksCompilation} to compile the given model.
- * This only creates the object. Compilation is only performed once
- * {@link ANeuralNetworksCompilation_start} is invoked.
- *
- * <p>The provided model must outlive the compilation.</p>
- *
- * The model must already have been finished by a call to
- * {@link ANeuralNetworksModel_finish}.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param model The {@link ANeuralNetworksModel} to be compiled.
- * @param compilation The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- *         if the model is invalid.
- */
-inline int ANeuralNetworksCompilation_create(
-    ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) {
-  LOAD_FUNCTION(ANeuralNetworksCompilation_create);
-  EXECUTE_FUNCTION_RETURN(model, compilation);
-}
-
-/**
- * Destroy a compilation.
- *
- * <p>If called on a compilation for which
- * {@link ANeuralNetworksCompilation_start} has been called, the
- * function will return immediately but will mark the compilation to be deleted
- * once the compilation completes. The {@link ANeuralNetworksCompilation_wait}
- * will return ERROR_DELETED.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be destroyed. Passing NULL is
- * acceptable and results in no operation.
- */
-inline void ANeuralNetworksCompilation_free(
-    ANeuralNetworksCompilation* compilation) {
-  LOAD_FUNCTION(ANeuralNetworksCompilation_free);
-  EXECUTE_FUNCTION(compilation);
-}
-
-/**
- * Sets the execution preference.
- *
- * <p>Provides guidance to the runtime when trade-offs are possible.</p>
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @param compilation The compilation to be modified.
- * @param preference Either {@link PREFER_LOW_POWER},
- *                  {@link PREFER_SINGLE_FAST_ANSWER}, or
- *                  {@link PREFER_SUSTAINED_SPEED}.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksCompilation_setPreference(
-    ANeuralNetworksCompilation* compilation, int32_t preference) {
-  LOAD_FUNCTION(ANeuralNetworksCompilation_setPreference);
-  EXECUTE_FUNCTION_RETURN(compilation, preference);
-}
-
-/**
- * Waits until the compilation completes.
- *
- * More than one thread can wait on a compilation. When the compilation
- * completes, all threads will be released.
- *
- * See {@link ANeuralNetworksCompilation} for information on multithreaded
- * usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the compilation completed normally.
- */
-inline int ANeuralNetworksCompilation_finish(
-    ANeuralNetworksCompilation* compilation) {
-  LOAD_FUNCTION(ANeuralNetworksCompilation_finish);
-  EXECUTE_FUNCTION_RETURN(compilation);
-}
-/**
- * Create a {@link ANeuralNetworksExecution} to apply the given compilation.
- * This only creates the object. Computation is only performed once
- * {@link ANeuralNetworksExecution_startCompute} is invoked.
- *
- * <p>The provided compilation must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
- * @param execution The newly created object or NULL if unsuccessful.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
- *         if the compilation is invalid.
- */
-inline int ANeuralNetworksExecution_create(
-    ANeuralNetworksCompilation* compilation,
-    ANeuralNetworksExecution** execution) {
-  LOAD_FUNCTION(ANeuralNetworksExecution_create);
-  EXECUTE_FUNCTION_RETURN(compilation, execution);
-}
-
-/**
- * Destroy an execution.
- *
- * <p>If called on an execution for which
- * {@link ANeuralNetworksExecution_startCompute} has been called, the
- * function will return immediately but will mark the execution to be deleted
- * once the computation completes.   The {link ANeuralNetworksExecution_wait}
- * will return ANEURALNETWORKS_ERROR_DELETED.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be destroyed. Passing NULL is acceptable
- * and results in no operation.
- */
-inline void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) {
-  LOAD_FUNCTION(ANeuralNetworksExecution_free);
-  EXECUTE_FUNCTION(execution);
-}
-
-/**
- * Associate a user buffer with an input of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided buffer must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the input argument we are setting. It is
- *              an index into the lists passed to
- *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- *              the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This should be used to specify the
- *             dimensions that were set to 0 when the operand was added to the
- *             model. All other properties of the type must be the same as
- *             specified in the model. If the type is the same as specified
- *             when the model was built, NULL can be passed.
- * @param buffer The buffer containing the data.
- * @param length The length in bytes of the buffer.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the input.
- */
-inline int ANeuralNetworksExecution_setInput(
-    ANeuralNetworksExecution* execution, int32_t index,
-    const ANeuralNetworksOperandType* type, const void* buffer, size_t length) {
-  LOAD_FUNCTION(ANeuralNetworksExecution_setInput);
-  EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
-}
-
-/**
- * Associate part of a memory object with an input of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided memory must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the input argument we are setting. It is
- *              an index into the lists passed to
- *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- *              the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- *             dimensions that were set to 0 when the operand was added to the
- *             model. All other values must be the same as specified in the
- *             model. If the type is the same as specified when the model
- *             was built, NULL can be passed.
- * @param memory The memory containing the data.
- * @param offset This specifies the location of the data within the memory.
- *               The offset is in bytes from the start of memory.
- * @param length The size in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the input.
- */
-inline int ANeuralNetworksExecution_setInputFromMemory(
-    ANeuralNetworksExecution* execution, int32_t index,
-    const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
-    size_t offset, size_t length) {
-  LOAD_FUNCTION(ANeuralNetworksExecution_setInputFromMemory);
-  EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
-}
+typedef int (*ASharedMemory_create_fn)(const char* name, size_t size);
 
-/**
- * Associate a user buffer with an output of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided buffer must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the output argument we are setting. It is
- *              an index into the lists passed to
- *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- *              the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- *             dimensions that were set to 0 when the operand was added to the
- *             model. All other values must be the same as specified in the
- *             model. If the type is the same as specified when the model
- *             was built, NULL can be passed.
- * @param buffer The buffer where the data is to be written.
- * @param length The length in bytes of the buffer.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the output.
- */
-inline int ANeuralNetworksExecution_setOutput(
-    ANeuralNetworksExecution* execution, int32_t index,
-    const ANeuralNetworksOperandType* type, void* buffer, size_t length) {
-  LOAD_FUNCTION(ANeuralNetworksExecution_setOutput);
-  EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length);
-}
+struct NnApi {
+  bool nnapi_exists;
+  int32_t android_sdk_version;
+
+  /**
+   * Creates a shared memory object from a file descriptor.
+   *
+   * The shared memory is backed by a file descriptor via mmap.
+   * See {@link ANeuralNetworksMemory} for a description on how to use
+   * this shared memory.
+   *
+   * @param size The requested size in bytes.
+   *             Must not be larger than the file size.
+   * @param prot The desired memory protection for the mapping.
+   *             It is either PROT_NONE or the bitwise OR of one or
+   *             more of the following flags: PROT_READ, PROT_WRITE.
+   * @param fd The requested file descriptor.
+   *           The file descriptor has to be mmap-able. The file
+   *           descriptor will be duplicated.
+   * @param offset The offset to the beginning of the file of the area to map.
+   *               The offset has to be aligned to a page size.
+   * @param memory The memory object to be created.
+   *               Set to NULL if unsuccessful.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if the request completed normally.
+   */
+  int (*ANeuralNetworksMemory_createFromFd)(size_t size, int protect, int fd,
+                                            size_t offset,
+                                            ANeuralNetworksMemory** memory);
+
+  /**
+   * Delete a memory object.
+   *
+   * Destroys the object used by the run time to keep track of the memory.
+   * This will free the underlying actual memory if no other code has open
+   * handles to this memory.
+   *
+   * @param memory The memory object to be freed.
+   */
+  void (*ANeuralNetworksMemory_free)(ANeuralNetworksMemory* memory);
+
+  /**
+   * Create an empty {@link ANeuralNetworksModel}.
+   *
+   * <p>This only creates the object. Computation is performed once
+   * {@link ANeuralNetworksExecution_startCompute} is invoked.
+   *
+   * The model should be constructed with calls to
+   * {@link ANeuralNetworksModel_addOperation} and
+   * {@link ANeuralNetworksModel_addOperand}
+   *
+   * <p>{@link ANeuralNetworksModel_finish} should be called once the model
+   * has been fully constructed.</p>
+   *
+   * <p>{@link ANeuralNetworksModel_free} should be called once the model
+   * is no longer needed.</p>
+   *
+   * @param model The {@link ANeuralNetworksModel} to be created.
+   *              Set to NULL if unsuccessful.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksModel_create)(ANeuralNetworksModel** model);
+
+  /**
+   * Destroy a model.
+   *
+   * The model need not have been finished by a call to
+   * {@link ANeuralNetworksModel_finish}.
+   *
+   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   *
+   * @param model The model to be destroyed. Passing NULL is acceptable and
+   *              results in no operation.
+   */
+  void (*ANeuralNetworksModel_free)(ANeuralNetworksModel* model);
+
+  /**
+   * Indicate that we have finished modifying a model. Required before
+   * calling {@link ANeuralNetworksCompilation_compile}.
+   *
+   * An application is responsible to make sure that no other thread uses
+   * the model at the same time.
+   *
+   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   *
+   * @param model The model to be finished.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksModel_finish)(ANeuralNetworksModel* model);
+
+  /**
+   * Add an operand to a model.
+   *
+   * The order in which the operands are added is important. The first one added
+   * to a model will have the index value 0, the second 1, etc. These indexes
+   * are used as operand identifiers in
+   * {@link ANeuralNetworksModel_addOperation},
+   * {@link ANeuralNetworksExecution_setInput},
+   * {@link ANeuralNetworksExecution_setInputFromMemory},
+   * {@link ANeuralNetworksExecution_setOutput},
+   * {@link ANeuralNetworksExecution_setOutputFromMemory} and
+   * {@link ANeuralNetworksExecution_setOperandValue}.
+   *
+   * To build a model that can accommodate inputs of various sizes, as you may
+   * want to do for a CNN, set the size of the dimensions that will vary at run
+   * time to 0. If you do so, provide the full dimensions when calling
+   * {@link ANeuralNetworksExecution_setInput} or {@link
+   * ANeuralNetworksExecution_setInputFromMemory}.
+   *
+   * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+   * been called will return an error.
+   *
+   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   *
+   * @param model The model to be modified.
+   * @param type The {@link ANeuralNetworksOperandType} that describes the shape
+   * of the operand.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksModel_addOperand)(
+      ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type);
+
+  /**
+   * Sets an operand to a constant value.
+   *
+   * For scalar values, the content of buffer is copied into the model.
+   *
+   * For tensor values, a pointer to the buffer is stored within the model.
+   * The application is responsible for not changing the content of this region
+   * until all executions using this model have completed. As the data may
+   * be copied during processing, modifying the data after this call yields
+   * undefined results.
+   *
+   * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+   * been called will return an error.
+   *
+   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   *
+   * @param model The model to be modified.
+   * @param index The index of the model operand we're setting.
+   * @param buffer A pointer to the data to use.
+   * @param length The size in bytes of the data value.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksModel_setOperandValue)(ANeuralNetworksModel* model,
+                                              int32_t index, const void* buffer,
+                                              size_t length);
+
+  /**
+   * Sets an operand to a value stored in a memory object.
+   *
+   * The content of the memory is not copied. A reference to that memory is
+   * stored inside the model. The application is responsible for not changing
+   * the content of the memory region until all executions using this model have
+   * completed.
+   * As the data may be copied during processing, modifying the data after this
+   * call yields undefined results.
+   *
+   * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+   * been called will return an error.
+   *
+   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   *
+   * @param model The model to be modified.
+   * @param index The index of the model operand we're setting.
+   * @param buffer A pointer to the data to use.
+   * @param memory The memory containing the data.
+   * @param offset This specifies the location of the data within the memory.
+   *               The offset is in bytes from the start of memory.
+   * @param length The size in bytes of the data value.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksModel_setOperandValueFromMemory)(
+      ANeuralNetworksModel* model, int32_t index,
+      const ANeuralNetworksMemory* memory, size_t offset, size_t length);
+
+  /**
+   * Add an operation to a model.
+   *
+   * @param model The model to be modified.
+   * @param type The type of the operation.
+   * @param inputCount The number of entries in the inputs array.
+   * @param inputs An array of indexes identifying each operand.
+   * @param outputCount The number of entries in the outputs array.
+   * @param outputs An array of indexes identifying each operand.
+   *
+   * The operands specified by inputs and outputs must have been
+   * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
+   *
+   * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+   * been called will return an error.
+   *
+   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksModel_addOperation)(ANeuralNetworksModel* model,
+                                           ANeuralNetworksOperationType type,
+                                           uint32_t inputCount,
+                                           const uint32_t* inputs,
+                                           uint32_t outputCount,
+                                           const uint32_t* outputs);
+
+  /**
+   * Specifies which operands will be the model's inputs and outputs.
+   *
+   * An operand cannot be used for both input and output. Doing so will
+   * return an error.
+   *
+   * @param model The model to be modified.
+   * @param inputCount The number of entries in the inputs array.
+   * @param inputs An array of indexes identifying the input operands.
+   * @param outputCount The number of entries in the outputs array.
+   * @param outputs An array of indexes identifying the output operands.
+   *
+   * The operands specified by inputs and outputs must have been
+   * previously added by calls to {@link ANeuralNetworksModel_addOperand}.
+   *
+   * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+   * been called will return an error.
+   *
+   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   *
+   */
+  int (*ANeuralNetworksModel_identifyInputsAndOutputs)(
+      ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
+      uint32_t outputCount, const uint32_t* outputs);
+
+  /**
+   * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
+   * calculated with range and/or precision as low as that of the
+   * IEEE 754 16-bit floating-point format. By default,
+   * {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using at least
+   * the range and precision of the IEEE 754 32-bit floating-point format.
+   *
+   * @param model The model to be modified.
+   * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
+   *              calculated with range and/or precision as low as that of the
+   *              IEEE 754 16-bit floating point format. 'false' indicates
+   *              {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated
+   *              using at least the range and precision of the IEEE 754 32-bit
+   *              floating point format.
+   *
+   * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+   * been called will return an error.
+   *
+   * Available since API level 28.
+   *
+   * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+   */
+  int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16)(
+      ANeuralNetworksModel* model, bool allow);
+
+  /**
+   * Create a {@link ANeuralNetworksCompilation} to compile the given model.
+   * This only creates the object. Compilation is only performed once
+   * {@link ANeuralNetworksCompilation_start} is invoked.
+   *
+   * <p>The provided model must outlive the compilation.</p>
+   *
+   * The model must already have been finished by a call to
+   * {@link ANeuralNetworksModel_finish}.
+   *
+   * See {@link ANeuralNetworksCompilation} for information on multithreaded
+   * usage.
+   *
+   * @param model The {@link ANeuralNetworksModel} to be compiled.
+   * @param compilation The newly created object or NULL if unsuccessful.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+   *         if the model is invalid.
+   */
+  int (*ANeuralNetworksCompilation_create)(
+      ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation);
+
+  /**
+   * Destroy a compilation.
+   *
+   * <p>If called on a compilation for which
+   * {@link ANeuralNetworksCompilation_start} has been called, the
+   * function will return immediately but will mark the compilation to be
+   * deleted once the compilation completes. The
+   * {@link ANeuralNetworksCompilation_wait} will return ERROR_DELETED.
+   *
+   * See {@link ANeuralNetworksCompilation} for information on multithreaded
+   * usage.
+   *
+   * @param compilation The compilation to be destroyed. Passing NULL is
+   * acceptable and results in no operation.
+   */
+  void (*ANeuralNetworksCompilation_free)(
+      ANeuralNetworksCompilation* compilation);
+
+  /**
+   * Sets the execution preference.
+   *
+   * <p>Provides guidance to the runtime when trade-offs are possible.</p>
+   *
+   * See {@link ANeuralNetworksCompilation} for information on multithreaded
+   * usage.
+   *
+   * @param compilation The compilation to be modified.
+   * @param preference Either {@link PREFER_LOW_POWER},
+   *                  {@link PREFER_SINGLE_FAST_ANSWER}, or
+   *                  {@link PREFER_SUSTAINED_SPEED}.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksCompilation_setPreference)(
+      ANeuralNetworksCompilation* compilation, int32_t preference);
+
+  /**
+   * Waits until the compilation completes.
+   *
+   * More than one thread can wait on a compilation. When the compilation
+   * completes, all threads will be released.
+   *
+   * See {@link ANeuralNetworksCompilation} for information on multithreaded
+   * usage.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if the compilation completed normally.
+   */
+  int (*ANeuralNetworksCompilation_finish)(
+      ANeuralNetworksCompilation* compilation);
+
+  /**
+   * Create a {@link ANeuralNetworksExecution} to apply the given compilation.
+   * This only creates the object. Computation is only performed once
+   * {@link ANeuralNetworksExecution_startCompute} is invoked.
+   *
+   * <p>The provided compilation must outlive the execution.</p>
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated.
+   * @param execution The newly created object or NULL if unsuccessful.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA
+   *         if the compilation is invalid.
+   */
+  int (*ANeuralNetworksExecution_create)(
+      ANeuralNetworksCompilation* compilation,
+      ANeuralNetworksExecution** execution);
+
+  /**
+   * Destroy an execution.
+   *
+   * <p>If called on an execution for which
+   * {@link ANeuralNetworksExecution_startCompute} has been called, the
+   * function will return immediately but will mark the execution to be deleted
+   * once the computation completes.   The {link ANeuralNetworksExecution_wait}
+   * will return ANEURALNETWORKS_ERROR_DELETED.
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be destroyed. Passing NULL is acceptable
+   * and results in no operation.
+   */
+  void (*ANeuralNetworksExecution_free)(ANeuralNetworksExecution* execution);
+
+  /**
+   * Associate a user buffer with an input of the model of the
+   * {@link ANeuralNetworksExecution}.
+   *
+   * <p>The provided buffer must outlive the execution.</p>
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be modified.
+   * @param index The index of the input argument we are setting. It is
+   *              an index into the lists passed to
+   *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is
+   *              not the index associated with {@link
+   * ANeuralNetworksModel_addOperand}.
+   * @param type The type of the operand. This should be used to specify the
+   *             dimensions that were set to 0 when the operand was added to the
+   *             model. All other properties of the type must be the same as
+   *             specified in the model. If the type is the same as specified
+   *             when the model was built, NULL can be passed.
+   * @param buffer The buffer containing the data.
+   * @param length The length in bytes of the buffer.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
+   * the name is not recognized or the buffer is too small for the input.
+   */
+  int (*ANeuralNetworksExecution_setInput)(
+      ANeuralNetworksExecution* execution, int32_t index,
+      const ANeuralNetworksOperandType* type, const void* buffer,
+      size_t length);
+
+  /**
+   * Associate part of a memory object with an input of the model of the
+   * {@link ANeuralNetworksExecution}.
+   *
+   * <p>The provided memory must outlive the execution.</p>
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be modified.
+   * @param index The index of the input argument we are setting. It is
+   *              an index into the lists passed to
+   *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is
+   *              not the index associated with {@link
+   * ANeuralNetworksModel_addOperand}.
+   * @param type The type of the operand. This can be used to specify the
+   *             dimensions that were set to 0 when the operand was added to the
+   *             model. All other values must be the same as specified in the
+   *             model. If the type is the same as specified when the model
+   *             was built, NULL can be passed.
+   * @param memory The memory containing the data.
+   * @param offset This specifies the location of the data within the memory.
+   *               The offset is in bytes from the start of memory.
+   * @param length The size in bytes of the data value.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
+   * the name is not recognized or the buffer is too small for the input.
+   */
+  int (*ANeuralNetworksExecution_setInputFromMemory)(
+      ANeuralNetworksExecution* execution, int32_t index,
+      const ANeuralNetworksOperandType* type,
+      const ANeuralNetworksMemory* memory, size_t offset, size_t length);
+
+  /**
+   * Associate a user buffer with an output of the model of the
+   * {@link ANeuralNetworksExecution}.
+   *
+   * <p>The provided buffer must outlive the execution.</p>
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be modified.
+   * @param index The index of the output argument we are setting. It is
+   *              an index into the lists passed to
+   *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is
+   *              not the index associated with {@link
+   * ANeuralNetworksModel_addOperand}.
+   * @param type The type of the operand. This can be used to specify the
+   *             dimensions that were set to 0 when the operand was added to the
+   *             model. All other values must be the same as specified in the
+   *             model. If the type is the same as specified when the model
+   *             was built, NULL can be passed.
+   * @param buffer The buffer where the data is to be written.
+   * @param length The length in bytes of the buffer.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
+   * the name is not recognized or the buffer is too small for the output.
+   */
+  int (*ANeuralNetworksExecution_setOutput)(
+      ANeuralNetworksExecution* execution, int32_t index,
+      const ANeuralNetworksOperandType* type, void* buffer, size_t length);
+
+  /**
+   * Associate part of a memory object with an output of the model of the
+   * {@link ANeuralNetworksExecution}.
+   *
+   * <p>The provided memory must outlive the execution.</p>
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be modified.
+   * @param index The index of the output argument we are setting. It is
+   *              an index into the lists passed to
+   *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is
+   *              not the index associated with {@link
+   * ANeuralNetworksModel_addOperand}.
+   * @param type The type of the operand. This can be used to specify the
+   *             dimensions that were set to 0 when the operand was added to the
+   *             model. All other values must be the same as specified in the
+   *             model. If the type is the same as specified when the model
+   *             was built, NULL can be passed.
+   * @param memory The memory where the data is to be stored.
+   * @param offset This specifies the location of the data within the memory.
+   *               The offset is in bytes from the start of memory.
+   * @param length The length in bytes of the data value.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
+   * the name is not recognized or the buffer is too small for the output.
+   */
+  int (*ANeuralNetworksExecution_setOutputFromMemory)(
+      ANeuralNetworksExecution* execution, int32_t index,
+      const ANeuralNetworksOperandType* type,
+      const ANeuralNetworksMemory* memory, size_t offset, size_t length);
+
+  /**
+   * Schedule evaluation of the execution.
+   *
+   * <p>Schedules evaluation of the execution. Once the model has been
+   * applied and the outputs are ready to be consumed, the execution will be
+   * signaled. Use {@link ANeuralNetworksExecution_wait} to wait for that
+   * signal.
+   * </p>
+   *
+   * Multiple executions can be scheduled and evaluated concurrently, and
+   * compilations can be performed concurrently with executions. The runtime
+   * makes no guarantee on the ordering of the completion of compilations and
+   * executions. If it's important to the application, the application should
+   * enforce the ordering by using {@link ANeuralNetworksCompilation_wait} and
+   * {@link ANeuralNetworksExecution_wait}.
+   *
+   * ANeuralNetworksExecution_wait must be called to recuperate the resources
+   * used by the execution.
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @param execution The execution to be scheduled and executed.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if successful.
+   */
+  int (*ANeuralNetworksExecution_startCompute)(
+      ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event);
+
+  /**
+   * Waits until the execution completes.
+   *
+   * More than one thread can wait on an event. When the execution completes,
+   * all threads will be released.
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   *
+   * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
+   */
+  int (*ANeuralNetworksEvent_wait)(ANeuralNetworksEvent* event);
 
-/**
- * Associate part of a memory object with an output of the model of the
- * {@link ANeuralNetworksExecution}.
- *
- * <p>The provided memory must outlive the execution.</p>
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be modified.
- * @param index The index of the output argument we are setting. It is
- *              an index into the lists passed to
- *              {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not
- *              the index associated with {@link
- * ANeuralNetworksModel_addOperand}.
- * @param type The type of the operand. This can be used to specify the
- *             dimensions that were set to 0 when the operand was added to the
- *             model. All other values must be the same as specified in the
- *             model. If the type is the same as specified when the model
- *             was built, NULL can be passed.
- * @param memory The memory where the data is to be stored.
- * @param offset This specifies the location of the data within the memory.
- *               The offset is in bytes from the start of memory.
- * @param length The length in bytes of the data value.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if
- * the name is not recognized or the buffer is too small for the output.
- */
-inline int ANeuralNetworksExecution_setOutputFromMemory(
-    ANeuralNetworksExecution* execution, int32_t index,
-    const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory,
-    size_t offset, size_t length) {
-  LOAD_FUNCTION(ANeuralNetworksExecution_setOutputFromMemory);
-  EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length);
-}
+  /**
+   * Destroys the event.
+   *
+   * See {@link ANeuralNetworksExecution} for information on multithreaded
+   * usage.
+   */
+  void (*ANeuralNetworksEvent_free)(ANeuralNetworksEvent* event);
 
-/**
- * Schedule evaluation of the execution.
- *
- * <p>Schedules evaluation of the execution. Once the model has been
- * applied and the outputs are ready to be consumed, the execution will be
- * signaled. Use {@link ANeuralNetworksExecution_wait} to wait for that signal.
- * </p>
- *
- * Multiple executions can be scheduled and evaluated concurrently, and
- * compilations can be performed concurrently with executions. The runtime makes
- * no guarantee on the ordering of the completion of compilations and
- * executions. If it's important to the application, the application should
- * enforce the ordering by using {@link ANeuralNetworksCompilation_wait} and
- * {@link ANeuralNetworksExecution_wait}.
- *
- * ANeuralNetworksExecution_wait must be called to recuperate the resources used
- * by the execution.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @param execution The execution to be scheduled and executed.
- *
- * @return ANEURALNETWORKS_NO_ERROR if successful.
- */
-inline int ANeuralNetworksExecution_startCompute(
-    ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) {
-  LOAD_FUNCTION(ANeuralNetworksExecution_startCompute);
-  EXECUTE_FUNCTION_RETURN(execution, event);
-}
+  // ASharedMemory_create was added in Android 8.0, so safe to use with NNAPI
+  // which was added in 8.1.
+  int (*ASharedMemory_create)(const char* name, size_t size);
 
-/**
- * Waits until the execution completes.
- *
- * More than one thread can wait on an event. When the execution completes,
- * all threads will be released.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
- *
- * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally.
- */
-inline int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) {
-  LOAD_FUNCTION(ANeuralNetworksEvent_wait);
-  EXECUTE_FUNCTION_RETURN(event);
-}
+  /**/
+};
 
 /**
- * Destroys the event.
- *
- * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
+ * Load the NNAPI implementation from the shared libraries.
+ * The NnApi structure is filled with all the pointers. If one function doesn't
+ * exist, a null pointer is stored.
  */
-inline void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) {
-  LOAD_FUNCTION(ANeuralNetworksEvent_free);
-  EXECUTE_FUNCTION(event);
-}
-
-/**/
+const NnApi* NnApiImplementation();
 
 #endif  // TENSORFLOW_LITE_NNAPI_NEURALNETWORKSSHIM_H_
diff --git a/tensorflow/lite/nnapi/nnapi_lib_test.cc b/tensorflow/lite/nnapi/nnapi_lib_test.cc
new file mode 100644
index 0000000000..46b90115b6
--- /dev/null
+++ b/tensorflow/lite/nnapi/nnapi_lib_test.cc
@@ -0,0 +1,90 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <gtest/gtest.h>
+#include "tensorflow/lite/nnapi/NeuralNetworksShim.h"
+
+namespace {
+
+TEST(NnapiLibTest, NnApiImplementation) {
+  const NnApi* nnapi_ = NnApiImplementation();
+  EXPECT_NE(nnapi_, nullptr);
+#ifdef __ANDROID__
+  EXPECT_TRUE(nnapi_->nnapi_exists);
+  EXPECT_GT(nnapi_->android_sdk_version, 0);
+  EXPECT_NE(nnapi_->ANeuralNetworksMemory_createFromFd, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksMemory_free, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksModel_create, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksModel_free, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksModel_finish, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksModel_addOperand, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksModel_setOperandValue, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksModel_setOperandValueFromMemory, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksModel_addOperation, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs, nullptr);
+  if (nnapi_->android_sdk_version >= 28) {
+    // relaxComputationFloat32toFloat16 only available with Android 9.0 (P).
+    EXPECT_NE(nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16,
+              nullptr);
+  } else {
+    EXPECT_EQ(nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16,
+              nullptr);
+  }
+  EXPECT_NE(nnapi_->ANeuralNetworksCompilation_create, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksCompilation_free, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksCompilation_setPreference, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksCompilation_finish, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksExecution_create, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksExecution_free, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksExecution_setInput, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksExecution_setInputFromMemory, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksExecution_setOutput, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksExecution_setOutputFromMemory, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksExecution_startCompute, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksEvent_wait, nullptr);
+  EXPECT_NE(nnapi_->ANeuralNetworksEvent_free, nullptr);
+  EXPECT_NE(nnapi_->ASharedMemory_create, nullptr);
+#else
+  EXPECT_FALSE(nnapi_->nnapi_exists);
+  EXPECT_EQ(nnapi_->android_sdk_version, 0);
+  EXPECT_EQ(nnapi_->ANeuralNetworksMemory_createFromFd, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksMemory_free, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_create, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_free, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_finish, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_addOperand, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_setOperandValue, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_setOperandValueFromMemory, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_addOperation, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_identifyInputsAndOutputs, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksModel_relaxComputationFloat32toFloat16,
+            nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksCompilation_create, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksCompilation_free, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksCompilation_setPreference, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksCompilation_finish, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksExecution_create, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksExecution_free, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksExecution_setInput, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksExecution_setInputFromMemory, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksExecution_setOutput, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksExecution_setOutputFromMemory, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksExecution_startCompute, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksEvent_wait, nullptr);
+  EXPECT_EQ(nnapi_->ANeuralNetworksEvent_free, nullptr);
+  EXPECT_EQ(nnapi_->ASharedMemory_create, nullptr);
+#endif
+}
+
+}  // namespace
diff --git a/tensorflow/lite/nnapi_delegate.cc b/tensorflow/lite/nnapi_delegate.cc
index 26d75696a1..202f38ba9c 100644
--- a/tensorflow/lite/nnapi_delegate.cc
+++ b/tensorflow/lite/nnapi_delegate.cc
@@ -84,56 +84,27 @@ void logError(const char* format, ...) {
 static const int64_t kOperandIdNotSet = -1;
 static const int64_t kOperandNotNeeded = -2;
 
-namespace {
-
-int32_t GetAndroidSdkVersion() {
-#ifdef __ANDROID__
-  const char* sdkProp = "ro.build.version.sdk";
-  char sdkVersion[PROP_VALUE_MAX];
-  int length = __system_property_get(sdkProp, sdkVersion);
-  if (length != 0) {
-    for (int i = 0; i < length; ++i) {
-      int digit = sdkVersion[i] - '0';
-      if (digit < 0 || digit > 9) {
-        // Non-numeric SDK version, assume it's higher then expected;
-        return 0xFFFF;
-      }
-    }
-    return atoi(sdkVersion);
-  }
-  FATAL("No %s prop", sdkProp);
-#endif  // __ANDROID__
-  return 0;
-}
-
-int32_t GetAndroidSdkVersionCached() {
-  static int32_t androidSdkVersion = GetAndroidSdkVersion();
-  return androidSdkVersion;
-}
-
-}  // namespace
-
 NNAPIAllocation::NNAPIAllocation(const char* filename,
                                  ErrorReporter* error_reporter)
     : MMAPAllocation(filename, error_reporter) {
   if (mmapped_buffer_ != MAP_FAILED)
-    CHECK_NN(ANeuralNetworksMemory_createFromFd(buffer_size_bytes_, PROT_READ,
-                                                mmap_fd_, 0, &handle_));
+    CHECK_NN(NnApiImplementation()->ANeuralNetworksMemory_createFromFd(
+        buffer_size_bytes_, PROT_READ, mmap_fd_, 0, &handle_));
 }
 
 NNAPIAllocation::~NNAPIAllocation() {
   if (handle_) {
-    ANeuralNetworksMemory_free(handle_);
+    NnApiImplementation()->ANeuralNetworksMemory_free(handle_);
   }
 }
 
 NNAPIDelegate::~NNAPIDelegate() {
   if (nn_compiled_model_) {
-    ANeuralNetworksCompilation_free(nn_compiled_model_);
+    NnApiImplementation()->ANeuralNetworksCompilation_free(nn_compiled_model_);
     nn_compiled_model_ = nullptr;
   }
   if (nn_model_) {
-    ANeuralNetworksModel_free(nn_model_);
+    NnApiImplementation()->ANeuralNetworksModel_free(nn_model_);
     nn_model_ = nullptr;
     // TODO(aselle): Is this thread-safe and callable multiple times?
   }
@@ -145,6 +116,7 @@ TfLiteStatus addTensorOperands(tflite::Subgraph* subgraph,
                                ANeuralNetworksModel* nn_model,
                                uint32_t* no_of_operands_added,
                                std::vector<int64_t>* nnapi_ids) {
+  const NnApi* nnapi = NnApiImplementation();
   uint32_t next_id = 0;
   for (size_t i = 0; i < subgraph->tensors_size(); i++) {
     // Skip temporaries and RNN back-edges.
@@ -198,24 +170,24 @@ TfLiteStatus addTensorOperands(tflite::Subgraph* subgraph,
         nn_type, static_cast<uint32_t>(tensor->dims->size),
         reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
     RETURN_ERROR_IF_NN_FAILED(
-        ANeuralNetworksModel_addOperand(nn_model, &operand_type));
+        nnapi->ANeuralNetworksModel_addOperand(nn_model, &operand_type));
     // TODO(aselle): Based on Michael's suggestion, limiting this to read
     // only memory
     if (tensor->allocation_type == kTfLiteMmapRo) {
       if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>(
               static_cast<const Allocation*>(tensor->allocation))) {
         RETURN_ERROR_IF_NN_FAILED(
-            ANeuralNetworksModel_setOperandValueFromMemory(
+            nnapi->ANeuralNetworksModel_setOperandValueFromMemory(
                 nn_model, next_id, alloc->memory(),
                 alloc->offset(tensor->data.raw), tensor->bytes));
       } else {
-        RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue(
+        RETURN_ERROR_IF_NN_FAILED(nnapi->ANeuralNetworksModel_setOperandValue(
             nn_model, next_id, tensor->data.raw, tensor->bytes));
       }
     } else if (tensor->bytes == 0) {
       // These size 0 tensors are optional tensors reserved.
-      RETURN_ERROR_IF_NN_FAILED(
-          ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0));
+      RETURN_ERROR_IF_NN_FAILED(nnapi->ANeuralNetworksModel_setOperandValue(
+          nn_model, next_id, nullptr, 0));
     }
 
     ++next_id;
@@ -244,6 +216,7 @@ TfLiteStatus AddOpsAndParams(
     uint32_t next_id, std::vector<int>* model_state_inputs,
     std::vector<int>* model_state_outputs,
     const std::vector<int64_t>& tensor_id_to_nnapi_id) {
+  const NnApi* nnapi = NnApiImplementation();
   for (size_t i = 0; i < subgraph->nodes_size(); i++) {
     const auto* node_and_registration = subgraph->node_and_registration(i);
     const TfLiteNode& node = node_and_registration->first;
@@ -258,21 +231,21 @@ TfLiteStatus AddOpsAndParams(
     MapAndAddTensorIds(node.outputs->data, node.outputs->size,
                        &augmented_outputs, tensor_id_to_nnapi_id);
 
-    auto add_scalar_int32 = [&nn_model, &augmented_inputs,
+    auto add_scalar_int32 = [nnapi, &nn_model, &augmented_inputs,
                              &next_id](int value) {
       ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_INT32};
-      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
-      CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
-                                                    sizeof(int32_t)))
+      CHECK_NN(nnapi->ANeuralNetworksModel_addOperand(nn_model, &operand_type))
+      CHECK_NN(nnapi->ANeuralNetworksModel_setOperandValue(
+          nn_model, next_id, &value, sizeof(int32_t)))
       augmented_inputs.push_back(next_id++);
     };
 
-    auto add_scalar_float32 = [&nn_model, &augmented_inputs,
+    auto add_scalar_float32 = [nnapi, &nn_model, &augmented_inputs,
                                &next_id](float value) {
       ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_FLOAT32};
-      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
-      CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
-                                                    sizeof(float)))
+      CHECK_NN(nnapi->ANeuralNetworksModel_addOperand(nn_model, &operand_type))
+      CHECK_NN(nnapi->ANeuralNetworksModel_setOperandValue(
+          nn_model, next_id, &value, sizeof(float)))
       augmented_inputs.push_back(next_id++);
     };
 
@@ -281,8 +254,8 @@ TfLiteStatus AddOpsAndParams(
           .type = ANEURALNETWORKS_TENSOR_INT32,
           .dimensionCount = 1,
           .dimensions = &num_values};
-      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
-      CHECK_NN(ANeuralNetworksModel_setOperandValue(
+      CHECK_NN(nnapi->ANeuralNetworksModel_addOperand(nn_model, &operand_type))
+      CHECK_NN(nnapi->ANeuralNetworksModel_setOperandValue(
           nn_model, next_id, values, sizeof(int32_t) * num_values));
       augmented_inputs.push_back(next_id++);
     };
@@ -291,15 +264,16 @@ TfLiteStatus AddOpsAndParams(
     // For each state_out tensor, a corresponding state_in operand needs to be
     // created for NNAPI.
     auto duplicate_state_tensor_float32 =
-        [subgraph, &nn_model, &next_id, &augmented_inputs, &model_state_inputs,
-         &model_state_outputs](int tensor_id) {
+        [nnapi, subgraph, &nn_model, &next_id, &augmented_inputs,
+         &model_state_inputs, &model_state_outputs](int tensor_id) {
           const TfLiteTensor* tensor = subgraph->tensor(tensor_id);
           ANeuralNetworksOperandType operand_type{
               ANEURALNETWORKS_TENSOR_FLOAT32,
               static_cast<uint32_t>(tensor->dims->size),
               reinterpret_cast<uint32_t*>(tensor->dims->data),
               tensor->params.scale, tensor->params.zero_point};
-          CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
+          CHECK_NN(
+              nnapi->ANeuralNetworksModel_addOperand(nn_model, &operand_type));
           augmented_inputs.push_back(next_id);
           model_state_inputs->push_back(next_id);
           model_state_outputs->push_back(tensor_id);
@@ -388,7 +362,7 @@ TfLiteStatus AddOpsAndParams(
     };
 
     // LSTM in NNAPI requires scratch tensor as an output operand.
-    auto add_lstm_scratch_tensor_float32 = [subgraph, &node, &nn_model,
+    auto add_lstm_scratch_tensor_float32 = [nnapi, subgraph, &node, &nn_model,
                                             &next_id, &augmented_outputs]() {
       if (node.temporaries->size == 0) return;
       int scratch_buffer_index = node.temporaries->data[0];
@@ -398,7 +372,7 @@ TfLiteStatus AddOpsAndParams(
           static_cast<uint32_t>(tensor->dims->size),
           reinterpret_cast<uint32_t*>(tensor->dims->data), tensor->params.scale,
           tensor->params.zero_point};
-      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
+      CHECK_NN(nnapi->ANeuralNetworksModel_addOperand(nn_model, &operand_type));
       augmented_outputs.insert(augmented_outputs.begin(), next_id++);
     };
 
@@ -427,15 +401,16 @@ TfLiteStatus AddOpsAndParams(
     };
 
     // Handle optional input tensors.
-    auto add_optional_tensors = [&nn_model, &augmented_inputs,
+    auto add_optional_tensors = [nnapi, &nn_model, &augmented_inputs,
                                  &next_id](int nn_type) {
       for (size_t idx = 0; idx < augmented_inputs.size(); idx++) {
         if (augmented_inputs[idx] == kOptionalTensor) {
           const std::vector<uint32_t> dim = {0, 0};
           ANeuralNetworksOperandType operand_type{nn_type, 2, dim.data(), 0, 0};
-          CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
-          CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id,
-                                                        nullptr, 0))
+          CHECK_NN(
+              nnapi->ANeuralNetworksModel_addOperand(nn_model, &operand_type))
+          CHECK_NN(nnapi->ANeuralNetworksModel_setOperandValue(
+              nn_model, next_id, nullptr, 0))
           augmented_inputs[idx] = next_id++;
         }
       }
@@ -695,13 +670,13 @@ TfLiteStatus AddOpsAndParams(
         break;
     }
 
-    if (nnapi_version == 11 && GetAndroidSdkVersionCached() < 28) {
+    if (nnapi_version == 11 && nnapi->android_sdk_version < 28) {
       logError("Op %d needs NNAPI1.1", builtin);
       return kTfLiteError;
     }
 
     // Add the operation.
-    RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_addOperation(
+    RETURN_ERROR_IF_NN_FAILED(nnapi->ANeuralNetworksModel_addOperation(
         nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()),
         augmented_inputs.data(),
         static_cast<uint32_t>(augmented_outputs.size()),
@@ -713,9 +688,10 @@ TfLiteStatus AddOpsAndParams(
 TfLiteStatus NNAPIDelegate::BuildGraph(Subgraph* subgraph) {
   if (nn_model_ && nn_compiled_model_) return model_status_;
 
+  const NnApi* nnapi = NnApiImplementation();
   // TODO(aselle): This is not correct. need to handle resize invalidation.
   if (!nn_model_) {
-    CHECK_NN(ANeuralNetworksModel_create(&nn_model_));
+    CHECK_NN(nnapi->ANeuralNetworksModel_create(&nn_model_));
 
     // Find which tensors should be added to NNAPI. TFLite has temporaries
     // and RNN back-edges which are are not valid for NNAPI. We look through all
@@ -762,21 +738,22 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Subgraph* subgraph) {
                        model_states_outputs_.size(), &augmented_outputs,
                        tensor_id_to_nnapi_id);
 
-    CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs(
+    CHECK_NN(nnapi->ANeuralNetworksModel_identifyInputsAndOutputs(
         nn_model_, static_cast<uint32_t>(augmented_inputs.size()),
         reinterpret_cast<const uint32_t*>(augmented_inputs.data()),
         static_cast<uint32_t>(augmented_outputs.size()),
         reinterpret_cast<const uint32_t*>(augmented_outputs.data())));
 
-    if (GetAndroidSdkVersionCached() >= 28) {
-      CHECK_NN(ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+    if (nnapi->android_sdk_version >= 28) {
+      CHECK_NN(nnapi->ANeuralNetworksModel_relaxComputationFloat32toFloat16(
           nn_model_, subgraph->GetAllowFp16PrecisionForFp32()));
     }
-    CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
+    CHECK_NN(nnapi->ANeuralNetworksModel_finish(nn_model_));
   }
   if (!nn_compiled_model_) {
-    CHECK_NN(ANeuralNetworksCompilation_create(nn_model_, &nn_compiled_model_));
-    CHECK_NN(ANeuralNetworksCompilation_finish(nn_compiled_model_));
+    CHECK_NN(nnapi->ANeuralNetworksCompilation_create(nn_model_,
+                                                      &nn_compiled_model_));
+    CHECK_NN(nnapi->ANeuralNetworksCompilation_finish(nn_compiled_model_));
   }
   return kTfLiteOk;
 }
@@ -792,8 +769,10 @@ TfLiteStatus NNAPIDelegate::Invoke(Subgraph* subgraph) {
     return model_status_;
   }
 
+  const NnApi* nnapi = NnApiImplementation();
   ANeuralNetworksExecution* execution = nullptr;
-  CHECK_NN(ANeuralNetworksExecution_create(nn_compiled_model_, &execution));
+  CHECK_NN(
+      nnapi->ANeuralNetworksExecution_create(nn_compiled_model_, &execution));
 
   // Currently perform deep copy of input buffer
   for (size_t i = 0; i < subgraph->inputs().size(); i++) {
@@ -801,7 +780,7 @@ TfLiteStatus NNAPIDelegate::Invoke(Subgraph* subgraph) {
     // TODO(aselle): Is this what we want or do we want input instead?
     // TODO(aselle): This should be called setInputValue maybe to be cons.
     TfLiteTensor* tensor = subgraph->tensor(input);
-    CHECK_NN(ANeuralNetworksExecution_setInput(
+    CHECK_NN(nnapi->ANeuralNetworksExecution_setInput(
         execution, i, nullptr, tensor->data.raw, tensor->bytes));
   }
 
@@ -809,7 +788,7 @@ TfLiteStatus NNAPIDelegate::Invoke(Subgraph* subgraph) {
   for (size_t i = 0; i < subgraph->outputs().size(); i++) {
     int output = subgraph->outputs()[i];
     TfLiteTensor* tensor = subgraph->tensor(output);
-    CHECK_NN(ANeuralNetworksExecution_setOutput(
+    CHECK_NN(nnapi->ANeuralNetworksExecution_setOutput(
         execution, i, nullptr, tensor->data.raw, tensor->bytes));
   }
 
@@ -821,21 +800,21 @@ TfLiteStatus NNAPIDelegate::Invoke(Subgraph* subgraph) {
     // Here we are using a deep copy for state_in tensors so that we are not
     // reading and writing into the same buffer during a invocation.
     // TODO(miaowang): using double shared buffer to minimize the copies.
-    CHECK_NN(ANeuralNetworksExecution_setInput(
+    CHECK_NN(nnapi->ANeuralNetworksExecution_setInput(
         execution, i + subgraph->inputs().size(), nullptr, tensor->data.raw,
         tensor->bytes));
     // Tell NNAPI where to output the state_out.
-    CHECK_NN(ANeuralNetworksExecution_setOutput(
+    CHECK_NN(nnapi->ANeuralNetworksExecution_setOutput(
         execution, i + subgraph->outputs().size(), nullptr, tensor->data.raw,
         tensor->bytes));
   }
 
   // Currently use blocking compute.
   ANeuralNetworksEvent* event = nullptr;
-  CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event));
-  CHECK_NN(ANeuralNetworksEvent_wait(event));
-  ANeuralNetworksEvent_free(event);
-  ANeuralNetworksExecution_free(execution);
+  CHECK_NN(nnapi->ANeuralNetworksExecution_startCompute(execution, &event));
+  CHECK_NN(nnapi->ANeuralNetworksEvent_wait(event));
+  nnapi->ANeuralNetworksEvent_free(event);
+  nnapi->ANeuralNetworksExecution_free(execution);
 
 #if 0
   printf("From the NN API:\n");
@@ -853,6 +832,8 @@ TfLiteStatus NNAPIDelegate::Invoke(Subgraph* subgraph) {
   return kTfLiteOk;
 }
 
-bool NNAPIDelegate::IsSupported() { return NNAPIExists(); }
+bool NNAPIDelegate::IsSupported() {
+  return NnApiImplementation()->nnapi_exists;
+}
 
 }  // namespace tflite
-- 
GitLab


From 5b864796352c49a15fa2a443bc54b4d6bfdd6349 Mon Sep 17 00:00:00 2001
From: Andrew Selle <aselle@google.com>
Date: Wed, 19 Dec 2018 15:11:29 -0800
Subject: [PATCH 861/873] Automated rollback of commit
 7b9865971ed38fb3c46aa15c64c6660c50af2d83. Revert #23929.

PiperOrigin-RevId: 226240832
---
 tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
index 77889effc8..b09ee99768 100644
--- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
+++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py
@@ -91,7 +91,7 @@ vocabulary_size = 50000
 
 def build_dataset(words, n_words):
   """Process raw inputs into a dataset."""
-  count = [('UNK', -1)]
+  count = [['UNK', -1]]
   count.extend(collections.Counter(words).most_common(n_words - 1))
   dictionary = dict()
   for word, _ in count:
-- 
GitLab


From 94e331ac5b8ae01efde2d9948ec956e337d9eaf4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 15:21:24 -0800
Subject: [PATCH 862/873] In case StatusGroup has multiple status messages
 which all have a common prefix, we return the longest one. This is especially
 helpful when the status messages are incrementally build by appending to the
 previous ones during runtime.

PiperOrigin-RevId: 226242326
---
 tensorflow/core/lib/core/status.cc      | 19 +++++++++++++------
 tensorflow/core/lib/core/status_test.cc | 25 +++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/lib/core/status.cc b/tensorflow/core/lib/core/status.cc
index 7be5b9b513..3076c09337 100644
--- a/tensorflow/core/lib/core/status.cc
+++ b/tensorflow/core/lib/core/status.cc
@@ -156,19 +156,26 @@ Status StatusGroup::as_status() const {
     return Status::OK();
   }
 
-  // If there is only one message, or all of the messages are identical, return
-  // the original status.  This reduces verbosity and preserves existing
-  // behavior when possible.
+  // Reduce verbosity when handling duplicate messages. If there is only a
+  // single message, or all messages have similar content, then return the
+  // longest status message.
+  std::vector<Status> sorted_children(children_);
+  std::sort(sorted_children.begin(), sorted_children.end(),
+            [](const Status& a, const Status& b) {
+              return a.error_message().length() > b.error_message().length();
+            });
   bool single_status = true;
-  for (const Status& s : children_) {
-    if (s != children_[0]) {
+  for (const auto& s : sorted_children) {
+    if (s.code() != sorted_children[0].code() ||
+        sorted_children[0].error_message().find(s.error_message()) ==
+            string::npos) {
       single_status = false;
       break;
     }
   }
 
   if (single_status) {
-    return children_[0];
+    return sorted_children[0];
   }
 
   std::vector<string> fmt;
diff --git a/tensorflow/core/lib/core/status_test.cc b/tensorflow/core/lib/core/status_test.cc
index d3296b4fac..7c28184080 100644
--- a/tensorflow/core/lib/core/status_test.cc
+++ b/tensorflow/core/lib/core/status_test.cc
@@ -141,6 +141,31 @@ TEST(StatusGroup, ContainsChildMessages) {
   LOG(INFO) << d.as_status();
 }
 
+TEST(StatusGroup, ContainsIdenticalMessage) {
+  StatusGroup sg;
+  const Status internal(errors::Internal("Original error"));
+  for (size_t i = 0; i < 10; i++) {
+    sg.Update(internal);
+  }
+  EXPECT_EQ(sg.as_status(), internal);
+}
+
+TEST(StatusGroup, ContainsCommonPrefix) {
+  StatusGroup sg;
+  const Status a(errors::Internal("Original error"));
+  const Status b(errors::Internal("Original error is"));
+  const Status c(errors::Internal("Original error is invalid"));
+  sg.Update(a);
+  sg.Update(c);
+  sg.Update(c);
+  sg.Update(b);
+  sg.Update(c);
+  sg.Update(b);
+  sg.Update(a);
+  sg.Update(b);
+  EXPECT_EQ(sg.as_status(), c);
+}
+
 static void BM_TF_CHECK_OK(int iters) {
   tensorflow::Status s =
       (iters < 0) ? errors::InvalidArgument("Invalid") : Status::OK();
-- 
GitLab


From 62070156d9a54c7a810b6bc8383ab190a98c7c64 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Wed, 19 Dec 2018 15:36:22 -0800
Subject: [PATCH 863/873] Place inlined inputs before placing function body

PiperOrigin-RevId: 226244885
---
 .../grappler/optimizers/function_optimizer.cc | 21 +++++++++++++++++--
 .../optimizers/function_optimizer_test.cc     |  5 +++--
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc
index 4ec68c7543..73c950b3fc 100644
--- a/tensorflow/core/grappler/optimizers/function_optimizer.cc
+++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc
@@ -1374,7 +1374,24 @@ Status InlineIndirectFunctionCall(const NodeDef& func_node,
   const string prefix = strings::StrCat(func_node.name(), "/");
 
   // ------------------------------------------------------------------------ //
-  // First we need to assign device placements to all function body nodes.
+  // Before placing the function body nodes we pin input placeholders to the
+  // same device as their corresponding input nodes.
+
+  for (NodeDef& func_body_node : *item.graph.mutable_node()) {
+    if (item.IsInputPlaceholder(func_body_node.name())) {
+      const int input_idx = input_placeholders_idx[func_body_node.name()];
+      const GraphView::OutputPort output_port =
+          ctx->graph_view().GetRegularFanin({&func_node, input_idx});
+
+      VLOG(3) << "Pin inlined function input node '" << func_body_node.name()
+              << "' to the '" << output_port.node->device() << "' device.";
+      func_body_node.set_device(output_port.node->device());
+    }
+  }
+
+  // ------------------------------------------------------------------------ //
+  // After placing nodes corresponding to the function inputs, we need to assign
+  // device placements to all other function body nodes.
 
   GraphDef placed_graph_def;
 
@@ -1432,7 +1449,7 @@ Status InlineIndirectFunctionCall(const NodeDef& func_node,
       (*func_body_node.mutable_attr())["T"] = func_body_node.attr().at("dtype");
       func_body_node.mutable_attr()->erase("dtype");
       func_body_node.mutable_attr()->erase("shape");
-      int input_idx = input_placeholders_idx[func_body_node.name()];
+      const int input_idx = input_placeholders_idx[func_body_node.name()];
       func_body_node.add_input(strings::StrCat(inputs[input_idx].ToString()));
 
       // All side effects must happen before inputs can start executing.
diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
index c971eec3f4..79da7dfa2d 100644
--- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
+++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc
@@ -924,8 +924,9 @@ TEST_F(FunctionOptimizerTest, InlineIndirectFunctionWithDevicePlacement) {
       {NDef("a", "Placeholder", {}, {{"dtype", DT_FLOAT}}, cpu0),
        NDef("b", "Placeholder", {}, {{"dtype", DT_FLOAT}}, cpu1),
 
-       // Function must be inlined and `mul` node placed on a requested device.
-       NDef("c/x", "Identity", {"a:0"}, {{"T", DT_FLOAT}}, cpu1),
+       // Function must be inlined and `mul` node placed on a requested device,
+       // and input `Identity` nodes must be colocated with their source nodes.
+       NDef("c/x", "Identity", {"a:0"}, {{"T", DT_FLOAT}}, cpu0),
        NDef("c/y", "Identity", {"b:0"}, {{"T", DT_FLOAT}}, cpu1),
        NDef("c/mul", "Mul", {"c/x", "c/y"}, {{"T", DT_FLOAT}}, cpu1),
 
-- 
GitLab


From 286e5e5d59f9b1dd1565f464348e044b704d3ac9 Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Wed, 19 Dec 2018 15:39:44 -0800
Subject: [PATCH 864/873] [XLA:GPU] Don't segfault if infeeding data of the
 wrong shape.

Instead, return a helpful error.

PiperOrigin-RevId: 226245605
---
 .../compiler/xla/service/gpu/infeed_thunk.cc      | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
index 8c3a026740..8a96b5fabc 100644
--- a/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/infeed_thunk.cc
@@ -36,6 +36,21 @@ Status InfeedThunk::ExecuteOnStream(const BufferAllocations& buffer_allocations,
   ShapeTree<InfeedBuffer> infeed_buffers =
       GetOrCreateInfeedManager()->BlockingGetNextDestination();
 
+  // infeed_slices_'s shape should be a tuple of shape (buffers, token).
+  const auto& infeed_shape = infeed_slices_.shape();
+  TF_RET_CHECK(ShapeUtil::IsTuple(infeed_shape))
+      << ShapeUtil::HumanStringWithLayout(infeed_shape);
+  TF_RET_CHECK(infeed_shape.tuple_shapes().size() == 2)
+      << ShapeUtil::HumanStringWithLayout(infeed_shape);
+  TF_RET_CHECK(ShapeUtil::IsToken(infeed_shape.tuple_shapes(1)))
+      << ShapeUtil::HumanStringWithLayout(infeed_shape);
+  TF_RET_CHECK(
+      ShapeUtil::Equal(infeed_buffers.shape(), infeed_shape.tuple_shapes(0)))
+      << "Expected infeed of shape "
+      << ShapeUtil::HumanStringWithLayout(infeed_shape.tuple_shapes(0))
+      << " but was "
+      << ShapeUtil::HumanStringWithLayout(infeed_buffers.shape());
+
   {
     // The infeed buffer has an extra outer tuple with a token. Adjust the index
     // accordingly.
-- 
GitLab


From 632a5d9d405ff41c7701055792446d72d42eea1a Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Wed, 19 Dec 2018 15:53:36 -0800
Subject: [PATCH 865/873] [XLA] Make TriangularSolve ignore the elements not in
 the triangle of its triangular input.

PiperOrigin-RevId: 226247755
---
 tensorflow/compiler/xla/client/lib/BUILD      |  1 +
 tensorflow/compiler/xla/client/lib/matrix.cc  | 21 ++---
 tensorflow/compiler/xla/client/lib/matrix.h   |  4 +
 .../xla/client/lib/triangular_solve.cc        |  5 +
 .../xla/client/lib/triangular_solve_test.cc   | 92 +++++++++++++++++--
 5 files changed, 104 insertions(+), 19 deletions(-)

diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD
index 826b13fe37..6192b89b4a 100644
--- a/tensorflow/compiler/xla/client/lib/BUILD
+++ b/tensorflow/compiler/xla/client/lib/BUILD
@@ -412,6 +412,7 @@ xla_test(
     srcs = ["triangular_solve_test.cc"],
     tags = ["noasan"],  # sometimes times out, http://b/78650012
     deps = [
+        ":math",
         ":matrix",
         ":triangular_solve",
         "//tensorflow/compiler/xla:array2d",
diff --git a/tensorflow/compiler/xla/client/lib/matrix.cc b/tensorflow/compiler/xla/client/lib/matrix.cc
index ffd744d190..16c177b4e2 100644
--- a/tensorflow/compiler/xla/client/lib/matrix.cc
+++ b/tensorflow/compiler/xla/client/lib/matrix.cc
@@ -64,7 +64,7 @@ XlaOp GetMatrixDiagonal(XlaOp x) {
   });
 }
 
-XlaOp Triangle(XlaOp x, bool lower) {
+XlaOp TriangleMask(XlaOp x, int diagonal) {
   XlaBuilder* builder = x.builder();
   return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
     TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(x));
@@ -74,20 +74,19 @@ XlaOp Triangle(XlaOp x, bool lower) {
     const int64 n = shape.dimensions(n_dims - 1);
     absl::Span<const int64> major_dims =
         AsInt64Slice(shape.dimensions()).subspan(/*pos=*/0, /*len=*/n_dims - 2);
-    auto a = Iota(builder, U32, n);
-    auto b = Iota(builder, U32, m);
+    auto a = Iota(builder, S32, n);
+    auto b = Iota(builder, S32, m) + ConstantR0<int32>(builder, diagonal);
     XlaOp indicator;
-    if (lower) {
-      indicator = Ge(b, Broadcast(a, {m}), /*broadcast_dimensions=*/{0});
-    } else {
-      indicator = Le(b, Broadcast(a, {m}), /*broadcast_dimensions=*/{0});
-    }
-    auto mask = Broadcast(indicator, major_dims);
-
-    return Select(mask, x, Zeros(builder, shape));
+    indicator = Ge(b, Broadcast(a, {m}), /*broadcast_dimensions=*/{0});
+    return Broadcast(indicator, major_dims);
   });
 }
 
+XlaOp Triangle(XlaOp x, bool lower) {
+  return lower ? Select(TriangleMask(x, 0), x, ZerosLike(x))
+               : Select(TriangleMask(x, -1), ZerosLike(x), x);
+}
+
 XlaOp UpperTriangle(XlaOp x) { return Triangle(x, false); }
 
 XlaOp LowerTriangle(XlaOp x) { return Triangle(x, true); }
diff --git a/tensorflow/compiler/xla/client/lib/matrix.h b/tensorflow/compiler/xla/client/lib/matrix.h
index 8856f99c7a..916cd83748 100644
--- a/tensorflow/compiler/xla/client/lib/matrix.h
+++ b/tensorflow/compiler/xla/client/lib/matrix.h
@@ -31,6 +31,10 @@ XlaOp IdentityMatrix(XlaBuilder* builder, PrimitiveType type, int64 m, int64 n);
 // diagonal elements (i.e., with indices [..., i, i]).
 XlaOp GetMatrixDiagonal(XlaOp x);
 
+// Returns a lower-triangular mask, i.e., true below the `diagonal`-th diagonal
+// and false above that diagonal.
+XlaOp TriangleMask(XlaOp x, int diagonal);
+
 // Get the upper or lower triangle part of the last two dimensions
 XlaOp Triangle(XlaOp x, bool lower);
 
diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve.cc b/tensorflow/compiler/xla/client/lib/triangular_solve.cc
index 159e0c82dc..4bc2f3d121 100644
--- a/tensorflow/compiler/xla/client/lib/triangular_solve.cc
+++ b/tensorflow/compiler/xla/client/lib/triangular_solve.cc
@@ -417,6 +417,11 @@ XlaOp TriangularSolve(XlaOp a, XlaOp b, bool left_side, bool lower,
     auto inv_diag_blocks = InvertDiagonalBlocks(diag_blocks, lower, transpose_a,
                                                 conjugate_a, precision);
 
+    // Mask off the ignored elements of the triangular matrix a.
+    // TODO(phawkins): it would probably be preferable to perform this masking
+    // block by block inside SolveWithInvertedDiagonalBlocks.
+    a = Triangle(a, lower);
+
     // We now find the solution using GEMMs
     auto x =
         SolveWithInvertedDiagonalBlocks(a, b, inv_diag_blocks, left_side, lower,
diff --git a/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc b/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
index 3fea627e6a..703227c949 100644
--- a/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
+++ b/tensorflow/compiler/xla/client/lib/triangular_solve_test.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/compiler/xla/array2d.h"
+#include "tensorflow/compiler/xla/client/lib/math.h"
 #include "tensorflow/compiler/xla/client/lib/matrix.h"
 #include "tensorflow/compiler/xla/client/xla_builder.h"
 #include "tensorflow/compiler/xla/literal.h"
@@ -37,12 +38,20 @@ namespace {
 using TriangularSolveTest = ClientLibraryTestBase;
 using TriangularSolveLeftLookingTest = ClientLibraryTestBase;
 
+static constexpr float kNan = std::numeric_limits<float>::quiet_NaN();
+
 Array2D<float> AValsLower() {
-  return {{2, 0, 0, 0}, {3, 6, 0, 0}, {4, 7, 9, 0}, {5, 8, 10, 11}};
+  return {{2, kNan, kNan, kNan},
+          {3, 6, kNan, kNan},
+          {4, 7, 9, kNan},
+          {5, 8, 10, 11}};
 }
 
 Array2D<float> AValsUpper() {
-  return {{2, 3, 4, 5}, {0, 6, 7, 8}, {0, 0, 9, 10}, {0, 0, 0, 11}};
+  return {{2, 3, 4, 5},
+          {kNan, 6, 7, 8},
+          {kNan, kNan, 9, 10},
+          {kNan, kNan, kNan, 11}};
 }
 
 Array2D<float> BValsRight() {
@@ -53,18 +62,20 @@ Array2D<float> BValsLeft() {
   return {{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}};
 }
 
+static constexpr complex64 kNanC64 = complex64(kNan, kNan);
+
 Array2D<complex64> AValsLowerComplex() {
-  return {{2, 0, 0, 0},
-          {complex64(3, 1), 6, 0, 0},
-          {4, complex64(7, 2), 9, 0},
+  return {{2, kNanC64, kNanC64, kNanC64},
+          {complex64(3, 1), 6, kNanC64, kNanC64},
+          {4, complex64(7, 2), 9, kNanC64},
           {5, 8, complex64(10, 3), 11}};
 }
 
 Array2D<complex64> AValsUpperComplex() {
   return {{2, 3, complex64(4, 3), 5},
-          {0, 6, complex64(7, 2), 8},
-          {0, 0, complex64(9, 1), 10},
-          {0, 0, 0, 11}};
+          {kNanC64, 6, complex64(7, 2), 8},
+          {kNanC64, kNanC64, complex64(9, 1), 10},
+          {kNanC64, kNanC64, kNanC64, 11}};
 }
 
 Array2D<complex64> BValsRightComplex() {
@@ -367,5 +378,70 @@ XLA_TEST_F(TriangularSolveTest, BatchedLeftUpper) {
                              ErrorSpec(1e-2, 1e-2));
 }
 
+struct TriangularSolveTestSpec {
+  int m, n;  // A is mxm, B is mxn
+  bool left_side;
+  bool lower;
+  bool transpose_a;
+};
+
+class TriangularSolveParametricTest
+    : public ClientLibraryTestBase,
+      public ::testing::WithParamInterface<TriangularSolveTestSpec> {};
+
+XLA_TEST_P(TriangularSolveParametricTest, Random) {
+  TriangularSolveTestSpec spec = GetParam();
+
+  XlaBuilder builder(TestName());
+
+  Array2D<float> avals(spec.m, spec.m);
+  avals.FillRandom(1.0);
+  for (int i = 0; i < spec.m; ++i) {
+    avals(i, i) += 10;
+  }
+
+  std::pair<int, int> bdims = spec.left_side ? std::make_pair(spec.m, spec.n)
+                                             : std::make_pair(spec.n, spec.m);
+  Array2D<float> bvals(bdims.first, bdims.second);
+  bvals.FillRandom(1.0);
+
+  XlaOp a, b;
+  auto a_data = CreateR2Parameter<float>(avals, 0, "a", &builder, &a);
+  auto b_data = CreateR2Parameter<float>(bvals, 1, "b", &builder, &b);
+  auto x = TriangularSolve(a, b, spec.left_side, spec.lower, spec.transpose_a,
+                           /*conjugate_a=*/false,
+                           /*block_size=*/3);
+  auto a_tri = Triangle(a, spec.lower);
+  a_tri = MaybeTransposeInMinorDims(a_tri, spec.transpose_a);
+  if (spec.left_side) {
+    BatchDot(a_tri, x);
+  } else {
+    BatchDot(x, a_tri);
+  }
+
+  ComputeAndCompareR2<float>(&builder, bvals, {a_data.get(), b_data.get()},
+                             ErrorSpec(1e-2, 1e-2));
+}
+
+std::vector<TriangularSolveTestSpec> TriangularSolveTests() {
+  std::vector<TriangularSolveTestSpec> specs;
+  for (int m : {5, 10}) {
+    for (int n : {5, 10}) {
+      for (bool left_side : {false, true}) {
+        for (bool lower : {false, true}) {
+          for (bool transpose_a : {false, true}) {
+            specs.push_back({m, n, left_side, lower, transpose_a});
+          }
+        }
+      }
+    }
+  }
+  return specs;
+}
+
+INSTANTIATE_TEST_CASE_P(TriangularSolveParametricTestInstantiation,
+                        TriangularSolveParametricTest,
+                        ::testing::ValuesIn(TriangularSolveTests()));
+
 }  // namespace
 }  // namespace xla
-- 
GitLab


From 33096914a7e1afa84007451faa25ad66ab78f15d Mon Sep 17 00:00:00 2001
From: Yunlu Li <yunluli@google.com>
Date: Wed, 19 Dec 2018 16:00:11 -0800
Subject: [PATCH 866/873] Make tflite_driver able to run single op model with
 reference kernels.

PiperOrigin-RevId: 226248707
---
 tensorflow/lite/kernels/BUILD                 |  15 +
 tensorflow/lite/kernels/register_ref.cc       | 297 ++++++++++++++++++
 tensorflow/lite/kernels/register_ref.h        |  39 +++
 tensorflow/lite/testing/BUILD                 |   1 +
 tensorflow/lite/testing/tflite_driver.cc      |  14 +-
 tensorflow/lite/testing/tflite_driver.h       |   6 +-
 tensorflow/lite/testing/tflite_driver_test.cc |  34 ++
 7 files changed, 402 insertions(+), 4 deletions(-)
 create mode 100644 tensorflow/lite/kernels/register_ref.cc
 create mode 100644 tensorflow/lite/kernels/register_ref.h

diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD
index bad1c4aebf..5cc06c7a63 100644
--- a/tensorflow/lite/kernels/BUILD
+++ b/tensorflow/lite/kernels/BUILD
@@ -285,6 +285,21 @@ cc_library(
     ],
 )
 
+# The builtin_ops target will resolve to optimized kernels when available. This
+# target uses reference kernels only, and is useful for validation and testing.
+# It should *not* generally be used in production.
+cc_library(
+    name = "reference_ops",
+    srcs = ["register_ref.cc"],
+    hdrs = ["register_ref.h"],
+    deps = [
+        ":builtin_op_kernels",
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite:util",
+        "//tensorflow/lite/c:c_api_internal",
+    ],
+)
+
 tf_cc_test(
     name = "audio_spectrogram_test",
     size = "small",
diff --git a/tensorflow/lite/kernels/register_ref.cc b/tensorflow/lite/kernels/register_ref.cc
new file mode 100644
index 0000000000..584e044b98
--- /dev/null
+++ b/tensorflow/lite/kernels/register_ref.cc
@@ -0,0 +1,297 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/register_ref.h"
+#include "tensorflow/lite/util.h"
+
+namespace tflite {
+namespace ops {
+
+namespace custom {
+
+TfLiteRegistration* Register_AUDIO_SPECTROGRAM();
+TfLiteRegistration* Register_LAYER_NORM_LSTM();
+TfLiteRegistration* Register_MFCC();
+TfLiteRegistration* Register_DETECTION_POSTPROCESS();
+TfLiteRegistration* Register_RELU_1();
+
+}  // namespace custom
+
+namespace builtin {
+
+// TODO(yunluli): Some of the registries, e.g. Tanh(), could only invoke
+// optimized kernels. Add a _REF() variant for them.
+TfLiteRegistration* Register_ABS();
+TfLiteRegistration* Register_RELU();
+TfLiteRegistration* Register_RELU_N1_TO_1();
+TfLiteRegistration* Register_RELU6();
+TfLiteRegistration* Register_TANH();
+TfLiteRegistration* Register_LOGISTIC();
+TfLiteRegistration* Register_AVERAGE_POOL_REF();
+TfLiteRegistration* Register_MAX_POOL_REF();
+TfLiteRegistration* Register_L2_POOL_REF();
+TfLiteRegistration* Register_CONVOLUTION_REF();
+TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF();
+TfLiteRegistration* Register_SVDF();
+TfLiteRegistration* Register_RNN();
+TfLiteRegistration* Register_BIDIRECTIONAL_SEQUENCE_RNN();
+TfLiteRegistration* Register_UNIDIRECTIONAL_SEQUENCE_RNN();
+TfLiteRegistration* Register_EMBEDDING_LOOKUP();
+TfLiteRegistration* Register_EMBEDDING_LOOKUP_SPARSE();
+TfLiteRegistration* Register_FULLY_CONNECTED_REF();
+TfLiteRegistration* Register_LSH_PROJECTION();
+TfLiteRegistration* Register_HASHTABLE_LOOKUP();
+TfLiteRegistration* Register_SOFTMAX();
+TfLiteRegistration* Register_CONCATENATION_REF();
+TfLiteRegistration* Register_ADD_REF();
+TfLiteRegistration* Register_SPACE_TO_BATCH_ND_REF();
+TfLiteRegistration* Register_DIV_REF();
+TfLiteRegistration* Register_SUB_REF();
+TfLiteRegistration* Register_BATCH_TO_SPACE_ND_REF();
+TfLiteRegistration* Register_MUL_REF();
+TfLiteRegistration* Register_L2NORM_REF();
+TfLiteRegistration* Register_LOCAL_RESPONSE_NORM_REF();
+TfLiteRegistration* Register_LSTM();
+TfLiteRegistration* Register_BIDIRECTIONAL_SEQUENCE_LSTM();
+TfLiteRegistration* Register_UNIDIRECTIONAL_SEQUENCE_LSTM();
+TfLiteRegistration* Register_PAD_REF();
+TfLiteRegistration* Register_PADV2_REF();
+TfLiteRegistration* Register_RESHAPE();
+TfLiteRegistration* Register_RESIZE_BILINEAR_REF();
+TfLiteRegistration* Register_RESIZE_NEAREST_NEIGHBOR_REF();
+TfLiteRegistration* Register_SKIP_GRAM();
+TfLiteRegistration* Register_SPACE_TO_DEPTH_REF();
+TfLiteRegistration* Register_GATHER();
+TfLiteRegistration* Register_TRANSPOSE_REF();
+TfLiteRegistration* Register_MEAN_REF();
+TfLiteRegistration* Register_SPLIT();
+TfLiteRegistration* Register_SPLIT_V();
+TfLiteRegistration* Register_SQUEEZE();
+TfLiteRegistration* Register_STRIDED_SLICE_REF();
+TfLiteRegistration* Register_EXP();
+TfLiteRegistration* Register_TOPK_V2();
+TfLiteRegistration* Register_LOG();
+TfLiteRegistration* Register_LOG_SOFTMAX();
+TfLiteRegistration* Register_CAST();
+TfLiteRegistration* Register_DEQUANTIZE();
+TfLiteRegistration* Register_PRELU();
+TfLiteRegistration* Register_MAXIMUM();
+TfLiteRegistration* Register_MINIMUM();
+TfLiteRegistration* Register_ARG_MAX();
+TfLiteRegistration* Register_ARG_MIN();
+TfLiteRegistration* Register_GREATER();
+TfLiteRegistration* Register_GREATER_EQUAL();
+TfLiteRegistration* Register_LESS();
+TfLiteRegistration* Register_LESS_EQUAL();
+TfLiteRegistration* Register_FLOOR();
+TfLiteRegistration* Register_TILE();
+TfLiteRegistration* Register_NEG();
+TfLiteRegistration* Register_SUM();
+TfLiteRegistration* Register_REDUCE_PROD();
+TfLiteRegistration* Register_REDUCE_MAX();
+TfLiteRegistration* Register_REDUCE_MIN();
+TfLiteRegistration* Register_REDUCE_ANY();
+TfLiteRegistration* Register_SELECT();
+TfLiteRegistration* Register_SLICE();
+TfLiteRegistration* Register_SIN();
+TfLiteRegistration* Register_TRANSPOSECONV_REF();
+TfLiteRegistration* Register_EXPAND_DIMS();
+TfLiteRegistration* Register_SPARSE_TO_DENSE();
+TfLiteRegistration* Register_EQUAL();
+TfLiteRegistration* Register_NOT_EQUAL();
+TfLiteRegistration* Register_SQRT();
+TfLiteRegistration* Register_RSQRT();
+TfLiteRegistration* Register_SHAPE();
+TfLiteRegistration* Register_POW();
+TfLiteRegistration* Register_FAKE_QUANT();
+TfLiteRegistration* Register_PACK();
+TfLiteRegistration* Register_ONE_HOT();
+TfLiteRegistration* Register_LOGICAL_OR();
+TfLiteRegistration* Register_LOGICAL_AND();
+TfLiteRegistration* Register_LOGICAL_NOT();
+TfLiteRegistration* Register_UNPACK();
+TfLiteRegistration* Register_FLOOR_DIV();
+TfLiteRegistration* Register_SQUARE();
+TfLiteRegistration* Register_ZEROS_LIKE();
+TfLiteRegistration* Register_FLOOR_MOD();
+TfLiteRegistration* Register_RANGE();
+TfLiteRegistration* Register_LEAKY_RELU();
+TfLiteRegistration* Register_SQUARED_DIFFERENCE();
+TfLiteRegistration* Register_FILL();
+TfLiteRegistration* Register_MIRROR_PAD();
+
+namespace {
+
+TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* node) {
+  context->ReportError(
+      context,
+      "Regular TensorFlow ops are not supported by this interpreter. Make sure "
+      "you invoke the Flex delegate before inference.");
+  return kTfLiteError;
+}
+
+}  // namespace
+
+const TfLiteRegistration* BuiltinRefOpResolver::FindOp(
+    tflite::BuiltinOperator op, int version) const {
+  return MutableOpResolver::FindOp(op, version);
+}
+
+const TfLiteRegistration* BuiltinRefOpResolver::FindOp(const char* op,
+                                                       int version) const {
+  // Return the NULL Op for all ops whose name start with "Flex", allowing
+  // the interpreter to delegate their execution.
+  if (IsFlexOp(op)) {
+    static TfLiteRegistration null_op{
+        nullptr, nullptr, &UnsupportedTensorFlowOp,
+        nullptr, nullptr, BuiltinOperator_CUSTOM,
+        "Flex",  1};
+    return &null_op;
+  }
+  return MutableOpResolver::FindOp(op, version);
+}
+
+BuiltinRefOpResolver::BuiltinRefOpResolver() {
+  AddBuiltin(BuiltinOperator_ABS, Register_ABS());
+  AddBuiltin(BuiltinOperator_RELU, Register_RELU());
+  AddBuiltin(BuiltinOperator_RELU_N1_TO_1, Register_RELU_N1_TO_1());
+  AddBuiltin(BuiltinOperator_RELU6, Register_RELU6());
+  AddBuiltin(BuiltinOperator_TANH, Register_TANH());
+  AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC());
+  AddBuiltin(BuiltinOperator_AVERAGE_POOL_2D, Register_AVERAGE_POOL_REF());
+  AddBuiltin(BuiltinOperator_MAX_POOL_2D, Register_MAX_POOL_REF());
+  AddBuiltin(BuiltinOperator_L2_POOL_2D, Register_L2_POOL_REF());
+  AddBuiltin(BuiltinOperator_CONV_2D, Register_CONVOLUTION_REF());
+  AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D,
+             Register_DEPTHWISE_CONVOLUTION_REF(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_SVDF, Register_SVDF());
+  AddBuiltin(BuiltinOperator_RNN, Register_RNN());
+  AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN,
+             Register_BIDIRECTIONAL_SEQUENCE_RNN());
+  AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN,
+             Register_UNIDIRECTIONAL_SEQUENCE_RNN());
+  AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP, Register_EMBEDDING_LOOKUP());
+  AddBuiltin(BuiltinOperator_EMBEDDING_LOOKUP_SPARSE,
+             Register_EMBEDDING_LOOKUP_SPARSE());
+  AddBuiltin(BuiltinOperator_FULLY_CONNECTED, Register_FULLY_CONNECTED_REF(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_LSH_PROJECTION, Register_LSH_PROJECTION());
+  AddBuiltin(BuiltinOperator_HASHTABLE_LOOKUP, Register_HASHTABLE_LOOKUP());
+  AddBuiltin(BuiltinOperator_SOFTMAX, Register_SOFTMAX());
+  AddBuiltin(BuiltinOperator_CONCATENATION, Register_CONCATENATION_REF());
+  AddBuiltin(BuiltinOperator_ADD, Register_ADD_REF());
+  AddBuiltin(BuiltinOperator_SPACE_TO_BATCH_ND,
+             Register_SPACE_TO_BATCH_ND_REF());
+  AddBuiltin(BuiltinOperator_BATCH_TO_SPACE_ND,
+             Register_BATCH_TO_SPACE_ND_REF());
+  AddBuiltin(BuiltinOperator_MUL, Register_MUL_REF());
+  AddBuiltin(BuiltinOperator_L2_NORMALIZATION, Register_L2NORM_REF());
+  AddBuiltin(BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION,
+             Register_LOCAL_RESPONSE_NORM_REF());
+  AddBuiltin(BuiltinOperator_LSTM, Register_LSTM(), /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM,
+             Register_BIDIRECTIONAL_SEQUENCE_LSTM(), /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM,
+             Register_UNIDIRECTIONAL_SEQUENCE_LSTM());
+  AddBuiltin(BuiltinOperator_PAD, Register_PAD_REF());
+  AddBuiltin(BuiltinOperator_PADV2, Register_PADV2_REF());
+  AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE());
+  AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR_REF());
+  AddBuiltin(BuiltinOperator_RESIZE_NEAREST_NEIGHBOR,
+             Register_RESIZE_NEAREST_NEIGHBOR_REF());
+  AddBuiltin(BuiltinOperator_SKIP_GRAM, Register_SKIP_GRAM());
+  AddBuiltin(BuiltinOperator_SPACE_TO_DEPTH, Register_SPACE_TO_DEPTH_REF());
+  AddBuiltin(BuiltinOperator_GATHER, Register_GATHER());
+  AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE_REF());
+  AddBuiltin(BuiltinOperator_MEAN, Register_MEAN_REF());
+  AddBuiltin(BuiltinOperator_DIV, Register_DIV_REF());
+  AddBuiltin(BuiltinOperator_SUB, Register_SUB_REF());
+  AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT());
+  AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V());
+  AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE());
+  AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE_REF());
+  AddBuiltin(BuiltinOperator_EXP, Register_EXP());
+  AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2());
+  AddBuiltin(BuiltinOperator_LOG, Register_LOG());
+  AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX());
+  AddBuiltin(BuiltinOperator_CAST, Register_CAST());
+  AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE(),
+             /* min_version */ 1,
+             /* max_version */ 2);
+  AddBuiltin(BuiltinOperator_PRELU, Register_PRELU());
+  AddBuiltin(BuiltinOperator_MAXIMUM, Register_MAXIMUM());
+  AddBuiltin(BuiltinOperator_MINIMUM, Register_MINIMUM());
+  AddBuiltin(BuiltinOperator_ARG_MAX, Register_ARG_MAX());
+  AddBuiltin(BuiltinOperator_ARG_MIN, Register_ARG_MIN());
+  AddBuiltin(BuiltinOperator_GREATER, Register_GREATER());
+  AddBuiltin(BuiltinOperator_GREATER_EQUAL, Register_GREATER_EQUAL());
+  AddBuiltin(BuiltinOperator_LESS, Register_LESS());
+  AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL());
+  AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR());
+  AddBuiltin(BuiltinOperator_NEG, Register_NEG());
+  AddBuiltin(BuiltinOperator_SELECT, Register_SELECT());
+  AddBuiltin(BuiltinOperator_SLICE, Register_SLICE());
+  AddBuiltin(BuiltinOperator_SIN, Register_SIN());
+  AddBuiltin(BuiltinOperator_TRANSPOSE_CONV, Register_TRANSPOSECONV_REF());
+  AddBuiltin(BuiltinOperator_TILE, Register_TILE());
+  AddBuiltin(BuiltinOperator_SUM, Register_SUM());
+  AddBuiltin(BuiltinOperator_REDUCE_PROD, Register_REDUCE_PROD());
+  AddBuiltin(BuiltinOperator_REDUCE_MAX, Register_REDUCE_MAX());
+  AddBuiltin(BuiltinOperator_REDUCE_MIN, Register_REDUCE_MIN());
+  AddBuiltin(BuiltinOperator_REDUCE_ANY, Register_REDUCE_ANY());
+  AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXPAND_DIMS());
+  AddBuiltin(BuiltinOperator_SPARSE_TO_DENSE, Register_SPARSE_TO_DENSE());
+  AddBuiltin(BuiltinOperator_EQUAL, Register_EQUAL());
+  AddBuiltin(BuiltinOperator_NOT_EQUAL, Register_NOT_EQUAL());
+  AddBuiltin(BuiltinOperator_SQRT, Register_SQRT());
+  AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT());
+  AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE());
+  AddBuiltin(BuiltinOperator_POW, Register_POW());
+  AddBuiltin(BuiltinOperator_FAKE_QUANT, Register_FAKE_QUANT(), 1, 2);
+  AddBuiltin(BuiltinOperator_PACK, Register_PACK());
+  AddBuiltin(BuiltinOperator_ONE_HOT, Register_ONE_HOT());
+  AddBuiltin(BuiltinOperator_LOGICAL_OR, Register_LOGICAL_OR());
+  AddBuiltin(BuiltinOperator_LOGICAL_AND, Register_LOGICAL_AND());
+  AddBuiltin(BuiltinOperator_LOGICAL_NOT, Register_LOGICAL_NOT());
+  AddBuiltin(BuiltinOperator_UNPACK, Register_UNPACK());
+  AddBuiltin(BuiltinOperator_FLOOR_DIV, Register_FLOOR_DIV());
+  AddBuiltin(BuiltinOperator_SQUARE, Register_SQUARE());
+  AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE());
+  AddBuiltin(BuiltinOperator_FLOOR_MOD, Register_FLOOR_MOD());
+  AddBuiltin(BuiltinOperator_RANGE, Register_RANGE());
+  AddBuiltin(BuiltinOperator_LEAKY_RELU, Register_LEAKY_RELU());
+  AddBuiltin(BuiltinOperator_SQUARED_DIFFERENCE, Register_SQUARED_DIFFERENCE());
+  AddBuiltin(BuiltinOperator_FILL, Register_FILL());
+  AddBuiltin(BuiltinOperator_MIRROR_PAD, Register_MIRROR_PAD());
+
+  // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that
+  // custom ops aren't always included by default.
+  AddCustom("Mfcc", tflite::ops::custom::Register_MFCC());
+  AddCustom("AudioSpectrogram",
+            tflite::ops::custom::Register_AUDIO_SPECTROGRAM());
+  AddCustom("LayerNormLstm", tflite::ops::custom::Register_LAYER_NORM_LSTM());
+  AddCustom("Relu1", tflite::ops::custom::Register_RELU_1());
+  AddCustom("TFLite_Detection_PostProcess",
+            tflite::ops::custom::Register_DETECTION_POSTPROCESS());
+}
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/kernels/register_ref.h b/tensorflow/lite/kernels/register_ref.h
new file mode 100644
index 0000000000..c66d4a25bc
--- /dev/null
+++ b/tensorflow/lite/kernels/register_ref.h
@@ -0,0 +1,39 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_REGISTER_REF_H_
+#define TENSORFLOW_LITE_KERNELS_REGISTER_REF_H_
+
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/mutable_op_resolver.h"
+
+namespace tflite {
+namespace ops {
+namespace builtin {
+
+class BuiltinRefOpResolver : public MutableOpResolver {
+ public:
+  BuiltinRefOpResolver();
+
+  const TfLiteRegistration* FindOp(tflite::BuiltinOperator op,
+                                   int version) const override;
+  const TfLiteRegistration* FindOp(const char* op, int version) const override;
+};
+
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_REGISTER_REF_H_
diff --git a/tensorflow/lite/testing/BUILD b/tensorflow/lite/testing/BUILD
index 22ffed43cc..fa25cfaa69 100644
--- a/tensorflow/lite/testing/BUILD
+++ b/tensorflow/lite/testing/BUILD
@@ -165,6 +165,7 @@ cc_library(
         "//tensorflow/lite:string_util",
         "//tensorflow/lite/delegates/flex:delegate",
         "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/kernels:reference_ops",
         "@com_google_absl//absl/strings",
     ],
 )
diff --git a/tensorflow/lite/testing/tflite_driver.cc b/tensorflow/lite/testing/tflite_driver.cc
index 4e11d49f25..ffe296432a 100644
--- a/tensorflow/lite/testing/tflite_driver.cc
+++ b/tensorflow/lite/testing/tflite_driver.cc
@@ -19,6 +19,8 @@ limitations under the License.
 #include "absl/strings/escaping.h"
 #include "tensorflow/lite/builtin_op_data.h"
 #include "tensorflow/lite/delegates/flex/delegate.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/kernels/register_ref.h"
 #include "tensorflow/lite/string_util.h"
 #include "tensorflow/lite/testing/split.h"
 
@@ -188,8 +190,15 @@ class TfLiteDriver::Expectation {
   size_t num_elements_;
 };
 
-TfLiteDriver::TfLiteDriver(bool use_nnapi, const string& delegate_name)
+TfLiteDriver::TfLiteDriver(bool use_nnapi, const string& delegate_name,
+                           bool reference_kernel)
     : use_nnapi_(use_nnapi) {
+  if (reference_kernel) {
+    resolver_.reset(new ops::builtin::BuiltinRefOpResolver);
+  } else {
+    resolver_.reset(new ops::builtin::BuiltinOpResolver);
+  }
+
   if (delegate_name == "FLEX") {
     delegate_ = FlexDelegate::Create();
   }
@@ -221,8 +230,7 @@ void TfLiteDriver::LoadModel(const string& bin_file_path) {
     Invalidate("Failed to mmap model " + bin_file_path);
     return;
   }
-  ops::builtin::BuiltinOpResolver builtins;
-  InterpreterBuilder(*model_, builtins)(&interpreter_);
+  InterpreterBuilder(*model_, *resolver_)(&interpreter_);
   if (!interpreter_) {
     Invalidate("Failed build interpreter");
     return;
diff --git a/tensorflow/lite/testing/tflite_driver.h b/tensorflow/lite/testing/tflite_driver.h
index 1da0533c57..537f20dfbf 100644
--- a/tensorflow/lite/testing/tflite_driver.h
+++ b/tensorflow/lite/testing/tflite_driver.h
@@ -16,10 +16,12 @@ limitations under the License.
 #define TENSORFLOW_LITE_TESTING_TFLITE_DRIVER_H_
 
 #include <map>
+#include <memory>
 
 #include "tensorflow/lite/delegates/flex/delegate.h"
 #include "tensorflow/lite/interpreter.h"
 #include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/kernels/register_ref.h"
 #include "tensorflow/lite/model.h"
 #include "tensorflow/lite/testing/test_runner.h"
 
@@ -29,7 +31,8 @@ namespace testing {
 // A test runner that feeds inputs into TF Lite and verifies its outputs.
 class TfLiteDriver : public TestRunner {
  public:
-  explicit TfLiteDriver(bool use_nnapi, const string& delegate = "");
+  explicit TfLiteDriver(bool use_nnapi, const string& delegate = "",
+                        bool reference_kernel = false);
   ~TfLiteDriver() override;
 
   void LoadModel(const string& bin_file_path) override;
@@ -65,6 +68,7 @@ class TfLiteDriver : public TestRunner {
 
   class Expectation;
 
+  std::unique_ptr<OpResolver> resolver_;
   std::unique_ptr<FlexDelegate> delegate_;
   bool use_nnapi_ = false;
   std::unique_ptr<FlatBufferModel> model_;
diff --git a/tensorflow/lite/testing/tflite_driver_test.cc b/tensorflow/lite/testing/tflite_driver_test.cc
index 6e953e5e19..81bf6700cb 100644
--- a/tensorflow/lite/testing/tflite_driver_test.cc
+++ b/tensorflow/lite/testing/tflite_driver_test.cc
@@ -56,6 +56,40 @@ TEST(TfliteDriverTest, SimpleTest) {
   ASSERT_TRUE(runner->CheckResults());
 }
 
+TEST(TfliteDriverTest, SingleAddOpTest) {
+  std::unique_ptr<TestRunner> runner(new TfLiteDriver(
+      /*use_nnapi*/ false, /*delegate*/ "", /*reference_kernel*/ true));
+
+  runner->SetModelBaseDir("tensorflow/lite");
+  runner->LoadModel("testdata/multi_add.bin");
+  ASSERT_TRUE(runner->IsValid());
+
+  ASSERT_THAT(runner->GetInputs(), ElementsAre(0, 1, 2, 3));
+  ASSERT_THAT(runner->GetOutputs(), ElementsAre(5, 6));
+
+  for (int i : {0, 1, 2, 3}) {
+    runner->ReshapeTensor(i, "1,2,2,1");
+  }
+  ASSERT_TRUE(runner->IsValid());
+
+  runner->AllocateTensors();
+
+  runner->SetInput(0, "0.1,0.2,0.3,0.4");
+  runner->SetInput(1, "0.001,0.002,0.003,0.004");
+  runner->SetInput(2, "0.001,0.002,0.003,0.004");
+  runner->SetInput(3, "0.01,0.02,0.03,0.04");
+
+  runner->ResetTensor(2);
+
+  runner->SetExpectation(5, "0.101,0.202,0.303,0.404");
+  runner->SetExpectation(6, "0.011,0.022,0.033,0.044");
+
+  runner->Invoke();
+  ASSERT_TRUE(runner->IsValid());
+
+  ASSERT_TRUE(runner->CheckResults());
+}
+
 }  // namespace
 }  // namespace testing
 }  // namespace tflite
-- 
GitLab


From d5ab00f2d6eb5c0a3c624312f58f4878230fb4a9 Mon Sep 17 00:00:00 2001
From: Alexandre Passos <apassos@google.com>
Date: Wed, 19 Dec 2018 16:04:16 -0800
Subject: [PATCH 867/873] Makes while_v2 compatible with the GradientTape.

Adds a test.

PiperOrigin-RevId: 226249565
---
 .../python/kernel_tests/while_v2_test.py      | 13 +++++++++
 tensorflow/python/ops/while_v2.py             | 28 +++++++++++--------
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/tensorflow/python/kernel_tests/while_v2_test.py b/tensorflow/python/kernel_tests/while_v2_test.py
index 6567ac9429..1f2c6f94c5 100644
--- a/tensorflow/python/kernel_tests/while_v2_test.py
+++ b/tensorflow/python/kernel_tests/while_v2_test.py
@@ -22,6 +22,7 @@ from absl.testing import parameterized
 
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.core.protobuf import rewriter_config_pb2
+from tensorflow.python.eager import backprop
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import meta_graph
@@ -116,6 +117,18 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
       self.assertSequenceEqual(self.evaluate(grady_1), [6.])
       self.assertSequenceEqual(self.evaluate(grady_2), [61.])
 
+  @test_util.run_deprecated_v1
+  def testGradientTape(self):
+    with backprop.GradientTape() as t:
+      x = constant_op.constant(2.)
+      t.watch(x)
+      ret = while_loop_v2(
+          lambda v: v < 4., lambda v: v * v, [x],
+          return_same_structure=False)  # x**2
+    grad = t.gradient(ret, x)
+    with self.cached_session() as sess:
+      self.assertAllEqual(sess.run(grad), 4.0)
+
   @test_util.run_deprecated_v1
   def testMultipleWhileLoops(self):
     x = constant_op.constant(2.)
diff --git a/tensorflow/python/ops/while_v2.py b/tensorflow/python/ops/while_v2.py
index 25fd2460ae..295686f814 100644
--- a/tensorflow/python/ops/while_v2.py
+++ b/tensorflow/python/ops/while_v2.py
@@ -242,8 +242,13 @@ def while_loop(cond,
 @ops.RegisterGradient("While")
 def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
   """The gradient of a While op produced by while_loop."""
-  cond_graph = _get_graph(op, "cond")
-  body_graph = _get_graph(op, "body")
+  # Note that op is not always the same as while_op because the gradient tape,
+  # for eager mode compatibility, forgets information about the proper op. Since
+  # the loop cannot run in eager mode, however, we can safely introspect into
+  # the graph here.
+  while_op = op.outputs[0].op
+  cond_graph = _get_graph(while_op, "cond")
+  body_graph = _get_graph(while_op, "body")
   orig_num_params = len(body_graph.outputs)
 
   maximum_iterations = op.get_attr(
@@ -287,16 +292,17 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
     new_inputs = body_grad_graph.empty_tensor_lists
     new_outputs = body_graph.outputs[orig_num_params:]
 
-    op._set_func_attr("cond", util.create_new_tf_function(cond_graph))
-    op._set_func_attr("body", util.create_new_tf_function(body_graph))
-    op._set_type_list_attr("T", body_graph.output_types)
-    op._set_shape_list_attr("output_shapes", body_graph.output_shapes)
-    op._add_while_inputs(new_inputs)
-    op._add_outputs([t.dtype for t in new_outputs],
-                    [t.shape for t in new_outputs])
+    while_op._set_func_attr("cond", util.create_new_tf_function(cond_graph))
+    while_op._set_func_attr("body", util.create_new_tf_function(body_graph))
+    while_op._set_type_list_attr("T", body_graph.output_types)
+    while_op._set_shape_list_attr("output_shapes", body_graph.output_shapes)
+    while_op._add_while_inputs(new_inputs)
+    while_op._add_outputs([t.dtype for t in new_outputs],
+                          [t.shape for t in new_outputs])
     _copy_handle_data(new_outputs, op.outputs[orig_num_params:])
 
-  captured_inputs = _resolve_grad_captures(body_graph, body_grad_graph, op)
+  captured_inputs = _resolve_grad_captures(body_graph, body_grad_graph,
+                                           while_op)
   loop_vars = args + captured_inputs
 
   def grad_cond(counter, max_iters, *unused_args):
@@ -314,7 +320,7 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
       util.create_new_tf_function(cond_grad_graph),
       util.create_new_tf_function(body_grad_graph),
       output_shapes=[t.shape for t in body_grad_graph.outputs],
-      name="%s_grad" % op.name)
+      name="%s_grad" % while_op.name)
 
   _copy_handle_data(body_grad_graph.outputs, outputs)
   util.maybe_set_lowering_attr(outputs[0].op)
-- 
GitLab


From c77e7e56de56c624116cf9eea340b4f96f032c85 Mon Sep 17 00:00:00 2001
From: Jared Duke <jdduke@google.com>
Date: Wed, 19 Dec 2018 16:08:17 -0800
Subject: [PATCH 868/873] Add FFT kernels to TFMobile

The marginal increase in binary size for an optimized ARM build
of libtensorflow_inference.so is ~100KB, a 0.5% increase. Clients
concerned about binary size should take advantage of op stripping
via selective registration.

PiperOrigin-RevId: 226250166
---
 tensorflow/contrib/makefile/tf_op_files.txt         |  1 +
 tensorflow/core/kernels/BUILD                       |  1 +
 tensorflow/lite/toco/tflite/operator_test.cc        |  5 +++--
 tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc | 12 ++++++++++++
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 655c7eefcb..2cd7d6d519 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -119,6 +119,7 @@ tensorflow/core/kernels/fake_quant_ops.cc
 tensorflow/core/kernels/fifo_queue.cc
 tensorflow/core/kernels/fifo_queue_op.cc
 tensorflow/core/kernels/fill_functor.cc
+tensorflow/core/kernels/fft_ops.cc
 tensorflow/core/kernels/function_ops.cc
 tensorflow/core/kernels/fused_batch_norm_op.cc
 tensorflow/core/kernels/gather_functor.cc
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index c8aa2b3265..6bbce457f3 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5595,6 +5595,7 @@ filegroup(
         "decode_bmp_op.cc",
         "depthtospace_op.cc",
         "dynamic_stitch_op.cc",
+        "fft_ops.cc",
         "in_topk_op.cc",
         "initializable_lookup_table.cc",
         "logging_ops.cc",
diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc
index f2f7221eb1..849eace8cc 100644
--- a/tensorflow/lite/toco/tflite/operator_test.cc
+++ b/tensorflow/lite/toco/tflite/operator_test.cc
@@ -614,10 +614,11 @@ TEST_F(OperatorTest, TestShouldExportAsFlexOp) {
   EXPECT_FALSE(ShouldExportAsFlexOp(false, "Conv2D"));
   EXPECT_TRUE(ShouldExportAsFlexOp(true, "Conv2D"));
   EXPECT_TRUE(ShouldExportAsFlexOp(true, "EluGrad"));
+  EXPECT_TRUE(ShouldExportAsFlexOp(true, "RFFT"));
   EXPECT_FALSE(ShouldExportAsFlexOp(true, "MyAwesomeCustomOp"));
-  // While the RFFT op is available on desktop, it is not in the kernel
+  // While the RandomShuffle op is available on desktop, it is not in the kernel
   // set available on mobile and should be excluded.
-  EXPECT_FALSE(ShouldExportAsFlexOp(true, "RFFT"));
+  EXPECT_FALSE(ShouldExportAsFlexOp(true, "RandomShuffle"));
 }
 
 TEST_F(OperatorTest, BuiltinMirrorPad) {
diff --git a/tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc b/tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc
index 039a918af1..3611c5d2f2 100644
--- a/tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc
+++ b/tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc
@@ -118,6 +118,9 @@ bool IsWhitelistedFlexOp(const std::string& tensorflow_op_name) {
           "FakeQuantWithMinMaxVarsPerChannel",
           "FakeQuantWithMinMaxVarsPerChannelGradient",
           "FakeQueue",
+          "FFT",
+          "FFT2D",
+          "FFT3D",
           "FIFOQueue",
           "FIFOQueueV2",
           "Fill",
@@ -143,6 +146,12 @@ bool IsWhitelistedFlexOp(const std::string& tensorflow_op_name) {
           "_HostSend",
           "Identity",
           "IdentityN",
+          "IFFT",
+          "IFFT2D",
+          "IFFT3D",
+          "IRFFT",
+          "IRFFT2D",
+          "IRFFT3D",
           "ImmutableConst",
           "InTopK",
           "InTopKV2",
@@ -311,6 +320,9 @@ bool IsWhitelistedFlexOp(const std::string& tensorflow_op_name) {
           "Reverse",
           "ReverseSequence",
           "ReverseV2",
+          "RFFT",
+          "RFFT2D",
+          "RFFT3D",
           "Round",
           "Rsqrt",
           "RsqrtGrad",
-- 
GitLab


From 31c0a32740d70bb0f4c1787e1087d4bd70d59dfe Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 16:21:53 -0800
Subject: [PATCH 869/873] Add `distribute_strategy` attribute to distributed
 variables.

PiperOrigin-RevId: 226252079
---
 .../python/collective_all_reduce_strategy.py  |   3 +-
 .../python/mirrored_strategy_multigpu_test.py |   9 ++
 .../python/parameter_server_strategy.py       |   3 +-
 .../python/parameter_server_strategy_test.py  |   1 +
 .../contrib/distribute/python/tpu_strategy.py |   9 +-
 .../contrib/distribute/python/values_test.py  |  26 ++--
 .../python/distribute/mirrored_strategy.py    |  19 +--
 tensorflow/python/distribute/values.py        | 114 +++++++++++-------
 8 files changed, 115 insertions(+), 69 deletions(-)

diff --git a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
index e6bbf0c308..12197c3d0d 100644
--- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
+++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py
@@ -222,7 +222,8 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
 
     # pylint: disable=protected-access
     return mirrored_strategy._create_mirrored_variable(
-        device_map, logical_device, _real_mirrored_creator, *args, **kwargs)
+        self._container_strategy(), device_map, logical_device,
+        _real_mirrored_creator, *args, **kwargs)
 
   def _distribute_dataset(self, dataset_fn):
     """Distributes the dataset to each local GPU."""
diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
index f4becf1d62..a6348d2457 100644
--- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
+++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py
@@ -693,6 +693,15 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
           distribution.extended.worker_devices[0]).read_value()))
       self.assertEqual(10.0, self.evaluate(ret_v_sum))
 
+  def testVarDistributeStrategy(self, distribution):
+    with distribution.scope():
+      mirrored = variable_scope.variable(1.0)
+      replica_local = variable_scope.variable(
+          1.0,
+          synchronization=variable_scope.VariableSynchronization.ON_READ)
+      self.assertIs(distribution, mirrored.distribute_strategy)
+      self.assertIs(distribution, replica_local.distribute_strategy)
+
 
 @combinations.generate(combinations.combine(
     distribution=[
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy.py b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
index 5029d59641..2fd0c4d6ea 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy.py
@@ -303,7 +303,8 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended):
 
         # Create and wrap the variable.
         v = next_creator(*args, **kwargs)
-        wrapped = values.AggregatingVariable(v, aggregation)
+        wrapped = values.AggregatingVariable(
+            self._container_strategy(), v, aggregation)
 
         # Add the wrapped variable to the requested collections.
         # The handling of eager mode and the global step matches
diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
index 805c643e67..e6ae16d856 100644
--- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
+++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py
@@ -716,6 +716,7 @@ class ParameterServerStrategyWithChiefTest(ParameterServerStrategyTestBase,
                              id(get_step), get_step.__class__.__name__)))
       self.assertIs(values.AggregatingVariable, type(created_step))
       self.assertIs(values.AggregatingVariable, type(get_step))
+      self.assertIs(distribution, created_step.distribute_strategy)
 
   def testValueContainer(self):
     distribution = parameter_server_strategy.ParameterServerStrategy(
diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py
index 7352203fe1..e081a735e2 100644
--- a/tensorflow/contrib/distribute/python/tpu_strategy.py
+++ b/tensorflow/contrib/distribute/python/tpu_strategy.py
@@ -68,7 +68,8 @@ def get_tpu_system_metadata(tpu_cluster_resolver):
 
 # TODO(jhseu): Deduplicate with MirroredStrategy?
 def _create_tpu_mirrored_variable(  # pylint: disable=missing-docstring
-    device_map, logical_device, real_mirrored_creator, *args, **kwargs):
+    strategy, device_map, logical_device, real_mirrored_creator,
+    *args, **kwargs):
   # Figure out what collections this variable should be added to.
   # We'll add the TPUMirroredVariable to those collections instead.
   collections = kwargs.pop("collections", None)
@@ -101,7 +102,8 @@ def _create_tpu_mirrored_variable(  # pylint: disable=missing-docstring
     devices = device_map.logical_to_actual_devices(logical_device)
     value_list = real_mirrored_creator(devices, *args, **kwargs)
     result = values.TPUMirroredVariable(
-        device_map, value_list, aggregation, logical_device=logical_device)
+        strategy, device_map, value_list, aggregation,
+        logical_device=logical_device)
 
   if not context.executing_eagerly():
     g = ops.get_default_graph()
@@ -475,7 +477,8 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended):
       return value_list
 
     return _create_tpu_mirrored_variable(
-        device_map, logical_device, _real_mirrored_creator, *args, **kwargs)
+        self._container_strategy(), device_map, logical_device,
+        _real_mirrored_creator, *args, **kwargs)
 
   def _reduce_to(self, reduce_op, value, destinations):
     if values._enclosing_tpu_context() is not None:  # pylint: disable=protected-access
diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py
index 0e8e86f6b9..73efb524b9 100644
--- a/tensorflow/contrib/distribute/python/values_test.py
+++ b/tensorflow/contrib/distribute/python/values_test.py
@@ -191,7 +191,7 @@ def _make_mirrored():
       v.append(variable_scope.get_variable(
           name=n, initializer=init, use_resource=True))
   device_map = values.ReplicaDeviceMap(devices)
-  mirrored = values.MirroredVariable(device_map, v,
+  mirrored = values.MirroredVariable(None, device_map, v,
                                      variable_scope.VariableAggregation.SUM)
   return v, device_map, mirrored
 
@@ -314,7 +314,7 @@ class RegroupAndSelectDeviceTest(test.TestCase):
       v = variable_scope.get_variable(
           name="v", initializer=1., use_resource=True)
       device_map = values.ReplicaDeviceMap((d,))
-    mirrored = values.MirroredVariable(device_map, (v,),
+    mirrored = values.MirroredVariable(None, device_map, (v,),
                                        variable_scope.VariableAggregation.SUM)
     result = values.regroup(device_map, (v,))
     self.assertIs(mirrored, result)
@@ -813,7 +813,7 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
     v = variable_scope.get_variable(
         name="v", initializer=[1.], use_resource=True)
     device_map = values.ReplicaDeviceMap(("/job:foo/device:CPU:0",))
-    mirrored = values.MirroredVariable(device_map, (v,),
+    mirrored = values.MirroredVariable(None, device_map, (v,),
                                        variable_scope.VariableAggregation.MEAN)
 
     self.assertEqual(v.name, mirrored.name)
@@ -952,7 +952,7 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
         v = variable_scope.get_variable(
             name="v", initializer=1., use_resource=True)
       mirrored = values.MirroredVariable(
-          values.ReplicaDeviceMap(("/device:GPU:0",)), (v,),
+          distribution, values.ReplicaDeviceMap(("/device:GPU:0",)), (v,),
           variable_scope.VariableAggregation.MEAN)
       sess.run(variables_lib.global_variables_initializer())
       sess.run({"complicated": mirrored})
@@ -961,14 +961,14 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
 _devices = ("/device:GPU:0", "/device:CPU:0")
 
 
-def _make_replica_local(method):
+def _make_replica_local(method, strategy=None):
   device_map = values.ReplicaDeviceMap(_devices)
   v = []
   for d, n, init in zip(_devices, ["v", "v/replica"], [1., 2.]):
     with ops.device(d):
       v.append(variable_scope.get_variable(
           name=n, initializer=init, use_resource=True))
-  replica_local = values.ReplicaLocalVariable(device_map, v, method)
+  replica_local = values.ReplicaLocalVariable(strategy, device_map, v, method)
   return v, replica_local
 
 
@@ -996,7 +996,7 @@ class ReplicaLocalVariablePropertiesTest(test.TestCase):
         name="v", initializer=[1.], use_resource=True)
     device_map = values.ReplicaDeviceMap(("/job:foo/device:CPU:0",))
     replica_local = values.ReplicaLocalVariable(
-        device_map, (v,), variable_scope.VariableAggregation.MEAN)
+        None, device_map, (v,), variable_scope.VariableAggregation.MEAN)
 
     self.assertEqual(v.name, replica_local.name)
     self.assertEqual(v.dtype, replica_local.dtype)
@@ -1043,7 +1043,7 @@ class ReplicaLocalVariableTest(test.TestCase, parameterized.TestCase):
   def testSaveAndRestoreReplicaLocalSumOneGraph(self, distribution):
     with self.cached_session() as sess:
       v, replica_local = _make_replica_local(
-          variable_scope.VariableAggregation.SUM)
+          variable_scope.VariableAggregation.SUM, distribution)
 
       # Overwrite the initial values.
       self._assign_replica_local(_devices, v, [3., 4.])
@@ -1066,7 +1066,7 @@ class ReplicaLocalVariableTest(test.TestCase, parameterized.TestCase):
 
     with self.cached_session() as sess:
       v, replica_local = _make_replica_local(
-          variable_scope.VariableAggregation.MEAN)
+          variable_scope.VariableAggregation.MEAN, distribution)
 
       # Overwrite the initial values.
       self._assign_replica_local(_devices, v, [3., 4.])
@@ -1086,7 +1086,7 @@ class ReplicaLocalVariableTest(test.TestCase, parameterized.TestCase):
     """Save variables with mirroring, returns save_path."""
     with self.session(graph=ops.Graph()) as sess:
       v, replica_local = _make_replica_local(
-          variable_scope.VariableAggregation.MEAN)
+          variable_scope.VariableAggregation.MEAN, distribution)
 
       # Overwrite the initial values.
       self._assign_replica_local(_devices, v, [3., 4.])
@@ -1102,7 +1102,7 @@ class ReplicaLocalVariableTest(test.TestCase, parameterized.TestCase):
   def _save_replica_local_sum(self, distribution):
     """Save variables with mirroring, returns save_path."""
     with self.session(graph=ops.Graph()) as sess:
-      v, replica_local = _make_replica_local("sum")
+      v, replica_local = _make_replica_local("sum", distribution)
 
       # Overwrite the initial values.
       self._assign_replica_local(_devices, v, [1.5, 2.])
@@ -1149,7 +1149,7 @@ class ReplicaLocalVariableTest(test.TestCase, parameterized.TestCase):
     """Restore to variables with mirroring in a fresh graph."""
     with self.session(graph=ops.Graph()) as sess:
       v, replica_local = _make_replica_local(
-          variable_scope.VariableAggregation.MEAN)
+          variable_scope.VariableAggregation.MEAN, distribution)
 
       # Overwrite the initial values.
       self._assign_replica_local(_devices, v, [7., 8.])
@@ -1164,7 +1164,7 @@ class ReplicaLocalVariableTest(test.TestCase, parameterized.TestCase):
     """Restore to variables with mirroring in a fresh graph."""
     with self.session(graph=ops.Graph()) as sess:
       v, replica_local = _make_replica_local(
-          variable_scope.VariableAggregation.SUM)
+          variable_scope.VariableAggregation.SUM, distribution)
 
       # Overwrite the initial values.
       self._assign_replica_local(_devices, v, [7., 8.])
diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py
index 4f29d916c4..71030d750b 100644
--- a/tensorflow/python/distribute/mirrored_strategy.py
+++ b/tensorflow/python/distribute/mirrored_strategy.py
@@ -193,8 +193,8 @@ def _call_for_each_replica(distribution, device_map, fn, args, kwargs):
   return values.regroup(device_map, tuple(t.main_result for t in threads))
 
 
-def _create_mirrored_variable(device_map, logical_device, real_mirrored_creator,
-                              *args, **kwargs):  # pylint: disable=g-missing-docstring
+def _create_mirrored_variable(strategy, device_map, logical_device,  # pylint: disable=missing-docstring
+                              real_mirrored_creator, *args, **kwargs):
   # Figure out what collections this variable should be added to.
   # We'll add the MirroredVariable to those collections instead.
   collections = kwargs.pop("collections", None)
@@ -245,11 +245,13 @@ def _create_mirrored_variable(device_map, logical_device, real_mirrored_creator,
     value_list = real_mirrored_creator(devices, *args, **kwargs)
 
     if is_replica_local:
-      result = values.ReplicaLocalVariable(device_map, value_list, aggregation,
-                                           logical_device=logical_device)
+      result = values.ReplicaLocalVariable(
+          strategy, device_map, value_list, aggregation,
+          logical_device=logical_device)
     else:
-      result = values.MirroredVariable(device_map, value_list, aggregation,
-                                       logical_device=logical_device)
+      result = values.MirroredVariable(
+          strategy, device_map, value_list, aggregation,
+          logical_device=logical_device)
 
   # Add the wrapped variable to the requested collections.
   # The handling of eager mode and the global step matches
@@ -531,8 +533,9 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
           value_list.append(v)
       return value_list
 
-    return _create_mirrored_variable(device_map, logical_device,
-                                     _real_mirrored_creator, *args, **kwargs)
+    return _create_mirrored_variable(
+        self._container_strategy(), device_map, logical_device,
+        _real_mirrored_creator, *args, **kwargs)
 
   def _distribute_dataset(self, dataset_fn):
     if self._local_mode:
diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py
index e0c575b01c..c3036b5aa5 100644
--- a/tensorflow/python/distribute/values.py
+++ b/tensorflow/python/distribute/values.py
@@ -413,6 +413,18 @@ def _assign_on_device(device, variable, tensor):
     return variable.assign(array_ops.identity(tensor))
 
 
+def _assert_strategy(strategy):
+  if not distribution_strategy_context.has_distribution_strategy():
+    raise RuntimeError(
+        'Need to be inside "with strategy.scope()" for %s' %
+        (strategy,))
+  current_strategy = distribution_strategy_context.get_distribution_strategy()
+  if current_strategy is not strategy:
+    raise RuntimeError(
+        "Mixing different tf.distribute.Strategy objects: %s is not %s" %
+        (current_strategy, strategy))
+
+
 DistributedVarOp = collections.namedtuple(
     "DistributedVarOp", ["name", "graph", "type"])
 
@@ -422,7 +434,8 @@ class DistributedVariable(DistributedDelegate):
   # TODO(josh11b): Support changing the set of variables if e.g. if new
   # devices are joining or a device is to leave.
 
-  def __init__(self, device_map, values, logical_device=None):
+  def __init__(self, strategy, device_map, values, logical_device=None):
+    self._distribute_strategy = strategy
     super(DistributedVariable, self).__init__(
         device_map, values, logical_device=logical_device)
     self._common_name = self.primary.name.split(":")[0]
@@ -519,6 +532,10 @@ class DistributedVariable(DistributedDelegate):
   def shape(self):
     return self.primary.shape
 
+  @property
+  def distribute_strategy(self):
+    return self._distribute_strategy
+
   def get_shape(self):
     return self.primary.get_shape()
 
@@ -530,7 +547,7 @@ class DistributedVariable(DistributedDelegate):
     # We want cross-replica code that does some var.op.X calls
     # to work (even if the current device isn't in self.devices), but
     # other uses of var.op in a cross-replica context to fail.
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       return DistributedVarOp(self.primary.op.name,
                               self.primary.op.graph,
                               self.primary.op.type)
@@ -541,8 +558,7 @@ class DistributedVariable(DistributedDelegate):
     return self.primary._in_graph_mode   # pylint: disable=protected-access
 
   def read_value(self):
-    strategy = distribution_strategy_context.get_distribution_strategy()
-    return strategy.extended.read_var(self)
+    return self._distribute_strategy.extended.read_var(self)
 
   def _should_act_as_resource_variable(self):
     """Pass resource_variable_ops.is_resource_variable check."""
@@ -579,9 +595,10 @@ class MirroredVariable(DistributedVariable, Mirrored,
                        checkpointable.CheckpointableBase):
   """Holds a map from device to variables whose values are kept in sync."""
 
-  def __init__(self, device_map, values, aggregation, logical_device=None):
+  def __init__(
+      self, strategy, device_map, values, aggregation, logical_device=None):
     super(MirroredVariable, self).__init__(
-        device_map, values, logical_device=logical_device)
+        strategy, device_map, values, logical_device=logical_device)
     self._aggregation = aggregation
 
   # The arguments to update() are automatically unwrapped so the update()
@@ -591,8 +608,9 @@ class MirroredVariable(DistributedVariable, Mirrored,
   # update_non_slot() function (like OptimizerV2._finish), which can
   # update several non-slot variables in one call.
   def _assign_func(self, *args, **kwargs):
+    _assert_strategy(self._distribute_strategy)
     f = kwargs.pop("f")
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       update_device = distribute_lib.get_update_device()
       if update_device is not None:
         # We are calling an assign function on the mirrored variable in an
@@ -602,10 +620,9 @@ class MirroredVariable(DistributedVariable, Mirrored,
 
       # We are calling assign on the mirrored variable in cross replica context,
       # use `strategy.update()` to update the variable.
-      strategy = distribution_strategy_context.get_distribution_strategy()
-      return strategy.update(self, f, *args, **kwargs)
+      return self._distribute_strategy.update(self, f, *args, **kwargs)
     else:
-      _assert_replica_context()
+      _assert_replica_context(self._distribute_strategy)
       # We are calling an assign function on the mirrored variable in replica
       # context.
       # We reduce the value we want to assign/add/sub. More details about how we
@@ -648,7 +665,7 @@ class MirroredVariable(DistributedVariable, Mirrored,
 
   def _as_graph_element(self):
     # pylint: disable=protected-access
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       return self.primary._as_graph_element()
     return self.get()._as_graph_element()
 
@@ -698,8 +715,10 @@ def _enclosing_tpu_context():
 class TPUMirroredVariable(checkpointable.CheckpointableBase):
   """Holds a map from device to TPU variables whose values are kept in sync."""
 
-  def __init__(self, device_map, values, aggregation, logical_device=None):
+  def __init__(
+      self, strategy, device_map, values, aggregation, logical_device=None):
     assert isinstance(device_map, DeviceMap)
+    self._distribute_strategy = strategy
     self._device_map = device_map
     self._values = tuple(values)
     if logical_device is None:
@@ -756,6 +775,10 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
   def values(self):
     return self._values
 
+  @property
+  def distribute_strategy(self):
+    return self._distribute_strategy
+
   # pylint: disable=multiple-statements
   def __add__(self, o): return self.read_value() + o
   def __radd__(self, o): return o + self.read_value()
@@ -853,15 +876,11 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
   # update_non_slot() function (like OptimizerV2._finish), which can
   # update several non-slot variables in one call.
   def _assign_func(self, *args, **kwargs):
-    strategy = distribution_strategy_context.get_distribution_strategy()
-    if strategy.__class__.__name__ != "TPUStrategy":
-      raise ValueError("You may only assign to a TPUMirroredVariable within a "
-                       "TPUStrategy.")
+    _assert_strategy(self._distribute_strategy)
     f = kwargs.pop("f")
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       if _enclosing_tpu_context() is not None:
-        return distribution_strategy_context.get_distribution_strategy().update(
-            self, f, *args, **kwargs)
+        return self._distribute_strategy.update(self, f, *args, **kwargs)
 
       update_device = distribute_lib.get_update_device()
       # We are calling update on the mirrored variable in cross replica context.
@@ -871,10 +890,9 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
         v = self._get(device=update_device)
         return f(v, *args, **kwargs)
 
-      return distribution_strategy_context.get_distribution_strategy().update(
-          self, f, *args, **kwargs)
+      return self._distribute_strategy.update(self, f, *args, **kwargs)
     else:
-      _assert_replica_context()
+      _assert_replica_context(self._distribute_strategy)
       # We are calling an assign function on the mirrored variable in replica
       # context.
       # We reduce the value we want to assign/add/sub. More details about how we
@@ -1019,7 +1037,7 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase):
 
   def _as_graph_element(self):
     # pylint: disable=protected-access
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       return self.primary._as_graph_element()
     return self._read_variable_op()
 
@@ -1117,7 +1135,7 @@ class _ReplicaLocalSaveable(saver.BaseSaverBuilder.SaveableObject):
     # We use a callable so that we don't have to evaluate this expression
     # in the case where we are trying to restore instead of save.
     def tensor():
-      strategy = distribution_strategy_context.get_distribution_strategy()
+      strategy = replica_local_variable.distribute_strategy
       return strategy.extended.read_var(replica_local_variable)
 
     spec = saver.BaseSaverBuilder.SaveSpec(
@@ -1133,8 +1151,12 @@ class _ReplicaLocalSaveable(saver.BaseSaverBuilder.SaveableObject):
     return self._replica_local_variable.assign(tensor)
 
 
-def _assert_replica_context():
-  if not distribution_strategy_context.get_replica_context():
+def _assert_replica_context(strategy):
+  replica_context = distribution_strategy_context.get_replica_context()
+  if not replica_context:
+    raise RuntimeError(
+        "Replica-local variables may only be assigned in a replica context.")
+  if replica_context.strategy is not strategy:
     raise RuntimeError(
         "Replica-local variables may only be assigned in a replica context.")
 
@@ -1143,21 +1165,22 @@ class ReplicaLocalVariable(DistributedVariable, PerReplica,
                            checkpointable.CheckpointableBase):
   """Holds a map from device to variables whose values are reduced on save."""
 
-  def __init__(self, device_map, values, aggregation, logical_device=None):
+  def __init__(
+      self, strategy, device_map, values, aggregation, logical_device=None):
     self._aggregation = aggregation
     super(ReplicaLocalVariable, self).__init__(
-        device_map, values, logical_device=logical_device)
+        strategy, device_map, values, logical_device=logical_device)
 
   def assign_sub(self, *args, **kwargs):
-    _assert_replica_context()
+    _assert_replica_context(self._distribute_strategy)
     return self.get().assign_sub(*args, **kwargs)
 
   def assign_add(self, *args, **kwargs):
-    _assert_replica_context()
+    _assert_replica_context(self._distribute_strategy)
     return self.get().assign_add(*args, **kwargs)
 
   def assign(self, *args, **kwargs):
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       # To preserve the sum across save and restore, we have to divide the
       # total across all devices when restoring a variable that was summed
       # when saving.
@@ -1167,7 +1190,7 @@ class ReplicaLocalVariable(DistributedVariable, PerReplica,
       return control_flow_ops.group(tuple(
           _assign_on_device(v.device, v, tensor) for v in self._values))
     else:
-      _assert_replica_context()
+      _assert_replica_context(self._distribute_strategy)
       return self.get().assign(*args, **kwargs)
 
   @property
@@ -1185,7 +1208,7 @@ class ReplicaLocalVariable(DistributedVariable, PerReplica,
 
   def _as_graph_element(self):
     # pylint: disable=protected-access
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       return self._get_cross_replica()
     return self.get()._as_graph_element()
 
@@ -1998,7 +2021,7 @@ class MultiStepContext(object):
         `_last_step_outputs_reduce_ops` for later interpreting of the
         outputs as already reduced or not.
     """
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       self._last_step_outputs_reduce_ops[name] = reduce_op
       if reduce_op is None:
         self._last_step_outputs[name] = output
@@ -2024,7 +2047,7 @@ class MultiStepContext(object):
 
   def set_non_tensor_output(self, name, output):
     """Set `output` with `name` to be captured as a non tensor output."""
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       self._non_tensor_outputs[name] = output
     else:
       def merge_fn(distribution, value):
@@ -2061,7 +2084,8 @@ def value_container(val):
 class AggregatingVariable(checkpointable.CheckpointableBase):
   """A wrapper around a variable that aggregates updates across replicas."""
 
-  def __init__(self, v, aggregation):
+  def __init__(self, strategy, v, aggregation):
+    self._distribute_strategy = strategy
     self._v = v
     # NOTE: We don't use "_distributed_container" here because we don't want
     # to trigger that code path in regroup().
@@ -2071,12 +2095,17 @@ class AggregatingVariable(checkpointable.CheckpointableBase):
   def get(self):
     return self._v
 
+  @property
+  def distribute_strategy(self):
+    return self._distribute_strategy
+
   def __getattr__(self, name):
     return getattr(self._v, name)
 
   def _assign_func(self, *args, **kwargs):
+    _assert_strategy(self._distribute_strategy)
     f = kwargs.pop("f")
-    if distribution_strategy_context.get_cross_replica_context():
+    if distribution_strategy_context.in_cross_replica_context():
       update_device = distribute_lib.get_update_device()
       if update_device is not None:
         # We are calling an assign function in an update context.
@@ -2084,24 +2113,23 @@ class AggregatingVariable(checkpointable.CheckpointableBase):
 
       # We are calling an assign function in cross replica context, wrap it in
       # an update call.
-      return distribution_strategy_context.get_distribution_strategy().update(
-          self, f, *args, **kwargs)
+      return self._distribute_strategy.update(self, f, *args, **kwargs)
     else:
-      assert distribution_strategy_context.get_replica_context()
+      replica_context = distribution_strategy_context.get_replica_context()
+      assert replica_context
       # We are calling an assign function in replica context.
       # We reduce the value we want to assign/add/sub. More details about how we
       # handle the different use cases can be found in the _reduce method.
       # We call the function with the reduced value.
       if self._aggregation == vs.VariableAggregation.NONE:
         raise ValueError("You must specify an aggregation method to update a "
-                         "a variable in Replica Context.")
+                         "a variable in replica context.")
 
       def merge_fn(strategy, value, *other_args, **other_kwargs):
         v = _apply_aggregation(strategy, value, self._aggregation, self)
         return strategy.update(self, f, v, *other_args, **other_kwargs)
 
-      return distribution_strategy_context.get_replica_context().merge_call(
-          merge_fn, args=args, kwargs=kwargs)
+      return replica_context.merge_call(merge_fn, args=args, kwargs=kwargs)
 
   def assign_sub(self, *args, **kwargs):
     assign_sub_fn = lambda var, *a, **kw: var.assign_sub(*a, **kw)
-- 
GitLab


From 8cd607c56d53e041b0b8e4d76fef194fa0bf624f Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 19 Dec 2018 16:22:42 -0800
Subject: [PATCH 870/873] Explicitly defining PI to fix windows build.

PiperOrigin-RevId: 226252206
---
 tensorflow/core/kernels/sampling_kernels.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/kernels/sampling_kernels.h b/tensorflow/core/kernels/sampling_kernels.h
index 4e79d89831..a03a2c88db 100644
--- a/tensorflow/core/kernels/sampling_kernels.h
+++ b/tensorflow/core/kernels/sampling_kernels.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_CORE_KERNELS_SAMPLING_KERNELS_H_
 
 #include <cmath>
+
 #include "tensorflow/core/lib/core/stringpiece.h"
 
 namespace tensorflow {
@@ -68,14 +69,15 @@ struct LanczosKernelFunc {
   // Pass 1 for Lanczos1 kernel, 3 for Lanczos3 etc.
   explicit LanczosKernelFunc(float _radius) : radius(_radius) {}
   float operator()(float x) const {
+    constexpr float kPI = 3.14159265359;
     x = std::abs(x);
     if (x > radius) return 0.0;
     // Need to special case the limit case of sin(x) / x when x is zero.
     if (x <= 1e-3) {
       return 1.0;
     }
-    return radius * std::sin(M_PI * x) * std::sin(M_PI * x / radius) /
-           (M_PI * M_PI * x * x);
+    return radius * std::sin(kPI * x) * std::sin(kPI * x / radius) /
+           (kPI * kPI * x * x);
   }
   float Radius() const { return radius; }
   const float radius;
-- 
GitLab


From 0445684a64d1bea8490a99eb9ce278176133df75 Mon Sep 17 00:00:00 2001
From: Skye Wanderman-Milne <skyewm@google.com>
Date: Wed, 19 Dec 2018 16:23:12 -0800
Subject: [PATCH 871/873] Enable control flow v2 inside TF functions.

This makes it so control flow v2 is enabled when inside a function and
otherwise disabled (i.e. if inside of a legacy graph), regardless of
whether TF 2.0 behavior is enabled. Note that in eager mode, Python
control flow is used instead of control flow graph ops.

PiperOrigin-RevId: 226252291
---
 tensorflow/python/framework/importer.py            |  3 ++-
 tensorflow/python/keras/layers/normalization.py    |  8 +++++---
 tensorflow/python/keras/layers/unified_gru_test.py |  2 ++
 tensorflow/python/ops/control_flow_ops.py          |  8 ++++++--
 tensorflow/python/ops/control_flow_util.py         | 12 +++++++++---
 tensorflow/python/ops/tensor_array_ops.py          |  2 +-
 tensorflow/python/saved_model/save_test.py         |  1 +
 7 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py
index c737bd4881..e6f86f7f93 100644
--- a/tensorflow/python/framework/importer.py
+++ b/tensorflow/python/framework/importer.py
@@ -28,6 +28,7 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import function
 from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.util import compat
 from tensorflow.python.util.deprecation import deprecated_args
 from tensorflow.python.util.tf_export import tf_export
@@ -266,7 +267,7 @@ def _ProcessNewOps(graph):
         coloc_op = graph._get_operation_by_name_unsafe(coloc_op_name)  # pylint: disable=protected-access
       except KeyError:
         # Do not error in TF2 if the colocation cannot be guaranteed
-        if tf2.enabled():
+        if tf2.enabled() or control_flow_util.EnableControlFlowV2(graph):
           continue
 
         raise ValueError('Specified colocation to an op that '
diff --git a/tensorflow/python/keras/layers/normalization.py b/tensorflow/python/keras/layers/normalization.py
index c3c5b2db7d..cee0da1b32 100644
--- a/tensorflow/python/keras/layers/normalization.py
+++ b/tensorflow/python/keras/layers/normalization.py
@@ -34,6 +34,7 @@ from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.keras.engine.input_spec import InputSpec
 from tensorflow.python.keras.utils import tf_utils
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
@@ -424,9 +425,10 @@ class BatchNormalizationV2(Layer):
           is_tpu_strategy = True
 
       # TODO(apassos,srbs,skyewm): the colocation constraints here are disabled
-      # because of a bug which leads cond_v2 to skip rewriting them creating
-      # conflicts.
-      if tf2.enabled() or is_tpu_strategy:
+      # because of a bug which leads cond_v2/while_v2 to skip rewriting them
+      # creating conflicts.
+      if (control_flow_util.EnableControlFlowV2(ops.get_default_graph()) or
+          is_tpu_strategy):
         cm = contextlib.contextmanager(lambda: (yield))()
       else:
         cm = ops.colocate_with(variable)
diff --git a/tensorflow/python/keras/layers/unified_gru_test.py b/tensorflow/python/keras/layers/unified_gru_test.py
index 6e77acc20a..57c1e151f9 100644
--- a/tensorflow/python/keras/layers/unified_gru_test.py
+++ b/tensorflow/python/keras/layers/unified_gru_test.py
@@ -423,6 +423,8 @@ class GRULayerV1OnlyTest(test.TestCase, parameterized.TestCase):
   @test_util.run_v1_only("b/120941292")
   @test_util.run_in_graph_and_eager_modes(config=_config)
   def test_statefulness_GRU(self):
+    self.skipTest('b/121275483')
+
     num_samples = 2
     timesteps = 3
     embedding_dim = 4
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
index 99216d7fb1..2b481bded5 100644
--- a/tensorflow/python/ops/control_flow_ops.py
+++ b/tensorflow/python/ops/control_flow_ops.py
@@ -2047,7 +2047,9 @@ def cond(pred,
   ```
 
   """
-  if util.ENABLE_CONTROL_FLOW_V2 and not context.executing_eagerly():
+  # Always enable control flow v2 if building a function, regardless of toggle.
+  if (util.EnableControlFlowV2(ops.get_default_graph()) and
+      not context.executing_eagerly()):
     return cond_v2.cond_v2(pred, true_fn, false_fn, name)
 
   # We needed to make true_fn/false_fn keyword arguments for
@@ -3482,7 +3484,9 @@ def while_loop(cond,
   ```
 
   """
-  if util.ENABLE_CONTROL_FLOW_V2 and not context.executing_eagerly():
+  # Always enable control flow v2 if building a function, regardless of toggle.
+  if (util.EnableControlFlowV2(ops.get_default_graph()) and
+      not context.executing_eagerly()):
     return while_v2.while_loop(
         cond,
         body,
diff --git a/tensorflow/python/ops/control_flow_util.py b/tensorflow/python/ops/control_flow_util.py
index 1747f06109..8f5442da5e 100644
--- a/tensorflow/python/ops/control_flow_util.py
+++ b/tensorflow/python/ops/control_flow_util.py
@@ -26,16 +26,22 @@ from __future__ import print_function
 import os
 import traceback
 
-from tensorflow.python import tf2
 from tensorflow.python.platform import tf_logging as logging
 
-ENABLE_CONTROL_FLOW_V2 = (tf2.enabled() or
-                          os.getenv("TF_ENABLE_CONTROL_FLOW_V2", "0") != "0" or
+ENABLE_CONTROL_FLOW_V2 = (os.getenv("TF_ENABLE_CONTROL_FLOW_V2", "0") != "0" or
                           os.getenv("TF_ENABLE_COND_V2", "0") != "0" or
                           os.getenv("TF_ENABLE_WHILE_V2", "0") != "0" or
                           os.getenv("TF_ENABLE_TENSOR_ARRAY_V2", "0") != "0")
 
 
+def EnableControlFlowV2(graph):
+  """Returns whether control flow v2 should be used in `graph`."""
+  # Enable new control flow in FuncGraphs (but not legacy _FuncGraphs).
+  # TODO(skyewm): do something better than hasattr without messing up imports.
+  return ENABLE_CONTROL_FLOW_V2 or (
+      graph.building_function and not hasattr(graph, "_captured"))
+
+
 def IsInXLAContext(op):
   try:
     xla_compile = op.get_attr("_XlaCompile")
diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py
index 85333ee6b5..37d5e6ae2a 100644
--- a/tensorflow/python/ops/tensor_array_ops.py
+++ b/tensorflow/python/ops/tensor_array_ops.py
@@ -1008,7 +1008,7 @@ class TensorArray(object):
     if context.executing_eagerly():
       implementation = _EagerTensorArray
     else:
-      if control_flow_util.ENABLE_CONTROL_FLOW_V2:
+      if control_flow_util.EnableControlFlowV2(ops.get_default_graph()):
         implementation = _GraphTensorArrayV2
       else:
         implementation = _GraphTensorArray
diff --git a/tensorflow/python/saved_model/save_test.py b/tensorflow/python/saved_model/save_test.py
index cffc1ec202..f5d4b3d5a9 100644
--- a/tensorflow/python/saved_model/save_test.py
+++ b/tensorflow/python/saved_model/save_test.py
@@ -378,6 +378,7 @@ class MemoryTests(test.TestCase):
 
   @test_util.assert_no_garbage_created
   def test_no_reference_cycles(self):
+    self.skipTest("b/121159261")
     x = constant_op.constant([[3., 4.]])
     y = constant_op.constant([2.])
     self._model.call(x, y)
-- 
GitLab


From 65093ecfe62147c5b03d43e47c14318cd076b9d1 Mon Sep 17 00:00:00 2001
From: Rick Chao <rchao@google.com>
Date: Wed, 19 Dec 2018 16:25:49 -0800
Subject: [PATCH 872/873] Remove the 'mode' argument in on_epoch_begin and
 on_epoch_end methods of CallbackList and Callback classes, to eliminate
 exposure of ModeKeys api that's intended to be TensorFlow-internal. Add doc
 that says the methods should only be called during TRAIN mode.

PiperOrigin-RevId: 226252687
---
 tensorflow/python/keras/callbacks.py          | 36 +++++++++----------
 .../python/keras/engine/training_arrays.py    |  6 ++--
 .../python/keras/engine/training_generator.py |  6 ++--
 ...orflow.keras.callbacks.-c-s-v-logger.pbtxt |  2 +-
 ...tensorflow.keras.callbacks.-callback.pbtxt |  4 +--
 ...flow.keras.callbacks.-early-stopping.pbtxt |  2 +-
 .../tensorflow.keras.callbacks.-history.pbtxt |  2 +-
 ...low.keras.callbacks.-lambda-callback.pbtxt |  4 +--
 ...ow.keras.callbacks.-model-checkpoint.pbtxt |  2 +-
 ...ras.callbacks.-reduce-l-r-on-plateau.pbtxt |  2 +-
 ...flow.keras.callbacks.-remote-monitor.pbtxt |  2 +-
 ...w.keras.callbacks.-terminate-on-na-n.pbtxt |  4 +--
 ...orflow.keras.callbacks.-c-s-v-logger.pbtxt |  2 +-
 ...tensorflow.keras.callbacks.-callback.pbtxt |  4 +--
 ...flow.keras.callbacks.-early-stopping.pbtxt |  2 +-
 .../tensorflow.keras.callbacks.-history.pbtxt |  2 +-
 ...low.keras.callbacks.-lambda-callback.pbtxt |  4 +--
 ...ow.keras.callbacks.-model-checkpoint.pbtxt |  2 +-
 ...ras.callbacks.-reduce-l-r-on-plateau.pbtxt |  2 +-
 ...flow.keras.callbacks.-remote-monitor.pbtxt |  2 +-
 ...w.keras.callbacks.-terminate-on-na-n.pbtxt |  4 +--
 21 files changed, 50 insertions(+), 46 deletions(-)

diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py
index a8fb87f5cc..53a51f5274 100644
--- a/tensorflow/python/keras/callbacks.py
+++ b/tensorflow/python/keras/callbacks.py
@@ -242,35 +242,35 @@ class CallbackList(object):
   def on_batch_end(self, batch, logs=None):
     self._call_batch_hook(_TRAIN, 'end', batch, logs=logs)
 
-  def on_epoch_begin(self, epoch, logs=None, mode='train'):
+  def on_epoch_begin(self, epoch, logs=None):
     """Calls the `on_epoch_begin` methods of its callbacks.
 
+    This function should only be called during TRAIN mode.
+
     Arguments:
         epoch: integer, index of epoch.
         logs: dict. Currently no data is passed to this argument for this method
           but that may change in the future.
-        mode: One of 'train'/'test'/'predict'
     """
-    if mode == _TRAIN:
-      logs = logs or {}
-      for callback in self.callbacks:
-        callback.on_epoch_begin(epoch, logs)
+    logs = logs or {}
+    for callback in self.callbacks:
+      callback.on_epoch_begin(epoch, logs)
     self._reset_batch_timing()
 
-  def on_epoch_end(self, epoch, logs=None, mode='train'):
+  def on_epoch_end(self, epoch, logs=None):
     """Calls the `on_epoch_end` methods of its callbacks.
 
+    This function should only be called during TRAIN mode.
+
     Arguments:
         epoch: integer, index of epoch.
         logs: dict, metric results for this training epoch, and for the
           validation epoch if validation is performed. Validation result keys
           are prefixed with `val_`.
-        mode: One of 'train'/'test'/'predict'
     """
-    if mode == _TRAIN:
-      logs = logs or {}
-      for callback in self.callbacks:
-        callback.on_epoch_end(epoch, logs)
+    logs = logs or {}
+    for callback in self.callbacks:
+      callback.on_epoch_end(epoch, logs)
 
   def on_train_batch_begin(self, batch, logs=None):
     """Calls the `on_train_batch_begin` methods of its callbacks.
@@ -437,29 +437,29 @@ class Callback(object):
   def on_batch_end(self, batch, logs=None):
     """A backwards compatibility alias for `on_train_batch_end`."""
 
-  def on_epoch_begin(self, epoch, logs=None, mode='train'):
+  def on_epoch_begin(self, epoch, logs=None):
     """Called at the start of an epoch.
 
-    Subclasses should override for any actions to run.
+    Subclasses should override for any actions to run. This function should only
+    be called during TRAIN mode.
 
     Arguments:
         epoch: integer, index of epoch.
         logs: dict. Currently no data is passed to this argument for this method
           but that may change in the future.
-        mode: One of 'train'/'test'/'predict'
     """
 
-  def on_epoch_end(self, epoch, logs=None, mode='train'):
+  def on_epoch_end(self, epoch, logs=None):
     """Called at the end of an epoch.
 
-    Subclasses should override for any actions to run.
+    Subclasses should override for any actions to run. This function should only
+    be called during TRAIN mode.
 
     Arguments:
         epoch: integer, index of epoch.
         logs: dict, metric results for this training epoch, and for the
           validation epoch if validation is performed. Validation result keys
           are prefixed with `val_`.
-        mode: One of 'train'/'test'/'predict'
     """
 
   def on_train_batch_begin(self, batch, logs=None):
diff --git a/tensorflow/python/keras/engine/training_arrays.py b/tensorflow/python/keras/engine/training_arrays.py
index af67444b01..47074e6087 100644
--- a/tensorflow/python/keras/engine/training_arrays.py
+++ b/tensorflow/python/keras/engine/training_arrays.py
@@ -32,6 +32,7 @@ from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.utils.generic_utils import make_batches
 from tensorflow.python.keras.utils.generic_utils import slice_arrays
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training.mode_keys import ModeKeys
 
 try:
   from scipy.sparse import issparse  # pylint: disable=g-import-not-at-top
@@ -251,7 +252,8 @@ def model_iteration(model,
     # Setup work for each epoch
     epoch_logs = {}
     model.reset_metrics()
-    callbacks.on_epoch_begin(epoch, epoch_logs, mode=mode)
+    if mode == ModeKeys.TRAIN:
+      callbacks.on_epoch_begin(epoch, epoch_logs)
     progbar.on_epoch_begin(epoch, epoch_logs)
 
     if use_steps:
@@ -371,7 +373,7 @@ def model_iteration(model,
 
     if mode == 'train':
       # Epochs only apply to `fit`.
-      callbacks.on_epoch_end(epoch, epoch_logs, mode=mode)
+      callbacks.on_epoch_end(epoch, epoch_logs)
       progbar.on_epoch_end(epoch, epoch_logs)
 
   callbacks._call_end_hook(mode)
diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py
index bc6a3e8dd0..03cbe91dc4 100644
--- a/tensorflow/python/keras/engine/training_generator.py
+++ b/tensorflow/python/keras/engine/training_generator.py
@@ -34,6 +34,7 @@ from tensorflow.python.keras.engine import training_utils
 from tensorflow.python.keras.utils import data_utils
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util import nest
 
 
@@ -170,7 +171,8 @@ def model_iteration(model,
     # Setup work for each epoch.
     model.reset_metrics()
     epoch_logs = {}
-    callbacks.on_epoch_begin(epoch, epoch_logs, mode=mode)
+    if mode == ModeKeys.TRAIN:
+      callbacks.on_epoch_begin(epoch, epoch_logs)
     progbar.on_epoch_begin(epoch, epoch_logs)
 
     for step in range(steps_per_epoch):
@@ -233,7 +235,7 @@ def model_iteration(model,
 
     if mode == 'train':
       # Epochs only apply to `fit`.
-      callbacks.on_epoch_end(epoch, epoch_logs, mode=mode)
+      callbacks.on_epoch_end(epoch, epoch_logs)
       progbar.on_epoch_end(epoch, epoch_logs)
 
   callbacks._call_end_hook(mode)
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
index a5804d3bbc..0725f606e2 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-callback.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-callback.pbtxt
index bbc02c4d71..14bfc3bedb 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-callback.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-callback.pbtxt
@@ -16,11 +16,11 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_predict_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
index 6182baf0a3..9812bad8f6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-early-stopping.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-history.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-history.pbtxt
index 9b1b068e22..5aa739391e 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-history.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-history.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-lambda-callback.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-lambda-callback.pbtxt
index 92440188c8..bf5bcb68df 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-lambda-callback.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-lambda-callback.pbtxt
@@ -17,11 +17,11 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_predict_batch_begin"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-model-checkpoint.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
index c10c236ad1..5ae176017b 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
index 0db6b8d371..0fed6fd236 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-remote-monitor.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-remote-monitor.pbtxt
index dac2049fe1..71cf7f4a49 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-remote-monitor.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-remote-monitor.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
index 2834b74e8a..d5a59d870a 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
@@ -17,11 +17,11 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_predict_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
index a5804d3bbc..0725f606e2 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-c-s-v-logger.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-callback.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-callback.pbtxt
index bbc02c4d71..14bfc3bedb 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-callback.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-callback.pbtxt
@@ -16,11 +16,11 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_predict_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
index 6182baf0a3..9812bad8f6 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-early-stopping.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-history.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-history.pbtxt
index 9b1b068e22..5aa739391e 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-history.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-history.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-lambda-callback.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-lambda-callback.pbtxt
index 92440188c8..bf5bcb68df 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-lambda-callback.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-lambda-callback.pbtxt
@@ -17,11 +17,11 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_predict_batch_begin"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-model-checkpoint.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
index c10c236ad1..5ae176017b 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-model-checkpoint.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
index 0db6b8d371..0fed6fd236 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-reduce-l-r-on-plateau.pbtxt
@@ -21,7 +21,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-remote-monitor.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-remote-monitor.pbtxt
index dac2049fe1..71cf7f4a49 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-remote-monitor.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-remote-monitor.pbtxt
@@ -17,7 +17,7 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
index 2834b74e8a..d5a59d870a 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.keras.callbacks.-terminate-on-na-n.pbtxt
@@ -17,11 +17,11 @@ tf_class {
   }
   member_method {
     name: "on_epoch_begin"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_epoch_end"
-    argspec: "args=[\'self\', \'epoch\', \'logs\', \'mode\'], varargs=None, keywords=None, defaults=[\'None\', \'train\'], "
+    argspec: "args=[\'self\', \'epoch\', \'logs\'], varargs=None, keywords=None, defaults=[\'None\'], "
   }
   member_method {
     name: "on_predict_batch_begin"
-- 
GitLab


From a2748231219a6c4582ce7e6593710db4c3eec176 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Wed, 19 Dec 2018 16:41:08 -0800
Subject: [PATCH 873/873] Adds eager support for MultiDeviceIterator

PiperOrigin-RevId: 226254965
---
 .../multi_device_iterator_test.py             | 85 ++++++++++++-------
 .../data/ops/multi_device_iterator_ops.py     | 73 ++++++++--------
 2 files changed, 95 insertions(+), 63 deletions(-)

diff --git a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
index 433ea620e1..66117cf5b9 100644
--- a/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
+++ b/tensorflow/python/data/kernel_tests/multi_device_iterator_test.py
@@ -23,6 +23,7 @@ from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import multi_device_iterator_ops
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
@@ -31,97 +32,109 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
 
 
-# TODO(b/117581999): Add eager coverage.
+# TODO(b/121264236): Once we have a mechanism to have multiple devices in eager
+# / V2 mode, we should remove this annotation and the run_v1_only annotations
+# as well.
+@test_util.run_all_in_graph_and_eager_modes
 class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testNoGetNext(self):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/cpu:2"])
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testBasic(self):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/cpu:2"])
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.assertEqual(i, self.evaluate(elem_on_1))
         self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testOneOnSameDevice(self):
     with ops.device("/cpu:0"):
       dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:0", "/cpu:1"])
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
 
     config = config_pb2.ConfigProto(device_count={"CPU": 2})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.assertEqual(i, self.evaluate(elem_on_1))
         self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testRepeatDevices(self):
     with ops.device("/cpu:0"):
       dataset = dataset_ops.Dataset.range(20)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/cpu:2", "/cpu:1", "/cpu:2"])
-    elements = multi_device_iterator.get_next()
-    elem_on_1, elem_on_2, elem_on_3, elem_on_4 = elements
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 20, 4):
+        elements = multi_device_iterator.get_next()
+        elem_on_1, elem_on_2, elem_on_3, elem_on_4 = elements
         self.assertEqual(i, self.evaluate(elem_on_1))
         self.assertEqual(i + 1, self.evaluate(elem_on_2))
         self.assertEqual(i + 2, self.evaluate(elem_on_3))
         self.assertEqual(i + 3, self.evaluate(elem_on_4))
       with self.assertRaises(errors.OutOfRangeError):
+        elements = multi_device_iterator.get_next()
+        elem_on_1, elem_on_2, elem_on_3, elem_on_4 = elements
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
         self.evaluate(elem_on_3)
         self.evaluate(elem_on_4)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testNotFullyDivisible(self):
     dataset = dataset_ops.Dataset.range(9)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/cpu:2"])
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 8, 2):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.assertEqual(i, self.evaluate(elem_on_1))
         self.assertEqual(i + 1, self.evaluate(elem_on_2))
+      elem_on_1 = multi_device_iterator.get_next("/cpu:1")
       self.assertEqual(8, self.evaluate(elem_on_1))
       with self.assertRaises(errors.OutOfRangeError):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testGetNextAsOptional(self):
+    if context.executing_eagerly():
+      return
+
     dataset = dataset_ops.Dataset.range(9)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/cpu:2"])
@@ -154,26 +167,31 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.InvalidArgumentError):
         self.evaluate(elem_on_2_t)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testUneven(self):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/cpu:2"], max_buffer_size=4)
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
+        elem_on_1 = multi_device_iterator.get_next("/cpu:1")
         self.assertEqual(i, self.evaluate(elem_on_1))
       for i in range(0, 10, 2):
+        elem_on_2 = multi_device_iterator.get_next("/cpu:2")
         self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testMultipleInitializations(self):
+    if context.executing_eagerly():
+      return
+
     with ops.device("/cpu:0"):
       epoch = array_ops.placeholder(dtypes.int64, shape=[])
       dataset1 = dataset_ops.Dataset.from_tensors(epoch).repeat(1000)
@@ -191,6 +209,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
         self.assertEqual([(i, 0), (i, 1)], self.evaluate([elem_on_1,
                                                           elem_on_2]))
 
+  @test_util.run_v1_only
   def testBasicGpu(self):
     if not test_util.is_gpu_available():
       self.skipTest("No GPU available")
@@ -198,18 +217,20 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/gpu:0"])
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
 
     config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.assertEqual(i, self.evaluate(elem_on_1))
         self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
+  @test_util.run_v1_only
   def testUnevenGpu(self):
     if not test_util.is_gpu_available():
       self.skipTest("No GPU available")
@@ -217,21 +238,24 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
     dataset = dataset_ops.Dataset.range(10)
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/gpu:0"], max_buffer_size=4)
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
 
     config = config_pb2.ConfigProto(device_count={"CPU": 2, "GPU": 1})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
+        elem_on_1 = multi_device_iterator.get_next("/cpu:1")
         self.assertEqual(i, self.evaluate(elem_on_1))
       for i in range(0, 10, 2):
+        elem_on_2 = multi_device_iterator.get_next("/gpu:0")
         self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
+  @test_util.run_v1_only
   def testGetNextAsOptionalGpu(self):
-    if not test_util.is_gpu_available():
+    if not test_util.is_gpu_available() or context.executing_eagerly():
       self.skipTest("No GPU available")
 
     dataset = dataset_ops.Dataset.range(9)
@@ -266,7 +290,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
       with self.assertRaises(errors.InvalidArgumentError):
         self.evaluate(elem_on_2_t)
 
-  @test_util.run_v1_only("b/120545219")
+  @test_util.run_v1_only
   def testOptimization(self):
     dataset = dataset_ops.Dataset.range(10)
     dataset = dataset.apply(optimization.assert_next(["MemoryCacheImpl"]))
@@ -279,18 +303,21 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase):
 
     multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
         dataset, ["/cpu:1", "/cpu:2"])
-    elem_on_1, elem_on_2 = multi_device_iterator.get_next()
 
     config = config_pb2.ConfigProto(device_count={"CPU": 3})
-    with self.test_session(config=config) as sess:
+    with self.test_session(config=config):
       self.evaluate(multi_device_iterator.initializer)
       for i in range(0, 10, 2):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.assertEqual(i, self.evaluate(elem_on_1))
         self.assertEqual(i + 1, self.evaluate(elem_on_2))
       with self.assertRaises(errors.OutOfRangeError):
+        elem_on_1, elem_on_2 = multi_device_iterator.get_next()
         self.evaluate(elem_on_1)
         self.evaluate(elem_on_2)
 
 
 if __name__ == "__main__":
+  ops.enable_eager_execution(
+      config=config_pb2.ConfigProto(device_count={"CPU": 3, "GPU": 1}))
   test.main()
diff --git a/tensorflow/python/data/ops/multi_device_iterator_ops.py b/tensorflow/python/data/ops/multi_device_iterator_ops.py
index 8192d53891..2682e4acd0 100644
--- a/tensorflow/python/data/ops/multi_device_iterator_ops.py
+++ b/tensorflow/python/data/ops/multi_device_iterator_ops.py
@@ -34,8 +34,7 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2):
   """A `dummy` generator dataset."""
 
   def __init__(self, shard_num, multi_device_iterator_resource, incarnation_id,
-               source_device, target_device, element_structure):
-    self._target_device = target_device
+               source_device, element_structure):
     self._structure = element_structure
 
     multi_device_iterator_string_handle = (
@@ -107,15 +106,14 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2):
     )
     self._finalize_captured_args = self._finalize_func.captured_inputs
 
-    with ops.device(self._target_device):
-      variant_tensor = gen_dataset_ops.generator_dataset(
-          self._init_captured_args,
-          self._next_captured_args,
-          self._finalize_captured_args,
-          init_func=self._init_func,
-          next_func=self._next_func,
-          finalize_func=self._finalize_func,
-          **dataset_ops.flat_structure(self))
+    variant_tensor = gen_dataset_ops.generator_dataset(
+        self._init_captured_args,
+        self._next_captured_args,
+        self._finalize_captured_args,
+        init_func=self._init_func,
+        next_func=self._next_func,
+        finalize_func=self._finalize_func,
+        **dataset_ops.flat_structure(self))
     super(_PerDeviceGenerator, self).__init__(variant_tensor)
 
   def _inputs(self):
@@ -155,10 +153,6 @@ class MultiDeviceIterator(object):
     Raises:
       RuntimeError: If run in Eager mode.
     """
-    if context.executing_eagerly():
-      # TODO(rohanj): Fix this. Tracking bug: b/116467184
-      raise RuntimeError("MultiDeviceIterator is not currently supported in "
-                         "Eager mode.")
     self._dataset = dataset._apply_options()  # pylint: disable=protected-access
     self._devices = devices
     self._source_device = source_device
@@ -187,27 +181,36 @@ class MultiDeviceIterator(object):
     # Create the per device iterators.
     self._device_iterators = []
     for i, device in enumerate(self._devices):
-      ds = _PerDeviceGenerator(
-          i, self._multi_device_iterator_resource, self._incarnation_id,
-          self._source_device_tensor, device, dataset._element_structure)  # pylint: disable=protected-access
-      if prefetch_buffer_size > 0:
-        ds = ds.prefetch(prefetch_buffer_size)
-      # TODO(jsimsa): Enable auto-tuning and optimizations when supported for
-      # non-CPU devices.
-      options = dataset_ops.Options()
-      options.experimental_autotune = False
-      options.experimental_optimization.apply_default_optimizations = False
-      ds = ds.with_options(options)
       with ops.device(device):
-        self._device_iterators.append(
-            dataset_ops.make_initializable_iterator(ds))
-
-    device_iterator_initializers = [
-        iterator.initializer for iterator in self._device_iterators
-    ]
-    self._initializer = control_flow_ops.group(*device_iterator_initializers)
+        ds = _PerDeviceGenerator(
+            i, self._multi_device_iterator_resource, self._incarnation_id,
+            self._source_device_tensor, dataset._element_structure)  # pylint: disable=protected-access
+        if prefetch_buffer_size > 0:
+          ds = ds.prefetch(prefetch_buffer_size)
+        # TODO(jsimsa): Enable auto-tuning and optimizations when supported for
+        # non-CPU devices.
+        options = dataset_ops.Options()
+        options.experimental_autotune = False
+        options.experimental_optimization.apply_default_optimizations = False
+        ds = ds.with_options(options)
+        if context.executing_eagerly():
+          self._device_iterators.append(dataset_ops.make_one_shot_iterator(ds))
+        else:
+          self._device_iterators.append(
+              dataset_ops.make_initializable_iterator(ds))
+
+    if not context.executing_eagerly():
+      device_iterator_initializers = [
+          iterator.initializer for iterator in self._device_iterators
+      ]
+      self._initializer = control_flow_ops.group(*device_iterator_initializers)
+
+  def get_next(self, device=None):
+    """Returns the next element given a `device`, else returns all in a list."""
+    if device is not None:
+      index = self._devices.index(device)
+      return self._device_iterators[index].get_next()
 
-  def get_next(self):
     result = []
     for i, device in enumerate(self._devices):
       with ops.device(device):
@@ -224,6 +227,8 @@ class MultiDeviceIterator(object):
 
   @property
   def initializer(self):
+    if context.executing_eagerly():
+      return control_flow_ops.no_op()
     return self._initializer
 
   @property
-- 
GitLab